Example no. 1
def make_data_for_prepro():
    # Compare neural-net accuracy under several preprocessing schemes.
    # LoadData, standard_data, fix_pixels, net_class, get_acc, create_csv and
    # barplot_preprocess are helpers defined elsewhere in this project;
    # plt is matplotlib.pyplot.
    accuracys = []
    training_sett, train_set_labelts, validation_set, validation_set_labels = LoadData(
        'labeled_images.mat', True, True)
    # training_set, train_set_labels, idst = LoadData('labeled_images.mat', True, False)
    # kknn_class = KNeighborsClassifier(weights='distance', n_neighbors=5)
    # logistic_regression_solver = sklearn.linear_model.LogisticRegression(penalty='l2', dual=False, tol=0.001, C=1.2, fit_intercept=True,
    #                                                                          intercept_scaling=1, class_weight=None, random_state=None, solver='newton-cg',
    #                                                                          max_iter=200, multi_class='ovr', verbose=0, warm_start=False, n_jobs=2)
    # svm_class = svm.SVC(kernel='rbf', C=50, shrinking = False,decision_function_shape='ovr', tol=0.001, max_iter=-1)

    standard_train_inputs = standard_data(training_sett)
    standard_valid_inputs = standard_data(validation_set)

    fixed_train_set = fix_pixels(training_sett)
    fixed_valid = fix_pixels(validation_set)

    # garbored_train_set = gabor_filter(training_sett)
    # garbored_valid_set = gabor_filter(validation_set)

    data_list = [(training_sett, validation_set),
                 (standard_train_inputs, standard_valid_inputs),
                 (fixed_train_set, fixed_valid)
                 ]  #,(garbored_train_set,garbored_valid_set)]
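    # Each pair is (training inputs, validation inputs) under one preprocessing
    # scheme: raw, standardized, and pixel-equalized.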
    for (t, v) in data_list:

        # accuracys.append(knn(t, train_set_labelts, v, validation_set_labels, False))
        # accuracys.append(logistic_regression(t,train_set_labelts , v, validation_set_labels, False))
        # accuracys.append(run_svm(t, train_set_labelts, v, validation_set_labels, False))
        # net_class (defined elsewhere) returns a trained network; activate()
        # yields one output vector per validation example.
        net_clf = net_class(t, train_set_labelts, v, validation_set_labels,
                            False)
        net_preds = []
        for in_data in v:
            net_preds.append(net_clf.activate(in_data))
        accuracys.append(get_acc(net_preds, validation_set_labels, True))
        print "done iter"

    create_csv(accuracys, 'barplot_pre_accuracy.csv')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    barplot_preprocess(ax, accuracys)
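
standard_data and fix_pixels are project helpers that this example does not define. A minimal sketch of what they might do, assuming standard_data performs per-feature standardization and fix_pixels performs per-image histogram equalization (hypothetical names and behavior, not the project's actual implementation):

import numpy as np

def standard_data_sketch(images):
    # Hypothetical: scale each feature column to zero mean, unit variance.
    images = np.asarray(images, dtype=np.float64)
    mean = images.mean(axis=0)
    std = images.std(axis=0)
    std[std == 0] = 1.0  # leave constant pixels untouched
    return (images - mean) / std

def fix_pixels_sketch(images, n_bins=256):
    # Hypothetical: equalize each image's intensity histogram so values
    # spread evenly over [0, 1].
    images = np.asarray(images, dtype=np.float64)
    out = np.empty_like(images)
    for i, img in enumerate(images):
        hist, edges = np.histogram(img, bins=n_bins, density=True)
        cdf = hist.cumsum()
        cdf /= cdf[-1]  # normalize the CDF to end at 1
        out[i] = np.interp(img, edges[:-1], cdf)
    return out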
Example no. 4
def run_my_votin(training_set,
                 train_set_labels,
                 validation_set=None,
                 validation_set_labels=None,
                 train=True):
    # Weighted soft-voting ensemble: bagged KNN, logistic regression and SVM,
    # plus a neural net, each model fed its own preprocessing of the inputs.
    # These two imports are unused in the function as written:
    from sklearn.ensemble import VotingClassifier
    from pybrain.datasets import ClassificationDataSet

    # Despite the None default, validation_set is required: it is preprocessed
    # unconditionally here.
    standard_valid_inputs = standard_data(validation_set)
    fixed_valid = fix_pixels(validation_set)
    equalize_and_standard_validation = standard_data(fixed_valid)
    if train:
        standard_train_inputs = standard_data(training_set)
        fixed_train_set = fix_pixels(training_set)
        equalize_and_standard = standard_data(fixed_train_set)

        kknn_class = KNeighborsClassifier(weights='distance', n_neighbors=11)
        # kknn_class.fit(standard_train_inputs, train_set_labels.ravel())
        logistic_regression_solver = sklearn.linear_model.LogisticRegression(
            penalty='l2',
            dual=False,
            tol=0.01,
            C=1.0,
            fit_intercept=True,
            intercept_scaling=1,
            class_weight=None,
            random_state=None,
            solver='newton-cg',
            max_iter=200,
            multi_class='ovr',
            verbose=0,
            warm_start=False,
            n_jobs=2)
        svm_class = svm.SVC(kernel='rbf',
                            C=50,
                            shrinking=False,
                            decision_function_shape='ovr',
                            tol=0.001,
                            max_iter=-1)

        print "train knn"
        bg1 = run_bagging(fixed_train_set, train_set_labels, kknn_class, None,
                          None, False)
        res_f = open('bg1knn.dump', 'wb')  # binary mode for pickle
        pickle.dump(bg1, res_f)
        res_f.close()
        print "Knn done"
        print "train Logistic Regression"
        bg2 = run_bagging(standard_train_inputs, train_set_labels,
                          logistic_regression_solver, None, None, False)
        res_f = open('bg2lr.dump', 'wb')
        pickle.dump(bg2, res_f)
        res_f.close()
        print "done bg LR"
        print "train SVM"
        bg3 = run_bagging(equalize_and_standard, train_set_labels, svm_class,
                          None, None, False)
        res_f = open('bg3svm.dump', 'wb')
        pickle.dump(bg3, res_f)
        res_f.close()
        print "done bg svm"
        print "train Neural-Nets"
        net_clf = net_class(standard_train_inputs, train_set_labels, None,
                            None, False)
        res_f = open('net.dump', 'wb')
        pickle.dump(net_clf, res_f)
        res_f.close()
        print "nets done"
    else:
        print "Load knn"
        res_1 = open('bg1knn.dump', 'rb')  # binary mode for pickle
        bg1 = pickle.load(res_1)
        res_1.close()
        print "knn done"
        print "Load LR"
        res_2 = open('bg2lr.dump', 'rb')
        bg2 = pickle.load(res_2)
        res_2.close()
        print "LR done"
        print "Load SVM"
        res_3 = open('bg3svm.dump', 'rb')
        bg3 = pickle.load(res_3)
        res_3.close()
        print "svm done"
        print "Load Neural-nets"
        res_4 = open('net.dump', 'rb')
        net_clf = pickle.load(res_4)
        res_4.close()
        print "net done"

    preds_arr = []
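    # Fixed soft-voting weights; 0.1 + 0.26 + 0.34 + 0.30 = 1.0, so the
    # combined scores below form a convex combination of the four models.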
    pred_weights = [0.1, 0.26, 0.34]
    net_weight = 0.30

    preds_arr.append(bg1.predict_proba(fixed_valid))
    preds_arr.append(bg2.predict_proba(standard_valid_inputs))
    preds_arr.append(bg3.predict_proba(equalize_and_standard_validation))

    net_preds = []
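    # net_clf.activate() returns the network's output activations, assumed
    # here to be one score per class (length 7).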
    for in_data in standard_valid_inputs:
        net_preds.append(net_clf.activate(in_data))

    # preds_arr.append(net_preds)
    # Weighted sum of the four models' per-class scores for each example.
    fin_pred = []
    for i in range(len(standard_valid_inputs)):
        tmp_np = np.zeros(7)  # one accumulator per class
        for w, pp in zip(pred_weights, preds_arr):
            tmp_np += pp[i] * w
        tmp_np += net_preds[i] * net_weight

        fin_pred.append(tmp_np)

    fin_labels = [(np.argmax(ar, axis=0) + 1) for ar in fin_pred]  # labels are 1-based
    create_csv(fin_labels, 'test_csv.csv')
    if validation_set_labels is not None:
        fin_acc, err = get_acc(fin_labels, validation_set_labels)
        print 'The final accuracy after bagging and voting is:', fin_acc

    # Re-encode the 1-based labels as one-of-k (one-hot) vectors.
    fin_one_of_k = []
    for c in fin_labels:
        carr = [int(i == c - 1) for i in range(0, 7)]
        fin_one_of_k.append(carr)
    return fin_one_of_k
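
The combination rule above can be isolated into a small, self-contained sketch. This is an illustration of weighted soft voting with 1-based labels and one-of-k re-encoding, not the project's code; the Dirichlet draws stand in for real predict_proba outputs:

import numpy as np

def weighted_soft_vote(prob_lists, weights, n_classes=7):
    # prob_lists: one (n_samples, n_classes) probability array per model.
    # weights: one scalar per model; chosen here to sum to 1.0.
    combined = np.zeros((len(prob_lists[0]), n_classes))
    for w, probs in zip(weights, prob_lists):
        combined += w * np.asarray(probs)
    labels = combined.argmax(axis=1) + 1                  # 1-based labels
    one_of_k = np.eye(n_classes, dtype=int)[labels - 1]   # one-hot rows
    return labels, one_of_k

# Toy usage: two fake models, 3 samples, 7 classes.
rng = np.random.RandomState(0)
p1 = rng.dirichlet(np.ones(7), size=3)
p2 = rng.dirichlet(np.ones(7), size=3)
labels, one_of_k = weighted_soft_vote([p1, p2], [0.6, 0.4])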