def run_svm(training_set,
            train_set_labels,
            validation_set,
            validation_set_labels,
            pre=True):
    from sklearn import decomposition

    # training_set, validation_set, train_set_labels, validation_set_labels = cross_validation.train_test_split(
    #                 all_data_in, all_data_labels, test_size = 0.3, random_state=1, stratify=ids)

    if pre:
        standard_train_inputs = fix_pixels(training_set)
        standard_valid_inputs = fix_pixels(validation_set)
    else:
        standard_train_inputs = training_set
        standard_valid_inputs = validation_set

    clf = svm.SVC(kernel='rbf',
                  C=50,
                  shrinking=False,
                  decision_function_shape='ovr',
                  tol=0.001,
                  max_iter=-1)

    clf.fit(standard_train_inputs, train_set_labels.ravel())

    accuracy = clf.score(standard_valid_inputs, validation_set_labels.ravel())

    # Persist the fitted classifier; binary mode is required for pickle dumps.
    res_f = open('trained_svm.dump', 'wb')
    pickle.dump(clf, res_f)
    res_f.close()
    print "the new best acc is:", accuracy, 'the params are g={}, c={}'.format(0, 50)
    return accuracy
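# run_svm only writes the fitted model to disk. A small loader is convenient for
# reuse; this is a sketch added here (not part of the original code), assuming
# the dump was produced by pickle in binary mode as above.
def load_trained_svm(path='trained_svm.dump'):
    # Read the pickled SVC back from disk (binary mode, matching the dump).
    dump_file = open(path, 'rb')
    clf = pickle.load(dump_file)
    dump_file.close()
    return clf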
def run_public_test_on(class_name):

    # Load the requested pickled model (binary mode, matching how it was dumped).
    if class_name == 'knn':
        res_1 = open('bg1knn.dump', 'rb')
        clf = pickle.load(res_1)
        res_1.close()
        print "knn done"
    elif class_name == 'lr':
        res_2 = open('bg2lr.dump', 'rb')
        clf = pickle.load(res_2)
        res_2.close()
        print "LR done"
    elif class_name == 'svm':
        res_3 = open('bg3svm.dump', 'rb')
        clf = pickle.load(res_3)
        res_3.close()
        print "svm done"
    elif class_name == 'nn':
        res_4 = open('bestNet.dump', 'rb')
        clf = pickle.load(res_4)
        res_4.close()
        print "net done"
    else:
        raise ValueError("unknown classifier name: {}".format(class_name))
    validation_set = LoadData('public_test_images.mat', False, False)
    fixed_valid = fix_pixels(validation_set)
    fin_pred = clf.predict_proba(fixed_valid)
    fin_labels = [(np.argmax(ar, axis=0) + 1) for ar in fin_pred]
    create_csv(fin_labels, 'res_csv.csv')
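# create_csv is called throughout this file but defined elsewhere. A minimal
# sketch of what it appears to do, assuming one prediction per row with a
# 1-based Id column; the exact header and layout are assumptions, not the
# original implementation.
def create_csv_sketch(labels, out_path):
    import csv
    out_file = open(out_path, 'wb')  # binary mode for the csv module on Python 2
    writer = csv.writer(out_file)
    writer.writerow(['Id', 'Prediction'])  # assumed header
    for i, label in enumerate(labels):
        writer.writerow([i + 1, label])
    out_file.close()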
def knn(training_inputs, training_labels, valid_inputs, valid_label, pre=True):
    knn_class = KNeighborsClassifier(weights='distance', n_neighbors=13)
    if pre:
        standard_train_inputs = fix_pixels(training_inputs)
        standard_valid_inputs = fix_pixels(valid_inputs)
    else:
        standard_train_inputs = training_inputs
        standard_valid_inputs = valid_inputs

    fitted_knn = knn_class.fit(standard_train_inputs,
                               np.ravel(training_labels))
    res_f = open('trained_knn.dump', 'wb')
    pickle.dump(fitted_knn, res_f)
    res_f.close()

    accuracy = knn_class.score(standard_valid_inputs, np.ravel(valid_label))

    print "Accuracy for knn is:{}".format(accuracy)
    return accuracy
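# fix_pixels is the main preprocessing step used above but is defined elsewhere.
# A minimal sketch, assuming it rescales each flattened image to the [0, 1]
# range; the real implementation may equalize histograms or differ entirely.
def fix_pixels_sketch(images):
    images = np.asarray(images, dtype=np.float64)
    # Per-image min/max normalisation, guarding against constant images.
    mins = images.min(axis=1, keepdims=True)
    maxs = images.max(axis=1, keepdims=True)
    return (images - mins) / np.maximum(maxs - mins, 1e-12)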
def make_data_for_prepro():
    # Compare the neural net's validation accuracy under the different
    # preprocessing variants (raw, standardized, pixel-fixed).
    accuracys = []
    training_set, train_set_labels, validation_set, validation_set_labels = LoadData(
        'labeled_images.mat', True, True)
    # training_set, train_set_labels, idst = LoadData('labeled_images.mat', True, False)
    # kknn_class = KNeighborsClassifier(weights='distance', n_neighbors=5)
    # logistic_regression_solver = sklearn.linear_model.LogisticRegression(penalty='l2', dual=False, tol=0.001, C=1.2, fit_intercept=True,
    #                                                                          intercept_scaling=1, class_weight=None, random_state=None, solver='newton-cg',
    #                                                                          max_iter=200, multi_class='ovr', verbose=0, warm_start=False, n_jobs=2)
    # svm_class = svm.SVC(kernel='rbf', C=50, shrinking=False, decision_function_shape='ovr', tol=0.001, max_iter=-1)

    standard_train_inputs = standard_data(training_set)
    standard_valid_inputs = standard_data(validation_set)

    fixed_train_set = fix_pixels(training_set)
    fixed_valid = fix_pixels(validation_set)

    # gabor_train_set = gabor_filter(training_set)
    # gabor_valid_set = gabor_filter(validation_set)

    data_list = [(training_set, validation_set),
                 (standard_train_inputs, standard_valid_inputs),
                 (fixed_train_set, fixed_valid)
                 ]  # ,(gabor_train_set, gabor_valid_set)]
    for (t, v) in data_list:

        # accuracys.append(knn(t, train_set_labels, v, validation_set_labels, False))
        # accuracys.append(logistic_regression(t, train_set_labels, v, validation_set_labels, False))
        # accuracys.append(run_svm(t, train_set_labels, v, validation_set_labels, False))
        net_clf = net_class(t, train_set_labels, v, validation_set_labels,
                            False)
        net_preds = []
        for in_data in v:
            net_preds.append(net_clf.activate(in_data))
        accuracys.append(get_acc(net_preds, validation_set_labels, True))
        print "done iter"

    create_csv(accuracys, 'barplot_pre_accuracy.csv')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    barplot_preprocess(ax, accuracys)
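# standard_data is the second preprocessing variant compared in
# make_data_for_prepro. A sketch under the assumption that it standardises each
# pixel column to zero mean and unit variance (equivalent to sklearn's
# StandardScaler fit on the given array); the real implementation may differ.
def standard_data_sketch(images):
    images = np.asarray(images, dtype=np.float64)
    mean = images.mean(axis=0)
    std = images.std(axis=0)
    return (images - mean) / np.where(std > 0, std, 1.0)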
def make_data_for_barplot():
    accuracys = []
    training_set, train_set_labels, validation_set, validation_set_labels = LoadData(
        'labeled_images.mat', True, True)
    # training_set, train_set_labels, idst = LoadData('labeled_images.mat', True, False)

    kknn_class = KNeighborsClassifier(weights='distance', n_neighbors=5)
    logistic_regression_solver = sklearn.linear_model.LogisticRegression(
        penalty='l2',
        dual=False,
        tol=0.001,
        C=1.2,
        fit_intercept=True,
        intercept_scaling=1,
        class_weight=None,
        random_state=None,
        solver='newton-cg',
        max_iter=200,
        multi_class='ovr',
        verbose=0,
        warm_start=False,
        n_jobs=2)
    svm_class = svm.SVC(kernel='rbf',
                        C=50,
                        shrinking=False,
                        decision_function_shape='ovr',
                        tol=0.001,
                        max_iter=-1)

    standard_train_inputs = standard_data(training_set)
    standard_valid_inputs = standard_data(validation_set)

    fixed_train_set = fix_pixels(training_set)
    fixed_valid = fix_pixels(validation_set)

    accuracys.append(
        knn(training_set, train_set_labels, validation_set,
            validation_set_labels))
    print "knn"
    accuracys.append(
        logistic_regression(training_set, train_set_labels, validation_set,
                            validation_set_labels))
    print "logistic_regression"
    accuracys.append(
        run_svm(training_set, train_set_labels, validation_set,
                validation_set_labels))
    print "run_svm"

    accuracys.append(
        run_bagging(fixed_train_set, train_set_labels, kknn_class, fixed_valid,
                    validation_set_labels, True))
    print " knn B"
    accuracys.append(
        run_bagging(standard_train_inputs, train_set_labels,
                    logistic_regression_solver, standard_valid_inputs,
                    validation_set_labels, True))
    print "logistic_regression  B"
    accuracys.append(
        run_bagging(fixed_train_set, train_set_labels, svm_class, fixed_valid,
                    validation_set_labels, True))
    print "run_svm  B"

    create_csv(accuracys, 'barplot_bagg_accuracy.csv')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    barplot_bagging(ax, accuracys)

    return accuracys
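# run_bagging is called both here (with evaluation) and in run_my_votin (to get
# a fitted ensemble back). A sketch of a wrapper matching that call pattern,
# assuming it simply wraps sklearn's BaggingClassifier; n_estimators and the
# meaning of the final flag are assumptions, not the original implementation.
def run_bagging_sketch(train_in, train_labels, base_clf,
                       valid_in=None, valid_labels=None, evaluate=True):
    from sklearn.ensemble import BaggingClassifier
    bagger = BaggingClassifier(base_estimator=base_clf, n_estimators=10)
    bagger.fit(train_in, np.ravel(train_labels))
    if evaluate:
        # Return the validation accuracy, as the bar-plot code expects.
        return bagger.score(valid_in, np.ravel(valid_labels))
    # Otherwise hand back the fitted ensemble, as run_my_votin expects.
    return bagger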
def run_my_votin(training_set,
                 train_set_labels,
                 validation_set=None,
                 validation_set_labels=None,
                 train=True):
    from sklearn.ensemble import VotingClassifier
    from pybrain.datasets import ClassificationDataSet

    standard_valid_inputs = standard_data(validation_set)
    fixed_valid = fix_pixels(validation_set)
    equalize_and_standard_validation = standard_data(fixed_valid)
    if train:
        standard_train_inputs = standard_data(training_set)
        fixed_train_set = fix_pixels(training_set)
        equalize_and_standard = standard_data(fixed_train_set)

        kknn_class = KNeighborsClassifier(weights='distance', n_neighbors=11)
        # kknn_class.fit(standard_train_inputs, train_set_labels.ravel())
        logistic_regression_solver = sklearn.linear_model.LogisticRegression(
            penalty='l2',
            dual=False,
            tol=0.01,
            C=1.0,
            fit_intercept=True,
            intercept_scaling=1,
            class_weight=None,
            random_state=None,
            solver='newton-cg',
            max_iter=200,
            multi_class='ovr',
            verbose=0,
            warm_start=False,
            n_jobs=2)
        svm_class = svm.SVC(kernel='rbf',
                            C=50,
                            shrinking=False,
                            decision_function_shape='ovr',
                            tol=0.001,
                            max_iter=-1)

        print "train knn"
        bg1 = run_bagging(fixed_train_set, train_set_labels, kknn_class, None,
                          None, False)
        res_f = open('bg1knn.dump', 'wb')
        pickle.dump(bg1, res_f)
        res_f.close()
        print "Knn done"
        print "train Logistic Regression"
        bg2 = run_bagging(standard_train_inputs, train_set_labels,
                          logistic_regression_solver, None, None, False)
        res_f = open('bg2lr.dump', 'wb')
        pickle.dump(bg2, res_f)
        res_f.close()
        print "done bg LR"
        print "train SVM"
        bg3 = run_bagging(equalize_and_standard, train_set_labels, svm_class,
                          None, None, False)
        res_f = open('bg3svm.dump', 'wb')
        pickle.dump(bg3, res_f)
        res_f.close()
        print "done bg svm"
        print "train Neural-Nets"
        net_clf = net_class(standard_train_inputs, train_set_labels, None,
                            None, False)
        res_f = open('net.dump', 'wb')
        pickle.dump(net_clf, res_f)
        res_f.close()
        print "nets done"
    else:
        print "Load knn"
        res_1 = open('bg1knn.dump', 'rb')
        bg1 = pickle.load(res_1)
        res_1.close()
        print "knn done"
        print "Load LR"
        res_2 = open('bg2lr.dump', 'rb')
        bg2 = pickle.load(res_2)
        res_2.close()
        print "LR done"
        print "Load SVM"
        res_3 = open('bg3svm.dump', 'rb')
        bg3 = pickle.load(res_3)
        res_3.close()
        print "svm done"
        print "Load Neural-nets"
        res_4 = open('net.dump', 'rb')
        net_clf = pickle.load(res_4)
        res_4.close()
        print "net done"

    preds_arr = []
    pred_weights = [0.1, 0.26, 0.34]
    net_weight = 0.30

    preds_arr.append(bg1.predict_proba(fixed_valid))
    preds_arr.append(bg2.predict_proba(standard_valid_inputs))
    preds_arr.append(bg3.predict_proba(equalize_and_standard_validation))

    net_preds = []
    for in_data in standard_valid_inputs:
        net_preds.append(net_clf.activate(in_data))

    # preds_arr.append(net_preds)
    fin_pred = []
    for i in range(len(standard_valid_inputs)):
        tmp_np = np.zeros(7)
        for w, pp in zip(pred_weights, preds_arr):
            tmp_np += pp[i] * w
        tmp_np += net_preds[i] * net_weight

        fin_pred.append(tmp_np)

    fin_labels = [(np.argmax(ar, axis=0) + 1) for ar in fin_pred]
    create_csv(fin_labels, 'test_csv.csv')
    if validation_set_labels is not None:
        fin_acc, err = get_acc(fin_labels, validation_set_labels)
        print 'The final accuracy after bagging and voting is:', fin_acc

    fin_one_of_k = []
    for c in fin_labels:
        carr = [int(i == c - 1) for i in range(0, 7)]
        fin_one_of_k.append(carr)
    return fin_one_of_k
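# A possible end-to-end driver for the pieces above: train and pickle the bagged
# models once, then reuse the dumps on the public test images. This is only a
# sketch of how the functions appear to fit together, not part of the original.
if __name__ == '__main__':
    train_in, train_labels, valid_in, valid_labels = LoadData(
        'labeled_images.mat', True, True)
    # First pass trains and dumps the bagged KNN/LR/SVM models and the net.
    run_my_votin(train_in, train_labels, valid_in, valid_labels, train=True)
    # Later calls reload a dumped model and label the public test set.
    run_public_test_on('svm')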