Example 1
    def __init__(self, symb, predlen, cat='RL', kwargs=None):

        self.symb = symb
        self.predlen = predlen
        self.kwargs = kwargs
        self.cat = cat

        # Build the requested learner, forwarding constructor kwargs only
        # when they were supplied. Note that the default cat 'RL' matches
        # none of the branches below, so no learner is created for it.
        kwargs = kwargs if kwargs is not None else {}
        if cat == 'RF':
            self.learner = RF.RandomForest(**kwargs)
        elif cat == 'KNN':
            self.learner = KNN.KNN(**kwargs)
        elif cat == 'SVM':
            self.learner = SVM.SVM(**kwargs)
        elif cat == 'NN':
            self.learner = NN.NN(**kwargs)
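
A minimal usage sketch of the constructor above; the enclosing class name (here Learner) and the kwargs values are illustrative assumptions, not confirmed by the source:

# Hypothetical usage; 'Learner' and the kwargs shown are assumptions.
model = Learner('AAPL', predlen=5, cat='SVM', kwargs={'c': 0.5})
print(model.learner)  # the underlying SVM.SVM instance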
Example 2
def test(path):
    li = []
    X, y = load_data_set(path)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.4,
                                                        random_state=0)
    kernel_ = kernel.Kernel.gaussian(0.5)
    svm = SVM.SVM(
        kernel=kernel_,
        c=0.5,
    )
    svm = svm.training(X_train, y_train)
    accuracy1 = svm.calc_accuracy(X_train, y_train)
    accuracy = svm.calc_accuracy(X_test, y_test)
    li.append((accuracy1, accuracy))
    # print("(%.3f%%, %.3f%%)" % (accuracy1 * 100, accuracy * 100))

    for i in range(1, 7):
        try:
            # always decompose with the Gaussian kernel; reassigning
            # kernel_ here would silently switch later decompositions
            # to the linear kernel
            X_train_decom, X_test_decom = svm_decom(X_train, y_train, X_test,
                                                    kernel_, i)
            lin_kernel = kernel.Kernel.linear()
            svm = SVM.SVM(
                kernel=lin_kernel,
                c=0.5,
            )
            svm = svm.training(X_train_decom, y_train)
            accuracy1 = svm.calc_accuracy(X_train_decom, y_train)
            accuracy = svm.calc_accuracy(X_test_decom, y_test)
            li.append((accuracy1, accuracy))
            # print("(%.3f%%, %.3f%%)" % (accuracy1 * 100,accuracy * 100))
        except Exception:
            li.append((0, 0))
    return li
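
A minimal sketch of how the list returned by test above can be read; the path is illustrative:

# Entry 0 is the raw Gaussian-kernel SVM; entries 1..6 come from the
# decomposed feature spaces tried in the loop above.
for dim, (train_acc, test_acc) in enumerate(test('data/dataset.csv')):
    print("dim=%d train=%.3f test=%.3f" % (dim, train_acc, test_acc))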
def mix_up():
    """creating a hybrid model mixing ML algorithms and neural net , it accumulates the errors from individual algorithms 
	and it increases the error in neural net so much that the model is not flexible enough to decide the trend in market
	or find patterns in data , model2 removes that redundant error ."""
    ind = 0
    for i in bar(xrange(len(x))):
        b_pred, b_y = bayes.naive_bayes_model(x[i], net=True)
        s_pred, s_y = SVM.svm_model(x[i], net=True)
        k_pred, k_y = KNN.knn_algo_model(x[i], net=True)
        print b_pred, b_y
        mix.new_net(s_pred, b_pred, k_pred, s_y, x[i])
    ind = 0
    report = pd.DataFrame(index=range(0),
                          columns=[
                              'Stock Name', 'accuracy', 'profit count',
                              'loss count', 'total no of rise',
                              'total number of loss'
                          ])
    for i in bar(xrange(len(x))):
        b_pred, b_y = bayes.naive_bayes_model(x[i], net=True, actual=True)
        s_pred, s_y = SVM.svm_model(x[i], net=True, actual=True)
        k_pred, k_y = KNN.knn_algo_model(x[i], net=True, actual=True)
        p_count, total_count_p, l_count, total_count_l, accuracy = mix.new_net(
            s_pred, b_pred, k_pred, s_y, x[i], create=False)
        report.loc[ind] = [
            x[i], accuracy, p_count, l_count, total_count_p, total_count_l
        ]
        ind = ind + 1
    print "Mean accuracy----------", report['accuracy'].mean()
    report.to_csv("./report/mix_result.csv")
Example 4
def plot_convergence_synthetic(svm, lr):
    for i in range(4):
        a = data[i]
        b = Label[i]
        n, m = np.shape(a)
        a = np.vstack((a, np.ones((1, m))))
        b = b.reshape(-1, 1)
        x = np.zeros((3, 1))
        a1 = a
        b1 = b
        if svm:
            x_bt, norm_bt, _, acc_bt, duration_bt, diteration_bt = SVM.BackTracking(
                a, b, x, args.l, args.d, False, a1, b1)
            # np.str is deprecated; use the builtin str, and close the
            # result files with context managers
            with open('svm_acc_bt_' + str(i) + '.txt', mode='a+') as f_bt:
                f_bt.write(
                    str(acc_bt) + ' ' + str(duration_bt) + ' ' +
                    str(diteration_bt))

            x_AGM, norm_AGM, _, acc_AGM, duration_AGM, diteration_AGM = SVM.AGM(
                a, b, x, args.l, args.d, False, a1, b1)
            with open('svm_acc_AGM_' + str(i) + '.txt', mode='a+') as f_AGM:
                f_AGM.write(
                    str(acc_AGM) + ' ' + str(duration_AGM) + ' ' +
                    str(diteration_AGM))

            x_BFGS, norm_BFGS, _, acc_BFGS, duration_BFGS, diteration_BFGS = SVM.G_BFGS(
                a, b, x, args.l, args.d, False, a1, b1)
            with open('svm_acc_BFGS_' + str(i) + '.txt', mode='a+') as f_BFGS:
                f_BFGS.write(
                    str(acc_BFGS) + ' ' + str(duration_BFGS) + ' ' +
                    str(diteration_BFGS))

            Draw.gradient_plot(len(norm_bt), norm_bt, 'backtracking')
            Draw.gradient_plot(len(norm_AGM), norm_AGM, 'AGM')
            Draw.gradient_plot(len(norm_BFGS), norm_BFGS, 'BFGS')
            plt.savefig('SVM_convergence_' + str(i))
            # plt.show()
            plt.close()
        if lr:
            x_AGM, norm_AGM, _, acc_AGM, duration_AGM, diteration_AGM = LR.AGM(
                a, b, x, args.l, False, a1, b1)
            with open('lr_acc_AGM_' + str(i) + '.txt', mode='a+') as f_AGM:
                f_AGM.write(
                    str(acc_AGM) + ' ' + str(duration_AGM) + ' ' +
                    str(diteration_AGM))

            x_L_BFGS, norm_L_BFGS, _, acc_L_BFGS, duration_L_BFGS, diteration_L_BFGS = LR.L_BFGS(
                a, b, x, args.m, args.l, False, a1, b1)
            with open('lr_acc_LBFGS_' + str(i) + '.txt', mode='a+') as f_L_BFGS:
                f_L_BFGS.write(
                    str(acc_L_BFGS) + ' ' + str(duration_L_BFGS) + ' ' +
                    str(diteration_L_BFGS))

            Draw.gradient_plot(len(norm_AGM), norm_AGM, 'AGM')
            Draw.gradient_plot(len(norm_L_BFGS), norm_L_BFGS, 'L_BFGS')
            plt.savefig('lr_convergence_' + str(i))
            # plt.show()
            plt.close()
Example 5
def start_training(X, y):
    '''
    LogisticRegression.logistic_regression_classifier(X, y)
    
    DecisionTree.decision_tree_classifier(X, y)
    
    KNN.knn_classifier(X,y)
    '''
    SVM.svm_classifier(X, y)
def main():
    init()
    for i in range(10):
        data_quantifier.split_categorical_data(0.4)
    transformer.transform_secondary_structure()
    Verifier.verify_validity("conversion")
    data_quantifier.quantify_data()
    SVM.create_and_store_svm()
    Prediction.predict_and_test()
Example 7
def test(data):
	import numpy as np 
	import feature_extraction
	import SVM

	data = np.ravel(data)

	features = feature_extraction.mfcc_convert(np,data)
	# print(features[:5])
	svc = SVM.control_information()
	print("Predicted as :",SVM.test(svc,features))
Example 8
def test():
    with closing(connect_db()) as db:
        svm = SVM(C=1)
        fill_negative_votes()
        X, Y = get_feature_vecs(db)

        # Divide up X into S chunks
        N = len(X)
        S = N
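        # With S = N every fold holds out exactly one sample, i.e. this
        # is leave-one-out cross-validation.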

        # Cross-validation: get the average number of misclassified examples
        count = 0
        total_incorrect = 0
        for s in range(S):
            print "iter:", count
            size_of_fold = int(math.ceil(1.0 * N / S))
            start = s*size_of_fold
            end = start + size_of_fold
            if end > N:
                print "end > N"
                end = N
            print "range:", start, end
            holdoutX = X[start:end,:]
            trainingX = np.concatenate( (X[0:start], X[end:N]) )

            holdoutY = Y[start:end]
            trainingY = np.concatenate( (Y[0:start], Y[end:N]) )

            print "len holdoutX:", len(holdoutX)
            print "len trainingX;", len(trainingX)
            print "len holdoutY:", len(holdoutY)
            print "len trainingY;", len(trainingY)

            svm.train_dual(trainingX, trainingY)

            num_misclass = svm.num_incorrect(holdoutX, holdoutY)
            total_incorrect += num_misclass
            print "Num incorrect:", num_misclass

            count +=1
        print "Total misclassified with SVM:", 1.0 * total_incorrect

        # Now use keyword classification
        votes = get_all_votes()
        num_incorrect = 0
        for vote, event, doc in votes:
            classify = keyword_classify(event, doc)
            if classify != vote:
                num_incorrect += 1
            print "Classified as:", classify, "actual:", vote

        print "Total misclassified with keyword approach:", num_incorrect
Example 9
def task_2_results():
    kf = KFold(n_folds=k)
    accuracies = []
    for train_index, test_index in kf.split(y):
        X_train = X_manual[train_index]
        X_test = X_manual[test_index]
        y_train = y[train_index]
        y_test = y[test_index]
        SVM.learn_svm(X_train, y_train, prefix+"task_2_model")
        accuracies.append(SVM.test_svm_accuracy(X_test, y_test, prefix+"task_2_model"))
    print "Accuracy for task_2:", np.mean(accuracies), "+-", np.std(accuracies)
    return accuracies
Example 10
def test_model():
    start_time = time.time()
    # Read all data
    (emails, y) = read_email_data('txt_lists/E-MAILS.txt',
                                  'txt_lists/LABELS.txt')
    (test_emails, test_labels) = read_email_data('txt_lists/TESTS.txt',
                                                 'txt_lists/TEST_LABELS.txt')
    websites = read_website_list('txt_lists/websites.txt')

    # get the website that each email is about
    intended_websites = find_intended_websites(websites, emails)

    #a dictionary with the actual class names
    class_dict = create_class_dict()

    #check for easy typos
    spell_chck = SpellChecker('en')
    for word in open('websites.txt').read().split('\n'):
        spell_chck.add(word)
    emails = [detect_and_correct_typos(spell_chck, mail) for mail in emails]
    test_emails = [
        detect_and_correct_typos(spell_chck, mail) for mail in test_emails
    ]

    #extract the needed features from the datasets
    feature_extractor = extract_features.FeatureExtractor()

    # if we loaded it from disk, don't retrain it; read the trained model data
    if not feature_extractor.has_vocab:
        (train_features,
         ) = feature_extractor.extract_email_train_features(emails)
    else:
        #feature_names = feature_extractor.vect.get_feature_names()
        train_features = feature_extractor.vect.training_data_features

    #extract the test data
    test_features = feature_extractor.extract_email_test_features(test_emails)

    #classify
    (clf, already_trained) = SVM.get_classifier()

    if not already_trained:
        SVM.train_clf(clf, train_features, y)

    train_guesses = SVM.classify(clf, train_features)
    test_guesses = SVM.classify(clf, test_features)

    print('training performance: ',
          SVM.eval_performance(train_guesses, y, class_dict))
    print('test performance: ',
          SVM.eval_performance(test_guesses, test_labels, class_dict))

    feature_extractor.save_vectorizer()
    SVM.save_classifier_to_disk(clf)

    print("--- %s seconds ---" % (time.time() - start_time))
Example 11
File: main.py Project: Hmlly/AIML
def predicting_using_SVM(train_mat_with, train_mat_without, train_por_with,
                         train_por_without, test_mat_with, test_mat_without,
                         test_por_with, test_por_without, kernel='Linear', c=1.0):

    # Divide into labels and sets
    label_mat_with, train_mat_with_t = extract_label(train_mat_with)
    label_mat_without, train_mat_without_t = extract_label(train_mat_without)
    label_por_with, train_por_with_t = extract_label(train_por_with)
    label_por_without, train_por_without_t = extract_label(train_por_without)

    # To change kernel function, use 'Quadratic' or 'Gaussian' instead
    predictlabel_mat_with = SVM.svm_solver(train_mat_with_t, label_mat_with, test_mat_with, c, kernel=kernel)
    predictlabel_mat_without = SVM.svm_solver(train_mat_without_t, label_mat_without, test_mat_without, c, kernel=kernel)
    predictlabel_por_with = SVM.svm_solver(train_por_with_t, label_por_with, test_por_with, c, kernel=kernel)
    predictlabel_por_without = SVM.svm_solver(train_por_without_t, label_por_without, test_por_without, c, kernel=kernel)

    # get measurement
    f_score1, accuracy1 = SVM.calculate_measurements(predictlabel_mat_without, test_mat_without)
    f_score2, accuracy2 = SVM.calculate_measurements(predictlabel_mat_with, test_mat_with)
    f_score3, accuracy3 = SVM.calculate_measurements(predictlabel_por_without, test_por_without)
    f_score4, accuracy4 = SVM.calculate_measurements(predictlabel_por_with, test_por_with)

    # show result
    print('Kernel function type:' + kernel)
    print('SVM Mat Without G1, G2: Accuracy: ' + str(accuracy1) + '  f_score: ' + str(f_score1))
    print('SVM Mat With G1, G2: Accuracy: ' + str(accuracy2) + '  f_score: ' + str(f_score2))
    print('SVM Por Without G1, G2: Accuracy: ' + str(accuracy3) + '  f_score: ' + str(f_score3))
    print('SVM Por With G1, G2: Accuracy: ' + str(accuracy4) + '  f_score: ' + str(f_score4))

    return 0
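
As the comment in the function notes, the kernel is switchable. A hedged example call with the same SVM.svm_solver signature; the argument values are illustrative:

# Illustrative only: re-run the first prediction with a Gaussian kernel.
predictlabel_mat_with = SVM.svm_solver(train_mat_with_t, label_mat_with,
                                       test_mat_with, 1.0, kernel='Gaussian')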
Example 12
    def fit_svm(self):
        from numpy import linalg

        def linear_kernel(x1, x2):
            return np.dot(x1, x2)

        def polynomial_kernel(x, y, p=3):
            return (1 + np.dot(x, y))**p

        def gaussian_kernel(x, y, sigma=5.0):
            return np.exp(-linalg.norm(x - y)**2 / (2 * (sigma**2)))

        pos_samples = self._posdata.drop("class", axis=1)
        pos_labels = self._posdata.loc[:, "class"]
        nag_samples = self._nagdata.drop("class", axis=1)
        nag_labels = self._nagdata.loc[:, "class"]
        pos_array = pos_samples.values
        pos_arraylabels = np.ones((len(pos_samples), 1))
        nag_array = nag_samples.values
        nag_arraylabels = np.ones((len(nag_samples), 1)) * -1
        # clf=SVM.SVM(gaussian_kernel)
        # clf = SVM.SVM(C=1)
        clf = SVM.SVM()
        x_train = np.vstack((pos_array, nag_array))
        y_train = np.vstack((pos_arraylabels, nag_arraylabels))
        clf.fit(x_train, y_train)
        y_predict = clf.predict(x_train)
        # flatten both sides so the comparison is elementwise, and divide
        # by the number of training samples
        correct = np.sum(np.ravel(y_predict) == np.ravel(y_train))
        acc = correct / float(len(y_train))
Example 14
def showVocabDistribution(filename, n_stopword):
    counter = 0
    f = open('data/stopDict_' + filename + '.json', 'r')
    stopDict = json.load(f)
    f.close()

    n_vocab = SVM.getDictSize("VocabDict", filename)

    sum_stopword = 0
    sum_vocab = 0
    sum_other = 0
    for key, value in sorted(stopDict.items(),
                             key=lambda x: int(x[1]),
                             reverse=True):
        if counter < n_stopword:
            sum_stopword += int(value)
        elif counter < n_vocab + n_stopword:
            sum_vocab += int(value)
        else:
            sum_other += int(value)
        counter += 1
    print sum_stopword
    print sum_vocab
    print sum_other
    total = float(sum_stopword + sum_vocab + sum_other)
    print "stopword :", float(sum_stopword) / total * 100, "%"
    print "vocab :", float(sum_vocab) / total * 100, "%"
    print "other :", float(sum_other) / total * 100, "%"
Example 15
def s(x):
    log1, log2 = logistic_regression.predict(x)
    svm1, svm2 = SVM.predict(x)
    nb1, nb2 = NaiveBayes.predict(x)
    # stack the six per-model scores as columns of the meta-feature matrix
    X = np.concatenate((log1.reshape(len(log1), 1), log2.reshape(len(log2), 1),
                        svm1.reshape(len(svm1), 1), svm2.reshape(len(svm2), 1),
                        nb1.reshape(len(nb1), 1), nb2.reshape(len(nb2), 1)),
                       axis=1)
    prediction = model.predict(X)
    return prediction
Example 16
def question2():
    totals = [0, 0, 0, 0]  # accuracy sums: [logistic, adaboost, svm, knn]
    for i in range(5):
        for j in range(i+1,5):
            print("i: ", i , "j: ", j)
            S = sp
            for k in range(5):
                if k != i and k != j:
                    S = S[S['ethnicity'] != k]
            # print(S)
            X = S[['gender', 'education', 'lunch', 'test_preparation_course', 'math', 'reading', 'writing']]
            X = MinMaxScaler().fit_transform(X)
            y = S['ethnicity']

            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

            kernel_mode = 'linear'
            log_pred = logistic_regression.run_log(X_train, X_test, y_train)
            ad_pred = adaboost_test.run_adaboost(X_train, X_test, y_train)
            svm_pred = SVM.run_svm(X_train, X_test, y_train, kernel_mode)
            knn_pred = KNN.run_KNN(X_train, X_test, y_train)

            totals[0] += metrics.accuracy_score(y_test, log_pred)
            totals[1] += metrics.accuracy_score(y_test, ad_pred)
            totals[2] += metrics.accuracy_score(y_test, svm_pred)
            totals[3] += metrics.accuracy_score(y_test, knn_pred)

    # average over the 10 = C(5, 2) ethnicity pairs compared above
    for i in range(4):
        totals[i] = totals[i] / 10
    print(totals)
Example 17
def get_predict(svm, X):
    m = X.shape[0]
    predict = []
    for i in range(m):
        pre_y = SVM.svm_predict(svm, X[i, :])
        predict.append(sign(pre_y[0, 0]))
    return mat(predict).T
Example 18
def ClassificationModels():
    classificationPreprocessingVar = CPre.Preprocessing('tmdb_5000_movies_classification.csv', 'tmdb_5000_credits.csv')
    classificationPreprocessingVar.reformat()
    #classificationPreprocessingVar.deleteMissigData()
    #classificationPreprocessingVar.meanNormalization()
    X_train, y_train, X_test, y_test = classificationPreprocessingVar.GetData()

    obj1 = LR.Logistic_Regression(X_train, y_train, X_test, y_test)
    obj1.FitModel()
    obj1.TrainAndTestModel()

    obj2 = svm.SVM(X_train, y_train)
    obj2.FitModel()
    obj2.TestModel(X_test, y_test)

    obj3 = knn.Knn_Classifier(X_train, y_train, 5)
    obj3.FitModel()
    obj3.TestModel(X_test, y_test)

    obj5 = dt.DecisionTreeClassifier(X_train, y_train)
    obj5.FitModel()
    obj5.TestModel(X_test, y_test)

    obj6 = rf.RandomForestClassifier(X_train, y_train)
    obj6.FitModel()
    obj6.TestModel(X_test, y_test)

    obj7 = ad.AdaBoostClassifier(X_train, y_train)
    obj7.FitModel()
    obj7.TestModel(X_test, y_test)
Example 19
def DeveloperTools():
    global dev
    dev = Tk()
    dev.title("Developer Tools")
    root.withdraw()
    dev.geometry("500x200")  #Set window size

    tk.Button(dev, text='Train Binomial Model',
              command=VerifyBinomial).pack(fill=tk.X)

    tk.Button(dev, text='Train Multinomial Model',
              command=VerifyMultinomial).pack(fill=tk.X)

    tk.Button(dev,
              text='Score Predictions Using Pipeline',
              command=lambda: SVM.MakePredictions(True)).pack(fill=tk.X)

    tk.Button(dev, text='Create Vectorizer Model',
              command=VerifyVectorizer).pack(fill=tk.X)

    tk.Button(dev, text='Create K-Means Clusters',
              command=KMeansCluster).pack(fill=tk.X)

    tk.Button(
        dev,
        text=
        'Return to Previous Menu',  #go back to main menu and close developer tools
        command=lambda: [dev.withdraw(), SelectAction()]).pack(fill=tk.X)

    dev.mainloop()
Example 20
def runAll(rawX, rawY, rawXTesting, rawYTesting):
    print "\n\nMultinomial NB\n\n"
    nb = multinomialNB(rawX, rawY, rawXTesting, rawYTesting)
    print "\n\nSVM\n\n"
    svm = support.supportFunction(rawX, rawY, rawXTesting, rawYTesting)
    km = kmeans.kmeansFunction(rawX, rawY, rawXTesting, rawYTesting)
    return [nb, svm, km]
def gridSearch_Gaussian(trainData, trainLabel, testData, testLabel):
    '''
    Grid-search the Gaussian kernel parameters
    ==========================================
    '''
    C = [np.power(2.0, i) for i in range(-5, 16, 2)]
    Sigma = [np.power(2.0, i) for i in range(-3, 8)]
    Epsilon = [np.power(10.0, i) for i in range(-6, 0)]
    subRange = 100

    maximumArguments = (0, 0, 0)
    maximumF1Score = 0
    for c, sigma, epsilon in [(c, sigma, epsilon) for c in C for sigma in Sigma
                              for epsilon in Epsilon]:
        print(c, sigma, epsilon)
        predictLabel = SVM.predict(trainData[:subRange],
                                   trainLabel[:subRange],
                                   testData,
                                   C=c,
                                   sigma=sigma,
                                   epsilon=epsilon)
        f1Score = modelTest(testLabel, predictLabel)
        if f1Score > maximumF1Score:
            maximumF1Score = f1Score
            maximumArguments = (c, sigma, epsilon)
    print(maximumF1Score, maximumArguments)
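
For scale, a quick check of the grid searched above (a minimal, self-contained sketch):

import numpy as np

C = [np.power(2.0, i) for i in range(-5, 16, 2)]     # 11 values
Sigma = [np.power(2.0, i) for i in range(-3, 8)]     # 11 values
Epsilon = [np.power(10.0, i) for i in range(-6, 0)]  # 6 values
print(len(C) * len(Sigma) * len(Epsilon))            # 726 combinations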
Example 22
 def run(self):
     if self.algorithm == 'logistic':
         import logistic
         self.model = logistic.run()
         self.name = 'LogisticRegression'
     elif self.algorithm == 'randomforest':
         import randomforest
         self.model = randomforest.run()
         self.name = 'RandomForest'
     elif self.algorithm == 'xgboost':
         import XGboost
         self.model = XGboost.run()
         self.name = 'xgboost'
     elif self.algorithm == 'SVM':
         import SVM
         self.model = SVM.run()
         self.x_test_scaled = np.load('x_test_scaled.npy')
         self.name = 'SVM'
     else:
         print('Invalid algorithm parameter')
     if self.algorithm == 'SVM':
         self.y_pred_prob = self.model.decision_function(self.x_test_scaled)
         self.y_pred = self.model.predict(self.x_test_scaled)
     else:
         self.y_pred_prob = self.model.predict_proba(self.x_test)[:, 1]
         self.y_pred = self.model.predict(self.x_test)
def test_model2():
    """A Sort of noise is introduced to remove redundancy error from all the ML algorithms,
	this makes the model more flexible and in turn increases the accuracy ."""
    ind = 0
    pred = []
    report = pd.DataFrame(index=range(0),
                          columns=[
                              'Stock Name', 'accuracy', 'profit count',
                              'loss count', 'total no of rise',
                              'total number of loss'
                          ])
    for i in bar(xrange(len(x))):
        try:
            k_pred, k_y = KNN.knn_algo_model(x[i], net=True)
            b_pred, b_y = bayes.naive_bayes_model(x[i], net=True)
            s_pred, s_y = SVM.svm_model(x[i], net=True)
            p_count, total_count_p, l_count, total_count_l, accuracy = mix.model1(
                s_pred, b_pred, k_pred, s_y, x[i])
            report.loc[ind] = [
                x[i], accuracy, p_count, l_count, total_count_p, total_count_l
            ]
            ind = ind + 1
        except Exception:
            print "!!!!!############ error"
    print "Mean accuracy----------", report['accuracy'].mean()
    report.to_csv("./report/mix_model2_result.csv")
Example 24
 def __init__(self, norm_type="Normalization", iterations=5, base_classifier="SVM"):
     self.iterations = iterations
     self.norm_type = norm_type
     # note: base_classifier is currently ignored; an SVM is always used
     self.base_classifier = SVM.SVMClassifier()
     self.prediction = None
     self.probability = None
     self.classifier_set = None
Example 25
def plotSVM(X,Y,SV_X, SV_Y, SV_a,b_bound,sig):

    C1 = np.where(Y== -1)[0]
    C2 = np.where(Y== 1)[0]
    C = (C1,C2)
    colors = ("red", "green")

    for c,color in zip(C, colors):
        plt.scatter(X[c,0], X[c,1], alpha=1.0, c=color)
   
    step = 0.025
    x_axis = np.arange(0.0, 1.0+step, step)
    y_axis = np.arange(0.0, 1.0+step, step)
    X_mesh, Y_mesh = np.meshgrid(x_axis, y_axis)
    Z_mesh = np.zeros(X_mesh.shape)


    for x in range(X_mesh.shape[0]):
        for y in range(Y_mesh.shape[0]):
            # 'point' avoids shadowing the builtin input()
            point = np.array([X_mesh[x,y], Y_mesh[x,y]], ndmin=2)
            Z_mesh[x,y] = SVM.evalSVM(point, SV_X, SV_Y, SV_a, b_bound, sig)

    # levels must be passed as a sequence, here just the 0.0 boundary
    plt.contour(x_axis, y_axis, Z_mesh, (0.0,))
    #contour(X,Y,Z,V)
    #draw contour lines at the values specified in sequence V,
    #which must be in increasing order.

    plt.show()
Example 26
def main(args):
    training_set = utilis.readExamples(args[1])
    training_labels_set = utilis.readLabelSet(args[2])
    init_weights = np.zeros((3, training_set.shape[1]))

    test_set = utilis.readExamples(args[3])
    norm_tra_set, norm_tes_set = norm.Min_Max_normalization(
        training_set, test_set, MAX_RANGE, MIN_RANGE)
    seed = utilis.generate_Seed(SEED_RANGE, BIAS)

    perc_obj = Perceptron.Perceptron(PERC_EPOCHS, PERC_RATE, norm_tra_set,
                                     training_labels_set, seed)
    perceptron_weights = perc_obj.training(None, None, init_weights)

    pa_obj = PA.PA(PA_EPOCHS, norm_tra_set, training_labels_set, seed)
    pa_weights = pa_obj.training(None, None, init_weights)

    norm_tra_set, norm_tes_set = norm.Zscore_normalization(
        training_set, test_set)

    svm_obj = SVM.SVM(SVM_EPOCHS, SVM_LAMBDA, SVM_RATE, norm_tra_set,
                      training_labels_set, seed)
    svm_weights = svm_obj.training(None, None, init_weights)

    utilis.printThePredictions(perceptron_weights, svm_weights, pa_weights,
                               test_set)
Example 27
def softsvm(traindata, trainlabel, testdata, testlabel, sigma, C):
	if sigma == 0:
		kernel_type='linear'
	else:
		kernel_type='quadratic'
	model = SVM.SVM(kernel_type, C, sigma)
	model.fit(traindata, trainlabel)
	result = model.predict(testdata)
	# print(result)
	ypred = []
	np2p = 0
	nn2p = 0
	for i in range(600):
		if result[i] == 1:
			ypred.append(1)
			if testlabel[i] == 1:
				np2p += 1
			else:
				nn2p += 1
		else:
			ypred.append(0)
	#print(nn2p, np2p)
	SP = np2p / (np2p + nn2p) 
	SR = np2p / 100
	F = SP * SR * 2 / (SP + SR)
	print(ypred, SP, SR, F)
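
In the metrics above, SP and SR are the precision and recall of the positive class (the hard-coded denominator 100 assumes exactly 100 positive test labels), and F is their harmonic mean, F = 2 * SP * SR / (SP + SR).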
Example 28
 def GetAnswer(self, event):
     # The list used to get all the ratio of string similarity.
     ratio = []
     # Get the classifier.
     classifier = SVM.DoSVM()
     # Get the vectorizer.
     vectorizer = KeywordProcessor.DataTransformer()[1]
     # Get the PCA transformer.
     pcaTransform = PCADataProcessor.PCADataProcessor()[1]
     # Get the users' question.
     testData = self.text_user.GetValue()
     # Used to store the users' question.
     example = []
     # Split the users' question.
     seg_list = jieba.cut_for_search(testData)
     # Store the user's question.
     example.append(" ".join(seg_list))
     # Transformed users' question into the matrix.
     transDataExample = vectorizer.transform(example)
     # Store the new matrix.
     dataMatrixExample = transDataExample.toarray()
     # Compress the matrix.
     newExample = pcaTransform.transform(dataMatrixExample)
     # Get the prediction value.
     result = classifier.predict(newExample)
     # Get the question and the answer from the corresponding cluster.
     Questions = sheetForQuestion.col_values(result[0])
     Answers = sheetForAnswer.col_values(result[0])
     # Do the string similarity to get the answer.
     for item in Questions:
         ratio.append(
             difflib.SequenceMatcher(None, testData, item).quick_ratio())
     # Output the answer.
     wx.MessageBox(Answers[ratio.index(max(ratio))])
Example 29
    def callTrainSelectionMethods(self):
        if(len(self.train_Algorithms.curselection())!=0 and len(self.trainSet.curselection())!=0):
            alg=self.selectTrainAlg()
            trainSet=self.selectTrainer()
            trainPath=""
            if(trainSet=="IMDB Movie set(25000)"):
                trainPath="Datasets/aclImdb/train"
            elif(trainSet=="IMDB Movie subset(750)"):
                trainPath="Datasets/debugSets/train"
            elif(trainSet=="Custom Set 1"):
                trainPath="Datasets/custom1/train"
            elif(trainSet=="Custom Set 2"):
                trainPath="Datasets/custom2/train"
            elif(trainSet=="Custom Set 3"):
                trainPath="Datasets/custom3/train"

            if(alg=="Naive Bayes"):
                nb=naiveBayes.NaiveBayes()
                nb.train(trainPath)
                print("done training naive bayes")
            elif(alg=="Support Vector Machine"):
                svm=SVM.SVM()
                svm.trainSVM(trainPath)
                print("done training support vector machine")
        else:
            print("Please select an algorithm and a test set.")
Example 30
def PolynomialSVMTest(pca_option):

    import SVM

    SVM.SVMSimulation(SVM.svm_poly, processing.linear_pca,
                      processing.overall_training_data, pca_option)

    processing.final_validation = np.array(processing.final_validation)

    FV_features = []
    FV_labels = []

    FV_features, FV_labels = processing.createFeatures_Labels(
        processing.final_validation)

    FV_features_data = None
    FV_labels_data = None

    FV_features_data, FV_labels_data = processing.convertToDataFrame(
        FV_features, FV_labels, processing.column_titles)

    global SVM_POLY_final_predictions
    if(pca_option == 'yes' or pca_option == 'both'):

        transformed_FV = processing.linear_pca.transform(FV_features_data)

        final_predictions = SVM.svm_poly.predict(transformed_FV)
        SVM_POLY_final_predictions = final_predictions

        accuracy = metrics.accuracy_score(final_predictions, FV_labels)
        precision = metrics.precision_score(
            FV_labels, final_predictions, average='micro')
        recall = metrics.recall_score(
            FV_labels, final_predictions, average='micro')

        print('POLYNOMIAL SVM MODEL FINAL TEST DATA ACCURACY: ', 100 * accuracy)
        print('POLYNOMIAL SVM MODEL FINAL TEST DATA PRECISION: ', 100 * precision)
        print('POLYNOMIAL SVM MODEL FINAL TEST DATA RECALL: ', 100 * recall)
        print()

        return accuracy, precision, recall

    else:

        final_predictions = SVM.svm_poly.predict(FV_features_data)
        SVM_POLY_final_predictions = final_predictions

        accuracy = metrics.accuracy_score(final_predictions, FV_labels)
        precision = metrics.precision_score(
            FV_labels, final_predictions, average='micro')
        recall = metrics.recall_score(
            FV_labels, final_predictions, average='micro')

        print('POLYNOMIAL SVM MODEL FINAL TEST DATA ACCURACY: ', 100 * accuracy)
        print('POLYNOMIAL SVM MODEL FINAL TEST DATA PRECISION: ', 100 * precision)
        print('POLYNOMIAL SVM MODEL FINAL TEST DATA RECALL: ', 100 * recall)
        print()

        return accuracy, precision, recall
Example 31
def grid_search(label,
                X,
                y,
                svm_params,
                methods,
                train_size=0.75,
                graph=False):
    """
    Implementation of the cross validation

    Parameters:
        - kernel: function, kernel function
        - label: int (0, 1 or 2), label of the set of data
        - X: array, observations
        - y: array, labels
        - svm_params: array, parameters of the SVM classifier
        - kernel_params: array, parameters of the kernel function
        - train_size: float (between 0 and 1), proportion of data for the train part
        - graph: bool, plot the evolution of the accuracy wrt log(svm_params) or not

    Returns the best SVM classifier
    """
    Xtr, ytr, Xte, yte = train_test_split(X, y, train_size)

    best_score = 0
    best_clf = None

    for method in methods:
        kernel, kernel_param = kernels.select_method(method)
        print()
        scores = []

        for c in svm_params:
            print('Parameters : ' + str([method, c]))
            gram_file = "../gram_matrix/gramMat_" + str(
                label) + "_" + method + ".p"
            clf = SVM.SupportVectorMachine(kernel=kernel,
                                           C=c,
                                           kernel_params=kernel_param)
            clf.fit(Xtr, ytr, gram_file)
            score = accuracy(clf.predict(Xte), yte)
            if score > best_score:
                best_score = score
                best_clf = clf
            print("Accuracy score = " + str(score) + '\n')
            scores.append(score)

        if graph:
            plt.plot(np.log10(svm_params),
                     scores,
                     label='kernel_param = ' + str(kernel_param))

    if graph:
        plt.title('Evolution of the accuracy wrt log(C)')
        plt.legend()
        plt.savefig('../res/cross_val' + str(label) + '.png')
        plt.show()

    return best_clf
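
A hedged usage sketch of grid_search above; the method name and the C grid are illustrative assumptions:

# Illustrative call; 'spectrum' must be a method name understood by
# kernels.select_method, which this sketch only assumes.
best_clf = grid_search(label=0, X=X, y=y,
                       svm_params=[0.01, 0.1, 1.0, 10.0],
                       methods=['spectrum'], graph=True)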
Example 32
def driver(classifier):
    print (getTitle(classifier))
    if classifier == 4:
        trainX, trainY, testX, testY = data_handler.splitData2TestTrain('ATNTFaceImages400.txt', 10, '1:10')
        print ("\nAverage Accuracy for 5 folds: %s"% SVM.cross_validate(trainX, trainY, testX, testY))
    else:
        data, indexes = data_handler.get_data("ATNTFaceImages400.txt")
        print ("\nAverage Accuracy for 5 folds: %s"%cross_validator(5, data, indexes, classifier))
Example 33
def svm_predict(company: str, verbose=False, train_size=0.80, scaled=False):
    X_train, X_test, y_train, y_test, prices, times = get_features(company, train_size=train_size, scaled=scaled)
    true_labels, SVM_predictions = SVM.predict(X_train, y_train, X_test, y_test)
    accuracy = accuracy_score(true_labels, SVM_predictions)
    if verbose:
        print("SVM Accuracy: " + str(accuracy * 100) + "%")
        prediction_distribution(SVM_predictions, true_labels)
    return prices, times, SVM_predictions, accuracy
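
A minimal usage sketch; the ticker symbol is illustrative:

prices, times, preds, acc = svm_predict('AAPL', verbose=True, scaled=True)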
Example 34
def Part2(createData):
    # optical flow and motion direction histogram calculation
    v = 100

    # mdh_all = OM.createMotionDirectionHistograms('Oberstdorf16-shots.csv', 'videos/oberstdorf16.mp4', v, False, True)
    # FileIO.save_histograms_to_file('mdh_16_' + str(v) + '.csv', mdh_all)

    if createData:
        SVM.save_shot_images('videos/oberstdorf16.mp4', SVM.SSI_CENTER, 'Oberstdorf16-shots.csv', False)

    #svm training and predicting
    mdh_training = FileIO.read_histograms_from_file('mdh_8_' + str(v) + '.csv')
    mdh_test = FileIO.read_histograms_from_file('mdh_16_' + str(v) + '.csv')
    predicted_labels = SVM.svm_use(mdh_training, mdh_test)

    stitched_shots, all_shots, outstitched_shots = SVM.get_results(
        predicted_labels, 'Oberstdorf16-shots.csv', True)


    return stitched_shots, all_shots, outstitched_shots
Example 35
def task_3_results():
    kf = KFold(n_folds=k)
    num_manual_features = len(X_manual[0])
    accuracies = []
    for i in range(num_manual_features):
        accuracies.append([])
    X_manual_np = np.asarray(X_manual)
    for train_index, test_index in kf.split(X_content):
        X_train = X_content[train_index]
        X_test = X_content[test_index]
        for i in range(num_manual_features):
            y_train = X_manual_np[train_index, i]
            y_test = X_manual_np[test_index, i]
            SVM.learn_svm(X_train, list(y_train), prefix+"task_3_model")
            accuracies[i].append(SVM.test_svm_accuracy(X_test, list(y_test), prefix+"task_3_model"))

    for i in range(num_manual_features):
        print "Accuracy for task_3 (" + str(names[i]) + "):", np.mean(accuracies[i]), "+-", np.std(accuracies[i])

    return accuracies
Example 36
def Part1():
    # values = [100, 200, 500, 1000]
    values = [100]

    for v in values:
        print "Points: " + str(v)

        #optical flow and motion direction histogram calculation
        # mdh_all = OM.createMotionDirectionHistograms('GroundTruth.csv', 'videos/oberstdorf08small.mp4', v, False, False)
        # FileIO.save_histograms_to_file('mdh_8_' + str(v) + '.csv', mdh_all)
        # print "Histograms created."

        # #svm training and predicting
        mdh_compl = FileIO.read_histograms_from_file('mdh_8_' + str(v) + '.csv')
        accuracy, ITERATIONS, NF = SVM.svm_accuracy(mdh_compl)
        print "average accuracy: " + str(accuracy/ITERATIONS/NF)
Example 37
def testSVM():
	## loading data
	log = "Step 1: loading data..."
	writeLog(log)
	print log
	test_x, test_y = loadDigitTestData()
	# scale features from [0, 255] to [-1, 1]
	test_x = test_x/255.0*2 - 1

	# initialize the vote matrix for testing data, Votes[m, 10]
	m, dump = shape(test_y)
	Votes = mat(zeros((m, 10)))

	## testing data
	log = "Step 2: testing data..."
	for i in range(10):
		for j in range(i+1, 10):
			log = "--working on model: " + str(i) + '&' + str(j)
			print log
			writeLog(log)
			# loading the models
			d = shelve.open('./models/svm_' + str(i) + '_' + str(j))
			svmClassifier = d['svm']
			d.close()
			# testing using the given model and votes
			Votes_k, Votes_l = SVM.testDigitScores(svmClassifier, test_x, m)

			# write to the Votes
			Votes[:, i] += Votes_k
			Votes[:, j] += Votes_l

	## saving Votes matrix
	log = "Step 3: saving votes..."
	print log
	writeLog(log)
	d = shelve.open('./models/Votes_Score_noscale')
	d['vote'] = Votes
	d.close()
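
With 10 digit classes, the two loops above apply 10 * 9 / 2 = 45 pairwise models; the predicted digit for each test row is the class with the most votes. A minimal sketch, assuming the Votes matrix built above:

# One-vs-one decision: pick the class with the most pairwise votes.
predictions = Votes.argmax(axis=1)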
Example 38
def run_with_svm(image_filename="../Wheat_Images/004.jpg", ser_filename=None):
    '''
	Estimates the number of grains in a given image using a
	Support Vector Machine.

	Args:
		image_filename: The path to the image from which a grain count
			is to be obtained.

		ser_filename: Path to a serialized list of sub-images already
			extracted from the image from which a grain count is to be
			obtained.

	Returns:
		count: An estimate of the number of grains in the provided image.
	'''

    global img_data

    # Chop the image up into sub-images and serialise them, or just load
    # the serialised data if it already exists.
    if ser_filename is None and image_filename == "../Wheat_Images/004.jpg":
        ser_filename = "../Wheat_Images/xxx_004.data"
    if Helper.unserialize(ser_filename) is None:
        img = img_as_ubyte(io.imread(image_filename))
        roi_img = spectral_roi.extract_roi(img, [1])
        Helper.block_proc(roi_img, (20,20), blockfunc)
        #Helper.serialize(ser_filename, img_data)
    else:
        img_data = Helper.unserialize(ser_filename)

    # classify
    r = SVM.classify(img_data, featureRepresentation='glcm', shouldSaveResult=True)

    # Count number of '1s' in the result and return
    count = r.tolist().count(1)
    print("COUNT: {}".format(count))
    return count
Example 39
def task_4_results():
    kf = KFold(n_folds=k)
    num_manual_features = len(X_manual[0])
    accuracies = []
    X_manual_np = np.asarray(X_manual)
    for train_index, test_index in kf.split(X_content):
        X_content_train = X_content[train_index]
        X_manual_test = X_manual[test_index]  # Just for structure, populated below
        X_content_test = X_content[test_index]
        num_test = len(test_index)
        for i in range(num_manual_features):
            X_np_train = X_manual_np[train_index, i]
            SVM.learn_svm(X_content_train, list(X_np_train), prefix+"task_4_model")
            for j in range(num_test):
                X_manual_test[j][i] = SVM.load_svm(prefix+"task_4_model").predict([X_content_test[j]])[0]
        X_manual_train = X_manual[train_index]
        y_train = y[train_index]
        y_test = y[test_index]
        SVM.learn_svm(X_manual_train, y_train, prefix+"task_4_model_1")
        accuracies.append(SVM.test_svm_accuracy(X_manual_test, y_test, prefix+"task_4_model_1"))

    print "Accuracy for task_4:", np.mean(accuracies), "+-", np.std(accuracies)
    return accuracies
Example 40
from SVM import *
from documento import *
from pattern.web import *
from model import *


import os

# reload(sys)  # Reload does the trick!
# # sys.setdefaultencoding('UTF8')
# print sys.getdefaultencoding()

lecturaArchivo('data/documentos.csv','entrenamiento')

X = getDocumentosAtributos('entrenamiento')
Y = getDocumentosClase()



unSVM = SVM(1.0,'poly',.7,.3,X,Y)
unSVM.training()
print "Precision : " ,unSVM.testing()

lecturaArchivo('data/prediccion.csv','predecir')
X = getDocumentosAtributos('predecir')
print unSVM.predecir(X)
Example 41
            median_fare[f] = test_df[
                test_df.Pclass == f + 1]['Fare'].dropna().median()
        # loop 0 to 2
        for f in range(0, 3):
            test_df.loc[(test_df.Fare.isnull()) & (
                test_df.Pclass == f + 1), 'Fare'] = median_fare[f]

    # Collect the test data's PassengerIds before dropping it
    ids = test_df['PassengerId'].values
    # Remove the Name column, Cabin, Ticket, and Sex (since I copied and
    # filled it to Gender)
    test_df = test_df.drop(
        ['Name', 'Sex', 'Ticket', 'Cabin', 'PassengerId'], axis=1)
    test_data = test_df.values
    return ids, test_data

def clean_data(train_df,test_df):
	train_data,Ports_dict = clean_train_data(train_df)
	ids, test_data = clean_test_data(Ports_dict,test_df)
	return train_data,test_data,ids

if __name__ == '__main__':
	# Data cleanup
	# Load the test file into a dataframe
	test_df = pd.read_csv('data/test.csv', header=0)
	# Load the train file into a dataframe
	train_df = pd.read_csv('data/train.csv', header=0)
	train_data,test_data,ids = clean_data(train_df,test_df)
	SVM.run(train_data,test_data,ids)
Example 42
for i in dataSet:
	newI = HN.HyperIntervalNumber(i)
	out = newI.GetAllPosPoint()
	for j in out:
		NewdataSet.append(j)
print "The test data is:"
print dataSet

dataSet = mat(NewdataSet)
print len(dataSet)
labels = mat(labels).T
train_x = dataSet[0:800, :]
train_y = labels[0:800, :]
test_x = dataSet[801:1599, :]
test_y = labels[801:1599, :]

## step 2: training...
print "step 2: training..."
C = 3
toler = 0.001
maxIter = 50
svmClassifier = SVM.trainSVM(train_x, train_y, C, toler, maxIter, kernelOption = ('rbf', 0))

## step 3: testing
print "step 3: testing..."
accuracy = SVM.testSVM(svmClassifier, test_x, test_y)

## step 4: show the result
print "step 4: show the result..."
print 'The classify accuracy is: %.3f%%' % (accuracy * 100)
#SVM.showSVM(svmClassifier)
Example 43
numFeaturesTrueP= getTotalCountFeature(type4, trueTable)
numFeaturesDecP= getTotalCountFeature(type4, decTable) 

numFeaturesPosP= getTotalCountFeature(type4, posTable)
numFeaturesNegP= getTotalCountFeature(type4, negTable) 

numFeaturesTrueGivenPP= getTotalCountFeature(type4, truePosTable)
numFeaturesDecGivenPP= getTotalCountFeature(type4, decPosTable) 

numFeaturesTrueGivenNP= getTotalCountFeature(type4, trueNegTable)
numFeaturesDecGivenNP= getTotalCountFeature(type4, decNegTable) 


for te in test:

	posOrNegC= SVM.getTrueOrDeceptive(type1, numFeaturesPosC, numFeaturesNegC, te[type1], posTable, negTable)
	posOrNegU= SVM.getTrueOrDeceptive(type2, numFeaturesPosU, numFeaturesNegU, te[type2], posTable, negTable)
	posOrNegB= SVM.getTrueOrDeceptive(type3, numFeaturesPosB, numFeaturesNegB, te[type3], posTable, negTable)
	posOrNegP= SVM.getTrueOrDeceptive(type4, numFeaturesPosP, numFeaturesNegP, te[type4], posTable, negTable)

	trueOrDecC=0
	trueOrDecU=0
	trueOrDecB=0
	trueOrDecP=0

	if posOrNegC == 1:
		trueOrDecC= SVM.getTrueOrDeceptive(type1, numFeaturesTrueGivenPC, numFeaturesDecGivenPC, te[type1],truePosTable, decPosTable)
	else:
		trueOrDecC= SVM.getTrueOrDeceptive(type1, numFeaturesTrueGivenNC, numFeaturesDecGivenNC, te[type1],trueNegTable, decNegTable)
	
	if posOrNegU == 1:
def CrossValidation(K, X, gamma, c):  # K-fold cross-validation of the SVM below
    print(X.shape[0])
    classes=[]
    classNum=0
    for y in X[:,-1]:
        if not y in classes:
            classes.append(y)
            classNum+=1

    precisions=np.zeros(shape=(classNum)).tolist()
    recalls=np.zeros(shape=(classNum)).tolist()
    accuracys=0
    fMeasures=np.zeros(shape=(classNum)).tolist()
    for k in xrange(K):
        #for k in range(0,1):
         training=np.ndarray(shape=(0,X.shape[1]))
         validation=np.ndarray(shape=(0,X.shape[1]))

         for i in range(0,X.shape[0]):
              if i % K!=k:
                 training=np.vstack([training,X[i]])
              else:
                 validation=np.vstack([validation,X[i]])

         yExpected=validation[:,-1]
         xs=validation[:,0:validation.shape[1]-1]

         classifier=SVM.svm(training[:,0:X.shape[1]-1],training[:,X.shape[1]-1],gamma=gamma,c=c)
         classifier.train()

         confusionMatrix=np.zeros(shape=(classNum,classNum),dtype=float)

         count=0
         for x in xs:
              #print(classifier.predict(x))
              j=classifier.predict(x)
              confusionMatrix[j,int(yExpected[count])]=confusionMatrix[j,int(yExpected[count])]+1
              count+=1

            #confusionMatrix[classes.index(ys[count]),classes.index(y)]=confusionMatrix[classes.index(ys[count]),classes.index(y)]+1

         print(confusionMatrix)
         precision=np.zeros(shape=(classNum),dtype=float)
         recall=np.zeros(shape=(classNum),dtype=float)
         accuracy=0
         fMeasure=np.zeros(shape=(classNum),dtype=float)
         for i in range(classNum):
             if np.sum(confusionMatrix[i,:])==0:
                 precision[i]=0
             else:
                 precision[i]=confusionMatrix[i,i]/np.sum(confusionMatrix[i,:])
             if np.sum(confusionMatrix[:,i])==0:
                 recall[i]=0
             else:
                 recall[i]=confusionMatrix[i,i]/np.sum(confusionMatrix[:,i])
             accuracy+=confusionMatrix[i,i]
             if precision[i]==0 or recall[i]==0:
               fMeasure[i]=0
             else:
               fMeasure[i]=2*precision[i]*recall[i]/(precision[i] +recall[i])
         accuracy=accuracy/validation.shape[0]

         precisions=precisions+precision
         recalls=recalls+recall
         accuracys=accuracys+accuracy
         fMeasures=fMeasures+fMeasure


    p=np.array(precisions)/K
    r=np.array(recalls)/K
    a=accuracys/K
    f=np.array(fMeasures)/K
    print("precision:")
    print(p)
    print("recall:")
    print(r)
    print("accuracy:")
    print(a)
    print("F measure:")
    print(f)
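
A hedged usage sketch of CrossValidation above, which expects the class label in the last column of X; the file name is illustrative:

import numpy as np

X = np.loadtxt('data.csv', delimiter=',')  # labels in the last column
CrossValidation(5, X, gamma=0.1, c=1.0)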
def main(argv):
    SVM.classify()
Example 46
def main():
    #data_set = pd.read_csv('creditdata.csv', index_col=0)
    data_set = pd.read_csv('creditSmall.csv', index_col=0)

    data_set = fix_header(data_set)

    # use .loc to avoid pandas chained-assignment warnings
    data_set.loc[data_set.EDUCATION == '0', 'EDUCATION'] = '4'
    data_set.loc[data_set.EDUCATION == '5', 'EDUCATION'] = '4'
    data_set.loc[data_set.EDUCATION == '6', 'EDUCATION'] = '4'
    data_set.loc[data_set.MARRIAGE == '0', 'MARRIAGE'] = '3'
    data_set = data_set.astype(float)
    print(data_set.DEFAULTER.mean()*100)

    data_set['BILL_PAY_RATIO1'] = (data_set['BILL_AMT1'] - data_set['PAY_AMT1']) / data_set['LIMIT_BAL']
    data_set['BILL_PAY_RATIO2'] = (data_set['BILL_AMT2'] - data_set['PAY_AMT2']) / data_set['LIMIT_BAL']
    data_set['BILL_PAY_RATIO3'] = (data_set['BILL_AMT3'] - data_set['PAY_AMT3']) / data_set['LIMIT_BAL']
    data_set['BILL_PAY_RATIO4'] = (data_set['BILL_AMT4'] - data_set['PAY_AMT4']) / data_set['LIMIT_BAL']
    data_set['BILL_PAY_RATIO5'] = (data_set['BILL_AMT5'] - data_set['PAY_AMT5']) / data_set['LIMIT_BAL']
    data_set['BILL_PAY_RATIO6'] = (data_set['BILL_AMT6'] - data_set['PAY_AMT6']) / data_set['LIMIT_BAL']

    x = data_set.drop(['DEFAULTER'], axis=1)
    y = data_set.DEFAULTER

    # rescale the metrics to the same mean and standard deviation
    scaler = preprocessing.StandardScaler()
    x = scaler.fit(x).transform(x)

    # Further divide the train data into train test split 70% & 30% respectively
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, stratify=y, random_state=2)



    # creating classifier

    # classifier = neighbors.KNeighborsClassifier(n_neighbors=3)
    # classifier.fit(X_train, Y_train)
    # accuracy = classifier.score(X_test, Y_test)
    # print("Accuracy :",accuracy)

    #myaccuracy= KNN(x_train,y_train,y_test,x_test,3)
    #print(myaccuracy)


    # Predicition using Naive Bayes
    # model = fit(X_train, Y_train)
    # predictions = getPredictions(model, X_test)
    # accuracies = getAccuracy(Y_test, predictions)
    # print('Accuracy: ', accuracies)
    #
    # gaussNb = GaussianNB()
    # gaussNb.fit(X_train, Y_train)
    # print(gaussNb)
    # y_expect = Y_test
    # y_pred = gaussNb.predict(X_test)
    # print(accuracy_score(y_expect, y_pred))
    #
    # classification = classification_report(Y_test, predictions)
    # print(classification)
    # label = [0, 1]
    # cmatrix = confusion_matrix(Y_test, predictions, label)
    # print(cmatrix)
    #
    # classification1 = classification_report(Y_test, y_pred)
    # print(classification1)
    # label1 = [0, 1]
    # cmatrix1 = confusion_matrix(Y_test, y_pred, label1)
    # print(cmatrix1)
    # plot_confusion_matrix(cmatrix, label)
    # plot_confusion_matrix(cmatrix1, label1, title="2")


    clf = SVM()
    # the SVM expects labels in {-1, +1}, so remap the 0/1 targets
    y_svmTrain = np.where(y_train == 0, -1, 1)
    clf.fit(x_train, y_svmTrain)

    y_svmTest = np.where(y_test == 0, -1, 1)
    y_predict = clf.predict(x_test)
    correct = np.sum(y_predict == y_svmTest)
    print("%d out of %d predictions correct" % (correct, len(y_predict)))
    print()
    print(y_test)

    plot_margin(x_train[y_svmTrain == 1], x_train[y_svmTrain == -1], clf)
    print(x_test)
Example 47
def buildSVM(k, l):
	'''
	Description: Build the SVM model for a pair of classes in the digit data.

	@param:
		k: the first class of the SVM model, 0<=k<=9
		l: the second class of the SVM model, 0<=l<=9

	@procedure
		saves the simpler SVM model between classes k and l.
	'''
	## Step 1: load data
	log = "Step 1: loading data..."
	writeLog(log)
	print log
	train_x, train_y, test_x, test_y = loadDigitData()

	# set_printoptions(threshold='nan')

	# extract k, l classes
	K_IndexTrain = nonzero(train_y.A == k)[0]
	L_IndexTrain = nonzero(train_y.A == l)[0]
	IndexTrain = concatenate((K_IndexTrain, L_IndexTrain))
	# random shuffle the array
	IndexTrain = random.permutation(IndexTrain)

	K_IndexTest  = nonzero(test_y.A == k)[0]
	L_IndexTest  = nonzero(test_y.A == l)[0]
	IndexTest  = concatenate((K_IndexTest, L_IndexTest))
	# random shuffle the array
	IndexTest = random.permutation(IndexTest)


	train_x = train_x[IndexTrain]
	train_y = train_y[IndexTrain]
	test_x = test_x[IndexTest]
	test_y = test_y[IndexTest]

	# sets label to -1 and +1
	train_y[train_y==k] = -1
	train_y[train_y==l] = 1
	test_y[test_y==k] = -1
	test_y[test_y==l] = 1

	# scale the feature values from [0, 255] to [-1, 1]
	train_x = train_x/255.0*2 - 1
	test_x = test_x/255.0*2 - 1


	## Step 2: training data
	log = "Step 2: training data..."
	writeLog(log)
	print log

	C = 16
	toler = 0.001
	maxIter = 50
	svmClassifier = SVM.train(train_x, train_y.T, C, toler, maxIter, kernel = ('rbf', 13))
	# saves the model to disk for feature prediction
	svmClassifier.save('./models/svm_' + str(k) + '_' + str(l))
	# simpleSVM = SVMSimpleStruct(svmClassifier)
	# simpleSVM.save('./models/simple_svm_' + str(k_class))

	# # load the model
	# print 'Step 2: loading model..
	# d = shelve.open('./models/svm_' + str(k) + '_' + str(l))
	# svmClassifier = d['svm']
	# d.close()


	# # Step 3: testing data
	log = "Step 3: testing data..."
	writeLog(log)
	print log
	accuracy = SVM.test(svmClassifier, test_x, test_y)

	## Step 4: show the results 
	log = 'The classify accuracy is: %.3f%%' % (accuracy * 100)  
	print log
	writeLog(log)
Example 48
        df = Parser.load_parsed_data_from_file(filename + ".parsed_encoded_data")

training_set_fraction = 0.7
training_data = df.loc[:training_set_fraction * float(df.shape[0])]

###### ###### ###### ###### ###### ###### START TRAINING ###### ###### ###### ###### ###### ######

import time
stime = time.time()

### One Class Support Vector Machine ###

import SVM
from sklearn.externals import joblib  # on modern scikit-learn: import joblib

OCSVM = SVM.trainOCSVM(training_data, tol=0.001, cache_size=2000, shrinking=False, nu=0.05, verbose=True)
joblib.dump(OCSVM, filename=filename + ".fitted_SVM_model")
clf = joblib.load(filename + ".fitted_SVM_model")

########################################

### Autoassociative NN ###

import Autoencoder
import tensorflow as tf
sess = tf.Session()
x = tf.placeholder("float", [None, df.shape[1]])
autoencoder = Autoencoder.create(x, [48, 24, 12])
EWMACost = 0
Autoencoder.train_AE(df=training_data, sess=sess, x=x,
                     denoising=False, verbose=False, autoencoder=autoencoder)
Example 49
File: UI.py Project: annajh/AD
def classify_function(filename, method, k, kernal,n):
    if filename == '':
        raise data.ValidationError('please select file')
    X, Y, subjectID = data.load_data("control_features_combinedSubject.txt", "dementia_features_combinedSubject.txt")
    X = data.get_useful_features_mat(X)

    alz_count = 0
    for y in Y:
        if y:
            alz_count = alz_count + 1

    # normalize features
    X_scaled, normalizer = data.normalize_features(X)
    # print X_scaled


    #print filename
    testList = []
    filenameSubj = filename[0:3]
    testList.append(filenameSubj)
    try:
        X_train, Y_train, X_test, Y_test, trainID, testID = data.split_train_test(X_scaled,Y,subjectID,testID=testList)
    except ValueError:
        print filenameSubj
        raise data.ValidationError("combined data missing!")

    #PCA
    if n < 1 and n != -1:
        raise data.ValidationError('# features has to be greater than 0')
    elif n > 16:
        raise data.ValidationError('# features has to be less than 16')
    elif (n >= 1 and n <= 16):
        pca, explained_variance_ratio_ = data.reduce_dimension(X_train, n)
        X_train = pca.transform(X_train)

    #load the real testing data! Y_test should remain the same!
    X_test = data.load_testing_visit("control_features_per_visit.txt", "dementia_features_per_visit.txt", filename[0:5])
    #print "visit"
    #print X_test
    X_test = data.get_useful_features_mat(X_test)
    #print "visit"
    #print X_test
    X_test = normalizer.transform(X_test)
    #print "visit"
    #print X_test
    #if use PCA
    if (n >= 1 and n <= 16):
        X_test = pca.transform(X_test)
    #print "visit"
    #print X_test
    if method == 0:
        raise data.ValidationError('please select classification method')
    #SVM
    elif method == 2:
        if kernal == 0:
            raise data.ValidationError('please select kernel')
        clf = SVM.train(X_train,Y_train,kernal)
        result = SVM.test(X_test,clf)
        #print X_train
        #print X_test
        #print result
        #print clf
    #KNN
    elif method == 1:
        if k == '':
            raise data.ValidationError('please select number of neighbors (k)')
        neigh = KNN.train(X_train,Y_train,k)
        result = KNN.test(X_test,neigh)

    return result[0], Y_test[0]
Example 50
print "step 1: load data..."  
dataSet = [] 
labels = []
with open('../../data/testSet.txt', 'r') as file:
	for line in file.readlines():
		line = line.strip().split('\t')
		dataSet.append([float(line[0]), float(line[1])])
		labels.append(float(line[2]))
  
dataSet = mat(dataSet)
labels = mat(labels).T
train_x = dataSet[0:81, :]  
train_y = labels[0:81, :]  
test_x = dataSet[80:101, :]  
test_y = labels[80:101, :]  
  
## step 2: training...  
print "step 2: training..."  
C = 0.6  
toler = 0.001  
maxIter = 50  
svmClassifier = SVM.train(train_x, train_y.T, C, toler, maxIter, kernel = ('linear', 0))  
  
## step 3: testing  
print "step 3: testing..."  
accuracy = SVM.test(svmClassifier, test_x, test_y)  
  
## step 4: show the result  
print "step 4: show the result..."    
print 'The classify accuracy is: %.3f%%' % (accuracy * 100)  
SVM.show(svmClassifier) 
def plot_learning_curve(features_train, labels_train, features_test, labels_test, outputClasses=None, K="linear", C=1,
                        G=0.01,
                        method="error", classifier="SVM", a=0, b=1, showFigure=True, saveFigure=False, savePath=None,
                        nb_epochs=10):
    # run for every 10% of the training set and compute training and testing error
    step = len(features_train) // 10  # integer step size under Python 2 and 3

    train = []
    test = []
    maj_clas = []

    for i in range(0, 10):
        print 'iteration : ', i

        # train for (i+1)*10 percent of training set
        f = features_train[0:((i + 1) * (step))]
        l = labels_train[0:((i + 1) * (step))]
        assert f.shape[0] == l.shape[0], 'Wrong number of input data! '

        if classifier == "SVM":
            # train classifier for the specific subset of training set
            model = SVM.train(f, l, k=K, c=C, g=G)

            # get training error
            predictionTrain = SVM.predict(f, model)

            # get testing error
            predictionTest = SVM.predict(features_test, model)

        elif classifier == "LR":
            # train classifier for the specific subset of training set
            model = LogisticRegression.train(f, l, c=C)

            # get training error
            predictionTrain = LogisticRegression.predict(f, model)

            # get testing error
            predictionTest = LogisticRegression.predict(features_test, model)
        elif classifier == "CNN":

            model = Keras_test.CNN().train(features=f, labels=l,outputClasses=outputClasses,
                                           learning_curves_OR_Cross_Val=True,nb_epoch=nb_epochs)
            # get training error
            predictionTrain = Keras_test.CNN().predictClasses(features=f, model=model)
            # get testing error
            predictionTest = Keras_test.CNN().predictClasses(features=features_test, model=model)

        # TODO : CNN MINIBATCHES LEARNING CURVES , Implementation : read 10% of data and train cnn.train with all the data
        # elif classifier == "CNN_minibatches":
        elif classifier == "MLP":

            model = Keras_test.MLP().train(features=f, labels=l, outputClasses=outputClasses,
                                           learning_curves_OR_Cross_Val=True, nb_epoch=nb_epochs)
            # get training error
            predictionTrain = Keras_test.MLP().predict(features=f, model=model, ShowAccuracy=False)
            # get testing error
            predictionTest = Keras_test.MLP().predict(features=features_test, model=model, ShowAccuracy=False)
        elif classifier == "SimpleRNN":

            model = Keras_test.RNN().trainRNN(features=f, labels=l, outputClasses=outputClasses, learning_curves=True)
            # get training error
            predictionTrain = Keras_test.RNN().predict(features=f, model=model, ShowAccuracy=False)
            # get testing error
            predictionTest = Keras_test.RNN().predict(features=features_test, model=model, ShowAccuracy=False)
        elif classifier == "RNN_LSTM":

            model = Keras_test.RNN().trainRnnLSTM(features=f, labels=l, outputClasses=outputClasses,
                                                  learning_curves=True)
            # get training error
            predictionTrain = Keras_test.RNN().predict(features=f, model=model, ShowAccuracy=False)
            # get testing error
            predictionTest = Keras_test.RNN().predict(features=features_test, model=model, ShowAccuracy=False)
        else:
            raise ValueError('unknown classifier: %s' % classifier)

        # get error for the majority-class baseline
        predictionMajority = MajorityClassifier.predictMaj(labels_test)

        if method == "error":
            train.append(measures.error(l, predictionTrain))
            test.append(measures.error(labels_test, predictionTest))
            maj_clas.append(measures.error(labels_test, predictionMajority))
        elif method == "avgF1":
            train.append(measures.avgF1(l, predictionTrain, a, b))
            test.append(measures.avgF1(labels_test, predictionTest, a, b))
            maj_clas.append(measures.avgF1(labels_test, predictionMajority, a, b))

    print 'final test score:', test[-1]
    x = (np.arange(len(train)) + 1) * 10  # 10%, 20%, ..., 100% of the training set

    plt.plot(x, train, color="blue", linewidth=2.0, label=classifier)
    plt.plot(x, test, color="blue", linestyle="dashed", linewidth=2.0)
    plt.plot(x, maj_clas, color="red", linewidth=2.0, label="majority")
    plt.ylim(0, 1)
    plt.ylabel(method)
    plt.xlabel("% of messages")

    if method == "error":
        plt.legend(loc="upper left")
    elif method == "avgF1":
        plt.legend(loc="lower left")

    if saveFigure:
        assert savePath is not None, "Give an image path to save the figure"
        plt.savefig(savePath)
        # clear the current canvas; otherwise saving and showing together would reuse the same figure
        plt.clf()

    if showFigure:
        plt.show()
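
A minimal invocation sketch under assumed data (random placeholder arrays; the real features come from the project's preprocessing, and the SVM, measures and MajorityClassifier modules must be importable):

import numpy as np

Xtr, ytr = np.random.rand(200, 30), np.random.randint(0, 2, 200)
Xte, yte = np.random.rand(50, 30), np.random.randint(0, 2, 50)
plot_learning_curve(Xtr, ytr, Xte, yte, K="linear", C=1,
                    method="error", classifier="SVM", showFigure=True)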
Example n. 52
import SVM
from numpy import mat

## step 1: load data
dataSet = []
labels = []
with open('/Users/lixurong/Downloads/DataSet.txt') as fileIn:
    for line in fileIn.readlines():
        lineArr = line.strip().split('\t')
        dataSet.append([float(lineArr[0]), float(lineArr[1])])
        labels.append(float(lineArr[2]))

dataSet = mat(dataSet)
labels = mat(labels).T
train_x = dataSet[0:81, :]
train_y = labels[0:81, :]
test_x = dataSet[81:101, :]   # start at 81 so the test set does not overlap the training set
test_y = labels[81:101, :]

## step 2: training...
print "step 2: training..."
C = 0.6
toler = 0.001
maxIter = 50
svmClassifier = SVM.trainSVM(train_x, train_y, C, toler, maxIter, kernelOption = ('linear', 0))

## step 3: testing
print "step 3: testing..."
accuracy = SVM.testSVM(svmClassifier, test_x, test_y)

## step 4: show the result
print "step 4: show the result..."    
print 'The classify accuracy is: %.3f%%' % (accuracy * 100)
SVM.showSVM(svmClassifier)
Example n. 53
File: 6.py Project: niumeng07/ML
#!/usr/bin/env python

from numpy import shape
import SVM

dataMat, labelMat = SVM.loadDataSet('testSet.txt')
print(dataMat)
print(labelMat)

# simplified SMO: C = 0.6, tolerance = 0.001, at most 40 iterations
b, alphas = SVM.smoSimple(dataMat, labelMat, 0.6, 0.001, 40)
print(b)
print(alphas)

print("The number of support vectors:")
print(shape(alphas[alphas > 0]))

# print the support vectors themselves (the samples with non-zero alpha)
for i in range(len(dataMat)):
    if alphas[i] > 0.0:
        print(dataMat[i], labelMat[i])
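
With the multipliers in hand, the linear decision rule follows directly from f(x) = sign(sum_i alpha_i * y_i * <x_i, x> + b); a minimal sketch reusing dataMat, labelMat, alphas and b from above:

from numpy import mat, multiply, sign

X = mat(dataMat)
y = mat(labelMat).T
w = X.T * multiply(alphas, y)   # w = sum_i alpha_i * y_i * x_i
predictions = sign(X * w + b)   # apply the learned hyperplane to the training points
print(predictions.T)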




    
    
Example n. 54
def run():
    # sciPpn.run()
    # logReg.run()
    svm.run()
def rate(review):
    review = SVM.asciify(review)

    inputData = []

    # 1st element = bayes with unigram
    bayesClassifier.loadData("U")
    inputData.append(bayesClassifier.percentPositive(review))
    # bayes with adjective
    bayesClassifier.loadData("A")
    inputData.append(bayesClassifier.percentPositive(review))
    # bayes with POS
    bayesClassifier.loadData("P")
    bayesClassifier.partOfSpeech = True
    inputData.append(bayesClassifier.percentPositive(review))

    # SVM with unigram
    review = review.split()
    SVM.loadModule("U")
    SVM.loadWords("U")
    X = SVM.intersection(SVM.wordList, review)
    inputData.append(SVM.movieReviewer.predict(X)[0])
    # SVM with adjective
    SVM.loadModule("A")
    SVM.loadWords("A")
    X = SVM.intersection(SVM.wordList, review)
    inputData.append(SVM.movieReviewer.predict(X)[0])

    return Tree.predict(inputData)[0]
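
The stacked classifier above feeds three Bayes scores and two SVM outputs into a decision tree; a hedged usage sketch (the review text is made up, and the bayesClassifier, SVM and Tree modules must already be loaded):

print rate("A wonderfully acted film with a script that never loses momentum.")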
Example n. 56
#!/usr/bin/python

import SVM
from sklearn.externals import joblib
import os
from sys import argv

os.system("echo -n 'loading keywords...\t\t'")
SVM.loadModule(argv[1])
SVM.loadWords(argv[1])
os.system("echo -n '[done]\n'")

os.system("echo -n 'testing files:\n[00%'")
files = os.listdir('./test/pos')[:3000]
total = float(len(files)) * 2
done = 0
progress = 0
correct = 0

for i in files:
    f = SVM.asciify(open('./test/pos/' + i, 'r').read()).split()
    result = SVM.movieReviewer.predict(SVM.intersection(SVM.wordList, f))[0]
    if result > 0:
        correct += 1
    done += 1
    if (done / total * 100 >= progress + 5):
        progress += 5
        #os.system("echo -n '\b\b\b=%2d%%'" % progress)

files = os.listdir('./test/neg')[:3000]
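
The snippet is cut off after loading the negative reviews; a hedged completion mirroring the positive-review loop above (the final accuracy print is an assumption, not from the original):

# Assumed continuation -- mirrors the positive-review loop above.
for i in files:
    f = SVM.asciify(open('./test/neg/' + i, 'r').read()).split()
    result = SVM.movieReviewer.predict(SVM.intersection(SVM.wordList, f))[0]
    if result < 0:          # a negative prediction counts as correct here
        correct += 1
    done += 1

print 'accuracy: %.2f%%' % (correct / total * 100)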
Example n. 57
    def process(self):
        '''
        Description: pulls together the helper functions, loads the data, and prepares it for the GUI.
        The scores, tuned parameters, cross-validation parameters and data parameters are read from the SVM frame,
        and the mask-row list from getMaskRow() marks the rows with missing values.
        Separate imputers are configured for the scalar IVs, the categorical IVs and the binary DV.
        '''
        scoring = self.svmFrame.getScores()
        tuned_parameters = self.svmFrame.getTunedParameters()
        cv_parameters = self.svmFrame.getCVParameters()
        data_parameters = self.svmFrame.getDataParameters()

        maskRow = self.getMaskRow()
        imputerSIV = preprocessing.Imputer(missing_values='NaN', strategy=data_parameters['impute'], axis=0, copy=True)
        imputerCIV = preprocessing.Imputer(missing_values='NaN', strategy='most_frequent', axis=0, copy=True)
        imputerBDV = preprocessing.Imputer(missing_values='NaN', strategy='most_frequent', axis=1, copy=True)
            
        numRow = len(self.csv) - 1  # -1 because of the header row in the csv
        numSIV = 0
        numCIV = 0
        for variable in self.variables:
            if variable.selectedType.get() == 'Scalar IV':
                numSIV += 1
            elif variable.selectedType.get() == 'Categorical IV':
                numCIV += 1

        SIV = np.empty(shape=(numRow,numSIV))
        i = 0
        for variable in self.variables:
            if variable.selectedType.get() == 'Scalar IV':
                SIV[:,i] = np.asarray(variable.values).T
                i += 1

        CIV = np.empty(shape=(numRow,numCIV))
        i = 0
        for variable in self.variables:
            if variable.selectedType.get() == 'Categorical IV':
                variable.catDict = variable.makeCatDict()
                temp = []
                for v in variable.values:
                    temp.append(variable.catDict.get(v, None))
                CIV[:,i] = np.asarray(temp).T
                i += 1

        self.variables[self.indexDV].catDict = self.dvFrame.makeCatDict()
        temp = []
        for v in self.variables[self.indexDV].values:
            temp.append(self.variables[self.indexDV].catDict[v])
        y = np.asarray(temp).T
        
        if data_parameters['cleanup'] == 'delete':
            SIV = np.delete(SIV, maskRow, axis=0)
            CIV = np.delete(CIV, maskRow, axis=0)
            y = np.delete(y, maskRow, axis=0)
        else:
            imputerSIV.fit(SIV)
            SIV = imputerSIV.transform(SIV)
            imputerCIV.fit(CIV)
            CIV = imputerCIV.transform(CIV)
            imputerBDV.fit(y)
            y = imputerBDV.transform(y)[0]

        if data_parameters['scale']:
            self.stdScaler = preprocessing.StandardScaler().fit(SIV)
            SIV = self.stdScaler.transform(SIV)

        if data_parameters['oneHot']:
            self.encScaler = preprocessing.OneHotEncoder().fit(CIV)
            CIV = self.encScaler.transform(CIV).toarray()

        X = np.concatenate((SIV, CIV), axis=1)

        SVM.skSVM(X, y, scoring, tuned_parameters, data_parameters, cv_parameters)
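
SVM.skSVM itself is not shown here; a plausible minimal sketch of a grid-searched SVC fit along these lines (the function name, defaults and parameter handling below are assumptions, not the project's actual implementation):

# Hedged sketch only -- not the project's SVM.skSVM.
# Assumes scoring is a list of scorer names and tuned_parameters a param grid.
from sklearn import svm
from sklearn.model_selection import GridSearchCV, train_test_split

def skSVM_sketch(X, y, scoring, tuned_parameters, cv=5):
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.25, random_state=0)
    for score in scoring:
        clf = GridSearchCV(svm.SVC(), tuned_parameters, cv=cv, scoring=score)
        clf.fit(X_tr, y_tr)
        print("best params (%s): %s" % (score, clf.best_params_))
        print("held-out score: %.3f" % clf.score(X_te, y_te))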