def predict_third_set(gram_train, gram_test, y_label, scale=20000, max_iter=1, lambd=0.00001):
    # sum the three per-kernel Gram matrices into one combined kernel
    gram_train = gram_train[0] + gram_train[1] + gram_train[2]
    gram_test = gram_test[0] + gram_test[1] + gram_test[2]
    # kernel ridge-style learner on the scaled Gram matrix
    krl = KRL(gram_m=gram_train / scale, max_iter=max_iter, lambd=lambd)
    krl.fit(np.array(y_label))
    y_pred_krl = krl.predict(gram_test / scale)
    # precomputed-kernel SVM on the unscaled Gram matrix
    clf = SVM(gram_m=gram_train)
    clf.fit(np.array(y_label))
    y_pred_svm = clf.predict(gram_test)
    # ensemble the two classifiers by the sign of their summed outputs
    y_pred = np.sign(y_pred_svm + y_pred_krl)
    return y_pred
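A minimal sketch of how the three per-kernel Gram matrices fed to predict_third_set could be assembled, assuming a plain linear kernel stands in for whatever kernels the original pipeline uses; the toy data and shapes are purely illustrative.

# Illustrative only: a linear kernel stands in for the real kernels, and the
# random features are placeholders for the actual three feature views.
import numpy as np

def linear_gram(X_a, X_b):
    # K[i, j] = <X_a[i], X_b[j]>, the form a precomputed-kernel classifier expects
    return X_a @ X_b.T

rng = np.random.default_rng(0)
train_views = [rng.normal(size=(100, 64)) for _ in range(3)]
test_views = [rng.normal(size=(40, 64)) for _ in range(3)]
y_label = rng.choice([-1, 1], size=100)

gram_train = [linear_gram(X, X) for X in train_views]                        # 100 x 100 each
gram_test = [linear_gram(Xt, X) for Xt, X in zip(test_views, train_views)]   # 40 x 100 each
# y_pred = predict_third_set(gram_train, gram_test, y_label)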
def getConfidenceScores(features_train, labels_train, C):
    train_confidence = []
    # confidence scores for training data are computed using K-fold cross validation
    kfold = KFold(features_train.shape[0], n_folds=10)
    for train_index, test_index in kfold:
        X_train, X_test = features_train[train_index], features_train[test_index]
        y_train, y_test = labels_train[train_index], labels_train[test_index]
        # train classifier on the subset of train data
        m = SVM.train(X_train, y_train, c=C, k="linear")
        # predict confidence for held-out data and append it to the list
        conf = m.decision_function(X_test)
        for x in conf:
            train_confidence.append(x)
    return np.array(train_confidence)
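The snippet above uses the legacy sklearn.cross_validation KFold signature; a rough equivalent against the current sklearn.model_selection API is sketched below, with sklearn's LinearSVC standing in for the project's SVM wrapper and binary labels assumed.

# Sketch only: LinearSVC replaces the project's SVM.train wrapper, and labels
# are assumed binary so decision_function returns one score per sample.
import numpy as np
from sklearn.model_selection import KFold
from sklearn.svm import LinearSVC

def get_confidence_scores(features_train, labels_train, C):
    confidences = np.empty(features_train.shape[0])
    for train_index, test_index in KFold(n_splits=10).split(features_train):
        clf = LinearSVC(C=C)
        clf.fit(features_train[train_index], labels_train[train_index])
        # signed distance to the separating hyperplane for the held-out fold
        confidences[test_index] = clf.decision_function(features_train[test_index])
    return confidences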
(X_train, Y_train), (X_test, Y_test) = imdb.load_data(num_words=max_features)
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)

model = Sequential()
model.add(Embedding(max_features, 128))
model.add(LSTM(128, dropout=0.5, recurrent_dropout=0.5))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

print("Train...")
model.fit(X_train, Y_train, batch_size=batch_size, epochs=15)
score, accuracy = model.evaluate(X_test, Y_test, batch_size=batch_size)
print('Test score: {0}'.format(score))
print('Test accuracy: {0}'.format(accuracy))

if sys.argv[1] != 'rnn':
    print("SVM: {0}".format(SVM(X_train, Y_train, X_test, Y_test)))
    print("Naive Bayes: {0}".format(multinomialNB(X_train, Y_train, X_test, Y_test)))
    print("Logistic Regression: {0}".format(LR(X_train, Y_train, X_test, Y_test)))
    print("Fully connected Neural Net: {0}".format(NN(X_train, Y_train, X_test, Y_test)))
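The IMDB script above references max_features, maxlen and batch_size without defining them in this excerpt; the values below are the conventional ones for the Keras IMDB LSTM example and are assumptions, not the original script's settings (the imports may live under keras rather than tensorflow.keras depending on the version in use).

# Assumed setup for the excerpt above; the original definitions are not shown here.
import sys
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

max_features = 20000   # vocabulary size kept by imdb.load_data
maxlen = 80            # pad/truncate each review to this many tokens
batch_size = 32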
X2_test = test_data_mat_2[0].str.split(' ').values
for i, lst in enumerate(X2_test):
    X2_test[i] = np.array([float(x) for x in lst])
X2_test = np.vstack(X2_test)

X3_test = test_data_mat_3[0].str.split(' ').values
for i, lst in enumerate(X3_test):
    X3_test[i] = np.array([float(x) for x in lst])
X3_test = np.vstack(X3_test)

# default, used when the config names an unknown kernel or classifier
classifier = None

if config.Kernel == 'linear':
    print("-- This will take a few milliseconds per dataset to compute the kernel matrix --\n")
    if config.classifier == 'SVM':
        classifier = SVM(kernel_name=config.Kernel, kernel=linear_kernel, C=20)
    elif config.classifier == 'RIDGE':
        classifier = Ridge_Classifier(lam=1e-8, kernel_name=config.Kernel, kernel=linear_kernel, loss_func=log_rg_loss)
elif config.Kernel == 'rbf':
    print("-- This will take a few milliseconds per dataset to compute the kernel matrix --\n")
    if config.classifier == 'SVM':
        classifier = SVM(kernel_name=config.Kernel, kernel=rbf_kernel, C=20)
    elif config.classifier == 'RIDGE':
        classifier = Ridge_Classifier(lam=1e-8, kernel_name=config.Kernel, kernel=rbf_kernel, loss_func=log_rg_loss)
else:
    print("Kernel not found")

if classifier is not None:
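linear_kernel and rbf_kernel are referenced above but not defined in this excerpt; minimal numpy sketches of what such functions typically compute are shown below, with the signatures and the gamma default being assumptions.

# Assumed shapes: X is (n, d), Y is (m, d); each function returns an (n, m) kernel matrix.
import numpy as np

def linear_kernel(X, Y):
    return X @ Y.T

def rbf_kernel(X, Y, gamma=0.1):
    # ||x - y||^2 expanded as ||x||^2 + ||y||^2 - 2 x.y to avoid explicit loops
    sq_dists = (
        np.sum(X ** 2, axis=1)[:, None]
        + np.sum(Y ** 2, axis=1)[None, :]
        - 2.0 * (X @ Y.T)
    )
    return np.exp(-gamma * sq_dists)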
def main(args):
    # Create algorithm objects
    lbp = LBP()
    detector = FaceDetector()
    svm = SVM()
    knn = KNearest()

    # Get subjects to train the svm on
    imgs = [
        '/home/arthur/Downloads/lfw_funneled/Gian_Marco/Gian_Marco_0001.jpg',
        '/home/arthur/Downloads/lfw_funneled/Micky_Ward/Micky_Ward_0001.jpg',
        '/home/arthur/Downloads/lfw_funneled/Ziwang_Xu/Ziwang_Xu_0001.jpg',
        '/home/arthur/Downloads/lfw_funneled/Zhu_Rongji/Zhu_Rongji_0001.jpg'
    ]

    # Load the subjects and extract their features
    hists, labels = load_subjects(imgs, detector, lbp)

    # Transform to np arrays
    samples = np.array(hists, dtype=np.float32)
    labels = np.array(labels, dtype=np.int)

    # Train classifiers
    svm.train(samples, labels)
    knn.train(samples, labels)

    # Check which mode the app is running in (image vs. video)
    if args.image is not None:
        # Read the image from the file path provided
        img = cv2.imread(args.image, 0)
        # Check the image exists
        if img is not None:
            # Run face recognition algorithm
            classify_snapshot(img, detector, lbp, knn)
        else:
            print('The image could not be found...')
        return

    # Establish connection to camera
    cap = cv2.VideoCapture(0)

    # Continuously grab the next frame from the camera
    while cap.isOpened():
        # Capture frame-by-frame
        ret, frame = cap.read()
        # Start timer for performance logging
        start = time.time()
        # Convert frame to gray scale for face detector
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Detect a face in the frame and crop the image
        face_coords = detector.detect(gray)
        face = detector.crop_face(gray, face_coords)
        # Check we have detected a face
        if face is not None:
            # Apply LBP operator to get feature descriptor
            hist, bins = lbp.run(face, False)
            # Convert the LBP descriptor to numpy array for opencv classifiers
            test_sample = np.array([hist], dtype=np.float32)
            # Get the class id of the closest neighbour and its distance
            dist, class_id = knn.predict(test_sample)
            # Draw the face if found
            util.draw_face(dist, class_id, frame, face_coords)
            # util.segment_face(frame)
        # Processing finished
        end = time.time()
        # Write the fps to the video
        util.write_fps(start, end, frame)
        # Display the resulting frame
        cv2.imshow('frame', frame)
        # Check if we should stop the application
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # When everything done, release the capture
    cap.release()
    cv2.destroyAllWindows()
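main(args) expects an object with an image attribute; a hypothetical argparse entry point that would satisfy it is sketched below (the real script's CLI is not shown in this excerpt).

# Hypothetical CLI wiring; the flag name is chosen to match the args.image attribute used above.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='LBP + LFW face recognition demo')
    parser.add_argument('--image', help='classify a single snapshot instead of the webcam feed')
    main(parser.parse_args())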
def classify(messages_train, labels_train, messages_test, process_messages_train, process_messages_test, tokens_train, tokens_test, process_tokens_train, process_tokens_test, pos_tags_train, pos_tags_test, negationList, clusters, slangDictionary, lexicons, mpqa_lexicons):
    # 0 - negative messages
    # 1 - positive messages
    labels_train = [0 if x == "negative" else 1 for x in labels_train]

    # compute pos tag bigrams for all messages
    pos_bigrams_train = getBigrams(pos_tags_train)
    pos_bigrams_test = getBigrams(pos_tags_test)

    # compute pos tag trigrams for all messages
    pos_trigrams_train = getTrigrams(pos_tags_train)
    pos_trigrams_test = getTrigrams(pos_tags_test)

    # get the unique pos tags, bigrams and trigrams from the training set
    unique_pos_tags = getPosTagsSet(pos_tags_train)
    unique_bigrams = getBigramsSet(pos_bigrams_train)
    unique_trigrams = getTrigramsSet(pos_trigrams_train)

    # calculate pos tag scores for both categories
    # both dictionaries will be used for training and testing (cannot create new ones for testing because we don't know the labels of the new messages)
    pos_tags_scores_negative = posTagsScore(unique_pos_tags, 0, pos_tags_train, labels_train)
    pos_tags_scores_positive = posTagsScore(unique_pos_tags, 1, pos_tags_train, labels_train)

    # calculate pos bigram scores for both categories
    pos_bigrams_scores_negative = posBigramsScore(unique_bigrams, 0, pos_bigrams_train, labels_train)
    pos_bigrams_scores_positive = posBigramsScore(unique_bigrams, 1, pos_bigrams_train, labels_train)

    # calculate pos trigram scores for both categories
    pos_trigrams_scores_negative = posTrigramsScore(unique_trigrams, 0, pos_trigrams_train, labels_train)
    pos_trigrams_scores_positive = posTrigramsScore(unique_trigrams, 1, pos_trigrams_train, labels_train)

    # assign a precision and F1 score to each word of all mpqa lexicons
    mpqaScores = getScores(mpqa_lexicons, process_messages_train, labels_train)

    # get features from train messages
    features_train = features.getFeatures(messages_train, process_messages_train, tokens_train, process_tokens_train, pos_tags_train, slangDictionary, lexicons, mpqa_lexicons, pos_bigrams_train, pos_trigrams_train, pos_bigrams_scores_negative, pos_bigrams_scores_positive, pos_trigrams_scores_negative, pos_trigrams_scores_positive, pos_tags_scores_negative, pos_tags_scores_positive, mpqaScores, negationList, clusters)

    # regularize train features
    features_train = regularization.regularize(features_train)

    # get features from test messages
    features_test = features.getFeatures(messages_test, process_messages_test, tokens_test, process_tokens_test, pos_tags_test, slangDictionary, lexicons, mpqa_lexicons, pos_bigrams_test, pos_trigrams_test, pos_bigrams_scores_negative, pos_bigrams_scores_positive, pos_trigrams_scores_negative, pos_trigrams_scores_positive, pos_tags_scores_negative, pos_tags_scores_positive, mpqaScores, negationList, clusters)

    # regularize test features
    features_test = regularization.regularize(features_test)

    # feature selection
    # features_train, features_test = selection.feature_selection(features_train, labels_train, features_test, 1150)

    # C parameter of SVM
    C = 0.001953125
    # C = 19.3392161013

    # train classifier and return trained model
    # model = LogisticRegression.train(features_train, labels_train)
    model = SVM.train(features_train, labels_train, c=C, k="linear")

    # predict labels
    # prediction = LogisticRegression.predict(features_test, model)
    prediction = SVM.predict(features_test, model)

    return prediction
def main(f):
    print "System training started"

    # load training dataset
    dataset_train = f
    ids, labels_train, messages_train = tsvreader.opentsv(dataset_train)
    print "Train data loaded"

    # labels for subjectivity detection (2 categories)
    temp_labels_train = [0 if x == "neutral" else 1 for x in labels_train]
    # labels for polarity detection (3 categories)
    labels_train = [0 if x == "neutral" else -1 if x == "negative" else 1 for x in labels_train]

    # convert labels to numpy arrays
    temp_labels_train = np.array(temp_labels_train)
    labels_train = np.array(labels_train)

    # load word clusters
    clusters = loadClusters()
    print "Clusters loaded"

    # load lexicons
    negationList, slangDictionary, lexicons, mpqa_lexicons = loadLexicons()
    print "Lexicons loaded"

    # tokenize all messages
    tokens_train = tokenize(messages_train)
    print "Messages tokenized"

    # compute pos tags for all messages
    pos_tags_train = arktagger.pos_tag_list(messages_train)
    print "Pos tags computed"

    # compute pos tag bigrams and trigrams
    pos_bigrams_train = getBigrams(pos_tags_train)
    pos_trigrams_train = getTrigrams(pos_tags_train)

    # get the unique pos tags, bigrams and trigrams from the training set
    unique_pos_tags = getPosTagsSet(pos_tags_train)
    unique_bigrams = getBigramsSet(pos_bigrams_train)
    unique_trigrams = getTrigramsSet(pos_trigrams_train)

    # compute POS tag scores
    pos_tags_scores_neutral = posTagsScore(unique_pos_tags, 0, pos_tags_train, labels_train)
    pos_tags_scores_positive = posTagsScore(unique_pos_tags, 1, pos_tags_train, labels_train)
    pos_tags_scores_negative = posTagsScore(unique_pos_tags, -1, pos_tags_train, labels_train)

    pos_bigrams_scores_neutral = posBigramsScore(unique_bigrams, 0, pos_bigrams_train, labels_train)
    pos_bigrams_scores_positive = posBigramsScore(unique_bigrams, 1, pos_bigrams_train, labels_train)
    pos_bigrams_scores_negative = posBigramsScore(unique_bigrams, -1, pos_bigrams_train, labels_train)

    pos_trigrams_scores_neutral = posTrigramsScore(unique_trigrams, 0, pos_trigrams_train, labels_train)
    pos_trigrams_scores_positive = posTrigramsScore(unique_trigrams, 1, pos_trigrams_train, labels_train)
    pos_trigrams_scores_negative = posTrigramsScore(unique_trigrams, -1, pos_trigrams_train, labels_train)

    # compute mpqa scores
    mpqaScores = getScores(mpqa_lexicons, messages_train, labels_train, neutral=True)

    # save scores and other resources for future use
    savePosScores(pos_tags_scores_neutral, pos_tags_scores_positive, pos_tags_scores_negative,
                  pos_bigrams_scores_neutral, pos_bigrams_scores_positive, pos_bigrams_scores_negative,
                  pos_trigrams_scores_neutral, pos_trigrams_scores_positive, pos_trigrams_scores_negative,
                  mpqaScores)
    # save lexicons
    saveLexicons(negationList, slangDictionary, lexicons, mpqa_lexicons)
    # save clusters
    saveClusters(clusters)

    # load Glove embeddings
    d = 200
    glove = GloveDictionary.Glove(d)
    # save Glove embeddings for future use
    saveGlove(glove)

    # Subjectivity Detection features

    # SD1 features
    features_train_1 = features.getFeatures(messages_train, tokens_train, pos_tags_train, slangDictionary, lexicons, mpqa_lexicons, pos_bigrams_train, pos_trigrams_train, pos_bigrams_scores_negative, pos_bigrams_scores_positive, pos_trigrams_scores_negative, pos_trigrams_scores_positive, pos_tags_scores_negative, pos_tags_scores_positive, mpqaScores, negationList, clusters, pos_bigrams_scores_neutral, pos_trigrams_scores_neutral, pos_tags_scores_neutral)

    # SD2 features (Glove centroid of each message's tokens)
    features_train_2 = []
    for i in range(0, len(messages_train)):
        features_train_2.append(glove.findCentroid(tokens_train[i]))
    features_train_2 = np.array(features_train_2)

    # regularize features
    features_train_1 = regularization.regularize(features_train_1)
    features_train_2 = regularization.regularizeHorizontally(features_train_2)

    # Penalty parameter C of the error term for every SD system
    C1 = 0.001953125
    C2 = 1.4068830572470667

    # get confidence scores
    train_confidence_1 = getConfidenceScores(features_train_1, temp_labels_train, C1)
    train_confidence_2 = getConfidenceScores(features_train_2, temp_labels_train, C2)

    # squash confidence scores to (0, 1) with the logistic sigmoid
    sigmoid = lambda x: 1 / (1. + math.exp(-x))
    train_confidence_1 = np.array([sigmoid(conf) for conf in train_confidence_1])
    train_confidence_2 = np.array([sigmoid(conf) for conf in train_confidence_2])

    # train SD classifiers
    sd1 = SVM.train(features_train_1, temp_labels_train, c=C1, k="linear")
    sd2 = SVM.train(features_train_2, temp_labels_train, c=C2, k="linear")

    # Sentiment Polarity features (append confidence scores to SD features)

    # SP1 features
    features_train_1 = np.hstack((features_train_1, train_confidence_1.reshape(train_confidence_1.shape[0], 1)))
    # SP2 features
    features_train_2 = np.hstack((features_train_2, train_confidence_2.reshape(train_confidence_2.shape[0], 1)))

    # Penalty parameter C of the error term for every SP system
    C1 = 0.003410871889693192
    C2 = 7.396183688299606

    # train SP classifiers
    sp1 = SVM.train(features_train_1, labels_train, c=C1, k="linear")
    sp2 = SVM.train(features_train_2, labels_train, c=C2, k="linear")

    # save trained models
    saveModels(sd1, sd2, sp1, sp2)

    print "System training completed!"
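A sketch of how the saved SD/SP pairs might be chained at prediction time, mirroring the training-time construction above; feature extraction for new messages, the decision_function call on the trained model, and the final combination of the two SP systems are all assumptions here.

# Sketch only: mirrors the training-time hstack of sigmoid-squashed SD confidences;
# the project's actual prediction pipeline is not shown in this excerpt.
def predict_polarity(features_1, features_2, sd1, sd2, sp1, sp2):
    sigmoid = lambda x: 1 / (1. + math.exp(-x))
    conf_1 = np.array([sigmoid(c) for c in sd1.decision_function(features_1)])
    conf_2 = np.array([sigmoid(c) for c in sd2.decision_function(features_2)])
    sp_features_1 = np.hstack((features_1, conf_1.reshape(-1, 1)))
    sp_features_2 = np.hstack((features_2, conf_2.reshape(-1, 1)))
    # the original ensembling rule for the two SP systems is not shown here;
    # returning both predictions leaves that choice to the caller
    return SVM.predict(sp_features_1, sp1), SVM.predict(sp_features_2, sp2)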
import sys
import os
import numpy as np
import math
import cv2
from plyfile import PlyData
from LBP import LBP, LocalBinaryPatterns
from classifiers import SVM, KNearest

# todo test the classifier
# todo validate on an 80/20 split

lbp = LBP()
svm = SVM()
knn = KNearest()

hists = []
labels = []


def main(photoface_dir):
    traverse(photoface_dir, describe_face)
    samples = np.array(hists, dtype=np.float32)
    ids = np.array(labels, dtype=np.int)
    # Train classifiers
    svm.train(samples, ids)
    knn.train(samples, ids)
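traverse and describe_face are used above but not defined in this excerpt; the hypothetical sketches below assume the Photoface directory has one sub-folder per subject and that lbp.run returns a (histogram, bins) pair as in the webcam script earlier in this section.

# Hypothetical helpers; the real traversal logic and callback signature may differ.
def describe_face(path, subject_id):
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        return
    hist, _ = lbp.run(img, False)
    hists.append(hist)
    labels.append(subject_id)

def traverse(root, callback):
    # one sub-directory per subject; the folder index serves as the class label
    for subject_id, subject in enumerate(sorted(os.listdir(root))):
        subject_dir = os.path.join(root, subject)
        if not os.path.isdir(subject_dir):
            continue
        for name in sorted(os.listdir(subject_dir)):
            callback(os.path.join(subject_dir, name), subject_id)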
def performCrossvalidationSVM(mat, c):
    scores = SVM.performCrossValidationSVM(mat, c)
    return checkResultsCrossvalidation(scores)
def performLinearSVC(training, test):
    prediction, prediction_prob = SVM.performSVM(training, test)
    return checkResultsPredicted(test, training, prediction, prediction_prob)
nb.predict(data_test_compact)
print("Predict the EVAL set")
y_preds = nb.predict(data_eval_compact)['y_preds']
make_submission_data(y_preds, 'nb_1214.csv')

# run Perceptron -----------------------
perceptron = Perceptron(r=0.1, margin=0.01, n_epoch=20)
perceptron.fit(data_train)
print("Predict the TEST set")
perceptron.predict(data_test, perceptron.weights[-1])
print("Predict the EVAL set")
y_preds = perceptron.predict(data_eval, perceptron.weights[-1])['y_preds']
make_submission_data(y_preds, 'perceptron.csv')

# run SVM
svm = SVM(r=0.01, c=1, n_epoch=17)
svm.fit(data_train)
print("Predict the TEST set")
svm.predict(data_test, svm.weights[-1])
print("Predict the EVAL set")
y_preds = svm.predict(data_eval, svm.weights[-1])['y_preds']
make_submission_data(y_preds, 'svm.csv')

# run Logistic -----------------------------
logistic = Logistic(r=0.01, sigma=100, n_epoch=10)
logistic.fit(data_train)
print("Predict the TEST set")
logistic.predict(data_test, logistic.weights[-1])
print("Predict the EVAL set")
y_preds = logistic.predict(data_eval, logistic.weights[-1])['y_preds']
make_submission_data(y_preds, 'logistic.csv')
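make_submission_data is called throughout but not defined in this excerpt; a hypothetical sketch that writes an id,label CSV is shown below, with the column names and id scheme being assumptions.

# Hypothetical helper; the real submission format may use different columns or ids.
import csv

def make_submission_data(y_preds, filename):
    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['example_id', 'label'])
        for i, y in enumerate(y_preds):
            writer.writerow([i, int(y)])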