def getConfidenceScores(features_train, labels_train, C):
    # confidence scores for the training data are computed with 10-fold cross validation
    train_confidence = []
    kfold = KFold(features_train.shape[0], n_folds=10)

    for train_index, test_index in kfold:
        X_train, X_test = features_train[train_index], features_train[test_index]
        y_train, y_test = labels_train[train_index], labels_train[test_index]

        # train classifier on the subset of train data
        m = SVM.train(X_train, y_train, c=C, k="linear")

        # predict confidence for the held-out fold and append it to the list
        conf = m.decision_function(X_test)
        for x in conf:
            train_confidence.append(x)

    return np.array(train_confidence)
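# Minimal equivalent sketch using the modern scikit-learn API, assuming the
# project's SVM wrapper delegates to sklearn.svm.SVC with a linear kernel
# (the KFold(n, n_folds=...) call above is the pre-0.18 interface). The function
# name below is hypothetical; cross_val_predict with method="decision_function"
# returns one out-of-fold confidence score per training sample, which is what
# getConfidenceScores builds by hand.
from sklearn.model_selection import KFold as SkKFold, cross_val_predict
from sklearn.svm import SVC


def get_confidence_scores_sklearn(features_train, labels_train, C):
    clf = SVC(C=C, kernel="linear")
    cv = SkKFold(n_splits=10)
    return cross_val_predict(clf, features_train, labels_train,
                             cv=cv, method="decision_function")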
def main(args):
    # Create algorithm objects
    lbp = LBP()
    detector = FaceDetector()
    svm = SVM()
    knn = KNearest()

    # Get subjects to train the svm on
    imgs = ['/home/arthur/Downloads/lfw_funneled/Gian_Marco/Gian_Marco_0001.jpg',
            '/home/arthur/Downloads/lfw_funneled/Micky_Ward/Micky_Ward_0001.jpg',
            '/home/arthur/Downloads/lfw_funneled/Ziwang_Xu/Ziwang_Xu_0001.jpg',
            '/home/arthur/Downloads/lfw_funneled/Zhu_Rongji/Zhu_Rongji_0001.jpg']

    # Load the subjects and extract their features
    hists, labels = load_subjects(imgs, detector, lbp)

    # Transform to np arrays (OpenCV classifiers expect float32 samples and int32 labels)
    samples = np.array(hists, dtype=np.float32)
    labels = np.array(labels, dtype=np.int32)

    # Train classifiers
    svm.train(samples, labels)
    knn.train(samples, labels)

    # Check which mode the app is running in (image vs. video)
    if args.image is not None:
        # Read the image from the file path provided
        img = cv2.imread(args.image, 0)
        # Check the image exists
        if img is not None:
            # Run face recognition algorithm
            classify_snapshot(img, detector, lbp, knn)
        else:
            print('The image could not be found...')
        return

    # Establish connection to camera
    cap = cv2.VideoCapture(0)

    # Continuously grab the next frame from the camera
    while cap.isOpened():
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            # Stop if the frame could not be read
            break

        # Start timer for performance logging
        start = time.time()

        # Convert frame to grayscale for the face detector
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Detect a face in the frame and crop the image
        face_coords = detector.detect(gray)
        face = detector.crop_face(gray, face_coords)

        # Check we have detected a face
        if face is not None:
            # Apply LBP operator to get feature descriptor
            hist, bins = lbp.run(face, False)

            # Convert the LBP descriptor to a numpy array for OpenCV classifiers
            test_sample = np.array([hist], dtype=np.float32)

            # Get the class id of the closest neighbour and its distance
            dist, class_id = knn.predict(test_sample)

            # Draw the face if found
            util.draw_face(dist, class_id, frame, face_coords)
            # util.segment_face(frame)

        # Processing finished
        end = time.time()

        # Write the fps to the video
        util.write_fps(start, end, frame)

        # Display the resulting frame
        cv2.imshow('frame', frame)

        # Check if we should stop the application
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # When everything is done, release the capture
    cap.release()
    cv2.destroyAllWindows()
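# Hedged usage sketch: main() only requires an object with an `image` attribute,
# so a plausible entry point (assumed here, not taken from the project; the flag
# name --image is an assumption) parses an optional image path with argparse and
# falls back to webcam mode when the flag is omitted.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='LBP face recognition demo')
    # When --image is omitted, args.image stays None and the webcam loop runs.
    parser.add_argument('--image', default=None, help='path to a snapshot to classify')
    main(parser.parse_args())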
def classify(messages_train, labels_train, messages_test,
             process_messages_train, process_messages_test,
             tokens_train, tokens_test,
             process_tokens_train, process_tokens_test,
             pos_tags_train, pos_tags_test,
             negationList, clusters, slangDictionary, lexicons, mpqa_lexicons):
    # 0 - negative messages
    # 1 - positive messages
    labels_train = [0 if x == "negative" else 1 for x in labels_train]

    # compute pos tag bigrams for all messages
    pos_bigrams_train = getBigrams(pos_tags_train)
    pos_bigrams_test = getBigrams(pos_tags_test)

    # compute pos tag trigrams for all messages
    pos_trigrams_train = getTrigrams(pos_tags_train)
    pos_trigrams_test = getTrigrams(pos_tags_test)

    # get the unique pos tags, bigrams and trigrams from the training set
    unique_pos_tags = getPosTagsSet(pos_tags_train)
    unique_bigrams = getBigramsSet(pos_bigrams_train)
    unique_trigrams = getTrigramsSet(pos_trigrams_train)

    # calculate pos tag scores for both categories
    # these dictionaries are reused for training and testing (we cannot build new
    # ones for testing because the labels of the new messages are unknown)
    pos_tags_scores_negative = posTagsScore(unique_pos_tags, 0, pos_tags_train, labels_train)
    pos_tags_scores_positive = posTagsScore(unique_pos_tags, 1, pos_tags_train, labels_train)

    # calculate pos bigram scores for both categories
    pos_bigrams_scores_negative = posBigramsScore(unique_bigrams, 0, pos_bigrams_train, labels_train)
    pos_bigrams_scores_positive = posBigramsScore(unique_bigrams, 1, pos_bigrams_train, labels_train)

    # calculate pos trigram scores for both categories
    pos_trigrams_scores_negative = posTrigramsScore(unique_trigrams, 0, pos_trigrams_train, labels_train)
    pos_trigrams_scores_positive = posTrigramsScore(unique_trigrams, 1, pos_trigrams_train, labels_train)

    # assign a precision and F1 score to each word of all mpqa lexicons
    mpqaScores = getScores(mpqa_lexicons, process_messages_train, labels_train)

    # get features from train messages
    features_train = features.getFeatures(
        messages_train, process_messages_train, tokens_train, process_tokens_train,
        pos_tags_train, slangDictionary, lexicons, mpqa_lexicons,
        pos_bigrams_train, pos_trigrams_train,
        pos_bigrams_scores_negative, pos_bigrams_scores_positive,
        pos_trigrams_scores_negative, pos_trigrams_scores_positive,
        pos_tags_scores_negative, pos_tags_scores_positive,
        mpqaScores, negationList, clusters)

    # regularize train features
    features_train = regularization.regularize(features_train)

    # get features from test messages
    features_test = features.getFeatures(
        messages_test, process_messages_test, tokens_test, process_tokens_test,
        pos_tags_test, slangDictionary, lexicons, mpqa_lexicons,
        pos_bigrams_test, pos_trigrams_test,
        pos_bigrams_scores_negative, pos_bigrams_scores_positive,
        pos_trigrams_scores_negative, pos_trigrams_scores_positive,
        pos_tags_scores_negative, pos_tags_scores_positive,
        mpqaScores, negationList, clusters)

    # regularize test features
    features_test = regularization.regularize(features_test)

    # feature selection
    # features_train, features_test = selection.feature_selection(features_train, labels_train, features_test, 1150)

    # C parameter of SVM
    C = 0.001953125
    # C = 19.3392161013

    # train classifier and return trained model
    # model = LogisticRegression.train(features_train, labels_train)
    model = SVM.train(features_train, labels_train, c=C, k="linear")

    # predict labels
    # prediction = LogisticRegression.predict(features_test, model)
    prediction = SVM.predict(features_test, model)

    return prediction
def main(f):
    print "System training started"

    # load training dataset
    dataset_train = f
    ids, labels_train, messages_train = tsvreader.opentsv(dataset_train)
    print "Train data loaded"

    # labels for subjectivity detection (2 categories)
    temp_labels_train = [0 if x == "neutral" else 1 for x in labels_train]
    # labels for polarity detection (3 categories)
    labels_train = [0 if x == "neutral" else -1 if x == "negative" else 1 for x in labels_train]

    # convert labels to numpy arrays
    temp_labels_train = np.array(temp_labels_train)
    labels_train = np.array(labels_train)

    # load word clusters
    clusters = loadClusters()
    print "Clusters loaded"

    # load lexicons
    negationList, slangDictionary, lexicons, mpqa_lexicons = loadLexicons()
    print "Lexicons loaded"

    # tokenize all messages
    tokens_train = tokenize(messages_train)
    print "Messages tokenized"

    # compute pos tags for all messages
    pos_tags_train = arktagger.pos_tag_list(messages_train)
    print "Pos tags computed"

    # compute pos tag bigrams and trigrams
    pos_bigrams_train = getBigrams(pos_tags_train)
    pos_trigrams_train = getTrigrams(pos_tags_train)

    # get the unique pos tags, bigrams and trigrams from the training set
    unique_pos_tags = getPosTagsSet(pos_tags_train)
    unique_bigrams = getBigramsSet(pos_bigrams_train)
    unique_trigrams = getTrigramsSet(pos_trigrams_train)

    # compute POS tag, bigram and trigram scores per category
    pos_tags_scores_neutral = posTagsScore(unique_pos_tags, 0, pos_tags_train, labels_train)
    pos_tags_scores_positive = posTagsScore(unique_pos_tags, 1, pos_tags_train, labels_train)
    pos_tags_scores_negative = posTagsScore(unique_pos_tags, -1, pos_tags_train, labels_train)

    pos_bigrams_scores_neutral = posBigramsScore(unique_bigrams, 0, pos_bigrams_train, labels_train)
    pos_bigrams_scores_positive = posBigramsScore(unique_bigrams, 1, pos_bigrams_train, labels_train)
    pos_bigrams_scores_negative = posBigramsScore(unique_bigrams, -1, pos_bigrams_train, labels_train)

    pos_trigrams_scores_neutral = posTrigramsScore(unique_trigrams, 0, pos_trigrams_train, labels_train)
    pos_trigrams_scores_positive = posTrigramsScore(unique_trigrams, 1, pos_trigrams_train, labels_train)
    pos_trigrams_scores_negative = posTrigramsScore(unique_trigrams, -1, pos_trigrams_train, labels_train)

    # compute mpqa scores
    mpqaScores = getScores(mpqa_lexicons, messages_train, labels_train, neutral=True)

    # save scores and other resources for future use
    savePosScores(pos_tags_scores_neutral, pos_tags_scores_positive, pos_tags_scores_negative,
                  pos_bigrams_scores_neutral, pos_bigrams_scores_positive, pos_bigrams_scores_negative,
                  pos_trigrams_scores_neutral, pos_trigrams_scores_positive, pos_trigrams_scores_negative,
                  mpqaScores)
    # save lexicons
    saveLexicons(negationList, slangDictionary, lexicons, mpqa_lexicons)
    # save clusters
    saveClusters(clusters)

    # load Glove embeddings
    d = 200
    glove = GloveDictionary.Glove(d)

    # save Glove embeddings for future use
    saveGlove(glove)

    # Subjectivity Detection Features

    # SD1 features
    features_train_1 = features.getFeatures(
        messages_train, tokens_train, pos_tags_train, slangDictionary, lexicons, mpqa_lexicons,
        pos_bigrams_train, pos_trigrams_train,
        pos_bigrams_scores_negative, pos_bigrams_scores_positive,
        pos_trigrams_scores_negative, pos_trigrams_scores_positive,
        pos_tags_scores_negative, pos_tags_scores_positive,
        mpqaScores, negationList, clusters,
        pos_bigrams_scores_neutral, pos_trigrams_scores_neutral, pos_tags_scores_neutral)

    # SD2 features: centroid of the Glove vectors of each message's tokens
    features_train_2 = []
    for i in range(0, len(messages_train)):
        features_train_2.append(glove.findCentroid(tokens_train[i]))
    features_train_2 = np.array(features_train_2)

    # regularize features
    features_train_1 = regularization.regularize(features_train_1)
    features_train_2 = regularization.regularizeHorizontally(features_train_2)

    # Penalty parameter C of the error term for every SD system
    C1 = 0.001953125
    C2 = 1.4068830572470667

    # get out-of-fold confidence scores
    train_confidence_1 = getConfidenceScores(features_train_1, temp_labels_train, C1)
    train_confidence_2 = getConfidenceScores(features_train_2, temp_labels_train, C2)

    # normalize confidence scores to (0, 1)
    # (named "softmax" here, but this is the one-dimensional logistic sigmoid)
    softmax = lambda x: 1 / (1. + math.exp(-x))
    train_confidence_1 = [softmax(conf) for conf in train_confidence_1]
    train_confidence_2 = [softmax(conf) for conf in train_confidence_2]

    train_confidence_1 = np.array(train_confidence_1)
    train_confidence_2 = np.array(train_confidence_2)

    # train SD classifiers
    sd1 = SVM.train(features_train_1, temp_labels_train, c=C1, k="linear")
    sd2 = SVM.train(features_train_2, temp_labels_train, c=C2, k="linear")

    # Sentiment Polarity Features (append confidence scores to SD features)

    # SP1 features
    features_train_1 = np.hstack((features_train_1, train_confidence_1.reshape(train_confidence_1.shape[0], 1)))
    # SP2 features
    features_train_2 = np.hstack((features_train_2, train_confidence_2.reshape(train_confidence_2.shape[0], 1)))

    # Penalty parameter C of the error term for every SP system
    C1 = 0.003410871889693192
    C2 = 7.396183688299606

    # train SP classifiers
    sp1 = SVM.train(features_train_1, labels_train, c=C1, k="linear")
    sp2 = SVM.train(features_train_2, labels_train, c=C2, k="linear")

    # save trained models
    saveModels(sd1, sd2, sp1, sp2)

    print "System training completed!"
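# Minimal self-contained sketch of the confidence-stacking step above: the SVM
# decision-function scores are squashed into (0, 1) with a logistic sigmoid and
# appended as one extra column to the feature matrix. The array values below are
# made up purely for illustration.
import numpy as np

decision_scores = np.array([-2.0, 0.0, 1.5])          # stand-in out-of-fold SVM margins
confidence = 1.0 / (1.0 + np.exp(-decision_scores))   # logistic sigmoid, values in (0, 1)

sd_features = np.zeros((3, 4))                         # stand-in for features_train_1
sp_features = np.hstack((sd_features, confidence.reshape(-1, 1)))
assert sp_features.shape == (3, 5)                     # one extra confidence column appended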