def loading_target_CRFs(command):
    """Load the CRF target labels for one data source.

    Args:
        command: Name of the data source. One of 'twitter', 'sgforums'
            or 'facebook'.

    Returns:
        A numpy array built from the result of ``load_target_label`` applied
        to the (optionally filtered) lines of the source's label file.

    Raises:
        ValueError: If ``command`` is not one of the known data sources.
    """
    data_root = 'D:/Project/Transportation_SMU-NEC_collaboration/Data'

    # command -> (label directory, label file name, filterTxt_CRF mode).
    # A mode of None means the raw lines are used without filtering.
    sources = {
        'twitter': (
            data_root + '/twitter/labeling_CRF',
            'labeling_all.txt',
            'removeLink',
        ),
        'sgforums': (
            data_root + '/sgforums/20152207_singaporebuses_all_posts/labeling_CRF',
            'Label_all_crf.txt',
            None,
        ),
        'facebook': (
            data_root + '/facebook/BusNews/labeling_CRF',
            'label.txt',
            'removePunc',
        ),
    }

    # Fail early with a clear message; previously an unknown command left
    # list_line_ unbound and surfaced as an UnboundLocalError further down.
    if command not in sources:
        raise ValueError(
            'unknown command %r; expected one of: %s'
            % (command, ', '.join(sorted(sources)))
        )

    path_, name_, filter_mode = sources[command]
    list_line_ = load_file(path_, name_)
    if filter_mode is not None:
        list_line_ = filterTxt_CRF(list_line_, command=filter_mode)

    Y = np.array(load_target_label(list_line_))
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print('Finish loading target label ' + command)
    return Y
# Driver script: load word-vector CRF features and target labels for the
# twitter data, then run the CRF cross-validation helper.

# Feature directory for this run. The commented-out alternatives are the
# other word-vector feature sets used in earlier runs; swap one in to
# change the feature set.
# path_ftr = 'D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF/crf_features/features_rmLinkWordVector/wordVec_30'
# path_ftr = 'D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF/crf_features/features_rmLinkWordVector/wordVec_70'
# path_ftr = 'D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF/crf_features/features_rmLinkWordVector/wordVec_100'
# path_ftr = 'D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF/crf_features/features_rmLinkWordVector/wordVec_150'
path_ftr = "D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF/crf_features/features_rmLinkWordVector/wordVec_200"
files_ = folder_files(path_ftr)
features = featuers_CRF(files_, path_ftr)

# Construct the feature array for the CRF from the loaded features.
X = np.array(construct_ftr_CRF_wordVector(features))
# Single-argument parenthesised print gives the same output on Python 2
# and is also valid on Python 3.
print(X.shape)
print("Finish loading features for CRF")

# Load the target labels from the twitter labeling file.
path_ = "D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF"
name_ = "labeling_all.txt"
list_line_ = filterTxt_CRF(load_file(path_, name_), "removeLink", "model")
Y = np.array(load_target_label(list_line_))
print(Y.shape)
print("Finish loading target label")

# Debug aid: compare the per-item lengths of features and labels.
# for index in range(0, len(X)):
#     print(len(X[index]), len(Y[index]))

# Run the CRF cross-validation helper with K=5. Exactly one mode is active;
# the descriptions below are inferred from the command names -- TODO confirm
# against n_cross_valid_crf.
# n_cross_valid_crf(X, Y, K=5, command='metrics_F1')  # report F1 for classification
n_cross_valid_crf(X, Y, K=5, command="confusion_matrix")  # report the confusion matrix
# n_cross_valid_crf(X, Y, K=5, command='write_results')  # presumably writes the results out

# Optional timing; relies on a `start` timestamp taken earlier in the file.
# stop = timeit.default_timer()
# print('Finish running CRF model %.3f sec' % (stop - start))