# Root folder that holds every data set used by the CRF experiments.
_CRF_FTR_DATA_ROOT = 'D:/Project/Transportation_SMU-NEC_collaboration/Data'

# Feature folders used when a model is TRAINED on a given source.
_CRF_FTR_TRAIN_TWITTER = _CRF_FTR_DATA_ROOT + '/twitter/labeling_CRF/crf_features/features_rmLink'
_CRF_FTR_TRAIN_SGFORUMS = (_CRF_FTR_DATA_ROOT
                           + '/sgforums/20152207_singaporebuses_all_posts/labeling_CRF/crf_features/features')
_CRF_FTR_TRAIN_FACEBOOK = _CRF_FTR_DATA_ROOT + '/facebook/BusNews/labeling_CRF/crf_features/features'

# Folder that holds the features used when a model is TESTED on another source.
_CRF_FTR_COMPARE_ROOT = _CRF_FTR_DATA_ROOT + '/CRFs_compareModel'

# command -> folder containing the CRF feature files for that command.
# Command names follow '<train source>_vs_<test source>_<source>_<training|testing>'.
_CRF_FTR_DIRS = {
    # twitter model evaluated on sgforums
    'twitter_vs_sgforums_twitter_training': _CRF_FTR_TRAIN_TWITTER,
    'twitter_vs_sgforums_sgforums_testing': _CRF_FTR_COMPARE_ROOT + '/sgforums/ftr_twitter',
    # twitter model evaluated on facebook
    'twitter_vs_facebook_twitter_training': _CRF_FTR_TRAIN_TWITTER,
    'twitter_vs_facebook_facebook_testing': _CRF_FTR_COMPARE_ROOT + '/facebook/ftr_twitter',
    # sgforums model evaluated on twitter
    'sgforums_vs_twitter_sgforums_training': _CRF_FTR_TRAIN_SGFORUMS,
    'sgforums_vs_twitter_twitter_testing': _CRF_FTR_COMPARE_ROOT + '/twitter/ftr_sgforums',
    # sgforums model evaluated on facebook
    'sgforums_vs_facebook_sgforums_training': _CRF_FTR_TRAIN_SGFORUMS,
    'sgforums_vs_facebook_facebook_testing': _CRF_FTR_COMPARE_ROOT + '/facebook/ftr_sgforums',
    # facebook model evaluated on twitter
    'facebook_vs_twitter_facebook_training': _CRF_FTR_TRAIN_FACEBOOK,
    'facebook_vs_twitter_twitter_testing': _CRF_FTR_COMPARE_ROOT + '/twitter/ftr_facebook',
    # facebook model evaluated on sgforums
    'facebook_vs_sgforums_facebook_training': _CRF_FTR_TRAIN_FACEBOOK,
    'facebook_vs_sgforums_sgforums_testing': _CRF_FTR_COMPARE_ROOT + '/sgforums/ftr_facebook',
}


def loading_ftr_CRFs(command):
    """Load the CRF features selected by *command*.

    The command picks the folder of feature files (see ``_CRF_FTR_DIRS``);
    the files are listed with ``folder_files``, read with ``featuers_CRF``
    and converted with ``construct_ftr_CRF``.

    Args:
        command: one of the keys of ``_CRF_FTR_DIRS``, e.g.
            ``'twitter_vs_sgforums_twitter_training'``.

    Returns:
        ``np.array`` built from the output of ``construct_ftr_CRF``.

    Raises:
        ValueError: if *command* is not a known command.  (Previously an
            unknown command surfaced as an ``UnboundLocalError`` on
            ``path_ftr``, which hid the real cause.)
    """
    try:
        path_ftr = _CRF_FTR_DIRS[command]
    except KeyError:
        raise ValueError(
            'Unknown command for loading CRF features: %r (expected one of: %s)'
            % (command, ', '.join(sorted(_CRF_FTR_DIRS))))

    files_ = folder_files(path_ftr)
    features = featuers_CRF(files_, path_ftr)
    X = np.array(construct_ftr_CRF(features))  # construct the features for CRF

    # Single-argument parenthesized form prints the same text on Python 2 and 3.
    print('Finish loading features for CRF ' + command)
    return X
# Disabled invocation of the CRF models from an earlier experiment, kept for
# reference only:
# # running CRF models
# n_cross_valid_crf(X, Y, K=2, command='metrics_F1')  # use to calculate the F1 for classification
# # n_cross_valid_crf(X, Y, K=2, command='confusion_matrix')  # use to calculate the confusion matrix
# # n_cross_valid_crf(X, Y, K=2, command='write_results')  # presumably writes the results; confirm
#
# stop = timeit.default_timer()
# print 'Finish running CRF model %.3f sec' % (stop - start)

############################################################################
start = timeit.default_timer()  # get the start time

# loading CRF features
path_ftr = 'D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF/crf_features/features_rmLink'
files_ = folder_files(path_ftr)
features = featuers_CRF(files_, path_ftr)
X = np.array(construct_ftr_CRF(features))  # construct the features for CRF
# Single-argument parenthesized prints give the same output on Python 2 and 3.
print(X.shape)
print('Finish loading features for CRF')

# loading target labels
path_ = 'D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF'
name_ = 'labeling_all.txt'
list_line_ = filterTxt_CRF(load_file(path_, name_), 'removeLink', 'model')
Y = np.array(load_target_label(list_line_))
print(Y.shape)
print('Finish loading target label')

# Debug aid (disabled): compare the per-item lengths of X and Y.
# for index in range(0, len(X)):
#     print len(X[index]), len(Y[index])

# running CRF models