# Feature directory for every supported cross-domain experiment command.
# The keys are the exact command strings the callers pass in; each
# "<a>_vs_<b>" experiment has one "..._training" and one "..._testing" entry.
_CRF_FEATURE_DIRS = {
    # twitter vs sgforums
    'twitter_vs_sgforums_twitter_training': 'D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF/crf_features/features_rmLink',
    'twitter_vs_sgforums_sgforums_testing': 'D:/Project/Transportation_SMU-NEC_collaboration/Data/CRFs_compareModel/sgforums/ftr_twitter',
    # twitter vs facebook
    'twitter_vs_facebook_twitter_training': 'D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF/crf_features/features_rmLink',
    'twitter_vs_facebook_facebook_testing': 'D:/Project/Transportation_SMU-NEC_collaboration/Data/CRFs_compareModel/facebook/ftr_twitter',
    # sgforums vs twitter
    'sgforums_vs_twitter_sgforums_training': 'D:/Project/Transportation_SMU-NEC_collaboration/Data/sgforums/20152207_singaporebuses_all_posts/labeling_CRF/crf_features/features',
    'sgforums_vs_twitter_twitter_testing': 'D:/Project/Transportation_SMU-NEC_collaboration/Data/CRFs_compareModel/twitter/ftr_sgforums',
    # sgforums vs facebook
    'sgforums_vs_facebook_sgforums_training': 'D:/Project/Transportation_SMU-NEC_collaboration/Data/sgforums/20152207_singaporebuses_all_posts/labeling_CRF/crf_features/features',
    'sgforums_vs_facebook_facebook_testing': 'D:/Project/Transportation_SMU-NEC_collaboration/Data/CRFs_compareModel/facebook/ftr_sgforums',
    # facebook vs twitter
    'facebook_vs_twitter_facebook_training': 'D:/Project/Transportation_SMU-NEC_collaboration/Data/facebook/BusNews/labeling_CRF/crf_features/features',
    'facebook_vs_twitter_twitter_testing': 'D:/Project/Transportation_SMU-NEC_collaboration/Data/CRFs_compareModel/twitter/ftr_facebook',
    # facebook vs sgforums
    'facebook_vs_sgforums_facebook_training': 'D:/Project/Transportation_SMU-NEC_collaboration/Data/facebook/BusNews/labeling_CRF/crf_features/features',
    'facebook_vs_sgforums_sgforums_testing': 'D:/Project/Transportation_SMU-NEC_collaboration/Data/CRFs_compareModel/sgforums/ftr_facebook',
}


def loading_ftr_CRFs(command):
    """Load the CRF feature set selected by ``command``.

    The command is looked up in ``_CRF_FEATURE_DIRS`` to find the feature
    directory.  The directory listing from ``folder_files`` is passed to
    ``featuers_CRF``, the result goes through ``construct_ftr_CRF`` and is
    wrapped in a numpy array.  A progress message naming the command is
    printed before returning.

    Args:
        command: One of the keys of ``_CRF_FEATURE_DIRS``.

    Returns:
        ``np.array(construct_ftr_CRF(features))``.  The array's shape and
        dtype depend on ``construct_ftr_CRF``, which is defined elsewhere.

    Raises:
        ValueError: If ``command`` is not a known key.  Previously this
            surfaced as an ``UnboundLocalError`` on ``path_ftr``.
    """
    path_ftr = _CRF_FEATURE_DIRS.get(command)
    if path_ftr is None:
        raise ValueError(
            'Unknown CRF feature command %r; expected one of: %s'
            % (command, ', '.join(sorted(_CRF_FEATURE_DIRS)))
        )

    files_ = folder_files(path_ftr)
    features = featuers_CRF(files_, path_ftr)
    X = np.array(construct_ftr_CRF(features))  # construct the features for CRF
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('Finish loading features for CRF ' + command)
    return X
# Demo (Dec 2015) CRF feature directory for each supported data source.
_DEMO_FEATURE_DIRS = {
    'twitter': 'D:/Project/Transportation_SMU-NEC_collaboration/Data_demo_Dec_2015/twitter/crf_features',
    'sgforums': 'D:/Project/Transportation_SMU-NEC_collaboration/Data_demo_Dec_2015/sgforums/crf_features',
    'facebook': 'D:/Project/Transportation_SMU-NEC_collaboration/Data_demo_Dec_2015/facebook/crf_features',
}


def load_demo_ftr(command):
    """Load the demo CRF features for one data source.

    The source name is looked up in ``_DEMO_FEATURE_DIRS`` to find the
    feature directory.  The directory listing from ``folder_files`` is
    passed to ``featuers_CRF``, the result goes through
    ``construct_ftr_CRF`` and is wrapped in a numpy array.

    Args:
        command: ``'twitter'``, ``'sgforums'`` or ``'facebook'``.

    Returns:
        ``np.array(construct_ftr_CRF(features))``.  The array's shape and
        dtype depend on ``construct_ftr_CRF``, which is defined elsewhere.

    Raises:
        ValueError: If ``command`` is not a known source.  Previously this
            surfaced as an ``UnboundLocalError`` on ``path_ftr``.
    """
    path_ftr = _DEMO_FEATURE_DIRS.get(command)
    if path_ftr is None:
        raise ValueError(
            'Unknown demo feature source %r; expected one of: %s'
            % (command, ', '.join(sorted(_DEMO_FEATURE_DIRS)))
        )

    files_ = folder_files(path_ftr)
    features = featuers_CRF(files_, path_ftr)
    X = np.array(construct_ftr_CRF(features))
    return X
# print svc tag = tags[1] tag_time = tag[1].replace('<td width="60%">', "").replace("</td>", "") split_tag = tag_time.split("-") first, last = split_tag[0], split_tag[1] # print first.strip() + ":" + last.strip() return svc + "\t" + first.strip() + ":" + last.strip() ######################################################################################################## ######################################################################################################## if __name__ == "__main__": path = "D:/Project/Transportation_SMU-NEC_collaboration/Data" name = "bussvc_mytransport.csv" path_write = "D:/Project/Transportation_SMU-NEC_collaboration/Data/mytransport/html_svc" name_write = "svc_" svcs = load_file(path, name) # get_svc_html(svcs, path_write, name_write) files = folder_files(path_write) for f in files: # print '--------------------- ' + f + '--------------------- ' list_ = load_file(path_write, f) result = extract_time_svc_html(f, list_) if result != "": print result print len(files)
num_list_sentence = np.array(list_ftr_sentence) # IMPORTANT. We need to convert to array before adding to list list_all_sentences.append(num_list_sentence) return list_all_sentences if __name__ == "__main__": start = timeit.default_timer() # get the start time # loading CRF features # path_ftr = 'D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF/crf_features/features_rmLinkWordVector/wordVec_30' # path_ftr = 'D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF/crf_features/features_rmLinkWordVector/wordVec_70' # path_ftr = 'D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF/crf_features/features_rmLinkWordVector/wordVec_100' # path_ftr = 'D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF/crf_features/features_rmLinkWordVector/wordVec_150' path_ftr = "D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF/crf_features/features_rmLinkWordVector/wordVec_200" files_ = folder_files(path_ftr) features = featuers_CRF(files_, path_ftr) # construct_ftr_CRF_wordVector(features) X = np.array(construct_ftr_CRF_wordVector(features)) # construct the features for CRF print X.shape print "Finish loading features for CRF" # # # loading target labels path_ = "D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF" name_ = "labeling_all.txt" list_line_ = filterTxt_CRF(load_file(path_, name_), "removeLink", "model") Y = np.array(load_target_label(list_line_)) print Y.shape print "Finish loading target label" # # # for index in range(0, len(X)):