コード例 #1
0
# Root directory that holds every data set referenced below.
_CRF_DATA_ROOT = 'D:/Project/Transportation_SMU-NEC_collaboration/Data'

# Feature directory (relative to _CRF_DATA_ROOT) for each supported command.
# Command names read '<A>_vs_<B>_<A>_training' or '<A>_vs_<B>_<B>_testing';
# the testing directories are laid out as 'CRFs_compareModel/<B>/ftr_<A>'.
_CRF_FEATURE_DIRS = {
    # twitter training features, tested on sgforums / facebook
    'twitter_vs_sgforums_twitter_training':
        'twitter/labeling_CRF/crf_features/features_rmLink',
    'twitter_vs_sgforums_sgforums_testing':
        'CRFs_compareModel/sgforums/ftr_twitter',
    'twitter_vs_facebook_twitter_training':
        'twitter/labeling_CRF/crf_features/features_rmLink',
    'twitter_vs_facebook_facebook_testing':
        'CRFs_compareModel/facebook/ftr_twitter',
    # sgforums training features, tested on twitter / facebook
    'sgforums_vs_twitter_sgforums_training':
        'sgforums/20152207_singaporebuses_all_posts/labeling_CRF/crf_features/features',
    'sgforums_vs_twitter_twitter_testing':
        'CRFs_compareModel/twitter/ftr_sgforums',
    'sgforums_vs_facebook_sgforums_training':
        'sgforums/20152207_singaporebuses_all_posts/labeling_CRF/crf_features/features',
    'sgforums_vs_facebook_facebook_testing':
        'CRFs_compareModel/facebook/ftr_sgforums',
    # facebook training features, tested on twitter / sgforums
    'facebook_vs_twitter_facebook_training':
        'facebook/BusNews/labeling_CRF/crf_features/features',
    'facebook_vs_twitter_twitter_testing':
        'CRFs_compareModel/twitter/ftr_facebook',
    'facebook_vs_sgforums_facebook_training':
        'facebook/BusNews/labeling_CRF/crf_features/features',
    'facebook_vs_sgforums_sgforums_testing':
        'CRFs_compareModel/sgforums/ftr_facebook',
}


def loading_ftr_CRFs(command):
    """Load the CRF features selected by ``command``.

    Args:
        command: one of the keys of ``_CRF_FEATURE_DIRS``, e.g.
            ``'twitter_vs_sgforums_twitter_training'``.

    Returns:
        ``np.array(construct_ftr_CRF(features))``, where ``features`` is what
        ``featuers_CRF`` returns for the files found in the selected
        directory.

    Raises:
        ValueError: if ``command`` is not a supported command. Previously an
            unknown command surfaced later as an UnboundLocalError on
            ``path_ftr`` that did not mention the offending value.
    """
    if command not in _CRF_FEATURE_DIRS:
        raise ValueError(
            'Unknown command %r; expected one of: %s'
            % (command, ', '.join(sorted(_CRF_FEATURE_DIRS))))

    path_ftr = _CRF_DATA_ROOT + '/' + _CRF_FEATURE_DIRS[command]

    # folder_files presumably lists the entries of the directory -- it is
    # defined outside this block; confirm against its definition.
    files_ = folder_files(path_ftr)
    features = featuers_CRF(files_, path_ftr)
    X = np.array(construct_ftr_CRF(features))  # construct the features for CRF
    # Single-argument call form prints the same text on Python 2 and 3.
    print('Finish loading features for CRF ' + command)
    return X
コード例 #2
0
def load_demo_ftr(command):
    """Load the demo CRF features for one data source.

    Args:
        command: name of the data source; one of ``'twitter'``,
            ``'sgforums'`` or ``'facebook'``.

    Returns:
        ``np.array(construct_ftr_CRF(features))``, where ``features`` is what
        ``featuers_CRF`` returns for the files found in the source's
        ``crf_features`` directory.

    Raises:
        ValueError: if ``command`` is not one of the supported sources.
            Previously this surfaced as an UnboundLocalError on ``path_ftr``.
    """
    sources = ('twitter', 'sgforums', 'facebook')
    if command not in sources:
        raise ValueError(
            'Unknown command %r; expected one of: %s'
            % (command, ', '.join(sources)))

    # The three demo directories differ only in the source name.
    path_ftr = ('D:/Project/Transportation_SMU-NEC_collaboration/Data_demo_Dec_2015/'
                + command + '/crf_features')

    files_ = folder_files(path_ftr)
    features = featuers_CRF(files_, path_ftr)
    X = np.array(construct_ftr_CRF(features))
    return X
コード例 #3
0
    # print svc
    tag = tags[1]
    tag_time = tag[1].replace('<td width="60%">', "").replace("</td>", "")
    split_tag = tag_time.split("-")
    first, last = split_tag[0], split_tag[1]
    # print first.strip() + ":" + last.strip()
    return svc + "\t" + first.strip() + ":" + last.strip()


########################################################################################################
########################################################################################################
if __name__ == "__main__":
    # Location of the service list file.
    csv_dir = "D:/Project/Transportation_SMU-NEC_collaboration/Data"
    csv_name = "bussvc_mytransport.csv"

    # Directory scanned below, and the filename prefix used by the
    # (currently disabled) get_svc_html step.
    html_dir = "D:/Project/Transportation_SMU-NEC_collaboration/Data/mytransport/html_svc"
    html_prefix = "svc_"

    services = load_file(csv_dir, csv_name)
    # get_svc_html(services, html_dir, html_prefix)

    html_files = folder_files(html_dir)
    for html_name in html_files:
        html_lines = load_file(html_dir, html_name)
        summary = extract_time_svc_html(html_name, html_lines)
        # Only non-empty results are printed.
        if summary == "":
            continue
        print(summary)

    # Total number of files that were scanned.
    print(len(html_files))
コード例 #4
0
        num_list_sentence = np.array(list_ftr_sentence)  # IMPORTANT. We need to convert to array before adding to list
        list_all_sentences.append(num_list_sentence)
    return list_all_sentences


if __name__ == "__main__":
    start = timeit.default_timer()  # get the start time

    # loading CRF features
    # path_ftr = 'D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF/crf_features/features_rmLinkWordVector/wordVec_30'
    # path_ftr = 'D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF/crf_features/features_rmLinkWordVector/wordVec_70'
    # path_ftr = 'D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF/crf_features/features_rmLinkWordVector/wordVec_100'
    # path_ftr = 'D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF/crf_features/features_rmLinkWordVector/wordVec_150'
    path_ftr = "D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF/crf_features/features_rmLinkWordVector/wordVec_200"
    files_ = folder_files(path_ftr)
    features = featuers_CRF(files_, path_ftr)
    # construct_ftr_CRF_wordVector(features)
    X = np.array(construct_ftr_CRF_wordVector(features))  # construct the features for CRF
    print X.shape
    print "Finish loading features for CRF"
    #
    # # loading target labels
    path_ = "D:/Project/Transportation_SMU-NEC_collaboration/Data/twitter/labeling_CRF"
    name_ = "labeling_all.txt"
    list_line_ = filterTxt_CRF(load_file(path_, name_), "removeLink", "model")
    Y = np.array(load_target_label(list_line_))
    print Y.shape
    print "Finish loading target label"
    #
    # # for index in range(0, len(X)):