SimpleTransform()),
        # ('query_tfidf_common_vocabulary_common_svd150', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd150', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd150', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim',
         SimpleTransform()),
        ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim',
         SimpleTransform()),
        ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim',
         SimpleTransform()),
        ('query_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_relevance',
         SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_query_relevance',
         SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd150',
         SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_relevance',
         SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_query_relevance',
         SimpleTransform()),
    ]

    gen_info(feat_path_name="LSA_svd150_and_Jaccard_coef_Jun14")
    combine_feat(feat_names,
                 feat_path_name="LSA_svd150_and_Jaccard_coef_Jun14")
Ejemplo n.º 2
0
        # ('question2_bow_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question1_bow_common_vocabulary_individual_svd150', SimpleTransform()),
        # ('question2_bow_common_vocabulary_individual_svd150', SimpleTransform()),

        #########################
        ## Cooccurrence TF-IDF ##
        #########################
        # ('question1_unigram_question2_unigram', SimpleTransform()),
        # ('question1_unigram_question2_bigram', SimpleTransform()),
        # ('question1_bigram_question2_unigram', SimpleTransform()),
        # ('question1_bigram_question2_bigram', SimpleTransform()),

        ######################
        ## word match share ##
        ######################
        ('ratio_of_question1_question2_unigram_share', SimpleTransform()),
        ('ratio_of_question1_question2_unigram_share_tfidf',
         SimpleTransform()),

        #################################
        ## sentence hash and frequency ##
        #################################
        ('sentence_hash_of_question1', SimpleTransform()),
        ('sentence_hash_of_question2', SimpleTransform()),
        ('sentence_freq_of_question1', SimpleTransform()),
        ('sentence_freq_of_question2', SimpleTransform()),
    ]

    # gen_info(feat_path_name="LSA_and_stats_feat_May21")
    combine_feat(feat_names, feat_path_name="LSA_and_stats_feat_May21")
        ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),

    ]
    

    gen_info(feat_path_name="svd100_and_bow_Jun23")
    combine_feat(feat_names, feat_path_name="svd100_and_bow_Jun23")    
Ejemplo n.º 4
0
        ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),

    ]
    

    gen_info(feat_path_name="svd100_and_bow_Jun23")
    combine_feat(feat_names, feat_path_name="svd100_and_bow_Jun23")    
        # ('description_tfidf_common_vocabulary', SimpleTransform()),
        ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_common_svd150', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd150', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd150', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim', SimpleTransform()),
        ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),

    ]

    gen_info(feat_path_name="LSA_svd150_and_Jaccard_coef_Jun14")
    combine_feat(feat_names, feat_path_name="LSA_svd150_and_Jaccard_coef_Jun14")    
Ejemplo n.º 6
0
        ############
        # ('question1_tfidf_common_vocabulary', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary', SimpleTransform()),
        #('question2_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        #        ('question2_tfidf_common_vocabulary_cosine_sim_stats_feat_by_question1_relevance', SimpleTransform()),
        ('question1_tfidf_common_vocabulary_question2_tfidf_common_vocabulary_tfidf_cosine_sim',
         SimpleTransform()),
        # ('question1_tfidf_common_vocabulary_common_svd150', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary_common_svd150', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_question1_relevance', SimpleTransform()),
        ('question1_tfidf_common_vocabulary_question2_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim',
         SimpleTransform()),
        #('question1_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),
        #('question2_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),
        #('question2_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        #        ('question2_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_question1_relevance', SimpleTransform()),
    ]

    # Dispatch on the first command-line argument; values other than
    # "Training" / "Testing" fall through both branches and do nothing.
    if sys.argv[1] == "Training":
        mode = "Training"
        combine_feat(feat_names, "LSA_svd150_and_Jaccard_coef_May19", mode)
    if sys.argv[1] == "Testing":
        mode = "Testing"
        # The second argument selects the test subsets: "All" expands to every
        # index below config.test_subset_number.
        if sys.argv[2] == "All":
            Ntest = range(config.test_subset_number)
        else:
            # SECURITY NOTE(review): the raw argument is executed as Python
            # source, so whatever is typed on the command line runs here.
            # If only literals (e.g. "[0, 1, 2]") are ever passed, consider
            # ast.literal_eval instead -- confirm how callers invoke this.
            exec("Ntest =" + sys.argv[2])
        combine_feat(feat_names, "LSA_svd150_and_Jaccard_coef_May19", mode,
                     Ntest)
Ejemplo n.º 7
0
if __name__ == "__main__":

    # Feature names prefixed "jaccard_coef": unigram, bigram and trigram
    # variants, each over the query/title, query/description and
    # title/description pairs.
    jaccard_feats = (
        'jaccard_coef_of_unigram_between_query_title',
        'jaccard_coef_of_unigram_between_query_description',
        'jaccard_coef_of_unigram_between_title_description',
        'jaccard_coef_of_bigram_between_query_title',
        'jaccard_coef_of_bigram_between_query_description',
        'jaccard_coef_of_bigram_between_title_description',
        'jaccard_coef_of_trigram_between_query_title',
        'jaccard_coef_of_trigram_between_query_description',
        'jaccard_coef_of_trigram_between_title_description',
    )

    # Feature names prefixed "dice_dist", in the same n-gram / pair order.
    dice_feats = (
        'dice_dist_of_unigram_between_query_title',
        'dice_dist_of_unigram_between_query_description',
        'dice_dist_of_unigram_between_title_description',
        'dice_dist_of_bigram_between_query_title',
        'dice_dist_of_bigram_between_query_description',
        'dice_dist_of_bigram_between_title_description',
        'dice_dist_of_trigram_between_query_title',
        'dice_dist_of_trigram_between_query_description',
        'dice_dist_of_trigram_between_title_description',
    )

    # Pair every feature name with its own SimpleTransform instance; the
    # jaccard entries come first, followed by the dice entries.
    feat_names = []
    for feat in jaccard_feats + dice_feats:
        feat_names.append((feat, SimpleTransform()))

    # Both calls receive the same feat_path_name value.
    out_name = "HD_dist"
    gen_info(feat_path_name=out_name)
    combine_feat(feat_names, feat_path_name=out_name)
Ejemplo n.º 8
0
        # ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_cosine_sim',
         SimpleTransform()),
        ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim',
         SimpleTransform()),
        ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim',
         SimpleTransform()),
        # ('query_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd100',
         SimpleTransform()),
        # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
    ]

    # 724 dimensions
    gen_info(feat_path_name="LSA_and_stats_feat_Jun09")
    combine_feat(feat_names, feat_path_name="LSA_and_stats_feat_Jun09")
        # ('description_tfidf_common_vocabulary', SimpleTransform()),
        ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),

    ]

    gen_info(feat_path_name="LSA_and_stats_feat_Jun09")
    combine_feat(feat_names, feat_path_name="LSA_and_stats_feat_Jun09")
        # ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd100_cosine_sim', SimpleTransform()),
        # ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd150_cosine_sim', SimpleTransform()),

        # ('question1_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question1_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),

        # ('question1_bow_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question2_bow_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question1_bow_common_vocabulary_individual_svd150', SimpleTransform()),
        # ('question2_bow_common_vocabulary_individual_svd150', SimpleTransform()),

        #########################
        ## Cooccurrence TF-IDF ##
        #########################
        # ('question1_unigram_question2_unigram', SimpleTransform()),
        # ('question1_unigram_question2_bigram', SimpleTransform()),
        # ('question1_bigram_question2_unigram', SimpleTransform()),
        # ('question1_bigram_question2_bigram', SimpleTransform()),

        ######################
        ## word match share ##
        ######################
        ('ratio_of_question1_question2_unigram_share', SimpleTransform()),
        ('ratio_of_question1_question2_unigram_share_tfidf', SimpleTransform()),
    ]

    gen_info(feat_path_name="LSA_and_stats_feat_May03_CV_Time")
    combine_feat(feat_names, feat_path_name="LSA_and_stats_feat_May03_CV_Time")
Ejemplo n.º 11
0
        # ('question1_tfidf_common_vocabulary', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary', SimpleTransform()),
        #('question2_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        #        ('question2_tfidf_common_vocabulary_cosine_sim_stats_feat_by_question1_relevance', SimpleTransform()),
        ('question1_tfidf_common_vocabulary_question2_tfidf_common_vocabulary_tfidf_cosine_sim',
         SimpleTransform()),
        # ('question1_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_question1_relevance', SimpleTransform()),
        # ('question1_tfidf_common_vocabulary_question2_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        #('question1_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        #('question2_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_question1_relevance', SimpleTransform()),
    ]

    print sys.argv[1]

    # Dispatch on the first command-line argument; values other than
    # "Training" / "Testing" fall through both branches and do nothing.
    if sys.argv[1] == "Training":
        mode = "Training"
        combine_feat(feat_names, "LSA_and_stats_feat_May19", mode)

    if sys.argv[1] == "Testing":
        mode = "Testing"
        # The second argument selects the test subsets: "All" expands to every
        # index below config.test_subset_number.
        if sys.argv[2] == "All":
            Ntest = range(config.test_subset_number)
        else:
            # SECURITY NOTE(review): the raw argument is executed as Python
            # source, so whatever is typed on the command line runs here.
            # If only literals (e.g. "[0, 1, 2]") are ever passed, consider
            # ast.literal_eval instead -- confirm how callers invoke this.
            exec("Ntest =" + sys.argv[2])
        combine_feat(feat_names, "LSA_and_stats_feat_May19", mode, Ntest)
Ejemplo n.º 12
0
        ('question1_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        ('question2_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        ('question1_tfidf_common_vocabulary_common_svd150', SimpleTransform()),
        ('question2_tfidf_common_vocabulary_common_svd150', SimpleTransform()),

        ('question1_bow_common_vocabulary_common_svd100', SimpleTransform()),
        ('question2_bow_common_vocabulary_common_svd100', SimpleTransform()),
        ('question1_bow_common_vocabulary_common_svd150', SimpleTransform()),
        ('question2_bow_common_vocabulary_common_svd150', SimpleTransform()),

        ('question1_tfidf_common_vocabulary_question2_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        ('question1_tfidf_common_vocabulary_question2_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim', SimpleTransform()),

        ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd100_cosine_sim', SimpleTransform()),
        ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd150_cosine_sim', SimpleTransform()),

        ('question1_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        ('question2_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        ('question1_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),
        ('question2_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),

        ('question1_bow_common_vocabulary_individual_svd100', SimpleTransform()),
        ('question2_bow_common_vocabulary_individual_svd100', SimpleTransform()),
        ('question1_bow_common_vocabulary_individual_svd150', SimpleTransform()),
        ('question2_bow_common_vocabulary_individual_svd150', SimpleTransform()),

    ]

    gen_info(feat_path_name="LSA_and_stats_feat_Apr25")
    combine_feat(feat_names, feat_path_name="LSA_and_stats_feat_Apr25")
        ## Cooccurrence TF-IDF ##
        #########################
        #('question1_unigram_question2_unigram_tfidf', SimpleTransform()),
        # ('question1_unigram_question2_bigram_tfidf', SimpleTransform()),
        #('question1_bigram_question2_unigram_tfidf', SimpleTransform()),
        # ('question1_bigram_question2_bigram_tfidf', SimpleTransform()),
        #        ('question1_id_question2_unigram_tfidf', SimpleTransform()),
        # ('question1_id_question2_bigram_tfidf', SimpleTransform()),
        # # ('question1_cat_id_question2_unigram_tfidf', SimpleTransform()),
        # # ('question1_cat_id_question2_bigram_tfidf', SimpleTransform()),
        # ('question1_unigram_question2_unigram_tfidf_individual_svd100', SimpleTransform()),
        # # ('question1_unigram_question2_bigram_tfidf_individual_svd100', SimpleTransform()),
        # ('question1_bigram_question2_unigram_tfidf_individual_svd100', SimpleTransform()),
        # # ('question1_bigram_question2_bigram_tfidf_individual_svd100', SimpleTransform()),
        # ('question1_id_question2_unigram_tfidf_individual_svd100', SimpleTransform()),
        # # ('question1_id_question2_bigram_tfidf_individual_svd100', SimpleTransform()),
        # # ('question1_cat_id_question2_unigram_tfidf_individual_svd100', SimpleTransform()),
        # # ('question1_cat_id_question2_bigram_tfidf_individual_svd100', SimpleTransform()),
    ]

    # Dispatch on the first command-line argument; values other than
    # "Training" / "Testing" fall through both branches and do nothing.
    if sys.argv[1] == "Training":
        mode = "Training"
        combine_feat(feat_names, "svd100_and_bow_high_May19", mode)
    if sys.argv[1] == "Testing":
        mode = "Testing"
        # The second argument selects the test subsets: "All" expands to every
        # index below config.test_subset_number.
        if sys.argv[2] == "All":
            Ntest = range(config.test_subset_number)
        else:
            # SECURITY NOTE(review): the raw argument is executed as Python
            # source, so whatever is typed on the command line runs here.
            # If only literals (e.g. "[0, 1, 2]") are ever passed, consider
            # ast.literal_eval instead -- confirm how callers invoke this.
            exec("Ntest =" + sys.argv[2])
        combine_feat(feat_names, "svd100_and_bow_high_May19", mode, Ntest)