コード例 #1
0
        ('ps_calc_10math_mean_range', SimpleTransform()),
        ('ps_calc_11math_median_range', SimpleTransform()),
        ('ps_calc_11math_mean_range', SimpleTransform()),
        ('ps_calc_12math_median_range', SimpleTransform()),
        ('ps_calc_12math_mean_range', SimpleTransform()),
        ('ps_calc_13math_median_range', SimpleTransform()),
        ('ps_calc_13math_mean_range', SimpleTransform()),
        ('ps_calc_14math_median_range', SimpleTransform()),
        ('ps_calc_14math_mean_range', SimpleTransform()),

        ## both feats
        ('both_ps_car_13_x_ps_reg_03', SimpleTransform()),

        ## nan feats
        ('nan_sum', SimpleTransform()),
    ]

    ##loading data##
    dfTrain = pd.read_csv(config.original_train_data_path)
    dfTest = pd.read_csv(config.original_test_data_path)
    with open(
            "%s/stratifiedKFold.%s.pkl" %
        (config.data_folder, config.stratified_label), "rb") as f:
        skf = pickle.load(f)
    gen_info(feat_path_name="the1ow_1023")
    combine_feat(feat_names,
                 feat_path_name="the1ow_1023",
                 dfTrain=dfTrain,
                 dfTest=dfTest,
                 skf=skf)
コード例 #2
0
        # ('description_tfidf_common_vocabulary', SimpleTransform()),
        ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),

    ]
    

    gen_info(feat_path_name="svd100_and_bow_Jun23")
    combine_feat(feat_names, feat_path_name="svd100_and_bow_Jun23")    
         SimpleTransform()),
        # ('query_tfidf_common_vocabulary_common_svd150', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd150', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd150', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim',
         SimpleTransform()),
        ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim',
         SimpleTransform()),
        ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim',
         SimpleTransform()),
        ('query_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_relevance',
         SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_query_relevance',
         SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd150',
         SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_relevance',
         SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_query_relevance',
         SimpleTransform()),
    ]

    gen_info(feat_path_name="LSA_svd150_and_Jaccard_coef_Jun14")
    combine_feat(feat_names,
                 feat_path_name="LSA_svd150_and_Jaccard_coef_Jun14")
        # ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd100_cosine_sim', SimpleTransform()),
        # ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd150_cosine_sim', SimpleTransform()),

        # ('question1_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question1_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),

        # ('question1_bow_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question2_bow_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question1_bow_common_vocabulary_individual_svd150', SimpleTransform()),
        # ('question2_bow_common_vocabulary_individual_svd150', SimpleTransform()),

        #########################
        ## Cooccurrence TF-IDF ##
        #########################
        # ('question1_unigram_question2_unigram', SimpleTransform()),
        # ('question1_unigram_question2_bigram', SimpleTransform()),
        # ('question1_bigram_question2_unigram', SimpleTransform()),
        # ('question1_bigram_question2_bigram', SimpleTransform()),

        ######################
        ## word match share ##
        ######################
        ('ratio_of_question1_question2_unigram_share', SimpleTransform()),
        ('ratio_of_question1_question2_unigram_share_tfidf',
         SimpleTransform()),
    ]

    gen_info(feat_path_name="LSA_and_stats_feat_May03_CV_random")
    # combine_feat(feat_names, feat_path_name="LSA_and_stats_feat_May03_CV_random")
コード例 #5
0
        ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),

    ]
    

    gen_info(feat_path_name="svd100_and_bow_Jun23")
    combine_feat(feat_names, feat_path_name="svd100_and_bow_Jun23")    
コード例 #6
0
        # ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_cosine_sim',
         SimpleTransform()),
        ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim',
         SimpleTransform()),
        ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim',
         SimpleTransform()),
        # ('query_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd100',
         SimpleTransform()),
        # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
    ]

    # 724 dimensions
    gen_info(feat_path_name="LSA_and_stats_feat_Jun09")
    combine_feat(feat_names, feat_path_name="LSA_and_stats_feat_Jun09")
        # ('title_tfidf_common_vocabulary', SimpleTransform()),
        # ('description_tfidf_common_vocabulary', SimpleTransform()),
        ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_common_svd150', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd150', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd150', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim', SimpleTransform()),
        ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),

    ]

    gen_info(feat_path_name="LSA_svd150_and_Jaccard_coef_Jun14")
    combine_feat(feat_names, feat_path_name="LSA_svd150_and_Jaccard_coef_Jun14")    
コード例 #8
0
if __name__ == "__main__":
    # Script entry point: assemble the list of (feature name, transform)
    # pairs and hand it to gen_info / combine_feat under the "HD_dist"
    # feature path name.

    # Jaccard-coefficient feature names, ordered unigram -> bigram -> trigram,
    # each over the query/title, query/description and title/description pairs.
    jaccard_feats = [
        'jaccard_coef_of_unigram_between_query_title',
        'jaccard_coef_of_unigram_between_query_description',
        'jaccard_coef_of_unigram_between_title_description',
        'jaccard_coef_of_bigram_between_query_title',
        'jaccard_coef_of_bigram_between_query_description',
        'jaccard_coef_of_bigram_between_title_description',
        'jaccard_coef_of_trigram_between_query_title',
        'jaccard_coef_of_trigram_between_query_description',
        'jaccard_coef_of_trigram_between_title_description',
    ]

    # Dice-distance feature names, in the same n-gram / field-pair order.
    dice_feats = [
        'dice_dist_of_unigram_between_query_title',
        'dice_dist_of_unigram_between_query_description',
        'dice_dist_of_unigram_between_title_description',
        'dice_dist_of_bigram_between_query_title',
        'dice_dist_of_bigram_between_query_description',
        'dice_dist_of_bigram_between_title_description',
        'dice_dist_of_trigram_between_query_title',
        'dice_dist_of_trigram_between_query_description',
        'dice_dist_of_trigram_between_title_description',
    ]

    # Every feature gets its own SimpleTransform() instance; list order is
    # Jaccard features first, then Dice features.
    feat_names = [
        (feat, SimpleTransform()) for feat in jaccard_feats + dice_feats
    ]

    # Both calls use the same feature path name.
    out_name = "HD_dist"
    gen_info(feat_path_name=out_name)
    combine_feat(feat_names, feat_path_name=out_name)
コード例 #9
0
        # ('question2_bow_common_vocabulary_common_svd100', SimpleTransform()),
        # ('question1_bow_common_vocabulary_common_svd150', SimpleTransform()),
        # ('question2_bow_common_vocabulary_common_svd150', SimpleTransform()),

        # ('question1_tfidf_common_vocabulary_question2_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        # ('question1_tfidf_common_vocabulary_question2_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim', SimpleTransform()),

        # ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd100_cosine_sim', SimpleTransform()),
        # ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd150_cosine_sim', SimpleTransform()),

        # ('question1_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question1_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),

        # ('question1_bow_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question2_bow_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question1_bow_common_vocabulary_individual_svd150', SimpleTransform()),
        # ('question2_bow_common_vocabulary_individual_svd150', SimpleTransform()),

        #########################
        ## Cooccurrence TF-IDF ##
        #########################
        # ('question1_unigram_question2_unigram', SimpleTransform()),
        # ('question1_unigram_question2_bigram', SimpleTransform()),
        # ('question1_bigram_question2_unigram', SimpleTransform()),
        # ('question1_bigram_question2_bigram', SimpleTransform()),
    ]

    gen_info(feat_path_name="LSA_and_stats_feat_Apr29")
    combine_feat(feat_names, feat_path_name="LSA_and_stats_feat_Apr29")
        # ('title_tfidf_common_vocabulary', SimpleTransform()),
        # ('description_tfidf_common_vocabulary', SimpleTransform()),
        ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        # ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        ('query_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        ('title_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),
        ('description_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()),
        # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()),

    ]

    gen_info(feat_path_name="LSA_and_stats_feat_Jun09")
    combine_feat(feat_names, feat_path_name="LSA_and_stats_feat_Jun09")
        # ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd100_cosine_sim', SimpleTransform()),
        # ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd150_cosine_sim', SimpleTransform()),

        # ('question1_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question1_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),

        # ('question1_bow_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question2_bow_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question1_bow_common_vocabulary_individual_svd150', SimpleTransform()),
        # ('question2_bow_common_vocabulary_individual_svd150', SimpleTransform()),

        #########################
        ## Cooccurrence TF-IDF ##
        #########################
        # ('question1_unigram_question2_unigram', SimpleTransform()),
        # ('question1_unigram_question2_bigram', SimpleTransform()),
        # ('question1_bigram_question2_unigram', SimpleTransform()),
        # ('question1_bigram_question2_bigram', SimpleTransform()),

        ######################
        ## word match share ##
        ######################
        ('ratio_of_question1_question2_unigram_share', SimpleTransform()),
        ('ratio_of_question1_question2_unigram_share_tfidf', SimpleTransform()),
    ]

    gen_info(feat_path_name="LSA_and_stats_feat_May03_CV_Time")
    combine_feat(feat_names, feat_path_name="LSA_and_stats_feat_May03_CV_Time")
コード例 #12
0
        # ('question2_bow_common_vocabulary_common_svd100', SimpleTransform()),
        # ('question1_bow_common_vocabulary_common_svd150', SimpleTransform()),
        # ('question2_bow_common_vocabulary_common_svd150', SimpleTransform()),

        # ('question1_tfidf_common_vocabulary_question2_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()),
        # ('question1_tfidf_common_vocabulary_question2_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim', SimpleTransform()),

        # ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd100_cosine_sim', SimpleTransform()),
        # ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd150_cosine_sim', SimpleTransform()),

        # ('question1_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question1_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),
        # ('question2_tfidf_common_vocabulary_individual_svd150', SimpleTransform()),

        # ('question1_bow_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question2_bow_common_vocabulary_individual_svd100', SimpleTransform()),
        # ('question1_bow_common_vocabulary_individual_svd150', SimpleTransform()),
        # ('question2_bow_common_vocabulary_individual_svd150', SimpleTransform()),

        #########################
        ## Cooccurrence TF-IDF ##
        #########################
        # ('question1_unigram_question2_unigram', SimpleTransform()),
        # ('question1_unigram_question2_bigram', SimpleTransform()),
        # ('question1_bigram_question2_unigram', SimpleTransform()),
        # ('question1_bigram_question2_bigram', SimpleTransform()),
    ]

    gen_info(feat_path_name="LSA_and_stats_feat_Map02")
    combine_feat(feat_names, feat_path_name="LSA_and_stats_feat_Map02")