def load_get_coefficients(config_learning, config_data):
        """Write the coefficients of a saved model to ``coefficients.txt``.

        The estimator is loaded from ``<models>/<method>.pkl``, where
        ``<models>`` is the ``Learner``/``models`` option of ``config_data``
        (with ``~`` expanded) and ``<method>`` is
        ``config_learning["learning"]["method"]``.  One line per feature is
        written to ``<models>/coefficients.txt`` in the form
        ``name<TAB>coefficient``, using the first row of ``estimator.coef_``.

        A feature whose combination method is ``'both'`` is listed twice
        (presumably because it contributes two columns to the feature matrix;
        confirm against the feature extractor).

        :param config_learning: mapping that holds a ``"learning"`` mapping
            with a ``"method"`` entry.
        :param config_data: configuration object exposing
            ``get(section, option)``.
        :return: None
        """
        models_dir = os.path.expanduser(config_data.get("Learner", "models"))

        feature_names = FeatureExtractor.get_features_from_config_file_unsorted(config_data)
        combination_methods = FeatureExtractor.get_combinations_from_config_file_unsorted(config_data)

        learning_config = config_learning.get("learning", None)
        method_name = learning_config.get("method", None)

        # NOTE: joblib.load unpickles the file; only point this at trusted models.
        estimator = joblib.load(os.path.join(models_dir, method_name + ".pkl"))
        coefficients = estimator.coef_

        feature_list = []
        for i, feature_name in enumerate(feature_names):
            repeats = 2 if combination_methods[i] == 'both' else 1
            feature_list.extend([feature_name] * repeats)

        # Open the output only once everything needed is available, so a failed
        # model load does not leave a truncated file behind; the context manager
        # closes the handle even if a write raises.
        with open(os.path.join(models_dir, "coefficients.txt"), "w") as output:
            for i, name in enumerate(feature_list):
                output.write(name + "\t" + str(coefficients[0][i]) + "\n")
    def recursive_feature_elimination_cv(config_learning, config_data):
        """Rank features with cross-validated recursive feature elimination.

        Training and test data are read from the tab-separated files named by
        the ``x_train``, ``y_train`` and ``x_test`` entries of
        ``config_learning``.  The estimator comes from
        ``learn_model.set_learning_method``.  Unless ``config_learning["scale"]``
        is falsy (it defaults to ``True``), both feature sets are passed through
        ``scale_datasets`` before fitting.

        After fitting, one line per feature (``name<TAB>rank``) is written to
        ``<models>/feature_ranks.txt``, where ``<models>`` is the
        ``Learner``/``models`` option of ``config_data`` with ``~`` expanded.
        A feature whose combination method is ``'both'`` is listed twice.

        :param config_learning: mapping with the learning settings and data
            file paths.
        :param config_data: configuration object exposing
            ``get(section, option)``.
        :return: predictions of the fitted selector for ``x_test``.
        """
        ranks_path = os.path.join(
            os.path.expanduser(config_data.get("Learner", "models")),
            "feature_ranks.txt",
        )

        feature_names = FeatureExtractor.get_features_from_config_file_unsorted(config_data)
        combination_methods = FeatureExtractor.get_combinations_from_config_file_unsorted(config_data)

        x_train = read_features_file(config_learning.get('x_train'), '\t')
        y_train = read_reference_file(config_learning.get('y_train'), '\t')
        x_test = read_features_file(config_learning.get('x_test'), '\t')
        # Only the estimator is needed here; the scorers are not used.
        estimator, _ = learn_model.set_learning_method(config_learning, x_train, y_train)

        if config_learning.get("scale", True):
            x_train, x_test = scale_datasets(x_train, x_test)

        # NOTE(review): StratifiedKFold(y, n_folds) is the legacy
        # sklearn.cross_validation signature -- confirm the installed version.
        rfecv = RFECV(estimator=estimator, step=1, cv=StratifiedKFold(y_train, 2), scoring='accuracy')
        rfecv.fit(x_train, y_train)

        feature_list = []
        for i, feature_name in enumerate(feature_names):
            repeats = 2 if combination_methods[i] == 'both' else 1
            feature_list.extend([feature_name] * repeats)

        # Open the output only after fitting, so a failure above does not leave
        # a truncated file behind; the context manager closes the handle even if
        # a write raises.
        with open(ranks_path, "w") as output:
            for i, name in enumerate(feature_list):
                output.write(name + "\t" + str(rfecv.ranking_[i]) + "\n")

        return rfecv.predict(x_test)
# Example #3
# 0
def average_feature_values():
    """Print the column-wise mean of a feature file, one line per feature name.

    The configuration is read from ``config/wmt.cfg`` under the current working
    directory, and feature names are taken from it.  Feature values are read
    from a hard-coded tab-separated file under
    ``~/Dropbox/experiments_fluency/test_learn_to_rank``.  Each output line is
    ``name<TAB>mean``, where the mean is taken over axis 0 of the feature
    values.

    :return: None
    """
    config_path = os.path.join(os.getcwd(), "config", "wmt.cfg")
    config = ConfigParser()
    # Close the config file deterministically.  ``readfp`` was removed in
    # Python 3.12, so prefer ``read_file`` and fall back to ``readfp`` on
    # parsers that lack it.
    with open(config_path) as config_file:
        read_config = getattr(config, "read_file", None) or config.readfp
        read_config(config_file)

    my_dir = os.path.expanduser("~/Dropbox/experiments_fluency/test_learn_to_rank")
    feature_file = os.path.join(my_dir, "x_newstest2015.cobalt_comb_min_fluency_features_all.cs-en.tsv")
    feature_names = FeatureExtractor.get_features_from_config_file_unsorted(config)
    # NOTE: the strategies are fetched but not used below; repeating the names
    # of features combined with the 'both' strategy is disabled in this function.
    strategies = FeatureExtractor.get_combinations_from_config_file(config)

    feature_values = read_features_file(feature_file, "\t")
    averages = np.mean(feature_values, axis=0)

    for i, name in enumerate(feature_names):
        print(name + "\t" + str(averages[i]))