Ejemplo n.º 1
0
    def recursive_feature_elimination(config_learning, config_data, number_features):
        """Run recursive feature elimination, record the ranks and predict on the test set.

        The train/test feature files and the training reference file are read
        from the paths stored under ``x_train``, ``y_train`` and ``x_test`` in
        ``config_learning``. The datasets are scaled unless ``config_learning``
        sets ``scale`` to a false value.

        One ``name<TAB>rank`` line per feature name is written to
        ``feature_ranks.txt`` inside the directory configured as
        ``Learner/models`` in ``config_data``; the same lines are echoed to
        standard output.

        :param config_learning: mapping with the learning settings (``x_train``,
            ``y_train``, ``x_test``, ``scale`` and whatever
            ``learn_model.set_learning_method`` needs).
        :param config_data: configuration object queried with
            ``get(section, option)``.
        :param number_features: number of features RFE should keep.
        :return: the predictions of the fitted RFE object for the test set.
        """
        # NOTE(review): the names come from the *combinations* getter, whereas
        # recursive_feature_elimination_cv uses the *features* getter for the
        # names - confirm this is intended.
        feature_names = FeatureExtractor.get_combinations_from_config_file_unsorted(config_data)

        x_train = read_features_file(config_learning.get('x_train'), '\t')
        y_train = read_reference_file(config_learning.get('y_train'), '\t')
        x_test = read_features_file(config_learning.get('x_test'), '\t')
        estimator, _ = learn_model.set_learning_method(config_learning, x_train, y_train)

        if config_learning.get("scale", True):
            x_train, x_test = scale_datasets(x_train, x_test)

        # Pass the feature count by keyword: recent scikit-learn releases no
        # longer accept it positionally, and older ones accept the keyword too.
        rfe = RFE(estimator, n_features_to_select=number_features, step=1)
        rfe.fit(x_train, y_train)

        # Open the report only once the ranks exist, and let the context
        # manager close it, so a failure above neither leaks the handle nor
        # truncates a previous report.
        ranks_path = os.path.expanduser(config_data.get("Learner", "models")) + "/" + "feature_ranks.txt"
        with open(ranks_path, "w") as output:
            for i, name in enumerate(feature_names):
                line = name + "\t" + str(rfe.ranking_[i])
                output.write(line + "\n")
                print(line)

        return rfe.predict(x_test)
Ejemplo n.º 2
0
    def test_learn_to_rank_coefficients(config_learning, config):
        """Score every test instance as a weighted sum using a saved model's coefficients.

        The model is loaded from ``<Learner/models>/<method>.pkl``, where the
        method name comes from ``config_learning["learning"]["method"]``. Only
        the first row of the estimator's ``coef_`` is used as the weights.

        :param config_learning: mapping holding ``x_train``, ``x_test``,
            ``scale`` and the ``learning`` sub-mapping.
        :param config: configuration object queried with ``get(section, option)``.
        :return: list with one score per instance of the test set.
        """
        train_features = read_features_file(config_learning.get('x_train'), '\t')
        test_features = read_features_file(config_learning.get('x_test'), '\t')

        # The training features are loaded only so both sets can be scaled together.
        if config_learning.get("scale", True):
            train_features, test_features = scale_datasets(train_features, test_features)

        method_name = config_learning.get("learning", None).get("method", None)
        model_path = os.path.expanduser(config.get('Learner', 'models')) + '/' + method_name + '.pkl'
        weights = joblib.load(model_path).coef_[0]

        # Index into the weights (rather than zip) so an instance longer than
        # the coefficient vector still raises instead of being truncated.
        return [
            sum((value * weights[idx] for idx, value in enumerate(row)), 0.0)
            for row in test_features
        ]
Ejemplo n.º 3
0
    def test_learn_to_rank(config_learning, config):
        """Return the first ``predict_proba`` column of a saved model for the test set.

        The model is loaded from ``<Learner/models>/<method>.pkl``, where the
        method name comes from ``config_learning["learning"]["method"]``.

        :param config_learning: mapping holding ``x_train``, ``x_test``,
            ``scale`` and the ``learning`` sub-mapping.
        :param config: configuration object queried with ``get(section, option)``.
        :return: list with the first probability of every test instance.
        """
        train_features = read_features_file(config_learning.get('x_train'), '\t')
        test_features = read_features_file(config_learning.get('x_test'), '\t')

        # The training features are loaded only so both sets can be scaled together.
        if config_learning.get("scale", True):
            train_features, test_features = scale_datasets(train_features, test_features)

        method_name = config_learning.get("learning", None).get("method", None)
        model_path = os.path.expanduser(config.get('Learner', 'models')) + '/' + method_name + '.pkl'
        model = joblib.load(model_path)

        first_column = []
        for probabilities in model.predict_proba(test_features):
            first_column.append(probabilities[0])
        return first_column
Ejemplo n.º 4
0
    def train_save(config_learning, config_data):
        """Fit the configured estimator on the training data and pickle it.

        The fitted estimator is dumped with joblib to
        ``<Learner/models>/<method>.pkl``, where the method name comes from
        ``config_learning["learning"]["method"]``. Nothing is returned.

        :param config_learning: mapping holding ``x_train``, ``y_train``,
            ``x_test``, ``scale`` and the ``learning`` sub-mapping.
        :param config_data: configuration object queried with
            ``get(section, option)``.
        """
        method_name = config_learning.get("learning", None).get("method", None)

        train_features = read_features_file(config_learning.get('x_train'), '\t')
        train_labels = read_reference_file(config_learning.get('y_train'), '\t')
        test_features = read_features_file(config_learning.get('x_test'), '\t')

        # The test features are loaded only so both sets can be scaled together.
        if config_learning.get("scale", True):
            train_features, test_features = scale_datasets(train_features, test_features)

        model, _ = learn_model.set_learning_method(config_learning, train_features, train_labels)
        model.fit(train_features, train_labels)

        models_dir = os.path.expanduser(config_data.get('Learner', 'models'))
        joblib.dump(model, models_dir + '/' + method_name + '.pkl')
Ejemplo n.º 5
0
    def load_predict(config_learning, config_data):
        """Load a previously saved model and predict on the test set.

        The model is read from ``<Learner/models>/<method>.pkl``, where the
        method name comes from ``config_learning["learning"]["method"]``.
        Only the feature files are read: the reference (label) files are not
        needed for prediction, so a missing ``y_train``/``y_test`` entry no
        longer prevents this function from running.

        :param config_learning: mapping holding ``x_train``, ``x_test``,
            ``scale`` and the ``learning`` sub-mapping.
        :param config_data: configuration object queried with
            ``get(section, option)``.
        :return: whatever the loaded estimator's ``predict`` returns for the
            test features.
        """
        learning_config = config_learning.get("learning", None)
        method_name = learning_config.get("method", None)

        x_train = read_features_file(config_learning.get('x_train'), '\t')
        x_test = read_features_file(config_learning.get('x_test'), '\t')

        # The training features are loaded only so both sets can be scaled together.
        if config_learning.get("scale", True):
            x_train, x_test = scale_datasets(x_train, x_test)

        estimator = joblib.load(os.path.expanduser(config_data.get("Learner", "models")) + "/" + method_name + ".pkl")

        return estimator.predict(x_test)
Ejemplo n.º 6
0
    def recursive_feature_elimination_cv(config_learning, config_data):
        """Run cross-validated recursive feature elimination and predict on the test set.

        The train/test feature files and the training reference file are read
        from the paths stored under ``x_train``, ``y_train`` and ``x_test`` in
        ``config_learning``. The datasets are scaled unless ``config_learning``
        sets ``scale`` to a false value.

        One ``name<TAB>rank`` line per ranked column is written to
        ``feature_ranks.txt`` inside the directory configured as
        ``Learner/models`` in ``config_data``. A feature whose combination
        method is ``'both'`` is listed twice, on consecutive lines.

        :param config_learning: mapping with the learning settings (``x_train``,
            ``y_train``, ``x_test``, ``scale`` and whatever
            ``learn_model.set_learning_method`` needs).
        :param config_data: configuration object queried with
            ``get(section, option)``.
        :return: the predictions of the fitted RFECV object for the test set.
        """
        feature_names = FeatureExtractor.get_features_from_config_file_unsorted(config_data)
        combination_methods = FeatureExtractor.get_combinations_from_config_file_unsorted(config_data)

        x_train = read_features_file(config_learning.get('x_train'), '\t')
        y_train = read_reference_file(config_learning.get('y_train'), '\t')
        x_test = read_features_file(config_learning.get('x_test'), '\t')
        estimator, _ = learn_model.set_learning_method(config_learning, x_train, y_train)

        if config_learning.get("scale", True):
            x_train, x_test = scale_datasets(x_train, x_test)

        # NOTE(review): StratifiedKFold(y, n_folds) looks like the legacy
        # sklearn.cross_validation signature - confirm against the version
        # this file imports before upgrading scikit-learn.
        rfecv = RFECV(estimator=estimator, step=1, cv=StratifiedKFold(y_train, 2), scoring='accuracy')
        rfecv.fit(x_train, y_train)

        # Every feature gets one entry; a 'both' combination gets a second one.
        feature_list = []
        for i, feature_name in enumerate(feature_names):
            feature_list.append(feature_name)
            if combination_methods[i] == 'both':
                feature_list.append(feature_name)

        # Open the report only once the ranks exist, and let the context
        # manager close it, so a failure above neither leaks the handle nor
        # truncates a previous report.
        ranks_path = os.path.expanduser(config_data.get("Learner", "models")) + "/" + "feature_ranks.txt"
        with open(ranks_path, "w") as output:
            for i, name in enumerate(feature_list):
                output.write(name + "\t" + str(rfecv.ranking_[i]) + "\n")

        return rfecv.predict(x_test)