def recursive_feature_elimination(config_learning, config_data, number_features):
    """Rank features with recursive feature elimination and predict on x_test.

    Wraps the estimator returned by ``learn_model.set_learning_method`` in an
    ``RFE`` selector, fits it on the training data, writes one
    ``name<TAB>rank`` line per configured name to ``feature_ranks.txt`` (and
    echoes the same line to stdout), then predicts on the test features.

    Args:
        config_learning: Mapping-like object queried for 'x_train',
            'y_train', 'x_test' and the optional 'scale' flag (default True).
        config_data: Config object queried with ``get("Learner", "models")``
            for the output directory and handed to ``FeatureExtractor``.
        number_features: Passed to ``RFE`` as ``n_features_to_select``.

    Returns:
        The result of ``rfe.predict(x_test)``.
    """
    models_dir = os.path.expanduser(config_data.get("Learner", "models"))
    ranks_path = models_dir + "/" + "feature_ranks.txt"

    # The file is still opened up front (so an unwritable directory fails
    # before training), but the context manager guarantees it is closed even
    # when loading, fitting or writing raises.
    with open(ranks_path, "w") as output:
        # NOTE(review): recursive_feature_elimination_cv takes its names from
        # get_features_from_config_file_unsorted; confirm that the
        # "combinations" list is really the intended source of names here.
        feature_names = FeatureExtractor.get_combinations_from_config_file_unsorted(config_data)

        x_train = read_features_file(config_learning.get('x_train'), '\t')
        y_train = read_reference_file(config_learning.get('y_train'), '\t')
        x_test = read_features_file(config_learning.get('x_test'), '\t')

        # The estimator is configured from the unscaled training data, as in
        # the original; only the scorers (unused here) are discarded.
        estimator, _ = learn_model.set_learning_method(config_learning, x_train, y_train)

        if config_learning.get("scale", True):
            x_train, x_test = scale_datasets(x_train, x_test)

        # Keyword form: recent scikit-learn releases no longer accept
        # n_features_to_select positionally.
        rfe = RFE(estimator, n_features_to_select=number_features, step=1)
        rfe.fit(x_train, y_train)

        for i, name in enumerate(feature_names):
            line = name + "\t" + str(rfe.ranking_[i])
            output.write(line + "\n")
            print(line)

    return rfe.predict(x_test)
def test_learn_to_rank_coefficients(config_learning, config):
    """Score test instances with the coefficients of a previously saved model.

    Loads ``<models>/<method>.pkl`` with joblib, where ``<models>`` is the
    expanded ``("Learner", "models")`` option of ``config`` and ``<method>``
    is ``config_learning["learning"]["method"]``. Each row of the test
    features is scored as the sum of ``value * coef_[0][k]`` over its
    entries.

    Args:
        config_learning: Mapping-like object queried for 'x_train', 'x_test',
            'learning' and the optional 'scale' flag (default True).
        config: Config object queried with ``get('Learner', 'models')``.

    Returns:
        A list with one score per row of the test features.
    """
    train_features = read_features_file(config_learning.get('x_train'), '\t')
    test_features = read_features_file(config_learning.get('x_test'), '\t')

    # Training features are loaded only so both sets can be scaled together.
    if config_learning.get("scale", True):
        train_features, test_features = scale_datasets(train_features, test_features)

    method_name = config_learning.get("learning", None).get("method", None)
    models_dir = os.path.expanduser(config.get('Learner', 'models'))
    model = joblib.load(models_dir + '/' + method_name + '.pkl')
    weights = model.coef_[0]

    def weighted_sum(row):
        # Explicit accumulation starting from 0.0, indexing the weights by
        # position so a too-short weight vector still raises.
        total = 0.0
        for position, value in enumerate(row):
            total += value * weights[position]
        return total

    return [weighted_sum(row) for row in test_features]
def test_learn_to_rank(config_learning, config):
    """Return the first ``predict_proba`` column of a saved model for x_test.

    Loads ``<models>/<method>.pkl`` with joblib, where ``<models>`` is the
    expanded ``("Learner", "models")`` option of ``config`` and ``<method>``
    is ``config_learning["learning"]["method"]``, then calls
    ``predict_proba`` on the (optionally scaled) test features.

    Args:
        config_learning: Mapping-like object queried for 'x_train', 'x_test',
            'learning' and the optional 'scale' flag (default True).
        config: Config object queried with ``get('Learner', 'models')``.

    Returns:
        A list holding element 0 of every row returned by ``predict_proba``.
    """
    train_features = read_features_file(config_learning.get('x_train'), '\t')
    test_features = read_features_file(config_learning.get('x_test'), '\t')

    # Training features are loaded only so both sets can be scaled together.
    if config_learning.get("scale", True):
        train_features, test_features = scale_datasets(train_features, test_features)

    method_name = config_learning.get("learning", None).get("method", None)
    models_dir = os.path.expanduser(config.get('Learner', 'models'))
    model = joblib.load(models_dir + '/' + method_name + '.pkl')

    first_column = []
    for row in model.predict_proba(test_features):
        first_column.append(row[0])
    return first_column
def train_save(config_learning, config_data):
    """Fit the configured estimator and pickle it to the models directory.

    The estimator comes from ``learn_model.set_learning_method`` and is
    dumped with joblib to ``<models>/<method>.pkl``, where ``<models>`` is
    the expanded ``("Learner", "models")`` option of ``config_data`` and
    ``<method>`` is ``config_learning["learning"]["method"]``.

    Args:
        config_learning: Mapping-like object queried for 'learning',
            'x_train', 'y_train', 'x_test' and the optional 'scale' flag
            (default True).
        config_data: Config object queried with ``get('Learner', 'models')``.

    Returns:
        None.
    """
    method_name = config_learning.get("learning", None).get("method", None)

    train_features = read_features_file(config_learning.get('x_train'), '\t')
    train_labels = read_reference_file(config_learning.get('y_train'), '\t')
    # Test features are loaded only because scale_datasets takes both sets.
    test_features = read_features_file(config_learning.get('x_test'), '\t')

    if config_learning.get("scale", True):
        train_features, test_features = scale_datasets(train_features, test_features)

    # Only the estimator half of the returned pair is used here.
    configured = learn_model.set_learning_method(config_learning, train_features, train_labels)
    model, _unused_scorers = configured
    model.fit(train_features, train_labels)

    models_dir = os.path.expanduser(config_data.get('Learner', 'models'))
    model_path = models_dir + '/' + method_name + '.pkl'
    joblib.dump(model, model_path)
def load_predict(config_learning, config_data):
    """Load a previously saved estimator and predict on the test features.

    The model is read with joblib from ``<models>/<method>.pkl``, where
    ``<models>`` is the expanded ``("Learner", "models")`` option of
    ``config_data`` and ``<method>`` is
    ``config_learning["learning"]["method"]``.

    The reference files ('y_train', 'y_test') are deliberately not read:
    nothing in this function uses them, so prediction does not depend on
    label files being configured or parseable.

    Args:
        config_learning: Mapping-like object queried for 'learning',
            'x_train', 'x_test' and the optional 'scale' flag (default True).
        config_data: Config object queried with ``get("Learner", "models")``.

    Returns:
        The result of ``estimator.predict`` on the (optionally scaled) test
        features.
    """
    learning_config = config_learning.get("learning", None)
    method_name = learning_config.get("method", None)

    # Training features are still needed: scale_datasets takes both sets.
    x_train = read_features_file(config_learning.get('x_train'), '\t')
    x_test = read_features_file(config_learning.get('x_test'), '\t')

    if config_learning.get("scale", True):
        x_train, x_test = scale_datasets(x_train, x_test)

    models_dir = os.path.expanduser(config_data.get("Learner", "models"))
    estimator = joblib.load(models_dir + "/" + method_name + ".pkl")

    return estimator.predict(x_test)
def recursive_feature_elimination_cv(config_learning, config_data):
    """Rank features with cross-validated RFE and predict on x_test.

    Wraps the estimator returned by ``learn_model.set_learning_method`` in an
    ``RFECV`` selector (step 1, 'accuracy' scoring), fits it on the training
    data and writes one ``name<TAB>rank`` line per feature column to
    ``feature_ranks.txt`` in the expanded ``("Learner", "models")``
    directory. A feature whose combination method is 'both' is listed twice,
    so it takes up two consecutive ranking entries.

    Args:
        config_learning: Mapping-like object queried for 'x_train',
            'y_train', 'x_test' and the optional 'scale' flag (default True).
        config_data: Config object queried with ``get("Learner", "models")``
            and handed to ``FeatureExtractor``.

    Returns:
        The result of ``rfecv.predict(x_test)``.
    """
    models_dir = os.path.expanduser(config_data.get("Learner", "models"))
    ranks_path = models_dir + "/" + "feature_ranks.txt"

    # The file is still opened up front (so an unwritable directory fails
    # before training), but the context manager guarantees it is closed even
    # when loading, fitting or writing raises.
    with open(ranks_path, "w") as output:
        feature_names = FeatureExtractor.get_features_from_config_file_unsorted(config_data)
        combination_methods = FeatureExtractor.get_combinations_from_config_file_unsorted(config_data)

        x_train = read_features_file(config_learning.get('x_train'), '\t')
        y_train = read_reference_file(config_learning.get('y_train'), '\t')
        x_test = read_features_file(config_learning.get('x_test'), '\t')

        # The estimator is configured from the unscaled training data, as in
        # the original; only the scorers (unused here) are discarded.
        estimator, _ = learn_model.set_learning_method(config_learning, x_train, y_train)

        if config_learning.get("scale", True):
            x_train, x_test = scale_datasets(x_train, x_test)

        # NOTE(review): StratifiedKFold(y_train, 2) looks like the legacy
        # (labels, n_folds) signature -- confirm against the installed
        # scikit-learn version before upgrading.
        rfecv = RFECV(estimator=estimator, step=1, cv=StratifiedKFold(y_train, 2), scoring='accuracy')
        rfecv.fit(x_train, y_train)

        # Expand names to one entry per column: 'both' features appear twice.
        feature_list = []
        for i, feature_name in enumerate(feature_names):
            feature_list.append(feature_name)
            if combination_methods[i] == 'both':
                feature_list.append(feature_name)

        for i, name in enumerate(feature_list):
            output.write(name + "\t" + str(rfecv.ranking_[i]) + "\n")

    return rfecv.predict(x_test)