def load_get_coefficients(config_learning, config_data):
    """Write the coefficients of a saved model next to their feature names.

    The estimator is loaded from ``<models>/<method>.pkl`` and one
    ``name<TAB>coefficient`` line per feature is written to
    ``<models>/coefficients.txt``, where ``<models>`` is the (user-expanded)
    value of ``config_data.get("Learner", "models")`` and ``<method>`` is
    ``config_learning["learning"]["method"]``.

    A feature whose combination method is ``'both'`` is listed twice, on
    consecutive lines, so that it lines up with two coefficient columns.

    Args:
        config_learning: Mapping with a ``"learning"`` entry that itself
            holds a ``"method"`` entry (the model file's base name).
        config_data: Config object queried with ``get("Learner", "models")``
            and passed to ``FeatureExtractor`` to obtain feature names and
            combination methods.

    Returns:
        None. The result is the file written to disk.
    """
    models_dir = os.path.expanduser(config_data.get("Learner", "models"))

    feature_names = FeatureExtractor.get_features_from_config_file_unsorted(config_data)
    combination_methods = FeatureExtractor.get_combinations_from_config_file_unsorted(config_data)

    learning_config = config_learning.get("learning", None)
    method_name = learning_config.get("method", None)

    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code -- only point this at model files you trust.
    estimator = joblib.load(os.path.join(models_dir, method_name + ".pkl"))
    coefficients = estimator.coef_

    # Expand the feature names so that 'both' features occupy two slots.
    feature_list = []
    for i, feature_name in enumerate(feature_names):
        repeats = 2 if combination_methods[i] == 'both' else 1
        feature_list.extend([feature_name] * repeats)

    # Only the first row of coef_ is reported.
    # NOTE(review): assumes coef_ is 2-D (e.g. a binary linear model) -- confirm.
    lines = [
        name + "\t" + str(coefficients[0][i]) + "\n"
        for i, name in enumerate(feature_list)
    ]

    # Open the output only once everything has been computed: an earlier
    # failure no longer truncates an existing coefficients.txt, and the
    # context manager guarantees the handle is closed.
    with open(os.path.join(models_dir, "coefficients.txt"), "w") as output:
        output.writelines(lines)
def recursive_feature_elimination_cv(config_learning, config_data):
    """Rank features with cross-validated recursive feature elimination.

    Fits an ``RFECV`` selector (``step=1``, ``scoring='accuracy'``) around
    the estimator returned by ``learn_model.set_learning_method``, writes one
    ``name<TAB>rank`` line per feature to ``<models>/feature_ranks.txt`` and
    returns the fitted selector's predictions for the test set.
    ``<models>`` is the (user-expanded) value of
    ``config_data.get("Learner", "models")``.

    A feature whose combination method is ``'both'`` is listed twice, on
    consecutive lines, so that it lines up with two feature columns.

    Args:
        config_learning: Mapping providing the ``'x_train'``, ``'y_train'``
            and ``'x_test'`` file paths (read as tab-separated files) and an
            optional ``"scale"`` flag (defaults to ``True``); it is also
            passed to ``learn_model.set_learning_method``.
        config_data: Config object queried with ``get("Learner", "models")``
            and passed to ``FeatureExtractor`` to obtain feature names and
            combination methods.

    Returns:
        The value of ``rfecv.predict(x_test)``.
    """
    models_dir = os.path.expanduser(config_data.get("Learner", "models"))

    feature_names = FeatureExtractor.get_features_from_config_file_unsorted(config_data)
    combination_methods = FeatureExtractor.get_combinations_from_config_file_unsorted(config_data)

    x_train = read_features_file(config_learning.get('x_train'), '\t')
    y_train = read_reference_file(config_learning.get('y_train'), '\t')
    x_test = read_features_file(config_learning.get('x_test'), '\t')

    # The scorers returned alongside the estimator are not used here.
    estimator, scorers = learn_model.set_learning_method(config_learning, x_train, y_train)

    if config_learning.get("scale", True):
        x_train, x_test = scale_datasets(x_train, x_test)

    # NOTE(review): StratifiedKFold(y_train, 2) is the positional form of the
    # legacy sklearn.cross_validation API -- confirm against the installed
    # scikit-learn version before upgrading.
    rfecv = RFECV(estimator=estimator, step=1, cv=StratifiedKFold(y_train, 2), scoring='accuracy')
    rfecv.fit(x_train, y_train)

    # Expand the feature names so that 'both' features occupy two slots.
    feature_list = []
    for i, feature_name in enumerate(feature_names):
        repeats = 2 if combination_methods[i] == 'both' else 1
        feature_list.extend([feature_name] * repeats)

    lines = [
        name + "\t" + str(rfecv.ranking_[i]) + "\n"
        for i, name in enumerate(feature_list)
    ]

    # Open the output only after the (potentially slow, potentially failing)
    # fit: a failure no longer truncates an existing feature_ranks.txt, and
    # the context manager guarantees the handle is closed.
    with open(os.path.join(models_dir, "feature_ranks.txt"), "w") as output:
        output.writelines(lines)

    return rfecv.predict(x_test)
def average_feature_values(config_path=None, feature_file=None):
    """Print the column-wise mean of a feature file, one feature per line.

    Each output line has the form ``name<TAB>average``, where the names come
    from ``FeatureExtractor.get_features_from_config_file_unsorted`` and the
    averages are ``np.mean(..., axis=0)`` over the values read from the
    tab-separated feature file.

    Unlike the sibling reporting functions, features are NOT duplicated for
    the ``'both'`` combination strategy: every configured feature name is
    paired with exactly one column.

    Args:
        config_path: Path of the config file to read. Defaults to
            ``<cwd>/config/wmt.cfg``.
        feature_file: Path of the tab-separated feature file. Defaults to
            the newstest2015 cs-en feature file under
            ``~/Dropbox/experiments_fluency/test_learn_to_rank``.

    Returns:
        None. The result is printed to standard output.
    """
    if config_path is None:
        config_path = os.getcwd() + "/" + "config" + "/" + "wmt.cfg"
    if feature_file is None:
        my_dir = os.path.expanduser("~/Dropbox/experiments_fluency/test_learn_to_rank")
        feature_file = my_dir + "/" + "x_newstest2015.cobalt_comb_min_fluency_features_all.cs-en.tsv"

    config = ConfigParser()
    # Close the config file deterministically instead of leaking the handle.
    # NOTE(review): readfp is kept because that is the parser API this file
    # already relies on; newer Python versions spell it read_file -- confirm
    # the target interpreter before changing it.
    with open(config_path) as config_handle:
        config.readfp(config_handle)

    feature_names = FeatureExtractor.get_features_from_config_file_unsorted(config)
    feature_values = read_features_file(feature_file, "\t")
    averages = np.mean(feature_values, axis=0)

    # Index explicitly (rather than zip) so a mismatch between the number of
    # configured features and the number of columns fails loudly.
    for i, name in enumerate(feature_names):
        print(name + "\t" + str(averages[i]))