def predict_get_spearman_value(test_set, regressor, target_col='age_in_days'):
    """Predict the target column of ``test_set`` and score the predictions.

    Every column of ``test_set`` except ``target_col`` is passed to
    ``regressor.predict``; the predictions are then compared with the true
    target values through ``use_spearmanr``.

    NOTE(review): a second function with this same name and the signature
    ``(X, y, regressor)`` is defined later in this file. If both live in the
    same module, the later definition replaces this one at import time.

    Args:
        test_set: pandas DataFrame holding the feature columns plus the
            target column.
        regressor: object exposing ``predict(features)``.
        target_col: name of the column holding the true values. Defaults to
            ``'age_in_days'``, the name that used to be hard-coded here.

    Returns:
        Tuple ``(test_df, spearman_values)``. ``test_df`` is a new DataFrame
        with the true target column and a ``'predicted'`` column;
        ``spearman_values`` is whatever ``use_spearmanr`` returns.
    """
    test_df = pd.DataFrame(test_set[target_col])
    # Boolean column mask: keep everything that is not the target.
    features = test_set.loc[:, test_set.columns != target_col]
    test_df['predicted'] = regressor.predict(features)
    spearman_values = use_spearmanr(test_set[target_col].values,
                                    test_df['predicted'].values)
    return test_df, spearman_values
def predict_get_spearman_value(X, y, regressor):
    """Run ``regressor`` on ``X`` and correlate its output with ``y``.

    Args:
        X: feature data passed unchanged to ``regressor.predict``.
        y: true target values.
        regressor: object exposing ``predict(X)``.

    Returns:
        A 3-tuple ``(predicted_y, spearman_values, pearson_values)``: the
        raw predictions followed by the results of ``use_spearmanr`` and
        ``use_pearsonr`` applied to ``(y, predicted_y)``.
    """
    predictions = regressor.predict(X)
    return (
        predictions,
        use_spearmanr(y, predictions),
        use_pearsonr(y, predictions),
    )
def _calc_results_scores(true_values, predicted_values):
    """Return the Spearman and Pearson results for one set of predictions."""
    return {
        'spearman': use_spearmanr(true_values, predicted_values),
        'pearson': use_pearsonr(true_values, predicted_values),
    }


def _calc_results_legend_label(split_name, scores):
    """Build the multi-line legend entry for one scatter series."""
    spearman = scores['spearman']
    pearson = scores['pearson']
    return (split_name
            + '\nSpearman - rho: ' + str(round(spearman['rho'], 2))
            + ' pval: ' + str(round(spearman['pvalue'], 10))
            + '\nPearson - rho: ' + str(round(pearson['rho'], 2))
            + ' pval: ' + str(round(pearson['pvalue'], 10)))


def _calc_results_title_lines(key):
    """Split a '|'-separated key into title lines of up to three fields."""
    fields = key.split('|')
    full_groups = len(fields) // 3
    # NOTE(review): within each complete triple the fields are emitted as
    # first, third, second. That order is kept from the previous
    # implementation; confirm it is intended.
    lines = [
        ', '.join((fields[3 * i], fields[3 * i + 2], fields[3 * i + 1]))
        for i in range(full_groups)
    ]
    # Only the fields that did not fit a complete triple go on the last
    # line. The previous code always read fields[-2], which raised
    # IndexError for a key without '|' and repeated an already-shown field
    # when exactly one field was left over.
    leftover = fields[3 * full_groups:]
    if leftover:
        lines.append(', '.join(leftover))
    return lines


def calc_results(y_train_dict, y_test_dict, algo_name, n_rows=3, n_cols=3,
                 visualize=False):
    """Score train/test predictions per sample and plot a Spearman summary.

    The two dicts are iterated in lockstep (by position, not by key); the
    key of ``y_train_dict`` names each sample in the results.

    Args:
        y_train_dict: mapping of sample key to a dict with
            ``'y_train_values'`` and ``'y_train_predicted_values'``.
        y_test_dict: mapping of sample key to a dict with
            ``'y_test_values'`` and ``'y_test_predicted_values'``.
        algo_name: text used as a prefix in the plot titles.
        n_rows: subplot grid rows when ``visualize`` is true.
        n_cols: subplot grid columns when ``visualize`` is true.
        visualize: when true, also draw one train/test scatter subplot per
            sample, showing each grid once it is full.

    Returns:
        Tuple ``(train_res, test_res)``; each maps a sample key to
        ``{'spearman': ..., 'pearson': ...}`` as returned by
        ``use_spearmanr`` / ``use_pearsonr``.
    """
    train_res = {}
    test_res = {}

    # Grid and legend placement do not depend on the sample, so settle
    # them once before the loop.
    bbox_to_anchor = None
    plots_per_figure = None
    if visualize:
        if len(y_train_dict) == 1:
            n_rows = n_cols = 1
        else:
            bbox_to_anchor = (0.5, -0.65)
        plots_per_figure = n_rows * n_cols

    paired = zip(y_train_dict.items(), y_test_dict.items())
    for sample_num, ((key, train), (_, test)) in enumerate(paired):
        if visualize:
            subplot_idx = sample_num % plots_per_figure
            # A new grid is about to start: flush the one that just filled.
            if subplot_idx == 0 and sample_num != 0:
                plt.subplots_adjust(hspace=1.1, wspace=0.8)
                plt.show()

        train_res[key] = _calc_results_scores(
            train['y_train_values'], train['y_train_predicted_values'])
        test_res[key] = _calc_results_scores(
            test['y_test_values'], test['y_test_predicted_values'])

        print('\n**********')
        print('Sample number: ', str(sample_num))
        print(key)
        print('train ' + str(train_res[key]) + ', test' + str(test_res[key]))

        if visualize:
            plt.subplot(n_rows, n_cols, subplot_idx + 1)
            plt.scatter(train['y_train_values'],
                        train['y_train_predicted_values'],
                        label=_calc_results_legend_label(
                            'Train', train_res[key]))
            plt.scatter(test['y_test_values'],
                        test['y_test_predicted_values'],
                        label=_calc_results_legend_label(
                            'Test', test_res[key]))
            plt.xlabel('Real Values')
            plt.ylabel('Predicted Values')
            plt.title(algo_name + '\n'
                      + '\n'.join(_calc_results_title_lines(key)))
            plt.legend(loc='lower center', bbox_to_anchor=bbox_to_anchor)

    # NOTE(review): the summary figure below is drawn and shown whether or
    # not ``visualize`` is set; confirm that this is the intended behavior.
    plt.figure()
    train_data = [res['spearman']['rho'] for res in train_res.values()]
    plt.scatter(list(range(len(train_data))), train_data, label='Train',
                linewidth=0.3)
    test_data = [res['spearman']['rho'] for res in test_res.values()]
    plt.scatter(list(range(len(test_data))), test_data, label='Test',
                linewidth=0.3)
    plt.legend()
    plt.title(algo_name + ' Spearman' + r'$\rho$ vs params')
    plt.xlabel('sample #')
    plt.ylabel(r'$\rho$ value')
    plt.show()
    return train_res, test_res
def _calc_results_and_plot_label(split_name, scores):
    """Build the multi-line legend entry (correlations + MSE) for a series."""
    spearman = scores['spearman']
    pearson = scores['pearson']
    return (split_name
            + '\nSpearman - rho: ' + str(round(spearman['rho'], 2))
            + ' pval: ' + str(round(spearman['pvalue'], 10))
            + '\nPearson - rho: ' + str(round(pearson['rho'], 2))
            + ' pval: ' + str(round(pearson['pvalue'], 10))
            + '\nmse: ' + str(round(scores['mse'], 2)))


def calc_results_and_plot(y_train_values, y_train_predicted_values,
                          y_test_values, y_test_predicted_values, algo_name,
                          visualize=False, title=None, show=True):
    """Score train/test predictions and optionally scatter-plot them.

    Args:
        y_train_values: true values of the training set.
        y_train_predicted_values: predictions for the training set.
        y_test_values: true values of the test set.
        y_test_predicted_values: predictions for the test set.
        algo_name: first line of the plot title.
        visualize: when true, draw real-vs-predicted scatter series for
            both sets on a new figure, with the scores in the legend.
        title: optional second line of the plot title; ``None`` is treated
            as an empty string.
        show: when true (and ``visualize`` is true), call ``plt.show()``
            after drawing.

    Returns:
        Tuple ``(train_res, test_res)``; each is a dict with keys
        ``'spearman'``, ``'pearson'`` and ``'mse'`` holding the results of
        ``use_spearmanr``, ``use_pearsonr`` and ``mean_squared_error``.
    """
    train_res = {
        'spearman': use_spearmanr(y_train_values, y_train_predicted_values),
        'pearson': use_pearsonr(y_train_values, y_train_predicted_values),
        'mse': mean_squared_error(y_train_values, y_train_predicted_values),
    }
    test_res = {
        'spearman': use_spearmanr(y_test_values, y_test_predicted_values),
        'pearson': use_pearsonr(y_test_values, y_test_predicted_values),
        'mse': mean_squared_error(y_test_values, y_test_predicted_values),
    }

    if visualize:
        # Legend labels are only needed for the plot, so they are built
        # here rather than on every call.
        plt.figure()
        ax = plt.subplot(111)
        ax.scatter(y_train_values, y_train_predicted_values,
                   label=_calc_results_and_plot_label('Train', train_res))
        ax.scatter(y_test_values, y_test_predicted_values,
                   label=_calc_results_and_plot_label('Test', test_res))
        plt.xlabel('Real Values')
        plt.ylabel('Predicted Values')
        plt.title(algo_name + '\n' + ('' if title is None else title))

        # Shrink the axes' height by 10% from the bottom so the legend
        # placed below them has room.
        box = ax.get_position()
        ax.set_position(
            [box.x0, box.y0 + box.height * 0.1, box.width, box.height * 0.9])
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05),
                  fancybox=True, shadow=True, ncol=5)

        # NOTE(review): ``show`` is applied only to the figure drawn here;
        # confirm no caller relies on plt.show() when visualize is False.
        if show:
            plt.show()

    return train_res, test_res