Python pearson 예제들, scipy.stats.pearson Python 예제들

예제 #1

0

파일 보기

파일: utilities.py 프로젝트: junshipeng/TF-RNNLM-CUED

def reject(y, y_exp, var, plot=False, L1=None, name='Rejection.png'):
  """Score a rejection ranking by the area under Pearson-vs-rejection curves.

  For each rejection count i, the i rows ranked last have their `y` value
  replaced by the matching `y_exp` value and the Pearson correlation with
  `y_exp` is recomputed.  Three curves are produced:

  * ranking by squared error (y - y_exp)**2,
  * ranking by `var`,
  * a straight line from the initial correlation P_0 towards 1.0.

  Args:
    y: column array; assumed shape (N, 1) since it is concatenated with the
      other inputs along axis 1 -- TODO confirm against callers.
    y_exp: column array of the same shape as `y`.
    var: column array of the same shape, used to rank rows for the second
      curve.
    plot: when true, save 'Variance.png' and the figure `name`, and print
      the AUC of the (un-shifted) `var`-ranked curve.
    L1: optional 1-D array used only to colour the scatter plots.  Entries
      from index 196 onwards are set to 40; this is done on a private copy
      so the caller's array is left untouched.
    name: file name of the rejection-curve figure.

  Returns:
    Tuple of five values: var_auc/(1-P_0), max_auc/(1-P_0), min_auc/(1-P_0),
    (var_auc-min_auc)/(max_auc-min_auc), and the mean correlation of the
    `var`-ranked curve over rejection fractions in [0.090009, 0.100001]
    (NaN when no fraction falls inside that window).
  """
  error = (y-y_exp)**2
  P_0 = pearson(y, y_exp)[0][0]
  columns = [y, y_exp, error, var]
  if L1 is not None:
    # Copy before editing: the previous code overwrote the caller's array.
    L1 = np.array(L1)
    L1[196:] = 40
    columns.append(L1[:, np.newaxis])
  array = np.concatenate(columns, axis=1)
  total = array.shape[0]

  def rejection_curve(rank_column):
    """Return rows ordered by `rank_column` and [[fraction, pearson], ...]."""
    ordered = array[array[:, rank_column].argsort()]
    curve = [[0.0, P_0]]
    for i in range(1, total):
      # Keep column 0 for the first total-i rows, substitute column 1 for
      # the last i rows, then correlate with column 1.
      x = np.concatenate((ordered[:-i, 0], ordered[-i:, 1]), axis=0)
      curve.append([float(i)/float(total), pearson(x, ordered[:, 1])[0]])
    return ordered, curve

  sorted_by_error, results = rejection_curve(2)
  sorted_by_var, results_var = rejection_curve(3)
  results_min = [[0.0, P_0]]
  for i in range(1, total):
    results_min.append([float(i)/float(total), P_0 + (1.0-P_0)*float(i)/float(total)])
  # Correlations of the var-ranked curve close to a 10% rejection fraction.
  tpr = [p for fraction, p in results_var[1:]
         if 0.090009 <= fraction <= 0.100001]

  max_auc = auc([pt[0] for pt in results], [pt[1] - P_0 for pt in results], reorder=True)
  var_auc = auc([pt[0] for pt in results_var], [pt[1] - P_0 for pt in results_var], reorder=True)
  min_auc = auc([pt[0] for pt in results_min], [pt[1] - P_0 for pt in results_min], reorder=True)

  if plot:
    plt.scatter([pt[0] for pt in results], [v for v in np.asarray(sorted(var, reverse=True))])
    plt.xlim(0.0, 1.0)
    plt.savefig('Variance.png', bbox_inches='tight')
    plt.close()
    if L1 is not None:
      L1_best = sorted_by_error[:, 4]
      L1_var = sorted_by_var[:, 4]
      plt.scatter([pt[0] for pt in results], [pt[1] for pt in results], c=L1_best, cmap=plt.cm.winter)
      plt.scatter([pt[0] for pt in results_var], [pt[1] for pt in results_var], c=L1_var, cmap=plt.cm.winter)
      plt.scatter([pt[0] for pt in results_var], [pt[1] for pt in results_min], c=L1_var, cmap=plt.cm.winter)
    else:
      plt.plot([pt[0] for pt in results], [pt[1] for pt in results], 'b^',
               [pt[0] for pt in results_var], [pt[1] for pt in results_var], 'ro',
               [pt[0] for pt in results_var], [pt[1] for pt in results_min], 'go')
    plt.legend(['Optimal-Rejection', 'Model-Rejection', 'Expected Random-Rejection'], loc=4, prop={'size': 18.5})
    plt.xlim(0.0, 1.0)
    plt.ylim(0.86, 1.0)
    plt.xlabel('Rejection Fraction')
    plt.ylabel('Pearson Correlation')
    plt.savefig(name, bbox_inches='tight')
    plt.close()

    # Single-argument form prints the same text on Python 2 and Python 3.
    print('AUC %s' % auc([pt[0] for pt in results_var], [pt[1] for pt in results_var], reorder=True))
  return var_auc/(1.0-P_0), max_auc/(1.0-P_0), min_auc/(1.0-P_0), (var_auc-min_auc)/(max_auc-min_auc), np.mean(tpr)

예제 #2

0

파일 보기

파일: util.py 프로젝트: beckdaniel/uncertainty_qe

def get_pearson(gp, test_data, samples=1000, its=10):
    """Correlate two sets of GP predictions with the gold labels.

    The last column of `test_data` holds the gold labels and the remaining
    columns are the features passed to `gp.predict(..., full_cov=True)`.

    Returns a pair: the `pearson` result for the flattened predictive mean,
    and the `pearson` result for the flattened output of
    `prs_loss(mean, cov, samples=samples, its=its)`.
    """
    gold = test_data[:, -1]
    mean, cov = gp.predict(test_data[:, :-1], full_cov=True)
    mean = mean.flatten()
    loss_preds = prs_loss(mean, cov, samples=samples, its=its)
    return pearson(mean, gold), pearson(loss_preds.flatten(), gold)

예제 #3

0

파일 보기

파일: utilities.py 프로젝트: junshipeng/TF-RNNLM-CUED

def reject_fill(y, y_exp, var, plot=False, L1=None, name='Rejection.png'):
  """Draw the shaded diagram of the areas between the rejection curves.

  The curves are built as follows: for each rejection count i, the i rows
  ranked last have their `y` value replaced by the matching `y_exp` value
  and the Pearson correlation with `y_exp` is recomputed.  Rows are ranked
  once by squared error and once by `var`; a third, straight-line curve
  runs from the initial correlation P_0 towards 1.0.

  Args:
    y: column array; assumed shape (N, 1) since it is concatenated with the
      other inputs along axis 1 -- TODO confirm against callers.
    y_exp: column array of the same shape as `y`.
    var: column array of the same shape, used to rank rows.
    plot: when true, save the diagram to 'auc_diagramm.png'.
    L1: accepted for signature compatibility; nothing in this function
      reads it, so it is no longer modified or concatenated.
    name: accepted for signature compatibility; the output file name is
      fixed to 'auc_diagramm.png'.

  Returns:
    None.
  """
  error = (y-y_exp)**2
  P_0 = pearson(y, y_exp)[0][0]
  array = np.concatenate((y, y_exp, error, var), axis=1)
  total = array.shape[0]

  def rejection_curve(rank_column):
    """Return [[fraction, pearson], ...] for rows ordered by `rank_column`."""
    ordered = array[array[:, rank_column].argsort()]
    curve = [[0.0, P_0]]
    for i in range(1, total):
      x = np.concatenate((ordered[:-i, 0], ordered[-i:, 1]), axis=0)
      curve.append([float(i)/float(total), pearson(x, ordered[:, 1])[0]])
    return curve

  results = rejection_curve(2)
  results_var = rejection_curve(3)
  results_min = [[0.0, P_0]]
  for i in range(1, total):
    results_min.append([float(i)/float(total), P_0 + (1.0-P_0)*float(i)/float(total)])

  if plot:
    plt.subplots()
    # Size the constant bands from the data instead of a fixed 224 points.
    n_points = len(results)
    fractions = [pt[0] for pt in results]
    fractions_var = [pt[0] for pt in results_var]
    plt.fill_between(fractions, np.zeros(n_points), P_0*np.ones(n_points), alpha=0.01, color='g')
    plt.fill_between(fractions, P_0*np.ones(n_points), [pt[1] for pt in results_min], alpha=0.07, color='g')
    plt.fill_between(fractions_var, [pt[1] for pt in results_min], [pt[1] for pt in results_var], alpha=0.5, color='r')
    plt.fill_between(fractions_var, [pt[1] for pt in results_var], [pt[1] for pt in results], alpha=0.5, color='b')
    plt.xlim(0.0, 1.0)
    plt.ylim(0.86, 1.0)
    ypoints = [0.86, P_0, 0.897, 1.0]
    xpoints = [0.0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0]
    plt.yticks(ypoints, ['0.0' , 'PCC', '10% Rej.\nPCC', '1.0'], fontsize=17)
    plt.yticks(ypoints, fontsize=17)
    plt.xticks(xpoints, ['0.0', '0.1', '0.2', '0.4', '0.6', '0.8', '1.0'], fontsize=17)
    plt.xlabel('Rejection Fraction', fontsize=17)
    plt.ylabel('Pearson Correlation', fontsize=17)
    plt.savefig('auc_diagramm.png', bbox_inches='tight')
    plt.close()

예제 #4

0

파일 보기

파일: LSTM_model.py 프로젝트: maryamag85/Automatic_Alzheimer_Detection

def exploratory_analysis(data):
    """Show per-feature box plots and print squared correlations.

    For each of seven columns a figure with two boxes is shown: the first
    242 entries of the column (tick label 'Control') and the entries where
    data['Category'] == 1 (tick label 'AD').  Then, for a second list of
    columns, the squared first value and the second value returned by
    `pearson(column, data['Category'])` are printed.
    """
    plotted_columns = (
        'ARI', 'CLI', 'count_trailing', 'count_repetitions', 'count_pauses',
        'SIM_score', 'MMSE',
    )
    for column in plotted_columns:
        # Only two groups are drawn; categories 2 and 3 are not plotted.
        first_rows = np.array(data[column][:242])
        category_one = np.array(data[column][data['Category'] == 1])

        plt.figure()
        median_style = dict(linestyle='-', linewidth=2, color='firebrick')
        plt.boxplot([first_rows, category_one], medianprops=median_style)
        plt.xticks([1, 2], ['Control', 'AD'], fontsize=21.0, fontweight='bold')
        plt.yticks(fontsize=21.0, fontweight='bold')
        plt.title(column, fontsize=21.0, fontweight='bold')
        plt.show()

    correlated_columns = (
        'ttr', 'R', 'num_concepts_mentioned', 'ARI', 'CLI', 'prp_count',
        'VP_count', 'NP_count', 'prp_noun_ratio', 'word_sentence_ratio',
        'count_pauses', 'count_unintelligible', 'count_trailing',
        'count_repetitions',
    )
    for column in correlated_columns:
        coefficient, p_value = pearson(data[column], data['Category'])
        print('{}--{}--{}'.format(column, coefficient**2, p_value))

예제 #5

0

파일 보기

def interpolate(targets, model_1, model_2, dir, name, mse_plot=False):
    """Plot the effect of linearly blending the predictions of two models.

    For i in 0..99 the blend (100 - i)/100 * model_1 + i/100 * model_2 is
    scored against `targets` with `pearson` and `MSE`.  The largest
    correlation and the smallest MSE are printed, and the curves are saved
    as PNG files with the weight of `model_2` on the x axis (labelled
    'DNN Fraction').  The weight of `model_2` stops at 0.99; the pure
    `model_2` point is not evaluated.

    Args:
        targets: Targets which both models are compared with.
        model_1: Predictions from model 1.
        model_2: Predictions from model 2.
        dir: Directory where the figures are saved.
        name: Name of the chart, used as a suffix of the file names.
        mse_plot: When true, also save the MSE curve.
    """
    correlations = []
    MSEs = []
    for i in range(100):
        fraction = float(i) / 100.0
        interp = (100.0 -
                  float(i)) / 100.0 * model_1 + model_2 * (float(i)) / 100.0
        p = pearson(interp, targets)
        mse = MSE(interp, targets)
        correlations.append([fraction, p[0]])
        MSEs.append([fraction, mse])

    # Single-argument print() produces the same output on Python 2 and 3.
    print(np.max(np.asarray(correlations)[:, 1]))
    print(np.min(np.asarray(MSEs)[:, 1]))

    plt.plot([point[0] for point in correlations],
             [point[1] for point in correlations])
    plt.xlabel('DNN Fraction')
    plt.ylabel('Pearson Correlation')
    plt.savefig(os.path.join(dir, 'interpolation_pearson_' + name + '.png'))
    plt.close()

    if mse_plot:
        plt.plot([point[0] for point in MSEs], [point[1] for point in MSEs])
        plt.xlabel('DNN Fraction')
        plt.ylabel('MSE')
        plt.savefig(os.path.join(dir, 'interpolation_mse_' + name + '.png'))
        plt.close()

예제 #6

0

파일 보기

파일: util.py 프로젝트: beckdaniel/uncertainty_qe

def save_cautious_curves(model, test_data, target, median=False):
    """
    Sort predictions by variance and calculate
    metrics on the top X% most confident ones,
    generating a curve on X.

    The last column of `test_data` holds the gold labels; the other columns
    are the features given to `model.predict`.  One row per prefix length is
    written to `target` with `np.savetxt` (format '%.4f'), holding MAE,
    RMSE and the first two values returned by `pearson`.

    `median=True` should only be used for Warped GPs.
    """
    feats = test_data[:, :-1]
    gold_labels = test_data[:, -1]
    if median:
        preds = model.predict(feats, median=True)
    else:
        preds = model.predict(feats)
    # Columns: prediction, second output of predict(), gold label.
    # NOTE(review): the second output is assumed to be the predictive
    # variance -- confirm against the model API.  Rows are ordered by that
    # column (ascending) as the docstring requires; the previous code
    # ordered them by the prediction itself.  sorted() keeps the stable
    # ordering of list.sort() and also works on Python 3, where zip()
    # returns an iterator.
    rows = sorted(zip(preds[0].flatten(), preds[1].flatten(), gold_labels),
                  key=lambda row: row[1])
    preds = np.array(rows)
    metric_vals = []
    for i in range(1, len(preds) + 1):
        sub_preds = preds[:i, 0]
        sub_gold = preds[:i, 2]
        mae = MAE(sub_preds, sub_gold)
        rmse = np.sqrt(MSE(sub_preds, sub_gold))
        prs = pearson(sub_preds, sub_gold)
        metric_vals.append([mae, rmse, prs[0], prs[1]])
    np.savetxt(target, metric_vals, fmt='%.4f')

예제 #7

0

파일 보기

파일: util.py 프로젝트: beckdaniel/uncertainty_qe

def get_rec_metrics(model, test_data, median=False):
    """
    Evaluate predictions of the reciprocal of the gold labels.

    The gold values are 1 / (last column of `test_data`).  Two predictors
    are scored with MAE, RMSE and `pearson`: the "naive" one, 1 / the
    flattened first output of `model.predict`, and the flattened output of
    `model.predict_reciprocal`.

    `median=True` should only be used for Warped GPs.

    Returns (mae_naive, rmse_naive, prs_naive, mae_rec, rmse_rec, prs_rec).
    """
    feats = test_data[:, :-1]
    gold_labels = 1 / test_data[:, -1]
    extra = {'median': True} if median else {}
    preds_mean = model.predict(feats, **extra)[0].flatten()
    rec_preds = model.predict_reciprocal(feats).flatten()

    naive_preds = 1/preds_mean
    naive_scores = (
        MAE(naive_preds, gold_labels),
        np.sqrt(MSE(naive_preds, gold_labels)),
        pearson(naive_preds, gold_labels),
    )
    rec_scores = (
        MAE(rec_preds, gold_labels),
        np.sqrt(MSE(rec_preds, gold_labels)),
        pearson(rec_preds, gold_labels),
    )
    return naive_scores + rec_scores

예제 #8

0

파일 보기

파일: util.py 프로젝트: beckdaniel/uncertainty_qe

def prs_loss(mean, cov, samples=1000, its=10):
    """Iteratively refine a prediction vector using multivariate samples.

    Starting from `norm_a(copy of mean, mean)`, each of the `its` passes
    draws `samples` vectors from N(mean, cov) and then, coordinate by
    coordinate, replaces entry i by `dloss(...) / samples`, where `dloss`
    receives the other entries of the current vector, the sampled values of
    the other coordinates and the sampled values of coordinate i.
    Entries updated earlier in a pass are seen by later updates.

    Progress is printed on every pass: the `pearson` result between `mean`
    and the current vector, the updated vector, and its mean absolute
    difference from the initial copy of `mean`.

    Args:
        mean: 1-D mean vector (also the starting point).
        cov: covariance matrix passed to np.random.multivariate_normal.
        samples: number of draws per pass.
        its: number of passes.

    Returns:
        The refined vector.
    """
    curr_a = np.copy(mean)  # start with mean

    n = curr_a.shape[0]
    initial_a = np.copy(curr_a)
    curr_a = norm_a(curr_a, mean)
    for _ in range(its):
        mv_samples = np.random.multivariate_normal(mean, cov, samples)
        # Single-argument print() behaves the same on Python 2 and 3.
        print(pearson(mean, curr_a))
        # One row per coordinate; the transpose is the same for every i.
        per_coordinate = mv_samples.T
        for i in range(n):
            mask = np.ones(curr_a.shape, dtype=bool)
            mask[i] = 0
            ai = curr_a[mask]
            yi = per_coordinate[mask]
            yk = per_coordinate[i]
            ak = dloss(ai, yi, yk)
            curr_a[i] = ak / samples
        print(curr_a)
        print(np.mean(np.abs(initial_a - curr_a)))
    return curr_a

예제 #9

0

파일 보기

파일: util.py 프로젝트: beckdaniel/uncertainty_qe

def get_metrics(model, test_data):
    """
    Predict on the test set and score the predictions.

    The last column of `test_data` holds the gold labels, the rest are the
    features.  Returns (MAE, RMSE, `pearson` result, NLPD), where NLPD is
    minus the mean of `model.predict_density(feats, gold column)`.
    """
    feats, gold_labels = test_data[:, :-1], test_data[:, -1]
    preds = model.predict_y(feats)
    # The second output is fetched but not used by any metric below.
    preds_mean, _second_output = preds[0].flatten(), preds[1]
    return (
        MAE(preds_mean, gold_labels),
        np.sqrt(MSE(preds_mean, gold_labels)),
        pearson(preds_mean, gold_labels),
        - np.mean(model.predict_density(feats, gold_labels[:, None])),
    )

예제 #10

0

파일 보기

파일: util.py 프로젝트: beckdaniel/uncertainty_qe

def get_metrics(model, test_data, median=False):
    """
    Get predictions and evaluate.

    The last column of `test_data` holds the gold labels, the rest are the
    features.  When `median` is true and `model` is a GPy WarpedGP, the
    median predictor is requested; any other model uses the default
    `predict` call.

    Returns (MAE, RMSE, `pearson` result, NLPD), where NLPD is minus the
    mean of `model.log_predictive_density(feats, gold column)`.
    """
    feats = test_data[:, :-1]
    gold_labels = test_data[:, -1]
    # The median predictor should only be used for Warped GPs.
    if median and isinstance(model, GPy.models.WarpedGP):
        preds = model.predict(feats, median=True)
    else:
        preds = model.predict(feats)
    preds_mean = preds[0].flatten()
    mae = MAE(preds_mean, gold_labels)
    rmse = np.sqrt(MSE(preds_mean, gold_labels))
    prs = pearson(preds_mean, gold_labels)
    nlpd = - np.mean(model.log_predictive_density(feats, gold_labels[:, None]))
    # The predictive quantiles used to be computed here and then discarded;
    # that dead call has been removed.
    return mae, rmse, prs, nlpd

예제 #11

0

파일 보기

파일: turo_takehome.py 프로젝트: jkvalentine/Turo_Presentation

def make_reg_plot(vehicle_df):
    '''
    Create a plot relating price difference to total reservations
    showing the regression line

    The first two values returned by `pearson` for the two columns are
    annotated on the axes, and the figure is saved as
    'total_res_vs_price_diff'.

    Args:
        vehicle_df: pandas data frame of vehicle attributes; must contain
            the columns 'price_difference' and 'total_reservations'
    Returns:
        None
    '''
    x = vehicle_df['price_difference']
    y = vehicle_df['total_reservations']
    corr = pearson(x, y)
    annotation = "pearsonr= {:0.2f}; p={:0.2e}".format(corr[0], corr[1])
    fig, ax = plt.subplots()
    # Data is passed by keyword (recent seaborn releases reject positional
    # data arguments) and the target axes is named explicitly.
    sns.regplot(x=x, y=y, ax=ax)
    ax.set_ylabel('Total Reservations')
    ax.set_xlabel('Price Difference')
    ax.set_title('Total Reservations vs. Price Difference')
    ax.annotate(annotation, xy=(350, 320), xycoords='axes points')
    plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
    plt.savefig('total_res_vs_price_diff')

예제 #12

0

파일 보기

파일: seaborn_1.py 프로젝트: Libardo1/moose-drool

# Demo script: regression and correlation plots with seaborn.
# NOTE(review): several calls below (regplot with column names given
# positionally, corr_func/func_name, corrplot, lmplot(color=...)) look like
# an early seaborn API -- confirm the installed version supports them.

# Global styling. sns.set() is called twice in a row with different palettes.
sns.set(palette='Purples_r')
sns.set(palette='Reds_r')
mpl.rc('figure', figsize=(5, 5))
# Fixed seed for the random data below (assumes randn is
# numpy.random.randn -- confirm the import).
np.random.seed(9221999)

# 50 random x values and y = x + noise, plotted straight from the arrays.
x = randn(50)
y = x + randn(50)
sns.regplot(x, y)

# Same data as a two-column DataFrame, plotted by column name.
df = pd.DataFrame(np.transpose([x, y]), columns=["X", "Y"])
sns.regplot("X", 'Y', df)

# Variant without a confidence interval and with a different colour.
sns.regplot("X", 'Y', df, ci=None, color='slategray')

# Custom statistic: square of the first value returned by stats.pearson.
# NOTE(review): scipy.stats names this function pearsonr -- confirm what
# `stats` refers to here.
r2 = lambda x, y: stats.pearson(x, y)[0] ** 2
sns.regplot('X', 'Y', df, corr_func=r2, func_name='$R^2$', color='seagreen')


# Tips dataset, downloaded over the network, with three boolean columns
# derived from the tip ratio, smoker flag and sex.
tips = pd.read_csv("https://raw.github.com/mwaskom/seaborn/master/examples/tips.csv")
tips["big_tip"] = tips.tip > (.2 * tips.total_bill)
tips["smoker"] = tips["smoker"] == "Yes"
tips["female"] = tips["sex"] == "Female"
mpl.rc("figure", figsize=(7, 7))
# Correlation plots of the whole frame with different display options.
sns.corrplot(tips)
sns.corrplot(tips, sig_stars=False)
sns.corrplot(tips, sig_tail='upper', cmap='PuRd', cmap_range=(-.2, .8))

# Back to the smaller figure size for the linear-model plots.
mpl.rc('figure', figsize=(5, 5))
sns.lmplot('total_bill', 'tip', tips)
sns.lmplot('total_bill', 'tip', tips, color='time')