Beispiel #1
0
def get_pval_from_predictions(m0_predictions, m1_predictions, ground_truth, twotailed=False, method='steiger'):
    '''Test whether two models' predictions correlate differently with the same ground truth.

    Spearman rank correlations are computed between each prediction set and
    ``ground_truth`` (corr0, corr1) and between the two prediction sets
    (corr01). The three coefficients and the sample size are then handed to
    ``corrstats.dependent_corr``.

    If twotailed==False, the caller needs to check that whichever of corr0
    and corr1 is higher is the one that was expected to be higher.

    Parameters:
        m0_predictions: predictions of the first model.
        m1_predictions: predictions of the second model.
        ground_truth: reference values both models are scored against.
        twotailed: forwarded unchanged to ``corrstats.dependent_corr``.
        method: forwarded unchanged to ``corrstats.dependent_corr``.

    Returns:
        Tuple ``(t2, pv, corr0, corr1, corr01)`` where ``t2, pv`` are the two
        values returned by ``corrstats.dependent_corr``.

    Raises:
        AssertionError: if the three inputs do not all have the same length.
    '''
    n0 = len(m0_predictions)
    n1 = len(m1_predictions)
    n2 = len(ground_truth)
    assert n0 == n1, 'm0_predictions and m1_predictions differ in length'
    assert n0 == n2, 'predictions and ground_truth differ in length'
    # Imported only after the inputs are validated, so malformed input is
    # rejected without needing the project-local module.
    import corrstats
    corr0, _ = scipy.stats.spearmanr(m0_predictions, ground_truth)
    corr1, _ = scipy.stats.spearmanr(m1_predictions, ground_truth)
    corr01, _ = scipy.stats.spearmanr(m0_predictions, m1_predictions)
    t2, pv = corrstats.dependent_corr(corr0, corr1, corr01, n0, twotailed=twotailed, method=method)
    return t2, pv, corr0, corr1, corr01
Beispiel #2
0
# Baseline model: assigns a constant rating to every descriptor-molecule combo,
# used when there are zero training molecules.
key = 'Baseline'
for i in range(PredY.shape[0]):
  actual_row = PredY[i, :]
  # The baseline's correlation is recorded as 0 for every row.
  corrs[key][test_size].append(0)
  # Squared norm of the row itself is stored as the baseline's squared error.
  sqerrs[key][test_size].append(np.linalg.norm(actual_row) ** 2)

# Generate predictions for the Semantics and Half models (identical in this case, as there are no training ratings)
for key in ['Semantics2', 'Half2']:
  hat = modelX2.predict(PredX)
  # Score each row of the prediction against the matching row of PredY.
  for i in range(PredY.shape[0]):
    predicted_row = hat[i, :]
    actual_row = PredY[i, :]
    corrs[key][test_size].append(mu.corrcoef(predicted_row, actual_row))
    sqerrs[key][test_size].append(np.linalg.norm(predicted_row - actual_row) ** 2)
  key_corrs = corrs[key][test_size]
  sqmeans[key][test_size].append(mu.sqmean(key_corrs))
  medians[key][test_size].append(np.median(key_corrs))
  # One value per row correlation: element [1] of dependent_corr, called with
  # 0 for the other two correlations. `i` is the index left over from the row
  # loop above and only supplies the row length.
  key_pvals = [
    mu.nanreplace(
      corrstats.dependent_corr(jj, 0, 0, hat[i, :].size, twotailed=False)[1],
      diff=jj)
    for jj in key_corrs
  ]
  mediansPvals[key][test_size].append(np.median(key_pvals))
  # Squared error of this model divided by the Baseline's, position by position.
  sqerr_ratios = [jj / kk for jj, kk in zip(sqerrs[key][test_size], sqerrs['Baseline'][test_size])]
  mediansSqErrReductions[key][test_size].append(np.median(sqerr_ratios))
  meansSqErrReductions[key][test_size].append(np.mean(sqerr_ratios))

# Generate predictions for the Perceptual model (a constant prediction in this case, as there are no training ratings)
key = 'Perceptual'
# The prediction is a zero vector with one entry per column of Py2.
hat = np.zeros(Py2.shape[1])
for i in range(PredY.shape[0]):
  actual_row = PredY[i, :]
  corrs[key][test_size].append(mu.corrcoef(hat, actual_row))
  sqerrs[key][test_size].append(np.linalg.norm(hat - actual_row) ** 2)
key_corrs = corrs[key][test_size]
sqmeans[key][test_size].append(mu.sqmean(key_corrs))
medians[key][test_size].append(np.median(key_corrs))
# No test is run for this model; a fixed 0 is stored instead.
mediansPvals[key][test_size].append(0)

#######################################################################################################
Beispiel #3
0
        # Summarize the collected correlations for every key that has no '-'
        # in its name.
        plain_keys = [k for k in keys if '-' not in k]
        for key in plain_keys:
            key_corrs = corrs[key][j]
            medians[key][j].append(np.median(key_corrs))
            sqmeans[key][j].append(mu.sqmean(key_corrs))
        # Compare every key whose name does not contain 'Baseline' against
        # the 'Baseline' predictions.
        for key in [k for k in keys if 'Baseline' not in k]:
            key2 = 'Baseline-' + key
            # Per test row: correlation between the Baseline prediction and
            # this key's prediction for that row.
            for i in range(Py_test.shape[0]):
                corrs[key2][j].append(
                    mu.corrcoef(hats['Baseline'], hats[key][i, :]))
            # Summaries stored under key2 must come from the key2 correlations
            # collected just above; corrs[key] is already summarized under
            # medians[key] / sqmeans[key] by the preceding loop.
            medians[key2][j].append(np.median(corrs[key2][j]))
            sqmeans[key2][j].append(mu.sqmean(corrs[key2][j]))
            # Median over rows of element [1] of dependent_corr (presumably
            # the p-value -- confirm against corrstats), fed with this key's
            # correlation, the Baseline's correlation, and the correlation
            # between the two predictions.
            mediansPvals[key][j].append(
                np.median([
                    mu.nanreplace(
                        corrstats.dependent_corr(jj,
                                                 kk,
                                                 ll,
                                                 Py_trainmean.size,
                                                 twotailed=False)[1])
                    for jj, kk, ll in zip(corrs[key][j], corrs['Baseline'][j],
                                          corrs[key2][j])
                ]))
        # print [np.median(corrs),np.median(corrs1),np.median(corrs2),np.median(corrs3)]#,np.median(corrs1)])) #+'\t'+str(np.mean(corrs))+'\t'+str(err)
        if test_size < 1.0:

            pickle.dump(
                {
                    'corrs': corrs,
                    'medians': medians,
                    'iter': qq,
                    'sqmeans': sqmeans,
                    'mediansPvals': mediansPvals