# Assumed imports for this snippet: numpy's array, sklearn's cluster module and the
# scipy.cluster.vq helpers; compute_bic is defined elsewhere in the original project.
from numpy import array
from sklearn import cluster
from scipy.cluster.vq import whiten, kmeans, vq
import scipy

def k_means_cluster(data_list):
	if max(data_list[0])-min(data_list[0])>10 and max(data_list[1])-min(data_list[1])>10:
		array_diagnal=array([[data_list[0][x],data_list[1][x]] for x in range(len(data_list[0]))])
		ks = range(1,min([5,len(data_list[0])+1]))
		KMeans = [cluster.KMeans(n_clusters = i, init="k-means++").fit(array_diagnal) for i in ks]
		KMeans_predict=[cluster.KMeans(n_clusters = i, init="k-means++").fit_predict(array_diagnal) for i in ks]
		BIC=[]
		BIC_rec=[]
		for x in ks:
			if KMeans_predict[x-1].max()<x-1: continue
			else:
				BIC_i=compute_bic(KMeans[x-1],array_diagnal)
				if abs(BIC_i)<10**8:
					BIC.append(BIC_i)
					BIC_rec.append(x)
		#BIC = [compute_bic(kmeansi,array_diagnal) for kmeansi in KMeans]
		#ks_picked=ks[BIC.index(max(BIC))]
		ks_picked=BIC_rec[BIC.index(max(BIC))]
		if ks_picked==1:
			return [data_list]
		else:
			out=[]
			std_rec=[scipy.std(data_list[0]),scipy.std(data_list[1])]
			whitened = whiten(array_diagnal)
			centroids, distortion=kmeans(whitened,ks_picked)
			idx,_= vq(whitened,centroids)
			for x in range(ks_picked):
				group1=[[int(i) for i in array_diagnal[idx==x,0]],[int(i) for i in array_diagnal[idx==x,1]]]
				out.append(group1)
			return out
	else:
		return [data_list]
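
A minimal, self-contained sketch (assuming only numpy and scipy are installed) of the whiten/kmeans/vq grouping step that k_means_cluster performs once a cluster count has been picked; the BIC-based selection via compute_bic is not reproduced here.

import numpy as np
from scipy.cluster.vq import whiten, kmeans, vq

points = np.array([[1, 2], [1, 3], [2, 2], [50, 52], [51, 50], [49, 51]], dtype=float)
whitened = whiten(points)                   # rescale each column to unit variance
centroids, distortion = kmeans(whitened, 2)
idx, _ = vq(whitened, centroids)            # nearest-centroid index for every point
groups = [points[idx == k].tolist() for k in range(len(centroids))]
print(groups)
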
def Corr(GDP,I,C):
	m = sp.shape(GDP)[1]
	GDPIcorr = []
	GDPCcorr = []
	for i in range(0, m):
		gdp = GDP[:,i]
		inv = I[:,i]
		con = C[:,i]
		#Correlation between output and investment for each series
		gdpi = sp.corrcoef(gdp,inv)
		GDPIcorr.append(gdpi[0,1])
		#Correlation between output and consumption for each series
		gdpc = sp.corrcoef(gdp,con)
		GDPCcorr.append(gdpc[0,1])
	#Mean and standard deviation of correlation between GDP and
	#Investment and Consumption over total number of simulations
	GDPICORR = sp.array(GDPIcorr)
	gdpimean = sp.mean(GDPICORR)
	gdpistdev = sp.std(GDPICORR)
	GDPCCORR = sp.array(GDPCcorr)
	gdpcmean = sp.mean(GDPCCORR)
	gdpcstdev = sp.std(GDPCCORR)
	sp.savetxt('GDPICORR.csv',GDPICORR)
	sp.savetxt('GDPCCORR.csv',GDPCCORR)
	print "The mean and standard deviation between GDP and"
	print "Investment and GDP and Consumption followed by"
	print "The lists of each correlation coefficient for"
	print "each series are saved in csv files"
	return gdpimean, gdpistdev, gdpcmean, gdpcstdev
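
A small self-contained illustration, on simulated series, of the column-wise correlations that Corr summarizes (corr(GDP, I) and corr(GDP, C) per series, then their mean and standard deviation):

import numpy as np

rng = np.random.default_rng(0)
GDP = rng.normal(size=(40, 5))              # 40 periods, 5 simulated series
inv = 0.8 * GDP + rng.normal(size=(40, 5))
con = 0.5 * GDP + rng.normal(size=(40, 5))
gdpi = [np.corrcoef(GDP[:, j], inv[:, j])[0, 1] for j in range(GDP.shape[1])]
gdpc = [np.corrcoef(GDP[:, j], con[:, j])[0, 1] for j in range(GDP.shape[1])]
print(np.mean(gdpi), np.std(gdpi), np.mean(gdpc), np.std(gdpc))
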
Example n. 3
def testLogisticError():
    k = 5

    data = Data(k, 0, 0)
    data.importDataFromMat()
    data.normalize()

    lg = LogisticLinearClassifier(0.03, 0.03, 576, k, data)
    err_train, miss_train, err_val, miss_val = lg.train(30)
    mis_fig = plt.figure()
    ax2 = mis_fig.add_subplot(111)
    ax2.plot(err_val, label="error (validation)")
    ax2.plot(err_train, label="error (training)")
    title = "std(val)=%f std(err)=%f" % (sp.std(err_val), sp.std(err_train))
    mis_fig.suptitle(title)
    ax2.set_ylabel("error")
    ax2.set_xlabel("epoch")
    plt.legend()

    mis_fig = plt.figure()
    ax2 = mis_fig.add_subplot(111)
    ax2.plot(miss_val, label="misclassification ratio (validation)")
    ax2.plot(miss_train, label="misclassification ratio (training)")
    mis_fig.suptitle(title)
    ax2.set_ylabel("misclassification ratio")
    ax2.set_xlabel("epoch")
    plt.legend()

    results, cat = lg.classify(data.test_left, data.test_right)
    lg.confusion_matrix(cat, data.test_cat.argmax(axis=0))

    err = Error()
    err, misclass = err.norm_total_error(results.T, data.test_cat, k)
    print "Error on the test set " + str(err)
    print "Misclassification ratio on the test set " + str(misclass)
Example n. 4
def PrintValues( outfile, values,  options, prefix = "",titles = None):

    if options.flat or options.aggregate_column:

        if options.add_header:
            if prefix: outfile.write( "prefix\t" )
            
            if titles: outfile.write( "column\t" )
                
            print "\t".join( ("nval", "min", "max", "mean", "median", "stddev", "sum", "q1", "q3" ) )
        
        for x in range(len(values)):

            vals = values[x]

            if len(vals) == 0:

                if options.output_empty:
                    if titles: outfile.write( titles[x] + "\t" )
                    if prefix: outfile.write( prefix + "\t" )

                    outfile.write( "0" + "\tna" * 8  + "\n" )

                continue

            if titles: outfile.write( titles[x] + "\t" )
            if prefix: outfile.write( prefix + "\t" )

            vals.sort()
            if len(vals) > 4:
                q1 = options.value_format % vals[len(vals) // 4]
                q3 = options.value_format % vals[len(vals) * 3 // 4]
            else:
                q1 = options.value_format % vals[0]
                q3 = options.value_format % vals[-1]

            outfile.write( "\t".join( ( "%i" % len(vals),
                                        options.value_format % float(min(vals)),
                                        options.value_format % float(max(vals)),
                                        options.value_format % scipy.mean(vals),
                                        options.value_format % scipy.median(vals),
                                        options.value_format % scipy.std(vals),                                      
                                        options.value_format % reduce( lambda x, y: x+y, vals),
                                        q1, q3,
                                        )) + "\n")
            
    else:

        if titles:
            print "category\t%s" % string.join(titles,"\t")

        print "count\t%s"  % (string.join( map(lambda v: "%i" % len(v), values), "\t"))
        print "min\t%s"    % (string.join( map(lambda v: options.value_format % min(v), values), "\t"))
        print "max\t%s"    % (string.join( map(lambda v: options.value_format % max(v), values), "\t"))
        print "mean\t%s"   % (string.join( map(lambda v: options.value_format % scipy.mean(v), values), "\t"))
        print "median\t%s" % (string.join( map(lambda v: options.value_format % scipy.median(v), values), "\t"))
        print "stddev\t%s" % (string.join( map(lambda v: options.value_format % scipy.std(v), values), "\t"))
        print "sum\t%s"    % (string.join( map(lambda v: options.value_format % reduce( lambda x,y: x+y, v), values), "\t"))
        print "q1\t%s"     % (string.join( map(lambda v: options.value_format % scipy.stats.scoreatpercentile(v,per=25), values), "\t"))
        print "q3\t%s"     % (string.join( map(lambda v: options.value_format % scipy.stats.scoreatpercentile(v,per=75), values), "\t"))
Example n. 5
def centroid_shift(array, levels=None):
  """
  Measure how the centroid varies as the isopleth used to measure it is changed,
  similar to wX in X-rays.

  Parameters
  ----------
    array     : array of floats
                (smoothed) galaxy luminosity or number density. Must be a
                square array (i.e., same number of rows and columns).
    levels    : list of floats
                intensity levels to use for the centroid shift

  """
  cm = ndimage.measurements.center_of_mass
  xo = len(array)/2.
  yo = len(array[0])/2.
  if levels is None:
    rms = -1
    masked = array
    while rms != scipy.std(masked):
      rms = scipy.std(masked)
      masked = masked[masked < 3*rms]
    levels = scipy.arange(2, 5.1, 0.5) * rms
  centroids = []
  for l in levels:
    masked = array.copy()  # work on a copy so the thresholding does not overwrite the input
    masked[masked < l] = 0
    # minimal completion of the unfinished loop body: record the centroid at each isopleth
    centroids.append(cm(masked))
  return centroids
Example n. 6
def WriteRadius(mali, identifiers, prefix="", gap_char="-"):
    """write percent identities in pairwise comparisons both for nucleotide acids and amino acids."""

    pides_na = []
    seq_aa = []
    for x in range(0, len(identifiers)):

        seq_aa.append(Genomics.TranslateDNA2Protein(mali[identifiers[x]]))

        for y in range(x + 1, len(identifiers)):
            if x == y:
                continue
            pides_na.append(MaliIO.getPercentIdentity(
                mali[identifiers[x]], mali[identifiers[y]], gap_char))

    pides_aa = []
    for x in range(0, len(identifiers) - 1):
        for y in range(x + 1, len(identifiers)):
            pides_aa.append(
                MaliIO.getPercentIdentity(seq_aa[x], seq_aa[y], gap_char))

    print "%s\tpide\t%i\t" % (prefix, len(pides_na)) +\
          string.join(map(lambda x: "%.2f" % x, (min(pides_na),
                                                 max(pides_na),
                                                 scipy.mean(pides_na),
                                                 scipy.median(pides_na),
                                                 scipy.std(pides_na))), "\t") + "\t" +\
          string.join(map(lambda x: "%.2f" % x, (min(pides_aa),
                                                 max(pides_aa),
                                                 scipy.mean(pides_aa),
                                                 scipy.median(pides_aa),
                                                 scipy.std(pides_aa))), "\t")
Example n. 7
def estimate_performance_xgboost(X,labels,param, num_round, folds):
    '''
    Cross validation for XGBoost performance
    '''
    f=open("summary_bst_scan.txt","a")
    start = np.random.random_integers(1000) #time.time()
    # Cross validate
    kf = cv.KFold(labels.size, n_folds=folds, random_state=start)
    # Dictionary to store all the AMSs
    all_rmse = []
    for train_indices, test_indices in kf:
        X_train, X_test = X.loc[train_indices], X.loc[test_indices]
        y_train, y_test = labels[train_indices], labels[test_indices]
        xgmat = xgb.DMatrix(X_train, label=y_train)
        plst = param.items()#+[('eval_metric', '[email protected]')]

        watchlist = []#[(xgmat, 'train')]
        bst = xgb.train(plst, xgmat, num_round, watchlist)

        xgmat_test = xgb.DMatrix(X_test)
        y_out = bst.predict(xgmat_test)
        num=y_test.shape[0]
        y_test=np.reshape(y_test,num)
        rmse_score=rmse(y_out,y_test)
        print('rmse={}'.format(rmse_score))
        f.write('rmse={}'.format(rmse_score))
        f.write('\n')
        all_rmse.append(rmse_score)
    print ("------------------------------------------------------")
    print ("mean rmse ={} with std={}".format(sp.mean(all_rmse),sp.std(all_rmse)))
    f.write("mean rmse ={} with std={}".format(sp.mean(all_rmse),sp.std(all_rmse)))
    f.write('\n')   
    f.close()
Example n. 8
def _test_():
    singleton_snps = genotypes.simulate_k_tons(n=500, m=1000)
    doubleton_snps = genotypes.simulate_k_tons(k=2, n=500, m=1000)
    common_snps = genotypes.simulate_common_genotypes(500, 1000) 
    
    snps = sp.vstack([common_snps, singleton_snps, doubleton_snps])
    print snps
    snps = snps.T
    snps = (snps - sp.mean(snps, 0)) / sp.std(snps, 0)
    snps = snps.T
    print snps, snps.shape
    file_prefix = os.environ['HOME'] + '/tmp/test'
    phen_list = phenotypes.simulate_traits_w_snps_to_hdf5(snps, hdf5_file_prefix=file_prefix,
                                           num_traits=30, p=0.1)
    
    singletons_thres = []
    doubletons_thres = []
    common_thres = []
    for i, y in enumerate(phen_list['phenotypes']):
        
        K = kinship.calc_ibd_kinship(snps)
        K = kinship.scale_k(K)
        lmm = lm.LinearMixedModel(y)
        lmm.add_random_effect(K)
        r1 = lmm.get_REML()
        print 'pseudo_heritability:', r1['pseudo_heritability']

        ex_res = lm.emmax(snps, y, K)
        plt.figure()
        plt.hist(y, 50)
        plt.savefig('%s_%d_phen.png' % (file_prefix, i))
        plt.clf()
        
        
        agr.plot_simple_qqplots_pvals('%s_%d' % (file_prefix, i),
                                      [ex_res['ps'][:1000], ex_res['ps'][1000:2000], ex_res['ps'][2000:]],
                                      result_labels=['Common SNPs', 'Singletons', 'Doubletons'],
                                      line_colors=['b', 'r', 'y'],
                                      num_dots=200, max_neg_log_val=3)
        
        # Cholesky permutations..
        res = lm.emmax_perm_test(singleton_snps, y, K, num_perm=1000)
        print 1.0 / (20 * 1000.0), res['threshold_05']
        singletons_thres.append(res['threshold_05'][0])
        res = lm.emmax_perm_test(doubleton_snps, y, K, num_perm=1000)
        print 1.0 / (20 * 1000.0), res['threshold_05']
        doubletons_thres.append(res['threshold_05'][0])
        res = lm.emmax_perm_test(common_snps, y, K, num_perm=1000)
        print 1.0 / (20 * 1000.0), res['threshold_05']
        common_thres.append(res['threshold_05'][0])
        
        #ATT permutations (Implement)
        
        #PC permutations (Implement)
        

    print sp.mean(singletons_thres), sp.std(singletons_thres)
    print sp.mean(doubletons_thres), sp.std(doubletons_thres)
    print sp.mean(common_thres), sp.std(common_thres)
Example n. 9
def window_hanning(x):
    """
    FUNC: window_hanning
    DESCR: return x times the hanning window of len(x)
    """
    sigma = std(x)
    win =  hanning(len(x))*x #USED THE MLAB window_hanning SINCE MATPLOTLIB DOESN'T HAVE IT ANYMORE -eli
    return win*(sigma/std(win))
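
A numpy-only sketch of the variance-preserving Hanning window applied by window_hanning above (the original relies on std/hanning names imported elsewhere):

import numpy as np

x = np.random.randn(256)
win = np.hanning(len(x)) * x
win = win * (np.std(x) / np.std(win))   # rescale so the windowed signal keeps the original std
print(np.std(x), np.std(win))
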
Example n. 10
 def _compute_standard_deviations(self):
     """"""
     for index in range(NUM_OF_VOXELS):
         voxel_vector = self.data_wrapper.get_voxels_of_same_index(index, "P")
         self.standard_deviations_picture.append(scipy.std(voxel_vector))
     for index in range(NUM_OF_VOXELS):
         voxel_vector = self.data_wrapper.get_voxels_of_same_index(index, "S")
         self.standard_deviations_sentence.append(scipy.std(voxel_vector))
Example n. 11
 def is_near_constant(self, pid, min_num_diff=10):
     vals = sp.array(self.phen_dict[pid]["values"])
     if sp.std(vals) > 0:
         vals = 50 * (vals - sp.mean(vals)) / sp.std(vals)
         vals = vals - vals.min() + 0.1
         b_counts = sp.bincount(sp.array(sp.around(vals), dtype="int"))
         b = b_counts.max() > len(vals) - min_num_diff
         return b
     else:
         return True
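
A toy check of what the near-constant test above does: values are rescaled to roughly 50 units of spread, binned by rounding, and the phenotype is flagged when a single bin holds almost everything.

import numpy as np

vals = np.array([1.0] * 95 + [2.0, 3.0, 1.5, 2.5, 0.5])
v = 50 * (vals - vals.mean()) / vals.std()
v = v - v.min() + 0.1
b_counts = np.bincount(np.around(v).astype(int))
print(b_counts.max() > len(v) - 10)   # True: fewer than 10 values fall outside the dominant bin
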
Example n. 12
 def is_near_constant(self, min_num_diff=10):
     vals = sp.array(self.values)
     if sp.std(vals) > 0:
         vals = 50 * (vals - sp.mean(vals)) / sp.std(vals)
         vals = vals - vals.min() + 0.1
         b_counts = sp.bincount(sp.array(sp.around(vals), dtype='int'))
         b = b_counts.max() > len(vals) - min_num_diff
         return b
     else:
         return True
Example n. 13
File: cyclo.py Project: xou/mcmatch
 def _get_dict(self, distances, n_total):
   if not len(distances):
     _mean, _stddev, _mean_rel, _stddev_rel = 0,0,0,0
   else:
     print distances, std(distances)
     _mean = mean(distances)
     _stddev = std(distances)
     _mean_rel = _mean*1.0/n_total
     _stddev_rel = _stddev*1.0/n_total
   return [_mean, _stddev, _mean_rel, _stddev_rel]
Example n. 14
 def stats(self, startdate, enddate, mktbasket, avdate, output=False, mappingoverride=None):
     """
     Calculates statistics for a fund over a period.
     
     Parameters
     ----------
     startdate : datetime
         beginning of statistic period
     enddate : datetime
         end of statistic period
     mktbasket : dict
         dictionary of market streams
     output : bool
         if True, output results to db
     mappingoverride : None or mapping dictionary
     	whether to override the db mapping
     
     Returns
     -------
     stats : dict
         dictionary of statistics
     """
     actualstream, projstream = self.project(mktbasket, mappingoverride)
     if actualstream[startdate:enddate] is None: return None
     if projstream[startdate:enddate] is None: return None 
     actual = actualstream[startdate:enddate].returns
     projected = projstream[startdate:enddate].returns
     diff = actual - projected
     outdata = {
              'TE'     : scipy.std(diff) * 100.0 * 100.0,
              'BETA'   : scipy.cov(projected, actual, bias=1)[1, 0] / scipy.var(projected),
              'ALPHA'  : (scipy.product(diff + 1.0)) ** (1.0 / diff.size) - 1.0,
              'VOL'    : scipy.std(actual) * scipy.sqrt(252.0),
              'PROJ'   : scipy.product(1.0 + projected) - 1.0,
              'ACT'    : scipy.product(1.0 + actual) - 1.0,
              'R2'     : 0.0 if scipy.all(actual == 0.0) else scipy.corrcoef(projected, actual)[1, 0] ** 2.0,
              'AV'     : self.av(avdate),
              'DELTA'  : self.deltaestimate(avdate)
             }
     outdata['DIFF'] = outdata['ACT'] - outdata['PROJ']
     outdata['PL'] = outdata['DELTA'] * outdata['DIFF'] * 100.0 
     if output:
         cnxn = pyodbc.connect(ORACLESTRING)
         cursor = cnxn.cursor()
         sql = 'INSERT INTO FUNDOUTPUT VALUES ({0!s},{1!s},{2!s},{3!s},{4!s},{5!s},{6},{7},{8!s},{9!s},{10!s},{11!s},{12!s},{13!s});'
         sql = sql.format(self.fundcode, outdata['PROJ'], outdata['ACT'], outdata['DIFF'],
                    outdata['DELTA'], outdata['PL'], oracledatebuilder(startdate),
                    oracledatebuilder(enddate), outdata['TE'], outdata['R2'], outdata['BETA'],
                    outdata['ALPHA'], outdata['VOL'], outdata['AV'])
         cursor.execute(sql)
         cnxn.commit()
         cnxn.close()
     return outdata
Example n. 15
def wlsfit():
  """
  Fit the RS with a WLS
  """
  rs = fit_rs(rsg, mag, color, color_err,
              fix_slope=fix_slope, fix_norm=fix_norm, method=method)
  if fix_slope is False or fix_norm is False:
    if converge:
      rs1 = []
      nit = 0
      while len(rs1) != len(rsg):
        rs1 = rsg
        rsg = rsgalaxies(rs1, mag, color, color_err, rs[0], rs[1],
                        fit='tilted')
        rs = fit_rs(rsg, mag, color, color_err, fix_norm=fix_norm,
                    fix_slope=fix_slope, method='wls')
        nit += 1
      if verbose:
        print '%d iteration(s), final sample: %d galaxies' %(nit, len(rsg))
    else:
      if verbose:
        print 'Only one iteration set: %d galaxies' %len(rsg)
    # nice printing
    if debug:
      if rs[1] >=0:
        print 'CMR : %s = %.3f + %.3f(%s - %.2f)' \
              %(color_label, rs[0], rs[1], mag_label, pivot)
      else:
        print 'CMR : %s = %.3f - %.3f(%s - %.2f)' \
              %(color_label, rs[0], -rs[1], mag_label, pivot)

    a = scipy.zeros(npoints)
    b = scipy.zeros(npoints)
    s = scipy.zeros(npoints)
    n = len(mag)
    for i in xrange(npoints):
      j = scipy.random.random_integers(0, n - 1, n)
      rsboot = fit_rs(j, mag, color, color_err, method='wls')
      a[i] = rsboot[0]
      b[i] = rsboot[1]
      s[i] = rsboot[2]
    a = (rs[0], scipy.std(a))
    b = (rs[1], scipy.std(b))
    s = (rs[2], scipy.std(s))

  # if both "fixes" are not False
  else:
    rsg = rsgalaxies(rsg, mag, color, color_err, fix_norm, fix_slope,
                      fit='tilted')
    a = (fix_norm, 0)
    b = (fix_slope, 0)
    s = (rs[2], 0)
  return
Example n. 16
            self.mapping[indexes[i]] = finalbeta[i]
        return self.mapping

    def stats(self, startdate, enddate, mktbasket, output = False):
        """
        Calculates statistics for a fund over a period.
        
        Parameters
        ----------
        startdate : datetime
            beginning of statistic period
        enddate : datetime
            end of statistic period
        mktbasket : dict
            dictionary of market streams
        output : bool
            if True, output results to db
        
        Returns
        -------
        stats : dict
            dictionary of statistics
        """
        inputmatrix, fundreturns, indexes, daterange = self.align(startdate, enddate, mktbasket)
        if self.mapping and not(inputmatrix is None):
            weights = scipy.array([self.mapping[mykey] if mykey in self.mapping else 0.0 for mykey in mktbasket.keys()])
            projected = scipy.dot(inputmatrix,weights.reshape(len(indexes),1)).flatten()
            actual = fundreturns.flatten()
            diff = actual-projected
            outdata = {
                     'TE'     : scipy.std(diff)*100.0*100.0,
                     'BETA'   : scipy.cov(projected,actual)[1,0]/scipy.var(projected),
                     'ALPHA'  : (scipy.product(diff+1.0))**(1.0/diff.size)-1.0,
                     'VOL'    : scipy.std(actual)*scipy.sqrt(252.0),
                     'PROJ'   : scipy.product(1.0+projected)-1.0,
                     'ACT'    : scipy.product(1.0+actual)-1.0,
                     'R2'     : 0.0 if scipy.all(actual==0.0) else scipy.corrcoef(projected,actual)[1,0]**2.0,
                     'AV'     : self.av(startdate),
                     'DELTA'  : self.deltaestimate(startdate)
                    }
            outdata['DIFF'] = outdata['ACT']-outdata['PROJ']
            outdata['PL'] = outdata['DELTA']*outdata['DIFF']*100.0 
            if output:
                cnxn = pyodbc.connect(ORACLESTRING)
                cursor = cnxn.cursor()
                sql = 'INSERT INTO FUNDOUTPUT VALUES ({0!s},{1!s},{2!s},{3!s},{4!s},{5!s},{6},{7},{8!s},{9!s},{10!s},{11!s},{12!s},{13!s});'
                sql = sql.format(self.fundcode,outdata['PROJ'],outdata['ACT'],outdata['DIFF'],
                           outdata['DELTA'],outdata['PL'],oracledatebuilder(startdate),
                           oracledatebuilder(enddate),outdata['TE'],outdata['R2'],outdata['BETA'],
                           outdata['ALPHA'],outdata['VOL'],outdata['AV'])
                cursor.execute(sql)
                cnxn.commit()            
                cnxn.close()
Example n. 17
  def _bess(npts, x1, x2, x1err, x2err, cerr):
    """
    Do the entire regression calculation for 4 slopes:
      OLS(Y|X), OLS(X|Y), bisector, orthogonal
    """

    # calculate sigma's for datapoints using length of confidence intervals
    sig11var = sum(x1err ** 2) / npts
    sig22var = sum(x2err ** 2) / npts
    sig12var = sum(cerr) / npts

    # calculate means and variances
    x1av = scipy.average(x1)
    x1var = scipy.std(x1) ** 2
    x2av = scipy.average(x2)
    x2var = scipy.std(x2) ** 2
    covar_x1x2 = sum((x1 - x1av) * (x2 - x2av)) / npts

    # compute the regression slopes for OLS(X2|X1), OLS(X1|X2), 
    # bisector and orthogonal
    b = scipy.zeros(4)
    b[0] = (covar_x1x2 - sig12var) / (x1var - sig11var)
    b[1] = (x2var - sig22var) / (covar_x1x2 - sig12var)
    b[2] = (b[0] * b[1] - 1 + scipy.sqrt((1 + b[0] ** 2) * \
           (1 + b[1] ** 2))) / (b[0] + b[1])
    b[3] = 0.5 * ((b[1] - 1 / b[0]) + scipy.sign(covar_x1x2) * \
           scipy.sqrt(4 + (b[1] - 1 / b[0]) ** 2))

    # compute intercepts for above 4 cases:
    a = x2av - b * x1av

    # set up variables to calculate standard deviations of slope and intercept
    xi = []
    xi.append(((x1 - x1av) * (x2 - b[0] * x1 - a[0]) + b[0] * x1err ** 2) / \
              (x1var - sig11var))
    xi.append(((x2 - x2av) * (x2 - b[1] * x1 - a[1]) + x2err ** 2) / \
              covar_x1x2)
    xi.append((xi[0] * (1 + b[1] ** 2) + xi[1] * (1 + b[0] ** 2)) / \
              ((b[0] + b[1]) * scipy.sqrt((1 + b[0] ** 2) * (1 + b[1] ** 2))))
    xi.append((xi[0] / b[0] ** 2 + xi[1]) * b[3] / \
              scipy.sqrt(4 + (b[1] - 1 / b[0]) ** 2))
    zeta = []
    for i in xrange(4):
      zeta.append(x2 - b[i] * x1 - x1av * xi[i])

    # calculate  variance for all a and b
    bvar = scipy.zeros(4)
    avar = scipy.zeros(4)
    for i in xrange(4):
      bvar[i] = scipy.std(xi[i]) ** 2 / npts
      avar[i] = scipy.std(zeta[i]) ** 2 / npts

    return a, b, avar, bvar, xi, zeta
Example n. 18
    def fitToRefMesh(self, refMesh, otherMesh, angleconversion=1.0):
        """ fit a mesh to a Reference mesh, return the fit parameters
        and errors and convenient arrays for plots
        """

        # resultsDict contents
        #      delta fit result
        #      thetax fit result
        #      thetay fit result
        #      error on delta fit result
        #      error on thetax fit result
        #      error on thetay fit result
        #      mean of differences before fit
        #      sigma of residual distribution
        #      array of differences before the fit
        #      array of differences after the fit

        resultDict = {}

        # fit the two meshes
        # note that this fits to
        #       zDiff = otherMesh - interp of refMesh
        results, zDiff, xColumn, yColumn = refMesh.fitMesh(otherMesh, "RLM")

        if results != None:
            # fill resultsDict
            resultDict["delta"] = results.params[2]
            resultDict["deltaErr"] = results.bse[2]

            # convert from [Values/Distance] to another unit
            resultDict["thetax"] = results.params[0] * angleconversion
            resultDict["thetay"] = results.params[1] * angleconversion
            resultDict["thetaxErr"] = results.bse[0] * angleconversion
            resultDict["thetayErr"] = results.bse[1] * angleconversion

            # mean delta before the fit - no truncation now
            resultDict["meanDeltaBefore"] = scipy.mean(zDiff)
            resultDict["rmsDeltaBefore"] = scipy.std(zDiff)

            # rms of Differences after fit
            resultDict["meanDeltaAfter"] = scipy.mean(results.resid)
            resultDict["rmsDeltaAfter"] = scipy.std(results.resid)

            # array of differences before the fit
            resultDict["deltaArrayBefore"] = zDiff

            # array of differences after the fit
            resultDict["deltaArrayAfter"] = results.resid
            resultDict["deltaArrayX"] = xColumn
            resultDict["deltaArrayY"] = yColumn

        return resultDict
Example n. 19
 def sqr_transform(self, pid, method='standard'):
     a = sp.array(self.phen_dict[pid]['values'])
     if method == 'standard':
         vals = ((a - min(a)) + 0.1 * sp.std(a)) * ((a - min(a)) + 0.1 * sp.std(a))
     else:
         vals = a * a
     if not self.phen_dict[pid]['transformation']:
         self.phen_dict[pid]['raw_values'] = self.phen_dict[pid]['values']
         self.phen_dict[pid]['transformation'] = 'sqr'
     else:
         self.phen_dict[pid]['transformation'] = 'sqr(' + self.phen_dict[pid]['transformation'] + ')'
     self.phen_dict[pid]['values'] = vals.tolist()
     return True
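
A toy illustration of the 'standard' square transform above: values are shifted to be strictly positive (offset by 0.1 times their standard deviation) before squaring, which keeps the transform monotonic.

import numpy as np

a = np.array([3.0, 5.0, 2.0, 8.0])
vals = ((a - a.min()) + 0.1 * np.std(a)) ** 2
print(vals.tolist())
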
Example n. 20
def compute_correlation(im1,im2,nbNeigh):
    cor = sp.zeros( (2*nbNeigh+1,2*nbNeigh+1) )

    std1 = sp.std(im1[im1!=0.])
    std2 = sp.std(im2[im2!=0.])
    norm = std1*std2

    for i in range(-nbNeigh,nbNeigh+1):
        for j in range(-nbNeigh,nbNeigh+1):
            mult = im1*sp.roll(sp.roll(im2,i,axis=1),j,axis=0)/norm
            cor[nbNeigh+j,nbNeigh+i] = (mult[mult!=0.]).mean()

    return cor
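
A hedged usage sketch for compute_correlation: im2 is im1 shifted by one column, so the correlation map should peak at the offset that undoes the shift. It assumes the snippet's `sp` is an older scipy that still re-exports std/zeros/roll.

import numpy as np

im1 = np.random.rand(32, 32) + 0.5   # strictly positive, so the != 0 masks keep every pixel
im2 = np.roll(im1, 1, axis=1)
cor = compute_correlation(im1, im2, nbNeigh=3)
print(np.unravel_index(cor.argmax(), cor.shape))
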
Example n. 21
def load_data(name='diabetes.data'):
    print "loading ", name
    dat = scipy.loadtxt(name, skiprows=1)
    print "dat.shape = ", dat.shape
    cs = dat.shape[1]

    X = dat[:,:cs-1]
    y = dat[:,cs-1]
    
    X = (X - mean(X, axis=0))/std(X, axis=0)
    y = (y - mean(y))/std(y)

    return X,y
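
The core of load_data is the column-wise standardization; a minimal numpy version:

import numpy as np

X = np.random.rand(100, 4) * 10.0
Xs = (X - X.mean(axis=0)) / X.std(axis=0)
print(Xs.mean(axis=0).round(6), Xs.std(axis=0).round(6))
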
Example n. 22
 def sqr_transform(self, pid, method="standard"):
     a = sp.array(self.phen_dict[pid]["values"])
     if method == "standard":
         vals = ((a - min(a)) + 0.1 * sp.std(a)) * ((a - min(a)) + 0.1 * sp.std(a))
     else:
         vals = a * a
     if not self.phen_dict[pid]["transformation"]:
         self.phen_dict[pid]["raw_values"] = self.phen_dict[pid]["values"]
         self.phen_dict[pid]["transformation"] = "sqr"
     else:
         self.phen_dict[pid]["transformation"] = "sqr(" + self.phen_dict[pid]["transformation"] + ")"
     self.phen_dict[pid]["values"] = vals.tolist()
     return True
Example n. 23
    def baseline_guesser(self, filename, length, iterations):
        """
        Equivalent of predictions, but just makes predictions from the root of
        the concept tree. This is the equivalent of guessing the distribution
        of all attribute values.
        """
        n = iterations
        runs = []
        nodes = []

        for i in range(0,n):
            print("run %i" % i)
            t = self.__class__()
            accuracy, num = t.sequential_prediction(filename, length, True)
            runs.append(accuracy)
            nodes.append(num)
            #print(json.dumps(t.output_json()))

        #print(runs)
        print("MEAN Accuracy")
        for i in range(0,len(runs[0])):
            a = []
            for r in runs:
                a.append(r[i])
            print("%0.2f" % (scipy.mean(a)))

        print()
        print("STD Accuracy")
        for i in range(0,len(runs[0])):
            a = []
            for r in runs:
                a.append(r[i])
            print("%0.2f" % (scipy.std(a)))

        print()
        print("MEAN Concepts")
        for i in range(0,len(runs[0])):
            a = []
            for r in nodes:
                a.append(r[i])
            print("%0.2f" % (scipy.mean(a)))

        print()
        print("STD Concepts")
        for i in range(0,len(runs[0])):
            a = []
            for r in nodes:
                a.append(r[i])
            print("%0.2f" % (scipy.std(a)))
Example n. 24
def main():
    
    loss_patterns = [ '5', '10', '20' ]
    file_names = ['test3.txt', 'test5.txt', 'test6.txt']
    file_sizes = [1000, 100000, 1000000]
    receiver_domain_name = 'localhost'
    receiver_port = '1234'
    delay_time = 0.25
    delay_time2 = 0.05
    number_experiments = 100

    for loss_pattern in loss_patterns:
        # loss pattern 5
        print >> sys.stderr, "Loss pattern", loss_pattern
        results = [loss_pattern]
        for i in range(0,len(file_names)):
            time_reqs = []
            throughputs = []
            file_size = file_sizes[i]
            file_name = file_names[i]
            for experiment_id in range(0, number_experiments):
                os.system('python receiver.py ' + receiver_port + ' ' + loss_pattern  + ' > /dev/null &')
                # wait 1 second 
                time.sleep(delay_time)
                os.system('python sender.py ' + file_name + ' ' + receiver_domain_name + ' ' + receiver_port)
                time.sleep(delay_time2)

                # read the last line of trace file
                last_line = os.popen("tail -n 1 %s" % 'trace').read()[:-1]
                
                time_req = float(last_line.split(' ')[0])
                time_reqs.append(time_req)
                throughput = file_size * 8000.0 / time_req
                throughputs.append(throughput)
                print >> sys.stderr, experiment_id, "File", file_name, "Size", file_size, "Time", time_req, "ms Throughput", throughput/1000000, "Mbps"

            mean_time = scipy.mean(time_reqs)
            std_time = scipy.std(time_reqs)
            min_time = mean_time - std_time
            max_time = mean_time + std_time
            mean_throughput = scipy.mean(throughputs)
            std_throughput = scipy.std(throughputs)
            max_throughput = mean_throughput + std_throughput
            min_throughput = mean_throughput - std_throughput
            this_result = [ str(mean_throughput), str(min_throughput), str( max_throughput)]
            results += this_result
            print >> sys.stderr, ' '.join(this_result)
        print ' '.join(results)
Example n. 25
def _test_scz_():
    # Load Schizophrenia data
    
    singleton_snps = genotypes.simulate_k_tons(n=500, m=1000)
    doubleton_snps = genotypes.simulate_k_tons(k=2, n=500, m=1000)
    common_snps = genotypes.simulate_common_genotypes(500, 1000) 
    
    snps = sp.vstack([common_snps, singleton_snps, doubleton_snps])
    test_snps = sp.vstack([singleton_snps, doubleton_snps])
    print snps
    phen_list = phenotypes.simulate_traits(snps, hdf5_file_prefix='/home/bv25/tmp/test', num_traits=30, p=1.0)
    
    singletons_thres = []
    doubletons_thres = []
    common_thres = []
    for i, y in enumerate(phen_list):
        
        K = kinship.calc_ibd_kinship(snps)
        K = kinship.scale_k(K)
        lmm = lm.LinearMixedModel(y)
        lmm.add_random_effect(K)
        r1 = lmm.get_REML()
        print 'pseudo_heritability:', r1['pseudo_heritability']

        ex_res = lm.emmax(snps, y, K)
        plt.figure()
        plt.hist(y, 50)
        plt.savefig('/home/bv25/tmp/test_%d_phen.png' % i)
        plt.clf()
        agr.plot_simple_qqplots_pvals('/home/bv25/tmp/test_%d' % i,
                                      [ex_res['ps'][:1000], ex_res['ps'][1000:2000], ex_res['ps'][2000:]],
                                      result_labels=['Common SNPs', 'Singletons', 'Doubletons'],
                                      line_colors=['b', 'r', 'y'],
                                      num_dots=200, max_neg_log_val=3)
        
        # Now permutations..
        res = lm.emmax_perm_test(singleton_snps, y, K, num_perm=1000)
        print 1.0 / (20 * 1000.0), res['threshold_05']
        singletons_thres.append(res['threshold_05'][0])
        res = lm.emmax_perm_test(doubleton_snps, y, K, num_perm=1000)
        print 1.0 / (20 * 1000.0), res['threshold_05']
        doubletons_thres.append(res['threshold_05'][0])
        res = lm.emmax_perm_test(common_snps, y, K, num_perm=1000)
        print 1.0 / (20 * 1000.0), res['threshold_05']
        common_thres.append(res['threshold_05'][0])
    print sp.mean(singletons_thres), sp.std(singletons_thres)
    print sp.mean(doubletons_thres), sp.std(doubletons_thres)
    print sp.mean(common_thres), sp.std(common_thres)
Example n. 26
	def initialize(self,data,random=False):
		self.data = data
		self.n_dim = data.shape[1]
		if random:
			mins = sp.zeros(self.n_dim)
			maxes = sp.zeros(self.n_dim)
			sds = sp.zeros(self.n_dim)
			centers = sp.zeros((self.n_components,self.n_dim))
			for i in xrange(self.n_dim):
				mins[i] = min(self.data[:,i])
				maxes[i] = max(self.data[:,i])
				sds[i] = sp.std(self.data[:,i])
				centers[:,i] = sp.random.uniform(mins[i],maxes[i],self.n_components)
			self.comp = sp.ones(self.n_components)/float(self.n_components) + sp.random.uniform(-1./self.n_components,1./self.n_components,self.n_components)
			self.comp /= sp.sum(self.comp)
			covars = sp.array([sp.diag(sds**2) for i in xrange(self.n_components)])
			self.centers = centers
			self.covars = covars
		else:
			clust = cluster.KMeans(self.n_components)
			clust.fit(self.data)
			self.centers = sp.copy(clust.cluster_centers_)
			labels = sp.copy(clust.labels_)
			self.covars = sp.zeros((self.n_components,self.n_dim,self.n_dim))
			self.comp = sp.zeros(self.n_components)
			for i in xrange(self.n_components):
				inds = labels == i
				temp = self.data[inds,:]
				self.covars[i,:,:] = sp.dot(temp.T,temp)
				self.comp[i] = sum(inds)/float(self.data.shape[0])
Example n. 27
def least_squares(timeseries):
    """
    A timeseries is anomalous if the average of the last three datapoints
    on a projected least squares model is greater than three sigma.
    """

    x = np.array([t[0] for t in timeseries])
    y = np.array([t[1] for t in timeseries])
    A = np.vstack([x, np.ones(len(x))]).T
    results = np.linalg.lstsq(A, y)
    residual = results[1]
    m, c = np.linalg.lstsq(A, y)[0]
    errors = []
    for i, value in enumerate(y):
        projected = m * x[i] + c
        error = value - projected
        errors.append(error)

    if len(errors) < 3:
        return False

    std_dev = scipy.std(errors)
    t = (errors[-1] + errors[-2] + errors[-3]) / 3

    return abs(t) > std_dev * 3 and round(std_dev) != 0 and round(t) != 0
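
A hedged usage sketch for least_squares (assuming numpy as np and an older scipy that still provides scipy.std, as the snippet does): the last three datapoints jump well off the fitted trend, so the three-sigma test should report an anomaly.

import numpy as np

t = np.arange(50.0)
y = 2.0 * t + 1.0 + np.random.normal(scale=0.5, size=50)
y[-3:] += 100.0                        # push the final three datapoints off the line
print(least_squares(list(zip(t, y))))  # should print True
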
Example n. 28
def simulate_genotypes_w_ld(n, m, n_samples, m_ld_chunk_size=100, r2=0.9, validation = False):
	val_set_gen = []
	val_set_D = []
	for i in xrange(n_samples):
		snps = sp.zeros((m, n), dtype='float32')
		num_chunks = m / m_ld_chunk_size
		for chunk in xrange(num_chunks):
			X = sp.zeros((m_ld_chunk_size, n), dtype='float32')
			X[0] = stats.norm.rvs(size=n)
			for j in xrange(1, m_ld_chunk_size):
				X[j] = sp.sqrt(r2) * X[j - 1] + sp.sqrt(1 - r2) * stats.norm.rvs(size=n)
			start_i = chunk * m_ld_chunk_size
			stop_i = start_i + m_ld_chunk_size
			snps[start_i:stop_i] = X
		snps_means = sp.mean(snps, axis=1)
		snps_stds = sp.std(snps, axis=1)
		snps_means.shape = (m, 1)
		snps_stds.shape = (m, 1)
		snps = (snps - snps_means) / snps_stds
		val_set_gen.append(snps)
		if validation:
			val_set_D = 0
		else:
			val_set_D.append(get_sample_D(n=n, m=m, num_sim=100, r2=r2))
		if sp.isnan(val_set_gen).any():
			return simulate_genotypes_w_ld(n, m, n_samples, m_ld_chunk_size=100, r2=0.9)
	return val_set_gen, val_set_D
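
A numpy-only sketch of the LD construction above: each SNP row is sqrt(r2) times the previous row plus sqrt(1 - r2) fresh noise, so adjacent rows end up with correlation about sqrt(r2) (i.e. an r-squared of roughly r2).

import numpy as np

n, m, r2 = 1000, 5, 0.9
X = np.zeros((m, n))
X[0] = np.random.normal(size=n)
for j in range(1, m):
    X[j] = np.sqrt(r2) * X[j - 1] + np.sqrt(1 - r2) * np.random.normal(size=n)
print(np.corrcoef(X[0], X[1])[0, 1])   # close to sqrt(0.9) ~ 0.95
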
Example n. 29
def calc_pwm_bg_dist(matrix, genome, keepScores=False, sampleSize=10000, samples=None, **kwargs):
    """ Sample the given genome to approximate the background distribution for the given matrix
            returns the (mean,stdev) of the distribution
        The function takes about 40 seconds for 100,000 samples or 6 minutes for 1 million

        matrix should be a motility matrix
                (import motility; matrix=motility.PWM([[1,2,3,4],[5,6,7,8]])
        genome should be a pygr resource genome
                (from pygr import worldbase; genome=worldbase.Bio.Seq.Genome.MOUSE.mm9())
        returns: mu, sd [, allScores]
    """
    if not samples:
        sizeDistribution = [len(matrix)]
        allScores = [0.0] * sampleSize
        if len(kwargs) > 0:
            samples = sample_genome(genome, sizeDistribution, sampleSize, **kwargs)
        else:
            samples = sample_genome(genome, sizeDistribution, sampleSize)
    else:
        allScores = [0.0] * len(samples)
    for index, seq in enumerate(samples):
        seq = str(seq)
        if len(seq) < len(matrix):
            raise RuntimeError("seq is too short to be scored! seq: %s, matrix: %sbp %s" % (seq, len(matrix), matrix.matrix))
        revcompSeq = reverseComplement(seq)
        allScores[index] = max(matrix.calc_score(seq), matrix.calc_score(revcompSeq))
    mu = scipy.mean(allScores)
    sd = scipy.std(allScores)
    if keepScores:
        return mu, sd, allScores
    else:
        return mu,sd
Example n. 30
def synchrony(s1, s2, t, winLen, freq, overlap=.1) :
    """
    FUNC: synchrony
    DESCR: phase synchrony implementation
    """
    #print "utils.synchrony(s1(", len(s1), ",), s2(", len(s2), "), t(", len(t),"),", winLen, freq, overlap, ")"
    # Compute phase difference
    #print "utils.synchrony(): doing hilbert_phaser(s1)"
    p1 = hilbert_phaser(s1)
    #print "utils.synchrony(): doing hilbert_phaser(s2)"
    p2 = hilbert_phaser(s2)
    pdiff = p1 - p2

    sync = []
    time = []
    nWin = int(winLen * freq)
    nOverlap = int(overlap * freq)
    #print "utils.synchrony(): doing ", len(arange(0, len(pdiff), nWin - nOverlap)), " pdiffs"
    
    for i in arange(0, len(pdiff), nWin - nOverlap):
        sWin = pdiff[i:i + nWin]
        if len(sWin) < 3 : continue
        s = 1. / (1 + std(sWin))
        #print "utils.synchrony(): [", i, ", ", i+nWin, "]: appending value s=", s
        try : sync.append(s)
        except :
            print 'sWin:', sWin
            sys.exit()
        if t is not None :
            time.append(t[i] + winLen / 2.)

    return array(sync), array(time)
Example n. 31
dfs_left_sm.attributes = sp.mean(rho_null, axis=0)
dfs_left_sm = patch_color_attrib(dfs_left_sm, clim=[0, 1])
view_patch_vtk(dfs_left_sm,
               azimuth=90,
               elevation=180,
               roll=90,
               outfile='rest_mean1_left.png',
               show=1)
view_patch_vtk(dfs_left_sm,
               azimuth=-90,
               elevation=180,
               roll=-90,
               outfile='rest_mean2_left.png',
               show=1)

dfs_left_sm.attributes = sp.std(rho_null, axis=0)
dfs_left_sm = patch_color_attrib(dfs_left_sm, clim=[0, .1])
view_patch_vtk(dfs_left_sm,
               azimuth=90,
               elevation=180,
               roll=90,
               outfile='rest_var1_left.png',
               show=1)
view_patch_vtk(dfs_left_sm,
               azimuth=-90,
               elevation=180,
               roll=-90,
               outfile='rest_var2_left.png',
               show=1)
Example n. 32
m = np.array([[1, 5, 2], [4, 7, 4], [2, 0, 9]])
eigen_vals, eigen_vecs = np.linalg.eig(m)
print('Eigen Values:', eigen_vals, '\n')
print('Eigen Vectors:\n', eigen_vecs)

# SVD

m = np.array([[1, 5, 2], [4, 7, 4], [2, 0, 9]])
U, S, VT = np.linalg.svd(m)
print('Getting SVD outputs:-\n')
print('U:\n', U, '\n')
print('S:\n', S, '\n')
print('VT:\n', VT, '\n')

# In [74]: # descriptive statistics
import scipy as sp
import numpy as np

# get data
nums = np.random.randint(1, 20, size=(1, 15))[0]
print('Data: ', nums)
# get descriptive stats
print('Mean:', sp.mean(nums))
print('Median:', sp.median(nums))
print('Mode:', sp.stats.mode(nums))
print('Standard Deviation:', sp.std(nums))
print('Variance:', sp.var(nums))
print('Skew:', sp.stats.skew(nums))
print('Kurtosis:', sp.stats.kurtosis(nums))
Example n. 33
                dofs[-1] += 1

            incr(num_frags, len(frags_l))
            incr(num_frags, len(frags_r))
            if len(frags_l) > 1:
                nontriv_matches[-1] += 1
            if len(frags_l) > 2:
                alignswith3[-1] += 1

        matches[-1] += 1

    print(
        "--- Number of matched fragments or combined fragments per alignment ---"
    )
    print("Mean: ", scipy.mean(matches))
    print("StdDev: ", scipy.std(matches))
    print("Samples: ", len(matches))
    print("Max: ", max(matches))
    print("Min: ", min(matches))

    print(
        "--- Number of non-trivial matched combined fragments on left side per alignment ---"
    )
    print("Mean: ", scipy.mean(nontriv_matches))
    print("StdDev: ", scipy.std(nontriv_matches))
    print("Samples: ", len(nontriv_matches))
    print("Max: ", max(nontriv_matches))
    print("Min: ", min(nontriv_matches))

    print("--- Number frags per possibly combined fragment ---")
    print("Mean: ", scipy.mean(num_frags))
Example n. 34
def calc_risk_scores(bed_file,
                     rs_id_map,
                     phen_map,
                     out_file=None,
                     split_by_chrom=False,
                     adjust_for_sex=False,
                     adjust_for_covariates=False,
                     adjust_for_pcs=False,
                     non_zero_chromosomes=None):
    print 'Parsing PLINK bed file: %s' % bed_file
    num_individs = len(phen_map)
    assert num_individs > 0, 'No individuals found.  Problems parsing the phenotype file?'

    if split_by_chrom:
        raw_effects_prs = sp.zeros(num_individs)
        pval_derived_effects_prs = sp.zeros(num_individs)

        for i in range(1, 23):
            if non_zero_chromosomes is None or i in non_zero_chromosomes:
                genotype_file = bed_file + '_%i_keep' % i
                if os.path.isfile(genotype_file + '.bed'):
                    print 'Working on chromosome %d' % i
                    prs_dict = get_prs(genotype_file, rs_id_map, phen_map)

                    raw_effects_prs += prs_dict['raw_effects_prs']
                    pval_derived_effects_prs += prs_dict[
                        'pval_derived_effects_prs']
            else:
                print 'Skipping chromosome'

    else:
        prs_dict = get_prs(bed_file, rs_id_map, phen_map)
        raw_effects_prs = prs_dict['raw_effects_prs']
        pval_derived_effects_prs = prs_dict['pval_derived_effects_prs']
        true_phens = prs_dict['true_phens']

    # Report prediction accuracy
    raw_eff_corr = sp.corrcoef(raw_effects_prs, prs_dict['true_phens'])[0, 1]
    raw_eff_r2 = raw_eff_corr**2
    pval_eff_corr = sp.corrcoef(pval_derived_effects_prs,
                                prs_dict['true_phens'])[0, 1]
    pval_eff_r2 = pval_eff_corr**2

    print 'Final raw effects PRS correlation: %0.4f' % raw_eff_corr
    print 'Final raw effects PRS r2: %0.4f' % raw_eff_r2
    print 'Final weighted effects PRS correlation: %0.4f' % pval_eff_corr
    print 'Final weighted effects PRS r2: %0.4f' % pval_eff_r2

    res_dict = {'pred_r2': pval_eff_r2}

    raw_effects_prs.shape = (len(raw_effects_prs), 1)
    pval_derived_effects_prs.shape = (len(pval_derived_effects_prs), 1)
    true_phens = sp.array(true_phens)
    true_phens.shape = (len(true_phens), 1)

    # Store covariate weights, slope, etc.
    weights_dict = {}

    # Store Adjusted predictions
    adj_pred_dict = {}

    # Direct effect
    Xs = sp.hstack([pval_derived_effects_prs, sp.ones((len(true_phens), 1))])
    (betas, rss00, r, s) = linalg.lstsq(sp.ones((len(true_phens), 1)),
                                        true_phens)
    (betas, rss, r, s) = linalg.lstsq(Xs, true_phens)
    pred_r2 = 1 - rss / rss00
    weights_dict['unadjusted'] = {
        'Intercept': betas[1][0],
        'ldpred_prs_effect': betas[0][0]
    }

    # Adjust for sex
    if adjust_for_sex and 'sex' in prs_dict and len(prs_dict['sex']) > 0:
        sex = sp.array(prs_dict['sex'])
        sex.shape = (len(sex), 1)
        (betas, rss0, r,
         s) = linalg.lstsq(sp.hstack([sex, sp.ones((len(true_phens), 1))]),
                           true_phens)
        (betas, rss, r, s) = linalg.lstsq(
            sp.hstack([raw_effects_prs, sex,
                       sp.ones((len(true_phens), 1))]), true_phens)
        Xs = sp.hstack(
            [pval_derived_effects_prs, sex,
             sp.ones((len(true_phens), 1))])
        (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
        weights_dict['sex_adj'] = {
            'Intercept': betas[2][0],
            'ldpred_prs_effect': betas[0][0],
            'sex': betas[1][0]
        }
        print 'Fitted effects (betas) for PRS, sex, and intercept on true phenotype:', betas
        adj_pred_dict['sex_adj'] = sp.dot(Xs, betas)
        pred_r2 = 1 - rss / rss0
        print 'Sex adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        pred_r2 = 1 - rss / rss00
        print 'Sex adjusted prediction + Sex (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        pred_r2 = 1 - rss_pd / rss0
        print 'Sex adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        res_dict['PC_adj_pred_r2'] = pred_r2
        pred_r2 = 1 - rss_pd / rss00
        print 'Sex adjusted prediction + Sex (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        res_dict['PC_adj_pred_r2+PC'] = pred_r2

    # Adjust for PCs
    if adjust_for_pcs and 'pcs' in prs_dict and len(prs_dict['pcs']) > 0:
        pcs = prs_dict['pcs']
        (betas, rss0, r,
         s) = linalg.lstsq(sp.hstack([pcs, sp.ones((len(true_phens), 1))]),
                           true_phens)
        (betas, rss, r, s) = linalg.lstsq(
            sp.hstack([raw_effects_prs, pcs,
                       sp.ones((len(true_phens), 1))]), true_phens)
        Xs = sp.hstack(
            [pval_derived_effects_prs,
             sp.ones((len(true_phens), 1)), pcs])
        (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
        weights_dict['pc_adj'] = {
            'Intercept': betas[1][0],
            'ldpred_prs_effect': betas[0][0],
            'pcs': betas[2][0]
        }
        adj_pred_dict['pc_adj'] = sp.dot(Xs, betas)
        pred_r2 = 1 - rss / rss0
        print 'PC adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        pred_r2 = 1 - rss / rss00
        print 'PC adjusted prediction + PCs (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        pred_r2 = 1 - rss_pd / rss0
        print 'PC adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        res_dict['PC_adj_pred_r2'] = pred_r2
        pred_r2 = 1 - rss_pd / rss00
        print 'PC adjusted prediction + PCs (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        res_dict['PC_adj_pred_r2+PC'] = pred_r2

        # Adjust for both PCs and Sex
        if adjust_for_sex and 'sex' in prs_dict and len(prs_dict['sex']) > 0:
            sex = sp.array(prs_dict['sex'])
            sex.shape = (len(sex), 1)
            (betas, rss0, r, s) = linalg.lstsq(
                sp.hstack([sex, pcs, sp.ones((len(true_phens), 1))]),
                true_phens)
            (betas, rss, r, s) = linalg.lstsq(
                sp.hstack(
                    [raw_effects_prs, sex, pcs,
                     sp.ones((len(true_phens), 1))]), true_phens)
            Xs = sp.hstack([
                pval_derived_effects_prs, sex,
                sp.ones((len(true_phens), 1)), pcs
            ])
            (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
            weights_dict['sex_pc_adj'] = {
                'Intercept': betas[2][0],
                'ldpred_prs_effect': betas[0][0],
                'sex': betas[1][0],
                'pcs': betas[3][0]
            }
            adj_pred_dict['sex_pc_adj'] = sp.dot(Xs, betas)
            pred_r2 = 1 - rss / rss0
            print 'PCs+Sex adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
                pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
            pred_r2 = 1 - rss / rss00
            print 'PCs+Sex adjusted prediction and PCs+Sex (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
                pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
            pred_r2 = 1 - rss_pd / rss0
            print 'PCs+Sex adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
                pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
            res_dict['PC_Sex_adj_pred_r2'] = pred_r2
            pred_r2 = 1 - rss_pd / rss00
            print 'PCs+Sex adjusted prediction and PCs+Sex (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
                pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
            res_dict['PC_Sex_adj_pred_r2+PC_Sex'] = pred_r2

    # Adjust for covariates
    if adjust_for_covariates and 'covariates' in prs_dict and len(
            prs_dict['covariates']) > 0:
        covariates = prs_dict['covariates']
        (betas, rss0, r, s) = linalg.lstsq(
            sp.hstack([covariates, sp.ones((len(true_phens), 1))]), true_phens)
        (betas, rss, r, s) = linalg.lstsq(
            sp.hstack(
                [raw_effects_prs, covariates,
                 sp.ones((len(true_phens), 1))]), true_phens)
        Xs = sp.hstack([
            pval_derived_effects_prs, covariates,
            sp.ones((len(true_phens), 1))
        ])
        (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
        adj_pred_dict['cov_adj'] = sp.dot(Xs, betas)
        pred_r2 = 1 - rss / rss0
        print 'Cov adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        pred_r2 = 1 - rss / rss00
        print 'Cov adjusted prediction + Cov (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        pred_r2 = 1 - rss_pd / rss0
        print 'Cov adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        res_dict['Cov_adj_pred_r2'] = pred_r2
        pred_r2 = 1 - rss_pd / rss00
        print 'Cov adjusted prediction + Cov (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        res_dict['Cov_adj_pred_r2+Cov'] = pred_r2

        if adjust_for_pcs and 'pcs' in prs_dict and len(
                prs_dict['pcs']) and 'sex' in prs_dict and len(
                    prs_dict['sex']) > 0:
            pcs = prs_dict['pcs']
            sex = sp.array(prs_dict['sex'])
            sex.shape = (len(sex), 1)
            (betas, rss0, r, s) = linalg.lstsq(
                sp.hstack(
                    [covariates, sex, pcs,
                     sp.ones((len(true_phens), 1))]), true_phens)
            (betas, rss, r, s) = linalg.lstsq(
                sp.hstack([
                    raw_effects_prs, covariates, sex, pcs,
                    sp.ones((len(true_phens), 1))
                ]), true_phens)
            Xs = sp.hstack([
                pval_derived_effects_prs, covariates, sex, pcs,
                sp.ones((len(true_phens), 1))
            ])
            (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
            adj_pred_dict['cov_sex_pc_adj'] = sp.dot(Xs, betas)
            pred_r2 = 1 - rss / rss0
            print 'Cov+PCs+Sex adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
                pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
            pred_r2 = 1 - rss / rss00
            print 'Cov+PCs+Sex adjusted prediction and PCs+Sex (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
                pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
            pred_r2 = 1 - rss_pd / rss0
            print 'Cov+PCs+Sex adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
                pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
            res_dict['Cov_PC_Sex_adj_pred_r2'] = pred_r2
            pred_r2 = 1 - rss_pd / rss00
            print 'Cov+PCs+Sex adjusted prediction and PCs+Sex (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
                pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
            res_dict['Cov_PC_Sex_adj_pred_r2+Cov_PC_Sex'] = pred_r2

    # Now calibration
    y_norm = (true_phens - sp.mean(true_phens)) / sp.std(true_phens)
    denominator = sp.dot(raw_effects_prs.T, raw_effects_prs)
    numerator = sp.dot(raw_effects_prs.T, y_norm)
    regression_slope = (numerator / denominator)[0][0]
    print 'The slope for predictions with raw effects is:', regression_slope

    denominator = sp.dot(pval_derived_effects_prs.T, pval_derived_effects_prs)
    numerator = sp.dot(pval_derived_effects_prs.T, y_norm)
    regression_slope = (numerator / denominator)[0][0]
    print 'The slope for predictions with weighted effects is:', regression_slope

    num_individs = len(prs_dict['pval_derived_effects_prs'])

    # Write PRS out to file.
    if out_file != None:
        with open(out_file, 'w') as f:
            out_str = 'IID, true_phens, raw_effects_prs, pval_derived_effects_prs'
            if 'sex' in prs_dict:
                out_str = out_str + ', sex'
            if 'pcs' in prs_dict:
                pcs_str = ', '.join([
                    'PC%d' % (1 + pc_i)
                    for pc_i in range(len(prs_dict['pcs'][0]))
                ])
                out_str = out_str + ', ' + pcs_str
            out_str += '\n'
            f.write(out_str)
            for i in range(num_individs):
                out_str = '%s, %0.6e, %0.6e, %0.6e, ' % (
                    prs_dict['iids'][i], prs_dict['true_phens'][i],
                    raw_effects_prs[i], pval_derived_effects_prs[i])
                if 'sex' in prs_dict:
                    out_str = out_str + '%d, ' % prs_dict['sex'][i]
                if 'pcs' in prs_dict:
                    pcs_str = ', '.join(map(str, prs_dict['pcs'][i]))
                    out_str = out_str + pcs_str
                out_str += '\n'
                f.write(out_str)

        if len(adj_pred_dict.keys()) > 0:
            with open(out_file + '.adj', 'w') as f:
                adj_prs_labels = adj_pred_dict.keys()
                out_str = 'IID, true_phens, raw_effects_prs, pval_derived_effects_prs, ' + \
                    ', '.join(adj_prs_labels)
                out_str += '\n'
                f.write(out_str)
                for i in range(num_individs):
                    out_str = '%s, %0.6e, %0.6e, %0.6e' % (
                        prs_dict['iids'][i], prs_dict['true_phens'][i],
                        raw_effects_prs[i], pval_derived_effects_prs[i])
                    for adj_prs in adj_prs_labels:
                        out_str += ', %0.4f' % adj_pred_dict[adj_prs][i]
                    out_str += '\n'
                    f.write(out_str)
        if weights_dict != None:
            oh5f = h5py.File(out_file + '.weights.hdf5', 'w')
            for k1 in weights_dict.keys():
                kg = oh5f.create_group(k1)
                for k2 in weights_dict[k1]:
                    kg.create_dataset(k2, data=sp.array(weights_dict[k1][k2]))
            oh5f.close()
    return res_dict
Esempio n. 35
0
#The plot
plt.plot([mean, mean], [0, 0.05], c='r', label="Mean")
plt.plot([median, median], [0, 0.05], c='g', label="Median")
plt.plot([mode, mode], [0, 0.05], c='b', label="Mode")
plt.legend()
plt.show()

# Variance
stand_var = np.var(array_data)  # biased
print("Biased Variance:", stand_var)

unbiased_stand_var = np.var(array_data, ddof=1)  # Unbiased
print("Unbiased Variance:", unbiased_stand_var)

# Skewness
stand_dev = sp.std(array_data)
skewness = sp.stats.moment(matrix_data, 3) / (stand_dev**3)
print("The Skewness:", skewness[0])

# KDE
kde = KDEUnivariate(array_data)
kde.fit(kernel="uni", fft=False)
support = kde.support
density = kde.density
plt.plot(support, density)
plt.show()

# Different kernel contraction
sns.kdeplot(array_data, kernel='gau', label='Gaussian Kernel')
sns.kdeplot(array_data, kernel='uni', label='Uniform Kernel')
sns.kdeplot(array_data, kernel='tri', label='Triangle Kernel')
Esempio n. 36
0
if porosity_profile: 
    
    start_time = datetime.now().replace(microsecond=0)
    
    fig, axs = plt.subplots(3, sharex=True, sharey=True, figsize=(16, 16))
    fig.suptitle('Porosity profile', fontsize=20)
    
    
    phiX=ps.metrics.porosity_profile(im,0)
    axs[0].plot(phiX, linewidth=3)
    axs[0].set_xlabel('µm', fontsize=20)
    axs[0].set_ylabel('$\phi_x$', fontsize=20)
    axs[0].grid()
    axs[0].set_title('$E(\phi_x)=$'+sp.array2string(sp.mean(phiX),precision=2)+', $\sigma=$'+\
              sp.array2string(sp.std(phiX),precision=2), fontsize=20)
        
    phiY=ps.metrics.porosity_profile(im,1)
    axs[1].plot(phiY, linewidth=3)
    axs[1].set_xlabel('µm', fontsize=20)
    axs[1].set_ylabel('$\phi_y$', fontsize=20)
    axs[1].grid()
    axs[1].set_title('$E(\phi_y)=$'+sp.array2string(sp.mean(phiY),precision=2)+', $\sigma=$'+\
              sp.array2string(sp.std(phiY),precision=2), fontsize=20)
        
    phiZ=ps.metrics.porosity_profile(im,2)
    axs[2].plot(phiZ, linewidth=3)
    axs[2].set_xlabel('µm', fontsize=20)
    axs[2].set_ylabel('$\phi_z$', fontsize=20)
    axs[2].grid()
    axs[2].set_title('$E(\phi_z)=$'+sp.array2string(sp.mean(phiZ),precision=2)+', $\sigma=$'+\
              sp.array2string(sp.std(phiZ),precision=2), fontsize=20)

df.dropna(inplace = True)

# historical rate of return
#eR = (df['20ma']['2019'][0]/df['20ma']['2012'][0])**(1/7)

#%% rolling return
returns_cum = df['Adj Close'].pct_change(periods=(252*hp), axis=0)
returns_cum.dropna(inplace = True)
returns_1year = (1.0 + returns_cum)**(1/hp) -1
# benchmark
returns_cum_b = df_benchmark['Adj Close'].pct_change(periods=(252*hp), axis=0)
returns_cum_b.dropna(inplace = True)
returns_1year_b = (1.0 + returns_cum_b)**(1/hp) -1

eR = mean(returns_1year)
sR = std(returns_1year)
eRL = eR - 1.96*sR
eRU = eR + 1.96*sR
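# eRL/eRU bracket the annualized return with a rough 95% normal interval
# (mean +/- 1.96 * standard deviation of the rolling 1-year returns); note this
# describes the spread of a single year's return, not a confidence interval for
# the mean (which would use sR / sqrt(n)).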

#%% figures
#df['Adj Close'].plot()
#plt.show
ax1 = plt.subplot2grid((10,1),(0,0),rowspan=5)
ax2 = plt.subplot2grid((10,1),(6,0),rowspan=5,sharex=ax1)

ax1.plot(df.index, df['Adj Close'])
ax1.plot(df.index, df['20ma'])
ax1.set_ylabel('Price (USD)')
ax1.set_title(ticker)
if ticker=='VOO':
    cost_basis=272.91
            if "MUTATION SCORE:" in line:
                score = float(line.split()[-1])
    if score is not None:
        if name not in data:
            data[name] = [score]
        else:
            data[name].append(score)

sortKeys = sorted(data.keys(), key=lambda c: scipy.mean(data[c]))

compared = []
for config in sortKeys:
    d = data[config]
    print "=" * 80
    print "SEQLEN", label(config), "RUNS:", len(d), "MEAN:", scipy.mean(
        d), "MEDIAN:", scipy.median(d), "STD:", scipy.std(d)
    if len(d) > 2:
        for config2 in sortKeys:
            if (config != config2) and (sorted([config, config2])
                                        not in compared):
                P = scipy.stats.mannwhitneyu(d, data[config2]).pvalue
                #print ".vs", config2, round(P, 2),
                if P < 0.05:
                    print "STATISTICALLY WORSE THAN", label(
                        config2), "P =", round(P, 3)
                compared.append(sorted([config, config2]))

f1 = plt.figure(figsize=(10, 4))

plt.ylabel("Mutation score")
plt.xlabel("seqLen/coverage")
Esempio n. 39
0
def DataArrayStatisticsReport(parent, titleString, tempdata):
    scrolledText = tk_stxt.ScrolledText(parent,
                                        width=textboxWidth,
                                        height=textboxHeight,
                                        wrap=tk.NONE)
    scrolledText.insert(tk.END, titleString + '\n\n')

    # must at least have max and min
    minData = min(tempdata)
    maxData = max(tempdata)

    if maxData == minData:
        scrolledText.insert(tk.END, 'All data has the same value,\n')
        scrolledText.insert(tk.END, "value = %-.16E\n" % (minData))
        scrolledText.insert(tk.END, 'statistics cannot be calculated.')
    else:
        scrolledText.insert(tk.END, "max = %-.16E\n" % (maxData))
        scrolledText.insert(tk.END, "min = %-.16E\n" % (minData))

        try:
            temp = scipy.mean(tempdata)
            scrolledText.insert(tk.END, "mean = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "mean gave error in calculation\n")

        try:
            temp = scipy.stats.sem(tempdata)
            scrolledText.insert(tk.END,
                                "standard error of mean = %-.16E\n" % (temp))
        except:
            scrolledText.insert(
                tk.END, "standard error of mean gave error in calculation\n")

        try:
            temp = scipy.median(tempdata)
            scrolledText.insert(tk.END, "median = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "median gave error in calculation\n")

        try:
            temp = scipy.var(tempdata)
            scrolledText.insert(tk.END, "variance = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "variance gave error in calculation\n")

        try:
            temp = scipy.std(tempdata)
            scrolledText.insert(tk.END, "std. deviation = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END,
                                "std. deviation gave error in calculation\n")

        try:
            temp = scipy.stats.skew(tempdata)
            scrolledText.insert(tk.END, "skew = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "skew gave error in calculation\n")

        try:
            temp = scipy.stats.kurtosis(tempdata)
            scrolledText.insert(tk.END, "kurtosis = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "kurtosis gave error in calculation\n")

    return scrolledText
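# A minimal usage sketch (assumptions: tk_stxt is tkinter.scrolledtext, and the
# module-level textboxWidth/textboxHeight used above are defined elsewhere):
#
#   import tkinter as tk
#   import tkinter.scrolledtext as tk_stxt
#   textboxWidth, textboxHeight = 80, 30   # hypothetical values
#   root = tk.Tk()
#   report = DataArrayStatisticsReport(root, 'Sample statistics', [1.2, 3.4, 2.2, 5.0])
#   report.pack(fill=tk.BOTH, expand=True)
#   root.mainloop()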
Esempio n. 40
0
def dist_vals_for(arr):
  return [sp.mean(arr), sp.std(arr)]
    ret = x.aclose[1:] / x.aclose[:-1] - 1
    d0 = x.date[1:]
    return pd.DataFrame(ret, index=d0, columns=[ticker])


# Step 3
n = np.size(tickers)
final = ret_f(tickers[0], begdate, enddate)
for i in np.arange(1, n):
    a = ret_f(tickers[i], begdate, enddate)
    if i > 0:
        final = pd.merge(final, a, left_index=True, right_index=True)
#
# Step 4: get porfolio returns
portRet = sp.dot(final, weight)
portStd = sp.std(portRet)
portMean = sp.mean(portRet)
VaR = position * (portMean - z * portStd)
print("Holding=", position, "VaR=", round(VaR, 2), "tomorrow")

# compare
total2 = 0.0
for i in np.arange(n):
    stock = tickers[i]
    ret = final[stock]
    position2 = position * weight[i]
    mean = sp.mean(ret)
    std = sp.std(ret)
    VaR = position2 * (mean - z * std)
    total2 += VaR
    print("For ", stock, "with a value of ", position2, "VaR=", round(VaR, 2))
Esempio n. 42
0
        #euclid_num = 0.6*magn_counter - 7.5
        magn_counter += 1
    
    fit,cov =sp.polyfit(x_fit,y_fit,deg=1,w=sp.array([1,1,1,1,1]),cov=True) 
    func = sp.poly1d(fit)
    gradient.append(sp.asarray(fit[0]))
    count +=1

grad = sp.asarray(gradient)

df1 = pd.DataFrame([gradient],
                   index=['gradient'])
df1.to_excel("output.xlsx")  

print(sp.mean(grad))
print(sp.std(grad))
histo.gen_histogram(grad)

#%% Plotting the expected and outputted gradients for the distributions in Test 16C:
import matplotlib.pyplot as plt

x_plot=sp.array([0,0.6,0.4,0.5,0.3])
x=sp.array([0.6,0.4,0.5,0.3])
x_comp=[0,0.6]
y_comp=[0,0.6]
y=sp.array([0.55,0.39,0.473,0.307])
error= sp.array([0.02,0.011,0.012,0.01])
fit,cov =sp.polyfit(x,y,deg=1,w=1/error,cov=True) 
func = sp.poly1d(fit)
plt.plot(x_plot,func(x_plot),color='k')
plt.plot(x_comp,y_comp,color='grey',ls=':')
Esempio n. 43
0
def calc_risk_scores(bed_file,
                     rs_id_map,
                     phen_map,
                     out_file=None,
                     split_by_chrom=False,
                     adjust_for_sex=False,
                     adjust_for_covariates=False,
                     adjust_for_pcs=False,
                     non_zero_chromosomes=None,
                     only_score=False,
                     verbose=False,
                     summary_dict=None):
    print('Parsing PLINK bed file: %s' % bed_file)

    if split_by_chrom:
        num_individs = len(phen_map)
        assert num_individs > 0, 'No individuals found.  Problems parsing the phenotype file?'
        pval_derived_effects_prs = sp.zeros(num_individs)

        for i in range(1, 23):
            if non_zero_chromosomes is None or i in non_zero_chromosomes:
                genotype_file = bed_file + '_%i_keep' % i
                if os.path.isfile(genotype_file + '.bed'):
                    if verbose:
                        print('Working on chromosome %d' % i)
                    prs_dict = get_prs(genotype_file,
                                       rs_id_map,
                                       phen_map,
                                       only_score=only_score,
                                       verbose=verbose)

                    pval_derived_effects_prs += prs_dict[
                        'pval_derived_effects_prs']
            elif verbose:
                print('Skipping chromosome')

    else:
        prs_dict = get_prs(bed_file,
                           rs_id_map,
                           phen_map,
                           only_score=only_score,
                           verbose=verbose)
        num_individs = len(prs_dict['iids'])
        pval_derived_effects_prs = prs_dict['pval_derived_effects_prs']

    if only_score:
        write_only_scores_file(out_file, prs_dict, pval_derived_effects_prs)
        res_dict = {}
    elif sp.std(prs_dict['true_phens']) == 0:
        print('No variance left to explain in phenotype.')
        res_dict = {'pred_r2': 0}
    else:
        # Report prediction accuracy
        assert len(
            phen_map
        ) > 0, 'No individuals found.  Problems parsing the phenotype file?'

        pval_eff_corr = sp.corrcoef(pval_derived_effects_prs,
                                    prs_dict['true_phens'])[0, 1]
        pval_eff_r2 = pval_eff_corr**2
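        # pval_eff_r2 is the squared Pearson correlation between the PRS and the
        # observed phenotype, i.e. the fraction of phenotypic variance explained
        # by a univariate linear fit of phenotype on PRS.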

        res_dict = {'pred_r2': pval_eff_r2}

        pval_derived_effects_prs.shape = (len(pval_derived_effects_prs), 1)
        true_phens = sp.array(prs_dict['true_phens'])
        true_phens.shape = (len(true_phens), 1)

        # Store covariate weights, slope, etc.
        weights_dict = {}

        # Store Adjusted predictions
        adj_pred_dict = {}

        # Direct effect
        Xs = sp.hstack(
            [pval_derived_effects_prs,
             sp.ones((len(true_phens), 1))])
        (betas, rss00, r, s) = linalg.lstsq(sp.ones((len(true_phens), 1)),
                                            true_phens)
        (betas, rss, r, s) = linalg.lstsq(Xs, true_phens)
        pred_r2 = 1 - rss / rss00
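        # rss00 is the residual sum of squares of an intercept-only model and rss
        # that of the PRS + intercept model, so 1 - rss/rss00 is the usual R^2 of
        # the least-squares fit.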
        weights_dict['unadjusted'] = {
            'Intercept': betas[1][0],
            'ldpred_prs_effect': betas[0][0]
        }

        if verbose:
            print('PRS correlation: %0.4f' % pval_eff_corr)
        print('Variance explained (Pearson R2) by PRS: %0.4f' % pred_r2)

        # Adjust for sex
        if adjust_for_sex and 'sex' in prs_dict and len(prs_dict['sex']) > 0:
            sex = sp.array(prs_dict['sex'])
            sex.shape = (len(sex), 1)
            (betas, rss0, r,
             s) = linalg.lstsq(sp.hstack([sex,
                                          sp.ones((len(true_phens), 1))]),
                               true_phens)
            Xs = sp.hstack(
                [pval_derived_effects_prs, sex,
                 sp.ones((len(true_phens), 1))])
            (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
            weights_dict['sex_adj'] = {
                'Intercept': betas[2][0],
                'ldpred_prs_effect': betas[0][0],
                'sex': betas[1][0]
            }
            if verbose:
                print(
                    'Fitted effects (betas) for PRS, sex, and intercept on true phenotype:',
                    betas)
            adj_pred_dict['sex_adj'] = sp.dot(Xs, betas)
            pred_r2 = 1 - rss_pd / rss0
            print(
                'Variance explained (Pearson R2) by PRS adjusted for Sex: %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['Sex_adj_pred_r2'] = pred_r2
            pred_r2 = 1 - rss_pd / rss00
            print(
                'Variance explained (Pearson R2) by PRS + Sex : %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['Sex_adj_pred_r2+Sex'] = pred_r2

        # Adjust for PCs
        if adjust_for_pcs and 'pcs' in prs_dict and len(prs_dict['pcs']) > 0:
            pcs = prs_dict['pcs']
            (betas, rss0, r,
             s) = linalg.lstsq(sp.hstack([pcs,
                                          sp.ones((len(true_phens), 1))]),
                               true_phens)
            Xs = sp.hstack(
                [pval_derived_effects_prs,
                 sp.ones((len(true_phens), 1)), pcs])
            (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
            weights_dict['pc_adj'] = {
                'Intercept': betas[1][0],
                'ldpred_prs_effect': betas[0][0],
                'pcs': betas[2][0]
            }
            adj_pred_dict['pc_adj'] = sp.dot(Xs, betas)
            pred_r2 = 1 - rss_pd / rss0
            print(
                'Variance explained (Pearson R2) by PRS adjusted for PCs: %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['PC_adj_pred_r2'] = pred_r2
            pred_r2 = 1 - rss_pd / rss00
            print(
                'Variance explained (Pearson R2) by PRS + PCs: %0.4f (%0.6f)' %
                (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['PC_adj_pred_r2+PC'] = pred_r2

            # Adjust for both PCs and Sex
            if adjust_for_sex and 'sex' in prs_dict and len(
                    prs_dict['sex']) > 0:
                sex = sp.array(prs_dict['sex'])
                sex.shape = (len(sex), 1)
                (betas, rss0, r, s) = linalg.lstsq(
                    sp.hstack([sex, pcs,
                               sp.ones((len(true_phens), 1))]), true_phens)
                Xs = sp.hstack([
                    pval_derived_effects_prs, sex,
                    sp.ones((len(true_phens), 1)), pcs
                ])
                (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
                weights_dict['sex_pc_adj'] = {
                    'Intercept': betas[2][0],
                    'ldpred_prs_effect': betas[0][0],
                    'sex': betas[1][0],
                    'pcs': betas[3][0]
                }
                adj_pred_dict['sex_pc_adj'] = sp.dot(Xs, betas)
                pred_r2 = 1 - rss_pd / rss0
                print(
                    'Variance explained (Pearson R2) by PRS adjusted for PCs and Sex: %0.4f (%0.6f)'
                    % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                res_dict['PC_Sex_adj_pred_r2'] = pred_r2
                pred_r2 = 1 - rss_pd / rss00
                print(
                    'Variance explained (Pearson R2) by PRS+PCs+Sex: %0.4f (%0.6f)'
                    % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                res_dict['PC_Sex_adj_pred_r2+PC_Sex'] = pred_r2

        # Adjust for covariates
        if adjust_for_covariates and 'covariates' in prs_dict and len(
                prs_dict['covariates']) > 0:
            covariates = prs_dict['covariates']
            (betas, rss0, r, s) = linalg.lstsq(
                sp.hstack([covariates,
                           sp.ones((len(true_phens), 1))]), true_phens)
            Xs = sp.hstack([
                pval_derived_effects_prs, covariates,
                sp.ones((len(true_phens), 1))
            ])
            (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
            adj_pred_dict['cov_adj'] = sp.dot(Xs, betas)
            pred_r2 = 1 - rss_pd / rss0
            print(
                'Variance explained (Pearson R2) by PRS adjusted for Covariates: %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['Cov_adj_pred_r2'] = pred_r2
            pred_r2 = 1 - rss_pd / rss00
            print(
                'Variance explained (Pearson R2) by PRS + Cov: %0.4f (%0.6f)' %
                (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['Cov_adj_pred_r2+Cov'] = pred_r2

            if adjust_for_pcs and 'pcs' in prs_dict and len(
                    prs_dict['pcs']) and 'sex' in prs_dict and len(
                        prs_dict['sex']) > 0:
                pcs = prs_dict['pcs']
                sex = sp.array(prs_dict['sex'])
                sex.shape = (len(sex), 1)
                (betas, rss0, r, s) = linalg.lstsq(
                    sp.hstack(
                        [covariates, sex, pcs,
                         sp.ones((len(true_phens), 1))]), true_phens)
                Xs = sp.hstack([
                    pval_derived_effects_prs, covariates, sex, pcs,
                    sp.ones((len(true_phens), 1))
                ])
                (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
                adj_pred_dict['cov_sex_pc_adj'] = sp.dot(Xs, betas)
                pred_r2 = 1 - rss_pd / rss0
                print(
                    'Variance explained (Pearson R2) by PRS adjusted for Cov+PCs+Sex: %0.4f (%0.6f)'
                    % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                res_dict['Cov_PC_Sex_adj_pred_r2'] = pred_r2
                pred_r2 = 1 - rss_pd / rss00
                print(
                    'Variance explained (Pearson R2) by PRS+Cov+PCs+Sex: %0.4f (%0.6f)'
                    % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                res_dict['Cov_PC_Sex_adj_pred_r2+Cov_PC_Sex'] = pred_r2

        # Now calibration
        y_norm = (true_phens - sp.mean(true_phens)) / sp.std(true_phens)
        denominator = sp.dot(pval_derived_effects_prs.T,
                             pval_derived_effects_prs)
        numerator = sp.dot(pval_derived_effects_prs.T, y_norm)
        regression_slope = (numerator / denominator)[0][0]
        if verbose:
            print('The slope for predictions with weighted effects is: %0.4f' %
                  regression_slope)

        num_individs = len(prs_dict['pval_derived_effects_prs'])

        # Write PRS out to file.
        if out_file != None:
            write_scores_file(out_file,
                              prs_dict,
                              pval_derived_effects_prs,
                              adj_pred_dict,
                              weights_dict=weights_dict)

    return res_dict
Esempio n. 44
0
#vmotor1 = vmotor1[ind_rois,]
#vrest = nib.load('/big_disk/ajoshi/HCP5/' + sub + '/MNINonLinear/Resu\
#lts/rfMRI_REST2_LR/rfMRI_REST2_LR_Atlas_hp2000_clean.dtseries.nii')
vrest = scipy.io.loadmat(
    '/big_disk/ajoshi/with_andrew/100307/100307.tfMRI_MOTOR_LR.reduce3.ftdata.NLM_11N_hvar_5.mat'
)
LR_flag = msk['LR_flag']
LR_flag = np.squeeze(LR_flag) > 0
data = vrest['ftdata_NLM']
#data = sp.squeeze(vrest.get_data()).T
vrest = data[LR_flag]
vrest = vrest[ind_rois, ]
vrest = vrest[:, :vmotor1.shape[1]]  # make their length same
m = np.mean(vrest, 1)
vrest = vrest - m[:, None]
s = sp.std(vrest, axis=1) + 1e-116
vmotor2 = vrest / s[:, None]

rho1 = sp.sum(vmotor2 * vmotor1, axis=1) / vmotor2.shape[1]
diffbefore = vmotor2 - vmotor1

vmotor1orig = vmotor1.copy()
vmotor1, Rot = rot_sub_data(
    ref=vmotor2, sub=vmotor1)  #, area_weight=sp.sqrt(surf_weight[ind_rois]))
rho1rot = sp.sum(vmotor2 * vmotor1, axis=1) / vmotor2.shape[1]

diffafter = vmotor2 - vmotor1

#diffbefore = gaussian_filter(diffbefore,[0,5])
fig1 = plt.figure()
ax1 = fig1.add_subplot(111)
Esempio n. 45
0
	def std(self,par,error='real',ddof=1,**kwargs):
		std = scipy.std(self[par],ddof=ddof,axis=-1,**kwargs)
		if error == 'mean': return std/scipy.sqrt(len(self))
		return std
Esempio n. 46
0
def flux_features(lc):
    lc.remove_gaps()
    # Centered and sorted flux to work with
    mean = numpy.mean(lc.flux)
    stddev = numpy.std(lc.flux)
    centered_flux = [(f - mean) / (1.0 * stddev) for f in lc.flux]
    sorted_flux = sorted(centered_flux)

    # Median
    median_pos = len(sorted_flux) / 2
    flux_median = sorted_flux[median_pos]

    # Skew
    flux_skew = scipy.stats.skew(centered_flux)

    # Kurtosis
    flux_kurtosis = scipy.stats.kurtosis(centered_flux)

    # Stuff for percentiles
    stddev = scipy.std(centered_flux)  # is 1 but let's just do it again anyway
    mean = numpy.mean(centered_flux)  # is 0 but we'll be explicit

    # Percentiles - one, fraction of data within regular inc * stddev from MEAN
    increments = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.75, 1, 1.5, 2, 3]
    flux_pos_percentiles = []
    flux_mean = numpy.mean(centered_flux)
    flux_std = numpy.std(centered_flux)
    inc_upto = 0
    len_flux = len(centered_flux)
    flux_pos = 0
    sorted_pos_flux = sorted(filter(lambda f: f >= 0, centered_flux))
    #print sorted_flux
    for inc in increments:
        f_index = 0
        while f_index < len(
                sorted_pos_flux) and sorted_pos_flux[f_index] < flux_std * inc:
            f_index += 1
        #print "broke at bandwidth:", flux_std * inc
        if len(sorted_pos_flux) == 0:
            flux_pos_percentiles.append(0)
        else:
            flux_pos_percentiles.append((f_index * 1.0) / len(sorted_pos_flux))
        # Percentiles - one, fraction of data within regular inc * stddev from MEAN

    flux_neg_percentiles = []
    inc_upto = 0
    flux_pos = 0
    sorted_neg_flux = sorted(
        [math.fabs(f) for f in filter(lambda f: f < 0, centered_flux)])
    #print sorted_flux
    for inc in increments:
        f_index = 0
        while f_index < len(
                sorted_neg_flux) and sorted_neg_flux[f_index] < flux_std * inc:
            f_index += 1
        #print "broke at bandwidth:", flux_std * inc
        if len(sorted_neg_flux) == 0:
            flux_neg_percentiles.append(0)
        else:
            flux_neg_percentiles.append((f_index * 1.0) / len(sorted_neg_flux))
    # Percentiles - two, spread of data within regular inc * stddev from MEDIAN
    #stddev_increments = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.75, 1, 1.5, 2, 3]
    #flux_med_percentiles = []
    #for inc in stddev_increments:
    #	in_range = 0
    #	bound = stddev * inc
    #	for f in centered_flux:
    #		if (math.fabs(f) - flux_median) < bound:
    #			in_range += 1
    #	flux_med_percentiles.append((in_range * 1.0) / len(centered_flux))

    # Maximum and minimum
    flux_max = max(centered_flux)
    flux_min = min(centered_flux)
    #print "flux:", centered_flux
    #print "flux_pos_percentiles:", flux_pos_percentiles
    #print "flux_neg_percentiles:", flux_neg_percentiles
    features = [flux_median, flux_skew, flux_kurtosis] + \
           [flux_max, flux_min] + \
            flux_pos_percentiles + flux_neg_percentiles #+ flux_med_percentiles
    #print "len flux only:", len(features)
    return features
def correlation_summary(ticker, tweet_file):
    all_dates = all_date(tweet_file)

    start = all_dates[-1]
    end = all_dates[0]

    date_generated = {(start + datetime.timedelta(days=x)): 0
                      for x in range(0, (end - start).days + 1)}
    # Count tweets for each day in range
    for dt in all_dates:
        increment_hash_counter(date_generated, dt)

    df = yf.download(ticker, extract_dash(str(start)), extract_dash(str(end)))
    list_of_date = [str(i) for i in list(df.index)]

    lo_tweet = []
    lo_price = []
    lo_vwap = []

    vol_so_far = 0.0
    vp_so_far = 0.0

    ic = 0
    for high, low, close, vol in zip(df['High'], df['Low'], df['Close'],
                                     df['Volume']):
        vol = float(vol)
        tp = typical_price(high, low, close)

        vp_so_far += vol * tp
        vol_so_far += vol

        date = list_of_date[ic]
        dt = year_month_day(date)

        lo_tweet.append(date_generated[dt])
        lo_price.append(tp)
        lo_vwap.append(vp_so_far / vol_so_far)

        ic += 1

    pcc, ppc_alpha = pearsonr(lo_tweet, lo_price)
    scc, scc_alpha = spearmanr(lo_tweet, lo_price)
    kcc, kcc_alpha = kendalltau(lo_tweet, lo_price)

    print('______________________________________________________________')
    print('Only tweets during trading days are considered for comparison.')
    print("Total number of tweets: ", len(all_dates))
    print("Total number of trading days: ", len(list_of_date))
    print("Starting date: ", extract_dash(str(start)))
    print("End date: ", extract_dash(str(end)))
    print('Pearson Correlation Coefficient: ', pcc, '   P-Value: ',
          str(ppc_alpha))
    print('Spearman Rank Correlation Coefficient: ', scc, '   P-Value: ',
          str(scc_alpha))
    print('Kendall Tau Rank Correlation Coefficient: ', kcc, '   P-Value: ',
          str(kcc_alpha))
    print('Inter-Correlation-Coefficient Standard Deviation: ',
          std([pcc, scc, kcc]))
    print('Global Time Lag: ',
          TimeLa.positive_correlation_time_lag(lo_tweet, lo_price))
    print('______________________________________________________________')

    plt.plot(lo_tweet, label='Number of Tweets')
    plt.plot(lo_price, label='Price Index')
    plt.legend()
    plt.show()
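# The helpers used above (all_date, increment_hash_counter, typical_price, extract_dash,
# year_month_day, TimeLa.positive_correlation_time_lag) are defined outside this example.
# As one illustration, a minimal sketch of typical_price, assuming the usual VWAP
# convention of averaging the high, low and close prices:
def typical_price(high, low, close):
    return (float(high) + float(low) + float(close)) / 3.0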
Esempio n. 48
0
dev_Te_TS = data_dict['dev_Te'][Z_sort, :]
# dev_Te_TS = scipy.delete(dev_Te_TS, (10,), axis=0)
t_Te_TS = data_dict['t']
R_mid_CTS = data_dict['R_mid'][Z_sort, :]
# R_mid_CTS = scipy.delete(R_mid_CTS, (10,), axis=0)
R_mag = data_dict['R_mag']

Te_ETS = data_dict['Te_ETS'] / 1e3
t_Te_ETS = data_dict['t_ETS']
dev_Te_ETS = data_dict['dev_Te_ETS'] / 1e3
R_mid_ETS = data_dict['R_mid_ETS']

Te_ETS[(Te_ETS == 0) & (dev_Te_ETS == 1)] = scipy.nan

R_mag_mean = scipy.mean(R_mag)
R_mag_std = scipy.std(R_mag)

# Compute weighted mean and weighted corrected sample standard deviation:
# idx = 45
# Te_TS_w = Te_TS[:, idx]
# dev_Te_TS_w = dev_Te_TS[:, idx]
# R_mid_w = R_mid_CTS[:, idx]
# 
# Te_ETS_w_a = Te_ETS[:, idx]
# good_idxs = ~scipy.isnan(Te_ETS_w_a)
# Te_ETS_w = Te_ETS_w_a[good_idxs]
# dev_Te_ETS_w_a = dev_Te_ETS[:, idx]
# dev_Te_ETS_w = dev_Te_ETS_w_a[good_idxs]
# R_mid_ETS_w_a = R_mid_ETS[:, idx]
# R_mid_ETS_w = R_mid_ETS_w_a[good_idxs]
Esempio n. 49
0
def estimate_performance_xgboost(training_file, param, num_round, folds):
    '''
    Cross validation for XGBoost performance 
    '''
    # Load training data
    X, labels, weights = get_training_data(training_file)

    # Cross validate
    kf = cv.KFold(labels.size, n_folds=folds)
    npoints  = 6
    # Dictionary to store all the AMSs
    all_AMS = {}
    for curr in range(npoints):
        all_AMS[curr] = []
    # These are the cutoffs used for the XGBoost predictions
    cutoffs  = sp.linspace(0.05, 0.30, npoints)
    for train_indices, test_indices in kf:
        X_train, X_test = X[train_indices], X[test_indices]
        y_train, y_test = labels[train_indices], labels[test_indices]
        w_train, w_test = weights[train_indices], weights[test_indices]

        # Rescale weights so that their sum is the same as for the entire training set
        w_train *= (sum(weights) / sum(w_train))
        w_test  *= (sum(weights) / sum(w_test))

        sum_wpos = sum(w_train[y_train == 1])
        sum_wneg = sum(w_train[y_train == 0])

        # construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
        xgmat = xgb.DMatrix(X_train, label=y_train, missing=-999.0, weight=w_train)

        # scale weight of positive examples
        param['scale_pos_weight'] = sum_wneg / sum_wpos
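        # xgboost's scale_pos_weight is conventionally set to the ratio of total
        # negative weight to total positive weight, so that both classes contribute
        # comparably to the loss on this imbalanced data set.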
        # you can directly throw param in, though we want to watch multiple metrics here 
        plst = param.items()#+[('eval_metric', 'ams@0.15')]

        watchlist = []#[(xgmat, 'train')]
        bst = xgb.train(plst, xgmat, num_round, watchlist)

        # Construct matrix for test set
        xgmat_test = xgb.DMatrix(X_test, missing=-999.0)
        y_out = bst.predict(xgmat_test)
        res  = [(i, y_out[i]) for i in xrange(len(y_out))]
        rorder = {}
        for k, v in sorted(res, key = lambda x:-x[1]):
            rorder[k] = len(rorder) + 1

        # Explore changing threshold_ratio and compute AMS
        best_AMS = -1.
        for curr, threshold_ratio in enumerate(cutoffs):
            y_pred = sp.zeros(len(y_out))
            ntop = int(threshold_ratio * len(rorder))
            for k, v in res:
                if rorder[k] <= ntop:
                    y_pred[k] = 1

            truePos, falsePos = get_rates(y_pred, y_test, w_test)
            this_AMS = AMS(truePos, falsePos)
            all_AMS[curr].append(this_AMS)
            if this_AMS > best_AMS:
                best_AMS = this_AMS
        print "Best AMS =", best_AMS
    print "------------------------------------------------------"
    for curr, cut in enumerate(cutoffs):
        print "Thresh = %.2f: AMS = %.4f, std = %.4f" % \
            (cut, sp.mean(all_AMS[curr]), sp.std(all_AMS[curr]))
    print "------------------------------------------------------"
Esempio n. 50
0
def ld_pruning(data_file=None,
               ld_radius=None,
               out_file_prefix=None,
               p_thres=None,
               verbose=False,
               max_r2=0.2):
    """
    LD pruning + P-value thresholding
    """

    df = h5py.File(data_file, 'r')
    has_phenotypes = False
    if 'y' in df.keys():
        print 'Validation phenotypes found.'
        y = df['y'][...]  # Phenotype
        num_individs = len(y)
        risk_scores = sp.zeros(num_individs)
        has_phenotypes = True

    print ''
    if max_r2 < 1:
        print 'Applying LD-pruning + P-value thresholding with p-value threshold of %0.2e, a LD radius of %d SNPs, and a max r2 of %0.2f' % (
            p_thres, ld_radius, max_r2)
    else:
        if p_thres < 1:
            print 'Applying P-value thresholding with p-value threshold of %0.2e' % (
                p_thres)
        else:
            print 'Calculating polygenic risk score using all SNPs'
    results_dict = {}
    num_snps = 0
    cord_data_g = df['cord_data']

    chromsomes = []
    for chrom_str in cord_data_g.keys():
        g = cord_data_g[chrom_str]
        betas = g['betas'][...]
        n_snps = len(betas)
        num_snps += n_snps
        chromsomes.append(int((chrom_str.split('_'))[1]))

    chromsomes.sort()
    p_str = '%0.4f' % p_thres
    results_dict[p_str] = {}

    if out_file_prefix:
        #Preparing output files
        raw_effect_sizes = []
        raw_pval_effect_sizes = []
        updated_effect_sizes = []
        updated_pval_effect_sizes = []
        sids = []
        chromosomes = []
        positions = []
        nts = []

    tot_num_snps = 0
    num_snps_used = 0
    for chrom in chromsomes:
        chrom_str = 'chrom_%d' % chrom
        #print 'Chromosome %s:' % chrom_str
        g = cord_data_g[chrom_str]
        pvalues = g['ps'][...]
        snp_filter = pvalues < p_thres
        num_snps = sp.sum(snp_filter)
        if num_snps == 0:
            #print 'No SNPs, skipping chromosome'
            continue
        tot_num_snps += num_snps

        pvalues = pvalues[snp_filter]
        if 'raw_snps_val' in g.keys():
            raw_snps = g['raw_snps_val'][...][snp_filter]

        else:
            raw_snps = g['raw_snps_ref'][...][snp_filter]

        snp_means = g['snp_means_ref'][...][snp_filter]
        snp_stds = g['snp_stds_ref'][...][snp_filter]
        raw_betas = g['log_odds'][...][snp_filter]
        pval_derived_betas = g['betas'][...][snp_filter]
        if out_file_prefix:
            chromosomes.extend([chrom_str] * len(pval_derived_betas))
            positions.extend(g['positions'][...][snp_filter])
            sids.extend(g['sids'][...][snp_filter])
            raw_effect_sizes.extend(raw_betas)
            raw_pval_effect_sizes.extend(pval_derived_betas)
            nts.extend(g['nts'][...][snp_filter])

        if max_r2 < 1:
            #print 'Generating LD table from genotypes.'
            snp_means.shape = (len(snp_means), 1)
            snp_stds.shape = (len(snp_means), 1)
            #Normalize SNPs..
            norm_ref_snps = sp.array((raw_snps - snp_means) / snp_stds,
                                     dtype='float32')
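            # After this normalisation each SNP has mean 0 and standard deviation 1
            # in the reference panel, so pairwise LD (r^2) can be obtained from
            # simple dot products between rows of norm_ref_snps.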
            ld_table = ld.calc_ld_table(norm_ref_snps,
                                        max_ld_dist=ld_radius,
                                        min_r2=max_r2,
                                        verbose=verbose)

            updated_raw_betas, pruning_vector = smart_ld_pruning(
                raw_betas,
                ld_table,
                pvalues=pvalues,
                max_ld=max_r2,
                verbose=verbose)
            updated_pval_derived_betas = pval_derived_betas * pruning_vector
            num_snps_used += sp.sum(pruning_vector)
        else:
            updated_raw_betas = sp.copy(raw_betas)
            updated_pval_derived_betas = sp.copy(pval_derived_betas)
            updated_pval_derived_betas = updated_pval_derived_betas / (
                snp_stds.flatten())
            pruning_vector = sp.ones(len(pval_derived_betas))
            num_snps_used += sp.sum(pruning_vector)

        if out_file_prefix:
            updated_effect_sizes.extend(updated_raw_betas)
            updated_pval_effect_sizes.extend(updated_pval_derived_betas)

        if has_phenotypes:
            print 'Calculating scores for Chromosome %s' % chrom_str
            prs = sp.dot(updated_raw_betas, raw_snps)
            risk_scores += prs
            corr = sp.corrcoef(y, prs)[0, 1]
            r2 = corr**2
            print 'The R2 prediction accuracy of PRS using %s was: %0.4f' % (
                chrom_str, r2)

    print 'There were %d (SNP) effects after p-value thresholding' % tot_num_snps
    print 'After LD-pruning %d SNPs had non-zero effects' % num_snps_used
    if has_phenotypes:
        num_indivs = len(y)
        results_dict[p_str]['y'] = y
        results_dict[p_str]['risk_scores'] = risk_scores
        print 'Prediction accuracy was assessed using %d individuals.' % (
            num_indivs)

        corr = sp.corrcoef(y, risk_scores)[0, 1]
        r2 = corr**2
        results_dict[p_str]['r2_pd'] = r2
        print 'The  R2 prediction accuracy (observed scale) for the whole genome was: %0.4f (%0.6f)' % (
            r2, ((1 - r2)**2) / num_indivs)

        if corr < 0:
            risk_scores = -1 * risk_scores

        #Now calibration
        denominator = sp.dot(risk_scores.T, risk_scores)
        y_norm = (y - sp.mean(y)) / sp.std(y)
        numerator = sp.dot(risk_scores.T, y_norm)
        regression_slope = (numerator / denominator)
        print 'The slope for predictions with P-value derived  effects is:', regression_slope
        results_dict[p_str]['slope_pd'] = regression_slope

    if max_r2 == 1:
        weights_out_file = '%s_all_snps.txt' % (out_file_prefix)
    else:
        weights_out_file = '%s_P+T_p%0.4e.txt' % (out_file_prefix, p_thres)
    with open(weights_out_file, 'w') as f:
        f.write(
            'chrom    pos    sid    nt1    nt2    raw_beta    raw_pval_beta    updated_beta    updated_pval_beta \n'
        )
        for chrom, pos, sid, nt, raw_beta, raw_pval_beta, upd_beta, upd_pval_beta in it.izip(
                chromosomes, positions, sids, nts, raw_effect_sizes,
                raw_pval_effect_sizes, updated_effect_sizes,
                updated_pval_effect_sizes):
            nt1, nt2 = nt[0], nt[1]
            f.write(
                '%s    %d    %s    %s    %s    %0.4e    %0.4e    %0.4e    %0.4e\n'
                % (chrom, pos, sid, nt1, nt2, raw_beta, raw_pval_beta,
                   upd_beta, upd_pval_beta))
"""

import numpy as np
import scipy as sp
import pandas as pd
from matplotlib.finance import quotes_historical_yahoo_ochl as getData
#
# input area
ticker='F'            # stock
begdate1=(1982,9,1)   # starting date for period 1 
enddate1=(1987,9,1)   # ending date for period   1 
begdate2=(1987,12,1)  # starting date for period 2 
enddate2=(1992,12,1)  # ending   date for period 2
#
# define a function
def ret_f(ticker,begdate,enddate):
    p =getData(ticker, begdate, enddate,asobject=True, adjusted=True)
    ret = p.aclose[1:]/p.aclose[:-1]-1 
    date_=p.date
    return pd.DataFrame(data=ret,index=date_[1:],columns=['ret'])
#
# call the above function twice 
ret1=ret_f(ticker,begdate1,enddate1) 
ret2=ret_f(ticker,begdate2,enddate2)
#
# output
print('Std period #1	vs. std period #2') 
print(round(sp.std(ret1.ret),6),round(sp.std(ret2.ret),6)) 
print('Bartlett statistic, p-value') 
print(sp.stats.bartlett(ret1.ret,ret2.ret))
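# Bartlett's test compares the variances of the two return series; its statistic is
# approximately chi-squared distributed under the null of equal variances. It is
# sensitive to departures from normality, for which scipy.stats.levene is a more
# robust alternative.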
Esempio n. 52
0
    series = []
    for j in range(5, 45, 2):
        holder, weights = [], []
        for k in range(len(data[:, 0])):
            if data[k, 0] < j: continue
            elif data[k, 0] >= (j + 2): continue
            else:
                holder.append(data[k, sigr])
                weights.append(1.0 / (data[k, errors] * data[k, errors]))
        if (len(holder) == 0): continue
        else:
            mean, stdev = 0.0, 0.0
            #for k in range(len(holder)):
            #    mean = mean + holder[k]*weights[k]
            #    stdev = stdev + weights[k]
            series.append([(j + 1), sc.mean(holder), sc.std(holder)])
            #series.append([(j+1), (mean/stdev), (1.0/stdev)])
    out.append(sc.array(series))
#print out
fig = plt.figure()
ax1 = fig.add_subplot(111)
for i in range(len(data_list)):
    ax1.errorbar(out[i][:, 0], out[i][:, 1], yerr=out[i][:, 2])
    #ax1.plot(out[i][:,0], out[i][:,1], 'k-')
x_tick = ax1.get_xticks()
ax1.set_xticks(x_tick)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
new_ticks = []
for i in range(len(x_tick)):
    new_ticks.append(round((5.0 * m.log10(x_tick[i] / 0.01) + g_0), 1))
Esempio n. 53
0
  if 'MD.Barostat' in cq_params.keys():
    if cq_params['MD.Barostat'] == 'iso-ssm':
      extended_system = True
    if cq_params['MD.Barostat'] == 'ortho-ssm':
      extended_system = True
    if cq_params['MD.Barostat'] == 'iso-mttk':
      extended_system = True

  # Parse the statistics file
  nsteps, data = read_stats(opts.statfile,opts.nstop)
  avg = {}
  std = {}
  for key in data:
    data[key] = sp.array(data[key])
    avg[key] = sp.mean(data[key][opts.nequil:-1])
    std[key] = sp.std(data[key][opts.nequil:-1])
  time = [float(s)*dt for s in data['step']]
  data['time'] = sp.array(time)

  # Plot the statistics
  if opts.landscape:
    fig1, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, sharex=True, figsize=(11,7))
    plt.tight_layout(pad=6.5)
  else:
    fig1, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4, ncols=1, sharex=True, figsize=(7,10))

  ax1.plot(data['time'][opts.nskip:], data['pe'][opts.nskip:], 'r-', label='Potential energy')
  ax1a = ax1.twinx()
  ax1a.plot(data['time'][opts.nskip:], data['ke'][opts.nskip:], 'b-', label='Kinetic energy')
  if cq_params['MD.Ensemble'][2] == 't':
    if cq_params['MD.Thermostat'] == 'nhc':
Esempio n. 54
0
def stats(var, tmpFilters, tmpReturns):

    results = {'total': None, 'win_ct': None, 'lose_ct': None, 'win_ratio': None, 
                    'lose_ratio': None, 'return_med': None, 'return_avg': None, 'return_stddev': None, 
                    'return_min': None, 'return_max': None, 'slope': None, 'intercept': None, 'r': None, 'r_low': None, 
                    'r_high': None, '2_tail_prob': None, 'std_err': None}
    
    total = float( len(tmpReturns) )
    winct = float( sp.greater( tmpReturns, 0 ).sum() )
    losect = total - winct
    if total > 0:
        winrt = winct / total
        losert = losect / total
    else:
        winrt = 0
        losert = 0
    if len(tmpReturns) > 0:
        returnMed = sp.median( tmpReturns )
        returnAvg = sp.mean( tmpReturns )
        returnStdDev = sp.std( tmpReturns )
        returnMin = np.min( tmpReturns )
        returnMax = np.max( tmpReturns )
    else:
        returnMed = 0
        returnAvg = 0
        returnStdDev = 0
        returnMin = 0
        returnMax = 0
    if total > 0 and var != None:
        r = scipy.stats.linregress( tmpFilters, tmpReturns )
        corr = r[2]
        z_r = np.arctanh(corr)
        ci = 1.96
        z_low = z_r - ci/np.sqrt(len(tmpReturns)-3)
        z_high = z_r + ci/np.sqrt(len(tmpReturns)-3)
        r_low = ( np.exp(1) ** ( 2 * z_low ) - 1 ) / ( np.exp(1) ** ( 2 * z_low ) + 1 )
        r_high = ( np.exp(1) ** ( 2 * z_high ) - 1 ) / ( np.exp(1) ** ( 2 * z_high ) + 1 )
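        # This is the Fisher z-transformation confidence interval for a correlation:
        # z = arctanh(r) is approximately normal with standard error 1/sqrt(n - 3),
        # and r_low/r_high map the interval back with tanh, written out here as
        # (e^(2z) - 1) / (e^(2z) + 1).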

        slope = r[0]
        intercept = r[1]
        twoTail = r[3]
        stdErr = r[4]

    else:
        corr = 0
        r_low = 0
        r_high = 0
        slope = 0
        intercept = 0
        twoTail = 0
        stdErr = 0
                    
    if len(tmpReturns) > 0:
        results =  { 'total': total, 
                    'win_ct': winct, 
                    'lose_ct': losect, 
                    'win_ratio': winrt, 
                    'lose_ratio': losert, 
                    'return_med': returnMed,
                    'return_avg': returnAvg, 
                    'return_stddev': returnStdDev, 
                    'return_min': returnMin,
                    'return_max': returnMax,
                    'slope': slope, 
                    'intercept': intercept, 
                    'r': corr,
                    'r_low': r_low, 
                    'r_high': r_high, 
                    '2_tail_prob': twoTail, 
                    'std_err': stdErr}
            
    return results
Esempio n. 55
0
import numpy
import scipy
import matplotlib.pyplot as plt
from scipy import stats #conda remove scipy --force pip install scipy
scores = numpy.array([114, 100, 104, 89, 102, 91, 114, 114, 103, 105,
108, 130, 120, 132, 111, 128, 118, 119, 86, 72, 111, 103, 74, 112, 107,
103, 98, 96, 112, 112, 93])
xmean = scipy.mean(scores)
sigma = scipy.std(scores)

print((xmean, sigma ))
n = scipy.size(scores)

print(xmean, xmean - 2.576*sigma /scipy.sqrt(n), xmean + 2.576*sigma / scipy.sqrt(n))
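# 2.576 is the two-sided 99% quantile of the standard normal distribution, so the
# printed bounds form an approximate 99% confidence interval for the mean score.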
plt.stem(scores)
plt.show()

result=scipy.stats.bayes_mvs(scores)
help(scipy.stats.bayes_mvs)
print(result[0])

print('#',50*"-")
# ----------------------
Esempio n. 56
0
def run():
    from optparse import OptionParser

    usage = "usage: python redsequence [options] \n\nIdentifies and fits the red sequence using apparent magnitude and one color.\nOption of identifying star column and only using objects larger.\n"
    parser = OptionParser(usage)

    parser.add_option("-c",
                      "--cluster",
                      help="name of cluster (i.e. MACS0717+37)")
    parser.add_option("-d",
                      "--detectband",
                      help="detection band (i.e. W-J-V)",
                      default='W-J-V')
    parser.add_option(
        "--c1",
        help=
        "name of first filter in 'galaxy color' (i.e. MAG_APER1-SUBARU-COADD-1-W-J-V)",
        default='MAG_APER1-SUBARU-COADD-1-W-J-V')
    parser.add_option(
        "--c2",
        help=
        "name of second filter in 'galaxy color' (i.e. MAG_APER1-SUBARU-COADD-1-W-C-RC)",
        default='MAG_APER1-SUBARU-COADD-1-W-C-RC')
    parser.add_option(
        "-m",
        '--m',
        help=
        "name of filter to be used as 'galaxy magnitude' (default is '--c2')",
        default=None)
    parser.add_option("-s",
                      "--starcolumn",
                      help="add to filter out star column",
                      action="store_true",
                      default=False)
    parser.add_option('--lm',
                      help="limiting magnitude applied to 'galaxy magnitude'",
                      default=False)
    parser.add_option(
        '-r',
        "--center_radius",
        help=
        "maximum galaxy radius from cluster center (in arcsec) (default=440)",
        default=660.)
    parser.add_option("-l",
                      "--location",
                      help="write output directory",
                      default=None)
    parser.add_option("-w",
                      "--web",
                      help="instead write to web (Pat's space)",
                      action="store_true",
                      default=False)
    parser.add_option(
        "-z",
        "--z",
        help=
        "see what the photometric redshifts are of redsequence galaxies (requires redshift catalog, obviously)",
        action='store_true',
        default=False)
    parser.add_option(
        "--cat",
        help=
        "name of alternate input catalog (if you don't want to use the default photometry catalog)",
        default=None)
    parser.add_option("--existingcolor",
                      help="use existing colors of red sequence fit",
                      action="store_true",
                      default=False)
    parser.add_option("-e",
                      "--existing",
                      help="use existing red sequence fit",
                      action="store_true",
                      default=False)

    (options, args) = parser.parse_args()

    if options.m is None:
        options.m = options.c2

    if options.location is not None and options.web:
        print 'Either specify location or web but not both at once'
        raise Exception

    if options.location is None and options.web is False:
        options.location = '/nfs/slac/g/ki/ki05/anja/SUBARU/' + options.cluster + '/PHOTOMETRY_' + options.detectband + '_iso/'
    elif options.web:
        options.location = '/nfs/slac/g/ki/ki04/pkelly/photoz/' + options.cluster + '/CWWSB_capak.list/'

    if options.location[-1] != '/':
        options.location = options.location + '/'
    print options.location
    import os

    if options.existingcolor or options.existing:
        dir = '/nfs/slac/g/ki/ki05/anja/SUBARU/' + options.cluster + '/LENSING_' + options.detectband + '_' + options.detectband + '_aper/good/'
        dict = {}
        print 'file', dir + 'redseqfit_2.orig'
        redseqfit = open(dir + 'redseqfit_2.orig', 'r').readlines()
        slope = float(redseqfit[1].split('=')[1].split('*')[0])
        intercept = float(redseqfit[1][:-1].split('+')[1])

        upper_intercept = float(redseqfit[3][:-1].split('+')[1])
        lower_intercept = float(redseqfit[4][:-1].split('+')[1])

        polycoeffs = [slope, intercept]
        std = (upper_intercept - intercept) / 1.2

        info = open(dir + 'redseq_all.params', 'r').readlines()
        print info, dir + 'redseq_all.params'
        for l in info:
            if len(l.split(':')) > 1:
                key, value = l[:-1].split(': ')
                dict[key] = value

        print dict

        #options.center_radius = dict['radcut']

        def prefix(filt):
            if filt in ('g', 'r', 'u'):
                return 'MAG_APER1-MEGAPRIME-COADD-1-' + filt
            else:
                return 'MAG_APER1-SUBARU-COADD-1-' + filt

        dict['slope'] = slope
        dict['intercept'] = intercept
        dict['lower_intercept'] = lower_intercept
        dict['upper_intercept'] = upper_intercept

        if options.existing:
            options.m = prefix(dict['xmag'])
            options.c1 = prefix(dict['greenmag'])
            options.c2 = prefix(dict['redmag'])
            options.lm = dict['magcut2']
            print 'finished'
        elif options.existingcolor:
            options.c1 = prefix(dict['greenmag'])
            options.c2 = prefix(dict['redmag'])

    cluster = options.cluster
    c1 = options.c1
    c2 = options.c2
    m = options.m

    if options.z:
        import pyfits
        cat = '/nfs/slac/g/ki/ki05/anja/SUBARU/' + cluster + '/PHOTOMETRY_' + options.detectband + '_aper/' + cluster + '.APER1.1.CWWSB_capak.list.all.bpz.tab'
        p = pyfits.open(cat)
        photoz = p['STDTAB'].data
        zero_IDs = len(photoz[photoz.field('SeqNr') == 0])
        if zero_IDs > 0:
            print 'Wrong photoz catalog?', cat
            print str(zero_IDs) + ' many SeqNr=0'
            raise Exception

        print cat

    if options.cat is None:  #not hasattr(options,'cat'):
        input_mags = '/nfs/slac/g/ki/ki05/anja/SUBARU/' + cluster + '/PHOTOMETRY_' + options.detectband + '_aper/' + cluster + '.slr.alter.cat'
    else:
        input_mags = options.cat

    import pyfits, os, sys, pylab, do_multiple_photoz, commands, re, math, scipy
    from copy import copy
    print 'input magnitude catalog:', input_mags, options.cat, hasattr(
        options, 'cat')

    filterlist = do_multiple_photoz.get_filters(input_mags, 'OBJECTS')
    #print filterlist

    print input_mags
    w = pyfits.open(input_mags)
    mags = w['OBJECTS'].data

    #print mags.field('Xpos')

    mask = mags.field(c1) > -90
    if options.z: photoz = photoz[mask]
    mags = mags[mask]

    mask = mags.field(c2) > -90
    if options.z: photoz = photoz[mask]
    mags = mags[mask]

    mask = mags.field(m) > -90
    if options.z: photoz = photoz[mask]
    mags = mags[mask]

    mask = mags.field('Flag') == 0
    if options.z: photoz_star = photoz[mask]
    mags_star = mags[mask]

    #mask = mags_star.field(c2) < 23
    ''' get cluster redshift '''
    command = 'grep ' + cluster + ' ' + '/nfs/slac/g/ki/ki05/anja/SUBARU/' + '/clusters.redshifts '
    print command
    cluster_info = commands.getoutput(command)
    cluster_redshift = float(re.split('\s+', cluster_info)[1])
    print cluster_redshift

    if options.lm:
        mag_cut = float(options.lm)
    else:
        ''' compute faint magnitude cutoff '''
        if m[-6:] == 'W-C-RC' or m[-1] == 'r':
            mag_cut = 21.5 + 2.5 * math.log10((cluster_redshift / 0.19)**2.)
        if m[-5:] == 'W-J-V' or m[-5:] == 'W-J-B' or m[-1] == 'g':
            mag_cut = 22. + 2.5 * math.log10((cluster_redshift / 0.19)**2.)

    if not options.center_radius:
        ''' compute radial size of cut '''
        options.center_radius = 400 / (cluster_redshift / 0.4)

    options.center_radius = 400

    print mag_cut, options.lm

    if True:  #not options.existing:
        ''' identify star column (optional) '''
        if options.starcolumn:
            savepng = '/nfs/slac/g/ki/ki04/pkelly/photoz/' + cluster + '/seeing.png'
            left, right = fit_starcolumn(
                mags_star[mask].field('FLUX_RADIUS') * 0.2, savepng)

            savepng = options.location + 'column.png'

            pylab.axvline(x=left, ymin=-10, ymax=100)
            pylab.axvline(x=right, ymin=-10, ymax=100)
            pylab.scatter(mags.field('FLUX_RADIUS') * 0.2,
                          mags.field(m),
                          s=0.25)
            pylab.xlim(0, 2.5)
            pylab.xlabel('SIZE (arcsec)')
            pylab.ylabel(m)
            pylab.savefig(savepng)
            pylab.clf()

            mask = mags.field('FLUX_RADIUS') * 0.2 > right
            if options.z: photoz = photoz[mask]
            mags = mags[mask]
        ''' select galaxies near center of field '''
        #options.center_radius=240
        mask = ((mags.field('Xpos') - 5000. * scipy.ones(len(mags)))**2. +
                (mags.field('Ypos') - 5000. * scipy.ones(len(mags)))**
                2.)**0.5 * 0.2 < float(options.center_radius)
        if options.z: photoz = photoz[mask]
        mags = mags[mask]

        print len(mags)
        if options.z: print len(photoz)

        from copy import copy
        mags_mask = copy(mags)
        x = copy(mags.field(m))
        y = copy(mags.field(c1) - mags.field(c2))

        print mags.field(c1), mags.field(c2), c1, c2

        mask = x < mag_cut

        print mag_cut
        #print x, y

        savedir = options.location
        os.system('mkdir -p ' + savedir)

        savepng = options.location + 'redselection.png'

        print options.center_radius, len(y[mask])
        left, right = fit(y[mask], c1, c2, m, savepng)

        if options.z:
            mask = photoz.field('NFILT') > 3
            reg_mags = mags_mask[mask]
            reg_photoz = photoz[mask]
            mask = photoz.field('BPZ_ODDS') > 0.95
            reg_mags = mags_mask[mask]
            reg_photoz = photoz[mask]

            print len(reg_photoz)

            print 'making reg'
            reg = open('all.reg', 'w')
            reg.write(
                'global color=green font="helvetica 10 normal" select=1 highlite=1 edit=1 move=1 delete=1 include=1 fixed=0 source\nphysical\n'
            )
            for i in range(len(reg_mags.field('Xpos'))):
                reg.write('circle(' + str(reg_mags.field('Xpos')[i]) + ',' +
                          str(reg_mags.field('Ypos')[i]) + ',' + str(5) +
                          ') # color=red width=2 text={' +
                          str(reg_photoz.field('BPZ_Z_B')[i]) + '}\n')
            reg.close()

            print 'finished reg'

        mask = x < mag_cut
        if options.z:
            photoz2 = photoz[mask]
            mags_mask = mags_mask[mask]
        x2 = x[mask]
        y2 = y[mask]

        #print sorted(x2)
        print savepng

        print left, right

        if not options.existing:
            mask = y2 > left
            if options.z:
                photoz2 = photoz2[mask]
                mags_mask = mags_mask[mask]
            x2 = x2[mask]
            y2 = y2[mask]

            mask = y2 < right
            if options.z:
                photoz2 = photoz2[mask]
                mags_mask = mags_mask[mask]
            x2 = x2[mask]
            y2 = y2[mask]

        if not options.existing: polycoeffs = scipy.polyfit(x2, y2, 1)
        print polycoeffs

        yfit = scipy.polyval(polycoeffs, x2)

        print x2, yfit
        if not options.existing: std = scipy.std(abs(yfit - y2))
        print std
        mask = abs(yfit - y2) < std * 2.5
        if options.z: photoz3 = photoz2[mask]
        x3 = x2[mask]
        y3 = y2[mask]

        if not options.existing: polycoeffs = scipy.polyfit(x3, y3, 1)

        print polycoeffs
        # Evaluate the refit both at the unsorted data points (for residuals and
        # the final red-sequence selection) and at the sorted magnitudes (for plotting).
        yfit_resid = scipy.polyval(polycoeffs, x2)
        yfit = scipy.polyval(polycoeffs, sorted(x2))
        print x2, yfit
        if not options.existing: std = scipy.std(abs(yfit_resid - y2))
        print std
        std_fac = 1.2

    mask = abs(yfit_resid - y2) < std * std_fac
    if options.z:
        photoz2 = photoz2[mask]
        mags_mask = mags_mask[mask]
        print photoz2.field('SeqNr')
        print photoz2.field('BPZ_Z_B')

        fred = '/nfs/slac/g/ki/ki05/anja/SUBARU/' + cluster + '/PHOTOMETRY_' + options.detectband + '_aper/' + cluster + '.redseq'

        f = open(fred, 'w')
        for id in photoz2.field('SeqNr'):
            f.write(str(id) + '\n')
        f.close()

        reg = open('regseq.reg', 'w')
        reg.write(
            'global color=green font="helvetica 10 normal" select=1 highlite=1 edit=1 move=1 delete=1 include=1 fixed=0 source\nphysical\n'
        )
        for i in range(len(mags_mask.field('Xpos'))):
            reg.write('circle(' + str(mags_mask.field('Xpos')[i]) + ',' +
                      str(mags_mask.field('Ypos')[i]) + ',' + str(5) +
                      ') # color=green width=2 text={' +
                      str(photoz2.field('BPZ_Z_B')[i]) + '}\n')
        reg.close()

    pylab.clf()

    savepng = options.location + 'redhistogram.png'
    savepdf = options.location + 'redhistogram.pdf'

    if options.z:
        lower_lim = cluster_redshift - 0.3
        if lower_lim < 0: lower_lim = 0.0001
        print photoz2.field('BPZ_Z_B')
        a, b, varp = pylab.hist(photoz2.field('BPZ_Z_B'),
                                bins=scipy.arange(lower_lim,
                                                  cluster_redshift + 0.3,
                                                  0.01),
                                color='red')
        pylab.axvline(x=cluster_redshift,
                      ymin=0,
                      ymax=100,
                      color='blue',
                      linewidth=3)
        pylab.xlabel('Redshift')
        pylab.ylabel('Galaxies')
        pylab.savefig(savepng)
        pylab.savefig(savepdf)

        reg = open('reg.reg', 'w')
        reg.write(
            'global color=green font="helvetica 10 normal" select=1 highlite=1 edit=1 move=1 delete=1 include=1 fixed=0 source\nphysical\n'
        )
        for i in range(len(mags_mask.field('Xpos'))):
            reg.write('circle(' + str(mags_mask.field('Xpos')[i]) + ',' +
                      str(mags_mask.field('Ypos')[i]) + ',' + str(5) +
                      ') # color=blue width=2 text={' +
                      str(photoz2.field('BPZ_Z_B')[i]) + '}\n')
        reg.close()

    pylab.clf()
    # re-evaluate the fit on sorted magnitudes so the plotted line is monotonic
    yfit_sorted = scipy.polyval(polycoeffs, sorted(x2))
    pylab.plot(sorted(x2), yfit_sorted, 'b-')
    pylab.plot(sorted(x2), yfit_sorted + scipy.ones(len(yfit_sorted)) * std * std_fac, 'b-')
    pylab.plot(sorted(x2), yfit_sorted - scipy.ones(len(yfit_sorted)) * std * std_fac, 'b-')
    pylab.scatter(x, y, color='red', s=0.5)
    pylab.axhline(y=left, xmin=-10, xmax=100)
    pylab.axvline(x=mag_cut, ymin=-10, ymax=10)
    pylab.axhline(y=right, xmin=-10, xmax=100)
    pylab.xlabel(m)
    pylab.ylabel(c1 + ' - ' + c2)

    if options.z:
        mask = abs(photoz.field('BPZ_Z_B') - cluster_redshift) < 0.04
        mags = mags[mask]
        photoz = photoz[mask]

        mask = photoz.field('NFILT') > 4
        mags = mags[mask]
        photoz = photoz[mask]

        print 'priormag'
        print photoz.field('priormag')
        print 'nfilt'
        print photoz.field('NFILT')

        import pylab
        x = mags.field(m)
        y = mags.field(c1) - mags.field(c2)
        pylab.scatter(x, y, s=0.5)

        reg = open('reg.reg', 'w')
        reg.write(
            'global color=green font="helvetica 10 normal" select=1 highlite=1 edit=1 move=1 delete=1 include=1 fixed=0 source\nphysical\n'
        )
        for i in range(len(mags.field('Xpos'))):
            reg.write('circle(' + str(mags.field('Xpos')[i]) + ',' +
                      str(mags.field('Ypos')[i]) + ',' + str(5) +
                      ') # color=red width=2 text={' +
                      str(photoz.field('BPZ_Z_B')[i]) + '}\n')
        reg.close()

    pylab.xlim(sorted(x)[0], sorted(x)[-2])
    span = (sorted(y)[-2] - sorted(y)[2]) / 2
    if span > 1: span = 1
    median = scipy.median(scipy.array(y))
    pylab.ylim(median - 2, median + 2)

    savepng = options.location + 'cmd.png'
    pylab.savefig(savepng)

    pylab.clf()
    pylab.scatter(mags.field('Xpos'), mags.field('Ypos'), s=0.02)
    pylab.xlim([0, 10000])
    pylab.ylim([0, 10000])
    pylab.xlabel('X Pixel')
    pylab.ylabel('Y Pixel')

    savepng = options.location + '/positions.png'
    print savepng
    pylab.savefig(savepng)

    s = "\nBest fit: y = " + str(polycoeffs[0]) + "*x +" + str(
        polycoeffs[1]) + '\n'
    s += "\nCut: y < " + str(
        polycoeffs[0]) + "*x +" + str(polycoeffs[1] + std_fac * std) + '\n'
    s += "Cut: y > " + str(
        polycoeffs[0]) + "*x +" + str(polycoeffs[1] - std_fac * std) + '\n'
    s += "x < " + str(mag_cut) + '\n'
    s += 'x = ' + m + '\n'
    s += 'y = ' + c1 + ' - ' + c2 + '\n'

    print s

    f = open(options.location + '/redseqfit', 'w')
    f.write(s)
    f.close()

    from datetime import datetime
    t2 = datetime.now()

    print options.location
    f = open(options.location + '/redsequence.html', 'w')
    f.write(
        '<html><tr><td>' + t2.strftime("%Y-%m-%d %H:%M:%S") +
        '</td></tr><tr><td><h2>Photometric Redshifts of the Red Sequence</h2></td></tr><tr><td><img src="redhistogram.png"></img></td></tr><tr><td><img src="seeing.png"></img></td></tr><tr><td><img src="column.png"></img></td></tr><tr><td><img src="redselection.png"></img></td></tr><tr><td><img src="cmd.png"></img></td></tr><tr><td><img src="positions.png"></img></td></tr><tr><td>'
        + s.replace('\n', '<br>') + '</td></tr>        </html>')

    print 'Wrote output to:', options.location
    print 'Best fit parameters in:', options.location + '/redseqfit'
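The selection above is, at heart, an iteratively sigma-clipped straight-line fit in colour-magnitude space: fit colour against magnitude, drop points more than 2.5 sigma from the line, refit, and finally keep objects within std_fac sigma of the fit. A minimal self-contained sketch of that loop (the 2.5 and 1.2 thresholds mirror the script above; the toy data and the helper name sigma_clipped_line are invented for illustration):

import numpy as np

def sigma_clipped_line(mag, colour, clip=2.5, keep=1.2, n_iter=2):
    """Fit colour = a*mag + b, clip outliers, then flag sequence members."""
    mag = np.asarray(mag, dtype=float)
    colour = np.asarray(colour, dtype=float)
    good = np.ones(len(mag), dtype=bool)
    for _ in range(n_iter):
        a, b = np.polyfit(mag[good], colour[good], 1)
        resid = np.abs(colour - (a * mag + b))
        std = resid[good].std()
        good = resid < clip * std
    members = np.abs(colour - (a * mag + b)) < keep * std
    return (a, b), std, members

# toy colour-magnitude data: a tight red sequence plus a few blue contaminants
rng = np.random.RandomState(0)
mag = rng.uniform(18.0, 24.0, 200)
colour = -0.05 * mag + 2.5 + rng.normal(0.0, 0.05, 200)
colour[:20] -= rng.uniform(0.5, 2.0, 20)   # contaminants blueward of the sequence
(a, b), std, members = sigma_clipped_line(mag, colour)
print("slope=%.3f  intercept=%.3f  std=%.3f  members=%d" % (a, b, std, members.sum()))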
Example n. 57
            zs_rec[key].append(Zs[i])

#print shears[0]

mean = []
std = []
shearratio = []
zs = []
amps = []
ampErrs = []

for i in range(len(ran)):
    print len(scipy.array(shears[i])), ran[i]
    #raw_input()
    mean.append(scipy.mean(scipy.array(shears[i])))
    std.append(scipy.std(scipy.array(shears[i])))
    #print shears[i], radii[i]
    if len(shears[i]) > 0:
        zs.append(scipy.mean(scipy.array(zs_rec[i])))
        amp, ampErr = fit_exp(scipy.array(radii[i]), scipy.array(shears[i]),
                              scipy.array(shears_err[i]))
        amps.append(amp)
        ampErrs.append(ampErr)
        print i

print amps, ampErrs, zs
print mean
#pylab.scatter(arange(0,1.6,0.2),scipy.array(mean))
max = 1.5 * sorted(scipy.array(amps))[-1]
pylab.errorbar(scipy.array(zs),
               scipy.array(amps) / max,
Example n. 58
import itertools
import multiprocessing as mp

import numpy as np
import scipy as sp
from scipy.sparse import diags

# deft_core, y_sampling_of_Lap and x_MIN are assumed to come from the
# surrounding DEFT module; they are not re-created here.
def Laplace_approach(phi_t,
                     R,
                     Delta,
                     t,
                     N,
                     num_samples,
                     go_parallel,
                     pt_sampling=False):
    # Prepare the stuff for the case of maxent or finite t
    if not np.isfinite(t):
        G = len(phi_t)
        alpha = Delta._kernel_dim
        Delta_sparse = Delta.get_sparse_matrix()
        Delta_mat = Delta_sparse.todense() * (N / G)
        Delta_diagonalized = np.linalg.eigh(Delta_mat)
        kernel_basis = np.zeros([G, alpha])
        for i in range(alpha):
            kernel_basis[:, i] = Delta_diagonalized[1][:, i].ravel()
        M_mat = diags(sp.exp(-phi_t), 0).todense() * (N / G)
        M_mat_on_kernel = sp.mat(kernel_basis).T * M_mat * sp.mat(kernel_basis)
        U_mat_on_kernel = np.linalg.eigh(M_mat_on_kernel)
        # Below are what will be used
        y_dim = alpha
        eig_vals = np.abs(sp.array(U_mat_on_kernel[0]))
        transf_matrix = sp.mat(kernel_basis) * U_mat_on_kernel[1]
        lambdas = sp.exp(-phi_t) * (N / G)
    else:
        G = len(phi_t)
        H = deft_core.hessian(phi_t, R, Delta, t, N)
        # H = deft_code.deft_core.hessian(phi_t, R, Delta, t, N)
        A_mat = H.todense() * (N / G)
        U_mat = np.linalg.eigh(A_mat)
        # Below are what will be used
        y_dim = G
        eig_vals = np.abs(sp.array(U_mat[0]))
        transf_matrix = U_mat[1]
        lambdas = sp.exp(-phi_t) * (N / G)

    # If requested to go parallel, set up a pool of workers for parallel computation
    if go_parallel:
        num_cores = mp.cpu_count()
        pool = mp.Pool(processes=num_cores)

    # For each eigen-component, draw y samples according to the distribution
    if go_parallel:
        inputs = itertools.izip(itertools.repeat(num_samples), eig_vals)
        outputs = pool.map(y_sampling_of_Lap, inputs)
        y_samples = sp.array(outputs)
    else:
        y_samples = np.zeros([y_dim, num_samples])
        for i in range(y_dim):
            inputs = [num_samples, eig_vals[i]]
            outputs = y_sampling_of_Lap(inputs)
            y_samples[i, :] = outputs

    # Transform y samples to x samples
    x_samples = sp.array(transf_matrix * sp.mat(y_samples))
    for i in range(G):
        x_vec = x_samples[i, :]
        x_vec[x_vec < x_MIN] = x_MIN

    # Shift x samples to get phi samples
    phi_samples = np.zeros([G, num_samples])
    for k in range(num_samples):
        phi_samples[:, k] = x_samples[:, k] + phi_t

    # Calculate the weight of each sample
    x_combo = sp.exp(-x_samples) - np.ones(
        [G, num_samples]) + x_samples - 0.5 * np.square(x_samples)
    dS_vals = sp.array(sp.mat(lambdas) * sp.mat(x_combo)).ravel()
    phi_weights = sp.exp(-dS_vals)

    # If called from posterior sampling, return phi samples along with their weights at this point
    if pt_sampling:
        return phi_samples, phi_weights

    # Calculate sample mean and sample mean std
    w_sample_mean = sp.mean(phi_weights)
    w_sample_mean_std = sp.std(phi_weights) / sp.sqrt(num_samples)

    # Return correction and other stuff
    correction = sp.log(w_sample_mean)
    return correction, w_sample_mean, w_sample_mean_std
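The returned correction is, in effect, an importance-sampling estimate of log(Z_true/Z_Laplace): samples drawn from the Gaussian (Laplace) approximation are reweighted by exp(-dS), where dS is the part of the action not captured by its quadratic expansion. A one-dimensional toy sketch of the same bookkeeping (the action S(x) = lam*(exp(-x) - 1 + x) and the value lam = 4 are invented for illustration; the real code does this per eigen-component of the Hessian):

import numpy as np

lam = 4.0                      # curvature of the toy action at its minimum
num_samples = 100000
rng = np.random.RandomState(1)

# sample from the Laplace (Gaussian) approximation N(0, 1/lam) ...
x = rng.normal(0.0, 1.0 / np.sqrt(lam), num_samples)

# ... and weight each sample by exp(-dS), where dS = S(x) - lam*x**2/2
dS = lam * (np.exp(-x) - 1.0 + x - 0.5 * x**2)
w = np.exp(-dS)

w_mean = w.mean()
w_mean_std = w.std() / np.sqrt(num_samples)
correction = np.log(w_mean)
print("correction=%.4f  (mean weight %.4f +/- %.4f)" % (correction, w_mean, w_mean_std))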
Example n. 59
def main():
    parser = OptionParser(usage)
    parser.add_option(
        "-x",
        "--xwin",
        action="store_true",
        dest="xwin",
        default=False,
        help="Don't make a postscript plot, just use an X-window")
    parser.add_option("-p",
                      "--noplot",
                      action="store_false",
                      dest="makeplot",
                      default=True,
                      help="Look for pulses but do not generate a plot")
    parser.add_option(
        "-m",
        "--maxwidth",
        type="float",
        dest="maxwidth",
        default=0.0,
        help="Set the max downsampling in sec (see below for default)")
    parser.add_option("-t",
                      "--threshold",
                      type="float",
                      dest="threshold",
                      default=5.0,
                      help="Set a different threshold SNR (default=5.0)")
    parser.add_option("-s",
                      "--start",
                      type="float",
                      dest="T_start",
                      default=0.0,
                      help="Only plot events occuring after this time (s)")
    parser.add_option("-e",
                      "--end",
                      type="float",
                      dest="T_end",
                      default=1e9,
                      help="Only plot events occuring before this time (s)")
    parser.add_option("-g",
                      "--glob",
                      type="string",
                      dest="globexp",
                      default=None,
                      help="Process the files from this glob expression")
    parser.add_option("-f",
                      "--fast",
                      action="store_true",
                      dest="fast",
                      default=False,
                      help="Use a faster method of de-trending (2x speedup)")
    parser.add_option(
        "-b",
        "--nobadblocks",
        action="store_false",
        dest="badblocks",
        default=True,
        help="Don't check for bad-blocks (may save strong pulses)")
    parser.add_option("-d",
                      "--detrendlen",
                      type="int",
                      dest="detrendfact",
                      default=1,
                      help="Chunksize for detrending (pow-of-2 in 1000s)")
    (opts, args) = parser.parse_args()
    if len(args) == 0:
        if opts.globexp == None:
            print full_usage
            sys.exit(0)
        else:
            args = []
            for globexp in opts.globexp.split():
                args += glob.glob(globexp)
    useffts = True
    dosearch = True
    if opts.xwin:
        pgplot_device = "/XWIN"
    else:
        pgplot_device = ""

    fftlen = 8192  # Should be a power-of-two for best speed
    chunklen = 8000  # Must be at least max_downfact less than fftlen
    assert (opts.detrendfact in [1, 2, 4, 8, 16, 32])
    detrendlen = opts.detrendfact * 1000
    if (detrendlen > chunklen):
        chunklen = detrendlen
        fftlen = int(next2_to_n(chunklen))
    blocks_per_chunk = chunklen / detrendlen
    overlap = (fftlen - chunklen) / 2
    worklen = chunklen + 2 * overlap  # currently it is fftlen...

    max_downfact = 30
    default_downfacts = [2, 3, 4, 6, 9, 14, 20, 30, 45, 70, 100, 150, 220, 300]

    if args[0].endswith(".singlepulse"):
        filenmbase = args[0][:args[0].rfind(".singlepulse")]
        dosearch = False
    elif args[0].endswith(".dat"):
        filenmbase = args[0][:args[0].rfind(".dat")]
    else:
        filenmbase = args[0]

    # Don't do a search, just read results and plot
    if not dosearch:
        info, DMs, candlist, num_v_DMstr = \
              read_singlepulse_files(args, opts.threshold, opts.T_start, opts.T_end)
        orig_N, orig_dt = int(info.N), info.dt
        obstime = orig_N * orig_dt
    else:
        DMs = []
        candlist = []
        num_v_DMstr = {}

        # Loop over the input files
        for filenm in args:
            if filenm.endswith(".dat"):
                filenmbase = filenm[:filenm.rfind(".dat")]
            else:
                filenmbase = filenm
            info = infodata.infodata(filenmbase + ".inf")
            DMstr = "%.2f" % info.DM
            DMs.append(info.DM)
            N, dt = int(info.N), info.dt
            obstime = N * dt
            # Choose the maximum width to search based on time instead
            # of bins.  This helps prevent increased S/N when the downsampling
            # changes as the DM gets larger.
            if opts.maxwidth > 0.0:
                downfacts = [
                    x for x in default_downfacts if x * dt <= opts.maxwidth
                ]
            else:
                downfacts = [x for x in default_downfacts if x <= max_downfact]
            if len(downfacts) == 0:
                downfacts = [default_downfacts[0]]
            if (filenm == args[0]):
                orig_N = N
                orig_dt = dt
                if useffts:
                    fftd_kerns = make_fftd_kerns(default_downfacts, fftlen)
            if info.breaks:
                offregions = zip([x[1] for x in info.onoff[:-1]],
                                 [x[0] for x in info.onoff[1:]])

                # If last break spans to end of file, don't read it in (it's just padding)
                if offregions[-1][1] == N - 1:
                    N = offregions[-1][0] + 1

            outfile = open(filenmbase + '.singlepulse', mode='w')

            # Compute the file length in detrendlens
            roundN = N / detrendlen * detrendlen
            numchunks = roundN / chunklen
            # Read in the file
            print 'Reading "%s"...' % filenm
            timeseries = Num.fromfile(filenm, dtype=Num.float32, count=roundN)
            # Split the timeseries into chunks for detrending
            numblocks = roundN / detrendlen
            timeseries.shape = (numblocks, detrendlen)
            stds = Num.zeros(numblocks, dtype=Num.float64)
            # de-trend the data one chunk at a time
            print '  De-trending the data and computing statistics...'
            for ii, chunk in enumerate(timeseries):
                if opts.fast:  # use median removal instead of detrending (2x speedup)
                    tmpchunk = chunk.copy()
                    tmpchunk.sort()
                    med = tmpchunk[detrendlen / 2]
                    chunk -= med
                    tmpchunk -= med
                else:
                    # The detrend calls are the most expensive in the program
                    timeseries[ii] = scipy.signal.detrend(chunk, type='linear')
                    tmpchunk = timeseries[ii].copy()
                    tmpchunk.sort()
                # The following gets rid of (hopefully) most of the
                # outlying values (i.e. power dropouts and single pulses)
                # If you throw out 5% (2.5% at bottom and 2.5% at top)
                # of random gaussian deviates, the measured stdev is ~0.871
                # of the true stdev.  Thus the 1.0/0.871=1.148 correction below.
                # The following is roughly .std() since we already removed the median
                stds[ii] = Num.sqrt(
                    (tmpchunk[detrendlen / 40:-detrendlen / 40]**2.0).sum() /
                    (0.95 * detrendlen))
            stds *= 1.148
            # sort the standard deviations and separate those with
            # very low or very high values
            sort_stds = stds.copy()
            sort_stds.sort()
            # identify the differences with the largest values (this
            # will split off the chunks with very low and very high stds)
            locut = (sort_stds[1:numblocks / 2 + 1] -
                     sort_stds[:numblocks / 2]).argmax() + 1
            hicut = (sort_stds[numblocks / 2 + 1:] -
                     sort_stds[numblocks / 2:-1]).argmax() + numblocks / 2 - 2
            std_stds = scipy.std(sort_stds[locut:hicut])
            median_stds = sort_stds[(locut + hicut) / 2]
            print "    pseudo-median block standard deviation = %.2f" % (
                median_stds)
            if (opts.badblocks):
                lo_std = median_stds - 4.0 * std_stds
                hi_std = median_stds + 4.0 * std_stds
                # Determine a list of "bad" chunks.  We will not search these.
                bad_blocks = Num.nonzero((stds < lo_std) | (stds > hi_std))[0]
                print "    identified %d bad blocks out of %d (i.e. %.2f%%)" % \
                      (len(bad_blocks), len(stds),
                       100.0*float(len(bad_blocks))/float(len(stds)))
                stds[bad_blocks] = median_stds
            else:
                bad_blocks = []
            print "  Now searching..."

            # Now normalize all of the data and reshape it to 1-D
            timeseries /= stds[:, Num.newaxis]
            timeseries.shape = (roundN, )
            # And set the data in the bad blocks to zeros
            # Even though we don't search these parts, it is important
            # because of the overlaps for the convolutions
            for bad_block in bad_blocks:
                loind, hiind = bad_block * detrendlen, (bad_block +
                                                        1) * detrendlen
                timeseries[loind:hiind] = 0.0
            # Convert to a set for faster lookups below
            bad_blocks = set(bad_blocks)

            # Step through the data
            dm_candlist = []
            for chunknum in xrange(numchunks):
                loind = chunknum * chunklen - overlap
                hiind = (chunknum + 1) * chunklen + overlap
                # Take care of beginning and end of file overlap issues
                if (chunknum == 0):  # Beginning of file
                    chunk = Num.zeros(worklen, dtype=Num.float32)
                    chunk[overlap:] = timeseries[loind + overlap:hiind]
                elif (chunknum == numchunks - 1):  # end of the timeseries
                    chunk = Num.zeros(worklen, dtype=Num.float32)
                    chunk[:-overlap] = timeseries[loind:hiind - overlap]
                else:
                    chunk = timeseries[loind:hiind]

                # Make a set with the current block numbers
                lowblock = blocks_per_chunk * chunknum
                currentblocks = set(Num.arange(blocks_per_chunk) + lowblock)
                localgoodblocks = Num.asarray(
                    list(currentblocks - bad_blocks)) - lowblock
                # Search this chunk if it is not all bad
                if len(localgoodblocks):
                    # This is the good part of the data (end effects removed)
                    goodchunk = chunk[overlap:-overlap]

                    # need to pass blocks/chunklen, localgoodblocks
                    # dm_candlist, dt, opts.threshold to cython routine

                    # Search non-downsampled data first
                    # NOTE:  these nonzero() calls are some of the most
                    #        expensive calls in the program.  Best bet would
                    #        probably be to simply iterate over the goodchunk
                    #        in C and append to the candlist there.
                    hibins = Num.flatnonzero(goodchunk > opts.threshold)
                    hivals = goodchunk[hibins]
                    hibins += chunknum * chunklen
                    hiblocks = hibins / detrendlen
                    # Add the candidates (which are sorted by bin)
                    for bin, val, block in zip(hibins, hivals, hiblocks):
                        if block not in bad_blocks:
                            time = bin * dt
                            dm_candlist.append(
                                candidate(info.DM, val, time, bin, 1))

                    # Prepare our data for the convolution
                    if useffts: fftd_chunk = rfft(chunk, -1)

                    # Now do the downsampling...
                    for ii, downfact in enumerate(downfacts):
                        if useffts:
                            # Note:  FFT convolution is faster for _all_ downfacts, even 2
                            goodchunk = fft_convolve(fftd_chunk,
                                                     fftd_kerns[ii], overlap,
                                                     -overlap)
                        else:
                            # The normalization of this kernel keeps the post-smoothing RMS = 1
                            kernel = Num.ones(downfact, dtype=Num.float32) / \
                                     Num.sqrt(downfact)
                            smoothed_chunk = scipy.signal.convolve(
                                chunk, kernel, 1)
                            goodchunk = smoothed_chunk[overlap:-overlap]
                        #hibins = Num.nonzero(goodchunk>opts.threshold)[0]
                        hibins = Num.flatnonzero(goodchunk > opts.threshold)
                        hivals = goodchunk[hibins]
                        hibins += chunknum * chunklen
                        hiblocks = hibins / detrendlen
                        hibins = hibins.tolist()
                        hivals = hivals.tolist()
                        # Now walk through the new candidates and remove those
                        # that are not the highest but are within downfact/2
                        # bins of a higher signal pulse
                        hibins, hivals = prune_related1(
                            hibins, hivals, downfact)
                        # Insert the new candidates into the candlist, but
                        # keep it sorted...
                        for bin, val, block in zip(hibins, hivals, hiblocks):
                            if block not in bad_blocks:
                                time = bin * dt
                                bisect.insort(
                                    dm_candlist,
                                    candidate(info.DM, val, time, bin,
                                              downfact))

            # Now walk through the dm_candlist and remove the ones that
            # are within the downsample proximity of a higher
            # signal-to-noise pulse
            dm_candlist = prune_related2(dm_candlist, downfacts)
            print "  Found %d pulse candidates" % len(dm_candlist)

            # Get rid of those near padding regions
            if info.breaks: prune_border_cases(dm_candlist, offregions)

            # Write the pulses to an ASCII output file
            if len(dm_candlist):
                #dm_candlist.sort(cmp_sigma)
                outfile.write(
                    "# DM      Sigma      Time (s)     Sample    Downfact\n")
                for cand in dm_candlist:
                    outfile.write(str(cand))
            outfile.close()

            # Add these candidates to the overall candidate list
            for cand in dm_candlist:
                candlist.append(cand)
            num_v_DMstr[DMstr] = len(dm_candlist)

    if (opts.makeplot):

        # Step through the candidates to make a SNR list
        DMs.sort()
        snrs = []
        for cand in candlist:
            if not Num.isinf(cand.sigma):
                snrs.append(cand.sigma)
        if snrs:
            maxsnr = max(int(max(snrs)), int(opts.threshold)) + 3
        else:
            maxsnr = int(opts.threshold) + 3

        # Generate the SNR histogram
        snrs = Num.asarray(snrs)
        (num_v_snr, lo_snr, d_snr, num_out_of_range) = \
                    scipy.stats.histogram(snrs,
                                          int(maxsnr-opts.threshold+1),
                                          [opts.threshold, maxsnr])
        snrs = Num.arange(maxsnr-opts.threshold+1, dtype=Num.float64) * d_snr \
               + lo_snr + 0.5*d_snr
        num_v_snr = num_v_snr.astype(Num.float32)
        num_v_snr[num_v_snr == 0.0] = 0.001

        # Generate the DM histogram
        num_v_DM = Num.zeros(len(DMs))
        for ii, DM in enumerate(DMs):
            num_v_DM[ii] = num_v_DMstr["%.2f" % DM]
        DMs = Num.asarray(DMs)

        # open the plot device
        short_filenmbase = filenmbase[:filenmbase.find("_DM")]
        if opts.T_end > obstime:
            opts.T_end = obstime
        if pgplot_device:
            ppgplot.pgopen(pgplot_device)
        else:
            if (opts.T_start > 0.0 or opts.T_end < obstime):
                ppgplot.pgopen(short_filenmbase +
                               '_%.0f-%.0fs_singlepulse.ps/VPS' %
                               (opts.T_start, opts.T_end))
            else:
                ppgplot.pgopen(short_filenmbase + '_singlepulse.ps/VPS')
        ppgplot.pgpap(7.5, 1.0)  # Width in inches, aspect

        # plot the SNR histogram
        ppgplot.pgsvp(0.06, 0.31, 0.6, 0.87)
        ppgplot.pgswin(opts.threshold, maxsnr, Num.log10(0.5),
                       Num.log10(2 * max(num_v_snr)))
        ppgplot.pgsch(0.8)
        ppgplot.pgbox("BCNST", 0, 0, "BCLNST", 0, 0)
        ppgplot.pgmtxt('B', 2.5, 0.5, 0.5, "Signal-to-Noise")
        ppgplot.pgmtxt('L', 1.8, 0.5, 0.5, "Number of Pulses")
        ppgplot.pgsch(1.0)
        ppgplot.pgbin(snrs, Num.log10(num_v_snr), 1)

        # plot the DM histogram
        ppgplot.pgsvp(0.39, 0.64, 0.6, 0.87)
        # Add [1] to num_v_DM in YMAX below so that YMIN != YMAX when max(num_v_DM)==0
        ppgplot.pgswin(
            min(DMs) - 0.5,
            max(DMs) + 0.5, 0.0, 1.1 * max(num_v_DM + [1]))
        ppgplot.pgsch(0.8)
        ppgplot.pgbox("BCNST", 0, 0, "BCNST", 0, 0)
        ppgplot.pgmtxt('B', 2.5, 0.5, 0.5, "DM (pc cm\u-3\d)")
        ppgplot.pgmtxt('L', 1.8, 0.5, 0.5, "Number of Pulses")
        ppgplot.pgsch(1.0)
        ppgplot.pgbin(DMs, num_v_DM, 1)

        # plot the SNR vs DM plot
        ppgplot.pgsvp(0.72, 0.97, 0.6, 0.87)
        ppgplot.pgswin(min(DMs) - 0.5, max(DMs) + 0.5, opts.threshold, maxsnr)
        ppgplot.pgsch(0.8)
        ppgplot.pgbox("BCNST", 0, 0, "BCNST", 0, 0)
        ppgplot.pgmtxt('B', 2.5, 0.5, 0.5, "DM (pc cm\u-3\d)")
        ppgplot.pgmtxt('L', 1.8, 0.5, 0.5, "Signal-to-Noise")
        ppgplot.pgsch(1.0)
        cand_ts = Num.zeros(len(candlist), dtype=Num.float32)
        cand_SNRs = Num.zeros(len(candlist), dtype=Num.float32)
        cand_DMs = Num.zeros(len(candlist), dtype=Num.float32)
        for ii, cand in enumerate(candlist):
            cand_ts[ii], cand_SNRs[ii], cand_DMs[ii] = \
                         cand.time, cand.sigma, cand.DM
        ppgplot.pgpt(cand_DMs, cand_SNRs, 20)

        # plot the DM vs Time plot
        ppgplot.pgsvp(0.06, 0.97, 0.08, 0.52)
        ppgplot.pgswin(opts.T_start, opts.T_end,
                       min(DMs) - 0.5,
                       max(DMs) + 0.5)
        ppgplot.pgsch(0.8)
        ppgplot.pgbox("BCNST", 0, 0, "BCNST", 0, 0)
        ppgplot.pgmtxt('B', 2.5, 0.5, 0.5, "Time (s)")
        ppgplot.pgmtxt('L', 1.8, 0.5, 0.5, "DM (pc cm\u-3\d)")
        # Circles are symbols 20-26 in increasing order
        snr_range = 12.0
        cand_symbols = (cand_SNRs - opts.threshold) / snr_range * 6.0 + 20.5
        cand_symbols = cand_symbols.astype(Num.int32)
        cand_symbols[cand_symbols > 26] = 26
        for ii in [26, 25, 24, 23, 22, 21, 20]:
            inds = Num.nonzero(cand_symbols == ii)[0]
            ppgplot.pgpt(cand_ts[inds], cand_DMs[inds], ii)

        # Now fill the information area
        ppgplot.pgsvp(0.05, 0.95, 0.87, 0.97)
        ppgplot.pgsch(1.0)
        ppgplot.pgmtxt('T', 0.5, 0.0, 0.0,
                       "Single pulse results for '%s'" % short_filenmbase)
        ppgplot.pgsch(0.8)
        # first row
        ppgplot.pgmtxt('T', -1.1, 0.02, 0.0, 'Source: %s'%\
                       info.object)
        ppgplot.pgmtxt('T', -1.1, 0.33, 0.0, 'RA (J2000):')
        ppgplot.pgmtxt('T', -1.1, 0.5, 0.0, info.RA)
        ppgplot.pgmtxt('T', -1.1, 0.73, 0.0, 'N samples: %.0f' % orig_N)
        # second row
        ppgplot.pgmtxt('T', -2.4, 0.02, 0.0, 'Telescope: %s'%\
                       info.telescope)
        ppgplot.pgmtxt('T', -2.4, 0.33, 0.0, 'DEC (J2000):')
        ppgplot.pgmtxt('T', -2.4, 0.5, 0.0, info.DEC)
        ppgplot.pgmtxt('T', -2.4, 0.73, 0.0, 'Sampling time: %.2f \gms'%\
                       (orig_dt*1e6))
        # third row
        if info.instrument.find("pigot") >= 0:
            instrument = "Spigot"
        else:
            instrument = info.instrument
        ppgplot.pgmtxt('T', -3.7, 0.02, 0.0, 'Instrument: %s' % instrument)
        if (info.bary):
            ppgplot.pgmtxt('T', -3.7, 0.33, 0.0,
                           'MJD\dbary\u: %.12f' % info.epoch)
        else:
            ppgplot.pgmtxt('T', -3.7, 0.33, 0.0,
                           'MJD\dtopo\u: %.12f' % info.epoch)
        ppgplot.pgmtxt('T', -3.7, 0.73, 0.0, 'Freq\dctr\u: %.1f MHz'%\
                       ((info.numchan/2-0.5)*info.chan_width+info.lofreq))
        ppgplot.pgiden()
        ppgplot.pgend()
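The 1.148 factor applied to the block standard deviations above rests on the claim that discarding the lowest and highest 2.5% of Gaussian deviates biases the measured RMS down to roughly 0.871 of the true sigma. A quick standalone Monte Carlo check of those numbers (not part of the pipeline):

import numpy as np

rng = np.random.RandomState(2)
n = 1000000
x = np.sort(rng.normal(0.0, 1.0, n))

# throw out the lowest and highest 2.5% of the deviates, as the detrend loop does
cut = n // 40
trimmed = x[cut:n - cut]

# RMS of the survivors, normalised the same way as in the script (divide by 0.95*n)
measured = np.sqrt((trimmed ** 2).sum() / (0.95 * n))
print("measured/true sigma = %.3f, correction factor = %.3f" % (measured, 1.0 / measured))
# expect roughly 0.871 and 1.148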
Example n. 60
>>> import scipy

>>> x = [4, 6, 7, 11, 14, 17, 21]
>>> y = [18, 12, 13, 8, 7, 7, 4]

>>> scipy.mean(x)
11.428571428571429
>>> scipy.median(x)
11.0
>>> scipy.std(x)
5.7782138331876247
>>> scipy.mean(y)
9.8571428571428577
>>> scipy.median(y)
8.0
>>> scipy.std(y)
4.3892261416392051

>>> import scipy.stats as stats

>>> stats.pearsonr(x, y)
(-0.92698953702675413, 0.0026589803215800443)

>>> lr = stats.linregress
>>> slope, intercept, rvalue, pvalue, stderr = lr(x, y)

>>> slope
-0.70415647921760383
>>> intercept
17.904645476772615
>>> rsq = rvalue**2
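The regression output ties back to the summary statistics: the least-squares slope equals the Pearson r times std(y)/std(x), and rsq is simply rvalue squared. A small self-contained check using the same x and y (reusing the scipy.std alias from the session above, which older scipy versions provide):

import scipy
import scipy.stats as stats

x = [4, 6, 7, 11, 14, 17, 21]
y = [18, 12, 13, 8, 7, 7, 4]

r, p = stats.pearsonr(x, y)
slope, intercept, rvalue, pvalue, stderr = stats.linregress(x, y)

# the least-squares slope is r scaled by the ratio of standard deviations
print("slope from linregress : %.6f" % slope)
print("r * std(y) / std(x)   : %.6f" % (r * scipy.std(y) / scipy.std(x)))

# and the coefficient of determination is just the squared correlation
print("r**2 = %.6f, rvalue**2 = %.6f" % (r ** 2, rvalue ** 2))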