def test_reductions():
    assert compute(t.a.sum(), b) == 6
    assert compute(t.a.min(), b) == 1
    assert compute(t.a.max(), b) == 3
    assert compute(t.a.mean(), b) == 2.0
    assert abs(compute(t.a.std(), b) - np.std([1, 2, 3])) < 1e-5
    assert abs(compute(t.a.var(), b) - np.var([1, 2, 3])) < 1e-5
    assert abs(compute(t.a.std(unbiased=True), b) - np.std([1, 2, 3], ddof=1)) < 1e-5
    assert abs(compute(t.a.var(unbiased=True), b) - np.var([1, 2, 3], ddof=1)) < 1e-5
    assert len(list(compute(t.distinct(), b))) == 3
    assert len(list(compute(t.a.distinct(), b))) == 3
    assert compute(t.a.nunique(), b) == 3
    assert isinstance(compute(t.a.nunique(), b), np.integer)
    assert compute(t.a.count(), b) == 3
    assert isinstance(compute(t.date.count(), b), np.integer)
    assert compute(t.date.nunique(), b) == 2
    assert isinstance(compute(t.date.nunique(), b), np.integer)
    assert compute(t.date.count(), b) == 2
    assert isinstance(compute(t.a.count(), b), np.integer)
    assert compute(t.a[0], b) == 1
    assert compute(t.a[-1], b) == 3
    assert compute(t[0], b) == compute(t[0], b)
    assert compute(t[-1], b) == compute(t[-1], b)
def test_em_gmm_largedim():
    # testing the GMM model in larger dimensions

    # generate some data
    dim = 10
    x = nr.randn(100, dim)
    x[:30] += 2

    # estimate different GMMs of that data
    maxiter, delta = 100, 1.e-4

    for k in range(2, 3):
        lgmm = GMM(k, dim)
        bgmm = lgmm.initialize_and_estimate(x, None, maxiter, delta, ninit=5)
        z = bgmm.map_label(x)

        # define the correct labelling
        u = np.zeros(100)
        u[:30] = 1

        # check the correlation between the true labelling
        # and the computed one
        eta = np.absolute(np.dot(z - z.mean(), u - u.mean()) /
                          (np.std(z) * np.std(u) * 100))
        assert_true(eta > 0.3)
def Haffine_from_points(fp, tp):
    """Compute the homography H of an affine transformation such that tp is fp mapped by that transformation."""
    if fp.shape != tp.shape:
        raise RuntimeError('number of points do not match')

    # Condition (normalize) the points
    # --- source points ---
    m = numpy.mean(fp[:2], axis=1)
    maxstd = numpy.max(numpy.std(fp[:2], axis=1)) + 1e-9
    C1 = numpy.diag([1/maxstd, 1/maxstd, 1])
    C1[0, 2] = -m[0] / maxstd
    C1[1, 2] = -m[1] / maxstd
    fp_cond = numpy.dot(C1, fp)

    # --- corresponding points ---
    m = numpy.mean(tp[:2], axis=1)
    maxstd = numpy.max(numpy.std(tp[:2], axis=1)) + 1e-9
    C2 = numpy.diag([1/maxstd, 1/maxstd, 1])
    C2[0, 2] = -m[0] / maxstd
    C2[1, 2] = -m[1] / maxstd
    tp_cond = numpy.dot(C2, tp)

    # After conditioning the point means are zero, so the translation is zero
    A = numpy.concatenate((fp_cond[:2], tp_cond[:2]), axis=0)
    U, S, V = numpy.linalg.svd(A.T)

    # Build matrices B and C
    tmp = V[:2].T
    B = tmp[:2]
    C = tmp[2:4]

    tmp2 = numpy.concatenate((numpy.dot(C, numpy.linalg.pinv(B)), numpy.zeros((2, 1))), axis=1)
    H = numpy.vstack((tmp2, [0, 0, 1]))

    # Decondition
    H = numpy.dot(numpy.linalg.inv(C2), numpy.dot(H, C1))

    return H / H[2, 2]  # normalize, then return
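# A minimal usage sketch for Haffine_from_points, assuming fp and tp are 3 x N arrays
# of homogeneous points (rows x, y, 1), as implied by the conditioning code above.
# The point sets and the "true" affine map below are illustrative only.
import numpy

fp = numpy.array([[0., 1., 1., 0.],
                  [0., 0., 1., 1.],
                  [1., 1., 1., 1.]])
A_true = numpy.array([[2., 0., 1.],
                      [0., 3., -1.],
                      [0., 0., 1.]])  # scale by (2, 3), translate by (1, -1)
tp = numpy.dot(A_true, fp)
H = Haffine_from_points(fp, tp)
print(numpy.round(H, 3))  # should be close to A_true, normalized so H[2, 2] == 1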
def _get_mean_std_from_runs(results_for_runs, decider):
    '''
    For a collection of runs (usually from HPs) return the average and std of the decider error and test error.
    Usually decider error will be validation or train error (which we then also get the average test error).

    results_for_runs = array with all results for runs (each run usually corresponds to a specific HP) for a specific model
        e.g. [result1, ..., result200]
    decider = namespace holding the appropriate function handler/pointer named get_errors_from
        (e.g. get_errors_based_on_train_error). So decider must be able to call decider.get_errors_from(run)
    '''
    decider_errors_for_runs = []
    #train_errors_for_runs = []
    #cv_errors_for_runs = []
    test_errors_for_runs = []
    for current_result in results_for_runs:
        decider_error, train_error, cv_error, test_error = decider.get_errors_from(current_result)
        print('decider_error ', decider_error)
        #if np.isnan( decider_error ):
        #    pdb.set_trace()
        decider_errors_for_runs.append(decider_error)
        #train_errors_for_runs.append(train_error)
        #cv_errors_for_runs.append(cv_error)
        test_errors_for_runs.append(test_error)
    decider_mean, decider_std = np.mean(decider_errors_for_runs), np.std(decider_errors_for_runs)
    test_mean, test_std = np.mean(test_errors_for_runs), np.std(test_errors_for_runs)
    #pdb.set_trace()
    return decider_mean, decider_std, test_mean, test_std
def testNormalizeLike(self):
    a = np.empty((10, 3))
    a[:, 0] = np.random.random(10)
    a[:, 1] = np.random.random(10)
    a[:, 2] = np.random.random(10)
    b = np.empty((10, 3))
    b[:, 0] = np.random.random(10)
    b[:, 1] = np.random.random(10)
    b[:, 2] = np.random.random(10)
    b = b * 2
    c = normalizeArrayLike(b, a)  # Should be normalized like a
    mean = []
    std = []
    mean.append(np.mean(a[:, 0]))
    mean.append(np.mean(a[:, 1]))
    mean.append(np.mean(a[:, 2]))
    std.append(np.std(a[:, 0]))
    std.append(np.std(a[:, 1]))
    std.append(np.std(a[:, 2]))
    # Check all values
    for col in xrange(b.shape[1]):
        for bval, cval in zip(b[:, col].flat, c[:, col].flat):
            print cval, (bval - mean[col]) / std[col]
            print cval, bval
            assert cval == (bval - mean[col]) / std[col]
    print("TestNormalizeLike success")
def summarize_features_mfcc(mfccs, v=False):
    """
    Given an MFCC matrix, return a summary feature vector

    :param mfccs: NxM matrix
        mfcc matrix (N coefficients by M frames)
    :param v: bool
        verbose flag (unused)
    :return: 1xL array
        feature vector
    """
    # Summarize features: max, mean, and std of the MFCCs and of their
    # first and second differences along the time axis
    features = np.max(mfccs, axis=1)
    features = np.append(features, np.mean(mfccs, axis=1))
    features = np.append(features, np.std(mfccs, axis=1))
    d_mfccs = np.diff(mfccs, axis=1)
    features = np.append(features, np.mean(d_mfccs, axis=1))
    features = np.append(features, np.std(d_mfccs, axis=1))
    d_d_mfccs = np.diff(d_mfccs, axis=1)
    features = np.append(features, np.mean(d_d_mfccs, axis=1))
    features = np.append(features, np.std(d_d_mfccs, axis=1))
    # print np.shape(d_d_mfccs)
    # print np.shape(features)
    return np.reshape(features, (1, len(features)))
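# Minimal sketch of calling summarize_features_mfcc on a random 13 x 100 MFCC
# matrix (13 coefficients, 100 frames); the shapes here are illustrative assumptions.
import numpy as np

mfccs = np.random.randn(13, 100)
feats = summarize_features_mfcc(mfccs)
print(feats.shape)  # (1, 91): 7 summaries (max/mean/std + mean/std of 1st and 2nd diffs) x 13 coefficients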
def __init__(self, fndark, nblocksize):
    if (os.path.isfile(fndark + '-dark.npz')):
        npzfile = np.load(fndark + '-dark.npz')
        self.dmean = npzfile['dmean']
        self.dstd = npzfile['dstd']
        self.dbpm = npzfile['dbpm']
    else:
        dark = Binary(fndark)
        nframes = dark.nframes
        my = dark.my
        mx = dark.mx
        nblocks = nframes // nblocksize
        bmed = np.zeros((nblocks, my, mx))
        bstd = np.zeros((nblocks, my, mx))
        for iblock in range(nblocks):
            t0 = time.clock()
            a = dark.data[iblock * nblocksize:(iblock + 1) * nblocksize]
            a, idx = dropbadframes(a)
            print '- read block, dropped bad, subtracted dark in ' + str(time.clock() - t0) + 's'
            nfb = a.shape[0]
            bmed[iblock, :, :] = np.median(a, axis=0)
            bstd[iblock, :, :] = np.std(a, axis=0)
        self.dmean = np.mean(bmed, axis=0)
        self.dstd = np.sqrt(np.sum((bstd)**2, axis=0))
        self.dbpm = self.dstd < (np.median(self.dstd) + 5 * np.std(self.dstd))
        self.dbpm = self.dstd < (np.median(self.dstd * self.dbpm) + 5 * np.std(self.dstd * self.dbpm))
        np.savez(fndark + '-dark', dmean=self.dmean, dstd=self.dstd, dbpm=self.dbpm)
        del dark
def meanclip2(xx, yy, slope, clipsig=3.0, maxiter=5, converge_num=0.1, verbose=0):
    from numpy import array
    import numpy
    xx = array(xx)
    yy = array(yy)
    xx0 = array(xx[:])
    yy0 = array(yy[:])
    ct = len(yy)
    slope = float(slope)
    iter = 0
    c1 = 1.0
    c2 = 0.0
    while (c1 >= c2) and (iter < maxiter):
        lastct = ct
        sig = numpy.std(yy0 - xx0 * slope)
        # mean = numpy.mean(array(yy0) - array(xx0) * slope)
        mean = numpy.median(array(yy0) - array(xx0) * slope)
        wsm = numpy.where(abs(yy0 - xx0 * slope) < mean + clipsig * sig)
        ct = len(wsm[0])
        if ct > 0:
            xx0 = xx0[wsm]
            yy0 = yy0[wsm]
        c1 = abs(ct - lastct)
        c2 = converge_num * lastct
        iter += 1
    # End of while loop
    # mean = numpy.mean(array(yy0) - array(xx0) * slope)
    mean = numpy.median(array(yy0) - array(xx0) * slope)
    sig = numpy.std(array(yy0) - array(xx0) * float(slope))
    if verbose:
        pass
    return mean, sig, yy0, xx0
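# Hedged usage sketch for meanclip2: estimate the offset and scatter of y about a
# fixed-slope line with iterative sigma clipping. The synthetic data, slope, and
# outlier positions below are assumptions for illustration only.
import numpy as np

rng = np.random.RandomState(0)
x = np.linspace(0, 10, 200)
y = 2.0 * x + 0.5 + rng.normal(0, 0.1, x.size)
y[::25] += 5.0  # inject a few outliers
mean, sig, y_kept, x_kept = meanclip2(x, y, slope=2.0, clipsig=3.0)
print(mean, sig, len(y_kept))  # offset estimate, scatter, points surviving the clip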
def meanclip3(xx, yy, slope, clipsig=3.0, maxiter=5, converge_num=0.1, verbose=0):
    from numpy import array, polyfit
    import numpy
    xx = array(xx)
    yy = array(yy)
    xx0 = array(xx[:])
    yy0 = array(yy[:])
    ct = len(yy)
    iter = 0
    c1 = 1.0
    c2 = 0.0
    while (c1 >= c2) and (iter < maxiter):
        lastct = ct
        pol = polyfit(xx0, yy0, 1, full=True)  ###
        mean0 = pol[0][1]
        slope = pol[0][0]
        sig = numpy.std(yy0 - mean0 - slope * xx0)
        wsm = numpy.where(abs(yy0 - xx0 * slope) < mean0 + clipsig * sig)
        ct = len(wsm[0])
        if ct > 0:
            xx0 = xx0[wsm]
            yy0 = yy0[wsm]
        c1 = abs(ct - lastct)
        c2 = converge_num * lastct
        iter += 1
    # End of while loop
    pol = polyfit(xx0, yy0, 1, full=True)  ###
    mean0 = pol[0][1]
    slope = pol[0][0]
    sig = numpy.std(yy0 - mean0 - slope * xx0)
    if verbose:
        pass
    return mean0, sig, slope, yy0, xx0
def run(self, inputs, run_id): pstore = self.pstore(run_id) data, npatients = inputs[0], inputs[1][0] corrs = [] for row in data: allrow = row[:npatients] amlrow = row[npatients:] mean, std = numpy.mean(row), numpy.std(row) amlrow = [(val - mean)/std for val in amlrow] allrow = [(val - mean)/std for val in allrow] amlmean, allmean = numpy.mean(amlrow), numpy.mean(allrow) amlstd, allstd = numpy.std(amlrow), numpy.std(allrow) corr = (allmean - amlmean) / (amlstd + allstd) corrs.append( corr ) start = time.time() rowsize = len(data[0]) if pstore.takes_pointers(): for rowidx, b in enumerate(corrs): fd = pstore.popen((rowidx,)) if fd: for colidx in xrange(rowsize): pstore.pwrite(fd, 0, (rowidx, colidx)) pstore.pwrite(fd, 1, (0,)) pstore.pclose(fd) end = time.time() return numpy.array(corrs), {'provoverhead' : end-start}
def _ols(self, x, y):
    lr = LinearRegression()
    coef_xy = lr.fit(y=y.reshape(-1, 1), X=x.reshape(-1, 1)).coef_
    coef_yx = lr.fit(y=x.reshape(-1, 1), X=y.reshape(-1, 1)).coef_
    r_xy = y - coef_xy * x
    r_yx = x - coef_yx * y
    return r_xy / np.std(r_xy), r_yx / np.std(r_yx)
def condBias(H, O):
    H_ensmean = np.mean(H, axis=1)
    r = np.corrcoef(H_ensmean, O)[0, 1]
    std_H = np.std(H_ensmean)
    std_O = np.std(O)
    cond_bias = r * std_O / std_H
    return cond_bias
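# Hedged example for condBias: H is assumed to be an (n_times, n_members) ensemble
# forecast array and O the matching observations; the random data are illustrative.
import numpy as np

rng = np.random.RandomState(1)
O = rng.normal(0.0, 1.0, 50)                       # observations
H = O[:, None] + rng.normal(0.0, 0.5, (50, 10))    # 10-member ensemble centred on the obs
print(condBias(H, O))  # conditional bias = corr(ens_mean, obs) * std(obs) / std(ens_mean)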
def test_Moster13SmHm_behavior(): """ """ default_model = Moster13SmHm() mstar1 = default_model.mean_stellar_mass(prim_haloprop = 1.e12) ratio1 = mstar1/3.4275e10 np.testing.assert_array_almost_equal(ratio1, 1.0, decimal=3) default_model.param_dict['n10'] *= 1.1 mstar2 = default_model.mean_stellar_mass(prim_haloprop = 1.e12) assert mstar2 > mstar1 default_model.param_dict['n11'] *= 1.1 mstar3 = default_model.mean_stellar_mass(prim_haloprop = 1.e12) assert mstar3 == mstar2 mstar4_z1 = default_model.mean_stellar_mass(prim_haloprop = 1.e12, redshift=1) default_model.param_dict['n11'] *= 1.1 mstar5_z1 = default_model.mean_stellar_mass(prim_haloprop = 1.e12, redshift=1) assert mstar5_z1 != mstar4_z1 mstar_realization1 = default_model.mc_stellar_mass(prim_haloprop = np.ones(1e4)*1e12, seed=43) mstar_realization2 = default_model.mc_stellar_mass(prim_haloprop = np.ones(1e4)*1e12, seed=43) mstar_realization3 = default_model.mc_stellar_mass(prim_haloprop = np.ones(1e4)*1e12, seed=44) assert np.array_equal(mstar_realization1, mstar_realization2) assert not np.array_equal(mstar_realization1, mstar_realization3) measured_scatter1 = np.std(np.log10(mstar_realization1)) model_scatter = default_model.param_dict['scatter_model_param1'] np.testing.assert_allclose(measured_scatter1, model_scatter, rtol=1e-3) default_model.param_dict['scatter_model_param1'] = 0.3 mstar_realization4 = default_model.mc_stellar_mass(prim_haloprop = np.ones(1e4)*1e12, seed=43) measured_scatter4 = np.std(np.log10(mstar_realization4)) np.testing.assert_allclose(measured_scatter4, 0.3, rtol=1e-3)
def cross_validate(self, seg_corpus, dep_corpus, out_folder=None): assert seg_corpus.keys() == dep_corpus.keys() texts = np.array(sorted(seg_corpus.keys())) folds = KFold(len(texts), number_of_folds) # extract features for all texts all_features = {} all_labels = {} for text in texts: features, labels = self.extract_features_from_text( dep_corpus[text], seg_forest=seg_corpus[text]) all_features[text] = features all_labels[text] = labels # do the cross-validation macro_F1s = [] micro_F1s = [] tp = fp = fn = tp_i = fp_i = fn_i = 0 for i, (train, test) in enumerate(folds): print "# FOLD", i # train train_texts = texts[train] train_features = chained([all_features[text] for text in train_texts]) train_labels = chained([all_labels[text] for text in train_texts]) print " training on %d items..." % len(train_labels) self._train(train_features, train_labels) print " extracted %d features using the dict vectorizer." % \ len(self.pipeline.named_steps[ 'vectorizer'].get_feature_names()) # test (predicting textwise) test_labels = [] pred_labels = [] for text in texts[test]: features = all_features[text] labels = all_labels[text] predictions = self._predict(features) test_labels.extend(labels) pred_labels.extend(predictions) if out_folder is not None: discourse_tree = self._segment_text(predictions, dep_corpus[text]) with open(out_folder + '/' + text + '.tree', 'w') as fout: fout.write(str(discourse_tree)) macro_f1, micro_f1 = self._score(test_labels, pred_labels) macro_F1s.append(macro_f1) micro_F1s.append(micro_f1) tp_i, fp_i, fn_i = _cnt_stat(test_labels, pred_labels) tp += tp_i fp += fp_i fn += fn_i print "# Average Macro F1 = %3.1f +- %3.2f" % \ (100 * np.mean(macro_F1s), 100 * np.std(macro_F1s)) print "# Average Micro F1 = %3.1f +- %3.2f" % \ (100 * np.mean(micro_F1s), 100 * np.std(micro_F1s)) if tp or fp or fn: print "# F1_{tp,fp} %.2f" % (2. * tp / (2. * tp + fp + fn) * 100) else: print "# F1_{tp,fp} 0. %"
def average_form_factors(qz_lists, F_lists):
    """Average multiple sets of form factors. Need at least two input data sets.

    qz_lists : list of lists
    F_lists : list of lists

    Each list must be in an ascending order, which is the default format
    in NFIT frm.dat.
    """
    if len(qz_lists) < 2:
        raise TypeError('Need more than one form factor set for averaging')
    if len(qz_lists) != len(F_lists):
        raise TypeError('Number of qz and F data sets must agree')
    for qzvalues, Fvalues in zip(qz_lists, F_lists):
        if len(qzvalues) != len(Fvalues):
            raise TypeError('Length of each qz and F data set must agree')
    qz_bin, F_bin = create_binned_data(qz_lists, F_lists)
    normalize_to_each_other(F_bin)
    qz_bin = np.array(qz_bin)
    F_bin = np.array(F_bin)
    avg_qz = np.mean(qz_bin, axis=1)
    err_qz = np.std(qz_bin, axis=1, ddof=1, dtype=np.float64)
    avg_F = np.mean(F_bin, axis=1)
    err_F = np.std(F_bin, axis=1, ddof=1, dtype=np.float64)
    return avg_qz, err_qz, avg_F, err_F
def compute_data(self): breakdown_data = self.exp_config["breakdown"] table_concat = [] for config in breakdown_data["config"]: table = prettytable.PrettyTable(["Time", "TotalHLFracAVG", "TotalHLFracSTD", "LLFracAVG", "LLFracSTD"]) table.set_style(prettytable.PLAIN_COLUMNS) experiments = {} for job in breakdown_data[self.language]["jobs"]: experiments[job["name"]] = [Experiment(os.path.join(self.EXPERIMENT_ROOT, exp_dir, config["strat-path"], job["name"])) for exp_dir in breakdown_data[self.language]["expdir"][config["expdir"]]] for minute in range(self.START_MINUTES, self.END_MINUTES+1, 5): time_stamp = minute * 60 * 1000000 hlfrac_data = np.array( [[float(exp.high_level_tests.countSince(time_stamp))/len(exp.high_level_tests) for exp in exp_list] for exp_list in experiments.itervalues()]) llfrac_data = np.array( [[float(exp.high_level_tests.countSince(time_stamp))/(exp.low_level_tests.countSince(time_stamp) or 1) for exp in exp_list] for exp_list in experiments.itervalues()]) table.add_row([minute, np.average(np.average(hlfrac_data, axis=1)), np.std(np.average(hlfrac_data, axis=1)), np.average(np.average(llfrac_data, axis=1)), np.std(np.average(llfrac_data, axis=1))]) table_concat.append(table.get_string()) with open(self.data_file, "w") as f: print >>f, "\n\n\n".join(table_concat)
def ccf(x, y, unbiased=True):
    '''cross-correlation function for 1d

    Parameters
    ----------
    x, y : arrays
        time series data
    unbiased : boolean
        if True, the denominator for the autocovariance is n-k, otherwise n

    Returns
    -------
    ccf : array
        cross-correlation function of x and y

    Notes
    -----
    This is based on np.correlate, which does full convolution. For very long
    time series it is recommended to use fft convolution instead.

    If unbiased is true, the denominator for the autocovariance is adjusted,
    but the autocorrelation is not an unbiased estimator.
    '''
    cvf = ccovf(x, y, unbiased=unbiased, demean=True)
    return cvf / (np.std(x) * np.std(y))
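# A self-contained numpy sketch of what ccf returns for unbiased=False: a demeaned
# cross-covariance with denominator n, over non-negative lags only, scaled by the
# product of standard deviations. The synthetic series (a copy of y delayed by 3
# samples) is an assumption used purely as a sanity check.
import numpy as np

def _ccovf_sketch(x, y):
    n = len(x)
    xd, yd = x - x.mean(), y - y.mean()
    return np.correlate(xd, yd, 'full')[n - 1:] / n

rng = np.random.RandomState(0)
y = rng.randn(200)
x = np.roll(y, 3)  # x is y delayed by 3 samples (circular shift for simplicity)
cc = _ccovf_sketch(x, y) / (np.std(x) * np.std(y))
print(np.argmax(np.abs(cc)))  # expect the peak near lag 3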
def performFit(self): """ Fit Distribution with triple Guassian function and validate via Kolmogorov-Smirnov test. """ self.fill = False self.x0 = [len(self.Data)/2.,0.0,numpy.std(self.Data), \ len(self.Data)/3.,0.0,numpy.std(self.Data)*2, \ len(self.Data)/4.,0.0,numpy.std(self.Data)*0.5] self.popt, pcov = scipy.optimize.curve_fit(self.triple, self.hists.bin_centers, self.hist, p0=self.x0, sigma=None, absolute_sigma=False) print("in",self.x0) print("out",self.popt) sigmaw = self.getSigmaW() X = numpy.linspace(self.hists.bin_centers[0],self.hists.bin_centers[-1],1000) fithist = numpy.array([self.triple(x,*self.popt) for x in X]) # Kolmogorov smirnov test ks = scipy.stats.ks_2samp(self.hist, fithist) props = dict(boxstyle='round', edgecolor='gray', facecolor='white', linewidth=0.1, alpha=0.5) self.axis.text(0.6, 0.5, r'$KS-test: p='+str(numpy.round(ks[1],3))+'$', fontsize=20, bbox=props ,verticalalignment='top', horizontalalignment='left', transform=self.axis.transAxes) self.axis.text(0.6, 0.6, r'$<\Delta t> ='+str(numpy.round(sigmaw,3))+'ps$', fontsize=20, bbox=props ,verticalalignment='top', horizontalalignment='left', transform=self.axis.transAxes) self.set_plot_options( plot_kwargs={ 'marker':' ','linestyle':'-'}) p = self._plot_datapoints(self.axis, X,fithist, xerr=None , yerr=None) self.plots.append(p) return self
def _computePositionTraditionalControl(self, caseObservations, controlObservations, methylFractionFlag, identifyFlag, testProcedure=_tTest): """Summarize the observed ipds at one template position/strand, using a case-control analysis""" # Compute stats on the observed ipds caseData = caseObservations['data']['ipd'] controlData = controlObservations['data']['ipd'] res = dict() res['refId'] = self.refId # FASTA header name res['refName'] = self.refName strand = res['strand'] = 1 - caseObservations['strand'] tpl = res['tpl'] = caseObservations['tpl'] res['base'] = self.cognateBaseFunc(tpl, strand) res['coverage'] = int(round((caseData.size + controlData.size) / 2.0)) # need a coverage annotation res['caseCoverage'] = caseData.size res['controlCoverage'] = controlData.size res['caseMean'] = caseData.mean().item() res['caseMedian'] = np.median(caseData).item() res['caseStd'] = np.std(caseData).item() res['controlMean'] = controlData.mean().item() res['controlMedian'] = np.median(controlData).item() res['controlStd'] = np.std(controlData).item() trim = (0.001, 0.03) ctrlMean = mstats.trimmed_mean(controlData, trim).item() if abs(ctrlMean) > 1e-3: res['ipdRatio'] = (mstats.trimmed_mean(caseData, trim).item() / ctrlMean) else: res['ipdRatio'] = 1.0 testResults = testProcedure(caseData, controlData) res['testStatistic'] = testResults['testStatistic'] res['pvalue'] = testResults['pvalue'] pvalue = max(sys.float_info.min, res['pvalue']) res['score'] = round(-10.0 * math.log10(pvalue)) # If the methylFractionFlag is set, then estimate fraction using just modelPrediction in the detection case. if methylFractionFlag and pvalue < self.options.pvalue and not identifyFlag: if res['controlCoverage'] > self.options.methylMinCov and res['caseCoverage'] > self.options.methylMinCov: # Instantiate mixture estimation methods: mixture = MixtureEstimationMethods(self.ipdModel.gbmModel.post, self.ipdModel.gbmModel.pre, res, self.options.methylMinCov) x = mixture.detectionMixModelBootstrap(res['controlMean'], caseData) res[FRAC] = x[0] res[FRAClow] = x[1] res[FRACup] = x[2] else: res[FRAC] = np.nan res[FRACup] = np.nan res[FRAClow] = np.nan return res
def plotForce(): figure(size=3,aspect=0.5) subplot(1,2,1) from EvalTraj import plotFF plotFF(vp=351,t=28,f=900,cm=0.6,foffset=8) subplot_annotate() subplot(1,2,2) for i in [1,2,3,4]: R=np.squeeze(np.load('Rdpse%d.npy'%i)) R=stats.nanmedian(R,axis=2)[:,1:,:] dps=np.linspace(-1,1,201)[1:] plt.plot(dps,R[:,:,2].mean(0)); plt.legend([0,0.1,0.2,0.3],loc=3) i=2 R=np.squeeze(np.load('Rdpse%d.npy'%i)) R=stats.nanmedian(R,axis=2)[:,1:,:] mn=np.argmin(R,axis=1) y=np.random.randn(mn.shape[0])*0.00002+0.0438 plt.plot(np.sort(dps[mn[:,2]]),y,'+',mew=1,ms=6,mec=[ 0.39 , 0.76, 0.64]) plt.xlabel('Displacement of Force Origin') plt.ylabel('Average Net Force Magnitude') hh=dps[mn[:,2]] err=np.std(hh)/np.sqrt(hh.shape[0])*stats.t.ppf(0.975,hh.shape[0]) err2=np.std(hh)/np.sqrt(hh.shape[0])*stats.t.ppf(0.75,hh.shape[0]) m=np.mean(hh) print m, m-err,m+err np.save('force',[m, m-err,m+err,m-err2,m+err2]) plt.xlim([-0.5,0.5]) plt.ylim([0.0435,0.046]) plt.grid(b=True,axis='x') subplot_annotate()
def explore_city_data(city_data):
    """Calculate the Boston housing statistics."""

    # Get the labels and features from the housing data
    housing_prices = city_data.target
    housing_features = city_data.data

    ###################################
    ### Step 1. YOUR CODE GOES HERE ###
    ###################################

    # Please calculate the following values using the Numpy library
    print "Size of data (number of houses)"
    print np.size(housing_prices)
    print "Number of features"
    print np.size(housing_features, 1)
    print "Minimum price"
    print np.min(housing_prices)
    print "Maximum price"
    print np.max(housing_prices)
    print "Calculate mean price"
    print np.mean(housing_prices)
    print "Calculate median price"
    print np.median(housing_prices)
    print "Calculate standard deviation"
    print np.std(housing_prices)
def prepare_results(self, initial_pops=[50, 100]):
    """
    Analyzes data from a batch run, preparing it for plotting.
    """
    self.initial_pops = initial_pops
    self.result_dict = {}
    for pop in self.initial_pops:
        self.result_dict[pop] = {}
        print('Starting batch for %d.' % pop)
        batch = BatchDriver(self.num_sims)
        results = batch.drive(initial_pop=pop)
        stdevs = []
        for indx, result in enumerate(results):
            adults = result['adults']
            minus_120 = len(adults) - 120
            last_120 = adults[minus_120:]
            stdev = np.std(last_120)
            stdevs.append(stdev)
        stdev_of_stdev = np.std(stdevs)
        self.result_dict[pop]['mean_stdev'] = np.mean(stdevs)
        self.result_dict[pop]['ci'] = (1.96 * stdev_of_stdev) / math.sqrt(self.num_sims)
    print(self.result_dict)
def main(): train = pd.DataFrame.from_csv('train.csv') places_index = train['place_id'].values places_loc_sqr_wei = [] for i, place_id in enumerate(train['place_id'].unique()): if not i % 100: print(i) place_df = train.iloc[places_index == place_id] place_weights_acc_sqred = 1 / (place_df['accuracy'].values ** 2) places_loc_sqr_wei.append([place_id, np.average(place_df['x'].values, weights=place_weights_acc_sqred), np.std(place_df['x'].values), np.average(place_df['y'].values, weights=place_weights_acc_sqred), np.std(place_df['y'].values), np.average(np.log(place_df['accuracy'].values)), np.std(np.log(place_df['accuracy'].values)), place_df.shape[0]]) # print(places_loc_sqr_wei[-1]) # plt.hist2d(place_df['x'].values, place_df['y'].values, bins=100) # plt.show() plt.hist(np.log(place_df['accuracy'].values), bins=20) plt.show() places_loc_sqr_wei = np.array(places_loc_sqr_wei) column_names = ['x_mean', 'x_sd', 'y_mean', 'y_sd', 'accuracy_mean', 'accuracy_sd', 'n_persons'] places_loc_sqr_wei = pd.DataFrame(data=places_loc_sqr_wei[:, 1:], index=places_loc_sqr_wei[:, 0], columns=column_names) now = str(datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")) places_loc_sqr_wei.to_csv('places_loc_sqr_weights_%s.csv' % now)
def test_stats(x): coords_vals = year_sample_dict_data[x] x_nodes = [] y_nodes = [] z_nodes = [] values_list = [] for item in coords_vals: x_nodes.append(item[0]) y_nodes.append(item[1]) z_nodes.append(item[2]) values_list.append(coords_vals[item]) xs = x_nodes ys = y_nodes zs = z_nodes values_list = np.array(values_list) all_data = year_stack[x, :time_len, :lat_end, :lon_end] ### New and improved and faster!!! annual_mean = np.mean(all_data) sample_mean = np.mean(values_list) annual_stdev = np.std(all_data) sample_stdev = np.std(values_list) annual_max = np.max(all_data) annual_min = np.min(all_data) sample_max = np.max(all_data) sample_min = np.min(all_data) annual_range = np.abs(annual_max - annual_min) sample_range = np.abs(sample_max - sample_min) fitness = np.abs(annual_mean-sample_mean) + np.abs(annual_stdev - sample_stdev) + np.abs(annual_range - sample_range) return fitness, annual_mean, sample_mean, annual_stdev, sample_stdev
def rectif(z_in, contrast=contrast, method=method, verbose=False):
    """
    Transforms an image (can be 1, 2 or 3D) with normal histogram into
    a 0.5 centered image of determined contrast.
    method is either 'Michelson' or 'Energy'

    Phase randomization takes any image and turns it into Gaussian-distributed
    noise of the same power (or, equivalently, variance).
    # See: Peter J. Bex J. Opt. Soc. Am. A/Vol. 19, No. 6/June 2002
    Spatial frequency, phase, and the contrast of natural images
    """
    z = z_in.copy()
    # Final rectification
    if verbose:
        print('Before Rectification of the frames')
        print('Mean=', np.mean(z[:]), ', std=', np.std(z[:]),
              ', Min=', np.min(z[:]), ', Max=', np.max(z[:]),
              ' Abs(Max)=', np.max(np.abs(z[:])))

    z -= np.mean(z[:])  # this should be true *on average* in MotionClouds
    if (method == 'Michelson'):
        z = (.5 * z / np.max(np.abs(z[:])) * contrast + .5)
    else:
        z = (.5 * z / np.std(z[:]) * contrast + .5)

    if verbose:
        print('After Rectification of the frames')
        print('Mean=', np.mean(z[:]), ', std=', np.std(z[:]),
              ', Min=', np.min(z[:]), ', Max=', np.max(z[:]))
        print('percentage pixels clipped=', np.sum(np.abs(z[:]) > 1.) * 100 / z.size)
    return z
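# Hedged usage sketch for rectif: rescale a Gaussian-noise "frame" into a
# 0.5-centered array. The contrast value and array shape are assumptions
# (the module-level defaults `contrast` and `method` are not shown here).
import numpy as np

frame = np.random.randn(64, 64)
out = rectif(frame, contrast=1.0, method='Energy')
print(np.mean(out), np.std(out))  # mean near 0.5, std near 0.5 * contrast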
def normalize( self, verbose=False): #list_of_points, point_count, stroke_count = self.getListOfXYPoints( self ) #coords = np.array( list_of_points ).reshape( point_count, 2 ) coords = self.listCoordinates() point_count,ccrd = coords.shape stroke_count = len(self.strokes) mean = np.mean(coords, 0) sdev = np.std(coords, 0) coords = coords - mean if sdev[0] != 0 and sdev[1] != 0: coords = coords * ( 1 / sdev ) new_sketch = self.constructNormalizedSketch(coords, point_count, stroke_count ) if verbose: print(mean, sdev) print(np.std(coords, 0)) print(np.mean(coords, 0)) for i in range(0, point_count): print( coords[i, 0], coords[i, 1] ) plt.figure(1) allpts = new_sketch.listCoordinates() plt.plot(allpts[:,0],allpts[:,1]) plt.xlabel('x') plt.ylabel('y') plt.title('New Sketch with Normalized Points') new_sketch.printContents() return new_sketch
def find_velocity(times, frames, maxshift=10): frame_times = np.array(times) nframes = len(frames) last_idx = np.argmax(frame_times) velocities = [] npairs = nframes*(nframes-1)/2 denom = 0 maxshift = 10 for i in range(nframes): for j in range(i+1, nframes): dt = frame_times[i] - frame_times[j] offset = findshift(frames[i], frames[j], maxshift) if abs(max(offset)) > maxshift: continue denom += 1 print (i, j, offset, dt) velocity = -offset/dt velocities.append(velocity) denom = min(1, denom-1) velocities = np.array(velocities) (vx1, vy1) = velocities[:,0].mean(), velocities[:,1].mean() vx = np.mean(velocities[:,0]) vy = np.mean(velocities[:,1]) sx = np.std(velocities[:,0])/denom + 0.2*abs(vx) + 5e-4 sy = np.std(velocities[:,1])/denom + 0.2*abs(vy) + 5e-4 return (vx, vy, sx, sy)
def mcnoise(data, noise_std, n, noise_scaling=1.):
    """
    Parameters
    ----------
    data : ndarray
        Array of data.
    noise_std : float
        Standard deviation of the noise
    n : int
        Number of repetitions
    noise_scaling : float
        Scaling factor for noise

    Returns
    -------
    variance, variance error, skewness, skewness error, kurtosis, kurtosis error
    """
    noise_arr = np.random.normal(0, noise_std, (n, data.size)) * noise_scaling
    var_sample = np.var(data + noise_arr, axis=1)
    skew_sample = skew(data + noise_arr, axis=1)
    kurt_sample = kurtosis(data + noise_arr, axis=1)
    var_val = np.mean(var_sample)
    skew_val = np.mean(skew_sample)
    kurt_val = np.mean(kurt_sample)
    var_err = np.std(var_sample)
    skew_err = np.std(skew_sample)
    kurt_err = np.std(kurt_sample)
    return var_val, var_err, skew_val, skew_err, kurt_val, kurt_err
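# Hedged Monte Carlo example for mcnoise, assuming the usual imports
# (numpy as np, skew/kurtosis from scipy.stats) are in scope as in the function body.
import numpy as np
from scipy.stats import skew, kurtosis

data = np.random.normal(0.0, 1.0, 1000)
var_val, var_err, skew_val, skew_err, kurt_val, kurt_err = mcnoise(data, noise_std=0.5, n=200)
print(var_val, '+/-', var_err)  # total variance should sit near 1.0 + 0.5**2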
def gaussian_kernel(self,xvalues,yvalues,r200,normalization=100,scale=10,xres=200,yres=220,xmax=6.0,ymax=5000.0,adj=20): """ Uses a 2D gaussian kernel to estimate the density of the phase space. As of now, the maximum radius extends to 6Mpc and the maximum velocity allowed is 5000km/s The "q" parameter is termed "scale" here which we have set to 10 as default, but can go as high as 50. "normalization" is simply H0 "x/yres" can be any value, but are recommended to be above 150 "adj" is a custom value and changes the size of uniform filters when used (not normally needed) """ self.x_scale = xvalues/xmax*xres self.y_scale = ((yvalues+ymax)/(normalization*scale))/((ymax*2.0)/(normalization*scale))*yres img = np.zeros((xres+1,yres+1)) self.x_range = np.linspace(0,xmax,xres+1) self.y_range = np.linspace(-ymax,ymax,yres+1) for j in range(xvalues.size): img[self.x_scale[j],self.y_scale[j]] += 1 #Estimate kernel sizes #Uniform #self.ksize = 3.12/(xvalues.size)**(1/6.0)*((np.var(self.x_scale[xvalues<r200])+np.var(self.y_scale[xvalues<r200]))/2.0)**0.5/adj #if self.ksize < 3.5: # self.ksize = 3.5 #Gaussian self.ksize_x = (4.0/(3.0*xvalues.size))**(1/5.0)*np.std(self.x_scale[xvalues<r200]) self.ksize_y = (4.0/(3.0*yvalues.size))**(1/5.0)*np.std(self.y_scale[xvalues<r200]) #smooth with estimated kernel sizes #img = ndi.uniform_filter(img, (self.ksize,self.ksize))#,mode='reflect') self.img = ndi.gaussian_filter(img, (self.ksize_y,self.ksize_x),mode='reflect') self.img_grad = ndi.gaussian_gradient_magnitude(img, (self.ksize_y,self.ksize_x)) self.img_inf = ndi.gaussian_gradient_magnitude(ndi.gaussian_gradient_magnitude(img, (self.ksize_y,self.ksize_x)), (self.ksize_y,self.ksize_x))
def getDftBins(data=None, sampleRate=None, low=100, high=8000, chunk=64):
    """Return DFT (discrete Fourier transform) of ``data``, doing so in
    time-domain bins, each of size ``chunk`` samples.

    e.g., for getting FFT magnitudes in a ms-by-ms manner.

    If given a sampleRate, the data are bandpass filtered (low, high).
    """
    # good to reshape & vectorize data rather than use a python loop
    if data is None:
        data = []
    bins = []
    i = chunk
    if sampleRate:
        # just to get freq vector
        _junk, freq = getDft(data[:chunk], sampleRate)
        band = (freq > low) & (freq < high)  # band (frequency range)
    while i <= len(data):
        magn = getDft(data[i - chunk:i])
        if sampleRate:
            bins.append(np.std(magn[band]))  # filtered by frequency
        else:
            bins.append(np.std(magn))  # unfiltered
        i += chunk
    return np.array(bins)
def calculateSE(data):
    standardError = np.std(data, ddof=1) / np.sqrt(len(data) - 1)
    return standardError
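# Quick check of calculateSE against the more common definition
# SE = sample_std / sqrt(n); note this function divides by sqrt(n - 1) instead,
# so the two differ slightly for small samples (kept as in the original).
import numpy as np

data = np.array([2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0])
print(calculateSE(data))
print(np.std(data, ddof=1) / np.sqrt(len(data)))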
def extract(self): global LEVEL, RADIUS, ANGLE # print '*** AGA - extracting level, radius and angle info ***' # z, x, self.AGA_types_ci = self.extract_parameter_errors(self.AGA_typeProbHistory, # ANGLE) self.aga_levels, self.aga_levels_std_dev, self.aga_levels_ci = self.extract_parameter_errors( self.AGA_errors, LEVEL) # print 'AGA - levels OK' self.aga_radius, self.aga_radius_std_dev, self.aga_radius_ci = self.extract_parameter_errors( self.AGA_errors, RADIUS) # print 'AGA - radius OK' self.aga_angles, self.aga_angles_std_dev, self.aga_angles_ci = self.extract_parameter_errors( self.AGA_errors, ANGLE) # print 'AGA - angles OK' self.aga_level_error_mean = np.mean(np.array(self.aga_levels)) self.aga_angle_error_mean = np.mean(np.array(self.aga_angles)) self.aga_radius_error_mean = np.mean(np.array(self.aga_radius)) self.aga_type_probability_mean = np.mean( np.array(self.AGA_typeProbHistory)) self.aga_level_error_ci = np.std(np.array(self.aga_levels)) self.aga_angle_error_ci = np.std(np.array(self.aga_angles)) self.aga_radius_error_ci = np.std(np.array(self.aga_radius)) self.aga_type_probability_ci = np.std( np.array(self.AGA_typeProbHistory)) # print '*** ABU - extracting level, radius and angle info ***' self.abu_levels, self.abu_levels_std_dev, self.abu_levels_ci = self.extract_parameter_errors( self.ABU_errors, LEVEL) # print 'ABU - levels OK' self.abu_radius, self.abu_radius_std_dev, self.abu_radius_ci = self.extract_parameter_errors( self.ABU_errors, RADIUS) # print 'ABU - radius O self.abu_angles, self.abu_angles_std_dev, self.abu_angles_ci = self.extract_parameter_errors( self.ABU_errors, ANGLE) # print 'ABU - angles OK' self.abu_level_error_mean = np.mean(np.array(self.abu_levels)) self.abu_angle_error_mean = np.mean(np.array(self.abu_angles)) self.abu_radius_error_mean = np.mean(np.array(self.abu_radius)) self.abu_type_probability_mean = np.mean( np.array(self.ABU_typeProbHistory)) self.abu_level_error_ci = np.std(np.array(self.abu_levels)) self.abu_angle_error_ci = np.std(np.array(self.abu_angles)) self.abu_radius_error_ci = np.std(np.array(self.abu_radius)) self.abu_type_probability_ci = np.std( np.array(self.ABU_typeProbHistory)) # print '*** OGE - extracting level, radius and angle info ***' self.OGE_levels, self.OGE_levels_std_dev, self.OGE_levels_ci = self.extract_parameter_errors( self.OGE_errors, LEVEL) # print 'OGE - levels OK' self.OGE_radius, self.OGE_radius_std_dev, self.OGE_radius_ci = self.extract_parameter_errors( self.OGE_errors, RADIUS) # print 'OGE - radius OK' self.OGE_angles, self.OGE_angles_std_dev, self.OGE_angles_ci = self.extract_parameter_errors( self.OGE_errors, ANGLE) # print 'OGE - angles OK' self.oge_level_error_mean = np.mean(np.array(self.OGE_levels)) self.oge_angle_error_mean = np.mean(np.array(self.OGE_angles)) self.oge_radius_error_mean = np.mean(np.array(self.OGE_radius)) self.oge_type_probability_mean = np.mean( np.array(self.OGE_typeProbHistory)) self.oge_level_error_ci = np.std(np.array(self.OGE_levels)) self.oge_angle_error_ci = np.std(np.array(self.OGE_angles)) self.oge_radius_error_ci = np.std(np.array(self.OGE_radius)) self.oge_type_probability_ci = np.std( np.array(self.OGE_typeProbHistory)) if len(self.OGE_me_errors) > 0: # print '*** OGE - extracting level, radius and angle info ***' self.OGE_me_levels, self.OGE_me_levels_std_dev, self.OGE_me_levels_ci = self.extract_parameter_errors( self.OGE_me_errors, LEVEL) # print 'OGE - levels OK' self.OGE_me_radius, self.OGE_me_radius_std_dev, self.OGE_me_radius_ci = 
self.extract_parameter_errors( self.OGE_me_errors, RADIUS) # print 'OGE - radius OK' self.OGE_me_angles, self.OGE_me_angles_std_dev, self.OGE_me_angles_ci = self.extract_parameter_errors( self.OGE_me_errors, ANGLE) # print 'OGE - angles OK' self.oge_me_level_error_mean = np.mean(np.array( self.OGE_me_levels)) self.oge_me_angle_error_mean = np.mean(np.array( self.OGE_me_angles)) self.oge_me_radius_error_mean = np.mean( np.array(self.OGE_me_radius)) self.oge_me_type_probability_mean = np.mean( np.array(self.OGE_me_typeProbHistory)) self.oge_me_level_error_ci = np.std(np.array(self.OGE_me_levels)) self.oge_me_angle_error_ci = np.std(np.array(self.OGE_me_angles)) self.oge_me_radius_error_ci = np.std(np.array(self.OGE_me_radius)) self.oge_me_type_probability_ci = np.std( np.array(self.OGE_me_typeProbHistory)) if len(self.OGE_mo_errors) > 0: # print '*** OGE - extracting level, radius and angle info ***' self.OGE_mo_levels, self.OGE_mo_levels_std_dev, self.OGE_mo_levels_ci = self.extract_parameter_errors( self.OGE_mo_errors, LEVEL) # print 'OGE - levels OK' self.OGE_mo_radius, self.OGE_mo_radius_std_dev, self.OGE_mo_radius_ci = self.extract_parameter_errors( self.OGE_mo_errors, RADIUS) # print 'OGE - radius OK' self.OGE_mo_angles, self.OGE_mo_angles_std_dev, self.OGE_mo_angles_ci = self.extract_parameter_errors( self.OGE_mo_errors, ANGLE) # print 'OGE - angles OK' self.oge_mo_level_error_mean = np.mean(np.array( self.OGE_mo_levels)) self.oge_mo_angle_error_mean = np.mean(np.array( self.OGE_mo_angles)) self.oge_mo_radius_error_mean = np.mean( np.array(self.OGE_mo_radius)) self.oge_mo_type_probability_mean = np.mean( np.array(self.OGE_mo_typeProbHistory)) self.oge_mo_level_error_ci = np.std(np.array(self.OGE_mo_levels)) self.oge_mo_angle_error_ci = np.std(np.array(self.OGE_mo_angles)) self.oge_mo_radius_error_ci = np.std(np.array(self.OGE_mo_radius)) self.oge_mo_type_probability_ci = np.std( np.array(self.OGE_mo_typeProbHistory)) if len(self.pomcp_errors) > 0: # print '*** pomcp - extracting level, radius and angle info ***' self.pomcp_levels, self.pomcp_levels_std_dev, self.pomcp_levels_ci = self.extract_parameter_errors( self.pomcp_errors, LEVEL) # print 'pomcp - levels OK' self.pomcp_radius, self.pomcp_radius_std_dev, self.pomcp_radius_ci = self.extract_parameter_errors( self.pomcp_errors, RADIUS) # print 'pomcp - radius OK' self.pomcp_angles, self.pomcp_angles_std_dev, self.pomcp_angles_ci = self.extract_parameter_errors( self.pomcp_errors, ANGLE) # print 'pomcp - angles OK' # z, x, self.pomcp_types_ci = self.extract_parameter_errors(self.pomcp_typeProbHistory, self.pomcp_level_error_mean = np.mean(np.array(self.pomcp_levels)) self.pomcp_angle_error_mean = np.mean(np.array(self.pomcp_angles)) self.pomcp_radius_error_mean = np.mean(np.array(self.pomcp_radius)) self.pomcp_type_probability_mean = np.mean( np.array(self.pomcp_typeProbHistory)) self.pomcp_level_error_ci = np.std(np.array(self.pomcp_levels)) self.pomcp_angle_error_ci = np.std(np.array(self.pomcp_angles)) self.pomcp_radius_error_ci = np.std(np.array(self.pomcp_radius)) self.pomcp_type_probability_ci = np.std( np.array(self.pomcp_typeProbHistory))
def asteroids_plot(self, image_path=None, ra=None, dec=None, odate=None, time_travel=1, radi=6, max_mag=20.0, circle_color='yellow', arrow_color='red', invert_yaxis="True"): """ Source plot module. @param image_path: data part of the FITS image @type image_path: numpy array @param ra: RA coordinate of target area. @type ra: str in "HH MM SS" @param dec: DEC coordinate of target area @type dec: str in "+DD MM SS" @param radi: Radius in arcmin. @type radi: float @param odate: Ephemeris date of observation in date @type odate: "2017-08-15T19:50:00.95" format in str @param time_travel: Jump into time after given date (in hour). @type time_travel: float @param max_mag: Limit magnitude to be queried object(s) @type max_mag: float @param circle_color: Color of the asteroids marks @type circle_color: str @param arrow_color: Color of the asteroids direction marks @type arrow_color: str @param invert_yaxis: invert y axis or not. @type invert_yaxis: bool @returns: boolean """ from .catalog import Query # filename = get_pkg_data_filename(image_path) rcParams['figure.figsize'] = [10., 8.] # rcParams.update({'font.size': 10}) if image_path: hdu = fits.open(image_path)[0] elif not image_path and ra and dec and odate: co = coordinates.SkyCoord('{0} {1}'.format(ra, dec), unit=(u.hourangle, u.deg), frame='icrs') print('Target Coordinates:', co.to_string(style='hmsdms', sep=':'), 'in {0} arcmin'.format(radi)) try: server_img = SkyView.get_images(position=co, survey=['DSS'], radius=radi * u.arcmin) hdu = server_img[0][0] except Exception as e: print("SkyView could not get the image from DSS server.") print(e) raise SystemExit wcs = WCS(hdu.header) data = hdu.data.astype(float) bkg = sep.Background(data) # bkg_image = bkg.back() # bkg_rms = bkg.rms() data_sub = data - bkg m, s = np.mean(data_sub), np.std(data_sub) ax = plt.subplot(projection=wcs) plt.imshow(data_sub, interpolation='nearest', cmap='gray', vmin=m - s, vmax=m + s, origin='lower') ax.coords.grid(True, color='white', ls='solid') ax.coords[0].set_axislabel('Galactic Longitude') ax.coords[1].set_axislabel('Galactic Latitude') overlay = ax.get_coords_overlay('icrs') overlay.grid(color='white', ls='dotted') overlay[0].set_axislabel('Right Ascension (ICRS)') overlay[1].set_axislabel('Declination (ICRS)') sb = Query() ac = AstCalc() if image_path: fo = FitsOps(image_path) if not odate: odate = fo.get_header('date-obs') else: odate = odate ra_dec = ac.center_finder(image_path, wcs_ref=True) elif not image_path and ra and dec and odate: odate = odate ra_dec = [co.ra, co.dec] request0 = sb.find_skybot_objects(odate, ra_dec[0].degree, ra_dec[1].degree, radius=radi) if request0[0]: asteroids = request0[1] elif request0[0] is False: print(request0[1]) raise SystemExit request1 = sb.find_skybot_objects(odate, ra_dec[0].degree, ra_dec[1].degree, radius=float(radi), time_travel=time_travel) if request1[0]: asteroids_after = request1[1] elif request1[0] is False: print(request1[1]) raise SystemExit for i in range(len(asteroids)): if float(asteroids['m_v'][i]) <= max_mag: c = coordinates.SkyCoord('{0} {1}'.format( asteroids['ra(h)'][i], asteroids['dec(deg)'][i]), unit=(u.hourangle, u.deg), frame='icrs') c_after = coordinates.SkyCoord('{0} {1}'.format( asteroids_after['ra(h)'][i], asteroids_after['dec(deg)'][i]), unit=(u.hourangle, u.deg), frame='icrs') r = FancyArrowPatch((c.ra.degree, c.dec.degree), (c_after.ra.degree, c_after.dec.degree), arrowstyle='->', mutation_scale=10, transform=ax.get_transform('icrs')) p = Circle((c.ra.degree, c.dec.degree), 0.005, 
edgecolor=circle_color, facecolor='none', transform=ax.get_transform('icrs')) ax.text(c.ra.degree, c.dec.degree - 0.007, asteroids['name'][i], size=12, color='black', ha='center', va='center', transform=ax.get_transform('icrs')) r.set_facecolor('none') r.set_edgecolor(arrow_color) ax.add_patch(p) ax.add_patch(r) # plt.gca().invert_xaxis() if invert_yaxis == "True": plt.gca().invert_yaxis() plt.show() print(asteroids) return True
def make_local_connectivity_scorr(func_img, clust_mask_img, thresh): """ Constructs a spatially constrained connectivity matrix from a fMRI dataset. The weights w_ij of the connectivity matrix W correspond to the spatial correlation between the whole brain FC maps generated from the time series from voxel i and voxel j. Connectivity is only calculated between a voxel and the 27 voxels in its 3D neighborhood (face touching and edge touching). Parameters ---------- func_img : Nifti1Image 4D Nifti1Image containing fMRI data. clust_mask_img : Nifti1Image 3D NIFTI file containing a mask, which restricts the voxels used in the analysis. thresh : str Threshold value, correlation coefficients lower than this value will be removed from the matrix (set to zero). Returns ------- W : Compressed Sparse Matrix A Scipy sparse matrix, with weights corresponding to the spatial correlation between the time series from voxel i and voxel j References ---------- .. Adapted from PyClusterROI """ from scipy.sparse import csc_matrix from scipy import prod from itertools import product from pynets.fmri.clustools import indx_1dto3d, indx_3dto1d neighbors = np.array(sorted(sorted(sorted([list(x) for x in list(set(product({-1, 0, 1}, repeat=3)))], key=lambda k: (k[0])), key=lambda k: (k[1])), key=lambda k: (k[2]))) # Read in the mask msz = clust_mask_img.shape # Convert the 3D mask array into a 1D vector mskdat = np.reshape(np.asarray(clust_mask_img.dataobj).astype('bool'), prod(msz)) # Determine the 1D coordinates of the non-zero # elements of the mask iv = np.nonzero(mskdat)[0] sz = func_img.shape # Reshape fmri data to a num_voxels x num_timepoints array imdat = np.reshape(np.asarray(func_img.dataobj).astype('float32'), (prod(sz[:3]), sz[3])) # Mask the datset to only the in-mask voxels imdat = imdat[iv, :] imdat_sz = imdat.shape # Z-score fmri time courses, this makes calculation of the # correlation coefficient a simple matrix product imdat_s = np.tile(np.std(imdat, 1), (imdat_sz[1], 1)).T # Replace 0 with really large number to avoid div by zero imdat_s[imdat_s == 0] = 1000000 imdat_m = np.tile(np.mean(imdat, 1), (imdat_sz[1], 1)).T imdat = (imdat - imdat_m) / imdat_s # Set values with no variance to zero imdat[imdat_s == 0] = 0 imdat[np.isnan(imdat)] = 0 # Remove voxels with zero variance, do this here # so that the mapping will be consistent across # subjects vndx = np.nonzero(np.var(imdat, 1) != 0)[0] iv = iv[vndx] m = len(iv) print(m, ' # of non-zero valued or non-zero variance voxels in the mask') # Construct a sparse matrix from the mask msk = csc_matrix((vndx + 1, (iv, np.zeros(m))), shape=(prod(msz), 1), dtype=np.float32) sparse_i = [] sparse_j = [] sparse_w = [[]] for i in range(0, m): if i % 1000 == 0: print('voxel #', i) # Convert index into 3D and calculate neighbors, then convert resulting 3D indices into 1D ndx1d = indx_3dto1d(indx_1dto3d(iv[i], sz[:-1]) + neighbors, sz[:-1]) # Convert 1D indices into masked versions ondx1d = msk[ndx1d].todense() # Exclude indices not in the mask ndx1d = ndx1d[np.nonzero(ondx1d)[0]].flatten() ondx1d = np.array(ondx1d[np.nonzero(ondx1d)[0]]) ondx1d = ondx1d.flatten() - 1 # Keep track of the index corresponding to the "seed" nndx = np.nonzero(ndx1d == iv[i])[0] # Extract the time courses corresponding to the "seed" # and 3D neighborhood voxels tc = np.array(imdat[ondx1d.astype('int'), :]) # Ensure that the "seed" has variance, if not just skip it if np.var(tc[nndx, :]) == 0: continue # Calculate functional connectivity maps for "seed" # and 3D neighborhood voxels R 
= np.corrcoef(np.dot(tc, imdat.T) / (sz[3] - 1)) if np.linalg.matrix_rank(R) == 1: R = np.reshape(R, (1, 1)) # Set nans to 0 R[np.isnan(R)] = 0 # Set values below thresh to 0 R[R < thresh] = 0 # Calculate the spatial correlation between FC maps if np.linalg.matrix_rank(R) == 0: R = np.reshape(R, (1, 1)) # Keep track of the indices and the correlation weights # to construct sparse connectivity matrix sparse_i = np.append(sparse_i, ondx1d, 0) sparse_j = np.append(sparse_j, (ondx1d[nndx]) * np.ones(len(ondx1d))) sparse_w = np.append(sparse_w, R[nndx, :], 1) # Ensure that the weight vector is the correct shape sparse_w = np.reshape(sparse_w, prod(np.shape(sparse_w))) # Concatenate the i, j, and w_ij vectors outlist = sparse_i outlist = np.append(outlist, sparse_j) outlist = np.append(outlist, sparse_w) # Calculate the number of non-zero weights in the connectivity matrix n = len(outlist) / 3 # Reshape the 1D vector read in from infile in to a 3xN array outlist = np.reshape(outlist, (3, int(n))) m = max(max(outlist[0, :]), max(outlist[1, :])) + 1 # Make the sparse matrix, CSC format is supposedly efficient for matrix arithmetic W = csc_matrix((outlist[2, :], (outlist[0, :], outlist[1, :])), shape=(int(m), int(m)), dtype=np.float32) del imdat, msk, mskdat, outlist, m, sparse_i, sparse_j, sparse_w return W
def random_forest(self, col_to_predict, cols=None, cols_to_remove=None, num_splits=25, graph=True, num_vars_graph=10): cols = [] if cols is None else cols cols_to_remove = [] if cols_to_remove is None else cols_to_remove if cols: cols_input = cols else: # TODO: Validate if random forest requires all variables to be numeric (no categoric). cols_input = list(self.dataset) if col_to_predict in cols_input: cols_input.remove(col_to_predict) for col in cols_to_remove: if col in cols_input: cols_input.remove(col) logger.info("*** Training random forest model...") # logger.info("*** Input features: ") # logger.info(cols_input) # X_train, X_test, y_train, y_test = train_test_split( # self.dataset[cols_input], # self.dataset[col_to_predict], # random_state = 777 # ) # output_model.fit(X_train, y_train) # prediction = output_model.predict(X = X_test) # r2 = r2_score(y_true=y_test, y_pred=prediction) # logger.info(f"R2 coefficient (Using training data): {r2}") # mse = mean_squared_error( # y_true = y_test, # y_pred = prediction, # squared = False # ) model = RandomForestRegressor(n_estimators=10, criterion='mse', max_depth=None, max_features='auto', oob_score=False, n_jobs=-1, random_state=777) cv = ShuffleSplit(n_splits=num_splits, test_size=0.3, random_state=777) output_models = cross_validate( model, self.dataset[cols_input], self.dataset[col_to_predict], cv=cv, scoring=["r2", "neg_mean_squared_error" ], # The smallest the number the better return_estimator=True, ) # Use abs since the scoring metric neg_mean_squared_error is negative in order # to follow the convention: higher return values are better than lower return # values when evaluating the models. model_mean = abs(np.mean(output_models["test_neg_mean_squared_error"])) model_standard_deviation = abs( np.std(output_models["test_neg_mean_squared_error"])) max_r2 = np.amax(output_models["test_r2"]) logger.info(f"*** After {num_splits} folds using cross validation:") logger.info(f" The average MSE is: {model_mean}") logger.info( f" The standard deviation of the MSE is: {model_standard_deviation}" ) logger.info(f" The maximum R2 is: {max_r2}") # Choose the best model to show variable importance and graphs max_estimator = np.amax(output_models["test_neg_mean_squared_error"]) max_estimator_index = np.where( output_models["test_neg_mean_squared_error"] == max_estimator)[0][0] best_model = output_models["estimator"][max_estimator_index] # Get the variable importance of the best model found. importances = best_model.feature_importances_ # Summarize feature importance. cols_importance = list(zip(cols_input, importances)) cols_importance_ordered = sorted(cols_importance, key=lambda x: x[1], reverse=True) for col, importance in cols_importance_ordered: logger.info(f"Feature: {col}, Score: {importance}") if graph: x_axis = [ "\n".join(wrap(x, 20)) for x in list(zip( *cols_importance_ordered))[0][:num_vars_graph] ] y_axis = list(zip(*cols_importance_ordered))[1][:num_vars_graph] plt.figure(figsize=(9, 4)) plt.title("Feature Importance - Random Forest") plt.bar(x_axis, y_axis) plt.xticks(rotation=90) plt.margins(x=0, y=0.1) plt.show() best_model.top_vars_graph = zip(x_axis, y_axis) return best_model, output_models
def linear_regression_ridge(self, col_to_predict, cols=None, cols_to_remove=None, num_splits=25, graph=True, num_vars_graph=10): cols = [] if cols is None else cols cols_to_remove = [] if cols_to_remove is None else cols_to_remove if cols: cols_input = cols else: cols_input = list(self.dataset) if col_to_predict in cols_input: cols_input.remove(col_to_predict) for col in cols_to_remove: if col in cols_input: cols_input.remove(col) logger.info("*** Training linear regression model...") reg = Ridge() cv = ShuffleSplit(n_splits=num_splits, test_size=0.3, random_state=123) output_models = cross_validate( reg, self.dataset[cols_input], self.dataset[col_to_predict], cv=cv, scoring=["r2", "neg_mean_squared_error" ], # The higher the number the better return_estimator=True, ) # Use abs since the scoring metric neg_mean_squared_error is negative in order # to follow the convention: higher return values are better than lower return # values when evaluating the models. model_mean = abs(np.mean(output_models["test_neg_mean_squared_error"])) model_standard_deviation = abs( np.std(output_models["test_neg_mean_squared_error"])) max_r2 = np.amax(output_models["test_r2"]) logger.info(f"*** After {num_splits} folds using cross validation:") logger.info(f" The average MSE is: {model_mean}") logger.info( f" The standard deviation of the MSE is: {model_standard_deviation}" ) logger.info(f" The maximum R2 is: {max_r2}") # Choose the best model to show variable importance and graphs max_estimator = np.amax(output_models["test_neg_mean_squared_error"]) max_estimator_index = np.where( output_models["test_neg_mean_squared_error"] == max_estimator)[0][0] best_model = output_models["estimator"][max_estimator_index] # Get the variable importance of the best model found. In this model the absolute # value measures the importance of each feature. importances = tuple(abs(item) for item in best_model.coef_) # Summarize feature importance. cols_importance = list(zip(cols_input, importances)) cols_importance_ordered = sorted(cols_importance, key=lambda x: x[1], reverse=True) for col, importance in cols_importance_ordered: logger.info(f"Feature: {col}, Score: {importance}") if graph: x_axis = [ "\n".join(wrap(x, 20)) for x in list(zip( *cols_importance_ordered))[0][:num_vars_graph] ] y_axis = list(zip(*cols_importance_ordered))[1][:num_vars_graph] plt.figure(figsize=(9, 4)) plt.title("Feature Importance - Linear Regression") plt.bar(x_axis, y_axis) plt.xticks(rotation=90) plt.margins(x=0, y=0.1) plt.show() best_model.top_vars_graph = zip(x_axis, y_axis) return best_model, output_models
def reclassify(accuracy, evidence):
    """
    This function reclassifies correct responses that are likely to be correct by chance as incorrect
    responses in discrimination experiments, using some evidence (e.g. response times). We've recomputed
    140 classification images with accuracies reclassified by response time, for example, and increased
    SNR by more than 10% on average with little bias (Gosselin et al., submitted).

    The function takes 2 inputs: _accuracy_, a vector equal to 0 when the response to a trial was
    incorrect and to 1 when it was correct; and _evidence_, a vector of the same length as _accuracy_
    giving the reclassification evidence for each corresponding trial, such as the response times. Note
    that the average of the evidence for incorrect responses can be either greater or smaller than the
    average of the evidence for correct responses, but it must be different.

    The function has 2 outputs: _accuracy_reclass_, a vector of the same length as _accuracy_ equal to 0
    when the response to a trial was incorrect or when a correct response was reclassified as incorrect,
    and to 1 when the response to a trial was correct and wasn't reclassified as incorrect; and _stats_,
    a dictionary with 5 items: _reclass_criterion_, a reclassification evidence such that when
    _reclass_polarity_ * _evidence_ > _reclass_polarity_ * _reclass_criterion_ a correct response was
    reclassified as incorrect; _reclass_polarity_, either 1 or -1, indicating how to interpret the
    criterion; _reclass_index_, the indices of the correct responses reclassified as incorrect;
    _reclass_efficiency_, the estimated proportion of true correct and incorrect responses minus false
    correct and incorrect responses following reclassification; and _reclass_gain_, the ratio between
    _reclass_efficiency_ and the efficiency prior to reclassification. Note that sqrt(_reclass_gain_)
    provides an approximation of the expected SNR gain.

    Gosselin, F., Daigneault, V., Larouche, J.-M. & Caplette, L. (submitted). Reclassifying guesses to
    increase signal-to-noise ratio in psychological experiments.

    Frederic Gosselin, 01/06/2020, [email protected]
    Adapted to Python by Laurent Caplette, 17/08/2020, [email protected]
    """
    evidence = np.array(evidence).astype(np.float32)  # in case a list is fed
    accuracy = np.array(accuracy).astype(np.float32)  # in case a list is fed
    if not all(np.unique(accuracy) == [0, 1]):  # check that accuracy is composed of only zeros and ones
        raise ValueError("'accuracy' must be composed of zeros and ones")
    if not accuracy.shape == evidence.shape:  # check that variables are of the same size
        raise ValueError("'accuracy' and 'evidence' must have the same shape")

    polarity = 1  # default evidence polarity
    if (np.mean(evidence[accuracy == 0]) - np.mean(evidence[accuracy == 1])) < 0:  # the evidence is greater for incorrect than correct trials
        polarity = -1  # change evidence polarity
    evidence *= polarity  # the evidence multiplied by its polarity

    nb_std = 2
    outliers = evidence > np.mean(evidence) + nb_std * np.std(evidence)  # temporary outliers to help frame the histogram
    _, bins = np.histogram(evidence[np.logical_not(outliers)], 'fd')  # uses the Freedman-Diaconis rule for bin width
    bin_width = bins[1] - bins[0]  # bin width
    bins = np.arange(bins[0], np.ceil(np.amax(evidence) / bin_width) * bin_width, bin_width)  # complete evidence range, including outliers
    correct_evidence = evidence[accuracy == 1]  # correct response evidences
    n_correct, _ = np.histogram(correct_evidence, bins)  # correct evidences histogram
    incorrect_evidence = evidence[accuracy == 0]  # incorrect response evidences
    n_incorrect, _ = np.histogram(incorrect_evidence, bins)  # incorrect evidences histogram; this is also the false correct evidences histogram

    # calculates frequency distribution
    x = (bins[:-1] + bins[1:]) / 2  # centers of the histogram bins
    s_x = np.linspace(np.amin(x), np.amax(x), ((np.amax(x) - np.amin(x)) // .01).astype(np.int32))  # fine histogram bins for interpolation
    for ii in range(len(bins) - 1):  # replaces histogram bin centers by histogram bin averages whenever possible
        ind = np.where(np.logical_and(evidence >= bins[ii], evidence < bins[ii + 1]))[0]
        if ind.size != 0:
            x[ii] = np.mean(evidence[ind])
    f1 = interp.CubicSpline(x, n_correct)
    s_n_correct = f1(s_x)
    f2 = interp.CubicSpline(x, n_incorrect)
    s_n_incorrect = f2(s_x)
    s_n_true_correct = s_n_correct - s_n_incorrect

    # finds the best evidence criterion
    N = np.sum(s_n_correct) + np.sum(s_n_incorrect)  # number of points in all interpolated frequency distributions; general case
    I_o = np.sum(s_n_incorrect)  # number of points in interpolated n_incorrect frequency distribution
    cCR = np.cumsum(s_n_true_correct)  # cumulative interpolated true correct evidence frequency distribution
    cM = np.cumsum(s_n_incorrect)  # cumulative interpolated false correct evidence frequency distribution
    s_efficiency = (4 * I_o - N + 2 * (cCR - cM)) / N  # interpolated efficiency as a function of evidence reclassification criterion; general case
    s_ind = np.argmax(s_efficiency)
    reclass_criterion = s_x[s_ind]  # chosen evidence criterion

    # reclassifies correct responses as incorrect responses
    accuracy_reclass = accuracy.copy()  # initialize with old accuracy (copy so the caller's array isn't modified)
    reclass_index = np.where(np.logical_and(accuracy == 1, (evidence > reclass_criterion)))[0]  # which correct responses should be reclassified as incorrect
    accuracy_reclass[reclass_index] = 0

    # some statistics
    stats = {
        'reclass_polarity': polarity,
        'reclass_criterion': polarity * reclass_criterion,
        'reclass_index': reclass_index,
        'reclass_efficiency': s_efficiency[s_ind],
        'reclass_gain': s_efficiency[s_ind] / s_efficiency[-1]
    }

    return accuracy_reclass, stats
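# A minimal usage sketch (not from the original source): synthetic data in which incorrect
# (guess) trials tend to have longer response times, so response time serves as the
# reclassification evidence. Assumes numpy as np and scipy.interpolate as interp are
# imported, as reclassify() requires; the parameter values below are illustrative only.
rng = np.random.default_rng(0)
n_trials = 1000
accuracy = (rng.random(n_trials) < 0.75).astype(float)    # 1 = correct, 0 = incorrect
rt = rng.gamma(shape=4.0, scale=150.0, size=n_trials)     # hypothetical response times (ms)
rt[accuracy == 0] += 200.0                                # guesses tend to be slower here
accuracy_reclass, stats = reclassify(accuracy, rt)
print('reclassified %d of %d correct responses' % (len(stats['reclass_index']), int(accuracy.sum())))
print('estimated efficiency gain: %.3f' % stats['reclass_gain'])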
point_num = 12 th = int(area_list[-1] - area_list[0]) / point_num temp_volume_list = [[] for i in range(point_num)] temp_deviation_list = [] for item in area_list: for i in range(point_num): if (area_list[0] + th * i) < item and (area_list[0] + th * (i + 1)) >= item: temp_volume_list[i].append(item**3) if (area_list[0] + th * 3) < item and (area_list[0] + th * 7) >= item: temp_deviation_list.append(item) sum_volume_list = [] for i in range(point_num): sum_volume_list.append(sum(temp_volume_list[i])) temp_sum = sum(sum_volume_list) for i in range(point_num): sum_volume_list[i] /= temp_sum grade = sum(sum_volume_list[4:8]) standard_deviation = np.std(np.array(temp_deviation_list)) print(grade, standard_deviation) plt.plot(sum_volume_list) plt.ylabel("Volume Ratio %") plt.xlabel("Relative Volume") plt.gca().yaxis.set_major_formatter(FuncFormatter(to_percent)) plt.show() # plt.bar(rad, y, alpha=0.5, width=0.5, label='Partical Size', lw=3) # plt.legend()
def train_PG(exp_name, env_name, n_iter, gamma, min_timesteps_per_batch, max_path_length, learning_rate, reward_to_go, animate, logdir, normalize_advantages, nn_baseline, seed, n_layers, size): start = time.time() #========================================================================================# # Set Up Logger #========================================================================================# setup_logger(logdir, locals()) #========================================================================================# # Set Up Env #========================================================================================# # Make the gym environment env = gym.make(env_name) # Set random seeds tf.set_random_seed(seed) np.random.seed(seed) env.seed(seed) # Maximum length for episodes max_path_length = max_path_length or env.spec.max_episode_steps # Is this env continuous, or self.discrete? discrete = isinstance(env.action_space, gym.spaces.Discrete) # Observation and action sizes ob_dim = env.observation_space.shape[0] ac_dim = env.action_space.n if discrete else env.action_space.shape[0] #========================================================================================# # Initialize Agent #========================================================================================# computation_graph_args = { 'n_layers': n_layers, 'ob_dim': ob_dim, 'ac_dim': ac_dim, 'discrete': discrete, 'size': size, 'learning_rate': learning_rate, } sample_trajectory_args = { 'animate': animate, 'max_path_length': max_path_length, 'min_timesteps_per_batch': min_timesteps_per_batch, } estimate_return_args = { 'gamma': gamma, 'reward_to_go': reward_to_go, 'nn_baseline': nn_baseline, 'normalize_advantages': normalize_advantages, } agent = Agent(computation_graph_args, sample_trajectory_args, estimate_return_args) # build computation graph agent.build_computation_graph() # tensorflow: config, session, variable initialization agent.init_tf_sess() #========================================================================================# # Training Loop #========================================================================================# total_timesteps = 0 for itr in range(n_iter): print("********** Iteration %i ************" % itr) paths, timesteps_this_batch = agent.sample_trajectories(itr, env) total_timesteps += timesteps_this_batch # Build arrays for observation, action for the policy gradient update by concatenating # across paths ob_no = np.concatenate([path["observation"] for path in paths]) ac_na = np.concatenate([path["action"] for path in paths]) re_n = [path["reward"] for path in paths] q_n, adv_n = agent.estimate_return(ob_no, re_n) agent.update_parameters(ob_no, ac_na, q_n, adv_n) # Log diagnostics returns = [path["reward"].sum() for path in paths] ep_lengths = [pathlength(path) for path in paths] logz.log_tabular("Time", time.time() - start) logz.log_tabular("Iteration", itr) logz.log_tabular("AverageReturn", np.mean(returns)) logz.log_tabular("StdReturn", np.std(returns)) logz.log_tabular("MaxReturn", np.max(returns)) logz.log_tabular("MinReturn", np.min(returns)) logz.log_tabular("EpLenMean", np.mean(ep_lengths)) logz.log_tabular("EpLenStd", np.std(ep_lengths)) logz.log_tabular("TimestepsThisBatch", timesteps_this_batch) logz.log_tabular("TimestepsSoFar", total_timesteps) logz.dump_tabular() logz.pickle_tf_vars()
if args.query_url != 'none':
    print('Query url: ' + args.query_url)
    if args.query_url in url_dict and url_dict[args.query_url] in res:
        print('page rank = ' + str(round(res[url_dict[args.query_url]], 7)))
    else:
        print('page rank = 0')

print('pagerank stats:')
max_hub = 0
min_hub = 1
median_hub = 0
std_hub = 0
i = 0
hubs = np.ones(500)
for tup in sorted_pr[:500]:
    if tup[1] > max_hub:
        max_hub = tup[1]
    if tup[1] < min_hub:
        min_hub = tup[1]
    if i == 249 or i == 250:
        median_hub += tup[1]
    hubs[i] = tup[1]
    i += 1
print('max = ' + str(max_hub))
print('min = ' + str(min_hub))
print('median = ' + str(median_hub / 2))
print('std = ' + str(np.std(hubs)))
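# A hedged alternative sketch (assuming sorted_pr holds (node, score) pairs as above): the
# same summary statistics can be computed directly with numpy instead of the manual loop.
top_scores = np.array([score for _, score in sorted_pr[:500]])
print('max = ' + str(top_scores.max()))
print('min = ' + str(top_scores.min()))
print('median = ' + str(np.median(top_scores)))  # averages the two middle values, like median_hub / 2
print('std = ' + str(np.std(top_scores)))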
# plt.legend(loc='upper right') plt.xlim(min(vals), max(vals)) plt.savefig('output/dist.png') plt.show() plt.close() if 'walkers' in args.plot: file = 'output/dist.dat' vals = [] with open(file) as f: for line in f: vals.append(float(line)) steps = np.arange(0, len(vals)) avg = round(np.mean(vals), 3) std = round(np.std(vals) / math.sqrt(len(vals)), 3) avg_array = np.array([avg for i in vals]) plt.figure(figsize=[16, 5]) plt.step(steps, vals, color='k', alpha=.7, linewidth=1, label=r'$T=100$') plt.plot(steps, avg_array, color='r', label=r'$<E>=%s \pm %s$' % (str(avg), str(std))) plt.fill_between(steps, avg_array - std,
peak1Array.append(third_list[i][1] * 10**3) elif third_list[i][0] == max_index: peak3Array.append(third_list[i][1] * 10**3) else: peak2Array.append(third_list[i][1] * 10**3) # Take the mean values peak1_mean = np.mean(peak1Array) peak2_mean = np.mean(peak2Array) peak3_mean = np.mean(peak3Array) print("peak1: " + str(peak1_mean)) print("peak2: " + str(peak2_mean)) print("peak3: " + str(peak3_mean)) # Take RMS deviation std1 = np.std(peak1Array)/math.sqrt(len(peak1Array)) std2 = np.std(peak2Array)/math.sqrt(len(peak2Array)) std3 = np.std(peak3Array)/math.sqrt(len(peak3Array)) print("std1: " + str(std1)) print("std2: " + str(std2)) print("std3: " + str(std3)) f_open.close() # Create root file rootFile = '%speaks_run_%s.root' % (RootFilePath, run) f_root = TFile(rootFile, "RECREATE") treeName = 'data' tree = TTree(treeName, treeName) run_array = np.zeros(1, dtype=np.dtype('u4')) timestamp_array = np.zeros(1, dtype=np.float32) mean1_array = np.zeros(1, dtype=np.float32)
def sort_by_target(mnist): reorder_train = np.array(sorted([(target, i) for i, target in enumerate(mnist.target[:60000])]))[:, 1] reorder_test = np.array(sorted([(target, i) for i, target in enumerate(mnist.target[60000:])]))[:, 1] mnist.data[:60000] = mnist.data[reorder_train] mnist.target[:60000] = mnist.target[reorder_train] mnist.data[60000:] = mnist.data[reorder_test + 60000] mnist.target[60000:] = mnist.target[reorder_test + 60000] # Get MNIST data, normalize, and divide by level # mnist = fetch_openml('MNIST original', data_home='./data') mnist = fetch_openml('mnist_784', version=1, cache=True) mnist.target = mnist.target.astype(np.int8) # fetch_openml() returns targets as strings sort_by_target(mnist) # fetch_openml() returns an unsorted dataset mu = np.mean(mnist.data.astype(np.float32), 0) sigma = np.std(mnist.data.astype(np.float32), 0) mnist.data = (mnist.data.astype(np.float32) - mu)/(sigma+0.001) mnist_data = [] for i in trange(10): idx = mnist.target==i mnist_data.append(mnist.data[idx]) print([len(v) for v in mnist_data]) ###### CREATE USER DATA SPLIT ####### # Assign 10 samples to each user X = [[] for _ in range(1000)] y = [[] for _ in range(1000)] idx = np.zeros(10, dtype=np.int64) for user in range(1000): for j in range(2):
def update_parameters(self, ob_no, ac_na, q_n, adv_n): """ Update the parameters of the policy and (possibly) the neural network baseline, which is trained to approximate the value function. arguments: ob_no: shape: (sum_of_path_lengths, ob_dim) ac_na: shape: (sum_of_path_lengths). q_n: shape: (sum_of_path_lengths). A single vector for the estimated q values whose length is the sum of the lengths of the paths adv_n: shape: (sum_of_path_lengths). A single vector for the estimated advantages whose length is the sum of the lengths of the paths returns: nothing """ #====================================================================================# # ----------PROBLEM 6---------- # Optimizing Neural Network Baseline #====================================================================================# if self.nn_baseline: # If a neural network baseline is used, set up the targets and the inputs for the # baseline. # # Fit it to the current batch in order to use for the next iteration. Use the # baseline_update_op you defined earlier. # # Hint #bl2: Instead of trying to target raw Q-values directly, rescale the # targets to have mean zero and std=1. (Goes with Hint #bl1 in # Agent.compute_advantage.) # YOUR_CODE_HERE # raise NotImplementedError target_n = (q_n - np.mean(q_n)) / np.std(q_n) self.sess.run(self.baseline_update_op, feed_dict={ self.sy_ob_no: ob_no, self.sy_target_n: target_n }) #====================================================================================# # ----------PROBLEM 3---------- # Performing the Policy Update #====================================================================================# # Call the update operation necessary to perform the policy gradient update based on # the current batch of rollouts. # # For debug purposes, you may wish to save the value of the loss function before # and after an update, and then log them below. # # YOUR_CODE_HERE self.sess.run(self.update_op, feed_dict={ self.sy_ob_no: ob_no, self.sy_ac_na: ac_na, self.sy_adv_n: adv_n })
    'data/glass_data_7.txt': 0.2
}

for test in tests:
    data_instances = []
    data_file = open(test)
    print("Running with %s" % test)
    for line in data_file:
        # Digest read data
        line_split = line.split(',')
        data_instances.append(list(map(float, line_split)))  # list() so numpy gets values, not a map object
    data_instances = np.array(data_instances)

    # Normalize continuous attributes
    if 'iris' in test:
        for column in data_instances.T:
            column[:] = (column - np.mean(column)) / (2.0 * np.std(column))  # in-place so the normalization sticks

    # Shuffle data instances
    np.random.shuffle(data_instances)
    data_indices = [idx for idx in range(data_instances.shape[0])]

    # 10-fold cross validation
    fold_size = data_instances.shape[0] // 10  # integer fold size, so the slices below get integer bounds
    total_performance = 0.0
    for holdout_fold_idx in range(10):
        print("Cross validation fold %d" % (holdout_fold_idx + 1))
        # training_indices = data_indices - holdout_fold indices
        training_indices = np.array(
            np.setdiff1d(
                data_indices,
                data_indices[fold_size * holdout_fold_idx:
                             fold_size * holdout_fold_idx + fold_size]))
def train_and_predict(): print('-'*30) print('Loading and preprocessing train data...') print('-'*30) imgs_train, imgs_mask_train = load_train_data() imgs_train = preprocess(imgs_train) imgs_mask_train = preprocess(imgs_mask_train) imgs_train = imgs_train.astype('float32') mean = np.mean(imgs_train) # mean for data centering std = np.std(imgs_train) # std for data normalization imgs_train -= mean imgs_train /= std imgs_mask_train = imgs_mask_train.astype('float32') imgs_mask_train /= 255. # scale masks to [0, 1] print('-'*30) print('Creating and compiling model...') print('-'*30) model = get_unet() model_checkpoint = ModelCheckpoint('unet.hdf5', monitor='loss', save_best_only=True) print('-'*30) print('Fitting model...') print('-'*30) model.fit(imgs_train, imgs_mask_train, batch_size=32, nb_epoch=20, verbose=1, shuffle=True, callbacks=[model_checkpoint]) print('-'*30) print('Loading and preprocessing test data...') print('-'*30) imgs_test, imgs_mask_test_truth = load_test_data() imgs_test = preprocess(imgs_test) imgs_test = imgs_test.astype('float32') imgs_test -= mean imgs_test /= std print('-'*30) print('Loading saved weights...') print('-'*30) model.load_weights('unet.hdf5') print('-'*30) print('Predicting masks on test data...') print('-'*30) imgs_mask_test_result = model.predict(imgs_test, verbose=1) imgs_mask_test_result = postprocess(imgs_mask_test_result) #test results is converted to 0-255 due to resizing print(imgs_mask_test_result.max()) imgs_mask_test_result = imgs_mask_test_result.astype('float32') imgs_mask_test_result /= 255 print(imgs_mask_test_truth.shape) imgs_mask_test_truth = imgs_mask_test_truth.astype('float32') imgs_mask_test_truth /= 255 test_truth = imgs_mask_test_truth.flatten() test_result = imgs_mask_test_result.flatten() print(test_result.shape) print(test_truth.shape) intersect = test_result * test_truth dice_score = (2. * intersect.sum()) / (test_truth.sum() + test_result.sum()) print('Dice coefficient on testing data is : {0:.3f}.'.format(dice_score))
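# A small helper sketched for clarity (not part of the original script): the Dice score
# computed above on the flattened masks, expressed as a reusable function. Assumes binary
# masks scaled to [0, 1], as in train_and_predict(); the smooth term is an added guard
# against empty masks.
def dice_coefficient(mask_truth, mask_pred, smooth=1e-7):
    """Dice coefficient between two binary masks of the same shape."""
    truth = mask_truth.flatten()
    pred = mask_pred.flatten()
    intersect = (truth * pred).sum()
    return (2. * intersect + smooth) / (truth.sum() + pred.sum() + smooth)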
def evaluate_recon(model, dataloader, n_samples, cuda): # Models in Eval mode model.eval() # Setup Average Meters # Single Modality CMI meters single_image_meter = utils.AverageMeter() single_trajectory_meter = utils.AverageMeter() single_sound_meter = utils.AverageMeter() # Double Modality CMI meters double_image_sound_meter = utils.AverageMeter() double_image_trajectory_meter = utils.AverageMeter() double_sound_trajectory_meter = utils.AverageMeter() # Triple Modality CMI meters all_mods_meter = utils.AverageMeter() # Main Evaluation Loop with torch.no_grad(): for batch_idx, data in enumerate(tqdm(dataloader)): # Original data img_data = data[1] trj_data = data[2] snd_data = data[3] sym_data = torch.nn.functional.one_hot(data[0], num_classes=10).float() labels = data[0] # To generate multiple samples labels = labels.repeat_interleave(repeats=n_samples, dim=0) img_data = img_data.repeat_interleave(repeats=n_samples, dim=0) trj_data = trj_data.repeat_interleave(repeats=n_samples, dim=0) snd_data = snd_data.repeat_interleave(repeats=n_samples, dim=0) sym_data = sym_data.repeat_interleave(repeats=n_samples, dim=0) if cuda: labels = labels.cuda() img_data = img_data.cuda() trj_data = trj_data.cuda() snd_data = snd_data.cuda() sym_data = sym_data.cuda() # Single Modality CMI # From image _, _, _, cm_sym = model.generate(x_img=img_data) sym_acc = compute_accuracy(samples=cm_sym[1], target=labels, classifier=None) single_image_meter.update(sym_acc.item()) # From sound _, _, _, cm_sym = model.generate(x_snd=snd_data) snd_acc = compute_accuracy(samples=cm_sym[1], target=labels, classifier=None) single_sound_meter.update(snd_acc.item()) # From trajectory _, _, _, cm_sym = model.generate(x_trj=trj_data) trj_acc = compute_accuracy(samples=cm_sym[1], target=labels, classifier=None) single_trajectory_meter.update(trj_acc.item()) # Double mod CMI encoding # From image and sound _, _, _, cm_sym = model.generate(x_snd=snd_data, x_img=img_data) img_snd_acc = compute_accuracy(samples=cm_sym[1], target=labels, classifier=None) double_image_sound_meter.update(img_snd_acc.item()) # From image and trajectory _, _, _, cm_sym = model.generate(x_trj=trj_data, x_img=img_data) img_trj_acc = compute_accuracy(samples=cm_sym[1], target=labels, classifier=None) double_image_trajectory_meter.update(img_trj_acc.item()) # From sound and trajectory _, _, _, cm_sym = model.generate(x_snd=snd_data, x_trj=trj_data) snd_trj_acc = compute_accuracy(samples=cm_sym[1], target=labels, classifier=None) double_sound_trajectory_meter.update(snd_trj_acc.item()) # Triple mod CMI encoding _, _, _, cm_sym = model.generate(x_img=img_data, x_snd=snd_data, x_trj=trj_data) all_mods_acc = compute_accuracy(samples=cm_sym[1], target=labels, classifier=None) all_mods_meter.update(all_mods_acc.item()) # Compile Results sym_acc_scores_dic = { 'single_image': single_image_meter.avg, 'single_sound': single_sound_meter.avg, 'single_trajectory': single_trajectory_meter.avg, 'double_image_sound': double_image_sound_meter.avg, 'double_image_trajectory': double_image_trajectory_meter.avg, 'double_sound_trajectory': double_sound_trajectory_meter.avg, 'all_mods': all_mods_meter.avg } single_sym_acc_results = [ sym_acc_scores_dic['single_image'], sym_acc_scores_dic['single_sound'], sym_acc_scores_dic['single_trajectory'] ] print("\n Symbol Accuracy:") print(" * Single Modality = " + str(np.mean(single_sym_acc_results)) + " +-" + str(np.std(single_sym_acc_results))) print(" * All Modalities = " + str(sym_acc_scores_dic['all_mods'])) print("\n") return 
sym_acc_scores_dic
data.sample(20) X = data["text"] y = data["sentiment"] x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=123) num_words = 10000 tokenizer = Tokenizer(num_words=num_words) tokenizer.fit_on_texts(x_train) x_train_tokens = tokenizer.texts_to_sequences(x_train) x_test_tokens = tokenizer.texts_to_sequences(x_test) num_tokens = [len(tokens) for tokens in x_train_tokens + x_test_tokens] num_tokens = np.array(num_tokens) max_tokens = np.mean(num_tokens) + 2 * np.std(num_tokens) max_tokens = int(max_tokens) pad = 'pre' x_train_pad = pad_sequences(x_train_tokens, maxlen=max_tokens, padding=pad, truncating=pad) x_test_pad = pad_sequences(x_test_tokens, maxlen=max_tokens, padding=pad, truncating=pad) model = Sequential() embedding_size = 8 model.add(Embedding(input_dim=num_words, output_dim=embedding_size, input_length=max_tokens, name='layer_embedding'))
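# Optional sanity check (an added sketch, not in the original): max_tokens = mean + 2 * std
# is a heuristic cut-off, so it is worth knowing what fraction of sequences fit entirely
# within max_tokens and are therefore not truncated by pad_sequences above.
coverage = np.sum(num_tokens <= max_tokens) / len(num_tokens)
print('max_tokens = %d covers %.1f%% of the sequences' % (max_tokens, 100 * coverage))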
def df_stdevp(sub_df): """Get the population standard deviation""" return np.std(sub_df)
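# Hypothetical usage sketch: df_stdevp plugged into a pandas groupby aggregation. The
# DataFrame df and the column names ('group', 'value') are assumptions for illustration.
import pandas as pd

df = pd.DataFrame({'group': ['a', 'a', 'b', 'b'], 'value': [1.0, 3.0, 2.0, 6.0]})
print(df.groupby('group')['value'].apply(df_stdevp))  # population std per group (ddof=0)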
# -*- coding: utf-8 -*- """ Created on Mon Nov 16 11:27:36 2020 @author: merel """ import numpy as np a = np.array([4.36,3.75,4.10,4.86,4.45,4.45,4.28,3.97,4.30,4.09,\ 3.74,3.79,3.91,3.60,4.51,4.59,4.27,3.74,4.30,4.12,\ 3.81,4.44,4.36,4.44,3.90,4.29,4.35,4.16,4.63,3.92,\ 3.90,4.28,4.42,4.54,3.68,4.43,3.84,4.06,4.20,4.01,4.23]) gemiddelde = a.mean() standaarddeviatie = np.std(a,ddof=1) print(gemiddelde, standaarddeviatie)
import numpy as np import matplotlib.pyplot as plt x = np.arange(-4, 4, 0.01) def f(x): return (np.e) ** (-(x+0.00788786) ** 2 / (2 * 0.9933789 ** 2)) / (2 * np.pi * 0.9933789 ** 2) ** 0.5 def g(x): return (np.e) ** (-x ** 2 / (2 * 1 ** 2)) / (2 * np.pi * 1 ** 2) ** 0.5 data1 = np.loadtxt('W_4.txt',delimiter=",") data = data1[:, 0] mean = np.mean(data) std = np.std(data) print(mean, std) plt.plot(x, f(x), label='BNN') plt.plot(x, g(x), label='True') plt.hist(data, bins=100, density=True, histtype='stepfilled', label='NN') plt.legend(loc='upper left') plt.title('w1') plt.show()
import os

import matplotlib.pyplot as plt
import numpy as np  # needed for np.mean / np.std below

folder = r'C:\Users\magnu\OneDrive\Dokument\KTH\2020HT\DD2412_-_Deep_Learning_Advanced_Course\Project\Results\resnet'
files = next(os.walk(folder))[2]
for filename in files:
    if os.path.splitext(filename)[0][-3:] == 'iou':
        filepath = os.path.join(folder, filename)
        print(filename)
        with open(filepath, 'r') as f:  # close the file once it has been read
            lines = f.readlines()
        null_ious = 0
        ious = []
        for line in lines:
            iou = float(line)
            if iou > 0:
                ious.append(iou)
            else:
                null_ious += 1
        print(np.mean(ious))
        print(np.std(ious))
def normalize(X, m, s):
    """Normalizes (standardizes) a matrix, given its per-column mean m and standard deviation s."""
    return (X - m) / s  # use the supplied constants instead of recomputing them from X
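# Usage sketch (added for illustration): the normalization constants are typically the
# per-column mean and population standard deviation of the training data.
X_train = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
m = np.mean(X_train, axis=0)
s = np.std(X_train, axis=0)
X_norm = normalize(X_train, m, s)
print(X_norm.mean(axis=0), X_norm.std(axis=0))  # approximately [0, 0] and [1, 1]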
z0 = [-1.193, -3.876] t_grid_train = np.linspace(0, 20, data_size) t_grid_true = np.linspace(0, 40, data_size_true) y_train = odeint(Damped_pendulum, z0, t_grid_train, args=(alpha, beta)) idx = np.random.choice(np.arange(data_size - batch_time - 1, dtype=np.int64), batch_size, replace=False) y_train = y_train[idx] t_grid_train = t_grid_train[idx] y_true = odeint(Damped_pendulum, z0, t_grid_true, args=(alpha, beta)) sigma_normal1 = np.std(y_train[:, 0:1]) sigma_normal2 = np.std(y_train[:, 1:2]) sigma_normal = np.asarray([sigma_normal1, sigma_normal2]) parameters = np.load("parameters.npy") precision = np.load("loggammalist.npy") loglikelihood = np.load("loglikelihood.npy") precision = np.exp(precision) print("precision", precision) print(parameters.shape) loglikelihood = loglikelihood[-N_total:] num_samples = parameters.shape[0] length_dict = parameters.shape[1] num_dim = 2
plt.plot(xvals, yvals, label="KDE") plt.axvline(mode, label="Mode", c='r') plt.legend() plt.show() plt.hist(data, bins=100, label="Data", alpha=0.5) plt.axvline(data.mean(), label="Mean", ls="--", c='#f9ee4a') plt.axvline(np.median(data), label="Median", ls="-", c='#44d9ff') plt.axvline(mode, label="Mode", ls=":", c='#f95b4a') plt.legend() plt.show() """ Approximate with gauss dist. """ xs = np.linspace(data.min(), data.max(), 100) ys = st.norm.pdf(xs, loc=np.mean(data), scale=np.std(data)) plt.hist(data, bins=50, density=True, histtype="step", label="Data") plt.plot(xs, ys, label="Normal approximation") plt.legend() plt.ylabel("Probability") plt.show() """ Approximate with gauss dist + skew. """ xs = np.linspace(data.min(), data.max(), 100) ys1 = st.norm.pdf(xs, loc=np.mean(data), scale=np.std(data)) ys2 = st.skewnorm.pdf(xs, st.skew(data), loc=np.mean(data), scale=np.std(data)) plt.hist(data, bins=50, density=True, histtype="step", label="Data") plt.plot(xs, ys1, label="Normal approximation")
np.min(a)
np.min(a, 1)
np.max(a, 1)
np.ptp(a)  # difference ## Range of values (maximum - minimum) along an axis.
np.percentile(a, 50)  # percentile
np.percentile(a, 25)
np.median(a)
np.mean(a, 1)
np.mean(a, 0)
np.std(a, 0)  ## standard deviation
np.var(a)  ## variance

# sorting-related functionality
# get the nonzero elements
np.nonzero(a)  # indices of the nonzero elements
# =============================================================================
# (array([0, 0, 1, 1, 1, 2, 2, 2], dtype=int64),
#  array([1, 2, 0, 1, 2, 0, 1, 2], dtype=int64))
# (0,1),(0,2),...
# =============================================================================
a = np.ones((3, 3))
a[[0, 2, 1, 1, 0], [0, 2, 0, 1, 2]] = 0
np.nonzero(a)
# equivalent to
a[np.where(a != 0)]
def _get_relevant_channels_over_median_peaks(threshold, template): median = np.median(np.nanmin(template, axis=0)) std = np.std(np.nanmin(template, axis=0)) points_under_median = np.argwhere(template < (median - threshold * std)) channels_over_threshold = np.unique(points_under_median[:, 1]) return channels_over_threshold
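# Hypothetical usage sketch (names, shapes and threshold are assumptions): a template of
# shape (n_samples, n_channels) in which a few channels carry a pronounced negative peak,
# which the function above should pick out via their per-channel minima.
template = np.random.default_rng(1).normal(0.0, 1.0, size=(60, 32))
template[25:30, [4, 11]] -= 25.0  # inject deep troughs on channels 4 and 11
print(_get_relevant_channels_over_median_peaks(threshold=2.0, template=template))  # expected: [ 4 11]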
    item_mask = itens[5:]
    resultados = resultados[5:]
    resultado[j, item_mask] = resultados
    participante_acertos[j] = sum(resultados)
    j = j + 1

print(' Finished filling in the responses. ')

#--------------------------------------------------------------------------
# Initialize the theta parameter for all participants
#--------------------------------------------------------------------------
participante_theta = (participante_acertos - np.mean(participante_acertos)) / np.std(participante_acertos)

#--------------------------------------------------------------------------
# INITIALIZATION OF THE LOGISTIC REGRESSION MODELS
#--------------------------------------------------------------------------
# model for fitting the item parameters (a and b)
lr_ab = LogisticRegression(
    penalty=REGRESSAO_LOGISTICA_AJUSTE_AB_PENALIDADE,
    C=REGRESSAO_LOGISTICA_AJUSTE_AB_REGULARIZACAO,
    fit_intercept=True)

# model for fitting the participants' ability (theta)
lr_theta = LogisticRegression(
'batch_size': 100, 'learn_rate': 1e-3, 'max_epochs': 1500, 'early_stop': 5, 'check_freq': 5, } for argv in sys.argv: if('--' == argv[:2] and '=' in argv): eq_ind = argv.index('=') setting_feature = argv[2:eq_ind] setting_value = argv[eq_ind+1:] if(setting_feature in ['save', 'plot']): training_settings[setting_feature] = (setting_value=='True') if(setting_feature == 'model'): model_names = [setting_value] print(training_settings) eval_rmses, eval_lls = run_experiment( model_names, 'KEGG', dataset, **training_settings) print(eval_rmses, eval_lls) for model_name in model_names: rmse_mu = np.mean(eval_rmses[model_name]) rmse_std = np.std(eval_rmses[model_name]) ll_mu = np.mean(eval_lls[model_name]) ll_std = np.std(eval_lls[model_name]) print('>>> '+model_name) print('>> RMSE = {:.4f} \pm {:.4f}'.format(rmse_mu, 1.96*rmse_std)) print('>> NLPD = {:.4f} \pm {:.4f}'.format(ll_mu, 1.96*ll_std))
def AnalyzeScaleVariation(filenames): import matplotlib.pyplot as pyplot import mpl_utils import krebs.quantities as Q import collections data = [] for fn in filenames: with h5py.File(fn, 'r+') as f: sample = ObtainDataOfVesselFile(f) del sample['message'] data.append(sample) byScale = collections.defaultdict(list) for d in data: scale = d['scale'] byScale[scale].append(d) for k, v in byScale.items(): byScale[k] = myutils.zipListOfDicts(v) curves = collections.defaultdict(list) for k, v in byScale.items(): res = ComputeSingleNumberAvgStd(v) for name, (std, avg) in res.items(): curves[name].append((std, avg)) order = np.argsort(np.asarray(curves['scale'])[:, 0]) for k, v in curves.items(): curves[k] = np.asarray(v).transpose()[:, order] scales = { 'mvd': (Q.um**-2).asNumber(Q.mm**-2), 'rbv': 100., 'rbf': 60., } with mpl_utils.PageWriter('vessel-calibration-analysis', fileformats=['pdf']) as pdfwriter: fig, axes = pyplot.subplots(3, 1, figsize=mpl_utils.a4size * np.asarray([0.4, 0.5])) for scale, data in byScale.items(): bins = np.average( data['bins'], axis=0 ) # sanity check, all bins arrays have equal size, so just try to average the bin boundaries, even if it makes no real sense x = bins x_rbv = 0.5 * (x[1:] + x[:-1]) # bin center for rBV # plot things ax = axes[0] ya = data['mvd'] ax.errorbar(x, scales['mvd'] * np.average(ya, axis=0), yerr=scales['mvd'] * np.std(ya, axis=0), label=('h = %0.f' % scale)) legend = ax.legend(loc=4, fontsize='xx-small') ax = axes[1] scale = scales['rbv'] ya = data['rbv'] ax.errorbar(x_rbv, scale * np.average(ya, axis=0), yerr=scale * np.std(ya, axis=0)) ax = axes[2] ya = data['rbf'] scale = scales['rbf'] ax.errorbar(x, scale * np.average(ya, axis=0), yerr=scale * np.std(ya, axis=0)) axes[0].set(ylabel='mvd [$mm^{-1}$]') axes[1].set(ylabel='rbv [$\%$]') axes[2].set(ylabel='rbf [$min^{-1}$]', xlabel='$|x| [\mu m]$') pyplot.tight_layout() pyplot.legend() pdfwriter.savefig(fig) fig, axes = pyplot.subplots(3, 1, figsize=mpl_utils.a4size * np.asarray([0.4, 0.5])) for ax in axes: ax.grid(linestyle=':', linewidth=0.5, color='#aaaaaa') x = curves['scale'][0, :] ax = axes[0] y, yerr = scales['mvd'] * curves['mvd'] ax.errorbar(x, y, yerr=yerr) ax = axes[1] y, yerr = scales['rbv'] * curves['rbv'] ax.errorbar(x, y, yerr=yerr) ax = axes[2] y, yerr = scales['rbf'] * curves['rbf'] ax.errorbar(x, y, yerr=yerr) axes[0].set(ylabel='mvd [$mm^{-1}$]') axes[1].set(ylabel='rbv [$\%$]') axes[2].set(ylabel='rbf [$min^{-1}$]', xlabel='$h [\mu m]$') pyplot.tight_layout() pdfwriter.savefig(fig)