def _rm_walls(self, data, publisher):
    variance = 0.1  #to account for noise
    switch = int(self.switch)  ### The PLS Model oscillates between returning 180 pts and 181 pts, this toggle tracks that in a hacky way
    self.switch = not self.switch  #toggle switch
    #if the filter is in place remove walls
    if self.filter_set:
        walls = self.walls[switch]
        ranges = data.ranges
        filtered_ranges = []
        for i in xrange(len(walls)):
            try:
                if ranges[i] < (walls[i] - variance):
                    filtered_ranges.append(ranges[i])
                else:
                    filtered_ranges.append(data.range_max + 1)  #invalidate the result at this point
            except IndexError:
                filtered_ranges.append(data.range_max + 1)
        #publish filtered_ranges as LaserScan
        filtered_scan = data
        h = std_msgs.msg.Header()
        h.stamp = data.header.stamp
        h.frame_id = data.header.frame_id
        filtered_scan.header = h
        filtered_scan.ranges = filtered_ranges
        publisher.publish(filtered_scan)
    #if the filter reset has been called use data to change filter instead
    else:
        #scan the room a specified # of times into an array
        #take the median of those scans at each index value
        #once the minimum number of scans is reached
        if self.reset_count < self.reset_thresh:
            self.new_walls.append([])
            self.new_walls[self.reset_count] = data.ranges
            self.reset_count += 1
        elif self.reset_count == self.reset_thresh:
            #unzip new_walls (to go by point instead of dataset) - len should be ~180
            zipped0 = map(list, zip(*filter(None, self.new_walls[0::2])))
            zipped1 = map(list, zip(*filter(None, self.new_walls[1::2])))
            self.walls[0] = [numpy.median(z) for z in zipped0]
            self.walls[1] = [numpy.median(z) for z in zipped1]
            #reset vars
            self.reset_count = 0
            self.new_walls = [[]]
            self.filter_set = True
            #save the new wall data (as 2 separate files since there is a 180 range and 181 range)
            wall_array_zero = np.array(self.walls[0], dtype=np.float64)
            wall_array_one = np.array(self.walls[1], dtype=np.float64)
            numpy.savetxt(self.fp_0, wall_array_zero, delimiter=",")
            numpy.savetxt(self.fp_1, wall_array_one, delimiter=",")
            print "New walls learned and saved"
def exp(inF1, inF2):
    G = Gene(inF1)
    ouFile = open(inF1 + '.exp', 'w')
    ouFile.write('Gene\tMock\tMERS\n')
    D = {}
    inFile = open(inF2)
    head = inFile.readline()
    for line in inFile:
        line = line.strip()
        fields = line.split('\t')
        gene = fields[1]
        D.setdefault(gene, [])
        #mock = (float(fields[2]) + float(fields[3]))/2
        #rsv20h = (float(fields[14]) + float(fields[15]))/2
        Mock = np.median([float(fields[2]), float(fields[3]), float(fields[4])])
        MERS = np.median([float(fields[5]), float(fields[6]), float(fields[7])])
        D[gene].append([Mock, MERS])
    inFile.close()
    for g in G:
        if g in D:
            if len(D[g]) > 1:
                #print(D[g])
                pass
            ouFile.write(g + '\t' + str(D[g][0][0]) + '\t' + str(D[g][0][1]) + '\n')
    ouFile.close()
def work(self, **kwargs): self.__dict__.update(kwargs) self.worked = True samples = LGMM1(rng=self.rng, size=(self.n_samples,), **self.LGMM1_kwargs) samples = np.sort(samples) edges = samples[::self.samples_per_bin] centers = .5 * edges[:-1] + .5 * edges[1:] print edges pdf = np.exp(LGMM1_lpdf(centers, **self.LGMM1_kwargs)) dx = edges[1:] - edges[:-1] y = 1 / dx / len(dx) if self.show: plt.scatter(centers, y) plt.plot(centers, pdf) plt.show() err = (pdf - y) ** 2 print np.max(err) print np.mean(err) print np.median(err) if not self.show: assert np.max(err) < .1 assert np.mean(err) < .01 assert np.median(err) < .01
def _getTotalDuration(self, actStream):
    #for bed toilet transition margin = 1 hr
    totDuration = 0
    count = 0
    durlist = []
    for i in range(0, len(actStream) - 2):
        #print actStream[i]
        firstLine = actStream[i].split(" ")
        secondLine = actStream[i + 1].split(" ")
        #get a date from here
        d1 = self._get_datetime(firstLine[0], firstLine[1])
        d2 = self._get_datetime(secondLine[0], secondLine[1])
        td = d2 - d1
        duration = td.total_seconds()
        #print td, duration
        #durlist.append(duration)
        margin = self._calculateMargin(d1, d2)
        if duration > 60 * margin:
            #check to see if there were other activities
            count = count + 1
            continue
        durlist.append(duration)
        totDuration = duration + totDuration
    try:
        #print round(min(durlist)/3600, 2), round(max(durlist)/3600, 2), round(totDuration/3600, 2), round(sum(durlist)/3600, 2)
        #return (round(totDuration/60, 5), count, round(numpy.min(durlist)/60, 5), round(numpy.max(durlist)/60, 5), round(numpy.median(durlist)/60, 5), round(numpy.average(durlist)/60, 5))
        return (round(numpy.median(durlist) / 60, 5), count)
    except ValueError:
        #durlist was empty, so there is no median to report
        #return (round(totDuration/60, 5), count, 0, 0, 0, 0)
        return (0, count)
def compute_ks_by_contained(contigs_by_lib_name, sinks, sources): # compute median of maxmin as well as ks p-value of contained maxmin for lib_snk in contigs_by_lib_name: # for a fixed lib_snk; do all source libs together # contained_ctg: contig names of all source libraries stored by source library names contained_ctg=collections.defaultdict(set) for snkCtg in contigs_by_lib_name[lib_snk].itervalues(): for srcCtg in snkCtg.contained_in: contained_ctg[srcCtg.lib].add(srcCtg.name) for lib_src in contigs_by_lib_name: if lib_src in contained_ctg: contained=[] not_contained=[] for ctg in contigs_by_lib_name[lib_src]: if ctg in contained_ctg[lib_src]: contained.append(contigs_by_lib_name[lib_src][ctg].maxmin) else: not_contained.append(contigs_by_lib_name[lib_src][ctg].maxmin) # contained=[contigs_by_lib_name[lib_src][ctg].maxmin for ctg in contigs_by_lib_name[lib_src] if ctg in contained_ctg[lib_src]] # not_contained=[contigs_by_lib_name[lib_src][ctg].maxmin for ctg in contigs_by_lib_name[lib_src] if ctg not in contained_ctg[lib_src]] ks_pvalue = stats.ks_2samp(contained, not_contained)[1] print lib_src, lib_snk, ks_pvalue, sum(contained)/len(contained), sum(not_contained)/len(not_contained) if ks_pvalue < 0.05 and np.median(contained) > np.median(not_contained): sources[lib_snk] |= {lib_src} sinks[lib_src] |= {lib_snk}
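# A small, self-contained illustration of the test used above: stats.ks_2samp compares
# the maxmin distributions of contained vs. not-contained contigs, and the median check
# guards the direction of the effect. The two samples below are invented for illustration.
import numpy as np
from scipy import stats

contained = np.random.normal(10.0, 1.0, 200)       # hypothetical maxmin values
not_contained = np.random.normal(8.0, 1.0, 200)

ks_pvalue = stats.ks_2samp(contained, not_contained)[1]
if ks_pvalue < 0.05 and np.median(contained) > np.median(not_contained):
    print("contained contigs have systematically larger maxmin")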
def __init__(self, fndark, nblocksize): if (os.path.isfile(fndark+'-dark.npz')): npzfile=np.load(fndark+'-dark.npz'); self.dmean=npzfile['dmean']; self.dstd=npzfile['dstd']; self.dbpm=npzfile['dbpm']; else: dark=Binary(fndark); nframes=dark.nframes; my=dark.my; mx=dark.mx; nblocks=nframes//nblocksize; bmed=np.zeros((nblocks,my,mx)); bstd=np.zeros((nblocks,my,mx)); for iblock in range(nblocks): t0=time.clock(); a=dark.data[iblock*nblocksize:(iblock+1)*nblocksize]; a,idx=dropbadframes(a); print '- read block, dropped bad, subtracted dark in '+str(time.clock()-t0)+'s'; nfb=a.shape[0]; bmed[iblock,:,:]=np.median(a,axis=0); bstd[iblock,:,:]=np.std(a,axis=0); self.dmean=np.mean(bmed,axis=0); self.dstd=np.sqrt(np.sum((bstd)**2,axis=0)); self.dbpm=self.dstd<(np.median(self.dstd)+5*np.std(self.dstd)); self.dbpm=self.dstd<(np.median(self.dstd*self.dbpm)+5*np.std(self.dstd*self.dbpm)); np.savez(fndark+'-dark',dmean=self.dmean,dstd=self.dstd,dbpm=self.dbpm); del dark;
def work(self): self.worked = True kwargs = dict( weights=self.weights, mus=self.mus, sigmas=self.sigmas, low=self.low, high=self.high, q=self.q, ) samples = GMM1(rng=self.rng, size=(self.n_samples,), **kwargs) samples = np.sort(samples) edges = samples[::self.samples_per_bin] #print samples pdf = np.exp(GMM1_lpdf(edges[:-1], **kwargs)) dx = edges[1:] - edges[:-1] y = 1 / dx / len(dx) if self.show: plt.scatter(edges[:-1], y) plt.plot(edges[:-1], pdf) plt.show() err = (pdf - y) ** 2 print np.max(err) print np.mean(err) print np.median(err) if not self.show: assert np.max(err) < .1 assert np.mean(err) < .01 assert np.median(err) < .01
def meanclip2(xx, yy, slope, clipsig=3.0, maxiter=5, converge_num=0.1, verbose=0):
    from numpy import array
    import numpy
    xx = array(xx)
    yy = array(yy)
    xx0 = array(xx[:])
    yy0 = array(yy[:])
    ct = len(yy)
    slope = float(slope)
    iter = 0
    c1 = 1.0
    c2 = 0.0
    while (c1 >= c2) and (iter < maxiter):
        lastct = ct
        sig = numpy.std(yy0 - xx0 * slope)
        # mean = numpy.mean(array(yy0) - array(xx0)*slope)
        mean = numpy.median(array(yy0) - array(xx0) * slope)
        wsm = numpy.where(abs(yy0 - xx0 * slope) < mean + clipsig * sig)
        ct = len(wsm[0])
        if ct > 0:
            xx0 = xx0[wsm]
            yy0 = yy0[wsm]
        c1 = abs(ct - lastct)
        c2 = converge_num * lastct
        iter += 1
    # End of while loop
    # mean = numpy.mean(array(yy0) - array(xx0)*slope)
    mean = numpy.median(array(yy0) - array(xx0) * slope)
    sig = numpy.std(array(yy0) - array(xx0) * float(slope))
    if verbose:
        pass
    return mean, sig, yy0, xx0
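# A minimal usage sketch for meanclip2 with invented data: y follows a known slope,
# a few points are deliberately corrupted, and the clipped median/std ignore them.
# The slope, noise level, and outlier positions below are assumptions for illustration.
import numpy as np

rng = np.random.RandomState(0)
x = np.linspace(0.0, 10.0, 200)
y = 2.0 * x + 0.5 + rng.normal(0.0, 0.1, x.size)   # true offset 0.5, scatter 0.1
y[::40] += 5.0                                     # inject a handful of outliers

offset, scatter, y_kept, x_kept = meanclip2(x, y, slope=2.0, clipsig=3.0)
print("offset=%.3f scatter=%.3f kept=%d" % (offset, scatter, len(y_kept)))
# offset ~0.5, scatter ~0.1, and the corrupted points are clipped out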
def getStripStatistics(self, yKey='vPhi', nMin=10): """For each of the strips, get the strip statistics""" if np.size(self.stripsFeH) < 1: self.buildStripsFeH() # may as well loop through!! # View of what we're using for our vertical quantity x = self.tSim['FeHObs'] y = self.tSim[yKey] nStrips = np.size(self.stripsFeH) - 1 self.stripCounts = np.zeros(nStrips, dtype='int') self.stripMeans = np.zeros(nStrips) self.stripMedns = np.zeros(nStrips) self.stripStdds = np.zeros(nStrips) self.stripFeHs = np.zeros(nStrips) # central point for sample for iStrip in range(nStrips): xLo = self.stripsFeH[iStrip] xHi = self.stripsFeH[iStrip+1] bStrip = (self.bSel) & (x >= xLo) & (x < xHi) self.stripCounts[iStrip] = np.sum(bStrip) if self.stripCounts[iStrip] < nMin: continue self.stripMeans[iStrip] = np.mean(y[bStrip]) self.stripMedns[iStrip] = np.median(y[bStrip]) self.stripStdds[iStrip] = np.std(y[bStrip]) self.stripFeHs[iStrip] = np.median(x[bStrip])
def bench(workers, sizes, max_partition_fill_rates, byte_sizes, num_runs): for worker in workers: for size in sizes: for max_partition_fill_rate in max_partition_fill_rates: for byte_size in byte_sizes: with open(result_dir + "/" + str(worker) + "_" + str(size) + "_" + str(max_partition_fill_rate) + "_" + str(byte_size) + "_S", "w+") as file1: times = [] #flushes = [] #collisions = [] spills = [] for _ in range(num_runs): process = subprocess.Popen(['../../build/benchmarks/hashtable_bench_probing_hashtable', '-s', str(size), '-w', str(worker), '-f', str(max_partition_fill_rate), '-t', str(byte_size)], stdout=subprocess.PIPE) process.wait() out = process.communicate()[0] out_s = out.split() times.append(float(out_s[0])) #flushes.append(float(out_s[1])) #collisions.append(float(out_s[2])) spills.append(float(out_s[1])) time = numpy.median(times) #flush = numpy.median(flushes) #collision = numpy.median(collisions) spill = numpy.median(spills) print str(worker) + "_" + str(size) + "_" + "_" + str(max_partition_fill_rate) + "_" + str(byte_size) + ": " + str(time) + " " + str(spill) file1.write(str(time) + " " + str(spill) + "\n") file1.close()
def group_images_in_blocks(times, limit=3):
    """ In a night at the telescope, we can observe blocks of images on the same
        field of the sky. For example, five blank fields, then the object, then
        another 4 blank fields, then the object... We might want to distinguish
        those blocks, in order to, e.g., combine the blank fields of each block,
        or correct a block of images of the object with a certain blank or bias
        image... This routine gets the datetime.datetime objects that indicate
        the date and time of observations, and separates them into blocks,
        giving back an array:

            indices = [ind0, ind1, ind2, ind3]

        so that each slice [ind0:ind1], [ind1:ind2] and [ind2:ind3] gives a
        block of the incoming images. """
    delta_times = np.asarray([(times[ii + 1] - times[ii]).seconds
                              for ii in range(len(times) - 1)])
    # Median and median absolute deviation of delta_times as first guesses
    median_delta = np.median(delta_times)
    MAD = np.median(abs(delta_times - median_delta))
    # We have found a boundary between blocks if the time between images is larger
    # than limit. The limit will be assigned to the upper image, hence the +1
    block_limits = np.where(delta_times > median_delta + limit * MAD)[0] + 1
    # Now we have the limits between blocks, we need to add the first image (where
    # the first block starts) and the last image (where the last block ends)
    block_limits = np.insert(np.append(block_limits, len(times) + 1), 0, 0)
    return block_limits
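# A small sanity check for group_images_in_blocks under assumed observing times:
# two sequences of exposures separated by a long gap. The timestamps are invented
# purely for illustration.
import datetime

t0 = datetime.datetime(2020, 1, 1, 22, 0, 0)
times = ([t0 + datetime.timedelta(seconds=60 * k) for k in range(5)] +          # first block
         [t0 + datetime.timedelta(hours=2, seconds=60 * k) for k in range(5)])  # second block, 2 h later

limits = group_images_in_blocks(times, limit=3)
print(limits)                        # something like [ 0  5 11]
for lo, hi in zip(limits[:-1], limits[1:]):
    print(len(times[lo:hi]))         # 5 images in each block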
def _computePositionTraditionalControl(self, caseObservations, controlObservations, methylFractionFlag, identifyFlag, testProcedure=_tTest): """Summarize the observed ipds at one template position/strand, using a case-control analysis""" # Compute stats on the observed ipds caseData = caseObservations['data']['ipd'] controlData = controlObservations['data']['ipd'] res = dict() res['refId'] = self.refId # FASTA header name res['refName'] = self.refName strand = res['strand'] = 1 - caseObservations['strand'] tpl = res['tpl'] = caseObservations['tpl'] res['base'] = self.cognateBaseFunc(tpl, strand) res['coverage'] = int(round((caseData.size + controlData.size) / 2.0)) # need a coverage annotation res['caseCoverage'] = caseData.size res['controlCoverage'] = controlData.size res['caseMean'] = caseData.mean().item() res['caseMedian'] = np.median(caseData).item() res['caseStd'] = np.std(caseData).item() res['controlMean'] = controlData.mean().item() res['controlMedian'] = np.median(controlData).item() res['controlStd'] = np.std(controlData).item() trim = (0.001, 0.03) ctrlMean = mstats.trimmed_mean(controlData, trim).item() if abs(ctrlMean) > 1e-3: res['ipdRatio'] = (mstats.trimmed_mean(caseData, trim).item() / ctrlMean) else: res['ipdRatio'] = 1.0 testResults = testProcedure(caseData, controlData) res['testStatistic'] = testResults['testStatistic'] res['pvalue'] = testResults['pvalue'] pvalue = max(sys.float_info.min, res['pvalue']) res['score'] = round(-10.0 * math.log10(pvalue)) # If the methylFractionFlag is set, then estimate fraction using just modelPrediction in the detection case. if methylFractionFlag and pvalue < self.options.pvalue and not identifyFlag: if res['controlCoverage'] > self.options.methylMinCov and res['caseCoverage'] > self.options.methylMinCov: # Instantiate mixture estimation methods: mixture = MixtureEstimationMethods(self.ipdModel.gbmModel.post, self.ipdModel.gbmModel.pre, res, self.options.methylMinCov) x = mixture.detectionMixModelBootstrap(res['controlMean'], caseData) res[FRAC] = x[0] res[FRAClow] = x[1] res[FRACup] = x[2] else: res[FRAC] = np.nan res[FRACup] = np.nan res[FRAClow] = np.nan return res
def allclose_with_out(x, y, atol=0.0, rtol=1.0e-5):
    # run the np.allclose on x and y
    # if it fails print some stats
    # before returning
    ac = np.allclose(x, y, rtol=rtol, atol=atol)
    if not ac:
        dd = np.abs(x - y)
        neon_logger.display('abs errors: %e [%e, %e] Abs Thresh = %e' %
                            (np.median(dd), np.min(dd), np.max(dd), atol))
        amax = np.argmax(dd)
        if np.isscalar(x):
            neon_logger.display('worst case: %e %e' % (x, y.flat[amax]))
        elif np.isscalar(y):
            neon_logger.display('worst case: %e %e' % (x.flat[amax], y))
        else:
            neon_logger.display('worst case: %e %e' % (x.flat[amax], y.flat[amax]))
        dd = np.abs(dd - atol) / np.abs(y)
        neon_logger.display('rel errors: %e [%e, %e] Rel Thresh = %e' %
                            (np.median(dd), np.min(dd), np.max(dd), rtol))
        amax = np.argmax(dd)
        if np.isscalar(x):
            neon_logger.display('worst case: %e %e' % (x, y.flat[amax]))
        elif np.isscalar(y):
            neon_logger.display('worst case: %e %e' % (x.flat[amax], y))
        else:
            neon_logger.display('worst case: %e %e' % (x.flat[amax], y.flat[amax]))
    return ac
def medianVolume(self): volpath = os.path.join(self.params['rundir'], "volumes/*a.mrc") mrcfiles = glob.glob(volpath) volumes = [] for filename in mrcfiles: if os.path.isfile(filename): vol = mrc.read(filename) print filename, vol.shape volumes.append(vol) volarray = numpy.asarray(volumes, dtype=numpy.float32) try: medarray = numpy.median(volarray, axis=0) except: medarray = numpy.median(volarray) medfile = os.path.join(self.params['rundir'], "volumes/medianVolume.mrc") print medfile, medarray.shape mrc.write(medarray, medfile) apix = apStack.getStackPixelSizeFromStackId(self.params['stackid']) sessiondata = apStack.getSessionDataFromStackId(self.params['stackid']) uploadcmd = ( ("uploadModel.py --projectid=%d --session=%s --file=%s " +"--apix=%.3f --sym=%s --name=satmedian-recon%d.mrc --res=30 --description='%s %d'") %(self.params['projectid'], sessiondata['name'], medfile, apix, self.params['symmname'], self.params['reconid'], "SAT selected median volume for recon", self.params['reconid'], ) ) apDisplay.printColor(uploadcmd, "purple") f = open("upload.sh", "w") f.write(uploadcmd+"\n") f.close()
def start_requests(self): summary_utc = datetime.utcnow() - timedelta(days=1) db_engine = create_engine(self.settings.get('SQLALCHEMY_DATABASE_URI')) db_session = sessionmaker(bind=db_engine)() db_query = db_session.query(LiveTVSite.id.label('site_id'), LiveTVRoom.id.label('room_id'), LiveTVRoom.url.label('room_url'), LiveTVRoomPresent.crawl_date_format.label('summary_date'), func.array_agg(LiveTVRoomPresent.online).label('online_list'))\ .join(LiveTVSite, LiveTVRoom, LiveTVRoomPresent)\ .filter(LiveTVRoomPresent.crawl_date_format == summary_utc.strftime(DAILY_DATE_FORMAT))\ .group_by(LiveTVSite.id, LiveTVRoom.id, LiveTVRoom.url, LiveTVRoomPresent.crawl_date_format) for group_row in db_query: meta_info = { 'site_id': group_row.site_id, 'room_id': group_row.room_id, 'summary_date': group_row.summary_date, 'online': numpy.median(group_row.online_list) } room = self.session.query(LiveTVRoom).filter_by(id=meta_info['room_id']).one_or_none() if room: yield DailyItem(site_id=group_row.site_id, room_id=group_row.room_id, summary_date=group_row.summary_date, online=numpy.median(group_row.online_list), followers=room.followers, description=room.description, announcement=room.announcement, fallback=False) db_session.close()
def explore_city_data(city_data):
    """Calculate the Boston housing statistics."""

    # Get the labels and features from the housing data
    housing_prices = city_data.target
    housing_features = city_data.data

    ###################################
    ### Step 1. YOUR CODE GOES HERE ###
    ###################################

    # Please calculate the following values using the Numpy library
    print "Size of data (number of houses)"
    print np.size(housing_prices)
    print "Number of features"
    print np.size(housing_features, 1)
    print "Minimum price"
    print np.min(housing_prices)
    print "Maximum price"
    print np.max(housing_prices)
    print "Calculate mean price"
    print np.mean(housing_prices)
    print "Calculate median price"
    print np.median(housing_prices)
    print "Calculate standard deviation"
    print np.std(housing_prices)
def make_lick_individual(targetSN, w1, w2): """ Make maps for the kinematics. """ filename = "lick_corr_sn{0}.tsv".format(targetSN) binimg = pf.getdata("voronoi_sn{0}_w{1}_{2}.fits".format(targetSN, w1, w2)) intens = "collapsed_w{0}_{1}.fits".format(w1, w2) extent = calc_extent(intens) bins = np.loadtxt(filename, usecols=(0,), dtype=str).tolist() bins = np.array([x.split("bin")[1] for x in bins]).astype(int) data = np.loadtxt(filename, usecols=np.arange(25)+1).T labels = [r'Hd$_A$', r'Hd$_F$', r'CN$_1$', r'CN$_2$', r'Ca4227', r'G4300', r'Hg$_A$', r'Hg$_F$', r'Fe4383', r'Ca4455', r'Fe4531', r'C4668', r'H$_\beta$', r'Fe5015', r'Mg$_1$', r'Mg$_2$', r'Mg$_b$', r'Fe5270', r'Fe5335', r'Fe5406', r'Fe5709', r'Fe5782', r'Na$_D$', r'TiO$_1$', r'TiO$_2$'] mag = "[mag]" ang = "[\AA]" units = [ang, ang, mag, mag, ang, ang, ang, ang, ang, ang, ang, ang, ang, ang, mag, mag, ang, ang, ang, ang, ang, ang, ang, mag, mag] lims = [[None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None], [None, None]] pdf = PdfPages("figs/lick_sn{0}.pdf".format(targetSN)) fig = plt.figure(1, figsize=(6.25,5)) plt.subplots_adjust(bottom=0.12, right=0.97, left=0.09, top=0.96) plt.minorticks_on() ax = plt.subplot(111) ax.minorticks_on() plot_indices = np.arange(12,22) for i, vector in enumerate(data): if i not in plot_indices: continue print "Making plot for {0}...".format(labels[i]) kmap = np.zeros_like(binimg) kmap[:] = np.nan for bin,v in zip(bins, vector): idx = np.where(binimg == bin) kmap[idx] = v vmin = lims[i][0] if lims[i][0] else np.median(vector) - 2 * vector.std() vmax = lims[i][1] if lims[i][1] else np.median(vector) + 2 * vector.std() m = plt.imshow(kmap, cmap="inferno", origin="bottom", vmin=vmin, vmax=vmax, extent=extent, aspect="equal") make_contours() plt.minorticks_on() plt.xlabel("X [kpc]") plt.ylabel("Y [kpc]") plt.xlim(extent[0], extent[1]) plt.ylim(extent[2], extent[3]) cbar = plt.colorbar(m) cbar.set_label("{0} {1}".format(labels[i], units[i])) pdf.savefig() plt.clf() pdf.close() return
def plotB2reg(prefix=''):
    w = loadStanFit(prefix + 'revE2B2LHregCa.fit')
    px = np.array(np.linspace(-0.5, 0.5, 101), ndmin=2)
    a1 = np.array(w['ma'][:, 4], ndmin=2).T + 1
    a0 = np.array(w['ma'][:, 3], ndmin=2).T
    printCI(w, 'ma')
    y = np.concatenate([sap(a0 + a1 * px, 97.5, axis=0), sap(a0 + a1 * px[:, ::-1], 2.5, axis=0)])
    x = np.squeeze(np.concatenate([px, px[:, ::-1]], axis=1))
    man = np.array([-0.4, -0.2, 0, 0.2, 0.4])
    plt.plot(px[0, :], np.median(a0) + np.median(a1) * px[0, :], 'red')
    #plt.plot([-1,1],[0.5,0.5],'grey')
    ax = plt.gca()
    ax.set_aspect(1)
    ax.add_patch(plt.Polygon(np.array([x, y]).T, alpha=0.2, fill=True, fc='red', ec='w'))
    y = np.concatenate([sap(a0 + a1 * px, 75, axis=0), sap(a0 + a1 * px[:, ::-1], 25, axis=0)])
    ax.add_patch(plt.Polygon(np.array([x, y]).T, alpha=0.2, fill=True, fc='red', ec='w'))
    mus = []
    for m in range(len(man)):
        mus.append(loadStanFit(prefix + 'revE2B2LHC%d.fit' % m)['ma4'] + man[m])
    mus = np.array(mus).T
    errorbar(mus, x=man)
    ax.set_xticks(man)
    plt.xlim([-0.5, 0.5])
    plt.ylim([-0.6, 0.8])
    plt.xlabel('Pivot Displacement')
    plt.ylabel('Perceived Displacement')
def plotB3reg():
    w = loadStanFit('revE2B3BHreg.fit')
    printCI(w, 'mmu')
    printCI(w, 'mr')
    for b in range(2):
        subplot(1, 2, b + 1)
        plt.title('')
        px = np.array(np.linspace(-0.5, 0.5, 101), ndmin=2)
        a0 = np.array(w['mmu'][:, b], ndmin=2).T
        a1 = np.array(w['mr'][:, b], ndmin=2).T
        y = np.concatenate([sap(a0 + a1 * px, 97.5, axis=0), sap(a0 + a1 * px[:, ::-1], 2.5, axis=0)])
        x = np.squeeze(np.concatenate([px, px[:, ::-1]], axis=1))
        plt.plot(px[0, :], np.median(a0) + np.median(a1) * px[0, :], 'red')
        #plt.plot([-1,1],[0.5,0.5],'grey')
        ax = plt.gca()
        ax.set_aspect(1)
        ax.add_patch(plt.Polygon(np.array([x, y]).T, alpha=0.2, fill=True, fc='red', ec='w'))
        y = np.concatenate([sap(a0 + a1 * px, 75, axis=0), sap(a0 + a1 * px[:, ::-1], 25, axis=0)])
        ax.add_patch(plt.Polygon(np.array([x, y]).T, alpha=0.2, fill=True, fc='red', ec='w'))
        man = np.array([-0.4, -0.2, 0, 0.2, 0.4])
        mus = []
        for m in range(len(man)):
            mus.append(loadStanFit('revE2B3BH%d.fit' % m)['mmu'][:, b])
        mus = np.array(mus).T
        errorbar(mus, x=man)
        ax.set_xticks(man)
        plt.xlim([-0.5, 0.5])
        plt.ylim([-0.4, 0.8])
        #plt.xlabel('Manipulated Displacement')
        if b == 0:
            plt.ylabel('Perceived Displacement')
        plt.gca().set_yticklabels([])
    subplot_annotate()
    plt.text(-1.1, -0.6, 'Pivot Displacement', fontsize=8)
def columnpull(column, index, bg, stdev):
    """Define a column pull detector artifact.

    Parameters
    ----------
    column : array
      The column from a detector.
    index : int
      The index at which the column pull may have started, e.g., the
      location of a bright star.
    bg : float
      The background level of the image.
    stdev : float
      The background standard deviation.

    Returns
    -------
    pull : ndarray
      The shape of the column pull.

    """
    if (index < 0) or (index >= column.shape[0]):
        return

    m1 = np.median(column[:index]) - bg
    m2 = np.median(column[index:]) - bg
    pull = np.zeros_like(column)
    if (np.abs(m1 - m2) / stdev) > 1.0:
        pull[:index] = m1
        pull[index:] = m2

    return pull
def run(self, spikesorter, sign = '-', relative_thresh = 4., noise_estimation = 'MAD', threshold_mode = 'crossing',peak_span = 0.3*pq.ms, consistent_across_channels = False, consistent_across_segments = True, ): sps = spikesorter # Threshold estimation centers = np.zeros(sps.filtered_sigs.shape, dtype = float) noises = np.zeros(sps.filtered_sigs.shape, dtype = float) for c, s in np.ndindex(sps.filtered_sigs.shape): sig = sps.filtered_sigs[c, s] if noise_estimation=='MAD': centers[c, s] = np.median(sig) noises[c, s] = np.median(np.abs(sig-np.median(sig))) / .6745 elif noise_estimation=='STD': centers[c, s] = np.mean(sig) noises[c, s] = np.std(sig) if sign == '+': thresholds = centers + noises*abs(relative_thresh) if sign == '-': thresholds = centers - noises*abs(relative_thresh) peak_span = int((sps.sig_sampling_rate*peak_span).simplified) peak_span = (peak_span//2)*2+1 # Detect sps.spike_index_array = threshold_detection_multi_channel_multi_segment( sps.filtered_sigs, thresholds, sign, consistent_across_channels,consistent_across_segments, threshold_mode, peak_span) sps.detection_thresholds = thresholds
def test_compare_cache_benchmark(self, varying_param, analytics_data, plt): stats = pytest.importorskip('scipy.stats') d1, d2 = analytics_data assert np.all(d1[varying_param] == d2[varying_param]), ( 'Cannot compare different parametrizations') axis_label = self.param_to_axis_label[varying_param] print("Cache, varying {0}:".format(axis_label)) for label, key in zip(self.labels, self.keys): clean_d1 = [self.reject_outliers(d) for d in d1[key]] clean_d2 = [self.reject_outliers(d) for d in d2[key]] diff = [np.median(b) - np.median(a) for a, b in zip(clean_d1, clean_d2)] p_values = np.array([2. * stats.mannwhitneyu(a, b)[1] for a, b in zip(clean_d1, clean_d2)]) overall_p = 1. - np.prod(1. - p_values) if overall_p < .05: print(" {label}: Significant change (p <= {p:.3f}). See plots" " for details.".format( label=label, p=np.ceil(overall_p * 1000.) / 1000.)) else: print(" {label}: No significant change.".format(label=label)) plt.plot(d1[varying_param], diff, label=label) plt.xlabel("Number of %s" % axis_label) plt.ylabel("Difference in build time (s)") plt.legend(loc='best')
def q1():
    # generate random clusters
    clusters = []
    sizes = range(2, 201)
    for size in sizes:
        clusters.append(gen_random_clusters(size))

    # get running times
    random.seed(912)
    # run 10 trials, and take the median time for each n to smooth data
    slow_trials = np.zeros((10, 199))
    fast_trials = np.zeros((10, 199))
    for i in range(10):
        slow_trials[i, :] = timer(slow_closest_pair, clusters)
        fast_trials[i, :] = timer(fast_closest_pair, clusters)

    # times
    slow_times = np.median(slow_trials, 0)
    fast_times = np.median(fast_trials, 0)

    # plot
    plt.figure()
    plt.plot(sizes, slow_times, 'c-', label='slow_closest_pair')
    plt.plot(sizes, fast_times, 'm-', label='fast_closest_pair')
    plt.legend(loc='upper left')
    plt.xlabel('Size of Cluster List')
    plt.ylabel('Median Running Time (s), 10 Trials')
    plt.title('Comparison of Running Times on Desktop Python')
    plt.show()

    return None
def lonlat2xy(lon, lat, lon_0=None, lat_0=None):
    """ Convert pairs of (Lat, Lon) into (x, y)

        Input:
            Lon [deg]
            Lat [deg]
            Lon_0 [deg] => Lon of the origin of the cartesian system
            Lat_0 [deg] => Lat of the origin of the cartesian system
        Output:
            x [m]
            y [m]

        The projection deforms as one moves away from the center. Since
        latitudes do not deform, y is estimated first; then, for each point,
        the distance to the reference meridian (Lon_0) is estimated at the
        latitude of that measurement.
    """
    if (lat_0 is None) or (lon_0 is None):
        lat_0 = numpy.median(lat)
        lon_0 = numpy.median(lon)

    from fluid.common.distance import distance
    y = distance(lat, 0, lat_0, 0)
    y[lat < lat_0] = -1 * y[lat < lat_0]
    x = distance(lat, lon, lat, lon_0)
    x[lon < lon_0] = -1 * x[lon < lon_0]
    return x, y
def is_outlier(points, threshold=3.5):
    """
    Returns a boolean array with True if points are outliers and False
    otherwise.

    Data points with a modified z-score greater than `threshold` will be
    classified as outliers.
    """
    # transform into vector
    if len(points.shape) == 1:
        points = points[:, None]

    # compute median value
    median = np.median(points, axis=0)

    # compute diff sums along the axis
    diff = np.sum((points - median)**2, axis=-1)
    diff = np.sqrt(diff)

    # compute MAD
    med_abs_deviation = np.median(diff)

    # compute modified Z-score
    # http://www.itl.nist.gov/div898/handbook/eda/section4/eda43.htm#Iglewicz
    modified_z_score = 0.6745 * diff / med_abs_deviation

    # return a mask for each outlier
    return modified_z_score > threshold
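# A quick usage sketch for is_outlier with invented data: mostly tight values plus one
# obvious outlier. The numbers are assumptions for illustration only.
import numpy as np

x = np.array([10.1, 9.9, 10.0, 10.2, 9.8, 10.1, 55.0])
mask = is_outlier(x, threshold=3.5)
print(mask)        # [False False False False False False  True]
print(x[~mask])    # keeps the well-behaved points
# Note: if more than half of the points are identical, med_abs_deviation is 0 and the
# modified z-score divides by zero; guard against that case if it can occur in your data.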
def denoise(self, data, wavelet):
    # Assumes `import pywt` and that numpy's median/absolute/floor/log/sqrt are
    # available in the module namespace (e.g. via `from numpy import *`).
    noiseSigma = median(absolute(data - median(data))) / 0.6745   # MAD-based sigma estimate
    levels = int(floor(log(len(data))))
    WC = pywt.wavedec(data, wavelet, level=levels)
    threshold = noiseSigma * sqrt(2 * log(len(data)))             # universal (VisuShrink) threshold
    # pywt.thresholding.hard is the pre-0.5 PyWavelets API; newer releases expose
    # pywt.threshold(x, threshold, mode='hard') instead.
    NWC = map(lambda x: pywt.thresholding.hard(x, threshold), WC)
    return pywt.waverec(NWC, wavelet)
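# A standalone sketch of the same idea against the current PyWavelets API, where the old
# pywt.thresholding module has been replaced by pywt.threshold. Unlike the method above,
# this common variant estimates sigma from the finest detail coefficients rather than the
# raw signal; the wavelet name and test signal are assumptions for illustration.
import numpy as np
import pywt

def wavelet_denoise(data, wavelet='db4'):
    coeffs = pywt.wavedec(data, wavelet)
    # sigma from the median absolute deviation of the finest-scale detail coefficients
    sigma = np.median(np.abs(coeffs[-1] - np.median(coeffs[-1]))) / 0.6745
    thr = sigma * np.sqrt(2 * np.log(len(data)))   # universal threshold
    # keep the approximation coefficients, hard-threshold the detail coefficients
    denoised = [coeffs[0]] + [pywt.threshold(c, thr, mode='hard') for c in coeffs[1:]]
    return pywt.waverec(denoised, wavelet)

t = np.linspace(0, 1, 1024)
noisy = np.sin(2 * np.pi * 5 * t) + 0.3 * np.random.randn(t.size)
clean = wavelet_denoise(noisy)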
def _idealize_uncert(dds):
    for action in dds.actions:
        field = action.diffeo.d
        field_inv = action.diffeo_inv.d
        I = np.zeros(field.shape)
        Y, X = np.meshgrid(range(field.shape[1]), range(field.shape[0]))
        I[:, :, 0] = X
        I[:, :, 1] = Y

        D = field - I
        v = (np.median(D[:, :, 0]), np.median(D[:, :, 1]))

        D_inv = field_inv - I
        v_inv = (np.median(D_inv[:, :, 0]), np.median(D_inv[:, :, 1]))

        print('v = ' + str(v))
        print('v_inv = ' + str(v_inv))

        for c in itertools.product(range(X.shape[0]), range(X.shape[1])):
            if defined_cell(c, X.shape, v):
                action.diffeo.variance[c] = 1.0
            else:
                action.diffeo.variance[c] = 0.0

            if defined_cell(c, X.shape, v_inv):
                action.diffeo_inv.variance[c] = 1.0
            else:
                action.diffeo_inv.variance[c] = 0.0
    return dds
def remaining_time(self): """Return our best estimate of the remaining duration, or None if we have no bases for guessing.""" if self.end_times is None: return None # We have not started the first module yet else: module_index = self.current_module.module_num - 1 index = self.image_set_index * self.num_modules + module_index durations = (self.end_times[1:] - self.end_times[:-1]).reshape(self.num_image_sets, self.num_modules) per_module_estimates = np.zeros(self.num_modules) per_module_estimates[:module_index] = np.median(durations[:self.image_set_index+1,:module_index], 0) current_module_so_far = self.adjusted_time() - self.end_times[1 + index - 1] if self.image_set_index > 0: per_module_estimates[module_index:] = np.median(durations[:self.image_set_index,module_index:], 0) per_module_estimates[module_index] = max(per_module_estimates[module_index], current_module_so_far) else: # Guess that the modules that haven't finished yet are # as slow as the slowest one we've seen so far. per_module_estimates[module_index] = current_module_so_far per_module_estimates[module_index:] = per_module_estimates[:module_index+1].max() if False: print "current_module_so_far =", current_module_so_far, "; adjusted_time =", self.adjusted_time(), "; end_times =", self.end_times print "durations:" print durations print "per_module_estimates:" print per_module_estimates per_module_estimates[:module_index] *= self.num_image_sets - self.image_set_index - 1 per_module_estimates[module_index:] *= self.num_image_sets - self.image_set_index per_module_estimates[module_index] -= current_module_so_far return per_module_estimates.sum()
def __init__(self, f, label, color="k", linestyle="-"):
    d = np.load(f)
    self.data = d
    self.mass = d["mass"]
    self.ul_med = []
    self.ul68_lo = []
    self.ul68_hi = []
    self.ul95_lo = []
    self.ul95_hi = []
    self.label = label
    self.color = color
    self.linestyle = linestyle

    for i in range(len(d["mass"])):
        ul = np.sort(d["ul"][:, i])
        ul = ul[ul > 0]
        n = len(ul)
        m = np.median(ul)
        # indices into the sorted sample must be integers
        self.ul68_lo.append(ul[int(max(0, n / 2.0 - n * 0.34))])
        self.ul68_hi.append(ul[int(min(n - 1, n / 2.0 + n * 0.34))])
        self.ul95_lo.append(ul[int(max(0, n / 2.0 - n * 0.95 / 2.0))])
        self.ul95_hi.append(ul[int(min(n - 1, n / 2.0 + n * 0.95 / 2.0))])
        self.ul_med.append(np.median(ul))
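# The order-statistic indexing above can be written more directly with np.percentile.
# A minimal equivalent sketch (not the original author's code; band definitions assumed
# to be the symmetric 68% and 95% intervals around the median):
import numpy as np

def upper_limit_bands(ul):
    """ul: 1-D array of upper limits for one mass bin; non-positive entries are dropped."""
    ul = ul[ul > 0]
    return {
        'median': np.median(ul),
        'ul68': np.percentile(ul, [50 - 34, 50 + 34]),
        'ul95': np.percentile(ul, [2.5, 97.5]),
    }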
def createModel(self, b, g, r):
    bMinusr = self.bMinusr
    bMinusg = self.bMinusg
    b0 = b.copy()
    g0 = g.copy()
    r0 = r.copy()
    w = r.shape[0]/2 - 5
    rb = r0/b0
    gb = g0/b0
    rnorm = numpy.median(rb[w:-w, w:-w])
    gnorm = numpy.median(gb[w:-w, w:-w])
    r0 /= rnorm
    g0 /= gnorm
    r0 *= 10**(0.4*bMinusr)
    g0 *= 10**(0.4*bMinusg)
    r0 /= 620.
    g0 /= 540.
    b0 /= 460.
    I = (r0 + g0 + b0)/3.
    self.I = I
    self.rnorm = rnorm
    self.gnorm = gnorm
    return self.colorize(b, g, r)
print('Finished runInfo- which assesses the refresh and processes of this computer') #check screen refresh is what assuming it is ############################################## Hzs = list() myWin.flip() myWin.flip() myWin.flip() myWin.flip() myWin.setRecordFrameIntervals(True) # otherwise myWin.fps won't work print('About to measure frame flips') for i in range(50): myWin.flip() Hzs.append(myWin.fps()) # varies wildly on successive runs! myWin.setRecordFrameIntervals(False) # end testing of screen refresh######################################################## Hzs = np.array(Hzs) Hz = np.median(Hzs) msPerFrame = 1000./Hz refreshMsg1 = 'Frames per second ~=' + str(np.round(Hz, 1)) refreshRateTolerancePct = 3 pctOff = abs((np.median(Hzs)-refreshRate) / refreshRate) refreshRateWrong = pctOff > (refreshRateTolerancePct/100.) if refreshRateWrong: refreshMsg1 += ' BUT' refreshMsg1 += ' program assumes ' + str(refreshRate) refreshMsg2 = 'which is off by more than' + str(round(refreshRateTolerancePct, 0)) + '%!!' else: refreshMsg1 += ', which is close enough to desired val of ' + str(round(refreshRate, 1)) myWinRes = myWin.size myWin.allowGUI = True print(myWinRes) myWin.close() # have to close window to show dialog box
def main(): # Command line arguments parser = argparse.ArgumentParser('Label an image using the cat model') parser.add_argument( '-s', '--server', help='URL of host serving the cat model' ) parser.add_argument( '-p', '--port', type=int, default=9000, help='Port at which cat model is being served' ) parser.add_argument( '-m', '--model', type=str, default='resnet', help='Paths (local or url) to images you would like to label' ) parser.add_argument( '-d', '--dim', type=int, default=224, help='Size of (square) image, an integer indicating its width and ' 'height. Resnet\'s default is 224' ) parser.add_argument( '-r', '--replications', type=int, default=1, help='How many times to replicate samples to send a larger batch size' ) parser.add_argument( 'images', type=str, nargs='+', help='Paths (local, GCS, or url) to images you would like to label' ) parser.add_argument( '-n', '--num_trials', type=int, default='.txt', help='File used to log batch serving request delays. Will create file' 'if it does not exist. Otherwise, it will append to the file.' ) args = parser.parse_args() # Preprocess images at the client and compress as jpeg img_size = args.dim images = args.images jpeg_batch = preprocess_and_encode_images(images, img_size) # Create r copies of the array for profiling. batch_array = [] for i in range(0, args.replications): batch_array = np.append(batch_array, jpeg_batch, axis=0) batch_size = len(batch_array) print("Batch size: " + str(batch_size)) # Call the server num_trials times elapsed_times = [] for t in range(0, args.num_trials): # Call the server to predict top 5 classes and probabilities, and time taken result, elapsed = predict_and_profile( args.server, args.port, args.model, batch_array) # Print and log the delay print('Request delay: ' + str(elapsed) + ' ms') elapsed_times.append(elapsed) print('Mean: %0.2f' % np.mean(elapsed_times)) print('Median: %0.2f' % np.median(elapsed_times)) print('Min: %0.2f' % np.min(elapsed_times)) print('Max: %0.2f' % np.max(elapsed_times))
def data_statistics(self, Ephem): ''' Make statistics on the data. Useful to summarize night conditions. ''' def select_bests(values, number): return (np.sort(values)[::-1][0:number]) def fourier_filter(array, nterms): ''' Make a fourier filter for the first nterms terms. ''' array_fft = np.fft.fft(array) # Filter data array_fft[nterms:] = 0 filtered_array = np.fft.ifft(array_fft) return (filtered_array) def window_smooth(x, window_len=10, window='hanning'): # http://scipy-cookbook.readthedocs.io/items/SignalSmooth.html x = np.asarray(x) if x.ndim != 1: raise ValueError, "smooth requires 1-d arrays" if x.size < window_len: raise ValueError, "size(input) < window_size" if window_len < 3: return x if not window in [ 'flat', 'hanning', 'hamming', 'bartlett', 'blackman' ]: raise ValueError, \ "Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'" s = np.r_[x[window_len - 1:0:-1], x, x[-2:-window_len - 1:-1]] if window == 'flat': # moving average w = np.ones(window_len, 'd') else: w = eval('np.' + window + '(window_len)') y = np.convolve(w / w.sum(), s, mode='valid') return (y) astronomical_night_filter = ( \ (np.array(self.all_night_dt) > Ephem.twilight_prev_set) * \ (np.array(self.all_night_dt) < Ephem.twilight_next_rise)) if np.sum(astronomical_night_filter) > 10: self.astronomical_night_sb = \ np.array(self.all_night_sb)[astronomical_night_filter] self.astronomical_night_temp = \ np.array(self.all_night_temp)[astronomical_night_filter] else: print( \ 'Warning, < 10 points in astronomical night, ' + \ ' using the whole night data instead') self.astronomical_night_sb = self.all_night_sb self.astronomical_night_temp = self.all_night_temp Stat = self.Statistics # with self.Statistics as Stat: # Complete list Stat.mean = np.mean(self.astronomical_night_sb) Stat.median = np.median(self.astronomical_night_sb) Stat.std = np.median(self.astronomical_night_sb) Stat.number = np.size(self.astronomical_night_sb) # Only the best 1/100th. Stat.bests_number = 1 + Stat.number / 25 Stat.bests_mean = np.mean( select_bests(self.astronomical_night_sb, Stat.bests_number)) Stat.bests_median = np.median( select_bests(self.astronomical_night_sb, Stat.bests_number)) Stat.bests_std = np.std( select_bests(self.astronomical_night_sb, Stat.bests_number)) Stat.bests_err = Stat.bests_std * 1. / np.sqrt(Stat.bests_number) Stat.model_nterm = 1 + Stat.number / 25 # data_smooth = fourier_filter(self.astronomical_night_sb,nterms=Stat.model_nterm) data_smooth = window_smooth(self.astronomical_night_sb, window_len=Stat.model_nterm) min_length = min(len(data_smooth), len(self.astronomical_night_sb)) data_residuals = self.astronomical_night_sb[: min_length] - data_smooth[: min_length] Stat.data_model_abs_meandiff = np.mean(np.abs(data_residuals)) Stat.data_model_sum_squareresiduals = np.sum(data_residuals**2) # Other interesting data Stat.min_temperature = np.min(self.astronomical_night_temp) Stat.max_temperature = np.max(self.astronomical_night_temp)
all_regions = np.unique(current_latency_dataframe['region']) for region in all_regions: if len(selected_frame) == 0: region_units = current_latency_dataframe[( current_latency_dataframe['region'] == region)] else: region_units = current_latency_dataframe[ (current_latency_dataframe['region'] == region) & (current_latency_dataframe['frame'] == selected_frame)] all_latencies = region_units[current_version].values.astype(float) all_latencies = all_latencies[~np.isnan(all_latencies)] if len(all_latencies) == 0: all_latencies = np.zeros(1) latencies_across_region.append(all_latencies) mean_per_region.append(np.median(all_latencies)) all_region_names.append(region) if display_plot: regional_medians = [] for region_e in regions_in_all_exps: if region_e in all_region_names: regional_medians.append( mean_per_region[all_region_names.index(region_e)]) else: regional_medians.append(0) ax.set_ylim([0, 200]) x_axis_vals = range(1, len(regions_in_all_exps) + 1) ax.set_xticks(x_axis_vals) ax.set_xticklabels(regions_in_all_exps) ax.plot(x_axis_vals, regional_medians, marker='o')
import random

import numpy as np
import matplotlib.pyplot as plt

num_tries = 1000
succ_chance = 0.4

i = 0
avg_tries = []
while (i < num_tries):
    tries = 0
    while (True):
        roll = random.random()
        tries += 1
        if roll > (1 - succ_chance):
            break
    avg_tries.append(tries)
    i += 1

print("empirical: ", np.median(avg_tries))
print("logarithm: ", np.log(0.5) / np.log(1 - succ_chance))
print(" division: ", 1 / succ_chance)

plt.hist(avg_tries, bins=20)
plt.show()
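# For reference: the number of trials until the first success is geometric with parameter p,
# so its median is ceil(log(0.5) / log(1 - p)), which is what the "logarithm" line above
# approximates, while 1/p is the mean. A quick check (assuming scipy is available):
from math import ceil, log

import scipy.stats

p = 0.4
print(ceil(log(0.5) / log(1 - p)))     # 2
print(scipy.stats.geom(p).median())    # 2.0
print(scipy.stats.geom(p).mean())      # 2.5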
def main(): args = get_args() torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) if args.cuda and torch.cuda.is_available() and args.cuda_deterministic: torch.backends.cudnn.benchmark = False torch.backends.cudnn.deterministic = True log_dir = os.path.expanduser(args.log_dir) eval_log_dir = log_dir + "_eval" utils.cleanup_log_dir(log_dir) utils.cleanup_log_dir(eval_log_dir) torch.set_num_threads(1) device = torch.device("cuda:0" if args.cuda else "cpu") name = "compiled_dataset_08131950" #add 50 back in embed_dim = 300 # switch this later!! embed_size = embed_dim with open('data/'+name+'_all_instructions', 'rb') as f: all_instructions = pickle.load(f) vocab, vocab_weights = build_vocabulary(all_instructions, name, embed_dim) vocab.add_word('<pad>') vocab.add_word('<start>') vocab.add_word('<end>') vocab.add_word('<unk>') envs = make_vec_envs(args.env_name, args.seed, args.num_processes, args.gamma, args.log_dir, device, False, vocabulary=vocab) #actor_critic = Policy( # envs.observation_space.shape, # envs.action_space, # base_kwargs={'recurrent': args.recurrent_policy}) actor_critic, ob_rms = torch.load(args.load_dir + ".pt") actor_critic.to(device) if args.algo == 'a2c': agent = algo.A2C_ACKTR( actor_critic, args.value_loss_coef, args.entropy_coef, lr=args.lr, eps=args.eps, alpha=args.alpha, max_grad_norm=args.max_grad_norm) elif args.algo == 'ppo': agent = algo.PPO( actor_critic, args.clip_param, args.ppo_epoch, args.num_mini_batch, args.value_loss_coef, args.entropy_coef, lr=args.lr, eps=args.eps, max_grad_norm=args.max_grad_norm) elif args.algo == 'acktr': agent = algo.A2C_ACKTR( actor_critic, args.value_loss_coef, args.entropy_coef, acktr=True) if args.gail: assert len(envs.observation_space.shape) == 1 discr = gail.Discriminator( envs.observation_space.shape[0] + envs.action_space.shape[0], 100, device) file_name = os.path.join( args.gail_experts_dir, "trajs_{}.pt".format( args.env_name.split('-')[0].lower())) gail_train_loader = torch.utils.data.DataLoader( gail.ExpertDataset( file_name, num_trajectories=4, subsample_frequency=20), batch_size=args.gail_batch_size, shuffle=True, drop_last=True) #print(args.num_env_steps) rollouts = RolloutStorage(args.num_steps, args.num_processes, envs.observation_space.shape, envs.action_space, actor_critic.recurrent_hidden_state_size) obs = envs.reset() rollouts.obs[0].copy_(obs) rollouts.to(device) episode_rewards = deque(maxlen=10) start = time.time() num_updates = int( args.num_env_steps) // args.num_steps // args.num_processes #print(num_updates) for j in range(num_updates): if args.use_linear_lr_decay: # decrease learning rate linearly utils.update_linear_schedule( agent.optimizer, j, num_updates, agent.optimizer.lr if args.algo == "acktr" else args.lr) for step in range(args.num_steps): # Sample actions with torch.no_grad(): value, action, action_log_prob, recurrent_hidden_states = actor_critic.act( rollouts.obs[step], rollouts.recurrent_hidden_states[step], rollouts.masks[step]) # Obser reward and next obs obs, reward, done, infos = envs.step(action) for info in infos: if 'episode' in info.keys(): episode_rewards.append(info['episode']['r']) # If done then clean the history of observations. 
masks = torch.FloatTensor( [[0.0] if done_ else [1.0] for done_ in done]) bad_masks = torch.FloatTensor( [[0.0] if 'bad_transition' in info.keys() else [1.0] for info in infos]) rollouts.insert(obs, recurrent_hidden_states, action, action_log_prob, value, reward, masks, bad_masks) with torch.no_grad(): next_value = actor_critic.get_value( rollouts.obs[-1], rollouts.recurrent_hidden_states[-1], rollouts.masks[-1]).detach() if args.gail: if j >= 10: envs.venv.eval() gail_epoch = args.gail_epoch if j < 10: gail_epoch = 100 # Warm up for _ in range(gail_epoch): discr.update(gail_train_loader, rollouts, utils.get_vec_normalize(envs)._obfilt) for step in range(args.num_steps): rollouts.rewards[step] = discr.predict_reward( rollouts.obs[step], rollouts.actions[step], args.gamma, rollouts.masks[step]) rollouts.compute_returns(next_value, args.use_gae, args.gamma, args.gae_lambda, args.use_proper_time_limits) value_loss, action_loss, dist_entropy = agent.update(rollouts) rollouts.after_update() # save for every interval-th episode or for the last epoch if (j % args.save_interval == 0 or j == num_updates - 1) and args.save_dir != "": save_path = os.path.join(args.save_dir, args.algo) try: os.makedirs(save_path) except OSError: pass torch.save([ actor_critic, getattr(utils.get_vec_normalize(envs), 'ob_rms', None) ], os.path.join(save_path, args.model_name + ".pt")) if j % args.log_interval == 0 and len(episode_rewards) > 1: total_num_steps = (j + 1) * args.num_processes * args.num_steps end = time.time() print( "Updates {}, num timesteps {}, FPS {} \n Last {} training episodes: mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}\n" .format(j, total_num_steps, int(total_num_steps / (end - start)), len(episode_rewards), np.mean(episode_rewards), np.median(episode_rewards), np.min(episode_rewards), np.max(episode_rewards), dist_entropy, value_loss, action_loss)) if (args.eval_interval is not None and len(episode_rewards) > 1 and j % args.eval_interval == 0): ob_rms = utils.get_vec_normalize(envs).ob_rms evaluate(actor_critic, ob_rms, args.env_name, args.seed, args.num_processes, eval_log_dir, device)
def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): """ Bootstrap plot on mean, median and mid-range statistics. The bootstrap plot is used to estimate the uncertainty of a statistic by relaying on random sampling with replacement [1]_. This function will generate bootstrapping plots for mean, median and mid-range statistics for the given number of samples of the given size. .. [1] "Bootstrapping (statistics)" in \ https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29 Parameters ---------- series : pandas.Series Pandas Series from where to get the samplings for the bootstrapping. fig : matplotlib.figure.Figure, default None If given, it will use the `fig` reference for plotting instead of creating a new one with default parameters. size : int, default 50 Number of data points to consider during each sampling. It must be greater or equal than the length of the `series`. samples : int, default 500 Number of times the bootstrap procedure is performed. **kwds : Options to pass to matplotlib plotting method. Returns ------- fig : matplotlib.figure.Figure Matplotlib figure See Also -------- pandas.DataFrame.plot : Basic plotting for DataFrame objects. pandas.Series.plot : Basic plotting for Series objects. Examples -------- .. plot:: :context: close-figs >>> s = pd.Series(np.random.uniform(size=100)) >>> fig = pd.plotting.bootstrap_plot(s) # doctest: +SKIP """ import random import matplotlib.pyplot as plt # random.sample(ndarray, int) fails on python 3.3, sigh data = list(series.values) samplings = [random.sample(data, size) for _ in range(samples)] means = np.array([np.mean(sampling) for sampling in samplings]) medians = np.array([np.median(sampling) for sampling in samplings]) midranges = np.array([(min(sampling) + max(sampling)) * 0.5 for sampling in samplings]) if fig is None: fig = plt.figure() x = lrange(samples) axes = [] ax1 = fig.add_subplot(2, 3, 1) ax1.set_xlabel("Sample") axes.append(ax1) ax1.plot(x, means, **kwds) ax2 = fig.add_subplot(2, 3, 2) ax2.set_xlabel("Sample") axes.append(ax2) ax2.plot(x, medians, **kwds) ax3 = fig.add_subplot(2, 3, 3) ax3.set_xlabel("Sample") axes.append(ax3) ax3.plot(x, midranges, **kwds) ax4 = fig.add_subplot(2, 3, 4) ax4.set_xlabel("Mean") axes.append(ax4) ax4.hist(means, **kwds) ax5 = fig.add_subplot(2, 3, 5) ax5.set_xlabel("Median") axes.append(ax5) ax5.hist(medians, **kwds) ax6 = fig.add_subplot(2, 3, 6) ax6.set_xlabel("Midrange") axes.append(ax6) ax6.hist(midranges, **kwds) for axis in axes: plt.setp(axis.get_xticklabels(), fontsize=8) plt.setp(axis.get_yticklabels(), fontsize=8) return fig
# NOTE: pseudocounts that come from the F4 dilution estimate, so we # only listen to the data if there is enough data points to listen to len_pseudo = 1 n_pseudo = sample.get_n_templates_dilutions() n_allp = np.concatenate([n_all, ([n_pseudo] * len_pseudo)]) if VERBOSE >= 2: print 'Number of doubly polymorphic sites:', nsites, 'n_pseudo:', n_pseudo # NOTE: the estimate of n has a bad distribution because some points are # exactly on the diagonal, so we average the inverse (which is well # behaved) and also take the medians as alternatives n = 1.0 / (1.0 / n_allp).mean() ninv = n_allp.mean() nmed = np.median(n_allp) if VERBOSE >= 2: print fr1, fr2, n, ninv, nmed key = (samplename, fr1, fr2) data['af'][key] = (af1[indfm], af2[indfm]) data['mean'][key] = mea data['var'][key] = var data['n_all'][key] = n_all data['n'][key] = n data['ninv'][key] = ninv data['nmed'][key] = nmed data['nsites'][key] = nsites data['npseudo'][samplename] = n_pseudo if use_plot:
def plan_experiment(self): use_nonzero_mask_for_normalization = self.determine_whether_to_use_mask_for_norm() print("Are we using the nonzero mask for normalizaion?", use_nonzero_mask_for_normalization) spacings = self.dataset_properties['all_spacings'] sizes = self.dataset_properties['all_sizes'] all_classes = self.dataset_properties['all_classes'] modalities = self.dataset_properties['modalities'] num_modalities = len(list(modalities.keys())) target_spacing = self.get_target_spacing() new_shapes = [np.array(i) / target_spacing * np.array(j) for i, j in zip(spacings, sizes)] max_spacing_axis = np.argmax(target_spacing) remaining_axes = [i for i in list(range(3)) if i != max_spacing_axis] self.transpose_forward = [max_spacing_axis] + remaining_axes self.transpose_backward = [np.argwhere(np.array(self.transpose_forward) == i)[0][0] for i in range(3)] # we base our calculations on the median shape of the datasets median_shape = np.median(np.vstack(new_shapes), 0) print("the median shape of the dataset is ", median_shape) max_shape = np.max(np.vstack(new_shapes), 0) print("the max shape in the dataset is ", max_shape) min_shape = np.min(np.vstack(new_shapes), 0) print("the min shape in the dataset is ", min_shape) print("we don't want feature maps smaller than ", self.unet_featuremap_min_edge_length, " in the bottleneck") # how many stages will the image pyramid have? self.plans_per_stage = list() target_spacing_transposed = np.array(target_spacing)[self.transpose_forward] median_shape_transposed = np.array(median_shape)[self.transpose_forward] print("the transposed median shape of the dataset is ", median_shape_transposed) print("generating configuration for 3d_fullres") self.plans_per_stage.append(self.get_properties_for_stage(target_spacing_transposed, target_spacing_transposed, median_shape_transposed, len(self.list_of_cropped_npz_files), num_modalities, len(all_classes) + 1)) # thanks Zakiyi (https://github.com/MIC-DKFZ/nnUNet/issues/61) for spotting this bug :-) # if np.prod(self.plans_per_stage[-1]['median_patient_size_in_voxels'], dtype=np.int64) / \ # architecture_input_voxels < HOW_MUCH_OF_A_PATIENT_MUST_THE_NETWORK_SEE_AT_STAGE0: architecture_input_voxels_here = np.prod(self.plans_per_stage[-1]['patch_size'], dtype=np.int64) if np.prod(median_shape) / architecture_input_voxels_here < \ self.how_much_of_a_patient_must_the_network_see_at_stage0: more = False else: more = True if more: print("generating configuration for 3d_lowres") # if we are doing more than one stage then we want the lowest stage to have exactly # HOW_MUCH_OF_A_PATIENT_MUST_THE_NETWORK_SEE_AT_STAGE0 (this is 4 by default so the number of voxels in the # median shape of the lowest stage must be 4 times as much as the network can process at once (128x128x128 by # default). Problem is that we are downsampling higher resolution axes before we start downsampling the # out-of-plane axis. 
We could probably/maybe do this analytically but I am lazy, so here # we do it the dumb way lowres_stage_spacing = deepcopy(target_spacing) num_voxels = np.prod(median_shape, dtype=np.float64) while num_voxels > self.how_much_of_a_patient_must_the_network_see_at_stage0 * architecture_input_voxels_here: max_spacing = max(lowres_stage_spacing) if np.any((max_spacing / lowres_stage_spacing) > 2): lowres_stage_spacing[(max_spacing / lowres_stage_spacing) > 2] \ *= 1.01 else: lowres_stage_spacing *= 1.01 num_voxels = np.prod(target_spacing / lowres_stage_spacing * median_shape, dtype=np.float64) lowres_stage_spacing_transposed = np.array(lowres_stage_spacing)[self.transpose_forward] new = self.get_properties_for_stage(lowres_stage_spacing_transposed, target_spacing_transposed, median_shape_transposed, len(self.list_of_cropped_npz_files), num_modalities, len(all_classes) + 1) architecture_input_voxels_here = np.prod(new['patch_size'], dtype=np.int64) if 2 * np.prod(new['median_patient_size_in_voxels'], dtype=np.int64) < np.prod( self.plans_per_stage[0]['median_patient_size_in_voxels'], dtype=np.int64): self.plans_per_stage.append(new) self.plans_per_stage = self.plans_per_stage[::-1] self.plans_per_stage = {i: self.plans_per_stage[i] for i in range(len(self.plans_per_stage))} # convert to dict print(self.plans_per_stage) print("transpose forward", self.transpose_forward) print("transpose backward", self.transpose_backward) normalization_schemes = self.determine_normalization_scheme() only_keep_largest_connected_component, min_size_per_class, min_region_size_per_class = None, None, None # removed training data based postprocessing. This is deprecated # these are independent of the stage plans = {'num_stages': len(list(self.plans_per_stage.keys())), 'num_modalities': num_modalities, 'modalities': modalities, 'normalization_schemes': normalization_schemes, 'dataset_properties': self.dataset_properties, 'list_of_npz_files': self.list_of_cropped_npz_files, 'original_spacings': spacings, 'original_sizes': sizes, 'preprocessed_data_folder': self.preprocessed_output_folder, 'num_classes': len(all_classes), 'all_classes': all_classes, 'base_num_features': self.unet_base_num_features, 'use_mask_for_norm': use_nonzero_mask_for_normalization, 'keep_only_largest_region': only_keep_largest_connected_component, 'min_region_size_per_class': min_region_size_per_class, 'min_size_per_class': min_size_per_class, 'transpose_forward': self.transpose_forward, 'transpose_backward': self.transpose_backward, 'data_identifier': self.data_identifier, 'plans_per_stage': self.plans_per_stage, 'preprocessor_name': self.preprocessor_name, 'conv_per_stage': self.conv_per_stage, } self.plans = plans self.save_my_plans()
            #label = ['Min','Mean','Median','Max','Std']
            #clust_data = [[np.min(x)],[np.mean(x)],[np.median(x)],np.max(x),[np.std(x)]]
            #the_table = ax.table(cellText=clust_data,rowLabels=label,loc='center')
            #ax.text(2, 10, r'$\cos(2 \pi t) \exp(-t)$', fontdict=font)
            #ax2.text(2, 10, r'$min$={min}'.format(min=np.min(x)))
            #ax2.text(0.1,0.6,'$min={:0.3f}$'.format(np.min(x)),fontsize=12)
            #ax2.text(0.1,0.5,'$mean={:0.3f}$'.format(np.mean(x)),fontsize=12)
            #ax2.text(0.1,0.4,'r$median={:0.3f}$'.format(np.median(x)),fontsize=12)
            #ax2.text(0.1,0.3,'$max={:0.3f}$'.format(np.max(x)),fontsize=12)
            #ax2.text(0.1,0.2,'$\sigma={:0.3f}$'.format(np.std(x)),fontsize=12)

            #table version
            row_labels = ['min', 'mean', 'median', 'max', 'std']
            celldata = [['{:0.3f}'.format(np.min(x))],
                        ['{:0.3f}'.format(np.mean(x))],
                        ['{:0.3f}'.format(np.median(x))],
                        ['{:0.3f}'.format(np.max(x))],
                        ['{:0.3f}'.format(np.std(x))]]
            ax2.table(cellText=celldata, rowLabels=row_labels, loc='center left',
                      fontsize=24, colWidths=[0.4])
            #row_labels=['min','mean','median','max','$\sigma$']
            #table_vals=['${:0.3f}$'.format(np.min(x)),'${:0.3f}$'.format(np.min(x)),'${:0.3f}$'.format(np.min(x)),'${:0.3f}$'.format(np.min(x))]
            #table = r'''\begin{tabular}{ c | c | c | c } & col1 & col2 & col3 \\\hline row1 & 11 & 12 & 13 \\\hline row2 & 21 & 22 & 23 \\\hline row3 & 31 & 32 & 33 \end{tabular}'''
            #plt.text(0.1,0.8,table,size=12)
        elif var_types[v] == 3 and var_types[w] != 3:
            #boxplot
            d = []
def find_LFEs(filename, stations, tbegin, tend, outputfile, TDUR=10.0, filt=(1.5, 9.0), \ freq0=1.0, nattempts=2, waittime=5.0, draw=False, \ type_threshold='MAD', threshold=0.0075): """ Find LFEs with the temporary stations from FAME using the templates from Plourde et al. (2015) Input: type filename = string filename = Name of the template type stations = list of strings stations = name of the stations used for the matched-filter algorithm type tebgin = tuplet of 6 integers tbegin = Time when we begin looking for LFEs type tend = tuplet of 6 integers tend = Time we stop looking for LFEs type TDUR = float TDUR = Time to add before and after the time window for tapering type filt = tuple of floats filt = Lower and upper frequencies of the filter type freq0 = float freq0 = Maximum frequency rate of LFE occurrence type nattempts = integer nattempts = Number of times we try to download data type waittime = positive float waittime = Type to wait between two attempts at downloading type draw = boolean draw = Do we draw a figure of the cross-correlation? type type_threshold = string type_threshold = 'MAD' or 'Threshold' type threshold = float threshold = Cross correlation value must be higher than that Output: None """ # Get the network, channels, and location of the stations staloc = pd.read_csv(os.path.join(DATADIR, 'station_locations.txt'), \ sep=r'\s{1,}', header=None, engine='python') staloc.columns = ['station', 'network', 'channels', 'location', \ 'server', 'latitude', 'longitude'] # Create directory to store the LFEs times namedir = 'LFEs/' + filename if not os.path.exists(namedir): os.makedirs(namedir) # File to write error messages namedir = 'error' if not os.path.exists(namedir): os.makedirs(namedir) errorfile = 'error/' + filename + '.txt' # Read the templates templates = Stream() for station in stations: data = pickle.load(open(DATADIR + '/templates/' + filename + \ '/' + station + '.pkl', 'rb')) if (len(data) == 3): EW = data[0] NS = data[1] UD = data[2] EW.stats.station = station NS.stats.station = station EW.stats.channel = 'E' NS.stats.channel = 'N' templates.append(EW) templates.append(NS) else: UD = data[0] UD.stats.station = station UD.stats.channel = 'Z' templates.append(UD) # Begin and end time of analysis t1 = UTCDateTime(year=tbegin[0], month=tbegin[1], \ day=tbegin[2], hour=tbegin[3], minute=tbegin[4], \ second=tbegin[5]) t2 = UTCDateTime(year=tend[0], month=tend[1], \ day=tend[2], hour=tend[3], minute=tend[4], \ second=tend[5]) # Read the data data = [] for station in stations: # Get station metadata for downloading for ir in range(0, len(staloc)): if (station == staloc['station'][ir]): network = staloc['network'][ir] channels = staloc['channels'][ir] location = staloc['location'][ir] server = staloc['server'][ir] # Duration of template template = templates.select(station=station, component='Z')[0] dt = template.stats.delta nt = template.stats.npts duration = (nt - 1) * dt Tstart = t1 - TDUR Tend = t2 + duration + TDUR delta = t2 + duration - t1 ndata = int(delta / dt) + 1 # Orientation of template # Date chosen: January 1st 2020 mychannels = channels.split(',') mylocation = location if (mylocation == '--'): mylocation = '' response = DATADIR + '/response/' + network + '_' + station + '.xml' inventory = read_inventory(response, format='STATIONXML') reference = [] for channel in mychannels: angle = inventory.get_orientation(network + '.' + \ station + '.' + mylocation + '.' 
+ channel, \ UTCDateTime(2020, 1, 1, 0, 0, 0)) reference.append(angle) # First case: we can get the data from IRIS if (server == 'IRIS'): (D, orientation) = get_from_IRIS(station, network, channels, \ location, Tstart, Tend, filt, dt, nattempts, waittime, \ errorfile, DATADIR) # Second case: we get the data from NCEDC elif (server == 'NCEDC'): (D, orientation) = get_from_NCEDC(station, network, channels, \ location, Tstart, Tend, filt, dt, nattempts, waittime, \ errorfile, DATADIR) else: raise ValueError('You can only download data from IRIS and NCEDC') # Append data to stream if (type(D) == obspy.core.stream.Stream): stationdata = fill_data(D, orientation, station, channels, \ reference) if (len(stationdata) > 0): for stream in stationdata: data.append(stream) # Number of hours of data to analyze nhour = int(ceil((t2 - t1) / 3600.0)) # Create dataframe to store LFE times df = pd.DataFrame(columns=['year', 'month', 'day', 'hour', \ 'minute', 'second', 'cc', 'nchannel']) # Loop on hours of data for hour in range(0, nhour): nchannel = 0 Tstart = t1 + hour * 3600.0 Tend = t1 + (hour + 1) * 3600.0 + duration delta = Tend - Tstart ndata = int(delta / dt) + 1 # Loop on channels for channel in range(0, len(data)): # Cut the data subdata = data[channel] subdata = subdata.slice(Tstart, Tend) # Check whether we have a complete one-hour-long recording if (len(subdata) == 1): if (len(subdata[0].data) == ndata): # Get the template station = subdata[0].stats.station component = subdata[0].stats.channel template = templates.select(station=station, \ component=component)[0] # Cross correlation cctemp = correlate.optimized(template, subdata[0]) if (nchannel > 0): cc = np.vstack((cc, cctemp)) else: cc = cctemp nchannel = nchannel + 1 if (nchannel > 0): # Compute average cross-correlation across channels meancc = np.mean(cc, axis=0) if (type_threshold == 'MAD'): MAD = np.median(np.abs(meancc - np.mean(meancc))) index = np.where(meancc >= threshold * MAD) elif (type_threshold == 'Threshold'): index = np.where(meancc >= threshold) else: raise ValueError('Type of threshold must be MAD or Threshold') times = np.arange(0.0, np.shape(meancc)[0] * dt, dt) # Get LFE times if np.shape(index)[1] > 0: (time, cc) = clean_LFEs(index, times, meancc, dt, freq0) # Add LFE times to dataframe i0 = len(df.index) for i in range(0, len(time)): timeLFE = Tstart + time[i] df.loc[i0 + i] = [int(timeLFE.year), int(timeLFE.month), \ int(timeLFE.day), int(timeLFE.hour), \ int(timeLFE.minute), timeLFE.second + \ timeLFE.microsecond / 1000000.0, cc[i], nchannel] # Draw figure if (draw == True): params = {'xtick.labelsize':16, 'ytick.labelsize':16} pylab.rcParams.update(params) plt.figure(1, figsize=(20, 8)) if np.shape(index)[1] > 0: for i in range(0, len(time)): plt.axvline(time[i], linewidth=2, color='grey') plt.plot(np.arange(0.0, np.shape(meancc)[0] * dt, \ dt), meancc, color='black') if (type_threshold == 'MAD'): plt.axhline(threshold * MAD, linewidth=2, color='red', \ label = '{:6.2f} * MAD'.format(threshold)) elif (type_threshold == 'Threshold'): plt.axhline(threshold, linewidth=2, color='red', \ label = 'Threshold = {:8.4f}'.format(threshold)) else: raise ValueError( \ 'Type of threshold must be MAD or Threshold') plt.xlim(0.0, (np.shape(meancc)[0] - 1) * dt) plt.xlabel('Time (s)', fontsize=24) plt.ylabel('Cross-correlation', fontsize=24) plt.title('Average cross-correlation across stations', \ fontsize=30) plt.legend(loc=2, fontsize=24) plt.savefig('LFEs/' + filename + '/' + \ '{:04d}{:02d}{:02d}_{:02d}{:02d}{:02d}'.format( \ 
Tstart.year, Tstart.month, Tstart.day, Tstart.hour, \ Tstart.minute, Tstart.second) + '.png', format='png') plt.close(1) # Add to pandas dataframe and save df_all = df df_all = df_all.astype(dtype={'year':'int32', 'month':'int32', \ 'day':'int32', 'hour':'int32', 'minute':'int32', \ 'second':'float', 'cc':'float', 'nchannel':'int32'}) df_all.to_csv('LFEs/' + filename + '/' + outputfile)
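# Standalone illustrative sketch of the MAD-style thresholding applied to the
# averaged cross-correlation in find_LFEs above. The trace and the threshold
# multiplier below are synthetic and purely for demonstration.
import numpy as np

rng = np.random.default_rng(0)
meancc = rng.normal(scale=0.01, size=3600)   # stand-in for the stacked cross-correlation
meancc[1800] = 0.2                           # one artificial "detection"

MAD = np.median(np.abs(meancc - np.mean(meancc)))
threshold = 8.0                              # hypothetical multiplier
index = np.where(meancc >= threshold * MAD)
print("samples above threshold:", index[0])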
def spatio_spectral_patterns(epochs,y, n_components = 4, output_dir="", test_name = "test", doClassif = False, legend = ['A', 'B']): """Computes the Common Spatial Pattern (CSP) on all data and print the most discriminant feature and performs a simple CSP+Logistic Regression Classification. The results will be stacked for all subjects at the end of pipeline_1. code inspired by Alexandre Barachant's Kaggle https://www.kaggle.com/alexandrebarachant/common-spatial-pattern-with-mne another reading for CSP decoding https://www.nmr.mgh.harvard.edu/mne/dev/auto_examples/decoding/plot_decoding_csp_eeg.html """ score = [] X = epochs.get_data() # run CSP zeta.util.blockPrint() # to clean the terminal csp = mne.decoding.CSP(reg='ledoit_wolf') csp.fit(X, y) zeta.util.enablePrint() # restore print # compute spatial filtered spectrum for each components fig = [] for indc in range(n_components): po = [] for x in X: f, p = welch(np.dot(csp.filters_[indc, :].T, x), int(epochs.info['sfreq']), nperseg=256) po.append(p) po = np.array(po) # prepare topoplot _, epos, _, _, _ = mne.viz.topomap._prepare_topo_plot(epochs, 'eeg', None) # plot first pattern pattern = csp.patterns_[indc, :] pattern -= pattern.mean() ix = np.argmax(abs(pattern)) # the parttern is sign invariant. # invert it for display purpose if pattern[ix] > 0: sign = 1.0 else: sign = -1.0 fig[indc], ax_topo = plt.subplots(1, 1, figsize=(12, 4)) title = 'Spatial Pattern' fig.suptitle(title, fontsize=14) img, _ = mne.viz.topomap.plot_topomap(sign * pattern, epos, axes=ax_topo, show=False) divider = make_axes_locatable(ax_topo) # add axes for colorbar ax_colorbar = divider.append_axes('right', size='5%', pad=0.05) plt.colorbar(img, cax=ax_colorbar) # plot spectrum fix = (f > 1) & (f < 35) ax_spectrum = divider.append_axes('right', size='300%', pad=1.2) ax_spectrum.plot(f[fix], np.log(po[y == 0][:, fix].mean(axis=0).T), '-r', lw=2) ax_spectrum.plot(f[fix], np.log(po[y == 1][:, fix].mean(axis=0).T), '-b', lw=2) ax_spectrum.plot(f[fix], np.log(np.median(po[y == 0][:, fix], axis=0).T), '-r', lw=0.5) ax_spectrum.plot(f[fix], np.log(np.min(po[y == 0][:, fix], axis=0).T), '--r', lw=0.5) ax_spectrum.plot(f[fix], np.log(np.max(po[y == 0][:, fix], axis=0).T), '--r', lw=0.5) ax_spectrum.plot(f[fix], np.log(np.median(po[y == 1][:, fix], axis=0).T), '-b', lw=0.5) ax_spectrum.plot(f[fix], np.log(np.min(po[y == 1][:, fix], axis=0).T), '--b', lw=0.5) ax_spectrum.plot(f[fix], np.log(np.max(po[y == 1][:, fix], axis=0).T), '--b', lw=0.5) ax_spectrum.set_xlabel('Frequency (Hz)') ax_spectrum.set_ylabel('Power (dB)') plt.grid() plt.legend(legend) # plt.show() plt.savefig( os.path.join(output_dir, 'spatial_pattern_subject_' + test_name + '_c' + str(indc) + '.png'), bbox_inches='tight') # run cross validation if doClassif: zeta.util.blockPrint() # to have a clean terminal clf = sklearn.pipeline.make_pipeline(mne.decoding.CSP(n_components=n_components), sklearn.linear_model.LogisticRegression(solver="lbfgs")) cv = sklearn.model_selection.StratifiedKFold(n_splits=5) score = sklearn.model_selection.cross_val_score(clf, X, y, cv=cv, scoring='roc_auc') zeta.util.enablePrint() print(test_name + " : AUC cross val score : %.3f" % (score.mean())) return fig, score
def filter_paired_reads(bam, fasta, min_ani = 0.97, min_mapq = 2, min_insert = 50, max_insert = 1500, write_bam = False): ''' Filter reads from a .bam file Returns: pair2info - dictionary of read pair -> (mismatches, insert distance, mapq score, combined length) ''' scaffolds, fasta_length = get_fasta(fasta) filtered = set() # Information on pairs samfile = pysam.AlignmentFile(bam) if write_bam: logging.info("Copying header for new bam...") samfile_out = pysam.AlignmentFile(bam.split("/")[-1].split(".")[0] + "_filtered.bam", "wb", template=samfile) total = mapped_pairs = mapq_good = insert_good = 0 insert_sizes = [] read_lengths = [] for scaff in tqdm(scaffolds, desc='Filtering Reads'): read_data = {} # Information on the first pair of each read for read in samfile.fetch(scaff): total += 1 # If we've seen this read's pair before if read.query_name in read_data: # Make sure that the pair is on the same scaffold and that it's mapped at all if ((read_data[read.query_name]['scaf'] == scaff) & (read.get_reference_positions() != [])): mapped_pairs += 1 pairMM = float(read_data[read.query_name]['read'].get_tag('NM')) + float(read.get_tag('NM')) #number of mismatches in pair mapped_read_lengths = float(read_data[read.query_name]['read'].infer_query_length() + read.infer_query_length()) #total length of pair if read.get_reference_positions()[-1] > read_data[read.query_name]['read'].get_reference_positions()[0]: pair_inserts = read.get_reference_positions()[-1] - read_data[read.query_name]['read'].get_reference_positions()[0] #insert distance else: pair_inserts = read_data[read.query_name]['read'].get_reference_positions()[-1] - read.get_reference_positions()[0] #insert distance pair_mapq = max(read.mapping_quality, read_data[read.query_name]['read'].mapping_quality) #pair mapq pair_ani = 1 - (pairMM / mapped_read_lengths) #pair %ANI to reference insert_sizes.append(pair_inserts) read_lengths.append(mapped_read_lengths) # Final filter if pair_inserts >= min_insert and pair_inserts <= max_insert: insert_good += 1 if pair_mapq >= min_mapq: mapq_good += 1 if pair_ani >= min_ani: filtered.add(read.query_name) if write_bam: samfile_out.write(read_data[read.query_name]['read']) samfile_out.write(read) # Add this read, in future search for its mate elif read.get_reference_positions() != []: # don't use unmapped reads: read_data[read.query_name] = {"read": read, "scaf": scaff} # total = total / 2 logging.info("Total read count, divided by 2\t" + str(total)) logging.info("Mean read pair sequences length\t" + str(np.mean(read_lengths))) logging.info("Total FASTA length\t" + str(fasta_length)) logging.info("Expected total coverage\t" + str(float(total)*np.mean(read_lengths) / fasta_length)) logging.info("Mapped read pairs\t" + str(mapped_pairs) + "\t" + str(int(100*mapped_pairs / total)) + "%") logging.info("Median end-to-end insert length\t" + str(np.median(insert_sizes))) logging.info("Read pairs which pass insert distance filters:\t" + str(insert_good) + "\t" + str(int(100*float(insert_good) / total)) + "%") logging.info("Read pairs which also meet min_mapq of " + str(min_mapq) + "\t" + str(mapq_good) + "\t" + str(int(100*float(mapq_good) / total)) + "%") logging.info("Read pairs which also pass final read pair PID >" + str(min_ani) + "%\t" + str(len(filtered)) + "\t" + str(int(100*len(filtered) / total)) + "%") logging.info("Final expected coverage\t" + str(float(len(filtered)) * np.mean(read_lengths) / fasta_length)) samfile.close() if write_bam: samfile_out.close() logging.info("sorting new bam") 
pysam.sort("-o", bam.split("/")[-1].split(".")[0] + "_filtered_sort.bam", bam.split("/")[-1].split(".")[0] + "_filtered.bam") os.system('rm ' + bam.split("/")[-1].split(".")[0] + "_filtered.bam") return filtered
def compute_input_stats(rg_iostream=None, print_all=False, make_plots=True, save_figures=False): # N = len(rg_inputoutput[0][1][0]) # Taking the first reber string's ([0]) one-hot encodings ([1]) for the # # first letter 'A' ([0]) and finding its length gives us the value of N. graph_idx = get_graph_from_dataset(rg_iostream) logger.info('Reading input stream file...') with open( os.path.join(data_path, rg_iostream.replace('.npy', '') + '.npy'), 'rb') as stream: rg_inputoutput = np.load(stream, allow_pickle=True) in_reber_strings = [ rg_inputoutput[i][0] for i in range(len(rg_inputoutput)) ] dict_count_allTransitions = count_allTransitions( graph_idx=graph_idx, in_reber_strings=in_reber_strings) # NOF TOTAL CHARACTERS in the input stream total_len_inputstream = 0 for string in in_reber_strings: total_len_inputstream += len(string) len_reber_strings = [] for ex in in_reber_strings: len_reber_strings.append(len(ex)) logger.info('The stream consists of a total of {} strings. \n\ With: \n\ Number of characters in total = {}. \n\ Mean length of string = {}. \n\ Median length of string = {}. \n\ \n Unique Strings = {}. \n\ Number of Unique Strings = {}.'.format(len(in_reber_strings), total_len_inputstream, np.mean(len_reber_strings), np.median(len_reber_strings), np.unique(in_reber_strings), len(np.unique(in_reber_strings)))) if make_plots: logger.info( 'Plotting distribution of lengths of sample reber strings in the inputstream...' ) y, binEdges = np.histogram(len_reber_strings, bins=np.unique(len_reber_strings)) plt.figure(figsize=(15, 8)) plt.bar(binEdges[:-1], y, width=1, color='maroon') plt.errorbar(binEdges[:-1], y, yerr=np.sqrt(y), fmt='o', color='Black', elinewidth=3, capthick=2, alpha=0.7, markersize=5, capsize=5) plt.xlabel('Length of Reber String', fontsize=18) plt.ylabel('Number of Occurrences \n in Inputstream', fontsize=18) plt.xticks(fontsize=18) plt.yticks(fontsize=18) plt.text( x=binEdges[-1] / 2, y=0.8 * y[0], s=f'Mean Length of Reber String: {np.mean(len_reber_strings)}', fontsize=17) plt.text( x=binEdges[-1] / 2, y=0.7 * y[0], s=f'Median Length of Reber String: {np.median(len_reber_strings)}', fontsize=17) plt.grid(True, linestyle="--", color='black', alpha=0.4) if save_figures: fig_name = 'StringLengthDist_{}.svg'.format( rg_iostream.replace('.npy', '')) plt.savefig(fname=os.path.join(fig_path, fig_name), format='svg') logger.info('Figure saved in svg format at {}.svg.'.format( os.path.join(fig_path, fig_name))) plt.show() plt.close() logger.info( 'Plotting distribution of possible transitions (trigrams) in reber strings in the inputstream...' ) transitions = list(dict_count_allTransitions.keys()) counts = list(dict_count_allTransitions.values()) plt.figure(figsize=(15, 8)) plt.bar(transitions, counts, color='maroon', width=0.5) plt.xlabel('Possible Transitions in Simple Reber Grammar', fontsize=18) plt.ylabel('Number of Occurrences in Input Stream', fontsize=18) plt.xticks(rotation=50, fontsize=15) plt.yticks(fontsize=15) plt.grid(True, linestyle="--", color='black', alpha=0.4) if save_figures: fig_name = 'TransitionsDist_{}.svg'.format( rg_iostream.replace('.npy', '')) plt.savefig(fname=os.path.join(fig_path, fig_name), format='svg') logger.info('Figure saved in svg format at {}.svg.'.format( os.path.join(fig_path, fig_name))) plt.show() plt.close() if print_all: for i, string in enumerate(in_reber_strings): print(i, string)
def compute_shortest_paths(self, graph_goal):
    self.shortest_paths = nx.shortest_path(self.graph, target=graph_goal, weight='weight')
    self.shortest_distances = [len(value) - 1 for value in self.shortest_paths.values()]
    print 'Mean shortest_distances to goal:', mean(self.shortest_distances)
    print 'Median shortest_distances to goal:', median(self.shortest_distances)
def bootstrap_sample_medians(data, n_bootstrap_samples=10000):
    bootstrap_sample_medians = []
    for i in range(n_bootstrap_samples):
        bootstrap_sample = np.random.choice(data, size=len(data), replace=True)
        bootstrap_sample_medians.append(np.median(bootstrap_sample))
    return bootstrap_sample_medians
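# Illustrative usage sketch for the function above: bootstrap a 95% percentile
# interval for the median of a synthetic sample (data here is made up).
import numpy as np

data = np.random.exponential(scale=2.0, size=200)
medians = bootstrap_sample_medians(data, n_bootstrap_samples=2000)
lo, hi = np.percentile(medians, [2.5, 97.5])
print("sample median:", np.median(data))
print("95% bootstrap interval for the median: [{:.3f}, {:.3f}]".format(lo, hi))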
    runcount = int(sys.argv[3])
else:
    print "usage: %s SIZE STRIDE [COUNT]" % sys.argv[0]
    print "run the benchmark COUNT times (default 500), and display the results as a histogram"
    sys.exit(1)

for i in range(runcount):
    log = subprocess.check_output(["./benchmark", str(size), str(stride)])
    for l in log.splitlines():
        if "MB/s" in l:
            print l
            data.append(float(l[l.find('=') + 1:l.find('MB/s')]))

plt.hist(data)
plt.xlabel("Throughput (MB/s)")
plt.ylabel("Count (out of %d)" % runcount)
plt.title("Benchmark was run %d times.\n Throughput (MB/s) min=%.1f, max=%.1f, med=%.1f, avg=%.1f" %
          (runcount, min(data), max(data), np.median(data), np.average(data)))
print '%d runs with size %d, stride %d -> median throughput = %.1f MB/s' % (runcount, size, stride, np.median(data))
print 'min=%.1f, max=%.1f, med=%.1f, avg=%.1f' % (min(data), max(data), np.median(data), np.average(data))
plt.show()
top_two_hundred_harris = np.array(top_two_hundred_harris)
top_two_hundred_orb = np.array(top_two_hundred_orb)
image_distance_matrix = np.zeros((top_two_hundred_harris.shape[0], top_two_hundred_orb.shape[0]))

'''calculate pairwise image distance matrix'''
for i in range(image_distance_matrix.shape[0]):
    image_distance_matrix[i] = calculate_distance(top_two_hundred_harris[i], top_two_hundred_orb)

'''Harris 100 -> ORB 200 sort ASC all elements in each row'''
rowwise_sort = np.sort(image_distance_matrix, axis=1)
rowwise_index_sort = np.argsort(image_distance_matrix, axis=1)
h20_median_dist = np.median(rowwise_sort[:100, 0])
h20_avg_dist = np.mean(rowwise_sort[:100, 0])
h20_median_rank_dist = np.median(np.abs(rowwise_index_sort[:100, 0] - np.arange(100)))
h20_avg_rank_dist = np.mean(np.abs(rowwise_index_sort[:100, 0] - np.arange(100)))

'''ORB 100 -> Harris 200 sort ASC all elements in each column'''
columnwise_sort = np.sort(image_distance_matrix, axis=0)
columnwise_index_sort = np.argsort(image_distance_matrix, axis=0)
o2h_median_dist = np.median(columnwise_sort[0, :100])
o2h_avg_dist = np.mean(columnwise_sort[0, :100])
o2h_median_rank_dist = np.median(np.abs(columnwise_index_sort[0, :100] - np.arange(100)))
o2h_avg_rank_dist = np.mean(np.abs(columnwise_index_sort[0, :100] - np.arange(100)))

'''print the formatted outputs'''
print("\nHarris keypoint to ORB distances:")
def assess_on_models(): errors = [] predicates = Config.predicates cols = Config.columns aggregate_str = Config.aggregates rmse_results = [] for pred in predicates: for col in cols: if col <= pred: continue print("Predicates {0} || Columns {1}".format(pred, col)) workload = np.loadtxt( 'input/synthetic_workloads/{}-Queries/query-workload-predicates_{}-cols_{}.csv' .format(Config.queries, pred, col), delimiter=',') workload = workload[~np.isnan(workload).any(axis=1)] if workload.shape[0] < 0.1 * Config.queries: print( "Error on workload possibly containing large fraction of nans : {}" .format(1 - workload.shape[0] / Config.queries)) errors.append( 'query-workload-predicates_{}-cols_{}.csv'.format( pred, col)) continue aggregate = range(workload.shape[1] - 5, workload.shape[1]) for t_y, l_Y in zip(aggregate, aggregate_str): X_train, X_test, y_train, y_test = train_test_split( workload[:, :workload.shape[1] - 5], workload[:, t_y], test_size=0.3, random_state=0) # X_train[(X_train==1e-8) | (X_train==1e+8)] = np.mean(X_train) # X_test[(X_test==1e-8) | (X_test==1e+8)] = np.mean(X_test) scaler = StandardScaler() scaler.fit(X_train) X_train = scaler.transform(X_train) X_test = scaler.transform( X_test) # apply same transformation to test data for m, m_l in zip(model, model_str): # print("\tFitting for Agg {0} with {1}".format(l_Y, m_l)) m.fit(X_train, y_train) predictions_test = m.predict(X_test) ml_relative_error = np.mean( np.abs((y_test - predictions_test) / y_test)) ml_relative_error_median = np.median( np.abs((y_test - predictions_test) / y_test)) rmse = np.sqrt( metrics.mean_squared_error(y_test, predictions_test)) mae = metrics.median_absolute_error( y_test, predictions_test) nrmsd = np.sqrt( metrics.mean_squared_error( y_test, predictions_test)) / np.std(y_test) rmse_results.append([ pred, col, m_l, l_Y, rmse, nrmsd, mae, ml_relative_error, ml_relative_error_median ]) if len(errors) != 0: print("Finished with errors on:") for e in errors: print(e) test_df = pd.DataFrame(rmse_results, columns=[ 'predicates', 'columns', 'model', 'aggregate', 'rmse', 'nrmsd', 'mae', 'rel_error_mean', 'rel_error_median' ]) test_df.to_csv( 'output/accuracy/csvs/synthetic_workloads_eval_on_models_{}_queries.csv' .format(Config.queries))
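# Standalone illustrative sketch (not part of assess_on_models): the error
# metrics computed above, applied to a tiny made-up regression result.
import numpy as np

y_test = np.array([10.0, 20.0, 30.0, 40.0])
predictions_test = np.array([11.0, 18.0, 33.0, 39.0])

rel_err = np.abs((y_test - predictions_test) / y_test)
rmse = np.sqrt(np.mean((y_test - predictions_test) ** 2))
nrmsd = rmse / np.std(y_test)
print("mean relative error:", np.mean(rel_err))
print("median relative error:", np.median(rel_err))
print("RMSE:", rmse, "NRMSD:", nrmsd)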
def evaluate_a(a00_t, a10_t, a01_t, a11_t):
    print np.median(a00_t[BURN:])
    print np.median(a10_t[BURN:])
    print np.median(a01_t[BURN:])
    print np.median(a11_t[BURN:])

    plt.figure()
    plt.plot(range(0, ITS), a00_t)
    plt.axhline(A00, color='r', linestyle='dashed', linewidth=2)
    plt.title("Prob of causal SNP - A00")
    plt.savefig('a00_plot.png')

    plt.figure()
    plt.plot(range(0, ITS), a10_t)
    plt.axhline(A10, color='r', linestyle='dashed', linewidth=2)
    plt.title("Prob of causal SNP - A10")
    plt.savefig('a10_plot.png')

    plt.figure()
    plt.plot(range(0, ITS), a01_t)
    plt.axhline(A01, color='r', linestyle='dashed', linewidth=2)
    plt.title("Prob of causal SNP - A01")
    plt.savefig('a01_plot.png')

    plt.figure()
    plt.plot(range(0, ITS), a11_t)
    plt.axhline(A11, color='r', linestyle='dashed', linewidth=2)
    plt.title("Prob of causal SNP - A11")
    plt.savefig('a11_plot.png')

    med00 = np.median(a00_t[BURN:])
    var00 = np.var(a00_t[BURN:])
    plt.figure()
    plt.hist(a00_t[BURN:], normed=True)
    plt.axvline(A00, color='r', linestyle='dashed', linewidth=2)
    plt.title("Prob of causal SNP - A00: med=%f, var=%f" % (med00, var00))
    plt.savefig('a00_hist.png')

    med10 = np.median(a10_t[BURN:])
    var10 = np.var(a10_t[BURN:])
    plt.figure()
    plt.hist(a10_t[BURN:], normed=True)
    plt.axvline(A10, color='r', linestyle='dashed', linewidth=2)
    plt.title("Prob of causal SNP - A10: med=%f, var=%f" % (med10, var10))
    plt.savefig('a10_hist.png')

    med01 = np.median(a01_t[BURN:])
    var01 = np.var(a01_t[BURN:])
    plt.figure()
    plt.hist(a01_t[BURN:], normed=True)
    plt.axvline(A01, color='r', linestyle='dashed', linewidth=2)
    plt.title("Prob of causal SNP - A01: med=%f, var=%f" % (med01, var01))
    plt.savefig('a01_hist.png')

    med11 = np.median(a11_t[BURN:])
    var11 = np.var(a11_t[BURN:])
    plt.figure()
    plt.hist(a11_t[BURN:], normed=True)
    plt.axvline(A11, color='r', linestyle='dashed', linewidth=2)
    plt.title("Prob of causal SNP - A11: med=%f, var=%f" % (med11, var11))
    plt.savefig('a11_hist.png')
radcs = np.zeros(pgeo.npix) radcs[ipixarr] = np.nansum(radwvcs, axis=1) / nlamb * ereo radcl = np.zeros(pgeo.npix) radcl[ipixarr] = np.nansum(radwvcl, axis=1) / nlamb * ereo radcs[radcs < 0.0] = 0.0 ####force to zero if fcl < 0: fclall[fclall < 0.0] = 0.0 ############################ rad = radcs * (1.0 - fclall) + fclall * radcl lc.append(np.nansum(rad) * dOmega) lccs.append(np.nansum(radcs * (1.0 - fclall)) * dOmega) lccl.append(np.nansum(radcl * fclall) * dOmega) lccsf.append(np.nansum(radcs) * dOmega) fcl.append(np.median(data["arr_6"])) if args.x: ereoarr = np.zeros(pgeo.npix) ereoarr[ipixarr] = ereo # hp.mollview(radcs,title="radcs",flip="geo",cmap=plt.cm.pink) hp.mollview(radcl, title="radcl", flip="geo", cmap=plt.cm.pink) # hp.mollview(ereoarr,title="radcs",flip="geo",cmap=plt.cm.CMRmap,min=0.0,max=np.pi) hp.graticule(color="orange") plt.savefig("tmp/" + str(idPM) + ".png") if args.m: print(irrad_solar) hp.mollview(np.pi * rad / irrad_solar,
import numpy as np
import pandas as pd
from sklearn.model_selection import ShuffleSplit, train_test_split, GridSearchCV
from sklearn.metrics import r2_score, make_scorer
from sklearn.tree import DecisionTreeRegressor
import visuals as vs

data = pd.read_csv('housing.csv')
prices = data['MEDV']
features = data.drop('MEDV', axis=1)

# Statistics
minimum_price = np.min(prices)
maximum_price = np.max(prices)
mean_price = np.mean(prices)
median_price = np.median(prices)
std_price = np.std(prices)

print("Statistics for Boston housing dataset:\n")
print("Minimum price: ${}".format(minimum_price))
print("Maximum price: ${}".format(maximum_price))
print("Mean price: ${}".format(mean_price))
print("Median price: ${}".format(median_price))
print("Standard deviation of prices: ${}".format(std_price))


def performance_metric(y_true, y_predict):
    """ Calculates and returns the performance score between
        true and predicted values based on the metric chosen. """
    # Calculate the performance score between 'y_true' and 'y_predict'
    score = r2_score(y_true, y_predict)
    return score
def _ep_trigger_avg(x, trig_code, pre=0, post=0, iqr_thresh=-1, envelope=False): """ Average response to 1 or more experimental conditions Arguments --------- x: data (nchan, npts) trig_code : sequence-type (2, stim) or StimulatedExperiment First row is the trigger indices, second row is a condition ID (integer). Condition ID -1 codes for a flagged trial to be skipped. If a StimulatedExperiment, then triggers and conditions are available from this object. pre, post : ints Number of pre- and post-stim samples in interval. post + pre > 0 default: 0 and stim-to-stim interval sum_limit : int Do partial sum up to this many terms iqr_thresh : float If set, do simple outlier detection on all groups of repeated conditions based on RMS power in the epoch interval. The iqr_thresh multiplies the width of the inter-quartile range to determine the "inlier" range of RMS power. Returns ------- avg (nchan, ncond, epoch_length) n_avg number of triggers found for each condition skipped (nskip, nchan, epoch_length) epochs that were not averaged """ x.shape = (1,) + x.shape if x.ndim == 1 else x.shape #pos_edge = trig_code[0]; conds = trig_code[1] pos_edge, conds = trigs_and_conds(trig_code) epoch_len = int(np.round(np.median(np.diff(pos_edge)))) n_cond = len(np.unique(conds)) n_pt = x.shape[1] if not (post or pre): post = epoch_len # this formula should provide consistent epoch lengths, # no matter the offset epoch_len = int(round(post + pre)) pre = int(round(pre)) post = epoch_len - pre # edit trigger list to exclude out-of-bounds epochs while pos_edge[0] - pre < 0: pos_edge = pos_edge[1:] conds = conds[1:] while pos_edge[-1] + post >= n_pt: pos_edge = pos_edge[:-1] conds = conds[:-1] avg = np.zeros((x.shape[0], n_cond, epoch_len), x.dtype) n_avg = np.zeros((x.shape[0], n_cond), 'i') for n, c in enumerate(np.unique(conds)): trials = np.where(conds == c)[0] if not len(trials): continue epochs = extract_epochs(x, pos_edge, trials, pre, post) if iqr_thresh > 0: pwr = np.sqrt(np.sum(epochs**2, axis=-1)) # analyze outlier trials per channel out_mask = ut.fenced_out( pwr, thresh=iqr_thresh, axis=1, low=False ) epochs = epochs * out_mask[:, :, None] n_avg[:, n] = np.sum(out_mask, axis=1) else: n_avg[:, n] = len(trials) if envelope: epochs = signal.hilbert( epochs, N=ut.nextpow2(epoch_len), axis=-1 ) epochs = np.abs(epochs[..., :epoch_len])**2 avg[:, c - 1, :] = np.sum(epochs, axis=1) / n_avg[:, c - 1][:, None] x.shape = [x for x in x.shape if x > 1] if envelope: np.sqrt(avg, avg) return avg, n_avg
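# Standalone illustrative sketch of how _ep_trigger_avg derives a default epoch
# length from the median inter-trigger spacing; the trigger samples are made up.
import numpy as np

pos_edge = np.array([100, 305, 498, 702, 901])        # hypothetical trigger samples
epoch_len = int(np.round(np.median(np.diff(pos_edge))))
print("median-based epoch length:", epoch_len)         # roughly 200 samples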
def assess_on_no_queries(): errors = [] predicates = Config.predicates cols = Config.columns aggregate_str = Config.aggregates queries_number = np.linspace(0.1, 1, 10) * (Config.queries / 10) relative_errors = [] for n in queries_number: n = int(n) print("Number of Queries {0}".format(n)) for pred in predicates: for col in cols: if col <= pred: continue print("Predicates {0} || Columns {1}".format(pred, col)) workload = np.loadtxt( 'input/synthetic_workloads/{}-Queries/query-workload-predicates_{}-cols_{}.csv' .format(Config.queries, pred, col), delimiter=',') workload = workload[~np.isnan(workload).any(axis=1)] if workload.shape[0] < 0.1 * Config.queries: print( "Error on workload possibly containing large fraction of nans : {}" .format(1 - workload.shape[0] / Config.queries)) errors.append( 'query-workload-predicates_{}-cols_{}.csv'.format( pred, col)) continue workload = workload[:n, :] aggregate = range(workload.shape[1] - 5, workload.shape[1]) for t_y, l_Y in zip(aggregate, aggregate_str): non_outliers_ratio = __compute_ratio_of_non_outliers( workload[:, t_y]) xgb = XGBRegressor() X_train, X_test, y_train, y_test = train_test_split( workload[:, :workload.shape[1] - 5], workload[:, t_y], test_size=0.2, random_state=0) scaler = StandardScaler() scaler.fit(X_train) X_train = scaler.transform(X_train) X_test = scaler.transform( X_test) # apply same transformation to test data xgb.fit(X_train, y_train) predictions_training = xgb.predict(X_train) # print("Training RMSE {0}".format(np.sqrt(metrics.mean_squared_error(y_train, predictions_training)))) predictions_test = xgb.predict(X_test) ml_relative_error = np.mean( np.abs((y_test - predictions_test) / y_test)) ml_relative_error_median = np.median( np.abs((y_test - predictions_test) / y_test)) rmse = np.sqrt( metrics.mean_squared_error(y_test, predictions_test)) mae = metrics.median_absolute_error( y_test, predictions_test) nrmsd = np.sqrt( metrics.mean_squared_error( y_test, predictions_test)) / np.std(y_test) relative_errors.append([ pred, col, n, rmse, mae, ml_relative_error, ml_relative_error_median, nrmsd, l_Y, non_outliers_ratio ]) if len(errors) != 0: print("Finished with errors on:") for e in errors: print(e) eval_df = pd.DataFrame(relative_errors, columns=[ 'predicates', 'columns', 'queries', 'rmse', 'mae', 'relative_error_mean', 'relative_error_median', 'nrmsd', 'aggregate', 'non_outliers_ratio' ]) eval_df.to_csv( 'output/accuracy/csvs/synthetic_workloads_eval_on_workloads_varying_queries_{}_queries.csv' .format(Config.queries))
def median_datasets(datasetArr, m, n, l):
    istack = np.dstack(datasetArr)
    median = np.median(istack, axis=2)
    return median
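# Illustrative usage sketch for the function above: median-combine three noisy
# "images" with np.dstack + np.median along the stacking axis (synthetic data).
import numpy as np

frames = [np.random.normal(loc=100.0, scale=5.0, size=(4, 4)) for _ in range(3)]
stack = np.dstack(frames)            # shape (4, 4, 3)
combined = np.median(stack, axis=2)  # per-pixel median across the 3 frames
print(combined.shape)                # (4, 4)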
def iter_epochs(x, pivots, selected=(), pre=0, post=0, fill=np.nan): """ Generator that yields epochs pivoted at the specified triggers. Parameters ---------- x : data (n_chan, n_pt) pivots : array-like or StimulatedExperiment A sequence of literal pivot samples, or an experiment wrapper containing the timestamps. selected : sequence Indices into trig_code for a subset of stims. If empty, return *ALL* epochs (*a potentially very large array*) pre, post : ints Number of pre- and post-stim samples in interval. post + pre > 0 default: 0 and stim-to-stim interval """ x = np.atleast_2d(x) if x.ndim == 1 else x if isinstance(pivots, StimulatedExperiment): pivots, _ = trigs_and_conds(pivots) if not np.iterable(pivots): pivots = [pivots] if not (post or pre): if len(pivots) > 1: print('Default epoch length based on median inter-trial time') post = int(np.median(np.diff(pivots))) else: print('Default epoch length 200 pts') post = 200 epoch_len = int(round(post + pre)) pre = int(round(pre)) post = epoch_len - pre if len(selected): if hasattr(selected, 'dtype') and selected.dtype.char == '?': selected = np.where(selected)[0] pivots = np.take(pivots, selected) epoch = np.empty((x.shape[0], epoch_len), x.dtype) for k in pivots: if k - pre < 0: start_put = pre - k pre = k else: start_put = 0 if k + post >= x.shape[1]: stop_put = x.shape[1] - k + pre post = x.shape[1] - k else: stop_put = pre + post grab_idx = (slice(None), slice(k - pre, k + post)) put_idx = (slice(None), slice(start_put, stop_put)) if start_put > 0 or stop_put < pre + post: epoch.fill(fill) epoch[put_idx] = x[grab_idx] yield epoch.copy() return
def main(msname, store_basename, newparmdbext='-instrument_amp_clock_offset'): # name (path) for parmdb to be written newparmDB = msname + newparmdbext # load the numpy arrays written by the previous scripts # (filenames constructed in the same way as in these scripts) freqs_ampl = np.load('freqs_for_amplitude_array.npy') amps_array = np.load(store_basename + '_amplitude_array.npy') clock_array = np.load('fitted_data_dclock_' + store_basename + '_1st.npy') freqs_phase = np.load('freqs_for_phase_array.npy') phases_array = np.load(store_basename + '_phase_array.npy') station_names = np.load(store_basename + '_station_names.npy') #print "phases shape:",np.shape(phases_array) #print "amps shape:",np.shape(amps_array) #print "clock shape:",np.shape(clock_array) #for ms in mslist: #this script works only on one MS! msinfo = ReadMs(msname) # this is the same for all antennas starttime = msinfo.timepara['start'] endtime = msinfo.timepara['end'] startfreqs = msinfo.msfreqvalues - msinfo.GetFreqpara('step') / 2. endfreqs = msinfo.msfreqvalues + msinfo.GetFreqpara('step') / 2. ntimes = 1 nfreqs = len(startfreqs) outDB = make_empty_parmdb(newparmDB) # Now do the interpolating for antenna_id, antenna in enumerate(station_names): if antenna not in msinfo.stations: pass # form median of amplitudes along the time axis, for both polarizations amp_cal_00_all = np.median(amps_array[antenna_id, :, :, 0], axis=0) amp_cal_11_all = np.median(amps_array[antenna_id, :, :, 1], axis=0) # interpolate to target frequencies amp_cal_00 = np.interp(msinfo.msfreqvalues, freqs_ampl, amp_cal_00_all) amp_cal_11 = np.interp(msinfo.msfreqvalues, freqs_ampl, amp_cal_11_all) # interpolate phases phase_cal_00 = 0. phase_cal_11 = np.interp(msinfo.msfreqvalues, freqs_phase, phases_array[:, antenna_id]) # convert to real and imaginary real_00 = amp_cal_00 * np.cos(phase_cal_00) imag_00 = amp_cal_00 * np.sin(phase_cal_00) real_11 = amp_cal_11 * np.cos(-1. * phase_cal_11) imag_11 = amp_cal_11 * np.sin(-1. * phase_cal_11) real_00_pdb = real_00.reshape((ntimes, nfreqs)) imag_00_pdb = imag_00.reshape((ntimes, nfreqs)) real_11_pdb = real_11.reshape((ntimes, nfreqs)) imag_11_pdb = imag_11.reshape((ntimes, nfreqs)) # generate parmDB entries ValueHolder = outDB.makeValue(values=real_00_pdb, sfreq=startfreqs, efreq=endfreqs, stime=starttime, etime=endtime, asStartEnd=True) outDB.addValues('Gain:0:0:Real:' + antenna, ValueHolder) ValueHolder = outDB.makeValue(values=imag_00_pdb, sfreq=startfreqs, efreq=endfreqs, stime=starttime, etime=endtime, asStartEnd=True) outDB.addValues('Gain:0:0:Imag:' + antenna, ValueHolder) ValueHolder = outDB.makeValue(values=real_11_pdb, sfreq=startfreqs, efreq=endfreqs, stime=starttime, etime=endtime, asStartEnd=True) outDB.addValues('Gain:1:1:Real:' + antenna, ValueHolder) ValueHolder = outDB.makeValue(values=imag_11_pdb, sfreq=startfreqs, efreq=endfreqs, stime=starttime, etime=endtime, asStartEnd=True) outDB.addValues('Gain:1:1:Imag:' + antenna, ValueHolder) #now handle the clock-value (no fancy interpolating needed) clock_pdb = np.array(np.median(clock_array[:, antenna_id]), ndmin=2) ValueHolder = outDB.makeValue(values=clock_pdb, sfreq=startfreqs[0], efreq=endfreqs[-1], stime=starttime, etime=endtime, asStartEnd=True) outDB.addValues('Clock:' + antenna, ValueHolder) outDB = False return {'transfer_parmDB': newparmDB}
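# Standalone illustrative sketch (not part of the script above): forming a
# per-frequency median amplitude over time and interpolating it onto target
# frequencies with plain numpy. Array shapes and values are hypothetical.
import numpy as np

amps = np.random.rand(50, 20)                  # (time, frequency) amplitude solutions
freqs_ampl = np.linspace(120e6, 160e6, 20)     # frequencies of the solutions
ms_freqs = np.linspace(121e6, 159e6, 64)       # target frequencies

amp_median = np.median(amps, axis=0)           # collapse the time axis
amp_interp = np.interp(ms_freqs, freqs_ampl, amp_median)
print(amp_interp.shape)                        # (64,)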
import numpy

arr = [1, 2, 3]
median1 = numpy.median(arr)
print(median1)
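# Small follow-on example: for an even number of elements, numpy.median
# averages the two middle values.
arr2 = [1, 2, 3, 4]
print(numpy.median(arr2))   # 2.5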
def comb_frames(frames_arr, printtype=None, frametype='Unknown', saturation=None, maskvalue=1048577, method='weightmean', satpix='reject', cosmics=None, n_lohi=[0,0], sig_lohi=[3.,3.], replace='maxnonsat'): """ Combine several frames .. todo:: - Make better use of np.ma.MaskedArray objects throughout? - More testing of replacement code necessary? - Improve docstring... Parameters ---------- frames_arr : ndarray (3D) Array of frames to be combined weights : str, or None (optional) How should the frame combination by weighted (not currently implemented) frametype : str, optional What is the type of frame being combining? maskvalue : int (optional) What should the masked values be set to (should be greater than the detector's saturation value -- Default = 1 + 2**20) printtype : str (optional) The frame type string that should be printed by armsgs. If None, frametype will be used reject : dict, optional Set the rejection parameters: cosmics, lowhigh, level, replace Perhaps these should be called out separately satpix : str, optional Method for handling saturated pixels saturation : float, optional Saturation value; only required for some choices of reject['replace'] Returns ------- comb_frame : ndarray """ ########### # FIRST DO SOME CHECKS ON THE INPUT ########### # Was printtype specified if printtype is None: printtype = frametype # Check the number of frames if frames_arr is None: msgs.error("No '{0:s}' frames were given to comb_frames to combine".format(printtype)) (sz_x, sz_y, num_frames) = np.shape(frames_arr) if num_frames == 1: msgs.info("Only one frame to combine!") msgs.info("Returning input frame") return frames_arr[:, :, 0] else: msgs.info("Combining {0:d} {1:s} frames".format(num_frames, printtype)) # Check if the user has allowed the combination of long and short # frames (e.g. different exposure times) msgs.work("lscomb feature has not been included here yet...") # Check the user hasn't requested to reject more frames than available if n_lohi[0] > 0 and n_lohi[1] > 0 and n_lohi[0] + n_lohi[1] >= num_frames: msgs.error('You cannot reject more frames than are available with \'n_lohi\'.' 
+ msgs.newline() + 'There are {0:d} frames '.format(num_frames) + 'and n_lohi will reject {0:d} low and {1:d} high values.'.format( n_lohi[0], n_lohi[1])) # Calculate the values to be used if all frames are rejected in some pixels if replace == 'min': allrej_arr = np.amin(frames_arr, axis=2) elif replace == 'max': allrej_arr = np.amax(frames_arr, axis=2) elif replace == 'mean': allrej_arr = np.mean(frames_arr, axis=2) elif replace == 'median': allrej_arr = np.median(frames_arr, axis=2) elif replace == 'weightmean': msgs.work("No weights are implemented yet") allrej_arr = frames_arr.copy() allrej_arr = masked_weightmean(allrej_arr, maskvalue) elif replace == 'maxnonsat': allrej_arr = frames_arr.copy() allrej_arr = maxnonsat(allrej_arr, saturation) else: msgs.error("You must specify what to do in case all pixels are rejected") ################ # Saturated Pixels msgs.info("Finding saturated and non-linear pixels") if satpix == 'force': # If a saturated pixel is in one of the frames, force them to # all have saturated pixels # satw = np.zeros_like(frames_arr) # satw[np.where(frames_arr > settings.spect['det']['saturation']*settings.spect['det']['nonlinear'])] = 1.0 # satw = np.any(satw,axis=2) # del satw setsat = np.zeros_like(frames_arr) setsat[frames_arr > saturation] = 1 elif satpix == 'reject': # Ignore saturated pixels in frames if possible frames_arr[frames_arr > saturation] = maskvalue elif satpix == 'nothing': # Don't do anything special for saturated pixels (Hopefully the # user has specified how to deal with them below!) pass else: msgs.error('Option \'{0}\' '.format(satpix) + 'for dealing with saturated pixels was not recognised.') ################ # Cosmic Rays if cosmics > 0.0: msgs.info("Rejecting cosmic rays") # Use a robust statistic masked_fa = np.ma.MaskedArray(frames_arr, mask=frames_arr==maskvalue) medarr = np.ma.median(masked_fa, axis=2) stdarr = 1.4826*np.ma.median(np.ma.absolute(masked_fa - medarr[:,:,None]), axis=2) indx = (frames_arr != maskvalue) \ & (frames_arr > (medarr.data + cosmics * stdarr.data)[:,:,None]) frames_arr[indx] = maskvalue # Delete unecessary arrays del medarr, stdarr else: msgs.info("Not rejecting cosmic rays") ################ # Low and High pixel rejection --- Masks *additional* pixels rejlo, rejhi = n_lohi if n_lohi[0] > 0 or n_lohi[1] > 0: # First reject low pixels frames_arr = np.sort(frames_arr, axis=2) if n_lohi[0] > 0: msgs.info("Rejecting {0:d} deviant low pixels".format(n_lohi[0])) while rejlo > 0: xi, yi = np.indices(sz_x, sz_y) frames_arr[xi, yi, np.argmin(frames_arr, axis=2)] = maskvalue del xi, yi rejlo -= 1 # Now reject high pixels if n_lohi[1] > 0: msgs.info("Rejecting {0:d} deviant high pixels".format(n_lohi[1])) frames_arr[np.where(frames_arr == maskvalue)] *= -1 while rejhi > 0: xi, yi = np.indices(sz_x, sz_y) frames_arr[xi, yi, np.argmax(frames_arr, axis=2)] = -maskvalue del xi, yi rejhi -= 1 frames_arr[np.where(frames_arr) == -maskvalue] *= -1 # TODO: Do we need this? # The following is an example of *not* masking additional pixels # if reject['lowhigh'][1] > 0: # msgs.info("Rejecting {0:d} deviant high pixels".format(reject['lowhigh'][1])) # masktemp[:,:,-reject['lowhigh'][0]:] = True else: msgs.info("Not rejecting any low/high pixels") ################ # Deviant Pixels # TODO: sig_lohi (what was level) is not actually used, instead this # just selects if cosmics should be used. Is this intentional? Why # not just do: `if cosmics > 0:`? 
if sig_lohi[0] > 0.0 or sig_lohi[1] > 0.0: msgs.info("Rejecting deviant pixels") # Use a robust statistic masked_fa = np.ma.MaskedArray(frames_arr, mask=frames_arr==maskvalue) medarr = np.ma.median(masked_fa, axis=2) stdarr = 1.4826*np.ma.median(np.ma.absolute(masked_fa - medarr[:,:,None]), axis=2) indx = (frames_arr != maskvalue) \ & ( (frames_arr > (medarr.data + cosmics*stdarr.data)[:,:,None]) | (frames_arr < (medarr.data - cosmics*stdarr.data)[:,:,None])) frames_arr[indx] = maskvalue # Delete unecessary arrays del medarr, stdarr else: msgs.info("Not rejecting deviant pixels") ############## # Combine the arrays msgs.info("Combining frames with a {0:s} operation".format(method)) if method == 'mean': comb_frame = np.ma.mean(np.ma.MaskedArray(frames_arr, mask=frames_arr==maskvalue), axis=2) elif method == 'median': comb_frame = np.ma.median(np.ma.MaskedArray(frames_arr, mask=frames_arr==maskvalue), axis=2) elif method == 'weightmean': comb_frame = frames_arr.copy() comb_frame = masked_weightmean(comb_frame, maskvalue) else: msgs.error("Combination type '{0:s}' is unknown".format(method)) ############## # If any pixels are completely masked, apply user-specified function msgs.info("Replacing completely masked pixels with the {0:s} value of the input frames".format(replace)) indx = comb_frame == maskvalue comb_frame[indx] = allrej_arr[indx] # Delete unecessary arrays del allrej_arr ############## # Apply the saturated pixels: if satpix == 'force': msgs.info("Applying saturated pixels to final combined image") comb_frame[setsat] = saturation # settings.spect[dnum]['saturation'] ############## # And return a 2D numpy array msgs.info("{0:d} {1:s} frames combined successfully!".format(num_frames, printtype)) # Make sure the returned array is the correct type comb_frame = np.array(comb_frame, dtype=np.float) return comb_frame
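# Standalone illustrative sketch of the median/MAD rejection idea used in
# comb_frames above, on a tiny synthetic frame stack. The 1.4826 factor scales
# the MAD to an estimate of the standard deviation for Gaussian noise; the
# 5-sigma cut is an arbitrary choice for this demo.
import numpy as np

frames = np.random.normal(loc=1000.0, scale=10.0, size=(8, 8, 5))
frames[3, 3, 2] += 500.0                       # inject a fake "cosmic ray" in one frame

med = np.median(frames, axis=2)
mad_sigma = 1.4826 * np.median(np.abs(frames - med[:, :, None]), axis=2)
hit = frames > (med + 5.0 * mad_sigma)[:, :, None]
print("rejected pixels:", np.argwhere(hit))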
def collect_samples_and_stats( config: SimpleNamespace, model_cls: Type[BaseModel], all_ppl_details: List[PPLDetails], train_data: xr.Dataset, test_data: xr.Dataset, output_dir: str, ) -> Tuple[xr.Dataset, xr.Dataset]: """ :param confg: The benchmark configuration. :param model_cls: The model class :param ppl_details: For each ppl the the impl and inference classes etc. :param train_data: The training dataset. :param test_data: The held-out test dataset. :param output_dir: The directory for storing results. :returns: Two datasets: variable_metrics Coordinates: ppl, metric (n_eff, Rhat), others from model Data variables: from model other_metrics Coordinates: ppl, chain, draw, phase (compile, infer) Data variables: pll (ppl, chain, draw), timing (ppl, chain, phase) """ all_variable_metrics, all_pll, all_timing, all_names = [], [], [], [] all_samples, all_overall_neff, all_overall_neff_per_time = [], [], [] for pplobj in all_ppl_details: all_names.append(pplobj.name) rand = np.random.RandomState(pplobj.seed) LOGGER.info(f"Starting inference on `{pplobj.name}` with seed {pplobj.seed}") # first compile the PPL Implementation this involves two steps compile_t1 = time.time() # compile step 1: instantiate ppl inference object infer_obj = pplobj.inference_class(pplobj.impl_class, train_data.attrs) # compile step 2: call compile infer_obj.compile(seed=rand.randint(1, 1e7), **pplobj.compile_args) compile_time = time.time() - compile_t1 LOGGER.info(f"compiling on `{pplobj.name}` took {compile_time:.2f} secs") # then run inference for each trial trial_samples, trial_pll, trial_timing = [], [], [] for trialnum in range(config.trials): infer_t1 = time.time() samples = infer_obj.infer( data=train_data, num_samples=config.num_samples, seed=rand.randint(1, 1e7), **pplobj.infer_args, ) infer_time = time.time() - infer_t1 LOGGER.info(f"inference trial {trialnum} took {infer_time:.2f} secs") # compute the pll per sample and then convert it to the actual pll over # cumulative samples persample_pll = model_cls.evaluate_posterior_predictive(samples, test_data) pll = np.logaddexp.accumulate(persample_pll) - np.log( np.arange(config.num_samples) + 1 ) LOGGER.info(f"PLL = {str(pll)}") trial_samples.append(samples) trial_pll.append(pll) trial_timing.append([compile_time, infer_time]) # finally, give the inference object an opportunity # to write additional diagnostics infer_obj.additional_diagnostics(output_dir, f"{pplobj.name}_{trialnum}") del infer_obj # concatenate the samples data from each trial together so we can compute metrics trial_samples_data = xr.concat( trial_samples, pd.Index(data=np.arange(config.trials), name="chain") ) neff_data = arviz.ess(trial_samples_data) rhat_data = arviz.rhat(trial_samples_data) LOGGER.info(f"Trials completed for {pplobj.name}") LOGGER.info("== n_eff ===") LOGGER.info(str(neff_data.data_vars)) LOGGER.info("== Rhat ===") LOGGER.info(str(rhat_data.data_vars)) # compute ess/time neff_df = neff_data.to_dataframe() overall_neff = [ neff_df.values.min(), np.median(neff_df.values), neff_df.values.max(), ] mean_inference_time = np.mean(np.array(trial_timing)[:, 1]) overall_neff_per_time = np.array(overall_neff) / mean_inference_time LOGGER.info("== overall n_eff [min, median, max]===") LOGGER.info(str(overall_neff)) LOGGER.info("== overall n_eff/s [min, median, max]===") LOGGER.info(str(overall_neff_per_time)) trial_variable_metrics_data = xr.concat( [neff_data, rhat_data], pd.Index(data=["n_eff", "Rhat"], name="metric") ) all_variable_metrics.append(trial_variable_metrics_data) 
all_pll.append(trial_pll) all_timing.append(trial_timing) all_samples.append(trial_samples_data) all_overall_neff.append(overall_neff) all_overall_neff_per_time.append(overall_neff_per_time) # merge the trial-level metrics at the PPL level all_variable_metrics_data = xr.concat( all_variable_metrics, pd.Index(data=all_names, name="ppl") ) all_other_metrics_data = xr.Dataset( { "timing": (["ppl", "chain", "phase"], all_timing), "pll": (["ppl", "chain", "draw"], all_pll), "overall_neff": (["ppl", "percentile"], all_overall_neff), "overall_neff_per_time": (["ppl", "percentile"], all_overall_neff_per_time), }, coords={ "ppl": np.array(all_names), "chain": np.arange(config.trials), "phase": np.array(["compile", "infer"]), "draw": np.arange(config.num_samples), "percentile": np.array(["min", "median", "max"]), }, ) all_samples_data = xr.concat(all_samples, pd.Index(data=all_names, name="ppl")) model_cls.additional_metrics(output_dir, all_samples_data, train_data, test_data) LOGGER.info("all benchmark samples and metrics collected") # save the samples data only if requested if getattr(config, "save_samples", False): save_dataset(output_dir, "samples", all_samples_data) # write out thes metrics save_dataset(output_dir, "diagnostics", all_variable_metrics_data) save_dataset(output_dir, "metrics", all_other_metrics_data) return all_variable_metrics_data, all_other_metrics_data