def get_cluster_distribution(g, method='average'):
    """The clustering coefficient distribution grouped by degree.

    Similar to a histogram: shows each possible degree k and the
    average/median clustering coefficient of the nodes with degree k in
    graph g.

    Parameters
    ----------
    g : NetworkX Graph
    method : str, ('average', 'median'), (default = 'average')

    Returns
    -------
    xdata, ydata : a 2-tuple of arrays, (k, avg_cc(V_k)),
        where V_k are the nodes with degree k
    """
    g = to_undirected(g)
    k = nx.clustering(g)
    d = g.degree()
    ck = defaultdict(list)
    for n in g.nodes():
        ck[d[n]].append(k[n])
    xdata, ydata = list(), list()
    if method == 'average':
        for deg, ccs in ck.items():
            if deg > 1 and average(ccs) > 0:
                xdata.append(deg)
                ydata.append(average(ccs))
    elif method == 'median':
        for deg, ccs in ck.items():
            if deg > 1 and median(ccs) > 0:
                xdata.append(deg)
                ydata.append(median(ccs))
    else:
        raise ValueError("method should be 'average' or 'median'")
    xdata = array(xdata)
    ydata = array(ydata)
    return xdata, ydata
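# Usage sketch (illustrative only): assumes NetworkX plus this module's own
# helpers (`to_undirected`, `average`, `median`, `array`); the stand-ins below
# are hypothetical. Note these snippets predate SciPy dropping its top-level
# NumPy re-exports, so names like `median` map to the NumPy functions.
import networkx as nx
import matplotlib.pyplot as plt
from numpy import array, average, median
from collections import defaultdict

to_undirected = nx.Graph                   # stand-in for the module's helper
g = nx.barabasi_albert_graph(1000, 3)      # hypothetical test graph
xdata, ydata = get_cluster_distribution(g, method='median')
plt.loglog(xdata, ydata, 'o')              # C(k) vs k
plt.xlabel('degree k')
plt.ylabel('median clustering of degree-k nodes')
plt.show()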
def plotAllCombinations(aclasses, avariants,
                        fclasses, fvariants,
                        trials, maxsteps, maxbatchsize=10):
    fundic = {}
    ploti = 1
    rows = sum([len(avariants[ac]) for ac in aclasses]) + len(aclasses) - 1
    cols = len(fvariants) * len(fclasses) + len(fclasses) - 1
    f_mid = int(median(range(len(fvariants))))
    for ac_id, aclass in enumerate(aclasses):
        a_mid = int(median(range(len(avariants[aclass]))))
        for as_id, aparams in enumerate(avariants[aclass]):
            if as_id == 0 and ac_id > 0:
                ploti += cols
            for fc_id, fclass in enumerate(fclasses):
                if fc_id not in fundic:
                    # shared samples across all uses of one function
                    fun = fclass()
                    fwrap = FunctionWrapper(trials, fun, record_samples=True)
                    fwrap.nextSamples(maxbatchsize * (maxsteps + 10))
                    fundic[fc_id] = fwrap._seen
                data = fundic[fc_id]
                for fs_id, fsettings in enumerate(fvariants):
                    if fs_id == 0 and fc_id > 0:
                        ploti += 1
                    fun = fclass(**fsettings)
                    provider = DataFunctionWrapper(data, fun, shuffling=False)
                    pylab.subplot(rows, cols, ploti)
                    ploti += 1
                    plotHeatmap(provider, aclass, aparams, trials, maxsteps)
                    if ac_id == 0 and as_id == 0 and fs_id == f_mid:
                        pylab.title(fclass.__name__[5:])
                    if fs_id == 0 and as_id == a_mid:
                        pylab.ylabel(aclass.__name__[:6])
    pylab.subplots_adjust(left=0.1, bottom=0.01, right=0.99, top=0.9,
                          wspace=0.05, hspace=0.05)
def showVectorDisplacements():
    global testImage, croppedRefImage, u, v, valid, q1, umean, vmean, x, y, \
        sxyVar, wxyVar, goodvectorsVar
    from scipy import where, compress, logical_and, median, logical_or, nan
    from pylab import resize, transpose, quiver, title, show, find, imshow, \
        hist, figure, clf, draw, save, load, xlabel, ylabel, flipud

    mxy = 3
    wxy = int(wxyVar.get())
    sxy = int(sxyVar.get())
    goodvectors = float(goodvectorsVar.get())

    # process to find PIV-style displacements
    x, y, u, v, q1, valid = simplepiv(croppedRefImage, testImage, wxy, mxy, sxy)
    good = where(logical_and(q1 > goodvectors, valid > 0), True, False)
    umean = median(compress(good.flat, u.flat))
    vmean = median(compress(good.flat, v.flat))
    u = where(logical_or(q1 < goodvectors, valid < 0), 0, u)
    v = where(logical_or(q1 < goodvectors, valid < 0), 0, v)
    u = u - umean
    v = v - vmean

    save('vecx.out', x)
    save('vecy.out', y)
    save('vecu.out', u)
    save('vecv.out', v)
    save('vecq1.out', q1)
    save('vecvalid.out', valid)

    u = flipud(u)
    v = -flipud(v)
    quiver(x, y, u, v)
    title('Vector displacements')
    xlabel('Pixels')
    ylabel('Pixels')
    show()
    return
def PrintValues(outfile, values, options, prefix="", titles=None):
    if options.flat or options.aggregate_column:
        if options.add_header:
            if prefix:
                outfile.write("prefix\t")
            if titles:
                outfile.write("column\t")
            print("\t".join(("nval", "min", "max", "mean", "median",
                             "stddev", "sum", "q1", "q3")))
        for x in range(len(values)):
            vals = values[x]
            if len(vals) == 0:
                if options.output_empty:
                    if titles:
                        outfile.write(titles[x] + "\t")
                    if prefix:
                        outfile.write(prefix + "\t")
                    outfile.write("0" + "\tna" * 8 + "\n")
                continue
            if titles:
                outfile.write(titles[x] + "\t")
            if prefix:
                outfile.write(prefix + "\t")
            vals.sort()
            if len(vals) > 4:
                q1 = options.value_format % vals[len(vals) // 4]
                q3 = options.value_format % vals[len(vals) * 3 // 4]
            else:
                q1 = options.value_format % vals[0]
                q3 = options.value_format % vals[-1]
            outfile.write("\t".join((
                "%i" % len(vals),
                options.value_format % float(min(vals)),
                options.value_format % float(max(vals)),
                options.value_format % scipy.mean(vals),
                options.value_format % scipy.median(vals),
                options.value_format % scipy.std(vals),
                options.value_format % sum(vals),
                q1,
                q3,
            )) + "\n")
    else:
        if titles:
            print("category\t%s" % "\t".join(titles))
        print("count\t%s" % "\t".join("%i" % len(v) for v in values))
        print("min\t%s" % "\t".join(options.value_format % min(v) for v in values))
        print("max\t%s" % "\t".join(options.value_format % max(v) for v in values))
        print("mean\t%s" % "\t".join(options.value_format % scipy.mean(v) for v in values))
        print("median\t%s" % "\t".join(options.value_format % scipy.median(v) for v in values))
        print("stddev\t%s" % "\t".join(options.value_format % scipy.std(v) for v in values))
        print("sum\t%s" % "\t".join(options.value_format % sum(v) for v in values))
        print("q1\t%s" % "\t".join(options.value_format %
              scipy.stats.scoreatpercentile(v, per=25) for v in values))
        print("q3\t%s" % "\t".join(options.value_format %
              scipy.stats.scoreatpercentile(v, per=75) for v in values))
def WriteRadius(mali, identifiers, prefix="", gap_char="-"):
    """write percent identities in pairwise comparisons
    both for nucleic acids and amino acids."""
    pides_na = []
    seq_aa = []
    for x in range(0, len(identifiers)):
        seq_aa.append(Genomics.TranslateDNA2Protein(mali[identifiers[x]]))
        for y in range(x + 1, len(identifiers)):
            if x == y:
                continue
            pides_na.append(MaliIO.getPercentIdentity(
                mali[identifiers[x]], mali[identifiers[y]], gap_char))
    pides_aa = []
    for x in range(0, len(identifiers) - 1):
        for y in range(x + 1, len(identifiers)):
            pides_aa.append(
                MaliIO.getPercentIdentity(seq_aa[x], seq_aa[y], gap_char))
    print("%s\tpide\t%i\t" % (prefix, len(pides_na)) +
          "\t".join("%.2f" % v for v in
                    (min(pides_na), max(pides_na), scipy.mean(pides_na),
                     scipy.median(pides_na), scipy.std(pides_na))) + "\t" +
          "\t".join("%.2f" % v for v in
                    (min(pides_aa), max(pides_aa), scipy.mean(pides_aa),
                     scipy.median(pides_aa), scipy.std(pides_aa))))
def lossTraces(fwrap, aclass, dim, maxsteps, storesteps=None, x0=None,
               initNoise=0., minLoss=1e-10, algoparams={}):
    """Compute a number of loss curves, for the provided settings,
    stored at specific storestep points."""
    if not storesteps:
        storesteps = range(maxsteps + 1)

    # initial points, potentially noisy
    if x0 is None:
        x0 = ones(dim) + randn(dim) * initNoise

    # tracking progress by callback
    paramtraces = {'index': -1}

    def storer(a):
        lastseen = paramtraces['index']
        for ts in [x for x in storesteps
                   if x > lastseen and x <= a._num_updates]:
            paramtraces[ts] = a.bestParameters.copy()
        paramtraces['index'] = a._num_updates

    # initialization
    algo = aclass(fwrap, x0, callback=storer, **algoparams)
    print(algo, fwrap, dim, maxsteps, end=' ')

    # store initial step
    algo.callback(algo)
    algo.run(maxsteps)

    # process learning curve
    del paramtraces['index']
    paramtraces = array([x for _, x in sorted(paramtraces.items())])
    oloss = mean(fwrap.stochfun.expectedLoss(ones(100) * fwrap.stochfun.optimum))
    ls = abs(fwrap.stochfun.expectedLoss(ravel(paramtraces)) - oloss) + minLoss
    ls = reshape(ls, paramtraces.shape)
    print(median(ls[-1]))
    return ls
def centroid(stamp):
    """Compute the center of a star as a flux-weighted center of mass.

    Parameters
    ----------
    stamp : (N,) array_like
        2-D array representing an image cutout that encloses a star.

    Returns
    -------
    cx : float
        x coordinate of the star center.
    cy : float
        y coordinate of the star center.
    """
    # Build vectors with the x and y indices of the stamp.
    x_vect = sp.arange(0, sp.shape(stamp)[1])
    y_vect = sp.arange(0, sp.shape(stamp)[0])
    # Initial estimate of the star center (stamp midpoint), cast to int
    # so it can be used as a row/column index.
    cx = int(sp.median(x_vect))
    cy = int(sp.median(y_vect))
    # Flux-weighted x coordinate along the central row.
    sum_x = sp.nansum(x_vect * stamp[cy, :])
    cx = sum_x / sp.nansum(stamp[cy, :])
    # Flux-weighted y coordinate along the central column.
    sum_y = sp.nansum(y_vect * stamp[:, int(cx)])
    cy = sum_y / sp.nansum(stamp[:, int(cx)])
    return cx, cy
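# Quick sanity check for centroid() on a synthetic Gaussian star, assuming
# `sp` is this module's SciPy import (an older SciPy that still re-exports
# NumPy's array functions). The stamp and the offsets are made up.
import numpy as np

yy, xx = np.mgrid[0:21, 0:21]
stamp = np.exp(-((xx - 11.3) ** 2 + (yy - 9.6) ** 2) / (2 * 2.0 ** 2))
cx, cy = centroid(stamp)
print(cx, cy)   # expected close to (11.3, 9.6)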
def WriteRadius(mali, identifiers, prefix="", gap_char="-"):
    """write percent identities in pairwise comparisons
    both for nucleic acids and amino acids."""
    pides_na = []
    seq_aa = []
    for x in range(0, len(identifiers)):
        seq_aa.append(Genomics.TranslateDNA2Protein(mali[identifiers[x]]))
        for y in range(x + 1, len(identifiers)):
            if x == y:
                continue
            pides_na.append(MaliIO.getPercentIdentity(
                mali[identifiers[x]], mali[identifiers[y]], gap_char))
    pides_aa = []
    for x in range(0, len(identifiers) - 1):
        for y in range(x + 1, len(identifiers)):
            pides_aa.append(
                MaliIO.getPercentIdentity(seq_aa[x], seq_aa[y], gap_char))
    print("%s\tpide\t%i\t" % (prefix, len(pides_na)) +
          "\t".join("%.2f" % v for v in
                    (min(pides_na), max(pides_na), scipy.mean(pides_na),
                     scipy.median(pides_na), numpy.std(pides_na))) + "\t" +
          "\t".join("%.2f" % v for v in
                    (min(pides_aa), max(pides_aa), scipy.mean(pides_aa),
                     scipy.median(pides_aa), numpy.std(pides_aa))))
def merged_event_breakpoint_stats(mev):
    bp1d, bp2d = [], []
    bend1 = bend2 = None
    reads = []
    quals = []
    for ev in mev.events:
        bp1d.append(ev.bp1.pos)
        bp2d.append(ev.bp2.pos)
        reads.append(ev.reads)
        quals.append(ev.qual)
        bend1 = ev.bp1.breakend
        bend2 = ev.bp2.breakend
    bp1d = np.array(bp1d)
    bp2d = np.array(bp2d)
    if bend1 == "+":
        bp1limit = scipy.amin(bp1d)
    else:
        bp1limit = scipy.amax(bp1d)
    if bend2 == "+":
        bp2limit = scipy.amin(bp2d)
    else:
        bp2limit = scipy.amax(bp2d)
    reads_median = int(scipy.median(reads))
    qual_median = int(scipy.median(quals))
    return (int(bp1limit), int(bp2limit), int(bp2limit - bp1limit),
            scipy.mean(bp1d), scipy.amax(bp1d) - scipy.amin(bp1d),
            scipy.std(bp1d), scipy.mean(bp2d),
            scipy.amax(bp2d) - scipy.amin(bp2d), scipy.std(bp2d),
            reads_median, qual_median)
def plot_collated(r_set="truth", infl_set="varinfl-0.25", subplots=True, save=False):
    d = cl("%s/output-2013/sim3-results_r-%s_%s" % (DATA_DIR, r_set, infl_set))
    coverages = SP.array(list(range(20, 200, 20)) +
                         list(range(200, 1001, 100)))
    if r_set == "truth":
        coverages = SP.array(list(range(20, 200, 20)) +
                             list(range(200, 500, 50)) +
                             list(range(500, 1001, 100)))
    afs = ["%.2f" % x for x in (0.7, 0.85, 0.99)]
    models = ['sQTL', 'Smooth', 'ML', 'MP']
    p = 0
    colors = 'bgry'
    if subplots:
        PL.figure(figsize=(14, 10))
    for feature in 'FX':
        for af in afs:
            if subplots:
                PL.subplot(2, 3, p + 1)
            else:
                PL.figure()
            p += 1
            lines = []
            for i, model in enumerate(models):
                I = SP.where(d[af][model][feature].var(axis=0) > 1e-10)[0]
                err = d[af][model][feature][:, I].var(axis=1) ** 0.5
                lines.append(PL.plot(coverages + 2 * i,
                                     SP.median(d[af][model][feature][:, I], axis=1),
                                     "-o", linewidth=3, markersize=9,
                                     color=colors[i])[0])
                PL.errorbar(coverages + 2 * i,
                            SP.median(d[af][model][feature][:, I], axis=1),
                            yerr=err, fmt="-o", linewidth=1, markersize=9,
                            color=colors[i])
            PL.xticks(coverages)
            # PL.xlim(min(coverages), max(coverages))
            PL.title("%s %s - %s" % (infl_set, feature, af))
            PL.xlim(15, 220)
            if feature == "X":
                PL.ylim(0, 8)
            if p == 1:
                PL.legend(lines, models)
    if save:
        PL.savefig("/Users/leopold/doc/write/manuscripts/2011_X_sQTL/figures/"
                   "figure2013-3_2%s.pdf" % ("ABCDEF"[p - 1:p]))
    PL.show()
def subtract_overscan(data, x, y):
    """This function finds the median values in each of the four overscan
    regions and subtracts them from the appropriate regions of the input
    data file. It then converts the results back to electrons rather
    than ADU."""
    # Define bias region limits
    bx1 = slice(0, 15, 1)
    bx2 = slice(2065, 2080, 1)
    y1 = slice(0, 1024, 1)
    y2 = slice(1024, 2048, 1)

    # Define limits of regions associated with the four amps
    x1 = slice(16, 1040)
    x2 = slice(1040, 2064)

    # Define median values of overscan regions from appropriate data regions
    newdata = data.astype(scipy.float32)
    overscan = scipy.zeros((4, 1))
    overscan[0] = scipy.median(newdata[y1, bx1].ravel())
    overscan[1] = scipy.median(newdata[y2, bx1].ravel())
    overscan[2] = scipy.median(newdata[y1, bx2].ravel())
    overscan[3] = scipy.median(newdata[y2, bx2].ravel())

    # Subtract overscan
    newdata[y1, x1] = newdata[y1, x1] - overscan[0]
    newdata[y2, x1] = newdata[y2, x1] - overscan[1]
    newdata[y1, x2] = newdata[y1, x2] - overscan[2]
    newdata[y2, x2] = newdata[y2, x2] - overscan[3]

    newdata = newdata[y, x]
    return newdata
def major_axis(x, y, ndist=10, mask=1):
    dist = scipy.array([scipy.hypot(x - i, y - j)
                        for i, j in zip(x, y)])
    shape = dist.shape
    imax = scipy.zeros(ndist, dtype=int)
    jmax = scipy.zeros(ndist, dtype=int)
    # dummy run
    for i in range((mask - 1) * ndist):
        ii, jj = scipy.unravel_index(scipy.argmax(dist), shape)
        dist[ii] = scipy.zeros(shape[0])
        dist[jj] = scipy.zeros(shape[0])
        dist[:, ii] = scipy.zeros(shape[1])
        dist[:, jj] = scipy.zeros(shape[1])
    for i in range(ndist):
        imax[i], jmax[i] = scipy.unravel_index(scipy.argmax(dist), shape)
        dist[imax[i]] = scipy.zeros(shape[0])
        dist[jmax[i]] = scipy.zeros(shape[0])
        dist[:, imax[i]] = scipy.zeros(shape[1])
        dist[:, jmax[i]] = scipy.zeros(shape[1])
    # print(imax, jmax)
    slopes = [(y[i] - y[j]) / (x[i] - x[j])
              for i, j in zip(imax, jmax)]
    zeros = [y[i] - m * x[i] for i, m in zip(imax, slopes)]
    m = scipy.median(slopes)
    n = scipy.median(zeros)
    return imax, jmax, m, n
def plot_hist_compare(self, which_case):
    plt.ylabel('Percentage of points')
    plt.xlabel('Percentage RMS relative error')

    def yto_percent(y, x):
        s = str(sp.around((y / (len(self.REL_ERR) * 1.0) * 100), 2))
        if matplotlib.rcParams['text.usetex'] is True:
            return s + r'$\%$'
        else:
            return s + '%'

    def xto_percent(y, x):
        s = str(y * 100)
        if matplotlib.rcParams['text.usetex'] is True:
            return s + r'$\%$'
        else:
            return s + '%'

    thermo1, thermo2 = self.select[which_case]

    # Plot the SU2 error
    i = 0
    self.REL_ERR = 0
    for v in self.variables[sp.where(
            (self.variables != thermo1) * (self.variables != thermo2))]:
        i = i + 1
        self.REL_ERR = self.REL_ERR + \
            ((getattr(self.SU2[which_case], v) - getattr(self.RandomSamples, v)) /
             (getattr(self.RandomSamples, v))) ** 2
    self.REL_ERR = sp.sqrt(self.REL_ERR) / i
    plt.hist(self.REL_ERR, bins=25, color='k', alpha=0.3, label='SU2')
    print('Error max SU2', max(self.REL_ERR))
    setattr(self.SU2[which_case], "median_ERR", sp.median(self.REL_ERR))

    # Plot the SciPy error
    i = 0
    self.REL_ERR = 0
    for v in self.variables[sp.where(
            (self.variables != thermo1) * (self.variables != thermo2))]:
        i = i + 1
        self.REL_ERR = self.REL_ERR + \
            ((getattr(self.SciPy[which_case], v) - getattr(self.RandomSamples, v)) /
             (getattr(self.RandomSamples, v))) ** 2
    self.REL_ERR = sp.sqrt(self.REL_ERR) / i
    plt.hist(self.REL_ERR, bins=25, color='c', alpha=0.5, label='SciPy')
    print('Error max SciPy', max(self.REL_ERR))
    setattr(self.SciPy[which_case], "median_ERR", sp.median(self.REL_ERR))

    formatter_y = FuncFormatter(yto_percent)
    formatter_x = FuncFormatter(xto_percent)
    plt.gca().yaxis.set_major_formatter(formatter_y)
    plt.gca().xaxis.set_major_formatter(formatter_x)
    plt.grid(which='both')
    plt.legend()
    return
def __call__(self, x):
    res = median([self.f(x) for _ in range(int(self.resample_over))])
    if self.num_evals % self.batchsize == 0 and self.num_evals > 0:
        alt_res = median([self.f(x) for _ in range(int(self.resample_over))])
        self._adaptResampling(res, alt_res)
        res = 0.5 * res + 0.5 * alt_res
    self.recents[self.num_evals % self.batchsize] = res
    self.num_evals += 1
    return res
def calculate_varPrior(disp_raw, disp_fitted, idx, varLogDispSamp):
    logRes = sp.log(disp_raw[idx]) - sp.log(disp_fitted[idx])
    # robust scale estimate: MAD scaled by 1.4826 approximates the std dev
    stdLogRes = sp.median(abs(logRes - sp.median(logRes))) * 1.4826
    varLogRes = stdLogRes ** 2
    varPrior = varLogRes - varLogDispSamp
    return max(varPrior, 0.1)
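# Context for the 1.4826 factor above: it equals 1/Phi^{-1}(3/4), the constant
# that makes the median absolute deviation (MAD) a consistent estimator of the
# standard deviation for normally distributed data. A standalone check on
# synthetic data:
import numpy as np
from scipy.stats import norm

print(1.0 / norm.ppf(0.75))                  # ~1.4826
x = np.random.normal(0, 2.0, 100000)
mad = np.median(np.abs(x - np.median(x)))
print(mad * 1.4826)                          # ~2.0, recovering the true sigma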
def _printStuff(self):
    print(self._num_updates, end=' ')
    for n, a in self._print_quantities:
        # print(n, type(a))
        if abs(median(a)) > 1e4 or abs(median(a)) < 1e-3:
            print(n, median(a), '\t', end=' ')
        else:
            print(n, round(median(a), 4), '\t', end=' ')
    print()
def __get_params(self, kplanet):
    """Retrieve model parameters."""
    period = sp.median(self.cold[:, 5 * kplanet])
    amplitude = sp.median(self.cold[:, 5 * kplanet + 1])
    phase = sp.median(self.cold[:, 5 * kplanet + 2])
    eccentricity = sp.median(self.cold[:, 5 * kplanet + 3])
    longitude = sp.median(self.cold[:, 5 * kplanet + 4])
    params = (period, amplitude, phase, eccentricity, longitude)
    return params
def analyzeMali(mali, options, prefix_row=""):
    if len(mali) == 0:
        raise ValueError("not analyzing empty multiple alignment")

    # count empty sequences
    row_data = [Mali.MaliData(x.mString, options.gap_chars, options.mask_chars)
                for x in mali.values()]
    col_data = [Mali.MaliData(x, options.gap_chars, options.mask_chars)
                for x in mali.getColumns()]

    if len(row_data) == 0 or len(col_data) == 0:
        return False

    if options.loglevel >= 2:
        for row in row_data:
            options.stdlog.write("# row: %s\n" % str(row))
        for col in col_data:
            options.stdlog.write("# col: %s\n" % str(col))

    options.stdout.write(prefix_row)

    # calculate average column occupancy
    col_mean = scipy.mean([x.mNChars for x in col_data])
    col_median = scipy.median([x.mNChars for x in col_data])
    length = mali.getLength()
    if float(int(col_median)) == col_median:
        options.stdout.write("%5.2f\t%5.2f\t%i\t%5.2f" % (
            col_mean, 100.0 * col_mean / length,
            col_median, 100.0 * col_median / length))
    else:
        options.stdout.write("%5.2f\t%5.2f\t%5.1f\t%5.2f" % (
            col_mean, 100.0 * col_mean / length,
            col_median, 100.0 * col_median / length))

    row_mean = scipy.mean([x.mNChars for x in row_data])
    row_median = scipy.median([x.mNChars for x in row_data])
    width = mali.getWidth()
    if float(int(row_median)) == row_median:
        options.stdout.write("\t%5.2f\t%5.2f\t%i\t%5.2f" % (
            row_mean, 100.0 * row_mean / width,
            row_median, 100.0 * row_median / width))
    else:
        options.stdout.write("\t%5.2f\t%5.2f\t%5.1f\t%5.2f" % (
            row_mean, 100.0 * row_mean / width,
            row_median, 100.0 * row_median / width))
    options.stdout.write("\n")
    return True
def MAD(a, c=0.6745):
    """Median Absolute Deviation along the first axis of an array:

    median(abs(a - median(a))) / c
    """
    a = N.asarray(a, N.float64)
    d = N.multiply.outer(median(a), N.ones(a.shape[1:]))
    return median(N.fabs(a - d) / c)
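# Why MAD instead of the plain standard deviation: it is nearly immune to
# outliers. A small demonstration, assuming `N` is this module's NumPy alias
# and `median` is NumPy's, as the body suggests (the data here is synthetic):
import numpy as N
from numpy import median

clean = N.random.normal(0, 1, 1000)
dirty = N.concatenate([clean, [50.0, -80.0, 120.0]])   # gross outliers
print(clean.std(), dirty.std())   # std is badly inflated by the outliers
print(MAD(clean), MAD(dirty))     # the MAD-based estimate stays near 1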
def update_sp_stat(self):
    """Calculate the statistics of the selected spectrum/spectra and print them."""
    n = len(self.axes[2].lines) - 1
    data = self.axes[2].lines[n].get_ydata()
    med = scipy.median(data)
    Mean = scipy.mean(data)
    sigma = scipy.std(data)
    disp = sqrt(scipy.median((data - med) ** 2))
    self.text['mean_sp1'].set_text('%8.2f' % Mean)
    self.text['sigma_sp1'].set_text('%8.2f' % sigma)
    self.text['median_sp1'].set_text('%8.2f' % med)
    self.text['med_disp_sp1'].set_text('%8.2f' % disp)
def startStopClicked_Callback(self):
    print('start/stop clicked')
    if self.running:
        self.timer.stop()
        self.running = False
        self.outfile.close()
        t = scipy.array(self.timeList)
        dt = (t[1:] - t[:-1]) * 1e-6
        dt = dt[100:]
        dtMean = dt.mean()
        dtMedian = scipy.median(dt)
        dtStdDev = dt.std()
        dtMedianAbsDev = scipy.median(scipy.absolute(dt - dtMedian))
        dtMaxAbsDev = scipy.absolute(dt - dtMean).max()
        dtMinAbsDev = scipy.absolute(dt - dtMean).min()
        numBin = int(0.01 * dt.shape[0])
        numBin = max([numBin, 20])
        diff = 1e-6 * scipy.array(self.diffTimeList)
        print()
        print('dt mean:           ', dtMean)
        print('dt median:         ', dtMedian)
        print('dt standard dev:   ', dtStdDev)
        print('dt median abs dev: ', dtMedianAbsDev)
        print('dt max abs dev:    ', dtMaxAbsDev)
        print()
        fig = plt.figure(1)
        plt.clf()
        plt.hist(dt, numBin)
        plt.xlabel('Period dt (sec)')
        plt.ylabel('Count')
        plt.grid('on')
        plt.draw()
        # fig = plt.figure(2)
        # plt.clf()
        # plt.hist(diff, numBin)
        # plt.xlabel('Get-Set dt (sec)')
        # plt.ylabel('Count')
        # plt.grid('on')
        # plt.draw()
    else:
        self.running = True
        self.count = 0
        self.timeList = []
        self.outfile = open(self.outfileName, 'w')
        self.timer.start(1.0e3 / self.freq)
        self.updateStartStopText()
def mad_clipping(input_data, sigma_clip_level, return_length=False):
    medval = median(input_data)
    sigma = 1.4826 * median(abs(medval - input_data))
    high_sigma_clip_limit = medval + sigma_clip_level * sigma
    low_sigma_clip_limit = medval - sigma_clip_level * sigma
    clipped_data = input_data[(input_data > low_sigma_clip_limit) &
                              (input_data < high_sigma_clip_limit)]
    new_medval = median(clipped_data)
    # note: deviations are still measured around the pre-clipping median
    new_sigma = 1.4826 * median(abs(medval - clipped_data))
    if return_length:
        return new_medval, new_sigma, len(clipped_data)
    else:
        return new_medval, new_sigma
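# Usage sketch on made-up contaminated data (NumPy assumed for `median` and
# array handling, matching the function body):
from numpy import median, concatenate, random, array

data = concatenate([random.normal(100.0, 5.0, 1000),
                    array([500.0, -300.0, 900.0])])    # injected outliers
center, spread, n_kept = mad_clipping(data, 3.0, return_length=True)
print(center, spread, n_kept)   # center near 100, spread near 5, ~1000 kept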
def mad_clipping(input_data, sigma_clip_level):
    medval = median(input_data)
    sigma = 1.48 * median(abs(medval - input_data))
    high_sigma_clip_limit = medval + sigma_clip_level * sigma
    low_sigma_clip_limit = medval - sigma_clip_level * sigma
    clipped_data = []
    for value in input_data:
        if (value > low_sigma_clip_limit) and (value < high_sigma_clip_limit):
            clipped_data.append(value)
    clipped_data_array = array(clipped_data)
    new_medval = median(clipped_data_array)
    new_sigma = 1.48 * median(abs(medval - clipped_data_array))
    return clipped_data_array, new_medval, new_sigma
def TMMNormalization(input_downweighted_df):
    final_df = input_downweighted_df.copy()

    # Output DataFrames
    first_df = pd.DataFrame()
    geom_mean_df = pd.DataFrame()

    # Iterate over transcripts to obtain a DataFrame of downweighted read
    # counts divided by the geometric mean of each transcript
    for i in range(final_df.shape[0]):
        # Obtain the list of counts for a particular transcript
        transcript_counts = list(final_df.iloc[i, 2:])
        # Obtain the geometric mean
        transcript_geom_mean = scipy.stats.mstats.gmean(transcript_counts)
        # If the geometric mean of the transcript equals 0, omit the transcript
        if transcript_geom_mean == 0:
            del transcript_counts, transcript_geom_mean
            continue
        # Creating output DataFrames
        transcript_geom_mean_df = pd.DataFrame({
            0: [final_df.iloc[i, 0]],
            1: [final_df.iloc[i, 1]],
            2: [transcript_geom_mean]
        })
        geom_mean_df = geom_mean_df.append(transcript_geom_mean_df)
        del transcript_geom_mean_df
        # Obtain a row with the downweighted read counts divided by the
        # geometric mean of the transcript
        temp_df = pd.DataFrame([final_df.iloc[i, :]])
        temp_df.iloc[:, 2:] = temp_df.iloc[:, 2:] / transcript_geom_mean
        # Append the row to the final DataFrame
        first_df = first_df.append(temp_df)
        del transcript_counts, transcript_geom_mean, temp_df
    del i

    geom_mean_df.columns = [
        '#Transcript_splicing_pattern', 'Transcript_ID', 'geometric_mean'
    ]

    # Read sample names from the column names of the DataFrame
    iteration_list = list(final_df.columns)[2:]

    # Normalize the read counts by multiplying each read count by the
    # per-sample median normalization factor
    sample_factor_df = pd.DataFrame()
    for i in range(len(iteration_list)):
        # Output DataFrame with the per-sample median of normalized values
        norm_sample_factor_df = pd.DataFrame({
            0: [iteration_list[i]],
            1: [scipy.median(list(first_df[iteration_list[i]]))]
        })
        sample_factor_df = sample_factor_df.append(norm_sample_factor_df)
        del norm_sample_factor_df
        final_df[iteration_list[i]] = final_df[iteration_list[i]] * (
            scipy.median(list(first_df[iteration_list[i]])))
    del i, first_df, iteration_list

    sample_factor_df.columns = ['#sample_id', 'normalized_values_median']
    return final_df, geom_mean_df, sample_factor_df
def lossTraces(fwrap, aclass, dim, maxsteps, storesteps=None, x0=None,
               initNoise=0., minLoss=1e-10, algoparams={}):
    """Compute a number of loss curves, for the provided settings,
    stored at specific storestep points."""
    if not storesteps:
        storesteps = range(maxsteps + 1)

    # initial points, potentially noisy
    if x0 is None:
        x0 = ones(dim) + randn(dim) * initNoise
    elif not isinstance(x0, ndarray):
        x0 = ones(dim) * x0

    # optimal loss
    oloss = mean(fwrap.stochfun.expectedLoss(ones(100) * fwrap.stochfun.optimum))

    # tracking progress by callback
    paramtraces = {'index': -1}
    losstraces = {}

    def storer(a):
        lastseen = paramtraces['index']
        for ts in [x for x in storesteps
                   if x > lastseen and x <= a._num_updates]:
            paramtraces[ts] = a.bestParameters.copy()
            losstraces[ts] = abs(fwrap.stochfun.expectedLoss(paramtraces[ts])
                                 - oloss) + minLoss
        paramtraces['index'] = a._num_updates

    # initialization
    algo = aclass(fwrap, x0, callback=storer, **algoparams)
    print(algo, fwrap, dim, maxsteps, end=' ')

    # store initial step
    algo.callback(algo)
    algo.run(maxsteps)

    # process learning curve
    del paramtraces['index']
    ls = array([x for _, x in sorted(losstraces.items())])
    print(median(ls[-1]))
    return ls
def analysis(records, analysis_function):
    """Read in the results of one of the tools and calculate certain statistics.

    analysis_function is a function for reading in the results
    (e.g. cleangingTools.parseScopaInfo).
    """
    trim = []
    left_trim = []
    right_trim = []
    tp, fp, tn, fn = [0] * 4   # true positive, false positive, ...
    for i, seq_record in enumerate(records):
        (id, present, actual_start, actual_end,
         found, predicted_start, predicted_end) = analysis_function(seq_record)[-1]
        if present:
            if found:
                tp = tp + 1
                left_trim.append(int(actual_start) - int(predicted_start))
                right_trim.append(int(predicted_end) - int(actual_end))
                trim.append(left_trim[-1] + right_trim[-1])
            else:
                fn = fn + 1
        else:
            if found:
                fp = fp + 1
            else:
                tn = tn + 1
    sensitivity = float(tp) / (tp + fn) if tp + fn > 0 else -1
    specificity = float(tn) / (tn + fp) if tn + fp > 0 else -1
    if len(trim) > 0:
        pct_correct = len([x for x in trim if x == 0]) / float(len(trim))
        avg_trim = scipy.mean(trim)
        median_trim = scipy.median(trim)
        SoS_trim = scipy.mean([x * x for x in trim])
        avg_left = scipy.mean(left_trim)
        median_left = scipy.median(left_trim)
        avg_right = scipy.mean(right_trim)
        median_right = scipy.median(right_trim)
        overArr = [x for x in trim if x > 0]
        pct_over = len(overArr) / float(len(trim))
        avg_over = scipy.mean(overArr) if len(overArr) > 0 else -99999
        median_over = scipy.median(overArr) if len(overArr) > 0 else -99999
        underArr = [x for x in trim if x < 0]
        pct_under = len(underArr) / float(len(trim))
        avg_under = scipy.mean(underArr) if len(underArr) > 0 else -99999
        median_under = scipy.median(underArr) if len(underArr) > 0 else -99999
    else:
        return [sensitivity, specificity] + [9999] * 10
    return [sensitivity, specificity, pct_correct, avg_trim, median_trim,
            avg_left, median_left, avg_right, median_right, SoS_trim,
            pct_over, avg_over, median_over, pct_under, avg_under, median_under]
def __amp_detect(self, x):
    ref = np.floor(self.min_ref_per * self.sr / 1000.0)

    # BANDPASS FILTER THE DATA (separate settings for detection and sorting)
    (b, a) = signal.ellip(2, 0.1, 40,
                          [self.fmin_detect * 2.0 / self.sr,
                           self.fmax_detect * 2.0 / self.sr],
                          btype='bandpass', analog=0, output='ba')
    xf_detect = signal.filtfilt(b, a, x)
    (b, a) = signal.ellip(2, 0.1, 40,
                          [self.fmin_sort * 2.0 / self.sr,
                           self.fmax_sort * 2.0 / self.sr],
                          btype='bandpass', analog=0, output='ba')
    xf = signal.filtfilt(b, a, x)

    noise_std_detect = scipy.median(np.abs(xf_detect)) / 0.6745
    noise_std_sorted = scipy.median(np.abs(xf)) / 0.6745
    thr = self.stdmin * noise_std_detect      # detection threshold, from detect settings
    thrmax = self.stdmax * noise_std_sorted   # artifact-removal threshold, from sort settings

    # LOCATE SPIKE TIMES
    nspk = 0
    xaux = np.argwhere(xf_detect[self.w_pre + 1:
                                 len(xf_detect) - self.w_post - 1 - 1] > thr) \
        + self.w_pre + 1
    xaux = np.resize(xaux, len(xaux))
    xaux0 = 0
    index = []
    for i in range(len(xaux)):
        if xaux[i] >= (xaux0 + ref):
            # after finding a peak, resume the search ref samples past the last one
            iaux = xf[xaux[i]:xaux[i] + int(np.floor(ref / 2.0))].argmax(0)
            # introduces alignment
            nspk = nspk + 1
            index.append(iaux + xaux[i])
            xaux0 = index[nspk - 1]

    # SPIKE STORING (with or without interpolation)
    ls = self.w_pre + self.w_post
    spikes = np.zeros([nspk, ls + 4])
    xf = np.concatenate((xf, np.zeros(self.w_post)), axis=0)
    for i in range(nspk):
        # Eliminates artifacts
        if np.max(np.abs(xf[index[i] - self.w_pre:index[i] + self.w_post])) < thrmax:
            spikes[i, :] = xf[index[i] - self.w_pre - 1:index[i] + self.w_post + 3]
    aux = np.argwhere(spikes[:, self.w_pre] == 0)   # erases indexes that were artifacts
    if len(aux) != 0:
        aux = aux.reshape((1, len(aux)))[0]
        spikes = np.delete(spikes, aux, axis=0)
        index = np.delete(index, aux)

    if self.interpolation == 'y':
        # Does interpolation
        spikes = self.__int_spikes(spikes)

    return spikes, thr, index
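# The `median(|x|)/0.6745` lines above are the standard robust noise estimate
# for spike detection: the median of absolute values of a (roughly zero-mean)
# bandpassed trace, rescaled to approximate the noise standard deviation while
# ignoring the rare large spikes. A standalone illustration on synthetic data:
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(0, 4.0, 100000)               # true noise sigma = 4
x[rng.integers(0, x.size, 200)] += 60.0      # sparse spikes
print(x.std())                               # inflated by the spikes
print(np.median(np.abs(x)) / 0.6745)         # ~4, robust to the spikes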
def skyopt(p, x, data, model):
    par = special_functions.unpack_coeff(p).tolist()
    wave = special_functions.genfunc(x, 0, p).astype(scipy.float64)
    sky = interpolate.splev(wave, model).astype(scipy.float64)
    ratio = scipy.median(data) / scipy.median(sky)
    offset = 0.
    par.append(ratio)
    par.append(offset)
    coeff, ier = optimize.leastsq(skyfit, par, (x, data, model, p),
                                  maxfev=100000)
    return special_functions.build_coeff(coeff, p), coeff[-2], coeff[-1]
def _analysis(self):
    self._prepareDictionary()
    arr_d = scipy.array(list(self.dic.values()), scipy.int32)
    arr_b = scipy.array(list(self.bmd.values()), scipy.int32)
    median_d = scipy.median(arr_d)
    median_b = scipy.median(arr_b)
    mean_d = arr_d.mean()
    mean_b = arr_b.mean()
    self.rate = scipy.sqrt(median_b) * median_d / median_b
    tpl = "%s\t%s\t%s"
    print(tpl % (r"r\c", "median", "mean"))
    tpl = "%s\t%d\t%d"
    print(tpl % ("dict", median_d, mean_d))
    print(tpl % ("bmf", median_b, mean_b))
def find_holes(data):
    sample = data.copy()
    size = sample.size

    # Here's a little hack to "flatten" star boxes
    tmp = scipy.sort(sample)
    star_cutoff = scipy.median(tmp[-30:-10]) * 0.6
    sample = scipy.where(sample > star_cutoff, star_cutoff, sample)

    derivative = deriv_1d(sample)
    derivative = ndimage.gaussian_filter1d(derivative, 3)
    derivative = abs(derivative)

    tmp = scipy.sort(derivative)
    avg = scipy.median(tmp[size // 8:size * 3 // 8])
    sigma = tmp[size // 8:size * 3 // 8].std()
    threshold = avg + sigma * 100.

    edge = []
    count = 0
    while derivative.max() > threshold:
        start = derivative.argmax() - 7
        end = derivative.argmax() + 8
        if start < 0:
            start = 0
        if end > derivative.size:
            end = derivative.size
        fit = find_peak(derivative[start:end])
        if start > 7 and end < derivative.size - 7:
            edge.append(float(start) + fit[2])
        start -= 3
        end += 3
        if start < 0:
            start = 0
        if end > derivative.size:
            end = derivative.size
        derivative[start:end] = 0.
    edge.sort()
    return edge, threshold, star_cutoff
def __clean_rvs(self):
    """Clean radial-velocities by adding the offset and jitter."""
    instrumental = self.cold[:, -2 * self.nins:]
    rv0 = copy.deepcopy(self.rv)
    err0 = copy.deepcopy(self.err)
    acc = sp.median(self.cold[:, -2 * self.nins - 1])
    for i in range(self.nins):
        jitter = sp.median(instrumental[:, i])
        offset = sp.median(instrumental[:, i + 1])
        ins = self.ins == i
        # Assume linear acceleration for now.
        rv0[ins] -= offset + acc
        err0[ins] = sp.sqrt(err0[ins] ** 2 + jitter ** 2)
    self.rv0 = rv0
    self.err0 = err0
def cluster_points(r, z):
    R, Z = geom.pointloop(r, z)
    dX = norm([r[1:] - r[:-1], z[1:] - z[:-1]], axis=0)
    dx_median = sp.median(dX)
    cluster, i = OrderedDict(), count(0)
    for r, z in zip(R, Z):
        dx = []
        for cl in cluster:
            rc, zc = cluster[cl]['r'], cluster[cl]['z']
            dx.append(np.min(norm([r - rc, z - zc], axis=0)))
        if len(dx) == 0 or np.min(dx) > 2 * dx_median:   # new cluster
            cl = 'group{:1.0f}'.format(next(i))
            cluster[cl] = {'r': [r], 'z': [z]}
        else:
            icl = np.argmin(dx)
            cl = list(cluster.keys())[icl]
            cluster[cl]['r'] = np.append(cluster[cl]['r'], r)
            cluster[cl]['z'] = np.append(cluster[cl]['z'], z)
    for cl in cluster:
        r, z = cluster[cl]['r'], cluster[cl]['z']
        # successive point spacing (r[1:] - r[:-1], matching dX above)
        dx = norm([r[1:] - r[:-1], z[1:] - z[:-1]], axis=0)
        imax = np.argmax(dx) + 1
        r = np.append(r[imax:], r[:imax])
        z = np.append(z[imax:], z[:imax])
        cluster[cl]['r'], cluster[cl]['z'] = r, z
    return cluster
def spike_detect(trace, threshold=2.5):
    """Take a single trace and return a spike location mask.

    :param trace: 1-D signal
    :param threshold: number of standard deviations
    :return: spike mask with ones at the event minima
    """
    trace = trace - scipy.median(trace)   # median subtraction
    trace_std = np.std(trace)
    # all points below the spike threshold
    above_threshold = (trace < -trace_std * threshold).astype(int)
    # find places where the spike mask changes value
    threshold_bounds = np.diff(above_threshold)
    putative_event_starts = np.where(threshold_bounds > 0)[0]  # 0 -> 1 is a start
    putative_event_ends = np.where(threshold_bounds < 0)[0]    # 1 -> 0 is an end
    event_maxima = np.zeros(trace.shape[0])
    for start, end in zip(putative_event_starts, putative_event_ends):
        event = trace[start:end]
        minimum_val = min(event)
        event_max_loc = np.where(event == minimum_val)[0]
        event_maxima[start + event_max_loc] = 1
    return event_maxima
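# Usage sketch on a synthetic trace. Note that the zip() above implicitly
# assumes starts and ends pair up, i.e. the trace begins and ends above
# threshold; the fabricated trace below satisfies that.
import numpy as np
import scipy

rng = np.random.default_rng(1)
trace = rng.normal(0, 1.0, 5000)
for loc in (800, 2400, 4100):
    trace[loc:loc + 3] -= 15.0                # three downward spikes
mask = spike_detect(trace, threshold=2.5)
print(np.where(mask)[0])                      # indices near 800, 2400, 4100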
def rendGauss(x, y, sx, imageBounds, pixelSize):
    fuzz = 3 * scipy.median(sx)
    roiSize = int(fuzz / pixelSize)
    fuzz = pixelSize * roiSize

    X = numpy.arange(imageBounds.x0 - fuzz, imageBounds.x1 + fuzz, pixelSize)
    Y = numpy.arange(imageBounds.y0 - fuzz, imageBounds.y1 + fuzz, pixelSize)

    im = scipy.zeros((len(X), len(Y)), 'f')

    # record our image resolution so we can plot pts with a minimum size
    # equal to res (to avoid missing small pts)
    delX = scipy.absolute(X[1] - X[0])

    for i in range(len(x)):
        ix = scipy.absolute(X - x[i]).argmin()
        iy = scipy.absolute(Y - y[i]).argmin()
        sxi = max(sx[i], delX)
        imp = Gauss2D(X[(ix - roiSize):(ix + roiSize + 1)],
                      Y[(iy - roiSize):(iy + roiSize + 1)],
                      1 / sxi, x[i], y[i], sxi)
        im[(ix - roiSize):(ix + roiSize + 1),
           (iy - roiSize):(iy + roiSize + 1)] += imp

    im = im[roiSize:-roiSize, roiSize:-roiSize]
    return im
def median_filter_bord(im, size=3):
    """Perform a local median filter on a flat image.
    Border pixels are processed.

    Args:
        im: the image to process
        size: the size in pixels of the local square window. Default value is 3.

    Returns:
        out: the filtered image
    """
    # Get the size of the image
    [nl, nc, d] = im.shape

    # Get the half-size of the moving window (integer, for indexing)
    s = (size - 1) // 2

    # Initialization of the output
    out = sp.empty((nl, nc, d))
    temp = sp.empty((nl + 2 * s, nc + 2 * s, d))   # temporary padded array
    temp[0:s, :] = sp.NaN
    temp[:, 0:s] = sp.NaN
    temp[-s:, :] = sp.NaN
    temp[:, -s:] = sp.NaN
    temp[s:s + nl, s:nc + s] = im

    # Apply the median filter; shift the origin to remove border effects
    for i in range(s, nl + s):
        for j in range(s, nc + s):
            for k in range(d):
                window = temp[i - s:i + 1 + s, j - s:j + s + 1, k]
                out[i - s, j - s, k] = sp.median(window[sp.isfinite(window)])

    return out.astype(im.dtype.name)
def normalization(data, ntype):
    if ntype == 'mean_maxs':
        data /= scipy.mean(numpy.max(data, 1))
    elif ntype == 'median_maxs':
        data /= scipy.median(numpy.max(data, 1))
    else:
        raise TypeError("Normalization type %s not implemented." % ntype)
def getDistance(self, sampleSize=10, waitTime=0.00001):
    distances = []
    time.sleep(2)
    for i in range(0, sampleSize):
        # set Trigger to HIGH
        GPIO.output(self.trig, True)
        # set Trigger back to LOW after the specified wait time
        time.sleep(waitTime)
        GPIO.output(self.trig, False)

        StartTime = time.time()
        StopTime = time.time()
        # save StartTime
        while GPIO.input(self.echo) == 0:
            StartTime = time.time()
        # save time of arrival
        while GPIO.input(self.echo) == 1:
            StopTime = time.time()

        # time difference between start and arrival
        TimeElapsed = StopTime - StartTime
        # multiply by the speed of sound (34300 cm/s) and divide by 2,
        # because the signal travels there and back
        distance = (TimeElapsed * 34300) / 2

        # Upper and lower bound on distance, 2cm to 500cm
        if (distance > 2) and (distance < 500):
            distances.append(distance)
    return median(distances)
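# Returning the median of repeated pings keeps a single glitched echo from
# skewing the reading. An offline check of that design choice (the readings
# below are made up):
from numpy import mean, median

readings = [57.1, 57.3, 56.9, 498.0, 57.2, 57.0, 2.4, 57.4, 57.1, 57.2]
print(mean(readings))     # ~95.8 cm, dragged around by the two glitches
print(median(readings))   # ~57.15 cm, the robust estimate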
def plotmedian(x, y, clr, opt=1, nbins=10, ax=[], xmin=30., xmax=150):
    xmid, ymid, ylist = [], [], []
    dx = (xmax - xmin) / nbins
    for i in range(nbins):
        xmid.append(xmin + (0.5 + i) * dx)
        ylist.append([])
        ymid.append(0.)
    for i in range(len(x)):
        idx = int((x[i] - xmin) / dx)
        if idx < 0:
            idx = 0
        if idx > nbins - 1:
            idx = nbins - 1
        ylist[idx].append(y[i])
    for i in range(nbins):
        if (len(ylist[i]) == 0 or max(ylist[i]) < 0.1) and i > 0:
            ymid[i] = ymid[i - 1]
            xmid[i] = xmid[i - 1]
        else:
            ymid[i] = median(ylist[i])
    if opt == 1:
        ax.plot(xmid, ymid, ".-", color=clr)
    if opt == 2:
        plt.plot(xmid, ymid, ".-", color=clr)
    print(xmid, ymid)
def plot_REL_ERR_SU2(self, which_case):
    thermo1 = self.select[which_case][0]
    thermo2 = self.select[which_case][1]
    get_REL_ERR_SU2(self, which_case)

    print('Median error SU2', sp.median(self.REL_ERR))
    print('Mean error SU2', sp.mean(self.REL_ERR))
    print('Max error SU2', max(self.REL_ERR))
    print('Min error SU2', min(self.REL_ERR))

    x = getattr(self.SU2[which_case], thermo1)
    y = getattr(self.SU2[which_case], thermo2)
    # keep only points below 90% of the maximum relative error
    trusted_values = sp.where(self.REL_ERR < 0.9 * max(self.REL_ERR))
    self.REL_ERR = self.REL_ERR[trusted_values]
    x = x[trusted_values]
    y = y[trusted_values]

    scat = plt.scatter(x, y, c=self.REL_ERR, s=1)
    plt.grid(which='both')
    scat.set_array(self.REL_ERR)
    plt.colorbar(scat)
    plt.xlim((min(x) * 0.95, max(x) * 1.05))
    plt.ylim((min(y) * 0.95, max(y) * 1.05))
    print('x argmax %i , x_val: %f' % (sp.argmax(self.REL_ERR),
                                       x[sp.argmax(self.REL_ERR)]))
    print('y argmax %i , y_val: %f' % (sp.argmax(self.REL_ERR),
                                       y[sp.argmax(self.REL_ERR)]))
    return
def app_convert_ratio_median(self, appID, _day, tw):
    cr = list()
    for i in range(tw):
        convert = self.ad.get_app_count(appID, 1, _day - i - 1, _day - i)
        click = self.ad.get_app_count(appID, 0, _day - i - 1, _day - i)
        # +1 in the denominator smooths the ratio and avoids division by zero
        cr.append(convert / (click + convert + 1))
    return [scipy.median(cr)]
def median_filter(im, size=3):
    """Perform a local median filter on a flat image.
    Border pixels are not processed.

    Args:
        im: the image to process
        size: the size in pixels of the local square window. Default value is 3.

    Returns:
        out: the filtered image
    """
    # Get the size of the image
    [nl, nc, d] = im.shape

    # Get the half-size of the moving window (integer, for indexing)
    s = (size - 1) // 2

    # Initialization of the output
    out = sp.zeros((nl, nc, d), dtype=im.dtype.name)

    # Apply the median filter; shift the origin to remove border effects
    for i in range(s, nl - s):
        for j in range(s, nc - s):
            for k in range(d):
                temp = im[i - s:i + 1 + s, j - s:j + s + 1, k]
                out[i, j, k] = sp.median(temp)
    return out
def avgFoundAfter(decreasingTargetValues, listsOfActualValues,
                  batchSize=1, useMedian=False):
    """Determine the average number of steps to reach a certain value
    (for the first time), given a list of value sequences.
    If a value is not always encountered, the length of the longest
    sequence is used. Returns an array."""
    from scipy import sum
    numLists = len(listsOfActualValues)
    longest = max(map(len, listsOfActualValues))
    # gather a list of indices of first encounters
    res = [[0] for _ in range(numLists)]
    for tval in decreasingTargetValues:
        for li, l in enumerate(listsOfActualValues):
            lres = res[li]
            found = False
            for i in range(lres[-1], len(l)):
                if l[i] <= tval:
                    lres.append(i)
                    found = True
                    break
            if not found:
                lres.append(longest)
    tmp = array(res)
    if useMedian:
        resx = median(tmp, axis=0)[1:]
    else:
        resx = sum(tmp, axis=0)[1:] / float(numLists)
    return resx * batchSize
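# A small worked example of the bookkeeping above; `array` and `median` are
# assumed to come from scipy/numpy as elsewhere in this module:
from numpy import array, median

curves = [[9, 7, 5, 3, 1],          # reaches 7/4/2 at steps 1, 3, 4
          [9, 8, 6, 6, 2]]          # reaches 7/4/2 at steps 2, 4, 4
targets = [7, 4, 2]
print(avgFoundAfter(targets, curves))   # [1.5, 3.5, 4.0]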
def print_all_stats(ctx, series):
    from functools import reduce   # needed on Python 3

    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
    while start < ftime:   # for each time interval
        end = ftime if ftime < end else end
        sample_arrays = [s.get_samples(start, end) for s in series]
        samplevalue_arrays = []
        for sample_array in sample_arrays:
            samplevalue_arrays.append(
                [sample.value for sample in sample_array])
        # collapse list of lists of sample values into list of sample values
        samplevalues = reduce(array_collapser, samplevalue_arrays, [])
        # compute all stats and print them
        myarray = scipy.fromiter(samplevalues, float)
        mymin = scipy.amin(myarray)
        myavg = scipy.average(myarray)
        mymedian = scipy.median(myarray)
        my90th = scipy.percentile(myarray, 90)
        my95th = scipy.percentile(myarray, 95)
        my99th = scipy.percentile(myarray, 99)
        mymax = scipy.amax(myarray)
        print('%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
            start, len(samplevalues), mymin, myavg, mymedian,
            my90th, my95th, my99th, mymax))
        # advance to next interval
        start += ctx.interval
        end += ctx.interval
def updateProperties(self, values):
    """update properties.

    If values is a vector of strings, each entry will be converted
    to float. Entries that can not be converted are ignored.
    """
    values = [x for x in values if x is not None]
    if len(values) == 0:
        raise ValueError("no data for statistics")

    # convert
    self.mNErrors = 0
    if not isinstance(values[0], (int, float)):
        n = []
        for x in values:
            try:
                n.append(float(x))
            except ValueError:
                self.mNErrors += 1
    else:
        n = values

    # use a non-sort algorithm later.
    n.sort()
    self.mQ1 = n[len(n) // 4]
    self.mQ3 = n[len(n) * 3 // 4]

    self.mCounts = len(n)
    self.mMin = min(n)
    self.mMax = max(n)
    self.mMean = scipy.mean(n)
    self.mMedian = scipy.median(n)
    self.mSampleStd = scipy.std(n)
    self.mSum = sum(n)
def __call__(self, a, mu=None, scale=None):
    """Compute Huber's proposal 2 estimate of scale, using an optional
    initial value of scale and an optional estimate of mu. If mu
    is supplied, it is not reestimated."""
    self.a = N.asarray(a, N.float64)
    if mu is None:
        self.n = self.a.shape[0] - 1
        self.mu = N.multiply.outer(median(self.a), N.ones(self.a.shape[1:]))
        self.est_mu = True
    else:
        self.n = self.a.shape[0]
        self.mu = mu
        self.est_mu = False

    if scale is None:
        self.scale = MAD(self.a) ** 2
    else:
        self.scale = scale

    for donothing in self:
        pass

    self.s = N.sqrt(self.scale)
    return self.s
def domain_length(self, face_1, face_2):
    r'''
    Calculate the distance between two faces of the network

    Parameters
    ----------
    face_1 and face_2 : array_like
        Lists of pores belonging to opposite faces of the network

    Returns
    -------
    The length of the domain in the specified direction

    Notes
    -----
    - Does not yet check if input faces are perpendicular to each other
    '''
    # Ensure given points are coplanar before proceeding
    if misc.iscoplanar(self['pore.coords'][face_1]) and \
            misc.iscoplanar(self['pore.coords'][face_2]):
        # Find distance between given faces
        x = self['pore.coords'][face_1]
        y = self['pore.coords'][face_2]
        Ds = misc.dist(x, y)
        L = sp.median(sp.amin(Ds, axis=0))
    else:
        self._logger.warning('The supplied pores are not coplanar. '
                             'Length will be approximate.')
        f1 = self['pore.coords'][face_1]
        f2 = self['pore.coords'][face_2]
        distavg = [0, 0, 0]
        distavg[0] = sp.absolute(sp.average(f1[:, 0]) - sp.average(f2[:, 0]))
        distavg[1] = sp.absolute(sp.average(f1[:, 1]) - sp.average(f2[:, 1]))
        distavg[2] = sp.absolute(sp.average(f1[:, 2]) - sp.average(f2[:, 2]))
        L = max(distavg)
    return L
def FindCenterVel(self, Median=True, N_Median=1000):
    if Median == False:
        self.Mean_vx = scipy.mean(self.Snapshot.vx)
        self.Mean_vy = scipy.mean(self.Snapshot.vy)
        self.Mean_vz = scipy.mean(self.Snapshot.vz)
    else:
        if self.Snapshot.V is None:
            print('Potentials not defined. Can not find center')
            return None
        V_index = scipy.argsort(self.Snapshot.V)
        self.Mean_vx = scipy.median(self.Snapshot.vx[V_index[0:N_Median]])
        self.Mean_vy = scipy.median(self.Snapshot.vy[V_index[0:N_Median]])
        self.Mean_vz = scipy.median(self.Snapshot.vz[V_index[0:N_Median]])
    self.CenterVelFound = True
def domain_length(self, face_1, face_2):
    r'''
    Calculate the distance between two faces of the network

    Parameters
    ----------
    face_1 and face_2 : array_like
        Lists of pores belonging to opposite faces of the network

    Returns
    -------
    The length of the domain in the specified direction

    Notes
    -----
    - Does not yet check if input faces are perpendicular to each other
    '''
    # Ensure given points are coplanar before proceeding
    if misc.iscoplanar(self['pore.coords'][face_1]) and \
            misc.iscoplanar(self['pore.coords'][face_2]):
        # Find distance between given faces
        x = self['pore.coords'][face_1]
        y = self['pore.coords'][face_2]
        Ds = misc.dist(x, y)
        L = sp.median(sp.amin(Ds, axis=0))
    else:
        logger.warning('The supplied pores are not coplanar. '
                       'Length will be approximate.')
        f1 = self['pore.coords'][face_1]
        f2 = self['pore.coords'][face_2]
        distavg = [0, 0, 0]
        distavg[0] = sp.absolute(sp.average(f1[:, 0]) - sp.average(f2[:, 0]))
        distavg[1] = sp.absolute(sp.average(f1[:, 1]) - sp.average(f2[:, 1]))
        distavg[2] = sp.absolute(sp.average(f1[:, 2]) - sp.average(f2[:, 2]))
        L = max(distavg)
    return L
def avgFoundAfter(decreasingTargetValues, listsOfActualValues,
                  batchSize=1, useMedian=False):
    """Determine the average number of steps to reach a certain value
    (for the first time), given a list of value sequences.
    If a value is not always encountered, the length of the longest
    sequence is used. Returns an array."""
    from scipy import sum
    numLists = len(listsOfActualValues)
    longest = max(list(map(len, listsOfActualValues)))
    # gather a list of indices of first encounters
    res = [[0] for _ in range(numLists)]
    for tval in decreasingTargetValues:
        for li, l in enumerate(listsOfActualValues):
            lres = res[li]
            found = False
            for i in range(lres[-1], len(l)):
                if l[i] <= tval:
                    lres.append(i)
                    found = True
                    break
            if not found:
                lres.append(longest)
    tmp = array(res)
    if useMedian:
        resx = median(tmp, axis=0)[1:]
    else:
        resx = sum(tmp, axis=0)[1:] / float(numLists)
    return resx * batchSize
def main(database):
    # Commits per committer, limited to the 40 committers with the highest
    # accumulated activity
    query = ("select count(*) from scmlog group by committer_id "
             "order by count(*) desc limit 40")

    # Connecting to the database and retrieving data
    connector = connect(database)
    results = int(connector.execute(query))
    if results > 0:
        results_aux = connector.fetchall()
    else:
        print("Error when retrieving data")
        return

    # Moving data to a list, skipping the top five committers
    commits = []
    for commit in results_aux[5:]:
        commits.append(int(commit[0]))

    # Calculating basic statistics
    print("max: " + str(sp.amax(commits)))
    print("min: " + str(sp.amin(commits)))
    print("mean: " + str(sp.mean(commits)))
    print("median: " + str(sp.median(commits)))
    print("std: " + str(sp.std(commits)))
    print(".25 quartile: " + str(sp.percentile(commits, 25)))
    print(".50 quartile: " + str(sp.percentile(commits, 50)))
    print(".75 quartile: " + str(sp.percentile(commits, 75)))
def music_heat(qvals):
    heat = scipy.median([qvals["FDR_CT"], qvals["FDR_LRT"], qvals["FDR_FCPT"]])
    if heat != 0:
        return -log10(heat)
    else:
        return max_heat
def DataArrayStatisticsReport(parent, titleString, tempdata):
    scrolledText = tk_stxt.ScrolledText(parent, width=textboxWidth,
                                        height=textboxHeight, wrap=tk.NONE)
    scrolledText.insert(tk.END, titleString + '\n\n')

    # must at least have max and min
    minData = min(tempdata)
    maxData = max(tempdata)

    if maxData == minData:
        scrolledText.insert(tk.END, 'All data has the same value,\n')
        scrolledText.insert(tk.END, "value = %-.16E\n" % (minData))
        scrolledText.insert(tk.END, 'statistics cannot be calculated.')
    else:
        scrolledText.insert(tk.END, "max = %-.16E\n" % (maxData))
        scrolledText.insert(tk.END, "min = %-.16E\n" % (minData))

        try:
            temp = scipy.mean(tempdata)
            scrolledText.insert(tk.END, "mean = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "mean gave error in calculation\n")

        try:
            temp = scipy.stats.sem(tempdata)
            scrolledText.insert(tk.END, "standard error of mean = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "standard error of mean gave error in calculation\n")

        try:
            temp = scipy.median(tempdata)
            scrolledText.insert(tk.END, "median = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "median gave error in calculation\n")

        try:
            temp = scipy.var(tempdata)
            scrolledText.insert(tk.END, "variance = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "variance gave error in calculation\n")

        try:
            temp = scipy.std(tempdata)
            scrolledText.insert(tk.END, "std. deviation = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "std. deviation gave error in calculation\n")

        try:
            temp = scipy.stats.skew(tempdata)
            scrolledText.insert(tk.END, "skew = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "skew gave error in calculation\n")

        try:
            temp = scipy.stats.kurtosis(tempdata)
            scrolledText.insert(tk.END, "kurtosis = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "kurtosis gave error in calculation\n")

    return scrolledText
def FindCenter(self, Median=True, N_Median=10):
    """Define the center to be the particle with the lowest potential."""
    if self.Snapshot.V is None:
        print('Potentials not defined. Can not find center')
        return None
    self.V_index = scipy.argsort(self.Snapshot.V)
    if Median == False:
        self.x_C = self.Snapshot.x[self.V_index[0]]
        self.y_C = self.Snapshot.y[self.V_index[0]]
        self.z_C = self.Snapshot.z[self.V_index[0]]
    else:
        self.x_C = scipy.median(self.Snapshot.x[self.V_index[0:N_Median]])
        self.y_C = scipy.median(self.Snapshot.y[self.V_index[0:N_Median]])
        self.z_C = scipy.median(self.Snapshot.z[self.V_index[0:N_Median]])
    self.CenterFound = True
def center_scale(row):
    """centers the provided row around the median"""
    filtered = row[np.isfinite(row)]
    center = scipy.median(filtered)
    scale = util.r_stddev(filtered)
    nurow = [((value - center) / scale) if not np.isnan(value) else value
             for value in row]
    return nurow
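# Usage sketch with `util.r_stddev` stubbed out; the real helper lives in
# this project's util module, and an R-style (ddof=1) sample standard
# deviation is assumed to be a reasonable stand-in:
import numpy as np
import scipy

class util:                            # hypothetical stand-in for the module
    @staticmethod
    def r_stddev(x):
        return np.std(x, ddof=1)       # sample standard deviation, like R's sd()

row = np.array([1.0, 2.0, np.nan, 4.0, 8.0])
print(center_scale(row))               # NaN preserved; others median-centered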