def check_basic(self):
    a1 = [3, 4, 5, 10, -3, -5, 6]
    a2 = [3, -6, -2, 8, 7, 4, 2, 1]
    a3 = [3., 4, 5, 10, -3, -5, -6, 7.0]
    assert_equal(stats.median(a1), 4)
    assert_equal(stats.median(a2), 2.5)
    assert_equal(stats.median(a3), 3.5)
def statsex(self, objects):
    """Do some statistics on a source list.

    Return dictionary.
    """
    import stats, pstat

    # Return if we have no objects
    if len(objects) == 0:
        return 0

    # Define dictionary to hold statistics
    stat = {}

    # Get number of objects
    stat['N'] = str(len(objects))

    # Define list (float) of FWHM values
    fwhm = [float(obj[7]) for obj in objects]
    # Define list (float) of ELLIPTICITY values
    el = [float(obj[6]) for obj in objects]
    # Define list (float) of THETA_IMAGE values
    pa = [float(obj[5]) for obj in objects]
    # Define list (float) of 'Stella-like' values
    stella = [float(obj[9]) for obj in objects]

    # Create a histogram of FWHM values of binsize 1 pixel
    hfwhm = stats.histogram(fwhm, 40, [0, 40])[0]

    stat['medianFWHM'] = "%.2f" % stats.median(fwhm)
    stat['meanFWHM'] = "%.2f" % stats.mean(fwhm)
    stat['modeFWHM'] = "%.2f" % float(hfwhm.index(max(hfwhm)) + 0.5)
    try:
        stat['stdevFWHM'] = "%.2f" % stats.stdev(fwhm)
    except ZeroDivisionError:
        stat['stdevFWHM'] = '0.00'

    stat['medianEL'] = "%.2f" % stats.median(el)
    stat['meanEL'] = "%.2f" % stats.mean(el)
    try:
        stat['stdevEL'] = "%.2f" % stats.stdev(el)
    except ZeroDivisionError:
        stat['stdevEL'] = '0.00'

    # Histogram of Ellipticity PA (-180 to 180, bins of 45 deg)
    #stat['histoTHETA'] = stats.histogram(pa,8,[-180,180])[0]
    # Histogram of Stellarity (0 to 1, bins of 0.05)
    #stat['histoStella'] = stats.histogram(stella,20,[0,1.01])[0]

    return stat
def test_median1():
    obs = median([0, 0, 0, 0])
    assert_equal(obs, 0)

    obs = median([0, 1, 2])
    assert_equal(obs, 1)

    obs = median([0, 1])
    assert_equal(obs, 0.5)

    assert_raises(TypeError, median, ['a', 'b', 'v'])
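# The median tests in this file assume a `median` helper that averages the two
# middle elements for even-length input and rejects non-numeric data with a
# TypeError. A minimal sketch consistent with that behavior (the tested
# implementation itself is not shown, so this is an assumption):
def median(values):
    """Middle value of a sequence; the mean of the two central elements
    when the length is even."""
    data = sorted(values)
    n = len(data)
    if n == 0:
        raise ValueError("median() arg is an empty sequence")
    if not all(isinstance(v, (int, float)) for v in data):
        raise TypeError("median() requires numeric values")
    mid = n // 2
    if n % 2:
        return data[mid]
    return (data[mid - 1] + data[mid]) / 2.0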
def collect_median(input_list):
    """Collect the execution time of each module's median implementation"""
    begin_py_median = clock()
    py_median(input_list)
    end_py_median = clock()

    begin_median = clock()
    median(input_list)
    end_median = clock()

    times = format_times(end_py_median - begin_py_median,
                         end_median - begin_median)
    save_times(times, logs['median'])
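# `clock` above is presumably time.clock, which was removed in Python 3.8.
# A minimal timing helper on current Python using time.perf_counter;
# `time_call` is a hypothetical name, not part of the original module.
from time import perf_counter

def time_call(fn, *args):
    """Return the wall-clock seconds taken by one call to fn(*args)."""
    begin = perf_counter()
    fn(*args)
    return perf_counter() - begin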
def test_matplotlib_barchart(loop_times=LOOP_TIMES):
    render_time = []
    all_render_time_np = []
    for (i, j) in SCREEN_SIZE:
        print("Screen Size: ", (i, j))
        plt.rcParams["figure.figsize"] = (i, j)
        for x in range(loop_times):
            plt.ion()
            objects = ('Python', 'C++', 'Java', 'Perl', 'Scala', 'Lisp')
            y_pos = np.arange(len(objects))
            performance = [10, 8, 6, 4, 2, 1]

            tstart = time.time()
            plt.bar(y_pos, performance, align='center', alpha=0.5)
            plt.xticks(y_pos, objects)
            plt.ylabel('Usage')
            plt.title('Programming language usage')
            plt.show()
            plt.close('all')
            tend = time.time()

            render_time.append(tend - tstart)
            print(6, "Bar Chart Draw Time: ", tend - tstart)
        render_time_np = np.array(render_time)
        all_render_time_np.append(render_time_np)
        print("Mean Render Time: ", render_time_np.mean())
        print("Median Render Time: ", median(render_time_np))
        print()
    print("Xtiles:")
    for np_arr in all_render_time_np:
        print(xtile(np_arr, lo=0, hi=2))
def test_matplotlib_sign(loop_times=LOOP_TIMES):
    render_time = []
    all_render_time_np = []
    for (i, j) in SCREEN_SIZE:
        print("Screen Size: ", (i, j))
        plt.rcParams["figure.figsize"] = (i, j)
        for k in range(loop_times):
            x = np.arange(0, 2 * np.pi, 0.01)
            y = np.sin(x)
            fig, axes = matplotlib.pyplot.subplots(nrows=6)
            styles = ['r-', 'g-', 'y-', 'm-', 'k-', 'c-']
            lines = [ax.plot(x, y, style)[0] for ax, style in zip(axes, styles)]

            tstart = time.time()
            # NOTE: these loop variables shadow the (i, j) screen-size pair above
            for i in range(1, NUM_OF_SIN_CURVES):
                for j, line in enumerate(lines, start=1):
                    line.set_ydata(np.sin(j * x + i / 10.0))
                fig.canvas.draw()
            fig.show()
            plt.close(fig)
            tend = time.time()

            render_time.append(tend - tstart)
            print(NUM_OF_SIN_CURVES, "Sine Curve Draw Time: ", tend - tstart)
        render_time_np = np.array(render_time)
        all_render_time_np.append(render_time_np)
        print("Mean Render Time: ", render_time_np.mean())
        print("Median Render Time: ", median(render_time_np))
        print()
    print("Xtiles:")
    for np_arr in all_render_time_np:
        print(xtile(np_arr, lo=0, hi=2))
def sync_check():
    #print 'Checking sync...'
    max_mcnt_difference = 4
    mcnts = dict()
    mcnts_list = []
    mcnt_tot = 0

    for f, fpga in enumerate(fpgas):
        mcnts[f] = dict()
        try:
            hdr_index = bram_oob[f]['hdr'].index(1)
        except ValueError:
            print 'ERR: No headers found in BRAM. Are the F engines properly connected?'
            exit()
        pkt_64bit = struct.unpack('>Q',
            bram_dmp['bram_msb'][f]['data'][(4 * hdr_index):(4 * hdr_index) + 4]
            + bram_dmp['bram_lsb'][f]['data'][(4 * hdr_index):(4 * hdr_index) + 4])[0]
        mcnts[f]['mcnt'] = (pkt_64bit & ((2 ** 64) - (2 ** 16))) >> 16
        mcnts_list.append(mcnts[f]['mcnt'])
        #print '[%s] MCNT: %i' % (servers[f], mcnts[f]['mcnt'])

    mcnts['mean'] = stats.mean(mcnts_list)
    mcnts['median'] = stats.median(mcnts_list)
    mcnts['mode'] = stats.mode(mcnts_list)
    mcnts['modalmean'] = stats.mean(mcnts['mode'][1])
    #print 'mean: %i, median: %i, modal mean: %i mode:' % (mcnts['mean'], mcnts['median'], mcnts['modalmean']), mcnts['mode']

    for f, fpga in enumerate(fpgas):
        if mcnts[f]['mcnt'] > (mcnts['modalmean'] + max_mcnt_difference) or \
           mcnts[f]['mcnt'] < (mcnts['modalmean'] - max_mcnt_difference):
            print '%s OUT OF SYNC!!' % servers[f]
            mcnts[f]['sync_status'] = 'FAIL with error of %i' % (mcnts[f]['mcnt'] - mcnts['modalmean'])
        else:
            mcnts[f]['sync_status'] = 'PASS'

    return mcnts
def _getRatingAverages():
    count = 0
    android_rating_total = 0
    ios_rating_total = 0
    android_ratings = []
    ios_ratings = []
    global android_rating_average
    global ios_rating_average
    global android_rating_median
    global ios_rating_median
    global android_rating_q1
    global ios_rating_q1
    global android_rating_q3
    global ios_rating_q3

    for app in collection_ios.find().batch_size(30):
        #count=count+1
        #android_rating_total=android_rating_total+float(app['android_ratingsAllVersions'].replace(',',''))
        #ios_rating_total=ios_rating_total+float(app['ios_ratingsAllVersions_new'].replace(',',''))
        android_ratings.append(float(app["android_ratingsAllVersions"].replace(",", "")))
        ios_ratings.append(float(app["ios_ratingsAllVersions_new"].replace(",", "")))

    android_rating_average = stats.mean(android_ratings)
    ios_rating_average = stats.mean(ios_ratings)
    android_rating_median = stats.median(android_ratings)
    ios_rating_median = stats.median(ios_ratings)
    android_rating_q1 = stats.quartiles(android_ratings)[0]
    ios_rating_q1 = stats.quartiles(ios_ratings)[0]
    android_rating_q3 = stats.quartiles(android_ratings)[1]
    ios_rating_q3 = stats.quartiles(ios_ratings)[1]

    print "ios stats"
    print ios_rating_q1
    print ios_rating_median
    print ios_rating_q3
    print "Android stats"
    print android_rating_q1
    print android_rating_median
    print android_rating_q3
def calculations(x):
    from stats import median, mean, mode, MAD, RANGE
    return {
        'median': median(x),
        'mean': mean(x),
        'mode': mode(x),
        'range': RANGE(x),
        'MAD': MAD(x)
    }
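# `MAD` and `RANGE` come from a local `stats` module that is not shown; a
# plausible sketch under the conventional definitions, reusing the `median`
# sketch above (the real module may differ):
def RANGE(values):
    """Spread of the data: max minus min."""
    return max(values) - min(values)

def MAD(values):
    """Median absolute deviation: median of |x - median(x)|."""
    m = median(values)
    return median([abs(v - m) for v in values])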
def forward(self):
    self.xsampled = []
    self.ysampled = []
    if self.sampling_distribution == 'uniform':
        radians = np.random.uniform(0, 1, self.Nsamples) * 2 * np.pi
        r = np.random.uniform(0, self.radius, self.Nsamples)
    elif self.sampling_distribution == 'normal':
        radians = np.random.normal(0, 1, self.Nsamples) * 2 * np.pi
        r = np.random.normal(0, self.radius, self.Nsamples)

    for x, y in zip(np.array(r * np.cos(radians)) + np.array([self.center[0]] * len(radians)),
                    np.array(r * np.sin(radians)) + np.array([self.center[1]] * len(radians))):
        if self.pos_sample_count < self.pos_samples:
            self.xsampled.append(x)
            self.ysampled.append(y)
            features = FeatureExtract(
                rgb2gray(asarray(get_window(self.PIL_image, (x, y), self.crop_size, self.resolution))),
                self.feature_types, self.feature_coords)
            features = pd.DataFrame([features])
            class_prediction = self.trained_ModelObject.predict(features)
            if class_prediction == 'pos':
                #print(class_prediction)
                self.data.append([x, y, features, 1])
                self.pos_sample_count += 1
            else:
                self.data.append([x, y, features, 0])
        else:
            break

    if self.pos_sample_count != 0:
        self.data = np.array(self.data, dtype=object)
        posLandmarks = self.data[self.data[:, 3] == 1]
        self.posLandmarks_Xcoords = posLandmarks[:, 0]
        self.posLandmarks_Ycoords = posLandmarks[:, 1]
        if self.finalpredstat == 'median':
            self.pred_coord = (stats.median(self.posLandmarks_Xcoords),
                               stats.median(self.posLandmarks_Ycoords))
        if self.finalpredstat == 'mean':
            self.pred_coord = (stats.mean(self.posLandmarks_Xcoords),
                               stats.mean(self.posLandmarks_Ycoords))
        if self.true_landmark is not None:
            self.error = float(math.sqrt((self.true_landmark[0] - self.pred_coord[0]) ** 2
                                         + (self.true_landmark[1] - self.pred_coord[1]) ** 2))
        self.no_pred = 0
    else:
        self.no_pred = 1
def print_tiles(list1, name_of_list1, list2, name_of_list2):
    my_lo = min(list1)
    my_hi = max(list1)
    if min(list2) < my_lo:
        my_lo = min(list2)
    if max(list2) > my_hi:
        my_hi = max(list2)

    def show(lst):
        return stats.xtile(lst, lo=my_lo, hi=my_hi, width=25,
                           show=lambda s: " %3.2f" % s)

    print(name_of_list1, show(list1), "| Median: ", round(stats.median(list1), 2))
    print(name_of_list2, show(list2), "| Median: ", round(stats.median(list2), 2))
def calculate_stats(population):
    # find the growth in population in consecutive years
    growth = []
    for i in range(0, len(population) - 1):
        growth.append(population[i + 1] - population[i])
    print('Mean growth: {0:.5f}'.format(mean(growth)))
    print('Median growth: {0:.5f}'.format(median(growth)))
    print('Variance/Sd growth: {0:.5f}, {1:.5f}'.format(*variance_sd(growth)))
    return growth
def calculations(x):
    from stats import median, mean, mode, MAD, RANGE, maximum, minimum
    return {
        'median': median(x),
        'mean': mean(x),
        'mode': mode(x),
        'range': RANGE(x),
        'MAD': MAD(x),
        'max': maximum(x),
        'min': minimum(x)
    }
def PlotKineticEnergyOverHeight(df, block=False, xlabel='', ylabel='', MaxStep=100000,
                                saveflag=False, savedir='', savename='', writeflag=False):
    print("Compute Kinetic Energy Profile")

    ###### Constants
    # redundant redefinition
    kB = 1.3806503e-23
    e0 = 1.60217662e-19
    au = 1.66053904e-27
    step = MaxStep
    mass = 40
    m = mass * au
    Bound = 5.0
    MaxHeight = df['z'].max()
    stepsize = 0.5
    HeightArr = np.arange(0, MaxHeight - 2., stepsize)
    xKE_In = []
    xKE_Out = []
    yKE_In = []
    yKE_Out = []
    zKE_In = []
    zKE_Out = []
    AvgWindow = 1000000
    lengthArray = len(HeightArr)

    for h in HeightArr:
        VelocityArrIn = df.loc[(df['z'] > h) & (df['z'] <= h + stepsize) & (df['traj'] < 20)
                               & (df['step'] >= MaxStep - AvgWindow) & (df['vz'] <= 0),
                               ['vx', 'vy', 'vz']]
        VelocityArrIn['xke'] = 0.5 * m * (VelocityArrIn['vx'] * 100.) ** 2 / kB
        VelocityArrIn['yke'] = 0.5 * m * (VelocityArrIn['vy'] * 100.) ** 2 / kB
        VelocityArrIn['zke'] = 0.5 * m * (VelocityArrIn['vz'] * 100.) ** 2 / kB

        VelocityArrOut = df.loc[(df['z'] > h) & (df['z'] <= h + stepsize) & (df['traj'] < 20)
                                & (df['step'] >= MaxStep - AvgWindow) & (df['vz'] > 0),
                                ['vx', 'vy', 'vz']]
        VelocityArrOut['xke'] = 0.5 * m * (VelocityArrOut['vx'] * 100.) ** 2 / kB
        VelocityArrOut['yke'] = 0.5 * m * (VelocityArrOut['vy'] * 100.) ** 2 / kB
        VelocityArrOut['zke'] = 0.5 * m * (VelocityArrOut['vz'] * 100.) ** 2 / kB

        xKE_In.append(VelocityArrIn['xke'].mean())
        xKE_Out.append(VelocityArrOut['xke'].mean())
        yKE_In.append(VelocityArrIn['yke'].mean())
        yKE_Out.append(VelocityArrOut['yke'].mean())
        zKE_In.append(VelocityArrIn['zke'].mean())
        zKE_Out.append(VelocityArrOut['zke'].mean())

    from stats import median
    xKEmean = 0.5 * (median(xKE_In[lengthArray // 2:]) + median(xKE_Out[lengthArray // 2:]))
    yKEmean = 0.5 * (median(yKE_In[lengthArray // 2:]) + median(yKE_Out[lengthArray // 2:]))
    zKEmean = 0.5 * (median(zKE_In[lengthArray // 2:]) + median(zKE_Out[lengthArray // 2:]))
    print("KEmean", (xKEmean + yKEmean + zKEmean) / 3.0)

    if writeflag:
        WritePlot(X=HeightArr, Y=[xKE_In, yKE_In, zKE_In], name=savedir + savename + 'In',
                  xlabel=xlabel, ylabel=ylabel + ' x,y,z', header=True, action='w')
        WritePlot(X=HeightArr, Y=[xKE_Out, yKE_Out, zKE_Out], name=savedir + savename + 'Out',
                  xlabel=xlabel, ylabel=ylabel + ' x,y,z', header=True, action='w')

    plt.plot(HeightArr, [xKE_In[i] + yKE_In[i] + zKE_In[i] for i in range(len(xKE_In))],
             label='Kin Energy In')
    plt.plot(HeightArr, [xKE_Out[i] + yKE_Out[i] + zKE_Out[i] for i in range(len(xKE_Out))],
             label='Kin Energy Out')
    MakePlot(saveflag=saveflag, block=block, xlabel=xlabel, ylabel=ylabel,
             savepath=savedir + savename)
def calculations(x):
    rang = lambda y, z: y - z
    y = max(x)
    z = min(x)
    from stats import median, mode
    return {
        'median': median(x),
        'mean': avg(x),
        'mode': mode(x),
        'range': rang(y, z),
        'max': y,
        'min': z
    }
def report(x):
    print 'n =', len(x)
    print 'minimum =', min(x)
    print 'maximum =', max(x)
    print 'mean =', stats.mean(x)
    print 'median =', stats.median(x)
    print 'population variance =', stats.popvar(x)
    print 'sample variance =', stats.samvar(x)
    print 'population standard deviation =', stats.popstd(x)
    print 'sample standard deviation =', stats.samstd(x)
    print 'median deviation =', stats.median_deviation(x)
    print 'mean deviation =', stats.mean_deviation(x)
    print 'population skewness =', stats.popskw(x)
    print 'sample skewness =', stats.samskw(x)
    print 'nonparametric skew =', stats.nonparametric_skew(x)
def upload(reactor, url, project, revision, revision_date, benchmark, param,
           statistic, backend, environment, samples):
    d = _upload(
        reactor,
        url=url,
        project=project,
        revision=revision,
        revision_date=revision_date,
        benchmark='%s-%s-%s' % (benchmark, param, statistic),
        executable='%s-backend' % (backend,),
        environment=environment,
        result_value=median(samples),
        result_date=datetime.now(),
        std_dev=mad(samples),  # Not really!
        max_value=max(samples),
        min_value=min(samples))
    d.addErrback(err, "Upload failed")
    return d
def calc(x, conf):
    size = len(x)
    sum = stats.sum(x)
    av = stats.average(sum, size)
    gm = stats.gmean(x)
    v = stats.var(sum, stats.sqsum(x), size)
    med = stats.median(x)
    if v != 'error':
        sd = stats.stdv1(v)
        c = stats.conf(float(conf), sd, size)
    else:
        sd = 'error'
        c = 'none'
    return av, gm, v, sd, c, med
def _getRatingAverages():
    count = 0
    android_rating_total = 0
    ios_rating_total = 0
    android_ratings = []
    ios_ratings = []
    global android_rating_average
    global ios_rating_average
    global android_rating_median
    global ios_rating_median
    global android_rating_q1
    global ios_rating_q1
    global android_rating_q3
    global ios_rating_q3

    for app in collection_ios.find(no_cursor_timeout=True):
        #count=count+1
        #android_rating_total=android_rating_total+float(app['android_ratingsAllVersions'].replace(',',''))
        #ios_rating_total=ios_rating_total+float(app['ios_ratingsAllVersions_new'].replace(',',''))
        android_ratings.append(float(app['android_success'] - app['ios_success']))  # difference
        #ios_ratings.append(float(app['ios_success']))

    android_rating_average = stats.mean(android_ratings)
    #ios_rating_average=stats.mean(ios_ratings)
    android_rating_median = stats.median(android_ratings)
    #ios_rating_median=stats.median(ios_ratings)
    android_rating_q1 = stats.quartiles(android_ratings)[0]
    #ios_rating_q1=stats.quartiles(ios_ratings)[0]
    android_rating_q3 = stats.quartiles(android_ratings)[1]
    #ios_rating_q3=stats.quartiles(ios_ratings)[1]

    print "Android stats"
    print android_rating_q1
    print android_rating_median
    print android_rating_q3
def length_filter(ref_gtf, filter_gtf, spread_lower, spread_upper, verbose=False):
    # hash lengths
    transcript_lengths = {}
    for line in open(ref_gtf):
        a = line.split('\t')
        tid = gtf_kv(a[8])['transcript_id']
        transcript_lengths[tid] = transcript_lengths.get(tid, 0) + int(a[4]) - int(a[3]) + 1

    # determine length boundaries
    length_median = float(stats.median(transcript_lengths.values()))
    if spread_lower:
        length_spread_min = length_median / spread_lower
    else:
        length_spread_min = 0
    if spread_upper:
        length_spread_max = length_median * spread_upper
    else:
        length_spread_max = max(transcript_lengths.values())

    # remove too short and too long
    transcripts_kept = set()
    filter_out = open(filter_gtf, 'w')
    for line in open(ref_gtf):
        a = line.split('\t')
        tid = gtf_kv(a[8])['transcript_id']
        tlen = transcript_lengths.get(tid, 0)
        if length_spread_min <= tlen <= length_spread_max:
            print >> filter_out, line,
            transcripts_kept.add(tid)
    filter_out.close()

    if verbose:
        print >> sys.stderr, 'Transcript length median: %6d' % length_median
        print >> sys.stderr, 'Transcript length min:    %6d' % length_spread_min
        print >> sys.stderr, 'Transcript length max:    %6d' % length_spread_max
        print >> sys.stderr, '%6d of %6d (%.3f) transcripts used.' % (
            len(transcripts_kept), len(transcript_lengths),
            len(transcripts_kept) / float(len(transcript_lengths)))
def test_plotly_barchat(loop_times=LOOP_TIMES):
    SCREEN_PIXEL = convert_inch_to_pixel(SCREEN_SIZE)
    render_time = []
    all_render_time_np = []
    for (i, j) in SCREEN_PIXEL:
        print("Screen Size: ", (i, j))
        for x in range(loop_times):
            objects = ('Python', 'C++', 'Java', 'Perl', 'Scala', 'Lisp')
            y_pos = np.arange(len(objects))
            performance = [10, 8, 6, 4, 2, 1]
            layout = go.Layout(
                autosize=False,
                width=i,
                height=j,
                margin=go.layout.Margin(l=50, r=50, b=100, t=100, pad=4),
                paper_bgcolor='#7f7f7f',
                plot_bgcolor='#c7c7c7'
            )

            tstart = time.time()
            data = [go.Bar(x=objects, y=performance)]
            fig = go.Figure(data=data, layout=layout)
            plotly.offline.plot(fig, filename='basic-bar')
            tend = time.time()

            render_time.append(tend - tstart)
            print(6, "Bar Chart Draw Time: ", tend - tstart)
        render_time_np = np.array(render_time)
        all_render_time_np.append(render_time_np)
        print("Mean Render Time: ", render_time_np.mean())
        print("Median Render Time: ", median(render_time_np))
        print()
    print("Xtiles:")
    for np_arr in all_render_time_np:
        print(xtile(np_arr, lo=0, hi=2))
def calculate_stats_num(self, name, per=[5, 25, 50, 75, 95]):
    # get column instance
    Col = self.get_column(name)
    # type validation
    assert Col.type == 'numerical', 'only possible numerical columns.'
    # get data
    data = self.get_data(name)
    # initialize
    dstats = dict()
    # calculate statistics
    dstats['mean'] = stats.mean(data)
    dstats['median'] = stats.median(data)
    dstats['std'] = stats.std(data)
    dstats['min'] = stats.min(data)
    dstats['max'] = stats.max(data)
    dstats['skew'] = stats.skew(data)
    dstats['kurtosis'] = stats.kurtosis(data)
    for ip in per:
        dstats['per%s' % ip] = stats.percentile(data, ip)
    # store result on the column
    Col.stats = dstats
def getQuartilesData(numbersList):
    mean = 0
    median = 0
    quartiles = 0.0, 0.0, 0.0
    inQuarts = countValuesInQuartiles(numbersList)
    q1, q2, q3 = inQuarts
    q1percent = calculatePercentage(q1, len(numbersList))
    q2percent = calculatePercentage(q2, len(numbersList))
    q3percent = calculatePercentage(q3, len(numbersList))
    try:
        quartiles = stats.quartiles(numbersList)
    except Exception:
        pass
    try:
        median = stats.median(numbersList)
    except Exception:
        pass
    try:
        mean = stats.mean(numbersList)
    except Exception:
        pass
    return {
        "mean": mean,
        "median": median,
        "Q1": round(q1, 2),
        "Q2": round(q2, 2),
        "Q3": round(q3, 2),
        "Q1Perc": round(q1percent, 2),
        "Q2Perc": round(q2percent, 2),
        "Q3Perc": round(q3percent, 2),
        "quartileCount": inQuarts,
        "quartiles": quartiles
    }
'''
statistics_calculator.py

Read numbers from a file, calculate and print statistical measures:
mean, median, mode, variance, standard deviation
'''

from stats import mean, median, mode, variance_sd

def read_data(filename):
    numbers = []
    with open(filename) as f:
        for line in f:
            numbers.append(float(line))
    return numbers

if __name__ == '__main__':
    data = read_data('mydata.txt')
    m = mean(data)
    median = median(data)
    mode = mode(data)
    variance, sd = variance_sd(data)
    print('Mean: {0:.5f}'.format(m))
    print('Median: {0:.5f}'.format(median))
    print('Mode: {0:.5f}'.format(mode))
    print('Variance: {0:.5f}'.format(variance))
    print('Standard deviation: {0:.5f}'.format(sd))
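# `variance_sd` comes from the same local `stats` module as `mean`, `median`,
# and `mode`; that module is not shown, so this is a sketch under the usual
# population-variance definition -- the real helper may differ.
def variance_sd(numbers):
    """Return (variance, standard deviation) of a list of numbers."""
    m = sum(numbers) / len(numbers)
    variance = sum((x - m) ** 2 for x in numbers) / len(numbers)
    return variance, variance ** 0.5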
def check_median(self):
    assert_equal(stats.median(self.a1), 4)
    assert_equal(stats.median(self.a2), 2.5)
    assert_equal(stats.median(self.a3), 3.5)
""" statistics_calculator.py Read numbers from a file, calculate and print statistical measures: mean, median, mode, variance, standard deviation """ from stats import mean, median, mode, variance_sd def read_data(filename): numbers = [] with open(filename) as f: for line in f: numbers.append(float(line)) return numbers if __name__ == "__main__": data = read_data("mydata.txt") m = mean(data) median = median(data) mode = mode(data) variance, sd = variance_sd(data) print("Mean: {0:.5f}".format(m)) print("Median: {0:.5f}".format(median)) print("Mode: {0:.5f}".format(mode)) print("Variance: {0:.5f}".format(variance)) print("Standard deviation: {0:.5f}".format(sd))
except ImportError:
    pass

l = range(1, 21)
lf = range(1, 21)
lf[2] = 3.0
a = N.array(l)
af = N.array(lf)
ll = [l] * 5
aa = N.array(ll)

print('\nCENTRAL TENDENCY')
print('geometricmean:', stats.geometricmean(l), stats.geometricmean(lf),
      stats.geometricmean(a), stats.geometricmean(af))
print('harmonicmean:', stats.harmonicmean(l), stats.harmonicmean(lf),
      stats.harmonicmean(a), stats.harmonicmean(af))
print('mean:', stats.mean(l), stats.mean(lf), stats.mean(a), stats.mean(af))
print('median:', stats.median(l), stats.median(lf), stats.median(a), stats.median(af))
print('medianscore:', stats.medianscore(l), stats.medianscore(lf),
      stats.medianscore(a), stats.medianscore(af))
print('mode:', stats.mode(l), stats.mode(a))

print('\nMOMENTS')
print('moment:', stats.moment(l), stats.moment(lf), stats.moment(a), stats.moment(af))
print('variation:', stats.variation(l), stats.variation(a), stats.variation(lf),
      stats.variation(af))
print('skew:', stats.skew(l), stats.skew(lf), stats.skew(a), stats.skew(af))
print('kurtosis:', stats.kurtosis(l), stats.kurtosis(lf), stats.kurtosis(a),
      stats.kurtosis(af))
print('mean:', stats.mean(a), stats.mean(af))
print('var:', stats.var(a), stats.var(af))
print('stdev:', stats.stdev(a), stats.stdev(af))
print('sem:', stats.sem(a), stats.sem(af))
print('describe:')
print(stats.describe(l))
print(stats.describe(lf))
print(stats.describe(a))
def stats():
    '''returns stats for given macs between timestamp'''
    after = request.args.get('after')
    if after is not None:
        try:
            after = time.mktime(time.strptime(after, '%Y-%m-%dT%H:%M:%S'))
        except ValueError as v:
            raise InvalidUsage('Invalid after parameter')
    before = request.args.get('before')
    if before is not None:
        try:
            before = time.mktime(time.strptime(before, '%Y-%m-%dT%H:%M:%S'))
        except ValueError as v:
            raise InvalidUsage('Invalid before parameter')
    macs = request.args.getlist('macs')
    rssi, zero, day = None, False, False

    cur = get_db().cursor()
    # to store temp table and indices in memory
    sql = 'pragma temp_store = 2;'
    cur.execute(sql)
    sql, sql_args = build_sql_query(after, before, macs, rssi, zero, day)
    try:
        cur.execute(sql, sql_args)
    except sqlite3.OperationalError as e:
        return jsonify({'status': 'error',
                        'message': 'sqlite3 db is not accessible'}), 500

    # gather stats about each mac, same code as in stats.py
    # TODO: just import that
    macs = {}
    for row in cur.fetchall():
        mac = row[1]
        if is_local_bit_set(mac):
            # create virtual mac for LAA mac address
            mac = 'LAA'
        if mac not in macs:
            macs[mac] = {'vendor': row[2], 'ssid': [], 'rssi': [],
                         'last': row[0], 'first': row[0]}
        d = macs[mac]
        if row[3] != '' and row[3] not in d['ssid']:
            d['ssid'].append(row[3])
        if row[0] > d['last']:
            d['last'] = row[0]
        if row[0] < d['first']:
            d['first'] = row[0]
        if row[4] != 0:
            d['rssi'].append(row[4])

    # sort on frequency of appearance of a mac
    tmp = [(k, len(v['rssi'])) for k, v in macs.items()]
    tmp = [m for m, _ in reversed(sorted(tmp, key=lambda k: k[1]))]

    data = []
    # dump our stats
    for m in tmp:
        v = macs[m]
        first = time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime(v['first']))
        last = time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime(v['last']))
        t = {'mac': m, 'vendor': v['vendor'], 'ssids': sorted(v['ssid']),
             'first': first, 'last': last}
        rssi = v['rssi']
        if rssi != []:
            t.update({'rssi': {
                'count': len(rssi),
                'min': min(rssi),
                'max': max(rssi),
                'avg': sum(rssi) / len(rssi),
                'median': int(median(rssi))
            }})
        data.append(t)
    return jsonify(data)
def main():
    usage = 'usage: %prog [options] <gff file>'
    parser = OptionParser(usage)
    parser.add_option('-c', dest='cons_dir',
                      default='%s/research/common/data/phylop' % os.environ['HOME'],
                      help='Conservation directory [Default: %default]')
    parser.add_option('-l', dest='lncrna', action='store_true', default=False,
                      help='Use the lncRNA specific file to speed things up [Default: %default]')
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error('Must provide gff file to intersect')
    else:
        gff_file = args[0]

    t2g = gff.t2g(gff_file)

    # build interval trees
    lnc_lengths = {}
    chr_features = {}
    interval2lnc = {}
    lnc_cons = {}
    for line in open(gff_file):
        a = line.split('\t')
        chrom = a[0]
        start = int(a[3])
        end = int(a[4])
        tid = gff.gtf_kv(a[8])['transcript_id']
        align = (chrom, start, end)

        lnc_cons[tid] = []
        lnc_lengths[tid] = lnc_lengths.get(tid, 0) + (end - start + 1)

        if align in interval2lnc:
            interval2lnc[align].add(tid)
        else:
            interval2lnc[align] = set([tid])
            chr_features.setdefault(chrom, IntervalTree()).insert_interval(Interval(start, end))

    # process overlapping chromosome blocks
    if options.lncrna:
        lnc_wig = glob.glob('%s/lnc_catalog.*wigFix*' % options.cons_dir)[0]
        process_file(chr_features, interval2lnc, lnc_cons, lnc_wig)
    else:
        for cons_file in glob.glob('%s/chr*' % options.cons_dir):
            process_file(chr_features, interval2lnc, lnc_cons, cons_file)

    # print table
    for tid in lnc_lengths:
        cons_len = len(lnc_cons[tid])
        cons_cov = float(cons_len) / lnc_lengths[tid]
        if cons_len == 0:
            cons_mean = 0.0
            cons_median = 0.0
            cons_pos = 0.0
            cons_neg = 0.0
        else:
            cons_mean = stats.mean(lnc_cons[tid])
            cons_median = stats.median(lnc_cons[tid])
            cons_pos = len([c for c in lnc_cons[tid] if c > 1]) / float(cons_len)
            cons_neg = len([c for c in lnc_cons[tid] if c < 1]) / float(cons_len)

        cols = (tid, t2g[tid], lnc_lengths[tid], cons_cov, cons_mean, cons_median,
                cons_neg, cons_pos)
        print '%-15s %-15s %7d %9.4f %9.4f %9.4f %9.4f %9.4f' % cols
        d = map(float, sys.stdin.xreadlines())
    else:
        d = map(long, sys.stdin.xreadlines())
except ValueError, err:
    sys.stderr.write("Bad datum: %s\n" % str(err))
    sys.exit(1)

if len(d) == 0:
    sys.stderr.write("No data given\n")
    sys.exit(1)

d.sort()
print "           N =", len(d)
print "         SUM =", stats.sum(d)
print "         MIN =", min(d)
print "1ST-QUARTILE =", stats.firstquartile(d)
print "      MEDIAN =", stats.median(d)
print "3RD-QUARTILE =", stats.thirdquartile(d)
print "         MAX =", max(d)
print "        MEAN =", stats.mean(d)
if d[0] < 0:
    print "         N50 = NA"
else:
    print "         N50 =", stats.n50(d)
if options.showMode:
    modeinfo = stats.mode(d)
    print "     MODE(S) =", ','.join(map(str, modeinfo[0])), "(%d)" % modeinfo[1]
def testSingleMedian(self):
    # passing only one data point to median() must return the same value
    self.assertEqual(stats.median([7]), 7)
            offset.append(teloff)  # pixels away from the center. (07.10.04)
        elif distance < 800:
            offset.append(teloff)
            offset.append(teloff)
        else:
            offset.append(teloff)
    else:
        offset.append(teloff)

    #writeLog(logpath,file,"FocusPyr: teloffset= %d" % offset)
    #print "FocusPyr: teloffset= %d distance=%f (%f,%f)" % (teloff,distance,x1,y1)

if len(offset) > 0:
    # Determine mean, median and stdev of unclipped offsets
    mean = stats.mean(offset)
    median = stats.median(offset)
    try:
        stdev = stats.stdev(offset)
    except ZeroDivisionError:
        stdev = '0.00'

    # Do a 1-sigma clipping
    clipLowLimit = float(mean) - 1 * float(stdev)
    clipHighLimit = float(mean) + 1 * float(stdev)
    offset = [off for off in offset
              if float(off) < clipHighLimit and float(off) > clipLowLimit]

    # Determine stats on sigma clipped data
    mean_c = stats.mean(offset)
    median_c = stats.median(offset)
    try:
def findBase(self, mapNum, sugar, phos5, phos3, baseType, direction=3):
    """Rotate the sugar center by 360 degrees in ROTATE_SUGAR_INTERVAL increments

    ARGUMENTS:
        mapNum   - the molecule number of the Coot map to use
        sugar    - the coordinates of the C1' atom
        phos5    - the coordinates of the 5' phosphate
        phos3    - the coordinates of the 3' phosphate
        baseType - the base type (A, C, G, or U)
    OPTIONAL ARGUMENTS:
        direction - which direction are we tracing the chain
                    if it is 5 (i.e. 3'->5'), then phos5 and phos3 will be flipped
                    all other values will be ignored
                    defaults to 3 (i.e. 5'->3')
    RETURNS:
        baseObj - a list of [baseType, baseCoordinates]
    """

    if direction == 5:
        (phos5, phos3) = (phos3, phos5)

    #calculate the bisector of the phos-sugar-phos angle
    #first, calculate a normal to the phos-sugar-phos plane
    sugarPhos5Vec = minus(phos5, sugar)
    sugarPhos3Vec = minus(phos3, sugar)
    normal = crossProd(sugarPhos5Vec, sugarPhos3Vec)
    normal = scalarProd(normal, 1.0 / magnitude(normal))

    phosSugarPhosAngle = angle(phos5, sugar, phos3)
    bisector = rotate(sugarPhos5Vec, normal, phosSugarPhosAngle / 2.0)

    #flip the bisector around (so it points away from the phosphates) and scale its length to 5 A
    startingBasePos = scalarProd(bisector, -1 / magnitude(bisector))

    #rotate the base baton by 10 degree increments about half of a sphere
    rotations = [startingBasePos]  #a list of coordinates for all of the rotations
    for curTheta in range(-90, -1, 10) + range(10, 91, 10):
        curRotation = rotate(startingBasePos, normal, curTheta)
        rotations.append(curRotation)  #here's where the phi=0 rotation is accounted for
        for curPhi in range(-90, -1, 10) + range(10, 91, 10):
            rotations.append(rotate(curRotation, startingBasePos, curPhi))

    #test electron density along all base batons
    for curBaton in rotations:
        curDensityTotal = 0
        densityList = []
        for i in range(1, 9):
            (x, y, z) = plus(sugar, scalarProd(i / 2.0, curBaton))
            curPointDensity = density_at_point(mapNum, x, y, z)
            curDensityTotal += curPointDensity
            densityList.append(curPointDensity)
        curBaton.append(curDensityTotal)      #the sum of the density (equivalent to the mean for ordering purposes)
        curBaton.append(median(densityList))  #the median of the density
        curBaton.append(min(densityList))     #the minimum of the density

    #find the baton with the max density (as measured using the median)
    #Note that we ignore the sum and minimum of the density.  Those calculations could be
    # commented out, but they may be useful at some point in the future.  When we look at
    # higher resolutions maybe?  Besides, they're fast calculations.
    baseDir = max(rotations, key=lambda x: x[4])

    #rotate the stock base+sugar structure to align with the base baton
    rotationAngle = angle(self.__baseStrucs["C"]["C4"], [0, 0, 0], baseDir)
    axis = crossProd(self.__baseStrucs["C"]["C4"], baseDir[0:3])
    orientedBase = rotateAtoms(self.__baseStrucs["C"], axis, rotationAngle)

    #rotate the base about chi to find the best fit to density
    bestFitBase = None
    maxDensity = -999999
    for curAngle in range(0, 360, 5):
        rotatedBase = rotateAtoms(orientedBase, orientedBase["C4"], curAngle, sugar)
        curDensity = 0
        for curAtom in ["N1", "C2", "N3", "C4", "C5", "C6"]:
            curDensity += density_at_point(mapNum, rotatedBase[curAtom][0],
                                           rotatedBase[curAtom][1], rotatedBase[curAtom][2])

        #this is "pseudoChi" because it uses the 5' phosphate in place of the O4' atom
        pseudoChi = torsion(phos5, sugar, rotatedBase["N1"], rotatedBase["N3"])
        curDensity *= self.__pseudoChiInterp.interp(pseudoChi)

        if curDensity > maxDensity:
            maxDensity = curDensity
            bestFitBase = rotatedBase

    baseObj = ["C", bestFitBase]

    #mutate the base to the appropriate type
    if baseType != "C":
        baseObj = self.mutateBase(baseObj, baseType)

    return baseObj
timing_data, unreviewed_patchnums, owner_no_follow_ups, need_review_followup = \
    review_timings.load_data(REVIEWS_FILENAME)
client_timing_data, client_unreviewed_patchnums, client_owner_no_follow_ups, \
    client_need_review_followup = review_timings.load_data(CLIENT_REVIEWS_FILENAME)
# timing_data.update(client_timing_data)

# trim off the top and bottom few percent
outliers = int(len(timing_data) * .1) // 2
owner_data = sorted([x[0] for x in timing_data.itervalues()])[outliers:-outliers]
reviewer_data = sorted([x[1] for x in timing_data.itervalues()])[outliers:-outliers]

template_vars['open_patches'] = '%d' % len(timing_data.keys())
template_vars['unreviewed_patches'] = '%d' % (len(unreviewed_patchnums))  #+ len(client_unreviewed_patchnums))
template_vars['need_followup_count'] = '%d' % len(need_review_followup)
template_vars['no_follow_ups'] = '%d' % (len(owner_no_follow_ups))  #+ len(client_owner_no_follow_ups))

owner_time = timedelta(seconds=stats.median(owner_data))
reviewer_time = timedelta(seconds=stats.median(reviewer_data))
template_vars['owner_response'] = str(owner_time)
template_vars['reviewer_response'] = str(reviewer_time)

with open(PERCENT_ACTIVE_FILENAME, 'rb') as f:
    total_contributors = len(f.readlines())
template_vars['total_contributors'] = total_contributors

with open(AVERAGES_FILENAME, 'rb') as f:
    actives_windows, actives_avg = json.load(f)
for aw, rolling_avg_windows in actives_windows[-1:]:
    aw = str(aw)
    for r_a_w in rolling_avg_windows[:1]:
        r_a_w = str(r_a_w)
        active_contributors = int(
# timing_data.update(client_timing_data)
# unreviewed.extend(client_unreviewed)

outliers = int(len(timing_data) * .1) // 2
owner_data = sorted([x[0] for x in timing_data.itervalues()])[outliers:-outliers]
reviewer_data = sorted([x[1] for x in timing_data.itervalues()])[outliers:-outliers]

histogram(owner_data, 'owner')
histogram(reviewer_data, 'reviewer')

print 'Stats for %d patches' % len(timing_data.keys())
print 'Patch owner review stats:'
print '    mean: %s' % str(datetime.timedelta(seconds=stats.mean(owner_data)))
print '    median: %s' % str(datetime.timedelta(seconds=stats.median(owner_data)))
print '    std_deviation: %s' % str(datetime.timedelta(seconds=stats.std_deviation(owner_data)))
print '    max_difference: %s' % str(datetime.timedelta(seconds=stats.min_max_difference(owner_data)))
print '    %d patches with no follow-up' % len(owner_no_follow_ups)
print
print 'Patch reviewer stats:'
print '    mean: %s' % str(datetime.timedelta(seconds=stats.mean(reviewer_data)))
print '    median: %s' % str(datetime.timedelta(seconds=stats.median(reviewer_data)))
print '    std_deviation: %s' % str(datetime.timedelta(seconds=stats.std_deviation(reviewer_data)))
print '    max_difference: %s' % str(datetime.timedelta(seconds=stats.min_max_difference(reviewer_data)))
print '    %d unreviewed patches' % len(unreviewed)
print '    %d patches need reviewer follow-up' % len(need_review_followup)

if '--show-unreviewed' in sys.argv:
    for patch_number in unreviewed:
        print 'https://review.openstack.org/#/c/%d/' % patch_number[0]
if '--show-need-review' in sys.argv:
I = la.make_matrix(5, 5, la.is_diagonal)
print("identity matrix = ", I)
print("\n\n")

print("*** Test Module <stats> ***")
A = [1, 3, 5, 7, 9, 2, 3, 4, 4, 4, 6, 8, 10, 13, 15, 17]
print("vector A = ", A)
print("sorted A = ", sorted(A))
mean = st.mean(A)
print("A's mean = ", mean)
median = st.median(A)
print("A's median = ", median)
quantile = st.quantile(A, 0.2)
print("A's 20% quantile = ", quantile)
quantile = st.quantile(A, 0.9)
print("A's 90% quantile = ", quantile)
mode = st.mode(A)
print("A's mode = ", mode)
data_range = st.data_range(A)
print("A's range = ", data_range)
variance = st.variance(A)
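# `st.quantile` above is presumably the simple positional quantile common in
# teaching code; a sketch under that assumption (the module behind `st` is
# not shown, so the real implementation may differ):
def quantile(xs, p):
    """Return the value below which a fraction p of the sorted data lies."""
    return sorted(xs)[int(p * len(xs))]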
def test_median0():
    obs = median([3., 7, 3, 4., 0.])
    exp = 3
    assert_equal(obs, exp)
def test_median():
    obs = median([5.])
    exp = 5.
    assert_equal(obs, exp)
        offset.append(teloff)
    else:
        offset.append(teloff)

    writeLog(logpath, file, "FocusPyr: teloffset= %d" % teloff)
    #print "FocusPyr: teloffset= %d distance=(%f,%f) (%f,%f) %s" % (teloff,xdist,ydist,x1,y1,o[11])

    # Append to a list to be inserted into Objects table
    pyrobjects.append((teloff, xdist, ydist, x1, y1, o[11]))

if len(offset) > 0:
    # Determine mean, median and stdev of unclipped offsets
    mean = stats.mean(offset)
    median = stats.median(offset)
    try:
        stdev = stats.stdev(offset)
    except ZeroDivisionError:
        stdev = '0.00'

    # Do a 1-sigma clipping
    clipLowLimit = float(mean) - 1 * float(stdev)
    clipHighLimit = float(mean) + 1 * float(stdev)
    offset = [off for off in offset
              if float(off) <= clipHighLimit and float(off) >= clipLowLimit]

    # Determine stats on sigma clipped data
    h['meanFocusOffset'] = stats.mean(offset)
    h['medianFocusOffset'] = stats.median(offset)
    try:
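# Both focus-offset fragments above hand-roll the same 1-sigma clipping step.
# A self-contained sketch of that step with a hypothetical name, for clarity;
# it assumes at least two offsets (a single element raises ZeroDivisionError,
# which the fragments above catch around stats.stdev).
def sigma_clip(values, nsigma=1.0):
    """Keep only values within nsigma sample standard deviations of the mean."""
    m = sum(values) / len(values)
    sd = (sum((v - m) ** 2 for v in values) / (len(values) - 1)) ** 0.5
    return [v for v in values if m - nsigma * sd <= v <= m + nsigma * sd]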
def test_plotly_sign_curve(loop_times=LOOP_TIMES):
    #TODO
    x = np.arange(0, 2 * np.pi, 0.01)
    y = np.sin(x)
    trace1 = {
        "x": x, "y": y,
        "line": {"color": "rgb(0,113.985,188.955)", "dash": "solid", "width": 0.5},
        "marker": {"color": "rgb(0,113.985,188.955)", "line": {"width": 0.5}, "size": 6},
        "mode": "lines", "name": "", "showlegend": True, "type": "scatter",
        "visible": True, "xaxis": "x1", "yaxis": "y1"
    }
    data = Data([trace1])

    SCREEN_PIXEL = convert_inch_to_pixel(SCREEN_SIZE)
    render_time = []
    all_render_time_np = []
    for (i, j) in SCREEN_PIXEL:
        print("Screen Size: ", (i, j))
        for k in range(loop_times):
            tstart = time.time()
            layout = {
                "annotations": [
                    {"x": 0.5175, "y": 0.935, "align": "center",
                     "bordercolor": "rgba(0,0,0,0)", "borderpad": 3, "borderwidth": 0.5,
                     "font": {"color": "rgb(0,0,0)", "family": "Arial, sans-serif", "size": 11},
                     "showarrow": False, "text": "<b>Sine curve</b>", "textangle": 0,
                     "xanchor": "center", "xref": "paper", "yanchor": "bottom", "yref": "paper"},
                    {"x": 0.5175, "y": 0.461162790698, "align": "center",
                     "bordercolor": "rgba(0,0,0,0)", "borderpad": 3, "borderwidth": 0.5,
                     "font": {"color": "rgb(0,0,0)", "family": "Arial, sans-serif", "size": 11},
                     "showarrow": False, "text": "<b>Cosine curve</b>", "textangle": 0,
                     "xanchor": "center", "xref": "paper", "yanchor": "bottom", "yref": "paper"}
                ],
                "autosize": False,
                "height": i,
                "hovermode": "closest",
                "margin": {"r": 0, "t": 0, "b": 0, "l": 0, "pad": 0},
                "paper_bgcolor": "rgb(255,255,255)",
                "plot_bgcolor": "rgba(0,0,0,0)",
                "showlegend": False,
                "title": "<b>Sine curve</b>",
                "titlefont": {"color": "rgba(0,0,0,0)"},
                "width": j,
                "xaxis1": {
                    "anchor": "y1", "autorange": False, "domain": [0.13, 0.905],
                    "exponentformat": "none", "gridcolor": "rgb(38.25,38.25,38.25)",
                    "gridwidth": 1, "linecolor": "rgb(38.25,38.25,38.25)", "linewidth": 1,
                    "mirror": "ticks", "nticks": 9, "range": [0, 7], "showgrid": False,
                    "showline": True, "side": "bottom", "tickcolor": "rgb(38.25,38.25,38.25)",
                    "tickfont": {"color": "rgb(38.25,38.25,38.25)", "family": "Arial, sans-serif", "size": 10},
                    "ticklen": 6.51, "ticks": "inside", "tickwidth": 1,
                    "titlefont": {"color": "rgb(38.25,38.25,38.25)", "family": "Arial, sans-serif", "size": 11},
                    "type": "linear", "zeroline": False
                },
                "xaxis2": {
                    "anchor": "y2", "autorange": False, "domain": [0.13, 0.905],
                    "exponentformat": "none", "gridcolor": "rgb(38.25,38.25,38.25)",
                    "gridwidth": 1, "linecolor": "rgb(38.25,38.25,38.25)", "linewidth": 1,
                    "mirror": "ticks", "nticks": 9, "range": [0, 7], "showgrid": False,
                    "showline": True, "side": "bottom", "tickcolor": "rgb(38.25,38.25,38.25)",
                    "tickfont": {"color": "rgb(38.25,38.25,38.25)", "family": "Arial, sans-serif", "size": 10},
                    "ticklen": 6.51, "ticks": "inside", "tickwidth": 1,
                    "titlefont": {"color": "rgb(38.25,38.25,38.25)", "family": "Arial, sans-serif", "size": 11},
                    "type": "linear", "zeroline": False
                },
                "yaxis1": {
                    "anchor": "x1", "autorange": False, "domain": [0.583837209302, 0.925],
                    "exponentformat": "none", "gridcolor": "rgb(38.25,38.25,38.25)",
                    "gridwidth": 1, "linecolor": "rgb(38.25,38.25,38.25)", "linewidth": 1,
                    "mirror": "ticks", "nticks": 6, "range": [-1, 1], "showgrid": False,
                    "showline": True, "showticklabels": True, "side": "left",
                    "tickcolor": "rgb(38.25,38.25,38.25)",
                    "tickfont": {"color": "rgb(38.25,38.25,38.25)", "family": "Arial, sans-serif", "size": 10},
                    "ticklen": 6.51, "ticks": "inside", "tickwidth": 1,
                    "titlefont": {"color": "rgb(38.25,38.25,38.25)", "family": "Arial, sans-serif", "size": 11},
                    "type": "linear", "zeroline": False
                },
                "yaxis2": {
                    "anchor": "x2", "autorange": False, "domain": [0.11, 0.451162790698],
                    "exponentformat": "none", "gridcolor": "rgb(38.25,38.25,38.25)",
                    "gridwidth": 1, "linecolor": "rgb(38.25,38.25,38.25)", "linewidth": 1,
                    "mirror": "ticks", "nticks": 6, "range": [-1, 1], "showgrid": False,
                    "showline": True, "showticklabels": True, "side": "left",
                    "tickcolor": "rgb(38.25,38.25,38.25)",
                    "tickfont": {"color": "rgb(38.25,38.25,38.25)", "family": "Arial, sans-serif", "size": 10},
                    "ticklen": 6.51, "ticks": "inside", "tickwidth": 1,
                    "titlefont": {"color": "rgb(38.25,38.25,38.25)", "family": "Arial, sans-serif", "size": 11},
                    "type": "linear", "zeroline": False
                }
            }
            fig = go.Figure(data=data, layout=layout)
            plot_url = plotly.offline.plot(fig)
            tend = time.time()

            render_time.append(tend - tstart)
            print(NUM_OF_SIN_CURVES, "Sine Curve Draw Time: ", tend - tstart)
        render_time_np = np.array(render_time)
        all_render_time_np.append(render_time_np)
        print("Mean Render Time: ", render_time_np.mean())
        print("Median Render Time: ", median(render_time_np))
        print()
    print("Xtiles:")
    for np_arr in all_render_time_np:
        print(xtile(np_arr, lo=0, hi=2))
import stats

my_list = [4, 1, 5, 7, 6, 8, 9, 10, 8, 3, 3, 8, 12]

mean = stats.mean(my_list)
print('The mean is: ' + str(mean))

median = stats.median(my_list)
print('The median is: ' + str(median))

range = stats.range(my_list)
print('The range is: ' + str(range))

sum = stats.sum(my_list)
print('The sum of all numbers is: ' + str(sum))
def days():
    cur = get_db().cursor()
    # to store temp table and indices in memory
    sql = 'pragma temp_store = 2;'
    cur.execute(sql)
    macs = request.args.getlist('macs')
    # getlist() returns an empty list (not None) when the parameter is absent
    if not macs:
        # return list of days with probes in db
        try:
            sql = 'select date from probemon'
            sql_args = ()
            cur.execute(sql, sql_args)
        except sqlite3.OperationalError as e:
            return jsonify({'status': 'error',
                            'message': 'sqlite3 db is not accessible'}), 500
        days = set()
        for row in cur.fetchall():
            t = time.strftime('%Y-%m-%d', time.localtime(row[0]))
            days.add(t)
        days = sorted(list(days))
        missing = []
        last = datetime.strptime(days[-1], '%Y-%m-%d')
        day = datetime.strptime(days[0], '%Y-%m-%d')
        while day != last:
            d = day.strftime('%Y-%m-%d')
            if d not in days:
                missing.append(d)
            day += timedelta(days=1)
        data = {'first': days[0], 'last': days[-1], 'missing': missing}
        return jsonify(data)
    else:
        # check if stats table is available
        try:
            cur.execute('select count(*) from sqlite_master where type=? and name=?',
                        ('table', 'stats'))
        except sqlite3.OperationalError as e:
            return jsonify({'status': 'error',
                            'message': 'sqlite3 db is not accessible'}), 500
        if cur.fetchone()[0] == 1:
            # return day-by-day stats for macs
            params = ','.join(['?'] * len(macs))
            sql = f'''select mac.id, address from mac
                inner join vendor on vendor.id=mac.vendor
                where address in ({params});'''
            cur.execute(sql, macs)
            mac_ids = {}
            for row in cur.fetchall():
                mac_ids[row[0]] = row[1]
            data = []
            for m in list(mac_ids.keys()):
                md = []
                ssids = set()
                sql = ('select date, first_seen, last_seen, count, min, max, avg, med, ssids '
                       'from stats where mac_id=? order by date;')
                cur.execute(sql, (m,))
                for d, first, last, count, rmin, rmax, ravg, rmed, ssid in cur.fetchall():
                    first = time.mktime(time.strptime(f'{d}T{first}', '%Y-%m-%dT%H:%M:%S')) * 1000
                    last = time.mktime(time.strptime(f'{d}T{last}', '%Y-%m-%dT%H:%M:%S')) * 1000
                    md.append({'day': d.replace('-', ''), 'count': count,
                               'last': last, 'first': first,
                               'min': rmin, 'max': rmax, 'avg': ravg, 'median': rmed})
                    ssids = ssids.union(ssid.split(','))
                ssids = sorted(list(ssids))
                if '' in ssids:
                    ssids.remove('')
                data.append({'mac': mac_ids[m], 'days': md, 'ssids': ssids})
            return jsonify(data)
        else:
            params = ','.join(['?'] * len(macs))
            sql = f'''select date,mac.address,rssi,ssid.name from probemon
                inner join ssid on ssid.id=probemon.ssid
                inner join mac on mac.id=probemon.mac
                where mac.address in ({params})'''
            sql_args = macs
            cur.execute(sql, sql_args)
            # WARNING: this is copy-pasted from stats.py
            stats = {}
            for row in cur.fetchall():
                if row[1] not in list(stats.keys()):
                    stats[row[1]] = {'ssids': set()}
                stats[row[1]]['ssids'].add(row[3])
                day = time.strftime('%Y%m%d', time.localtime(row[0]))
                if day in stats[row[1]]:
                    smd = stats[row[1]][day]
                    smd['rssi'].append(row[2])
                    if row[0] > smd['last']:
                        smd['last'] = row[0]
                    if row[0] < smd['first']:
                        smd['first'] = row[0]
                else:
                    stats[row[1]][day] = {'rssi': [row[2]], 'first': row[0], 'last': row[0]}
            data = []
            for mac in list(stats.keys()):
                md = []
                for d in sorted(stats[mac].keys()):
                    if d == 'ssids':
                        continue
                    rssi = stats[mac][d]['rssi']
                    md.append({'day': d, 'count': len(rssi),
                               'last': int(stats[mac][d]['last'] * 1000),
                               'first': int(stats[mac][d]['first'] * 1000),
                               'min': min(rssi), 'max': max(rssi),
                               'avg': sum(rssi) // len(rssi),
                               'median': median(rssi)})
                ssids = list(stats[mac]['ssids'])
                if '' in ssids:
                    ssids.remove('')
                data.append({'mac': mac, 'days': md, 'ssids': ssids})
            return jsonify(data)
def test_median():
    obs = median([1, 2, 3, 4])
    exp = 2.5
    assert_equal(obs, exp)
def main():
    usage = 'usage: %prog [options] <bam> <ref_gtf>'
    parser = OptionParser(usage)

    # IO options
    parser.add_option('-o', dest='out_dir', default='uniform',
                      help='Output directory [Default: %default]')

    # window options
    parser.add_option('-w', dest='window_size', type='int', default=25,
                      help='Window size for counting [Default: %default]')
    parser.add_option('-i', '--ignore', dest='ignore_gff',
                      help='Ignore reads overlapping troublesome regions in the given GFF file')
    parser.add_option('-u', '--unstranded', dest='unstranded', action='store_true',
                      default=False, help='Sequencing is unstranded [Default: %default]')

    # cufflinks options
    parser.add_option('--cuff_done', dest='cuff_done', action='store_true', default=False,
                      help='The Cufflinks run to estimate the model parameters is already done [Default: %default]')
    parser.add_option('-t', dest='threads', type='int', default=2,
                      help='Number of threads to use [Default: %default]')

    # debug options
    parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
                      default=False, help='Verbose output [Default: %default]')
    parser.add_option('-g', '--gene', dest='gene_only',
                      help='Call peaks on the specified gene only')
    #parser.add_option('--print_windows', dest='print_windows', default=False, action='store_true', help='Print statistics for all windows [Default: %default]')

    (options, args) = parser.parse_args()

    if len(args) != 2:
        parser.error(usage)
    else:
        bam = args[0]
        ref_gtf = args[1]

    clip_peaks.out_dir = options.out_dir
    if not os.path.isdir(clip_peaks.out_dir):
        os.mkdir(clip_peaks.out_dir)

    ############################################
    # parameterize
    ############################################
    if not options.cuff_done:
        # make a new gtf w/ unspliced RNAs
        update_ref_gtf = clip_peaks.prerna_gtf(ref_gtf)
        subprocess.call('cufflinks -o %s -p %d -G %s %s' % (clip_peaks.out_dir, options.threads, update_ref_gtf, bam), shell=True)

    # store transcripts
    transcripts = clip_peaks.read_genes('%s/transcripts.gtf' % clip_peaks.out_dir, key_id='transcript_id')

    # merge overlapping genes
    g2t_merge, antisense_clusters = clip_peaks.merged_g2t('%s/transcripts.gtf' % clip_peaks.out_dir, options.unstranded)

    if options.unstranded:
        # alter strands
        clip_peaks.ambiguate_strands(transcripts, g2t_merge, antisense_clusters)

    # set transcript FPKMs
    clip_peaks.set_transcript_fpkms(transcripts, clip_peaks.out_dir, missing_fpkm=0)

    # possibly limit genes to examine
    if options.gene_only:
        gene_ids = []
        for gids in g2t_merge.keys():
            if options.gene_only in gids.split(','):
                gene_ids.append(gids)
        if len(gene_ids) == 0:
            print >> sys.stderr, 'gene_id %s not found' % options.gene_only
            exit(1)
    else:
        gene_ids = g2t_merge.keys()

    ############################################
    # filter BAM
    ############################################
    if options.ignore_gff:
        bam_ignore_fd, bam_ignore_file = tempfile.mkstemp(dir='%s/research/scratch/temp' % os.environ['HOME'])
        subprocess.call('intersectBed -v -abam %s -b %s > %s' % (bam, options.ignore_gff, bam_ignore_file), shell=True)
        bam = bam_ignore_file

    ############################################
    # process genes
    ############################################
    # index
    subprocess.call('samtools index %s' % bam, shell=True)

    # initialize stats
    table_out = open('%s/uniformity_table.txt' % clip_peaks.out_dir, 'w')
    id_list = []
    fpkm_list = []

    # open bam
    bam_in = pysam.Samfile(bam, 'rb')

    # for each gene
    for gene_id in gene_ids:
        # make a more focused transcript hash for this gene
        gene_transcripts = {}
        for tid in g2t_merge[gene_id]:
            gene_transcripts[tid] = transcripts[tid]

        # obtain basic gene attributes
        (gchrom, gstrand, gstart, gend) = clip_peaks.gene_attrs(gene_transcripts)

        # initialize window counts
        transcript_isoform_counts = {}
        for tid in gene_transcripts:
            transcript_isoform_counts[tid] = []

        # choose a single event position and weight the reads
        read_pos_weights = clip_peaks.position_reads(bam_in, gchrom, gstart, gend, gstrand, mapq_zero=True)

        # process read alignments
        for (pos, weight, mm) in read_pos_weights:
            # map pos to isoforms
            iso_pos = {}
            for tid in gene_transcripts:
                iso_pos[tid] = isoform_position(gene_transcripts[tid], pos)

            # sum fpkms for hit isoforms
            fpkm_sum = sum([gene_transcripts[tid].fpkm for tid in gene_transcripts if iso_pos[tid] != None])

            if fpkm_sum <= 0:
                pass
                #print >> sys.stderr, 'No FPKM for %s at %d' % (gene_id, pos)
            else:
                # distribute read to isoform counts
                for tid in gene_transcripts:
                    if iso_pos[tid] != None:
                        win_i = int(iso_pos[tid] / options.window_size)
                        while win_i >= len(transcript_isoform_counts[tid]):
                            transcript_isoform_counts[tid].append(0)
                        transcript_isoform_counts[tid][win_i] += weight * gene_transcripts[tid].fpkm / fpkm_sum

        # compute window stats
        for tid in gene_transcripts:
            if gene_transcripts[tid].fpkm > 1 and len(transcript_isoform_counts[tid]) > 5:
                u, sd = stats.mean_sd(transcript_isoform_counts[tid][:-1])
                if u > 0:
                    id_list.append(sd * sd / u)
                    fpkm_list.append(gene_transcripts[tid].fpkm)

                    cols = (tid, gene_transcripts[tid].fpkm, len(transcript_isoform_counts[tid]) - 1, u, sd, id_list[-1])
                    print >> table_out, '%-20s %8.2f %6d %7.2f %7.2f %5.3f' % cols

    bam_in.close()
    table_out.close()

    ############################################
    # summary stats
    ############################################
    median = stats.median(id_list)
    mean = stats.mean(id_list)

    fpkm_cv_sum = sum([id_list[i] * fpkm_list[i] for i in range(len(id_list))])
    fpkm_sum = sum(fpkm_list)
    fpkm_mean = fpkm_cv_sum / fpkm_sum

    logfpkm_cv_sum = sum([id_list[i] * math.log(fpkm_list[i] + 1, 2) for i in range(len(id_list))])
    logfpkm_sum = sum([math.log(f + 1, 2) for f in fpkm_list])
    logfpkm_mean = logfpkm_cv_sum / logfpkm_sum

    # print
    print 'Median:                %7.4f' % median
    print 'Mean:                  %7.4f' % mean
    print 'FPKM-weighted mean:    %7.4f' % fpkm_mean
    print 'logFPKM-weighted mean: %7.4f' % logfpkm_mean

    # clean cufflinks output
    if not options.cuff_done:
        os.remove(update_ref_gtf)
        os.remove('%s/skipped.gtf' % clip_peaks.out_dir)
        os.remove('%s/genes.fpkm_tracking' % clip_peaks.out_dir)

    if options.ignore_gff:
        os.close(bam_ignore_fd)
        os.remove(bam_ignore_file)
def main():
    options = UploadOptions()
    try:
        options.parseOptions(sys.argv[1:])
    except UsageError, e:
        print e
        return 1

    fname, benchmark, param, statistic = options['statistic'].split(',')
    stat, samples = select(pickle.load(file(fname)), benchmark, param, statistic)

    d = upload(
        reactor,
        url=options['url'],
        project=options['project'],
        revision=options['revision'],
        revision_date=options['revision-date'],
        benchmark='%s-%s-%s' % (benchmark, param, statistic),
        executable='%s-backend' % (options['backend'],),
        environment=options['environment'],
        result_value=median(samples),
        result_date=datetime.now(),
        std_dev=mad(samples),  # Not really!
        max_value=max(samples),
        min_value=min(samples))
    d.addErrback(err, "Upload failed")
    reactor.callWhenRunning(d.addCallback, lambda ign: reactor.stop())
    reactor.run()
def test_median():
    obs = median([1, 2, 3])
    exp = 2.0
    assert_equal(obs, exp)
def check_basic(self):
    data1 = [1, 3, 5, 2, 3, 1, 19, -10, 2, 4.0]
    data2 = [3, 5, 1, 10, 23, -10, 3, -2, 6, 8, 15]
    assert_almost_equal(stats.median(data1), 2.5)
    assert_almost_equal(stats.median(data2), 5)