def num_features_smarch(samplefile_, n_):
    """Return mean and standard deviation of selected-feature counts.

    Every line of ``samplefile_`` is split on commas and its last field is
    dropped; the remaining fields form one configuration.  A field counts as
    a selected feature when it does not start with ``'-'``.

    :param samplefile_: path of the sample file to read
    :param n_: number of configurations to pick through ``get_random``; a
        negative value means every configuration is used
    :return: ``(mean, standard deviation)`` of the per-configuration counts,
        or ``-1`` when ``samplefile_`` does not exist
    """
    if not os.path.exists(samplefile_):
        return -1

    with open(samplefile_, "r") as sample_file:
        configurations = [line.split(',')[:-1] for line in sample_file]

    if n_ < 0:
        chosen = list(configurations)
    else:
        # The values returned by get_random are shifted down by one before
        # they are used as list indices.
        chosen = [
            configurations[position - 1]
            for position in get_random(n_, len(configurations))
        ]

    counts = [
        sum(1 for field in configuration if not field.startswith('-'))
        for configuration in chosen
    ]
    return stats.tmean(counts), stats.tstd(counts)
def query4(self, length=8): global data1 data1=pandas.read_sql_query(query['4a'], cnx) pysql = lambda q: pandasql.sqldf(q, globals()) data1_rep = pysql("select p_id as \"Patient ID\",exp as \"Expression Val\" from data1 ") global data2 data2=pandas.read_sql_query(query['4b'], cnx) data2_rep = pysql("select p_id as \"Patient ID\",exp as \"Expression Val\" from data2 ") a=data1['exp'].values b=data2['exp'].values print(stats.tmean(a)) print(stats.tmean(b)) print(stats.tvar(a)) print(stats.tvar(b)) return """<html> <form method="get" action="index"> <button type="submit">Return</button> </form> </form> <form method="post" action="processQuery4"> Custom Query on Result: <input type="text" name="qu"><br> <input type="submit"> </form> <h2>T-statistics for Exp Values::</h2>"""+(str)(stats.ttest_ind(a,b,equal_var=True)[0])+""" <h1>Exp values for patients with ALL<h3>(Rows-"""+str(len(data1.index))+""")</h3></h1>"""+data1_rep.to_html(index=False)+""" <h1>Exp values for patients without ALL<h3>(Rows-"""+str(len(data2.index))+""")</h3></h1>"""+data2_rep.to_html(index=False)+"""
def compute_metric(self):
    """Compute per-interval rate statistics over the series in ``self.ts``.

    Sets ``self.tmid`` (interval midpoints, first one dropped),
    ``self.rate`` (one rate array per series yielded by ``self.ts``),
    ``self.ratio`` (standard deviation / mean across series, per interval),
    ``self.ratio2`` ((max - min) / max across series, per interval) and
    ``self.metric`` (absolute value of the mean of ``self.ratio``).
    """
    times = self.ts.t
    midpoints = (times[:-1] + times[1:]) / 2.0
    keep = range(1, len(midpoints))  # skips index 0 only
    self.tmid = midpoints[keep]

    elapsed = numpy.diff(times)[keep]
    width = len(keep)
    highest = numpy.zeros(width)
    lowest = numpy.ones(width) * 1e100
    self.rate = []
    for series in self.ts:
        current = numpy.divide(numpy.diff(series)[keep], elapsed)
        self.rate.append(current)
        highest = numpy.maximum(highest, current)
        lowest = numpy.minimum(lowest, current)

    means = []
    deviations = []
    for j in range(width):
        across_series = [rate[j] for rate in self.rate]
        means.append(tmean(across_series))
        deviations.append(tstd(across_series))

    self.ratio = numpy.divide(deviations, means)
    self.ratio2 = numpy.divide(highest - lowest, highest)
    # mean of ratios is the threshold statistic
    self.metric = abs(tmean(self.ratio))
def get_aggregate_stats(self, duration_start=0, duration_end=None):
    """Summarise packet lengths and interaction durations in a time window.

    :param duration_start: start of the window passed to the filter helpers
    :param duration_end: end of the window; defaults to ``self.get_duration()``
    :return: dict with the overall duration, per-source average / maximum /
        total packet length, interaction count, average / maximum / minimum
        interaction duration (``0`` when there are no interactions) and the
        total number of bytes
    """
    if duration_end is None:
        duration_end = self.get_duration()

    packets = self._filter_stats(duration_start, duration_end)
    interactions = self._filter_interactions(duration_start, duration_end)

    lens_by_src = {}
    for packet in packets:
        source = packet.get('src_addr')
        if source not in lens_by_src:
            lens_by_src[source] = []
        lens_by_src[source].append(packet.get('pkt_len'))

    durations = [interaction.get_duration() for interaction in interactions]
    total_bytes = sum(packet.get('pkt_len') for packet in packets)

    summary = {'duration': self.get_duration()}
    summary['avg_lens'] = {
        src: stats.tmean(lens) for src, lens in lens_by_src.items()}
    summary['max_lens'] = {src: max(lens) for src, lens in lens_by_src.items()}
    summary['total_by_src'] = {
        src: sum(lens) for src, lens in lens_by_src.items()}
    summary['num_interactions'] = len(interactions)
    if durations:
        summary['avg_interaction_duration'] = stats.tmean(durations)
        summary['max_interaction_duration'] = max(durations)
        summary['min_interaction_duration'] = min(durations)
    else:
        summary['avg_interaction_duration'] = 0
        summary['max_interaction_duration'] = 0
        summary['min_interaction_duration'] = 0
    summary['total_bytes'] = total_bytes
    return summary
def compute_metric(self):
    """Set ``self.metric`` to the flop-rate / DRAM-rate ratio, scaled by peaks.

    Per-host counter rates are summed over all hosts for the ``'amd64'`` and
    ``'intel_snb'`` counter layouts.  The ratio is only reported when the
    mean CPU rate, relative to ``self.peak[pmc_type][2]``, exceeds 0.5;
    otherwise ``self.metric`` is ``0``.
    """
    ts = self.ts
    intervals = len(ts.t) - 1
    gfloprate = numpy.zeros(intervals)
    gdramrate = numpy.zeros(intervals)
    gcpurate = numpy.zeros(intervals)

    def rate(counter, host):
        # Rate of change of one counter on one host over each time interval.
        return numpy.divide(numpy.diff(ts.data[counter][host][0]),
                            numpy.diff(ts.t))

    # pmc_type -> (flop counter indices, dram counter index, cpu counter index)
    layouts = {
        'amd64': ((0,), 1, 2),
        'intel_snb': ((0, 1), 2, 3),
    }
    for host in ts.j.hosts.keys():
        layout = layouts.get(ts.pmc_type)
        if layout is None:
            continue
        flop_counters, dram_counter, cpu_counter = layout
        for counter in flop_counters:
            gfloprate += rate(counter, host)
        gdramrate += rate(dram_counter, host)
        gcpurate += rate(cpu_counter, host)

    mfr = tmean(gfloprate) / ts.numhosts
    mdr = tmean(gdramrate) / ts.numhosts
    mcr = tmean(gcpurate) / (ts.numhosts * ts.wayness * 100.)

    peaks = self.peak[ts.pmc_type]
    if mcr / peaks[2] > 0.5:
        self.metric = (mfr / peaks[0]) / (mdr / peaks[1])
    else:
        self.metric = 0
def filterByMutualRemoval(data1, data2, nSTD=1):
    """Drop paired samples in which either value is an outlier.

    A pair ``(data1[i], data2[i])`` is kept only when neither value lies more
    than ``nSTD`` sample standard deviations away from the mean of its own
    series.  The mean and standard deviation of both series are printed.

    :param data1: first series
    :param data2: second series, indexed in parallel with ``data1``
    :param nSTD: half-width of the accepted band in standard deviations
    :return: ``(x, y)`` lists with the retained values of each series
    """
    std1 = stats.tstd(data1)
    mean1 = stats.tmean(data1)
    std2 = stats.tstd(data2)
    mean2 = stats.tmean(data2)
    # One parenthesised argument prints the same text under Python 2 and 3.
    print('m1, std1:  %s %s' % (mean1, std1))
    print('m2, std2:  %s %s' % (mean2, std2))

    low1, high1 = mean1 - (nSTD * std1), mean1 + (nSTD * std1)
    low2, high2 = mean2 - (nSTD * std2), mean2 + (nSTD * std2)

    x = []
    y = []
    for i, value1 in enumerate(data1):
        if value1 > high1 or value1 < low1:
            continue
        value2 = data2[i]
        # The lower bound of the second series must be checked against the
        # second series' own value (the original compared data1's value).
        if value2 > high2 or value2 < low2:
            continue
        x.append(value1)
        y.append(value2)
    return x, y
def compute_ttest_for_col(self, p_thresh):
    """Run two-sample t-tests per column and collect the significant ones.

    For each column in ``self.ls_cols`` the rows of ``self.df`` are split by
    ``self.group_col`` into ``self.groups[0]`` and ``self.groups[1]``.  Both
    an equal-variance t-test and an unequal-variance (``equal_var=False``)
    t-test are computed.  Columns whose equal-variance p-value is below
    ``p_thresh`` are recorded, and the collected feature names and p-values
    are passed to ``self.save_res``.

    :param p_thresh: threshold applied to the equal-variance p-value
    :return: dict mapping each retained column to its group means, group
        standard deviations, both p-values, kurtosis and skewness
    """
    res_4df = {'features': [], 'ttest': [], 'welch': []}
    res = dict()
    first, second = self.groups[0], self.groups[1]

    for col in self.ls_cols:
        group1 = self.df[self.df[self.group_col] == first][col]
        group2 = self.df[self.df[self.group_col] == second][col]
        ttest_eq_pop_var = stats.ttest_ind(group1, group2, equal_var=True)
        ttest_welch = stats.ttest_ind(group1, group2, equal_var=False)

        if ttest_eq_pop_var[1] < p_thresh:
            meas, struct = get_structure_measurement(
                col, self.ls_meas, self.ls_struct)
            res[col] = {
                '{}, mean'.format(first): stats.tmean(group1),
                # The original used groups[1] / group2 for this key as well,
                # so the first group's std was overwritten and never stored.
                '{}, std'.format(first): stats.tstd(group1),
                '{}, mean'.format(second): stats.tmean(group2),
                '{}, std'.format(second): stats.tstd(group2),
                'ttest': ttest_eq_pop_var[1],
                'welch': ttest_welch[1],
                # NOTE(review): kurtosis/skewness are taken over the grouping
                # column, not over `col` - confirm this is intended.
                'kurtosis': stats.kurtosis(self.df[self.group_col]),
                'skewness': stats.skew(self.df[self.group_col]),
            }
            res_4df['features'].append(struct + ' (' + meas + ')')
            res_4df['ttest'].append(ttest_eq_pop_var[1])
            res_4df['welch'].append(ttest_welch[1])

    self.save_res(res_4df)
    return res
def query4(self): global data1 data1 = pandas.read_sql_query(query['4a'], cnx) global data2 data2 = pandas.read_sql_query(query['4b'], cnx) a = data1['Expression Val'].values b = data2['Expression Val'].values print(stats.tmean(a)) print(stats.tmean(b)) print(stats.tvar(a)) print(stats.tvar(b)) tt = stats.ttest_ind(a, b, equal_var=True) return """<html> <form method="get" action="index"> <button type="submit">Return</button> </form> </form> <form method="post" action="processQuery4"> Custom Query on Result: <input type="text" name="qu"><br> <input type="submit"> </form> <h2>T-statistics for Exp Values::</h2>""" + (str)(tt[0]) + """ <h2>Corresponding p-value::</h2>""" + (str)(tt[1]) + """ <h1>Exp values for patients with ALL<h3>(Rows-""" + str(len( data1.index)) + """)</h3></h1>""" + data1.to_html( index=False) + """ <h1>Exp values for patients without ALL<h3>(Rows-""" + str( len(data2.index)) + """)</h3></h1>""" + data2.to_html( index=False) + """
def print_and_plot_results(count, results, verbose, plot_file_name):
    """Print request-per-second statistics per test and optionally plot them.

    :param count: number of requests performed in every timed run
    :param results: mapping of test name to a sequence of run durations
    :param verbose: when true, also print the per-request times and show the
        plot interactively
    :param plot_file_name: file to save a bar chart to, or ``None`` to skip
        plotting
    """
    print("RPS calculated as 95% confidence interval")

    rps_mean_ar = []
    low_ar = []
    high_ar = []
    test_name_ar = []

    for test_name in sorted(results):
        data = results[test_name]
        rps = count / array(data)
        rps_mean = tmean(rps)
        rps_var = tvar(rps)
        low, high = norm.interval(0.95, loc=rps_mean, scale=rps_var**0.5)
        times = array(data) * 1000000 / count
        times_mean = tmean(times)
        times_stdev = tstd(times)
        print('Results for', test_name)
        print('RPS: {:d}: [{:d}, {:d}],\tmean: {:.3f} μs,'
              '\tstandard deviation {:.3f} μs'
              .format(int(rps_mean), int(low), int(high),
                      times_mean, times_stdev))

        test_name_ar.append(test_name)
        rps_mean_ar.append(rps_mean)
        low_ar.append(low)
        high_ar.append(high)

        if verbose:
            print(' from', times)
        print()

    if plot_file_name is not None:
        import matplotlib.pyplot as plt
        from matplotlib import cm
        fig = plt.figure()
        ax = fig.add_subplot(111)

        L = len(rps_mean_ar)
        # With a single bar L - 1 is zero; fall back to a divisor of one.
        span = max(L - 1, 1)
        color = [cm.autumn(float(c) / span) for c in arange(L)]
        # yerr takes distances below/above each bar's height, not the
        # absolute interval bounds, so convert the bounds to offsets.
        lower_err = [mean - low for mean, low in zip(rps_mean_ar, low_ar)]
        upper_err = [high - mean for mean, high in zip(rps_mean_ar, high_ar)]
        bars = ax.bar(
            arange(L), rps_mean_ar,
            color=color, yerr=(lower_err, upper_err), ecolor='k')
        # order of legend is reversed for visual appeal
        ax.legend(
            list(reversed(bars)), list(reversed(test_name_ar)),
            loc='upper left')
        ax.get_xaxis().set_visible(False)
        plt.ylabel('Requests per Second', fontsize=16)
        print(plot_file_name)
        plt.savefig(plot_file_name, dpi=96)
        print("Plot is saved to {}".format(plot_file_name))
        if verbose:
            plt.show()
def plot(self,jobid,job_data=None):
    """Draw a per-core heat map of a counter ratio and hand it to self.output.

    For every host and core the ratio of the interval differences of
    ``ts.data[0]`` and ``ts.data[1]`` is computed (NaN replaced through
    ``nan_to_num``) and stacked into one row of the heat map.  The per-host
    mean ratio is shown in the y tick labels; the mean over hosts and the
    square root of its variance go into the title.

    :param jobid: forwarded to ``self.setup``
    :param job_data: forwarded to ``self.setup``
    :return: ``None``; returns early when ``self.setup`` reports failure
    """
    if not self.setup(jobid,job_data=job_data): return
    ts=self.ts
    host_cpi = {}
    host_names = sorted(ts.data[0].keys())
    for v in host_names:
        ncores = len(ts.data[0][v])
        num = 0
        den = 0
        for k in range(ncores):
            ratio = nan_to_num(diff(ts.data[0][v][k]) / diff(ts.data[1][v][k]))
            # The first pass has no `cpi` yet, so vstack raises and the
            # except branch creates the array; later passes append a row.
            try: cpi = vstack((cpi,ratio))
            except: cpi = array([ratio])
            num += diff(ts.data[0][v][k])
            den += diff(ts.data[1][v][k])
        # Host value: mean over time of (sum over cores) / (sum over cores).
        host_cpi[v] = tmean(nan_to_num(num/den))
    # NOTE(review): tmean/tvar receive dict.values() directly; presumably
    # written for Python 2 where that is a list - confirm under Python 3.
    mean_cpi = tmean(host_cpi.values())
    if len(host_cpi.values()) > 1: var_cpi = tvar(host_cpi.values())
    else: var_cpi= 0.0
    self.fig = Figure(figsize=(10,12),dpi=110)
    self.ax=self.fig.add_subplot(1,1,1)
    # One more edge than there are rows in cpi.
    ycore = arange(cpi.shape[0]+1)
    time = ts.t/3600.
    # Uses the core count of the last host visited for every host.
    yhost=arange(len(host_cpi.keys())+1)*ncores + ncores
    fontsize = 8
    set_printoptions(precision=4)
    # Shrink the tick labels when there are many hosts.
    if len(yhost) > 80: fontsize /= 0.5*log(len(yhost))
    self.ax.set_ylim(bottom=ycore.min(),top=ycore.max())
    self.ax.set_yticks(yhost[0:-1]-ncores/2.)
    self.ax.set_yticklabels([key +'(' + "{0:.2f}".format(host_cpi[key]) +')' for key in host_names],fontsize=fontsize)
    self.ax.set_xlim(left=time.min(),right=time.max())
    pcm = self.ax.pcolor(time, ycore, cpi,vmin=0.0,vmax=5.0)
    pcm.cmap = cm.get_cmap('jet_r')
    # First try self.k2 indexed by pmc type; on any error fall back to
    # indexing self.k2 directly.
    try: self.ax.set_title(self.k2[ts.pmc_type][0] +'/'+self.k2[ts.pmc_type][1] + '\n' + r'Mean(Std)='+'{0:.2f}'.format(mean_cpi)+r'({0:.2f})'.format(sqrt(var_cpi)))
    except: self.ax.set_title(self.k2[0] +'/'+self.k2[1] + '\n'+ r'$\bar{Mean}$='+'{0:.2f}'.format(mean_cpi)+r'$\pm$'+'{0:.2f}'.format(sqrt(var_cpi)))
    self.fig.colorbar(pcm)
    self.ax.set_xlabel('Time (hrs)')
    self.output('heatmap')
def sma(hd, time_period=90, offset=0):
    """
    Return the simple moving average of ``hd.close``.

    The average covers ``time_period`` closing values starting at ``offset``;
    when fewer values are available, everything from ``offset`` onwards is
    averaged instead.
    """
    window_end = offset + time_period
    if len(hd.close) < window_end:
        window = hd.close[offset:]
    else:
        window = hd.close[offset:window_end]
    return sts.tmean(window)
def print_and_plot_results(count, results, verbose, plot_file_name):
    """Print request-per-second statistics per test and optionally plot them.

    :param count: number of requests performed in every timed run
    :param results: mapping of test name to a sequence of run durations
    :param verbose: when true, also print the per-request times and show the
        plot interactively
    :param plot_file_name: file to save a bar chart to, or ``None`` to skip
        plotting
    """
    print("RPS calculated as 95% confidence interval")

    rps_mean_ar = []
    low_ar = []
    high_ar = []
    test_name_ar = []

    for test_name in sorted(results):
        data = results[test_name]
        rps = count / array(data)
        rps_mean = tmean(rps)
        rps_var = tvar(rps)
        low, high = norm.interval(0.95, loc=rps_mean, scale=rps_var**0.5)
        times = array(data) * 1000000 / count
        times_mean = tmean(times)
        times_stdev = tstd(times)
        print('Results for', test_name)
        print('RPS: {:d}: [{:d}, {:d}],\tmean: {:.3f} μs,'
              '\tstandard deviation {:.3f} μs'.format(int(rps_mean), int(low),
                                                      int(high), times_mean,
                                                      times_stdev))

        test_name_ar.append(test_name)
        rps_mean_ar.append(rps_mean)
        low_ar.append(low)
        high_ar.append(high)

        if verbose:
            print(' from', times)
        print()

    if plot_file_name is not None:
        import matplotlib.pyplot as plt
        from matplotlib import cm
        fig = plt.figure()
        ax = fig.add_subplot(111)

        L = len(rps_mean_ar)
        # With a single bar L - 1 is zero; fall back to a divisor of one.
        span = max(L - 1, 1)
        color = [cm.autumn(float(c) / span) for c in arange(L)]
        # yerr takes distances below/above each bar's height, not the
        # absolute interval bounds, so convert the bounds to offsets.
        lower_err = [mean - low for mean, low in zip(rps_mean_ar, low_ar)]
        upper_err = [high - mean for mean, high in zip(rps_mean_ar, high_ar)]
        bars = ax.bar(arange(L), rps_mean_ar, color=color,
                      yerr=(lower_err, upper_err), ecolor='k')
        # order of legend is reversed for visual appeal
        ax.legend(list(reversed(bars)), list(reversed(test_name_ar)),
                  loc='upper left')
        ax.get_xaxis().set_visible(False)
        plt.ylabel('Requests per Second', fontsize=16)
        print(plot_file_name)
        plt.savefig(plot_file_name, dpi=96)
        print("Plot is saved to {}".format(plot_file_name))
        if verbose:
            plt.show()
def train_models(slope_history):
    """Fit a linear model of slope against the first column, twice.

    The first fit is used to compute squared residuals; rows whose residual
    is not below ``mean + 3 * std`` of the residuals are discarded and the
    model is fitted again on the remaining rows.

    :param slope_history: list of tuples
        ``(avg_outdoor, slope, std_err, temperature_profile[0,0])``
    :return: ``None`` when fewer than three entries are given, otherwise a
        dict with the fitted model, the residuals of the second fit with
        their mean and std, the mean and std of the slopes, and the filtered
        data matrix
    """
    if len(slope_history) < 3:
        return None

    sh = np.matrix(slope_history)
    lnmodel = LinearRegression()
    # Result unused; kept so the evaluation (and any error) is unchanged.
    error_inverse = np.array(1 / sh[:, 2])[:, 0]
    lnfit = lnmodel.fit(sh[:, 0], sh[:, 1])

    ln_residue = [
        (lnfit.predict(entry[0])[0][0] - entry[1])**2
        for entry in slope_history
    ]
    ln_std = stats.tstd(ln_residue)
    ln_mean = stats.tmean(ln_residue)

    # Keep only the rows whose residual is below the cut-off.
    cutoff = ln_mean + 3 * ln_std
    new_sh = None
    for row_index, residue in enumerate(ln_residue):
        if residue < cutoff:
            row = sh[row_index, :]
            new_sh = row if new_sh is None else np.vstack((new_sh, row))
    sh = new_sh

    # redo the fit on the filtered rows
    error_inverse = np.array(1 / sh[:, 2])[:, 0]
    slope_mean = stats.tmean(sh[:, 1])
    slope_std = stats.tstd(sh[:, 1])
    lnfit = lnmodel.fit(sh[:, 0], sh[:, 1])
    ln_residue = [
        (lnfit.predict(sh[row_index, 0])[0][0] - sh[row_index, 1])**2
        for row_index in range(len(sh))
    ]
    ln_std = stats.tstd(ln_residue)
    ln_mean = stats.tmean(ln_residue)

    return {
        'ln_model': lnfit,
        'ln_residue': ln_residue,
        'ln_residue_std': ln_std,
        'ln_residue_mean': ln_mean,
        'slope_mean': slope_mean,
        'slope_std': slope_std,
        'data_matrix': sh
    }
def _read_samples(path, count):
    """Return the first ``count`` lines of ``path`` parsed as floats."""
    values = []
    with open(path, 'r') as handle:
        for line_number in range(1, count + 1):
            line = handle.readline()
            if not line.strip():
                raise ValueError(
                    '%s: expected %d values, line %d is missing or empty'
                    % (path, count, line_number))
            values.append(float(line))
    return values


def _format_section(label, scan, table):
    """Return the report text comparing one scan/table pair of samples."""
    _, p_value = stats.ttest_ind(scan, table, equal_var=False)
    return (label + '\n'
            + 'scan mean: ' + str(stats.tmean(scan)) + '\n'
            + 'scan var: ' + str(stats.tvar(scan)) + '\n'
            + 'table mean: ' + str(stats.tmean(table)) + '\n'
            + 'table var: ' + str(stats.tvar(table)) + '\n'
            + 'p-value: ' + str(p_value) + '\n')


def main():
    """Compare scan and table timings for both problem sizes.

    Reads the first ten values of ``sim_scan<N>.txt`` and
    ``sim_table<N>.txt`` for ``N`` in 27 and 35932 from the current
    directory, and writes their means, variances and the unequal-variance
    t-test p-value to ``sim_results_compare_scan_table.txt``.

    :raises ValueError: when an input file holds fewer than ten values or a
        value cannot be parsed as a float
    """
    ntests = 10
    sections = []
    # All inputs are read (and closed) before the report is opened, so a
    # bad input never leaves a partial report behind.
    for label in ('27', '35932'):
        scan = _read_samples('sim_scan' + label + '.txt', ntests)
        table = _read_samples('sim_table' + label + '.txt', ntests)
        sections.append(_format_section(label, scan, table))

    with open('sim_results_compare_scan_table.txt', 'w') as f:
        # Sections are separated by one blank line.
        f.write('\n'.join(sections))
def compute_metric(self):
    """Set ``self.metric`` to the mean over hosts of a time-averaged ratio.

    For every host the ratio of the interval differences of
    ``ts.data[0][host][0]`` and ``ts.data[1][host][0]`` is averaged over
    time, with NaN entries turned into zero first.  ``self.metric`` is the
    mean of those per-host averages.
    """
    ts = self.ts

    # Average over each node's time series turning nan's to zero's
    ratio = {}
    for k in ts.j.hosts.keys():
        ratio[k] = tmean(numpy.nan_to_num(
            numpy.diff(ts.data[0][k][0]) / numpy.diff(ts.data[1][k][0])))

    # Average of time-averaged nodes.  A dict view is not array-like on
    # Python 3, so the values are copied into a list first.
    self.metric = tmean(list(ratio.values()))
def process_CustomTstat(self, disease1, disease2, go): print(disease2) q1 = "SELECT s.p_id,maf.exp from GOAnnotation ga inner join (probe pb,Diagnosis dg,disease ds, microarray_fact maf, sample s) on (ga.UID = pb.UID and pb.pb_id=maf.pb_id and dg.ds_id= ds.ds_id and dg.p_id=s.p_id and maf.s_id= s.s_id) where ga.go_id =\"" + go + "\"and ds.`name` =\"" + disease1 + "\"" q1_not = "SELECT s.p_id,maf.exp from GOAnnotation ga inner join (probe pb,Diagnosis dg,disease ds, microarray_fact maf, sample s) on (ga.UID = pb.UID and pb.pb_id=maf.pb_id and dg.ds_id= ds.ds_id and dg.p_id=s.p_id and maf.s_id= s.s_id) where ga.go_id =\"" + go + "\"and ds.`name` !=\"" + disease1 + "\"" data1 = pandas.read_sql_query(q1, cnx) data1_not = pandas.read_sql_query(q1_not, cnx) a = data1['exp'].values b = data1_not['exp'].values print(stats.tmean(a)) print(stats.tmean(b)) print(stats.tvar(a)) print(stats.tvar(b)) if disease1 == disease2: tt = stats.ttest_ind(a, b, equal_var=True) return """<html> <form method="get" action="index"> <button type="submit">Return</button> </form> </form> <h2>T-statistics for Exp Values::</h2>""" + (str)(tt[0]) + """ <h2>Corresponding p-value::</h2>""" + (str)(tt[1]) + """ <h1>Exp values for patients with """ + disease1 + """<h3>(Rows-""" + str( len(data1.index) ) + """)</h3></h1>""" + data1.to_html(index=False) + """ <h1>Exp values for patients without """ + disease1 + """<h3>(Rows-""" + str( len(data1_not.index)) + """)</h3></h1>""" + data1_not.to_html( index=False) + """ </html>""" else: q2 = "SELECT s.p_id,maf.exp from GOAnnotation ga inner join (probe pb,Diagnosis dg,disease ds, microarray_fact maf, sample s) on (ga.UID = pb.UID and pb.pb_id=maf.pb_id and dg.ds_id= ds.ds_id and dg.p_id=s.p_id and maf.s_id= s.s_id) where ga.go_id =\"" + go + "\"and ds.`name` =\"" + disease2 + "\"" data2 = pandas.read_sql_query(q2, cnx) b = data2['exp'].values print(stats.tmean(a)) print(stats.tmean(b)) print(stats.tvar(a)) print(stats.tvar(b)) tt = stats.ttest_ind(a, b, 
equal_var=True) return """<html> <form method="get" action="index"> <button type="submit">Return</button> </form> </form> <h2>T-statistics for Exp Values::</h2>""" + (str)(tt[0]) + """ <h2>Corresponding p-value::</h2>""" + (str)(tt[1]) + """ <h1>Exp values for patients with """ + disease1 + """<h3>(Rows-""" + str( len(data1.index)) + """)</h3></h1>""" + data1.to_html( index=False) + """ <h1>Exp values for patients with """ + disease2 + """<h3>(Rows-""" + str( len(data2.index)) + """)</h3></h1>""" + data2.to_html( index=False) + """
def Quest4():
    """Print summary statistics and a t-test for queries '4a' and '4b'.

    Runs both queries through the module-level connection, takes the ``exp``
    column of each result and prints, in order: both means, both variances
    and the equal-variance two-sample t-test result.
    """
    global query, cnx
    frames = [pandas.read_sql_query(query[key], cnx) for key in ('4a', '4b')]
    a = frames[0]['exp'].values
    b = frames[1]['exp'].values
    for summarise in (stats.tmean, stats.tvar):
        print(summarise(a))
        print(summarise(b))
    print(stats.ttest_ind(a, b, equal_var=True))
def close_measure(self, duration):
    """
    Close the current measurement at the end of an incident.

    When ``duration`` exceeds ``self.observation_minimum_duration_seconds``
    the collected observations are summarised, logged and stored through the
    parent service's persistence layer; otherwise only a debug message is
    logged.

    :param duration: length of the incident, compared against the minimum
        duration and reported in seconds in the log message
    :return: None
    """
    if duration > self.observation_minimum_duration_seconds:
        mean = stats.tmean(self.active_observations) / 10
        # NOTE(review): stats.variation is scipy's coefficient of variation,
        # yet the value is logged and stored as "variance" - confirm.
        variance = stats.variation(self.active_observations) / 10
        self.parent_service.log.info(
            f'Closing reading for {str(self)}. '
            f'During {duration} seconds, '
            f'{len(self.active_observations)} observations were made. '
            f'Average intensity: {mean}, variance: {variance}')
        stored_reading = self.parent_service.persistence.store_intensity_reading(
            self.provide_sensor(), self.active_observations_since, duration,
            mean, variance, datetime.now())
        self.parent_service.log.debug(
            f'Stored new intensity reading: {str(stored_reading)}')
    else:
        self.parent_service.log.debug(
            f'Duration of an observation {str(self)} is too short. Resetting'
        )
    # NOTE(review): indentation was lost in the reviewed copy; the reset is
    # assumed to run on both paths - confirm against the original file.
    del self.active_observations
def norm_fit_sparsely(self,
                      show_it=0,
                      save_it=0,
                      save_dir=None,
                      save_name=None,
                      start=0,
                      end=0):
    """Fit ``self.norm_dist_CDF`` to a thinned-out slice of ``self.x``.

    Values of ``self.x`` between ``start`` and ``end`` are sampled with a
    stride chosen from the slice length, sorted, and fitted against their
    empirical cumulative fractions.  The fit result is stored in
    ``self.norm_params`` and ``self.norm_params_covariance``, and
    ``self.hist_norm_of_move_sparsely`` is called with the same arguments.

    A falsy ``end`` only triggers a printed message; processing continues.
    """
    if not end:
        print('norm fit:asign end')

    first = int(start)
    last = int(end)
    span = last - first
    stride = int(span / 10**4 + 1)
    print(223, 'int(data_num/sparseness + 1):{}, data_num:{}'.format(
        stride, span))

    picked = [self.x[position] for position in range(first, last, stride)]
    # The initial guess uses all of self.x, not just the sampled values.
    initial_guess = [stats.tmean(self.x), stats.tstd(self.x)]
    ordered = sorted(picked)
    fractions = [(rank + 1) / len(ordered) for rank in range(len(ordered))]

    fit = optimize.curve_fit(self.norm_dist_CDF, ordered, fractions,
                             initial_guess)
    self.norm_params, self.norm_params_covariance = fit

    self.hist_norm_of_move_sparsely(show_it=show_it,
                                    save_it=save_it,
                                    save_dir=save_dir,
                                    save_name=save_name,
                                    start=start,
                                    end=end)
def _as_scalar(value):
    """Unwrap nested single-element arrays until a scalar remains."""
    while getattr(value, 'ndim', 0) > 0:
        value = value[0]
    return value


def predict(self, data, regressive=False):
    """Predict a target for every row of ``data`` from its nearest neighbours.

    For each test row the squared Euclidean distance to every training row
    in ``self.data`` is computed and the ``self.k`` closest rows are
    selected (ties keep the earlier training row first).  The prediction is
    the mode of their targets, or their mean when ``regressive`` is true.

    :param data: sequence of test rows, indexable by column
    :param regressive: use the mean of the neighbour targets, not the mode
    :return: list with one ``int`` prediction per test row
    """
    print("Predicting...")
    result = []
    for testRow in range(len(data)):
        # One parenthesised argument prints the same text on Python 2 and 3.
        print("For testRow... %s" % testRow)
        query = data[testRow]

        # Every distance starts at zero (the original seeded each entry
        # with its row index, which biased the ranking towards early rows).
        distances = []
        for trainRow in self.data:
            total = 0.0
            # The column count comes from the training row being compared
            # (the original indexed self.data with the test row number).
            for col in range(len(trainRow)):
                total += float((query[col] - trainRow[col])**2)
            distances.append(total)

        # sorted() is stable, so equal distances keep training-row order.
        ranked = sorted(range(len(distances)), key=distances.__getitem__)
        topK = ranked[:self.k]

        neighbours = self.targets[topK]
        if not regressive:
            # mode() returns arrays or scalars depending on the SciPy version.
            result.append(int(_as_scalar(mode(neighbours)[0])))
        else:
            # NOTE(review): the mean is truncated to int, as the original
            # intended - confirm this is wanted for regression targets.
            result.append(int(_as_scalar(tmean(neighbours))))
    return result
def plot_stats(statfile, statplotctime, statplotlcode):
    """
    Plot the compilation statistics.

    The first line of ``statfile`` is skipped.  From every other line the
    compilation number (field 0), compilation time (field 2) and line count
    (field 4) are read.  Nothing is plotted unless at least three entries
    were read and ``SCIPY`` is true.

    :param statfile: Statistics file
    :param statplotctime: Output file of the compilation-time plot
    :param statplotlcode: Output file of the lines-of-code plot
    :return:
    """
    import matplotlib.pyplot as plt
    from matplotlib.ticker import MaxNLocator

    numcomp = []
    timecomp = []
    lcode = []
    # The context manager closes the file even when a line fails to parse.
    with open(statfile) as data:
        for k, i in enumerate(data):
            if k > 0:
                j = split_str(i.strip(), ' ')
                numcomp.append(int(j[0]))
                timecomp.append(float(j[2]))
                lcode.append(int(j[4]))
    nlen = len(numcomp)

    if nlen >= 3 and SCIPY:
        # Only read here, so a file without entries no longer raises.
        lastid = numcomp[nlen - 1]

        # Compilation time
        tme = stats.tmean(timecomp)
        trc = stats.trim_mean(timecomp, 0.15)

        plt.figure(1)
        fig, ax = plt.subplots()
        ax.plot(numcomp, timecomp, 'c', label=u'Tiempo compilación (s)')
        ax.plot([numcomp[0], numcomp[nlen - 1]], [tme, tme], 'r--',
                label=f'Tiempo medio ({tme:.3g}s)')
        # The trimmed-mean line is drawn at the trimmed mean (the original
        # plotted the plain mean a second time).
        ax.plot([numcomp[0], numcomp[nlen - 1]], [trc, trc], 'b--',
                label=f'Media acotada ({trc:.3g}s)')
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        ax.set_xlabel(u'Número de compilación')
        ax.set_ylabel(u'Tiempo de compilación [s]')
        ax.set_title(u'Estadísticas')
        plt.xlim(1, lastid)
        ax.legend()
        fig.savefig(statplotctime, dpi=600)

        # Lines of code
        fig, ax = plt.subplots()
        ax.plot(numcomp, lcode)
        ax.set_xlabel(u'Número de compilación')
        ax.set_ylabel(u'Líneas de código')
        ax.set_title(u'Estadísticas')
        plt.ylim([min(lcode) * 0.97, max(lcode) * 1.03])
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        plt.xlim(1, lastid)
        fig.savefig(statplotlcode, dpi=600)
def get_marginal_mean(self, **kwargs):
    """
    Get the marginal mean of a subset of the data, supplied as
    factor=level keyword arguments.

    A record of ``self.data`` is included when it equals the given level
    for every factor named in ``kwargs``.

    :return: mean of ``self.outcome_key`` over the matching records
    :raises TypeError: when selecting or averaging the records fails
    """
    try:
        outcomes = [
            d[self.outcome_key] for d in self.data
            if all([d[k] == kwargs[k] for k in kwargs])
        ]
        return stats.tmean(outcomes)
    # Only ordinary errors are translated; SystemExit and KeyboardInterrupt
    # now propagate instead of being reported as a TypeError.
    except Exception:
        raise TypeError('You must specify at least one factor with a list containing at least one level.')
def __init__(self, filename, sep=",", skip=0, index=True, header=True):
    """Load a data matrix and compute its per-row mean and variance.

    When ``filename`` is a ``str`` the data is read through ``gendata`` and
    transposed; otherwise ``filename`` is taken as the data itself, with
    ``index`` stored as ``self.date`` and ``header`` as ``self.header``.
    ``self.data`` is then treated as ``N`` rows of ``T`` values each.
    """
    if type(filename) is str:
        loaded = gendata(filename, sep=sep, skip=skip, index=index,
                         header=header)
        if index == True:
            self.date = loaded[0]
            self.data = loaded[1]
            self.header = loaded[2]
        else:
            self.data = loaded[0]
            self.header = loaded[1]
            self.date = "none"
        self.data = self.data.transpose()
    else:
        self.data = filename
        self.date = index
        self.header = header

    self.N, self.T = self.data.shape

    row_means = [sps.tmean(row) for row in self.data]
    if self.N > 1:
        row_variances = [sps.tvar(row) for row in self.data]
    else:
        row_variances = [sps.tvar(self.data)]
    self.mean = np.array(row_means)
    self.variance = np.array(row_variances)

    # Placeholders; nothing in this constructor fills them in.
    self.covariance = np.zeros((self.N, self.N))
    self.correlation = np.zeros((self.N, self.N))
    self.skewness = 0
    self.kurtosis = 0
    # Every row with its own mean subtracted.
    self.dmean = (self.data.transpose() - self.mean).transpose()
    self.did_covar = False
    self.JB = 0
    self.JBpvalue = 0
def perform_test(self): """ Perform requested tests """ # store some stuff for reuse combined_sample = [d[self.outcome_key] for d in self.data] grand_mean = stats.tmean(combined_sample) sum_of_grand_mean_squared = self.n*grand_mean**2 sum_of_observations_squared = sum(d[self.outcome_key]**2 for d in self.data) highest_order_interaction_bracket_term = self.__get_mean_bracket_term(self.factor_keys) ss_error = sum_of_observations_squared - highest_order_interaction_bracket_term df_error = float((self.n_per_condition-1)*self.num_conditions) ms_error = ss_error / df_error # store each main effect and interaction as a dictionary in a list self.results = list() bracket_terms = dict() for i in range(len(self.factor_keys)): combinations = itertools.combinations(self.factor_keys, i+1) for combination in combinations: bracket_terms[combination] = self.__get_mean_bracket_term(combination) ss_explained = ( bracket_terms[combination] + sum([bracket_terms[b]*(1 if len(b)%2==len(combination)%2 else -1) for b in bracket_terms.keys() if len(b) < len(combination) and all([t in combination for t in b])]) + sum_of_grand_mean_squared*(1 if len(combination)%2==0 else -1) ) self.results.append(self.__create_new_source_for_results('x'.join(combination))) r = ANOVAResults self.results[-1].ss = ss_explained self.results[-1].df = reduce(lambda x, y: x*y, [len(self.factor_levels[f])-1.0 for f in combination]) self.results[-1].ms = self.results[-1].ss / self.results[-1].df self.results[-1].test_statistic = self.results[-1].ms / ms_error self.results[-1].p_two = stats.f.sf(self.results[-1].test_statistic, self.results[-1].df, df_error) self.results[-1].effect_size = 0.0 # TODO self.results[-1].partial_effect_size = ANOVA.estimate_partial_effect_size(self.results[-1].test_statistic, self.results[-1].df, self.n) # fill in complete effects f_df_pairs = [(effect.test_statistic, effect.df) for effect in self.results] for i in range(len(self.results)): self.results[i].effect_size = 
ANOVA.estimate_complete_effect_size( self.results[i].test_statistic, self.results[i].df, self.n, *f_df_pairs ) # add entry for error self.results.append(self.__create_new_source_for_results('Error')) self.results[-1].ss = ss_error self.results[-1].df = df_error self.results[-1].ms = ms_error # add entry for total self.results.append(self.__create_new_source_for_results('Total')) self.results[-1].ss = self.ss_total(grand_mean, combined_sample) self.results[-1].df = float(self.n-1) # TODO: simple effects # TODO: contrasts # print results if not in silent mode if not self.is_silent: print self.printable_test_results()
def start_end(start_date, end_date):
    """Return temperature statistics for a date range as JSON.

    Queries ``Measurement.tobs`` for rows dated between ``start_date`` and
    ``end_date`` (both inclusive), excluding rows whose ``tobs`` is the
    string ``'None'`` or ``'bb'``.

    :param start_date: first date to include
    :param end_date: last date to include
    :return: JSON list with one ``{'min', 'avg', 'max'}`` entry per
        observation, each computed over the observations up to and
        including that one
    """
    # create a link from Python to the database
    session = Session(engine)
    try:
        # Each exclusion is its own filter().  Joining the two comparisons
        # with Python's `and` hands a single expression to filter(), so one
        # of the exclusions is silently lost.
        results = session.query(Measurement.tobs).\
            filter(Measurement.date >= start_date).\
            filter(Measurement.date <= end_date).\
            filter(Measurement.tobs != 'None').\
            filter(Measurement.tobs != 'bb').all()
    finally:
        # Release the session even when the query fails.
        session.close()

    # flatten the result rows into a one-dimensional array of observations
    results = np.ravel(results)
    tobs_list = []
    i = []
    # NOTE(review): statistics are recomputed after every observation, so
    # the response holds running values; indentation was lost in the
    # reviewed copy - confirm a single summary was not intended.
    for tobs in results:
        tobs_list.append(tobs)
        # assign key:value pairs for minimum, average, and max
        i.append({
            'min': stats.tmin(tobs_list),
            'avg': stats.tmean(tobs_list),
            'max': stats.tmax(tobs_list),
        })

    return jsonify(i)
def throughput_from_file(scalar_file, use_link_layer=True):
    """
    Return the average and minimum stream throughput found in a scalar file.

    - ``use_link_layer`` selects the module whose statistics are read:
      ``port_queue.stream`` when true, ``simpleUDPApplication.stream``
      otherwise.
    - Raises ``StandardError`` when no throughput could be extracted;
      errors from ``match_streams`` are logged and re-raised.
    """
    scalar_file = pf.check_file(scalar_file)
    mlog.debug("scalar_file = %s", scalar_file)

    module = ("port_queue.stream" if use_link_layer
              else "simpleUDPApplication.stream")

    tx_scalar_values = pf.scalar_from_file(
        scalar_file, "streamTxBytes:sum", module)
    rx_scalar_values = pf.scalar_from_file(
        scalar_file, "streamRxBytes:sum", module)

    try:
        streams = match_streams(tx_scalar_values, rx_scalar_values)
    except:
        mlog.error("Failed to match streams in file: %s", scalar_file)
        raise

    # The throughput is the third item of every matched stream entry.
    throughputs = [entry[2] for entry in streams.itervalues()]
    if len(throughputs) <= 0:
        mlog.error("Couldn't get throughput from file %s!", scalar_file)
        raise StandardError("Couldn't process file")

    return stats.tmean(throughputs), min(throughputs)
def handle(self):
    """Fold the raw service times into the all-time statistics.

    The request payload holds one ``key=value`` line per setting with
    integer values, for instance::

        global_slow_threshold=120
        max_batch_size=99999

    For every raw-times key the batch statistics returned by
    ``self.aggregate_raw_times`` are merged into the stored
    ``mean_all_time``, ``min_all_time`` and ``max_all_time`` fields, after
    which the processed raw entries are trimmed away.
    """
    config = Bunch()
    for item in self.request.payload.splitlines():
        key, value = item.split('=')
        config[key] = int(value)

    conn = self.server.kvdb.conn
    for key in conn.keys(KVDB.SERVICE_TIME_RAW + '*'):
        service_name = key.replace(KVDB.SERVICE_TIME_RAW, '')
        stats_key = KVDB.SERVICE_TIME_BASIC + service_name

        stored_mean = conn.hget(stats_key, 'mean_all_time')
        stored_min = conn.hget(stats_key, 'min_all_time')
        stored_max = conn.hget(stats_key, 'max_all_time')

        batch_min, batch_max, batch_mean, batch_total = self.aggregate_raw_times(
            key, service_name, config.max_batch_size)

        # A missing stored value means there is no history yet, so the batch
        # value is used as is.  Defaulting it to 0 (as before) pinned the
        # minimum at 0 and halved the first mean.
        if stored_mean:
            new_mean = sp_stats.tmean((batch_mean, float(stored_mean)))
        else:
            new_mean = batch_mean
        new_min = min(float(stored_min), batch_min) if stored_min else batch_min
        new_max = max(float(stored_max), batch_max) if stored_max else batch_max

        conn.hset(stats_key, 'mean_all_time', new_mean)
        conn.hset(stats_key, 'min_all_time', new_min)
        conn.hset(stats_key, 'max_all_time', new_max)

        # Services use RPUSH for storing raw times so we are safe to use LTRIM
        # in order to do away with the already processed ones
        conn.ltrim(key, batch_total, -1)
def ownCorrelationMeasure(self, X, Y):
    """Relate the group means of ``Y`` for the distinct values of ``X``.

    ``Y`` values are grouped by their ``X`` value.  Each group is summarised
    as ``(mean, variance)``; a group with a single value gets
    ``(value, 0)``.  The measure is the mean of the largest summary tuple
    divided by the mean of the smallest one.

    :return: ``[correlation, {x_value: (mean, variance)}]``
    """
    # Group X-values into categories with their respective set of Y-values
    groups = {}
    for position in range(len(X)):
        groups.setdefault(X[position], []).append(Y[position])

    # Summarise every group by its mean and variance
    normal_distributions = {}
    for x_value, members in groups.items():
        if len(members) > 1:
            summary = (stats.tmean(members), stats.tvar(members))
        else:
            summary = (members[0], 0)
        normal_distributions[x_value] = summary

    # Tuples compare by mean first and by variance on ties
    summaries = list(normal_distributions.values())
    largest = max(summaries)
    smallest = min(summaries)
    correlation = largest[0] / smallest[0]
    return [correlation, normal_distributions]
def player_info(accountId):
    """Return win statistics, level and hot-streak flag for an account.

    Every win is counted as ``1`` and every loss as ``0``; the mean,
    standard deviation and skewness of that sequence are reported.

    :param accountId: account identifier passed to the API client
    :return: ``(win_mean, win_std, win_skew, level, hot_streak)``, or five
        ``None`` values when the summoner or league lookup fails
    """
    try:
        summoner = watcher.summoner.by_account(my_region, accountId)
        summonerId = summoner['id']
        league = watcher.league.by_summoner(my_region, summonerId)[0]
    # Lookup failures stay best-effort, but SystemExit and
    # KeyboardInterrupt are no longer swallowed.
    except Exception:
        return None, None, None, None, None

    level = summoner['summonerLevel']
    total_win = league['wins']
    total_loss = league['losses']
    hot_streak = int(league['hotStreak'])

    data = [1] * total_win + [0] * total_loss
    win_skew = skew(data)
    win_std = tstd(data)
    win_mean = tmean(data)

    return win_mean, win_std, win_skew, level, hot_streak
def ss_between(cls, grand_mean=None, *args, **kwargs):
    """
    Sum, over all groups, the squared deviation of the group's mean from
    the grand mean, weighted by the group's size.

    Groups are passed as the positional arguments after ``grand_mean``;
    when ``grand_mean`` is ``None`` it is obtained from
    ``cls.get_grand_mean``.
    """
    if grand_mean is None:
        grand_mean = cls.get_grand_mean(*args)

    total = 0
    for group in args:
        deviation = grand_mean - stats.tmean(group)
        total += len(group) * deviation**2
    return total
def _calc_rt_base(self, keys): values = [] for key in keys: in_time = self.in_tasks[key] out_time = self.out_tasks[key] values.append(out_time - in_time) return stat.tmean(values)
def compute_metric(self):
    """Set ``self.metric`` to the mean floating-point rate divided by 1e9.

    For ``'intel_snb'`` the weighted sum of the double-precision counters
    is differenced over time, divided by the time step and by the second
    item of the aggregated stats.  Returns early, leaving ``self.metric``
    untouched, for ``'intel_hsw'`` and when the schema contains ``'ERROR'``.
    """
    pmc_type = self.ts.pmc_type

    gfloprate = 0
    if pmc_type == 'amd64':
        # NOTE(review): this value is accumulated but never used, and as
        # laid out here `flops` is unassigned for non-snb types when the
        # metric is computed - confirm.
        gfloprate += self.arc(self.ts.data[0])

    if pmc_type == 'intel_hsw':
        # Haswell chips do not have FLOP counters
        return

    if pmc_type == 'intel_snb':
        schema = self.ts.j.get_schema('intel_snb')
        if 'ERROR' in schema:
            return
        data = self.ts.j.aggregate_stats('intel_snb')
        counters = data[0]

        def column(name):
            # Column of the aggregated counters selected by schema name.
            return counters[:, schema[name].index]

        try:
            weighted = (column('SSE_DOUBLE_SCALAR')
                        + 2*column('SSE_DOUBLE_PACKED')
                        + 4*column('SIMD_DOUBLE_256'))
            flops = numpy.diff(weighted)/numpy.diff(self.ts.t)
        except:
            # Any failure above falls back to the alternative counter names.
            weighted = column('SSE_D_ALL') + 4*column('SIMD_D_256')
            flops = numpy.diff(weighted)/numpy.diff(self.ts.t)

        flops = flops/data[1]

    self.metric = tmean(flops)/1.0e9
def num_features_DDbS(samplefile_, n_):
    """Return the mean and standard deviation of enabled features per row.

    The file is read as ';'-separated rows. The first line is skipped as a
    header and the first column of every remaining row is ignored; every
    other column whose value is exactly ``'1'`` counts as one enabled
    feature.

    Args:
        samplefile_: Path of the sample file.
        n_: Unused by this function.

    Returns:
        ``(mean, std)`` of the per-row feature counts, or ``-1`` when the
        file does not exist.
    """
    if not os.path.exists(samplefile_):
        return -1

    _fnums = []
    with open(samplefile_, 'r') as sf:
        next(sf, None)  # skip the header line
        for line in sf:
            # Strip the line terminator first: otherwise the last column
            # reads as '1\n' and is never counted.
            cells = line.rstrip('\r\n').split(';')[1:]
            _fnums.append(sum(1 for cell in cells if cell == '1'))

    return stats.tmean(_fnums), stats.tstd(_fnums)
def test_calculate_mean(self):
    """Check ``self.stat.calculate_mean`` against ``tmean``.

    The comparison is made on 100 freshly drawn ``random()`` values.
    """
    sample = [random() for _ in range(100)]
    self.assertAlmostEqual(self.stat.calculate_mean(sample), tmean(sample))
def num_features_QS(samplefile_, n_):
    """Return mean and std of the feature count over randomly drawn samples.

    Every line of the file is split on single spaces and its last field is
    dropped; the remaining fields form one configuration. Each value ``r``
    produced by ``get_random(n_, len(configs))`` selects configuration
    ``r - 1``. A field counts as a feature unless it starts with ``'-'``.

    Returns:
        ``(mean, std)`` of the per-sample feature counts, or ``-1`` when the
        file does not exist.
    """
    if not os.path.exists(samplefile_):
        return -1

    with open(samplefile_, 'r') as sf:
        _configs = [line.split(" ")[:-1] for line in sf]

    _samples = [_configs[r - 1] for r in get_random(n_, len(_configs))]
    _fnums = [
        sum(1 for v in sample if not v.startswith('-'))
        for sample in _samples
    ]
    return stats.tmean(_fnums), stats.tstd(_fnums)
def compute_metric(self):
    """Store the mean ratio of vectorized to total counter increments.

    Only the ``'intel_snb'`` counter type assigns ``self.metric``. The
    vectorized count is the packed (x2) plus 256-bit SIMD (x4) counters and
    the total adds the scalar counter on top. The metric is the ``tmean`` of
    ``diff(vectorized) / diff(total)``.

    Nothing is stored when the schema contains ``'ERROR'``.
    """
    gvecrate = 0
    if self.ts.pmc_type == 'amd64':
        # NOTE(review): this value is accumulated but never turned into a
        # metric by this method - confirm whether that is intended.
        gvecrate += self.arc(self.ts.data[0])

    if self.ts.pmc_type != 'intel_snb':
        return

    schema = self.ts.j.get_schema('intel_snb')
    if 'ERROR' in schema:
        return
    counters = self.ts.j.aggregate_stats('intel_snb')[0].astype(float)

    try:
        vectorized = (2 * counters[:, schema['SSE_DOUBLE_PACKED'].index]
                      + 4 * counters[:, schema['SIMD_DOUBLE_256'].index])
        every = vectorized + counters[:, schema['SSE_DOUBLE_SCALAR'].index]
    except Exception:
        # Fall back to the alternative event names. ``Exception`` rather than
        # a bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
        vectorized = 4 * counters[:, schema['SIMD_D_256'].index]
        every = vectorized + counters[:, schema['SSE_D_ALL'].index]

    vecs = numpy.diff(vectorized) / numpy.diff(every)
    self.metric = tmean(vecs)
    return
def run(self):
    """Compute four means of ``self._wave`` and bundle them.

    The results of ``mean``, ``hmean``, ``gmean`` and ``tmean`` are stored on
    the instance, then ``self._means_entity`` is built from them in that
    order.
    """
    averages = (
        ('_arithmetic_mean', mean),
        ('_harmonic_mean', hmean),
        ('_geometric_mean', gmean),
        ('_trimmed_mean', tmean),
    )
    for attribute, average in averages:
        setattr(self, attribute, average(self._wave))

    self._means_entity = MeansEntity(
        self._arithmetic_mean,
        self._harmonic_mean,
        self._geometric_mean,
        self._trimmed_mean,
    )
def main(): train_df = pd.read_csv('data/match_feature.csv') # Split X, y and scale X, y, min_max_scaler = get_X_y(train_df) print("Total dataset size : ", len(X)) #check_train_size_curve(X, y) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.01, random_state=42) model = Lgbm_Model() #param = model.tune(X_train, y_train) model.train(X_train, y_train, X_test, y_test) #model.save("Lgbm") model.evaluate(X_train, y_train, cross_val=True) model.evaluate(X_test, y_test) res = model.predict(X_train) print(tstd(res)) print(tmean(res)) plt.hist(res, bins=100) plt.show() model.plot_importance()
def aggregate_raw_times(self, key, service_name, max_batch_size=None):
    """ Aggregates values from a list living under a given key. Returns its
    min, max, mean and an overall usage count. 'max_batch_size' controls how
    many items will be fetched from the list so it's possible to fetch less
    items than its LLEN returns.

    The mean is a trimmed one - only values not greater than the score at the
    'mean_percentile' percentile (read from the service's hash, 0 if not set)
    take part in it. Returns four zeros if there is nothing to aggregate.
    """
    conn = self.server.kvdb.conn
    key_len = conn.llen(key)

    if max_batch_size:
        batch_size = min(key_len, max_batch_size)
        if batch_size < key_len:
            msg = ('batch_size:[{}] < key_len:[{}], max_batch_size:[{}], key:[{}], '
                   'consider decreasing the job interval or increasing the max_batch_size').format(
                       batch_size, key_len, max_batch_size, key)
            self.logger.warn(msg)
    else:
        batch_size = key_len

    if not batch_size:
        # Nothing to fetch. Returning here also avoids issuing LRANGE 0 -1,
        # which would mean "the whole list".
        return 0, 0, 0, 0

    # LRANGE's stop index is inclusive, hence batch_size - 1 to fetch exactly
    # batch_size items.
    times = [int(elem) for elem in conn.lrange(key, 0, batch_size - 1)]
    if not times:
        return 0, 0, 0, 0

    mean_percentile = int(conn.hget(KVDB.SERVICE_TIME_BASIC + service_name, 'mean_percentile') or 0)
    max_score = int(sp_stats.scoreatpercentile(times, mean_percentile))

    return min(times), max(times), (sp_stats.tmean(times, (None, max_score)) or 0), len(times)
def print_stats(datums):
    """Print summary statistics of ``datums`` to standard output.

    One line each is printed for the mean, median, standard deviation
    (``stats.tstd``), coefficient of variation (``stats.variation``),
    kurtosis with ``fisher=False`` and skewness.
    """
    # Local import: ``stats.cmedian`` is not available in current SciPy
    # releases, so the median comes from NumPy instead.
    import numpy

    # Each call passes a single pre-formatted string, which prints the same
    # text whether ``print`` is the Python 2 statement or the Python 3
    # function.
    print('Mean: {}'.format(stats.tmean(datums)))
    print('Median: {}'.format(numpy.median(datums)))
    print('Std Dev: {}'.format(stats.tstd(datums)))
    print('Variation: {}'.format(stats.variation(datums)))
    print('Kurtosis: {}'.format(stats.kurtosis(datums, fisher=False)))
    print('Skewness: {}'.format(stats.skew(datums)))
def aggregate_raw_times(self, key, service_name, max_batch_size=None):
    """ Aggregates values from a list living under a given key. Returns its
    min, max, mean and an overall usage count. 'max_batch_size' controls how
    many items will be fetched from the list so it's possible to fetch less
    items than its LLEN returns.

    The mean is a trimmed one - only values not greater than the score at the
    'mean_percentile' percentile (read from the service's hash, 0 if not set)
    take part in it. Returns four zeros if there is nothing to aggregate.
    """
    conn = self.server.kvdb.conn
    key_len = conn.llen(key)

    if max_batch_size:
        batch_size = min(key_len, max_batch_size)
        if batch_size < key_len:
            msg = 'batch_size:`%s` < key_len:`%s`, max_batch_size:`%s`, key:`%s`, ' \
                  'consider decreasing the job interval or increasing max_batch_size'
            self.logger.warn(msg, batch_size, key_len, max_batch_size, key)
    else:
        batch_size = key_len

    if not batch_size:
        # Nothing to fetch. Returning here also avoids issuing LRANGE 0 -1,
        # which would mean "the whole list".
        return 0, 0, 0, 0

    # LRANGE's stop index is inclusive, hence batch_size - 1 to fetch exactly
    # batch_size items.
    times = [int(elem) for elem in conn.lrange(key, 0, batch_size - 1)]
    if not times:
        return 0, 0, 0, 0

    mean_percentile = int(
        conn.hget(KVDB.SERVICE_TIME_BASIC + service_name, 'mean_percentile') or 0)
    max_score = int(sp_stats.scoreatpercentile(times, mean_percentile))

    return min(times), max(times), (sp_stats.tmean(times, (None, max_score)) or 0), len(times)
def compute_metric(self):
    """Store the mean ratio of vectorized to total counter increments.

    Only the ``'intel_snb'`` counter type assigns ``self.metric``. The
    vectorized count is the packed (x2) plus 256-bit SIMD (x4) counters and
    the total adds the scalar counter on top. The metric is the ``tmean`` of
    ``diff(vectorized) / diff(total)``.

    ``'intel_hsw'`` and ``'intel_knl'`` return immediately, and nothing is
    stored when the schema contains ``'ERROR'``.
    """
    gvecrate = 0
    if self.ts.pmc_type == 'amd64':
        # NOTE(review): this value is accumulated but never turned into a
        # metric by this method - confirm whether that is intended.
        gvecrate += self.arc(self.ts.data[0])

    if self.ts.pmc_type in ('intel_hsw', 'intel_knl'):
        # Haswell does not support FLOP counters
        return

    if self.ts.pmc_type != 'intel_snb':
        return

    schema = self.ts.j.get_schema('intel_snb')
    if 'ERROR' in schema:
        return
    counters = self.ts.j.aggregate_stats('intel_snb')[0].astype(float)

    try:
        vectorized = (2 * counters[:, schema['SSE_DOUBLE_PACKED'].index]
                      + 4 * counters[:, schema['SIMD_DOUBLE_256'].index])
        every = vectorized + counters[:, schema['SSE_DOUBLE_SCALAR'].index]
    except Exception:
        # Fall back to the alternative event names. ``Exception`` rather than
        # a bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
        vectorized = 4 * counters[:, schema['SIMD_D_256'].index]
        every = vectorized + counters[:, schema['SSE_D_ALL'].index]

    vecs = numpy.diff(vectorized) / numpy.diff(every)
    self.metric = tmean(vecs)
    return
def compute_metric(self):
    """Store the mean double-precision FLOP counter rate, scaled by 1e-9.

    Only the ``'intel_snb'`` counter type assigns ``self.metric``: the sum
    of the scalar, packed (x2) and 256-bit SIMD (x4) counters is
    differentiated against ``self.ts.t``, divided by the second item
    returned by ``aggregate_stats`` and averaged with ``tmean``.

    ``'intel_hsw'`` and ``'intel_knl'`` return immediately, and nothing is
    stored when the schema contains ``'ERROR'``.
    """
    gfloprate = 0
    if self.ts.pmc_type == 'amd64':
        # NOTE(review): this value is accumulated but never turned into a
        # metric by this method - confirm whether that is intended.
        gfloprate += self.arc(self.ts.data[0])

    if self.ts.pmc_type in ('intel_hsw', 'intel_knl'):
        # Haswell chips do not have FLOP counters
        return

    if self.ts.pmc_type != 'intel_snb':
        return

    schema = self.ts.j.get_schema('intel_snb')
    if 'ERROR' in schema:
        return
    data = self.ts.j.aggregate_stats('intel_snb')
    counters = data[0]

    try:
        weighted = (counters[:, schema['SSE_DOUBLE_SCALAR'].index]
                    + 2 * counters[:, schema['SSE_DOUBLE_PACKED'].index]
                    + 4 * counters[:, schema['SIMD_DOUBLE_256'].index])
    except Exception:
        # Fall back to the alternative event names. ``Exception`` rather than
        # a bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
        weighted = (counters[:, schema['SSE_D_ALL'].index]
                    + 4 * counters[:, schema['SIMD_D_256'].index])

    flops = numpy.diff(weighted) / numpy.diff(self.ts.t)
    flops = flops / data[1]
    self.metric = tmean(flops) / 1.0e9
    return
def passos(xa, ya, xmax, ymax, series=1000):
    '''Simulate drunkard's walks to (xa, ya) and print step statistics.

    Each of the `series` walks starts at (0, 0) and moves one unit per call
    to `direcao()` ('N', 'S', 'O' or anything else for the remaining
    direction) until it stands on (xa, ya). Coordinates are clamped to
    [-xmax, xmax] and [-ymax, ymax], so a target outside that box is never
    reached and the walk does not terminate.

    Prints the mean and the standard deviation of the step counts, both
    rounded to 5 decimals. Nothing is returned.
    '''
    lista_passos = []
    for _ in range(series):
        x, y = 0, 0
        # NOTE(review): the counter starts at 1, so a walk of k moves is
        # reported as k + 1 steps - confirm this is intended.
        n_passos = 1
        # Keep walking until BOTH coordinates match the target; with `and`
        # the walk stopped as soon as a single coordinate matched.
        while x != xa or y != ya:
            a = direcao()
            n_passos += 1
            if a == 'N':
                y += 1
                if y >= ymax:
                    y = ymax
            elif a == 'S':
                y -= 1
                if abs(y) >= ymax:
                    y = -ymax
            elif a == 'O':
                x -= 1
                if abs(x) >= xmax:
                    x = -xmax
            else:
                x += 1
                if x >= xmax:
                    x = xmax
        lista_passos.append(n_passos)
    print('Média=' + str(round(stats.tmean(lista_passos), 5)) +
          '\nDesvio-padrão=' + str(round(stats.tstd(lista_passos), 5)))
def collect_service_stats(
    self,
    keys_pattern,
    key_prefix,
    key_suffix,
    total_seconds,
    suffix_needs_colon=True,
    chop_off_service_name=True,
    needs_rate=True,
):
    """Merge per-key statistics hashes into one dictionary per service.

    Every key matching ``keys_pattern`` is turned into a service name by
    removing ``key_prefix`` and ``key_suffix`` (the suffix gets a leading
    ':' when ``suffix_needs_colon`` is set) and, if
    ``chop_off_service_name`` is set, its last three characters. For each
    name in ``STATS_KEYS`` with a non-empty value in the key's hash, 'usage'
    is summed, 'max' and 'min' keep the extreme seen so far and 'mean'
    values are collected; other names are only initialised to 0.

    Afterwards the collected 'mean' values of each service are replaced by
    their ``tmean`` and, when ``needs_rate`` is set, 'rate' becomes
    ``usage / total_seconds``.

    Returns:
        A dict mapping service names to their aggregated statistics.
    """
    if suffix_needs_colon:
        key_suffix = ":" + key_suffix

    service_stats = {}
    for key in self.kvdb.conn.keys(keys_pattern):
        service_name = key.replace(key_prefix, "").replace(key_suffix, "")
        if chop_off_service_name:
            service_name = service_name[:-3]

        values = self.kvdb.conn.hgetall(key)
        stats = service_stats.setdefault(service_name, {})

        for name in STATS_KEYS:
            raw = values.get(name)
            if not raw:
                continue

            value = float(raw) if name in ("rate", "mean") else int(raw)

            # First sighting of this statistic for the service.
            if name not in stats:
                if name == "mean":
                    stats[name] = []
                elif name == "min":
                    stats[name] = maxint
                else:
                    stats[name] = 0

            if name == "usage":
                stats[name] += value
            elif name == "max":
                stats[name] = max(stats[name], value)
            elif name == "mean":
                stats[name].append(value)
            elif name == "min":
                stats[name] = min(stats[name], value)

    for values in service_stats.values():
        collected_means = values.get("mean")
        if collected_means:
            values["mean"] = sp_stats.tmean(collected_means)
        if needs_rate:
            values["rate"] = values["usage"] / total_seconds

    return service_stats
def get_stats_Emmanuelle(stats_type, groups, *data):
    '''
    author: version adjusted by Emmanuelle Mazur-Lainé 202206

    Compute the requested statistic or test over groups of samples.

    Args:
        stats_type: 'mean', 'std', 'kurtosis' or 'skewness' (computed for
            every group), 'TTest', 'Welch' or 'MannWhitneyu' (computed on
            the first two groups), or 'Bartlett', 'Kruskal' or 'ANOVA'
            (computed on all groups).
        groups: Not used by this function.
        *data: The first extra argument is the sequence of groups; any
            further arguments are ignored.

    Return:
        A ``(result, labels)`` pair. For the per-group statistics ``result``
        is a tuple with one value per group and ``labels`` is the statistic
        name. For the tests ``result`` is whatever scipy returns and
        ``labels`` names its two fields. ``(0, 0)`` replaces the result when
        Mann-Whitney or Kruskal raises ``ValueError``. ``None`` is returned
        for an unknown ``stats_type``.
    '''
    data = data[0]

    per_group = {
        'mean': stats.tmean,
        'std': stats.tstd,
        'kurtosis': stats.kurtosis,
        'skewness': stats.skew,
    }
    if stats_type in per_group:
        statistic = per_group[stats_type]
        return tuple(statistic(group) for group in data), stats_type

    if stats_type == 'TTest':
        return stats.ttest_ind(data[0], data[1], equal_var=True), ('t', 'p')
    if stats_type == 'Welch':
        return stats.ttest_ind(data[0], data[1], equal_var=False), ('t', 'p')
    if stats_type == 'MannWhitneyu':
        try:
            return stats.mannwhitneyu(data[0], data[1]), ('u', 'p')
        except ValueError:
            # Same labels as the success path (this used to return 'h').
            return (0, 0), ('u', 'p')

    # The remaining tests receive every group through *data.
    if stats_type == 'Bartlett':
        return stats.bartlett(*data), ('t', 'p')
    if stats_type == 'Kruskal':
        try:
            return stats.kruskal(*data), ('h', 'p')
        except ValueError:
            return (0, 0), ('h', 'p')
    if stats_type == 'ANOVA':
        return stats.f_oneway(*data), ('t', 'p')
    return None
def main(self) -> float:
    """
    One iteration of the main loop of the service.

    Collects measurements for up to ``self.measure_duration`` seconds (or
    until the exit event is set), takes their mode as the current level and
    either accepts it (stores/reacts on it) or reports it as unreliable.

    Returns the time to sleep in seconds: the polling period minus the time
    this iteration took.
    """
    start_mark = datetime.now()
    measurements = array('i')
    attempt = 0
    while (datetime.now() - start_mark).total_seconds() < self.measure_duration and not ExitEvent().is_set():
        try:
            attempt += 1
            measurements.append(self._measure())
        except DistanceMeasureException as exception:
            # NOTE(review): the exception is passed as an extra positional
            # argument while the message has no placeholder for it - confirm
            # how self.log handles that.
            self.log.critical(f'Unsuccessful {attempt} attempt to measure', exception)
        # Optional pause between attempts.
        if self.measure_attempts_pause_time > 0:
            ExitEvent().wait(self.measure_attempts_pause_time)
    if len(measurements) > 0:
        # assumed the reading was successful in technical terms
        # unfortunately the reading sometimes (quite often) can be invalid - unreliable
        # NOTE(review): ``.mode[0]`` requires stats.mode to return an
        # indexable ``mode`` - verify against the installed SciPy version.
        current_level = int(stats.mode(measurements, nan_policy='omit').mode[0])
        current_level_mean = stats.tmean(measurements)
        last_reliable_reading = self._get_last_reliable_cesspit_reading()
        last_stored_reading = self._get_last_stored_cesspit_reading()
        # The reading is accepted when it is reliable relative to either the
        # last reliable or the last stored reading.
        if self._is_reliable(current_level, last_reliable_reading) \
                or self._is_reliable(current_level, last_stored_reading):
            self.log.info(f'OK {len(measurements)} measurements, '
                          f'mode: {current_level} [mm] ({self._get_fill_percentage(current_level):.2f} [%]), '
                          f'mean: {current_level_mean:.2f}')
            self._set_last_reliable_cesspit_reading(current_level)
            if self._do_store_reading(current_level, last_stored_reading):
                self._add_cesspit_reading(current_level)
            # NOTE(review): _get_fill_percentage is called without the level
            # here but with it elsewhere in this method - confirm.
            self._react_on_level(self._get_fill_percentage())
        else:
            # Level change per hour since the last reliable reading.
            # NOTE(review): assumes last_reliable_reading has ``level`` and
            # ``timestamp`` and that the elapsed time is non-zero - confirm.
            speed = (last_reliable_reading.level - current_level) / \
                    ((datetime.now() - last_reliable_reading.timestamp).total_seconds()/3600)
            self.log.info(f'UNRELIABLE! {len(measurements)} measurements, '
                          f'mode: {current_level} [mm] ({self._get_fill_percentage(current_level):.2f} [%]), '
                          f'increase {last_reliable_reading.level - current_level} [mm],'
                          f'mean: {current_level_mean:.2f}, '
                          f'variance: {stats.variation(measurements):.2f}, '
                          f'increase speed: {speed:.4f} [mmph]')
            # signalize failure
            self._react_on_failure()
    else:
        self.log.critical(f"All attempts to measure the level failed")
        self._react_on_failure()
    return self._get_polling_period() - (datetime.now()-start_mark).total_seconds()
def main(argv):
    """Benchmark the aiohttp, tornado and twisted runners and print results.

    Options come from ``ARGS.parse_args()``; ``argv`` itself is not read.
    Every runner is executed ``tries`` times in shuffled order. For each
    runner the mean requests-per-second and the mean, standard deviation and
    median of the request times (multiplied by 1000, printed as ms) are
    reported.

    Returns:
        Always ``0``.
    """
    args = ARGS.parse_args()
    loop = asyncio.get_event_loop()

    suite = [run_aiohttp, run_tornado, run_twisted] * args.tries
    random.shuffle(suite)

    all_times = collections.defaultdict(list)
    all_rps = collections.defaultdict(list)
    for test in suite:
        rps, times = loop.run_until_complete(
            run(test, args.count, args.concurrency,
                loop=loop, verbose=args.verbose, profile=args.profile))
        all_times[test.__name__].extend(times)
        all_rps[test.__name__].append(rps)

    if args.profile:
        profiler.dump_stats('out.prof')

    print()

    summary = ('RPS: {:d},\tmean: {:.3f} ms,'
               '\tstandard deviation {:.3f} ms\tmedian {:.3f} ms')
    for test_name in sorted(all_rps):
        times_ms = array(all_times[test_name]) * 1000
        fields = (
            int(tmean(array(all_rps[test_name]))),
            tmean(times_ms),
            tstd(times_ms),
            float(median(times_ms)),
        )
        print('Results for', test_name)
        print(summary.format(*fields))
    return 0
def norm_fit(self, show_it=0, save_it=0, save_dir=None, save_name=None):
    """Fit ``self.norm_dist_CDF`` to the empirical CDF of ``self.x``.

    ``self.x`` is sorted in place. The i-th smallest value (counting from 1)
    is paired with ``i / len(self.x)`` and ``optimize.curve_fit`` is started
    from the mean and standard deviation of the data. The fitted parameters
    and their covariance are stored in ``self.norm_params`` and
    ``self.norm_params_covariance``; the four keyword arguments are then
    forwarded to ``self.hist_norm_of_move``.
    """
    samples = self.x
    start = [stats.tmean(samples), stats.tstd(samples)]

    samples.sort()  # in place: self.x itself ends up sorted
    count = len(samples)
    empirical_cdf = [rank / count for rank in range(1, count + 1)]

    fitted = optimize.curve_fit(self.norm_dist_CDF, samples, empirical_cdf, start)
    self.norm_params, self.norm_params_covariance = fitted

    self.hist_norm_of_move(show_it=show_it,
                           save_it=save_it,
                           save_dir=save_dir,
                           save_name=save_name)
def trim_mean(arr, proportion):
    """Return the mean of ``arr`` with both tails trimmed by percentile.

    Half of ``proportion`` is cut from each end: only values strictly
    between the ``proportion/2`` and ``1 - proportion/2`` percentile scores
    are averaged.

    When no value lies strictly between those scores (for example when all
    values are equal) the median of ``arr`` is returned instead of raising.

    Args:
        arr: The values to average.
        proportion: Total fraction to trim, e.g. ``0.2`` trims 10% from
            each end.
    """
    import math

    #TODO: windowing (window len)
    percent = proportion * 100.
    lower_lim = scoreatpercentile(arr, percent / 2)
    upper_lim = scoreatpercentile(arr, 100 - percent / 2)
    try:
        tm = tmean(arr, limits=(lower_lim, upper_lim), inclusive=(False, False))
    except ValueError:
        # tmean found no values within the limits.
        tm = float('nan')
    if math.isnan(tm):
        # Also covers a tmean that reports the empty selection as NaN
        # rather than raising.
        return scoreatpercentile(arr, 50)
    return tm
def get_envelope(data):
    """Return the normalised magnitude of the analytic signal of ``data``.

    The magnitude of ``signal.hilbert(data)`` is divided by the mean of its
    values lying between its 90th percentile and its maximum.
    """
    envelope = abs(signal.hilbert(data)).astype('float')

    # normalize by mean in top 10% (rather than by the maximum)
    threshold = scoreatpercentile(envelope, 90)
    top_mean = tmean(envelope, limits=(threshold, max(envelope)))
    return envelope / top_mean
def compute_metric(self):
    """Store the mean per-host rate as a fraction of a fixed peak.

    For every host, 64 times the series from ``self.ts.assemble([0, 1],
    host, 0)`` is differentiated against ``self.ts.t``. The per-host rates
    are summed, averaged with ``tmean``, divided by ``self.ts.numhosts`` and
    finally by the peak of 76e9.
    """
    peak = 76. * 1.e9
    ts = self.ts
    intervals = numpy.diff(ts.t)

    total_rate = numpy.zeros(len(ts.t) - 1)
    for host in ts.j.hosts.keys():
        total_rate += numpy.diff(64. * ts.assemble([0, 1], host, 0)) / intervals

    self.metric = tmean(total_rate) / ts.numhosts / peak
    return
def EstimateParameters(self, data):
    """Estimate the parameters of ``self.dist`` from ``data``.

    Closed-form estimates are used for the 'uniform', 'norm' and 'expon'
    distributions; any other distribution is fitted with ``self.dist.fit``.
    The result is stored in ``self.mle`` in the same layout scipy's ``fit``
    uses (loc, scale), so it can be passed on to the distribution later.
    """
    name = self.dist.name
    if name == "uniform":
        minD = min(data)  # MLE lower bound = min(data)
        maxD = max(data)  # MLE upper bound = max(data)
        # uniform value range is [loc, loc+scale]
        self.mle = [floor(minD), ceil(maxD - minD)]
    elif name == "norm":
        # loc = mean, scale = standard deviation
        # NOTE(review): tstd is documented by SciPy as the sample standard
        # deviation (ddof=1), which is not the ddof=0 MLE - confirm intent.
        self.mle = [tmean(data), tstd(data)]
    elif name == "expon":
        # MLE lambda = 1 / mean(data); scale = 1 / lambda
        self.mle = [0, tmean(data)]
    else:
        # Was ``seld.dist.fit``: a typo that raised NameError on this branch.
        self.mle = self.dist.fit(data)
def getMean(flux,limits=(None,None),wave=None,wmin=None,wmax=None,minsize=20):

    '''
    Get the mean of a flux array, optionally restricted and clipped.

    The flux values are first selected with
    selectArray(flux,wave,wmin,wmax). If the selection holds minsize values
    or fewer, None is returned. Without limits the plain mean of the
    selection is returned; with limits the mean comes from tmean, which is
    given those limits.

    @param flux: The flux array
    @type flux: array

    @keyword limits: Flux limits handed to tmean if flux clipping is needed
                     before the mean calculation. (None,None) implies no
                     clipping. None for one of the limits implies a
                     half-open interval.

                     (default: (None,None))
    @type limits: (float,float)
    @keyword wave: The wavelength array, passed on to selectArray.

                   (default: None)
    @type wave: array
    @keyword wmin: The minimum wavelength, passed on to selectArray.

                   (default: None)
    @type wmin: float
    @keyword wmax: The maximum wavelength, passed on to selectArray.

                   (default: None)
    @type wmax: float
    @keyword minsize: The selection must hold more than this many values
                      for a mean to be calculated.

                      (default: 20)
    @type minsize: int

    @return: The mean of the selected flux values, or None if the selection
             is too small.
    @rtype: float

    '''

    selection = selectArray(flux,wave,wmin,wmax)
    if selection.size <= minsize:
        return None
    if limits == (None,None):
        return mean(selection)
    return tmean(selection,limits=limits)
def getLineScoreStats(df,lineScoreCol,histScoreCol,binNumber=50):
    '''Return a DataFrame of line score stats for each bin.

    The scores are binned with binLineScore. Every bin holding at least two
    scores becomes one row with the columns "mean", "var" and "stanDev.".
    Bins with fewer than two scores are left out, as before; an empty bin no
    longer raises IndexError.
    '''
    binnedScores = binLineScore(df,lineScoreCol,histScoreCol,binNumber)
    D = {}
    for binKey in binnedScores:
        scores = binnedScores[binKey]
        # NOTE(review): the previous code assigned (L[0], 0, 0) for
        # single-score bins and then skipped them - confirm that skipping,
        # rather than recording a zero-variance row, is what is wanted.
        if len(scores) <= 1:
            continue
        D[binKey] = {"mean": stats.tmean(scores),
                     "var": stats.tvar(scores),
                     "stanDev.": stats.tstd(scores)}
    return pd.DataFrame(D).T
def fit(self, X, y):
    """Compute a median/mean bias of the 'useful' votes for every business.

    The 'useful' vote counts of all reviews in
    ``self.data.training_reviews`` are grouped by ``business_id``. A
    business gets the bias ``median / mean`` of its counts, or ``1`` when
    the mean is 0. The biases are stored in ``self.business_winner_bias``.

    Args:
        X: Not used.
        y: Not used.

    Returns:
        ``self``.
    """
    self.business_winner_bias = {}

    business_review_votes = defaultdict(list)
    for review in self.data.training_reviews.values():
        business_review_votes[review['business_id']].append(review['votes']['useful'])

    # ``items()`` instead of ``iteritems()``: the latter only exists on
    # Python 2 dictionaries, the former works on both.
    for business_id, review_votes in business_review_votes.items():
        # NOTE(review): ``cmedian`` is not provided by current SciPy
        # releases - confirm where this name comes from.
        median = cmedian(review_votes)
        mean = tmean(review_votes)
        if len(review_votes) > 0 and mean != 0:
            bias = median / mean
        else:
            bias = 1
        self.business_winner_bias[business_id] = bias

    return self