def statsex(self, objects):
    """
    Do some statistics on a source list
    Return dictionary
    """
    import stats, pstat

    # Return if we have no objects
    if len(objects) == 0:
        return 0

    # Define dictionary to hold statistics
    stat = {}

    # Get number of objects
    stat['N'] = str(len(objects))

    # Define list (float) of FWHM values
    fwhm = [float(obj[7]) for obj in objects]
    # Define list (float) of ELLIPTICITY values
    el = [float(obj[6]) for obj in objects]
    # Define list (float) of THETA_IMAGE values
    pa = [float(obj[5]) for obj in objects]
    # Define list (float) of 'Stella-like' values
    stella = [float(obj[9]) for obj in objects]

    # Create a histogram of FWHM values of binsize 1 pixel
    hfwhm = stats.histogram(fwhm, 40, [0, 40])[0]

    stat['medianFWHM'] = "%.2f" % stats.median(fwhm)
    stat['meanFWHM'] = "%.2f" % stats.mean(fwhm)
    stat['modeFWHM'] = "%.2f" % float(hfwhm.index(max(hfwhm)) + 0.5)
    try:
        stat['stdevFWHM'] = "%.2f" % stats.stdev(fwhm)
    except ZeroDivisionError:
        stat['stdevFWHM'] = '0.00'

    stat['medianEL'] = "%.2f" % stats.median(el)
    stat['meanEL'] = "%.2f" % stats.mean(el)
    try:
        stat['stdevEL'] = "%.2f" % stats.stdev(el)
    except ZeroDivisionError:
        stat['stdevEL'] = '0.00'

    # Histogram of Ellipticity PA (-180 to 180, bins of 45 deg)
    #stat['histoTHETA'] = stats.histogram(pa,8,[-180,180])[0]
    # Histogram of Stellarity (0 to 1, bins of 0.05)
    #stat['histoStella'] = stats.histogram(stella,20,[0,1.01])[0]

    return stat
def corr(xdata, ydata):
    """corr(xydata) -> float
    corr(xdata, ydata) -> float

    Return the sample Pearson's Correlation Coefficient of (x,y) data.

    If ydata is None or not given, then xdata must be an iterable of (x, y)
    pairs. Otherwise, both xdata and ydata must be iterables of values, which
    will be truncated to the shorter of the two.

    >>> corr([(0.1, 2.3), (0.5, 2.7), (1.2, 3.1), (1.7, 2.9)])
    ... #doctest: +ELLIPSIS
    0.827429009335...

    The Pearson correlation is +1 in the case of a perfect positive
    correlation (i.e. an increasing linear relationship), -1 in the case of
    a perfect anti-correlation (i.e. a decreasing linear relationship), and
    some value between -1 and 1 in all other cases, indicating the degree of
    linear dependence between the variables.

    >>> xdata = [1, 2, 3, 4, 5, 6]
    >>> ydata = [2*x for x in xdata]  # Perfect correlation.
    >>> corr(xdata, ydata)
    1.0
    >>> corr(xdata, [5-y for y in ydata])  # Perfect anti-correlation.
    -1.0

    If there are not at least two data points, or if either all the x values
    or all the y values are equal, StatsError is raised.
    """
    n = len(xdata)
    assert n == len(ydata)
    if n < 2:
        raise StatsError(
            'correlation requires at least two data points, got %d' % n)
    # First pass is to determine the means.
    mx = stats.mean(xdata)
    my = stats.mean(ydata)
    # Second pass to determine the standard deviations.
    sx = stats.stdev(xdata, mx)
    sy = stats.stdev(ydata, my)
    if sx == 0:
        raise StatsError('all x values are equal')
    if sy == 0:
        raise StatsError('all y values are equal')
    # Third pass to calculate the correlation coefficient.
    ap = add_partial
    total = []
    for x, y in zip(xdata, ydata):
        term = ((x-mx)/sx) * ((y-my)/sy)
        ap(term, total)
    r = math.fsum(total)/(n-1)
    assert -1 <= r <= 1
    return r
def corr(x, y):
    N = len(x)
    if len(y) != N:
        raise Exception(
            "Sequences must be of the same length. X length: {0} ; Y length {1}"
            .format(N, len(y)))
    else:
        # Sum of the elementwise products (renamed to avoid shadowing the built-in sum)
        total = 0
        for index, xi in enumerate(x):
            total += xi * y[index]
        r = (total - N * stats.mean(x) * stats.mean(y)) / (
            (N - 1) * stats.stdev(x) * stats.stdev(y))
        return r
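# A minimal sketch (not from the original sources) checking that the shortcut
# formula above, r = (sum(x*y) - N*mean(x)*mean(y)) / ((N-1)*stdev(x)*stdev(y)),
# agrees with the definitional form used in the previous snippet. It assumes the
# snippets' `stats` module matches sample statistics, so the standard library's
# `statistics` module stands in here.
import statistics

def corr_shortcut(x, y):
    n = len(x)
    sxy = sum(xi * yi for xi, yi in zip(x, y))
    return (sxy - n * statistics.mean(x) * statistics.mean(y)) / (
        (n - 1) * statistics.stdev(x) * statistics.stdev(y))

def corr_definition(x, y):
    mx, my = statistics.mean(x), statistics.mean(y)
    sx, sy = statistics.stdev(x), statistics.stdev(y)
    return sum((xi - mx) / sx * (yi - my) / sy
               for xi, yi in zip(x, y)) / (len(x) - 1)

if __name__ == '__main__':
    xs = [1.0, 2.0, 3.0, 4.0, 5.0]
    ys = [2.1, 3.9, 6.2, 8.1, 9.8]   # made-up data for illustration
    assert abs(corr_shortcut(xs, ys) - corr_definition(xs, ys)) < 1e-9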
def testVariance(self):
    data = [1, 2, 3]
    self.assertEqual(stats.mean(data), 2)
    self.assertEqual(stats.pvariance(data), 2/3)
    self.assertEqual(stats.variance(data), 1.0)
    self.assertEqual(stats.pstdev(data), math.sqrt(2/3))
    self.assertEqual(stats.stdev(data), 1.0)
def default_score_set(expression, primer_set, primer_locs, max_dist, bg_dist_mean):
    """Evaluate an expression using the provided values and a set of metrics.

    :returns: the score and the metrics used to calculate it
    """
    # Calculate various metrics
    binding_distances = stats.seq_diff(primer_locs)
    namespace = {
        'set_size': len(primer_set),
        'fg_dist_mean': stats.mean(binding_distances),
        'fg_dist_std': stats.stdev(binding_distances),
        'fg_dist_gini': stats.gini(binding_distances),
        'bg_dist_mean': bg_dist_mean,
        'fg_max_dist': max_dist,
        '__builtins__': None}
    permitted_var_str = ", ".join(
        [key for key in namespace.keys() if key != "__builtins__"])
    score = None
    try:
        score = eval(expression, namespace, {'__builtins__': {}})
    except NameError as e:
        raise NameError(
            e.message +
            '. Permitted variables are %s. Refer to README or docs for help.'
            % permitted_var_str)
    del namespace['__builtins__']
    return score, namespace
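# A minimal, hypothetical usage sketch of the restricted-eval pattern above: the
# user-supplied scoring expression only sees the whitelisted metric names,
# because the evaluation globals replace __builtins__. The metric values below
# are invented for illustration.
namespace = {
    'set_size': 5,
    'fg_dist_mean': 120.0,
    'fg_dist_std': 35.0,
    'bg_dist_mean': 400.0,
    '__builtins__': None,
}
expression = "fg_dist_mean / bg_dist_mean + fg_dist_std / set_size"
score = eval(expression, namespace, {'__builtins__': {}})
print(score)  # 7.3 with the values above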
def print_latex_stats(diffs, label):
    print '%s & %.3f & %.3f & %.3f & %.3f \\\\' % (
        label, min(diffs) / 1000.0, max(diffs) / 1000.0,
        stats.mean(diffs) / 1000.0, stats.stdev(diffs) / 1000.0)
def liver(request):
    expressLevelsNishi = naturallysortedexpressionlist(
        Expression.objects.filter(organ='Liver', experiment='Nishimura'))
    expressLevelsPMT = naturallysortedexpressionlist(
        Expression.objects.filter(organ='Liver', experiment__startswith='PMT Sample'))
    expressBiotroveTransporters = naturallysortedtransporterlist(
        Transporter.objects.filter(expression__organ='Liver',
                                   expression__experiment__startswith='PMT Biotrove').distinct())
    important = Transporter.objects.filter(organ__name='Liver')
    importantNames = []
    for x in important:
        importantNames.append(str(x.symbol))
    synquery = Transporter.objects.all()
    syns = {}
    for x in synquery:
        syns[x.symbol] = x.synonyms

    # Calculate mean expression across all PMT samples (three replicates per transporter)
    pmtTableValues = []
    for x in range(len(expressLevelsPMT) // 3):
        build = []
        for y in range(3):
            build.append(expressLevelsPMT[x*3+y].value)
        avg = stats.mean(build)
        stdev = stats.stdev(build)
        id = expressLevelsPMT[x*3].trans
        pmtTableValues.append([id] + build + [avg, stdev])

    # Calculate median and quartiles across Biotrove samples
    biotroveTableValues = []
    for x in expressBiotroveTransporters:
        values = Expression.objects.filter(
            organ='Liver', experiment__startswith='PMT Biotrove',
            trans=x).values_list('value', flat=True).order_by('value')
        build = []
        build.append(x.symbol)
        build.append(quartiles(values, 1))
        build.append(quartiles(values, 2))
        build.append(quartiles(values, 3))
        biotroveTableValues.append(build)

    return render_to_response('liver.html',
                              {'expressionNishi': expressLevelsNishi,
                               'expressionPMT': pmtTableValues,
                               'organ': 'Liver',
                               'syns': syns,
                               'important': importantNames,
                               'expressionBiotrove': biotroveTableValues})
def describe(self):
    df = OrderedDict([("names", ["mean", "stdev", "count", "min", "max"])])
    if not stats.is_numeric(self.x):
        return df
    df['value'] = [stats.mean(self.x),
                   stats.stdev(self.x),
                   len([i for i in self if i is not None]),
                   min(self.x),
                   max(self.x)]
    return DataFrame(df)
def estimate(self, l):
    mu = stats.mean(l)
    self.sigma = stats.stdev(l) / 2
    self.mu0 = mu - self.sigma
    self.mu1 = mu + self.sigma
    self.p0 = self.p1 = 0.5
    return self.mu0, self.mu1, self.sigma, self.p0, self.p1
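# A standalone sketch of the initialisation heuristic above (not the original
# class): the two component means start half a standard deviation below and
# above the overall mean, with equal mixing weights. It assumes sample
# statistics, so the standard library stands in for the snippet's stats module.
import statistics

def init_two_component(values):
    mu = statistics.mean(values)
    sigma = statistics.stdev(values) / 2
    return mu - sigma, mu + sigma, sigma, 0.5, 0.5

print(init_two_component([1.0, 2.0, 3.0, 7.0, 8.0, 9.0]))  # invented data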
def scale(data_matrix):
    """returns the means and std dev of each col"""
    num_rows, num_cols = matrix.shape(data_matrix)
    means = [stats.mean(matrix.get_col(j, data_matrix))
             for j in range(num_cols)]
    stdevs = [stats.stdev(matrix.get_col(j, data_matrix))
              for j in range(num_cols)]
    return means, stdevs
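# Hypothetical usage of the column-scaling idea above: the original `matrix`
# helpers are not shown, so this sketch assumes a list-of-rows matrix and uses
# standard-library statistics to z-score each column.
import statistics

def scale_columns(rows):
    cols = list(zip(*rows))
    means = [statistics.mean(c) for c in cols]
    stdevs = [statistics.stdev(c) for c in cols]
    return means, stdevs

def rescale(rows):
    means, stdevs = scale_columns(rows)
    return [[(v - m) / s for v, m, s in zip(row, means, stdevs)] for row in rows]

print(rescale([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]]))
# each column now has mean 0 and (sample) standard deviation 1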
def print_ascii_stats(diffs, label): print "%8d %8d %8d %8d %8d %-20s" % ( len(diffs), min(diffs), max(diffs), stats.mean(diffs), stats.stdev(diffs), label, )
def log_normal_distribution(self):
    # take logs of sequence
    log_sequence = []
    for number in self.sequence:
        log_sequence.append(math.log(number, math.e))
    mean = stats.lmean(log_sequence)
    stdev = stats.stdev(log_sequence)
    number_of_points = len(self.sequence)
    distribution = log_sequence
    for each_value in range(number_of_points):
        distribution[each_value] = (distribution[each_value] - mean) / stdev
    return distribution
def describe(self): df = OrderedDict([("names", ["mean", "stdev", "count", "min", "max"])]) for k, v in self: if stats.is_numeric(v.x)==False: continue df[k] = [ stats.mean(v.x), stats.stdev(v.x), len([i for i in v if i is not None]), v.min(), v.max() ] return DataFrame(df)
def zNormal(f0s):
    """
    def zNormal(f0s):

    Input:  list of (time, F0) tuples
    Output: list of z-normalized (time, F0) tuples
    """
    from stats import stdev                 # it's easiest this way
    nF0s = [f0 for (time, f0) in f0s]       # destructure
    mu = sum(nF0s) / len(nF0s)              # get mean
    sigma = stdev(nF0s)                     # get s.d.
    return [(time, (f0 - mu) / sigma) for (time, f0) in f0s]  # apply normalization
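# A small usage check of the z-normalization above, under the assumption that
# stats.stdev is the sample standard deviation (as in the standard library);
# the F0 track below is invented for illustration.
import statistics

def z_normal(f0s):
    values = [f0 for (_, f0) in f0s]
    mu = sum(values) / len(values)
    sigma = statistics.stdev(values)
    return [(t, (f0 - mu) / sigma) for (t, f0) in f0s]

track = [(0.00, 110.0), (0.01, 115.0), (0.02, 120.0), (0.03, 125.0)]
normalized = [f0 for _, f0 in z_normal(track)]
print(statistics.mean(normalized))   # ~0.0
print(statistics.stdev(normalized))  # ~1.0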
def __remove_outliers(self, lista):
    # Outlier removal is currently disabled: the first branch always returns
    # the list unchanged.
    #if len(lista) < 2:
    if True:
        return lista
    else:
        from stats import mean, stdev
        # Prepare the limits: keep values within two standard deviations of the mean
        lmean = mean(lista)
        limstdev = 2 * stdev(lista)
        cleaned_list = []
        for item in lista:
            if abs(item - lmean) < limstdev:
                cleaned_list.append(item)
        #return cleaned_list
        return lista
def default_score_set(expression, primer_set, primer_locs, max_dist, bg_dist_mean):
    # Calculate various metrics
    binding_distances = seq_diff(primer_locs)
    namespace = {
        'set_size': len(primer_set),
        'fg_dist_mean': stats.mean(binding_distances),
        'fg_dist_std': stats.stdev(binding_distances),
        'fg_dist_gini': stats.gini(binding_distances),
        'bg_dist_mean': bg_dist_mean,
        'fg_max_dist': max_dist,
        '__builtins__': None}
    permitted_var_str = ", ".join([key for key in namespace.keys()
                                   if key != "__builtins__"])
    score = None
    try:
        score = eval(expression, namespace, {'__builtins__': {}})
    except NameError as e:
        raise NameError(e.message +
                        '. Permitted variables are %s. Refer to README or docs for help.'
                        % permitted_var_str)
    del namespace['__builtins__']
    # print_primer_set(primer_set, [score, namespace], output_handle)
    return score, namespace
def get_avg_box_width():
    box_widths = []
    filename = './image/test_bi3.jpg'
    image = cvLoadImage(filename, CV_8UC1)
    storage = cvCreateMemStorage(0)
    input_image = cvCloneImage(image)
    # output_image = cvCloneImage(image)
    output_image = cvCreateImage(cvGetSize(input_image), 8, 3)
    cvCvtColor(input_image, output_image, CV_GRAY2BGR)
    count, contours = cvFindContours(input_image, storage, sizeof_CvContour,
                                     CV_RETR_CCOMP, CV_CHAIN_APPROX_NONE,
                                     cvPoint(0, 0))
    for contour in contours.hrange():
        bbox = cvBoundingRect(contour, 0)
        box_width = bbox.width
        if 100 > box_width > 10:
            box_widths.append(box_width)
    # return box_widths
    width_mean = mean(box_widths)
    width_lmean = lmean(box_widths)
    width_stdev = stdev(box_widths)
    width_lstdev = lstdev(box_widths)
    return (width_mean, width_lmean, width_stdev, width_lstdev)
for line in sys.stdin:
    if line.startswith('#'):
        continue
    (mp_id, thread_id, secs, ns) = [int(x) for x in line.split()]
    nanosecs = (secs * 1000000000 + ns)
    #print mp_id, thread_id, nanosecs
    if mp_id == mp_id_start:
        times[thread_id] = nanosecs
    elif mp_id == mp_id_stop and times.has_key(thread_id):
        differences.append(nanosecs - times[thread_id])
        del times[thread_id]

# print 'Values = %8d' % len(differences)
# print 'Min = %8d' % min(differences)
# print 'Max = %8d' % max(differences)
# print 'Mean = %8d' % stats.mean(differences)
# print 'Stdev = %8d' % stats.stdev(differences)

label = '#' + str(mp_id_start) + '-#' + str(mp_id_stop)
print ' Benchmark Values Min Max Mean Stdev'
print ' %8s %8d %8d %8d %8d %8d' % (
    label, len(differences), min(differences), max(differences),
    stats.mean(differences), stats.stdev(differences))

# EOF
def test_stdev(self): """standard deviation is sqrt of variance""" self.assertEqual(0.0, stdev([2,2,2,2])) self.assertEqual(sqrt(2), stdev([1,2,3,4,5]))
# remove the records with invalid responses
df2 = df[(df['in_hospital'] <= 2) & (df['health_status'] <= 5)]
health_h1 = df2['health_status'][df2['in_hospital'] == 1]
health_h0 = df2['health_status'][df2['in_hospital'] == 2]

# First, count number of records to make sure we match MHE. Should be
#  7774 hospital
# 90049 no hospital
n = len(df2)
n_h1 = len(health_h1)
n_h0 = len(health_h0)

mean_h1 = s.mean(health_h1)
mean_h0 = s.mean(health_h0)
stdev_h1 = s.stdev(health_h1)
stdev_h0 = s.stdev(health_h0)
sterr_h1 = s.sterrmean(stdev_h1, n_h1)
sterr_h0 = s.sterrmean(stdev_h0, n_h0)

# calculate two-sample t-test to test if means are significantly different
tt = (mean_h1 - mean_h0) / np.sqrt((stdev_h1**2 / float(n_h1)) + (stdev_h0**2 / float(n_h0)))
pval = stats.t.sf(np.abs(tt), n - 1) * 2  # two sided t-value, prob(abs(t)) > tt

# do the same using scipy.stats canned routine
# unequal variance
tt2 = float(stats.ttest_ind(health_h1, health_h0, equal_var=0)[0])
pval2 = stats.t.sf(np.abs(tt2), n - 1) * 2  # two sided t-value, prob(abs(t)) > tt

print("Total Sample: {0}".format(n))
print("Group\t\tSample Size\t\tMean\t\tStd. Err")
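# A self-contained sketch of the unequal-variance (Welch) t statistic computed
# above, using only the standard library; the two small samples are invented
# for illustration (the p-value step is omitted because it needs a t
# distribution, e.g. scipy.stats.t.sf as in the snippet).
import math
import statistics

def welch_t(a, b):
    va, vb = statistics.variance(a), statistics.variance(b)
    return (statistics.mean(a) - statistics.mean(b)) / math.sqrt(va / len(a) + vb / len(b))

group1 = [3.1, 2.8, 3.4, 3.0, 2.9]
group2 = [2.5, 2.7, 2.4, 2.6, 2.8]
print(welch_t(group1, group2))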
print('\nCENTRAL TENDENCY')
print('geometricmean:', stats.geometricmean(l), stats.geometricmean(lf),
      stats.geometricmean(a), stats.geometricmean(af))
print('harmonicmean:', stats.harmonicmean(l), stats.harmonicmean(lf),
      stats.harmonicmean(a), stats.harmonicmean(af))
print('mean:', stats.mean(l), stats.mean(lf), stats.mean(a), stats.mean(af))
print('median:', stats.median(l), stats.median(lf), stats.median(a), stats.median(af))
print('medianscore:', stats.medianscore(l), stats.medianscore(lf),
      stats.medianscore(a), stats.medianscore(af))
print('mode:', stats.mode(l), stats.mode(a))

print('\nMOMENTS')
print('moment:', stats.moment(l), stats.moment(lf), stats.moment(a), stats.moment(af))
print('variation:', stats.variation(l), stats.variation(a), stats.variation(lf), stats.variation(af))
print('skew:', stats.skew(l), stats.skew(lf), stats.skew(a), stats.skew(af))
print('kurtosis:', stats.kurtosis(l), stats.kurtosis(lf), stats.kurtosis(a), stats.kurtosis(af))
print('mean:', stats.mean(a), stats.mean(af))
print('var:', stats.var(a), stats.var(af))
print('stdev:', stats.stdev(a), stats.stdev(af))
print('sem:', stats.sem(a), stats.sem(af))
print('describe:')
print(stats.describe(l))
print(stats.describe(lf))
print(stats.describe(a))
print(stats.describe(af))

print('\nFREQUENCY')
print('freqtable:')
print('itemfreq:')
print(stats.itemfreq(l))
print(stats.itemfreq(a))
print('scoreatpercentile:', stats.scoreatpercentile(l, 40), stats.scoreatpercentile(lf, 40),
      stats.scoreatpercentile(a, 40), stats.scoreatpercentile(af, 40))
print('percentileofscore:', stats.percentileofscore(l, 12), stats.percentileofscore(lf, 12),
      stats.percentileofscore(a, 12), stats.percentileofscore(af, 12))
print('histogram:', stats.histogram(l), stats.histogram(a))
print('cumfreq:')
            offset.append(teloff)
            writeLog(logpath, file, "FocusPyr: teloffset= %d" % teloff)
            #print "FocusPyr: teloffset= %d distance=(%f,%f) (%f,%f) %s" % (teloff,xdist,ydist,x1,y1,o[11])
            # Append to a list to be inserted into Objects table
            pyrobjects.append((teloff, xdist, ydist, x1, y1, o[11]))

    if len(offset) > 0:
        # Determine mean, median and stdev of unclipped offsets
        mean = stats.mean(offset)
        median = stats.median(offset)
        try:
            stdev = stats.stdev(offset)
        except ZeroDivisionError:
            stdev = '0.00'

        # Do a 1-sigma clipping
        clipLowLimit = float(mean) - 1 * float(stdev)
        clipHighLimit = float(mean) + 1 * float(stdev)
        offset = [off for off in offset
                  if float(off) <= clipHighLimit and float(off) >= clipLowLimit]

        # Determine stats on sigma clipped data
        h['meanFocusOffset'] = stats.mean(offset)
        h['medianFocusOffset'] = stats.median(offset)
        try:
            h['stdevFocusOffset'] = stats.stdev(offset)
        except ZeroDivisionError:
def __parse_data_file__(self, datafile, table):
    from stats import mean, stdev
    rt_list = []
    # Parse the data file and fill the tables
    for line in datafile:
        campos = line.split()
        if len(campos) == 0:        # if the line is empty
            continue
        elif campos[0] == "\n":
            continue
        elif campos[0] == "#?":     # if it is help text
            continue
        elif campos[0] == "#!":     # if the datablock starts here
            continue
        # If it is none of the above, it is a datablock
        trial = table.row
        trial['order'] = int(campos[0])
        trial['trial_type'] = trial_enum[campos[1]]
        trial['ctoa'] = float(campos[2])
        trial['ctd'] = float(campos[3])
        trial['rt'] = int(campos[4])
        trial['train'] = False
        # Encode the trial type: cued/uncued
        if (campos[1] == 'LL' or campos[1] == 'RR'):
            trial['cueing'] = cueing_enum['cued']
        elif (campos[1] == 'LR' or campos[1] == 'RL'):
            trial['cueing'] = cueing_enum['uncued']
        # Encode the key that was pressed
        if (int(campos[5]) == 19):
            trial['key'] = key_enum['R']
        elif (int(campos[5]) == 20):
            trial['key'] = key_enum['L']
        # Encode the validity of the trial
        #
        # Responding on the right, it must satisfy:
        if trial['key'] == key_enum['R'] \
                and 250 < trial['rt'] < 550 \
                and (trial['trial_type'] == trial_enum['LR']
                     or trial['trial_type'] == trial_enum['RR']):
            trial['valid'] = True
        #
        # Responding on the left, it must satisfy:
        elif trial['key'] == key_enum['L'] \
                and 300 < trial['rt'] < 450 \
                and (trial['trial_type'] == trial_enum['RL']
                     or trial['trial_type'] == trial_enum['LL']):
            trial['valid'] = True
        #
        # Otherwise the trial is invalid
        else:
            trial['valid'] = False
        # Now build the list of valid reaction times
        if trial['valid'] == True:
            rt_list.append(trial['rt'])
        # Append the row and save
        trial.append()
        table.flush()
    # End of the loop: now compute the normalized rt values
    rt_mean = mean(rt_list)
    rt_stdev = stdev(rt_list)
    for trial in table.iterrows():
        trial['rt_norm'] = (trial['rt'] - rt_mean) / rt_stdev
        trial.update()
    # Return the list of valid rt values, to be used in later averages
    table.flush()
    return rt_list
def __simple_stats__(self, group):
    from stats import mean, stdev, sterr, ttest_ind

    # Create the table description for the simple statistical analysis
    class DiscreteData(tables.IsDescription):
        ctoa = tables.Float32Col()
        ctd = tables.Float32Col()
        mean = tables.Float32Col()
        stdev = tables.Float32Col()
        frequency = tables.UInt16Col()

    # Create the statistics tables and a pointer to the raw data table
    table_cued = self.h5file.createTable(group, 'discrete_data_cued', DiscreteData,
                                         "Discrete organization of cued IOR continous data")
    table_uncued = self.h5file.createTable(group, 'discrete_data_uncued', DiscreteData,
                                           "Discrete organization of uncued IOR continous data")
    table_raw = group.rawdata
    # Create arrays to hold the values and plot them with matrixplot3D
    self.cued_array = zeros((self.ctoa_size, self.ctd_size))
    self.uncued_array = zeros((self.ctoa_size, self.ctd_size))
    self.rt_diff = zeros((self.ctoa_size, self.ctd_size))
    # Now generate intervals for the classes in the data
    # and do statistics with those classes
    ctoa_index = -1
    for ctoa in self.ctoa_range:
        ctoa_index += 1
        ctoa_min = ctoa - self.ctoa_bean_width / 2
        ctoa_max = ctoa + self.ctoa_bean_width / 2
        ctd_index = -1
        for ctd in self.ctd_range:
            ctd_index += 1
            ctd_min = ctd - self.ctd_bean_width / 2
            ctd_max = ctd + self.ctd_bean_width / 2
            cued_lst_tmp = [trial_raw['rt'] for trial_raw in table_raw.iterrows()
                            if ctoa_min < trial_raw['ctoa'] < ctoa_max
                            and ctd_min < trial_raw['ctd'] < ctd_max
                            and trial_raw['cueing'] == cueing_enum['cued']
                            and trial_raw['valid'] == True]
            uncued_lst_tmp = [trial_raw['rt'] for trial_raw in table_raw.iterrows()
                              if ctoa_min < trial_raw['ctoa'] < ctoa_max
                              and ctd_min < trial_raw['ctd'] < ctd_max
                              and trial_raw['cueing'] == cueing_enum['uncued']
                              and trial_raw['valid'] == True]
            cued_lst = self.__remove_outliers(cued_lst_tmp)
            uncued_lst = self.__remove_outliers(uncued_lst_tmp)
            cued = table_cued.row
            cued['ctoa'] = ctoa
            cued['ctd'] = ctd
            cued['frequency'] = len(cued_lst)
            if cued['frequency'] == 0:
                cued['mean'] = 0
                cued['stdev'] = 0
            elif cued['frequency'] == 1:
                cued['mean'] = mean(cued_lst)
                cued['stdev'] = 0
            else:
                cued['mean'] = mean(cued_lst)
                cued['stdev'] = stdev(cued_lst)
            uncued = table_uncued.row
            uncued['ctoa'] = ctoa
            uncued['ctd'] = ctd
            uncued['frequency'] = len(uncued_lst)
            if uncued['frequency'] == 0:
                uncued['mean'] = 0
                uncued['stdev'] = 0
            elif uncued['frequency'] == 1:
                uncued['mean'] = mean(uncued_lst)
                uncued['stdev'] = 0
            else:
                uncued['mean'] = mean(uncued_lst)
                uncued['stdev'] = stdev(uncued_lst)
            self.cued_array[ctoa_index][ctd_index] = cued['mean']
            self.uncued_array[ctoa_index][ctd_index] = uncued['mean']
            self.rt_diff[ctoa_index][ctd_index] = cued['mean'] - uncued['mean']
            cued.append()
            uncued.append()
    table_cued.flush()
    table_uncued.flush()
directDelays = [centralizeDelay((sampleDelay + removedSamples) * c / samplingRate, x, y, z, earDistance)
                for x, y, z, sampleDelay, azimuth, elevation in sampleDelays
                if elevation < math.radians(aboveCutoff)
                and elevation > math.radians(belowCutoff)
                and azimuth > math.radians(190)
                and azimuth < math.radians(260)]

for x, y, z, sampleDelay, azimuth, elevation in sampleDelays:
    if elevation < math.radians(aboveCutoff) and elevation > math.radians(belowCutoff) and \
            azimuth > math.radians(190) and azimuth < math.radians(260):
        if centralizeDelay((sampleDelay + removedSamples) * c / samplingRate, x, y, z, earDistance) < 1.46:
            print math.degrees(azimuth), math.degrees(elevation)
        if centralizeDelay((sampleDelay + removedSamples) * c / samplingRate, x, y, z, earDistance) > 1.50:
            print math.degrees(azimuth), math.degrees(elevation)

import stats
print "Direct delays stdev:", stats.stdev(directDelays)
print "Direct delays mean:", stats.mean(directDelays)

import pylab
pylab.grid()
pylab.hist(directDelays, bins=7)
pylab.show()

nominalDistance = 1.4
meanDelay = sum((sampleDelay for x, y, z, sampleDelay, _, _ in sampleDelays)) * c / samplingRate / len(sampleDelays)
for x, y, z, sampleDelay, azimuth, elevation in sampleDelays:
    distanceDelay = sampleDelay * c / samplingRate - meanDelay + nominalDistance
    centralizedDelay = centralizeDelay(distanceDelay, x, y, z, earDistance)
    deviations.append(sampleDelay * c / samplingRate)
    if (abs(sampleDelay - 41) > 13):
def __load_data__(self):
    """ Function that reads the output data from the C program """
    from stats import mean, stdev

    # Create the table description for the raw, unprocessed input data
    class Trial(tables.IsDescription):
        trial_type = tables.EnumCol(trial_enum, 'LL', base='uint8')
        key = tables.EnumCol(key_enum, 'L', base='uint8')
        cueing = tables.EnumCol(cueing_enum, 'cued', base='uint8')
        ctoa = tables.Float32Col()
        ctd = tables.Float32Col()
        valid = tables.UInt8Col()
        rt = tables.Float32Col()
        rt_norm = tables.Float32Col()
        rt_corr = tables.Float32Col()
        index = tables.UInt16Col()
        order = tables.UInt16Col()
        train = tables.UInt8Col()

    # Create the h5 file
    h5file = tables.openFile(self.h5filename, mode="w",
                             title="IOR experiment results")
    # Generate the list of raw data files to be processed
    rawdata_filelist = [f for f in os.listdir(self.rawdata_path)
                        if os.path.isfile(os.path.join(self.rawdata_path, f))
                        and f.find(self.rawdata_str) == 0]
    # For each subject, create a data group
    for rawdata_filename in rawdata_filelist:
        # Create the necessary groups and tables
        datafile = open(os.path.join(self.rawdata_path, rawdata_filename), "r")
        group = h5file.createGroup("/",
                                   'Data' + rawdata_filename[len(self.rawdata_str):],
                                   'Data from ' + rawdata_filename[len(self.rawdata_str):])
        table = h5file.createTable(group, 'rawdata', Trial,
                                   "Raw data from IOR continous experiment")
        # Fill them using the private method __parse_data_file__()
        rt_tmp = self.__parse_data_file__(datafile, table)
        self.rt.extend(rt_tmp)
        datafile.close()
    # Here we create the raw data table combined from all subjects
    groupeto = h5file.createGroup("/", 'DataCombined',
                                  "Combined data from all subjects of IOR experiment")
    table = h5file.createTable(groupeto, 'rawdata', Trial,
                               "Raw data from IOR experiment")
    for group in h5file.walkGroups():
        if group._v_name == '/' or group._v_name == 'DataCombined':
            continue
        temp_table = group.rawdata[:]
        table.append(temp_table)
        table.flush()
    # Compute the mean and stdev of the full data set, in order to rescale
    rt_mean = mean(self.rt)
    rt_stdev = stdev(self.rt)
    for trial in table.iterrows():
        #if trial['valid'] == True:
        trial['rt'] = trial['rt_norm']
        trial.update()
    table.flush()
    #trial['rt'] = trial['rt_norm'] * rt_stdev + rt_mean
    # Quick hack so that it runs fast
    # Return the h5 file that was created
    return h5file
print(stats.relfreq(lf))
print(stats.relfreq(l))
print(stats.relfreq(lf))

print('\nVARIATION')
print('obrientransform:')
l = [float(f) for f in list(range(1, 21))]
ll = [l] * 5
print(stats.obrientransform(l, l, l, l, l))
print('samplevar:', stats.samplevar(l), stats.samplevar(l))
print('samplestdev:', stats.samplestdev(l), stats.samplestdev(l))
print('var:', stats.var(l), stats.var(l))
print('stdev:', stats.stdev(l), stats.stdev(l))
print('sterr:', stats.sterr(l), stats.sterr(l))
print('sem:', stats.sem(l), stats.sem(l))
print('z:', stats.z(l, 4), stats.z(l, 4))
print('zs:')
print(stats.zs(l))
print(stats.zs(l))

print('\nTRIMMING')
print('trimboth:')
print(stats.trimboth(l, .2))
print(stats.trimboth(lf, .2))
print(stats.trimboth(l, .2))
print(stats.trimboth(lf, .2))
print('trim1:')
print(stats.trim1(l, .2))
# print 'Values = %8d' % len(differences)
# print 'Min = %8d' % min(differences)
# print 'Max = %8d' % max(differences)
# print 'Mean = %8d' % stats.mean(differences)
# print 'Stdev = %8d' % stats.stdev(differences)
# label = '#' + str(mp_id_start) + '-#' + str(mp_id_stop)
# print '\hline'
# print 'Benchmark & Min & Max & Mean & StdDev \\\\'
# print '\hline'
# print '\hline'

# no result for these meassuring points
if len(differences) == 0:
    sys.exit(0)

line = '%s & %.3f & %.3f & %.3f & %.3f \\\\' % (
    label, min(differences) / 1000.0, max(differences) / 1000.0,
    stats.mean(differences) / 1000.0, stats.stdev(differences) / 1000.0)
line = line.replace('.', ',')
print line

# EOF
    a), stats.median(af)
print 'medianscore:', stats.medianscore(l), stats.medianscore(
    lf), stats.medianscore(a), stats.medianscore(af)
print 'mode:', stats.mode(l), stats.mode(a)

print '\nMOMENTS'
print 'moment:', stats.moment(l), stats.moment(lf), stats.moment(
    a), stats.moment(af)
print 'variation:', stats.variation(l), stats.variation(a), stats.variation(
    lf), stats.variation(af)
print 'skew:', stats.skew(l), stats.skew(lf), stats.skew(a), stats.skew(af)
print 'kurtosis:', stats.kurtosis(l), stats.kurtosis(lf), stats.kurtosis(
    a), stats.kurtosis(af)
print 'mean:', stats.mean(a), stats.mean(af)
print 'var:', stats.var(a), stats.var(af)
print 'stdev:', stats.stdev(a), stats.stdev(af)
print 'sem:', stats.sem(a), stats.sem(af)
print 'describe:'
print stats.describe(l)
print stats.describe(lf)
print stats.describe(a)
print stats.describe(af)

print '\nFREQUENCY'
print 'freqtable:'
print 'itemfreq:'
print stats.itemfreq(l)
print stats.itemfreq(a)
print 'scoreatpercentile:', stats.scoreatpercentile(
    l, 40), stats.scoreatpercentile(lf, 40), stats.scoreatpercentile(
        a, 40), stats.scoreatpercentile(af, 40)
def least_squares_fit(x, y):
    """Given training data x and y, compute the alpha and beta
    that minimize the sum of squared errors."""
    beta = corr(x, y) * stdev(y) / stdev(x)
    alpha = mean(y) - beta * mean(x)
    return alpha, beta
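# A quick check of the closed-form fit above (beta = corr * stdev(y)/stdev(x),
# alpha = mean(y) - beta*mean(x)) on exactly linear data, using the standard
# library (statistics.correlation requires Python 3.10+) as a stand-in for the
# helpers assumed by the snippet.
import statistics

def least_squares_fit_check(x, y):
    beta = statistics.correlation(x, y) * statistics.stdev(y) / statistics.stdev(x)
    alpha = statistics.mean(y) - beta * statistics.mean(x)
    return alpha, beta

x = [1.0, 2.0, 3.0, 4.0]
y = [3.0, 5.0, 7.0, 9.0]   # y = 2x + 1
print(least_squares_fit_check(x, y))  # approximately (1.0, 2.0)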
print '\nVARIATION'
print 'obrientransform:'
l = range(1, 21)
a = N.array(l)
ll = [l] * 5
aa = N.array(ll)
print stats.obrientransform(l, l, l, l, l)
print stats.obrientransform(a, a, a, a, a)
print 'samplevar:', stats.samplevar(l), stats.samplevar(a)
print 'samplestdev:', stats.samplestdev(l), stats.samplestdev(a)
print 'var:', stats.var(l), stats.var(a)
print 'stdev:', stats.stdev(l), stats.stdev(a)
print 'sterr:', stats.sterr(l), stats.sterr(a)
print 'sem:', stats.sem(l), stats.sem(a)
print 'z:', stats.z(l, 4), stats.z(a, 4)
print 'zs:'
print stats.zs(l)
print stats.zs(a)

print '\nTRIMMING'
print 'trimboth:'
print stats.trimboth(l, .2)
print stats.trimboth(lf, .2)
print stats.trimboth(a, .2)
print stats.trimboth(af, .2)
print 'trim1:'
print stats.trim1(l, .2)
                offset.append(teloff)
                offset.append(teloff)
            else:
                offset.append(teloff)
        else:
            offset.append(teloff)
        #writeLog(logpath,file,"FocusPyr: teloffset= %d" % offset)
        #print "FocusPyr: teloffset= %d distance=%f (%f,%f)" % (teloff,distance,x1,y1)

    if len(offset) > 0:
        # Determine mean, median and stdev of unclipped offsets
        mean = stats.mean(offset)
        median = stats.median(offset)
        try:
            stdev = stats.stdev(offset)
        except ZeroDivisionError:
            stdev = '0.00'

        # Do a 1-sigma clipping
        clipLowLimit = float(mean) - 1 * float(stdev)
        clipHighLimit = float(mean) + 1 * float(stdev)
        offset = [off for off in offset
                  if float(off) < clipHighLimit and float(off) > clipLowLimit]

        # Determine stats on sigma clipped data
        mean_c = stats.mean(offset)
        median_c = stats.median(offset)
        try:
            stdev_c = stats.stdev(offset)
        except ZeroDivisionError:
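# A compact, standalone sketch of the 1-sigma clipping step used in the two
# focus-offset snippets above, written against the standard library; the
# offsets below are invented and the clip width (1 sigma) follows the snippets.
import statistics

def sigma_clip(values, n_sigma=1.0):
    m = statistics.mean(values)
    s = statistics.stdev(values)
    return [v for v in values if m - n_sigma * s <= v <= m + n_sigma * s]

offsets = [10.0, 11.0, 9.5, 10.5, 30.0]   # 30.0 is the outlier
clipped = sigma_clip(offsets)
print(clipped, statistics.mean(clipped))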
def calculateStats(scores, nullvalues):
    """Calculate statistics based on actual DEAP scores and null value scores

    Args:
        scores: a list of pairs of floats representing expression scores
        nullvalues: a list of lists of pairs of floats representing null expression scores

    Returns:
        A triplet of StatOutput objects (minimum, maximum, absolute)
    """
    minvalues = []
    maxvalues = []
    absvalues = []
    for pairs in nullvalues:
        minList = []
        maxList = []
        for pair in pairs:
            minList.append(pair[0])
            maxList.append(pair[1])
        minVal = stats.mean(minList)
        maxVal = stats.mean(maxList)
        minvalues.append(minVal)
        maxvalues.append(maxVal)
        absvalues.append(max(maxVal, abs(minVal)))
    maxmean = stats.mean(maxvalues)
    maxstdev = stats.stdev(maxvalues)
    minmean = stats.mean(minvalues)
    minstdev = stats.stdev(minvalues)
    absmean = stats.mean(absvalues)
    absstdev = stats.stdev(absvalues)
    realMaxVals = []
    realMinVals = []
    realMaxPathSubset = ''
    realMinPathSubset = ''
    for score in scores:
        realMaxVals.append(score[1])
        realMinVals.append(score[0])
        realMaxPathSubset = score[3]
        realMinPathSubset = score[2]
    maxVal = stats.mean(realMaxVals)
    minVal = stats.mean(realMinVals)
    maxCount = 0
    for val in maxvalues:
        if val >= maxVal:
            maxCount += 1
    maxPval = float(maxCount) / float(len(maxvalues))
    maxStats = StatOutput(mean=maxmean, stdev=maxstdev, curval=maxVal, qval=maxPval)
    minCount = 0
    for val in minvalues:
        if val <= minVal:
            minCount += 1
    minPval = float(minCount) / float(len(minvalues))
    minStats = StatOutput(mean=minmean, stdev=minstdev, curval=minVal, qval=minPval)
    absVal = max(maxVal, abs(minVal))
    absPathSubset = realMinPathSubset
    if maxVal == absVal:
        absPathSubset = realMaxPathSubset
    absCount = 0
    for i in range(0, len(maxvalues)):
        if max(maxvalues[i], abs(minvalues[i])) >= absVal:
            absCount += 1
    absPval = float(absCount) / float(len(maxvalues))
    absStats = StatOutput(mean=absmean, stdev=absstdev, curval=absVal, qval=absPval,
                          pathSubset=absPathSubset)
    return [minStats, maxStats, absStats]
def get_lowerbound(self, confidence_interval, lst): return self.get_mean(lst) - (self.get_z(confidence_interval) * stats.stdev(lst) / math.sqrt(len(lst)))
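# A standalone illustration of the lower-bound formula above
# (mean - z * stdev / sqrt(n)); the z value for a 95% interval (1.96) and the
# sample values are assumptions for the example, not taken from the source.
import math
import statistics

def lower_bound(values, z=1.96):
    return statistics.mean(values) - z * statistics.stdev(values) / math.sqrt(len(values))

print(lower_bound([10.2, 9.8, 10.5, 10.1, 9.9, 10.3]))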
def test_stdev(self): """Standard Deviation is sqrt of variance""" self.assertEqual(0.0, stdev([2, 2, 2, 2])) self.assertEqual(0.5, stdev([1, 1, 2, 2]))