def sim_eigen(g1, g2):
    eigen_list1 = []
    eigen_list2 = []
    eigen_cent1 = nx.eigenvector_centrality(g1)
    eigen_cent2 = nx.eigenvector_centrality(g2)
    n1 = len(g1.nodes())
    n2 = len(g2.nodes())
    numbin = 50
    for key, value in eigen_cent1.items():
        eigen_list1.append(value)
    hist1 = histogram(eigen_list1, numbins=numbin)
    for i in range(len(hist1[0])):
        hist1[0][i] += 1  # add-one smoothing keeps the KL divergence finite
    for key, value in eigen_cent2.items():
        eigen_list2.append(value)
    hist2 = histogram(eigen_list2, numbins=numbin)
    for i in range(len(hist2[0])):
        hist2[0][i] += 1
    KL = entropy(hist1[0], hist2[0])
    return KL
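# The snippets in this section all call scipy.stats.histogram, which was
# deprecated in SciPy 0.17 and removed in 1.0.  On a modern SciPy they need a
# stand-in; the sketch below is one possible shim built on numpy.histogram,
# returning the same (count, lowerreallimit, binsize, extrapoints) 4-tuple
# these snippets unpack.  The half-bin widening of the default limits mirrors
# what the old implementation did, but treat this as an approximation rather
# than a drop-in guarantee.
import numpy as np

def histogram_compat(a, numbins=10, defaultlimits=None, weights=None):
    a = np.ravel(a)
    if defaultlimits is None:
        data_min, data_max = a.min(), a.max()
        s = (data_max - data_min) / (2.0 * (numbins - 1.0))  # widen by half a bin
        defaultlimits = (data_min - s, data_max + s)
    count, edges = np.histogram(a, bins=numbins, range=defaultlimits,
                                weights=weights)
    binsize = edges[1] - edges[0]
    extrapoints = int(((a < defaultlimits[0]) | (a > defaultlimits[1])).sum())
    return count, defaultlimits[0], binsize, extrapoints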
def sim_betweeness(g1, g2):
    betweeness_list1 = []
    betweeness_list2 = []
    betweeness_cent1 = nx.betweenness_centrality(g1)
    betweeness_cent2 = nx.betweenness_centrality(g2)
    n1 = len(g1.nodes())
    n2 = len(g2.nodes())
    numbin = 50
    for key, value in betweeness_cent1.items():
        betweeness_list1.append(value)
    hist1 = histogram(betweeness_list1, numbins=numbin)
    for i in range(len(hist1[0])):
        hist1[0][i] += 1
    for key, value in betweeness_cent2.items():
        betweeness_list2.append(value)
    hist2 = histogram(betweeness_list2, numbins=numbin)
    for i in range(len(hist2[0])):
        hist2[0][i] += 1
    KL = entropy(hist1[0], hist2[0])
    return KL
def simulate_seasons(N, start_week, players):
    winner_equity = defaultdict(float)
    last_weeks = []
    for i in range(N):
        if i % 1000 == 0:
            print i
        last_week, winners = simulate_season(start_week, players)
        for winner in winners:
            winner_equity[winner] += 1. / len(winners)
        last_weeks.append(last_week)
    pp({p: "%.2f" % (100 * e / N) for p, e in winner_equity.iteritems()})
    print stats.describe(last_weeks)
    print stats.histogram(last_weeks)
def mutual_information(data1, data2, domain=256, smoothing=0, log_base=2):
    """Mutual information for greyscale images

    Parameters
    ----------
    data1 : ndarray
        first data array
    data2 : ndarray
        second data array (size is expected to be the same as in case of data1)
    domain : int, optional (default is 256)
        value domain (e.g. all available grey values in case of image)
    smoothing : int
        value "k" in additive (aka Laplace) smoothing
    """
    m_hist = np.matrix(np.zeros((domain, domain)))
    img1_un = np.ravel(data1)
    img2_un = np.ravel(data2)
    d1_hist = histogram(img1_un, numbins=256, defaultlimits=(0, 255))[0]
    d2_hist = histogram(img2_un, numbins=256, defaultlimits=(0, 255))[0]
    for i in range(np.size(img1_un)):
        color1 = int(round(img1_un[i]))
        color2 = int(round(img2_un[i]))
        m_hist[color1, color2] += 1
    d1_hist = (d1_hist + smoothing) / float(len(img1_un) + smoothing * domain)
    d2_hist = (d2_hist + smoothing) / float(len(img2_un) + smoothing * domain)
    m_hist = (np.ravel(m_hist) + smoothing) / float(len(img1_un) + smoothing * domain * domain)
    m_hist = np.resize(m_hist, (domain, domain))
    ans = 0
    for i in range(0, domain):
        for j in range(0, domain):
            if m_hist[i][j] == 0 or d1_hist[i] == 0 or d2_hist[j] == 0:
                continue
            tmp = m_hist[i][j] * math.log(m_hist[i][j] / d1_hist[i] / d2_hist[j], log_base)
            ans += tmp
    return ans
def myFit(data, nBins=30):
    # data binning
    freqObs, xMin, dx, nOut = sciStat.histogram(data, nBins)
    # prepare observed x,y-values
    N = len(data)
    xVals = [xMin + (i + 0.5) * dx for i in range(nBins)]
    yVals = [freqObs[i] / (N * dx) for i in range(nBins)]
    # define objective function as the vertical difference
    # between the observed data and the fit-function
    fitFunc = lambda s, x: sciStat.rayleigh.pdf(x, scale=s)
    objFunc = lambda s, x, y: (y - fitFunc(s, x))
    # set initial guess for the fit-parameter and perform
    # least squares fit
    s0 = 7.
    s, flag = sciOpt.leastsq(objFunc, s0, args=(xVals, yVals))
    for i in range(nBins):
        print xVals[i], yVals[i], fitFunc(s[0], xVals[i])
    print '#', s[0], sum(map(lambda x, y: objFunc(s[0], x, y)**2, xVals, yVals))
    return s[0]
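# A quick sanity check for myFit: draw synthetic Rayleigh samples with a known
# scale and verify the fitted scale lands nearby.  The aliases sciStat and
# sciOpt are assumed to be scipy.stats and scipy.optimize, as myFit expects.
import scipy.stats as sciStat
import scipy.optimize as sciOpt

true_scale = 7.0
data = sciStat.rayleigh.rvs(scale=true_scale, size=5000)
s_hat = myFit(data, nBins=30)   # should come out close to true_scale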
def var(theta, Nbins=360):
    """Sample circular variance, second moment.

    Calculated using the minimum variance method with moving cut points.
    See: Weber RO (1997). J. Appl. Meteorol. 36(10), 1403-1415.

    Input:
        theta - array of radian angle values
        Nbins - number of intervals across [0, 2pi] to minimize
    Returns:
        circular variance
    """
    from scipy.stats import histogram
    from numpy import empty, arange, pi
    N = len(theta)
    delta_t = 2 * pi / Nbins
    lims = (0, 2 * pi)
    x = arange(delta_t, 2 * pi + delta_t, delta_t)
    n, xmin, w, extra = histogram(theta, numbins=Nbins, defaultlimits=lims)
    tbar = empty((Nbins,), 'd')
    S = empty((Nbins,), 'd')
    s2 = empty((Nbins,), 'd')
    tbar[0] = (x * n).sum() / N                 # A1
    S[0] = ((x**2) * n).sum() / (N - 1)         # A2
    s2[0] = S[0] - N * (tbar[0]**2) / (N - 1)   # A3
    for k in xrange(1, Nbins):
        tbar[k] = tbar[k-1] + (2*pi) * n[k-1] / N                        # A4
        S[k] = S[k-1] + (2*pi) * (2*pi + 2*x[k-1]) * n[k-1] / (N - 1)    # A5
        s2[k] = S[k] - N * (tbar[k]**2) / (N - 1)                        # A6
    return s2.min()
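# Usage sketch for the circular variance above: tightly clustered angles should
# give a small variance, near-uniform angles a much larger one.  This assumes an
# environment where scipy.stats.histogram (or a shim such as histogram_compat
# above) is importable, since var() imports it internally.
import numpy as np

rng = np.random.RandomState(0)
clustered = rng.vonmises(mu=0.0, kappa=20.0, size=1000) % (2 * np.pi)
uniform = rng.uniform(0, 2 * np.pi, size=1000)
# var(clustered, Nbins=360) should be much smaller than var(uniform, Nbins=360)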
def percentile(x, countdat=False):
    if countdat:
        lo = min(x)
        hi = max(x)
        nbins = hi - lo + 1
        hf, low, bs, out = _ss.histogram(x, numbins=nbins, defaultlimits=(lo, hi + 1))
        datn = int(_N.sum(hf))
        pctl = _N.zeros((hi - lo + 1, 2))
        tot = 0
        for i in xrange(len(hf)):
            tot += hf[i]
            pctl[i, 0] = lo + i
            pctl[i, 1] = float(tot + 1) / float(datn + 1)
    else:
        sx = _N.sort(x)
        N = len(sx)
        pctl = _N.zeros((N, 2))
        for n in xrange(N):
            pctl[n, 0] = sx[n]
            pctl[n, 1] = float(n + 1) / float(N + 1)
    return pctl
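# Example use of percentile() on count data: column 0 of the returned table is
# the count value, column 1 its plotting position (i+1)/(n+1).  _N and _ss are
# assumed aliases for numpy and scipy.stats, matching the function body.
import numpy as _N

cts = _N.random.RandomState(0).poisson(3, size=200)
tbl = percentile(cts, countdat=True)   # shape (max(cts) - min(cts) + 1, 2)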
def tile_entropy(image, coords, size):
    tmp = []
    for i in range(coords[0], coords[0] + size[0]):
        for j in range(coords[1], coords[1] + size[1]):
            tmp.append(image[i][j])
    h = histogram(tmp, numbins=256, defaultlimits=(0, 255))[0]
    return entropy(h)
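# tile_entropy usage sketch: a uniform-noise tile should score near the maximum
# possible entropy log(256), while a constant tile scores 0.  Assumes histogram
# and entropy are the scipy.stats functions the surrounding module imports.
import numpy as np

rng = np.random.RandomState(0)
noisy = rng.randint(0, 256, size=(64, 64))
flat = np.zeros((64, 64))
# tile_entropy(noisy, (0, 0), (16, 16)) >> tile_entropy(flat, (0, 0), (16, 16))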
def calcentropy(v_array):
    """Bin the array with stats.histogram, then compute the Shannon entropy
    of the bin counts with base 2, i.e. in bits."""
    v_hist = stats.histogram(v_array)
    return stats.entropy(v_hist[0], base=2)
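# With base=2 the entropy comes back in bits; the default 10 roughly equal
# histogram bins give close to log2(10) ~ 3.32 bits for uniform data.  stats is
# assumed to be scipy.stats, as in the function body.
import numpy

uniform_vals = numpy.random.RandomState(0).rand(10000)
bits = calcentropy(uniform_vals)   # expect a value near 3.32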
def writeHistogram(x, path, limits=None):
    hist, lowRange, binSize, extra = histogram(x, numbins=NUMBINS, defaultlimits=limits)
    with open(path, 'wt') as fid:
        low = lowRange
        hi = lowRange + binSize
        for freq in hist:
            fid.write(str(low) + ', ' + str(hi) + ', ' + str(freq) + '\n')
            low += binSize
            hi += binSize
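# writeHistogram depends on a module-level NUMBINS constant that is not shown
# in the snippet; the value below is a hypothetical choice for illustration.
# Each output row is "bin_low, bin_high, frequency".
import numpy as np

NUMBINS = 50   # hypothetical: the original module defines its own value
writeHistogram(np.random.randn(1000), 'hist.csv')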
def calc_point(self, x, y):
    x1 = max(0, x - self.radius)
    x2 = min(np.size(self.image, 0) - 1, x + self.radius)
    y1 = max(0, y - self.radius)
    y2 = min(np.size(self.image, 1) - 1, y + self.radius)
    sub = np.ravel(self.image[x1:x2+1, y1:y2+1])
    ent = entropy(histogram(sub, numbins=256, defaultlimits=(0, 255))[0])
    self.map[x1:x2+1, y1:y2+1] = (self.map[x1:x2+1, y1:y2+1] + ent) / 2
    return ent
def featureSplitValues(fn, fm, splits):
    fo = open(fn)
    data = json.load(fo)
    fo.close()
    fo = open(fm)
    # skip the first line
    fo.next()
    rowheaders = []
    # read in the feature names
    for line in fo:
        rowheaders.append(line.rstrip().split('\t')[0])
    # read in the splits
    # each line is feature_id_integer split1 split2 ... (varying length)
    split_entries = {}
    fo = open(splits)
    for line in fo:
        vs = [float(v) for v in line.rstrip().split('\t')]
        split_entries[int(vs[0])] = np.array(vs[1:])
    fo.close()
    matrix = []
    split_list = []
    for f_index in split_entries:
        split_list = split_entries[f_index]
        # don't worry about very few splits
        (bins, low, binsize, extra) = stats.histogram(split_list)
        bin_list = []
        significant = False
        for (index, count) in enumerate(bins):
            if count > 50:
                significant = True
                bin_list.append({"position": low + (binsize * index), "count": count})
        if significant:
            feature_name = rowheaders[f_index]
            split_line = [feature_name]
            split_line.append(bin_list)
            matrix.append(split_line)
    return matrix
def normalized_histogram_for_print(score_distribution, num_bins, denominator, limits):
    out = []
    if denominator > 0:
        h = stats.histogram(score_distribution, num_bins, limits)
        out.append("[" + str(h[1]) + " , " + str(h[2] * num_bins) + "], step = " + str(round(h[2], 2)) + " \n\t")
        for x in h[0]:
            out.append(str(round(x * 100.0 / denominator, 3)) + "%\t")
        out.append("\n\t")
        for x in range(num_bins):
            out.append("(" + str(round(h[1] + h[2] + x * h[2], 3)) + ")\t")
        out.append("\n")
    return "".join(out)
def sim_closeness(g1, g2):
    closness_list1 = []
    closness_list2 = []
    closeness_cent1 = nx.closeness_centrality(g1)
    closeness_cent2 = nx.closeness_centrality(g2)
    for key, value in closeness_cent1.items():
        closness_list1.append(value)
    n1 = len(g1.nodes())
    n2 = len(g2.nodes())
    numbin = 50
    hist1 = histogram(closness_list1, numbins=numbin)
    for i in range(len(hist1[0])):
        hist1[0][i] += 1  # add-one smoothing keeps the KL divergence finite
    print(hist1[0])
    for key, value in closeness_cent2.items():
        closness_list2.append(value)
    hist2 = histogram(closness_list2, numbins=numbin)
    for i in range(len(hist2[0])):
        hist2[0][i] += 1
    print(hist2[0])
    KL = entropy(hist1[0], hist2[0])
    return KL
def outputBinFiles(outfilename, plotData, xtickLabels, minMin, maxMax, nbins=50):
    histoArrays = []
    _low_range = -100
    _binsize = -100
    _extrapoints = -1
    for col, xtickLabel in zip(plotData, xtickLabels):
        histoArray, low_range, binsize, extrapoints = histogram(
            col, numbins=nbins, defaultlimits=(minMin, maxMax))
        histoArrays.append(histoArray)
        if _binsize == -100:
            _binsize = binsize
            _low_range = low_range
        else:
            if _binsize != binsize or low_range != _low_range:
                print >> stderr, "inconsistent histo", _binsize, _low_range, histoArray, low_range, binsize, extrapoints
                exit(1)
        if extrapoints > 0:
            print >> stderr, "extrapoints>0", histoArray, low_range, binsize, extrapoints
            exit(1)
    binLows = []
    for i in range(0, nbins):
        binLows.append(i * binsize)
    outfil = open(outfilename, "w")
    outv = ["bins"]
    for binLow in binLows:
        outv.append(str(binLow))
    print >> outfil, "\t".join(outv)
    # now the data
    for xtickLabel, histoArray in zip(xtickLabels, histoArrays):
        outv = [xtickLabel]
        totalPoint = sum(histoArray)
        for v in histoArray:
            outv.append(str(float(v) / totalPoint))
        print >> outfil, "\t".join(outv)
    outfil.close()
def main(args):
    if args[0]:
        filename = args[0]
    else:
        filename = "CONTCAR"
    input_vasp_file(filename)
    vector_table = bond_vector_table(positions)
    all_neighbor_table = all_neighbor_distances(lat, vector_table)
    #write_table_to_file(all_neighbor_table, name)
    # for GePbTe sqs half: Ge-0, Pb-1, Te-2
    # for PbS-PbTe sqs half: Pb-0, S-1, Te-2
    # Edit these numbers!
    a_list = [[0]]
    b_list = [1, 2]
    num_nn = [6]  # Number of nearest neighbors between atoms of type a and b
    bond_stats = []
    bond_table = []
    for i in range(len(a_list)):
        table, stats = find_nearest_neighbors(all_neighbor_table, a_list[i], b_list,
                                              num_nn[i], num_atom_types, atom_type_list)
        bond_table.append(table)
        bond_stats.append(stats)
    bond_table = np.array(bond_table).flatten()
    print "Table of bond lengths"
    print np.sort(bond_table)
    print "Avg. bond length (Ang), Std. Dev. (Ang)"
    print bond_stats
    print
    gauss = gaussian_kde(bond_table)
    #xdata = np.linspace(2.4, 4.0, 100)
    xdata = np.linspace(min(bond_table) - 3. * bond_stats[0][1],
                        max(bond_table) + 3. * bond_stats[0][1], 100)
    ydata = gauss(xdata)
    print "Gaussian distribution fit"
    for i in range(len(xdata)):
        print xdata[i], ydata[i]
    print
    nbins = 10
    hist, lowest, binsize, extra = histogram(bond_table, numbins=nbins)
    n = lowest
    print "histogram data"
    print n, "0.0"
    for i in range(len(hist)):
        print n, hist[i]
        n += binsize
        print n, hist[i]
    print n, "0.0"
    print
def find_ref_values(cursor, values):
    ref_values = {}
    # for the genes showing a nice normal distribution
    for symbol, val_array in values.iteritems():
        if bad(cursor, symbol):
            continue
        if not val_array:
            continue
        description = stats.describe(val_array)
        [nobs, [min, max], mean, variance, skewness, kurtosis] = description
        if mean < 3:
            continue
        if len(val_array) > 20:
            [teststat, pval] = stats.normaltest(val_array)
        else:
            teststat = 100
        if teststat >= 4:
            continue
        if mean < 10:
            continue
        ref_values[symbol] = description
        continue
        # below is some descriptive output (unreachable past the continue above)
        in_left_tail = float(len([x for x in val_array if x < mean - 2 * stdev])) / len(val_array)
        in_right_tail = float(len([x for x in val_array if x > mean + 2 * stdev])) / len(val_array)
        description += (in_left_tail, in_right_tail)
        blurb(symbol, description, "normal candidate", sys.stdout)
        [hist_numpy, low, binsize, extrapoints] = stats.histogram(val_array)
        histogram = hist_numpy.tolist()
        i = 0
        bin_prev = low
        for val in histogram[:-1]:
            print " %5d %5d " % (int(bin_prev), int(bin_prev + binsize)),
            print " %5d " % val
            i += 1
            bin_prev += binsize
        print
    return ref_values
def histogram1(comm, N, data, bins=50):
    """
    Constructs the histogram (probability mass function) of an MPI-
    decomposed data.
    """
    if not np.all(np.isfinite(data)):
        data[~np.isfinite(data)] = np.nan  # mask out non-finite entries
    (gmin, gmax), u1, c2, c3, c4, c5, c6 = moments(comm, N, data)
    try:
        g3 = c3 / sqrt(c2**3)   # 3rd standardized moment
        g4 = c4 / (c2**2)       # 4th standardized moment
        g5 = c5 / sqrt(c2**5)   # 5th standardized moment
        g6 = c6 / (c2**3)       # 6th standardized moment
    except (RuntimeError, FloatingPointError, ValueError) as e:
        if comm.Get_rank() == 0:
            print('---------------------------------------'
                  '---------------------------------------')
            print str(e), e.message()
            print 'min, mean, max:\n'
            print('min: {}, u1: {}, max {}\n'.format(gmin, u1, gmax))
            print 'moments from two-pass algorithm:\n'
            print('c2: {}, c3: {}, c4: {}, c5: {}, c6: {}\n'.format(c2, c3, c4, c5, c6))
            print('---------------------------------------'
                  '---------------------------------------')
        MPI.Finalize()
        sys.exit(1)
    hist, low, width, extra = stats.histogram(data, numbins=bins,
                                              defaultlimits=(gmin, gmax),
                                              printextras=True)
    comm.Allreduce(MPI.IN_PLACE, hist, op=MPI.SUM)
    hist *= 1 / psum(hist)  # makes this a probability mass function
    return hist, u1, c2, g3, g4, g5, g6, gmin, gmax, width
def GetHistogramString(array, unit="", **kwargs):
    """Returns the values in array represented as a histogram.

    Summary:
        Generates a histogram using scipy.stats.histogram and renders it as a
        string.

    Arguments:
        array: A list of values.
        unit: The human readable string to be used as the unit for the values.
              For example, unit="us" would cause values to be displayed as "10us".
        **kwargs: A dict containing parameters to be forwarded directly to
                  scipy.stats.histogram as keyword args.
    """
    buckets, low_range, binsize, extrapoints = stats.histogram(array, **kwargs)
    hist = "%7.2f%s : " % (low_range, unit)
    for count in buckets:
        hist += GetBucketChar(count, max(buckets))
    hist += " : %7.2f%s" % ((low_range + binsize * (len(buckets) + 1)), unit)
    return hist
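# GetBucketChar is not defined in the snippet above.  One plausible helper,
# shown here as a hypothetical sketch, maps a bucket count onto an ASCII
# density ramp so the histogram renders as a one-line string.
def GetBucketChar(count, max_count):
    ramp = " .:-=+*#%@"   # sparse to dense
    if max_count == 0:
        return ramp[0]
    return ramp[int((len(ramp) - 1) * count / float(max_count))]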
def generate_random_segments(image, number, radius, entropy_hint=False, sample_size=1.5):
    """
    Generates selected number of random segments with selected radius.
    The distribution of number values is uniform.

    Parameters
    ----------
    image : numpy.ndarray
        2-dimensional array representing a grayscale image
    number : int
        number of segments
    radius : int
        "radius" of the segment (= number of pixels to the edge when walking
        from the central pixel with azimuth i*Pi/4)

    Returns
    -------
    """
    segments = []
    score_map = {}
    num_samples = number if entropy_hint is False else int(number * sample_size)
    domain = calculate_domain(image, radius)
    for i in range(num_samples):
        x = random.randint(domain[0][0], domain[1][0])
        y = random.randint(domain[0][1], domain[1][1])
        segment = generate_segment(image, (x, y), radius)
        if entropy_hint is True:
            ent = entropy(histogram(segment.image, numbins=256, defaultlimits=(0, 255))[0])
            score_map[ent] = segment
        else:
            segments.append(segment)
    if entropy_hint is True:
        values = score_map.keys()
        values.sort()
        for v in values[:number]:
            segments.append(score_map[v])
    return segments
def histNormal(color='000000', trace=False):
    """Plot a normal distribution with mu and sigma same as the original or
    clipped dataset.

    To place mu at the centre of the abscissa space, use:
    #mu = xao.returnsStats[1][0] + (0.5 * (xao.returnsStats[1][1] - xao.returnsStats[1][0]))
    """
    mu = xao.returnsStats[2]
    sigma = np.sqrt(xao.returnsStats[3])  # [3] is the variance, so sqrt gives the st. dev.
    normSamps = np.random.normal(mu, sigma, xao.returnsStats[0])  # [0] is nr datapoints
    dta, smallest, binsize, ignore = stats.histogram(normSamps, nrbins)
    histy, binEdges = np.histogram(normSamps, bins=nrbins, normed=0, new=False)
    # pylab.plot(binEdges, histy, 'b-', linewidth=1, color='#888800', antialiased=True,
    #            label='normal distribution,\nwith same St.Dev.', alpha=1)
    pylab.hist(normSamps, nrbins, facecolor=color, align='mid', bottom=None,
               label='simulated normal\ndistrib. with same StD.', alpha=1)
    if trace:
        print("histy:", np.sum(histy))
        print("binEdges", np.sum(binEdges))
        print("size of normSamps:", len(normSamps))
        print("sum of hist:", histy.sum())
        print("over same abscissa:", np.min(normSamps), np.max(normSamps))
        print("over SND histogram", np.min(histy), np.max(histy))
def show_data(test_data, description):
    """Displays a box plot, histogram and a QQ graph (based on normality),
    also prints some descriptive statistics."""
    # at the moment it also saves as .png files - however naming is not unique
    plt.figure()
    plt.title('Box Plot-' + description)
    plt.boxplot(test_data)
    plt.savefig("boxplot.png")
    plt.show()
    plt.figure()
    plt.title('Histogram-' + description)
    plt.hist(test_data, histtype='bar')
    plt.savefig("Histogram.png")
    plt.show()
    plt.figure()
    qq = stats.probplot(test_data, dist="norm", plot=plt)
    plt.savefig("QQ.png")
    plt.show()
    description = stats.describe(test_data)
    print "The number of observations is: {}".format(description[0])
    print "The minimum and max of the observations are: {}, {}".format(description[1][0], description[1][1])
    print "The mean of the observations is {}".format(description[2])
    print "The variance of the observations is {}".format(description[3])
    print "The skewness of the observations is {}".format(description[4])
    print "The normalised kurtosis of the observations is {}".format(description[5])
    freq_dump = stats.histogram(test_data, 10)
    for i in range(10):
        bottom_range = freq_dump[1] + freq_dump[2] * i
        top_range = bottom_range + freq_dump[2]
        print "The proportion from {} to {} is {}".format(bottom_range, top_range,
                                                          freq_dump[0][i] / float(len(test_data)))
def __init__(self, parent, layer, data, **kwargs):
    PlottingCanvas.__init__(self, parent, data)
    try:
        if isinstance(layer, str) or isinstance(layer, unicode):
            # in case of weights histogram
            self.layer_name = layer
        else:
            self.layer_name = layer.name
        self.isAutoScale = False
        self.intervals = 7
        #self.title = "Histogram (%s)" % (self.layer_name)
        self.title = ""
        self.x_label = data.keys()[0]
        self.y_label = "Counts in bins"
        self.data = data[self.x_label]
        self.enable_axis_x = False
        self.draw_full_axis = False
        self.margin_right = 250  # for legend
        # create a dict for input data for Brushing
        self.data_dict = sorted(self.data, key=self.data.get)  # [obj_id]
        sorted_data = sorted(self.data.values())  # [value]
        if self.x_label == 'Connectivity':
            self.intervals = len(set(sorted_data))
            self.intervals = sorted_data[-1] - sorted_data[0] + 1
            if self.intervals > 50:
                self.enable_axis_x = True
                self.margin_right = 40
        if self.intervals > 1:
            self.hist, low_range, binsize, extrapoints = histogram(sorted_data, self.intervals)
        else:
            self.hist = np.array([len(sorted_data)])
        cnt = 0
        bin_idx = 0
        self.bin_index = {}  # key: obj_id, value: bin_idx
        for n in self.hist:
            for i in range(int(n)):
                obj_id = self.data_dict[cnt]
                self.bin_index[obj_id] = bin_idx
                cnt += 1
            bin_idx += 1
        data_min, data_max = sorted_data[0], sorted_data[-1]
        if self.x_label == 'Connectivity':
            #unique_num_neighbors = list(set(sorted_data))
            self.data_intervals = []
            for n in range(sorted_data[0], sorted_data[-1] + 1):
                self.data_intervals.append((n, n))
        else:
            end_pos = np.cumsum(self.hist)
            start_pos = end_pos - self.hist + 1
            self.data_intervals = [(start_pos[i], end_pos[i]) for i in range(len(self.hist))]
        # a NxN matrix
        self.x_min = 1
        self.x_max = self.intervals + 1
        self.y_min = 0
        self.y_max = np.max(self.hist) + 1
        self.extent = (self.x_min, self.y_min, self.x_max, self.y_max)
        self.status_bar = None  # self.parentFrame.status_bar
        self.gradient_color = GradientColor(gradient_type='rdyibu')
        # color schema: from blue to red
        self.color_matrix = []
        for i in range(self.intervals):
            p = float(i + 1) / self.intervals
            self.color_matrix.append(self.gradient_color.get_color_at(p))
        self.selected_obj_ids = []
    except Exception as err:
        self.ShowMsgBox("Histogram could not be created. Please select a "
                        "numeric variable. Details: " + str(err.message))
        self.isValidPlot = False
        self.parentFrame.Close(True)
        return None
    # linking-brushing events
    self.Register(stars.EVT_OBJS_SELECT, self.OnObjsSelected)
    self.Register(stars.EVT_OBJS_UNSELECT, self.OnNoObjSelect)
# VW = [V[i] * W[i] for i in range(len(V))]
# values = [(x, y) for (x, y) in zip(VW, W)]
values = generate_high_amplitude_distribution(min_v, max_v, min_w, max_w, size)
# values = generate_random_distribution_of_weights_and_values(min_v, max_v, min_w, max_w, size)
color_index = 0
for k in [15, 20, 25, 45, 70, 100, 200, 300, 500]:
    a, b, edf, CIPercentiles, CINormal = compute_empiric_distribution(values, k, int(nb_draws))
    print CIPercentiles
    c, l, largeur, e = histogram(edf, 50)
    c = [t / sum(c) for t in c]
    xaxis_edf = [l + largeur * i for i in range(50)]
    plt.bar(xaxis_edf, c, largeur,
            label='Hist ' + str(k) + ' ' + str([str(float("%.3f" % float(x))) for x in CIPercentiles]),
            alpha=0.6, color=colors[color_index])
plt.legend(loc='upper right', fancybox=True, framealpha=0.85, fontsize=25)
def makeSpect(fname, ftype, k=250, m=100, pulseHeight=None, energies=[], numbins=2000):
    """Takes a file name for data to be processed. Processes and produces a spectrum."""
    if pulseHeight == None:
        f = readFile(fname, ftype)
        M = 0
        pulseHeight = numpy.zeros(len(f))
        numPulses = len(f)
        zeropad = numpy.zeros(2*k + m)
        print 'Reading ', numPulses, ' traces from ', fname
        start = time.time()
        i = 0
        numRead = 10000
        iterations = (numPulses - numPulses % numRead) / numRead + 1
        for n in range(iterations):
            n1 = n * 10000
            if n >= iterations - 1:
                n2 = numPulses
            else:
                n2 = n1 + 10000
            for trace in f[n1:n2, :]:
                trace_s = subtractBaseline(trace)
                s, pulseHeight[i] = trapezoidalFilter(k, m, 4467, numpy.append(zeropad, trace_s))
                i += 1
            end = time.time()
            print 'Processing ', n2, ' samples (', n1, ' to ', n2, ') took: ', end - start
    if len(energies) == 0:
        usr_in = ''
        while usr_in != 'done':
            usr_in = raw_input('Please enter energies of the expected peaks. When finished, enter \'done\': ')
            try:
                energies.append(float(usr_in))
            except:
                if usr_in != 'done':
                    print 'Error: Please enter numbers. When finished, enter \'done\''
                continue
    hist, low_range, binsize, extrapoints = stats.histogram(pulseHeight, numbins=int(max(energies) + 100))
    histogram = hist * (hist > 1)
    histogram = [point for point in histogram if point > 0]
    hist, low_range, binsize, extrapoints = stats.histogram(pulseHeight, numbins=int(max(energies) + numbins),
                                                            defaultlimits=(0., len(histogram) * binsize))
    tries = 0
    threshold = 10
    peak_indices = []
    while len(energies) != len(peak_indices):
        tries += 1
        thresh = threshold + tries * 10
        peak_indices = findRise(hist, findPeak=True, thresh=thresh)
        if tries > 100:
            print 'Error: Could not match number of peaks to energies entered.'
            break
    peak_info = peakFit(hist, peak_indices)
    calib, cov = opt.curve_fit(lambda x, m, b: m*x + b,
                               numpy.array([peak[1] for peak in peak_info]) * binsize,
                               numpy.array(energies))
    print calib
    bins = numpy.array([binsize * x * calib[0] + calib[1] for x in xrange(len(hist))])
    plt.plot(bins, hist)
    plt.show()
    peak_indices = findRise(hist, findPeak=True, thresh=thresh)
    peak_info = peakFit(hist, peak_indices)
    peak_info[:, 1:2] *= binsize * calib[0]
    peak_info[:, 1:2] += numpy.ones(peak_info[:, 1:2].shape) * calib[1]
    peak_info[:, 3] *= calib[0]
    print 'Peak #\tPeak Info'
    print '\tHeight (counts)\t\tCentroid (keV)\t\tFWHM (keV)\t\tsigma_FWHM (keV)'
    for peak_num, peak in enumerate(peak_info):
        print peak_num, list(peak)
    return hist, bins, peak_info, pulseHeight
# Distance light traveled in Newcomb's experiment
distance = 7400.0

# Read the measurements from the data file.
# There are a number of comments at the top of the file marked with "#"
measured_time = loadtxt('speed_of_light.dat', comments="#")

# measurements were in nanoseconds difference from 24800 ns.
measured_time += 24800.0

# Convert measured times to measured velocities.
measured_velocity = distance / measured_time * 10.0  # m/ns * 10 == 1e8 m/s

# histogram the measured velocities.
bin_counts, min_val, width, outside = stats.histogram(measured_velocity, numbins=30)
print 'min val:', min_val

# For the plot, we want to know the velocities for each bin.
binned_velocity = min_val + arange(len(bin_counts)) * width
print binned_velocity

# Plot a bar plot of the histogrammed data.
pylab.hold(False)
pylab.bar(binned_velocity, bin_counts, width=width)
pylab.xlabel("velocity (1e8 m/s)")
pylab.ylabel("counts")
pylab.title("Newcomb's Speed of Light Measurement Histogram")
#ttmp1 = ttmp1 + offset[t]
ttmp1 = ttmp1 + corbias[vt]
bias.append(offset[t])

# get mean and mode
me1 = np.mean(ttmp1)
me2 = np.mean(ttmp2)
mean1.append(me1)
mean2.append(me2)

# primary mode
binval = 200
bins = np.linspace(-.2, 2, num=binval)
count, lowerlimit, binsize, extra = stats.histogram(ttmp1, binval, defaultlimits=(-.2, 2))
mindx1 = np.argmax(count)
count, lowerlimit, binsize, extra = stats.histogram(ttmp2, binval, defaultlimits=(-.2, 2))
mindx2 = np.argmax(count)
# nb: m1 takes mindx2 and m2 takes mindx1 here
m1 = bins[mindx2]
m2 = bins[mindx1]
mode1.append(m1)
mode2.append(m2)

diff = (area2 - area1) / 1000000
diff_r = diff / (area1 / 1000000)

# pdf plot
# -*- coding: utf-8 -*-
import numpy as np
from scipy import stats
from matplotlib import pyplot as plt

N = np.loadtxt('statistik.txt')

print("Mean of the count rates: {0:.3f}".format(N.mean()))
print("Standard deviation of the count rates: {0:.3f}".format(np.std(N, ddof=1)))

# split the data into 7 bins
binnum = 7
n, low_range, binsize, extra = stats.histogram(N, binnum)

ind = np.arange(binnum)
width = 0.50
x = np.linspace(0, 7)
norm = stats.norm(4, 1.5).pdf(x)
poisson = stats.poisson(5).pmf(ind)

plt.plot(x, norm, "r", label="normal distribution")
plt.bar(ind, n / 100., width, color="blue", label="measured distribution")
plt.bar(ind + 0.5, poisson, width, color="green", label="Poisson distribution")
plt.title("Statistical analysis of the alpha decay")
plt.ylabel("relative frequency")
plt.xticks(ind + width, ('1', '2', '3', '4', '5', '6', '7'))
import sys
import os
import string
import numpy
import scipy
import scipy.stats
from scipy.stats import histogram, histogram2

debug = os.getenv("DEBUG")
rsptime_fn = sys.argv[1]
f = open(rsptime_fn, "r")
records = f.readlines()
times = numpy.array([float(r.strip().split(',')[1]) for r in records])
maxtime = max(times)
(time_histo, time_low_range, time_binsize, time_extrapoints) = histogram(times, defaultlimits=(0.0, maxtime))
assert time_low_range == 0.0
assert time_extrapoints == 0
if debug:
    print(time_histo, ' shape ', time_histo.shape, ' low_range ', time_low_range,
          ' binsize ', time_binsize, ' extrapoints ', time_extrapoints)
print('time histogram: %s' % string.join([str(v) for v in time_histo.tolist()], ','))
rsptimes = numpy.array([float(r.strip().split(',')[2]) for r in records])
rsptime_histo = histogram2(rsptimes, [0.0001, 0.00032, 0.001, 0.0032, 0.01, 0.032,
                                      0.1, 0.32, 1, 3.2, 10, 32, 100])
if debug:
    print(rsptime_histo, rsptime_histo.shape)
print('response time histogram: %s' % string.join([str(v) for v in rsptime_histo.tolist()], ','))
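# scipy.stats.histogram2 was removed along with histogram.  A close equivalent,
# assuming the old semantics (counts between successive cut points in `bins`,
# with a final bucket for everything above the last cut), can be sketched with
# numpy.searchsorted:
import numpy as np

def histogram2_compat(a, bins):
    n = np.searchsorted(np.sort(a), bins)
    return np.concatenate([np.diff(n), [len(a) - n[-1]]])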
def poi_ch2test(cts):
    if _N.sum(cts) == 0:   # all of them 0
        return 0.5         # Poisson with rate 0
    TRIALS = len(cts)
    rareLimL = 0
    rareLimH = 0
    obsLam = _N.mean(cts)
    i = int(obsLam)
    while True:
        if poi_pdf(obsLam, i) * TRIALS < 1:
            rareLimH = i - 1
            break   # inclusive of rareLim and up
        i += 1
    i = int(obsLam)
    while True and (i >= 0):
        if poi_pdf(obsLam, i) * TRIALS < 1:
            rareLimL = i + 1
            break   # inclusive of rareLim and up
        i -= 1
    # no bin has < 1 expected events
    expctd = _N.zeros(rareLimH + 1)
    for n in range(rareLimH):
        expctd[n] = poi_pdf(obsLam, n) * TRIALS
    expctd[rareLimH] = TRIALS - _N.sum(expctd[0:rareLimH])
    # poipdf[rareLim] is at < 1.  This will be the last square,
    # so this is rareLim + 1 objects.
    maxInd = max(cts)   # 0-based index
    nbins = maxInd - 0 + 1
    hf, low, bs, out = _ss.histogram(cts, numbins=nbins, defaultlimits=(0, maxInd + 1))
    # number of categories: rareLim + 1, e.g. 0..4 -> 1..5 (5 cats); k == number of classes.
    # If rareLim is the last spot, then the length is rareLim + 1; the highest
    # count in cts is len(vals) - 1, so maxLim == len(vals) - 1.  To accommodate
    # the last index rareLim, we need size rareLim + 1.
    svals = _N.zeros(rareLimH + 1)
    if maxInd <= rareLimH:
        svals[0:maxInd + 1] = hf[0:maxInd + 1]
    else:
        svals[0:rareLimH] = hf[0:rareLimH]
        svals[rareLimH] = _N.sum(hf[rareLimH:])
    svals[rareLimL] = _N.sum(hf[0:rareLimL + 1])
    expctd[rareLimL] = _N.sum(expctd[0:rareLimL + 1])
    k = rareLimH - rareLimL + 1
    # classes (counts) [0, 1, 2, ..., k-2]: 1...k inclusive is k classes
    chi2 = 0
    for i in xrange(rareLimL, rareLimH + 1):
        o = svals[i]
        e = expctd[i]
        chi2 += (o - e)**2 / e
        i += 1
    edf = k - 2
    pv = 1 - _ss.chi2.cdf(chi2, edf)
    return pv
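# Sanity check for poi_ch2test: counts drawn from a genuine Poisson should
# usually give a large p-value, while clearly non-Poisson counts should not.
# _N and _ss are assumed aliases for numpy and scipy.stats, and poi_pdf is
# assumed to be the Poisson pmf helper the function calls.
import numpy as _N

rng = _N.random.RandomState(1)
poisson_cts = rng.poisson(lam=4.0, size=500)
# poi_ch2test(poisson_cts) should typically exceed 0.05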
def cumfrac(x, staircase=False, countdat=False, histogram=False, bins=None, binsAlignLeft=False):
    """Cumulative fraction.

    types of data:
    continuous data
        sort data, then assign size rank to sorted points
    countdata
        make a histogram of data first
    histogram
        cumulatively add values of bins

    cum frac looks like a staircase when plotted w/ lines;
    staircase=True will include the cnts.

    if data is a list of counts (like spks per trial), we will most likely
    have many instances of, ie the number 4, in our data.  We just need to
    consider all.

    sanity check:
    cnts = [1, 2, 3, 4, 5] or [1, 2, 3, 4, 5, 3]
    cf = _ks.cumfrac(cnts, countdat=True, staircase=True)
    plot(cf[:, 0], cf[:, 1])
    """
    if countdat:
        lo = min(x)
        hi = max(x)
        nbins = hi - lo + 1
        hf, low, bs, out = _ss.histogram(x, numbins=nbins, defaultlimits=(lo, hi + 1))
        datn = int(_N.sum(hf))
        if not staircase:
            cf = _N.zeros((hi - lo + 1, 2))
            tot = 0
            for i in xrange(len(hf)):
                tot += hf[i]
                cf[i, 0] = lo + i
                cf[i, 1] = float(tot) / datn
        else:
            cf = _N.zeros(((hi - lo + 1) * 2, 2))
            tot = 0
            for i in xrange(len(hf)):
                cf[2*i, 0] = lo + i
                cf[2*i + 1, 0] = lo + i
                cf[2*i, 1] = float(tot) / datn
                tot += hf[i]
                cf[2*i + 1, 1] = float(tot) / datn
    else:
        sx = _N.sort(x)
        N = len(sx)
        if not staircase:
            cf = _N.zeros((N, 2))
            cf[:, 0] = sx[:]
            cf[:, 1] = _N.linspace(0, 1, N, endpoint=False) + 1. / N
        else:
            cf = _N.zeros((2 * N, 2))
            yvals = _N.linspace(0, 1, N, endpoint=False)
            for n in xrange(N):
                cf[2*n, 0] = sx[n]
                cf[2*n + 1, 0] = sx[n]
                cf[2*n, 1] = yvals[n]
                cf[2*n + 1, 1] = yvals[n] + 1. / N
    if histogram:
        tot = _N.sum(x)
        N = len(x)
        cf = _N.zeros((N, 2))
        ccf = 0
        if bins == None:
            bins = range(N + 1)
        for b in xrange(N):
            cf[b, 0] = bins[b + 1]
            cf[b, 1] = ccf
            ccf += x[b]
        cf[N - 1, 0] = bins[b + 1]
        cf[N - 1, 1] = tot
    return cf
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from scipy import *
from pylab import *
from scipy import stats

# f(x)
f = lambda x, r: r * x * (1 - x)

# set of r values
rlist = linspace(3.5, 3.7, 500)

# initial hare population
X = [0.3 * ones_like(rlist)]

# evolve for N steps
for i in arange(0, 5000):
    X += [f(X[-1], rlist)]

# take the last N iterates
X = hsplit(vstack(X[-2000:]), rlist.size)

# split vertically into intervals (the more points, the brighter the interval)
H = map(lambda Z: stats.histogram(Z, defaultlimits=(0.3, 0.7), numbins=500)[0], X)

# normalize by weight and invert the colour (so the line comes out black, not white)
H = map(lambda Z: 1 - sqrt(Z / Z.max()), H)

scale = 1
figure(figsize=(8 * scale, 5 * scale), dpi=130)
imshow(rot90(vstack(H)), aspect="auto", extent=[3.5, 3.7, 0.3, 0.7])
bone()  # switch the colour map to greyscale
xlabel("r")
ylabel(r"$X_{n \rightarrow \infty}$")
savefig("Small_bifurcation.png")
# Generate 1000 normally distributed random numbers
x = np.random.randn(1000)

# Build the histogram data with scipy.stats
n, low_range, binsize, extrapoints = st.histogram(x)

# define the upper end of the range
upper_range = low_range + binsize * (len(n) - 1)

# Compute the discrete bin positions
bins = np.linspace(low_range, upper_range, len(n))

# Generate the bar chart
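# On a SciPy without st.histogram, the same bar-chart inputs can be recovered
# from numpy.histogram; the sketch below is an approximate equivalent (bin
# edges rather than the old low_range/binsize pair).
counts, edges = np.histogram(x, bins=10)
bin_lefts = edges[:-1]          # comparable to the bins array built above
bin_width = edges[1] - edges[0]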
def main(): parser = argparse.ArgumentParser() parser.add_argument("-i", "--infile", required=True, help="Tabular file.") parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.") parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi") parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi") parser.add_argument("--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;") parser.add_argument("--test_id", help="statistical test method") parser.add_argument( "--mwu_use_continuity", action="store_true", default=False, help="Whether a continuity correction (1/2.) should be taken into account.", ) parser.add_argument( "--equal_var", action="store_true", default=False, help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.", ) parser.add_argument( "--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values." ) parser.add_argument("--fisher", action="store_true", default=False, help="if true then Fisher definition is used") parser.add_argument( "--bias", action="store_true", default=False, help="if false,then the calculations are corrected for statistical bias", ) parser.add_argument("--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored") parser.add_argument( "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored" ) parser.add_argument("--inclusive", action="store_true", default=False, help="if false,limit will be ignored") parser.add_argument( "--printextras", action="store_true", default=False, help="If True, if there are extra points a warning is raised saying how many of those points there are", ) parser.add_argument( "--initial_lexsort", action="store_true", default="False", help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.", ) parser.add_argument("--correction", action="store_true", default=False, help="continuity correction ") parser.add_argument( "--axis", type=int, default=0, help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)", ) parser.add_argument( "--n", type=int, default=0, help="the number of trials. This is ignored if x gives both the number of successes and failures", ) parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram") parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.") parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction") parser.add_argument("--score", type=int, default=0, help="Score that is compared to the elements in a.") parser.add_argument("--m", type=float, default=0.0, help="limits") parser.add_argument("--mf", type=float, default=2.0, help="lower limit") parser.add_argument("--nf", type=float, default=99.9, help="higher_limit") parser.add_argument( "--p", type=float, default=0.5, help="The hypothesized probability of success. 0 <= p <= 1. 
The default value is p = 0.5", ) parser.add_argument("--alpha", type=float, default=0.9, help="probability") parser.add_argument("--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds") parser.add_argument( "--proportiontocut", type=float, default=0.0, help="Proportion (in range 0-1) of total data set to trim of each end.", ) parser.add_argument( "--lambda_", type=float, default=1.0, help="lambda_ gives the power in the Cressie-Read power divergence statistic", ) parser.add_argument( "--imbda", type=float, default=0, help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.", ) parser.add_argument("--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e") parser.add_argument("--dtype", help="dtype") parser.add_argument("--med", help="med") parser.add_argument("--cdf", help="cdf") parser.add_argument("--zero_method", help="zero_method options") parser.add_argument("--dist", help="dist options") parser.add_argument("--ties", help="ties options") parser.add_argument("--alternative", help="alternative options") parser.add_argument("--mode", help="mode options") parser.add_argument("--method", help="method options") parser.add_argument("--md", help="md options") parser.add_argument("--center", help="center options") parser.add_argument("--kind", help="kind options") parser.add_argument("--tail", help="tail options") parser.add_argument("--interpolation", help="interpolation options") parser.add_argument("--statistic", help="statistic options") args = parser.parse_args() infile = args.infile outfile = open(args.outfile, "w+") test_id = args.test_id nf = args.nf mf = args.mf imbda = args.imbda inclusive1 = args.inclusive1 inclusive2 = args.inclusive2 sample0 = 0 sample1 = 0 sample2 = 0 if args.sample_cols != None: sample0 = 1 barlett_samples = [] for sample in args.sample_cols.split(";"): barlett_samples.append(map(int, sample.split(","))) if args.sample_one_cols != None: sample1 = 1 sample_one_cols = args.sample_one_cols.split(",") if args.sample_two_cols != None: sample_two_cols = args.sample_two_cols.split(",") sample2 = 1 for line in open(infile): sample_one = [] sample_two = [] cols = line.strip().split("\t") if sample0 == 1: b_samples = columns_to_values(barlett_samples, line) if sample1 == 1: for index in sample_one_cols: sample_one.append(cols[int(index) - 1]) if sample2 == 1: for index in sample_two_cols: sample_two.append(cols[int(index) - 1]) if test_id.strip() == "describe": size, min_max, mean, uv, bs, bk = stats.describe(map(float, sample_one)) cols.append(size) cols.append(min_max) cols.append(mean) cols.append(uv) cols.append(bs) cols.append(bk) elif test_id.strip() == "mode": vals, counts = stats.mode(map(float, sample_one)) cols.append(vals) cols.append(counts) elif test_id.strip() == "nanmean": m = stats.nanmean(map(float, sample_one)) cols.append(m) elif test_id.strip() == "nanmedian": m = stats.nanmedian(map(float, sample_one)) cols.append(m) elif test_id.strip() == "kurtosistest": z_value, p_value = stats.kurtosistest(map(float, sample_one)) cols.append(z_value) cols.append(p_value) elif test_id.strip() == "variation": ra = stats.variation(map(float, sample_one)) cols.append(ra) elif test_id.strip() == "itemfreq": freq = stats.itemfreq(map(float, sample_one)) for list in freq: elements = ",".join(map(str, list)) cols.append(elements) elif test_id.strip() == "nanmedian": m = 
stats.nanmedian(map(float, sample_one)) cols.append(m) elif test_id.strip() == "variation": ra = stats.variation(map(float, sample_one)) cols.append(ra) elif test_id.strip() == "boxcox_llf": IIf = stats.boxcox_llf(imbda, map(float, sample_one)) cols.append(IIf) elif test_id.strip() == "tiecorrect": fa = stats.tiecorrect(map(float, sample_one)) cols.append(fa) elif test_id.strip() == "rankdata": r = stats.rankdata(map(float, sample_one), method=args.md) cols.append(r) elif test_id.strip() == "nanstd": s = stats.nanstd(map(float, sample_one), bias=args.bias) cols.append(s) elif test_id.strip() == "anderson": A2, critical, sig = stats.anderson(map(float, sample_one), dist=args.dist) cols.append(A2) for list in critical: cols.append(list) cols.append(",") for list in sig: cols.append(list) elif test_id.strip() == "binom_test": p_value = stats.binom_test(map(float, sample_one), n=args.n, p=args.p) cols.append(p_value) elif test_id.strip() == "gmean": gm = stats.gmean(map(float, sample_one), dtype=args.dtype) cols.append(gm) elif test_id.strip() == "hmean": hm = stats.hmean(map(float, sample_one), dtype=args.dtype) cols.append(hm) elif test_id.strip() == "kurtosis": k = stats.kurtosis(map(float, sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias) cols.append(k) elif test_id.strip() == "moment": n_moment = stats.moment(map(float, sample_one), n=args.n) cols.append(n_moment) elif test_id.strip() == "normaltest": k2, p_value = stats.normaltest(map(float, sample_one)) cols.append(k2) cols.append(p_value) elif test_id.strip() == "skew": skewness = stats.skew(map(float, sample_one), bias=args.bias) cols.append(skewness) elif test_id.strip() == "skewtest": z_value, p_value = stats.skewtest(map(float, sample_one)) cols.append(z_value) cols.append(p_value) elif test_id.strip() == "sem": s = stats.sem(map(float, sample_one), ddof=args.ddof) cols.append(s) elif test_id.strip() == "zscore": z = stats.zscore(map(float, sample_one), ddof=args.ddof) for list in z: cols.append(list) elif test_id.strip() == "signaltonoise": s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof) cols.append(s2n) elif test_id.strip() == "percentileofscore": p = stats.percentileofscore(map(float, sample_one), score=args.score, kind=args.kind) cols.append(p) elif test_id.strip() == "bayes_mvs": c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one), alpha=args.alpha) cols.append(c_mean) cols.append(c_var) cols.append(c_std) elif test_id.strip() == "sigmaclip": c, c_low, c_up = stats.sigmaclip(map(float, sample_one), low=args.m, high=args.n) cols.append(c) cols.append(c_low) cols.append(c_up) elif test_id.strip() == "kstest": d, p_value = stats.kstest( map(float, sample_one), cdf=args.cdf, N=args.N, alternative=args.alternative, mode=args.mode ) cols.append(d) cols.append(p_value) elif test_id.strip() == "chi2_contingency": chi2, p, dof, ex = stats.chi2_contingency( map(float, sample_one), correction=args.correction, lambda_=args.lambda_ ) cols.append(chi2) cols.append(p) cols.append(dof) cols.append(ex) elif test_id.strip() == "tmean": if nf is 0 and mf is 0: mean = stats.tmean(map(float, sample_one)) else: mean = stats.tmean(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(mean) elif test_id.strip() == "tmin": if mf is 0: min = stats.tmin(map(float, sample_one)) else: min = stats.tmin(map(float, sample_one), lowerlimit=mf, inclusive=args.inclusive) cols.append(min) elif test_id.strip() == "tmax": if nf is 0: max = stats.tmax(map(float, sample_one)) else: max = 
stats.tmax(map(float, sample_one), upperlimit=nf, inclusive=args.inclusive)
            cols.append(max)
        elif test_id.strip() == "tvar":
            # `nf is 0` / `mf is 0` compared identity against float defaults and was
            # always False; value equality is what is meant here and below.
            if nf == 0 and mf == 0:
                var = stats.tvar(map(float, sample_one))
            else:
                var = stats.tvar(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(var)
        elif test_id.strip() == "tstd":
            if nf == 0 and mf == 0:
                std = stats.tstd(map(float, sample_one))
            else:
                std = stats.tstd(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(std)
        elif test_id.strip() == "tsem":
            if nf == 0 and mf == 0:
                s = stats.tsem(map(float, sample_one))
            else:
                s = stats.tsem(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(s)
        elif test_id.strip() == "scoreatpercentile":
            if nf == 0 and mf == 0:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two),
                    interpolation_method=args.interpolation)
            else:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), (mf, nf),
                    interpolation_method=args.interpolation)
            for list in s:
                cols.append(list)
        elif test_id.strip() == "relfreq":
            if nf == 0 and mf == 0:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b)
            else:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b, (mf, nf))
            for list in rel:
                cols.append(list)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "binned_statistic":
            if nf == 0 and mf == 0:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one), map(float, sample_two),
                    statistic=args.statistic, bins=args.b)
            else:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one), map(float, sample_two),
                    statistic=args.statistic, bins=args.b, range=(mf, nf))
            cols.append(st)
            cols.append(b_edge)
            cols.append(b_n)
        elif test_id.strip() == "threshold":
            if nf == 0 and mf == 0:
                o = stats.threshold(map(float, sample_one), newval=args.new)
            else:
                o = stats.threshold(map(float, sample_one), mf, nf, newval=args.new)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trimboth":
            o = stats.trimboth(map(float, sample_one), proportiontocut=args.proportiontocut)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trim1":
            t1 = stats.trim1(map(float, sample_one), proportiontocut=args.proportiontocut, tail=args.tail)
            for list in t1:
                cols.append(list)
        elif test_id.strip() == "histogram":
            if nf == 0 and mf == 0:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b)
            else:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b, (mf, nf))
            cols.append(hi)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "cumfreq":
            if nf == 0 and mf == 0:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b)
            else:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b, (mf, nf))
            cols.append(cum)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "boxcox_normmax":
            if nf == 0 and mf == 0:
                ma = stats.boxcox_normmax(map(float, sample_one))
            else:
                ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf), method=args.method)
            cols.append(ma)
        elif test_id.strip() == "boxcox":
            if imbda == 0:
                box, ma, ci = stats.boxcox(map(float, sample_one), alpha=args.alpha)
                cols.append(box)
                cols.append(ma)
                cols.append(ci)
            else:
                box = stats.boxcox(map(float, sample_one), imbda, alpha=args.alpha)
                cols.append(box)
        elif test_id.strip() == "histogram2":
            h2 = stats.histogram2(map(float, sample_one), map(float, sample_two))
            for list in h2:
                cols.append(list)
        elif test_id.strip() == "ranksums":
            z_statistic, p_value = stats.ranksums(map(float, sample_one), map(float, sample_two))
            cols.append(z_statistic)
            cols.append(p_value)
        elif test_id.strip() == "ttest_1samp":
            t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two))
            for list in t:
                cols.append(list)
            for list in prob:
                cols.append(list)
        elif test_id.strip() == "ansari":
            AB, p_value = stats.ansari(map(float, sample_one), map(float, sample_two))
            cols.append(AB)
            cols.append(p_value)
        elif test_id.strip() == "linregress":
            slope, intercept, r_value, p_value, stderr = stats.linregress(
                map(float, sample_one), map(float, sample_two))
            cols.append(slope)
            cols.append(intercept)
            cols.append(r_value)
            cols.append(p_value)
            cols.append(stderr)
        elif test_id.strip() == "pearsonr":
            cor, p_value = stats.pearsonr(map(float, sample_one), map(float, sample_two))
            cols.append(cor)
            cols.append(p_value)
        elif test_id.strip() == "pointbiserialr":
            r, p_value = stats.pointbiserialr(map(float, sample_one), map(float, sample_two))
            cols.append(r)
            cols.append(p_value)
        elif test_id.strip() == "ks_2samp":
            d, p_value = stats.ks_2samp(map(float, sample_one), map(float, sample_two))
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "mannwhitneyu":
            mw_stats_u, p_value = stats.mannwhitneyu(
                map(float, sample_one), map(float, sample_two),
                use_continuity=args.mwu_use_continuity)
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "zmap":
            z = stats.zmap(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "ttest_ind":
            mw_stats_u, p_value = stats.ttest_ind(
                map(float, sample_one), map(float, sample_two), equal_var=args.equal_var)
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "ttest_rel":
            t, prob = stats.ttest_rel(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(t)
            cols.append(prob)
        elif test_id.strip() == "mood":
            z, p_value = stats.mood(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(z)
            cols.append(p_value)
        elif test_id.strip() == "shapiro":
            W, p_value, a = stats.shapiro(map(float, sample_one), map(float, sample_two), args.reta)
            cols.append(W)
            cols.append(p_value)
            for list in a:
                cols.append(list)
        elif test_id.strip() == "kendalltau":
            k, p_value = stats.kendalltau(
                map(float, sample_one), map(float, sample_two),
                initial_lexsort=args.initial_lexsort)
            cols.append(k)
            cols.append(p_value)
        elif test_id.strip() == "entropy":
            s = stats.entropy(map(float, sample_one), map(float, sample_two), base=args.base)
            cols.append(s)
        elif test_id.strip() == "spearmanr":
            if sample2 == 1:
                rho, p_value = stats.spearmanr(map(float, sample_one), map(float, sample_two))
            else:
                rho, p_value = stats.spearmanr(map(float, sample_one))
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "wilcoxon":
            if sample2 == 1:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one), map(float, sample_two),
                    zero_method=args.zero_method, correction=args.correction)
            else:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    zero_method=args.zero_method, correction=args.correction)
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "chisquare":
            if sample2 == 1:
                rho, p_value = stats.chisquare(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            else:
                rho, p_value = stats.chisquare(map(float, sample_one), ddof=args.ddof)
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "power_divergence":
            if sample2 == 1:
                stat, p_value = stats.power_divergence(
                    map(float, sample_one), map(float, sample_two),
                    ddof=args.ddof, lambda_=args.lambda_)
            else:
                stat, p_value = stats.power_divergence(map(float, sample_one), ddof=args.ddof, lambda_=args.lambda_)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "theilslopes":
            if sample2 == 1:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), map(float, sample_two), alpha=args.alpha)
            else:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), alpha=args.alpha)
            cols.append(mpe)
            cols.append(met)
            cols.append(lo)
            cols.append(up)
        elif test_id.strip() == "combine_pvalues":
            if sample2 == 1:
                stat, p_value = stats.combine_pvalues(
                    map(float, sample_one), method=args.med, weights=map(float, sample_two))
            else:
                stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "obrientransform":
            ob = stats.obrientransform(*b_samples)
            for list in ob:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "f_oneway":
            f_value, p_value = stats.f_oneway(*b_samples)
            cols.append(f_value)
            cols.append(p_value)
        elif test_id.strip() == "kruskal":
            h, p_value = stats.kruskal(*b_samples)
            cols.append(h)
            cols.append(p_value)
        elif test_id.strip() == "friedmanchisquare":
            fr, p_value = stats.friedmanchisquare(*b_samples)
            cols.append(fr)
            cols.append(p_value)
        elif test_id.strip() == "fligner":
            xsq, p_value = stats.fligner(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(xsq)
            cols.append(p_value)
        elif test_id.strip() == "bartlett":
            T, p_value = stats.bartlett(*b_samples)
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "levene":
            w, p_value = stats.levene(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(w)
            cols.append(p_value)
        elif test_id.strip() == "median_test":
            stat, p_value, m, table = stats.median_test(
                ties=args.ties, correction=args.correction, lambda_=args.lambda_, *b_samples)
            cols.append(stat)
            cols.append(p_value)
            cols.append(m)
            cols.append(table)
            for list in table:
                elements = ",".join(map(str, list))
                cols.append(elements)
        outfile.write("%s\n" % "\t".join(map(str, cols)))
    outfile.close()
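# Note: scipy.stats.histogram, used by the "histogram" branch above, was
# deprecated in SciPy 0.17 and removed in 1.0. A minimal compatibility sketch,
# assuming callers only rely on the old (count, lowerlimit, binsize,
# extrapoints) return convention; the name histogram_compat is hypothetical.
import numpy as np

def histogram_compat(a, numbins=10, defaultlimits=None, weights=None):
    a = np.ravel(np.asarray(a, dtype=float))
    if defaultlimits is None:
        # The old function widened the data range slightly so that no point
        # falls exactly on the outermost bin edges.
        s = (a.max() - a.min()) / (2.0 * (numbins - 1.0)) if numbins > 1 else 0.5
        defaultlimits = (a.min() - s, a.max() + s)
    count, edges = np.histogram(a, bins=numbins, range=defaultlimits, weights=weights)
    extrapoints = int(((a < defaultlimits[0]) | (a > defaultlimits[1])).sum())
    return count, defaultlimits[0], edges[1] - edges[0], extrapoints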
# Hint: Try using `stats.histogram`. There is also `pylab.hist` which makes life easier.
from scipy.stats import histogram
from scipy.stats import norm
# Note that scipy.stats.histogram behaves differently from the
# numpy.histogram imported by default in this %pylab session.

# your code goes here
random_values = frozen_normal.rvs(size=5000)
num_bins = 100
'''
figure()
h = hist(random_values, bins=100)
show()
print(h[0])
print(h[1])
'''
bin_counts, min_bin, bin_width, outside = histogram(random_values, numbins=num_bins)
bin_x = min_bin + bin_width * arange(num_bins)
hist_pdf = bin_counts / (len(random_values) * bin_width)
mean_est, std_est = norm.fit(random_values)
print "estimate of mean, std:", mean_est, std_est
hist(random_values, bins=100, normed=True)
figure()
plot(bin_x, norm(mean_est, std_est).pdf(bin_x), 'r', linewidth=2)
show()
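# On stacks where scipy.stats.histogram no longer exists, the same density
# estimate can be sketched with numpy.histogram; density=True normalizes so
# the bar areas integrate to one, matching hist_pdf computed manually above.
# This assumes `random_values` from the exercise above.
import numpy as np
from scipy.stats import norm

counts, edges = np.histogram(random_values, bins=100, density=True)
centers = 0.5 * (edges[:-1] + edges[1:])  # bin centers, analogous to bin_x
mu, sigma = norm.fit(random_values)       # same ML estimates as above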
def __init__(self, parent, layer, data, **kwargs):
    PlottingCanvas.__init__(self, parent, data)
    try:
        if isinstance(layer, str) or isinstance(layer, unicode):
            # in case of weights histogram
            self.layer_name = layer
        else:
            self.layer_name = layer.name
        self.isAutoScale = False
        self.intervals = 7
        #self.title = "Histogram (%s)" % (self.layer_name)
        self.title = ""
        self.x_label = data.keys()[0]
        self.y_label = "Counts in bins"
        self.data = data[self.x_label]
        self.enable_axis_x = False
        self.draw_full_axis = False
        self.margin_right = 250  # for legend
        # create a dict for input data for Brushing
        self.data_dict = sorted(self.data, key=self.data.get)  # [obj_id]
        sorted_data = sorted(self.data.values())  # [value]
        if self.x_label == 'Connectivity':
            self.intervals = len(set(sorted_data))
            self.intervals = sorted_data[-1] - sorted_data[0] + 1
            if self.intervals > 50:
                self.enable_axis_x = True
                self.margin_right = 40
        if self.intervals > 1:
            self.hist, low_range, binsize, extrapoints = histogram(sorted_data, self.intervals)
        else:
            self.hist = np.array([len(sorted_data)])
        cnt = 0
        bin_idx = 0
        self.bin_index = {}  # key: obj_id, value: bin_idx
        for n in self.hist:
            for i in range(int(n)):
                obj_id = self.data_dict[cnt]
                self.bin_index[obj_id] = bin_idx
                cnt += 1
            bin_idx += 1
        data_min, data_max = sorted_data[0], sorted_data[-1]
        if self.x_label == 'Connectivity':
            #unique_num_neighbors = list(set(sorted_data))
            self.data_intervals = []
            for n in range(sorted_data[0], sorted_data[-1] + 1):
                self.data_intervals.append((n, n))
        else:
            end_pos = np.cumsum(self.hist)
            start_pos = end_pos - self.hist + 1
            self.data_intervals = [(start_pos[i], end_pos[i]) for i in range(len(self.hist))]
        # a NxN matrix
        self.x_min = 1
        self.x_max = self.intervals + 1
        self.y_min = 0
        self.y_max = np.max(self.hist) + 1
        self.extent = (self.x_min, self.y_min, self.x_max, self.y_max)
        self.status_bar = None  # self.parentFrame.status_bar
        # color schema: from blue to red
        self.gradient_color = GradientColor(gradient_type='rdyibu')
        self.color_matrix = []
        for i in range(self.intervals):
            p = float(i + 1) / self.intervals
            self.color_matrix.append(self.gradient_color.get_color_at(p))
        self.selected_obj_ids = []
    except Exception as err:
        self.ShowMsgBox("Histogram could not be created. "
                        "Please select a numeric variable. Details: " + str(err.message))
        self.isValidPlot = False
        self.parentFrame.Close(True)
        return None
    # linking-brushing events
    self.Register(stars.EVT_OBJS_SELECT, self.OnObjsSelected)
    self.Register(stars.EVT_OBJS_UNSELECT, self.OnNoObjSelect)
def analyzeResults(self):
    self.matched = []  # for paired comparisons
    for i, f in enumerate(self.pickles):
        s = f.split(os.sep)[-1]
        if not fnmatch.filter(itertools.chain.from_iterable(self.matched), "*" + s):
            self.matched.append(fnmatch.filter(self.pickles, "*" + s))
        plotname = f.replace('.pkl', '.xplt')
        print("\n... Analyzing results for {:s}".format(plotname))
        results = febio.FebPlt(plotname)
        stress = np.zeros((len(self.data[f]['elements']), 3, 3), float)
        strain = np.copy(stress)
        # material element volumes
        mvolumes = np.zeros(len(self.data[f]['elements']), float)
        # spatial element volumes
        svolumes = np.copy(mvolumes)
        nnodes = len(list(results.NodeData.keys()))
        displacement = np.zeros((nnodes, 3))
        for j, n in enumerate(self.data[f]['nodes']):
            tmp = results.NodeData[j + 1]['displacement'][-1, :]
            displacement[j, :] = [tmp[0], tmp[1], tmp[2]]
        pstress = []
        pstressdir = []
        pstrain = []
        pstraindir = []
        for j, e in enumerate(self.data[f]['elements']):
            tmp = results.ElementData[j + 1]['stress'][-1, :]
            stress[j, :, :] = [[tmp[0], tmp[3], tmp[5]],
                               [tmp[3], tmp[1], tmp[4]],
                               [tmp[5], tmp[4], tmp[2]]]
            # material coordinates
            X = np.zeros((4, 3), float)
            # spatial coordinates
            x = np.zeros((4, 3), float)
            for k in range(4):
                X[k, :] = self.data[f]['nodes'][e[k] - 1]
                x[k, :] = (X[k, :] + results.NodeData[e[k]]['displacement'][-1, :])
            # set up tangent space
            W = np.zeros((6, 3), float)
            w = np.zeros((6, 3), float)
            for k, c in enumerate([(0, 1), (0, 2), (0, 3), (1, 3), (2, 3), (1, 2)]):
                W[k, :] = X[c[1], :] - X[c[0], :]
                w[k, :] = x[c[1], :] - x[c[0], :]
            dX = np.zeros((6, 6), float)
            ds = np.zeros((6, 1), float)
            for k in range(6):
                for l in range(3):
                    dX[k, l] = 2 * W[k, l] ** 2
                dX[k, 3] = 4 * W[k, 0] * W[k, 1]
                dX[k, 4] = 4 * W[k, 1] * W[k, 2]
                dX[k, 5] = 4 * W[k, 0] * W[k, 2]
                ds[k, 0] = (np.linalg.norm(w[k, :]) ** 2 -
                            np.linalg.norm(W[k, :]) ** 2)
            # solve for strain
            E = np.linalg.solve(dX, ds)
            # get volumes
            mvolumes[j] = old_div(np.abs(np.dot(W[0, :], np.cross(W[1, :], W[2, :]))), 6.0)
            svolumes[j] = old_div(np.abs(np.dot(w[0, :], np.cross(w[1, :], w[2, :]))), 6.0)
            strain[j, :, :] = [[E[0], E[3], E[5]],
                               [E[3], E[1], E[4]],
                               [E[5], E[4], E[2]]]
            # eigenvalues and eigenvectors of stress and strain tensors
            # eigenvectors are normalized
            eigstrain, eigstraindir = np.linalg.eigh(strain[j, :, :])
            order = np.argsort(eigstrain)
            eigstrain = eigstrain[order]
            eigstraindir /= np.linalg.norm(eigstraindir, axis=0, keepdims=True)
            eigstraindir = eigstraindir[:, order]
            pstrain.append(eigstrain)
            pstraindir.append(eigstraindir)
            eigstress, eigstressdir = np.linalg.eigh(stress[j, :, :])
            order = np.argsort(eigstress)
            eigstress = eigstress[order]
            eigstressdir /= np.linalg.norm(eigstressdir, axis=0, keepdims=True)
            eigstressdir = eigstressdir[:, order]
            pstress.append(eigstress)
            pstressdir.append(eigstressdir)
        pstress = np.array(pstress)
        pstressdir = np.array(pstressdir)
        pstrain = np.array(pstrain)
        pstraindir = np.array(pstraindir)
        # save reference volumes
        self.volumes.update({f: mvolumes})
        self.results['Effective Strain (von Mises)'].update(
            {f: np.sqrt(old_div(((pstrain[:, 2] - pstrain[:, 1]) ** 2 +
                                 (pstrain[:, 1] - pstrain[:, 0]) ** 2 +
                                 (pstrain[:, 2] - pstrain[:, 0]) ** 2), 2.0))})
        self.results['Maximum Compressive Strain'].update(
            {f: np.outer(pstrain[:, 0], [1, 1, 1]) * pstraindir[:, :, 0]})
        self.results['Maximum Tensile Strain'].update(
            {f: np.outer(pstrain[:, 2], [1, 1, 1]) * pstraindir[:, :, 2]})
        self.results['Maximum Shear Strain'].update(
            {f: 0.5 * (pstrain[:, 2] - pstrain[:, 0])})
        self.results['Volumetric Strain'].update(
            {f: old_div(svolumes, mvolumes) - 1.0})
        self.results['Effective Stress (von Mises)'].update(
            {f: np.sqrt(old_div(((pstress[:, 2] - pstress[:, 1]) ** 2 +
                                 (pstress[:, 1] - pstress[:, 0]) ** 2 +
                                 (pstress[:, 2] - pstress[:, 0]) ** 2), 2.0))})
        self.results['Maximum Compressive Stress'].update(
            {f: np.outer(pstress[:, 0], [1, 1, 1]) * pstressdir[:, :, 0]})
        self.results['Maximum Tensile Stress'].update(
            {f: np.outer(pstress[:, 2], [1, 1, 1]) * pstressdir[:, :, 2]})
        self.results['Maximum Shear Stress'].update(
            {f: 0.5 * (pstress[:, 2] - pstress[:, 0])})
        self.results['Pressure'].update(
            {f: old_div(np.sum(pstress, axis=1), 3.0)})
        self.results['Displacement'].update({f: displacement})
    for i, k in enumerate(self.outputs.keys()):
        if self.outputs[k].get():
            for m in self.matched:
                weights = old_div(self.volumes[m[0]], np.sum(self.volumes[m[0]]))
                for j, f in enumerate(m):
                    if len(self.results[k][f].shape) > 1:
                        dat = np.ravel(np.linalg.norm(self.results[k][f], axis=1))
                    else:
                        dat = np.ravel(self.results[k][f])
                    if self.analysis['Generate Histograms'].get():
                        IQR = np.subtract(*np.percentile(dat, [75, 25]))
                        nbins = int(old_div(np.ptp(dat),
                                            (2 * IQR * dat.size ** (old_div(-1., 3.)))))
                        h = histogram(dat, numbins=nbins, weights=weights)
                        bins = np.linspace(h[1], h[1] + h[2] * nbins, nbins, endpoint=False)
                        self.histograms[k][f] = {'bins': bins, 'heights': h[0], 'width': h[2]}
                    if self.analysis['Tukey Boxplots'].get():
                        quantiles = np.zeros(3, float)
                        for n, q in enumerate([0.25, 0.5, 0.75]):
                            quantiles[n] = quantile_1D(dat, weights, q)
                        self.boxwhiskers[k][f] = {'quantiles': quantiles, 'data': dat}
                    if self.analysis['Calculate Differences'].get():
                        for c in itertools.combinations(m, 2):
                            if len(self.results[k][c[0]].shape) > 1:
                                dat1 = np.ravel(np.linalg.norm(self.results[k][c[0]], axis=1))
                                dat2 = np.ravel(np.linalg.norm(self.results[k][c[1]], axis=1))
                            else:
                                dat1 = np.ravel(self.results[k][c[0]])
                                dat2 = np.ravel(self.results[k][c[1]])
                            difference = dat2 - dat1
                            wrms = np.sqrt(np.average(difference ** 2, weights=weights))
                            self.differences[k][c[1] + "MINUS" + c[0]] = {
                                'difference': difference, 'weighted RMS': wrms}
    self.saveResults()
    print("... ... Analysis Complete")
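# The bin count in the histogram branch above follows the Freedman-Diaconis
# rule (bin width h = 2*IQR*n**(-1/3), bins = range/h). A minimal standalone
# sketch of that rule, with a guard for zero IQR that the code above omits:
import numpy as np

def fd_numbins(data):
    data = np.ravel(data)
    iqr = np.subtract(*np.percentile(data, [75, 25]))
    if iqr == 0:
        return 1
    h = 2.0 * iqr * data.size ** (-1.0 / 3.0)
    return max(1, int(np.ceil(np.ptp(data) / h)))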
# -*- coding: utf-8 -*-
import numpy as np
from scipy import stats
from matplotlib import pyplot as plt

N = np.loadtxt('statistik.txt')
print("Mean of the count rates: {0:.3f}".format(N.mean()))
print("Standard deviation of the count rates: {0:.3f}".format(np.std(N, ddof=1)))

# split the data into 7 bins
binnum = 7
n, low_range, binsize, extra = stats.histogram(N, binnum)
ind = np.arange(binnum)
width = 0.50

x = np.linspace(0, 7)
norm = stats.norm(4, 1.5).pdf(x)
poisson = stats.poisson(5).pmf(ind)

plt.plot(x, norm, "r", label="normal distribution")
plt.bar(ind, n / 100., width, color="blue", label="measured distribution")
plt.bar(ind + 0.5, poisson, width, color="green", label="Poisson distribution")
plt.title("Statistical analysis of alpha decay")
plt.ylabel("relative frequency")
plt.xticks(ind + width, ('1', '2', '3', '4', '5', '6', '7'))
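# A quantitative complement to the visual comparison above: a chi-square
# goodness-of-fit sketch against the Poisson(5) model (assumes n, ind, and N
# from the script above; the rescaling keeps the observed and expected sums
# equal, as scipy.stats.chisquare expects).
observed = n.astype(float)
expected = stats.poisson(5).pmf(ind) * N.size
expected *= observed.sum() / expected.sum()
chi2, p = stats.chisquare(observed, expected)
print("chi2 = {0:.2f}, p = {1:.3f}".format(chi2, p))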
@author: Sat Kumar Tomer
@website: www.ambhas.com
@email: [email protected]
"""

# import required modules
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st

# generate some synthetic data
x = np.random.randn(100)

# compute histogram
n, low_range, binsize, extrapoints = st.histogram(x)
upper_range = low_range + binsize * len(n)
bins = np.linspace(low_range, upper_range, len(n) + 1)
#bins = 0.5*(bins[:-1] + bins[1:])

# plot the histogram
plt.clf()
plt.bar(bins[:-1], n, width=0.4, color='red')
plt.xlabel('X', fontsize=20)
plt.ylabel('number of data points in the bin', fontsize=15)
plt.savefig('/home/tomer/my_books/python_in_hydrology/images/hist.png')

# compute and plot the relfreq
relfreqs, lowlim, binsize, extrapoints = st.relfreq(x)
plt.clf()
plt.bar(bins[:-1], relfreqs, width=0.4, color='magenta')
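# st.histogram and st.relfreq were removed in SciPy 1.0; a minimal sketch of
# the same quantities with numpy.histogram (assumes x from above; 10 bins was
# the old default).
counts, edges = np.histogram(x, bins=10)
relfreqs_np = counts / float(x.size)  # relative frequency per bin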
# module-level setup assumed by _quantize
import logging
from scipy.cluster.vq import kmeans2
from scipy.stats import histogram

logger = logging.getLogger(__name__)

def _quantize(features, num_clusters, iterations):
    logger.info('Obtaining codebook')
    cb, neigh = kmeans2(features, num_clusters, iter=iterations, minit='points')
    logger.info('Finished quantizing')
    dist, _, _, _ = histogram(neigh, numbins=num_clusters)
    return cb, dist
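# A hypothetical usage sketch for _quantize: 500 random 128-dimensional
# descriptors quantized into a 64-word codebook; word_counts is the
# bag-of-words histogram over the clusters. Shapes and names are
# illustrative, not from the original code.
import numpy as np

np.random.seed(0)
features = np.random.rand(500, 128)
codebook, word_counts = _quantize(features, num_clusters=64, iterations=20)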
from numpy import arange, loadtxt
import pylab
from scipy import stats

# Distance light traveled in Newcomb's experiment
distance = 7400.0

# Read the measurements from the data file.
# There are a number of comments at the top of the file marked with "#"
measured_time = loadtxt('speed_of_light.dat', comments="#")

# measurements were in nanoseconds difference from 24800 ns.
measured_time += 24800.0

# Convert measured times to measured velocities.
measured_velocity = distance / measured_time * 10.0  # m/ns * 10 == 1e8 m/s

# Histogram the measured velocities.
bin_counts, min_val, width, outside = stats.histogram(measured_velocity, numbins=30)
print 'min val:', min_val

# For the plot, we want to know the velocity for each bin.
binned_velocity = min_val + arange(len(bin_counts)) * width
print binned_velocity

# Plot a bar plot of the histogrammed data.
pylab.hold(False)
pylab.bar(binned_velocity, bin_counts, width=width)
pylab.xlabel("velocity (1e8 m/s)")
pylab.ylabel("counts")
pylab.title("Newcomb's Speed of Light Measurement Histogram")
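# Sketch: compare the sample mean against the modern value of c, about
# 2.9979e8 m/s, i.e. 2.9979 in the plot's units (assumes measured_velocity
# from above; print syntax kept consistent with the Python 2 snippet).
mean_velocity = measured_velocity.mean()
print 'mean velocity (1e8 m/s):', mean_velocity
print 'modern value  (1e8 m/s):', 2.9979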
def __str__(self):
    return ('%f +/- %f (SEM)\nhist: %s' %
            (self.mean, self.SEM, stats.histogram(self.samples).count))
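# Note: attribute access on the result (.count) only works on SciPy versions
# where stats.histogram returns a namedtuple; positional indexing is a safer
# sketch on any pre-1.0 release (`samples` here is illustrative data).
from scipy import stats

samples = [1.0, 2.0, 2.5, 3.0]
counts = stats.histogram(samples)[0]  # same value as .count where available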
X = vstack(X)
figure()
for i, r in enumerate(rlist):
    subplot(rlist.size, 1, i + 1)
    plot(X[:, i], label="r = %.2f" % r)
    ylim(0, 1)
    yticks("")
    xticks("")
    legend(loc=10)
show()

#! Bifurcation diagram
#!----------------------
#! To study the long-term behavior of the sequence we can plot the values
#! it visits after many iterations, as a function of the parameter
rlist = linspace(2, 4, 800)
X = [0.5 * ones_like(rlist)]
for i in arange(0, 10000):
    X += [f(X[-1], rlist)]
X = hsplit(vstack(X[-2000:]), rlist.size)

from scipy import stats

H = map(lambda Z: stats.histogram(Z, defaultlimits=(0, 1), numbins=300)[0], X)
H = map(lambda Z: 1 - Z / Z.max(), H)
H = vstack(H)
figure()
imshow(rot90(H), aspect="auto", extent=[2, 4, 0, 1])
bone()
xlabel("r")
ylabel(r"$X_{n \rightarrow \infty}$")
show()
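# The same intensity matrix without the removed stats.histogram: a sketch
# using numpy.histogram (list comprehensions also keep this working under
# Python 3, where map returns an iterator; assumes X from above).
import numpy as np

H = [np.histogram(Z, bins=300, range=(0, 1))[0] for Z in X]
H = np.vstack([1.0 - h / float(h.max()) for h in H])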
def count_hist(self):
    h = stats.histogram(self.pixmatrix.flatten(), numbins=30)
    self.full_description['histogram'] = h
    return h
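# A hypothetical NumPy-only equivalent of the method above, for stacks where
# scipy.stats.histogram is unavailable; it returns just the bin counts.
import numpy as np

def count_hist_np(pixmatrix, numbins=30):
    counts, edges = np.histogram(np.ravel(pixmatrix), bins=numbins)
    return counts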