Code example #1
File: monte_carlo.py Project: jietang/nflsurvivor
def simulate_seasons(N, start_week, players):
    winner_equity = defaultdict(float)
    last_weeks = []
    for i in range(N):
        if i % 1000 == 0:
            print i
        last_week, winners = simulate_season(start_week, players)
        for winner in winners:
            winner_equity[winner] += 1. / len(winners)
        last_weeks.append(last_week)

    pp({p: "%.2f" % (100 * e / N) for p, e in winner_equity.iteritems()})
    print stats.describe(last_weeks)
    print stats.histogram(last_weeks)
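All of the examples on this page use scipy.stats.histogram, which was deprecated in SciPy 0.17 and removed in SciPy 1.0. A minimal compatibility sketch built on numpy.histogram (an approximation, not SciPy's exact algorithm: SciPy padded the default limits slightly, which this stand-in skips):

import numpy as np

def histogram_compat(a, numbins=10, defaultlimits=None):
    # Returns the same (counts, lowerlimit, binsize, extrapoints) 4-tuple
    # that the examples on this page unpack.
    a = np.asarray(a, dtype=float).ravel()
    if defaultlimits is None:
        defaultlimits = (a.min(), a.max())  # sketch: no range padding
    lo, hi = defaultlimits
    counts, _ = np.histogram(a, bins=numbins, range=(lo, hi))
    binsize = (hi - lo) / float(numbins)
    extrapoints = int(((a < lo) | (a > hi)).sum())
    return counts, lo, binsize, extrapoints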
Code example #2
def var(theta, Nbins=360):
	"""Sample circular variance, second moment
	
    Calculated using the minimum variance method with moving cut points.
    See: Weber RO (1997). J. Appl. Meteorol. 36(10), 1403-1415.

    Input: theta - array of angle values in radians
           Nbins - number of intervals across [0, 2pi] to minimize over
    Returns: circular variance
	"""
	N = len(theta)
	delta_t = 2 * pi / Nbins
	lims = (0, 2 * pi)
	x = arange(delta_t, 2*pi + delta_t, delta_t)
	n, xmin, w, extra = histogram(theta, numbins=Nbins, defaultlimits=lims)
	
	tbar = empty((Nbins,), 'd')
	S = empty((Nbins,), 'd')
	s2 = empty((Nbins,), 'd')
	
	tbar[0] = (x*n).sum() / N											# A1
	S[0] = ((x**2)*n).sum() / (N - 1)									# A2
	s2[0] = S[0] - N * (tbar[0]**2) / (N - 1)							# A3
	
	for k in xrange(1, Nbins):
		tbar[k] = tbar[k-1] + (2*pi) * n[k-1] / N						# A4
		S[k] = S[k-1] + (2*pi) * (2*pi + 2*x[k-1]) * n[k-1] / (N - 1)	# A5
		s2[k] = S[k] - N * (tbar[k]**2) / (N - 1)						# A6
	
	return s2.min()
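A usage sketch, assuming this module does `from numpy import *` and `from scipy.stats import histogram` (and runs under Python 2, given the xrange above):

import numpy as np
theta = np.random.vonmises(0.0, 2.0, size=1000) % (2 * np.pi)  # angles in [0, 2pi)
print var(theta)  # minimum sample circular variance over the moving cut points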
Code example #3
File: kstat.py Project: AraiKensuke/mscripts
def percentile(x, countdat=False):
    if countdat:
        lo = min(x)
        hi = max(x)
        nbins = hi - lo + 1
        hf, low, bs, out = _ss.histogram(x, numbins=nbins, defaultlimits=(lo, hi + 1))

        datn = int(_N.sum(hf))
        pctl   = _N.zeros((hi - lo + 1, 2))
        tot  = 0
        for i in xrange(len(hf)):
            tot += hf[i]
            pctl[i, 0] = lo + i
            pctl[i, 1] = float(tot + 1) / float(datn + 1)

    else:
        sx = _N.sort(x)
        N  = len(sx)

        pctl = _N.zeros((N, 2))

        for n in xrange(N):
            pctl[n, 0]      =  sx[n]
            pctl[n, 1]      =  float(n + 1) / float(N + 1)
            
    return pctl
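A usage sketch, assuming the module's imports are numpy as _N and scipy.stats as _ss (Python 2, given the xrange):

pctl = percentile(_N.random.randn(500))  # continuous branch: sorted values vs. plotting positions
cpctl = percentile(_N.array([1, 2, 2, 3, 3, 3]), countdat=True)  # integer count-data branch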
Code example #4
def myFit(data,nBins=30):

        # data binning
        freqObs,xMin,dx,nOut = sciStat.histogram(data,nBins)

        # prepare observed x,y-Values
        N = len(data)
        xVals = [xMin + (i+0.5)*dx for i in range(nBins)]
        yVals = [freqObs[i]/(N*dx) for i in range(nBins)]
        
        # define objective function as the vertical difference
        # between the observed data and the fit-function
        fitFunc = lambda s,x: sciStat.rayleigh.pdf(x,scale=s)
        objFunc = lambda s,x,y: (y - fitFunc(s,x))

        # set initial guess for the fit-parameter and perform
        # least squares fit
        s0=7.
        s,flag = sciOpt.leastsq(objFunc,s0,args=(xVals,yVals))

        for i in range(nBins):
                print xVals[i],yVals[i], fitFunc(s[0],xVals[i])

        print '#',s[0],sum(map(lambda x,y: objFunc(s[0],x,y)**2,xVals,yVals))
        return s[0]
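A usage sketch (Python 2), assuming the imports behind this snippet are scipy.stats as sciStat and scipy.optimize as sciOpt:

import numpy
numpy.random.seed(0)
data = sciStat.rayleigh.rvs(scale=7.0, size=5000)
s_hat = myFit(data, nBins=30)  # estimate should land near the true scale of 7.0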
Code example #5
File: mi.py Project: tomachalek/marg
def mutual_information(data1, data2, domain=256, smoothing=0, log_base=2):
    """
    Mutual information for greyscale images
    
    Parameters
    ----------
    
    data1 : ndarray
        first data array
    
    data2 : ndarray
        second data array (size is expected to be the same as in case of data1)
    
    domain : int, optional (default is 256)
        value domain (e.g. all available grey values in case of image)
    
    smoothing : int
        value "k" in additive (aka Laplace) smoothing
    
    log_base : int, optional (default is 2)
        base of the logarithm used when summing the mutual information terms
    """
    
    m_hist = np.matrix(np.zeros((domain, domain)))
    img1_un = np.ravel(data1)
    img2_un = np.ravel(data2)
    
    # return entropy(histogram(np.ravel(image), numbins=256, defaultlimits=(0, 255))[0])
    
    d1_hist = histogram(img1_un, numbins=domain, defaultlimits=(0, domain - 1))[0]
    d2_hist = histogram(img2_un, numbins=domain, defaultlimits=(0, domain - 1))[0]
    for i in range(np.size(img1_un)):
        color1 = int(round(img1_un[i]))
        color2 = int(round(img2_un[i]))
        m_hist[color1, color2] += 1

    d1_hist = (d1_hist + smoothing) /  float(len(img1_un) + smoothing * domain)
    d2_hist = (d2_hist + smoothing) /  float(len(img2_un) + smoothing * domain)
    m_hist = (np.ravel(m_hist) + smoothing) /  float(len(img1_un) + smoothing * domain * domain)
    m_hist = np.resize(m_hist, (domain, domain))
    ans = 0
    for i in range(0, domain):
        for j in range(0, domain):
            if m_hist[i][j] == 0 or d1_hist[i] == 0 or d2_hist[j] == 0:
                continue
            else:
                tmp = m_hist[i][j] * math.log(m_hist[i][j] / d1_hist[i] / d2_hist[j], log_base)
                ans += tmp
    return ans
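A usage sketch, assuming numpy as np, math, and scipy.stats' histogram are imported as this module implies (Python 2):

img = np.random.randint(0, 256, size=(64, 64))
print mutual_information(img, img)  # I(X;X) = H(X): maximal for identical inputs
print mutual_information(img, np.random.randint(0, 256, size=(64, 64)))  # near 0 for independent noise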
Code example #6
File: entropy_map.py Project: tomachalek/marg
def tile_entropy(image, coords, size):
    tmp = []
    for i in range(coords[0], coords[0] + size[0]):
        for j in range(coords[1], coords[1] + size[1]):
            #print("(%d, %d" % (i, j))
            tmp.append(image[i][j])
    h = histogram(tmp, numbins=256, defaultlimits=(0, 255))[0]
    return entropy(h)
Code example #7
File: byteimage.py Project: eik18/byteimage
def calcentropy(v_array):
	'''
	Takes an array, builds a histogram with stats.histogram, and runs the
	bin counts through stats.entropy with base=2, so the result is in bits
	(the old @todo about nats vs. bits is already addressed below).
	'''
	#v_array=numpy.array(v_array)
	#v_array=numpy.hstack(v_array)
	v_hist=stats.histogram(v_array)
	return stats.entropy(v_hist[0],base=2)
Code example #8
File: CombineResults.py Project: jinpa/MOOC-data
def writeHistogram(x,path,limits = None):
    hist, lowRange, binSize, extra = histogram(x,numbins = NUMBINS, defaultlimits = limits)
    with open(path,'wt') as fid:
        low = lowRange
        hi = lowRange + binSize
        for freq in hist:
            fid.write(str(low) + ', ' + str(hi) + ', ' + str(freq) + '\n')
            low += binSize
            hi += binSize
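A usage sketch: NUMBINS is a module-level constant that writeHistogram expects to exist (the value below is an assumption):

import numpy as np
NUMBINS = 20
writeHistogram(np.random.randn(1000), "hist.csv", limits=(-4, 4))
# each output line reads "<bin low>, <bin high>, <count>"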
Code example #9
File: entropy_map.py Project: tomachalek/marg
    def calc_point(self, x, y):
        x1 = max(0, x - self.radius)
        x2 = min(np.size(self.image, 0) - 1, x + self.radius)
        y1 = max(0, y - self.radius)
        y2 = min(np.size(self.image, 1) - 1, y + self.radius)
        sub = np.ravel(self.image[x1:x2+1, y1:y2+1])
        #print(histogram(sub, numbins=256, defaultlimits=(0, 255)))
        ent = entropy(histogram(sub, numbins=256, defaultlimits=(0, 255))[0])
        self.map[x1:x2+1, y1:y2+1] = (self.map[x1:x2+1, y1:y2+1] + ent) / 2
        return ent
Code example #10
def featureSplitValues(fn, fm, splits):

    fo = open(fn)
    data = json.load(fo)
    fo.close()

    fo = open(fm)

    # colheaders=fo.next().rstrip().split("\t")[1:]
    # skip the first line

    fo.next()
    rowheaders = []

    # read in the feature names

    for line in fo:
        rowheaders.append(line.rstrip().split('\t')[0])

    # read in the splits
    # each line is feature_id_integer split1 split2 ... (varying length)

    split_entries = {}
    fo = open(splits)
    for line in fo:
        vs = [float(v) for v in line.rstrip().split('\t')]
        split_entries[int(vs[0])] = np.array(vs[1:])

    fo.close()

    matrix = []
    split_list = []

    for f_index in split_entries:
        split_list = split_entries[f_index]
        # don't worry about very few splits

        (bins, low, binsize,extra) = stats.histogram(split_list)

        bin_list = []
        significant = False

        for (index, count) in enumerate(bins):
            if count > 50:
                significant = True
            bin_list.append({ "position" : low + (binsize * index),
                            "count" : count
                            })
        if significant:
            feature_name = rowheaders[f_index]
            split_line = [feature_name]
            split_line.append(bin_list)
            matrix.append(split_line)

    return matrix
Code example #11
File: para_analysis.py Project: codeaudit/paraquery
def normalized_histogram_for_print(score_distribution, num_bins, denominator, limits):
    out = []
    if denominator > 0:
        h = stats.histogram(score_distribution, num_bins, limits)
        out.append("[" + str(h[1]) + " , " + str(h[2] * num_bins) + "], step = " + str(round(h[2], 2)) + " \n\t")
        for x in h[0]:
            out.append(str(round(x * 100.0 / denominator, 3)) + "%\t")
        out.append("\n\t")
        for x in range(num_bins):
            out.append("(" + str(round(h[1] + h[2] + x * h[2], 3)) + ")\t")
    out.append("\n")
    return "".join(out)
Code example #12
File: bondlengths.py Project: daliwa7/vasp_scripts
def main(args):
    if args:
        filename = args[0]
    else:
        filename = "CONTCAR"
    input_vasp_file(filename)
    vector_table = bond_vector_table(positions)
    all_neighbor_table = all_neighbor_distances(lat,vector_table)
    #write_table_to_file(all_neighbor_table,name)
    #for GePbTe sqs half, Ge-0,Pb-1,Te-2
    #for PbS-PbTe sqs half Pb-0 S-1 Te-2
#Edit these numbers!!!!!!!!!!
    a_list = [[0]]  
    b_list = [1,2]
    num_nn = [6]  # Number of nearest neighbors between atoms of type a and b
    bond_stats=[]
    bond_table = []
    for i in range(len(a_list)):
        table,stats = find_nearest_neighbors(all_neighbor_table,a_list[i],b_list,num_nn[i],num_atom_types,atom_type_list)
        bond_table.append(table)
        bond_stats.append(stats)
    bond_table = np.array(bond_table).flatten()
    print "Table of bond lengths"
    print np.sort(bond_table)
    print "Avg. bond length (Ang), Std. Dev. (Ang)"
    print bond_stats
    print 
    gauss = gaussian_kde(bond_table)
    #xdata = np.linspace(2.4,4.0,100)
    xdata = np.linspace(min(bond_table)-3.*bond_stats[0][1],max(bond_table)+3.*bond_stats[0][1],100)
    ydata = gauss(xdata)
    print "Gaussian distribution fit"
    for i in range(len(xdata)):
        print xdata[i],ydata[i]
    print
    nbins = 10
    hist,lowest,binsize,extra = histogram(bond_table,numbins=nbins)
    n = lowest
    print "histogram data"
    print n,"0.0"
    for i in range(len(hist)):
        print n,hist[i]
        n += binsize
        print n,hist[i]
    print n,"0.0"
    print 
Code example #13
def outputBinFiles(outfilename,plotData,xtickLabels,minMin,maxMax,nbins=50):
	
	histoArrays=[]
	
	_low_range=-100
	_binsize=-100
	_extrapoints=-1
	for col,xtickLabel in zip(plotData,xtickLabels):
		histoArray,low_range,binsize,extrapoints=histogram(col,numbins=nbins,defaultlimits=(minMin,maxMax))
		histoArrays.append(histoArray)
		
		if _binsize==-100:
			_binsize=binsize
			_low_range=low_range
		else:
			if _binsize!=binsize or low_range!=_low_range:
				print >> stderr,"inconsistent histo",_binsize,_low_range,histoArray,low_range,binsize,extrapoints
				exit(1)
				
		
		if extrapoints>0:
			print >> stderr,"extrapoints>0",histoArray,low_range,binsize,extrapoints
			exit(1)
	
	binLows=[]
	
	for i in range(0,nbins):
		binLows.append(_low_range + i*binsize)
	
	outfil=open(outfilename,"w")
	outv=["bins"]
	for binLow in binLows:
		outv.append(str(binLow))
	
	print >> outfil,"\t".join(outv)

	#now the data
	for xtickLabel,histoArray in zip(xtickLabels,histoArrays):
		outv=[xtickLabel]
		totalPoint=sum(histoArray)
		for v in histoArray:
			outv.append(str(float(v)/totalPoint))
	
		print >> outfil,"\t".join(outv)
			
	outfil.close()
Code example #14
def find_ref_values(cursor, values):

    ref_values  = {}   # for the genes showing a nice normal distribution
    for symbol, val_array in values.iteritems():
        if bad (cursor, symbol): continue
        if not val_array: continue

        description = stats.describe(val_array)
        [nobs, [min,max], mean, variance, skewness, kurtosis]  = description

        if mean < 3: continue

        if len(val_array) > 20:
            [teststat, pval] = stats.normaltest(val_array)
        else:
            teststat  = 100


        if teststat >= 4: continue

        if mean < 10: continue

        ref_values[symbol] = description

        continue # below is some descriptive output

        stdev = variance ** 0.5   # describe() returns the variance, not the std dev
        in_left_tail  = float (len([x for x in val_array if x < mean-2*stdev ]))/len(val_array)
        in_right_tail = float (len([x for x in val_array if x > mean+2*stdev ]))/len(val_array)
        description += (in_left_tail, in_right_tail)

        blurb(symbol, description, "normal candidate", sys.stdout);
        [hist_numpy, low, binsize, extrapoints] = stats.histogram (val_array)
        histogram = hist_numpy.tolist()
        i = 0
        bin_prev = low
        for val in histogram[:-1]:
            print " %5d %5d " % (int(bin_prev), int(bin_prev+binsize)),
            print " %5d " %  val
            i += 1
            bin_prev += binsize

        print

    return ref_values
Code example #15
File: abstats.py Project: memsql/dbbench-tools
def GetHistogramString(array, unit="", **kwargs):
    """
    Returns the values in array represented as a histogram.

    Summary:
        Generates a histogram using scipy.stats.histogram and renders it
        as a string.

    Arguments:
        array: A list of values.
        unit: The human readable string to be used as the unit for the values.
            For example, unit="us" would cause values to displayed as "10us".
        **kwargs: A dict containing parameters to be forwarded directly to
            scipy.stats.histogram as keyword args.
    """
    buckets, low_range, binsize, extrapoints = stats.histogram(array, **kwargs)
    hist = "%7.2f%s : " % (low_range, unit)
    for count in buckets:
        hist += GetBucketChar(count, max(buckets))
    hist += " : %7.2f%s" % ((low_range + binsize * (len(buckets) + 1)), unit)
    return hist
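GetBucketChar is not shown on this page; a hypothetical stand-in (an assumption, not the project's actual helper) that maps a bucket count onto a short density ramp:

def GetBucketChar(count, max_count):
    # Scale count into a ramp index; max_count maps to the densest character.
    ramp = " .:-=*#"
    return ramp[count * (len(ramp) - 1) // max(1, max_count)]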
Code example #16
File: marg.py Project: tomachalek/marg
def generate_random_segments(image, number, radius, entropy_hint=False, sample_size=1.5):
    """
    Generates selected number of random segments with selected radius. The distribution
    of number values is uniform.

    Parameters
    ----------
    image : numpy.ndarray
        2-dimensional array representing a grayscale image
    number : int
        number of segments
    radius : int
        "radius" of the segment (= number of pixels to the edge when walking from the central pixel
        with azimuth i*Pi/4)

    Returns
    -------

    """
    segments = []
    score_map = {}
    num_samples = number if entropy_hint is False else int(number * sample_size)
    domain = calculate_domain(image, radius)
    for i in range(num_samples):
        x = random.randint(domain[0][0], domain[1][0])
        y = random.randint(domain[0][1], domain[1][1])
        segment = generate_segment(image, (x, y), radius)
        if entropy_hint is True:
            ent = entropy(histogram(segment.image, numbins=256, defaultlimits=(0, 255))[0])
            score_map[ent] = segment
        else:
            segments.append(segment)

    if entropy_hint is True:
        values = score_map.keys()
        values.sort()
        for v in values[:number]:
            segments.append(score_map[v])

    return segments
Code example #17
File: example2.py Project: susairajs18/phython
X = vstack(X)
figure()
for i, r in enumerate(rlist):
    subplot(rlist.size, 1, i + 1)
    plot(X[:, i], label="r = %.2f" % r)
    ylim(0, 1)
    yticks("")
    xticks("")
    legend(loc=10)
show()
#! Bifurcation diagram
#!----------------------
#! To study the long-term behavior of the sequence we can plot the values
#! it visits after many iterations, as a function of the parameter
rlist = linspace(2, 4, 800)
X = [0.5 * ones_like(rlist)]
for i in arange(0, 10000):
    X += [f(X[-1], rlist)]
X = hsplit(vstack(X[-2000:]), rlist.size)
from scipy import stats

H = map(lambda Z: stats.histogram(Z, defaultlimits=(0, 1), numbins=300)[0], X)
H = map(lambda Z: 1 - Z / Z.max(), H)
H = vstack(H)
figure()
imshow(rot90(H), aspect="auto", extent=[2, 4, 0, 1])
bone()
xlabel("r")
ylabel(r"$X_{n \rightarrow \infty}$")
show()
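The map-based pipeline above assumes Python 2, where map returns a list; a sketch of the same binning step with numpy.histogram for newer stacks:

import numpy as np
H = [np.histogram(Z, bins=300, range=(0, 1))[0].astype(float) for Z in X]
H = np.vstack([1.0 - h / h.max() for h in H])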
Code example #18
# Distance light traveled in Newcomb's experiment
distance = 7400.0

# Read the measurements from the data file.
# There are a number of comments at the top of the file marked with "#"
measured_time = loadtxt('speed_of_light.dat',comments="#")

# measurements were in nanoseconds, as differences from 24800 ns.
measured_time += 24800.0

# Convert measured times to measured velocities.
measured_velocity = distance/measured_time*10.0  # m/ns * 10 == 1e8m/s

# histogram the measured velocities.
bin_counts, min_val, width, outside = stats.histogram(measured_velocity,
                                                      numbins=30)
print 'min val:', min_val

# For plot, we want to know the velocities for each bin.
binned_velocity = min_val + arange(len(bin_counts))*width
print binned_velocity

# Plot a bar plot of the histogrammed data.
pylab.hold(False)

pylab.bar(binned_velocity, bin_counts, width=width)

pylab.xlabel("velocity (1e8 m/s)")
pylab.ylabel("counts")
pylab.title("Newcomb's Speed of Light Measurement Histogram")
Code example #19
def makeSpect(fname,ftype,k = 250,m = 100,pulseHeight=None,energies = [],numbins=2000):
    
    '''
    Takes a file name for data to be processed.  Processes and produces a spectrum.
    '''
    
    if pulseHeight is None:
        f = readFile(fname,ftype)
        M = 0
        pulseHeight = numpy.zeros(len(f))
        numPulses = len(f)
        zeropad = numpy.zeros(2*k+m)
        print 'Reading ', numPulses, ' traces from ', fname
        
        #for i in range(500):
        #    o = fitExp(subtractBaseline(f[i,:]))
        #    M = (M + o)/2
        start = time.time()
        
        i = 0
        numRead = 10000
        iterations = (numPulses-numPulses%numRead)/numRead + 1
        for n in range(iterations):
            n1 = n * numRead
            if n >= iterations-1:
                n2 = numPulses
            else:
                n2 = n1 + numRead
            for trace in f[n1:n2,:]:
                trace_s = subtractBaseline(trace)
                s, pulseHeight[i] = trapezoidalFilter(k,m,4467,numpy.append(zeropad,trace_s))
                #t = findRise(trace_s)
                #pulseHeight[i] = extractHeight(s,1000,k,m)
                i += 1
            end = time.time()
        print 'Processing ',n2, ' samples (',n1, ' to ', n2, ') took: ', end-start
        
    #hist,low_range,binsize,extrapoints = stats.histogram(pulseHeight,numbins=max(pulseHeight))
    #plt.plot(hist[0])
    #plt.show()
    #def onclick(event):
    #    return event.xdata
    #cid = fig.mpl_connect('button_press_event',onclick)
    if len(energies) == 0:
        usr_in = ''
        while usr_in != 'done':
            usr_in = raw_input('Please enter energies of the expected peaks.  When finished, enter \'done\': ')
            try:
                energies.append(float(usr_in))
            except ValueError:
                if usr_in != 'done':
                    print 'Error: Please enter numbers. When finished, enter \'done\''
                continue
    
    hist,low_range,binsize,extrapoints = stats.histogram(pulseHeight,numbins=int(max(energies)+100))
    histogram = hist*(hist>1)
    histogram = [point for point in histogram if point>0]
    hist,low_range,binsize,extrapoints = stats.histogram(pulseHeight,numbins=int(max(energies)+numbins),defaultlimits=(0.,len(histogram)*binsize))
    tries = 0
    threshold = 10
    peak_indices = []
    while len(energies) != len(peak_indices):
        tries += 1
        thresh=threshold+tries*10
        peak_indices = findRise(hist,findPeak=True,thresh=thresh)
        if tries > 100:
            print 'Error: Could not match number of peaks to energies entered.'
            break
    
    peak_info = peakFit(hist,peak_indices)
    calib,cov = opt.curve_fit(lambda x,m,b: m*x+b, numpy.array([peak[1] for peak in peak_info])*binsize,numpy.array(energies))
    print calib
    bins = numpy.array([binsize*x*calib[0]+calib[1] for x in xrange(len(hist))])
    plt.plot(bins,hist)
    plt.show()
    peak_indices = findRise(hist,findPeak=True,thresh=thresh)
    peak_info = peakFit(hist,peak_indices)
    peak_info[:,1:2] *= binsize*calib[0]
    peak_info[:,1:2] += numpy.ones(peak_info[:,1:2].shape)*calib[1]
    peak_info[:,3] *= calib[0]
    print 'Peak #\tPeak Info'
    print '\tHeight (counts)\t\tCentroid (keV)\t\tFWHM (keV)\t\tsigma_FWHM (keV)'
    for peak_num,peak in enumerate(peak_info):
        print peak_num, list(peak)
    #plt.plot(hist[0])
    #plt.show()
    return hist, bins, peak_info, pulseHeight
Code example #20
File: Histogram.py Project: GeoDaCenter/CAST
    def __init__(self, parent, layer, data, **kwargs):
        PlottingCanvas.__init__(self,parent, data)
        
        try:
            if isinstance(layer,str) or isinstance(layer, unicode):
                # in case of weights histogram
                self.layer_name = layer
            else:
                self.layer_name = layer.name
            self.isAutoScale = False 
            self.intervals = 7
            #self.title = "Histogram (%s)" % (self.layer_name)
            self.title = ""
            self.x_label = data.keys()[0]
            self.y_label = "Counts in bins"
            self.data = data[self.x_label]
            
            self.enable_axis_x = False
            self.draw_full_axis = False
            self.margin_right = 250 # for legend
            
            # create a dict for input data for Brushing
            self.data_dict = sorted(self.data, key=self.data.get) #[obj_id]
            sorted_data = sorted(self.data.values()) #[value]
            
            if self.x_label == 'Connectivity': 
                self.intervals = sorted_data[-1] - sorted_data[0] + 1
                if self.intervals > 50:
                    self.enable_axis_x = True
                    self.margin_right = 40
                
            if self.intervals > 1:
                self.hist, low_range, binsize, extrapoints = histogram(sorted_data, self.intervals)
            else:
                self.hist = np.array([len(sorted_data)])
           
            cnt = 0; bin_idx = 0
            self.bin_index = {} # key: obj_id, value: bin_idx
            for n in self.hist:
                for i in range(int(n)):
                    obj_id = self.data_dict[cnt]
                    self.bin_index[obj_id] = bin_idx
                    cnt += 1
                bin_idx += 1
            
            data_min, data_max = sorted_data[0], sorted_data[-1]
            
            if self.x_label == 'Connectivity': 
                #unique_num_neighbors = list(set(sorted_data))
                self.data_intervals = []
                for n in range(sorted_data[0], sorted_data[-1]+1):
                    self.data_intervals.append((n,n))
            else:
                end_pos = np.cumsum(self.hist)
                start_pos = end_pos - self.hist + 1
                self.data_intervals = [ (start_pos[i],end_pos[i]) for i in range(len(self.hist))]
        
            # a NxN matrix
            self.x_min = 1
            self.x_max = self.intervals +1
            self.y_min = 0
            self.y_max = np.max(self.hist) +1
    
            self.extent = (self.x_min, self.y_min, self.x_max,self.y_max)
            self.status_bar = None  # self.parentFrame.status_bar
            
            self.gradient_color = GradientColor(gradient_type='rdyibu')
            
            # color schema: from blue to red
            self.color_matrix = []
            for i in range(self.intervals):
                p = float(i+1) / self.intervals
                self.color_matrix.append( self.gradient_color.get_color_at(p))
       
            self.selected_obj_ids = []
            
        except Exception as err:
            self.ShowMsgBox(""" Histogram could not be created. Please select a numeric variable.
            
Details: """ + str(err.message))
            self.isValidPlot = False
            self.parentFrame.Close(True)
            return None
        
        # linking-brushing events
        self.Register(stars.EVT_OBJS_SELECT, self.OnObjsSelected)
        self.Register(stars.EVT_OBJS_UNSELECT, self.OnNoObjSelect)
Code example #21
File: statistik.py Project: TheMrMo/praktikum
# -*- coding: utf-8 -*-
import numpy as np
from scipy import stats
from  matplotlib import pyplot as plt

N = np.loadtxt('statistik.txt')

print("Mittelwert der Zählraten: {0:.3f}".format(N.mean()))
print("Standardabweichung der Zählraten: {0:.3f}".format(np.std(N, ddof=1)))

# split the data into 7 bins
binnum = 7
n, low_range, binsize, extra = stats.histogram(N, binnum)

ind = np.arange(binnum)
width = 0.50

x = np.linspace(0, 7)
norm = stats.norm(4, 1.5).pdf(x)
poisson = stats.poisson(5).pmf(ind)

plt.plot(x, norm, "r", label="Normal distribution")

plt.bar(ind, n/100., width, color="blue", label="measured distribution")

plt.bar(ind+0.5, poisson, width, color="green", label="Poisson distribution")

plt.title("Statistical analysis of the alpha decay")
plt.ylabel("relative frequency")

plt.xticks(ind+width, ('1', '2', '3', '4', '5', '6', '7'))
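The n/100. above hard-codes the number of measurements; a sketch of a sample-size-independent normalization (an assumption about the intent, i.e. relative frequencies):

rel = n / float(len(N))  # relative frequencies for any number of measurements
plt.bar(ind, rel, width, color="blue", label="measured distribution")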
Code example #22
# Note that scipy.stats.histogram behaves differently from the
# numpy.histogram imported by default in this %pylab session.



# your code goes here
random_values = frozen_normal.rvs(size=5000)


num_bins = 100
'''
figure()
h = hist(random_values,bins = 100)
show()
print(h[0])
print(h[1])
'''
bin_counts, min_bin, bin_width, outside = histogram(random_values,
                                                    numbins=num_bins)
bin_x = min_bin + bin_width * arange(num_bins)

hist_pdf = bin_counts / (len(random_values) * bin_width)
 
mean_est, std_est = norm.fit(random_values)
print "estimate of mean, std:", mean_est, std_est


hist(random_values, bins=100, normed=True)
figure()
plot(bin_x, norm(mean_est, std_est).pdf(bin_x), 'r', linewidth=2)
show()
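hist_pdf is computed above but never plotted; a sketch (assuming the %pylab namespace this session uses) overlaying it on the fitted density:

figure()
plot(bin_x, hist_pdf, 'b.', label='histogram estimate')
plot(bin_x, norm(mean_est, std_est).pdf(bin_x), 'r', label='fitted pdf')
legend()
show()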
Code example #23
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.")
    parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;")
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help="Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument(
        "--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values."
    )
    parser.add_argument("--fisher", action="store_true", default=False, help="if true then Fisher definition is used")
    parser.add_argument(
        "--bias",
        action="store_true",
        default=False,
        help="if false,then the calculations are corrected for statistical bias",
    )
    parser.add_argument("--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored")
    parser.add_argument(
        "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored"
    )
    parser.add_argument("--inclusive", action="store_true", default=False, help="if false,limit will be ignored")
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help="If True, if there are extra points a warning is raised saying how many of those points there are",
    )
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        default="False",
        help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument("--correction", action="store_true", default=False, help="continuity correction ")
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help="the number of trials. This is ignored if x gives both the number of successes and failures",
    )
    parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram")
    parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction")
    parser.add_argument("--score", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher_limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help="The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument("--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds")
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim of each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help="lambda_ gives the power in the Cressie-Read power divergence statistic",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument("--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e")
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")

    args = parser.parse_args()
    infile = args.infile
    outfile = open(args.outfile, "w+")
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    sample0 = 0
    sample1 = 0
    sample2 = 0
    if args.sample_cols is not None:
        sample0 = 1
        barlett_samples = []
        for sample in args.sample_cols.split(";"):
            barlett_samples.append(map(int, sample.split(",")))
    if args.sample_one_cols is not None:
        sample1 = 1
        sample_one_cols = args.sample_one_cols.split(",")
    if args.sample_two_cols is not None:
        sample_two_cols = args.sample_two_cols.split(",")
        sample2 = 1
    for line in open(infile):
        sample_one = []
        sample_two = []
        cols = line.strip().split("\t")
        if sample0 == 1:
            b_samples = columns_to_values(barlett_samples, line)
        if sample1 == 1:
            for index in sample_one_cols:
                sample_one.append(cols[int(index) - 1])
        if sample2 == 1:
            for index in sample_two_cols:
                sample_two.append(cols[int(index) - 1])
        if test_id.strip() == "describe":
            size, min_max, mean, uv, bs, bk = stats.describe(map(float, sample_one))
            cols.append(size)
            cols.append(min_max)
            cols.append(mean)
            cols.append(uv)
            cols.append(bs)
            cols.append(bk)
        elif test_id.strip() == "mode":
            vals, counts = stats.mode(map(float, sample_one))
            cols.append(vals)
            cols.append(counts)
        elif test_id.strip() == "nanmean":
            m = stats.nanmean(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "kurtosistest":
            z_value, p_value = stats.kurtosistest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "itemfreq":
            freq = stats.itemfreq(map(float, sample_one))
            for list in freq:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "boxcox_llf":
            IIf = stats.boxcox_llf(imbda, map(float, sample_one))
            cols.append(IIf)
        elif test_id.strip() == "tiecorrect":
            fa = stats.tiecorrect(map(float, sample_one))
            cols.append(fa)
        elif test_id.strip() == "rankdata":
            r = stats.rankdata(map(float, sample_one), method=args.md)
            cols.append(r)
        elif test_id.strip() == "nanstd":
            s = stats.nanstd(map(float, sample_one), bias=args.bias)
            cols.append(s)
        elif test_id.strip() == "anderson":
            A2, critical, sig = stats.anderson(map(float, sample_one), dist=args.dist)
            cols.append(A2)
            for list in critical:
                cols.append(list)
            cols.append(",")
            for list in sig:
                cols.append(list)
        elif test_id.strip() == "binom_test":
            p_value = stats.binom_test(map(float, sample_one), n=args.n, p=args.p)
            cols.append(p_value)
        elif test_id.strip() == "gmean":
            gm = stats.gmean(map(float, sample_one), dtype=args.dtype)
            cols.append(gm)
        elif test_id.strip() == "hmean":
            hm = stats.hmean(map(float, sample_one), dtype=args.dtype)
            cols.append(hm)
        elif test_id.strip() == "kurtosis":
            k = stats.kurtosis(map(float, sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias)
            cols.append(k)
        elif test_id.strip() == "moment":
            n_moment = stats.moment(map(float, sample_one), n=args.n)
            cols.append(n_moment)
        elif test_id.strip() == "normaltest":
            k2, p_value = stats.normaltest(map(float, sample_one))
            cols.append(k2)
            cols.append(p_value)
        elif test_id.strip() == "skew":
            skewness = stats.skew(map(float, sample_one), bias=args.bias)
            cols.append(skewness)
        elif test_id.strip() == "skewtest":
            z_value, p_value = stats.skewtest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "sem":
            s = stats.sem(map(float, sample_one), ddof=args.ddof)
            cols.append(s)
        elif test_id.strip() == "zscore":
            z = stats.zscore(map(float, sample_one), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "signaltonoise":
            s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof)
            cols.append(s2n)
        elif test_id.strip() == "percentileofscore":
            p = stats.percentileofscore(map(float, sample_one), score=args.score, kind=args.kind)
            cols.append(p)
        elif test_id.strip() == "bayes_mvs":
            c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one), alpha=args.alpha)
            cols.append(c_mean)
            cols.append(c_var)
            cols.append(c_std)
        elif test_id.strip() == "sigmaclip":
            c, c_low, c_up = stats.sigmaclip(map(float, sample_one), low=args.m, high=args.n)
            cols.append(c)
            cols.append(c_low)
            cols.append(c_up)
        elif test_id.strip() == "kstest":
            d, p_value = stats.kstest(
                map(float, sample_one), cdf=args.cdf, N=args.N, alternative=args.alternative, mode=args.mode
            )
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "chi2_contingency":
            chi2, p, dof, ex = stats.chi2_contingency(
                map(float, sample_one), correction=args.correction, lambda_=args.lambda_
            )
            cols.append(chi2)
            cols.append(p)
            cols.append(dof)
            cols.append(ex)
        elif test_id.strip() == "tmean":
            if nf == 0 and mf == 0:
                mean = stats.tmean(map(float, sample_one))
            else:
                mean = stats.tmean(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(mean)
        elif test_id.strip() == "tmin":
            if mf == 0:
                min = stats.tmin(map(float, sample_one))
            else:
                min = stats.tmin(map(float, sample_one), lowerlimit=mf, inclusive=args.inclusive)
            cols.append(min)
        elif test_id.strip() == "tmax":
            if nf == 0:
                max = stats.tmax(map(float, sample_one))
            else:
                max = stats.tmax(map(float, sample_one), upperlimit=nf, inclusive=args.inclusive)
            cols.append(max)
        elif test_id.strip() == "tvar":
            if nf == 0 and mf == 0:
                var = stats.tvar(map(float, sample_one))
            else:
                var = stats.tvar(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(var)
        elif test_id.strip() == "tstd":
            if nf == 0 and mf == 0:
                std = stats.tstd(map(float, sample_one))
            else:
                std = stats.tstd(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(std)
        elif test_id.strip() == "tsem":
            if nf == 0 and mf == 0:
                s = stats.tsem(map(float, sample_one))
            else:
                s = stats.tsem(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(s)
        elif test_id.strip() == "scoreatpercentile":
            if nf == 0 and mf == 0:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), interpolation_method=args.interpolation
                )
            else:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), (mf, nf), interpolation_method=args.interpolation
                )
            for list in s:
                cols.append(list)
        elif test_id.strip() == "relfreq":
            if nf == 0 and mf == 0:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b)
            else:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b, (mf, nf))
            for list in rel:
                cols.append(list)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "binned_statistic":
            if nf == 0 and mf == 0:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b
                )
            else:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                    range=(mf, nf),
                )
            cols.append(st)
            cols.append(b_edge)
            cols.append(b_n)
        elif test_id.strip() == "threshold":
            if nf == 0 and mf == 0:
                o = stats.threshold(map(float, sample_one), newval=args.new)
            else:
                o = stats.threshold(map(float, sample_one), mf, nf, newval=args.new)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trimboth":
            o = stats.trimboth(map(float, sample_one), proportiontocut=args.proportiontocut)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trim1":
            t1 = stats.trim1(map(float, sample_one), proportiontocut=args.proportiontocut, tail=args.tail)
            for list in t1:
                cols.append(list)
        elif test_id.strip() == "histogram":
            if nf == 0 and mf == 0:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b)
            else:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b, (mf, nf))
            cols.append(hi)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "cumfreq":
            if nf == 0 and mf == 0:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b)
            else:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b, (mf, nf))
            cols.append(cum)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "boxcox_normmax":
            if nf == 0 and mf == 0:
                ma = stats.boxcox_normmax(map(float, sample_one))
            else:
                ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf), method=args.method)
            cols.append(ma)
        elif test_id.strip() == "boxcox":
            if imbda == 0:
                box, ma, ci = stats.boxcox(map(float, sample_one), alpha=args.alpha)
                cols.append(box)
                cols.append(ma)
                cols.append(ci)
            else:
                box = stats.boxcox(map(float, sample_one), imbda, alpha=args.alpha)
                cols.append(box)
        elif test_id.strip() == "histogram2":
            h2 = stats.histogram2(map(float, sample_one), map(float, sample_two))
            for list in h2:
                cols.append(list)
        elif test_id.strip() == "ranksums":
            z_statistic, p_value = stats.ranksums(map(float, sample_one), map(float, sample_two))
            cols.append(z_statistic)
            cols.append(p_value)
        elif test_id.strip() == "ttest_1samp":
            t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two))
            for list in t:
                cols.append(list)
            for list in prob:
                cols.append(list)
        elif test_id.strip() == "ansari":
            AB, p_value = stats.ansari(map(float, sample_one), map(float, sample_two))
            cols.append(AB)
            cols.append(p_value)
        elif test_id.strip() == "linregress":
            slope, intercept, r_value, p_value, stderr = stats.linregress(
                map(float, sample_one), map(float, sample_two)
            )
            cols.append(slope)
            cols.append(intercept)
            cols.append(r_value)
            cols.append(p_value)
            cols.append(stderr)
        elif test_id.strip() == "pearsonr":
            cor, p_value = stats.pearsonr(map(float, sample_one), map(float, sample_two))
            cols.append(cor)
            cols.append(p_value)
        elif test_id.strip() == "pointbiserialr":
            r, p_value = stats.pointbiserialr(map(float, sample_one), map(float, sample_two))
            cols.append(r)
            cols.append(p_value)
        elif test_id.strip() == "ks_2samp":
            d, p_value = stats.ks_2samp(map(float, sample_one), map(float, sample_two))
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "mannwhitneyu":
            mw_stats_u, p_value = stats.mannwhitneyu(
                map(float, sample_one), map(float, sample_two), use_continuity=args.mwu_use_continuity
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "zmap":
            z = stats.zmap(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "ttest_ind":
            mw_stats_u, p_value = stats.ttest_ind(
                map(float, sample_one), map(float, sample_two), equal_var=args.equal_var
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "ttest_rel":
            t, prob = stats.ttest_rel(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(t)
            cols.append(prob)
        elif test_id.strip() == "mood":
            z, p_value = stats.mood(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(z)
            cols.append(p_value)
        elif test_id.strip() == "shapiro":
            W, p_value, a = stats.shapiro(map(float, sample_one), map(float, sample_two), args.reta)
            cols.append(W)
            cols.append(p_value)
            for list in a:
                cols.append(list)
        elif test_id.strip() == "kendalltau":
            k, p_value = stats.kendalltau(
                map(float, sample_one), map(float, sample_two), initial_lexsort=args.initial_lexsort
            )
            cols.append(k)
            cols.append(p_value)
        elif test_id.strip() == "entropy":
            s = stats.entropy(map(float, sample_one), map(float, sample_two), base=args.base)
            cols.append(s)
        elif test_id.strip() == "spearmanr":
            if sample2 == 1:
                rho, p_value = stats.spearmanr(map(float, sample_one), map(float, sample_two))
            else:
                rho, p_value = stats.spearmanr(map(float, sample_one))
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "wilcoxon":
            if sample2 == 1:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    map(float, sample_two),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            else:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one), zero_method=args.zero_method, correction=args.correction
                )
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "chisquare":
            if sample2 == 1:
                rho, p_value = stats.chisquare(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            else:
                rho, p_value = stats.chisquare(map(float, sample_one), ddof=args.ddof)
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "power_divergence":
            if sample2 == 1:
                stat, p_value = stats.power_divergence(
                    map(float, sample_one), map(float, sample_two), ddof=args.ddof, lambda_=args.lambda_
                )
            else:
                stat, p_value = stats.power_divergence(map(float, sample_one), ddof=args.ddof, lambda_=args.lambda_)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "theilslopes":
            if sample2 == 1:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), map(float, sample_two), alpha=args.alpha)
            else:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), alpha=args.alpha)
            cols.append(mpe)
            cols.append(met)
            cols.append(lo)
            cols.append(up)
        elif test_id.strip() == "combine_pvalues":
            if sample2 == 1:
                stat, p_value = stats.combine_pvalues(
                    map(float, sample_one), method=args.med, weights=map(float, sample_two)
                )
            else:
                stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "obrientransform":
            ob = stats.obrientransform(*b_samples)
            for list in ob:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "f_oneway":
            f_value, p_value = stats.f_oneway(*b_samples)
            cols.append(f_value)
            cols.append(p_value)
        elif test_id.strip() == "kruskal":
            h, p_value = stats.kruskal(*b_samples)
            cols.append(h)
            cols.append(p_value)
        elif test_id.strip() == "friedmanchisquare":
            fr, p_value = stats.friedmanchisquare(*b_samples)
            cols.append(fr)
            cols.append(p_value)
        elif test_id.strip() == "fligner":
            xsq, p_value = stats.fligner(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(xsq)
            cols.append(p_value)
        elif test_id.strip() == "bartlett":
            T, p_value = stats.bartlett(*b_samples)
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "levene":
            w, p_value = stats.levene(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(w)
            cols.append(p_value)
        elif test_id.strip() == "median_test":
            stat, p_value, m, table = stats.median_test(
                ties=args.ties, correction=args.correction, lambda_=args.lambda_, *b_samples
            )
            cols.append(stat)
            cols.append(p_value)
            cols.append(m)
            cols.append(table)
            for list in table:
                elements = ",".join(map(str, list))
                cols.append(elements)
        outfile.write("%s\n" % "\t".join(map(str, cols)))
    outfile.close()
Code example #24
File: kstat.py Project: AraiKensuke/mscripts
def poi_ch2test(cts):
    if _N.sum(cts) == 0:  #  all of them 0
        return 0.5    #  Poisson with rate 0
    TRIALS    = len(cts)

    rareLimL = 0
    rareLimH = 0
    obsLam   = _N.mean(cts)

    i = int(obsLam)
    while True:
        if poi_pdf(obsLam, i)*TRIALS < 1:
            rareLimH = i - 1
            break   #  inclusive of rareLim and up
        i += 1

    i = int(obsLam)
    while i >= 0:
        if poi_pdf(obsLam, i)*TRIALS < 1:
            rareLimL = i + 1
            break   #  inclusive of rareLim and up
        i -= 1

    #  no bin has < 1 expected events

    expctd   = _N.zeros(rareLimH + 1)
    for n in range(rareLimH):
        expctd[n] = poi_pdf(obsLam, n)*TRIALS
    expctd[rareLimH] = TRIALS - _N.sum(expctd[0:rareLimH])

    #  poipdf[rareLim] is at < 1.  This will be last square.
    #  so this is rareLim + 1 objects
    maxInd              = max(cts)  #  0 based index
    nbins = maxInd - 0 + 1

    hf, low, bs, out = _ss.histogram(cts, numbins=nbins, defaultlimits=(0, maxInd + 1))

    #  # of categories:  rareLim + 1    0..4 -> 1..5  (5 cats).  
    #  k == # of classes

    #  shortened version

    #  if rareLim is the last spot, then length is rareLim + 1
    #  highest count in cts is len(vals) - 1
    #  maxLim == len(vals) - 1   
    #  to accommodate last index rareLim, we need size rareLim + 1
    #  
    svals                =  _N.zeros(rareLimH + 1)
    if maxInd <= rareLimH:
        svals[0:maxInd + 1] =  hf[0:maxInd + 1]
        
    else:
        svals[0:rareLimH] =  hf[0:rareLimH]
        svals[rareLimH]       =  _N.sum(hf[rareLimH:])

    svals[rareLimL]      = _N.sum(hf[0:rareLimL+1])
    expctd[rareLimL]     = _N.sum(expctd[0:rareLimL+1])

    k                   = rareLimH - rareLimL + 1  # index of last element if index from 1
    # [0, 1], [1, 2], ... [k-2, k-1]
    #   # classes (counts) [0, 1, 2, k-2]   (k - 1) classes
    #  1...k inclusive is k classes

    chi2   = 0

    for i in xrange(rareLimL, rareLimH + 1):
        o    =  svals[i]
        e    =  expctd[i]
        chi2  +=  (o-e)**2/e

    edf  = k - 2
    pv = 1 - _ss.chi2.cdf(chi2, edf)

    return pv
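A usage sketch (Python 2), assuming numpy as _N, scipy.stats as _ss, and the module's own poi_pdf helper are available alongside this function:

cts = _N.random.poisson(lam=4.0, size=200)
print poi_ch2test(cts)  # p-value; usually large when the counts really are Poisson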
Code example #25
File: FEA_GUI.py Project: siboles/pyCellAnalyst
    def analyzeResults(self):
        self.matched = [] # for paired comparisons
        for i, f in enumerate(self.pickles):
            s = f.split(os.sep)[-1]
            if not(fnmatch.filter(
                    itertools.chain.from_iterable(self.matched), "*" + s)):
                self.matched.append(fnmatch.filter(self.pickles, "*" + s))
            plotname = f.replace('.pkl', '.xplt')
            print("\n... Analyzing results for {:s}".format(plotname))

            results = febio.FebPlt(plotname)
            stress = np.zeros((len(self.data[f]['elements']), 3, 3), float)
            strain = np.copy(stress)
            #material element volumes
            mvolumes = np.zeros(len(self.data[f]['elements']), float)
            #spatial element volumes
            svolumes = np.copy(mvolumes)
            nnodes = len(list(results.NodeData.keys()))
            displacement = np.zeros((nnodes, 3))
            for j, n in enumerate(self.data[f]['nodes']):
                tmp = results.NodeData[j + 1]['displacement'][-1, :]
                displacement[j, :] = [tmp[0], tmp[1], tmp[2]]
            pstress = []
            pstressdir = []
            pstrain = []
            pstraindir = []
            for j, e in enumerate(self.data[f]['elements']):
                tmp = results.ElementData[j + 1]['stress'][-1, :]
                stress[j, :, :] = [[tmp[0], tmp[3], tmp[5]],
                                   [tmp[3], tmp[1], tmp[4]],
                                   [tmp[5], tmp[4], tmp[2]]]
                #material coordinates
                X = np.zeros((4, 3), float)
                #spatial coordinates
                x = np.zeros((4, 3), float)
                for k in range(4):
                    X[k, :] = self.data[f]['nodes'][e[k] - 1]
                    x[k, :] = (X[k, :] +
                               results.NodeData[e[k]]['displacement'][-1, :])
                #set up tangent space
                W = np.zeros((6, 3), float)
                w = np.zeros((6, 3), float)
                for k, c in enumerate(
                        [(0, 1), (0, 2), (0, 3), (1, 3), (2, 3), (1, 2)]):
                    W[k, :] = X[c[1], :] - X[c[0], :]
                    w[k, :] = x[c[1], :] - x[c[0], :]
                dX = np.zeros((6, 6), float)
                ds = np.zeros((6, 1), float)
                for k in range(6):
                    for l in range(3):
                        dX[k, l] = 2 * W[k, l] ** 2
                    dX[k, 3] = 4 * W[k, 0] * W[k, 1]
                    dX[k, 4] = 4 * W[k, 1] * W[k, 2]
                    dX[k, 5] = 4 * W[k, 0] * W[k, 2]
                    ds[k, 0] = (np.linalg.norm(w[k, :]) ** 2 -
                                np.linalg.norm(W[k, :]) ** 2)
                #solve for strain
                E = np.linalg.solve(dX, ds)
                #get volumes
                mvolumes[j] = old_div(np.abs(
                    np.dot(W[0, :], np.cross(W[1, :], W[2, :]))), 6.0)
                svolumes[j] = old_div(np.abs(
                    np.dot(w[0, :], np.cross(w[1, :], w[2, :]))), 6.0)
                strain[j, :, :] = [[E[0], E[3], E[5]],
                                   [E[3], E[1], E[4]],
                                   [E[5], E[4], E[2]]]
                #eigenvalues and eigenvectors of stress and strain tensors
                #eigenvectors are normalized
                eigstrain, eigstraindir = np.linalg.eigh(strain[j, :, :])
                order = np.argsort(eigstrain)
                eigstrain = eigstrain[order]
                eigstraindir /= np.linalg.norm(eigstraindir, axis=0, keepdims=True)
                eigstraindir = eigstraindir[:, order]
                pstrain.append(eigstrain)
                pstraindir.append(eigstraindir)
                eigstress, eigstressdir = np.linalg.eigh(stress[j, :, :])
                order = np.argsort(eigstress)
                eigstress = eigstress[order]
                eigstressdir /= np.linalg.norm(eigstressdir, axis=0, keepdims=True)
                eigstressdir = eigstressdir[:, order]
                pstress.append(eigstress)
                pstressdir.append(eigstressdir)
            pstress = np.array(pstress)
            pstressdir = np.array(pstressdir)
            pstrain = np.array(pstrain)
            pstraindir = np.array(pstraindir)
            #save reference volumes
            self.volumes.update({f: mvolumes})
            self.results['Effective Strain (von Mises)'].update(
                {f: np.sqrt(old_div(((pstrain[:, 2] - pstrain[:, 1]) ** 2 +
                             (pstrain[:, 1] - pstrain[:, 0]) ** 2 +
                             (pstrain[:, 2] - pstrain[:, 0]) ** 2),
                            2.0))})
            self.results['Maximum Compressive Strain'].update(
                {f: np.outer(pstrain[:, 0], [1, 1, 1]) * pstraindir[:, :, 0]})
            self.results['Maximum Tensile Strain'].update(
                {f: np.outer(pstrain[:, 2], [1, 1, 1]) * pstraindir[:, :, 2]})
            self.results['Maximum Shear Strain'].update(
                {f: 0.5 * (pstrain[:, 2] - pstrain[:, 0])})
            self.results['Volumetric Strain'].update(
                {f: old_div(svolumes, mvolumes) - 1.0})

            self.results['Effective Stress (von Mises)'].update(
                {f: np.sqrt(old_div(((pstress[:, 2] - pstress[:, 1]) ** 2 +
                             (pstress[:, 1] - pstress[:, 0]) ** 2 +
                             (pstress[:, 2] - pstress[:, 0]) ** 2), 2.0))})
            self.results['Maximum Compressive Stress'].update(
                {f: np.outer(pstress[:, 0], [1, 1, 1]) * pstressdir[:, :, 0]})
            self.results['Maximum Tensile Stress'].update(
                {f: np.outer(pstress[:, 2], [1, 1, 1]) * pstressdir[:, :, 2]})
            self.results['Maximum Shear Stress'].update(
                {f: 0.5 * (pstress[:, 2] - pstress[:, 0])})
            self.results['Pressure'].update(
                {f: old_div(np.sum(pstress, axis=1), 3.0)})

            self.results['Displacement'].update({f: displacement})

        for i, k in enumerate(self.outputs.keys()):
            if self.outputs[k].get():
                for m in self.matched:
                    weights = old_div(self.volumes[m[0]], np.sum(self.volumes[m[0]]))
                    for j, f in enumerate(m):
                        if len(self.results[k][f].shape) > 1:
                            dat = np.ravel(np.linalg.norm(self.results[k][f], axis=1))
                        else:
                            dat = np.ravel(self.results[k][f])
                        if self.analysis['Generate Histograms'].get():
                            IQR = np.subtract(*np.percentile(dat, [75, 25]))
                            nbins = (int(old_div(np.ptp(dat),
                                         (2 * IQR * dat.size ** (old_div(-1., 3.))))))
                            h = histogram(dat, numbins=nbins, weights=weights)
                            bins = np.linspace(h[1], h[1] + h[2] * nbins,
                                               nbins, endpoint=False)
                            self.histograms[k][f] = {'bins': bins,
                                                     'heights': h[0],
                                                     'width': h[2]}
                        if self.analysis['Tukey Boxplots'].get():
                            quantiles = np.zeros(3, float)
                            for n, q in enumerate([0.25, 0.5, 0.75]):
                                quantiles[n] = quantile_1D(dat, weights, q)
                            self.boxwhiskers[k][f] = {'quantiles': quantiles,
                                                      'data': dat}
                    if self.analysis['Calculate Differences'].get():
                        for c in itertools.combinations(m, 2):
                            if len(self.results[k][c[0]].shape) > 1:
                                dat1 = np.ravel(np.linalg.norm(self.results[k][c[0]], axis=1))
                                dat2 = np.ravel(np.linalg.norm(self.results[k][c[1]], axis=1))
                            else:
                                dat1 = np.ravel(self.results[k][c[0]])
                                dat2 = np.ravel(self.results[k][c[1]])
                            difference = dat2 - dat1
                            wrms = np.sqrt(np.average(difference ** 2,
                                                      weights=weights))
                            self.differences[k][c[1] + "MINUS" + c[0]] = {
                                'difference': difference, 'weighted RMS': wrms}
        self.saveResults()
        print("... ... Analysis Complete")
コード例 #26
import sys
import os
import string
import numpy
import scipy
import scipy.stats
from scipy.stats import histogram, histogram2

debug = os.getenv("DEBUG")

rsptime_fn = sys.argv[1]
f = open(rsptime_fn, "r")
records = f.readlines()

times = numpy.array( [ float(r.strip().split(',')[1]) for r in records ] )
maxtime = max(times)
(time_histo, time_low_range, time_binsize, time_extrapoints) = histogram( times, defaultlimits=(0.0, maxtime))
assert(time_low_range == 0.0)
assert(time_extrapoints == 0)
if debug: 
  print(time_histo, ' shape ', time_histo.shape, ' low_range ', time_low_range, ' binsize ', time_binsize, ' extrapoints ', time_extrapoints)
print('time histogram: %s'%string.join([ str(v) for v in time_histo.tolist() ], ','))

rsptimes = numpy.array( [ float(r.strip().split(',')[2]) for r in records ] )
rsptime_histo = histogram2( rsptimes, [ 0.0001, 0.00032, 0.001, 0.0032, 0.01, 0.032, 0.1, 0.32, 1, 3.2, 10, 32, 100 ] )
if debug: 
  print(rsptime_histo,rsptime_histo.shape)
print('response time histogram: %s'%string.join( [ str(v) for v in rsptime_histo.tolist() ], ','))
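
scipy.stats.histogram and histogram2 were removed in later SciPy releases, and string.join exists only on Python 2. A Python 3 sketch of the same two histograms built on numpy.histogram (the bucket edges are copied from the script above; the CSV parsing is elided):

import numpy as np

times = np.array([0.4, 0.9, 1.2, 2.5])  # stand-in for the parsed CSV column
counts, edges = np.histogram(times, bins=10, range=(0.0, times.max()))
print('time histogram: %s' % ','.join(str(v) for v in counts))

buckets = [0.0001, 0.00032, 0.001, 0.0032, 0.01, 0.032, 0.1, 0.32,
           1, 3.2, 10, 32, 100]
# np.histogram returns len(buckets) - 1 counts, one per interval between
# cut points; histogram2 returned one count per cut point, so the two
# outputs may differ at the edges.
rsp_counts, _ = np.histogram(times, bins=buckets)
print('response time histogram: %s' % ','.join(str(v) for v in rsp_counts))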

コード例 #27
ファイル: kstat.py プロジェクト: AraiKensuke/mscripts
def cumfrac(x, staircase=False, countdat=False, histogram=False, bins=None, binsAlignLeft=False):
    """
    cumulative fraction
    types of data:

    continuous data        sort data, then assign size rank to sorted points
    countdata              make a histogram of data first
    histogram              cumulatively add values of bins

    cum frac looks like a staircase when plotted w/ lines
    staircase=True includes both corners of each step, so a line plot
    renders as a true staircase.
    countdat=True is for count data (like spikes per trial), where a given
    value, e.g. the number 4, usually occurs many times; a histogram handles
    all occurrences of each value at once.

    sanity check
    cnts = [1, 2, 3, 4, 5]  or [1, 2, 3, 4, 5, 3]
    cf = _ks.cumfrac(cnts, countdat=True, staircase=True)
    plot(cf[:, 0], cf[:, 1])
    """

    if countdat:
        lo = min(x)
        hi = max(x)
        nbins = hi - lo + 1
        hf, low, bs, out = _ss.histogram(x, numbins=nbins, defaultlimits=(lo, hi + 1))

        datn = int(_N.sum(hf))
        if not staircase:
            cf   = _N.zeros((hi - lo + 1, 2))
            tot  = 0
            for i in xrange(len(hf)):
                tot += hf[i]
                cf[i, 0] = lo + i
                cf[i, 1] = float(tot) / datn
        else:
            cf   = _N.zeros(((hi - lo + 1)*2, 2))
            tot  = 0
            for i in xrange(len(hf)):
                cf[2*i, 0]     = lo + i
                cf[2*i + 1, 0] = lo + i
                cf[2*i, 1]     = float(tot) / datn
                tot            += hf[i]
                cf[2*i + 1, 1] = float(tot) / datn

    else:
        sx = _N.sort(x)
        N  = len(sx)

        if not staircase:  
            cf = _N.zeros((N, 2))
            cf[:, 0] = sx[:]
            cf[:, 1] = _N.linspace(0, 1, N, endpoint=False) + 1./N
        else:
            cf = _N.zeros((2*N, 2))

            yvals = _N.linspace(0, 1, N, endpoint=False)
            for n in xrange(N):
                cf[2*n, 0]      =  sx[n]
                cf[2*n + 1, 0]  =  sx[n]
                cf[2*n, 1]      =  yvals[n]
                cf[2*n + 1, 1]  =  yvals[n] + 1./N
                
    if histogram:
        tot = _N.sum(x)
        N   = len(x)
        cf  = _N.zeros((N, 2))
        ccf = 0
        if bins is None:
            bins = range(N + 1)
        for b in xrange(N):
            cf[b, 0] = bins[b + 1]
            cf[b, 1] = ccf
            ccf += x[b]
        cf[N - 1, 0] = bins[b + 1]
        cf[N - 1, 1] = tot

    return cf
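
For reference, the continuous non-staircase branch above reduces to the usual empirical CDF, which can be written in a few vectorized lines (a sketch, not part of kstat.py):

import numpy as np

def ecdf(x):
    # empirical CDF: (sorted values, cumulative fractions 1/N .. 1)
    sx = np.sort(np.asarray(x, dtype=float))
    return sx, np.arange(1, sx.size + 1) / float(sx.size)

vals, frac = ecdf([1, 2, 3, 4, 5, 3])
# frac matches cf[:, 1] from cumfrac(x, staircase=False)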
コード例 #28
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from scipy import *
from pylab import *
from scipy import stats

# the logistic map f(x)
f = lambda x, r: r * x * (1 - x)
# the set of r values
rlist = linspace(3.5, 3.7, 500)
# initial (hare) population
X = [0.3 * ones_like(rlist)]
# evolve for 5000 steps
for i in arange(0, 5000):
    X += [f(X[-1], rlist)]
# keep the last 2000 iterates
X = hsplit(vstack(X[-2000:]), rlist.size)
# bin each column vertically (the more points in a bin, the brighter it gets)
H = map(lambda Z: stats.histogram(Z, defaultlimits=(0.3, 0.7), numbins=500)[0], X)
# normalize by the peak and invert the color (so the curve comes out black, not white)
H = map(lambda Z: 1 - sqrt(Z / Z.max()), H)
scale = 1
figure(figsize=(8 * scale, 5 * scale), dpi=130)
imshow(rot90(vstack(H)), aspect="auto", extent=[3.5, 3.7, 0.3, 0.7])
bone()  # switch the colormap to grayscale
xlabel("r")
ylabel(r"$X_{n \rightarrow \infty}$")
savefig("Small_bifurcation.png")
コード例 #29
ファイル: histogram.py プロジェクト: ovtmagic/phd_work
import numpy as np
import scipy.stats as st

# Generate 1000 standard-normal random samples
x = np.random.randn(1000)

# Build the histogram data with scipy.stats
n, low_range, binsize, extrapoints = st.histogram(x)
# upper edge of the binned range
upper_range = low_range + binsize * (len(n) - 1)

# Compute the discrete bin positions
bins = np.linspace(low_range, upper_range, len(n))

# Generate the bar chart
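
The snippet breaks off at the plotting step. A minimal completion with matplotlib, assuming the n, bins, and binsize computed above (bins holds the left edge of each bin):

import matplotlib.pyplot as plt

plt.bar(bins, n, width=binsize, align='edge', edgecolor='black')
plt.xlabel('value')
plt.ylabel('count')
plt.show()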
コード例 #30
ファイル: util.py プロジェクト: toros-astro/toritos
    def count_hist(self):
        h = stats.histogram(self.pixmatrix.flatten(), numbins=30)
        self.full_description['histogram'] = h
        return h
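
scipy.stats.histogram has been removed from modern SciPy, so this method fails on current releases. A sketch of an equivalent built on numpy.histogram, rewritten as a free function for brevity (note numpy returns a (counts, edges) pair rather than scipy's 4-tuple, so readers of full_description['histogram'] would need the same change):

import numpy as np

def count_hist(pixmatrix, full_description):
    # same 30-bin binning as above, stored as numpy's (counts, edges) pair
    counts, edges = np.histogram(pixmatrix.flatten(), bins=30)
    full_description['histogram'] = (counts, edges)
    return counts, edges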