import networkx as nx
from scipy.stats import histogram, entropy  # scipy.stats.histogram requires SciPy < 1.0


def sim_eigen(g1, g2):
    # Compare two graphs by the KL divergence between the histograms of
    # their eigenvector-centrality distributions.
    eigen_cent1 = nx.eigenvector_centrality(g1)
    eigen_cent2 = nx.eigenvector_centrality(g2)

    numbin = 50

    eigen_list1 = list(eigen_cent1.values())
    eigen_list2 = list(eigen_cent2.values())

    hist1 = histogram(eigen_list1, numbins=numbin)
    hist2 = histogram(eigen_list2, numbins=numbin)

    # Add-one smoothing: empty bins would make the KL divergence infinite.
    for i in range(len(hist1[0])):
        hist1[0][i] += 1
    for i in range(len(hist2[0])):
        hist2[0][i] += 1

    KL = entropy(hist1[0], hist2[0])
    return KL
def sim_betweenness(g1, g2):
    # Same comparison as sim_eigen, but over betweenness centrality.
    betweenness_cent1 = nx.betweenness_centrality(g1)
    betweenness_cent2 = nx.betweenness_centrality(g2)

    numbin = 50

    betweenness_list1 = list(betweenness_cent1.values())
    betweenness_list2 = list(betweenness_cent2.values())

    hist1 = histogram(betweenness_list1, numbins=numbin)
    hist2 = histogram(betweenness_list2, numbins=numbin)

    # Add-one smoothing, as above.
    for i in range(len(hist1[0])):
        hist1[0][i] += 1
    for i in range(len(hist2[0])):
        hist2[0][i] += 1

    KL = entropy(hist1[0], hist2[0])
    return KL
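Both helpers compare two graphs by binning a centrality distribution into 50 bins and taking the KL divergence of the smoothed bin counts. A minimal usage sketch, assuming NetworkX random graphs (the sizes and edge probabilities below are illustrative, not from the original):

import networkx as nx

g1 = nx.erdos_renyi_graph(60, 0.10, seed=1)
g2 = nx.erdos_renyi_graph(60, 0.15, seed=2)
print(sim_eigen(g1, g2))        # KL divergence of eigenvector-centrality histograms
print(sim_betweenness(g1, g2))  # KL divergence of betweenness-centrality histograms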
Example #3
from collections import defaultdict
from scipy import stats
from pprint import pprint as pp  # pp assumed to be pprint


def simulate_seasons(N, start_week, players):
    winner_equity = defaultdict(float)
    last_weeks = []
    for i in range(N):
        if i % 1000 == 0:
            print i
        last_week, winners = simulate_season(start_week, players)
        for winner in winners:
            winner_equity[winner] += 1. / len(winners)
        last_weeks.append(last_week)

    pp({p: "%.2f" % (100 * e / N) for p, e in winner_equity.iteritems()})
    print stats.describe(last_weeks)
    print stats.histogram(last_weeks)
Example #4
def mutual_information(data1, data2, domain=256, smoothing=0, log_base=2):
    """
    Mutual information for greyscale images

    Parameters
    ----------

    data1 : ndarray
        first data array

    data2 : ndarray
        second data array (expected to be the same size as data1)

    domain : int, optional (default is 256)
        value domain (e.g. all available grey values in case of an image)

    smoothing : int
        value "k" in additive (aka Laplace) smoothing

    log_base : int or float
        base of the logarithm (2 gives bits, e gives nats)
    """

    m_hist = np.zeros((domain, domain))
    img1_un = np.ravel(data1)
    img2_un = np.ravel(data2)

    # marginal histograms; use the declared domain rather than hard-coding 256
    d1_hist = histogram(img1_un, numbins=domain, defaultlimits=(0, domain - 1))[0]
    d2_hist = histogram(img2_un, numbins=domain, defaultlimits=(0, domain - 1))[0]
    # joint histogram
    for i in range(np.size(img1_un)):
        color1 = int(round(img1_un[i]))
        color2 = int(round(img2_un[i]))
        m_hist[color1, color2] += 1

    # normalize (with optional Laplace smoothing) to probabilities
    d1_hist = (d1_hist + smoothing) / float(len(img1_un) + smoothing * domain)
    d2_hist = (d2_hist + smoothing) / float(len(img2_un) + smoothing * domain)
    m_hist = (np.ravel(m_hist) +
              smoothing) / float(len(img1_un) + smoothing * domain * domain)
    m_hist = np.resize(m_hist, (domain, domain))

    ans = 0
    for i in range(0, domain):
        for j in range(0, domain):
            if m_hist[i][j] == 0 or d1_hist[i] == 0 or d2_hist[j] == 0:
                continue
            tmp = m_hist[i][j] * math.log(
                m_hist[i][j] / d1_hist[i] / d2_hist[j], log_base)
            ans += tmp
    return ans
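A usage sketch, assuming numpy, math, and scipy.stats.histogram are imported as the function body implies (the random 8-bit images are illustrative):

import math
import numpy as np
from scipy.stats import histogram

rng = np.random.RandomState(0)
img_a = rng.randint(0, 256, size=(32, 32))
print(mutual_information(img_a, img_a, smoothing=1))                               # image with itself: maximal MI
print(mutual_information(img_a, rng.randint(0, 256, size=(32, 32)), smoothing=1))  # near 0 for independent noise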
Example #5
import scipy.stats as sciStat
import scipy.optimize as sciOpt


def myFit(data, nBins=30):

    # data binning
    freqObs, xMin, dx, nOut = sciStat.histogram(data, nBins)

    # prepare observed x,y-values (bin centres and normalized frequencies)
    N = len(data)
    xVals = [xMin + (i + 0.5) * dx for i in range(nBins)]
    yVals = [freqObs[i] / (N * dx) for i in range(nBins)]

    # define the objective function as the vertical difference
    # between the observed data and the fit function
    fitFunc = lambda s, x: sciStat.rayleigh.pdf(x, scale=s)
    objFunc = lambda s, x, y: (y - fitFunc(s, x))

    # set an initial guess for the fit parameter and perform a
    # least-squares fit
    s0 = 7.
    s, flag = sciOpt.leastsq(objFunc, s0, args=(xVals, yVals))

    for i in range(nBins):
        print(xVals[i], yVals[i], fitFunc(s[0], xVals[i]))

    print('#', s[0], sum(map(lambda x, y: objFunc(s[0], x, y)**2, xVals, yVals)))
    return s[0]
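A usage sketch, assuming Rayleigh-distributed input (the scale value is illustrative):

data = sciStat.rayleigh.rvs(scale=5.0, size=10000, random_state=0)
sHat = myFit(data, nBins=40)   # sHat should come out close to 5.0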
Example #6
def var(theta, Nbins=360):
    """Sample circular variance, second moment

    Calculated using the minimum variance method with moving cut points.
    See: Weber RO (1997). J. Appl. Meteorol. 36(10), 1403-1415.

    Input: theta - array of radian angle values
           Nbins - number of intervals across [0, 2pi] to minimize over
    Returns: circular variance
    """
    from scipy.stats import histogram
    from numpy import empty, arange, pi

    N = len(theta)
    delta_t = 2 * pi / Nbins
    lims = (0, 2 * pi)
    x = arange(delta_t, 2 * pi + delta_t, delta_t)
    n, xmin, w, extra = histogram(theta, numbins=Nbins, defaultlimits=lims)

    tbar = empty((Nbins,), 'd')
    S = empty((Nbins,), 'd')
    s2 = empty((Nbins,), 'd')

    tbar[0] = (x * n).sum() / N                                        # A1
    S[0] = ((x**2) * n).sum() / (N - 1)                                # A2
    s2[0] = S[0] - N * (tbar[0]**2) / (N - 1)                          # A3

    # shift the cut point one bin at a time and update the moments
    for k in xrange(1, Nbins):
        tbar[k] = tbar[k-1] + (2*pi) * n[k-1] / N                      # A4
        S[k] = S[k-1] + (2*pi) * (2*pi + 2*x[k-1]) * n[k-1] / (N - 1)  # A5
        s2[k] = S[k] - N * (tbar[k]**2) / (N - 1)                      # A6

    return s2.min()
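A usage sketch (Python 2, matching the xrange above), assuming von Mises angles wrapped into [0, 2pi):

import numpy as np

theta = np.random.vonmises(0.0, 2.0, size=1000) % (2 * np.pi)
print(var(theta))   # minimum-variance estimate over the moving cut points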
Example #7
import numpy as _N
import scipy.stats as _ss


def percentile(x, countdat=False):
    if countdat:
        lo = min(x)
        hi = max(x)
        nbins = hi - lo + 1
        hf, low, bs, out = _ss.histogram(x, numbins=nbins, defaultlimits=(lo, hi + 1))

        datn = int(_N.sum(hf))
        pctl   = _N.zeros((hi - lo + 1, 2))
        tot  = 0
        for i in xrange(len(hf)):
            tot += hf[i]
            pctl[i, 0] = lo + i
            pctl[i, 1] = float(tot + 1) / float(datn + 1)

    else:
        sx = _N.sort(x)
        N  = len(sx)

        pctl = _N.zeros((N, 2))

        for n in xrange(N):
            pctl[n, 0]      =  sx[n]
            pctl[n, 1]      =  float(n + 1) / float(N + 1)
            
    return pctl
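A usage sketch for the count-data branch (the spike counts are illustrative):

spk_counts = [0, 1, 1, 2, 3, 3, 3, 5]
pctl = percentile(spk_counts, countdat=True)
print(pctl)   # column 0: count value, column 1: plotting-position percentile (tot+1)/(n+1)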
Example #9
from scipy.stats import histogram, entropy


def tile_entropy(image, coords, size):
    # collect the pixel values of the (size[0] x size[1]) tile anchored at coords
    tmp = []
    for i in range(coords[0], coords[0] + size[0]):
        for j in range(coords[1], coords[1] + size[1]):
            tmp.append(image[i][j])
    h = histogram(tmp, numbins=256, defaultlimits=(0, 255))[0]
    return entropy(h)
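A usage sketch on a random 8-bit image (the shape and tile size are illustrative):

import numpy as np

image = np.random.RandomState(0).randint(0, 256, size=(64, 64))
print(tile_entropy(image, coords=(10, 10), size=(16, 16)))   # Shannon entropy of the tile's grey-level histogram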
Example #12
from scipy import stats


def calcentropy(v_array):
    '''
    Takes an array, builds a histogram of it, then runs the histogram
    through the entropy function. base=2 makes the result bits (not nats).
    '''
    v_hist = stats.histogram(v_array)
    return stats.entropy(v_hist[0], base=2)
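A usage sketch (illustrative data; scipy.stats.histogram defaults to 10 bins):

import numpy as np

print(calcentropy(np.random.RandomState(0).normal(size=1000)))   # entropy of the 10-bin histogram, in bits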
Example #14
def writeHistogram(x, path, limits=None):
    hist, lowRange, binSize, extra = histogram(x, numbins=NUMBINS,
                                               defaultlimits=limits)
    with open(path, 'wt') as fid:
        low = lowRange
        hi = lowRange + binSize
        for freq in hist:
            fid.write(str(low) + ', ' + str(hi) + ', ' + str(freq) + '\n')
            low += binSize
            hi += binSize
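This writes one "low, high, count" line per bin. A usage sketch, assuming histogram comes from scipy.stats and NUMBINS is a module-level constant (both are implied by the snippet but not shown):

from scipy.stats import histogram
import numpy as np

NUMBINS = 20
writeHistogram(np.random.RandomState(0).normal(size=500), 'hist.csv')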
Example #15
def calc_point(self, x, y):
    # clamp the sampling window to the image bounds
    x1 = max(0, x - self.radius)
    x2 = min(np.size(self.image, 0) - 1, x + self.radius)
    y1 = max(0, y - self.radius)
    y2 = min(np.size(self.image, 1) - 1, y + self.radius)
    sub = np.ravel(self.image[x1:x2 + 1, y1:y2 + 1])
    ent = entropy(histogram(sub, numbins=256, defaultlimits=(0, 255))[0])
    # blend the window's entropy into the running entropy map
    self.map[x1:x2 + 1, y1:y2 + 1] = (self.map[x1:x2 + 1, y1:y2 + 1] + ent) / 2
    return ent
Example #16
def featureSplitValues(fn, fm, splits):

    fo = open(fn)
    data = json.load(fo)
    fo.close()

    fo = open(fm)

    # colheaders=fo.next().rstrip().split("\t")[1:]
    # skip the first line

    fo.next()
    rowheaders = []

    # read in the feature names

    for line in fo:
        rowheaders.append(line.rstrip().split('\t')[0])

    # read in the splits
    # each line is feature_id_integer split1 split2 ... (varying length)

    split_entries = {}
    fo = open(splits)
    for line in fo:
        vs = [float(v) for v in line.rstrip().split('\t')]
        split_entries[int(vs[0])] = np.array(vs[1:])

    fo.close()

    matrix = []
    split_list = []

    for f_index in split_entries:
        split_list = split_entries[f_index]
        # don't worry about very few splits

        (bins, low, binsize,extra) = stats.histogram(split_list)

        bin_list = []
        significant = False

        for (index, count) in enumerate(bins):
            if count > 50:
                significant = True
            bin_list.append({ "position" : low + (binsize * index),
                            "count" : count
                            })
        if significant:
            feature_name = rowheaders[f_index]
            split_line = [feature_name]
            split_line.append(bin_list)
            matrix.append(split_line)

    return matrix
Example #19
def normalized_histogram_for_print(score_distribution, num_bins, denominator, limits):
    out = []
    if denominator > 0:
        h = stats.histogram(score_distribution, num_bins, limits)
        out.append("[" + str(h[1]) + " , " + str(h[2] * num_bins) + "], step = " + str(round(h[2], 2)) + " \n\t")
        for x in h[0]:
            out.append(str(round(x * 100.0 / denominator, 3)) + "%\t")
        out.append("\n\t")
        for x in range(num_bins):
            out.append("(" + str(round(h[1] + h[2] + x * h[2], 3)) + ")\t")
    out.append("\n")
    return "".join(out)
Example #20
def sim_closeness(g1, g2):
    # Same comparison as sim_eigen, but over closeness centrality.
    closeness_cent1 = nx.closeness_centrality(g1)
    closeness_cent2 = nx.closeness_centrality(g2)

    numbin = 50

    closeness_list1 = list(closeness_cent1.values())
    closeness_list2 = list(closeness_cent2.values())

    hist1 = histogram(closeness_list1, numbins=numbin)
    hist2 = histogram(closeness_list2, numbins=numbin)

    # Add-one smoothing, as in sim_eigen.
    for i in range(len(hist1[0])):
        hist1[0][i] += 1
    for i in range(len(hist2[0])):
        hist2[0][i] += 1

    print(hist1[0])
    print(hist2[0])

    KL = entropy(hist1[0], hist2[0])
    return KL
Example #21
from sys import stderr
from scipy.stats import histogram


def outputBinFiles(outfilename,
                   plotData,
                   xtickLabels,
                   minMin,
                   maxMax,
                   nbins=50):

    histoArrays = []

    _low_range = -100
    _binsize = -100
    _extrapoints = -1
    for col, xtickLabel in zip(plotData, xtickLabels):
        histoArray, low_range, binsize, extrapoints = histogram(
            col, numbins=nbins, defaultlimits=(minMin, maxMax))
        histoArrays.append(histoArray)

        if _binsize == -100:
            _binsize = binsize
            _low_range = low_range
        else:
            if _binsize != binsize or low_range != _low_range:
                print >> stderr, "inconsistent histo", _binsize, _low_range, histoArray, low_range, binsize, extrapoints
                exit(1)

        if extrapoints > 0:
            print >> stderr, "extrapoints>0", histoArray, low_range, binsize, extrapoints
            exit(1)

    binLows = []

    # bin lower-edge offsets relative to low_range
    for i in range(0, nbins):
        binLows.append(i * _binsize)

    outfil = open(outfilename, "w")
    outv = ["bins"]
    for binLow in binLows:
        outv.append(str(binLow))

    print >> outfil, "\t".join(outv)

    #now the data
    for xtickLabel, histoArray in zip(xtickLabels, histoArrays):
        outv = [xtickLabel]
        totalPoint = sum(histoArray)
        for v in histoArray:
            outv.append(str(float(v) / totalPoint))

        print >> outfil, "\t".join(outv)

    outfil.close()
Example #22
def main(args):
    if args:
        filename = args[0]
    else:
        filename = "CONTCAR"
    input_vasp_file(filename)
    vector_table = bond_vector_table(positions)
    all_neighbor_table = all_neighbor_distances(lat, vector_table)
    #write_table_to_file(all_neighbor_table,name)
    #for GePbTe sqs half, Ge-0,Pb-1,Te-2
    #for PbS-PbTe sqs half Pb-0 S-1 Te-2
    #Edit these numbers!!!!!!!!!!
    a_list = [[0]]
    b_list = [1, 2]
    num_nn = [6]  # Number of nearest neighbors between atoms of type a and b
    bond_stats = []
    bond_table = []
    for i in range(len(a_list)):
        table, stats = find_nearest_neighbors(all_neighbor_table, a_list[i],
                                              b_list, num_nn[i],
                                              num_atom_types, atom_type_list)
        bond_table.append(table)
        bond_stats.append(stats)
    bond_table = np.array(bond_table).flatten()
    print "Table of bond lengths"
    print np.sort(bond_table)
    print "Avg. bond length (Ang), Std. Dev. (Ang)"
    print bond_stats
    print
    gauss = gaussian_kde(bond_table)
    #xdata = np.linspace(2.4,4.0,100)
    xdata = np.linspace(
        min(bond_table) - 3. * bond_stats[0][1],
        max(bond_table) + 3. * bond_stats[0][1], 100)
    ydata = gauss(xdata)
    print "Gaussian distribution fit"
    for i in range(len(xdata)):
        print xdata[i], ydata[i]
    print
    nbins = 10
    hist, lowest, binsize, extra = histogram(bond_table, numbins=nbins)
    n = lowest
    print "histogram data"
    print n, "0.0"
    for i in range(len(hist)):
        print n, hist[i]
        n += binsize
        print n, hist[i]
    print n, "0.0"
    print
Example #24
def find_ref_values(cursor, values):

    ref_values = {}  # for the genes showing a nice normal distribution
    for symbol, val_array in values.iteritems():
        if bad(cursor, symbol): continue
        if not val_array: continue

        description = stats.describe(val_array)
        [nobs, [min, max], mean, variance, skewness, kurtosis] = description

        if mean < 3: continue

        if len(val_array) > 20:
            [teststat, pval] = stats.normaltest(val_array)
        else:
            teststat = 100

        if teststat >= 4: continue

        if mean < 10: continue

        ref_values[symbol] = description

        continue  # below is some descriptive output

        in_left_tail = float(
            len([x
                 for x in val_array if x < mean - 2 * stdev])) / len(val_array)
        in_right_tail = float(
            len([x
                 for x in val_array if x > mean + 2 * stdev])) / len(val_array)
        description += (in_left_tail, in_right_tail)

        blurb(symbol, description, "normal candidate", sys.stdout)
        [hist_numpy, low, binsize, extrapoints] = stats.histogram(val_array)
        histogram = hist_numpy.tolist()
        i = 0
        bin_prev = low
        for val in histogram[:-1]:
            print " %5d %5d " % (int(bin_prev), int(bin_prev + binsize)),
            print " %5d " % val
            i += 1
            bin_prev += binsize

        print

    return ref_values
Example #28
def histogram1(comm, N, data, bins=50):
    """
    Constructs the histogram (probability mass function) of an MPI-
    decomposed data.
    """

    if not np.all(np.isfinite(data)):
        data[~np.isfinite(data)] = np.nan

    (gmin, gmax), u1, c2, c3, c4, c5, c6 = moments(comm, N, data)

    try:
        g3 = c3 / sqrt(c2**3)  # 3rd standardized moment
        g4 = c4 / (c2**2)  # 4th standardized moment
        g5 = c5 / sqrt(c2**5)  # 5th standardized moment
        g6 = c6 / (c2**3)  # 6th standardized moment

    except (RuntimeError, FloatingPointError, ValueError) as e:
        if comm.Get_rank() == 0:
            print(
                '---------------------------------------'
                '---------------------------------------')
            print(str(e))
            print('min, mean, max:\n')
            print('min: {}, u1: {}, max {}\n'.format(gmin, u1, gmax))
            print('moments from two-pass algorithm:\n')
            print('c2: {}, c3: {}, c4: {}, c5: {}, c6: {}\n'.format(
                c2, c3, c4, c5, c6))
            print(
                '---------------------------------------'
                '---------------------------------------')
        MPI.Finalize()
        sys.exit(1)

    hist, low, width, extra = stats.histogram(data,
                                              numbins=bins,
                                              defaultlimits=(gmin, gmax),
                                              printextras=True)

    comm.Allreduce(MPI.IN_PLACE, hist, op=MPI.SUM)
    hist *= 1 / psum(hist)  # makes this a probability mass function

    return hist, u1, c2, g3, g4, g5, g6, gmin, gmax, width
Example #29
def GetHistogramString(array, unit="", **kwargs):
    """
    Returns the values in array represented as a histogram.

    Summary:
        Generates a histogram using scipy.stats.histogram and renders it
        as a string.

    Arguments:
        array: A list of values.
        unit: The human readable string to be used as the unit for the values.
            For example, unit="us" would cause values to displayed as "10us".
        **kwargs: A dict containing parameters to be forwarded directly to
            scipy.stats.histogram as keyword args.
    """
    buckets, low_range, binsize, extrapoints = stats.histogram(array, **kwargs)
    hist = "%7.2f%s : " % (low_range, unit)
    for count in buckets:
        hist += GetBucketChar(count, max(buckets))
    hist += " : %7.2f%s" % ((low_range + binsize * (len(buckets) + 1)), unit)
    return hist
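GetBucketChar is not shown in the snippet; a minimal sketch of the idea, with a hypothetical GetBucketChar that maps a count to a bar character:

from scipy import stats

def GetBucketChar(count, max_count):
    # hypothetical helper: denser character for fuller buckets
    return '#' if count > max_count / 2 else ('-' if count > 0 else ' ')

print(GetHistogramString([1, 2, 2, 3, 5, 8, 13], unit="us", numbins=5))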
Example #30
def generate_random_segments(image, number, radius, entropy_hint=False, sample_size=1.5):
    """
    Generates selected number of random segments with selected radius. The distribution
    of number values is uniform.

    Parameters
    ----------
    image : numpy.ndarray
        2-dimensional array representing a grayscale image
    number : int
        number of segments
    radius : int
        "radius" of the segment (= number of pixels to the edge when walking from the central pixel
        with azimuth i*Pi/4)

    Returns
    -------
    segments : list
        the generated segments (the lowest-entropy ones when entropy_hint is set)
    """
    segments = []
    score_map = {}
    num_samples = number if entropy_hint is False else int(number * sample_size)
    domain = calculate_domain(image, radius)
    for i in range(num_samples):
        x = random.randint(domain[0][0], domain[1][0])
        y = random.randint(domain[0][1], domain[1][1])
        segment = generate_segment(image, (x, y), radius)
        if entropy_hint is True:
            ent = entropy(histogram(segment.image, numbins=256, defaultlimits=(0, 255))[0])
            score_map[ent] = segment
        else:
            segments.append(segment)

    if entropy_hint is True:
        values = score_map.keys()
        values.sort()
        for v in values[:number]:
            segments.append(score_map[v])

    return segments
Example #31
def histNormal(color='000000', trace=False):
    """
    Plot a normal distribution with the same mu and sigma as the original or clipped dataset.
    To place mu at the centre of the abscissa space, use:
    #mu = xao.returnsStats[1][0]+ (0.5* (xao.returnsStats[1][1] - xao.returnsStats[1][0]))
    """
    mu = xao.returnsStats[2]
    sigma = np.sqrt(xao.returnsStats[3])  # sdev-[3] is variance
    normSamps = np.random.normal(mu, sigma,
                                 xao.returnsStats[0])  # [0] is nr datapoints
    dta, smallest, binsize, ignore = stats.histogram(normSamps, nrbins)
    histy, binEdges = np.histogram(normSamps, bins=nrbins, normed=0, new=False)
    #    pylab.plot(binEdges,histy, 'b-', linewidth=1, color='#888800', antialiased=True,\
    #           label='normal distribution,\nwith same St.Dev.', alpha=1)
    pylab.hist(normSamps, nrbins, facecolor=color, align='mid' ,bottom=None, \
               label='simulated normal\ndistrib. with same StD.', alpha=1)
    if trace:
        print("histy:", np.sum(histy))  # ,histy
        print("binEdges", np.sum(binEdges))  #, binEdges
        print("size of normSamps:", len(normSamps))
        print("sum of hist:", histy.sum())
        print("over same abscissa:", np.min(normSamps), np.max(normSamps))
        print("over SND histogram", np.min(histy), np.max(histy))
Example #32
def show_data(test_data, description):
	"""Displays a box plot, histogram and a QQ graph (based on normality), also prints some descriptive
	statistics"""

	#at the moment it also saves as .png files - however naming is not unique
	plt.figure()
	plt.title('Box Plot-'+description)
	plt.boxplot(test_data)
	plt.savefig("boxplot.png")
	plt.show()

	plt.figure()
	plt.title('Histogram-'+description)
	plt.hist((test_data),histtype='bar')
	plt.savefig("Histogram.png")
	plt.show()

	plt.figure()
	qq = stats.probplot(test_data, dist="norm", plot=plt)
	plt.savefig("QQ.png")
	plt.show()

	description = stats.describe(test_data)
	print "The number of observations is:{}".format(description[0])
	print "The minimum and max of the observations are :{} , {}".format(description[1][0],description[1][1])
	print "The mean of the observations is {}".format(description[2])
	print "The variance of the observations is {}".format(description[3])
	print "The skewness of the observations is {}".format(description[4])
	print "The normalised kurtosis of the observations is {}".format(description[5])

	freq_dump = stats.histogram(test_data,10)

	for i in range(10):
		bottom_range = freq_dump[1]+freq_dump[2]*i
		top_range = bottom_range + freq_dump[2]
		print "The proportion from {} to {} is {}".format(bottom_range,top_range,freq_dump[0][i]/len(test_data))
Example #33
    def __init__(self, parent, layer, data, **kwargs):
        PlottingCanvas.__init__(self, parent, data)

        try:
            if isinstance(layer, str) or isinstance(layer, unicode):
                # in case of weights histogram
                self.layer_name = layer
            else:
                self.layer_name = layer.name
            self.isAutoScale = False
            self.intervals = 7
            #self.title = "Histogram (%s)" % (self.layer_name)
            self.title = ""
            self.x_label = data.keys()[0]
            self.y_label = "Counts in bins"
            self.data = data[self.x_label]

            self.enable_axis_x = False
            self.draw_full_axis = False
            self.margin_right = 250  # for legend

            # create a dict for input data for Brushing
            self.data_dict = sorted(self.data, key=self.data.get)  #[obj_id]
            sorted_data = sorted(self.data.values())  #[value]

            if self.x_label == 'Connectivity':
                self.intervals = len(set(sorted_data))
                self.intervals = sorted_data[-1] - sorted_data[0] + 1
                if self.intervals > 50:
                    self.enable_axis_x = True
                    self.margin_right = 40

            if self.intervals > 1:
                self.hist, low_range, binsize, extrapoints = histogram(
                    sorted_data, self.intervals)
            else:
                self.hist = np.array([len(sorted_data)])

            cnt = 0
            bin_idx = 0
            self.bin_index = {}  # key: obj_id, value: bin_idx
            for n in self.hist:
                for i in range(int(n)):
                    obj_id = self.data_dict[cnt]
                    self.bin_index[obj_id] = bin_idx
                    cnt += 1
                bin_idx += 1

            data_min, data_max = sorted_data[0], sorted_data[-1]

            if self.x_label == 'Connectivity':
                #unique_num_neighbors = list(set(sorted_data))
                self.data_intervals = []
                for n in range(sorted_data[0], sorted_data[-1] + 1):
                    self.data_intervals.append((n, n))
            else:
                end_pos = np.cumsum(self.hist)
                start_pos = end_pos - self.hist + 1
                self.data_intervals = [(start_pos[i], end_pos[i])
                                       for i in range(len(self.hist))]

            # a NxN matrix
            self.x_min = 1
            self.x_max = self.intervals + 1
            self.y_min = 0
            self.y_max = np.max(self.hist) + 1

            self.extent = (self.x_min, self.y_min, self.x_max, self.y_max)
            self.status_bar = None  #self.parentFrame.status_bar

            self.gradient_color = GradientColor(gradient_type='rdyibu')

            # color schema: from blue to red
            self.color_matrix = []
            for i in range(self.intervals):
                p = float(i + 1) / self.intervals
                self.color_matrix.append(self.gradient_color.get_color_at(p))

            self.selected_obj_ids = []

        except Exception as err:
            self.ShowMsgBox(
                """ Histogram could not be created. Please select a numeric variable.
            
Details: """ + str(err.message))
            self.isValidPlot = False
            self.parentFrame.Close(True)
            return None

        # linking-brushing events
        self.Register(stars.EVT_OBJS_SELECT, self.OnObjsSelected)
        self.Register(stars.EVT_OBJS_UNSELECT, self.OnNoObjSelect)
Example #34
    # VW=[V[i]*W[i] for i in range(len(V))]
    # values=[(x,y) for (x,y) in zip(VW,W)]

    values = generate_high_amplitude_distribution(min_v, max_v, min_w, max_w,
                                                  size)
    #2values=generate_random_distribution_of_weights_and_values(min_v,max_v,min_w,max_w,size)
    #print values
    color_index = 0

    for k in [15, 20, 25, 45, 70, 100, 200, 300, 500]:
        a, b, edf, CIPercentiles, CINormal = compute_empiric_distribution(
            values, k, int(nb_draws))
        print CIPercentiles
        #print shapiro(edf)
        #raw_input('...')
        c, l, largeur, e = histogram(edf, 50)
        c = [t / sum(c) for t in c]
        xaxis_edf = [l + largeur * i for i in range(50)]
        #plt.plot(a,b,label='Line '+str(k),color=colors[color_index])
        plt.bar(xaxis_edf,
                c,
                largeur,
                label='Hist ' + str(k) + ' ' +
                str([str(float("%.3f" % float(x))) for x in CIPercentiles]),
                alpha=0.6,
                color=colors[color_index])
        #plt.plot(a,b,label='Line '+str(k)+ ' ' +str(CINormal),color=colors[color_index])
        plt.legend(loc='upper right',
                   fancybox=True,
                   framealpha=0.85,
                   fontsize=25)
Example #35
def makeSpect(fname, ftype, k=250, m=100, pulseHeight=None, energies=None, numbins=2000):

    '''
    Takes a file name for data to be processed.  Processes and produces a spectrum.
    '''

    if energies is None:  # avoid a mutable default argument
        energies = []

    if pulseHeight is None:
        f = readFile(fname,ftype)
        M = 0
        pulseHeight = numpy.zeros(len(f))
        numPulses = len(f)
        zeropad = numpy.zeros(2*k+m)
        print 'Reading ', numPulses, ' traces from ', fname
        
        #for i in range(500):
        #    o = fitExp(subtractBaseline(f[i,:]))
        #    M = (M + o)/2
        start = time.time()
        
        i = 0
        numRead = 10000
        iterations = (numPulses-numPulses%numRead)/numRead + 1
        for n in range(iterations):
            n1 = n*10000
            if n >= iterations-1:
                n2 = numPulses
            else:
                n2 = n1 + 10000
            for trace in f[n1:n2,:]:
                trace_s = subtractBaseline(trace)
                s, pulseHeight[i] = trapezoidalFilter(k,m,4467,numpy.append(zeropad,trace_s))
                #t = findRise(trace_s)
                #pulseHeight[i] = extractHeight(s,1000,k,m)
                i += 1
            end = time.time()
        print 'Processing ',n2, ' samples (',n1, ' to ', n2, ') took: ', end-start
        
    #hist,low_range,binsize,extrapoints = stats.histogram(pulseHeight,numbins=max(pulseHeight))
    #plt.plot(hist[0])
    #plt.show()
    #def onclick(event):
    #    return event.xdata
    #cid = fig.mpl_connect('button_press_event',onclick)
    if len(energies) == 0:
        usr_in = ''
        while usr_in != 'done':
            usr_in = raw_input('Please enter energies of the expected peaks.  When finished, enter \'done\': ')
            try:
                energies.append(float(usr_in))
            except:
                if usr_in != 'done':
                    print 'Error: Please enter numbers. When finished, enter \'done\''
                continue
    
    hist,low_range,binsize,extrapoints = stats.histogram(pulseHeight,numbins=int(max(energies)+100))
    histogram = hist*(hist>1)
    histogram = [point for point in histogram if point>0]
    hist,low_range,binsize,extrapoints = stats.histogram(pulseHeight,numbins=int(max(energies)+numbins),defaultlimits=(0.,len(histogram)*binsize))
    tries = 0
    threshold = 10
    peak_indices = []
    while len(energies) != len(peak_indices):
        tries += 1
        thresh=threshold+tries*10
        peak_indices = findRise(hist,findPeak=True,thresh=thresh)
        if tries > 100:
            print 'Error: Could not match number of peaks to energies entered.'
            break
    
    peak_info = peakFit(hist,peak_indices)
    calib,cov = opt.curve_fit(lambda x,m,b: m*x+b, numpy.array([peak[1] for peak in peak_info])*binsize,numpy.array(energies))
    print calib
    bins = numpy.array([binsize*x*calib[0]+calib[1] for x in xrange(len(hist))])
    plt.plot(bins,hist)
    plt.show()
    peak_indices = findRise(hist,findPeak=True,thresh=thresh)
    peak_info = peakFit(hist,peak_indices)
    peak_info[:,1:2] *= binsize*calib[0]
    peak_info[:,1:2] += numpy.ones(peak_info[:,1:2].shape)*calib[1]
    peak_info[:,3] *= calib[0]
    print 'Peak #\tPeak Info'
    print '\tHeight (counts)\t\tCentroid (keV)\t\tFWHM (keV)\t\tsigma_FWHM (keV)'
    for peak_num,peak in enumerate(peak_info):
        print peak_num, list(peak)
    #plt.plot(hist[0])
    #plt.show()
    return hist, bins, peak_info, pulseHeight
Example #36
# Distance light traveled in Newcomb's experiment
distance = 7400.0

# Read the measurements from the data file.
# There are a number of comments at the top of the file marked with "#"
measured_time = loadtxt('speed_of_light.dat', comments="#")

# measurements were in nanoseconds, as differences from 24800 ns.
measured_time += 24800.0

# Convert measured times to measured velocities.
measured_velocity = distance / measured_time * 10.0  # m/ns * 10 == 1e8m/s

# histogram the measured velocities.
bin_counts, min_val, width, outside = stats.histogram(measured_velocity,
                                                      numbins=30)
print 'min val:', min_val

# For plot, we want to know the velocities for each bin.
binned_velocity = min_val + arange(len(bin_counts)) * width
print binned_velocity

# Plot a bar plot of the histogrammed data.
pylab.hold(False)

pylab.bar(binned_velocity, bin_counts, width=width)

pylab.xlabel("velocity (1e8 m/s)")
pylab.ylabel("counts")
pylab.title("Newcoumbs Speed of Light Measurement Histogram")
Example #37
        #ttmp1 = ttmp1 + offset[t]
        ttmp1 = ttmp1 + corbias[vt]

        bias.append(offset[t])

        #get mean and mode
        me1 = np.mean(ttmp1)
        me2 = np.mean(ttmp2)
        mean1.append(me1)
        mean2.append(me2)

        #primary mode
        binval = 200
        bins = np.linspace(-.2, 2, num=binval)
        count, lowerlimit, binsize, extra = stats.histogram(ttmp1,
                                                            binval,
                                                            defaultlimits=(-.2,
                                                                           2))
        mindx1 = np.argmax(count)
        count, lowerlimit, binsize, extra = stats.histogram(ttmp2,
                                                            binval,
                                                            defaultlimits=(-.2,
                                                                           2))
        mindx2 = np.argmax(count)
        m1 = bins[mindx2]
        m2 = bins[mindx1]
        mode1.append(m1)
        mode2.append(m2)

        diff = (area2 - area1) / 1000000
        diff_r = diff / (area1 / 1000000)
        #pdf plot
Example #38
# -*- coding: utf-8 -*-
import numpy as np
from scipy import stats
from matplotlib import pyplot as plt

N = np.loadtxt('statistik.txt')

print("Mean of the count rates: {0:.3f}".format(N.mean()))
print("Standard deviation of the count rates: {0:.3f}".format(np.std(N, ddof=1)))

# split the data into 7 bins
binnum = 7
n, low_range, binsize, extra = stats.histogram(N, binnum)

ind = np.arange(binnum)
width = 0.50

x = np.linspace(0, 7)
norm = stats.norm(4, 1.5).pdf(x)
poisson = stats.poisson(5).pmf(ind)

plt.plot(x, norm, "r", label="normal distribution")

plt.bar(ind, n/100., width, color="blue", label="measured distribution")

plt.bar(ind+0.5, poisson, width, color="green", label="Poisson distribution")

plt.title("Statistical analysis of alpha decay")
plt.ylabel("relative frequency")

plt.xticks(ind+width, ('1', '2', '3', '4', '5', '6', '7'))
Example #39
import sys
import os
import string
import numpy
import scipy
import scipy.stats
from scipy.stats import histogram, histogram2

debug = os.getenv("DEBUG")

rsptime_fn = sys.argv[1]
f = open(rsptime_fn, "r")
records = f.readlines()

times = numpy.array( [ float(r.strip().split(',')[1]) for r in records ] )
maxtime = max(times)
(time_histo, time_low_range, time_binsize, time_extrapoints) = histogram( times, defaultlimits=(0.0, maxtime))
assert(time_low_range == 0.0)
assert(time_extrapoints == 0)
if debug: 
  print(time_histo, ' shape ', time_histo.shape, ' low_range ', time_low_range, ' binsize ', time_binsize, ' extrapoints ', time_extrapoints)
print('time histogram: %s'%string.join([ str(v) for v in time_histo.tolist() ], ','))

rsptimes = numpy.array( [ float(r.strip().split(',')[2]) for r in records ] )
rsptime_histo = histogram2( rsptimes, [ 0.0001, 0.00032, 0.001, 0.0032, 0.01, 0.032, 0.1, 0.32, 1, 3.2, 10, 32, 100 ] )
if debug: 
  print(rsptime_histo,rsptime_histo.shape)
print('response time histogram: %s'%string.join( [ str(v) for v in rsptime_histo.tolist() ], ','))

Example #40
def poi_ch2test(cts):
    if _N.sum(cts) == 0:  #  all of them 0
        return 0.5    #  Poisson with rate 0
    TRIALS    = len(cts)

    rareLimL = 0
    rareLimH = 0
    obsLam   = _N.mean(cts)

    i = int(obsLam)
    while True:
        if poi_pdf(obsLam, i)*TRIALS < 1:
            rareLimH = i - 1
            break   #  inclusive of rareLim and up
        i += 1

    i = int(obsLam)
    while i >= 0:
        if poi_pdf(obsLam, i)*TRIALS < 1:
            rareLimL = i + 1
            break   #  inclusive of rareLim and up
        i -= 1

    #  no bin has < 1 expected events

    expctd   = _N.zeros(rareLimH + 1)
    for n in range(rareLimH):
        expctd[n] = poi_pdf(obsLam, n)*TRIALS
    expctd[rareLimH] = TRIALS - _N.sum(expctd[0:rareLimH])

    #  poipdf[rareLim] is at < 1.  This will be last square.
    #  so this is rareLim + 1 objects
    maxInd              = max(cts)  #  0 based index
    nbins = maxInd - 0 + 1

    hf, low, bs, out = _ss.histogram(cts, numbins=nbins, defaultlimits=(0, maxInd + 1))

    #  # of categories:  rareLim + 1    0..4 -> 1..5  (5 cats).  
    #  k == # of classes

    #  shortened version

    #  if rareLim is the last spot, then length is rareLim + 1
    #  highest count in cts is len(vals) - 1
    #  maxLim == len(vals) - 1   
    #  to accommodate last index rareLim, we need size rareLim + 1
    #  
    svals                =  _N.zeros(rareLimH + 1)
    if maxInd <= rareLimH:
        svals[0:maxInd + 1] =  hf[0:maxInd + 1]
        
    else:
        svals[0:rareLimH] =  hf[0:rareLimH]
        svals[rareLimH]       =  _N.sum(hf[rareLimH:])

    svals[rareLimL]      = _N.sum(hf[0:rareLimL+1])
    expctd[rareLimL]     = _N.sum(expctd[0:rareLimL+1])

    k                   = rareLimH - rareLimL + 1  # index of last element if index from 1
    # [0, 1], [1, 2], ... [k-2, k-1]
    #   # classes (counts) [0, 1, 2, k-2]   (k - 1) classes
    #  1...k inclusive is k classes

    chi2   = 0

    for i in xrange(rareLimL, rareLimH + 1):
        o    =  svals[i]   
        e    =  expctd[i]
        chi2  +=  (o-e)**2/e
        i    += 1

    edf  = k - 2
    pv = 1 - _ss.chi2.cdf(chi2, edf)

    return pv
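poi_pdf is not shown in the snippet and is assumed to be the Poisson pmf; a usage sketch under that assumption, with _N/_ss being numpy/scipy.stats:

import numpy as _N
import scipy.stats as _ss

def poi_pdf(lam, k):
    # assumed helper: Poisson probability mass function
    return _ss.poisson.pmf(k, lam)

cts = _N.random.poisson(4.0, size=200)
print(poi_ch2test(cts))   # chi-square goodness-of-fit p-value; large values are consistent with a Poisson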
Example #41
def cumfrac(x, staircase=False, countdat=False, histogram=False, bins=None, binsAlignLeft=False):
    """
    cumulative fraction
    types of data:

    continuous data        sort data, then assign size rank to sorted points
    countdata              make a histogram of data first
    histogram              cumulatively add values of bins

    cum frac looks like a staircase when plotted w/ lines
    staircase=True will include the 
    cnts   if data is a list of counts (like spks per trial), we will most likely have many instances of, ie the number 4, in our data.  We just need to consider all 

    sanity check
    cnts = [1, 2, 3, 4, 5]  or [1, 2, 3, 4, 5, 3]
    cf = _ks.cumfrac(cnts, countdat=True, staircase=True)    
    plot(cf[:, 0], cf[:, 1])
    """

    if countdat:
        lo = min(x)
        hi = max(x)
        nbins = hi - lo + 1
        hf, low, bs, out = _ss.histogram(x, numbins=nbins, defaultlimits=(lo, hi + 1))

        datn = int(_N.sum(hf))
        if not staircase:
            cf   = _N.zeros((hi - lo + 1, 2))
            tot  = 0
            for i in xrange(len(hf)):
                tot += hf[i]
                cf[i, 0] = lo + i
                cf[i, 1] = float(tot) / datn
        else:
            cf   = _N.zeros(((hi - lo + 1)*2, 2))
            tot  = 0
            for i in xrange(len(hf)):
                cf[2*i, 0]     = lo + i
                cf[2*i + 1, 0] = lo + i
                cf[2*i, 1]     = float(tot) / datn
                tot            += hf[i]
                cf[2*i + 1, 1] = float(tot) / datn

    else:
        sx = _N.sort(x)
        N  = len(sx)

        if not staircase:  
            cf = _N.zeros((N, 2))
            cf[:, 0] = sx[:]
            cf[:, 1] = _N.linspace(0, 1, N, endpoint=False) + 1./N
        else:
            cf = _N.zeros((2*N, 2))

            yvals = _N.linspace(0, 1, N, endpoint=False)
            for n in xrange(N):
                cf[2*n, 0]      =  sx[n]
                cf[2*n + 1, 0]  =  sx[n]
                cf[2*n, 1]      =  yvals[n]
                cf[2*n + 1, 1]  =  yvals[n] + 1./N
                
    if histogram:
        tot = _N.sum(x)
        N   = len(x)
        cf  = _N.zeros((N, 2))
        ccf = 0
        if bins is None:
            bins = range(N + 1)
        for b in xrange(N):
            cf[b, 0] = bins[b + 1]
            cf[b, 1] = ccf
            ccf += x[b]
        cf[N - 1, 0] = bins[b + 1]
        cf[N - 1, 1] = tot

    return cf
Example #42
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from scipy import *
from pylab import *
from scipy import stats

# f(x)
f = lambda x, r: r * x * (1 - x)
# the set of r values
rlist = linspace(3.5, 3.7, 500)
# initial hare population
X = [0.3 * ones_like(rlist)]
# evolve for N steps
for i in arange(0, 5000):
    X += [f(X[-1], rlist)]
# keep the last N iterates
X = hsplit(vstack(X[-2000:]), rlist.size)
# Bin each column vertically (the more points in a bin, the brighter it appears)
H = map(lambda Z: stats.histogram(Z, defaultlimits=(0.3, 0.7), numbins=500)[0], X)
# normalize by weight and invert the color (so the line is black, not white)
H = map(lambda Z: 1 - sqrt(Z / Z.max()), H)
scale = 1
figure(figsize=(8 * scale, 5 * scale), dpi=130)
imshow(rot90(vstack(H)), aspect="auto", extent=[3.5, 3.7, 0.3, 0.7])
bone()  # switch the colormap to black and white
xlabel("r")
ylabel(r"$X_{n \rightarrow \infty}$")
savefig("Small_bifurcation.png")
Example #43
# Generate 1000 random numbers (standard normal)
import numpy as np
import scipy.stats as st

x = np.random.randn(1000)

# Generate the histogram data with scipy.stats
n, low_range, binsize, extrapoints = st.histogram(x)
# define the upper end of the range
upper_range = low_range + binsize * (len(n) - 1)

# Compute the discrete bin positions
bins = np.linspace(low_range, upper_range, len(n))

# Generate the bar chart
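The snippet ends at the comment above; a minimal sketch of the bar chart it sets up, assuming matplotlib:

from matplotlib import pyplot as plt

plt.bar(bins, n, width=binsize, align='center')
plt.show()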
Example #44
# Note that scipy.stats.histogram behaves differently from the
# numpy.histogram imported by default in this %pylab session.



# your code goes here
random_values = frozen_normal.rvs(size=5000)


num_bins = 100
'''
figure()
h = hist(random_values,bins = 100)
show()
print(h[0])
print(h[1])
'''
bin_counts, min_bin, bin_width, outside = histogram(random_values,
                                                    numbins=num_bins)
bin_x = min_bin + bin_width * arange(num_bins)

hist_pdf = bin_counts / (len(random_values) * bin_width)
 
mean_est, std_est = norm.fit(random_values)
print "estimate of mean, std:", mean_est, std_est


hist(random_values, bins=100, normed=True)
figure()
plot(bin_x, norm(mean_est, std_est).pdf(bin_x), 'r', linewidth=2)
show()
Example #45
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.")
    parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;")
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help="Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument(
        "--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values."
    )
    parser.add_argument("--fisher", action="store_true", default=False, help="if true then Fisher definition is used")
    parser.add_argument(
        "--bias",
        action="store_true",
        default=False,
        help="if false,then the calculations are corrected for statistical bias",
    )
    parser.add_argument("--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored")
    parser.add_argument(
        "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored"
    )
    parser.add_argument("--inclusive", action="store_true", default=False, help="if false,limit will be ignored")
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help="If True, if there are extra points a warning is raised saying how many of those points there are",
    )
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        default=False,
        help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument("--correction", action="store_true", default=False, help="continuity correction ")
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help="the number of trials. This is ignored if x gives both the number of successes and failures",
    )
    parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram")
    parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction")
    parser.add_argument("--score", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher_limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help="The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument("--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds")
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim of each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help="lambda_ gives the power in the Cressie-Read power divergence statistic",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument("--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e")
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")

    args = parser.parse_args()
    infile = args.infile
    outfile = open(args.outfile, "w+")
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    sample0 = 0
    sample1 = 0
    sample2 = 0
    if args.sample_cols is not None:
        sample0 = 1
        barlett_samples = []
        for sample in args.sample_cols.split(";"):
            barlett_samples.append(map(int, sample.split(",")))
    if args.sample_one_cols is not None:
        sample1 = 1
        sample_one_cols = args.sample_one_cols.split(",")
    if args.sample_two_cols is not None:
        sample_two_cols = args.sample_two_cols.split(",")
        sample2 = 1
    for line in open(infile):
        sample_one = []
        sample_two = []
        cols = line.strip().split("\t")
        if sample0 == 1:
            b_samples = columns_to_values(barlett_samples, line)
        if sample1 == 1:
            for index in sample_one_cols:
                sample_one.append(cols[int(index) - 1])
        if sample2 == 1:
            for index in sample_two_cols:
                sample_two.append(cols[int(index) - 1])
        if test_id.strip() == "describe":
            size, min_max, mean, uv, bs, bk = stats.describe(map(float, sample_one))
            cols.append(size)
            cols.append(min_max)
            cols.append(mean)
            cols.append(uv)
            cols.append(bs)
            cols.append(bk)
        elif test_id.strip() == "mode":
            vals, counts = stats.mode(map(float, sample_one))
            cols.append(vals)
            cols.append(counts)
        elif test_id.strip() == "nanmean":
            m = stats.nanmean(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "kurtosistest":
            z_value, p_value = stats.kurtosistest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "itemfreq":
            freq = stats.itemfreq(map(float, sample_one))
            for list in freq:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "boxcox_llf":
            IIf = stats.boxcox_llf(imbda, map(float, sample_one))
            cols.append(IIf)
        elif test_id.strip() == "tiecorrect":
            fa = stats.tiecorrect(map(float, sample_one))
            cols.append(fa)
        elif test_id.strip() == "rankdata":
            r = stats.rankdata(map(float, sample_one), method=args.md)
            cols.append(r)
        elif test_id.strip() == "nanstd":
            s = stats.nanstd(map(float, sample_one), bias=args.bias)
            cols.append(s)
        elif test_id.strip() == "anderson":
            A2, critical, sig = stats.anderson(map(float, sample_one), dist=args.dist)
            cols.append(A2)
            for list in critical:
                cols.append(list)
            cols.append(",")
            for list in sig:
                cols.append(list)
        elif test_id.strip() == "binom_test":
            p_value = stats.binom_test(map(float, sample_one), n=args.n, p=args.p)
            cols.append(p_value)
        elif test_id.strip() == "gmean":
            gm = stats.gmean(map(float, sample_one), dtype=args.dtype)
            cols.append(gm)
        elif test_id.strip() == "hmean":
            hm = stats.hmean(map(float, sample_one), dtype=args.dtype)
            cols.append(hm)
        elif test_id.strip() == "kurtosis":
            k = stats.kurtosis(map(float, sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias)
            cols.append(k)
        elif test_id.strip() == "moment":
            n_moment = stats.moment(map(float, sample_one), n=args.n)
            cols.append(n_moment)
        elif test_id.strip() == "normaltest":
            k2, p_value = stats.normaltest(map(float, sample_one))
            cols.append(k2)
            cols.append(p_value)
        elif test_id.strip() == "skew":
            skewness = stats.skew(map(float, sample_one), bias=args.bias)
            cols.append(skewness)
        elif test_id.strip() == "skewtest":
            z_value, p_value = stats.skewtest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "sem":
            s = stats.sem(map(float, sample_one), ddof=args.ddof)
            cols.append(s)
        elif test_id.strip() == "zscore":
            z = stats.zscore(map(float, sample_one), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "signaltonoise":
            s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof)
            cols.append(s2n)
        elif test_id.strip() == "percentileofscore":
            p = stats.percentileofscore(map(float, sample_one), score=args.score, kind=args.kind)
            cols.append(p)
        elif test_id.strip() == "bayes_mvs":
            c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one), alpha=args.alpha)
            cols.append(c_mean)
            cols.append(c_var)
            cols.append(c_std)
        elif test_id.strip() == "sigmaclip":
            c, c_low, c_up = stats.sigmaclip(map(float, sample_one), low=args.m, high=args.n)
            cols.append(c)
            cols.append(c_low)
            cols.append(c_up)
        elif test_id.strip() == "kstest":
            d, p_value = stats.kstest(
                map(float, sample_one), cdf=args.cdf, N=args.N, alternative=args.alternative, mode=args.mode
            )
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "chi2_contingency":
            chi2, p, dof, ex = stats.chi2_contingency(
                map(float, sample_one), correction=args.correction, lambda_=args.lambda_
            )
            cols.append(chi2)
            cols.append(p)
            cols.append(dof)
            cols.append(ex)
        elif test_id.strip() == "tmean":
            if nf == 0 and mf == 0:
                mean = stats.tmean(map(float, sample_one))
            else:
                mean = stats.tmean(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(mean)
        elif test_id.strip() == "tmin":
            if mf == 0:
                min = stats.tmin(map(float, sample_one))
            else:
                min = stats.tmin(map(float, sample_one), lowerlimit=mf, inclusive=args.inclusive)
            cols.append(min)
        elif test_id.strip() == "tmax":
            if nf == 0:
                max = stats.tmax(map(float, sample_one))
            else:
                max = stats.tmax(map(float, sample_one), upperlimit=nf, inclusive=args.inclusive)
            cols.append(max)
        elif test_id.strip() == "tvar":
            if nf == 0 and mf == 0:
                var = stats.tvar(map(float, sample_one))
            else:
                var = stats.tvar(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(var)
        elif test_id.strip() == "tstd":
            if nf == 0 and mf == 0:
                std = stats.tstd(map(float, sample_one))
            else:
                std = stats.tstd(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(std)
        elif test_id.strip() == "tsem":
            if nf == 0 and mf == 0:
                s = stats.tsem(map(float, sample_one))
            else:
                s = stats.tsem(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(s)
        elif test_id.strip() == "scoreatpercentile":
            if nf == 0 and mf == 0:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), interpolation_method=args.interpolation
                )
            else:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), (mf, nf), interpolation_method=args.interpolation
                )
            for list in s:
                cols.append(list)
        elif test_id.strip() == "relfreq":
            if nf == 0 and mf == 0:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b)
            else:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b, (mf, nf))
            for list in rel:
                cols.append(list)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "binned_statistic":
            if nf == 0 and mf == 0:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b
                )
            else:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                    range=(mf, nf),
                )
            cols.append(st)
            cols.append(b_edge)
            cols.append(b_n)
        elif test_id.strip() == "threshold":
            if nf == 0 and mf == 0:
                o = stats.threshold(map(float, sample_one), newval=args.new)
            else:
                o = stats.threshold(map(float, sample_one), mf, nf, newval=args.new)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trimboth":
            o = stats.trimboth(map(float, sample_one), proportiontocut=args.proportiontocut)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trim1":
            t1 = stats.trim1(map(float, sample_one), proportiontocut=args.proportiontocut, tail=args.tail)
            for list in t1:
                cols.append(list)
        elif test_id.strip() == "histogram":
            if nf == 0 and mf == 0:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b)
            else:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b, (mf, nf))
            cols.append(hi)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "cumfreq":
            if nf == 0 and mf == 0:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b)
            else:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b, (mf, nf))
            cols.append(cum)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "boxcox_normmax":
            if nf == 0 and mf == 0:
                ma = stats.boxcox_normmax(map(float, sample_one))
            else:
                ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf), method=args.method)
            cols.append(ma)
        elif test_id.strip() == "boxcox":
            if imbda == 0:
                box, ma, ci = stats.boxcox(map(float, sample_one), alpha=args.alpha)
                cols.append(box)
                cols.append(ma)
                cols.append(ci)
            else:
                box = stats.boxcox(map(float, sample_one), imbda, alpha=args.alpha)
                cols.append(box)
        elif test_id.strip() == "histogram2":
            h2 = stats.histogram2(map(float, sample_one), map(float, sample_two))
            for list in h2:
                cols.append(list)
        elif test_id.strip() == "ranksums":
            z_statistic, p_value = stats.ranksums(map(float, sample_one), map(float, sample_two))
            cols.append(z_statistic)
            cols.append(p_value)
        elif test_id.strip() == "ttest_1samp":
            t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two))
            for list in t:
                cols.append(list)
            for list in prob:
                cols.append(list)
        elif test_id.strip() == "ansari":
            AB, p_value = stats.ansari(map(float, sample_one), map(float, sample_two))
            cols.append(AB)
            cols.append(p_value)
        elif test_id.strip() == "linregress":
            slope, intercept, r_value, p_value, stderr = stats.linregress(
                map(float, sample_one), map(float, sample_two)
            )
            cols.append(slope)
            cols.append(intercept)
            cols.append(r_value)
            cols.append(p_value)
            cols.append(stderr)
        elif test_id.strip() == "pearsonr":
            cor, p_value = stats.pearsonr(map(float, sample_one), map(float, sample_two))
            cols.append(cor)
            cols.append(p_value)
        elif test_id.strip() == "pointbiserialr":
            r, p_value = stats.pointbiserialr(map(float, sample_one), map(float, sample_two))
            cols.append(r)
            cols.append(p_value)
        elif test_id.strip() == "ks_2samp":
            d, p_value = stats.ks_2samp(map(float, sample_one), map(float, sample_two))
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "mannwhitneyu":
            mw_stats_u, p_value = stats.mannwhitneyu(
                map(float, sample_one), map(float, sample_two), use_continuity=args.mwu_use_continuity
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "zmap":
            z = stats.zmap(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "ttest_ind":
            mw_stats_u, p_value = stats.ttest_ind(
                map(float, sample_one), map(float, sample_two), equal_var=args.equal_var
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "ttest_rel":
            t, prob = stats.ttest_rel(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(t)
            cols.append(prob)
        elif test_id.strip() == "mood":
            z, p_value = stats.mood(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(z)
            cols.append(p_value)
        elif test_id.strip() == "shapiro":
            W, p_value, a = stats.shapiro(map(float, sample_one), map(float, sample_two), args.reta)
            cols.append(W)
            cols.append(p_value)
            for list in a:
                cols.append(list)
        elif test_id.strip() == "kendalltau":
            k, p_value = stats.kendalltau(
                map(float, sample_one), map(float, sample_two), initial_lexsort=args.initial_lexsort
            )
            cols.append(k)
            cols.append(p_value)
        elif test_id.strip() == "entropy":
            s = stats.entropy(map(float, sample_one), map(float, sample_two), base=args.base)
            cols.append(s)
        elif test_id.strip() == "spearmanr":
            if sample2 == 1:
                rho, p_value = stats.spearmanr(map(float, sample_one), map(float, sample_two))
            else:
                rho, p_value = stats.spearmanr(map(float, sample_one))
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "wilcoxon":
            if sample2 == 1:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    map(float, sample_two),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            else:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one), zero_method=args.zero_method, correction=args.correction
                )
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "chisquare":
            if sample2 == 1:
                rho, p_value = stats.chisquare(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            else:
                rho, p_value = stats.chisquare(map(float, sample_one), ddof=args.ddof)
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "power_divergence":
            if sample2 == 1:
                stat, p_value = stats.power_divergence(
                    map(float, sample_one), map(float, sample_two), ddof=args.ddof, lambda_=args.lambda_
                )
            else:
                stat, p_value = stats.power_divergence(map(float, sample_one), ddof=args.ddof, lambda_=args.lambda_)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "theilslopes":
            if sample2 == 1:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), map(float, sample_two), alpha=args.alpha)
            else:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), alpha=args.alpha)
            cols.append(mpe)
            cols.append(met)
            cols.append(lo)
            cols.append(up)
        elif test_id.strip() == "combine_pvalues":
            if sample2 == 1:
                stat, p_value = stats.combine_pvalues(
                    map(float, sample_one), method=args.med, weights=map(float, sample_two)
                )
            else:
                stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "obrientransform":
            ob = stats.obrientransform(*b_samples)
            for list in ob:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "f_oneway":
            f_value, p_value = stats.f_oneway(*b_samples)
            cols.append(f_value)
            cols.append(p_value)
        elif test_id.strip() == "kruskal":
            h, p_value = stats.kruskal(*b_samples)
            cols.append(h)
            cols.append(p_value)
        elif test_id.strip() == "friedmanchisquare":
            fr, p_value = stats.friedmanchisquare(*b_samples)
            cols.append(fr)
            cols.append(p_value)
        elif test_id.strip() == "fligner":
            xsq, p_value = stats.fligner(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(xsq)
            cols.append(p_value)
        elif test_id.strip() == "bartlett":
            T, p_value = stats.bartlett(*b_samples)
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "levene":
            w, p_value = stats.levene(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(w)
            cols.append(p_value)
        elif test_id.strip() == "median_test":
            stat, p_value, m, table = stats.median_test(
                ties=args.ties, correction=args.correction, lambda_=args.lambda_, *b_samples
            )
            cols.append(stat)
            cols.append(p_value)
            cols.append(m)
            cols.append(table)
            for list in table:
                elements = ",".join(map(str, list))
                cols.append(elements)
        outfile.write("%s\n" % "\t".join(map(str, cols)))
    outfile.close()
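
A hypothetical invocation of this wrapper (the script and file names are placeholders, not taken from the original): running

    python scipy_stats_tool.py -i input.tab -o output.tab --test_id describe --sample_one_cols 1,2,3

would append the stats.describe() outputs (size, min/max, mean, variance, skewness, kurtosis) over columns 1-3 to every row of input.tab.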
Example #46
# Hint: Try using `stats.histogram`. There is also `pylab.hist` which makes life easier.

from scipy.stats import histogram
# Note that scipy.stats.histogram behaves differently from the
# numpy.histogram imported by default in this %pylab session.

# your code goes here
random_values = frozen_normal.rvs(size=5000)

num_bins = 100
'''
figure()
h = hist(random_values,bins = 100)
show()
print(h[0])
print(h[1])
'''
bin_counts, min_bin, bin_width, outside = histogram(random_values,
                                                    numbins=num_bins)
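# (a hedged aside: scipy.stats.histogram was deprecated and later removed;
#  on a modern SciPy the rough equivalent is
#      bin_counts, edges = np.histogram(random_values, bins=num_bins)
#      min_bin, bin_width = edges[0], edges[1] - edges[0]
#  since numpy returns bin edges instead of (low_range, binsize, extrapoints))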
bin_x = min_bin + bin_width * arange(num_bins)

hist_pdf = bin_counts / (len(random_values) * bin_width)
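# hist_pdf is a density estimate: counts / (N * bin width) integrates to ~1
# (up to the points that fell outside the bins), making it directly
# comparable to the fitted normal pdf plotted below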

mean_est, std_est = norm.fit(random_values)
print "estimate of mean, std:", mean_est, std_est

hist(random_values, bins=100, normed=True)
figure()
plot(bin_x, norm(mean_est, std_est).pdf(bin_x), 'r', linewidth=2)
show()
Example #47
    def __init__(self, parent, layer, data, **kwargs):
        PlottingCanvas.__init__(self,parent, data)
        
        try:
            if isinstance(layer,str) or isinstance(layer, unicode):
                # in case of weights histogram
                self.layer_name = layer
            else:
                self.layer_name = layer.name
            self.isAutoScale = False 
            self.intervals = 7
            #self.title = "Histogram (%s)" % (self.layer_name)
            self.title = ""
            self.x_label = data.keys()[0]
            self.y_label = "Counts in bins"
            self.data = data[self.x_label]
            
            self.enable_axis_x = False
            self.draw_full_axis = False
            self.margin_right = 250 # for legend
            
            # create a dict for input data for Brushing
            self.data_dict = sorted(self.data, key=self.data.get) #[obj_id]
            sorted_data = sorted(self.data.values()) #[value]
            
            if self.x_label == 'Connectivity': 
                # one bin per integer value in the observed range
                self.intervals = sorted_data[-1] - sorted_data[0] + 1
                if self.intervals > 50:
                    self.enable_axis_x = True
                    self.margin_right = 40
                
            if self.intervals > 1:
                self.hist, low_range, binsize, extrapoints = histogram(sorted_data, self.intervals)
            else:
                self.hist = np.array([len(sorted_data)])
           
            cnt = 0; bin_idx = 0
            self.bin_index = {} # key: obj_id, value: bin_idx
            for n in self.hist:
                for i in range(int(n)):
                    obj_id = self.data_dict[cnt]
                    self.bin_index[obj_id] = bin_idx
                    cnt += 1
                bin_idx += 1
            
            data_min, data_max = sorted_data[0], sorted_data[-1]
            
            if self.x_label == 'Connectivity': 
                #unique_num_neighbors = list(set(sorted_data))
                self.data_intervals = []
                for n in range(sorted_data[0], sorted_data[-1]+1):
                    self.data_intervals.append((n,n))
            else:
                end_pos = np.cumsum(self.hist)
                start_pos = end_pos - self.hist + 1
                self.data_intervals = [ (start_pos[i],end_pos[i]) for i in range(len(self.hist))]
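                # (the intervals above are 1-based rank ranges of the sorted
                #  observations falling into each histogram bin)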
        
            # a NxN matrix
            self.x_min = 1
            self.x_max = self.intervals +1
            self.y_min = 0
            self.y_max = np.max(self.hist) +1
    
            self.extent = (self.x_min, self.y_min, self.x_max,self.y_max)
            self.status_bar = None  # self.parentFrame.status_bar
            
            self.gradient_color = GradientColor(gradient_type='rdyibu')
            
            # color schema: from blue to red
            self.color_matrix = []
            for i in range(self.intervals):
                p = float(i+1) / self.intervals
                self.color_matrix.append( self.gradient_color.get_color_at(p))
       
            self.selected_obj_ids = []
            
        except Exception as err:
            self.ShowMsgBox(""" Histogram could not be created. Please select a numeric variable.
            
Details: """ + str(err.message))
            self.isValidPlot = False
            self.parentFrame.Close(True)
            return None
        
        # linking-brushing events
        self.Register(stars.EVT_OBJS_SELECT, self.OnObjsSelected)
        self.Register(stars.EVT_OBJS_UNSELECT, self.OnNoObjSelect)
Example #48
    def analyzeResults(self):
        self.matched = []  # for paired comparisons
        for i, f in enumerate(self.pickles):
            s = f.split(os.sep)[-1]
            if not (fnmatch.filter(itertools.chain.from_iterable(self.matched),
                                   "*" + s)):
                self.matched.append(fnmatch.filter(self.pickles, "*" + s))
            plotname = f.replace('.pkl', '.xplt')
            print("\n... Analyzing results for {:s}".format(plotname))

            results = febio.FebPlt(plotname)
            stress = np.zeros((len(self.data[f]['elements']), 3, 3), float)
            strain = np.copy(stress)
            #material element volumes
            mvolumes = np.zeros(len(self.data[f]['elements']), float)
            #spatial element volumes
            svolumes = np.copy(mvolumes)
            nnodes = len(list(results.NodeData.keys()))
            displacement = np.zeros((nnodes, 3))
            for j, n in enumerate(self.data[f]['nodes']):
                tmp = results.NodeData[j + 1]['displacement'][-1, :]
                displacement[j, :] = [tmp[0], tmp[1], tmp[2]]
            pstress = []
            pstressdir = []
            pstrain = []
            pstraindir = []
            for j, e in enumerate(self.data[f]['elements']):
                tmp = results.ElementData[j + 1]['stress'][-1, :]
                stress[j, :, :] = [[tmp[0], tmp[3], tmp[5]],
                                   [tmp[3], tmp[1], tmp[4]],
                                   [tmp[5], tmp[4], tmp[2]]]
                #material coordinates
                X = np.zeros((4, 3), float)
                #spatial coordinates
                x = np.zeros((4, 3), float)
                for k in range(4):
                    X[k, :] = self.data[f]['nodes'][e[k] - 1]
                    x[k, :] = (X[k, :] +
                               results.NodeData[e[k]]['displacement'][-1, :])
                #set up tangent space
                W = np.zeros((6, 3), float)
                w = np.zeros((6, 3), float)
                for k, c in enumerate([(0, 1), (0, 2), (0, 3), (1, 3), (2, 3),
                                       (1, 2)]):
                    W[k, :] = X[c[1], :] - X[c[0], :]
                    w[k, :] = x[c[1], :] - x[c[0], :]
                dX = np.zeros((6, 6), float)
                ds = np.zeros((6, 1), float)
                for k in range(6):
                    for l in range(3):
                        dX[k, l] = 2 * W[k, l]**2
                    dX[k, 3] = 4 * W[k, 0] * W[k, 1]
                    dX[k, 4] = 4 * W[k, 1] * W[k, 2]
                    dX[k, 5] = 4 * W[k, 0] * W[k, 2]
                    ds[k, 0] = (np.linalg.norm(w[k, :])**2 -
                                np.linalg.norm(W[k, :])**2)
                #solve for strain
                E = np.linalg.solve(dX, ds)
                #get volumes
                mvolumes[j] = old_div(
                    np.abs(np.dot(W[0, :], np.cross(W[1, :], W[2, :]))), 6.0)
                svolumes[j] = old_div(
                    np.abs(np.dot(w[0, :], np.cross(w[1, :], w[2, :]))), 6.0)
                strain[j, :, :] = [[E[0], E[3], E[5]], [E[3], E[1], E[4]],
                                   [E[5], E[4], E[2]]]
                #eigenvalues and eigenvectors of stress and strain tensors
                #eigenvectors are normalized
                eigstrain, eigstraindir = np.linalg.eigh(strain[j, :, :])
                order = np.argsort(eigstrain)
                eigstrain = eigstrain[order]
                eigstraindir /= np.linalg.norm(eigstraindir,
                                               axis=0,
                                               keepdims=True)
                eigstraindir = eigstraindir[:, order]
                pstrain.append(eigstrain)
                pstraindir.append(eigstraindir)
                eigstress, eigstressdir = np.linalg.eigh(stress[j, :, :])
                order = np.argsort(eigstress)
                eigstress = eigstress[order]
                eigstressdir /= np.linalg.norm(eigstressdir,
                                               axis=0,
                                               keepdims=True)
                eigstressdir = eigstressdir[:, order]
                pstress.append(eigstress)
                pstressdir.append(eigstressdir)
            pstress = np.array(pstress)
            pstressdir = np.array(pstressdir)
            pstrain = np.array(pstrain)
            pstraindir = np.array(pstraindir)
            #save reference volumes
            self.volumes.update({f: mvolumes})
            self.results['Effective Strain (von Mises)'].update({
                f:
                np.sqrt(
                    old_div(((pstrain[:, 2] - pstrain[:, 1])**2 +
                             (pstrain[:, 1] - pstrain[:, 0])**2 +
                             (pstrain[:, 2] - pstrain[:, 0])**2), 2.0))
            })
            self.results['Maximum Compressive Strain'].update(
                {f: np.outer(pstrain[:, 0], [1, 1, 1]) * pstraindir[:, :, 0]})
            self.results['Maximum Tensile Strain'].update(
                {f: np.outer(pstrain[:, 2], [1, 1, 1]) * pstraindir[:, :, 2]})
            self.results['Maximum Shear Strain'].update(
                {f: 0.5 * (pstrain[:, 2] - pstrain[:, 0])})
            self.results['Volumetric Strain'].update(
                {f: old_div(svolumes, mvolumes) - 1.0})

            self.results['Effective Stress (von Mises)'].update({
                f:
                np.sqrt(
                    old_div(((pstress[:, 2] - pstress[:, 1])**2 +
                             (pstress[:, 1] - pstress[:, 0])**2 +
                             (pstress[:, 2] - pstress[:, 0])**2), 2.0))
            })
            self.results['Maximum Compressive Stress'].update(
                {f: np.outer(pstress[:, 0], [1, 1, 1]) * pstressdir[:, :, 0]})
            self.results['Maximum Tensile Stress'].update(
                {f: np.outer(pstress[:, 2], [1, 1, 1]) * pstressdir[:, :, 2]})
            self.results['Maximum Shear Stress'].update(
                {f: 0.5 * (pstress[:, 2] - pstress[:, 0])})
            self.results['Pressure'].update(
                {f: old_div(np.sum(pstress, axis=1), 3.0)})

            self.results['Displacement'].update({f: displacement})

        for i, k in enumerate(self.outputs.keys()):
            if self.outputs[k].get():
                for m in self.matched:
                    weights = old_div(self.volumes[m[0]],
                                      np.sum(self.volumes[m[0]]))
                    for j, f in enumerate(m):
                        if len(self.results[k][f].shape) > 1:
                            dat = np.ravel(
                                np.linalg.norm(self.results[k][f], axis=1))
                        else:
                            dat = np.ravel(self.results[k][f])
                        if self.analysis['Generate Histograms'].get():
                            IQR = np.subtract(*np.percentile(dat, [75, 25]))
                            nbins = (int(
                                old_div(
                                    np.ptp(dat),
                                    (2 * IQR * dat.size**(old_div(-1., 3.))))))
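                            # (nbins follows the Freedman-Diaconis rule:
                            #  bin width = 2*IQR*n**(-1/3), nbins = data range / width)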
                            h = histogram(dat, numbins=nbins, weights=weights)
                            bins = np.linspace(h[1],
                                               h[1] + h[2] * nbins,
                                               nbins,
                                               endpoint=False)
                            self.histograms[k][f] = {
                                'bins': bins,
                                'heights': h[0],
                                'width': h[2]
                            }
                        if self.analysis['Tukey Boxplots'].get():
                            quantiles = np.zeros(3, float)
                            for n, q in enumerate([0.25, 0.5, 0.75]):
                                quantiles[n] = quantile_1D(dat, weights, q)
                            self.boxwhiskers[k][f] = {
                                'quantiles': quantiles,
                                'data': dat
                            }
                    if self.analysis['Calculate Differences'].get():
                        for c in itertools.combinations(m, 2):
                            if len(self.results[k][c[0]].shape) > 1:
                                dat1 = np.ravel(
                                    np.linalg.norm(self.results[k][c[0]],
                                                   axis=1))
                                dat2 = np.ravel(
                                    np.linalg.norm(self.results[k][c[1]],
                                                   axis=1))
                            else:
                                dat1 = np.ravel(self.results[k][c[0]])
                                dat2 = np.ravel(self.results[k][c[1]])
                            difference = dat2 - dat1
                            wrms = np.sqrt(
                                np.average(difference**2, weights=weights))
                            self.differences[k][c[1] + "MINUS" + c[0]] = {
                                'difference': difference,
                                'weighted RMS': wrms
                            }
        self.saveResults()
        print("... ... Analysis Complete")
Example #49
# -*- coding: utf-8 -*-
import numpy as np
from scipy import stats
from matplotlib import pyplot as plt

N = np.loadtxt('statistik.txt')

print("Mittelwert der Zählraten: {0:.3f}".format(N.mean()))
print("Standardabweichung der Zählraten: {0:.3f}".format(np.std(N, ddof=1)))

# split the data into 7 bins
binnum = 7
n, low_range, binsize, extra = stats.histogram(N, binnum)

ind = np.arange(binnum)
width = 0.50

x = np.linspace(0, 7)
norm = stats.norm(4, 1.5).pdf(x)
poisson = stats.poisson(5).pmf(ind)

plt.plot(x, norm, "r", label="Normalverteilung")

plt.bar(ind, n / 100., width, color="blue", label="gemessene Verteilung")
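# (the division by 100 above apparently turns counts into relative
#  frequencies, assuming 100 measurements in statistik.txt)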

plt.bar(ind + 0.5, poisson, width, color="green", label="Poisson distribution")

plt.title("Statistische Auswertung des Alpha-Zerfalls")
plt.ylabel("relative Häufigkeit")

plt.xticks(ind + width, ('1', '2', '3', '4', '5', '6', '7'))
Example #50
"""
@author: Sat Kumar Tomer
@website: www.ambhas.com
@email: [email protected]
"""

# import required modules
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st

# generate some synthetic data
x = np.random.randn(100)

# compute histogram
n, low_range, binsize, extrapoints = st.histogram(x)
upper_range = low_range + binsize * len(n)
bins = np.linspace(low_range, upper_range, len(n) + 1)
#bins = 0.5*(bins[:-1] + bins[1:])

# plot the histogram
plt.clf()
plt.bar(bins[:-1], n, width=0.4, color='red')
plt.xlabel('X', fontsize=20)
plt.ylabel('number of data points in the bin', fontsize=15)
plt.savefig('/home/tomer/my_books/python_in_hydrology/images/hist.png')

# compute and plot the relfreq
relfreqs, lowlim, binsize, extrapoints = st.relfreq(x)
plt.clf()
plt.bar(bins[:-1], relfreqs, width=0.4, color='magenta')
Example #51
def _quantize(features, num_clusters, iterations):
    logger.info('Obtaining codebook')
    cb, neigh = kmeans2(features, num_clusters, iter=iterations, minit='points')
    logger.info('Finished quantizing')
    dist, _ , _ , _ = histogram(neigh, numbins=num_clusters)
    return cb, dist
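
A minimal usage sketch for _quantize (assumptions: an older SciPy that still ships scipy.stats.histogram, a module-level logger, and random features standing in for real descriptors):

import logging
import numpy as np
from scipy.cluster.vq import kmeans2
from scipy.stats import histogram

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

features = np.random.rand(500, 16)  # 500 synthetic 16-D descriptors
codebook, counts = _quantize(features, num_clusters=32, iterations=20)
# codebook: (32, 16) array of cluster centers
# counts: per-cluster histogram of descriptor-to-centroid assignments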
Example #52
    def analyzeResults(self):
        self.matched = [] # for paired comparisons
        for i, f in enumerate(self.pickles):
            s = f.split(os.sep)[-1]
            if not(fnmatch.filter(
                    itertools.chain.from_iterable(self.matched), "*" + s)):
                self.matched.append(fnmatch.filter(self.pickles, "*" + s))
            plotname = f.replace('.pkl', '.xplt')
            print("\n... Analyzing results for {:s}".format(plotname))

            results = febio.FebPlt(plotname)
            stress = np.zeros((len(self.data[f]['elements']), 3, 3), float)
            strain = np.copy(stress)
            #material element volumes
            mvolumes = np.zeros(len(self.data[f]['elements']), float)
            #spatial element volumes
            svolumes = np.copy(mvolumes)
            nnodes = len(list(results.NodeData.keys()))
            displacement = np.zeros((nnodes, 3))
            for j, n in enumerate(self.data[f]['nodes']):
                tmp = results.NodeData[j + 1]['displacement'][-1, :]
                displacement[j, :] = [tmp[0], tmp[1], tmp[2]]
            pstress = []
            pstressdir = []
            pstrain = []
            pstraindir = []
            for j, e in enumerate(self.data[f]['elements']):
                tmp = results.ElementData[j + 1]['stress'][-1, :]
                stress[j, :, :] = [[tmp[0], tmp[3], tmp[5]],
                                   [tmp[3], tmp[1], tmp[4]],
                                   [tmp[5], tmp[4], tmp[2]]]
                #material coordinates
                X = np.zeros((4, 3), float)
                #spatial coordinates
                x = np.zeros((4, 3), float)
                for k in range(4):
                    X[k, :] = self.data[f]['nodes'][e[k] - 1]
                    x[k, :] = (X[k, :] +
                               results.NodeData[e[k]]['displacement'][-1, :])
                #set up tangent space
                W = np.zeros((6, 3), float)
                w = np.zeros((6, 3), float)
                for k, c in enumerate(
                        [(0, 1), (0, 2), (0, 3), (1, 3), (2, 3), (1, 2)]):
                    W[k, :] = X[c[1], :] - X[c[0], :]
                    w[k, :] = x[c[1], :] - x[c[0], :]
                dX = np.zeros((6, 6), float)
                ds = np.zeros((6, 1), float)
                for k in range(6):
                    for l in range(3):
                        dX[k, l] = 2 * W[k, l] ** 2
                    dX[k, 3] = 4 * W[k, 0] * W[k, 1]
                    dX[k, 4] = 4 * W[k, 1] * W[k, 2]
                    dX[k, 5] = 4 * W[k, 0] * W[k, 2]
                    ds[k, 0] = (np.linalg.norm(w[k, :]) ** 2 -
                                np.linalg.norm(W[k, :]) ** 2)
                #solve for strain
                E = np.linalg.solve(dX, ds)
                #get volumes
                mvolumes[j] = old_div(np.abs(
                    np.dot(W[0, :], np.cross(W[1, :], W[2, :]))), 6.0)
                svolumes[j] = old_div(np.abs(
                    np.dot(w[0, :], np.cross(w[1, :], w[2, :]))), 6.0)
                strain[j, :, :] = [[E[0], E[3], E[5]],
                                   [E[3], E[1], E[4]],
                                   [E[5], E[4], E[2]]]
                #eigenvalues and eigenvectors of stress and strain tensors
                #eigenvectors are normalized
                eigstrain, eigstraindir = np.linalg.eigh(strain[j, :, :])
                order = np.argsort(eigstrain)
                eigstrain = eigstrain[order]
                eigstraindir /= np.linalg.norm(eigstraindir, axis=0, keepdims=True)
                eigstraindir = eigstraindir[:, order]
                pstrain.append(eigstrain)
                pstraindir.append(eigstraindir)
                eigstress, eigstressdir = np.linalg.eigh(stress[j, :, :])
                order = np.argsort(eigstress)
                eigstress = eigstress[order]
                eigstressdir /= np.linalg.norm(eigstressdir, axis=0, keepdims=True)
                eigstressdir = eigstressdir[:, order]
                pstress.append(eigstress)
                pstressdir.append(eigstressdir)
            pstress = np.array(pstress)
            pstressdir = np.array(pstressdir)
            pstrain = np.array(pstrain)
            pstraindir = np.array(pstraindir)
            #save reference volumes
            self.volumes.update({f: mvolumes})
            self.results['Effective Strain (von Mises)'].update(
                {f: np.sqrt(old_div(((pstrain[:, 2] - pstrain[:, 1]) ** 2 +
                             (pstrain[:, 1] - pstrain[:, 0]) ** 2 +
                             (pstrain[:, 2] - pstrain[:, 0]) ** 2),
                            2.0))})
            self.results['Maximum Compressive Strain'].update(
                {f: np.outer(pstrain[:, 0], [1 , 1, 1]) * pstraindir[:, :, 0]})
            self.results['Maximum Tensile Strain'].update(
                {f: np.outer(pstrain[:, 2], [1, 1, 1]) * pstraindir[:, :, 2]})
            self.results['Maximum Shear Strain'].update(
                {f: 0.5 * (pstrain[:, 2] - pstrain[:, 0])})
            self.results['Volumetric Strain'].update(
                {f: old_div(svolumes, mvolumes) - 1.0})

            self.results['Effective Stress (von Mises)'].update(
                {f: np.sqrt(old_div(((pstress[:, 2] - pstress[:, 1]) ** 2 +
                             (pstress[:, 1] - pstress[:, 0]) ** 2 +
                             (pstress[:, 2] - pstress[:, 0]) ** 2), 2.0))})
            self.results['Maximum Compressive Stress'].update(
                {f: np.outer(pstress[:, 0], [1 , 1, 1]) * pstressdir[:, :, 0]})
            self.results['Maximum Tensile Stress'].update(
                {f: np.outer(pstress[:, 2], [1, 1, 1]) * pstressdir[:, :, 2]})
            self.results['Maximum Shear Stress'].update(
                {f: 0.5 * (pstress[:, 2] - pstress[:, 0])})
            self.results['Pressure'].update(
                {f: old_div(np.sum(pstress, axis=1), 3.0)})

            self.results['Displacement'].update({f: displacement})

        for i, k in enumerate(self.outputs.keys()):
            if self.outputs[k].get():
                for m in self.matched:
                    weights = old_div(self.volumes[m[0]], np.sum(self.volumes[m[0]]))
                    for j, f in enumerate(m):
                        if len(self.results[k][f].shape) > 1:
                            dat = np.ravel(np.linalg.norm(self.results[k][f], axis=1))
                        else:
                            dat = np.ravel(self.results[k][f])
                        if self.analysis['Generate Histograms'].get():
                            IQR = np.subtract(*np.percentile(dat, [75, 25]))
                            nbins = (int(old_div(np.ptp(dat),
                                         (2 * IQR * dat.size ** (old_div(-1., 3.))))))
                            h = histogram(dat, numbins=nbins, weights=weights)
                            bins = np.linspace(h[1], h[1] + h[2] * nbins,
                                               nbins, endpoint=False)
                            self.histograms[k][f] = {'bins': bins,
                                                     'heights': h[0],
                                                     'width': h[2]}
                        if self.analysis['Tukey Boxplots'].get():
                            quantiles = np.zeros(3, float)
                            for n, q in enumerate([0.25, 0.5, 0.75]):
                                quantiles[n] = quantile_1D(dat, weights, q)
                            self.boxwhiskers[k][f] = {'quantiles': quantiles,
                                                      'data': dat}
                    if self.analysis['Calculate Differences'].get():
                        for c in itertools.combinations(m, 2):
                            if len(self.results[k][c[0]].shape) > 1:
                                dat1 = np.ravel(np.linalg.norm(self.results[k][c[0]], axis=1))
                                dat2 = np.ravel(np.linalg.norm(self.results[k][c[1]], axis=1))
                            else:
                                dat1 = np.ravel(self.results[k][c[0]])
                                dat2 = np.ravel(self.results[k][c[1]])
                            difference = dat2 - dat1
                            wrms = np.sqrt(np.average(difference ** 2,
                                                      weights=weights))
                            self.differences[k][c[1] + "MINUS" + c[0]] = {
                                'difference': difference, 'weighted RMS': wrms}
        self.saveResults()
        print("... ... Analysis Complete")
Example #53
    subplot(rlist.size, 1, i + 1)
    plot(X[:, i], label='r = %.2f' % r)
    ylim(0, 1)
    yticks('')
    xticks('')
    legend(loc=10)
show()
#! Bifurcation diagram
#!----------------------
#! To study the long-term behavior of the sequence we can plot the values
#! it visits after many iterations, as a function of the parameter r.
rlist = linspace(2, 4, 800)
X = [
    0.5 * ones_like(rlist),
]
for i in arange(0, 10000):
    X += [
        f(X[-1], rlist),
    ]
X = hsplit(vstack(X[-2000:]), rlist.size)
from scipy import stats
H = map(lambda Z: stats.histogram(Z, defaultlimits=(0, 1), numbins=300)[0], X)
H = map(lambda Z: 1 - Z / Z.max(), H)
H = vstack(H)
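# Each column of X holds the last 2000 iterates for one value of r; the
# histogram over (0, 1) measures how often each state is visited, and
# 1 - Z/Z.max() inverts the counts so frequently visited states plot dark
# under the bone() colormap below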
figure()
imshow(rot90(H), aspect='auto', extent=[2, 4, 0, 1])
bone()
xlabel('r')
ylabel(r'$X_{n \rightarrow \infty}$')
show()
Example #54
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument("-o",
                        "--outfile",
                        required=True,
                        help="Path to the output file.")
    parser.add_argument("--sample_one_cols",
                        help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols",
                        help="Input format, like smi, sdf, inchi")
    parser.add_argument(
        "--sample_cols",
        help="Input format, like smi, sdf, inchi,separate arrays using ;",
    )
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help=
        "Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help=
        "If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument(
        "--reta",
        action="store_true",
        default=False,
        help="Whether or not to return the internally computed a values.",
    )
    parser.add_argument(
        "--fisher",
        action="store_true",
        default=False,
        help="if true then Fisher definition is used",
    )
    parser.add_argument(
        "--bias",
        action="store_true",
        default=False,
        help=
        "if false,then the calculations are corrected for statistical bias",
    )
    parser.add_argument(
        "--inclusive1",
        action="store_true",
        default=False,
        help="if false,lower_limit will be ignored",
    )
    parser.add_argument(
        "--inclusive2",
        action="store_true",
        default=False,
        help="if false,higher_limit will be ignored",
    )
    parser.add_argument(
        "--inclusive",
        action="store_true",
        default=False,
        help="if false,limit will be ignored",
    )
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help=
        "If True, if there are extra points a warning is raised saying how many of those points there are",
    )
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        default="False",
        help=
        "Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument(
        "--correction",
        action="store_true",
        default=False,
        help="continuity correction ",
    )
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help=
        "Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help=
        "the number of trials. This is ignored if x gives both the number of successes and failures",
    )
    parser.add_argument("--b",
                        type=int,
                        default=0,
                        help="The number of bins to use for the histogram")
    parser.add_argument("--N",
                        type=int,
                        default=0,
                        help="Score that is compared to the elements in a.")
    parser.add_argument("--ddof",
                        type=int,
                        default=0,
                        help="Degrees of freedom correction")
    parser.add_argument(
        "--score",
        type=int,
        default=0,
        help="Score that is compared to the elements in a.",
    )
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher_limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help=
        "The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument(
        "--new",
        type=float,
        default=0.0,
        help="Value to put in place of values in a outside of bounds",
    )
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim of each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help=
        "lambda_ gives the power in the Cressie-Read power divergence statistic",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help=
        "If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument(
        "--base",
        type=float,
        default=1.6,
        help="The logarithmic base to use, defaults to e",
    )
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")

    args = parser.parse_args()
    infile = args.infile
    outfile = open(args.outfile, "w+")
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    sample0 = 0
    sample1 = 0
    sample2 = 0
    if args.sample_cols is not None:
        sample0 = 1
        barlett_samples = []
        for sample in args.sample_cols.split(";"):
            barlett_samples.append(map(int, sample.split(",")))
    if args.sample_one_cols is not None:
        sample1 = 1
        sample_one_cols = args.sample_one_cols.split(",")
    if args.sample_two_cols is not None:
        sample_two_cols = args.sample_two_cols.split(",")
        sample2 = 1
    for line in open(infile):
        sample_one = []
        sample_two = []
        cols = line.strip().split("\t")
        if sample0 == 1:
            b_samples = columns_to_values(barlett_samples, line)
        if sample1 == 1:
            for index in sample_one_cols:
                sample_one.append(cols[int(index) - 1])
        if sample2 == 1:
            for index in sample_two_cols:
                sample_two.append(cols[int(index) - 1])
        if test_id.strip() == "describe":
            size, min_max, mean, uv, bs, bk = stats.describe(
                map(float, sample_one))
            cols.append(size)
            cols.append(min_max)
            cols.append(mean)
            cols.append(uv)
            cols.append(bs)
            cols.append(bk)
        elif test_id.strip() == "mode":
            vals, counts = stats.mode(map(float, sample_one))
            cols.append(vals)
            cols.append(counts)
        elif test_id.strip() == "nanmean":
            m = stats.nanmean(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "kurtosistest":
            z_value, p_value = stats.kurtosistest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "itemfreq":
            freq = stats.itemfreq(map(float, sample_one))
            for list in freq:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "boxcox_llf":
            IIf = stats.boxcox_llf(imbda, map(float, sample_one))
            cols.append(IIf)
        elif test_id.strip() == "tiecorrect":
            fa = stats.tiecorrect(map(float, sample_one))
            cols.append(fa)
        elif test_id.strip() == "rankdata":
            r = stats.rankdata(map(float, sample_one), method=args.md)
            cols.append(r)
        elif test_id.strip() == "nanstd":
            s = stats.nanstd(map(float, sample_one), bias=args.bias)
            cols.append(s)
        elif test_id.strip() == "anderson":
            A2, critical, sig = stats.anderson(map(float, sample_one),
                                               dist=args.dist)
            cols.append(A2)
            for list in critical:
                cols.append(list)
            cols.append(",")
            for list in sig:
                cols.append(list)
        elif test_id.strip() == "binom_test":
            p_value = stats.binom_test(map(float, sample_one),
                                       n=args.n,
                                       p=args.p)
            cols.append(p_value)
        elif test_id.strip() == "gmean":
            gm = stats.gmean(map(float, sample_one), dtype=args.dtype)
            cols.append(gm)
        elif test_id.strip() == "hmean":
            hm = stats.hmean(map(float, sample_one), dtype=args.dtype)
            cols.append(hm)
        elif test_id.strip() == "kurtosis":
            k = stats.kurtosis(
                map(float, sample_one),
                axis=args.axis,
                fisher=args.fisher,
                bias=args.bias,
            )
            cols.append(k)
        elif test_id.strip() == "moment":
            n_moment = stats.moment(map(float, sample_one), n=args.n)
            cols.append(n_moment)
        elif test_id.strip() == "normaltest":
            k2, p_value = stats.normaltest(map(float, sample_one))
            cols.append(k2)
            cols.append(p_value)
        elif test_id.strip() == "skew":
            skewness = stats.skew(map(float, sample_one), bias=args.bias)
            cols.append(skewness)
        elif test_id.strip() == "skewtest":
            z_value, p_value = stats.skewtest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "sem":
            s = stats.sem(map(float, sample_one), ddof=args.ddof)
            cols.append(s)
        elif test_id.strip() == "zscore":
            z = stats.zscore(map(float, sample_one), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "signaltonoise":
            s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof)
            cols.append(s2n)
        elif test_id.strip() == "percentileofscore":
            p = stats.percentileofscore(map(float, sample_one),
                                        score=args.score,
                                        kind=args.kind)
            cols.append(p)
        elif test_id.strip() == "bayes_mvs":
            c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one),
                                                   alpha=args.alpha)
            cols.append(c_mean)
            cols.append(c_var)
            cols.append(c_std)
        elif test_id.strip() == "sigmaclip":
            c, c_low, c_up = stats.sigmaclip(map(float, sample_one),
                                             low=args.m,
                                             high=args.n)
            cols.append(c)
            cols.append(c_low)
            cols.append(c_up)
        elif test_id.strip() == "kstest":
            d, p_value = stats.kstest(
                map(float, sample_one),
                cdf=args.cdf,
                N=args.N,
                alternative=args.alternative,
                mode=args.mode,
            )
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "chi2_contingency":
            chi2, p, dof, ex = stats.chi2_contingency(
                map(float, sample_one),
                correction=args.correction,
                lambda_=args.lambda_)
            cols.append(chi2)
            cols.append(p)
            cols.append(dof)
            cols.append(ex)
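        # The trimmed-statistics branches below share a convention assumed
        # from the surrounding tool: mf/nf are the user-supplied lower/upper
        # limits (0 means "not set"), and (inclusive1, inclusive2) control
        # whether values equal to a limit are kept.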
        elif test_id.strip() == "tmean":
            if nf == 0 and mf == 0:
                mean = stats.tmean(map(float, sample_one))
            else:
                mean = stats.tmean(map(float, sample_one), (mf, nf),
                                   (inclusive1, inclusive2))
            cols.append(mean)
        elif test_id.strip() == "tmin":
            if mf == 0:
                tmin_val = stats.tmin(map(float, sample_one))
            else:
                tmin_val = stats.tmin(map(float, sample_one),
                                      lowerlimit=mf,
                                      inclusive=args.inclusive)
            cols.append(tmin_val)
        elif test_id.strip() == "tmax":
            if nf == 0:
                tmax_val = stats.tmax(map(float, sample_one))
            else:
                tmax_val = stats.tmax(map(float, sample_one),
                                      upperlimit=nf,
                                      inclusive=args.inclusive)
            cols.append(tmax_val)
        elif test_id.strip() == "tvar":
            if nf == 0 and mf == 0:
                var = stats.tvar(map(float, sample_one))
            else:
                var = stats.tvar(map(float, sample_one), (mf, nf),
                                 (inclusive1, inclusive2))
            cols.append(var)
        elif test_id.strip() == "tstd":
            if nf == 0 and mf == 0:
                std = stats.tstd(map(float, sample_one))
            else:
                std = stats.tstd(map(float, sample_one), (mf, nf),
                                 (inclusive1, inclusive2))
            cols.append(std)
        elif test_id.strip() == "tsem":
            if nf == 0 and mf == 0:
                s = stats.tsem(map(float, sample_one))
            else:
                s = stats.tsem(map(float, sample_one), (mf, nf),
                               (inclusive1, inclusive2))
            cols.append(s)
        elif test_id.strip() == "scoreatpercentile":
            if nf == 0 and mf == 0:
                s = stats.scoreatpercentile(
                    map(float, sample_one),
                    map(float, sample_two),
                    interpolation_method=args.interpolation,
                )
            else:
                s = stats.scoreatpercentile(
                    map(float, sample_one),
                    map(float, sample_two),
                    (mf, nf),
                    interpolation_method=args.interpolation,
                )
            for score in s:
                cols.append(score)
        elif test_id.strip() == "relfreq":
            if nf == 0 and mf == 0:
                rel, low_range, binsize, ex = stats.relfreq(
                    map(float, sample_one), args.b)
            else:
                rel, low_range, binsize, ex = stats.relfreq(
                    map(float, sample_one), args.b, (mf, nf))
            for value in rel:
                cols.append(value)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "binned_statistic":
            if nf == 0 and mf == 0:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                )
            else:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                    range=(mf, nf),
                )
            cols.append(st)
            cols.append(b_edge)
            cols.append(b_n)
        elif test_id.strip() == "threshold":
            if nf == 0 and mf == 0:
                o = stats.threshold(map(float, sample_one), newval=args.new)
            else:
                o = stats.threshold(map(float, sample_one),
                                    mf,
                                    nf,
                                    newval=args.new)
            for value in o:
                cols.append(value)
        elif test_id.strip() == "trimboth":
            o = stats.trimboth(map(float, sample_one),
                               proportiontocut=args.proportiontocut)
            for value in o:
                cols.append(value)
        elif test_id.strip() == "trim1":
            t1 = stats.trim1(
                map(float, sample_one),
                proportiontocut=args.proportiontocut,
                tail=args.tail,
            )
            for value in t1:
                cols.append(value)
        elif test_id.strip() == "histogram":
            if nf == 0 and mf == 0:
                hi, low_range, binsize, ex = stats.histogram(
                    map(float, sample_one), args.b)
            else:
                hi, low_range, binsize, ex = stats.histogram(
                    map(float, sample_one), args.b, (mf, nf))
            cols.append(hi)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "cumfreq":
            if nf == 0 and mf == 0:
                cum, low_range, binsize, ex = stats.cumfreq(
                    map(float, sample_one), args.b)
            else:
                cum, low_range, binsize, ex = stats.cumfreq(
                    map(float, sample_one), args.b, (mf, nf))
            cols.append(cum)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "boxcox_normmax":
            if nf == 0 and mf == 0:
                ma = stats.boxcox_normmax(map(float, sample_one))
            else:
                ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf),
                                          method=args.method)
            cols.append(ma)
        elif test_id.strip() == "boxcox":
            if imbda == 0:
                box, ma, ci = stats.boxcox(map(float, sample_one),
                                           alpha=args.alpha)
                cols.append(box)
                cols.append(ma)
                cols.append(ci)
            else:
                box = stats.boxcox(map(float, sample_one),
                                   imbda,
                                   alpha=args.alpha)
                cols.append(box)
        elif test_id.strip() == "histogram2":
            h2 = stats.histogram2(map(float, sample_one),
                                  map(float, sample_two))
            for value in h2:
                cols.append(value)
        elif test_id.strip() == "ranksums":
            z_statistic, p_value = stats.ranksums(map(float, sample_one),
                                                  map(float, sample_two))
            cols.append(z_statistic)
            cols.append(p_value)
        elif test_id.strip() == "ttest_1samp":
            t, prob = stats.ttest_1samp(map(float, sample_one),
                                        map(float, sample_two))
            for value in t:
                cols.append(value)
            for value in prob:
                cols.append(value)
        elif test_id.strip() == "ansari":
            AB, p_value = stats.ansari(map(float, sample_one),
                                       map(float, sample_two))
            cols.append(AB)
            cols.append(p_value)
        elif test_id.strip() == "linregress":
            slope, intercept, r_value, p_value, stderr = stats.linregress(
                map(float, sample_one), map(float, sample_two))
            cols.append(slope)
            cols.append(intercept)
            cols.append(r_value)
            cols.append(p_value)
            cols.append(stderr)
        elif test_id.strip() == "pearsonr":
            cor, p_value = stats.pearsonr(map(float, sample_one),
                                          map(float, sample_two))
            cols.append(cor)
            cols.append(p_value)
        elif test_id.strip() == "pointbiserialr":
            r, p_value = stats.pointbiserialr(map(float, sample_one),
                                              map(float, sample_two))
            cols.append(r)
            cols.append(p_value)
        elif test_id.strip() == "ks_2samp":
            d, p_value = stats.ks_2samp(map(float, sample_one),
                                        map(float, sample_two))
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "mannwhitneyu":
            mw_stats_u, p_value = stats.mannwhitneyu(
                map(float, sample_one),
                map(float, sample_two),
                use_continuity=args.mwu_use_continuity,
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "zmap":
            z = stats.zmap(map(float, sample_one),
                           map(float, sample_two),
                           ddof=args.ddof)
            for value in z:
                cols.append(value)
        elif test_id.strip() == "ttest_ind":
            mw_stats_u, p_value = stats.ttest_ind(map(float, sample_one),
                                                  map(float, sample_two),
                                                  equal_var=args.equal_var)
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "ttest_rel":
            t, prob = stats.ttest_rel(map(float, sample_one),
                                      map(float, sample_two),
                                      axis=args.axis)
            cols.append(t)
            cols.append(prob)
        elif test_id.strip() == "mood":
            z, p_value = stats.mood(map(float, sample_one),
                                    map(float, sample_two),
                                    axis=args.axis)
            cols.append(z)
            cols.append(p_value)
        elif test_id.strip() == "shapiro":
            W, p_value, a = stats.shapiro(map(float, sample_one),
                                          map(float, sample_two), args.reta)
            cols.append(W)
            cols.append(p_value)
            for value in a:
                cols.append(value)
        elif test_id.strip() == "kendalltau":
            k, p_value = stats.kendalltau(
                map(float, sample_one),
                map(float, sample_two),
                initial_lexsort=args.initial_lexsort,
            )
            cols.append(k)
            cols.append(p_value)
        elif test_id.strip() == "entropy":
            s = stats.entropy(map(float, sample_one),
                              map(float, sample_two),
                              base=args.base)
            cols.append(s)
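        # The branches below accept an optional second sample; sample2 == 1
        # is assumed (from the surrounding tool) to flag that a second data
        # column was provided.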
        elif test_id.strip() == "spearmanr":
            if sample2 == 1:
                rho, p_value = stats.spearmanr(map(float, sample_one),
                                               map(float, sample_two))
            else:
                rho, p_value = stats.spearmanr(map(float, sample_one))
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "wilcoxon":
            if sample2 == 1:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    map(float, sample_two),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            else:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "chisquare":
            if sample2 == 1:
                rho, p_value = stats.chisquare(map(float, sample_one),
                                               map(float, sample_two),
                                               ddof=args.ddof)
            else:
                rho, p_value = stats.chisquare(map(float, sample_one),
                                               ddof=args.ddof)
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "power_divergence":
            if sample2 == 1:
                stat, p_value = stats.power_divergence(
                    map(float, sample_one),
                    map(float, sample_two),
                    ddof=args.ddof,
                    lambda_=args.lambda_,
                )
            else:
                stat, p_value = stats.power_divergence(map(float, sample_one),
                                                       ddof=args.ddof,
                                                       lambda_=args.lambda_)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "theilslopes":
            if sample2 == 1:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one),
                                                     map(float, sample_two),
                                                     alpha=args.alpha)
            else:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one),
                                                     alpha=args.alpha)
            cols.append(mpe)
            cols.append(met)
            cols.append(lo)
            cols.append(up)
        elif test_id.strip() == "combine_pvalues":
            if sample2 == 1:
                stat, p_value = stats.combine_pvalues(
                    map(float, sample_one),
                    method=args.med,
                    weights=map(float, sample_two),
                )
            else:
                stat, p_value = stats.combine_pvalues(map(float, sample_one),
                                                      method=args.med)
            cols.append(stat)
            cols.append(p_value)
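        # The remaining branches take every input sample at once; b_samples
        # is assumed (from the surrounding tool) to hold one list per sample
        # column, expanded with *b_samples.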
        elif test_id.strip() == "obrientransform":
            ob = stats.obrientransform(*b_samples)
            for row in ob:
                elements = ",".join(map(str, row))
                cols.append(elements)
        elif test_id.strip() == "f_oneway":
            f_value, p_value = stats.f_oneway(*b_samples)
            cols.append(f_value)
            cols.append(p_value)
        elif test_id.strip() == "kruskal":
            h, p_value = stats.kruskal(*b_samples)
            cols.append(h)
            cols.append(p_value)
        elif test_id.strip() == "friedmanchisquare":
            fr, p_value = stats.friedmanchisquare(*b_samples)
            cols.append(fr)
            cols.append(p_value)
        elif test_id.strip() == "fligner":
            xsq, p_value = stats.fligner(center=args.center,
                                         proportiontocut=args.proportiontocut,
                                         *b_samples)
            cols.append(xsq)
            cols.append(p_value)
        elif test_id.strip() == "bartlett":
            T, p_value = stats.bartlett(*b_samples)
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "levene":
            w, p_value = stats.levene(center=args.center,
                                      proportiontocut=args.proportiontocut,
                                      *b_samples)
            cols.append(w)
            cols.append(p_value)
        elif test_id.strip() == "median_test":
            stat, p_value, m, table = stats.median_test(
                ties=args.ties,
                correction=args.correction,
                lambda_=args.lambda_,
                *b_samples)
            cols.append(stat)
            cols.append(p_value)
            cols.append(m)
            cols.append(table)
            for row in table:
                elements = ",".join(map(str, row))
                cols.append(elements)
        outfile.write("%s\n" % "\t".join(map(str, cols)))
    outfile.close()
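Most branches above follow the same contract: the scipy.stats call returns a (statistic, p-value) pair that becomes two output columns. A minimal self-contained sketch of that pattern (the sample data and the two chosen tests are illustrative, not taken from the original tool):

from scipy import stats

# Illustrative sample; the real tool reads values from a tabular input file.
sample = [2.1, 2.4, 1.9, 2.2, 2.8, 2.0, 2.3, 2.5, 1.8, 2.6,
          2.2, 2.1, 2.7, 2.4, 2.0, 2.3, 1.9, 2.5, 2.2, 2.6]

cols = []
k2, p_value = stats.normaltest(sample)     # returns a (statistic, p-value) pair
cols.append(k2)
cols.append(p_value)

z_value, p_value = stats.skewtest(sample)  # same two-column contract
cols.append(z_value)
cols.append(p_value)

print("\t".join(map(str, cols)))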
Example #55
0
from numpy import loadtxt, arange
from scipy import stats
import pylab

# Distance light traveled in Newcomb's experiment (meters).
distance = 7400.0

# Read the measurements from the data file.
# There are a number of comments at the top of the file marked with "#"
measured_time = loadtxt('speed_of_light.dat',comments="#")

# Measurements were recorded as nanosecond differences from 24800 ns.
measured_time += 24800.0

# Convert measured times to measured velocities.
measured_velocity = distance/measured_time*10.0  # m/ns times 10 gives units of 1e8 m/s

# histogram the measured velocities.
bin_counts, min_val, width, outside = stats.histogram(measured_velocity,
                                                      numbins=30)
print 'min val:', min_val

# For plot, we want to know the velocities for each bin.
binned_velocity = min_val + arange(len(bin_counts))*width
print binned_velocity

# Plot a bar plot of the histogrammed data.
pylab.hold(False)

pylab.bar(binned_velocity, bin_counts, width=width)

pylab.xlabel("velocity (1e8 m/s)")
pylab.ylabel("counts")
pylab.title("Newcoumbs Speed of Light Measurement Histogram")
Example #56
0
 def __str__(self):
     return ('%f +/- %f (SEM)\nhist: %s' %
             (self.mean, self.SEM, stats.histogram(self.samples).count))
Example #57
0
from pylab import *  # provides vstack, linspace, and the plotting helpers used below

X = vstack(X)
figure()
for i, r in enumerate(rlist):
    subplot(rlist.size, 1, i + 1)
    plot(X[:, i], label="r = %.2f" % r)
    ylim(0, 1)
    yticks("")
    xticks("")
    legend(loc=10)
show()
#! Bifurcation diagram
#!----------------------
#! To study the long-term behavior of the sequence we can plot the values
#! it visits after many iterations, as a function of the parameter r.
rlist = linspace(2, 4, 800)
X = [0.5 * ones_like(rlist)]
for i in arange(0, 10000):
    X += [f(X[-1], rlist)]
X = hsplit(vstack(X[-2000:]), rlist.size)
from scipy import stats

H = map(lambda Z: stats.histogram(Z, defaultlimits=(0, 1), numbins=300)[0], X)
H = map(lambda Z: 1 - Z / Z.max(), H)
H = vstack(H)
figure()
imshow(rot90(H), aspect="auto", extent=[2, 4, 0, 1])
bone()
xlabel("r")
ylabel(r"$X_{n \rightarrow \infty}$")
show()
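This excerpt assumes an iteration function f defined earlier in the full script. For a bifurcation diagram over r in [2, 4] with values confined to (0, 1), the usual choice is the logistic map; a plausible definition (an assumption, not shown in the excerpt) is:

def f(x, r):
    # Logistic map: x_{n+1} = r * x_n * (1 - x_n); assumed, not shown above.
    return r * x * (1 - x)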
Example #58
0
 def count_hist(self):
     h = stats.histogram(self.pixmatrix.flatten(), numbins=30)
     self.full_description['histogram'] = h
     return h
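A minimal usage sketch for count_hist, with a hypothetical ImageStats holder and numpy.histogram standing in for the removed scipy.stats.histogram:

import numpy as np

class ImageStats(object):
    def __init__(self, pixmatrix):
        self.pixmatrix = pixmatrix        # 2-D array of pixel values
        self.full_description = {}

    def count_hist(self):
        # numpy.histogram replaces the removed scipy.stats.histogram;
        # only the per-bin counts are stored, unlike the snippet above,
        # which keeps the full result tuple.
        counts, _edges = np.histogram(self.pixmatrix.flatten(), bins=30)
        self.full_description['histogram'] = counts
        return counts

img = ImageStats(np.random.randint(0, 256, size=(64, 64)))
print(img.count_hist())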