def get_cluster_distribution(g, method = 'average'):
	""" 
		The clustering coefficient distribution grouped by degree. Like a histogram, it gives each
		possible degree k together with the average/median clustering coefficient of the nodes of degree k in graph g.

		Parameters:
		-----------
			g: NetworkX Graph
			method: str, ('average', 'median'), (default = 'average')
		Returns:
		--------
			(xdata, ydata): a 2-tuple of arrays, (k, avg_cc(V_k)), where V_k is the set of nodes with degree k
	"""
	g = to_undirected(g)
	k = nx.clustering(g)
	d = g.degree()
	ck = defaultdict(list)
	for n in g.nodes_iter():
		ck[d[n]].append(k[n])
	xdata, ydata = list(), list()
	
	if method == 'average':
		for x, y in ifilter(lambda x: x[0] > 1 and average(x[1]) > 0, ck.iteritems()):
			xdata.append(x)
			ydata.append(average(y))
	elif method == 'median':
		for x, y in ifilter(lambda x: x[0] > 1 and median(x[1]) > 0, ck.iteritems()):
			xdata.append(x)
			ydata.append(median(y))
	else:
		raise ValueError("method should be 'average' or 'median'")
	xdata = array(xdata)
	ydata = array(ydata)
	return(xdata, ydata)
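A minimal usage sketch, assuming a Python 2 / networkx 1.x environment (the snippet relies on nodes_iter, iteritems and ifilter), with average, median and array coming from numpy and to_undirected being a local helper:

import networkx as nx

g = nx.barabasi_albert_graph(500, 3, seed=1)    # hypothetical scale-free test graph
k, cc = get_cluster_distribution(g, method='average')
for degree, coeff in zip(k, cc):
    print degree, coeff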
def plotAllCombinations(aclasses, avariants,
                        fclasses, fvariants,
                        trials, maxsteps, maxbatchsize=10):
    fundic = {}    
    ploti = 1
    rows = sum([len(avariants[ac]) for ac in aclasses]) + len(aclasses) - 1
    cols = len(fvariants) * len(fclasses) + len(fclasses) - 1
    f_mid = int(median(range(len(fvariants))))
    for ac_id, aclass in enumerate(aclasses):
        a_mid = int(median(range(len(avariants[aclass]))))
        for as_id, aparams in enumerate(avariants[aclass]):
            if as_id == 0 and ac_id > 0:
                ploti += cols
            
            for fc_id, fclass in enumerate(fclasses):
                if fc_id not in fundic:
                    # shared samples across all uses of one function
                    fun = fclass()
                    fwrap = FunctionWrapper(trials, fun, record_samples=True)
                    fwrap.nextSamples(maxbatchsize * (maxsteps+10))
                    fundic[fc_id] = fwrap._seen
                data = fundic[fc_id]
                for fs_id, fsettings in enumerate(fvariants):
                    if fs_id == 0 and fc_id > 0:
                        ploti += 1
                    fun = fclass(**fsettings)
                    provider = DataFunctionWrapper(data, fun, shuffling=False)            
                    pylab.subplot(rows, cols, ploti); ploti += 1
                    plotHeatmap(provider, aclass, aparams, trials, maxsteps)
                    if ac_id == 0 and as_id == 0 and fs_id == f_mid:
                        pylab.title(fclass.__name__[5:])
                    if fs_id == 0 and as_id == a_mid:
                        pylab.ylabel(aclass.__name__[:6])
    pylab.subplots_adjust(left=0.1, bottom=0.01, right=0.99, top=0.9, wspace=0.05, hspace=0.05)        
Example #3
def showVectorDisplacements():

    global testImage, croppedRefImage, u, v, valid, q1, umean, vmean, x, y, sxyVar, wxyVar, goodvectorsVar
    from scipy import where, compress, logical_and, median, logical_or, nan
    from pylab import resize, transpose, quiver, title, show, find, imshow, hist, figure, clf, draw, save, load, xlabel, ylabel, flipud

    mxy = 3
    wxy = int(wxyVar.get())
    sxy = int(sxyVar.get())
    goodvectors = float(goodvectorsVar.get())
    #process to find PIV-style displacements
    x, y, u, v, q1, valid = simplepiv(croppedRefImage, testImage, wxy, mxy,
                                      sxy)
    good = where(logical_and(q1 > goodvectors, valid > 0), True, False)
    umean = median(compress(good.flat, u.flat))
    vmean = median(compress(good.flat, v.flat))
    u = where(logical_or(q1 < goodvectors, valid < 0), 0, u)
    v = where(logical_or(q1 < goodvectors, valid < 0), 0, v)
    u = u - umean
    v = v - vmean
    save('vecx.out', x)
    save('vecy.out', y)
    save('vecu.out', u)
    save('vecv.out', v)
    save('vecq1.out', q1)
    save('vecvalid.out', valid)
    u = flipud(u)
    v = -flipud(v)
    quiver(x, y, u, v)
    title('Vector displacements')
    xlabel('Pixels')
    ylabel('Pixels')
    show()
    return
Example #4
def PrintValues( outfile, values,  options, prefix = "",titles = None):

    if options.flat or options.aggregate_column:

        if options.add_header:
            if prefix: outfile.write( "prefix\t" )
            
            if titles: outfile.write( "column\t" )
                
            print "\t".join( ("nval", "min", "max", "mean", "median", "stddev", "sum", "q1", "q3" ) )
        
        for x in range(len(values)):

            vals = values[x]

            if len(vals) == 0:

                if options.output_empty:
                    if titles: outfile.write( titles[x] + "\t" )
                    if prefix: outfile.write( prefix + "\t" )

                    outfile.write( "0" + "\tna" * 8  + "\n" )

                continue

            if titles: outfile.write( titles[x] + "\t" )
            if prefix: outfile.write( prefix + "\t" )

            vals.sort()
            if len(vals) > 4:
                q1 = options.value_format % vals[len(vals) // 4]
                q3 = options.value_format % vals[len(vals) * 3 // 4]
            else:
                q1 = options.value_format % vals[0]
                q3 = options.value_format % vals[-1]

            outfile.write( "\t".join( ( "%i" % len(vals),
                                        options.value_format % float(min(vals)),
                                        options.value_format % float(max(vals)),
                                        options.value_format % scipy.mean(vals),
                                        options.value_format % scipy.median(vals),
                                        options.value_format % scipy.std(vals),                                      
                                        options.value_format % reduce( lambda x, y: x+y, vals),
                                        q1, q3,
                                        )) + "\n")
            
    else:

        if titles:
            print "category\t%s" % string.join(titles,"\t")

        print "count\t%s"  % (string.join( map(lambda v: "%i" % len(v), values), "\t"))
        print "min\t%s"    % (string.join( map(lambda v: options.value_format % min(v), values), "\t"))
        print "max\t%s"    % (string.join( map(lambda v: options.value_format % max(v), values), "\t"))
        print "mean\t%s"   % (string.join( map(lambda v: options.value_format % scipy.mean(v), values), "\t"))
        print "median\t%s" % (string.join( map(lambda v: options.value_format % scipy.median(v), values), "\t"))
        print "stddev\t%s" % (string.join( map(lambda v: options.value_format % scipy.std(v), values), "\t"))
        print "sum\t%s"    % (string.join( map(lambda v: options.value_format % reduce( lambda x,y: x+y, v), values), "\t"))
        print "q1\t%s"     % (string.join( map(lambda v: options.value_format % scipy.stats.scoreatpercentile(v,per=25), values), "\t"))
        print "q3\t%s"     % (string.join( map(lambda v: options.value_format % scipy.stats.scoreatpercentile(v,per=75), values), "\t"))
Example #5
def WriteRadius(mali, identifiers, prefix="", gap_char="-"):
    """write percent identities in pairwise comparisons both for nucleotide acids and amino acids."""

    pides_na = []
    seq_aa = []
    for x in range(0, len(identifiers)):

        seq_aa.append(Genomics.TranslateDNA2Protein(mali[identifiers[x]]))

        for y in range(x + 1, len(identifiers)):
            pides_na.append(MaliIO.getPercentIdentity(
                mali[identifiers[x]], mali[identifiers[y]], gap_char))

    pides_aa = []
    for x in range(0, len(identifiers) - 1):
        for y in range(x + 1, len(identifiers)):
            pides_aa.append(
                MaliIO.getPercentIdentity(seq_aa[x], seq_aa[y], gap_char))

    print "%s\tpide\t%i\t" % (prefix, len(pides_na)) +\
          string.join(map(lambda x: "%.2f" % x, (min(pides_na),
                                                 max(pides_na),
                                                 scipy.mean(pides_na),
                                                 scipy.median(pides_na),
                                                 scipy.std(pides_na))), "\t") + "\t" +\
          string.join(map(lambda x: "%.2f" % x, (min(pides_aa),
                                                 max(pides_aa),
                                                 scipy.mean(pides_aa),
                                                 scipy.median(pides_aa),
                                                 scipy.std(pides_aa))), "\t")
Example #6
def lossTraces(fwrap, aclass, dim, maxsteps, storesteps=None, x0=None,
               initNoise=0., minLoss=1e-10, algoparams={}):
    """ Compute a number of loss curves, for the provided settings,
    stored at specific storestep points. """
    if not storesteps:
        storesteps = range(maxsteps + 1)
    
    # initial points, potentially noisy
    if x0 is None:
        x0 = ones(dim) + randn(dim) * initNoise
    
    # tracking progress by callback
    paramtraces = {'index':-1}
    def storer(a):
        lastseen = paramtraces['index']
        for ts in [x for x in storesteps if x > lastseen and x <= a._num_updates]:
            paramtraces[ts] = a.bestParameters.copy()
        paramtraces['index'] = a._num_updates
        
    # initialization    
    algo = aclass(fwrap, x0, callback=storer, **algoparams)
    print algo, fwrap, dim, maxsteps,
    
    # store initial step   
    algo.callback(algo)
    algo.run(maxsteps)

    # process learning curve
    del paramtraces['index']
    paramtraces = array([x for _, x in sorted(paramtraces.items())])
    oloss = mean(fwrap.stochfun.expectedLoss(ones(100) * fwrap.stochfun.optimum))
    ls = abs(fwrap.stochfun.expectedLoss(ravel(paramtraces)) - oloss) + minLoss
    ls = reshape(ls, paramtraces.shape)
    print median(ls[-1])
    return ls
def centroid(stamp):
    """
    Calcula el centro de la estrella viendo un centro de masasx
    con el flujo.

    Parameters
    ----------
    stamp : (N,)array_like
            Arreglo en 2-D, representa una seccion de imagen que
            engloba a una estrella.
    Returns
    -------
    cx : float
         Coordenada x del centro de la estrella.

    cy : float
         Coordenada y del centro de la estrella.
    """
    # Se crean vectores con los indices x e y de la estampilla.
    x_vect = sp.arange(0, sp.shape(stamp)[1])
    y_vect = sp.arange(0, sp.shape(stamp)[0])
    # Se estima un centro de la estrella.
    cx = sp.median(x_vect)
    cy = sp.median(y_vect)
    # Se calcula la coordenada x del centro de la estrella.
    sum_x = sp.nansum(x_vect * stamp[cy, :])
    cx = sum_x / sp.nansum(stamp[cy, :])
    # Se calcula la coordenada y del centro de la estrella.
    sum_y = sp.nansum(y_vect * stamp[:, cx])
    cy = sum_y / sp.nansum(stamp[:, cx])
    return cx, cy
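A self-contained check of centroid on a synthetic Gaussian star; the stamp size and true center (12.0, 9.0) are made-up test values, and sp is assumed to be a scipy old enough to still re-export arange/median/nansum from numpy:

import numpy as np

yy, xx = np.mgrid[0:21, 0:21]
stamp = np.exp(-((xx - 12.0)**2 + (yy - 9.0)**2) / (2.0 * 2.0**2))
cx, cy = centroid(stamp)
print(cx, cy)  # both should land close to the true center (12.0, 9.0)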
Example #8
def WriteRadius(mali, identifiers, prefix="", gap_char="-"):
    """write percent identities in pairwise comparisons both for nucleotide acids and amino acids."""

    pides_na = []
    seq_aa = []

    for x in range(0, len(identifiers)):

        seq_aa.append(Genomics.TranslateDNA2Protein(mali[identifiers[x]]))

        for y in range(x + 1, len(identifiers)):
            pides_na.append(MaliIO.getPercentIdentity(
                mali[identifiers[x]], mali[identifiers[y]], gap_char))

    pides_aa = []
    for x in range(0, len(identifiers) - 1):
        for y in range(x + 1, len(identifiers)):
            pides_aa.append(
                MaliIO.getPercentIdentity(seq_aa[x], seq_aa[y], gap_char))

    print "%s\tpide\t%i\t" % (prefix, len(pides_na)) +\
          string.join(map(lambda x: "%.2f" % x, (min(pides_na),
                                                 max(pides_na),
                                                 scipy.mean(pides_na),
                                                 scipy.median(pides_na),
                                                 numpy.std(pides_na))), "\t") + "\t" +\
          string.join(map(lambda x: "%.2f" % x, (min(pides_aa),
                                                 max(pides_aa),
                                                 scipy.mean(pides_aa),
                                                 scipy.median(pides_aa),
                                                 numpy.std(pides_aa))), "\t")
Example #9
def merged_event_breakpoint_stats(mev):
    bp1d, bp2d = [], []
    bend1 = bend2 = None
    reads = []
    quals = []
    for ev in mev.events:
        bp1d.append(ev.bp1.pos)
        bp2d.append(ev.bp2.pos)
        reads.append(ev.reads)
        quals.append(ev.qual)
        bend1 = ev.bp1.breakend
        bend2 = ev.bp2.breakend
    bp1d = np.array(bp1d)
    bp2d = np.array(bp2d)
    if bend1 == "+":
        bp1limit = scipy.amin(bp1d)
    else:
        bp1limit = scipy.amax(bp1d)
    if bend2 == "+":
        bp2limit = scipy.amin(bp2d)
    else:
        bp2limit = scipy.amax(bp2d)
    reads_median = int(scipy.median(reads))
    qual_median = int(scipy.median(quals))
    return int(bp1limit), int(bp2limit), int(bp2limit - bp1limit), scipy.mean(
        bp1d), scipy.amax(bp1d) - scipy.amin(bp1d), scipy.std(
            bp1d), scipy.mean(bp2d), scipy.amax(bp2d) - scipy.amin(
                bp2d), scipy.std(bp2d), reads_median, qual_median
Example #10
def plot_collated(r_set="truth", infl_set="varinfl-0.25", subplots=True, save=False):
    d = cl("%s/output-2013/sim3-results_r-%s_%s"%(DATA_DIR,r_set, infl_set))
    coverages = SP.array(range(20,200,20) + range(200,1001,100)) #range(200,500,50) + range(500,1001,100))
    if r_set == "truth": coverages = SP.array(range(20,200,20) + range(200,500,50) + range(500,1001,100))
    afs = map(lambda x:"%.2f"%x, [0.7,0.85,0.99])
    models = ['sQTL','Smooth','ML','MP']
    p = 0
    colors = 'bgry'
    if subplots: PL.figure(figsize=(14,10))
    for feature in 'FX':
        for af in afs:
            if subplots: PL.subplot(2,3,p+1)
            else: PL.figure()
            p += 1
            lines = []
            
            for i,model in enumerate(models):
                I = SP.where(d[af][model][feature].var(axis=0) > 1e-10)[0]
                err = d[af][model][feature][:,I].var(axis=1)**0.5
                lines.append(PL.plot(coverages + 2*i,SP.median(d[af][model][feature][:,I],axis=1), "-o", linewidth=3, markersize=9, color=colors[i])[0])
                PL.errorbar(coverages + 2*i, SP.median(d[af][model][feature][:,I],axis=1), yerr=err, fmt="-o", linewidth=1, markersize=9,color=colors[i])
            PL.xticks(coverages)
            #PL.xlim(min(coverages),max(coverages))
            PL.title("%s %s - %s"%(infl_set, feature, af))
            PL.xlim(15,220)

            if feature == "X": PL.ylim(0,8)
            if p == 1:  PL.legend(lines, models)
            if save: PL.savefig("/Users/leopold/doc/write/manuscripts/2011_X_sQTL/figures/figure2013-3_2%s.pdf"%("ABCDEF"[p-1:p]))
    PL.show()
Example #11
def subtract_overscan(data,x,y):

   """This function finds the median values in each of the four overscan
      regions and subtracts them from the appropriate regions of the
      input data file.  It then converts the results back to electrons
      rather than ADU"""

   # Define bias region limits
   bx1 = slice(0,15,1)
   bx2 = slice(2065,2080,1)
   y1 = slice(0,1024,1)
   y2 = slice(1024,2048,1)

   # Define limits of regions associated with the four amps
   x1 = slice(16,1040)
   x2 = slice(1040,2064)

   # Define median values of overscan regions from appropriate data regions
   newdata = data.astype(scipy.float32)
   overscan = scipy.zeros((4,1))
   overscan[0] = scipy.median(newdata[y1,bx1].ravel())
   overscan[1] = scipy.median(newdata[y2,bx1].ravel())
   overscan[2] = scipy.median(newdata[y1,bx2].ravel())
   overscan[3] = scipy.median(newdata[y2,bx2].ravel())

   # Subtract overscan
   newdata[y1,x1] = newdata[y1,x1] - overscan[0]
   newdata[y2,x1] = newdata[y2,x1] - overscan[1]
   newdata[y1,x2] = newdata[y1,x2] - overscan[2]
   newdata[y2,x2] = newdata[y2,x2] - overscan[3]

   newdata = newdata[y,x]
   return newdata
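A hedged smoke test with a synthetic frame matching the hard-coded 2080x2048 layout; the 500 ADU bias level and 100 ADU signal are made-up values, and scipy.float32/scipy.median/scipy.zeros assume a legacy scipy that re-exports numpy names:

import numpy as np

frame = np.full((2048, 2080), 500.0)   # uniform frame at the assumed bias level
frame[:, 16:2064] += 100.0             # flat signal in the data region
trimmed = subtract_overscan(frame, slice(16, 2064), slice(0, 2048))
print(trimmed.mean())                  # ~100.0 once the bias is removed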
def major_axis(x, y, ndist=10, mask=1):
    dist = scipy.array([scipy.hypot(x-i,y-j) \
                        for i, j in itertools.izip(x, y)])
    shape = dist.shape
    imax = scipy.zeros(ndist, dtype=int)
    jmax = scipy.zeros(ndist, dtype=int)
    # dummy run
    for i in xrange((mask - 1) * ndist):
        ii, jj = scipy.unravel_index(scipy.argmax(dist), shape)
        dist[ii] = scipy.zeros(shape[0])
        dist[jj] = scipy.zeros(shape[0])
        dist[:, ii] = scipy.zeros(shape[1])
        dist[:, jj] = scipy.zeros(shape[1])
    for i in xrange(ndist):
        imax[i], jmax[i] = scipy.unravel_index(scipy.argmax(dist), shape)
        dist[imax[i]] = scipy.zeros(shape[0])
        dist[jmax[i]] = scipy.zeros(shape[0])
        dist[:, imax[i]] = scipy.zeros(shape[1])
        dist[:, jmax[i]] = scipy.zeros(shape[1])
    #print imax, jmax
    slopes = [(y[i]-y[j])/(x[i]-x[j]) \
              for i, j in itertools.izip(imax, jmax)]
    zeros = [y[i] - m * x[i] for i, m in itertools.izip(imax, slopes)]
    m = scipy.median(slopes)
    n = scipy.median(zeros)
    return imax, jmax, m, n
   def plot_hist_compare(self,which_case):
        plt.ylabel('Percentage of points')
        plt.xlabel('Percentage RMS relative error')
        
        def yto_percent(y, x):
            s = str(sp.around((y/(len(self.REL_ERR)*1.0)*100),2))
            if matplotlib.rcParams['text.usetex'] is True:
                return s + r'$\%$'
            else:
                return s + '%'     

        def xto_percent(y, x):
            s = str(y*100)
            if matplotlib.rcParams['text.usetex'] is True:
                return s + r'$\%$'
            else:
                return s + '%'
        
        thermo1, thermo2, = self.select[which_case]
        #Plot the SU2 error
        i=0;
        self.REL_ERR = 0;
        for v in self.variables[sp.where\
        ((self.variables!=thermo1) * (self.variables!=thermo2))]:
            i=i+1;
            self.REL_ERR = self.REL_ERR + \
            ((getattr(self.SU2[which_case],v)-getattr(self.RandomSamples,v))/\
            (getattr(self.RandomSamples,v)))**2;
        self.REL_ERR = sp.sqrt(self.REL_ERR)/i
        plt.hist(self.REL_ERR, bins=25, color='k', alpha=0.3, label='SU2')
        print 'Error max SU2', max(self.REL_ERR)
        setattr(self.SU2[which_case],"median_ERR",sp.median(self.REL_ERR));
        
        #Plot the SciPy error
        i =0;
        self.REL_ERR = 0;
        for v in self.variables[sp.where\
        ((self.variables!=thermo1) * (self.variables!=thermo2))]:
            i=i+1;
            self.REL_ERR = self.REL_ERR + \
            ((getattr(self.SciPy[which_case],v)-getattr(self.RandomSamples,v))/\
            (getattr(self.RandomSamples,v)))**2;
        self.REL_ERR = sp.sqrt(self.REL_ERR)/i
        
        plt.hist(self.REL_ERR, bins=25, color='c', alpha=0.5, label='SciPy')
        print 'Error max SciPy', max(self.REL_ERR)
        setattr(self.SciPy[which_case],"median_ERR",sp.median(self.REL_ERR));

        
        formatter_y = FuncFormatter(yto_percent)
        formatter_x = FuncFormatter(xto_percent)
        plt.gca().yaxis.set_major_formatter(formatter_y)
        plt.gca().xaxis.set_major_formatter(formatter_x)
        plt.grid(which='both')
        plt.legend()

       
        return       
 def __call__(self, x):
     res = median([self.f(x) for _ in range(int(self.resample_over))])
     if self.num_evals % self.batchsize == 0 and self.num_evals > 0:
         alt_res = median([self.f(x) for _ in range(int(self.resample_over))])
         self._adaptResampling(res, alt_res)
         res = 0.5 * res + 0.5 * alt_res
     self.recents[self.num_evals % self.batchsize] = res
     self.num_evals += 1
     return res
Example #15
def calculate_varPrior(disp_raw, disp_fitted, idx, varLogDispSamp):

    logRes = sp.log(disp_raw[idx]) - sp.log(disp_fitted[idx])
    stdLogRes = sp.median(abs(logRes - sp.median(logRes))) * 1.4826

    varLogRes = stdLogRes**2
    varPrior = varLogRes - varLogDispSamp

    return max(varPrior, 0.1)
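The 1.4826 factor rescales the median absolute deviation of the log-residuals so that it estimates a standard deviation under normality (the constant is 1/Phi^-1(0.75)). A standalone check of that identity:

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(0.0, 2.0, 100000)
mad_sigma = 1.4826 * np.median(np.abs(x - np.median(x)))
print(mad_sigma)  # close to the true sigma of 2.0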
Example #16
 def _printStuff(self):
     print self._num_updates,
     for n, a in self._print_quantities:
         #print n, type(a)
         if abs(median(a)) > 1e4 or abs(median(a)) < 1e-3:
             print n, median(a), '\t',
         else:
             print n, round(median(a), 4), '\t',
     print
 def __get_params(self, kplanet):
     """Retrieve model parameters."""
     period = sp.median(self.cold[:, 5 * kplanet])
     amplitude = sp.median(self.cold[:, 5 * kplanet + 1])
     phase = sp.median(self.cold[:, 5 * kplanet + 2])
     eccentricity = sp.median(self.cold[:, 5 * kplanet + 3])
     longitude = sp.median(self.cold[:, 5 * kplanet + 4])
     params = (period, amplitude, phase, eccentricity, longitude)
     return params
def calculate_varPrior(disp_raw, disp_fitted, idx, varLogDispSamp):

    logRes = sp.log(disp_raw[idx]) - sp.log(disp_fitted[idx])
    stdLogRes = sp.median(abs(logRes - sp.median(logRes))) * 1.4826

    varLogRes = stdLogRes ** 2
    varPrior = varLogRes - varLogDispSamp

    return max(varPrior, 0.1)
Example #19
 def _printStuff(self):
     print self._num_updates,
     for n, a in self._print_quantities:
         #print n, type(a)
         if abs(median(a)) > 1e4 or abs(median(a)) < 1e-3:
             print n, median(a), '\t',
         else:
             print n, round(median(a), 4), '\t',
     print
 def __call__(self, x):
     res = median([self.f(x) for _ in range(int(self.resample_over))])
     if self.num_evals % self.batchsize == 0 and self.num_evals > 0:
         alt_res = median(
             [self.f(x) for _ in range(int(self.resample_over))])
         self._adaptResampling(res, alt_res)
         res = 0.5 * res + 0.5 * alt_res
     self.recents[self.num_evals % self.batchsize] = res
     self.num_evals += 1
     return res
Example #21
def analyzeMali(mali, options, prefix_row=""):

    if len(mali) == 0:
        raise "not analyzing empty multiple alignment"

    # count empty sequences
    row_data = map(
        lambda x: Mali.MaliData(x.mString, options.gap_chars, options.
                                mask_chars), mali.values())
    col_data = map(
        lambda x: Mali.MaliData(x, options.gap_chars, options.mask_chars),
        mali.getColumns())

    if len(row_data) == 0 or len(col_data) == 0:
        return False

    if options.loglevel >= 2:
        for row in row_data:
            options.stdlog.write("# row: %s\n" % str(row))
        for col in col_data:
            options.stdlog.write("# col: %s\n" % str(col))

    options.stdout.write(prefix_row)

    # calculate average column occupancy
    col_mean = scipy.mean(map(lambda x: x.mNChars, col_data))
    col_median = scipy.median(map(lambda x: x.mNChars, col_data))
    length = mali.getLength()

    if float(int(col_median)) == col_median:
        options.stdout.write("%5.2f\t%5.2f\t%i\t%5.2f" %
                             (col_mean, 100.0 * col_mean / length, col_median,
                              100.0 * col_median / length))
    else:
        options.stdout.write("%5.2f\t%5.2f\t%5.1f\t%5.2f" %
                             (col_mean, 100.0 * col_mean / length, col_median,
                              100.0 * col_median / length))

    row_mean = scipy.mean(map(lambda x: x.mNChars, row_data))
    row_median = scipy.median(map(lambda x: x.mNChars, row_data))
    width = mali.getWidth()

    if float(int(row_median)) == row_median:
        options.stdout.write("\t%5.2f\t%5.2f\t%i\t%5.2f" %
                             (row_mean, 100.0 * row_mean / width, row_median,
                              100.0 * row_median / width))
    else:
        options.stdout.write("\t%5.2f\t%5.2f\t%5.1f\t%5.2f" %
                             (row_mean, 100.0 * row_mean / width, row_median,
                              100.0 * row_median / width))

    options.stdout.write("\n")

    return True
Example #22
def MAD(a, c=0.6745):
    """
    Median Absolute Deviation along first axis of an array:

    median(abs(a - median(a))) / c

    """

    a = N.asarray(a, N.float64)
    d = N.multiply.outer(median(a), N.ones(a.shape[1:]))
    return median(N.fabs(a - d) / c)
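A 1-D usage sketch for MAD; for one-dimensional input the snippet behaves the same whether median is numpy's or the axis-0-defaulting legacy scipy one, and N is assumed to be numpy (as N.asarray/N.fabs suggest):

import numpy as N
from numpy import median

x = N.array([1.0, 2.0, 3.0, 4.0, 100.0])  # one gross outlier
print(MAD(x))  # robust scale estimate (~1.48), barely affected by the outlier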
Example #23
 def update_sp_stat(self):
     """Calculate the statistics of the spectrum/spectra selected and print it"""
     n = len(self.axes[2].lines) - 1
     data = self.axes[2].lines[n].get_ydata()
     med = scipy.median(data)
     Mean = scipy.mean(data)
     sigma = scipy.std(data)
     disp = sqrt(scipy.median((data - med)**2))
     self.text['mean_sp1'].set_text('%8.2f' % Mean)
     self.text['sigma_sp1'].set_text('%8.2f' % sigma)
     self.text['median_sp1'].set_text('%8.2f' % med)
     self.text['med_disp_sp1'].set_text('%8.2f' % disp)
Example #24
    def startStopClicked_Callback(self):
        print('start/stop clicked')
        if self.running:
            self.timer.stop()
            self.running = False
            self.outfile.close()

            t = scipy.array(self.timeList)
            dt = (t[1:] - t[:-1])*1e-6
            dt = dt[100:]
            dtMean = dt.mean()
            dtMedian = scipy.median(dt)
            dtStdDev = dt.std()
            dtMedianAbsDev = scipy.median(scipy.absolute(dt - dtMedian))
            dtMaxAbsDev = scipy.absolute(dt - dtMean).max()
            dtMinAbsDev = scipy.absolute(dt - dtMean).min()
            numBin = int(0.01*dt.shape[0])
            numBin = max([numBin,20]) 

            diff = 1e-6*scipy.array(self.diffTimeList)

            print()
            print('dt mean:           ', dtMean)
            print('dt median:         ', dtMedian)
            print('dt standard dev:   ', dtStdDev)
            print('dt median abs dev: ', dtMedianAbsDev)
            print('dt max abs dev:    ', dtMaxAbsDev)
            print()

            fig = plt.figure(1)
            plt.clf()
            plt.hist(dt,numBin)
            plt.xlabel('Period dt (sec)')
            plt.ylabel('Count')
            plt.grid('on')
            plt.draw()

            #fig = plt.figure(2)
            #plt.clf()
            #plt.hist(diff,numBin)
            #plt.xlabel('Get-Set dt (sec)')
            #plt.ylabel('Count')
            #plt.grid('on')
            #plt.draw()

        else:
            self.running = True 
            self.count = 0
            self.timeList = []
            self.outfile = open(self.outfileName,'w')
            self.timer.start(1.0e3/self.freq)
        self.updateStartStopText()
def mad_clipping(input_data, sigma_clip_level, return_length=False):
    medval = median(input_data)
    sigma = 1.4826 * median(abs(medval - input_data))
    high_sigma_clip_limit = medval + sigma_clip_level * sigma
    low_sigma_clip_limit = medval - sigma_clip_level * sigma
    clipped_data = input_data[(input_data>(low_sigma_clip_limit)) &            \
                              (input_data<(high_sigma_clip_limit))]
    new_medval = median(clipped_data)
    new_sigma = 1.4826 * median(abs(medval - clipped_data))
    if return_length:
        return new_medval, new_sigma, len(clipped_data)
    else:
        return new_medval, new_sigma
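A hedged example for this mad_clipping variant, with outliers injected into Gaussian data; median is assumed to resolve to numpy's, as in the surrounding snippets:

import numpy as np
from numpy import median

np.random.seed(0)
data = np.concatenate([np.random.normal(10.0, 1.0, 1000),
                       np.array([50.0, -40.0, 80.0])])   # gross outliers
med, sigma, n_kept = mad_clipping(data, 3.0, return_length=True)
print(med, sigma, n_kept)  # ~10, ~1, with the three outliers rejected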
def mad_clipping(input_data, sigma_clip_level):
    medval = median(input_data)
    sigma = 1.48 * median(abs(medval - input_data))
    high_sigma_clip_limit = medval + sigma_clip_level * sigma
    low_sigma_clip_limit = medval - sigma_clip_level * sigma
    clipped_data = []
    for value in input_data:
        if (value > low_sigma_clip_limit) and (value < high_sigma_clip_limit):
            clipped_data.append(value)
    clipped_data_array = array(clipped_data)
    new_medval = median(clipped_data_array)
    new_sigma = 1.48 * median(abs(medval - clipped_data_array))
    return clipped_data_array, new_medval, new_sigma
Example #27
def TMMNormalization(input_downweighted_df):
    # Work on a copy of the input counts.
    final_df = input_downweighted_df.copy()
    # Output DataFrames
    first_df = pd.DataFrame()
    geom_mean_df = pd.DataFrame()
    # Iterate over transcripts to obtain a DataFrame of downweighted read counts divided by the transcript's geometric mean
    for i in range(final_df.shape[0]):
        # Obtain list of counts for a particular transcript
        transcript_counts = list(final_df.iloc[i, 2:])
        # Obtain geometric mean
        transcript_geom_mean = scipy.stats.mstats.gmean(transcript_counts)
        # If the geometric mean of the transcript is equal 0, omit the transcript
        if transcript_geom_mean == 0:
            del transcript_counts, transcript_geom_mean
            continue
        # Creating output DataFrames
        transcript_geom_mean_df = pd.DataFrame({
            0: [final_df.iloc[i, 0]],
            1: [final_df.iloc[i, 1]],
            2: [transcript_geom_mean]
        })
        geom_mean_df = geom_mean_df.append(transcript_geom_mean_df)
        del transcript_geom_mean_df
        # Obtain DataFrame with downweighted read counts multiplied by geometric mean of a transcript
        temp_df = pd.DataFrame([final_df.iloc[i, :]])
        temp_df.iloc[:, 2:] = temp_df.iloc[:, 2:] / transcript_geom_mean
        # Append the row to the final DataFrame
        first_df = first_df.append(temp_df)
        del transcript_counts, transcript_geom_mean, temp_df
    del i
    geom_mean_df.columns = [
        '#Transcript_splicing_pattern', 'Transcript_ID', 'geometric_mean'
    ]
    # Read sample names from column names of DataFrame
    iteration_list = list(final_df.columns)[2:]
    # Iterate over the per-sample columns and normalize the read counts by multiplying each count by the per-sample median of the normalization factors
    sample_factor_df = pd.DataFrame()
    for i in range(len(iteration_list)):
        # Creating output DataFrame with sample median of normalized values
        norm_sample_factor_df = pd.DataFrame({
            0: [iteration_list[i]],
            1: [scipy.median(list(first_df[iteration_list[i]]))]
        })
        sample_factor_df = sample_factor_df.append(norm_sample_factor_df)
        del norm_sample_factor_df
        final_df[iteration_list[i]] = final_df[iteration_list[i]] * (
            scipy.median(list(first_df[iteration_list[i]])))
    del i, first_df, iteration_list
    sample_factor_df.columns = ['#sample_id', 'normalized_values_median']
    return final_df, geom_mean_df, sample_factor_df
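A tiny smoke test with a hypothetical two-transcript, two-sample table (two metadata columns followed by per-sample counts, as the function's iloc[:, 2:] slicing implies); it assumes a pandas old enough to retain DataFrame.append and a scipy that still exports median:

import pandas as pd

df = pd.DataFrame({'#Transcript_splicing_pattern': ['p1', 'p2'],
                   'Transcript_ID': ['t1', 't2'],
                   's1': [10.0, 0.0], 's2': [20.0, 5.0]})  # made-up counts
final_df, geom_mean_df, sample_factor_df = TMMNormalization(df)
print(sample_factor_df)  # per-sample medians of the normalized values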
Example #28
def lossTraces(fwrap,
               aclass,
               dim,
               maxsteps,
               storesteps=None,
               x0=None,
               initNoise=0.,
               minLoss=1e-10,
               algoparams={}):
    """ Compute a number of loss curves, for the provided settings,
    stored at specific storestep points. """
    if not storesteps:
        storesteps = range(maxsteps + 1)

    # initial points, potentially noisy
    if x0 is None:
        x0 = ones(dim) + randn(dim) * initNoise
    elif not isinstance(x0, ndarray):
        x0 = ones(dim) * x0

    # optimal loss
    oloss = mean(
        fwrap.stochfun.expectedLoss(ones(100) * fwrap.stochfun.optimum))

    # tracking progress by callback
    paramtraces = {'index': -1}
    losstraces = {}

    def storer(a):
        lastseen = paramtraces['index']
        for ts in [
                x for x in storesteps if x > lastseen and x <= a._num_updates
        ]:
            paramtraces[ts] = a.bestParameters.copy()
            losstraces[ts] = abs(
                fwrap.stochfun.expectedLoss(paramtraces[ts]) - oloss) + minLoss
        paramtraces['index'] = a._num_updates

    # initialization
    algo = aclass(fwrap, x0, callback=storer, **algoparams)
    print algo, fwrap, dim, maxsteps,

    # store initial step
    algo.callback(algo)
    algo.run(maxsteps)

    # process learning curve
    del paramtraces['index']
    ls = array([x for _, x in sorted(losstraces.items())])
    print median(ls[-1])
    return ls
Example #29
def analysis(records, analysis_function):
    """Read in the results of one of the tools and calculate certain statistics.
       fn is a function for reading in the results (e.g. cleangingTools.parseScopaInfo)
       """
    trim = []
    left_trim = []
    right_trim = []

    tp, fp, tn, fn = [0]*4   # true positive, false positive, ...
    for i,seq_record in enumerate(records):
        id, present, actual_start, actual_end, found, predicted_start, predicted_end = analysis_function(seq_record)[-1]

        if present:
            if found:
                tp = tp + 1
                left_trim.append(int(actual_start) - int(predicted_start))
                right_trim.append(int(predicted_end) - int(actual_end))
                trim.append(left_trim[-1] + right_trim[-1])
            else:
                fn = fn + 1
        else:
            if found:
                fp = fp + 1
            else:
                tn = tn + 1

    sensitivity = float(tp) / (tp + fn) if tp + fn > 0 else -1
    specificity = float(tn) / (tn + fp) if tn + fp > 0 else -1
    if len(trim) > 0:
        pct_correct = len(filter(lambda x: x==0, trim)) / float(len(trim))
        avg_trim = scipy.mean(trim)
        median_trim = scipy.median(trim)
        SoS_trim = scipy.mean(map(lambda x : x*x, trim))
        avg_left = scipy.mean(left_trim)
        median_left = scipy.median(left_trim)
        avg_right = scipy.mean(right_trim)
        median_right = scipy.median(right_trim)

        overArr = filter(lambda x : x > 0, trim)
        pct_over = len(overArr) / float(len(trim))
        avg_over = scipy.mean(overArr) if len(overArr) > 0 else -99999
        median_over = scipy.median(overArr) if len(overArr) > 0 else -99999

        underArr = filter(lambda x : x < 0, trim)
        pct_under = len(underArr) / float(len(trim))
        avg_under = scipy.mean(underArr) if len(underArr) > 0 else -99999
        median_under = scipy.median(underArr) if len(underArr) > 0 else -99999
    else:
        return [sensitivity, specificity] + [9999]*14

    return [sensitivity, specificity, pct_correct, avg_trim, median_trim, avg_left, median_left, avg_right, median_right, SoS_trim, pct_over, avg_over, median_over, pct_under, avg_under, median_under]
Example #30
    def __amp_detect(self, x):
        
        ref = np.floor(self.min_ref_per*self.sr/1000.0)
        
        # HIGH-PASS FILTER OF THE DATA
        (b,a) = signal.ellip(2, 0.1, 40, [self.fmin_detect*2.0/self.sr,self.fmax_detect*2.0/self.sr], btype='bandpass', analog=0, output='ba')
        xf_detect = signal.filtfilt(b, a, x)
        (b,a) = signal.ellip(2, 0.1, 40, [self.fmin_sort*2.0/self.sr,self.fmax_sort*2.0/self.sr], btype='bandpass', analog=0, output='ba')
        xf = signal.filtfilt(b, a, x)
        
        
        noise_std_detect = scipy.median(np.abs(xf_detect))/0.6745;
        noise_std_sorted = scipy.median(np.abs(xf))/0.6745;
       
        thr = self.stdmin * noise_std_detect        #thr for detection is based on detected settings.
        thrmax = self.stdmax * noise_std_sorted     #thrmax for artifact removal is based on sorted settings.
        
        # LOCATE SPIKE TIMES
        nspk = 0;
        xaux = np.argwhere(xf_detect[self.w_pre+1:len(xf_detect)-self.w_post-1-1] > thr) + self.w_pre + 1
        xaux = np.resize(xaux,len(xaux))
        xaux0 = 0;
        index = []
        for i in range(len(xaux)):
            if xaux[i] >= (xaux0 + ref):
            # after finding a peak, resume the search ref samples past the last accepted peak
                iaux = xf[xaux[i]:xaux[i]+int(np.floor(ref/2.0))].argmax(0)    # introduces alignment
                nspk = nspk + 1
                index.append(iaux + xaux[i])
                xaux0 = index[nspk-1];
        
        # SPIKE STORING (with or without interpolation)
        ls = self.w_pre + self.w_post
        spikes = np.zeros([nspk,ls+4])
        xf = np.concatenate((xf,np.zeros(self.w_post)),axis=0)
        
        for i in range(nspk):                          # Eliminates artifacts
            if np.max( np.abs( xf[index[i]-self.w_pre:index[i]+self.w_post] )) < thrmax :
                spikes[i,:] = xf[index[i]-self.w_pre-1:index[i]+self.w_post+3]
     
        aux = np.argwhere(spikes[:,self.w_pre] == 0)       #erases indexes that were artifacts
        if len(aux) != 0:
            aux = aux.reshape((1,len(aux)))[0]
            spikes = np.delete(spikes, aux, axis = 0)
            index = np.delete(index,aux)
 
        if self.interpolation == 'y':
            # Does interpolation
            spikes = self.__int_spikes(spikes)

        return spikes, thr, index
Example #31
def skyopt(p, x, data, model):
    par = special_functions.unpack_coeff(p).tolist()

    wave = special_functions.genfunc(x, 0, p).astype(scipy.float64)
    sky = interpolate.splev(wave, model).astype(scipy.float64)
    ratio = scipy.median(data) / scipy.median(sky)
    offset = 0.
    par.append(ratio)
    par.append(offset)

    coeff, ier = optimize.leastsq(skyfit,
                                  par, (x, data, model, p),
                                  maxfev=100000)
    return special_functions.build_coeff(coeff, p), coeff[-2], coeff[-1]
Example #32
    def _analysis(self):
        self._prepareDictionary()
        arr_d = scipy.array(self.dic.values(), scipy.int32)
        arr_b = scipy.array(self.bmd.values(), scipy.int32)
        median_d = scipy.median(arr_d)
        median_b = scipy.median(arr_b)
        mean_d = arr_d.mean()
        mean_b = arr_b.mean()
        self.rate = scipy.sqrt(median_b) * median_d / median_b

        tpl = "%s\t%s\t%s"
        print tpl % (r"r\c", "median", "mean")
        tpl = "%s\t%d\t%d"
        print tpl % ("dict", median_d, mean_d)
        print tpl % ("bmf", median_b, mean_b)
Example #33
def find_holes(data):
    sample = data.copy()
    size = sample.size

    # Here's a little hack to "flatten" star boxes
    tmp = scipy.sort(sample)
    star_cutoff = scipy.median(tmp[-30:-10]) * 0.6
    sample = scipy.where(sample > star_cutoff, star_cutoff, sample)

    derivative = deriv_1d(sample)
    derivative = ndimage.gaussian_filter1d(derivative, 3)
    derivative = abs(derivative)

    tmp = scipy.sort(derivative)
    avg = scipy.median(tmp[size / 8:size * 3 / 8])
    sigma = tmp[size / 8:size * 3 / 8].std()

    threshold = avg + sigma * 100.

    edge = []

    count = 0
    while derivative.max() > threshold:
        start = derivative.argmax() - 7
        end = derivative.argmax() + 8

        if start < 0:
            start = 0
        if end > derivative.size:
            end = derivative.size

        fit = find_peak(derivative[start:end])

        if start > 7 and end < derivative.size - 7:
            edge.append(float(start) + fit[2])

        start -= 3
        end += 3

        if start < 0:
            start = 0
        if end > derivative.size:
            end = derivative.size

        derivative[start:end] = 0.

    edge.sort()
    return edge, threshold, star_cutoff
 def __clean_rvs(self):
     """Clean radial-velocities by adding the offset and jitter."""
     instrumental = self.cold[:, -2 * self.nins:]
     rv0 = copy.deepcopy(self.rv)
     err0 = copy.deepcopy(self.err)
     acc = sp.median(self.cold[:, -2 * self.nins - 1])
     for i in range(self.nins):
         jitter = sp.median(instrumental[:, i])
         offset = sp.median(instrumental[:, i + 1])
         ins = self.ins == i
         # Assume linear acceleration for now.
         rv0[ins] -= offset + acc
         err0[ins] = sp.sqrt(err0[ins]**2 + jitter**2)
     self.rv0 = rv0
     self.err0 = err0
     pass
Example #35
def cluster_points(r, z):
    R, Z = geom.pointloop(r, z)
    dX = norm([r[1:] - r[:-1], z[1:] - z[:-1]], axis=0)
    dx_median = sp.median(dX)
    cluster, i = OrderedDict(), count(0)
    for r, z in zip(R, Z):
        dx = []
        for cl in cluster:
            rc, zc = cluster[cl]['r'], cluster[cl]['z']
            dx.append(np.min(norm([r - rc, z - zc], axis=0)))
        if len(dx) == 0 or np.min(dx) > 2 * dx_median:  # new
            cl = 'group{:1.0f}'.format(next(i))
            cluster[cl] = {}
            cluster[cl] = {'r': [r], 'z': [z]}
        else:
            icl = np.argmin(dx)
            cl = list(cluster.keys())[icl]
            cluster[cl]['r'] = np.append(cluster[cl]['r'], r)
            cluster[cl]['z'] = np.append(cluster[cl]['z'], z)
    for cl in cluster:
        r, z = cluster[cl]['r'], cluster[cl]['z']
        dx = norm([r[1:] - r[:-1], z[1:] - z[:-1]], axis=0)
        imax = np.argmax(dx) + 1
        r = np.append(r[imax:], r[:imax])
        z = np.append(z[imax:], z[:imax])
        cluster[cl]['r'], cluster[cl]['z'] = r, z
    return cluster
Example #36
def spike_detect(trace, threshold=2.5):
    """
    takes a single trace, returns a spike location mask
    ::param trace:
    ::param threshold: number of standard deviations
    ::return spike_mask:
    """
    
    trace = trace-scipy.median(trace) #median subtraction
    trace_std = np.std(trace)

    above_threshold = (trace < -trace_std*threshold).astype(int) # all points below spike threshold
    threshold_bounds = np.diff(above_threshold) # find places where the spike mask changes value

    putative_event_starts = np.where(threshold_bounds > 0)[0] + 1 # first sample of each above-threshold run
    putative_event_ends = np.where(threshold_bounds < 0)[0] + 1 # one past the last sample of each run

    event_maxima = np.zeros(trace.shape[0])

    for start, end in zip(putative_event_starts, putative_event_ends):
        event = trace[start:end]
        minimum_val = min(event)
        event_max_loc = np.where(event==minimum_val)[0]
        event_maxima[start+event_max_loc] = 1

    return event_maxima
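A synthetic check for spike_detect: two large negative spikes on unit Gaussian noise. A high threshold is used so noise excursions are not flagged, and scipy.median inside the snippet assumes a legacy scipy (np.median on current versions):

import numpy as np
import scipy

np.random.seed(0)
trace = np.random.normal(0.0, 1.0, 5000)
trace[[1000, 3000]] -= 25.0            # two strong negative-going spikes
mask = spike_detect(trace, threshold=6.0)
print(np.where(mask == 1)[0])          # the indices of the two injected spikes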
Example #37
def rendGauss(x,y, sx, imageBounds, pixelSize):
    fuzz = 3*scipy.median(sx)
    roiSize = int(fuzz/pixelSize)
    fuzz = pixelSize*roiSize

    X = numpy.arange(imageBounds.x0 - fuzz,imageBounds.x1 + fuzz, pixelSize)
    Y = numpy.arange(imageBounds.y0 - fuzz,imageBounds.y1 + fuzz, pixelSize)

    #print X
    
    im = scipy.zeros((len(X), len(Y)), 'f')

    #record our image resolution so we can plot pts with a minimum size equal to res (to avoid missing small pts)
    delX = scipy.absolute(X[1] - X[0]) 
    
    for i in range(len(x)):
        ix = scipy.absolute(X - x[i]).argmin()
        iy = scipy.absolute(Y - y[i]).argmin()

        sxi =  max(sx[i], delX)       
        
        imp = Gauss2D(X[(ix - roiSize):(ix + roiSize + 1)], Y[(iy - roiSize):(iy + roiSize + 1)],1/sxi, x[i],y[i],sxi)
        im[(ix - roiSize):(ix + roiSize + 1), (iy - roiSize):(iy + roiSize + 1)] += imp

    im = im[roiSize:-roiSize, roiSize:-roiSize]

    return im
Example #38
def median_filter_bord(im, size=3):
    """The   function  performs   a  local   median  filter   on  a   flat
    image. Border's pixels are processed.

    Args:
    im: the image to process
    size: the size in pixels of the local square window. Default value is 3.
    
    Returns:
    out: the filtered image
    """

    ## Get the size of the image
    [nl, nc, d] = im.shape

    ## Get the size of the moving window
    s = (size - 1) / 2

    ## Initialization of the output
    out = sp.empty((nl, nc, d))
    temp = sp.empty((nl + 2 * s, nc + 2 * s, d))  # A temporary file is created
    temp[0:s, :] = sp.NaN
    temp[:, 0:s] = sp.NaN
    temp[-s:, :] = sp.NaN
    temp[:, -s:] = sp.NaN
    temp[s : s + nl, s : nc + s] = im

    ## Apply the median filter
    for i in range(s, nl + s):  # Shift the origin to remove border effect
        for j in range(s, nc + s):
            for k in range(d):
                window = temp[i - s : i + 1 + s, j - s : j + s + 1, k]
                out[i - s, j - s, k] = sp.median(window[sp.isfinite(window)])

    return out.astype(im.dtype.name)
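A quick run on a small test image with one hot pixel; Python 2 semantics are assumed (s = (size - 1) / 2 must stay an integer for the slicing) and sp.NaN is the legacy alias for np.nan:

import numpy as np
import scipy as sp

im = np.ones((32, 32, 1))
im[8, 8, 0] = 100.0                    # a single hot pixel
out = median_filter_bord(im, size=3)
print(out[8, 8, 0])                    # the hot pixel is replaced by 1.0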
def normalization(data, ntype):
    if ntype == 'mean_maxs':
        data /= scipy.mean(numpy.max(data, 1))
    elif ntype == 'median_maxs':
        data /= scipy.median(numpy.max(data, 1))
    else:
        raise TypeError, "Normalization type %s not implemented."
Example #40
    def getDistance(self, sampleSize=10, waitTime=0.00001):
        distances = []
        time.sleep(2)

        for i in range(0, sampleSize):
            # set Trigger to HIGH
            GPIO.output(self.trig, True)

            # set Trigger after specificied wait time to LOW
            time.sleep(waitTime)
            GPIO.output(self.trig, False)

            StartTime = time.time()
            StopTime = time.time()

            # save StartTime
            while GPIO.input(self.echo) == 0:
                StartTime = time.time()

            # save time of arrival
            while GPIO.input(self.echo) == 1:
                StopTime = time.time()

            # time difference between start and arrival
            TimeElapsed = StopTime - StartTime

            # multiply with the speed of sounds(34300 cm/s)
            # and divide by 2, because the signal has to travel there and back
            distance = (TimeElapsed * 34300) / 2

            # Upper and lower bound on distance, 2cm to 500cm
            if ((distance > 2) and (distance < 500)):
                distances.append(distance)

        return median(distances)
Example #41
def plotmedian(x, y, clr, opt=1, nbins=10, ax=[], xmin=30., xmax=150):
    xmid, ymid, ylist = [], [], []
    dx = (xmax - xmin) / nbins
    for i in range(nbins):
        xmid.append(xmin + (0.5 + i) * dx)
        ylist.append([])
        ymid.append(0.)
    for i in range(len(x)):
        idx = int((x[i] - xmin) / dx)
        if (idx < 0):
            idx = 0
        if (idx > nbins - 1):
            idx = nbins - 1
        ylist[idx].append(y[i])
    for i in range(nbins):
        if ((len(ylist[i]) == 0 or max(ylist[i]) < 0.1) and i > 0):
            ymid[i] = ymid[i - 1]
            xmid[i] = xmid[i - 1]
        else:
            ymid[i] = median(ylist[i])
    if (opt == 1):
        ax.plot(xmid, ymid, ".-", color=clr)
    if (opt == 2):
        plt.plot(xmid, ymid, ".-", color=clr)
    print(xmid, ymid)
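A short driver for plotmedian, binning noisy samples of a linear trend; plt and median are assumed to be matplotlib.pyplot and numpy's median, matching the snippet's free names:

import numpy as np
import matplotlib.pyplot as plt

np.random.seed(0)
x = np.random.uniform(30.0, 150.0, 2000)
y = 0.05 * x + np.random.normal(0.0, 1.0, x.size)   # hypothetical trend plus noise
plotmedian(x, y, 'b', opt=2, nbins=10)
plt.show()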
 def plot_REL_ERR_SU2(self,which_case):
      i=0;
      thermo1 = self.select[which_case][0]
      thermo2 = self.select[which_case][1]
      get_REL_ERR_SU2(self,which_case)
      
      print 'Median error SU2', sp.median(self.REL_ERR)
      print 'Mean error SU2', sp.mean(self.REL_ERR)
      print 'Max error SU2', max(self.REL_ERR)
      print 'Min error SU2', min(self.REL_ERR)
      x = getattr(self.SU2[which_case],thermo1)
      y = getattr(self.SU2[which_case],thermo2)
      # keep only points with relative error below 90% of the maximum
      trusted_values = sp.where((self.REL_ERR > 0) * (self.REL_ERR < 0.9 * max(self.REL_ERR)))
      self.REL_ERR = self.REL_ERR[trusted_values]
      x = x[trusted_values]
      y = y[trusted_values]
      scat=plt.scatter(x,y,c=self.REL_ERR, s=1)                
      plt.grid(which='both')
      scat.set_array(self.REL_ERR)        
      plt.colorbar(scat)
      plt.xlim((min(x)*0.95,max(x)*1.05));
      plt.ylim((min(y)*0.95,max(y)*1.05));
      print 'x argmax %i , x_val: %f ' %(sp.argmax(self.REL_ERR),x[sp.argmax(self.REL_ERR)])
      print 'y argmax %i , y_val: %f ' %(sp.argmax(self.REL_ERR),y[sp.argmax(self.REL_ERR)])
      return;
Example #43
 def app_convert_ratio_median(self, appID, _day, tw):
     cr = list()
     for i in range(tw):
         convert = self.ad.get_app_count(appID, 1, _day - i - 1, _day - i)
         click = self.ad.get_app_count(appID, 0, _day - i - 1, _day - i)
         cr.append(convert / (click + convert + 1))
     return [scipy.median(cr)]
Example #44
def median_filter(im, size=3):
    """The function performs a local median filter on a flat image. Border's
    pixels are not processed.

    Args:
    im: the image to process
    size: the size in pixels of the local square window. Default value is 3.
    
    Returns:
    out: the filtered image    
    """

    ## Get the size of the image
    [nl, nc, d] = im.shape

    ## Get the size of the moving window
    s = (size - 1) / 2

    ## Initialization of the output
    out = sp.zeros((nl, nc, d), dtype=im.dtype.name)

    ## Apply the median filter
    for i in range(s, nl - s):  # Shift the origin to remove border effect
        for j in range(s, nc - s):
            for k in range(d):
                temp = im[i - s : i + 1 + s, j - s : j + s + 1, k]
                out[i, j, k] = sp.median(temp)

    return out
Example #45
def avgFoundAfter(decreasingTargetValues, listsOfActualValues, batchSize=1, useMedian=False):
    """ Determine the average number of steps to reach a certain value (for the first time),
    given a list of value sequences.
    If a value is not always encountered, the length of the longest sequence is used.
    Returns an array. """
    from scipy import sum

    numLists = len(listsOfActualValues)
    longest = max(map(len, listsOfActualValues))
    # gather a list of indices of first encounters
    res = [[0] for _ in range(numLists)]
    for tval in decreasingTargetValues:
        for li, l in enumerate(listsOfActualValues):
            lres = res[li]
            found = False
            for i in range(lres[-1], len(l)):
                if l[i] <= tval:
                    lres.append(i)
                    found = True
                    break
            if not found:
                lres.append(longest)
    tmp = array(res)
    if useMedian:
        resx = median(tmp, axis=0)[1:]
    else:
        resx = sum(tmp, axis=0)[1:] / float(numLists)
    return resx * batchSize
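A toy invocation of avgFoundAfter on two decaying loss curves; array and median are assumed to come from numpy (matching the snippet's free names), and the internal "from scipy import sum" needs a legacy scipy that re-exports numpy's sum:

from numpy import array, median

curves = [[10, 5, 2, 1, 0.5], [10, 8, 4, 2, 1]]   # two made-up loss sequences
print(avgFoundAfter([5, 2, 1], curves))           # -> [1.5 2.5 3.5], mean first-hit steps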
Example #46
def print_all_stats(ctx, series):
    ftime = get_ftime(series)
    start = 0 
    end = ctx.interval
    print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
    while (start < ftime):  # for each time interval
        end = ftime if ftime < end else end
        sample_arrays = [ s.get_samples(start, end) for s in series ]
        samplevalue_arrays = []
        for sample_array in sample_arrays:
            samplevalue_arrays.append( 
                [ sample.value for sample in sample_array ] )
        #print('samplevalue_arrays len: %d' % len(samplevalue_arrays))
        #print('samplevalue_arrays elements len: ' + \
               #str(map( lambda l: len(l), samplevalue_arrays)))
        # collapse list of lists of sample values into list of sample values
        samplevalues = reduce( array_collapser, samplevalue_arrays, [] )
        #print('samplevalues: ' + str(sorted(samplevalues)))
        # compute all stats and print them
        myarray = scipy.fromiter(samplevalues, float)
        mymin = scipy.amin(myarray)
        myavg = scipy.average(myarray)
        mymedian = scipy.median(myarray)
        my90th = scipy.percentile(myarray, 90)
        my95th = scipy.percentile(myarray, 95)
        my99th = scipy.percentile(myarray, 99)
        mymax = scipy.amax(myarray)
        print( '%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
            start, len(samplevalues), 
            mymin, myavg, mymedian, my90th, my95th, my99th, mymax))

        # advance to next interval
        start += ctx.interval
        end += ctx.interval
Example #47
    def updateProperties(self, values):
        """update properties.

        If values is a vector of strings, each entry will be converted
        to float. Entries that can not be converted are ignored.
        """
        values = [x for x in values if x != None]

        if len(values) == 0:
            raise ValueError("no data for statistics")

        ## convert
        self.mNErrors = 0
        if type(values[0]) not in (types.IntType, types.FloatType):
            n = []
            for x in values:
                try:
                    n.append(float(x))
                except ValueError:
                    self.mNErrors += 1
        else:
            n = values

        ## use a non-sort algorithm later.
        n.sort()
        self.mQ1 = n[len(n) / 4]
        self.mQ3 = n[len(n) * 3 / 4]

        self.mCounts = len(n)
        self.mMin = min(n)
        self.mMax = max(n)
        self.mMean = scipy.mean(n)
        self.mMedian = scipy.median(n)
        self.mSampleStd = scipy.std(n)
        self.mSum = reduce(lambda x, y: x + y, n)
Example #48
    def __call__(self, a, mu=None, scale=None):
        """
        Compute Huber\'s proposal 2 estimate of scale, using an optional
        initial value of scale and an optional estimate of mu. If mu
        is supplied, it is not reestimated.
        """

        self.a = N.asarray(a, N.float64)
        if mu is None:
            self.n = self.a.shape[0] - 1
            self.mu = N.multiply.outer(median(self.a), N.ones(self.a.shape[1:]))
            self.est_mu = True
        else:
            self.n = self.a.shape[0]
            self.mu = mu
            self.est_mu = False

        if scale is None:
            self.scale = MAD(self.a)**2
        else:
            self.scale = scale

        for donothing in self:
            pass

        self.s = N.sqrt(self.scale)
        return self.s
Example #49
 def domain_length(self,face_1,face_2):
     r'''
     Calculate the distance between two faces of the network
     
     Parameters
     ----------
     face_1 and face_2 : array_like
         Lists of pores belonging to opposite faces of the network
         
     Returns
     -------
     The length of the domain in the specified direction
     
     Notes
     -----
     - Does not yet check if input faces are perpendicular to each other
     '''
     #Ensure given points are coplanar before proceeding
     if misc.iscoplanar(self['pore.coords'][face_1]) and misc.iscoplanar(self['pore.coords'][face_2]):
         #Find distance between given faces
         x = self['pore.coords'][face_1]
         y = self['pore.coords'][face_2]
         Ds = misc.dist(x,y)
         L = sp.median(sp.amin(Ds,axis=0))
     else:
         self._logger.warning('The supplied pores are not coplanar. Length will be approximate.')
         f1 = self['pore.coords'][face_1]
         f2 = self['pore.coords'][face_2]
         distavg = [0,0,0]
         distavg[0] = sp.absolute(sp.average(f1[:,0]) - sp.average(f2[:,0]))
         distavg[1] = sp.absolute(sp.average(f1[:,1]) - sp.average(f2[:,1]))
         distavg[2] = sp.absolute(sp.average(f1[:,2]) - sp.average(f2[:,2]))
         L = max(distavg)
     return L
Example #50
    def FindCenterVel(self, Median = True,N_Median = 1000):
        if Median == False:
            self.Mean_vx = scipy.mean(self.Snapshot.vx)
            self.Mean_vy = scipy.mean(self.Snapshot.vy)
            self.Mean_vz = scipy.mean(self.Snapshot.vz)
        else:
            if self.Snapshot.V == None:
                print 'Potentials not defined. Cannot find center'
                return None

            V_index = scipy.argsort(self.Snapshot.V)
            self.Mean_vx = scipy.median(self.Snapshot.vx[V_index[0:N_Median]])
            self.Mean_vy =  scipy.median(self.Snapshot.vy[V_index[0:N_Median]])
            self.Mean_vz = scipy.median( self.Snapshot.vz[V_index[0:N_Median]])
        
        self.CenterVelFound = True
Example #51
    def updateProperties( self, values):
        """update properties.

        If values is a vector of strings, each entry will be converted
        to float. Entries that can not be converted are ignored.
        """
        values = [x for x in values if x != None ]

        if len(values) == 0:
            raise ValueError( "no data for statistics" )

        ## convert
        self.mNErrors = 0
        if type(values[0]) not in (types.IntType, types.FloatType):
            n = []
            for x in values:
                try:
                    n.append( float(x) )
                except ValueError:
                    self.mNErrors += 1
        else:
            n = values

        ## use a non-sort algorithm later.
        n.sort()
        self.mQ1 = n[len(n) / 4]
        self.mQ3 = n[len(n) * 3 / 4]
        
        self.mCounts = len(n)
        self.mMin = min(n)
        self.mMax = max(n)
        self.mMean = scipy.mean( n )
        self.mMedian = scipy.median( n )
        self.mSampleStd = scipy.std( n )
        self.mSum = reduce( lambda x, y: x+y, n )
Example #52
    def domain_length(self,face_1,face_2):
        r'''
        Calculate the distance between two faces of the network

        Parameters
        ----------
        face_1 and face_2 : array_like
            Lists of pores belonging to opposite faces of the network

        Returns
        -------
        The length of the domain in the specified direction

        Notes
        -----
        - Does not yet check if input faces are perpendicular to each other
        '''
        #Ensure given points are coplanar before proceeding
        if misc.iscoplanar(self['pore.coords'][face_1]) and misc.iscoplanar(self['pore.coords'][face_2]):
            #Find distance between given faces
            x = self['pore.coords'][face_1]
            y = self['pore.coords'][face_2]
            Ds = misc.dist(x,y)
            L = sp.median(sp.amin(Ds,axis=0))
        else:
            logger.warning('The supplied pores are not coplanar. Length will be approximate.')
            f1 = self['pore.coords'][face_1]
            f2 = self['pore.coords'][face_2]
            distavg = [0,0,0]
            distavg[0] = sp.absolute(sp.average(f1[:,0]) - sp.average(f2[:,0]))
            distavg[1] = sp.absolute(sp.average(f1[:,1]) - sp.average(f2[:,1]))
            distavg[2] = sp.absolute(sp.average(f1[:,2]) - sp.average(f2[:,2]))
            L = max(distavg)
        return L
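A self-contained sketch of the coplanar branch above: the domain length is the median of each pore's closest approach to the opposite face. scipy.spatial.distance.cdist stands in for the module-level misc.dist helper, and the two faces are made-up parallel planes:

import numpy as np
from scipy.spatial.distance import cdist

face_1 = np.array([[0.0, y, z] for y in range(3) for z in range(3)])  # plane x = 0
face_2 = face_1 + np.array([5.0, 0.0, 0.0])                           # plane x = 5
Ds = cdist(face_1, face_2)          # pairwise pore-to-pore distances
L = np.median(np.amin(Ds, axis=0))  # closest approach per pore, then the median
print(L)  # 5.0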
Exemple #53
def avgFoundAfter(decreasingTargetValues,
                  listsOfActualValues,
                  batchSize=1,
                  useMedian=False):
    """ Determine the average number of steps to reach a certain value (for the first time),
    given a list of value sequences.
    If a value is not always encountered, the length of the longest sequence is used.
    Returns an array. """
    from scipy import sum, array, median  # array and median are also used below
    numLists = len(listsOfActualValues)
    longest = max(list(map(len, listsOfActualValues)))
    # gather a list of indices of first encounters
    res = [[0] for _ in range(numLists)]
    for tval in decreasingTargetValues:
        for li, l in enumerate(listsOfActualValues):
            lres = res[li]
            found = False
            for i in range(lres[-1], len(l)):
                if l[i] <= tval:
                    lres.append(i)
                    found = True
                    break
            if not found:
                lres.append(longest)
    tmp = array(res)
    if useMedian:
        resx = median(tmp, axis=0)[1:]
    else:
        resx = sum(tmp, axis=0)[1:] / float(numLists)
    return resx * batchSize
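A small usage sketch with two hypothetical, decreasing value sequences (requires the scipy aliases imported above):

curves = [[9, 7, 5, 3, 1], [8, 6, 4, 2, 0, 0]]
targets = [6, 4, 2]
print(avgFoundAfter(targets, curves))                  # mean first-hit index per target: [1.5 2.5 3.5]
print(avgFoundAfter(targets, curves, useMedian=True))  # median variant: [1.5 2.5 3.5]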
Exemple #54
def main(database):

    #Commits per committer limited to the 40 first with the highest accumulated activity
    query = "select count(*) from scmlog group by committer_id order by count(*) desc limit 40"

    #Connecting to the database and retrieving data
    connector = connect(database)
    results = int(connector.execute(query))
    if results > 0:
        results_aux = connector.fetchall()
    else:
        print("Error when retrieving data")
        return

    #Moving data to a list, skipping the five most active committers
    commits = []
    for commit in results_aux[5:]:
        commits.append(int(commit[0]))

    #Calculating basic statistics
    print("max: " + str(sp.amax(commits)))
    print("min: " + str(sp.amin(commits)))
    print("mean: " + str(sp.mean(commits)))
    print("median: " + str(sp.median(commits)))
    print("std: " + str(sp.std(commits)))
    print(".25 quartile: " + str(sp.percentile(commits, 25)))
    print(".50 quartile: " + str(sp.percentile(commits, 50)))
    print(".75 quartile: " + str(sp.percentile(commits, 75)))
Exemple #55
def music_heat(qvals):
    heat = scipy.median(
        [qvals["FDR_CT"], qvals["FDR_LRT"], qvals["FDR_FCPT"]])
    if heat != 0:
        return -log10(heat)
    else:
        return max_heat
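A standalone sketch of music_heat; max_heat is a module-level constant in the original source, so an assumed value is supplied here, and numpy's median replaces the older scipy alias:

from math import log10
import numpy as np

max_heat = 15.0  # assumed cap returned when the median q-value is exactly zero

def music_heat(qvals):
    heat = np.median([qvals["FDR_CT"], qvals["FDR_LRT"], qvals["FDR_FCPT"]])
    return -log10(heat) if heat != 0 else max_heat

print(music_heat({"FDR_CT": 0.01, "FDR_LRT": 0.001, "FDR_FCPT": 0.05}))  # 2.0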
def DataArrayStatisticsReport(parent, titleString, tempdata):
    scrolledText = tk_stxt.ScrolledText(parent, width=textboxWidth, height=textboxHeight, wrap=tk.NONE)
    scrolledText.insert(tk.END, titleString + '\n\n')
    
    # must at least have max and min
    minData = min(tempdata)
    maxData = max(tempdata)
    
    if maxData == minData:
        scrolledText.insert(tk.END, 'All data has the same value,\n')
        scrolledText.insert(tk.END, "value = %-.16E\n" % (minData))
        scrolledText.insert(tk.END, 'statistics cannot be calculated.')
    else:
        scrolledText.insert(tk.END, "max = %-.16E\n" % (maxData))
        scrolledText.insert(tk.END, "min = %-.16E\n" % (minData))
        
        try:
            temp = scipy.mean(tempdata)
            scrolledText.insert(tk.END, "mean = %-.16E\n" % (temp))
        except Exception:
            scrolledText.insert(tk.END, "mean gave error in calculation\n")

        try:
            temp = scipy.stats.sem(tempdata)
            scrolledText.insert(tk.END, "standard error of mean = %-.16E\n" % (temp))
        except Exception:
            scrolledText.insert(tk.END, "standard error of mean gave error in calculation\n")

        try:
            temp = scipy.median(tempdata)
            scrolledText.insert(tk.END, "median = %-.16E\n" % (temp))
        except Exception:
            scrolledText.insert(tk.END, "median gave error in calculation\n")

        try:
            temp = scipy.var(tempdata)
            scrolledText.insert(tk.END, "variance = %-.16E\n" % (temp))
        except Exception:
            scrolledText.insert(tk.END, "variance gave error in calculation\n")

        try:
            temp = scipy.std(tempdata)
            scrolledText.insert(tk.END, "std. deviation = %-.16E\n" % (temp))
        except Exception:
            scrolledText.insert(tk.END, "std. deviation gave error in calculation\n")

        try:
            temp = scipy.stats.skew(tempdata)
            scrolledText.insert(tk.END, "skew = %-.16E\n" % (temp))
        except Exception:
            scrolledText.insert(tk.END, "skew gave error in calculation\n")

        try:
            temp = scipy.stats.kurtosis(tempdata)
            scrolledText.insert(tk.END, "kurtosis = %-.16E\n" % (temp))
        except Exception:
            scrolledText.insert(tk.END, "kurtosis gave error in calculation\n")
            
    return scrolledText
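A hypothetical driver for the report widget; the module-level names tk, tk_stxt, textboxWidth and textboxHeight that the function relies on are assumed to be set up as below, and the function itself needs a scipy version that still exports the numpy aliases it calls:

import tkinter as tk
import tkinter.scrolledtext as tk_stxt
import scipy
import scipy.stats

textboxWidth, textboxHeight = 60, 20  # assumed module-level settings

root = tk.Tk()
widget = DataArrayStatisticsReport(root, "Sample data", [1.0, 2.0, 2.5, 3.0, 7.5])
widget.pack()
root.mainloop()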
Exemple #58
    def FindCenter(self, Median=True, N_Median=10):
        """Define center to be the particle with the lowest potential."""

        if self.Snapshot.V is None:
            print('Potentials not defined. Cannot find center')
            return None
        self.V_index = scipy.argsort(self.Snapshot.V)
        if not Median:
            self.x_C = self.Snapshot.x[self.V_index[0]]
            self.y_C = self.Snapshot.y[self.V_index[0]]
            self.z_C = self.Snapshot.z[self.V_index[0]]
        else:
            self.x_C = scipy.median(self.Snapshot.x[self.V_index[0:N_Median]])
            self.y_C = scipy.median(self.Snapshot.y[self.V_index[0:N_Median]])
            self.z_C = scipy.median(self.Snapshot.z[self.V_index[0:N_Median]])

        self.CenterFound = True
Exemple #59
def center_scale(row):
    """centers the provided row around the median"""
    filtered = row[np.isfinite(row)]
    center = scipy.median(filtered)
    scale = util.r_stddev(filtered)
    nurow = [((value - center) / scale)
             if not np.isnan(value) else value for value in row]
    return nurow
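A standalone version of the same centering idea; numpy replaces the older scipy.median alias, and a sample standard deviation stands in for the project-specific util.r_stddev (assumed to match R's sd):

import numpy as np

def center_scale_np(row):
    filtered = row[np.isfinite(row)]
    center = np.median(filtered)
    scale = np.std(filtered, ddof=1)  # stand-in for util.r_stddev
    return [(v - center) / scale if not np.isnan(v) else v for v in row]

print(center_scale_np(np.array([1.0, 2.0, np.nan, 4.0])))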