def draw(options):
    files = [f for f in os.listdir(options['outdir']) if f.endswith('.data')]

    degrees = list()
    diameters = list()
    velocities = list()
    for f in files:
        fin = open(options['outdir']+'/'+f, 'r')
        ts = -1
        for line in fin:
            if line.startswith('#'):
                continue
            time, degree, diameter, velocity = [t.strip() for t in line.split(',')]
            time = int(time)
            assert(ts == time-1)
            ts = time
            try:
                degrees[time].append(float(degree))
                diameters[time].append(int(diameter))
                velocities[time].append(float(velocity))
            except IndexError:
                degrees.append([float(degree)])
                diameters.append([int(diameter)])
                velocities.append([float(velocity)])

    polies = list()
    times = range(len(degrees))
    times2 = times + times[::-1]

    degrees_conf_upper = [confidence(d)[0] for d in degrees]
    degrees_conf_lower = [confidence(d)[1] for d in degrees]
    polies.append(conf2poly(times, degrees_conf_upper, degrees_conf_lower, color='blue'))

    diameters_conf_upper = [confidence(d)[0] for d in diameters]
    diameters_conf_lower = [confidence(d)[1] for d in diameters]
    polies.append(conf2poly(times, diameters_conf_upper, diameters_conf_lower, color='red'))

    velocities_conf_upper = [confidence(d)[0] for d in velocities]
    velocities_conf_lower = [confidence(d)[1] for d in velocities]
    polies.append(conf2poly(times, velocities_conf_upper, velocities_conf_lower, color='green'))

    velocities = [scipy.mean(d) for d in velocities]
    diameters = [scipy.mean(d) for d in diameters]
    degrees = [scipy.mean(d) for d in degrees]

    fig = MyFig(options, figsize=(10, 8), xlabel='Time [s]', ylabel='Metric', grid=False, legend=True, aspect='auto', legend_pos='upper right')

    patch_collection = PatchCollection(polies, match_original=True)
    patch_collection.set_alpha(0.3)
    patch_collection.set_linestyle('dashed')
    fig.ax.add_collection(patch_collection)

    fig.ax.plot(times, degrees, label='Mean degree', color='blue')
    fig.ax.plot(times, diameters, label='Diameter', color='red')
    fig.ax.plot(times, velocities, label='Mean velocity $[m/s]$', color='green')

    fig.ax.set_xlim(0, options['duration'])
    y_max = max(max(degrees), max(diameters), max(velocities))
    fig.ax.set_ylim(0, y_max+10)
    fig.save('metrics', fileformat='pdf')
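# The confidence() and conf2poly() helpers used above are not shown in this
# example; the following is a minimal sketch of what they might look like
# (hypothetical implementations, matching only how draw() calls them).
import scipy
import scipy.stats
from matplotlib.patches import Polygon

def confidence(values, alpha=0.95):
    # returns (upper, lower) bounds of a t-based confidence interval around the mean
    m = scipy.mean(values)
    h = scipy.stats.sem(values) * scipy.stats.t.ppf((1 + alpha) / 2.0, len(values) - 1)
    return m + h, m - h

def conf2poly(times, upper, lower, color='blue'):
    # polygon tracing the upper bound left-to-right and the lower bound back
    points = zip(times, upper) + zip(times[::-1], lower[::-1])
    return Polygon(points, facecolor=color, edgecolor=color)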
def plot_pairwise_velocities_r(case,color,all_radial_distances,all_radial_velocities):
    dr = 0.3 # Mpc/h
    rmin, rmax = sp.amin(all_radial_distances), sp.amax(all_radial_distances) 
    rrange = rmax-rmin 
    N = int(sp.ceil(rrange/dr))
    rs = sp.linspace(rmin,rmax,N)
    v12_of_r = [[] for index in range(N)]

    for r,v12 in zip(all_radial_distances,all_radial_velocities):
        # clamp so that r == rmax falls into the last bin instead of overflowing
        index = min(int(sp.floor((r-rmin)/dr)), N-1)
        v12_of_r[index].append(v12)
            
    
    sigma_12s = sp.zeros(N)
    v12_means = sp.zeros(N)
    for index in range(len(sigma_12s)):
        v12_of_r_index = sp.array(v12_of_r[index])
        print "number of counts in the", index,"th bin:", len(v12_of_r_index)
        sigma_12 = sp.sqrt(sp.mean(v12_of_r_index**2))
        v12_mean = -sp.mean(v12_of_r_index)
        sigma_12s[index] = sigma_12
        v12_means[index] = v12_mean
    
    
    plt.plot(rs,sigma_12s,color=color,label='$\sigma_{12}$')
    plt.plot(rs,v12_means,color=color,label='$|v_{12}|$')
    plt.xlabel('r [Mpc/h]')
    plt.ylabel('[km/s]')
    plt.xscale('log')
    plt.axis([0.5,100,0,600])
Example #3
def printy(s):
    if ((s._num_updates * s.batch_size < 100 
         and s._num_updates % (20 / s.batch_size) == 0)
        or s._num_updates % (100 / s.batch_size) == 0):
        print s._num_updates * s.batch_size, #s.bestParameters, 
        s.provider.nextSamples(4)
        print mean(s.provider.currentLosses(s.bestParameters))
Example #4
 def _read_sky_logfile(self):
     #TODO : expand to read errors, msgs etc
     # read in the whole sky log file, shouldn't be big
     f = open(self.skylogfile)
     lines = f.readlines()
     f.close()
     dust = [line.split()[1:] for line in lines if line.startswith('dtau_dust')]
     line = [line.split()[1:] for line in lines if line.startswith('dtau_line')]
     dust = _sp.array(dust, dtype='float')
     line = _sp.array(line, dtype='float')
     transitions = _sp.unique(dust[:,0])
     shells = _sp.unique(dust[:,1])
     dtau_dust = dict()
     dtau_line = dict()
     dtau_tot = dict()
     for t in transitions:
         d = []
         l = []
         for s in shells:
             d.append( _sp.mean([i[2] for i in dust if ((i[0]==t) * (i[1]==s))]) )
             l.append( _sp.mean([i[2] for i in line if ((i[0]==t) * (i[1]==s))]) )
         dtau_dust[t] = _sp.copy(d)
         dtau_line[t] = _sp.copy(l)
         dtau_tot[t] = _sp.array(d) + _sp.array(l)
     # create object to store in main class
     class Tau(object):pass
     Tau.dtau_dust = dtau_dust
     Tau.dtau_line = dtau_line
     Tau.dtau_tot = dtau_tot
     Tau.transitions = transitions
     Tau.shells = shells
     self.Tau = Tau
def pForest_vs_flann_20Trials(numTrees=10):
    print "Comparing FLANN to Proximity Forest on 500 Random 2D Points"
    flann_scores=[]
    pf_scores=[]
    discrepancies=[]
    for i in range(20):
        print "=============================================="
        print "TRIAL: %d"%(i+1)
        print "=============================================="
        (nd, sum_flann, sum_pf) = pForest_vs_flann(numTrees=numTrees, verbose=False)
        flann_scores.append(sum_flann)
        pf_scores.append(sum_pf)
        discrepancies.append(nd)
        print "=============================================="
        print "Discrepancies: %d, Cost per Discrepancy: %3.2f"%(nd,(sum_flann - sum_pf)*1.0/nd)
        print "=============================================="
        
    print "=============================================="
    print "20 TRIAL SUMMARY"
    print "Average Discrepancies: %3.2f"%( 1.0*sum(discrepancies)/len(discrepancies))
    flann_scores = scipy.array(flann_scores)
    pf_scores = scipy.array(pf_scores)
    avg_delta_score = (sum(flann_scores) - sum(pf_scores))*1.0/len(discrepancies)
    print "Average Cost Per Discrepancy: %3.2f"%avg_delta_score
    print "Average FLANN Distance: %3.2f, StdDev: %3.2f"%(scipy.mean(flann_scores),scipy.std(flann_scores))
    print "Average Proximity Forest Distance: %3.2f, StdDev: %3.2f"%(scipy.mean(pf_scores),scipy.std(pf_scores))
    print "=============================================="
    return (discrepancies, flann_scores, pf_scores)
Example #6
def metal_con(filename, distances, real_dist, bins=35, limits=(-3.1, 0.2),
              avgs=1, detection=1, tag="out"):
    """ main bit """
    if filename[-4:] == '.csv':  delim = ','
    else:  delim = None
    data = shift_data(fi.read_data(filename, delim), real_dist, distances[0])
    mod_actual = 5.*(ma.log10(real_dist*1000) - 1.)
    mod_new = 5.*(ma.log10(distances[0]*1000) - 1.)
    mod = mod_actual - mod_new
    print "Effective Magnitude Shift = {0}, average g={1}".format(mod, sc.mean(data[:,2]))
    new_data = cut_data(data, 4,2,3,5, deff=0, modulus=mod, full=1)
    FeH = get_photo_metal(new_data[:,4],new_data[:,2],new_data[:,3])
    ref_hist = np.histogram(FeH, bins, limits)
    hist = []
    #Also iterate over several runs and average
    for i in range(len(distances)):
        print "#- Convolving to distance {0} kpc".format(distances[i])
        if i==0:  deff=0
        else: deff=detection
        temp_hist = []
        for j in range(avgs):
            #holds dist constant, applies appropriate errors for new distance
            new_data = con.convolve(data, real_dist, distances[i])
            #shift data so detection efficiency works correctly;  has no noticeable effect if deff=0
            new_data = shift_data(new_data, distances[0], distances[i])
            # apply color cuts and detection efficiency to shifted and convolved data
            new_data = cut_data(new_data, 4,2,3,5, deff=deff, modulus=None, full=0)
            print "Average g = {0}, total stars = {1}".format(sc.mean(new_data[:,2]), len(new_data[:,0]))
            FeH = get_photo_metal(new_data[:,4],new_data[:,2],new_data[:,3])
            temp_hist.append(np.histogram(FeH, bins, limits))
        new_hist = avg_hists(temp_hist)
        hist.append(new_hist)
    plot_hists(hist, ref_hist, distances, tag)
    return hist
Example #7
 def infer_diag_post(self,X_ii,D_i):
     
     X_i = dc(X_ii)
     ns = len(D_i)
     
     X_i.resize([ns,self.D])
     [m,V] = self.infer_diag(X_i,D_i)
     if sp.amin(V)<=-0.:
         class MJMError(Exception):
             pass
         print "negative/eq variance"
         print [m,V,X_i,D_i]
         print "_______________"
         #self.printc()
         raise(MJMError)
     if sp.amin(sp.var(m,axis=0))<-0.:
         class MJMError(Exception):
             pass
         print "negativevar of mean"
         print X_i.shape
         print [m,V,sp.var(m,axis=0),X_i,D_i]
         print "_______________"
         #self.printc()
         raise(MJMError)
     
     return [sp.mean(m,axis=0).reshape([1,ns]),(sp.mean(V,axis=0)+sp.var(m,axis=0)).reshape([1,ns])]
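# The posterior returned above combines the sampled means and variances via the
# law of total variance, Var[y] = E[Var[y|draw]] + Var[E[y|draw]], which is what
# sp.mean(V,axis=0) + sp.var(m,axis=0) computes. A small numeric check of that
# identity (illustrative sketch only, independent of the class above):
import scipy as sp
samples = sp.random.normal(size=(1000, 5))        # 1000 draws of 5 quantities
groups = samples.reshape(100, 10, 5)              # 100 groups of 10 draws each
within = sp.mean(sp.var(groups, axis=1), axis=0)  # mean of within-group variances
between = sp.var(sp.mean(groups, axis=1), axis=0) # variance of the group means
print within + between - sp.var(samples, axis=0)  # ~0 for every column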
Example #8
def WriteRadius(mali, identifiers, prefix="", gap_char="-"):
    """write percent identities in pairwise comparisons both for nucleotide acids and amino acids."""

    pides_na = []
    seq_aa = []

    for x in range(0, len(identifiers)):

        seq_aa.append(Genomics.TranslateDNA2Protein(mali[identifiers[x]]))

        for y in range(x + 1, len(identifiers)):
            if x == y:
                continue
            pides_na.append(MaliIO.getPercentIdentity(
                mali[identifiers[x]], mali[identifiers[y]], gap_char))

    pides_aa = []
    for x in range(0, len(identifiers) - 1):
        for y in range(x + 1, len(identifiers)):
            pides_aa.append(
                MaliIO.getPercentIdentity(seq_aa[x], seq_aa[y], gap_char))

    print "%s\tpide\t%i\t" % (prefix, len(pides_na)) +\
          string.join(map(lambda x: "%.2f" % x, (min(pides_na),
                                                 max(pides_na),
                                                 scipy.mean(pides_na),
                                                 scipy.median(pides_na),
                                                 numpy.std(pides_na))), "\t") + "\t" +\
          string.join(map(lambda x: "%.2f" % x, (min(pides_aa),
                                                 max(pides_aa),
                                                 scipy.mean(pides_aa),
                                                 scipy.median(pides_aa),
                                                 numpy.std(pides_aa))), "\t")
def flux_qg(q, parms):

    # - (u + U) q_x - (q_y + Q_y) v
    qe = np.vstack((q,-np.flipud(q)))
    qe_hat = fftn(qe)

    # Compute gradient of PV
    q_x = (ifftn( parms.ikx*qe_hat)).real
    q_y = (ifftn( parms.iky*qe_hat)).real

    # Compute streamfunction
    psie_hat = parms.K2Fi*qe_hat
    psi = (ifftn(psie_hat)).real

    # Compute physical velocities
    u = (ifftn(-parms.iky*psie_hat)).real
    v = (ifftn( parms.ikx*psie_hat)).real

    # Restrict to physical domain
    q_x = q_x[0:parms.Ny,:]
    q_y = q_y[0:parms.Ny,:]
    u   = u[0:parms.Ny,:]
    v   = v[0:parms.Ny,:]
    psi = psi[0:parms.Ny,:]

    # Compute flux
    flux = - (u + parms.U)*q_x - (q_y + parms.Q_y)*v

    # FJP: energy should include potential energy
    energy = 0.5*np.mean(u**2 + v**2) + np.mean(parms.F*psi**2)
    enstr  = np.mean(q**2)
    mass   = np.mean(psi)

    return flux, energy, enstr, mass
Example #10
def makeinputh5(Iono,basedir):
    """This will make a h5 file for the IonoContainer that can be used as starting
    points for the fitter. The ionocontainer taken will be average over the x and y dimensions
    of space to make an average value of the parameters for each altitude.
    Inputs
    Iono - An instance of the Ionocontainer class that will be averaged over so it can
    be used for fitter starting points.
    basdir - A string that holds the directory that the file will be saved to.
    """
    # Get the parameters from the original data
    Param_List = Iono.Param_List
    dataloc = Iono.Cart_Coords
    times = Iono.Time_Vector
    velocity = Iono.Velocity
    zlist,idx = sp.unique(dataloc[:,2],return_inverse=True)
    siz = list(Param_List.shape[1:])
    vsiz = list(velocity.shape[1:])

    datalocsave = sp.column_stack((sp.zeros_like(zlist),sp.zeros_like(zlist),zlist))
    outdata = sp.zeros([len(zlist)]+siz)
    outvel = sp.zeros([len(zlist)]+vsiz)
    #  Do the averaging across space
    for izn,iz in enumerate(zlist):
        arr = sp.argwhere(idx==izn)
        outdata[izn] = sp.mean(Param_List[arr],axis=0)
        outvel[izn] = sp.mean(velocity[arr],axis=0)

    Ionoout = IonoContainer(datalocsave,outdata,times,Iono.Sensor_loc,ver=0,
                            paramnames=Iono.Param_Names, species=Iono.Species,velocity=outvel)
    Ionoout.saveh5(basedir/'startdata.h5')
Example #11
def allan(t, freq, tau, base):
    """
    allan(t, y, tau, base)
    Allan variance calculation

    Input variables:
    ----------------
    t    : time of measurement
    freq : measured frequency
    tau  : averaging time
    base : base frequency

    Output variables:
    -----------------
    s : Squared Allan variance
    """
    # Divide time up to 'tau' length units for averaging
    times = np.arange(min(t), max(t), tau)
    # Create temporary variable for fractional frequencies
    vari = np.zeros(len(times))
    for tstep in range(0, len(times)):
        # Get the data within the time interval
        data = freq[(t >= times[tstep]) & (t < (times[tstep] + tau))]
        # Fractional frequency calculation
        vari[tstep] = (sp.mean(data) - base) / base
    # Squared Allan variance
    s = sp.mean((vari[0:-1] - vari[1:]) ** 2) / 2
    return s
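# A minimal usage sketch for allan() (hypothetical numbers): white frequency
# noise around a 10 MHz carrier, one sample per second, averaged over tau = 10 s.
import numpy as np

base = 10e6                                        # Hz
t = np.arange(0.0, 1000.0, 1.0)                    # seconds
freq = base + np.random.normal(0.0, 0.1, len(t))   # measured frequency
print allan(t, freq, tau=10.0, base=base)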
Example #12
 def signalToNoiseRatio(self, xs):
     """ What is the one-sample signal-to-noise ratio. """         
     rxs = repmat(xs, self.ESamples, 1).T
     gs = self._df(rxs)
     g2s = mean(gs **2, axis=1)
     gs = mean(gs, axis=1)
     return gs**2/g2s
    def update_data(self, x, taps, psd, syms, table):
        try:
            eqdata_key = 'dtv_atsc_equalizer0::taps'
            symdata_key = 'dtv_atsc_equalizer0::data'
            rs_nump_key = 'dtv_atsc_rs_decoder0::num_packets'
            rs_numbp_key = 'dtv_atsc_rs_decoder0::num_bad_packets'
            rs_numerrs_key = 'dtv_atsc_rs_decoder0::num_errors_corrected'
            vt_metrics_key = 'dtv_atsc_viterbi_decoder0::decoder_metrics'
            snr_key = 'probe2_f0::SNR'

            data = self.radio.getKnobs([])
            eqdata = data[eqdata_key]
            symdata = data[symdata_key]
            rs_num_packets = data[rs_nump_key]
            rs_num_bad_packets = data[rs_numbp_key]
            rs_num_errors_corrected = data[rs_numerrs_key]
            vt_decoder_metrics = data[vt_metrics_key]
            snr_est = data[snr_key]

            vt_decoder_metrics = scipy.mean(vt_decoder_metrics.value)
            self._viterbi_metric.pop()
            self._viterbi_metric.insert(0, vt_decoder_metrics)

        except:
            sys.stderr.write("Lost connection, exiting")
            sys.exit(1)

        ntaps = len(eqdata.value)
        taps.set_ydata(eqdata.value)
        taps.set_xdata(xrange(ntaps))
        self._sp0.set_xlim(0, ntaps)
        self._sp0.set_ylim(min(eqdata.value), max(eqdata.value))

        fs = 6.25e6
        freq = scipy.linspace(-fs/2, fs/2, 10000)
        H = fftpack.fftshift(fftpack.fft(eqdata.value, 10000))
        HdB = 20.0*scipy.log10(abs(H))
        psd.set_ydata(HdB)
        psd.set_xdata(freq)
        self._sp1.set_xlim(0, fs/2)
        self._sp1.set_ylim([min(HdB), max(HdB)])
        self._sp1.set_yticks([min(HdB), max(HdB)])
        self._sp1.set_yticklabels(["min", "max"])

        nsyms = len(symdata.value)
        syms.set_ydata(symdata.value)
        syms.set_xdata(nsyms*[0,])
        self._sp2.set_xlim([-1, 1])
        self._sp2.set_ylim([-10, 10])

        per = float(rs_num_bad_packets.value) / float(rs_num_packets.value)
        ber = float(rs_num_errors_corrected.value) / float(187*rs_num_packets.value)

        table._cells[(1,0)]._text.set_text("{0}".format(rs_num_packets.value))
        table._cells[(1,1)]._text.set_text("{0:.2g}".format(ber))
        table._cells[(1,2)]._text.set_text("{0:.2g}".format(per))
        table._cells[(1,3)]._text.set_text("{0:.1f}".format(scipy.mean(self._viterbi_metric)))
        table._cells[(1,4)]._text.set_text("{0:.4f}".format(snr_est.value[0]))

        return (taps, psd, syms, table)
Example #14
def makeinputh5(Iono,basedir):
    basedir = Path(basedir).expanduser()

    Param_List = Iono.Param_List
    dataloc = Iono.Cart_Coords
    times = Iono.Time_Vector
    velocity = Iono.Velocity
    zlist,idx = sp.unique(dataloc[:,2],return_inverse=True)
    siz = list(Param_List.shape[1:])
    vsiz = list(velocity.shape[1:])

    datalocsave = sp.column_stack((sp.zeros_like(zlist),sp.zeros_like(zlist),zlist))
    outdata = sp.zeros([len(zlist)]+siz)
    outvel = sp.zeros([len(zlist)]+vsiz)

    for izn,iz in enumerate(zlist):
        arr = sp.argwhere(idx==izn)
        outdata[izn]=sp.mean(Param_List[arr],axis=0)
        outvel[izn]=sp.mean(velocity[arr],axis=0)

    Ionoout = IonoContainer(datalocsave,outdata,times,Iono.Sensor_loc,ver=0,
                            paramnames=Iono.Param_Names, species=Iono.Species,velocity=outvel)


    ofn = basedir/'startdata.h5'
    print('writing {}'.format(ofn))
    Ionoout.saveh5(str(ofn))
def Corr(GDP,I,C):
	m = sp.shape(GDP)[1]
	GDPIcorr = []
	GDPCcorr = []
	for i in range(0, m):
		gdp = GDP[:,i]
		inv = I[:,i]
		con = C[:,i]
		#Correlation between output and investment for each series
		gdpi = sp.corrcoef(gdp,inv)
		GDPIcorr.append(gdpi[0,1])
		#Correlation between output and consumption for each series
		gdpc = sp.corrcoef(gdp,con)
		GDPCcorr.append(gdpc[0,1])
	#Mean and standard deviation of correlation between GDP and
	#Investment and Consumption over total number of simulations
	GDPICORR = sp.array(GDPIcorr)
	gdpimean = sp.mean(GDPICORR)
	gdpistdev = sp.std(GDPICORR)
	GDPCCORR = sp.array(GDPCcorr)
	gdpcmean = sp.mean(GDPCCORR)
	gdpcstdev = sp.std(GDPCCORR)
	sp.savetxt('GDPICORR.csv',GDPICORR)
	sp.savetxt('GDPCCORR.csv',GDPCCORR)
	print "The mean and standard deviation between GDP and"
	print "Investment and GDP and Consumption followed by"
	print "The lists of each correlation coefficient for"
	print "each series are saved in csv files"
	return gdpimean, gdpistdev, gdpcmean, gdpcstdev
def plot_optimal_tau_for_mean_uncertainty_reduction(
        results_for_exp, results_for_exp_inftau):
    """ Plot the optimal tau for the mean of uncertainty reduction.

    :param results_for_exp: The results of one experiment as 4-D array of the
        shape (metrics, z-values, tau-values, experimental repetitions).
    :type results_for_exp: 4-D array
    :param results_for_exp_inftau: The results of one experiment for `tau = inf` as
        3-D array of the shape (metrics, z-values, experimental repetitions).
    :type results_for_exp_inftau: 3-D array.
    """
    values = sp.empty((results_for_exp.shape[0], results_for_exp.shape[1]))
    err = sp.empty((results_for_exp.shape[0], results_for_exp.shape[1], 2, 1))
    mark = sp.empty((results_for_exp.shape[0], results_for_exp.shape[1]))
    for m, metric in enumerate(cfg['metrics']):
        for z in xrange(len(cfg['zs'])):
            r = sp.mean(results_for_exp[m, z], axis=1)
            mark[m, z] = r.max()
            values[m, z] = sp.mean(cfg['time_scales'][r == r.max()]).magnitude
            r = cfg['time_scales'][r > 0.8 * r.max()]
            err[m, z, 0] = values[m, z] - min(r).magnitude
            err[m, z, 1] = max(r).magnitude - values[m, z]
    plot_param_per_metric_and_z(values, err)
    plot_bool_indicator_per_metric_and_z(
        sp.mean(results_for_exp_inftau, axis=2) >= mark)
Example #17
def computeOpenMaxProbability(openmax_fc8, openmax_score_u):
    """ Convert the scores in probability value using openmax
    
    Input:
    ---------------
    openmax_fc8 : modified FC8 layer from Weibull based computation
    openmax_score_u : degree

    Output:
    ---------------
    modified_scores : probability values modified using OpenMax framework,
    by incorporating degree of uncertainty/openness for a given class
    
    """
    prob_scores, prob_unknowns = [], []
    for channel in range(NCHANNELS):
        channel_scores, channel_unknowns = [], []
        for category in range(NCLASSES):
            channel_scores += [sp.exp(openmax_fc8[channel, category])]
                    
        total_denominator = sp.sum(sp.exp(openmax_fc8[channel, :])) + sp.exp(sp.sum(openmax_score_u[channel, :]))
        prob_scores += [channel_scores/total_denominator ]
        prob_unknowns += [sp.exp(sp.sum(openmax_score_u[channel, :]))/total_denominator]
        
    prob_scores = sp.asarray(prob_scores)
    prob_unknowns = sp.asarray(prob_unknowns)

    scores = sp.mean(prob_scores, axis = 0)
    unknowns = sp.mean(prob_unknowns, axis=0)
    modified_scores =  scores.tolist() + [unknowns]
    assert len(modified_scores) == 1001
    return modified_scores
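# Hypothetical call sketch: NCHANNELS and NCLASSES are module-level constants in
# the original code (assumed here to be 10 crops x 1000 classes); random inputs
# simply exercise the shape handling.
import scipy as sp
NCHANNELS, NCLASSES = 10, 1000
openmax_fc8 = sp.random.normal(size=(NCHANNELS, NCLASSES))
openmax_score_u = sp.random.normal(size=(NCHANNELS, NCLASSES))
scores = computeOpenMaxProbability(openmax_fc8, openmax_score_u)
print len(scores)   # NCLASSES + 1; the last entry is the "unknown" probability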
Example #18
    def plotmap(self,fig,ax):
        """ This function will plot the map of Alaska. The data will be plotted
            over it and will use the basemap class to position everything.
            Input
                fig - The figure handle for the plots.
                ax - The axes handle that the map will be plotted over.
            Output
                m - This is the handle for the basemap object.
        """
        latlim2 = self.params['latbounds']
        lonlim2 = self.params['lonbounds']
        m = Basemap(projection='merc',lon_0=sp.mean(lonlim2),lat_0=sp.mean(latlim2),\
        lat_ts=sp.mean(latlim2),llcrnrlat=latlim2[0],urcrnrlat=latlim2[1],\
        llcrnrlon=lonlim2[0],urcrnrlon=lonlim2[1],\
        rsphere=6371200.,resolution='i',ax=ax)
        # draw coastlines, state and country boundaries, edge of map.
        #m.drawcoastlines()
    #    m.drawstates()
    #    m.drawcountries()
        m.readshapefile('st99_d00','states',drawbounds=True)

        merstep = sp.round_((lonlim2[1]-lonlim2[0])/5.)
        parstep = sp.round_((latlim2[1]-latlim2[0])/5.)
        meridians=sp.arange(lonlim2[0],lonlim2[1],merstep)
        parallels = sp.arange(latlim2[0],latlim2[1],parstep)
        m.drawparallels(parallels,labels=[1,0,0,0],fontsize=10)
        m.drawmeridians(meridians,labels=[0,0,0,1],fontsize=10)
        plt.hold(True)
        return m
Example #19
 def _speed_up(self, property: str, data1: RunData, data2: RunData):
     """
     Calculates the speed up from the second to the first
     (e.g. the first is RESULT * 100 % faster than the second).
     """
     return (scipy.mean(data1[property]) - scipy.mean(data2[property])) \
            / scipy.mean(data1[property])
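# Worked example of the formula above: with scipy.mean(data1[property]) == 2.0
# and scipy.mean(data2[property]) == 1.5 the method returns
# (2.0 - 1.5) / 2.0 = 0.25, i.e. RESULT * 100 % = 25 %.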
def test_psd_normalization():
    ''' This function tests the normalization of function psd. Mock data is
        one second of normal, mean zero, std = 2 data sampled at
        1kHz.  Since this is white noise, the white noise level of the PSD times
        the root of the bandwidth should give the rms amplitude of the
        data (in this case rt(2)).

        The normalization for a hanning window is also tested.  Windowing
        the data removes power from the time stream.  The data must be
        recalibrated in order to recover the best estimate of the white
        noise level.  For a hanning window the time stream must be multiplied by
        root(8/3) before the PSD is taken.
        '''

    # make fake data, window, window and rescale
    x = sp.random.normal(0, 2, 10000)
    wrx = window(x, 'hanning', 1)
    ms_x = sp.mean(x ** 2)
    ms_wrx = sp.mean(np.array(wrx) ** 2)
    ratio = ms_x / ms_wrx
    print ('MSA of timestream = %.4f\t\nMSA of windowed timestream = %.4f\nratio = %.4f' % (ms_x, ms_wrx, ratio))
    # take PSDs
    x_psd = psd(x, 381.47)
    wrx_psd = psd(wrx, 381.47)
    pylab.subplot(2, 1, 1)
    pylab.title('Test psd normalization')
    pylab.xlabel('Sample')
    pylab.ylabel('Cnts')
    pylab.plot(x, 'bo', wrx, 'ro')
    pylab.subplot(2, 1, 2)
    pylab.title('PSD')
    pylab.xlabel('Frequency [Hz]')
    pylab.ylabel('Cnts/rtHz')
    pylab.loglog(x_psd[0], x_psd[1], 'b-', wrx_psd[0], wrx_psd[1], 'r-')
    pylab.show()
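# The root(8/3) factor mentioned in the docstring comes from the mean squared
# value of a Hanning window, mean(w**2) = 3/8: dividing the windowed power by
# 3/8 (equivalently, scaling the timestream by sqrt(8/3)) restores the white
# noise level. A quick check of the factor (sketch, independent of window()/psd()):
import scipy as sp
n = 10000
w = 0.5 - 0.5 * sp.cos(2 * sp.pi * sp.arange(n) / (n - 1))
print sp.mean(w ** 2), 3.0 / 8, sp.sqrt(8.0 / 3.0)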
 def test_fit_over_f_plus_const(self):
     dt = 0.13
     n_time = 10000
     amp = 0.67 # K**2/Hz
     index = -1.3
     f_0 = 1.0
     thermal = 2.7 # K**2/Hz
     BW = 1./dt/2
     window = sig.get_window('hanning', n_time)
     n_spec = 10
     p = 0
     for ii in range(n_spec):
         time_stream = noise_power.generate_overf_noise(amp, index, f_0,
                                                             dt, n_time)
         time_stream += rand.normal(size=n_time) * sp.sqrt(thermal * BW * 2)
         time_stream -= sp.mean(time_stream)
         time_stream *= window
         p += noise_power.calculate_power(time_stream)
     p /= n_spec
     p = noise_power.make_power_physical_units(p, dt)
     w = noise_power.calculate_power(window)
     w_norm = sp.mean(w).real
     #w /= w_norm
     p = noise_power.prune_power(p).real
     #p /= w_norm
     f = noise_power.ps_freq_axis(dt, n_time)
     p = p[1:]
     f = f[1:]
     amp_m, index_m, f0_m, thermal_m = mn.fit_overf_const(p, w, f)
     self.assertTrue(sp.allclose(amp_m, amp, atol=0.2))
     self.assertTrue(sp.allclose(index_m, index, atol=0.1))
     self.assertTrue(sp.allclose(thermal_m, thermal, atol=0.1))
Example #22
File: nrm.py Project: corps-g/nrm
    def _compute_cycle_equal_weighted_power(self, T_F, T_C) :
        """ Computes cycle burnups and peaking factors assuming 
            equal batch powers.
        """
        N = len(T_F)
        rho_L = self.p['leakage_penalty']       
        
        # shorten function call by eliminating p and boron dependence
        rho = lambda b, t_f, t_c: self.rho(self.p, b, t_f, t_c, 0.0)   
        
        # equal power sharing implies equal temperatures--using the average
        T_Fa, T_Ca = sp.mean(T_F), sp.mean(T_C)
        
        # linearize the reactivity, i.e., rho ~ rho_0 + AB.  (this may fail
        # if poison is still dominant at 10 GWd/MTU)
        B_a, B_b = 10.0, 20.0
        rho_a, rho_b = rho(B_a, T_Fa, T_Ca), rho(B_b, T_Fa, T_Ca)
        A = (rho_b-rho_a)/(B_b-B_a)
        rho_0 = rho_a - A*B_a
        
        # then B_s and B_c are *approximately*
        B_s = (rho_L - rho_0)/A
        B_c = 2.0*B_s/(len(T_F)+1)
        
        # solve f(B_c) = mean(rho)-rho_L = 0 via scipy's root finder
        f = lambda B : sp.mean(rho(B*sp.arange(1, N+1), T_Fa, T_Ca)) - rho_L
        B_c = root(f, B_c).x[0]

        # compute batch-wise, EOC burnups and associated peaking factors
        B = B_c * sp.arange(1, N+1)
        ppf = sp.ones(N)
        return B, ppf
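# Where the initial B_c guess comes from: with the linearized reactivity
# rho ~ rho_0 + A*B, batch i (i = 1..N) reaches burnup i*B_c at end of cycle, so
# requiring the core-average reactivity to hit the leakage penalty gives
#     mean_i(rho_0 + A*i*B_c) = rho_0 + A*B_c*(N+1)/2 = rho_L
# and hence B_c = 2*(rho_L - rho_0)/(A*(N+1)) = 2*B_s/(N+1), with
# B_s = (rho_L - rho_0)/A the single-batch burnup. The root solve then refines
# this estimate with the full nonlinear rho().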
Example #23
 def compactDistance(self, target, candidates):
     #compare the candidates to the target according to some measure
     targetarr = target.reshape((self.totalSize, 3))
     candidatesarr = candidates.reshape((candidates.shape[0], self.totalSize, 3))
     target_avg = scipy.mean(targetarr, axis=0)
     candidates_avg = scipy.mean(candidatesarr, axis=1)
     return scipy.sum((target_avg - candidates_avg)**2, axis=1)
Example #24
    def calculateGradient(self):

        # normalize rewards
        # self.ds.data['reward'] /= max(ravel(abs(self.ds.data['reward'])))

        g = zeros((self.ds.getNumSequences(), self.ds.getDimension('loglh')), float)

        # get maximal length
        maxlen = max([self.ds.getSequenceLength(n) for n in range(self.ds.getNumSequences())])
        baselines = zeros((maxlen, self.ds.getDimension('loglh')), float)
        seqcount = zeros((maxlen, 1))

        # calculate individual baseline for each timestep and episode
        for seq in range(self.ds.getNumSequences()):
            _, _, rewards, loglhs = self.ds.getSequence(seq)
            for t in range(len(rewards)):
                baselines[t, :] += mean(sum(loglhs[:t + 1, :], 0) ** 2 * rewards[t, :], 0) / mean(sum(loglhs[:t + 1, :], 0) ** 2, 0)
                seqcount[t, :] += 1

        baselines = baselines / seqcount
        # print baselines
        for seq in range(self.ds.getNumSequences()):
            _, _, rewards, loglhs = self.ds.getSequence(seq)
            for t in range(len(rewards)):
                g[seq, :] += sum(loglhs[:t + 1, :], 0) * (rewards[t, :] - baselines[t])

        gradient = mean(g, 0)
        return gradient
Example #25
    def remove_baseline(self, anchorx, window, lead=0):
        """
        Remove baseline wander by subtracting a cubic spline.
        anchorx is a vector of isoelectric points (usually qrs onset -20ms)
        window is width of window to use (in ms) for averaging the amplitude at anchors
        """
        ecg = self.data[:, lead]                    
        windowwidth = _ms_to_samples(window, self.samplingrate) / 2
        #Do we have enough points before first anchor to use it
        if anchorx[0] < windowwidth:
            anchorx = anchorx[1:]
        # subtract dc
        ecg -= scipy.mean(ecg[anchorx[:]]) 
        # amplitudes for anchors
        # window is zero, no averaging
        if windowwidth == 0:
            anchory = scipy.array([ecg[x] for x in anchorx])
        # or average around the anchor
        else:
            anchory = scipy.array([scipy.mean(ecg[x-windowwidth:x+windowwidth])
                      for x in anchorx])
        # x values for spline that we are going to calculate
        splinex = scipy.array(range(len(ecg)))
        # calculate cubic spline fit
        tck = scipy.interpolate.splrep(anchorx, anchory)
        spliney = scipy.interpolate.splev(splinex, tck)
        # subtract the spline
        ecg -= spliney

        self.data[:, lead] = ecg

        return ecg
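# A self-contained sketch of the same cubic-spline baseline idea on synthetic
# data (illustrative names and numbers, independent of the class above):
import numpy as np
import scipy.interpolate

fs = 500.0                                       # assumed sampling rate, Hz
t = np.arange(0, 10, 1.0 / fs)
ecg = np.sin(2 * np.pi * 1.0 * t) + 0.5 * np.sin(2 * np.pi * 0.05 * t)  # signal + wander
anchorx = np.arange(50, len(ecg) - 50, 250)      # isoelectric-like anchor samples
anchory = np.array([ecg[a - 10:a + 10].mean() for a in anchorx])
tck = scipy.interpolate.splrep(anchorx, anchory)
baseline = scipy.interpolate.splev(np.arange(len(ecg)), tck)
cleaned = ecg - baseline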
Example #26
 def execute(self):
     self.power_mat, self.thermal_expectation = self.full_calculation()
     n_chan = self.power_mat.shape[1]
     n_freq = self.power_mat.shape[0]
     # Calculate the mean channel correlations at low frequencies.
     low_f_mat = sp.mean(self.power_mat[1:4 * n_chan + 1,:,:], 0).real
     # Factorize it into principal components.
     e, v = linalg.eigh(low_f_mat)
     self.low_f_mode_values = e
     # Make sure the eigenvalues are sorted.
     if sp.any(sp.diff(e) < 0):
         raise RuntimeError("Eigenvalues not sorted.")
     self.low_f_modes = v
     # Now subtract out the noisiest channel modes and see what is left.
     n_modes_subtract = 10
     mode_subtracted_power_mat = sp.copy(self.power_mat.real)
     mode_subtracted_auto_power = sp.empty((n_modes_subtract, n_freq))
     for ii in range(n_modes_subtract):
         mode = v[:,-(ii+1)]  # largest-eigenvalue (noisiest) modes first; v[:,-ii] picks column 0 when ii == 0
         amp = sp.sum(mode[:,None] * mode_subtracted_power_mat, 1)
         amp = sp.sum(amp * mode, 1)
         to_subtract = amp[:,None,None] * mode[:,None] * mode
         mode_subtracted_power_mat -= to_subtract
         auto_power = mode_subtracted_power_mat.view()
         auto_power.shape = (n_freq, n_chan**2)
         auto_power = auto_power[:,::n_chan + 1]
         mode_subtracted_auto_power[ii,:] = sp.mean(auto_power, -1)
     self.subtracted_auto_power = mode_subtracted_auto_power
Example #27
    def plot_temporal_average( self, 
                                                        color = 'g',
                                                        plot_std = True,
                                                        
                                                        t_start = None,
                                                        
                                                        label = None,
                                                        **kargs):
        if 'ax'in kargs:
            ax = kargs['ax']
        else:
            from matplotlib import pyplot
            fig = pyplot.figure()
            ax = fig.add_subplot(1,1,1)
        
        allpixel = self.selectAndPreprocess( **kargs ) 
        

        m = mean( allpixel , axis = 1 )
        
        if t_start is None:
            t = self.t()
        else:
            t = self.t() - self.t()[0] + t_start
        
        ax.plot(t , m , color = color , linewidth = 2 , label = label)
        
        if plot_std:
            # one-standard-deviation band across pixels (std assumed available alongside mean)
            s = std( allpixel , axis = 1 )
            ax.fill_between(t , m+s , m-s , color = color , alpha = .3 , )
Example #28
def estimate_performance_xgboost(X,labels,param, num_round, folds):
    '''
    Cross validation for XGBoost performance
    '''
    f=open("summary_bst_scan.txt","a")
    start = np.random.random_integers(1000) #time.time()
    # Cross validate
    kf = cv.KFold(labels.size, n_folds=folds, random_state=start)
    # Dictionary to store all the AMSs
    all_rmse = []
    for train_indices, test_indices in kf:
        X_train, X_test = X.loc[train_indices], X.loc[test_indices]
        y_train, y_test = labels[train_indices], labels[test_indices]
        xgmat = xgb.DMatrix(X_train, label=y_train)
        plst = param.items()#+[('eval_metric', '[email protected]')]

        watchlist = []#[(xgmat, 'train')]
        bst = xgb.train(plst, xgmat, num_round, watchlist)

        xgmat_test = xgb.DMatrix(X_test)
        y_out = bst.predict(xgmat_test)
        num=y_test.shape[0]
        y_test=np.reshape(y_test,num)
        rmse_score=rmse(y_out,y_test)
        print('rmse={}'.format(rmse_score))
        f.write('rmse={}'.format(rmse_score))
        f.write('\n')
        all_rmse.append(rmse_score)
    print ("------------------------------------------------------")
    print ("mean rmse ={} with std={}".format(sp.mean(all_rmse),sp.std(all_rmse)))
    f.write("mean rmse ={} with std={}".format(sp.mean(all_rmse),sp.std(all_rmse)))
    f.write('\n')   
    f.close()
Example #29
def PrintValues( outfile, values,  options, prefix = "",titles = None):

    if options.flat or options.aggregate_column:

        if options.add_header:
            if prefix: outfile.write( "prefix\t" )
            
            if titles: outfile.write( "column\t" )
                
            print "\t".join( ("nval", "min", "max", "mean", "median", "stddev", "sum", "q1", "q3" ) )
        
        for x in range(len(values)):

            vals = values[x]

            if len(vals) == 0:

                if options.output_empty:
                    if titles: outfile.write( titles[x] + "\t" )
                    if prefix: outfile.write( prefix + "\t" )

                    outfile.write( "0" + "\tna" * 8  + "\n" )

                continue

            if titles: outfile.write( titles[x] + "\t" )
            if prefix: outfile.write( prefix + "\t" )

            vals.sort()
            if len(vals) > 4:
                q1 = options.value_format % vals[len(vals) // 4]
                q3 = options.value_format % vals[len(vals) * 3 // 4]
            else:
                q1 = options.value_format % vals[0]
                q3 = options.value_format % vals[-1]

            outfile.write( "\t".join( ( "%i" % len(vals),
                                        options.value_format % float(min(vals)),
                                        options.value_format % float(max(vals)),
                                        options.value_format % scipy.mean(vals),
                                        options.value_format % scipy.median(vals),
                                        options.value_format % scipy.std(vals),                                      
                                        options.value_format % reduce( lambda x, y: x+y, vals),
                                        q1, q3,
                                        )) + "\n")
            
    else:

        if titles:
            print "category\t%s" % string.join(titles,"\t")

        print "count\t%s"  % (string.join( map(lambda v: "%i" % len(v), values), "\t"))
        print "min\t%s"    % (string.join( map(lambda v: options.value_format % min(v), values), "\t"))
        print "max\t%s"    % (string.join( map(lambda v: options.value_format % max(v), values), "\t"))
        print "mean\t%s"   % (string.join( map(lambda v: options.value_format % scipy.mean(v), values), "\t"))
        print "median\t%s" % (string.join( map(lambda v: options.value_format % scipy.median(v), values), "\t"))
        print "stddev\t%s" % (string.join( map(lambda v: options.value_format % scipy.std(v), values), "\t"))
        print "sum\t%s"    % (string.join( map(lambda v: options.value_format % reduce( lambda x,y: x+y, v), values), "\t"))
        print "q1\t%s"     % (string.join( map(lambda v: options.value_format % scipy.stats.scoreatpercentile(v,per=25), values), "\t"))
        print "q3\t%s"     % (string.join( map(lambda v: options.value_format % scipy.stats.scoreatpercentile(v,per=75), values), "\t"))
Example #30
def mean_and_std_from_binned_report(br_json, antibody_type_idx):
    Ab_mean_results = [0] # needs extra zero since there are n_ages + 1 bins in demographics layer
    Ab_std_results  = [0]

    age_bins = br_json['Header']['Subchannel_Metadata']['NumBinsPerAxis'][0]
    for age_idx in range(0,age_bins):
        Ab        = br_json["Channels"]["Sum " + br_channel_titles[antibody_type_idx] + " Variant Fractions"]["Data"][age_idx][-365:]
        ss_Ab     = br_json["Channels"]["Sum of Squared " + br_channel_titles[antibody_type_idx] + " Variant Fractions"]["Data"][age_idx][-365:]
        statpop   = br_json["Channels"]["Population"]["Data"][age_idx][-365:]

        mean_Ab = []
        std_Ab  = []
        for val,ss,pop in zip(Ab,ss_Ab,statpop):
            if pop > 0:
                mean = val/pop
                variance = ss/pop - mean**2
            else:
                mean = 0
                variance = 0
            mean_Ab.append(mean)
            if variance < 0:
                std_Ab.append(0)
            else:
                std_Ab.append(variance**0.5)

        #print(scipy.mean(mean_Ab), scipy.mean(std_Ab))
        Ab_mean_results.append(scipy.mean(mean_Ab))
        Ab_std_results.append(scipy.mean(std_Ab))

    return (Ab_mean_results, Ab_std_results)
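# The per-bin variance above uses the running-sum identity
#     variance = ss/pop - mean**2 = E[x**2] - (E[x])**2,
# clipped at zero since round-off can make it slightly negative. Quick check:
import scipy
x = scipy.random.random(1000)
print scipy.var(x), scipy.mean(x ** 2) - scipy.mean(x) ** 2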
Example #31
nreps = 12
bd = 100
s = 1e-6
f, a = plt.subplots(2)

names = [
    "../cache/camelrecc/EIMLE_" + str(int(100 * sp.log10(s))) + "_" +
    str(pwr) + "_" + str(i) + ".p" for i in xrange(nreps)
]
results = search.multiMLEFS(ojf, lb, ub, kernel, s, bd, names)

C = results[0][5]

yr = [r[11].flatten() for r in results]
Z = sp.vstack(yr) - camelmin
m = sp.mean(sp.log10(Z), axis=0)
v = sp.var(sp.log10(Z), axis=0)
sq = sp.sqrt(v)
a[1].fill_between(sp.array([sum(C[:j]) for j in xrange(len(C))]),
                  (m - sq).flatten(), (m + sq).flatten(),
                  facecolor='lightblue',
                  edgecolor='lightblue',
                  alpha=0.5)
a[1].plot([sum(C[:j]) for j in xrange(len(C))], m.flatten(), 'b.-')

yy = [r[10].flatten() for r in results]
Zy = sp.vstack(yy) - camelmin
my = sp.mean(sp.log10(Zy), axis=0)
vy = sp.var(sp.log10(Zy), axis=0)
sqy = sp.sqrt(vy)
a[1].fill_between(sp.array([sum(C[:j]) for j in xrange(len(C))]),
Example #32
        iter_EE, theta_sol, iter_maximin_rate = func_oht.oht_alg(
            d2d_to_d2d_gains_diag, uav_to_d2d_gains, d2d_to_d2d_gains_diff,
            eta, power_UAV, power_cir_UAV)
        EE_sol.append(iter_EE)
        tau_sol.append(1 - 1 / theta_sol)
        maximin_rate.append(iter_maximin_rate)

    except (SolverError, TypeError):
        # pass
        num_infeasible += 1

# Calculate the total time of solving
time_sol = (time.time() - t0)

v1 = sp.array(EE_sol)
EE_sol_vec_Mon = sp.mean(v1)
v2 = sp.array(maximin_rate)
maximin_rate_sol_Mon.append(sp.mean(v2))
v3 = sp.array(tau_sol)
tau_sol_vec_Mon = sp.mean(v3)

print "EE from OHT-OPT:", EE_sol_vec_Mon
print "tau_value from OHT-OPT", tau_sol_vec_Mon
print "maximin rate from OHT-OPT", maximin_rate_sol_Mon
print "Solving time for OHT-OPT Alg:", time_sol, "seconds"

print "# ################################################ #"

# #########################################################
# This part is use DNN model for sovling OHT problem
Example #33
    def process(self, element, unsampled_results):
        slice_key, metrics = element
        # metrics should be a list of dicts, but the dataflow runner has a quirk
        # that requires specific casting.
        metrics = list(metrics)
        side_input_results = {}

        for result in unsampled_results:
            unsampled_slice_key, unsampled_metrics = result
            side_input_results[unsampled_slice_key] = unsampled_metrics
        if len(metrics) == 1:
            yield slice_key, metrics[0]
            return

        original_structure = copy.copy(metrics[0])
        uber_metrics = {}
        unsampled_metrics = {}
        for m_dict in metrics:
            # For each metric in each slice, aggregate values over all of the computed
            # samples.
            for key in m_dict:
                _collect_metrics(m_dict[key], key, uber_metrics)
                unsampled_slice_key = slice_key
                _collect_metrics(side_input_results[unsampled_slice_key][key],
                                 key, unsampled_metrics)

        for key in uber_metrics:
            # Compute confidence interval given the data points per metric.
            confidence = 0.95
            data = uber_metrics[key]
            # Data has to be numeric. That means throw out nan values.
            n_samples = len(data)
            if n_samples:
                sample_mean = mean(data)
                std_err = sem(data)
                t_stat = t.ppf((1 + confidence) / 2, n_samples - 1)
                upper_bound = sample_mean + t_stat * std_err
                lower_bound = sample_mean - t_stat * std_err
                # Set [mean, lower_bound, upper_bound] for each metric component.
                uber_metrics[key] = types.ValueWithConfidenceInterval(
                    sample_mean, lower_bound, upper_bound,
                    unsampled_metrics[key][0])
            else:
                uber_metrics[key] = types.ValueWithConfidenceInterval(
                    float('nan'), float('nan'), float('nan'), float('nan'))

        # Convert metrics back into expected format with bounded values.
        for sub_key in uber_metrics:
            # Break sub-key into components.
            key_components = sub_key.split(',')
            original_key = key_components[0]
            metric_structure = original_structure[original_key]
            if isinstance(metric_structure, np.ndarray):
                metric_structure = np.array(metric_structure, dtype=object)
                _populate_bounded_metrics(key_components[1:], metric_structure,
                                          uber_metrics[sub_key])
            else:
                metric_structure = uber_metrics[sub_key]
            original_structure[original_key] = metric_structure

        yield slice_key, original_structure
Example #34
def snr_est_m2m4(signal):
    M2 = scipy.mean(abs(signal)**2)
    M4 = scipy.mean(abs(signal)**4)
    snr_rat = scipy.sqrt(2*M2*M2 - M4) / (M2 - scipy.sqrt(2*M2*M2 - M4))
    return 10.0*scipy.log10(snr_rat), snr_rat
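# Hypothetical usage sketch for the M2M4 estimator: a unit-power BPSK signal in
# complex Gaussian noise at a known SNR (all names and values illustrative):
import scipy

n = 100000
true_snr_db = 10.0
noise_var = 10.0 ** (-true_snr_db / 10.0)
signal = scipy.sign(scipy.random.uniform(-1.0, 1.0, n)) + 0j
noise = scipy.sqrt(noise_var / 2.0) * (scipy.random.randn(n) + 1j * scipy.random.randn(n))
snr_db, snr_lin = snr_est_m2m4(signal + noise)
print "estimated SNR: %.2f dB (true: %.2f dB)" % (snr_db, true_snr_db)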
Example #35
def main():
    p_dict = parse_parameters()
    local_ld_dict_file = '%s_ldradius%d.pickled.gz' % (p_dict['local_ld_file_prefix'], p_dict['ld_radius'])
    
    print """
Note: For maximal accuracy all SNPs with LDpred weights should be included in the validation data set.
If they are a subset of the validation data set, then we suggest recalculating LDpred for the overlapping SNPs.
"""
    if not os.path.isfile(local_ld_dict_file):
        df = h5py.File(p_dict['coord'])
                 
        chrom_ld_scores_dict = {}
        chrom_ld_dict = {}
        chrom_ref_ld_mats = {}
        if p_dict['gm_ld_radius'] is not None:
            chrom_ld_boundaries = {}
        ld_score_sum = 0
        num_snps = 0
        print 'Calculating LD information w. radius %d' % p_dict['ld_radius']

        cord_data_g = df['cord_data']

        for chrom_str in cord_data_g.keys():
            print 'Working on %s' % chrom_str
            g = cord_data_g[chrom_str]
            if 'raw_snps_ref' in g.keys():
                raw_snps = g['raw_snps_ref'][...]
                snp_stds = g['snp_stds_ref'][...]
                snp_means = g['snp_means_ref'][...]
            
            
            # Filter monomorphic SNPs
            ok_snps_filter = snp_stds > 0
            ok_snps_filter = ok_snps_filter.flatten()
            raw_snps = raw_snps[ok_snps_filter]
            snp_means = snp_means[ok_snps_filter]
            snp_stds = snp_stds[ok_snps_filter]

            n_snps = len(raw_snps)
            snp_means.shape = (n_snps, 1)   
            snp_stds.shape = (n_snps, 1)   
            
            
            # Normalize SNPs..
            snps = sp.array((raw_snps - snp_means) / snp_stds, dtype='float32')
            assert snps.shape == raw_snps.shape, 'Array Shape mismatch'
            if p_dict['gm_ld_radius'] is not None:
                assert 'genetic_map' in g.keys(), 'Genetic map is missing.'
                gm = g['genetic_map'][...]
                ret_dict = ld.get_LDpred_ld_tables(snps, gm=gm, gm_ld_radius=p_dict['gm_ld_radius'])
                chrom_ld_boundaries[chrom_str] = ret_dict['ld_boundaries']
            else:
                ret_dict = ld.get_LDpred_ld_tables(snps, ld_radius=p_dict['ld_radius'], ld_window_size=2 * p_dict['ld_radius'])
            chrom_ld_dict[chrom_str] = ret_dict['ld_dict']
            chrom_ref_ld_mats[chrom_str] = ret_dict['ref_ld_matrices']
            ld_scores = ret_dict['ld_scores']
            chrom_ld_scores_dict[chrom_str] = {'ld_scores':ld_scores, 'avg_ld_score':sp.mean(ld_scores)}
            ld_score_sum += sp.sum(ld_scores)
            num_snps += n_snps
        avg_gw_ld_score = ld_score_sum / float(num_snps)
        ld_scores_dict = {'avg_gw_ld_score': avg_gw_ld_score, 'chrom_dict':chrom_ld_scores_dict}    
        
        print 'Done calculating the LD table and LD score, writing to file:', local_ld_dict_file
        print 'Genome-wide average LD score was:', ld_scores_dict['avg_gw_ld_score']
        ld_dict = {'ld_scores_dict':ld_scores_dict, 'chrom_ld_dict':chrom_ld_dict, 'chrom_ref_ld_mats':chrom_ref_ld_mats}
        if p_dict['gm_ld_radius'] is not None:
            ld_dict['chrom_ld_boundaries'] = chrom_ld_boundaries 
        f = gzip.open(local_ld_dict_file, 'wb')
        cPickle.dump(ld_dict, f, protocol=2)
        f.close()
        print 'LD information is now pickled.'
    else:
        print 'Loading LD information from file: %s' % local_ld_dict_file
        f = gzip.open(local_ld_dict_file, 'r')
        ld_dict = cPickle.load(f)
        f.close()
        
    ldpred_genomewide(data_file=p_dict['coord'], out_file_prefix=p_dict['out'], ps=p_dict['PS'], ld_radius=p_dict['ld_radius'],
                      ld_dict=ld_dict, n=p_dict['N'], num_iter=p_dict['num_iter'], h2=p_dict['H2'], verbose=False)
Example #36
 def search_acq(self,cfn,sfn,volper=1e-6,dv=[[sp.NaN]]):
     def directwrap(Q,extra):
         x = sp.array([Q])
         if self.noS:
             alls = [k(x,x,dv,dv,gets=True)[1] for k in self.G.kf]
             s = sp.exp(sp.mean(sp.log(alls)))
         else:
             s = sfn(x)
         acq = PESgain(self.G,self.Ga,self.Z,x,dv,[s])
         try:
             #print x[0,1:],x[0,0]
             R = -acq/cfn(x[0,1:],**{'xa':x[0,0]})
         except TypeError:
             R = -acq/cfn(x,s)
         return (R,0)
     #print self.lb
     #print self.ub
     [xmin, ymin, ierror] = DIRECT.solve(directwrap,self.lb,self.ub,user_data=[], algmethod=1, volper=volper, logfilename='/dev/null')
     
     
     if False:
         from matplotlib import pyplot as plt
         import time
         D = self.lb.size
         ns=200
         f,a = plt.subplots(2*D)
         
         if self.noS:
             alls = [k(xmin,xmin,dv,dv,gets=True)[1] for k in self.G.kf]
             s = sp.exp(sp.mean(sp.log(alls)))
         else:
             s = sfn(x)
             
         for d in xrange(D):
             sup = sp.linspace(self.lb[d],self.ub[d],ns)
             X = sp.vstack([xmin]*ns)
             for j in xrange(ns):
                 X[j,d] = sup[j]
             [m,v] = self.G.infer_diag_post(X,[[sp.NaN]]*ns)
             
             sq = sp.sqrt(v)
             a[d].fill_between(sup,(m-2*sq).flatten(),(m+2.*sq).flatten(),facecolor = 'lightblue',edgecolor='lightblue')
             a[d].plot(sup,m.flatten())
             ps = sp.empty(ns)
             aps = sp.empty(ns)
             for j in xrange(ns):
                 ps[j] = self.query_pes(X[j,:],[s],[[sp.NaN]])
                 if d==0:
                     aps[j] = ps[j]/cfn(sp.array([X[j,:].flatten()]),s)
             
             a[d].twinx().plot(sup,ps,'r')
             if d==0:
                 a[d].twinx().plot(sup,aps,'g')
         
         for d in xrange(1,D):
             sup = sp.linspace(self.lb[d],self.ub[d],ns)
             X = sp.vstack([xmin]*ns)
             for j in xrange(ns):
                 X[j,d] = sup[j]
                 X[j,0] = 0.
             [m,v] = self.G.infer_diag_post(X,[[sp.NaN]]*ns)
             
             sq = sp.sqrt(v)
             a[d+D-1].fill_between(sup,(m-2*sq).flatten(),(m+2.*sq).flatten(),facecolor = 'lightblue',edgecolor='lightblue')
             a[d+D-1].plot(sup,m.flatten())
             ps = sp.empty(ns)
             aps = sp.empty(ns)
             for j in xrange(ns):
                 ps[j] = self.query_pes(X[j,:],[s],[[sp.NaN]])
                 
             
             a[d+D-1].twinx().plot(sup,ps,'r')
         
         
         a[2*D-1].hist(self.X[:,0].flatten(),50,facecolor='g')    
         print xmin
         
         f.savefig('../figcache/{0}.png'.format(time.time()))
         #plt.show()
     return [xmin,ymin,ierror]
Example #37
def edge_type_identification(g, kmax, ext_dic, return_all_list=False):
    global silent
    if not (silent):
        epbar = tqdm(total=g.number_of_edges(), desc='identifying')
    for u, v in g.edges():
        g[u][v]['type'] = None

    silks = []
    bonds = []
    Lbridges = []
    Gbridges = []
    int_threshold = {}

    ## phase 1: identify silk links
    edges = list(g.edges(data=True))
    nextphase = []
    for e in edges:
        u, v, w = e
        if (g.degree(u) == 1) | (g.degree(v) == 1):
            g[u][v]['type'] = 'Silk'
            silks.append(e)
            if not (silent): epbar.update(1)
        else:
            nextphase.append(e)
    #print len(silks)

    ## phase 2: identify bond and local bridges
    for i in range(kmax):
        l = str(i + 1)
        lindex = 'w' + l + 'a'
        Boname = 'Bond' + l
        Bdname = 'Local_Bridge' + l
        T_outk = ext_dic[l]

        edges = nextphase
        nextphase = []

        nextstep = []
        Rnextstep = []
        for e in edges:
            u, v, w = e
            Re = w[lindex]
            if Re >= T_outk:
                g[u][v]['type'] = Boname
                bonds.append((Boname, e))
                if not (silent): epbar.update(1)
            else:
                nextstep.append(e)
                Rnextstep.append(Re)

        if len(Rnextstep) == 0:
            T_ink = 0
        else:
            T_ink = scipy.mean(Rnextstep) - scipy.std(Rnextstep)
            if T_ink < 0:
                T_ink = 0.0
            for e in nextstep:
                u, v, w = e
                Re = w[lindex]
                if Re > T_ink:
                    g[u][v]['type'] = Bdname
                    Lbridges.append((Bdname, e))
                    if not (silent): epbar.update(1)
                else:
                    nextphase.append(e)
        int_threshold[l] = T_ink
        ## for kmax loop end here

    ## phase 3: identify global bridge
    edges = nextphase
    #nextphase = []
    for e in edges:
        u, v, w = e
        g[u][v]['type'] = 'Global_Bridge'
        Gbridges.append(e)
        if not (silent): epbar.update(1)

    if not (silent): print('done identify edge types')
    if return_all_list:
        return g, bonds, Lbridges, Gbridges, silks, int_threshold
    else:
        return g, int_threshold
Example #38
		current_flame = str(" "*int(cols/2 - cake_size/2))+str((" "+FLAMES[randint(0,2)]+" ")*int(cake_size/5))
	print(current_flame)

	# candles
	print(str(" "*int(cols/2 - cake_size/2))+str("  |  "*int(cake_size/5)))
	# cake top layer
	print(str(" "*int(cols/2 - cake_size/2))+str("-"*cake_size))

	bucket = []
	mug = []
	# mug contains the current frame samples (absolute values) of given sample_size
	# average of mugs are put into bucket
	for value in data[:,0][_f*display_rate+1:(_f+1)*display_rate]:
		mug.append(abs(value))
		if len(mug) == sample_size:
			bucket.append(mean(mug))
			mug = []
	bucket = [ (float)((x - _min) * max_display)/(_max - _min) for x in bucket ]

	# print the equalizer from the bucket
	for value in bucket:
		print(str(" "*int(cols/2 - cake_size/2))+"| "+str("8"*int(value%(cake_size-2)))+str(" "*(cake_size-value-2))+"|")

	# bottom crust of the cake
	print(str(" "*int(cols/2 - cake_size/2))+str("-"*cake_size))

	# print happy birthday message
	os.system("figlet -c -f small Happy Birthday for Yo Horiuchi")

	# sleep to match with the audio
	time.sleep(((float)(display_rate * t_total) / data_length)*correction)
Example #39
def sub_queue_depth(lists_action=None,
                    lists_cmd=None,
                    options=None,
                    *args,
                    **kwargs):
    '''
 [queue_length_c, queue_length]=sub_queue_depth(lists_action,lists_cmd,options)
 --> average queue depth for completion and arrival
 
 inputs
   lists_action: n samples x 2 array for arrival time and completion time; 
   lists_cmd: n samples x 3 for LBA, size, flags
   options: control parameters
       plot_fontsize: the figure's font size
       time_interval: the time interval for moving average windows
       plot_figure: >=1: plot the figure; otherwise not
       save_figure: >=1: save the figures
       export_report: >=1: export the figure/data into a ppt
       report_name: report name
 outputs
   queue_record: structure for queue    
      queue_length_c: queue_length for completion
       queue_length: queue_length for arrival
      queue_length_ave2: average queue length (arrival) based on given time interval
 contact [email protected] for questions
    '''

    if hasattr(options, 'plot_fontsize'):
        plot_fontsize = options.plot_fontsize
    else:
        plot_fontsize = 10

    if hasattr(options, 'time_interval'):
        time_interval = options.time_interval
    else:
        time_interval = 50

    if hasattr(options, 'save_figure'):
        save_figure = options.save_figure
    else:
        save_figure = 1

    if hasattr(options, 'plot_figure'):
        plot_figure = options.plot_figure
    else:
        plot_figure = 1

    a = shape(lists_action)[0]
    max_time = lists_action[a - 1, 0]
    ## method 1: based on a queue
    if a > 1024:
        max_queue_length = 1024 * 4
    else:
        max_queue_length = 512

    queue_length_c = zeros((a, 2))
    queue_length = zeros((a, 2))

    for i in arange(1, a):

        idx_queue = nonzero(
            logical_and((lists_action[:, 0] < lists_action[i, 1]),
                        (lists_action[:, 1] > lists_action[i, 1])))

        x = shape(idx_queue)[1]
        queue_length_c[i, :] = [lists_action[i, 0], x]

    if plot_figure == 1:
        figure()
        plot(queue_length_c[:, 0],
             queue_length_c[:, 1],
             marker='*',
             markersize=0.5)
        xlabel('time')
        ylabel('depth')
        title(('Estimated Device Queue Depth (completed); ave=' +
               str(mean(queue_length_c[:, 1]))))
        #set(findall(gcf,'-property','FontSize'),'FontSize',plot_fontsize)
        savefig('est_dev_queue_length_com.eps')
        savefig('est_dev_queue_length_com.png')
        savefig('est_dev_queue_length_com.jpg')
    xi = 0
    # we shall sort lists_action based on lists_action[:,0], increasing order
    for i in arange(1, a).reshape(-1):
        idx_queue = nonzero(lists_action[0:i, 1] > lists_action[i, 0])
        #idx_queue=nonzero(logical_and((lists_action[:,0] < lists_action[i,0]) , (lists_action[:,1] > lists_action[i,0])))
        x = shape(idx_queue)[1]
        queue_length[i, :] = [lists_action[i, 0], x]

    if plot_figure == 1:
        figure()
        plot(queue_length[:, 0],
             queue_length[:, 1],
             marker='*',
             markersize=0.5)
        xlabel('time')
        ylabel('depth')
        title(('Estimated Device Queue Depth (arrival); ave=' +
               str(mean(queue_length[:, 1]))))
        savefig('est_dev_queue_length_arr.eps')
        savefig('est_dev_queue_length_arr.png')
        savefig('est_dev_queue_length_arr.jpg')
        #set(findall(gcf,'-property','FontSize'),'FontSize',plot_fontsize)

    max_num = int(ceil(max_time / time_interval))
    queue_length_ave = zeros((max_num + 5, 2))
    queue_length_ave2 = zeros((max_num, 2))
    for i in arange(0, max_num).reshape(-1):
        cur_time = dot((i - 1), time_interval)
        end_time = dot(i, time_interval)
        idx = nonzero(
            logical_and((queue_length[:, 0] > cur_time),
                        (queue_length[:, 0] <= end_time)))
        dq = 0
        if shape(idx)[1] > 1:
            for j0 in arange(shape(idx)[1]):
                j = idx[0][j0]
                if j == 0:
                    dq = dot((queue_length[j, 0]), queue_length[j, 1])
                else:
                    dq = dq + dot(
                        (queue_length[j, 0] - queue_length[j - 1, 0]),
                        queue_length[j, 1])
            queue_length_ave2[i, :] = [
                end_time,
                dq / (queue_length[idx[0][-1], 0] - queue_length[idx[0][0], 0])
            ]
        elif shape(idx)[1] > 0:
            queue_length_ave2[i, :] = [end_time, queue_length[idx[0][0], 1]]
        else:
            queue_length_ave2[i, :] = [end_time, 0]

    if plot_figure == 1:
        figure()
        plot(queue_length_ave2[1:max_num, 0], queue_length_ave2[1:max_num, 1])
        xlabel('time (s)')
        ylabel('depth')
        # the average value may differ from the previous one, as this one is
        # time weighted. A more precise estimate would also account for the head and tail
        title(('Estimated Average Device Queue Depth (time weighted) = ' +
               str(mean(queue_length_ave2[1:max_num, 1])) + ' @' +
               str(time_interval) + 'seconds interval'))
        #set(findall(gcf,'-property','FontSize'),'FontSize',plot_fontsize)
        savefig('est_dev_queue_depth.eps')
        savefig('est_dev_queue_depth.png')
        savefig('est_dev_queue_depth.jpg')

    queue_record = queue_record_class(queue_length_c, queue_length,
                                      queue_length_ave2)
    if options is not None:
        options.section_name = 'Queue Depth'

    return queue_record

    # generate_ppt(options)
    #     cur_time=0;


#     cur_idx=1;
#     interval_idx=0;
#     for i=1:con0
#         if lists_action(i,7)>cur_time+time_interval
#             act_time_interval=lists_action(i,7)-lists_action(cur_idx,7);
#             interval_idx=interval_idx+1;
#             queue_length_ave(interval_idx,:)=[lists_action(i,6),sum(queue_length(cur_idx:i,2))/(i-cur_idx+1)];
#             #queue_length_ave(interval_idx,:)=[lists_action(i,6),sum(queue_length(cur_idx:i,2))/time_interval];
#             cur_idx=i;
#             cur_time=lists_action(i,7);
#         else

#         end
#     end

#     figure;
#     plot(queue_length_ave(1:interval_idx,1),queue_length_ave(1:interval_idx,2));
#     xlabel('time (s)');
#     ylabel('depth');
#     title(['Estimated Average Device Queue Depth @', num2str(time_interval), 'seconds interval'])

#a1=a - max_queue_length
#for i in arange(1,a):
#    if i <= max_queue_length:
#        idx_tmp=1
#        idx_back=i - 1 + max_queue_length
#    else:
#        if i >= a1:
#            idx_tmp=i - max_queue_length
#            idx_back=a
#        else:
#            idx_tmp=i - max_queue_length
#            idx_back=i - 1 + max_queue_length
#            # the current --> in the queue
#    idx_queue=nonzero(logical_and((lists_action[idx_tmp:idx_back,0] <= lists_action[i,1]),(lists_action[idx_tmp:idx_back,1] > lists_action[i,1])))
#    idx_queue=nonzero(logical_and((lists_action[:,0] < lists_action[i,1]),(lists_action[:,1] > lists_action[i,1])))
#queue_length_c[i,:]=[lists_action[i,1],len(idx_queue)]
#idx_queue=nonzero(logical_and((lists_action[idx_tmp-1:idx_back-1,0] <= lists_action[i-1,1]), (lists_action[idx_tmp-1:idx_back-1,1] > lists_action[i-1,1])))
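
# Hypothetical usage sketch (not part of the original source). It assumes the
# numpy names used by sub_queue_depth (shape, zeros, arange, nonzero, ...) are
# star-imported at module level, and it supplies a minimal stand-in for
# queue_record_class, which is defined elsewhere in the source.
from numpy import *


class queue_record_class:  # minimal stand-in for the real class
    def __init__(self, queue_length_c, queue_length, queue_length_ave2):
        self.queue_length_c = queue_length_c
        self.queue_length = queue_length
        self.queue_length_ave2 = queue_length_ave2


class Options:  # made-up control parameters for this sketch
    plot_figure = 0        # skip the matplotlib calls
    time_interval = 0.1    # seconds per averaging window


# synthetic trace: one arrival every millisecond, each I/O completes 5 ms later
arrivals = arange(0.0, 1.0, 0.001)
lists_action = column_stack((arrivals, arrivals + 0.005))

record = sub_queue_depth(lists_action=lists_action, lists_cmd=None,
                         options=Options())
print(record.queue_length_ave2[:5])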
Example #40
def vca(Y, R, verbose=True, snr_input=0):
    '''
    Vertex Component Analysis
    #
    Ae, indice, Yp = vca(Y,R,verbose = True,snr_input = 0)

    Arguments:
      Y - matrix with dimensions L(channels) x N(pixels)
          each pixel is a linear mixture of R endmembers
          signatures Y = M x s, where s = gamma x alfa
          gamma is an illumination perturbation factor and
          alfa are the abundance fractions of each endmember.
      R - positive integer number of endmembers in the scene

    Returns:
     Ae     - estimated mixing matrix (endmembers signatures)
     indice - pixels that were chosen to be the most pure
     Yp     - Data matrix Y projected.

     ------- Optional parameters---------
     snr_input - (float) signal to noise ratio (dB)
     verbose   - [True | False]
     ------------------------------------

     Author: Adrien Lagrange ([email protected])
     This code is a translation of a matlab code provided by
     Jose Nascimento ([email protected]) and Jose Bioucas Dias ([email protected])
     available at http://www.lx.it.pt/~bioucas/code.htm under a non-specified Copyright (c)
     Translation of last version at 22-February-2018 (Matlab version 2.1 (7-May-2004))

     more details on:
     Jose M. P. Nascimento and Jose M. B. Dias
     "Vertex Component Analysis: A Fast Algorithm to Unmix Hyperspectral Data"
     submitted to IEEE Trans. Geosci. Remote Sensing, vol. .., no. .., pp. .-., 2004

    '''
    import sys
    import scipy as sp
    import scipy.linalg as splin

    #############################################
    # Initializations
    #############################################
    if len(Y.shape) != 2:
        sys.exit(
            'Input data must be of size L (number of bands i.e. channels) by N (number of pixels)'
        )

    [L, N] = Y.shape  # L number of bands (channels), N number of pixels

    R = int(R)
    if (R <= 0 or R > L):
        sys.exit('ENDMEMBER parameter must be an integer between 1 and L')

#############################################
# SNR Estimates
#############################################

    if snr_input == 0:
        y_m = sp.mean(Y, axis=1, keepdims=True)
        Y_o = Y - y_m  # data with zero-mean
        Ud = splin.svd(sp.dot(Y_o, Y_o.T) /
                       float(N))[0][:, :R]  # computes the R-projection matrix
        x_p = sp.dot(Ud.T, Y_o)  # project the zero-mean data onto p-subspace

        SNR = estimate_snr(Y, y_m, x_p)

        if verbose:
            print("SNR estimated = {}[dB]".format(SNR))
    else:
        SNR = snr_input
        if verbose:
            print("input SNR = {}[dB]\n".format(SNR))

    SNR_th = 15 + 10 * sp.log10(R)

    #############################################
    # Choosing Projective Projection or
    #          projection to p-1 subspace
    #############################################

    if SNR < SNR_th:
        if verbose:
            print("... Select proj. to R-1")

        d = R - 1
        if snr_input == 0:  # it means that the projection is already computed
            Ud = Ud[:, :d]
        else:
            y_m = sp.mean(Y, axis=1, keepdims=True)
            Y_o = Y - y_m  # data with zero-mean

            Ud = splin.svd(
                sp.dot(Y_o, Y_o.T) /
                float(N))[0][:, :d]  # computes the p-projection matrix
            x_p = sp.dot(Ud.T,
                         Y_o)  # project the zero-mean data onto p-subspace

        Yp = sp.dot(Ud, x_p[:d, :]) + y_m  # again in dimension L

        x = x_p[:d, :]  #  x_p =  Ud.T * Y_o is on a R-dim subspace
        c = sp.amax(sp.sum(x**2, axis=0))**0.5
        y = sp.vstack((x, c * sp.ones((1, N))))
    else:
        if verbose:
            print("... Select the projective proj.")

        d = R
        Ud = splin.svd(sp.dot(Y, Y.T) /
                       float(N))[0][:, :d]  # computes the p-projection matrix

        x_p = sp.dot(Ud.T, Y)
        Yp = sp.dot(Ud, x_p[:d, :]
                    )  # again in dimension L (note that x_p has no null mean)

        x = sp.dot(Ud.T, Y)
        u = sp.mean(x, axis=1, keepdims=True)  #equivalent to  u = Ud.T * r_m
        y = x / sp.dot(u.T, x)

#############################################
# VCA algorithm
#############################################

    indice = sp.zeros((R), dtype=int)
    A = sp.zeros((R, R))
    A[-1, 0] = 1

    for i in range(R):
        w = sp.random.rand(R, 1)
        f = w - sp.dot(A, sp.dot(splin.pinv(A), w))
        f = f / splin.norm(f)

        v = sp.dot(f.T, y)

        indice[i] = sp.argmax(sp.absolute(v))
        A[:, i] = y[:, indice[i]]  # same as x(:,indice(i))

    Ae = Yp[:, indice]

    return Ae, indice, Yp
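
# Hypothetical usage sketch (not part of the original example): unmix a small
# synthetic linear mixture. snr_input is passed explicitly so that the helper
# estimate_snr, which is defined elsewhere in the source, is not required.
import numpy as np

L_bands, N_pix, R_end = 50, 500, 3
M = np.random.rand(L_bands, R_end)                  # endmember signatures
S = np.random.dirichlet(np.ones(R_end), N_pix).T    # abundances, columns sum to 1
Y = np.dot(M, S) + 0.01 * np.random.randn(L_bands, N_pix)

Ae, indice, Yp = vca(Y, R_end, verbose=True, snr_input=25)
print(Ae.shape, indice)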
Example #41
def main():
    gr_estimators = {"simple": digital.SNR_EST_SIMPLE,
                     "skew": digital.SNR_EST_SKEW,
                     "m2m4": digital.SNR_EST_M2M4,
                     "svr": digital.SNR_EST_SVR}
    py_estimators = {"simple": snr_est_simple,
                     "skew": snr_est_skew,
                     "m2m4": snr_est_m2m4,
                     "svr": snr_est_svr}


    parser = OptionParser(option_class=eng_option, conflict_handler="resolve")
    parser.add_option("-N", "--nsamples", type="int", default=10000,
                      help="Set the number of samples to process [default=%default]")
    parser.add_option("", "--snr-min", type="float", default=-5,
                      help="Minimum SNR [default=%default]")
    parser.add_option("", "--snr-max", type="float", default=20,
                      help="Maximum SNR [default=%default]")
    parser.add_option("", "--snr-step", type="float", default=0.5,
                      help="SNR step amount [default=%default]")
    parser.add_option("-t", "--type", type="choice",
                      choices=gr_estimators.keys(), default="simple",
                      help="Estimator type {0} [default=%default]".format(
                            gr_estimators.keys()))
    (options, args) = parser.parse_args ()

    N = options.nsamples
    xx = scipy.random.randn(N)
    xy = scipy.random.randn(N)
    bits =2*scipy.complex64(scipy.random.randint(0, 2, N)) - 1
    #bits =(2*scipy.complex64(scipy.random.randint(0, 2, N)) - 1) + \
    #    1j*(2*scipy.complex64(scipy.random.randint(0, 2, N)) - 1)

    snr_known = list()
    snr_python = list()
    snr_gr = list()

    # when to issue an SNR tag; can be ignored in this example.
    ntag = 10000

    n_cpx = xx + 1j*xy

    py_est = py_estimators[options.type]
    gr_est = gr_estimators[options.type]

    SNR_min = options.snr_min
    SNR_max = options.snr_max
    SNR_step = options.snr_step
    SNR_dB = scipy.arange(SNR_min, SNR_max+SNR_step, SNR_step)
    for snr in SNR_dB:
        SNR = 10.0**(snr/10.0)
        scale = scipy.sqrt(2*SNR)
        yy = bits + n_cpx/scale
        print "SNR: ", snr

        Sknown = scipy.mean(yy**2)
        Nknown = scipy.var(n_cpx/scale)
        snr0 = Sknown/Nknown
        snr0dB = 10.0*scipy.log10(snr0)
        snr_known.append(float(snr0dB))

        snrdB, snr = py_est(yy)
        snr_python.append(snrdB)

        gr_src = blocks.vector_source_c(bits.tolist(), False)
        gr_snr = digital.mpsk_snr_est_cc(gr_est, ntag, 0.001)
        gr_chn = channels.channel_model(1.0/scale)
        gr_snk = blocks.null_sink(gr.sizeof_gr_complex)
        tb = gr.top_block()
        tb.connect(gr_src, gr_chn, gr_snr, gr_snk)
        tb.run()

        snr_gr.append(gr_snr.snr())

    f1 = pylab.figure(1)
    s1 = f1.add_subplot(1,1,1)
    s1.plot(SNR_dB, snr_known, "k-o", linewidth=2, label="Known")
    s1.plot(SNR_dB, snr_python, "b-o", linewidth=2, label="Python")
    s1.plot(SNR_dB, snr_gr, "g-o", linewidth=2, label="GNU Radio")
    s1.grid(True)
    s1.set_title('SNR Estimators')
    s1.set_xlabel('SNR (dB)')
    s1.set_ylabel('Estimated SNR')
    s1.legend()

    f2 = pylab.figure(2)
    s2 = f2.add_subplot(1,1,1)
    s2.plot(yy.real, yy.imag, 'o')

    pylab.show()
Example #42
def ldpred_genomewide(data_file=None, ld_radius=None, ld_dict=None, out_file_prefix=None, ps=None,
               n=None, h2=None, num_iter=None, verbose=False, zero_jump_prob=0.05, burn_in=5):
    """
    Calculate LDpred for a genome
    """    
    
    df = h5py.File(data_file, 'r')
    has_phenotypes = False
    if 'y' in df.keys():
        print 'Validation phenotypes found.'
        y = df['y'][...]  # Phenotype
        num_individs = len(y)
        risk_scores_pval_derived = sp.zeros(num_individs)
        has_phenotypes = True

    ld_scores_dict = ld_dict['ld_scores_dict']
    chrom_ld_dict = ld_dict['chrom_ld_dict']
    chrom_ref_ld_mats = ld_dict['chrom_ref_ld_mats']
        
    print 'Applying LDpred with LD radius: %d' % ld_radius
    results_dict = {}
    num_snps = 0
    sum_beta2s = 0
    cord_data_g = df['cord_data']

    for chrom_str in chromosomes_list: 
        if chrom_str in cord_data_g.keys():
            g = cord_data_g[chrom_str]
            betas = g['betas'][...]
            n_snps = len(betas)
            num_snps += n_snps
            sum_beta2s += sp.sum(betas ** 2)
        
    L = ld_scores_dict['avg_gw_ld_score']
    chi_square_lambda = sp.mean(n * sum_beta2s / float(num_snps))
    print 'Genome-wide lambda inflation:', chi_square_lambda,
    print 'Genome-wide mean LD score:', L
    gw_h2_ld_score_est = max(0.0001, (max(1, chi_square_lambda) - 1) / (n * (L / num_snps)))
    print 'Estimated genome-wide heritability:', gw_h2_ld_score_est
    
    assert chi_square_lambda > 1, 'Something is wrong with the GWAS summary statistics. Perhaps there were issues parsing them, or the given GWAS sample size (N) was too small. Either way, lambda (the mean Chi-square statistic) is too small.'
    
    LDpred_inf_chrom_dict = {}
    print 'Calculating LDpred-inf weights'
    for chrom_str in chromosomes_list:
        if chrom_str in cord_data_g.keys():
            print 'Calculating scores for Chromosome %s' % ((chrom_str.split('_'))[1])           
            g = cord_data_g[chrom_str]

            # Filter monomorphic SNPs
            snp_stds = g['snp_stds_ref'][...]
            snp_stds = snp_stds.flatten()
            ok_snps_filter = snp_stds > 0
            pval_derived_betas = g['betas'][...]
            pval_derived_betas = pval_derived_betas[ok_snps_filter]
            if h2 is not None:
                h2_chrom = h2 * (n_snps / float(num_snps))            
            else:
                h2_chrom = gw_h2_ld_score_est * (n_snps / float(num_snps))
            start_betas = LDpred_inf.ldpred_inf(pval_derived_betas, genotypes=None, reference_ld_mats=chrom_ref_ld_mats[chrom_str],
                                                h2=h2_chrom, n=n, ld_window_size=2 * ld_radius, verbose=False)
            LDpred_inf_chrom_dict[chrom_str] = start_betas
    
    
    for p in ps:
        print 'Starting LDpred with p=%0.4f' % p
        p_str = '%0.4f' % p
        results_dict[p_str] = {}
    
        if out_file_prefix:
            # Preparing output files
            raw_effect_sizes = []
            ldpred_effect_sizes = []
            ldpred_inf_effect_sizes = []
            out_sids = []
            chromosomes = []
            out_positions = []
            out_nts = []
            
        for chrom_str in chromosomes_list:
            if chrom_str in cord_data_g.keys():
                g = cord_data_g[chrom_str]
                if has_phenotypes:
                    if 'raw_snps_val' in g.keys():
                        raw_snps = g['raw_snps_val'][...]
                    else:
                        raw_snps = g['raw_snps_ref'][...]
                
                # Filter monomorphic SNPs
                snp_stds = g['snp_stds_ref'][...]
                snp_stds = snp_stds.flatten()
                ok_snps_filter = snp_stds > 0
                snp_stds = snp_stds[ok_snps_filter]
                pval_derived_betas = g['betas'][...]
                pval_derived_betas = pval_derived_betas[ok_snps_filter]
                positions = g['positions'][...]
                positions = positions[ok_snps_filter]
                sids = g['sids'][...]
                sids = sids[ok_snps_filter]
                log_odds = g['log_odds'][...]
                log_odds = log_odds[ok_snps_filter]
                nts = g['nts'][...]
                nts = nts[ok_snps_filter]


                if out_file_prefix:
                    chromosomes.extend([chrom_str] * len(pval_derived_betas))
                    out_positions.extend(positions)
                    out_sids.extend(sids)
                    raw_effect_sizes.extend(log_odds)
                    out_nts.extend(nts)
        
                n_snps = len(pval_derived_betas)
                
                if h2 is not None:
                    h2_chrom = h2 * (n_snps / float(num_snps))            
                else:
                    h2_chrom = gw_h2_ld_score_est * (n_snps / float(num_snps))
                # print 'Prior parameters: p=%0.3f, n=%d, m=%d, h2_chrom=%0.4f' % (p, n, n_snps, h2_chrom)
                if 'chrom_ld_boundaries' in ld_dict.keys():
                    ld_boundaries = ld_dict['chrom_ld_boundaries'][chrom_str]
                    res_dict = ldpred_gibbs(pval_derived_betas, h2=h2_chrom, n=n, p=p, ld_radius=ld_radius,
                                            verbose=verbose, num_iter=num_iter, burn_in=burn_in, ld_dict=chrom_ld_dict[chrom_str],
                                            start_betas=LDpred_inf_chrom_dict[chrom_str], ld_boundaries=ld_boundaries,
                                            zero_jump_prob=zero_jump_prob)
                else:
                    res_dict = ldpred_gibbs(pval_derived_betas, h2=h2_chrom, n=n, p=p, ld_radius=ld_radius,
                                            verbose=verbose, num_iter=num_iter, burn_in=burn_in, ld_dict=chrom_ld_dict[chrom_str],
                                            start_betas=LDpred_inf_chrom_dict[chrom_str], zero_jump_prob=zero_jump_prob)
                
                updated_betas = res_dict['betas']
                updated_inf_betas = res_dict['inf_betas']
                sum_sqr_effects = sp.sum(updated_betas ** 2)
                if sum_sqr_effects > gw_h2_ld_score_est:
                    print 'Sum of squared updated effects estimates seems too large:', sum_sqr_effects
                    print 'This suggests that the Gibbs sampler did not converge.'
                
                print 'Calculating scores for Chromosome %s' % ((chrom_str.split('_'))[1])
                updated_betas = updated_betas / (snp_stds.flatten())
                updated_inf_betas = updated_inf_betas / (snp_stds.flatten())
                ldpred_effect_sizes.extend(updated_betas)
                ldpred_inf_effect_sizes.extend(updated_inf_betas)
                if has_phenotypes:
                    prs = sp.dot(updated_betas, raw_snps)
                    risk_scores_pval_derived += prs
                    corr = sp.corrcoef(y, prs)[0, 1]
                    r2 = corr ** 2
                    print 'The R2 prediction accuracy of PRS using %s was: %0.4f' % (chrom_str, r2)
        
                    
        print 'There were %d (SNP) effects' % num_snps
        if has_phenotypes:
            num_indivs = len(y)
            results_dict[p_str]['y'] = y
            results_dict[p_str]['risk_scores_pd'] = risk_scores_pval_derived
            print 'Prediction accuracy was assessed using %d individuals.' % (num_indivs)
    
            corr = sp.corrcoef(y, risk_scores_pval_derived)[0, 1]
            r2 = corr ** 2
            results_dict[p_str]['r2_pd'] = r2
            print 'The  R2 prediction accuracy (observed scale) for the whole genome was: %0.4f (%0.6f)' % (r2, ((1 - r2) ** 2) / num_indivs)
    
            if corr < 0:
                risk_scores_pval_derived = -1 * risk_scores_pval_derived
            auc = calc_auc(y, risk_scores_pval_derived)
            print 'AUC for the whole genome was: %0.4f' % auc
    
            # Now calibration                               
            denominator = sp.dot(risk_scores_pval_derived.T, risk_scores_pval_derived)
            y_norm = (y - sp.mean(y)) / sp.std(y)
            numerator = sp.dot(risk_scores_pval_derived.T, y_norm)
            regression_slope = (numerator / denominator)  # [0][0]
            print 'The slope for predictions with P-value derived  effects is:', regression_slope
            results_dict[p_str]['slope_pd'] = regression_slope
        
        weights_out_file = '%s_LDpred_p%0.4e.txt' % (out_file_prefix, p)
        with open(weights_out_file, 'w') as f:
            f.write('chrom    pos    sid    nt1    nt2    raw_beta     ldpred_beta\n')
            for chrom, pos, sid, nt, raw_beta, ldpred_beta in it.izip(chromosomes, out_positions, out_sids, out_nts, raw_effect_sizes, ldpred_effect_sizes):
                nt1, nt2 = nt[0], nt[1]
                f.write('%s    %d    %s    %s    %s    %0.4e    %0.4e\n' % (chrom, pos, sid, nt1, nt2, raw_beta, ldpred_beta))

    weights_out_file = '%s_LDpred-inf.txt' % (out_file_prefix)
    with open(weights_out_file, 'w') as f:
        f.write('chrom    pos    sid    nt1    nt2    raw_beta    ldpred_inf_beta \n')
        for chrom, pos, sid, nt, raw_beta, ldpred_inf_beta in it.izip(chromosomes, out_positions, out_sids, out_nts, raw_effect_sizes, ldpred_inf_effect_sizes):
            nt1, nt2 = nt[0], nt[1]
            f.write('%s    %d    %s    %s    %s    %0.4e    %0.4e\n' % (chrom, pos, sid, nt1, nt2, raw_beta, ldpred_inf_beta))
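
# Hypothetical numeric check (not part of the original) of the LD-score based
# heritability estimate used above, h2 ~= (lambda - 1) / (n * L / M), with
# made-up values for sample size n, SNP count M and mean LD score L_bar.
n, M, L_bar = 100000, 500000, 100.0
chi_square_lambda = 1.25
gw_h2 = max(0.0001, (max(1, chi_square_lambda) - 1) / (n * (L_bar / M)))
print(gw_h2)   # -> 0.0125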
Example #43
def format_data(data, period, category_list, cpu_file):
    metric_list = [
        'addtocartbulk', 'checkLogin', 'checkoutoptions', 'login', 'logout',
        'main', 'orderhistory', 'quickadd'
    ]
    delete = []

    for i in xrange(len(data[2]) - 1):
        if not data[2][i]:
            delete.append(i)

    # delete data
    data, category_list, delete = remove_data(data, category_list, delete)

    # find out those metrics that are not in the metric_list
    for i in xrange(len(data[2]) - 1):
        flag = 0
        for j in range(8):
            if category_list[i] == metric_list[j]:
                flag = 1

        if flag == 0:
            delete.append(i)

    # delete data
    data, category_list, delete = remove_data(data, category_list, delete)

    start_time = min(data[2][0])
    max_time = max(data[2][0])

    for i in xrange(1, len(data[2]) - 1):
        if data[2][i] and start_time > min(data[2][i]):
            start_time = min(data[2][i])
        if data[2][i] and max_time < max(data[2][i]):
            max_time = max(data[2][i])

    samples = int(math.floor(((max_time - start_time) / period)))

    print_message('Number of samples (interval:%s) : %s' % (period, samples))

    for i in xrange(len(data[2]) - 1):
        end_time = start_time

        departure = [a + r * 1000 for a, r in zip(data[2][i], data[3][i])]

        for k in xrange(samples):

            index = [
                v[0] for v in enumerate(departure)
                if end_time <= v[1] < (end_time + period)
            ]

            arr_index = [
                v[0] for v in enumerate(data[2][i])
                if end_time <= v[1] < (end_time + period)
            ]

            response_times = [0]
            if index:
                response_times = [data[3][i][idx] for idx in index]
            data[4][i].append(scipy.mean(response_times))
            data[5][i].append(len(index) / period * 1000)
            data[6][i].append(len(index))
            data[7][i].append(len(arr_index))

            data[0][i].append(end_time + period)
            end_time += period

    # Number of samples for each request might not be equal
    max_num_requests = 0
    max_requests_idx = 0
    for i in xrange(len(data[2]) - 1):
        if max_num_requests < len(data[2][i]):
            max_num_requests = len(data[2][i])
            max_requests_idx = i

    for i in xrange(len(data[2]) - 1):
        data[0][i] = data[0][max_requests_idx]
        if len(data[4][i]) < len(data[0][i]):
            data[4][i].extend([0] * (len(data[0][i]) - len(data[4][i])))
            data[5][i].extend([0] * (len(data[0][i]) - len(data[5][i])))
            data[6][i].extend([0] * (len(data[0][i]) - len(data[6][i])))

    data[0][len(data[0]) - 1] = data[0][0]

    with open(cpu_file) as f:
        count = 0
        cpu = []
        cpu_time = []
        flag = 0
        line = f.readline()
        while line:
            cpu_num = float(line)

            if count % 2 == 0:
                if cpu_num > 1 or math.isnan(cpu_num):
                    flag = 1
                else:
                    cpu.append(cpu_num)
            else:
                if flag:
                    flag = 0
                else:
                    cpu_time.append(cpu_num)

            count += 1
            line = f.readline()

    cpu_time = [e - 3600 * 1000 for e in cpu_time]
    indices = [i[0] for i in sorted(enumerate(cpu_time), key=lambda x: x[1])]
    cpu_time = [cpu_time[i] for i in indices]
    cpu = [cpu[i] for i in indices]

    for i in xrange(len(data[0][0])):
        indices_found = [
            v[0] for v in enumerate(cpu_time)
            if data[0][0][i] <= v[1] < data[0][0][i] + period
        ]

        if indices_found:
            mean = scipy.mean([cpu[i] for i in indices_found])
            data[1][len(data[1]) - 1].append(mean)

    return data
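
# Hypothetical mini-example (not part of the original) of the windowing idiom
# used in format_data above: keep the indices whose timestamp falls inside
# [end_time, end_time + period).
timestamps = [0, 40, 120, 260, 310]
period, end_time = 100, 100
index = [v[0] for v in enumerate(timestamps) if end_time <= v[1] < end_time + period]
print(index)   # -> [2]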
Example #44
# while True:
#     experiment._stepQueueLoop()
#     # rewards.append(mean(agent.history.getSumOverSequences('reward')))
#     print agent.module.getParameters(),
#     print mean(agent.history.getSumOverSequences('reward'))
#     clf()
#     plot(rewards)

# episodic version
x = 0
batch = 30  #number of samples per gradient estimate (was: 20; more here due to stochastic setting)
while x < 5000:
    #while True:
    experiment.doEpisodes(batch)
    x += batch
    reward = mean(
        agent.history.getSumOverSequences('reward')) * task.rewardscale
    if useGraphics:
        pl.addData(0, x, reward)
    print(agent.module.params)
    print(reward)
    #if reward > 3:
    #    pass
    agent.learn()
    agent.reset()
    if useGraphics:
        pl.update()

if len(sys.argv) > 2:
    agent.history.saveToFile(sys.argv[1], protocol=-1, arraysonly=True)
if useGraphics:
    pl.show(popup=True)
Example #45
# read in the probesets
f = open( ps_fn )
pss = { row.strip():0 for row in f }
f.close()

# read in the expression by computing the mean of controls only
# controls are the last four columns
controls = [2, 5, 6, 9]
f = open( expr_fn )
mean_expr = dict()
pss_expr = dict()
affected_means = list()
unaffected_means = list()
for row in f:
	L = row.strip().split( '\t' )
	mn = scipy.mean( map( float, [L[i] for i in controls] ))
	if L[0] not in mean_expr:
		mean_expr[L[0]] = mn
		if L[0] in pss:
			pss_expr[L[0]] = mn
			print mn
		else:
			print >> sys.stderr, mn
	else:
		raise ValueError( "You have a duplicate probeset. How did that happen?" )
f.close()

# calculate the overall mean and sd
ov_mean = scipy.mean( pss_expr.values() )
ov_sd = scipy.std( pss_expr.values() )
Example #46
def ldpred_inf_genomewide(data_file=None, ld_radius = None, ld_dict=None, out_file_prefix=None,
                          n=None, h2=None, use_gw_h2=False, verbose=False, summary_dict=None):
    """
    Calculate LDpred for a genome
    """    
    
    df = h5py.File(data_file,'r')
    has_phenotypes=False
    if 'y' in df:
        print('Validation phenotypes found.')
        y = df['y'][...]  # Phenotype
        num_individs = len(y)
        risk_scores_pval_derived = sp.zeros(num_individs)
        has_phenotypes=True

    ld_scores_dict = ld_dict['ld_scores_dict']
    chrom_ref_ld_mats = ld_dict['chrom_ref_ld_mats']
        
    print('Applying LDpred-inf with LD radius: %d' % ld_radius)
    results_dict = {}
    cord_data_g = df['cord_data']

    #Calculating genome-wide heritability using LD score regression, and partitioning heritability by chromosome
    herit_dict = ld.get_chromosome_herits(cord_data_g, ld_scores_dict, n, h2=h2, use_gw_h2=use_gw_h2, 
                                          debug=verbose,summary_dict=summary_dict)

    if out_file_prefix:
        #Preparing output files
        raw_effect_sizes = []
        ldpred_effect_sizes = []
        sids = []
        chromosomes = []
        positions = []
        nts = []
        
    for chrom_str in util.chromosomes_list:
        if chrom_str in cord_data_g:
            g = cord_data_g[chrom_str]
            if has_phenotypes:
                if 'raw_snps_val' in g:
                    raw_snps = g['raw_snps_val'][...]
                else:
                    raw_snps = g['raw_snps_ref'][...]
            
            snp_stds = g['snp_stds_ref'][...]
            pval_derived_betas = g['betas'][...]
            if out_file_prefix:
                chromosomes.extend([chrom_str]*len(pval_derived_betas))
                positions.extend(g['positions'][...])
                sids_arr = (g['sids'][...]).astype(util.sids_u_dtype)
                sids.extend(sids_arr)
                raw_effect_sizes.extend(g['log_odds'][...])
                nts_arr = (g['nts'][...]).astype(util.nts_u_dtype)
                nts.extend(nts_arr)
        
            h2_chrom = herit_dict[chrom_str]['h2'] 
            updated_betas = ldpred_inf(pval_derived_betas, genotypes=None, reference_ld_mats=chrom_ref_ld_mats[chrom_str], 
                                                h2=h2_chrom, n=n, ld_window_size=2*ld_radius, verbose=False)
                    
            print('Calculating scores for Chromosome %s'%((chrom_str.split('_'))[1]))
            updated_betas = updated_betas / (snp_stds.flatten())
            ldpred_effect_sizes.extend(updated_betas)
            if has_phenotypes:
                prs = sp.dot(updated_betas, raw_snps)
                risk_scores_pval_derived += prs
                corr = sp.corrcoef(y, prs)[0, 1]
                r2 = corr ** 2
                print('The R2 prediction accuracy of PRS using %s was: %0.4f' %(chrom_str, r2))

                
    if has_phenotypes:
        num_indivs = len(y)
        results_dict['y']=y
        results_dict['risk_scores_pd']=risk_scores_pval_derived
        print('Prediction accuracy was assessed using %d individuals.'%(num_indivs))

        corr = sp.corrcoef(y, risk_scores_pval_derived)[0, 1]
        r2 = corr ** 2
        results_dict['r2_pd']=r2
        print('The  R2 prediction accuracy (observed scale) for the whole genome was: %0.4f (%0.6f)' % (r2, ((1-r2)**2)/num_indivs))

        if corr<0:
            risk_scores_pval_derived = -1* risk_scores_pval_derived
        auc = util.calc_auc(y,risk_scores_pval_derived)
        print('AUC for the whole genome was: %0.4f'%auc)

        #Now calibration                               
        denominator = sp.dot(risk_scores_pval_derived.T, risk_scores_pval_derived)
        y_norm = (y-sp.mean(y))/sp.std(y)
        numerator = sp.dot(risk_scores_pval_derived.T, y_norm)
        regression_slope = (numerator / denominator)
        print('The slope for predictions with P-value derived  effects is: %0.4f'%regression_slope)
        results_dict['slope_pd']=regression_slope
    
    weights_out_file = '%s_LDpred-inf.txt' % (out_file_prefix)
    with open(weights_out_file,'w') as f:
        f.write('chrom    pos    sid    nt1    nt2    raw_beta    ldpred_inf_beta\n')
        for chrom, pos, sid, nt, raw_beta, ldpred_beta in zip(chromosomes, positions, sids, nts, raw_effect_sizes, ldpred_effect_sizes):
            nt1,nt2 = nt[0],nt[1]
            f.write('%s    %d    %s    %s    %s    %0.4e    %0.4e\n'%(chrom, pos, sid, nt1, nt2, raw_beta, ldpred_beta))
Example #47
def cov_from_segments(gene,
                      seg_counts,
                      edge_counts,
                      edge_idx,
                      ax,
                      sample_idx=None,
                      log=False,
                      cmap_seg=None,
                      cmap_edg=None,
                      xlim=None,
                      grid=False,
                      order='C'):
    """This function takes a gene and its corresponding segment and edge counts to
    produce a coverage overview plot."""

    if sample_idx is None:
        sample_idx = [sp.arange(seg_counts.shape[1])]

    norm = plt.Normalize(0, len(sample_idx))

    if cmap_seg is None:
        cmap_seg = plt.get_cmap('jet')
    if cmap_edg is None:
        cmap_edg = plt.get_cmap('jet')

    line_patches = []
    fill_patches = []

    ### iterate over segments
    for j in range(gene.segmentgraph.segments.shape[1]):
        s = gene.segmentgraph.segments[:, j]
        ### iterate over samples
        for c, curr_idx in enumerate(sample_idx):
            #for i in curr_idx:
            if log:
                counts = sp.log10(seg_counts[j, curr_idx] + 1)
            else:
                counts = seg_counts[j, curr_idx]

            ### plot segment over all samples (including uncertainty region)
            if counts.shape[0] == 1:
                ax.plot(s, [counts[0], counts[0]],
                        '-',
                        color=cmap_seg(norm(c)),
                        linewidth=0.5)
                #line_patches.append(mlines.Line2D(s, [counts[0], counts[0]], color=cmap_seg(norm(c)), linewidth=2, transform=None))
            elif counts.shape[0] > 1:
                stderr = spst.sem(counts)
                mean = sp.mean(counts)
                #ax.fill_between(s, mean, mean+stderr, color=cmap_seg(norm(c)), alpha=0.3)
                ax.fill_between(s,
                                mean - stderr,
                                mean + stderr,
                                color=cmap_seg(norm(c)),
                                alpha=0.2,
                                edgecolor='none',
                                linewidth=0)
                #fill_patches.append(mpatches.Rectangle(s, mean-stderr, mean+stderr, color=cmap_seg(norm(c)), alpha=0.3, transform=None))
                ax.plot(s, [mean, mean],
                        '-',
                        color=cmap_seg(norm(c)),
                        linewidth=0.5)
                #line_patches.append(mlines.Line2D(s, [mean, mean], color=cmap_seg(norm(c)), linewidth=2, transform=None))

                #ax.plot(s, [mean+stderr, mean+stderr], ':', color=cmap_seg(norm(c)), linewidth=1)
                #ax.plot(s, [mean-stderr, mean-stderr], ':', color=cmap_seg(norm(c)), linewidth=1)

    #for line in line_patches:
    #    ax.add_line(line)
    #for patch in fill_patches:
    #    ax.add_patch(patch)

    ### iterate over intron edges
    for j in range(edge_idx.shape[0]):
        ### iterate over samples
        for c, curr_idx in enumerate(sample_idx):
            [s, t] = sp.unravel_index(edge_idx[j],
                                      gene.segmentgraph.seg_edges.shape,
                                      order=order)
            if log:
                counts = sp.log10(edge_counts[j, curr_idx] + 1)
            else:
                counts = edge_counts[j, curr_idx]
            mean = sp.mean(counts)
            add_intron_patch2(ax,
                              gene.segmentgraph.segments[1, s],
                              gene.segmentgraph.segments[0, t],
                              mean,
                              color=cmap_edg(norm(c)))

    if xlim is not None:
        ax.set_xlim(xlim)

    ### draw grid
    if grid:
        ax.grid(b=True,
                which='major',
                linestyle='--',
                linewidth=0.2,
                color='#222222')
        ax.xaxis.grid(False)

    ax.set_ylim([0, ax.get_ylim()[1]])
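
# Hypothetical mini-example (not part of the original) of the mean +/- SEM band
# idiom used above, with plain numpy/matplotlib stand-in data.
import numpy as np
import scipy.stats as spst
import matplotlib.pyplot as plt

s = [100, 200]                         # segment start/stop coordinates
counts = np.array([5.0, 7.0, 6.0])     # coverage of one segment across samples
mean, stderr = np.mean(counts), spst.sem(counts)

fig, ax = plt.subplots()
ax.fill_between(s, mean - stderr, mean + stderr, alpha=0.2, edgecolor='none')
ax.plot(s, [mean, mean], '-', linewidth=0.5)
plt.show()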
Example #48
    def centroid(self):
        """Return the geometric center of all x-y data points."""
        # 2012-06-27
        return scipy.mean(self.x), scipy.mean(self.y)
Example #49
def interact(u, v):
    """Compute element-wise mean(s) from two arrays."""
    return tuple(mean(array([u, v]), axis=0))
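
# Hypothetical usage (not part of the original); `mean` and `array` are assumed
# to come from a numpy star import in the snippet's module.
from numpy import array, mean  # satisfies the names interact() relies on

print(interact((0.0, 2.0), (2.0, 4.0)))   # -> (1.0, 3.0)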
Example #50
def normalizedFitness(R):
    return array((R - mean(R)) / sqrt(var(R))).flatten()
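
# Hypothetical quick check (not part of the original): the output should have
# zero mean and unit variance. numpy is imported explicitly here; the snippet
# above assumes `array`, `mean`, `sqrt` and `var` from a star import.
from numpy import array, mean, var

R = array([1.0, 2.0, 3.0, 4.0])
z = normalizedFitness(R)
print(mean(z), var(z))   # ~0.0 and ~1.0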
Example #51
    def calcMean(self):
        self.mean = scipy.mean(self.series)
Example #52
    def _emmax_permutations(self,
                            snps,
                            phenotypes,
                            num_perm,
                            K=None,
                            Z=None,
                            method='REML'):
        """
                EMMAX permutation test
                Single SNPs
                
                Returns the list of max_pvals and max_fstats 
                """
        lmm = lm.LinearMixedModel(phenotypes)
        lmm.add_random_effect(Z * K * Z.T)

        eig_L = lmm._get_eigen_L_()

        print 'Getting variance estimates'
        res = lmm.get_estimates(eig_L, method=method)

        q = 1  # Single SNP is being tested
        p = len(lmm.X.T) + q
        n = lmm.n
        n_p = n - p
        H_sqrt_inv = res['H_sqrt_inv']

        Y = H_sqrt_inv * lmm.Y  #The transformed outputs.
        h0_X = H_sqrt_inv * lmm.X
        (h0_betas, h0_rss, h0_rank, h0_s) = linalg.lstsq(h0_X, Y)
        Y = Y - h0_X * h0_betas

        num_snps = len(snps)
        max_fstat_list = []
        min_pval_list = []
        chunk_size = len(Y)
        print "Working with chunk size: " + str(chunk_size)
        print "and " + str(num_snps) + " SNPs."
        Ys = sp.mat(sp.zeros((chunk_size, num_perm)))

        for perm_i in range(num_perm):
            #print 'Permutation nr. % d' % perm_i
            sp.random.shuffle(Y)
            Ys[:, perm_i] = Y

        min_rss_list = sp.repeat(h0_rss, num_perm)
        for i in range(0, num_snps,
                       chunk_size):  #Do the dot-product in chunks!
            snps_chunk = sp.matrix(snps[i:(i + chunk_size)])
            snps_chunk = snps_chunk * Z.T
            Xs = snps_chunk * (H_sqrt_inv.T)
            Xs = Xs - sp.mat(sp.mean(Xs, axis=1))
            for j in range(len(Xs)):  # for each snp
                (betas, rss_list, p,
                 sigma) = linalg.lstsq(Xs[j].T, Ys,
                                       overwrite_a=True)  # read the lstsq lit
                for k, rss in enumerate(rss_list):
                    if not rss:
                        print 'No predictability in the marker, moving on...'
                        continue
                    if min_rss_list[k] > rss:
                        min_rss_list[k] = rss
                if num_snps >= 10 and (i + j + 1) % (
                        num_snps / num_perm) == 0:  #Print dots
                    sys.stdout.write('.')
                    sys.stdout.flush()

        if num_snps >= 10:
            sys.stdout.write('\n')

        #min_rss = min(rss_list)
        max_f_stats = ((h0_rss / min_rss_list) - 1.0) * n_p / float(q)
        min_pvals = (stats.f.sf(max_f_stats, q, n_p))

        res_d = {'min_ps': min_pvals, 'max_f_stats': max_f_stats}
        print "There are: " + str(len(min_pvals))
        return res_d
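
# Hypothetical numeric check (not part of the original) of the F-statistic and
# p-value conversion used above, with made-up residual sums of squares.
from scipy import stats

h0_rss, min_rss, n_p, q = 120.0, 100.0, 96, 1
max_f_stat = (h0_rss / min_rss - 1.0) * n_p / float(q)   # -> 19.2
min_pval = stats.f.sf(max_f_stat, q, n_p)
print(max_f_stat, min_pval)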
Example #53
m = np.mean(vrest, 1)
vrest = vrest - m[:, None]
''' This is added since length of fcon1000 sequences is 225'''
#vrest = vrest[:, :225]

s = np.std(vrest, 1) + 1e-116
vrest1 = vrest / s[:, None]

rho1 = 0
rho1rot = 0
diffafter = 0
diffbefore = 0

a = sp.load('/big_disk/ajoshi/coding_ground/brainsync/data/\
fcon1000_null_all_' + hemi + '.npz')
rho_null = sp.mean(a['rho_null'], axis=0)

lst = glob.glob('/big_disk/ajoshi/fcon_1000/Beijing/sub*')
nsub = 0
rho_all = sp.zeros((vrest1.shape[0], 0))

for sub in lst:
    if not os.path.exists(sub + '/fmri_tnlm_5_reduce3_v2.mat'):
        continue

    vrest2 = scipy.io.loadmat(sub + '/fmri_tnlm_5_reduce3_v2.mat')
    data = vrest2['func_' + hemi + '']
    indx = sp.isnan(data)
    data[indx] = 0
    vrest = data
    vrest = vrest[:, :vrest1.shape[1]]
Example #54
                                   NRespWV[:, 1],
                                   kind='linear',
                                   copy=True,
                                   bounds_error=False,
                                   fill_value=0.0)
NResponRef = NRespInterp(Ref[:, 0])

#Create Master Observed Spectrum by merging CLR and NIR spectra

MergeStartWV = 7500.
MergeEndWV = 7900.

OverlapRegionIndices=np.where((Ref[:,0] >MergeStartWV) & \
     (Ref[:,0] < MergeEndWV))

NIRScaling2CLR= scipy.mean(CLRonRef[OverlapRegionIndices]) \
    / scipy.mean(NIRonRef[OverlapRegionIndices])

MASTER = deepcopy(Ref)
MASTER[OverlapRegionIndices,1]= \
    (CLRonRef[OverlapRegionIndices]+NIRonRef[OverlapRegionIndices]*NIRScaling2CLR)/2.

DedicatedNIRIndices = np.where(Ref[:, 0] >= 7900.)
DedicatedCLRIndices = np.where(Ref[:, 0] <= 7500.)
MASTER[DedicatedNIRIndices, 1] = NIRonRef[DedicatedNIRIndices] * NIRScaling2CLR
MASTER[DedicatedCLRIndices, 1] = CLRonRef[DedicatedCLRIndices]
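
# Hypothetical numeric check (not part of the original) of the overlap scaling
# above: the NIR spectrum is rescaled to the CLR level over the overlap region,
# then the two are averaged.
import numpy as np

clr = np.array([2.0, 2.2, 2.4])
nir = np.array([1.0, 1.1, 1.2])
scaling = np.mean(clr) / np.mean(nir)      # -> 2.0
merged = (clr + nir * scaling) / 2.0
print(scaling, merged)                     # -> 2.0 and [2.0, 2.2, 2.4]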

#Compute EWs for telluric bands from MASTER
"""EWFN="JupiterEW20150123UT.txt"
BandName,BandStart,BandEnd,ContWidth,EW=ComputeEW.ComputeEW(MASTER,"Ca II H&K",3920.,3990.,20.,EWFN,False)
BandName,BandStart,BandEnd,ContWidth,EW=ComputeEW.ComputeEW(MASTER,"H Delta",4090.,4115.,10.,EWFN,True)
Example #55
def main(sysargs):
    sys.argv = sysargs
    arg_parser = argparse.ArgumentParser(
        description='Formats debates by removing HTML and filtering words.')
    arg_parser.add_argument('-i',
                            '--infile',
                            required=True,
                            help='Debate file to format.')
    args = arg_parser.parse_args()

    # Initialize nltk elements.
    parser = SpeechHTMLParser()
    sent_splitter = PunktSentenceTokenizer()
    tokenizer = TreebankWordTokenizer()
    tagger_loc = '/het/users/jengi/stanford-postagger/'
    tagger = StanfordTagger(tagger_loc + 'models/wsj-0-18-bidirectional-distsim.tagger', \
                                tagger_loc + 'stanford-postagger.jar')
    stemmer = SnowballStemmer('english')

    # Read infile.
    speaker_pattern = re.compile('.*:')
    null_pattern = re.compile('\s*(\[[^\]]*\]|\([^\)]*\))')
    dash_pattern = re.compile('\S+(--)\s+')
    ellipse_pattern = re.compile('\s*\.\.\.\s*')
    noun_tags = ['NN', 'NNS', 'NNP', 'NNPS']
    punct = ['!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', \
                 '-', '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', \
                 '\\', ']', '^', '_', '`', '{', '|', '}', '~']
    block_lengths = []
    with open(args.infile, 'r') as afile:
        file_contents = afile.read()
        parser.feed(file_contents)
        parser.close()

        num_blocks = 0
        speeches = {}
        for (speaker, block) in parser.text:
            if num_blocks % 10 == 0:
                print >> sys.stderr, 'Processing block ' + str(
                    num_blocks) + ' ...'
            orig_block = block

            # Remove applause, laughter, etc.
            block = repeated_search(block, null_pattern, 0)

            # Remove -- from the end of words.  (Indicates stuttering / stopping.)
            block = repeated_search(block, dash_pattern, 1)

            # Do more complex tokenization.
            sents = sent_splitter.tokenize(block)
            sents = [ellipse_pattern.sub(' ... ', sent) for sent in sents]
            tokens = [tokenizer.tokenize(sent) for sent in sents]

            # Run POS tagger and keep only nouns.
            # Also lowercase and stem these nouns.
            tags = [tagger.tag(toks) for toks in tokens]
            tokens = []
            tagged_text = []
            for sent in tags:
                tokens.append([])
                for (word, tag) in sent:
                    tagged_text.append(word)
                    tagged_text.append(tag)
                    if tag in noun_tags:
                        tokens[len(tokens) - 1].append(
                            stemmer.stem(word.lower()))

            # Remove any "sentences" that are actually empty and
            # any tokens that are pure punctuation.
            for i in reversed(range(len(tokens))):
                for j in reversed(range(len(tokens[i]))):
                    non_punct = ''.join(
                        [tok for tok in tokens[i][j] if tok not in punct])
                    if len(non_punct) == 0:
                        del tokens[i][j]

                if len(tokens[i]) == 0:
                    del tokens[i]

            # Make sure there is still at least one sentence left.
            num_sents = len(tokens)
            if num_sents == 0:
                continue

            # Add block to speeches dictionary.
            speaker = speaker[:speaker_pattern.match(speaker).end() - 1]
            if speaker not in speeches:
                speeches[speaker] = []
            speeches[speaker].append(orig_block)
            speeches[speaker].append(' '.join(tagged_text))
            speeches[speaker].append('\n'.join(
                [' '.join(sent) for sent in tokens]))
            #print speeches[speaker][0]
            #print speeches[speaker][1]
            #print speeches[speaker][2]

            num_blocks += 1
            num_tokens = 0
            for toks in tokens:
                num_tokens += len(toks)
            block_lengths.append(num_tokens)

    # Save each speaker's text to a file.
    (infolder, basename) = os.path.split(os.path.abspath(args.infile))
    out_prefix = infolder + '/'
    out_suffix = basename
    for speaker in speeches:
        # Create outfile prefixed by speaker's name.
        outfile = open(out_prefix + speaker + '-' + out_suffix, 'w')

        # Save text to outfile.
        blocks = speeches[speaker]
        for i in range(0, len(blocks), 3):
            print >> outfile, blocks[i]
            print >> outfile, blocks[i + 1]
            print >> outfile, blocks[i + 2]
            print >> outfile

        outfile.close()

    print '# of blocks: ' + str(num_blocks)
    print 'Mean # of tokens (per block): ' + str(scipy.mean(block_lengths))
    print 'Median # of tokens: ' + str(scipy.median(block_lengths))
    print 'Standard deviation in # of tokens: ' + str(scipy.std(block_lengths))
Example #56
    if t == 1:
        c = 'r'
        marker = 'x'
    if t == 2:
        c = 'g'
        marker = 'o'
    if t == 3:
        c = 'b'
        marker = '*'
    if t == 4:
        c = 'y'
        marker = '+'
    plt.scatter(x[features[5]][y == t],
                x[features[6]][y == t].values,
                c=c,
                marker=marker)
# so there is no particularly clear class separation visible
x.corr()
sb.heatmap(x.corr())

Myclassifier = KNeighborsClassifier()

p = KFold(n_splits=5, shuffle=True)
means = []

for training, testing in p.split(x):

    Myclassifier.fit(x[training], y[training])
    prediction = Myclassifier.predict(x[testing])
    currmean = sc.mean(prediction == y[testing])
    means.append(currmean)
print("Mean accuracy: {:.1%}".format(sc.mean(means)))
Example #57
    def __init__(self, config):
        """."""

        Surface.__init__(self, config)

        # Models are stored as dictionaries in .mat format
        model_dict = loadmat(config['surface_file'])
        self.components = list(zip(model_dict['means'], model_dict['covs']))
        self.n_comp = len(self.components)
        self.wl = model_dict['wl'][0]
        self.n_wl = len(self.wl)

        # Set up normalization method
        self.normalize = model_dict['normalize']
        if self.normalize == 'Euclidean':
            self.norm = lambda r: norm(r)
        elif self.normalize == 'RMS':
            self.norm = lambda r: s.sqrt(s.mean(pow(r, 2)))
        elif self.normalize == 'None':
            self.norm = lambda r: 1.0
        else:
            raise ValueError('Unrecognized Normalization: %s\n' %
                             self.normalize)

        try:
            self.selection_metric = config['selection_metric']
        except KeyError:
            self.selection_metric = 'Mahalanobis'

        # This field, if present and set to true, forces us to use
        # any initialization state and never change.  The state is
        # preserved in the geometry object so that this object stays
        # stateless
        try:
            self.select_on_init = config['select_on_init']
        except KeyError:
            self.select_on_init = False

        # Reference values are used for normalizing the reflectances.
        # in the VSWIR regime, reflectances are normalized so that the model
        # is agnostic to absolute magnitude.
        self.refwl = s.squeeze(model_dict['refwl'])
        self.idx_ref = [s.argmin(abs(self.wl-w))
                        for w in s.squeeze(self.refwl)]
        self.idx_ref = s.array(self.idx_ref)

        # Cache some important computations
        self.Covs, self.Cinvs, self.mus = [], [], []
        for i in range(self.n_comp):
            Cov = self.components[i][1]
            self.Covs.append(s.array([Cov[j, self.idx_ref]
                                      for j in self.idx_ref]))
            self.Cinvs.append(svd_inv(self.Covs[-1]))
            self.mus.append(self.components[i][0][self.idx_ref])

        # Variables retrieved: each channel maps to a reflectance model parameter
        rmin, rmax = 0, 10.0
        self.statevec = ['RFL_%04i' % int(w) for w in self.wl]
        self.bounds = [[rmin, rmax] for w in self.wl]
        self.scale = [1.0 for w in self.wl]
        self.init = [0.15 * (rmax-rmin)+rmin for v in self.wl]
        self.idx_lamb = s.arange(self.n_wl)
        self.n_state = len(self.statevec)
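
# Hypothetical check (not part of the original) of the two normalizations the
# surface model above can select between, written with plain numpy.
import numpy as np

r = np.array([3.0, 4.0])
print(np.linalg.norm(r))           # Euclidean norm -> 5.0
print(np.sqrt(np.mean(r ** 2)))    # RMS norm -> ~3.536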
Example #58
    def mse(self, y_test, y):
        return sp.sqrt(sp.mean((y_test - y)**2))
Example #59
    cabecalho.append(dic_vertices_associados[indice])

my_df = pd.DataFrame(matriz, columns=cabecalho, index=cabecalho)

my_df.to_csv('saida_csv.csv', index=True, header=True, sep=';')

# Save an output file with the computed indices
file = open('indices_calculados.csv', 'w')
file.write('Numero de Vertices: ' + str(len(nx.degree(g))) + '\n')
file.write('\nNumero de arestas: ' + str(len(nx.edges(g))) + '\n')
file.write('\nLista de arestas:' + str(nx.edges(g)) + '\n')

file.write('\nGraus: ' + str(nx.degree(g)) + '\n')

grau = pd.DataFrame.from_dict(nx.degree(g), orient='index')
file.write('\nGrau médio do componente principal: ' + str(sp.mean(grau)[0]))

file.write('\nMinimo Caminho Medio:' +
           str(nx.average_shortest_path_length(g)))  #Graph is not connected
file.write('\nDiametro funcao(py):' + str(
    nx.diameter(g)))  #NetworkXError: Graph not connected: infinite path length
file.write('\n Densidade da rede: ' + str(nx.density(g)))

file.write('\nMatriz de adjacencia funcao(py):' + str(nx.adjacency_matrix(g)))

file.write('\nCoeficientede Aglomeração:  ' + str(nx.clustering(g)))

dic_aglomeracao = nx.clustering(g)
total_aglomeracao = 0

for chave in dic_aglomeracao:
Example #60
def snr_est_simple(signal):
    s = scipy.mean(abs(signal)**2)
    n = 2*scipy.var(abs(signal))
    snr_rat = s/n
    return 10.0*scipy.log10(snr_rat), snr_rat
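
# Hypothetical quick check (not part of the original): feed snr_est_simple a
# noisy BPSK signal generated with numpy; the snippet above assumes its module
# imports scipy.
import numpy as np

N = 10000
bits = 2.0 * np.random.randint(0, 2, N) - 1.0     # unit-power BPSK symbols
noise = np.sqrt(0.05) * np.random.randn(N)        # true SNR is about 13 dB
snr_db, snr_lin = snr_est_simple(bits + noise)
print("estimated SNR: %.2f dB" % snr_db)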