def plot_median_errors(RefinementLevels):
        for i in RefinementLevels[0].cases:
            x =[];
            y =[];
            print "Analyzing median error on: ", i ;
            for r in RefinementLevels:                
                x.append(r.LUT.D_dim*r.LUT.P_dim)
                r.get_REL_ERR_SU2(i)
                y.append(r.SU2[i].median_ERR*100)
            
            x = sp.array(x)
            y = sp.array(y)            
            y = y[sp.argsort(x)]
            x = x[sp.argsort(x)]
                                    
            LHM = sp.ones((len(x),2))
            RHS = sp.ones((len(x),1))            
            LHM[:,1] = sp.log10(x)
            RHS[:,0] = sp.log10(y)

            sols = sp.linalg.lstsq(LHM,RHS)
            b = -sols[0][1]
            plt.loglog(x,y, label='%s, %s'%(i,r'$O(\frac{1}{N})^{%s}$'%str(sp.around(b,2))), basex=10, basey=10, \
                       subsy=sp.linspace(10**(-5), 10**(-2),20),\
                       subsx=sp.linspace(10**(2), 10**(5),50))
            
            #for r in RefinementLevels:                
               # x.append(r.LUT.D_dim*r.LUT.P_dim)
              #  r.get_REL_ERR_SciPy(i)
             #   y.append(r.SciPy[i].median_ERR*100)
            #plt.plot(x,y, label='SciPy: %s'%i)
        plt.grid(which='both')
        plt.xlabel('Grid Nodes (N)')
        plt.ylabel('Median relative error [%]')
        return;
def my_bh_fdr(p_val_vec):
    index = scipy.argsort(p_val_vec)
    exp_err = scipy.vstack((float(len(p_val_vec))/scipy.arange(1,len(p_val_vec) + 1)*p_val_vec[index],
                                      scipy.tile(1, [1, len(p_val_vec)]))).min(axis = 0)
    exp_err = scipy.vstack((exp_err,exp_err[scipy.r_[0,scipy.arange(len(exp_err)-1)]])).max(axis=0)
    #scipy.r_[index[0], index[range(len(index)-1)]
    resort_index = scipy.argsort(index)                 
    return exp_err[resort_index]
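
# Hypothetical usage sketch (not part of the original snippet): apply the
# Benjamini-Hochberg correction above to a small p-value vector. Assumes
# scipy is imported under its own name, as the function body does.
import scipy
raw_p = scipy.array([0.001, 0.02, 0.04, 0.30, 0.85])
adjusted = my_bh_fdr(raw_p)  # adjusted p-values, returned in the original order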
Example #3
    def outputTargetSimPairs(self, pairFile):

        pairList = []
        pairFilehandle = open(pairFile)
        for line in pairFilehandle:
            words = (line.strip().strip('\n').strip()).split()
            pairList.append(words)
        pairFilehandle.close()

        print "..Outputting similarities"
        outputFilename = "simPairs.txt"
        outputFilehandle = open(outputFilename, "w")
        outputFilehandle.write("word1 word2 sim | zsim1 zsim2 | psim1 psim2 | nIn1 nIn2\n")
        
        numTargets = len(self.similarityMatrix[0])
        
        for pair in pairList:
            if ((pair[0] in self.targetDict) and (pair[1] in self.targetDict)):
                i = self.targetDict[pair[0]]
                j = self.targetDict[pair[1]]
                
                sim = self.similarityMatrix[i,j]
                
                word0Sims = self.similarityMatrix[i]
                word1Sims = self.similarityMatrix[j]
                
                z0Sim = (sim - word0Sims.mean()) / word0Sims.std()
                z1Sim = (sim - word1Sims.mean()) / word1Sims.std()
                
                sim0min = np.amin(word0Sims)
                sim1min = np.amin(word1Sims)
                adjSim0 = sim + abs(sim0min)
                adjSim1 = sim + abs(sim1min)
                
                adjSimVector0 = word0Sims + abs(sim0min)
                adjSimVector1 = word1Sims + abs(sim1min)
                sim0Sum = adjSimVector0.sum()
                sim1Sum = adjSimVector1.sum()
                
                p0Sim = adjSim0 / sim0Sum
                p1Sim = adjSim1 / sim1Sum
                
                sortedIndexes0 = scipy.argsort(word0Sims)
                sortedIndexes1 = scipy.argsort(word1Sims)
                
                for k in range(numTargets):
                    if sortedIndexes0[k] == j:
                        nIn0 = numTargets - k
                        break
                for k in range(numTargets):
                    if sortedIndexes1[k] == i:
                        nIn1 = numTargets - k
                        break
                
                outputFilehandle.write("%s %s %0.3f | %0.3f %0.3f | %0.5f %0.5f | %0.0f %0.0f\n" % (pair[0], pair[1], sim, z0Sim, z1Sim, p0Sim, p1Sim, nIn0, nIn1))
                
            else:
                outputFilehandle.write("%s %s NA NA NA NA NA NA NA\n" % (pair[0], pair[1]))
Example #4
def plot_overlap_ps(result_file, ss_file='/Users/bjarnivilhjalmsson/data/GIANT/GIANT_HEIGHT_Wood_et_al_2014_publicrelease_HapMapCeuFreq.txt',
                   fig_filename='/Users/bjarnivilhjalmsson/data/tmp/manhattan_combPC_HGT.png', method='combPC',
                   ylabel='Comb. PC (HIP,WC,HGT,BMI) $-log_{10}(P$-value$)$', xlabel='Height $-log_{10}(P$-value$)$', p_thres=0.00001):
    # Parse results and SS file
    res_table = pandas.read_table(result_file)
    ss_table = pandas.read_table(ss_file)
    # Parse 
    res_sids = sp.array(res_table['SNPid'])
    if method == 'MVT':
        comb_ps = sp.array(res_table['pval'])
    elif method == 'combPC':
        comb_ps = sp.array(res_table['combPC'])
    if 'MarkerName' in ss_table.keys():
        ss_sids = sp.array(ss_table['MarkerName'])
    elif 'SNP' in ss_table.keys():
        ss_sids = sp.array(ss_table['SNP'])
    else:
        raise Exception("Don't know where to look for rs IDs")
    marg_ps = sp.array(ss_table['p'])
    
    # Filtering boring p-values
    res_p_filter = comb_ps < p_thres
    res_sids = res_sids[res_p_filter]
    comb_ps = comb_ps[res_p_filter]
#     ss_p_filter = marg_ps<p_thres
#     ss_sids = ss_sids[ss_p_filter]
#     marg_ps = marg_ps[ss_p_filter]
    
    common_sids = sp.intersect1d(res_sids, ss_sids)
    print 'Found %d SNPs in common' % (len(common_sids))
    ss_filter = sp.in1d(ss_sids, common_sids)
    res_filter = sp.in1d(res_sids, common_sids)
    
    ss_sids = ss_sids[ss_filter]
    res_sids = res_sids[res_filter]
    marg_ps = marg_ps[ss_filter]
    comb_ps = comb_ps[res_filter]
    
    print 'Now sorting'
    ss_index = sp.argsort(ss_sids)
    res_index = sp.argsort(res_sids)
    
    marg_ps = -sp.log10(marg_ps[ss_index])
    comb_ps = -sp.log10(comb_ps[res_index])
    
    with plt.style.context('fivethirtyeight'):
        plt.plot(marg_ps, comb_ps, 'b.', alpha=0.2)
        (x_min, x_max) = plt.xlim()
        (y_min, y_max) = plt.ylim()
        
        plt.plot([x_min, x_max], [y_min, y_max], 'k--', alpha=0.2)
        plt.ylabel(ylabel)
        plt.xlabel(xlabel)
        plt.tight_layout()
        plt.savefig(fig_filename)
    plt.clf()
Example #5
def plotBias(vals, fn_plot, myidx, logScale = False, refname = 'TCGA'):

    iqr    = ( (sp.percentile(vals[~myidx],75) - sp.percentile(vals[~myidx],25) ) * 1.5)
    iqr2    = ( (sp.percentile(vals[myidx],75) - sp.percentile(vals[myidx],25) ) * 1.5)

    sidx   = sp.argsort(vals)
    vals   = vals[sidx]
    myidx = myidx[sidx]

    fig  = plt.figure(figsize=(12,10))
    ax   = fig.add_subplot(111)
    ax_c = ax.twinx()
    ax.vlines(sp.array(sp.arange(sp.sum(vals.shape[0])))[myidx],[0], vals[myidx], label = '%s Reference'%refname)
    ax.vlines(sp.array(sp.arange(sp.sum(vals.shape[0])))[~myidx],[0], vals[~myidx], color = 'r', label = 'Your Samples')

    ax.plot([0,vals.shape[0]],[3,3], '--', color = 'green')
    ax.plot([0,vals.shape[0]],[5,5] , '--',color = 'green')
    ax.plot([0,vals.shape[0]],[iqr + sp.percentile(vals[~myidx], 75),iqr + sp.percentile(vals[~myidx], 75)], '--',color = 'green')
    ax.plot([0,vals.shape[0]],[iqr2 + sp.percentile(vals[myidx], 75),iqr2 + sp.percentile(vals[myidx], 75)], '--',color = 'green')

#    ax.plot([0,vals.shape[0]],[6.25,6.25],'--', color = 'green')
    ax.plot([0,vals.shape[0]],[10,10] , '--',color = 'green')
    ax.set_ylabel('Median 3\'/5\' Bias')
    ax.set_xlim(0,vals.shape[0])
    if logScale:
        ax.set_yscale('log')
        ax_c.set_yscale('log')
    ax_c.set_ylim(ax.get_ylim())

    ### add right side ticks
    if logScale:       
        tick_thresholds = sp.array([3,5,iqr+sp.percentile(vals[~myidx],75),iqr2 + sp.percentile(vals[myidx], 75), 10])#sp.array(sp.log([3,5,iqr+sp.percentile(vals,75), 10, 50]))
    else:
        tick_thresholds = sp.array([3,5,iqr+sp.percentile(vals[~myidx],75),iqr2 + sp.percentile(vals[myidx], 75), 10])
    tick_idx        = sp.argsort(tick_thresholds)
    tick_thresholds = tick_thresholds[tick_idx]
    tick_thresholds = sp.around(tick_thresholds, decimals = 2)
    ax_c.set_yticks(tick_thresholds)

    tick_thresholds                = tick_thresholds.astype('|S4')
    tick_thresholds                = tick_thresholds.astype('|S50')
    tick_thresholds[tick_idx == 2] = tick_thresholds[tick_idx == 2][0] + ' (Your Filter)'
#    tick_thresholds[tick_idx == 3] = tick_thresholds[tick_idx == 3][0] + ' (PRAD Filter)'
    tick_thresholds[tick_idx == 3] = tick_thresholds[tick_idx == 3][0] + ' (%s Filter)'%(refname)

    ax_c.set_yticklabels(tick_thresholds)


    ax.grid()
    ax.legend(loc=2)
    plt.tight_layout()
    plt.savefig(fn_plot, dpi = 300)
    plt.clf()
Example #6
 def _query(self,lv,k=None):
     if (k==None):
       k=self.k
     if (type(lv)!=numpy.ndarray):
         lv=numpy.array(lv)
     if (lv.ndim==1):
         lv=lv.reshape(1,lv.shape[0])
     if (lv.shape[0]==1):
       dt=abs(self.va.reshape(self.va.shape[0],1)-lv).T
       dr=scipy.argsort(dt)[0,:k]
       return numpy.vectorize(lambda x:self.va[x])(dr).reshape(1,k)
     else:
       dt=scipy.spatial.distance.cdist(lv,self.va.reshape(self.va.shape[0],1))
       dr=scipy.argsort(dt)[:,:k]
       return numpy.vectorize(lambda x:self.va[x])(dr)
def remove_isolated_clusters(conns, nonzero_locs, num_to_keep):
    r"""
    Identifies and removes all disconnected clusters except the number of
    groups specified by "num_to_keep". num_to_keep=N retains the N largest
    clusters
    """
    #
    adj_mat = generate_adjacency_matrix(conns, nonzero_locs)
    #
    logger.info('determining connected components...')
    cs_ids = csgraph.connected_components(csgraph=adj_mat, directed=False)[1]
    groups, counts = sp.unique(cs_ids, return_counts=True)
    order = sp.argsort(counts)[::-1]
    groups = groups[order]
    counts = counts[order]
    #
    msg = '    {} component groups for {} total nodes'
    logger.debug(msg.format(groups.size, cs_ids.size))
    msg = '    largest group number: {}, size {}'
    logger.debug(msg.format(groups[0], counts[0]))
    msg = '    {} % of nodes contained in largest group'
    logger.debug(msg.format(counts[0]/cs_ids.size*100))
    msg = '    {} % of nodes contained in {} retained groups'
    num = sp.sum(counts[0:num_to_keep])/cs_ids.size*100
    logger.debug(msg.format(num, num_to_keep))
    #
    inds = sp.where(sp.in1d(cs_ids, groups[0:num_to_keep]))[0]
    num = nonzero_locs.size
    nonzero_locs = nonzero_locs[inds]
    msg = '    removed {} disconnected nodes'
    logger.debug(msg.format(num - nonzero_locs.size))
    #
    return nonzero_locs
Example #8
def writeTopXGenes2File(filename,sqlfile,outdir,top=1000):
    f = h5py.File(filename,'r')
    chromosomes = f['chromosomes'][:]
    positions = f['positions'][:]
    p_values = f['p_values'][:].flatten()
    name = f['phenotype_name'].value.replace(" ","_").replace("<i>","").replace("</i>","")
    ind = sp.argsort(p_values)[:-1]
    chromosomes = chromosomes[ind]
    positions = positions[ind]
    p_values = p_values[ind]
    chromosomes = chromosomes[0:top]
    positions = positions[0:top]
    p_values = p_values[0:top]
    f.close()

    sqlite = sqlite3.connect(sqlfile)
    sqlite_cursor = sqlite.cursor()

    out = open(os.path.join(outdir,name + ".csv"),"w")

    out.write("Chr,Pos,PVal,GeneID (closest),Distance (bp)\n")
    for i in xrange(chromosomes.shape[0]):
        sqlite_cursor.execute("SELECT * FROM geneannotation WHERE chromosome_id=? ORDER BY ABS(annotation_start - ?) LIMIT 1",(str(chromosomes[i]),int(positions[i])))
        annotation = sqlite_cursor.fetchall()
        #print annotation
        if len(annotation)==1:
            if positions[i] >= annotation[0][3] and positions[i] <= annotation[0][4]:
                distance = 0
            elif positions[i] > annotation[0][4]:
                distance = abs(positions[i]-annotation[0][4])
            else:
                distance = abs(positions[i]-annotation[0][3])
            out.write(chromosomes[i] + "," + str(int(positions[i])) + ",%.2e"%(p_values[i]) + "," + annotation[0][1] + "," + str(int(distance)) + "\n")
    sqlite.close()
Example #9
def eigsort(eigresult):
    """
    Sort the output of scipy.linalg.eig() in terms of 
    eigenvalue magnitude
    """
    ix = sp.argsort(abs(eigresult[0]))
    return ( eigresult[0][ix], eigresult[1][:,ix] )
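
# Hypothetical usage sketch (not part of the original snippet): sort the
# spectrum of a small symmetric matrix by eigenvalue magnitude. Assumes
# `sp` is scipy, as in the function above.
from scipy import linalg
A = sp.array([[2.0, 1.0], [1.0, 3.0]])
vals, vecs = eigsort(linalg.eig(A))  # vals ordered by |eigenvalue|; columns of vecs reordered to match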
Example #10
File: QTR.py Project: xkronosua/QTR
	def loadData(self):
		'''Load data from files'''
		Tabs = ( ('tab_2', 'tab_3','tab_4'),
			('tab_3', 'tab_2','tab_4'))
		uiObj = ('XColumn', 'YColumn', 'MColumn', 'MCheck')
		
		senderName = self.sender().objectName()
		key = senderName[0]
		active = [self.Types[key]] + self.findUi( [key + i for i in uiObj])
		data = []
		XY = sp.zeros((0,2))
		path = self.Path[active[0]]
		if os.path.exists(path):
			try:
				data = sp.loadtxt(path)
				'''
				activeFilt = self.findChilds(QtGui.QLineEdit, FiltersKeys[active[0]])
				filtNames = ''
				
				if activeFilt[0].isEnabled() and activeFilt[1].isEnabled():
					self.filtersDict = self.getFilters(length = self.LENGTH)
					for i in (0,1):
						filtNames = activeFilt[i].text().strip().replace(" ","").upper()
						temp = 1.
						
						if filtNames:
							temp = self.resFilters(filtNames)
							
						self.filtList[active[0]][i] = temp
				else:
					self.filtList[active[0]][:] = [1., 1.]
				print("Filters [X,Y]:",self.filtList[active[0]])
				'''
				xc = active[1].value()
				yc = active[2].value()
				mc = active[3].value()
				if active[4].checkState():
					XY = sp.array( [data[:,xc], data[:,yc] ]).T / sp.array([data[:,mc], data[:,mc]]).T
				else:
					XY = sp.array( [data[:,xc], data[:,yc] ]).T
				XY = XY[XY[:,0] > 0]
				XY = XY[XY[:,1] > 0]
				if getattr(self.ui,senderName[0]+'CutForward').isChecked():
					p = sp.where( XY[:,0] == XY[:,0].max())[0][0]
					print(p)
					XY = XY[:p,:]
				XY = XY[sp.argsort(XY[:,0])]
				'''
				XY[:,0] = XY[:,0]/self.filtList[active[0]][0]
				XY[:,1] = XY[:,1]/self.filtList[active[0]][1]
				'''
				self.updateData(array = Array(XY,Type = active[0]), action = 0)
				tabs = self.findUi(Tabs[active[0]])
				tabs[0].setEnabled(True)
				
				if tabs[1].isEnabled():
					tabs[2].setEnabled(True)
			except (ValueError, IOError, IndexError):
				self.mprint("loadData: readError")
		else:  self.mprint('loadData: pathError')
Example #11
    def gettimes(ionocontlist):
        """
        This static method will take a list of files, or a single string, and
        determine the time ordering and give the sort order the files should be in.
        Inputs
            ionocontlist- A list of IonoContainer h5 files. Can also be a single
            string of a file name.
        Outputs
            sortlist - A numpy array of integers that will chronologically order
            the files
            outtime - A Nt x 2 numpy array of all of the times.
            timebeg - A list of beginning times
        """
        if isinstance(ionocontlist,string_types):
            ionocontlist=[ionocontlist]
        timelist=[]
        fileslist = []
        for ifilenum,ifile in enumerate(ionocontlist):
            with tables.open_file(str(ifile)) as f:
                times = f.root.Time_Vector.read()


            timelist.append(times)
            fileslist.append(ifilenum*sp.ones(len(times)))
        times_file =sp.array([i[:,0].min() for i in timelist])
        sortlist = sp.argsort(times_file)

        timelist_s = [timelist[i] for i in sortlist]
        timebeg = times_file[sortlist]
        fileslist = sp.vstack([fileslist[i][0] for i in sortlist]).flatten().astype('int64')
        outime = sp.vstack(timelist_s)
        return (sortlist,outime,fileslist,timebeg,timelist_s)
def find(x, v, next_largest=1, indices=None):
    """Returns the index into the 1D array x corresponding to the
    element of x that is either equal to v or the nearest to
    v. x is assumed to contain unique elements.

    if v is outside the range of values in x then the index of the
    smallest or largest element of x is returned.

    If next_largest == 1 then the nearest element taken is the next
    largest, otherwise if next_largest == 0 then the next smallest
    is taken.

    The optional argument indices speeds up multiple calls to this
    function if you pre-calculate indices=argsort(x).
    """
    if indices is None:
        indices=argsort(x)
    xs=take(x, indices)
    assert next_largest in [0,1], "next_largest must be 0 or 1"
    eqmask=(xs==v).tolist()
    try:
        ix = eqmask.index(1)
    except ValueError:
        if next_largest:
            mask=(xs<v).tolist()
        else:
            mask=(xs>v).tolist()
        try:
            ix=min([max([0,mask.index(1-next_largest)+next_largest-1]),len(mask)-1])
        except ValueError:
            ix = 0+next_largest-1
    return indices[ix]
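
# Hypothetical usage sketch (not part of the original snippet): the function
# above uses bare argsort/take, so this assumes they are imported from scipy
# (or numpy) at module level.
from scipy import argsort, take
x = [0.1, 2.5, 1.3, 4.0]
find(x, 1.0)                  # -> 2, since 1.3 is the next value above 1.0
find(x, 1.0, next_largest=0)  # -> 0, since 0.1 is the next value below 1.0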
Example #13
def readAnnotationFile(fn, format='gaf'):
    ### get list of overlapping genes
    overlapgenes = getOverlapGenes(fn, format)

    ### reading in gaf
    data   = readinganno(fn, overlapgenes, format)

    uqgid   = data.keys() ###  unique gene ids
    newdata = []
    for gid in uqgid:
        ### process transcripts
        if len(data[gid]) == 1:
            temp = processSingleTranscriptGenes(data[gid])
        else:
            temp = processMultiTranscriptGenes(data[gid])

        ### make sure it has been processed correctly
        if temp is None:
            continue
        else:
            temp.extend([gid])
            newdata.append(temp)

    newdata = sp.array(newdata)
    sidx    = sp.argsort(newdata[:,5])
    newdata = newdata[sidx,:]
    ### filter gene with no name
    return sp.array(newdata)
 def apply_flow(self,flowrate):
     r'''
     Convert the invaded sequence into an invaded time for a given flow rate
     considering the volume of invaded pores and throats.
     
     Parameters
     ----------
     flowrate : float
         The flow rate of the injected fluid
         
     Returns
     -------
     Creates a throat array called 'invasion_time' in the Algorithm 
     dictionary
     
     '''
     P12 = self._net['throat.conns']  # List of throats conns
     a = self['throat.invasion_sequence']  # Invasion sequence
     b = sp.argsort(self['throat.invasion_sequence'])
     P12_inv = self['pore.invasion_sequence'][P12]  # Pore invasion sequence
     # Find if the connected pores were invaded with or before each throat
     P1_inv = P12_inv[:,0] == a
     P2_inv = P12_inv[:,1] == a
     c = sp.column_stack((P1_inv,P2_inv))  
     d = sp.sum(c,axis=1,dtype=bool)  # List of Pores invaded with each throat
     # Find volume of these pores
     P12_vol = sp.zeros((self.Nt,))
     P12_vol[d] = self._net['pore.volume'][P12[c]]
     # Add invaded throat volume to pore volume (if invaded)
     T_vol = P12_vol + self._net['throat.volume']
     # Cumulative sum on the sorted throats gives cumulated inject volume
     e = sp.cumsum(T_vol[b]/flowrate)
     t = sp.zeros((self.Nt,))
     t[b] = e  # Convert back to original order
     self._phase['throat.invasion_time'] = t
Example #15
    def eigensigma(self):
        from scipy.linalg import eig
        from scipy.sparse import lil_matrix,bmat,eye
        from scipy import argsort,where
        #from scipy.sparse.linalg import eigen
        transverseH = lil_matrix((self.wafer.shape[1],self.wafer.shape[1]))
        transverseH.setdiag([2*self.t0]*self.wafer.shape[1])
        transverseH.setdiag([-self.t0]*self.wafer.shape[1],1)
        transverseH.setdiag([-self.t0]*self.wafer.shape[1],-1)
#following is wrong
        #SO=eye(self.wafer.shape[1],self.wafer.shape[1],1)*self.tso-eye(self.wafer.shape[1],self.wafer.shape[1],-1)*self.tso
        #transverseHspin = bmat([[transverseH, SO],[SO,transverseH]])
        #self.HH = transverseHspin
        #from pudb import set_trace; set_trace()
        v,d = eig(transverseH.todense())
        ndx = argsort(v)
        d=d[:,ndx]
        v=v[ndx]
        self.v = v
        self.d = d
        try:
            self.maxmode = where(self.v < self.Efermi-self.band_bottom)[0].max()+1
        except ValueError:
            print "- ValueError probably no modes will fit at that energy"
        if v.max() > self.Efermi-self.band_bottom:
            print 'Some mode energies larger than fermi energy, only up to mode {0} will fit'.format(self.maxmode)
            print 'Argument num_modes="all" takes only modes low enough'
            print ''
Example #16
 def query(self,lv,k=None):
     """ returns distance and element index"""
     if (k==None):
       k=self.k
     if (type(lv)!=numpy.ndarray):
         lv=numpy.array(lv)
     if (lv.ndim==1):
         lv=lv.reshape(1,lv.shape[0])
     if (lv.shape[0]==1):
       dt=abs(self.va.reshape(self.va.shape[0],1)-lv).T
       dr=scipy.argsort(dt)[0,:k]
       return dt.take(dr),dr.reshape(k)
     else:
       dt=scipy.spatial.distance.cdist(lv,self.va.reshape(self.va.shape[0],1))
       dr=scipy.argsort(dt)[:,:k]
       return dt.take(dr),dr
Example #17
def benjamini_hochberg_yekutieli(p_values=None,q_value=0.05,sort_idx=None,return_sort_idx=False):
    p_values = p_values.ravel()
    if sort_idx is None:
        sort_idx = sp.argsort(p_values)
        p_values = p_values[sort_idx]
    else:
        sort_idx = sort_idx.ravel()
        p_values = p_values[sort_idx]
    m = p_values.shape[0]
    idx_line = sp.arange(1,m+1)
    cV = (1.0/idx_line).sum()
    thr_line = (idx_line*q_value*cV)/float(m);
    thr_ind = sp.where(p_values<=thr_line)[0]
    if thr_ind.shape[0]==0:
        thr = 0.0;
    else:
        thr = p_values[thr_ind.max()]
    #adjust p_values
    p_values_adjusted = sp.ones(m)
    prev = 1.0
    for i in range(m,0,-1):
        p_values_adjusted[i-1] = sp.minimum(prev,p_values[i-1]*float(m)*cV/float(i))
        if p_values_adjusted[i-1]>1:
            p_values_adjusted[i-1]=1
        prev = p_values_adjusted[i-1]
    #resort pvalues
    p_tmp = p_values_adjusted.copy()
    p_values_adjusted[sort_idx] = p_tmp
    if return_sort_idx==True:
        return [thr,p_values_adjusted,sort_idx]        
    else:
        return [thr,p_values_adjusted]
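
# Hypothetical usage sketch (not part of the original snippet): BY-correct a
# handful of p-values at q = 0.05. Assumes `sp` is scipy, as above.
pvals = sp.array([0.001, 0.008, 0.039, 0.041, 0.6])
thr, p_adj = benjamini_hochberg_yekutieli(p_values=pvals, q_value=0.05)
# thr is the largest raw p-value declared significant; p_adj holds the adjusted values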
    def setup(self, phase, throat_prop='throat.capillary_pressure', **kwargs):
        r"""
        Set up the required parameters for the algorithm

        Parameters
        ----------
        phase : OpenPNM Phase object
            The phase to be injected into the Network.  The Phase must have the
            capillary entry pressure values for the system.

        throat_prop : string
            The name of the throat property containing the capillary entry
            pressure.  The default is 'throat.capillary_pressure'.

        """
        self._phase = phase
        # Setup arrays and info
        self['throat.entry_pressure'] = phase[throat_prop]
        # Indices into t_entry giving a sorted list
        self['throat.sorted'] = sp.argsort(self['throat.entry_pressure'], axis=0)
        self['throat.order'] = sp.zeros_like(self['throat.sorted'])
        self['throat.order'][self['throat.sorted']] = sp.arange(0, self._net.Nt)
        self['throat.invaded'] = -sp.ones((self._net.Nt,))
        self['pore.invaded'] = -sp.ones((self._net.Np,))
        self._tcount = 0
 def nms(boxes, T = 0.5):
     if len(boxes) == 0:
         return []
     boxes = boxes.astype("float")
     pick = []
     x1 = boxes[:,0]
     y1 = boxes[:,1]
     x2 = boxes[:,2]
     y2 = boxes[:,3]    
     area = (x2 - x1 + 1) * (y2 - y1 + 1)
     idxs = sp.argsort(y2)    
     while len(idxs) > 0:
         last = len(idxs) - 1
         i = idxs[last]
         pick.append(i)
         xx1 = sp.maximum(x1[i], x1[idxs[:last]])
         yy1 = sp.maximum(y1[i], y1[idxs[:last]])
         xx2 = sp.minimum(x2[i], x2[idxs[:last]])
         yy2 = sp.minimum(y2[i], y2[idxs[:last]])
         w = sp.maximum(0, xx2 - xx1 + 1)
         h = sp.maximum(0, yy2 - yy1 + 1)
         I = w * h
         #overlap_ratio = I / area[idxs[:last]]
         overlap_ratio = I /(area[i] +  area[idxs[:last]] - I)
         idxs = sp.delete(idxs, sp.concatenate(([last], sp.where(overlap_ratio > T)[0])))
     return boxes[pick].astype("int")
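
# Hypothetical usage sketch (not part of the original snippet): suppress
# overlapping detections with the nms routine above. Assumes `sp` is scipy
# and that nms is callable as a plain function.
boxes = sp.array([[10, 10, 50, 50],
                  [12, 12, 52, 52],
                  [100, 100, 150, 150]])
kept = nms(boxes, T=0.5)  # the two heavily overlapping boxes collapse into one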
Example #20
    def _get_model_cv_preds(self, model, X_train, y_train, cache_file):
        """
        Return cross-validation predictions on the training set, using cache
        if possible.
        This is used if stacking is enabled (ie. a second model is used to
        combine the stage 0 predictions).
        """
        stack_preds = load_from_cache(
            "models/%s/cv_preds/%s.pkl" % (self.cache_dir, cache_file),
            self.use_cached_models)

        if stack_preds is None:
            kfold = cross_validation.StratifiedKFold(y_train, 4)
            stack_preds = []
            indexes_cv = []
            for stage0, stack in kfold:
                model.fit(X_train[stage0], y_train[stage0])
                stack_preds.extend(list(model.predict_proba(
                    X_train[stack])[:, 1]))
                indexes_cv.extend(list(stack))
            stack_preds = np.array(stack_preds)[sp.argsort(indexes_cv)]

            with open("cache/models/%s/cv_preds/%s%d.pkl" % (
                    self.cache_dir, cache_file), 'wb') as f:
                pickle.dump(stack_preds, f, pickle.HIGHEST_PROTOCOL)

        return stack_preds
Example #21
    def CreateEnergyGrid(self,ParticlesPerBin=1000):
        v2 = self.Snapshot.vx*self.Snapshot.vx+self.Snapshot.vy*self.Snapshot.vy+self.Snapshot.vz*self.Snapshot.vz
        E = 0.5*v2 + self.Snapshot.V
        
        index = scipy.argsort(E)
        
        tmpE = []
        tmpMass = []
        
        N = len(index)
        BinNo = 0
        self.EGrid = EnergyGrid()
        
        TotalMass = self.Snapshot.m.sum()

        while (BinNo+1)*ParticlesPerBin < N:
            Particles = index[ range(BinNo*ParticlesPerBin,(BinNo+1)*ParticlesPerBin) ]
            Max = E[Particles].max()
            Min = E[Particles].min()
            Mean = E[Particles].mean()
            tmpE.append( Mean )
            tmpMass.append( self.Snapshot.m[Particles].sum() / ( Max - Min )  )
            BinNo += 1
        
        self.EGrid.Mass = scipy.array(tmpMass)
        self.EGrid.E = scipy.array(tmpE)        
        return self.EGrid
Example #22
    def add_times(self,self2):
        """This method will combine the times and content of two instances of the GeoData class.
        The first object will be extended in time."""
        datakeys = self.data.keys()
        assert set(datakeys) ==set(self2.data.keys()),'Data must have the same names.'
        # Look at the coordinate names
        assert self.coordnames==self2.coordnames,'Must have the same coordinate names.'
        # Look at the data location
        a = np.ma.array(self.dataloc,mask=np.isnan(self.dataloc))
        blah = np.ma.array(self2.dataloc,mask=np.isnan(self2.dataloc))
        assert np.ma.allequal(a,blah),'Location points must be the same'

        # Look at the sensor location
        a = np.ma.array(self.sensorloc,mask=np.isnan(self.sensorloc))
        blah = np.ma.array(self2.sensorloc,mask=np.isnan(self2.sensorloc))
        assert np.ma.allequal(a,blah),'Sensor Locations must be the same'

        alltimes = sp.vstack((timerepair(self.times),timerepair(self2.times)))

        #sort based off of start times
        s_ind = sp.argsort(alltimes[:,0])
        self.times = alltimes[s_ind]
        
        if self.issatellite():
            for ikey in self.datanames():
                outarr=sp.concatenate((self.data[ikey],self2.data[ikey]),0)
                self.data[ikey]=outarr[s_ind]
            for ikey in self.datanames():
                outarr = sp.hstack((self.data[ikey],self2.data[ikey]))
                self.data[ikey] = outarr[:,s_ind]
Example #23
def roc(labels, predictions):
    """roc - calculate receiver operator curve
    labels: true labels (>0 : True, else False)
    predictions: the ranking generated from whatever predictor is used"""
    #1. convert to arrays
    labels = S.array(labels).reshape([-1])
    predictions = S.array(predictions).reshape([-1])

    #threshold
    t = labels>0
    
    #sort predictions in descending order
    #get order implied by predictor (descending)
    Ix = S.argsort(predictions)[::-1]
    #reorder truth
    t = t[Ix]

    #compute true positive and false positive rates
    tp = S.double(N.cumsum(t))/t.sum()
    fp = S.double(N.cumsum(~t))/(~t).sum()

    #add end points
    tp = S.concatenate(([0],tp,[1]))
    fp = S.concatenate(([0],fp,[1]))

    return [tp,fp]
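
# Hypothetical usage sketch (not part of the original snippet): ROC curve for
# a toy predictor. Assumes S is scipy and N is numpy, matching the aliases
# used in the function body.
labels = S.array([1, 1, 0, 0, 1, 0])
scores = S.array([0.9, 0.8, 0.7, 0.4, 0.3, 0.1])
tp, fp = roc(labels, scores)  # (fp, tp) traces the curve from (0, 0) to (1, 1)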
Example #24
  def _get_model_cv_preds(self, model, X_train, y_train):
    """
    Return cross-validation predictions on the training set
    """
    fname = self._get_model_cv_fname(model, X_train, y_train, self.n_folds_stack)
    try:
        logger.debug("trying to load cv_pred from  %s", fname)
        with open(fname,"rb") as f:
            stack_preds = pickle.load(f)
    except IOError:
        logger.debug("not found: %s", fname)
        stack_preds = None

    if stack_preds is None:
        kfold = cross_validation.StratifiedKFold(y_train, self.n_folds_stack)
        stack_preds = []
        indexes_cv = []
        for stage0, stack in kfold:
            model.fit(X_train[stage0], y_train[stage0])
            stack_preds.extend(list(model.predict_proba(
                X_train[stack])[:, 1]))
            indexes_cv.extend(list(stack))
        stack_preds = np.array(stack_preds)[sp.argsort(indexes_cv)]
    
        with open(fname,"wb") as f:
            pickle.dump(stack_preds,f)
    
    if self.use_logit and self.gnrl=='LR':
        logger.debug('transform stack_preds(%s) using logit',stack_preds.shape)
        stack_preds = logit(stack_preds)
    
    return stack_preds
	def toplines(self,n_lines=5):
		""" This function is given. """
		lines = sp.zeros((self.n_topics,n_lines))
		for i in xrange(self.n_topics):
			args = sp.argsort(self._theta[:,i]).tolist()
			args.reverse()
			lines[i,:] = sp.array(args)[0:n_lines] + 1
		return lines
Example #26
def precision_and_recall(actual,predicted,cls):
    c = (actual == cls)
    si = sp.argsort(-c)
    tp = sp.cumsum(sp.single(predicted[si] == cls))
    fp = sp.cumsum(sp.single(predicted[si] != cls))
    rec = tp /sp.sum(predicted == cls)
    prec = tp / (fp + tp)
    return prec,rec
Example #27
def segmented():
    
    radius = 5 
    sigmaI = 0.02 
    sigmaX = 3.0 
    height = img.shape[0]
    width = img.shape[1]
    flatImg = img.flatten()
    darkImg = flatImg
    brightImg = flatImg
    
    nodes = img.flatten()
    
    W = spar.lil_matrix((nodes.size, nodes.size),dtype=float)
    D = sp.zeros((1,nodes.size))
    
    for row in range(height):
        for col in range(width):				
            for k in range(row-radius,row+radius):
                for l in range(col-radius,col+radius):
                    try:
                        w = weight(row,col,k,l)
                        W[row*width+col,k*width+l] = w
                        D[0,row*width+col] += w		
                    except:
                        continue
                        
    D = spar.spdiags(D, 0, nodes.size, nodes.size)

    Q = D - W
     
    D1 = D.todense()
    Q1 = Q.todense()
    
    diags = sp.diag(D1)
    DminusHalf = sp.diag(diags**-0.5)
    
    
    segQ = sp.dot(sp.dot(DminusHalf, Q1),DminusHalf)
    vals, vecs = la.eig(segQ)
    
    vecind = sp.argsort(vals)[1]
    theVec = vecs[vecind]

    for i in range(0,height**2):
        if theVec[i] < 0:
            darkImg[i] = 0.0
        else:
            brightImg[i] = 0.0
            
    
    darkImg = sp.reshape(darkImg, (height,height))
    brightImg = sp.reshape(brightImg, (height,height))
             
    
    
    
    return darkImg, flatImg, brightImg
Example #28
def Experimento(db):
# figure names
 name_arr = scipy.array(db.keys())

# another dictionary: figure names x class labels
 cl = dict(zip(name_arr,[int(db[i][0]) for i in name_arr]))

# Get an N_Samples x N_Features matrix from the input database
# Discard the first column (class labels)
 data = scipy.array([db[nome][1:] for nome in name_arr])

# distancia : the dissimilarity measure to use
#distancias = ['braycurtis','canberra','chebyshev','cityblock','correlation',
#              'cosine','dice','euclidean','hamming','jaccard',
#              'kulsinski','mahalanobis','matching','minkowski',
#              'rogerstanimoto','russelrao','seuclidean','sokalmichener',
#              'sokalsneath','sqeuclidean','yule']

 distancia = 'euclidean'

# Number of samples
 Nobj = data.shape[0]

# Total number of classes
 Nclasses = max(cl.values())

# Number of samples per class
# assuming the database is balanced!!!!
 Nac = Nobj/Nclasses

# Number of retrievals
 Nretr = Nac

# Compute the distance matrix
 md = squareform(pdist(data,distancia))

# For tallying the confusion matrix
 l = scipy.zeros((Nclasses,Nac),dtype = int)

 for i,nome in zip(scipy.arange(Nobj),name_arr):
# For each row of md build the retrieval ranking
# The first element of each row corresponds to the query shape
# Get the class of the objects retrieved in increasing order of distance
  idx = scipy.argsort(md[i])
 # get the classes of the query pattern and of the retrieved images
  classe_padrao = cl[nome]
  name_retr = name_arr[idx] 
  aux = scipy.array([cl[j] for j in name_retr])
 # we are only interested in the Nretr subsequent results
  classe_retrs = aux[1:Nretr]
  n = scipy.nonzero(classe_retrs == classe_padrao)
 # Tally the results
  for i in n[0]:
   l[classe_padrao-1,i] = l[classe_padrao-1,i] + 1 

 return l,Nac
    def __init__(self, N, vectors, coverage_ratio=0.2):
        """
        Performs exact nearest neighbour search on the data set.

        vectors can either be a numpy matrix with all the vectors
        as columns OR a python array containing the individual
        numpy vectors.
        """
        # We need a dict from vector string representation to index
        self.vector_dict = {}
        self.N = N
        self.coverage_ratio = coverage_ratio

        # Get numpy array representation of input
        self.vectors = numpy_array_from_list_or_numpy_array(vectors)

        # Build map from vector string representation to vector
        for index in range(self.vectors.shape[1]):
            self.vector_dict[self.__vector_to_string(
                self.vectors[:, index])] = index

        # Get transposed version of vector matrix, so that the rows
        # are the vectors (needed by cdist)
        vectors_t = numpy.transpose(self.vectors)

        # Determine the indices of query vectors used for comparison
        # with approximated search.
        query_count = numpy.floor(self.coverage_ratio *
                                  self.vectors.shape[1])
        self.query_indices = []
        for k in range(int(query_count)):
            index = numpy.floor(k*(self.vectors.shape[1]/query_count))
            index = min(index, self.vectors.shape[1]-1)
            self.query_indices.append(int(index))

        print('\nStarting exact search (query set size=%d)...\n' % query_count)

        # For each query vector get the closest N neighbours
        self.closest = {}
        self.exact_search_time_per_vector = 0.0

        for index in self.query_indices:

            v = vectors_t[index, :].reshape(1, self.vectors.shape[0])
            exact_search_start_time = time.time()
            D = cdist(v, vectors_t, 'euclidean')
            self.closest[index] = scipy.argsort(D)[0, 1:N+1]

            # Save time needed for exact search
            exact_search_time = time.time() - exact_search_start_time
            self.exact_search_time_per_vector += exact_search_time

        print('\nDone with exact search...\n')

        # Normalize search time
        self.exact_search_time_per_vector /= float(len(self.query_indices))
	def topterms(self,n_terms=10):
		""" This function is given. """
		vec = sp.atleast_2d(sp.arange(0,self.n_words))
		topics = []
		for k in xrange(self.n_topics):
			probs = sp.atleast_2d(self._phi[k,:])
			mat = sp.append(probs,vec,0)
			sind = sp.array([mat[:,i] for i in sp.argsort(mat[0])]).T
			topics.append([self.vocab[int(sind[1,self.n_words - 1 - i])] for i in xrange(n_terms)])
		return topics
def test_gaussian_multiple_populations_adpative_population_size(
        db_path, sampler):
    sigma_x = 1
    sigma_y = .5
    y_observed = 2

    def model(args):
        return {"y": st.norm(args['x'], sigma_y).rvs()}

    models = [model]
    models = list(map(SimpleModel, models))
    nr_populations = 4
    population_size = AdaptivePopulationSize(600)
    parameter_given_model_prior_distribution = [
        Distribution(x=st.norm(0, sigma_x))
    ]
    abc = ABCSMC(models,
                 parameter_given_model_prior_distribution,
                 MinMaxDistance(measures_to_use=["y"]),
                 population_size,
                 eps=MedianEpsilon(.2),
                 sampler=sampler)
    abc.new(db_path, {"y": y_observed})

    minimum_epsilon = -1

    abc.do_not_stop_when_only_single_model_alive()
    history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)
    posterior_x, posterior_weight = history.get_distribution(0, None)
    posterior_x = posterior_x["x"].values
    sort_indices = sp.argsort(posterior_x)
    f_empirical = sp.interpolate.interp1d(
        sp.hstack((-200, posterior_x[sort_indices], 200)),
        sp.hstack((0, sp.cumsum(posterior_weight[sort_indices]), 1)))

    sigma_x_given_y = 1 / sp.sqrt(1 / sigma_x**2 + 1 / sigma_y**2)
    mu_x_given_y = sigma_x_given_y**2 * y_observed / sigma_y**2
    expected_posterior_x = st.norm(mu_x_given_y, sigma_x_given_y)
    x = sp.linspace(-8, 8)
    max_distribution_difference = sp.absolute(
        f_empirical(x) - expected_posterior_x.cdf(x)).max()
    assert max_distribution_difference < 0.15
    assert history.max_t == nr_populations - 1
    mean_emp, std_emp = mean_and_std(posterior_x, posterior_weight)
    assert abs(mean_emp - mu_x_given_y) < .07
    assert abs(std_emp - sigma_x_given_y) < .12
Example #32
    def __init__(self, hash_name, projection_count, training_set):
        """
        Computes principal components for training vector set. Uses
        first projection_count principal components for projections.

        Training set must be either a numpy matrix or a list of
        numpy vectors.
        """
        super(PCABinaryProjections, self).__init__(hash_name)
        self.projection_count = projection_count

        # Only do training if training set was specified
        if not training_set is None:
            # Get numpy array representation of input
            training_set = numpy_array_from_list_or_numpy_array(training_set)

            # Get subspace size from training matrix
            self.dim = training_set.shape[0]

            # Get transposed training set matrix for PCA
            training_set_t = numpy.transpose(training_set)

            # Compute principal components
            (eigenvalues, eigenvectors) = perform_pca(training_set_t)

            # Get largest N eigenvalue/eigenvector indices
            largest_eigenvalue_indices = numpy.flipud(
                scipy.argsort(eigenvalues))[:projection_count]

            # Create matrix for first N principal components
            self.components = numpy.zeros(
                (self.dim, len(largest_eigenvalue_indices)))

            # Put first N principal components into matrix
            for index in range(len(largest_eigenvalue_indices)):
                self.components[:, index] = \
                    eigenvectors[:, largest_eigenvalue_indices[index]]

            # We need the component vectors to be in the rows
            self.components = numpy.transpose(self.components)
        else:
            self.dim = None
            self.components = None

        # This is only used in case we need to process sparse vectors
        self.components_csr = None
Example #33
 def sorted_csr_from_coo(shape, row_idx, col_idx, val, only_topk=None):
     m = (sp.absolute(val).sum() + 1) * 3
     sorted_idx = sp.argsort(row_idx * m - val)
     row_idx[:] = row_idx[sorted_idx]
     col_idx[:] = col_idx[sorted_idx]
     val[:] = val[sorted_idx]
     indptr = sp.cumsum(sp.bincount(row_idx + 1, minlength=(shape[0] + 1)))
     if only_topk is not None and isinstance(only_topk, int):
         only_topk = max(min(1, only_topk), only_topk)
         selected_idx = (sp.arange(len(val)) - indptr[row_idx]) < only_topk
         row_idx = row_idx[selected_idx]
         col_idx = col_idx[selected_idx]
         val = val[selected_idx]
     indptr = sp.cumsum(sp.bincount(row_idx + 1, minlength=(shape[0] + 1)))
     return smat.csr_matrix((val, col_idx, indptr),
                            shape=shape,
                            dtype=val.dtype)
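
# Hypothetical usage sketch (not part of the original snippet): build a sorted
# CSR matrix from COO-style arrays. Assumes `sp` is scipy, `smat` is
# scipy.sparse at module level, and sorted_csr_from_coo is callable as a
# plain function.
import scipy.sparse as smat
row = sp.array([0, 0, 1])
col = sp.array([2, 0, 1])
val = sp.array([1.0, 3.0, 2.0])
csr = sorted_csr_from_coo((2, 3), row, col, val)  # rows sorted, values descending within each row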
Example #34
def weighted_quantile(points, weights=None, alpha=0.5):
    """
    Weighted alpha-quantile. E.g. alpha = 0.5 -> median.
    """

    # sort input and set weights
    sorted_indices = sp.argsort(points)
    points = points[sorted_indices]
    if weights is None:
        len_points = len(points)
        weights = sp.ones(len_points) / len_points
    else:
        weights = weights[sorted_indices]

    cs = sp.cumsum(weights)
    quantile = sp.interp(alpha, cs - 0.5 * weights, points)
    return quantile
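
# Hypothetical usage sketch (not part of the original snippet): weighted
# median of a small sample. Assumes `sp` is scipy, as above.
pts = sp.array([1.0, 2.0, 3.0, 10.0])
wts = sp.array([0.1, 0.2, 0.4, 0.3])
med = weighted_quantile(pts, weights=wts, alpha=0.5)  # -> 3.0 for these weights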
Example #35
    def setup(self,
              phase,
              entry_pressure='',
              pore_volume='',
              throat_volume=''):
        r"""
        Set up the required parameters for the algorithm

        Parameters
        ----------
        phase : OpenPNM Phase object
            The phase to be injected into the Network.  The Phase must have the
            capillary entry pressure values for the system.

        entry_pressure : string
            The dictionary key to the capillary entry pressure.  If none is
            supplied then the current value is retained. The default is
            'throat.capillary_pressure'.

        pore_volume : string
            The dictionary key to the pore volume.  If none is supplied then
            the current value is retained. The default is 'pore.volume'.

        throat_volume : string
            The dictionary key to the throat volume.  If none is supplied then
            the current value is retained. The default is 'throat.volume'.

        """
        self.settings['phase'] = phase.name
        if pore_volume:
            self.settings['pore_volume'] = pore_volume
        if throat_volume:
            self.settings['throat_volume'] = throat_volume
        if entry_pressure:
            self.settings['entry_pressure'] = entry_pressure

        # Setup arrays and info
        self['throat.entry_pressure'] = phase[self.settings['entry_pressure']]
        # Indices into t_entry giving a sorted list
        self['throat.sorted'] = sp.argsort(self['throat.entry_pressure'],
                                           axis=0)
        self['throat.order'] = 0
        self['throat.order'][self['throat.sorted']] = sp.arange(0, self.Nt)
        self['throat.invasion_sequence'] = -1
        self['pore.invasion_sequence'] = -1
Example #36
def parse_plink_snps(genotype_file, snp_indices):
    plinkf = plinkfile.PlinkFile(genotype_file)
    samples = plinkf.get_samples()
    num_individs = len(samples)
    num_snps = len(snp_indices)
    raw_snps = sp.empty((num_snps, num_individs), dtype='int8')
    # If these indices are not in order then we place them in the right place while parsing SNPs.
    snp_order = sp.argsort(snp_indices)
    # print(snp_indices)
    ordered_snp_indices = list(snp_indices[snp_order])
    ordered_snp_indices.reverse()
    print('Iterating over file to load SNPs')
    snp_i = 0
    next_i = ordered_snp_indices.pop()
    line_i = 0
    max_i = ordered_snp_indices[0]
    while line_i <= max_i:
        if line_i < next_i:
            next(plinkf)
        elif line_i == next_i:
            line = next(plinkf)
            snp = sp.array(line, dtype='int8')
            bin_counts = line.allele_counts()
            if bin_counts[-1] > 0:
                mode_v = sp.argmax(bin_counts[:2])
                snp[snp == 3] = mode_v
            s_i = snp_order[snp_i]

            ## fixed buggy code
            ## wrong encoding of genotype (A1 should be encoded as 1 instead of A2. It is different from plinkio default)
            ## original code:
            # raw_snps[s_i] = snp
            ## new code
            raw_snps[s_i] = 2 - snp
            ## fix finish

            if line_i < max_i:
                next_i = ordered_snp_indices.pop()
            snp_i += 1
        line_i += 1
    plinkf.close()
    assert snp_i == len(raw_snps), 'Failed to parse SNPs?'
    num_indivs = len(raw_snps[0])
    freqs = sp.sum(raw_snps, 1, dtype='float32') / (2 * float(num_indivs))
    return raw_snps, freqs
Example #37
def compute_MI(seqs, batches, emat):
    # preliminaries
    n_seqs = len(batches)
    n_batches = int(batches.max()) + 1  # assumes zero indexed batches
    n_bins = 1000

    #energies = sp.zeros(n_seqs)
    f = sp.zeros((n_batches, n_seqs))

    # compute energies
    # for i in range(n_seqs):
    #     energies[i] = sp.sum(seqs[:,:,i]*emat)
    # alternate way
    dot = emat[:, :, sp.newaxis] * seqs
    energies = dot.sum(0).sum(0)

    # sort energies
    inds = sp.argsort(energies)
    for i, ind in enumerate(inds):
        f[batches[ind], i] = 1.0 / n_seqs  # batches aren't zero indexed

    # bin and convolve with Gaussian
    f_binned = sp.zeros((n_batches, n_bins))

    for i in range(n_batches):
        f_binned[i, :] = sp.histogram(f[i, :].nonzero()[0],
                                      bins=n_bins,
                                      range=(0, n_seqs))[0]
    #f_binned = f_binned/f_binned.sum()
    f_reg = scipy.ndimage.gaussian_filter1d(f_binned, 0.04 * n_bins, axis=1)
    f_reg = f_reg / f_reg.sum()

    # compute marginal probabilities
    p_b = sp.sum(f_reg, axis=1)
    p_s = sp.sum(f_reg, axis=0)

    # finally sum to compute the MI
    MI = 0
    for i in range(n_batches):
        for j in range(n_bins):
            if f_reg[i, j] != 0:
                MI = MI + f_reg[i, j] * sp.log2(f_reg[i, j] /
                                                (p_b[i] * p_s[j]))
    print MI
    return MI, f_reg
Example #38
    def _goodK(self, cutoff=None):
        if cutoff is None:
            cutoff = 1e-10 * self.X.max()

        powers = self.Et * self.Ew.max(0) * self.Eh.max(1)
        sorted_powers = sp.flipud(sp.argsort(powers))
        idx = sp.where(powers[sorted_powers] > cutoff * powers.max())[0]
        goodk = sorted_powers[:(idx[-1] + 1)]
        if powers[goodk[-1]] < cutoff:
            goodk = sp.delete(goodk, -1)

        goodk = sp.sort(goodk)

        # Remove any index where Et exceeds 1
        # too_large_k = sp.where(self.Et > 1.0)[0]
        # goodk = sp.array(list(set(goodk) - set(too_large_k)))

        return goodk
    def _get_model_cv_preds(self, model, X_train, y_train):
        """
        Return cross-validation predictions on the training set.       
        
        This is used if stacking is enabled (ie. a second model is used to
        combine the stage 0 predictions).
        """

        kfold = cross_validation.StratifiedKFold(y_train, 4)
        stack_preds = []
        indexes_cv = []
        for stage0, stack in kfold:
            model.fit(X_train[stage0], y_train[stage0])
            stack_preds.extend(list(model.predict_proba(X_train[stack])[:, 1]))
            indexes_cv.extend(list(stack))
        stack_preds = np.array(stack_preds)[sp.argsort(indexes_cv)]

        return stack_preds
Example #40
 def coo_to_csr(coo):
     nr_rows, nr_cols, nnz, row, col, val = (
         coo.shape[0],
         coo.shape[1],
         coo.data.shape[0],
         coo.row,
         coo.col,
         coo.data,
     )
     indptr = sp.cumsum(sp.bincount(row + 1,
                                    minlength=(nr_rows + 1)),
                        dtype=sp.uint64)
     indices = sp.zeros(nnz, dtype=sp.uint32)
     data = sp.zeros(nnz, dtype=dtype)
     sorted_idx = sp.argsort(row * sp.float64(nr_cols) + col)
     indices[:] = col[sorted_idx]
     data[:] = val[sorted_idx]
     return indptr, indices, data
Example #41
def plotLoadings(FA, term, n_genes=10):
    """Plot highest loadings of a factor

    Args:
        FA                 (:class:`fscLVM.CSparseFA`): Factor analysis object, usually generated using `initFA` function
        term                                  (str): Name of factor for which loadings are to be plotted
        n_genes                                 (int): Number of loadings to be shown

    """

    Zchanged = FA.getZchanged([term])[:, 0]
    W = FA.getW([term])[:, 0]
    Z = FA.getZ([term])[:, 0]
    gene_labels = SP.array(FA.gene_ids)

    #plot weights

    Wabs = SP.absolute(W) * SP.absolute(Z)
    gene_index = SP.argsort(-Wabs)[:n_genes]

    Igain = (Zchanged[gene_index] == 1)
    Ielse = (Zchanged[gene_index] == 0)

    fig = plt.figure(figsize=(5, 5))
    y = SP.arange(len(gene_index))
    if Ielse.any():
        plt.plot(abs(W[gene_index][Ielse] * Z[gene_index][Ielse]),
                 y[Ielse],
                 'k.',
                 label='pre annotated')
    if Igain.any():
        plt.plot(abs(W[gene_index][Igain] * Z[gene_index][Igain]),
                 y[Igain],
                 'r.',
                 label='gains')

    plt.xlabel('Abs. weight', fontsize=14)
    plt.ylabel('Genes', fontsize=14)
    plt.yticks(y, gene_labels[gene_index], fontsize=14)
    plt.xticks(fontsize=13)

    plt.legend()
    plt.show()
    return fig
Example #42
    def Pk2Mp(ar,k,pk,ell_max=None):
        """
        Implementation of FFTLog from A.J.S. Hamilton (2000)
        assumes log(k) are equally spaced
        """

        muk = model.muk
        dmuk = model.dmuk

        k0 = k[0]
        l=np.log(k.max()/k0)
        r0=1.

        N=len(k)
        emm=N*np.fft.fftfreq(N)
        r=r0*sp.exp(-emm*l/N)
        dr=abs(np.log(r[1]/r[0]))
        s=sp.argsort(r)
        r=r[s]

        xi=np.zeros([ell_max//2+1,len(ar)])

        for ell in range(0,ell_max+1,2):
            pk_ell=np.sum(dmuk*L(muk,ell)*pk,axis=0)*(2*ell+1)*(-1)**(ell//2)
            mu=ell+0.5
            n=2.
            q=2-n-0.5
            x=q+2*sp.pi*1j*emm/l
            lg1=myGamma.LogGammaLanczos((mu+1+x)/2)
            lg2=myGamma.LogGammaLanczos((mu+1-x)/2)

            um=(k0*r0)**(-2*sp.pi*1j*emm/l)*2**x*sp.exp(lg1-lg2)
            um[0]=sp.real(um[0])
            an=np.fft.fft(pk_ell*k**n/2/sp.pi**2*np.sqrt(sp.pi/2))
            an*=um
            xi_loc=np.fft.ifft(an)
            xi_loc=xi_loc[s]
            xi_loc/=r**(3-n)
            xi_loc[-1]=0
            spline=sp.interpolate.splrep(np.log(r)-dr/2,sp.real(xi_loc),k=3,s=0)
            xi[ell//2,:]=sp.interpolate.splev(np.log(ar),spline)

        return xi
Example #43
 def classify(self, test_data, n_neighbors=5):
     if test_data.shape[1] != self.training_data.shape[1]:
         raise ValueError(
             'Training data and test data do not have the same dimensions.')
     n_train = self.training_data.shape[0]
     n_test = test_data.shape[0]
     dists = sp.zeros((n_test, n_train))
     labels = []
     for i in xrange(n_test):
         for j in xrange(n_train):
             dists[i,
                   j] = la.norm(self.training_data[j, :] - test_data[i, :])
         inds = sp.argsort(dists[i, :])[0:n_neighbors]
         votes = sp.array([
             sum(self.training_labels[inds] == self.classes[k])
             for k in xrange(self.n_classes)
         ])
         labels.append(self.classes[sp.copy(votes).argmax()])
     return labels
def translate(tr_pairs, first_only=False):
    '''Takes a list of pairs with keys that should be translated into each other

    Returns a list of dictionaries providing the requested translation
    '''

    metapickle = 'metadata.pickle.gz'
    if not os.path.exists(metapickle):
        (header_rel, data_rel) = _parse_metatable(META_REL)
        (header_rna, data_rna) = _parse_metatable(META_RNA)
        data_rel = _handle_multi_entries(header_rel, data_rel)

        ### merge the two tables into a single one
        (header, data) = _merge_tables(header_rel, header_rna, data_rel,
                                       data_rna)

        cPickle.dump((header, data), gzip.open(metapickle, 'w'), -1)
    else:
        (header, data) = cPickle.load(gzip.open(metapickle, 'r'))

    dicts = []
    for source, target in tr_pairs:
        curr_dict = dict()
        idx1 = sp.where(header == source)[0][0]
        idx2 = sp.where(header == target)[0][0]
        s_idx = sp.argsort(data[:, idx1])
        data = data[s_idx, :]
        _, cnt = sp.unique(data[:, idx1], return_counts=True)
        cum = 0
        for c in cnt:
            if c == 1 or first_only:
                curr_dict[data[cum, idx1]] = data[cum, idx2]
            else:
                if sp.unique(data[cum:(cum + c), idx2]).shape[0] == 1:
                    curr_dict[data[cum, idx1]] = data[cum, idx2]
                else:
                    curr_dict[data[cum, idx1]] = ','.join(
                        sp.unique(data[cum:(cum + c), idx2]))
            cum += c

        dicts.append(curr_dict)

    return dicts
Example #45
def desi_convert_DLA(inPath, outPath):
    """
    Convert a catalog of DLA from a DESI format to
    the format used by picca
    """

    fromDESIkey2piccaKey = {
        'RA': 'RA',
        'DEC': 'DEC',
        'Z': 'Z_DLA_RSD',
        'ZQSO': 'Z_QSO_RSD',
        'NHI': 'N_HI_DLA',
        'THING_ID': 'MOCKID',
        'DLAID': 'DLAID',
        'PLATE': 'MOCKID',
        'MJD': 'MOCKID',
        'FIBERID': 'MOCKID'
    }

    cat = {}
    h = fitsio.FITS(inPath)
    for k, v in fromDESIkey2piccaKey.items():
        cat[k] = h['DLACAT'][v][:]
    h.close()
    print('INFO: Found {} DLA from {} quasars'.format(
        cat['Z'].size,
        sp.unique(cat['THING_ID']).size))

    w = sp.argsort(cat['THING_ID'])
    for k in cat.keys():
        cat[k] = cat[k][w]

    for k in ['RA', 'DEC']:
        cat[k] = cat[k].astype('float64')

    ### Save
    out = fitsio.FITS(outPath, 'rw', clobber=True)
    cols = [v for v in cat.values()]
    names = [k for k in cat.keys()]
    out.write(cols, names=names, extname='DLACAT')
    out.close()

    return
Example #46
def RBFKernelPCA(matrix=None, gamma=1, n_components=2):
    #1. Compute RBF Kernel
    K = np.exp(-gamma *
               distance.squareform(distance.pdist(matrix, 'sqeuclidean')))
    #2. Center kernel matrix
    N = K.shape[0]
    one_n = np.ones((N, N)) / N
    cen_K = (np.eye(N) - one_n).dot(K.dot(np.eye(N) - one_n))
    #3. Compute eigenvalues and eigenvactors
    [eigen_values, eigen_vectors] = linalg.eig(cen_K)
    #4. sort eigen vectors in decreasing order based on eigen values
    indices = sp.argsort(-eigen_values)
    [eigen_values, eigen_vectors
     ] = [sp.real(eigen_values[indices]), eigen_vectors[:, indices]]
    #5. Return transformed data for the first n_components
    A = (eigen_vectors[:, 0:n_components]) * sp.sqrt(
        1 / eigen_values[0:n_components])
    transformed = transformData(A, cen_K)
    return transformed
Example #47
	def main(self):
		print 'reading image'
		filename = os.path.join(self.main_path,self.img_file_path)
		img = Image.open(filename)
		img = img.resize(self.img_size, Image.ANTIALIAS) 
		arr = scipy.misc.fromimage(img)
		ar = arr.reshape((scipy.product(arr.shape[:2]), arr.shape[2]))
		print 'img_reshaped to size:', ar.shape
		print 'finding clusters'
		codes, dist = scipy.cluster.vq.kmeans(ar, self.num_clusters)
		print 'cluster centres:\n', codes

		vecs, dist = scipy.cluster.vq.vq(ar, codes)         # assign codes
		counts, bins = scipy.histogram(vecs, len(codes))    # count occurrences

		index_max = scipy.argsort(counts) [::-1]           # find most frequent in desc order
		for i in codes[index_max]:
			colour = ''.join(chr(c) for c in i).encode('hex')
			print 'most frequent is %s (#%s)' % (i, colour)
def remove_isolated_clusters(conns, nonzero_locs, num_to_keep, **kwargs):
    r"""
    Identifies and removes all disconnected clusters except the number of
    groups specified by "num_to_keep". num_to_keep=N retains the N largest
    clusters
    """
    #
    adj_mat = generate_adjacency_matrix(conns, nonzero_locs)
    #
    logger.info('determining connected components...')
    cs_ids = csgraph.connected_components(csgraph=adj_mat, directed=False)[1]
    groups, counts = sp.unique(cs_ids, return_counts=True)
    order = sp.argsort(counts)[::-1]
    groups = groups[order]
    counts = counts[order]
    del adj_mat, order
    num_to_keep = min(num_to_keep, groups.size)
    #
    msg = '\t{} component groups for {} total nodes'
    logger.debug(msg.format(groups.size, cs_ids.size))
    msg = '\tlargest group number: {}, size {}'
    logger.debug(msg.format(groups[0], counts[0]))
    msg = '\t{} % of nodes contained in largest group'
    logger.debug(msg.format(counts[0] / cs_ids.size * 100))
    msg = '\t{} % of nodes contained in {} retained groups'
    num = sp.sum(counts[0:num_to_keep]) / cs_ids.size * 100
    logger.debug(msg.format(num, num_to_keep))
    #
    # creating image colored by clusters if desired
    if kwargs.get('output_img', False):
        save_cluster_image(cs_ids, groups, counts, nonzero_locs,
                           kwargs.get('img_shape'), kwargs.get('img_name'))
    #
    inds = sp.where(sp.in1d(cs_ids, groups[0:num_to_keep]))[0]
    del cs_ids, groups, counts
    #
    num = nonzero_locs.size
    nonzero_locs = nonzero_locs[inds]
    msg = '\tremoved {} disconnected nodes'
    logger.debug(msg.format(num - nonzero_locs.size))
    #
    return nonzero_locs
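# For reference, the core idea above (label connected components of a sparse
# adjacency matrix, then keep only the largest groups) on a tiny toy graph;
# this sketch is self-contained and does not need generate_adjacency_matrix.
import numpy as np
from scipy.sparse import coo_matrix, csgraph

# Toy graph: nodes 0-1-2 form one cluster, 3-4 another, node 5 is isolated.
rows = np.array([0, 1, 3])
cols = np.array([1, 2, 4])
adj = coo_matrix((np.ones(rows.size), (rows, cols)), shape=(6, 6))

cs_ids = csgraph.connected_components(csgraph=adj, directed=False)[1]
groups, counts = np.unique(cs_ids, return_counts=True)
order = np.argsort(counts)[::-1]
keep = groups[order][:1]                        # num_to_keep = 1
kept_nodes = np.where(np.in1d(cs_ids, keep))[0]
print(kept_nodes)                               # -> [0 1 2]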
Example #49
def storey_tibishirani(p_values=None, sort_idx=None, return_sort_idx=False):
    p_values = p_values.ravel()
    if sort_idx is None:
        sort_idx = sp.argsort(p_values)
        p_values = p_values[sort_idx]
    else:
        sort_idx = sort_idx.ravel()
        p_values = p_values[sort_idx]
    m = p_values.shape[0]
    if m < 100:  # if the number of tests is too small, use pi0=1
        pi0 = 1.0
    else:  # otherwise estimate pi0 using a natural cubic spline
        #evaluate pi0 for a set of lambdas
        pi0 = []
        lambdas = sp.arange(0.01, 0.96, 0.01)
        counts = []

        for __lambda in lambdas:
            counts.append((p_values > __lambda).sum())
        counts = sp.array(counts)
        for i in xrange(lambdas.shape[0]):
            pi0.append(counts[i] / float(m * (1.0 - lambdas[i])))
        pi0 = sp.array(pi0)

        splrep = interpolate.splrep(lambdas, pi0, k=3)
        pi0 = interpolate.splev(lambdas[-1], splrep)

        if pi0 > 1.0:
            pi0 = 1.0

    q_values = pi0 * p_values
    #q_values[-1] = sp.minimum(q_values[-1],1.0)
    for i in xrange(m - 2, -1, -1):
        q_values[i] = sp.minimum(pi0 * m * p_values[i] / float(i + 1.0),
                                 q_values[i + 1])
    #resort q_values
    q_tmp = q_values.copy()
    q_values[sort_idx] = q_tmp
    if return_sort_idx == True:
        return [q_values, sort_idx]
    else:
        return q_values
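# Quick usage sketch (illustrative only), assuming the function above is in
# scope together with its imports (scipy as sp, scipy.interpolate) and a
# Python 2 interpreter, since the body uses xrange.
import scipy as sp

p_vals = sp.rand(500)                        # toy p-values
q_vals = storey_tibishirani(p_values=p_vals)
print(q_vals.min(), q_vals.max())            # all q-values lie in [0, 1]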
Example #50
def call_dfa(chrom, xdata, DFs, mask, data):
    """Runs DFA on subset of variables from "xdata" as 
    defined by "chrom" and returns a vector of fitness 
    scores to be fed back into the GA
    """
    Y = []
    for x in range(len(chrom)):
        if _remdup(chrom[x]) == 0:
            #extract vars from xdata
            slice = meancent(_slice(xdata, chrom[x]))
            collate = 0
            for nF in range(mask.shape[1]):
                #split in to training and test
                tr_slice, cv_slice, ts_slice, tr_grp, cv_grp, ts_grp, tr_nm, cv_nm, ts_nm = _split(
                    slice, data['class'][:, 0], mask[:, nF].tolist(),
                    data['label'])

                try:
                    u, v, eigs, dummy = cva(tr_slice, tr_grp, DFs)
                    projU = scipy.dot(cv_slice, v)
                    u = scipy.concatenate((u, projU), 0)
                    group2 = scipy.concatenate((tr_grp, cv_grp), 0)

                    B, W = _BW(u, group2)
                    L, A = scipy.linalg.eig(B, W)
                    order = _flip(
                        scipy.argsort(scipy.reshape(L.real, (len(L), ))))
                    Ls = _flip(scipy.sort(L.real))
                    eigval = Ls[0:DFs]

                    collate += sum(eigval)
                except:
                    continue

            if collate != 0:
                Y.append(float(mask.shape[1]) / collate)
            else:
                Y.append(10.0**5)
        else:
            Y.append(10.0**5)

    return scipy.array(Y)[:, scipy.newaxis]  # nA is presumably scipy.newaxis; made explicit so the fitness is returned as a column vector
Example #51
def qqplot(pv, distr = 'log10', alphaLevel = 0.05):
	"""
	This script makes a Quantile-Quantile plot of the observed
	negative log P-value distribution against the theoretical one under the null.

	Input:
		pv				pvalues (numpy array)
		distr           scale of the distribution (log10 or chi2)
		alphaLevel      significance bounds
	"""
	shape_ok = (len(pv.shape)==1) or ((len(pv.shape)==2) and pv.shape[1]==1)
	assert shape_ok, 'qqplot requires a 1D array of p-values'

	tests = pv.shape[0]
	pnull = (0.5 + sp.arange(tests))/tests
	# pnull = np.sort(np.random.uniform(size = tests))    
	Ipv = sp.argsort(pv)

	if distr == 'chi2':    
	    qnull = sp.stats.chi2.isf(pnull, 1)   
	    qemp = (sp.stats.chi2.isf(pv[Ipv],1))
	    xl = 'LOD scores'
	    yl = '$\chi^2$ quantiles'

	if distr == 'log10':
	    qnull = -sp.log10(pnull)
	    qemp = -sp.log10(pv[Ipv])
	    
	    xl = '-log10(P) observed'
	    yl = '-log10(P) expected'

	plt.plot(qnull, qemp, '.')
	#plt.plot([0,qemp.m0x()], [0,qemp.max()],'r')
	plt.plot([0,qnull.max()], [0,qnull.max()],'r')
	plt.ylabel(xl)
	plt.xlabel(yl)
	if alphaLevel is not None:
	    if distr == 'log10':
	        betaUp, betaDown, theoreticalPvals = _qqplot_bar(M=tests,alphaLevel=alphaLevel,distr=distr)
	        lower = -sp.log10(theoreticalPvals-betaDown)
	        upper = -sp.log10(theoreticalPvals+betaUp)
	        plt.fill_between(-sp.log10(theoreticalPvals),lower,upper,color='grey',alpha=0.5)
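# Short usage sketch, assuming matplotlib.pyplot as plt and scipy as sp are
# imported as in the function above; alphaLevel=None is used here because the
# _qqplot_bar helper is not shown.
import scipy as sp
import matplotlib.pyplot as plt

pv = sp.rand(1000)                  # p-values simulated under the null
qqplot(pv, distr='log10', alphaLevel=None)
plt.show()                          # points should follow the red y = x line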
Example #52
def mds(SM):
    """ MDS (Multi Dimensional Scaling)
    @param SM Input similarity matrix
    @return 
        V1: ndarray
            Dimension 1 of MDS
        V2: ndarray
            Dimension 2 of MDS
    """
    N = SM.shape[0]

    # Squared-distance matrix
    D = SM * SM

    # Centering matrix
    one = sp.eye(N) - sp.ones((N, N)) / N

    # Young-Householder transformation (double centering);
    # "one * D * one" would be an element-wise product, so use matrix products
    P = -0.5 * sp.dot(sp.dot(one, D), one)

    # Eigendecomposition
    W, V = sp.linalg.eig(P)
    ind = sp.argsort(W)
    x1 = ind[-1]
    x2 = ind[-2]

    # Optionally scale the coordinates by their standard deviation
    # s = P.std(axis=0)
    # w1 = s[x1]
    # w2 = s[x2]

    # V1 = w1 * V[:, x1]
    # V2 = w2 * V[:, x2]
    V1 = V[:, x1]
    V2 = V[:, x2]

    # Cast to real values
    V1 = V1.astype('float')
    V2 = V2.astype('float')

    return V1, V2
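# Small usage sketch on a toy distance matrix; scipy.linalg is imported
# explicitly so that the sp.linalg.eig call inside mds() resolves.
import scipy as sp
import scipy.linalg
from scipy.spatial import distance

# Four points on a line; their pairwise distances form the input matrix.
pts = sp.array([[0.0], [1.0], [2.0], [10.0]])
D = distance.squareform(distance.pdist(pts))

V1, V2 = mds(D)
print(V1)   # first MDS dimension: preserves the 1-D ordering (up to sign/scale)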
Example #53
def RBFKernelPCA(matrix=None, gamma=1, n_components=2):
    n = matrix.shape[0]
    #1. Compute RBF Kernel
    kernelmat = np.exp(-gamma *
                       (distance.cdist(matrix, matrix, metric='euclidean')))
    #2. Center kernel matrix
    center = np.identity(n) - np.ones((n, n)) / n
    cen_kernelmat = center @ kernelmat @ center
    #3. Compute eigenvalues and eigenvectors
    [eigen_values, eigen_vectors] = linalg.eig(cen_kernelmat)
    #4. sort eigen vectors in decreasing order based on eigen values
    indices = sp.argsort(-eigen_values)
    [eigen_values, eigen_vectors
     ] = [sp.real(eigen_values[indices]), eigen_vectors[:, indices]]
    #should not be negative: make them unit length
    #first two PCs:
    A = np.sqrt(
        1 / eigen_values[:n_components]) * eigen_vectors[:, :n_components]
    #5. Return transformed data
    return sp.dot(A.T, cen_kernelmat.T).T
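# Short usage sketch on two concentric rings, assuming the imports used above
# (numpy as np, scipy as sp, scipy.spatial.distance, scipy.linalg). Note this
# variant exponentiates plain (not squared) Euclidean distances, so gamma plays
# a slightly different role than in the textbook RBF kernel.
import numpy as np

theta = np.linspace(0, 2 * np.pi, 100, endpoint=False)
inner = np.c_[np.cos(theta), np.sin(theta)]          # radius 1 ring
outer = 3.0 * np.c_[np.cos(theta), np.sin(theta)]    # radius 3 ring
X = np.vstack([inner, outer])

Z = RBFKernelPCA(matrix=X, gamma=2.0, n_components=2)
print(Z.shape)   # (200, 2): kernel-PCA coordinates for every sample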
Example #54
def test_continuous_non_gaussian(db_path, sampler):
    def model(args):
        return {"result": sp.rand() * args['u']}

    models = [model]
    models = list(map(SimpleModel, models))
    population_size = ConstantPopulationSize(250)
    parameter_given_model_prior_distribution = [Distribution(u=RV("uniform", 0,
                                                                  1))]
    abc = ABCSMC(models, parameter_given_model_prior_distribution,
                 MinMaxDistanceFunction(measures_to_use=["result"]),
                 population_size,
                 eps=MedianEpsilon(.2),
                 sampler=sampler)
    d_observed = .5
    abc.new(db_path, {"result": d_observed})
    abc.do_not_stop_when_only_single_model_alive()

    minimum_epsilon = -1
    history = abc.run(minimum_epsilon, max_nr_populations=2)
    posterior_x, posterior_weight = history.get_distribution(0, None)
    posterior_x = posterior_x["u"].values
    sort_indices = sp.argsort(posterior_x)
    f_empirical = sp.interpolate.interp1d(sp.hstack((-200,
                                                     posterior_x[sort_indices],
                                                     200)),
                                          sp.hstack((0,
                                                     sp.cumsum(
                                                         posterior_weight[
                                                             sort_indices]),
                                                     1)))

    @sp.vectorize
    def f_expected(u):
        return (sp.log(u)-sp.log(d_observed)) / (- sp.log(d_observed)) * \
               (u > d_observed)

    x = sp.linspace(0.1, 1)
    max_distribution_difference = sp.absolute(f_empirical(x) -
                                              f_expected(x)).max()
    assert max_distribution_difference < 0.12
Example #55
def auc(y, prob, w):
    if len(w) == 0:
        mindiff = scipy.amin(scipy.diff(scipy.unique(prob)))
        pert = scipy.random.uniform(0, mindiff / 3, prob.size)
        t, rprob = scipy.unique(prob + pert, return_inverse=True)
        n1 = scipy.sum(y, keepdims=True)
        n0 = y.shape[0] - n1
        u = scipy.sum(rprob[y == 1]) - n1 * (n1 - 1) / 2  # rprob from return_inverse is 0-based, hence (n1 - 1)
        result = u / (n1 * n0)
    else:
        op = scipy.argsort(prob)
        y = y[op]
        w = w[op]
        cw = scipy.cumsum(w)
        w1 = w[y == 1]
        cw1 = scipy.cumsum(w1)
        wauc = scipy.sum(w1 * (cw[y == 1] - cw1))
        sumw = cw1[-1]
        sumw = sumw * (cw[-1] - sumw)  # total positive weight times total negative weight
        result = wauc / sumw
    return (result)
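# Toy call of both branches, assuming scipy is imported as in the function
# above; with unit weights the two branches compute the same value, and the
# true AUC for this toy data is 8/9.
import scipy

y = scipy.array([0, 0, 1, 0, 1, 1])
prob = scipy.array([0.1, 0.3, 0.35, 0.4, 0.8, 0.9])

print(auc(y, prob, scipy.array([])))   # rank-based branch
print(auc(y, prob, scipy.ones(6)))     # weighted branch with unit weights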
Example #56
    def train(self, X):
        # Center the data
        self.X_mean = X.mean(0)
        X_centered = X - self.X_mean

        # Build the covariance matrix
        V = sp.cov(X_centered.T)

        # Eigendecomposition of V
        self.eigvals, self.eigvecs = linalg.eig(V)

        # Take the n_components largest eigenvalues and stack the matching
        # eigenvectors (the columns of eigvecs) as the rows of the basis U
        eigvals_idx = sp.argsort(self.eigvals)[::-1]
        self.U = self.eigvecs[:, eigvals_idx[:self.n_components]].T

        # Project the centered data onto the basis vectors
        X_pca = sp.dot(self.U, X_centered.T)
        X_pca = X_pca.T

        return X_pca, self.U
Example #57
def cubeIndex_RWGNumbers_computation(RWGNumber_cubeNumber, RWGNumber_cubeCentroidCoord):
    """each finest-level cube must somehow know which edges it contains.
    This function has the goal of establishing this list for every cube.
    Only the cubes containing edges will be retained.
    We also create a list of the cubes centroids, which will be ordered the same
    way as the cubes_lists_edges_numbers list."""
    E = RWGNumber_cubeNumber.shape[0] # the number of RWGs involved
    ind_sorted_cubes_numbers = argsort(RWGNumber_cubeNumber, kind='mergesort')
    sorted_cubes_numbers = take(RWGNumber_cubeNumber, ind_sorted_cubes_numbers, axis=0)
    sorted_edges_numbers = take(arange(E), ind_sorted_cubes_numbers, axis=0)
    sorted_edges_numbers_cubes_centroids = take(RWGNumber_cubeCentroidCoord, ind_sorted_cubes_numbers, axis=0)
    cubes_lists_edges_numbers = {} # the desired dictionary, renewed for each cube
    cube_list_edges_numbers_tmp = [sorted_edges_numbers[0]] # the temporary list, renewed for each cube
    cubes_centroids = [sorted_edges_numbers_cubes_centroids[0]]
    cubeIndex = 0
    for j in range(E-1): # we cannot go up to (E-1), since (j+1) will then be equal to E (out of bound index)
        if sorted_cubes_numbers[j+1] == sorted_cubes_numbers[j]: # if the next cube number is the same as the current one
            cube_list_edges_numbers_tmp.append(sorted_edges_numbers[j+1]) # add the next element to the temporary list
        else: # if not, we then add the temporary "per-cube" list to the complete list
            cubes_lists_edges_numbers[cubeIndex] = array(cube_list_edges_numbers_tmp)
            cubes_centroids.append(sorted_edges_numbers_cubes_centroids[j+1])
            cube_list_edges_numbers_tmp = [sorted_edges_numbers[j+1]] # init of the temporary list for the next cube
            cubeIndex += 1
    # we must append the last temporary list
    if cubeIndex in cubes_lists_edges_numbers:
        cubes_lists_edges_numbers[cubeIndex+1] = array(cube_list_edges_numbers_tmp)
    else:
        cubes_lists_edges_numbers[cubeIndex] = array(cube_list_edges_numbers_tmp)

    # we transform the "cubes_lists_edges_numbers" in a linear array, useful for the C++ code
    C = len(cubes_lists_edges_numbers)
    cubes_edges_numbers = zeros(E, 'i')
    cube_N_RWGs = zeros(C, 'i')
    startIndex = 0
    for j in range(C):
        length = cubes_lists_edges_numbers[j].shape[0]
        cube_N_RWGs[j] = length
        cubes_edges_numbers[startIndex:startIndex + length] = cubes_lists_edges_numbers[j]
        startIndex += length
    return cubes_edges_numbers, cubes_lists_edges_numbers, cube_N_RWGs.astype('i'), (array(cubes_centroids)).astype('d')
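# Tiny usage sketch: five RWG edges spread over three cubes. The bare array
# helpers the function expects (argsort, take, arange, array, zeros) are
# assumed to come from a star import of numpy/scipy in its module.
from numpy import array

RWGNumber_cubeNumber = array([2, 0, 2, 1, 0])
RWGNumber_cubeCentroidCoord = array([[2., 0., 0.],
                                     [0., 0., 0.],
                                     [2., 0., 0.],
                                     [1., 0., 0.],
                                     [0., 0., 0.]])

edges, per_cube_lists, cube_N_RWGs, centroids = cubeIndex_RWGNumbers_computation(
    RWGNumber_cubeNumber, RWGNumber_cubeCentroidCoord)
print(per_cube_lists)   # {0: array([1, 4]), 1: array([3]), 2: array([0, 2])}
print(cube_N_RWGs)      # [2 1 2]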
Example #58
def qqplot(pv, outPlot, color="#2c7fb8", label='unknown', alphaLevel=0.05):
    distr = 'log10'
    ax = plt.gca()
    #if (len(pv.shape) == 1) or ((len(pv.shape) == 2) and pv.shape[1] == 1):
    #        die("qqplot requires a 1D array of p-values")
    tests = pv.shape[0]
    pnull = (0.5 + sp.arange(tests)) / tests
    Ipv = sp.argsort(pv)

    if distr == 'log10':
        qnull = -sp.log10(pnull)
        qemp = -sp.log10(pv[Ipv])
        xl = '-log10(P) observed'
        yl = '-log10(P) expected'

    plt.plot(qnull, qemp, '.', color=color, label=label)
    # plt.plot([0,qemp.m0x()], [0,qemp.max()],'r')
    plt.plot([0, qnull.max()], [0, qnull.max()], 'r')
    plt.ylabel(xl)
    plt.xlabel(yl)
    if alphaLevel is not None:
        if distr == 'log10':
            betaUp, betaDown, theoreticalPvals = _qqplot_bar(
                M=tests, alphaLevel=alphaLevel, distr=distr)
            lower = -sp.log10(theoreticalPvals - betaDown)
            upper = -sp.log10(theoreticalPvals + betaUp)
            plt.fill_between(-sp.log10(theoreticalPvals),
                             lower,
                             upper,
                             color='grey',
                             alpha=0.5)
            # plt.plot(-sp.log10(theoreticalPvals),lower,'g-.')
            # plt.plot(-sp.log10(theoreticalPvals),upper,'g-.')

    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')
    fig = ax.get_figure()
    fig.savefig(outPlot)
Example #59
	def plot_pct_fwds(self):
		"""
		Plot the pct of forward motion for each genotype.
		"""
		
		Nn = len(self.dirs_to_plot)
		for iD, dir in enumerate(self.dirs_to_plot):
			filename = os.path.join(dir, 'pct_fwd.txt')
			tmp_data = sp.loadtxt(filename)
			if self.data is None:
				self.data = sp.zeros((Nn, len(tmp_data)))
			self.data[iD, :] = tmp_data
		
		# Get average fwd_pcts with error bars (1 sem)
		self.avgs = sp.average(self.data, axis=1)*100
		self.stds = sp.std(self.data, axis=1)*100
		sort_idxs = sp.argsort(self.avgs)[::-1]
		
		# Force genotype 0 to be plotted first, even if it is not the highest average (hacky!!)
		if sort_idxs[0] != 0:
			zero_idx = sp.argwhere(sort_idxs == 0)[0]
			change_idx = sort_idxs[zero_idx]
			sort_idxs[zero_idx] = sort_idxs[0]
			sort_idxs[0] = 0
			
		sort_labels = self.genotypes[sort_idxs]
		sort_avgs = self.avgs[sort_idxs]
		sort_stds = self.stds[sort_idxs]
		
		
		# Plot for each genotype
		fig = plt.figure()
		fig.set_size_inches(3, 4)
		plt.errorbar(range(Nn), sort_avgs, sort_stds, lw=0, 
						elinewidth=1.5, capsize=5, color='k')
		plt.scatter(range(Nn), sort_avgs, c=sp.arange(Nn), 
						cmap=plt.cm.winter, zorder=100, s=30)
		plt.ylim(0, 105)
		plt.xticks(rotation=90)
		plt.xticks(range(Nn), sort_labels)
Example #60
def RBFKernelPCA(matrix=None, gamma=1, n_components=2):
    #1. Compute RBF Kernel
    #2. Center kernel matrix
    #3. Compute eigenvalues and eigenvectors
    #4. sort eigen vectors in decreasing order based on eigen values
    #5. Return transformed data for the first n_components

    d = distance.pdist(matrix, 'sqeuclidean')
    m1 = distance.squareform(d)
    K = np.exp(-gamma * m1)
    o1 = np.ones(K.shape) / K.shape[0]
    I = np.identity(K.shape[0])
    Kp = np.dot(np.dot((I - o1), K), (I - o1))

    eigen_values, eigen_vectors = computePCA_SVD(Kp)
    indices = sp.argsort(-eigen_values)
    seigen_values = sp.real(eigen_values[indices])
    seigen_vectors = eigen_vectors[:, indices]
    # Scale each of the first n_components eigenvector columns by 1/sqrt(eigenvalue)
    sseigen_vectors = seigen_vectors[:, 0:n_components] * np.sqrt(
        1.0 / seigen_values[0:n_components])
    transformed = transformData(sseigen_vectors, Kp)
    return transformed