Ejemplo n.º 1
0
def plot_median_errors(RefinementLevels):
    """Plot the median relative SU2 error against grid size for every case.

    For each case listed on the first refinement level, the node count
    (``LUT.D_dim * LUT.P_dim``) and the median relative error in percent
    are collected from all refinement levels, sorted by node count and
    drawn on log-log axes of the current matplotlib figure.  A straight
    line is least-squares fitted in log10 space; its negated slope is shown
    in the legend label as the observed convergence order.

    Parameters
    ----------
    RefinementLevels : sequence
        Objects exposing ``cases``, ``LUT.D_dim``, ``LUT.P_dim``,
        ``get_REL_ERR_SU2(case)`` and ``SU2[case].median_ERR``.

    Returns
    -------
    None
    """
    for i in RefinementLevels[0].cases:
        x = []
        y = []
        # Same text as the former Python-2 ``print "...: ", i`` statement,
        # written so that it is valid on Python 2 and Python 3 alike.
        print("Analyzing median error on:  %s" % (i,))
        for r in RefinementLevels:
            x.append(r.LUT.D_dim * r.LUT.P_dim)
            # Called before the error is read from r.SU2[i]
            # (presumably it populates that entry -- confirm).
            r.get_REL_ERR_SU2(i)
            y.append(r.SU2[i].median_ERR * 100)

        x = sp.array(x)
        y = sp.array(y)
        # Sort both series by node count with a single permutation.
        order = sp.argsort(x)
        y = y[order]
        x = x[order]

        # Fit log10(y) = a + s * log10(x); the convergence order is -s.
        LHM = sp.ones((len(x), 2))
        RHS = sp.ones((len(x), 1))
        LHM[:, 1] = sp.log10(x)
        RHS[:, 0] = sp.log10(y)

        sols = sp.linalg.lstsq(LHM, RHS)
        b = -sols[0][1]
        label = '%s, %s' % (i, r'$O(\frac{1}{N})^{%s}$' % str(sp.around(b, 2)))
        plt.loglog(x, y, label=label, basex=10, basey=10,
                   subsy=sp.linspace(10**(-5), 10**(-2), 20),
                   subsx=sp.linspace(10**(2), 10**(5), 50))

    plt.grid(which='both')
    plt.xlabel('Grid Nodes (N)')
    plt.ylabel('Median relative error [%]')
    return
def my_bh_fdr(p_val_vec):
    """Return rank-scaled p-values in the order of the input vector.

    The p-values are sorted ascending, each one is multiplied by
    ``n / rank`` and capped at 1.  Every entry is then replaced by the
    larger of itself and its predecessor in sorted order (the first entry
    is compared with itself).  Finally the values are put back into the
    original input order.

    Parameters
    ----------
    p_val_vec : array
        One-dimensional array of p-values (must support index arrays).

    Returns
    -------
    array
        Adjusted values, aligned with ``p_val_vec``.
    """
    n_tests = len(p_val_vec)
    order = scipy.argsort(p_val_vec)
    ranks = scipy.arange(1, n_tests + 1)

    # n / rank * p, capped at one by stacking with a row of ones.
    scaled = float(n_tests) / ranks * p_val_vec[order]
    ceiling = scipy.tile(1, [1, n_tests])
    capped = scipy.vstack((scaled, ceiling)).min(axis=0)

    # Pair each entry with the one before it (index 0 pairs with itself).
    shifted = capped[scipy.r_[0, scipy.arange(len(capped) - 1)]]
    raised = scipy.vstack((capped, shifted)).max(axis=0)

    # argsort of the sort permutation is its inverse.
    return raised[scipy.argsort(order)]
Ejemplo n.º 3
0
    def outputTargetSimPairs(self, pairFile):
        """Write similarity statistics for word pairs to ``simPairs.txt``.

        ``pairFile`` is read as whitespace-separated text with one pair per
        line.  Lines holding fewer than two tokens (for instance blank
        lines) are skipped.  For every pair whose two words are both keys
        of ``self.targetDict`` one line is written containing:

        * the raw similarity ``self.similarityMatrix[i, j]``;
        * that similarity as a z-score within row ``i`` and within row ``j``;
        * that similarity as a share of each row after shifting the row by
          the absolute value of its minimum;
        * for each word, ``numTargets`` minus the position of the other
          word in the ascending ordering of the row.

        Pairs with an unknown word get a line of ``NA`` fields instead.

        Parameters
        ----------
        pairFile : str
            Path of the text file listing the word pairs.
        """
        with open(pairFile) as pairFilehandle:
            pairList = [line.split() for line in pairFilehandle]
        # A line without two words cannot name a pair.
        pairList = [words for words in pairList if len(words) >= 2]

        print("..Outputting similarities")
        outputFilename = "simPairs.txt"

        # The context manager guarantees the report is flushed and closed,
        # also when a row raises part-way through.
        with open(outputFilename, "w") as outputFilehandle:
            outputFilehandle.write("word1 word2 sim | zsim1 zsim2 | psim1 psim2 | nIn1 nIn2\n")

            numTargets = len(self.similarityMatrix[0])

            for pair in pairList:
                if not ((pair[0] in self.targetDict) and (pair[1] in self.targetDict)):
                    outputFilehandle.write("%s %s NA NA NA NA NA NA NA\n" % (pair[0], pair[1]))
                    continue

                i = self.targetDict[pair[0]]
                j = self.targetDict[pair[1]]

                sim = self.similarityMatrix[i,j]
                word0Sims = self.similarityMatrix[i]
                word1Sims = self.similarityMatrix[j]

                z0Sim = (sim - word0Sims.mean()) / word0Sims.std()
                z1Sim = (sim - word1Sims.mean()) / word1Sims.std()

                # Shift each row by |min| before normalising by the row sum.
                sim0min = np.amin(word0Sims)
                sim1min = np.amin(word1Sims)
                p0Sim = (sim + abs(sim0min)) / (word0Sims + abs(sim0min)).sum()
                p1Sim = (sim + abs(sim1min)) / (word1Sims + abs(sim1min)).sum()

                # Position of the partner word in the ascending ordering of
                # each row, counted from the top end.
                sortedIndexes0 = np.argsort(word0Sims)
                sortedIndexes1 = np.argsort(word1Sims)
                nIn0 = numTargets - int(np.where(sortedIndexes0 == j)[0][0])
                nIn1 = numTargets - int(np.where(sortedIndexes1 == i)[0][0])

                outputFilehandle.write("%s %s %0.3f | %0.3f %0.3f | %0.5f %0.5f | %0.0f %0.0f\n" % (pair[0], pair[1], sim, z0Sim, z1Sim, p0Sim, p1Sim, nIn0, nIn1))
Ejemplo n.º 4
0
def plot_overlap_ps(result_file, ss_file='/Users/bjarnivilhjalmsson/data/GIANT/GIANT_HEIGHT_Wood_et_al_2014_publicrelease_HapMapCeuFreq.txt',
                   fig_filename='/Users/bjarnivilhjalmsson/data/tmp/manhattan_combPC_HGT.png', method='combPC',
                   ylabel='Comb. PC (HIP,WC,HGT,BMI) $-log_{10}(P$-value$)$', xlabel='Height $-log_{10}(P$-value$)$', p_thres=0.00001):
    """Scatter-plot combined p-values against marginal p-values per SNP.

    Both tables are read with ``pandas.read_table``.  Results whose
    combined p-value is not below ``p_thres`` are dropped, the remaining
    SNP ids are intersected with those of the summary-statistics file, and
    ``-log10`` of the two p-value series is plotted for the shared SNPs
    (x: summary statistics, y: results) together with a dashed line joining
    the lower-left and upper-right corners of the axes.  The figure is
    saved to ``fig_filename`` and then cleared.

    Parameters
    ----------
    result_file : str
        Table with a ``SNPid`` column and a ``pval`` (method ``'MVT'``) or
        ``combPC`` (method ``'combPC'``) column.
    ss_file : str
        Summary-statistics table with a ``p`` column and SNP ids in a
        ``MarkerName`` or ``SNP`` column.
    fig_filename : str
        Where the figure is saved.
    method : {'combPC', 'MVT'}
        Selects the p-value column of ``result_file``.
    ylabel, xlabel : str
        Axis labels.
    p_thres : float
        Only results with a combined p-value below this are kept.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    Exception
        If the summary-statistics table has no known SNP id column.
    """
    # Parse results and summary-statistics files.
    res_table = pandas.read_table(result_file)
    ss_table = pandas.read_table(ss_file)

    res_sids = sp.array(res_table['SNPid'])
    if method == 'MVT':
        comb_ps = sp.array(res_table['pval'])
    elif method == 'combPC':
        comb_ps = sp.array(res_table['combPC'])
    else:
        # Previously this fell through and failed later with an unbound name.
        raise ValueError("Unknown method %r; expected 'MVT' or 'combPC'" % (method,))
    if 'MarkerName' in ss_table.keys():
        ss_sids = sp.array(ss_table['MarkerName'])
    elif 'SNP' in ss_table.keys():
        ss_sids = sp.array(ss_table['SNP'])
    else:
        raise Exception("Don't know where to look for rs IDs")
    marg_ps = sp.array(ss_table['p'])

    # Filter out uninteresting (large) combined p-values.
    res_p_filter = comb_ps < p_thres
    res_sids = res_sids[res_p_filter]
    comb_ps = comb_ps[res_p_filter]

    common_sids = sp.intersect1d(res_sids, ss_sids)
    print('Found %d SNPs in common' % (len(common_sids)))
    ss_filter = sp.in1d(ss_sids, common_sids)
    res_filter = sp.in1d(res_sids, common_sids)

    ss_sids = ss_sids[ss_filter]
    res_sids = res_sids[res_filter]
    marg_ps = marg_ps[ss_filter]
    comb_ps = comb_ps[res_filter]

    # Sorting both sides by SNP id lines the two series up.
    # NOTE(review): this pairing assumes each id occurs once per file -- confirm.
    print('Now sorting')
    ss_index = sp.argsort(ss_sids)
    res_index = sp.argsort(res_sids)

    marg_ps = -sp.log10(marg_ps[ss_index])
    comb_ps = -sp.log10(comb_ps[res_index])

    with plt.style.context('fivethirtyeight'):
        plt.plot(marg_ps, comb_ps, 'b.', alpha=0.2)
        (x_min, x_max) = plt.xlim()
        (y_min, y_max) = plt.ylim()

        plt.plot([x_min, x_max], [y_min, y_max], 'k--', alpha=0.2)
        plt.ylabel(ylabel)
        plt.xlabel(xlabel)
        plt.tight_layout()
        plt.savefig(fig_filename)
    plt.clf()
Ejemplo n.º 5
0
def plotBias(vals, fn_plot, myidx, logScale = False, refname = 'TCGA'):
    """Draw the sorted bias values of two sample groups and save the figure.

    ``vals`` is sorted ascending and drawn as vertical lines: entries where
    ``myidx`` is true form the reference group, the others are drawn in red
    as 'Your Samples'.  Dashed green lines mark the fixed levels 3, 5 and
    10 and, for each group, its 75th percentile plus 1.5 times its
    inter-quartile range.  A twin axis on the right shows those five levels
    as ticks, the two group-specific ones carrying a filter label.

    Parameters
    ----------
    vals : array
        One value per sample.
    fn_plot : str
        Output file passed to ``plt.savefig`` (300 dpi).
    myidx : boolean array
        True for reference samples; same length as ``vals``.
    logScale : bool
        Use a logarithmic y axis on both axes.
    refname : str
        Name of the reference group used in the labels.
    """
    # 1.5 * IQR of each group ("own" = not flagged by myidx).
    own_fence = (sp.percentile(vals[~myidx], 75) - sp.percentile(vals[~myidx], 25)) * 1.5
    ref_fence = (sp.percentile(vals[myidx], 75) - sp.percentile(vals[myidx], 25)) * 1.5

    order = sp.argsort(vals)
    vals = vals[order]
    myidx = myidx[order]
    n_samples = vals.shape[0]

    fig = plt.figure(figsize=(12,10))
    ax = fig.add_subplot(111)
    ax_c = ax.twinx()

    positions = sp.array(sp.arange(sp.sum(n_samples)))
    ax.vlines(positions[myidx], [0], vals[myidx], label = '%s Reference'%refname)
    ax.vlines(positions[~myidx], [0], vals[~myidx], color = 'r', label = 'Your Samples')

    own_threshold = own_fence + sp.percentile(vals[~myidx], 75)
    ref_threshold = ref_fence + sp.percentile(vals[myidx], 75)
    levels = [3, 5, own_threshold, ref_threshold, 10]
    for level in levels:
        ax.plot([0, n_samples], [level, level], '--', color = 'green')

    ax.set_ylabel('Median 3\'/5\' Bias')
    ax.set_xlim(0, n_samples)
    if logScale:
        ax.set_yscale('log')
        ax_c.set_yscale('log')
    ax_c.set_ylim(ax.get_ylim())

    ### right-hand ticks: the same five levels, in ascending order
    tick_thresholds = sp.array(levels)
    tick_idx = sp.argsort(tick_thresholds)
    tick_thresholds = sp.around(tick_thresholds[tick_idx], decimals = 2)
    ax_c.set_yticks(tick_thresholds)

    # Truncate the text to four characters, then widen the string dtype so
    # that the filter suffixes fit.
    tick_thresholds = tick_thresholds.astype('|S4')
    tick_thresholds = tick_thresholds.astype('|S50')
    # tick_idx == 2 / == 3 locate the two group thresholds after sorting.
    tick_thresholds[tick_idx == 2] = tick_thresholds[tick_idx == 2][0] + ' (Your Filter)'
    tick_thresholds[tick_idx == 3] = tick_thresholds[tick_idx == 3][0] + ' (%s Filter)'%(refname)

    ax_c.set_yticklabels(tick_thresholds)

    ax.grid()
    ax.legend(loc=2)
    plt.tight_layout()
    plt.savefig(fn_plot, dpi = 300)
    plt.clf()
Ejemplo n.º 6
0
 def _query(self,lv,k=None):
     if (k==None):
       k=self.k
     if (type(lv)!=numpy.ndarray):
         lv=numpy.array(lv)
     if (lv.ndim==1):
         lv=lv.reshape(1,lv.shape[0])
     if (lv.shape[0]==1):
       dt=abs(self.va.reshape(self.va.shape[0],1)-lv).T
       dr=scipy.argsort(dt)[0,:k]
       return numpy.vectorize(lambda x:self.va[x])(dr).reshape(1,k)
     else:
       dt=scipy.spatial.distance.cdist(lv,self.va.reshape(self.va.shape[0],1))
       dr=scipy.argsort(dt)[:,:k]
       return numpy.vectorize(lambda x:self.va[x])(dr)
Ejemplo n.º 7
0
    def _get_model_cv_preds(self, model, X_train, y_train, cache_file):
        """
        Return cross-validation predictions on the training set, using cache
        if possible.

        This is used if stacking is enabled (ie. a second model is used to
        combine the stage 0 predictions).

        The predictions are looked up through ``load_from_cache``; when that
        returns ``None`` the model is fitted on each of four stratified
        folds, the positive-class probabilities of the held-out rows are
        collected, put back into the original row order and pickled for the
        next call.

        :param model: estimator exposing ``fit`` and ``predict_proba``
        :param X_train: training features, indexable by fold index arrays
        :param y_train: training labels, indexable by fold index arrays
        :param cache_file: base name (without extension) of the cache entry
        :return: array of held-out positive-class probabilities, one per
            training row
        """
        stack_preds = load_from_cache(
            "models/%s/cv_preds/%s.pkl" % (self.cache_dir, cache_file),
            self.use_cached_models)

        if stack_preds is None:
            kfold = cross_validation.StratifiedKFold(y_train, 4)
            stack_preds = []
            indexes_cv = []
            for stage0, stack in kfold:
                model.fit(X_train[stage0], y_train[stage0])
                stack_preds.extend(list(model.predict_proba(
                    X_train[stack])[:, 1]))
                indexes_cv.extend(list(stack))
            # Undo the fold ordering so predictions line up with X_train.
            stack_preds = np.array(stack_preds)[sp.argsort(indexes_cv)]

            # The template used to end in "%s%d.pkl": three placeholders
            # for two values, which raised TypeError before anything was
            # written.  It now mirrors the name used for loading above.
            with open("cache/models/%s/cv_preds/%s.pkl" % (
                    self.cache_dir, cache_file), 'wb') as f:
                pickle.dump(stack_preds, f, pickle.HIGHEST_PROTOCOL)

        return stack_preds
Ejemplo n.º 8
0
def find(x, v, next_largest=1, indices=None):
    """Returns the index into the 1D array x corresponding to the
    element of x that is either equal to v or the nearest to
    v. x is assumed to contain unique elements.

    If v is outside the range of values in x then the index of the
    smallest element (v below the range) or of the largest element
    (v above the range) is returned.

    If next_largest == 1 then the nearest element taken is the next
    largest, otherwise if next_largest == 0 then the next smallest
    is taken.

    The optional argument indices speeds up multiple calls to this
    function if you pre-calculate indices=argsort(x).
    """
    assert next_largest in [0,1], "next_largest must be 0 or 1"
    if indices is None:
        indices = argsort(x)
    xs = take(x, indices)

    eqmask = (xs == v).tolist()
    if True in eqmask:
        return indices[eqmask.index(True)]

    # Position of the largest element in sorted order; used whenever v lies
    # above every element.  (The old fallback returned position 0, i.e. the
    # smallest element, when next_largest was 1.)
    last = len(eqmask) - 1
    if next_largest:
        below = (xs < v).tolist()
        # First sorted element that is not below v.
        ix = below.index(False) if False in below else last
    else:
        above = (xs > v).tolist()
        # Element just before the first one above v, clamped at the start.
        ix = max(above.index(True) - 1, 0) if True in above else last
    return indices[ix]
Ejemplo n.º 9
0
    def gettimes(ionocontlist):
        """
        Determine the chronological order of a set of IonoContainer files.

        Each file is opened with ``tables.open_file`` and its
        ``Time_Vector`` node is read.  The files are then ordered by the
        smallest value found in the first column of their time vectors.

        Inputs
            ionocontlist - A list of IonoContainer h5 file names. Can also
            be a single string holding one file name.
        Outputs
            sortlist - A numpy array of integers that chronologically
            orders the files.
            outime - All time vectors stacked vertically in sorted file
            order.
            fileslist - An int64 array with, for each file in sorted order,
            the first entry of that file's per-time file-number vector.
            timebeg - The earliest first-column time of each file, sorted.
            timelist_s - A list of the per-file time arrays in sorted order.
        """
        filenames = ionocontlist
        if isinstance(filenames, string_types):
            filenames = [filenames]

        per_file_times = []
        per_file_tags = []
        for filenum, fname in enumerate(filenames):
            with tables.open_file(str(fname)) as h5file:
                cur_times = h5file.root.Time_Vector.read()
            per_file_times.append(cur_times)
            # One copy of the file number per time entry of that file.
            per_file_tags.append(filenum*sp.ones(len(cur_times)))

        first_starts = sp.array([cur[:,0].min() for cur in per_file_times])
        sortlist = sp.argsort(first_starts)

        timelist_s = [per_file_times[k] for k in sortlist]
        timebeg = first_starts[sortlist]
        fileslist = sp.vstack(
            [per_file_tags[k][0] for k in sortlist]).flatten().astype('int64')
        outime = sp.vstack(timelist_s)
        return (sortlist,outime,fileslist,timebeg,timelist_s)
Ejemplo n.º 10
0
def eigsort(eigresult):
    """
    Order an eigen-decomposition by ascending eigenvalue magnitude.

    ``eigresult`` is the ``(eigenvalues, eigenvectors)`` pair as returned
    by scipy.linalg.eig().  The eigenvalues and the matching eigenvector
    columns are returned as a tuple, reordered by ``abs(eigenvalue)``.
    """
    eigenvalues = eigresult[0]
    eigenvectors = eigresult[1]
    order = sp.argsort(abs(eigenvalues))
    return (eigenvalues[order], eigenvectors[:, order])
def remove_isolated_clusters(conns, nonzero_locs, num_to_keep):
    r"""
    Keep only the nodes that belong to the largest connected clusters.

    An adjacency matrix is generated from ``conns`` and ``nonzero_locs``,
    its connected components are labelled (treating the graph as
    undirected) and the components are ranked by node count.  The entries
    of ``nonzero_locs`` belonging to the ``num_to_keep`` largest components
    are returned; statistics about the components are logged.
    """
    adjacency = generate_adjacency_matrix(conns, nonzero_locs)

    logger.info('determining connected components...')
    labels = csgraph.connected_components(csgraph=adjacency,
                                          directed=False)[1]

    # Rank the component ids from the most to the least populated.
    group_ids, group_sizes = sp.unique(labels, return_counts=True)
    largest_first = sp.argsort(group_sizes)[::-1]
    group_ids = group_ids[largest_first]
    group_sizes = group_sizes[largest_first]

    logger.debug('    {} component groups for {} total nodes'.format(
        group_ids.size, labels.size))
    logger.debug('    largest group number: {}, size {}'.format(
        group_ids[0], group_sizes[0]))
    logger.debug('    {} % of nodes contained in largest group'.format(
        group_sizes[0]/labels.size*100))
    kept_percent = sp.sum(group_sizes[0:num_to_keep])/labels.size*100
    logger.debug('    {} % of nodes contained in {} retained groups'.format(
        kept_percent, num_to_keep))

    # Positions of the nodes whose component is among the retained ones.
    kept_positions = sp.where(sp.in1d(labels, group_ids[0:num_to_keep]))[0]
    original_count = nonzero_locs.size
    nonzero_locs = nonzero_locs[kept_positions]
    logger.debug('    removed {} disconnected nodes'.format(
        original_count - nonzero_locs.size))

    return nonzero_locs
Ejemplo n.º 12
0
def readAnnotationFile(fn, format='gaf'):
    """Read an annotation file and return one processed record per gene.

    The list of overlapping genes is obtained first and handed to the
    annotation reader.  Each gene's entry is processed by the single- or
    multi-transcript routine depending on how many items it holds; genes
    for which that routine returns ``None`` are dropped.  The gene id is
    appended to every remaining record and the records are returned as an
    array whose rows are sorted by their column 5.
    """
    overlapgenes = getOverlapGenes(fn, format)
    data = readinganno(fn, overlapgenes, format)

    records = []
    for gid in data.keys():
        transcripts = data[gid]
        if len(transcripts) == 1:
            record = processSingleTranscriptGenes(transcripts)
        else:
            record = processMultiTranscriptGenes(transcripts)

        # None signals that the gene could not be processed.
        if record is None:
            continue
        record.extend([gid])
        records.append(record)

    table = sp.array(records)
    table = table[sp.argsort(table[:,5]),:]
    return sp.array(table)
Ejemplo n.º 13
0
  def _get_model_cv_preds(self, model, X_train, y_train):
    """
    Return cross-validation predictions on the training set.

    The predictions are unpickled from the file named by
    ``_get_model_cv_fname`` when it can be opened.  Otherwise the model is
    fitted on every stratified fold, the positive-class probabilities of
    the held-out rows are gathered and reordered to match ``X_train``, and
    the result is pickled to that file.  When ``self.use_logit`` is set and
    ``self.gnrl`` is ``'LR'`` the predictions are passed through ``logit``
    before being returned.
    """
    fname = self._get_model_cv_fname(model, X_train, y_train, self.n_folds_stack)

    stack_preds = None
    try:
        logger.debug("trying to load cv_pred from  %s", fname)
        with open(fname,"rb") as cache_in:
            stack_preds = pickle.load(cache_in)
    except IOError:
        logger.debug("not found: %s", fname)
        stack_preds = None

    if stack_preds is None:
        held_out_probs = []
        held_out_rows = []
        folds = cross_validation.StratifiedKFold(y_train, self.n_folds_stack)
        for fit_rows, eval_rows in folds:
            model.fit(X_train[fit_rows], y_train[fit_rows])
            fold_probs = model.predict_proba(X_train[eval_rows])[:, 1]
            held_out_probs.extend(list(fold_probs))
            held_out_rows.extend(list(eval_rows))
        # Restore the original row order of the training set.
        stack_preds = np.array(held_out_probs)[sp.argsort(held_out_rows)]

        with open(fname,"wb") as cache_out:
            pickle.dump(stack_preds,cache_out)

    if self.use_logit and self.gnrl=='LR':
        logger.debug('transform stack_preds(%s) using logit',stack_preds.shape)
        stack_preds = logit(stack_preds)

    return stack_preds
Ejemplo n.º 14
0
 def nms(boxes, T = 0.5):
     """Greedy non-maximum suppression of axis-aligned boxes.

     ``boxes`` holds one box per row with columns ``x1, y1, x2, y2``.
     Boxes are visited from the largest ``y2`` downwards; each visited box
     is kept and every remaining box whose intersection-over-union with it
     exceeds ``T`` is discarded.  The kept boxes are returned as an integer
     array in the order they were picked; an empty input yields ``[]``.
     """
     if len(boxes) == 0:
         return []

     boxes = boxes.astype("float")
     left, top, right, bottom = (boxes[:, column] for column in range(4))
     areas = (right - left + 1) * (bottom - top + 1)

     kept = []
     candidates = sp.argsort(bottom)
     while len(candidates) > 0:
         current = candidates[-1]
         others = candidates[:-1]
         kept.append(current)

         # Intersection rectangle of the current box with every other one.
         inter_left = sp.maximum(left[current], left[others])
         inter_top = sp.maximum(top[current], top[others])
         inter_right = sp.minimum(right[current], right[others])
         inter_bottom = sp.minimum(bottom[current], bottom[others])
         inter_w = sp.maximum(0, inter_right - inter_left + 1)
         inter_h = sp.maximum(0, inter_bottom - inter_top + 1)
         intersection = inter_w * inter_h

         iou = intersection / (areas[current] + areas[others] - intersection)
         # Drop the current box and everything overlapping it too much.
         candidates = others[~(iou > T)]

     return boxes[kept].astype("int")
Ejemplo n.º 15
0
 def apply_flow(self,flowrate):
     r'''
     Turn the invasion sequence into invasion times for a given flow rate.

     The volume attributed to each throat is its own volume plus the
     volume of the connected pore(s) whose invasion sequence number equals
     that of the throat.  These volumes are divided by the flow rate and
     accumulated in order of invasion.

     Parameters
     ----------
     flowrate : float
         The flow rate of the injected fluid

     Returns
     -------
     Nothing.  The result is stored as 'throat.invasion_time' on the phase
     held in ``self._phase``.

     '''
     conns = self._net['throat.conns']
     throat_seq = self['throat.invasion_sequence']
     invasion_order = sp.argsort(self['throat.invasion_sequence'])
     pore_seq_at_ends = self['pore.invasion_sequence'][conns]

     # True where an end pore carries the same sequence number as its throat.
     same_step = sp.column_stack((pore_seq_at_ends[:,0] == throat_seq,
                                  pore_seq_at_ends[:,1] == throat_seq))
     has_pore = sp.sum(same_step,axis=1,dtype=bool)

     pore_volume = sp.zeros((self.Nt,))
     pore_volume[has_pore] = self._net['pore.volume'][conns[same_step]]
     step_volume = pore_volume + self._net['throat.volume']

     # Accumulate in invasion order, then scatter back to throat order.
     elapsed = sp.cumsum(step_volume[invasion_order]/flowrate)
     invasion_time = sp.zeros((self.Nt,))
     invasion_time[invasion_order] = elapsed
     self._phase['throat.invasion_time'] = invasion_time
Ejemplo n.º 16
0
def writeTopXGenes2File(filename,sqlfile,outdir,top=1000):
    """Write the best-ranked positions of an HDF5 result file to a CSV file.

    The chromosomes, positions and p-values are read from ``filename`` and
    ordered by ascending p-value; at most ``top`` rows are kept.  For each
    row the annotation closest to the position (by ``annotation_start``) on
    the same chromosome is looked up in the ``geneannotation`` table of the
    sqlite database, and one CSV line is written to
    ``<outdir>/<phenotype name>.csv``.  Rows without an annotation are
    omitted.

    Parameters
    ----------
    filename : str
        HDF5 file with the datasets ``chromosomes``, ``positions``,
        ``p_values`` and ``phenotype_name``.
    sqlfile : str
        sqlite database containing the ``geneannotation`` table.
    outdir : str
        Directory that receives the CSV file.
    top : int
        Maximum number of rows to write.
    """
    f = h5py.File(filename,'r')
    try:
        chromosomes = f['chromosomes'][:]
        positions = f['positions'][:]
        p_values = f['p_values'][:].flatten()
        name = f['phenotype_name'].value.replace(" ","_").replace("<i>","").replace("</i>","")
    finally:
        # Release the HDF5 handle even if a dataset is missing.
        f.close()

    # NOTE(review): the ``[:-1]`` drops the entry with the largest p-value
    # before the ``top`` cut; behaviour kept as found -- confirm intent.
    ind = sp.argsort(p_values)[:-1][0:top]
    chromosomes = chromosomes[ind]
    positions = positions[ind]
    p_values = p_values[ind]

    sqlite = sqlite3.connect(sqlfile)
    try:
        sqlite_cursor = sqlite.cursor()
        # The context manager flushes and closes the CSV; the handle used
        # to be left open.
        with open(os.path.join(outdir,name + ".csv"),"w") as out:
            out.write("Chr,Pos,PVal,GeneID (closest),Distance (bp)\n")
            for i in range(chromosomes.shape[0]):
                sqlite_cursor.execute("SELECT * FROM geneannotation WHERE chromosome_id=? ORDER BY ABS(annotation_start - ?) LIMIT 1",(str(chromosomes[i]),int(positions[i])))
                annotation = sqlite_cursor.fetchall()
                if len(annotation) != 1:
                    continue
                # Columns 3 and 4 are used as the annotation's start and
                # end, column 1 as its id (presumably the table layout --
                # verify against the schema).
                start = annotation[0][3]
                end = annotation[0][4]
                if positions[i] >= start and positions[i] <= end:
                    distance = 0
                elif positions[i] > end:
                    distance = abs(positions[i]-end)
                else:
                    distance = abs(positions[i]-start)
                out.write(chromosomes[i] + "," + str(int(positions[i])) + ",%.2e"%(p_values[i]) + "," + annotation[0][1] + "," + str(int(distance)) + "\n")
    finally:
        sqlite.close()
Ejemplo n.º 17
0
Archivo: QTR.py Proyecto: xkronosua/QTR
	def loadData(self):
		'''Load data from the file associated with the triggering widget.

		The first character of the sender's object name selects the data
		type and the column/normalisation widgets.  The chosen X and Y
		columns are read from the text file, optionally divided by the
		chosen normalisation column, restricted to rows where both values
		are positive, optionally cut at the row holding the largest X, and
		sorted by X before being handed to ``updateData``.  Afterwards the
		tabs belonging to the data type are enabled.  Problems are
		reported through ``mprint``.
		'''
		tab_names = ( ('tab_2', 'tab_3','tab_4'),
			('tab_3', 'tab_2','tab_4'))
		widget_suffixes = ('XColumn', 'YColumn', 'MColumn', 'MCheck')

		sender_name = self.sender().objectName()
		prefix = sender_name[0]
		# active = [data type, X spin box, Y spin box, M spin box, M check box]
		active = [self.Types[prefix]] + self.findUi( [prefix + suffix for suffix in widget_suffixes])
		path = self.Path[active[0]]

		if not os.path.exists(path):
			self.mprint('loadData: pathError')
			return

		try:
			data = sp.loadtxt(path)
			# (Per-axis filter handling is disabled in this version.)
			x_col = active[1].value()
			y_col = active[2].value()
			norm_col = active[3].value()

			points = sp.array( [data[:,x_col], data[:,y_col] ]).T
			if active[4].checkState():
				# Normalise both columns by the selected column.
				points = points / sp.array([data[:,norm_col], data[:,norm_col]]).T

			# Keep only rows with strictly positive X and Y.
			points = points[points[:,0] > 0]
			points = points[points[:,1] > 0]

			if getattr(self.ui, prefix + 'CutForward').isChecked():
				# Drop everything from the first occurrence of max X onwards.
				p = sp.where( points[:,0] == points[:,0].max())[0][0]
				print(p)
				points = points[:p,:]
			points = points[sp.argsort(points[:,0])]

			self.updateData(array = Array(points,Type = active[0]), action = 0)
			tabs = self.findUi(tab_names[active[0]])
			tabs[0].setEnabled(True)

			if tabs[1].isEnabled():
				tabs[2].setEnabled(True)
		except (ValueError, IOError, IndexError):
			self.mprint("loadData: readError")
Ejemplo n.º 18
0
    def add_times(self,self2):
        """Combine the times and content of two instances of the GeoData class.

        The first object (``self``) is extended in time with the content of
        ``self2``: the time arrays of both objects are stacked and sorted
        by start time, and every data array is merged and reordered the
        same way.  For satellite data the arrays are joined and reordered
        along axis 0; otherwise they are joined with ``hstack`` and
        reordered along axis 1.

        Both objects must have the same data names, coordinate names, data
        locations and sensor locations (NaN entries are ignored in the
        location comparisons); this is checked with assertions.
        """
        datakeys = self.data.keys()
        assert set(datakeys) ==set(self2.data.keys()),'Data must have the same names.'
        # Look at the coordinate names
        assert self.coordnames==self2.coordnames,'Must be same coordinate same.'
        # Look at the data location
        a = np.ma.array(self.dataloc,mask=np.isnan(self.dataloc))
        blah = np.ma.array(self2.dataloc,mask=np.isnan(self2.dataloc))
        assert np.ma.allequal(a,blah),'Location points must be the same'

        # Look at the sensor location
        a = np.ma.array(self.sensorloc,mask=np.isnan(self.sensorloc))
        blah = np.ma.array(self2.sensorloc,mask=np.isnan(self2.sensorloc))
        assert np.ma.allequal(a,blah),'Sensor Locations must be the same'

        alltimes = sp.vstack((timerepair(self.times),timerepair(self2.times)))

        # sort based off of start times
        s_ind = sp.argsort(alltimes[:,0])
        self.times = alltimes[s_ind]

        if self.issatellite():
            for ikey in self.datanames():
                outarr=sp.concatenate((self.data[ikey],self2.data[ikey]),0)
                self.data[ikey]=outarr[s_ind]
        else:
            # Without this ``else`` the second merge also ran for satellite
            # data (appending self2 a second time) and was the only place
            # where non-satellite data could be merged at all.
            for ikey in self.datanames():
                outarr = sp.hstack((self.data[ikey],self2.data[ikey]))
                self.data[ikey] = outarr[:,s_ind]
Ejemplo n.º 19
0
    def setup(self, phase, throat_prop='throat.capillary_pressure', **kwargs):
        r"""
        Set up the required parameters for the algorithm

        Parameters
        ----------
        phase : OpenPNM Phase object
            The phase to be injected into the Network.  The Phase must have the
            capillary entry pressure values for the system.

        throat_prop : string
            The name of the throat property containing the capillary entry
            pressure.  The default is 'throat.capillary_pressure'.

        """
        self._phase = phase
        # Setup arrays and info
        self['throat.entry_pressure'] = phase[throat_prop]
        # Indices into t_entry giving a sorted list
        self['throat.sorted'] = sp.argsort(self['throat.entry_pressure'], axis=0)
        self['throat.order'] = sp.zeros_like(self['throat.sorted'])
        self['throat.order'][self['throat.sorted']] = sp.arange(0, self._net.Nt)
        self['throat.invaded'] = -sp.ones((self._net.Nt,))
        self['pore.invaded'] = -sp.ones((self._net.Np,))
        self._tcount = 0
Ejemplo n.º 20
0
def benjamini_hochberg_yekutieli(p_values=None,q_value=0.05,sort_idx=None,return_sort_idx=False):
    """Benjamini-Hochberg-Yekutieli threshold and adjusted p-values.

    Parameters
    ----------
    p_values : array
        P-values; flattened before use.
    q_value : float
        Target level used for the threshold line.
    sort_idx : array, optional
        Precomputed ascending ordering of ``p_values``; computed with
        ``argsort`` when omitted.
    return_sort_idx : bool
        Also return the ordering that was used.

    Returns
    -------
    list
        ``[thr, p_values_adjusted]`` or, when ``return_sort_idx`` is True,
        ``[thr, p_values_adjusted, sort_idx]``.  ``thr`` is the largest
        sorted p-value lying on or below its threshold line value (0.0 if
        there is none); the adjusted values are in the input order.
    """
    p_values = p_values.ravel()
    sort_idx = sp.argsort(p_values) if sort_idx is None else sort_idx.ravel()
    p_values = p_values[sort_idx]

    m = p_values.shape[0]
    ranks = sp.arange(1,m+1)
    cV = (1.0/ranks).sum()
    thr_line = (ranks*q_value*cV)/float(m)

    passing = sp.where(p_values<=thr_line)[0]
    thr = 0.0 if passing.shape[0]==0 else p_values[passing.max()]

    # Adjusted values: running minimum from the largest p-value downwards,
    # never exceeding one.
    adjusted_sorted = sp.ones(m)
    running = 1.0
    for pos in reversed(range(m)):
        candidate = sp.minimum(running,p_values[pos]*float(m)*cV/float(pos+1))
        adjusted_sorted[pos] = candidate
        if adjusted_sorted[pos]>1:
            adjusted_sorted[pos]=1
        running = adjusted_sorted[pos]

    # Scatter the sorted values back to the positions they came from.
    p_values_adjusted = adjusted_sorted.copy()
    p_values_adjusted[sort_idx] = adjusted_sorted

    result = [thr,p_values_adjusted]
    if return_sort_idx==True:
        result.append(sort_idx)
    return result
Ejemplo n.º 21
0
def roc(labels, predictions):
    """roc - calculate receiver operator curve

    labels: true labels (>0 : True, else False)
    predictions: the ranking generated from whatever predictor is used

    Returns the list ``[tp, fp]`` holding the true-positive and
    false-positive rates obtained while walking through the samples in
    order of descending prediction, each with a leading 0 and a trailing 1
    added as end points.
    """
    # flatten the inputs and binarise the labels
    truth = S.array(labels).reshape([-1]) > 0
    scores = S.array(predictions).reshape([-1])

    # reorder the truth by descending prediction
    ranking = S.argsort(scores)[::-1]
    truth = truth[ranking]
    negatives = ~truth

    # cumulative hit counts normalised by the class sizes
    tp = S.double(N.cumsum(truth))/truth.sum()
    fp = S.double(N.cumsum(negatives))/negatives.sum()

    # add end points
    return [S.concatenate(([0],tp,[1])), S.concatenate(([0],fp,[1]))]
Ejemplo n.º 22
0
    def eigensigma(self):
        """Compute and store the eigen-modes of the transverse Hamiltonian.

        A tridiagonal matrix with ``2*t0`` on the diagonal and ``-t0`` on
        the first off-diagonals is built, its size being the number of
        columns of ``self.wafer``.  Its eigenvalues (sorted ascending) and
        the matching eigenvector columns are stored in ``self.v`` and
        ``self.d``.  ``self.maxmode`` is set to the number of leading modes
        whose energy lies below ``self.Efermi - self.band_bottom``; it is
        0 when no mode does.  Informational messages are printed.
        """
        from scipy.linalg import eig
        from scipy.sparse import lil_matrix
        # argsort/where are NumPy functions; the scipy top-level aliases
        # are not available in current SciPy releases.
        from numpy import argsort, where

        size = self.wafer.shape[1]
        transverseH = lil_matrix((size, size))
        transverseH.setdiag([2*self.t0]*size)
        transverseH.setdiag([-self.t0]*size, 1)
        transverseH.setdiag([-self.t0]*size, -1)

        v, d = eig(transverseH.todense())
        ndx = argsort(v)
        d = d[:, ndx]
        v = v[ndx]
        self.v = v
        self.d = d

        threshold = self.Efermi - self.band_bottom
        try:
            self.maxmode = where(self.v < threshold)[0].max() + 1
        except ValueError:
            # max() of an empty selection: no mode lies below the
            # threshold.  Record that explicitly so that the message below
            # (and callers) never see a missing or stale attribute.
            self.maxmode = 0
            print("- ValueError probably no modes will fit at that energy")
        if v.max() > threshold:
            print('Some mode energies larger than fermi energy, only up to mode {0} will fit'.format(self.maxmode))
            print('Argument num_modes="all" takes only modes low enough')
            print('')
Ejemplo n.º 23
0
 def query(self,lv,k=None):
     """Return distances and element indices of the ``k`` nearest values.

     Parameters
     ----------
     lv : array-like
         Either a one-dimensional sequence, which is treated as a single
         query row, or a two-dimensional array with one query per row.
     k : int, optional
         Number of neighbours to return; defaults to ``self.k``.

     Returns
     -------
     (distances, indices)
         For a single query row both have shape ``(k,)``; otherwise both
         have shape ``(n_queries, k)``.  ``indices`` index into
         ``self.va`` and are ordered from nearest to farthest.
     """
     if k is None:
         k = self.k
     if not isinstance(lv, numpy.ndarray):
         lv = numpy.array(lv)
     if lv.ndim == 1:
         lv = lv.reshape(1, lv.shape[0])

     column = self.va.reshape(self.va.shape[0], 1)
     if lv.shape[0] == 1:
         dt = abs(column - lv).T
         dr = numpy.argsort(dt)[0, :k]
         return dt[0, dr], dr.reshape(k)

     dt = scipy.spatial.distance.cdist(lv, column)
     dr = numpy.argsort(dt)[:, :k]
     # Pick the distances row by row.  ``dt.take(dr)`` indexed the
     # flattened matrix, so every query received distances from row 0.
     rows = numpy.arange(dt.shape[0]).reshape(-1, 1)
     return dt[rows, dr], dr
Ejemplo n.º 24
0
    def CreateEnergyGrid(self,ParticlesPerBin=1000):
        """Bin the snapshot's particles by energy and store the grid.

        The energy of every particle is ``0.5*|v|^2 + V``.  Particles are
        sorted by energy and grouped into consecutive bins of
        ``ParticlesPerBin`` particles; bins are created for as long as the
        upper end of the bin stays strictly below the particle count.  For
        each bin the mean energy and the bin mass divided by the bin's
        energy range are recorded in ``self.EGrid.E`` and
        ``self.EGrid.Mass``.  The grid object is returned.
        """
        snap = self.Snapshot
        speed_sq = snap.vx*snap.vx+snap.vy*snap.vy+snap.vz*snap.vz
        E = 0.5*speed_sq + snap.V

        by_energy = scipy.argsort(E)
        n_particles = len(by_energy)
        self.EGrid = EnergyGrid()

        mean_energies = []
        mass_per_energy = []
        lower = 0
        upper = ParticlesPerBin
        while upper < n_particles:
            members = by_energy[lower:upper]
            bin_energies = E[members]
            mean_energies.append( bin_energies.mean() )
            # Mass in the bin per unit of energy spanned by the bin.
            mass_per_energy.append(
                self.Snapshot.m[members].sum()
                / ( bin_energies.max() - bin_energies.min() ) )
            lower = upper
            upper = upper + ParticlesPerBin

        self.EGrid.Mass = scipy.array(mass_per_energy)
        self.EGrid.E = scipy.array(mean_energies)
        return self.EGrid
Ejemplo n.º 25
0
def precision_and_recall(actual,predicted,cls):
    """Return cumulative precision and recall arrays for one class.

    The samples are ordered so that those whose ``actual`` label equals
    ``cls`` come first.  Walking through that ordering, ``tp`` counts the
    samples predicted as ``cls`` and ``fp`` those predicted as anything
    else.

    Parameters
    ----------
    actual, predicted : array
        Label arrays of equal length.
    cls
        The class label of interest.

    Returns
    -------
    (prec, rec)
        ``prec = tp / (tp + fp)`` and ``rec = tp / (number of samples
        predicted as cls)``, one entry per position in the ordering.
    """
    c = (actual == cls)
    # Logical negation puts the matching samples first.  The former unary
    # minus on a boolean array is rejected by current NumPy.
    si = sp.argsort(sp.logical_not(c))
    tp = sp.cumsum(sp.single(predicted[si] == cls))
    fp = sp.cumsum(sp.single(predicted[si] != cls))
    rec = tp /sp.sum(predicted == cls)
    prec = tp / (fp + tp)
    return prec,rec
Ejemplo n.º 26
0
	def toplines(self,n_lines=5):
		"""Return the 1-based row numbers of the highest-weighted rows per topic.

		For each of the ``self.n_topics`` columns of ``self._theta`` the
		rows are ranked by descending value and the first ``n_lines`` row
		numbers (counting from 1) are stored.

		Returns an array of shape ``(n_topics, n_lines)``.
		"""
		lines = sp.zeros((self.n_topics,n_lines))
		# range() works on Python 2 and 3 alike; xrange() exists only on 2.
		for i in range(self.n_topics):
			ranked = sp.argsort(self._theta[:,i])[::-1]
			lines[i,:] = ranked[0:n_lines] + 1
		return lines
Ejemplo n.º 27
0
def segmented():
    """Split the module-level image ``img`` in two using a spectral cut.

    A weighted graph is built over the pixels: every pixel is linked to the
    pixels in a window of ``radius`` around it, with edge weights supplied
    by the module-level ``weight(row, col, k, l)`` function.  With ``W`` the
    weight matrix and ``D`` the diagonal matrix of row sums, the matrix
    ``D^-1/2 (D - W) D^-1/2`` is decomposed and the eigenvector belonging
    to the second-smallest eigenvalue decides, by its sign, which half
    each pixel belongs to.

    Assumes ``img`` is a 2-D array -- TODO confirm against the caller.

    Returns:
        ``(darkImg, flatImg, brightImg)`` where ``flatImg`` is the flattened
        image, ``darkImg`` is the image (shape ``(height, width)``) with the
        pixels of the negative side zeroed and ``brightImg`` the image with
        all other pixels zeroed.
    """
    # Local import: the NumPy aliases in the ``scipy`` namespace are
    # deprecated since SciPy 1.4.
    import numpy as np

    radius = 5
    height = img.shape[0]
    width = img.shape[1]
    flatImg = img.flatten()
    n_nodes = flatImg.size

    W = spar.lil_matrix((n_nodes, n_nodes), dtype=float)
    degree = np.zeros(n_nodes)

    for row in range(height):
        # Clamp the window to the image: negative indices do not raise, they
        # silently wrap around to the opposite border (and an overflowing
        # column spills into the next row), which created spurious edges.
        # NOTE(review): the upper bound is exclusive as in the original, so
        # the window reaches radius pixels up/left but radius-1 down/right.
        k_range = range(max(row - radius, 0), min(row + radius, height))
        for col in range(width):
            node = row*width + col
            for k in k_range:
                for l in range(max(col - radius, 0), min(col + radius, width)):
                    try:
                        w = weight(row,col,k,l)
                    except IndexError:
                        # Same best-effort skip as before, but no longer a
                        # bare ``except`` that hides unrelated errors.
                        continue
                    W[node, k*width + l] = w
                    degree[node] += w

    # D^-1/2 (D - W) D^-1/2, computed with plain arrays.
    laplacian = np.diag(degree) - W.toarray()
    inv_sqrt = degree**-0.5
    segQ = inv_sqrt[:, None] * laplacian * inv_sqrt[None, :]

    vals, vecs = la.eig(segQ)

    # Eigenvectors are the *columns* of ``vecs``; pick the one that belongs
    # to the second-smallest eigenvalue.
    second = np.argsort(vals.real)[1]
    theVec = np.real(vecs[:, second])

    # Work on independent copies: the original aliased both outputs to
    # ``flatImg`` and therefore zeroed every pixel of all three results.
    negative = theVec < 0
    darkImg = flatImg.copy()
    darkImg[negative] = 0.0
    brightImg = flatImg.copy()
    brightImg[~negative] = 0.0

    # (height, width) rather than (height, height) so that non-square
    # images work as well.
    darkImg = np.reshape(darkImg, (height, width))
    brightImg = np.reshape(brightImg, (height, width))

    return darkImg, flatImg, brightImg
Ejemplo n.º 28
0
    def __init__(self, N, vectors, coverage_ratio=0.2):
        """
        Performs exact nearest neighbour search on the data set.

        vectors can either be a numpy matrix with all the vectors
        as columns OR a python array containing the individual
        numpy vectors.

        A fraction ``coverage_ratio`` of the vectors, spread evenly over the
        data set, is used as query set.  For each query the indices of the
        entries ranked 1..N by euclidean distance (rank 0, normally the
        query itself, is skipped) are stored in ``self.closest`` and the
        average search time per query in
        ``self.exact_search_time_per_vector`` (0.0 if the query set is
        empty).
        """
        # We need a dict from vector string representation to index
        self.vector_dict = {}
        self.N = N
        self.coverage_ratio = coverage_ratio

        # Get numpy array representation of input
        self.vectors = numpy_array_from_list_or_numpy_array(vectors)
        vector_count = self.vectors.shape[1]

        # Build map from vector string representation to vector
        for index in range(vector_count):
            self.vector_dict[self.__vector_to_string(
                self.vectors[:, index])] = index

        # Get transposed version of vector matrix, so that the rows
        # are the vectors (needed by cdist)
        vectors_t = numpy.transpose(self.vectors)

        # Determine the indices of query vectors used for comparison
        # with approximated search.
        query_count = numpy.floor(self.coverage_ratio * vector_count)
        self.query_indices = []
        for k in range(int(query_count)):
            index = numpy.floor(k*(vector_count/query_count))
            index = min(index, vector_count-1)
            self.query_indices.append(int(index))

        print('\nStarting exact search (query set size=%d)...\n' % query_count)

        # For each query vector get the closest N neighbours
        self.closest = {}
        self.exact_search_time_per_vector = 0.0

        for index in self.query_indices:

            v = vectors_t[index, :].reshape(1, self.vectors.shape[0])
            exact_search_start_time = time.time()
            D = cdist(v, vectors_t, 'euclidean')
            # numpy.argsort: the ``scipy.argsort`` alias is deprecated
            # since SciPy 1.4.
            self.closest[index] = numpy.argsort(D)[0, 1:N+1]

            # Save time needed for exact search
            exact_search_time = time.time() - exact_search_start_time
            self.exact_search_time_per_vector += exact_search_time

        # '\D' was an invalid escape sequence; a newline was intended.
        print('\nDone with exact search...\n')

        # Normalize search time (skip when no query was run to avoid a
        # division by zero on small data sets / coverage ratios)
        if self.query_indices:
            self.exact_search_time_per_vector /= float(len(self.query_indices))
Ejemplo n.º 29
0
def Experimento(db):
    """Run a retrieval experiment and tally same-class hits per rank.

    Every sample is used once as query: all samples are ranked by
    increasing distance to it and, for the ranks following the first one,
    a hit is counted whenever the retrieved sample has the query's class.

    Parameters:
        db: mapping from figure name to a sequence whose first element is
            the class label and whose remaining elements are the features.
            Class labels are used as 1-based row indices, and the data set
            is assumed to be balanced (same number of samples per class).

    Returns:
        ``(l, Nac)`` where ``l`` is an integer array of shape
        ``(Nclasses, Nac)`` with ``l[c-1, r]`` the number of queries of
        class ``c`` whose r-th retrieved sample (query excluded) also has
        class ``c``, and ``Nac`` is the number of samples per class.
    """
    # Local import: the NumPy aliases in the ``scipy`` namespace are
    # deprecated since SciPy 1.4.
    import numpy as np

    # Figure names (list() so that this also works with Python 3 key views)
    name_arr = np.array(list(db.keys()))

    # Second dictionary: figure name -> class label
    cl = dict((nome, int(db[nome][0])) for nome in name_arr)

    # N_samples x N_features matrix built from the input data set;
    # the first column (class labels) is dropped
    data = np.array([db[nome][1:] for nome in name_arr])

    # Dissimilarity measure; any metric name understood by pdist may be used
    distancia = 'euclidean'

    # Number of samples
    Nobj = data.shape[0]

    # Number of classes
    Nclasses = max(cl.values())

    # Samples per class, assuming the data set is balanced.  Floor division
    # keeps this an integer on Python 3 (it is used as a shape below).
    Nac = Nobj // Nclasses

    # Number of retrievals
    Nretr = Nac

    # Pairwise distance matrix
    md = squareform(pdist(data,distancia))

    # Hit counts per class (rows) and retrieval rank (columns)
    l = np.zeros((Nclasses,Nac),dtype = int)

    for row, nome in enumerate(name_arr):
        # Rank all samples by increasing distance to the query; the first
        # ranked element is normally the query itself
        idx = np.argsort(md[row])
        classe_padrao = cl[nome]
        aux = np.array([cl[j] for j in name_arr[idx]])
        # Skip the first ranked element.
        # NOTE(review): this keeps Nretr-1 results, so the last column of
        # ``l`` is never incremented -- confirm this is intended.
        classe_retrs = aux[1:Nretr]
        hits = np.nonzero(classe_retrs == classe_padrao)[0]
        # Tally the hits (the rank indices are unique, so += is safe)
        l[classe_padrao-1, hits] += 1

    return l,Nac
Ejemplo n.º 30
0
	def topterms(self,n_terms=10):
		"""Return the most probable vocabulary terms of every topic.

		(Provided helper.)  For each topic ``k`` the entries of
		``self._phi[k,:]`` are ranked in descending order and the
		``self.vocab`` entries at the first ``n_terms`` positions are
		collected.

		Parameters:
			n_terms: number of terms to report per topic; at most the number
				of entries in a row of ``_phi`` are returned.

		Returns:
			A list with one list of vocabulary entries per topic, most
			probable term first.
		"""
		# Local import: the NumPy aliases in the ``scipy`` namespace are
		# deprecated since SciPy 1.4.
		import numpy as np

		topics = []
		# ``range`` instead of the Python-2-only ``xrange``.
		for k in range(self.n_topics):
			# Ascending argsort read backwards == descending probability.
			# Slicing (rather than indexing from the end) avoids the
			# negative-index wrap-around when n_terms exceeds the row length.
			ranked = np.argsort(self._phi[k,:])[::-1]
			topics.append([self.vocab[int(w)] for w in ranked[:n_terms]])
		return topics