def _generate_throats(self):
    r"""
    Generate the throats (connections, numbering and types)

    Pores are numbered 0..Np-1 with Np = Nx*Ny*Nz.  A pore is connected to
    the pore 1, Nx and Nx*Ny positions further on, provided that neighbour
    still lies inside the same row / layer / volume of the grid.

    Reads ``self._Nx``, ``self._Ny`` and ``self._Nz`` and stores

    * ``self['throat.all']``   -- boolean array with one True per throat
    * ``self['throat.conns']`` -- (Nt, 2) integer array of pore pairs,
      sorted by the first pore and then by the second pore
    """
    # NumPy is called directly: the ``sp.<numpy function>`` aliases that used
    # to be reachable through the top-level scipy namespace are deprecated.
    import numpy as np

    Nx = self._Nx
    Ny = self._Ny
    Nz = self._Nz
    Np = Nx * Ny * Nz
    ind = np.arange(0, Np)

    # Generate throats based on pattern of the adjacency matrix
    tpore1_1 = ind[(ind % Nx) < (Nx - 1)]
    tpore2_1 = tpore1_1 + 1
    tpore1_2 = ind[(ind % (Nx * Ny)) < (Nx * (Ny - 1))]
    tpore2_2 = tpore1_2 + Nx
    # every index is already smaller than Np, so no modulo is needed here
    tpore1_3 = ind[ind < (Nx * Ny * (Nz - 1))]
    tpore2_3 = tpore1_3 + Nx * Ny

    tpore1 = np.hstack((tpore1_1, tpore1_2, tpore1_3))
    tpore2 = np.hstack((tpore2_1, tpore2_2, tpore2_3))
    connections = np.vstack((tpore1, tpore2)).T
    # lexsort uses its LAST key as the primary one: sort by column 0, then 1
    connections = connections[np.lexsort((connections[:, 1],
                                          connections[:, 0]))]

    self['throat.all'] = np.ones(connections.shape[0], dtype=bool)
    self['throat.conns'] = connections
def unique(a):
    """Return the distinct rows of a 2-D array.

    The rows are ordered with ``lexsort`` (the last column is the primary
    sort key) and every row equal to its predecessor is dropped, so the
    result comes back in that sorted order.

    a: 2-D array or array-like

    Returns a 2-D array holding each distinct row of ``a`` exactly once.
    """
    # NumPy is called directly: the ``sp.<numpy function>`` aliases of the
    # top-level scipy namespace are deprecated.
    import numpy as np

    a = np.asarray(a)
    a = a[np.lexsort(a.T)]

    keep = np.ones(len(a), dtype=bool)
    # Compare neighbouring rows directly rather than via diff() != 0, so
    # non-numeric rows work and equal rows containing inf are merged.
    keep[1:] = (a[1:] != a[:-1]).any(axis=1)
    return a[keep]
def simplex_array_boundary(s, parity):
    """
    Compute the boundary faces and boundary operator of an
    array of simplices with given simplex parities

    E.g.

      For a mesh with two triangles [0,1,2] and [1,3,2], the second
      triangle has opposite parity relative to sorted order.

      simplex_array_boundary(array([[0,1,2],[1,2,3]]),array([0,1]))

    s:      (num_simplices, k) integer array, one simplex per row
    parity: length num_simplices array of 0/1 values; a simplex with
            parity p enters with orientation 1 - 2*p

    Returns (unique_faces, boundary_operator): the distinct faces as rows
    in lexicographical order, and a csr_matrix of shape
    (number of unique faces, num_simplices) with int8 entries.
    """
    #TODO handle edge case as special case
    # NOTE(review): orientations of -1 are stored in an array of s.dtype,
    # so a signed integer dtype is assumed -- confirm against callers.

    num_simplices = s.shape[0]
    faces_per_simplex = s.shape[1]
    num_faces = num_simplices * faces_per_simplex

    orientations = 1 - 2 * parity

    #faces[:,:-2] are the indices of the faces
    #faces[:,-2]  is the index of the simplex whose boundary produced the face
    #faces[:,-1]  is the orientation of the face in the boundary of the simplex
    faces = empty((num_faces, s.shape[1] + 1), dtype=s.dtype)
    for i in range(faces_per_simplex):
        # the i-th face of every simplex is the simplex without its i-th vertex
        rows = faces[num_simplices * i:num_simplices * (i + 1)]
        rows[:, :i] = s[:, :i]
        rows[:, i:-2] = s[:, i + 1:]
        rows[:, -2] = arange(num_simplices)
        rows[:, -1] = ((-1) ** i) * orientations

    #sort rows (first vertex index is the primary key)
    faces = faces[lexsort(faces[:, :-2].T[::-1])]

    #find unique faces: a row starts a new face unless it repeats the
    #previous one.  The ndarray method replaces alltrue(), which NumPy 2.0
    #no longer provides.
    same_as_previous = (faces[1:, :-2] == faces[:-1, :-2]).all(axis=1)
    face_mask = ~hstack((array([False]), same_as_previous))

    unique_faces = faces[face_mask, :-2]

    #compute CSR representation for boundary operator
    csr_indptr = hstack((arange(num_faces)[face_mask], array([num_faces])))
    csr_indices = ascontiguousarray(faces[:, -2])
    csr_data = faces[:, -1].astype('int8')

    shape = (len(unique_faces), num_simplices)
    boundary_operator = csr_matrix((csr_data, csr_indices, csr_indptr), shape)

    return unique_faces, boundary_operator
def simplex_array_boundary(s, parity):
    """
    Compute the boundary faces and boundary operator of an
    array of simplices with given simplex parities

    E.g.

      For a mesh with two triangles [0,1,2] and [1,3,2], the second
      triangle has opposite parity relative to sorted order.

      simplex_array_boundary(array([[0,1,2],[1,2,3]]),array([0,1]))

    s:      (num_simplices, k) integer array, one simplex per row
    parity: length num_simplices array of 0/1 values; a simplex with
            parity p enters with orientation 1 - 2*p

    Returns (unique_faces, boundary_operator): the distinct faces as rows
    in lexicographical order, and a csr_matrix of shape
    (number of unique faces, num_simplices) with int8 entries.
    """
    #TODO handle edge case as special case
    # NOTE(review): orientations of -1 are stored in an array of s.dtype,
    # so a signed integer dtype is assumed -- confirm against callers.

    num_simplices = s.shape[0]
    faces_per_simplex = s.shape[1]
    num_faces = num_simplices * faces_per_simplex

    orientations = 1 - 2 * parity

    #faces[:,:-2] are the indices of the faces
    #faces[:,-2]  is the index of the simplex whose boundary produced the face
    #faces[:,-1]  is the orientation of the face in the boundary of the simplex
    faces = empty((num_faces, s.shape[1] + 1), dtype=s.dtype)
    for i in range(faces_per_simplex):
        # the i-th face of every simplex is the simplex without its i-th vertex
        rows = faces[num_simplices * i:num_simplices * (i + 1)]
        rows[:, :i] = s[:, :i]
        rows[:, i:-2] = s[:, i + 1:]
        rows[:, -2] = arange(num_simplices)
        rows[:, -1] = ((-1) ** i) * orientations

    #sort rows (first vertex index is the primary key)
    faces = faces[lexsort(faces[:, :-2].T[::-1])]

    #find unique faces: a row starts a new face unless it repeats the
    #previous one.  A boolean mask must be inverted with ``~``; unary minus
    #on a boolean array is a TypeError in NumPy.  The ndarray method
    #replaces alltrue(), which NumPy 2.0 no longer provides.
    repeats_previous = (faces[1:, :-2] == faces[:-1, :-2]).all(axis=1)
    face_mask = ~hstack((array([False]), repeats_previous))

    unique_faces = faces[face_mask, :-2]

    #compute CSR representation for boundary operator
    csr_indptr = hstack((arange(num_faces)[face_mask], array([num_faces])))
    csr_indices = ascontiguousarray(faces[:, -2])
    csr_data = faces[:, -1].astype('int8')

    shape = (len(unique_faces), num_simplices)
    boundary_operator = csr_matrix((csr_data, csr_indices, csr_indptr), shape)

    return unique_faces, boundary_operator
def sort_rows(array, index=None):
    """Sort array by rows

    Rows are ordered lexicographically: by the first column, ties broken
    by the second column, and so on.

    array: numpy array; 2-D in the general case.  An empty array, or a 1-D
           array (treated as one single row), is returned unchanged.
    index: if true, additionally return the row order that was applied

    Returns the sorted array, or the tuple (sorted array, indices) when
    ``index`` is set.  The indices are an empty list for empty input and
    [0] for a single 1-D row.
    """
    # NumPy is called directly: the ``sp.lexsort`` alias of the top-level
    # scipy namespace is deprecated.
    import numpy as np

    ### empty array
    if array.shape[0] == 0:
        return (array, []) if index else array

    ### only one row -- without this, the column slicing below fails
    if array.ndim == 1:
        return (array, [0]) if index else array

    ### more than one row: lexsort treats its LAST key as the primary one,
    ### so the columns are handed over in reverse order
    s_idx = np.lexsort(array[:, ::-1].T)
    if index:
        return (array[s_idx, :], s_idx)
    return array[s_idx, :]
def sort_rows(array, index=None):
    """Sort array by rows

    The rows are put into lexicographical order (first column first, later
    columns break ties).

    array: numpy array, normally 2-D.  Empty input and 1-D input (one
           single row) need no sorting and are handed back as they are.
    index: when true, the applied row order is returned as well

    Returns the sorted array, or (sorted array, indices) if ``index`` is
    set; the indices are [] for empty input and [0] for a 1-D row.
    """
    # NumPy is called directly: the ``sp.lexsort`` alias of the top-level
    # scipy namespace is deprecated.
    import numpy as np

    if array.shape[0] == 0:
        ### empty array
        order = []
        result = array
    elif array.ndim == 1:
        ### only one row -- column slicing would raise on 1-D input
        order = [0]
        result = array
    else:
        ### lexsort sorts by its last key first, hence the reversed columns
        order = np.lexsort(array[:, ::-1].T)
        result = array[order, :]

    if index:
        return (result, order)
    return result
def _generate_throats(self):
    r"""
    Generate the throats (connections, numbering and types)

    With Np = Nx*Ny*Nz pores numbered 0..Np-1, each pore is linked to the
    pore 1, Nx and Nx*Ny positions ahead of it whenever that neighbour
    stays inside the same block of Nx, Nx*Ny and Np pores respectively.

    Reads ``self._Nx``, ``self._Ny``, ``self._Nz``; writes
    ``self['throat.all']`` (boolean array, one True per throat) and
    ``self['throat.conns']`` ((Nt, 2) pore pairs sorted by first, then
    second pore).
    """
    # NumPy is called directly: the ``sp.<numpy function>`` aliases of the
    # top-level scipy namespace are deprecated.
    import numpy as np

    Nx = self._Nx
    Ny = self._Ny
    Nz = self._Nz
    Np = Nx * Ny * Nz
    ind = np.arange(0, Np)

    # Generate throats based on pattern of the adjacency matrix.
    # One entry per direction: (offset to the neighbour, block length,
    # number of leading pores in a block that still have that neighbour).
    directions = (
        (1, Nx, Nx - 1),
        (Nx, Nx * Ny, Nx * (Ny - 1)),
        (Nx * Ny, Np, Nx * Ny * (Nz - 1)),
    )
    heads = []
    tails = []
    for offset, block, usable in directions:
        source = ind[(ind % block) < usable]
        heads.append(source)
        tails.append(source + offset)

    connections = np.vstack((np.hstack(heads), np.hstack(tails))).T
    # lexsort's last key is the primary key: column 0 first, then column 1
    order = np.lexsort((connections[:, 1], connections[:, 0]))
    connections = connections[order]

    self['throat.all'] = np.ones(connections.shape[0], dtype=bool)
    self['throat.conns'] = connections
def simplex_array_searchsorted(s, v):
    """Locate the rows of simplex array v inside simplex array s.

    For every row of v the index of the matching row of s is returned.
    The rows of s must be stored in lexicographical order.

    Example
    -------

    >>> from numpy import array
    >>> s = array([[0,1],[0,2],[1,2],[1,3]])
    >>> v = array([[1,2],[0,2]])
    >>> simplex_array_searchsorted(s,v)
    array([2, 1])

    """
    s = asarray(s)
    v = asarray(v)

    if not (ndim(s) == 2 and ndim(v) == 2):
        raise ValueError('expected rank 2 arrays')

    if s.shape[1] != v.shape[1]:
        raise ValueError('number of columns must agree')

    num_s = s.shape[0]
    num_v = v.shape[0]

    # Sort the rows of both arrays together (first column is the primary
    # key); the sort is stable, so a row of s precedes an equal row of v.
    stacked = vstack((s, v))
    order = lexsort(stacked[:, ::-1].T)

    # Mark which of the stacked rows came from s ...
    from_s = concatenate((ones(num_s, dtype=int), zeros(num_v, dtype=int)))

    # ... and count, for every stacked row, how many rows of s have been
    # seen up to and including its position in the sorted order.
    seen = empty(num_s + num_v, dtype=int)
    seen[order] = cumsum(from_s[order])

    # For the rows of v, that count minus one is the wanted row index of s.
    return seen[num_s:] - 1
def simplex_array_searchsorted(s, v):
    """Find the row indices (of s) corresponding to the
    simplices stored in the rows of simplex array v.
    The rows of s must be stored in lexicographical order.

    Raises ValueError if either input is not 2-dimensional or if the two
    arrays have a different number of columns.

    Example
    -------

    >>> from numpy import array
    >>> s = array([[0,1],[0,2],[1,2],[1,3]])
    >>> v = array([[1,2],[0,2]])
    >>> simplex_array_searchsorted(s,v)
    array([2, 1])

    """
    s = asarray(s)
    v = asarray(v)

    # the ndarray attribute replaces rank(), which NumPy no longer provides
    if s.ndim != 2 or v.ndim != 2:
        raise ValueError('expected rank 2 arrays')

    if s.shape[1] != v.shape[1]:
        raise ValueError('number of columns must agree')

    # compute row indices by sorting both arrays together
    Ns = s.shape[0]
    Nv = v.shape[0]

    # first column is the primary key; the sort is stable, so a row of s
    # always precedes an identical row of v
    perm = lexsort(vstack((s, v))[:, ::-1].T)

    # running count of the rows of s met so far in sorted order
    flags = concatenate((ones(Ns, dtype=int), zeros(Nv, dtype=int)))
    indices = empty(Ns + Nv, dtype=int)
    indices[perm] = cumsum(flags[perm])

    # for the rows of v, that count minus one is the row index into s
    indices = indices[Ns:].copy()
    indices -= 1

    return indices
def processMultiTranscriptGenes(tcrpts):
    """Summarise the exons shared by all transcripts of one gene.

    tcrpts: sequence of transcript strings.  Each string is split on ':'
            and its second field on ',' into exons written 'start-end';
            the first and third field are copied into the result
            (presumably chromosome and strand -- verify against caller).

    Returns None if no transcript is given, if any transcript has no ','
    (i.e. fewer than two exons), or if fewer than three exons occur in
    every transcript.  Otherwise returns the list

        [first constitutive exon, last constitutive exon,
         first field, third field, median transcript length as string]

    where both exons are written as '<first field>:<exon>:<third field>'
    and the lengths come from getTranscriptLengthBex.
    """
    if len(tcrpts) == 0:
        return None

    ### all transcript isoforms have at least two exons
    if not np.all(np.char.find(tcrpts, ',') != -1):
        return None

    #### make matrix of transcript struc and length
    ### unravel exons into one list of exons
    myExons = np.array([exon
                        for rec in tcrpts
                        for exon in rec.split(':')[1].split(',')])
    myExonsInt = np.array([x.split('-') for x in myExons]).astype('int')

    ### sort this (by start, then by end)
    sidxInt = np.lexsort((myExonsInt[:, 1], myExonsInt[:, 0]))
    myExons = myExons[sidxInt]
    myExonsInt = myExonsInt[sidxInt, :]

    ### see how often i got each item
    # NOTE(review): unique_rows is assumed to return (rows, index of one
    # occurrence, occurrence count) -- confirm against ut.
    dummy, uidx, dists = ut.unique_rows(myExonsInt, index=True, counts=True)

    ### get constitutive exons, i.e. those present in every transcript
    iConst = dists == len(tcrpts)
    if np.sum(iConst) < 3:  ### i want at least 3 constitutive exons
        return None
    uqConstEx = myExons[uidx][iConst]

    firstEx = uqConstEx[0]
    lastEx = uqConstEx[-1]

    ## get length of all transcripts
    myExStrucL = [getTranscriptLengthBex(rec, firstEx, lastEx)
                  for rec in tcrpts]

    fields = tcrpts[0].split(':')
    prefix = fields[0]
    suffix = fields[2]
    firstEx = prefix + ':' + firstEx + ':' + suffix
    lastEx = prefix + ':' + lastEx + ':' + suffix
    return [firstEx, lastEx, prefix, suffix, str(np.median(myExStrucL))]
def sort_rows(array, index=None):
    """Sort array by rows

    Rows are ordered lexicographically: by the first column, ties broken
    by the second column, and so on.

    array: numpy array; 2-D in the general case.  An empty array, or a 1-D
           array (treated as one single row), is returned unchanged.
    index: if true, additionally return the row order that was applied

    Returns the sorted array, or the tuple (sorted array, indices) when
    ``index`` is set.  The indices are an empty list for empty input and
    [0] for a single 1-D row.
    """
    # NumPy is called directly: the ``sp.lexsort`` alias of the top-level
    # scipy namespace is deprecated.
    import numpy as np

    ### empty array
    if array.shape[0] == 0:
        return (array, []) if index else array

    ### only one row
    if len(array.shape) == 1:
        return (array, [0]) if index else array

    ### more than one row: lexsort treats its LAST key as the primary one,
    ### so the columns are handed over in reverse order
    s_idx = np.lexsort(array[:, ::-1].T)
    if index:
        return (array[s_idx, :], s_idx)
    return array[s_idx, :]
def sort_rows(array, index=None):
    """Sort array by rows

    The rows are put into lexicographical order (first column first, later
    columns break ties).

    array: numpy array, normally 2-D.  Empty input and 1-D input (one
           single row) need no sorting and are handed back as they are.
    index: when true, the applied row order is returned as well

    Returns the sorted array, or (sorted array, indices) if ``index`` is
    set; the indices are [] for empty input and [0] for a 1-D row.
    """
    # NumPy is called directly: the ``sp.lexsort`` alias of the top-level
    # scipy namespace is deprecated.
    import numpy as np

    if array.shape[0] == 0:
        ### empty array
        order = []
        result = array
    elif len(array.shape) == 1:
        ### only one row
        order = [0]
        result = array
    else:
        ### more than one row; lexsort sorts by its last key first, hence
        ### the reversed columns
        order = np.lexsort(array[:, ::-1].T)
        result = array[order, :]

    if index:
        return (result, order)
    return result
def _generate_throats(self):
    r"""
    Generate the throats (connections, numbering and types)

    The shape (Nx, Ny, Nz) of ``self._template`` defines a grid of
    Np = Nx*Ny*Nz voxels numbered 0..Np-1.  Each voxel is paired with the
    voxel 1, Nx and Nx*Ny positions ahead of it where that neighbour
    exists; only pairs whose two voxels both appear in the 'voxel_index'
    pore data are kept.

    Stores, through the setters of ``self``:

    * throat data 'connections' -- the kept pairs translated through
      ``self._voxel_to_pore_map``
    * throat info 'all'         -- boolean array, one True per throat
    * throat data 'numbering'   -- 0 .. Nt-1
    """
    self._logger.info("generate_throats: Define connections between pores")

    Nx, Ny, Nz = np.shape(self._template)
    Np = Nx * Ny * Nz
    ind = np.arange(0, Np)

    #Generate throats based on pattern of the adjacency matrix
    #This is taken from Cubic
    tpore1_1 = ind[(ind % Nx) < (Nx - 1)]
    tpore2_1 = tpore1_1 + 1
    tpore1_2 = ind[(ind % (Nx * Ny)) < (Nx * (Ny - 1))]
    tpore2_2 = tpore1_2 + Nx
    # every index is already smaller than Np, so no modulo is needed here
    tpore1_3 = ind[ind < (Nx * Ny * (Nz - 1))]
    tpore2_3 = tpore1_3 + Nx * Ny
    tpore1 = np.hstack((tpore1_1, tpore1_2, tpore1_3))
    tpore2 = np.hstack((tpore2_1, tpore2_2, tpore2_3))
    connections = np.vstack((tpore1, tpore2)).T
    # lexsort's last key is the primary key: column 0 first, then column 1
    connections = connections[np.lexsort((connections[:, 1],
                                          connections[:, 0]))]

    #Remove throats to non-active pores
    img_ind = self.get_pore_data(prop='voxel_index')
    tind = (np.isin(connections[:, 0], img_ind) &
            np.isin(connections[:, 1], img_ind))
    connections = connections[tind]
    num_throats = connections.shape[0]

    #Need a cleaner way to do this other than voxel_to_pore map...figure out later
    self.set_throat_data(prop='connections',
                         data=self._voxel_to_pore_map[connections])
    self.set_throat_info(label='all',
                         locations=np.ones(num_throats, dtype=bool))
    self.set_throat_data(prop='numbering', data=np.arange(0, num_throats))
    self._logger.debug("generate_throats: End of method")
def readExpDataBam(base_dir):
    """Read all '*.tsv' tables of a directory into one matrix.

    Every file is parsed as a whitespace-separated table whose first line
    is a header row.  Column 0 holds the row labels (taken from the first
    file that is read and used for all files), column 1 the values.

    base_dir: directory that contains the '*.tsv' files

    Returns (exonpos, header, data):

    * exonpos -- row labels that start with 'chr', ordered by the text
                 between 'chr' and ':' (compared as strings) and then by
                 the integer that follows the ':'
    * header  -- file names up to their first '.', sorted
    * data    -- float array, one row per entry of exonpos and one column
                 per entry of header

    Raises ValueError if base_dir contains no '*.tsv' file.
    """
    # NumPy is called directly: the ``sp.<numpy function>`` aliases of the
    # top-level scipy namespace are deprecated.
    import numpy as np

    allfiles = fnmatch.filter(os.listdir(base_dir), '*.tsv')
    if not allfiles:
        raise ValueError('no *.tsv files found in %s' % base_dir)

    header = []
    columns = []
    exonpos = None
    for f in allfiles:
        # sep=r'\s+' is the documented replacement for delim_whitespace=True
        table = np.array(pandas.read_csv(os.path.join(base_dir, f),
                                         sep=r'\s+'))
        if exonpos is None:
            exonpos = table[:, 0]
        header.append(f.split('.')[0])
        columns.append(table[:, 1].astype('float'))

    # stack once instead of growing the matrix file by file
    header = np.array(header)
    data = np.vstack(columns).T

    sidx = np.argsort(header)
    header = header[sidx]
    data = data[:, sidx]

    ### remove non chromosomal contigs
    iOK = np.array([x.startswith('chr') for x in exonpos], dtype=bool)
    exonpos = exonpos[iOK]
    data = data[iOK, :]

    # Every remaining label starts with 'chr', so drop exactly that prefix;
    # str.strip('chr') would remove any leading/trailing 'c', 'h', 'r'.
    chrm = np.array([x.split(':')[0][3:] for x in exonpos])
    start = np.array([x.split(':')[1].split('-')[0]
                      for x in exonpos]).astype('int')

    # lexsort's last key is the primary key: chromosome first, then start
    sidx = np.lexsort((start, chrm))
    exonpos = exonpos[sidx]
    data = data[sidx, :]

    return exonpos, header, data
def sort(self):
    """Bring the vertices into lexicographical order.

    The order is determined by row 0 of ``self.vertices``, ties broken by
    row 1, and is applied by passing the index array to ``self.reorder``.
    """
    # NumPy is called directly: the ``sp.lexsort`` alias of the top-level
    # scipy namespace is deprecated.
    import numpy as np

    # lexsort treats its LAST key as the primary one
    s_idx = np.lexsort((self.vertices[1, :], self.vertices[0, :]))
    self.reorder(s_idx)
def _read_gene_loci(fn, format):
    """Collect [gene_ID, 'chr:start-end...'] pairs for the gene records of fn."""
    if format not in ('gaf', 'gtf', 'gff', 'gff3'):
        raise ValueError('unsupported annotation format: %s' % format)

    data = []
    # the context manager closes the file even if a line fails to parse
    with open(fn, 'r') as fh:
        if format == 'gaf':
            for l in fh:
                lSpl = l.strip('\n').split('\t')
                if lSpl[2] != 'gene' or lSpl[8] != 'genome' or lSpl[15] == '':
                    continue
                data.append([lSpl[1], lSpl[16]])
        elif format == 'gtf':
            for l in fh:
                ## comments
                if l[0] == '#':
                    continue
                lSpl = l.strip('\n').split('\t')
                if lSpl[2].lower() != 'gene':
                    continue
                tags = get_tags_gtf(lSpl[8])
                data.append([tags['gene_id'],
                             '%s:%s-%s' % (lSpl[0], lSpl[3], lSpl[4])])
        else:
            gene_types = ['gene', 'lincrna_gene', 'mirna_gene',
                          'processed_transcript', 'rrna_gene', 'snrna_gene',
                          'snorna_gene']
            for l in fh:
                ## comments
                if l[0] == '#':
                    continue
                lSpl = l.strip('\n').split('\t')
                if lSpl[2].lower() not in gene_types:
                    continue
                tags = get_tags_gff3(lSpl[8])
                locus = '%s:%s-%s' % (lSpl[0], lSpl[3], lSpl[4])
                # records without an ID are filed under their Parent
                try:
                    data.append([tags['ID'], locus])
                except KeyError:
                    data.append([tags['Parent'], locus])
    return data


def getOverlapGenes(fn, format):
    """
    Returns a list of gene names which are overlapping

    fn:     path of the annotation file
    format: 'gaf', 'gtf', 'gff' or 'gff3'

    Only genes on chromosomes that are numeric after removing a leading
    'chr' are considered, with X and Y counted as 23 and 24.  A gene is
    reported when its start or its end lies inside the range of a gene
    with a different ID on the same chromosome; IDs are compared up to
    their first '|'.

    Returns a sorted array of unique gene IDs (empty if nothing overlaps).
    Raises ValueError for an unsupported format.
    """
    ### data contains two columns: gene_ID, GeneLocus
    ### (e.g., chr7:130020290-130027948:+)
    data = _read_gene_loci(fn, format)

    myOverlapGenes = []
    if len(data) == 0:
        return np.unique(myOverlapGenes)
    data = np.array(data)

    ### fix positions -> one row [chromosome, start, end] per gene
    pos = []
    for locus in data[:, 1]:
        fields = locus.split(':')
        chrm = fields[0]
        # drop exactly the 'chr' prefix; str.strip('chr') would remove any
        # leading/trailing 'c', 'h' and 'r' characters instead
        if chrm.startswith('chr'):
            chrm = chrm[3:]
        chrm = {'X': '23', 'Y': '24'}.get(chrm, chrm)
        pos.append([chrm] + fields[1].split('-'))
    pos = np.array(pos)

    ### filter weird things like mitochondria etc.
    iOK = np.char.isdigit(pos[:, 0])
    pos = pos[iOK, :].astype('int')
    data = data[iOK, :]

    ### sort everything nicely (chromosome, then start, then end)
    sidx = np.lexsort((pos[:, 2], pos[:, 1], pos[:, 0]))
    pos = pos[sidx, :]
    data = data[sidx, :]

    # the ID up to the first '|' is needed for every comparison below
    gene_ids = np.array([x.split('|')[0] for x in data[:, 0]])

    ### find genes with overlapping annotations
    for i in range(pos.shape[0]):
        chrm, start, end = pos[i, :]
        ## same chr
        iChr = pos[:, 0] == chrm
        ## end is in something else
        iEnd = iChr & (pos[:, 1] <= end) & (end <= pos[:, 2])
        ## st is in something else
        iSt = iChr & (pos[:, 1] <= start) & (start <= pos[:, 2])

        ## on both ends the only entry that overlaps to i is i itself
        if np.sum(iEnd) == 1 and np.sum(iSt) == 1:
            continue

        ### extract IDs of overlapping genes; several entries carrying one
        ### and the same ID do not count as an overlap
        for hits in (gene_ids[iSt], gene_ids[iEnd]):
            if np.unique(hits).shape[0] > 1:
                myOverlapGenes.extend(hits.tolist())

    return np.unique(myOverlapGenes)