Exemple #1
0
 def _generate_throats(self):
     r"""
     Build the throat list of an Nx-by-Ny-by-Nz lattice of pores.

     Pores are numbered ``0 .. Nx*Ny*Nz - 1``.  Every pore is paired with
     the pore at index offset 1, ``Nx`` and ``Nx*Ny`` whenever the masks
     below say that such a neighbour exists.

     Reads ``self._Nx``, ``self._Ny`` and ``self._Nz``.  Stores
     ``self['throat.all']`` (boolean array of ones, one entry per throat)
     and ``self['throat.conns']`` (one ``[pore1, pore2]`` row per throat,
     ordered by ``pore1`` and then by ``pore2``).
     """
     nx, ny, nz = self._Nx, self._Ny, self._Nz
     layer = nx * ny
     total = layer * nz
     pores = sp.arange(0, total)

     # (index offset of the neighbour, mask of pores owning such a neighbour)
     directions = (
         (1, (pores % nx) < (nx - 1)),
         (nx, (pores % layer) < (nx * (ny - 1))),
         (layer, (pores % total) < (layer * (nz - 1))),
     )
     heads = [pores[mask] for _, mask in directions]
     tails = [head + step for head, (step, _) in zip(heads, directions)]

     pairs = sp.vstack((sp.hstack(heads), sp.hstack(tails))).T
     # lexsort uses its last key as the primary one: column 0, then column 1
     order = sp.lexsort((pairs[:, 1], pairs[:, 0]))
     n_throats = sp.shape(pairs)[0]
     self['throat.all'] = sp.ones_like(sp.arange(0, n_throats), dtype=bool)
     self['throat.conns'] = pairs[order]
Exemple #2
0
def unique(a):
    """Return the distinct rows of the 2-D array `a`.

    The rows are first ordered with ``lexsort`` over the columns (the last
    column is the primary key).  A row is kept when it is the first row or
    when it differs from the row just before it in that ordering.
    """
    sorted_rows = a[sp.lexsort(a.T)]
    # a non-zero difference in any column marks the start of a new row value
    changed = (sp.diff(sorted_rows, axis=0) != 0).any(axis=1)
    keep = sp.ones(len(sorted_rows), 'bool')
    keep[1:] = changed
    return sorted_rows[keep]
Exemple #3
0
def simplex_array_boundary(s, parity):
    """
    Compute the boundary faces and boundary operator of an
    array of simplices with given simplex parities

    Parameters
    ----------
    s : 2-D integer array
        One simplex per row, each row listing its vertex indices.
        (The example below suggests rows are expected in sorted vertex
        order; that is not checked here.)
    parity : 1-D integer array
        One entry per simplex.  It is turned into an orientation with
        ``1 - 2 * parity``, i.e. 0 -> +1 and 1 -> -1.

    Returns
    -------
    unique_faces : 2-D array
        The distinct faces (a simplex row with one vertex removed), in
        lexicographic row order.
    boundary_operator : csr_matrix
        int8 matrix of shape ``(len(unique_faces), len(s))`` holding, for
        each face/simplex pair, the signed orientation of the face in the
        boundary of the simplex.

    E.g.

      For a mesh with two triangles [0,1,2] and [1,3,2], the second
      triangle has opposite parity relative to sorted order.

      simplex_array_boundary(array([[0,1,2],[1,2,3]]),array([0,1]))

    """
    #TODO handle edge case as special case

    num_simplices = s.shape[0]
    faces_per_simplex = s.shape[1]
    num_faces = num_simplices * faces_per_simplex

    orientations = 1 - 2 * parity

    #faces[:,:-2] are the indices of the faces
    #faces[:,-2]  is the index of the simplex whose boundary produced the face
    #faces[:,-1]  is the orientation of the face in the boundary of the simplex
    faces = empty((num_faces, s.shape[1] + 1), dtype=s.dtype)
    for i in range(faces_per_simplex):
        # view on the slab of rows that receives the faces missing vertex i
        rows = faces[num_simplices * i:num_simplices * (i + 1)]

        rows[:, :i] = s[:, :i]
        rows[:, i:-2] = s[:, i + 1:]
        rows[:, -2] = arange(num_simplices)
        rows[:, -1] = ((-1)**i) * orientations

    #sort rows (keys are reversed so that column 0 is the primary key)
    faces = faces[lexsort(faces[:, :-2].T[::-1])]

    #find unique faces: the first row, plus every row differing from the one
    #before it.  Array methods are used because numpy.alltrue no longer
    #exists in NumPy 2.
    face_mask = hstack(
        (array([True]), (faces[1:, :-2] != faces[:-1, :-2]).any(axis=1)))

    unique_faces = faces[face_mask, :-2]

    #compute CSR representation for boundary operator:
    #row f spans the sorted rows from one unique face up to the next
    csr_indptr = hstack((arange(num_faces)[face_mask], array([num_faces])))
    csr_indices = ascontiguousarray(faces[:, -2])
    csr_data = faces[:, -1].astype('int8')

    shape = (len(unique_faces), num_simplices)
    boundary_operator = csr_matrix((csr_data, csr_indices, csr_indptr), shape)

    return unique_faces, boundary_operator
Exemple #4
0
def simplex_array_boundary(s,parity):
    """
    Compute the boundary faces and boundary operator of an
    array of simplices with given simplex parities

    `s` holds one simplex per row (vertex indices); `parity` holds one
    0/1 entry per simplex and is mapped to an orientation of +1/-1 by
    ``1 - 2*parity``.

    Returns the pair ``(unique_faces, boundary_operator)``: the distinct
    faces in lexicographic row order, and an int8 ``csr_matrix`` with one
    row per unique face and one column per simplex containing the signed
    orientation of each face in each simplex boundary.

    E.g.
    
      For a mesh with two triangles [0,1,2] and [1,3,2], the second
      triangle has opposite parity relative to sorted order.
      
      simplex_array_boundary(array([[0,1,2],[1,2,3]]),array([0,1]))
      
    """
    #TODO handle edge case as special case
    
    num_simplices     = s.shape[0]
    faces_per_simplex = s.shape[1]
    num_faces         = num_simplices * faces_per_simplex

    orientations = 1 - 2*parity

    #faces[:,:-2] are the indices of the faces
    #faces[:,-2]  is the index of the simplex whose boundary produced the face
    #faces[:,-1]  is the orientation of the face in the boundary of the simplex
    faces = empty((num_faces,s.shape[1]+1),dtype=s.dtype)
    for i in range(faces_per_simplex):
        rows = faces[num_simplices*i:num_simplices*(i+1)]

        rows[:,  : i] = s[:,   :i]
        rows[:,i :-2] = s[:,i+1: ]
        rows[:, -2  ] = arange(num_simplices)
        rows[:, -1  ] = ((-1)**i)*orientations

    #sort rows (reversed keys make column 0 the primary sort key)
    faces = faces[lexsort( faces[:,:-2].T[::-1] )]

    #find unique faces
    #NOTE: the mask is boolean, so it must be inverted with ~ ; unary minus
    #on a boolean array is rejected by NumPy.
    same_as_previous = (faces[1:,:-2] == faces[:-1,:-2]).all(axis=1)
    face_mask    = ~hstack((array([False]),same_as_previous))
    unique_faces = faces[face_mask,:-2]

    #compute CSR representation for boundary operator
    csr_indptr  = hstack((arange(num_faces)[face_mask],array([num_faces])))
    csr_indices = ascontiguousarray(faces[:,-2])
    csr_data    = faces[:,-1].astype('int8')
  
    shape = (len(unique_faces),num_simplices)   
    boundary_operator = csr_matrix((csr_data,csr_indices,csr_indptr), shape)

    return unique_faces,boundary_operator
Exemple #5
0
def sort_rows(array, index=None):
    """Sort array by rows

    Rows are ordered lexicographically: by column 0, ties broken by
    column 1, and so on.

    Parameters
    ----------
    array : numpy array (1-D or 2-D)
    index : if equal to True, the row permutation is returned as well

    Returns
    -------
    The row-sorted array, or the tuple ``(sorted_array, permutation)`` when
    `index` is True.  An empty array is returned unchanged with ``[]`` as
    permutation; a 1-D array is treated as a single row and returned
    unchanged with ``[0]`` as permutation.
    """

    ### empty array: nothing to sort
    if array.shape[0] == 0:
        if index == True:
            return (array, [])
        return array

    ### a 1-D array is one single row; column indexing below would fail on it
    if array.ndim == 1:
        if index == True:
            return (array, [0])
        return array

    ### lexsort uses its LAST key as primary key, so hand over the columns
    ### from last to first
    s_idx = sp.lexsort([array[:, -i] for i in range(1, array.shape[1] + 1)])

    if index == True:
        return (array[s_idx, :], s_idx)
    return array[s_idx, :]
Exemple #6
0
def sort_rows(array, index = None):
    """Sort array by rows

    The rows of `array` are put in lexicographic order (column 0 is the
    most significant column).  When `index` equals True the function
    returns ``(sorted_array, permutation)``, otherwise only the sorted
    array.

    Two inputs are returned as they are: an array without rows
    (permutation ``[]``) and a 1-D array, which counts as one row
    (permutation ``[0]``).
    """

    n_dims = len(array.shape)

    if array.shape[0] == 0 or n_dims == 1:
        # Nothing to reorder.  A 1-D array must stop here because it has no
        # column axis to index further down.
        if index == True:
            return (array, [] if array.shape[0] == 0 else [0])
        else:
            return (array)

    # keys go from the last column to the first one: lexsort ranks by its
    # final key first
    s_idx = sp.lexsort([array[:, -i] for i in range(1, array.shape[1] + 1)])

    if index == True:
        return (array[s_idx, :], s_idx)
    else:
        return array[s_idx, :]
Exemple #7
0
 def _generate_throats(self):
     r"""
     Generate the throats (connections, numbering and types)

     Reads the lattice size from ``self._Nx``, ``self._Ny`` and
     ``self._Nz`` and writes two entries on ``self``:

     * ``'throat.all'``   - boolean array of ones, one entry per throat
     * ``'throat.conns'`` - integer array with one ``[pore1, pore2]`` row per
       throat, sorted by ``pore1`` and then ``pore2``
     """
     Nx = self._Nx
     Ny = self._Ny
     Nz = self._Nz
     Np = Nx*Ny*Nz
     ind = sp.arange(0, Np)
     #Generate throats based on pattern of the adjacency matrix:
     #a pore is linked to the pore at index +1, +Nx and +Nx*Ny whenever the
     #corresponding mask selects it
     tpore1_1 = ind[(ind % Nx) < (Nx - 1)]
     tpore2_1 = tpore1_1 + 1
     tpore1_2 = ind[(ind % (Nx*Ny)) < (Nx*(Ny - 1))]
     tpore2_2 = tpore1_2 + Nx
     #every index is already below Np, so no modulo is needed here
     tpore1_3 = ind[ind < (Nx*Ny*(Nz - 1))]
     tpore2_3 = tpore1_3 + Nx*Ny
     tpore1 = sp.hstack((tpore1_1, tpore1_2, tpore1_3))
     tpore2 = sp.hstack((tpore2_1, tpore2_2, tpore2_3))
     connections = sp.vstack((tpore1, tpore2)).T
     #lexsort ranks by its last key first: column 0, then column 1
     connections = connections[sp.lexsort((connections[:, 1], connections[:, 0]))]
     self['throat.all'] = sp.ones(connections.shape[0], dtype=bool)
     self['throat.conns'] = connections
Exemple #8
0
def simplex_array_searchsorted(s, v):
    """Locate each row of `v` among the rows of `s`.

    `s` and `v` are 2-D simplex arrays with the same number of columns, and
    the rows of `s` must already be in lexicographical order.  The result
    holds, for every row of `v`, the index of the matching row of `s`.

    Raises ValueError when either argument is not 2-D or when the column
    counts differ.

    Example
    -------

    >>> from numpy import array
    >>> s = array([[0,1],[0,2],[1,2],[1,3]])
    >>> v = array([[1,2],[0,2]])
    >>> simplex_array_searchsorted(s,v)
    array([2, 1])

    """

    s, v = asarray(s), asarray(v)

    if not (ndim(s) == 2 and ndim(v) == 2):
        raise ValueError('expected rank 2 arrays')

    if s.shape[1] != v.shape[1]:
        raise ValueError('number of columns must agree')

    n_s, n_v = s.shape[0], v.shape[0]

    # Sort the rows of both arrays together (column 0 is the primary key).
    # The sort is stable, so a row of s precedes an equal row of v.
    stacked = vstack((s, v))
    order = lexsort(stacked[:, ::-1].T)

    # Running count of s-rows met so far, written back at each row's
    # original position in the stacked array.
    from_s = concatenate((ones(n_s, dtype=int), zeros(n_v, dtype=int)))
    seen_s = empty(n_s + n_v, dtype=int)
    seen_s[order] = cumsum(from_s[order])

    # the tail belongs to v; the count is 1-based, the row index 0-based
    return seen_s[n_s:] - 1
Exemple #9
0
def simplex_array_searchsorted(s, v):
    """Find the row indices (of s) corresponding to the simplices stored 
    in the rows of simplex array v.  The rows of s must be stored in 
    lexicographical order.

    Both arguments are converted with ``asarray``.  A ValueError is raised
    when either one is not 2-D or when their column counts differ.

    Example
    -------

    >>> from numpy import array
    >>> s = array([[0,1],[0,2],[1,2],[1,3]])
    >>> v = array([[1,2],[0,2]])
    >>> simplex_array_searchsorted(s,v)
    array([2, 1])

    """

    s = asarray(s)
    v = asarray(v)

    # use the ndim attribute: the numpy function rank() no longer exists
    if s.ndim != 2 or v.ndim != 2:
        raise ValueError('expected rank 2 arrays')

    if s.shape[1] != v.shape[1]:
        raise ValueError('number of columns must agree')
   
    # compute row indices by sorting both arrays together
    Ns = s.shape[0]
    Nv = v.shape[0]
    
    # reversed columns make column 0 the primary key; the sort is stable, so
    # a row of s comes before an identical row of v
    perm = lexsort(vstack((s,v))[:,::-1].T)
    
    # cumulative number of s-rows seen in sorted order, scattered back to the
    # original positions; for a row of v this is (index of its match) + 1
    flags = concatenate( (ones(Ns,dtype=int),zeros(Nv,dtype=int)) )
    indices = empty(Ns+Nv, dtype=int)
    indices[perm] = cumsum(flags[perm])
    indices = indices[Ns:].copy()
    indices -= 1

    return indices
Exemple #10
0
def processMultiTranscriptGenes(tcrpts):
    """Summarise a multi-transcript gene by its constitutive exons.

    Each entry of `tcrpts` is split on ':' into three fields; the middle
    field is a comma separated list of 'start-end' exons.  (Presumably the
    outer fields are chromosome and strand - confirm against the caller.)

    Returns None when some transcript has a single exon (no ',' in its
    record) or when fewer than three exons are shared by all transcripts.
    Otherwise returns the list
    ``[firstEx, lastEx, field0, field2, median_length]`` where `firstEx` and
    `lastEx` are the first and last shared exon wrapped as
    'field0:exon:field2', and `median_length` is the string form of the
    median of ``getTranscriptLengthBex(rec, firstEx, lastEx)`` over all
    transcripts.
    """

    ### all transcript isoforms have at least two exons
    if sp.sum(np.char.find(tcrpts, ',') != -1) != len(tcrpts):
        return None

    #### flatten the exons of all transcripts into one 1-D array of strings
    #### (single pass instead of repeated list concatenation)
    myExons       = sp.array([exon
                              for rec in tcrpts
                              for exon in rec.split(':')[1].split(',')])
    myExonsInt    = sp.array([x.split('-') for x in myExons]).astype('int')

    ### sort by start, then by end
    sidxInt       = sp.lexsort((myExonsInt[:,1], myExonsInt[:,0]))
    myExons       = myExons[sidxInt]
    myExonsInt    = myExonsInt[sidxInt,:]

    ### see how often i got each item
    dummy, uidx, dists = ut.unique_rows(myExonsInt, index=True, counts = True)
    ### an exon seen once per transcript is constitutive
    iConst    = dists == len(tcrpts)
    N_match   = sp.sum(iConst)

    if N_match < 3: ### i want at least 3 constitutive exons
        return None

    ### get constitutive exons
    uqConstEx = myExons[uidx][iConst]

    firstEx   = uqConstEx[0]
    lastEx    = uqConstEx[-1]

    ## get length of all transcripts
    myExStrucL = [getTranscriptLengthBex(rec, firstEx, lastEx) for rec in tcrpts]

    fields    = tcrpts[0].split(':')
    firstEx   = fields[0] + ':' + firstEx + ':' + fields[2]
    lastEx    = fields[0] + ':' + lastEx  + ':' + fields[2]
    return [firstEx, lastEx, fields[0], fields[2], str(sp.median(myExStrucL))]
Exemple #11
0
def sort_rows(array, index = None):
    """Return `array` with its rows in lexicographic order.

    Column 0 is the most significant column.  When `index` equals True the
    result is the tuple ``(sorted_array, permutation)``.  An array without
    rows comes back unchanged with permutation ``[]``; a 1-D array counts
    as a single row and comes back unchanged with permutation ``[0]``.
    """

    if array.shape[0] == 0:
        ### empty array
        order = []
    elif len(array.shape) == 1:
        ### only one row
        order = [0]
    else:
        ### more than one row: lexsort ranks by its last key first, so the
        ### columns are listed from the last one down to column 0
        n_cols = array.shape[1]
        order = sp.lexsort([array[:, col] for col in range(n_cols - 1, -1, -1)])
        array = array[order, :]

    if index == True:
        return (array, order)
    return array
Exemple #12
0
def sort_rows(array, index=None):
    """Order the rows of `array` lexicographically, column 0 first.

    Returns the sorted array, or ``(sorted_array, permutation)`` when
    `index` equals True.  Inputs that need no sorting are handed back as
    they are: an array with no rows (permutation ``[]``) and a 1-D array,
    regarded as one row (permutation ``[0]``).
    """

    n_rows = array.shape[0]

    ### empty array, or a single row given as a 1-D array
    if n_rows == 0 or len(array.shape) == 1:
        if index == True:
            return (array, [] if n_rows == 0 else [0])
        return array

    ### more than one row; the last key given to lexsort is the primary one
    keys = tuple(array[:, col] for col in reversed(range(array.shape[1])))
    perm = sp.lexsort(keys)
    ranked = array[perm, :]

    if index == True:
        return (ranked, perm)
    return ranked
Exemple #13
0
    def _generate_throats(self):
        r"""
        Define the throats joining neighbouring active voxels.

        The shape of ``self._template`` gives the lattice size.  Candidate
        throats join each voxel to the voxel at index offset 1, Nx and
        Nx*Ny; a candidate is kept only when both of its ends appear in the
        'voxel_index' pore data.  The kept pairs are translated through
        ``self._voxel_to_pore_map`` and stored as the 'connections' throat
        data, together with the 'all' label and the 'numbering' data.
        """
        self._logger.info("generate_throats: Define connections between pores")

        nx, ny, nz = sp.shape(self._template)
        layer = nx*ny
        n_voxels = layer*nz
        voxels = np.arange(0, n_voxels)

        #Voxels owning a neighbour at each offset (pattern taken from Cubic)
        along_x = voxels[(voxels % nx) < (nx-1)]
        along_y = voxels[(voxels % layer) < (nx*(ny-1))]
        along_z = voxels[(voxels % n_voxels) < (layer*(nz-1))]
        first = sp.hstack((along_x, along_y, along_z))
        second = sp.hstack((along_x + 1, along_y + nx, along_z + layer))
        pairs = sp.vstack((first, second)).T
        #order by first voxel, then by second voxel
        pairs = pairs[sp.lexsort((pairs[:, 1], pairs[:, 0]))]

        #Drop throats touching a non-active voxel
        active = self.get_pore_data(prop='voxel_index')
        both_active = sp.in1d(pairs[:, 0], active) & sp.in1d(pairs[:, 1], active)
        pairs = pairs[both_active]
        n_throats = sp.sum(both_active)

        #Need a cleaner way to do this other than voxel_to_pore map...figure out later
        self.set_throat_data(prop='connections', data=self._voxel_to_pore_map[pairs])
        self.set_throat_info(label='all', locations=sp.ones(n_throats, dtype=bool))
        self.set_throat_data(prop='numbering', data=np.arange(0, n_throats))
        self._logger.debug("generate_throats: End of method")
Exemple #14
0
def readExpDataBam(base_dir):
    """Load one value column per ``*.tsv`` file found in `base_dir`.

    Every file is read with ``pandas.read_csv`` as a whitespace separated
    table whose first line is a header.  Column 0 holds position strings of
    the form 'chrom:start-...' and column 1 the numeric value.  The
    positions are taken from the first file listed; all files are expected
    to have the same number of rows.

    Returns
    -------
    exonpos : array of position strings, restricted to entries starting
        with 'chr' and sorted by chromosome name (as a string) and then by
        start coordinate
    header : array of sample names (file name up to the first '.'), sorted
    data : float array of shape (len(exonpos), len(header)); column j
        belongs to header[j]

    Raises
    ------
    ValueError
        If `base_dir` contains no ``*.tsv`` file.
    """
    tsv_files = fnmatch.filter(os.listdir(base_dir), '*.tsv')
    if not tsv_files:
        raise ValueError('no *.tsv files found in %s' % base_dir)

    names   = []
    columns = []
    exonpos = None
    for f in tsv_files:
        # sep=r'\s+' is the documented equivalent of delim_whitespace=True
        table = sp.array(pandas.read_csv(os.path.join(base_dir, f), sep=r'\s+'))
        if exonpos is None:
            exonpos = table[:, 0]
        names.append(f.split('.')[0])
        columns.append(table[:, 1].astype('float'))

    ### stack once at the end: one row per file -> one column per file
    header  = sp.array(names)
    data    = sp.vstack(columns).T

    sidx    = sp.argsort(header)
    header  = header[sidx]
    data    = data[:, sidx]

    ### remove non chromosomal contigs
    iOK     = sp.array([x.startswith('chr') for x in exonpos])
    exonpos = exonpos[iOK]
    data    = data[iOK,:]

    ### every remaining name starts with 'chr': cut exactly that prefix
    ### (str.strip('chr') would also eat trailing c/h/r characters)
    chrm  = sp.array([x.split(':')[0][3:] for x in exonpos])
    start = sp.array([x.split(':')[1].split('-')[0] for x in exonpos]).astype('int')
    ### primary key: chromosome (string order), secondary key: start
    sidx  = sp.lexsort((start, chrm))

    exonpos = exonpos[sidx]
    data    = data[sidx,:]

    return exonpos, header, data
Exemple #15
0
 def sort(self):
     """Reorder by row 0 of ``self.vertices``, ties broken by row 1.

     The permutation is computed with ``lexsort`` and handed to
     ``self.reorder``.
     """
     # lexsort ranks by its last key first, hence row 0 goes last
     secondary = self.vertices[1, :]
     primary = self.vertices[0, :]
     order = sp.lexsort([secondary, primary])
     self.reorder(order)
Exemple #16
0
def _read_gene_loci(fn, format):
    """Collect ``[gene_id, 'chrom:start-end...']`` pairs from annotation file `fn`."""
    data = []
    if format == 'gaf':
        with open(fn, 'r') as fh:
            for l in fh:
                lSpl = l.strip('\n').split('\t')
                if lSpl[2] != 'gene':
                    continue
                if lSpl[8] != 'genome':
                    continue
                if lSpl[15] == '':
                    continue
                data.append([lSpl[1], lSpl[16]])
    elif format == 'gtf':
        with open(fn, 'r') as fh:
            for l in fh:
                ## comments
                if l[0] == '#':
                    continue
                lSpl = l.strip('\n').split('\t')
                if lSpl[2].lower() != 'gene':
                    continue
                tags = get_tags_gtf(lSpl[8])
                data.append([tags['gene_id'], '%s:%s-%s' % (lSpl[0], lSpl[3], lSpl[4])])
    elif format in ['gff', 'gff3']:
        gene_types = ['gene', 'lincrna_gene', 'mirna_gene', 'processed_transcript',
                      'rrna_gene', 'snrna_gene', 'snorna_gene']
        with open(fn, 'r') as fh:
            for l in fh:
                ## comments
                if l[0] == '#':
                    continue
                lSpl = l.strip('\n').split('\t')
                if not lSpl[2].lower() in gene_types:
                    continue
                tags = get_tags_gff3(lSpl[8])
                ## fall back to the parent ID when the record has no ID of its own
                try:
                    gene_id = tags['ID']
                except KeyError:
                    gene_id = tags['Parent']
                data.append([gene_id, '%s:%s-%s' % (lSpl[0], lSpl[3], lSpl[4])])
    else:
        raise ValueError('unsupported annotation format: %s' % format)
    return data


def getOverlapGenes(fn, format):
    """
    Returns the names of genes which are overlapping

    Parameters
    ----------
    fn : path of the annotation file
    format : 'gaf', 'gtf', 'gff' or 'gff3'

    Returns
    -------
    Sorted numpy array of unique gene names (the part of the gene ID before
    the first '|') whose start or end position lies inside another gene on
    the same chromosome.  Chromosomes 'X' and 'Y' are treated as 23 and 24;
    genes on any other non-numeric chromosome are ignored.  The array is
    empty when the file yields no gene or no overlap.

    Raises
    ------
    ValueError
        If `format` is not one of the supported formats.
    """

    ### data contains two columns: gene_ID, GeneLocus (e.g., chr7:130020290-130027948:+)
    data = _read_gene_loci(fn, format)
    if not data:
        return sp.unique(data)
    data = sp.array(data)

    ### split every locus into chromosome, start and end.  X/Y are recoded
    ### before the array is built, so the fixed-width string dtype cannot
    ### truncate the replacement values.
    sexChrm = {'X': '23', 'Y': '24'}
    rows = []
    for locus in data[:, 1]:
        parts = locus.split(':')
        chrm  = parts[0]
        if chrm.startswith('chr'):
            chrm = chrm[3:]
        start, end = parts[1].split('-')
        rows.append([sexChrm.get(chrm, chrm), start, end])
    pos = sp.array(rows)

    ### filter weird things like mitochondria etc.
    iOK  = np.char.isdigit(pos[:,0])
    pos  = pos[iOK,:]
    data = data[iOK,:]
    pos  = pos.astype('int')

    ### sort everything nicely (chromosome, then start, then end)
    sidx = sp.lexsort((pos[:,2], pos[:,1], pos[:,0]))
    pos  = pos[sidx,:]
    data = data[sidx,:]

    ### find genes with overlapping annotations
    myOverlapGenes = []
    for i in range(pos.shape[0]):
        chrm, start, end = pos[i,:]

        ## same chr
        iChr   = pos[:,0] == chrm

        ## genes that contain the end / the start of gene i (always includes i)
        iEnd   = iChr & (pos[:,1] <= end)   & (end   <= pos[:,2])
        iSt    = iChr & (pos[:,1] <= start) & (start <= pos[:,2])

        ## on both ends the only entry that overlaps to i is i itself --> continue
        if (sp.sum(iEnd) == 1) and (sp.sum(iSt) == 1):
            continue

        ### extract IDs of overlapping genes
        overlapgenesSt  = sp.array([x.split('|')[0] for x in data[iSt, 0]])
        overlapgenesEnd = sp.array([x.split('|')[0] for x in data[iEnd, 0]])

        ### several entries may carry the same gene name; only report a group
        ### when it holds more than one distinct name
        if sp.unique(overlapgenesSt).shape[0] > 1:
            myOverlapGenes.extend(overlapgenesSt.tolist())
        if sp.unique(overlapgenesEnd).shape[0] > 1:
            myOverlapGenes.extend(overlapgenesEnd.tolist())
    return sp.unique(myOverlapGenes)