Esempio n. 1
0
    def __init__(self,filename=None,usecols=(0,1,2,3,3),**kwargs):
        """Create the container and optionally fill it from ``filename``.

        Parameters
        ----------
        filename : None, str, numpy.ndarray or list
            ``None`` leaves the container empty.  A ``str`` is opened with
            ``aio.loadstream`` and parsed by ``numpy.genfromtxt``.  An array
            or list is treated as row-major records whose columns are picked
            with ``usecols``.
        usecols : sequence of int
            Column indices mapped, in order, onto the fields of
            ``self._bedgraphdtype``.
        **kwargs
            Forwarded to ``numpy.genfromtxt`` (text input only).

        Raises
        ------
        TypeError
            If ``filename`` is not one of the supported types.

        After loading, ``self.data`` maps each distinct ``'chrom'`` value to a
        record array of its rows sorted by ``'start'``, and ``self._flush()``
        is called once.
        """
        self.data            = {}
        self.__sorted_keys   = []
        self.itr             = 0
        if filename is None:
            return

        if isinstance(filename,str):
            from aio import loadstream
            f = loadstream(filename)
            try:
                readtable = np.genfromtxt(
                                    f,
                                    dtype=self._bedgraphdtype,
                                    usecols=usecols,
                                    **kwargs)
            finally:
                # Release the stream even when parsing fails.
                f.close()
        elif isinstance(filename,(np.ndarray,list)):
            # Select the columns with a flat integer index.  Wrapping usecols
            # in an extra list yields a (1, k) index on current NumPy, which
            # hands fromarrays the wrong number of columns.
            columns = np.array(filename).transpose()[np.atleast_1d(usecols)]
            readtable = np.rec.fromarrays(columns,
                                          dtype = self._bedgraphdtype)
        else:
            raise TypeError("filename must be None, a str path, a numpy "
                            "array or a list, not %s"
                            % type(filename).__name__)

        # A single-row text file comes back from genfromtxt as a 0-d array,
        # which cannot be iterated.
        readtable = np.atleast_1d(readtable)

        # Bucket the rows per chromosome, then freeze each bucket into a
        # record array ordered by start coordinate.
        grouped = {}
        for line in readtable:
            grouped.setdefault(line['chrom'], []).append(line)

        for chrom, rows in grouped.items():
            table = np.rec.fromrecords(rows,
                                       dtype = self._bedgraphdtype)
            table.sort(kind='heapsort',order='start')
            self.data[chrom] = table

        self._flush()
Esempio n. 2
0
    def __init__(self,genomeName,usechr=['#','X']):
        """Load the chromosome table for ``genomeName`` and keep the selected rows.

        The table is read from ``genomes/<genomeName>.info`` next to this
        module, as two columns: ``chrom`` and ``length``.

        Parameters
        ----------
        genomeName : str
            Base name of the ``.info`` file to load.
        usechr : sequence
            Chromosome selectors.  ``'#'`` keeps every row whose name contains
            ``chr`` followed by a digit; any other entry ``x`` keeps the row
            named ``'chr' + str(x)``.  The default list is only read here,
            never modified.

        The filtered table is stored in ``self.info``.
        """
        datafile = os.path.join(os.path.dirname(os.path.abspath(__file__)),'genomes/' + genomeName + '.info')
        f = loadstream(datafile)
        try:
            # atleast_1d: a one-line file would otherwise give a 0-d array
            # that supports neither len() nor boolean-mask indexing.
            self.info = np.atleast_1d(
                np.genfromtxt(f,dtype=[('chrom','S30'),('length',int)]))
        finally:
            # Release the stream even when parsing fails.
            f.close()

        choices = np.zeros(len(self.info),dtype=bool)
        for chrnum in usechr:
            if chrnum == '#':
                numbered = [re.search('chr[0-9]',c) is not None for c in self.info['chrom']]
                choices = np.logical_or(numbered,choices)
            else:
                choices = np.logical_or(self.info['chrom'] == ('chr'+str(chrnum)), choices)

        self.info = self.info[choices]
Esempio n. 3
0
 def __init__(self,filename,genome=None,resolution=None,usechr=['#','X']):
     """Build the matrix from a size, an HDF5 file or a text file.

     Parameters
     ----------
     filename : int or str
         An ``int`` n creates an n x n float32 matrix of zeros.  A ``str`` is
         a path: ``.hdf5`` / ``.hmat`` files are read with h5py (datasets
         ``matrix`` and ``idx``, plus the optional pickled ``applyedMethods``,
         ``genome`` and ``resolution``).  Any other file is parsed as
         whitespace-separated text, where the first three columns of each
         line form the index entry and the rest form one matrix row.
     genome : str, optional
     resolution : int, optional
         When both are given, the bin index is rebuilt from
         ``utils.genome(genome, usechr=usechr).bininfo(resolution)``.
     usechr : sequence
         Forwarded to ``utils.genome``; the default list is not modified here.

     Raises
     ------
     IOError
         If ``filename`` is a string that is not an existing file.
     RuntimeError
         If ``genome``/``resolution`` are passed although both were already
         set from the file.
     """
     self._applyedMethods = {}
     if isinstance(filename,int):
         self.matrix = np.zeros((filename,filename),dtype = np.float32)
     elif isinstance(filename,str):
         if not os.path.isfile(filename):
             raise IOError("File %s doesn't exist!\n" % (filename))
         extension = os.path.splitext(filename)[1]
         if extension == '.hdf5' or extension == '.hmat':
             h5f = h5py.File(filename,'r')
             try:
                 self.matrix = h5f['matrix'][:]
                 self.idx    = h5f['idx'][:]
                 # NOTE(review): the entries below are unpickled straight
                 # from the file; only open files from trusted sources.
                 # ``[()]`` reads the whole dataset and replaces the removed
                 # ``Dataset.value`` attribute.
                 if 'applyedMethods' in h5f.keys():
                     self._applyedMethods = cPickle.loads(h5f['applyedMethods'][()])

                 if 'genome' in h5f.keys() and 'resolution' in h5f.keys():
                     self.genome     = cPickle.loads(h5f['genome'][()])
                     self.resolution = cPickle.loads(h5f['resolution'][()])
             finally:
                 # Close the HDF5 handle even if a dataset is missing.
                 h5f.close()
         else:
             from aio import loadstream
             # Same pattern as before, compiled once instead of per line.
             splitter = re.compile(r'\t+|\s+')
             f = loadstream(filename)
             try:
                 # The first line fixes the matrix size: every column after
                 # the three index columns is one matrix column.
                 fields = splitter.split(next(f).rstrip())
                 n      = len(fields) - 3
                 self.matrix = np.zeros((n,n),dtype = np.float32)
                 # Each index entry gets an empty fourth field appended.
                 idx = [fields[0:3] + ['']]
                 self.matrix[0] = fields[3:]
                 for i, s in enumerate(f, 1):
                     fields = splitter.split(s.rstrip())
                     idx.append(fields[0:3] + [''])
                     self.matrix[i] = fields[3:]
             finally:
                 # Release the stream even when a line fails to parse.
                 f.close()
             self.idx = np.rec.fromarrays(np.array(idx).transpose(),dtype=self._idxdtype)
     #----------------end filename
     if isinstance(genome,str) and isinstance(resolution,int):
         if hasattr(self,"genome") and hasattr(self,"resolution"):
             raise RuntimeError("Genome and resolution has already been specified.")
         genomedb    = utils.genome(genome,usechr=usechr)
         bininfo     = genomedb.bininfo(resolution)
         flaglist    = [''] * len(bininfo.chromList)
         self.genome = genome
         self.resolution = resolution
         self._buildindex(bininfo.chromList,bininfo.startList,bininfo.endList,flaglist)