def __init__(self, filename=None, usecols=(0, 1, 2, 3, 3), **kwargs):
    """Create the container, optionally filling it from a file or a table.

    Records are grouped by their ``'chrom'`` field into ``self.data`` and
    each group is stored as a record array sorted by its ``'start'`` field.

    Parameters
    ----------
    filename : str, numpy.ndarray, list or None
        * ``str`` -- a path opened with ``aio.loadstream`` and parsed by
          ``np.genfromtxt``.
        * ``numpy.ndarray`` / ``list`` -- an in-memory table; it is
          transposed and the entries selected by ``usecols`` are mapped
          onto the fields of ``self._bedgraphdtype``.
        * ``None`` -- leave the container empty.
    usecols : sequence of int
        Column indices assigned, in order, to the fields of
        ``self._bedgraphdtype``.  The default repeats column 3 for the
        last field.
    **kwargs
        Forwarded to ``np.genfromtxt`` when ``filename`` is a path.

    Raises
    ------
    TypeError
        If ``filename`` is not one of the supported types.
    """
    self.data = {}
    self.__sorted_keys = []
    self.itr = 0
    if filename is None:
        return

    if isinstance(filename, str):
        from aio import loadstream
        f = loadstream(filename)
        try:
            readtable = np.genfromtxt(
                f,
                dtype=self._bedgraphdtype,
                usecols=usecols,
                **kwargs)
        finally:
            # Release the stream even when parsing fails.
            f.close()
    elif isinstance(filename, (np.ndarray, list)):
        # Index with a flat list: the nested ``[[usecols]]`` form depended
        # on legacy "list treated as tuple" indexing, which current NumPy
        # interprets as an index array with an extra leading dimension.
        columns = np.array(filename).transpose()[list(usecols)]
        readtable = np.rec.fromarrays(columns, dtype=self._bedgraphdtype)
    else:
        # Previously fell through and failed later with a NameError.
        raise TypeError(
            "filename must be a str, numpy.ndarray or list, got %s"
            % type(filename).__name__)

    # Bucket the records per chromosome.
    for line in readtable:
        self.data.setdefault(line['chrom'], []).append(line)

    # Freeze every bucket into a record array ordered by start position.
    for chrom in self.data:
        self.data[chrom] = np.rec.fromrecords(
            self.data[chrom], dtype=self._bedgraphdtype)
        self.data[chrom].sort(kind='heapsort', order='start')

    self._flush()
def __init__(self, genomeName, usechr=['#', 'X']):
    """Load the chromosome table for ``genomeName`` and keep selected rows.

    The table is read from ``genomes/<genomeName>.info`` located next to
    this module and parsed into a structured array with the fields
    ``'chrom'`` and ``'length'``, which is stored on ``self.info``.

    Parameters
    ----------
    genomeName : str
        Base name of the ``.info`` file inside the ``genomes`` directory.
    usechr : sequence
        Selectors for the rows to keep.  ``'#'`` keeps every name matching
        the pattern ``chr[0-9]``; any other value ``v`` keeps the row whose
        name equals ``'chr' + str(v)``.  The default is never mutated.
    """
    datafile = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'genomes/' + genomeName + '.info')
    f = loadstream(datafile)
    try:
        # genfromtxt yields a 0-d array for a single-row file; promote it so
        # len() and boolean-mask indexing below keep working.
        info = np.atleast_1d(
            np.genfromtxt(f, dtype=[('chrom', 'S30'), ('length', int)]))
    finally:
        # Release the stream even when parsing fails.
        f.close()

    # 'S30' values are bytes on Python 3; decode once so both the regex and
    # the equality test compare text with text.
    names = [c.decode() if isinstance(c, bytes) else c for c in info['chrom']]

    choices = np.zeros(len(info), dtype=bool)
    for chrnum in usechr:
        if chrnum == '#':
            hits = [re.search('chr[0-9]', name) is not None for name in names]
        else:
            wanted = 'chr' + str(chrnum)
            hits = [name == wanted for name in names]
        choices = np.logical_or(hits, choices)

    self.info = info[choices]
def __init__(self, filename, genome=None, resolution=None, usechr=['#', 'X']):
    """Create a contact matrix from a size, an HDF5 file or a text file.

    Parameters
    ----------
    filename : int or str
        * ``int`` -- allocate a zero-filled ``filename x filename``
          float32 matrix.
        * ``str`` ending in ``.hdf5`` or ``.hmat`` -- read the ``'matrix'``
          and ``'idx'`` datasets and, when present, the pickled
          ``'applyedMethods'``, ``'genome'`` and ``'resolution'`` entries.
        * any other ``str`` -- read a whitespace-delimited text file whose
          first three columns of each row go to the index and whose
          remaining columns form one matrix row.
    genome : str, optional
    resolution : int, optional
        When both are given with these types, the bin index is built from
        ``utils.genome(genome, usechr=usechr).bininfo(resolution)`` via
        ``self._buildindex``.
    usechr : sequence
        Forwarded to ``utils.genome``.  The default is never mutated.

    Raises
    ------
    IOError
        If ``filename`` is a string that is not an existing file.
    RuntimeError
        If ``genome`` and ``resolution`` are passed although the loaded
        file already provided both.
    """
    self._applyedMethods = {}

    if isinstance(filename, int):
        self.matrix = np.zeros((filename, filename), dtype=np.float32)
    elif isinstance(filename, str):
        if not os.path.isfile(filename):
            raise IOError("File %s doesn't exist!\n" % (filename))

        extension = os.path.splitext(filename)[1]
        if extension in ('.hdf5', '.hmat'):
            h5f = h5py.File(filename, 'r')
            try:
                self.matrix = h5f['matrix'][:]
                self.idx = h5f['idx'][:]
                # NOTE(review): the entries below are unpickled straight
                # from the file; unpickling untrusted data can execute
                # arbitrary code, so only open files from trusted sources.
                # ``ds[()]`` reads the whole dataset, which is what the
                # removed ``Dataset.value`` property returned.
                if 'applyedMethods' in h5f.keys():
                    self._applyedMethods = cPickle.loads(
                        h5f['applyedMethods'][()])
                if 'genome' in h5f.keys() and 'resolution' in h5f.keys():
                    self.genome = cPickle.loads(h5f['genome'][()])
                    self.resolution = cPickle.loads(h5f['resolution'][()])
            finally:
                # Release the file even when a dataset is missing.
                h5f.close()
        else:
            from aio import loadstream
            splitter = re.compile(r'\t+|\s+')
            f = loadstream(filename)
            try:
                # The first row fixes the matrix size: every column after
                # the three index columns is one matrix column.
                fields = splitter.split(next(f).rstrip())
                n = len(fields) - 3
                self.matrix = np.zeros((n, n), dtype=np.float32)
                idx = [fields[0:3] + ['']]
                self.matrix[0] = fields[3:]
                for i, s in enumerate(f, 1):
                    fields = splitter.split(s.rstrip())
                    # Fourth index entry is appended as an empty string.
                    idx.append(fields[0:3] + [''])
                    self.matrix[i] = fields[3:]
            finally:
                # Release the stream even when a row fails to parse.
                f.close()
            self.idx = np.rec.fromarrays(np.array(idx).transpose(),
                                         dtype=self._idxdtype)
    #----------------end filename

    if isinstance(genome, str) and isinstance(resolution, int):
        if hasattr(self, "genome") and hasattr(self, "resolution"):
            raise RuntimeError(
                "Genome and resolution has already been specified.")
        genomedb = utils.genome(genome, usechr=usechr)
        bininfo = genomedb.bininfo(resolution)
        flaglist = [''] * len(bininfo.chromList)
        self.genome = genome
        self.resolution = resolution
        self._buildindex(bininfo.chromList, bininfo.startList,
                         bininfo.endList, flaglist)