def extractContigs(self, timer, bids=[], fasta=[], prefix='', cutoff=0):
    """Extract the contigs of every bin and write them to per-bin files.

    For each bin id returned by the BinManager a file named
    "<prefix>_bin_<bid>.fna" is written into self._outDir, holding one
    ">cid" header line plus one sequence line per contig.

    timer  -- passed through to self.loadProfile
    bids   -- passed through to self.loadProfile
    fasta  -- iterable of contig file paths to read; a file whose name is
              guessed to be gzip-encoded is opened with gzip.open
    prefix -- output file name prefix; when None or empty it is derived
              from the base name of self.dbFileName with ".gm" and ".sm"
              removed
    cutoff -- passed through to self.loadProfile

    Any exception raised while reading or writing is reported on stdout
    and then re-raised unchanged.
    """
    import mimetypes

    if prefix is None or prefix == '':
        prefix = os.path.basename(self.dbFileName) \
                        .replace(".gm", "") \
                        .replace(".sm", "")
    profile = self.loadProfile(timer, bids, cutoff)
    bm = BinManager(profile)

    # load all the contigs which have been assigned to bins
    cp = ContigParser()
    # contigs looks like cid->seq
    contigs = {}
    for fasta_name in fasta:
        # One try per input file so that the error message names the file
        # that actually failed rather than always the first one in the list.
        try:
            gm_open = open
            try:
                # handle gzipped files
                mime = mimetypes.guess_type(fasta_name)
                if mime[1] == 'gzip':
                    # imported lazily: only needed for compressed input
                    import gzip
                    gm_open = gzip.open
            except:
                print("Error when guessing contig file mimetype")
                raise
            with gm_open(fasta_name, "r") as f:
                cp.getWantedSeqs(f, profile.contigNames, out_dict=contigs)
        except:
            print("%s %s %s" % ("Could not parse contig file:",
                                fasta_name,
                                sys.exc_info()[0]))
            raise

    # now print out the sequences
    print("Writing files")
    for bid in bm.getBids():
        out_name = os.path.join(self._outDir,
                                "%s_bin_%d.fna" % (prefix, bid))
        try:
            with open(out_name, 'w') as f:
                for cid in bm.profile.contigNames[bm.getBinIndices(bid)]:
                    if cid in contigs:
                        f.write(">%s\n%s\n" % (cid, contigs[cid]))
                    else:
                        # contig is assigned to the bin but was not found in
                        # any of the supplied contig files
                        print("These are not the contigs you're looking for. ( %s )" % (cid))
        except:
            print("%s %s %s" % ("Could not open file for writing:",
                                out_name,
                                sys.exc_info()[0]))
            raise
def extractReads(self,
                 timer,
                 bids=[],
                 bams=[],
                 prefix="",
                 mixBams=False,
                 mixGroups=False,
                 mixReads=False,
                 interleaved=False,
                 bigFile=False,
                 headersOnly=False,
                 minMapQual=0,
                 maxMisMatches=1000,
                 useSuppAlignments=False,
                 useSecondaryAlignments=False,
                 threads=1,
                 verbose=False):
    """Extract the reads belonging to each bin from the given bam files.

    The profile is loaded for the requested bins, one target group of
    contig names (labelled "BIN_<bid>") is built per bin, and the groups
    are handed to a BMBE extractor together with all filtering / output
    options. The extraction itself is delegated to that extractor (BamM).
    """
    # load the data and wrap it in a bin manager
    bin_manager = BinManager(self.loadProfile(timer, bids))

    print("Extracting reads")

    # one list of contig names and one label per bin, in matching order
    contig_groups = []
    labels = []
    for bin_id in bin_manager.getBids():
        labels.append("BIN_%d" % bin_id)
        rows = bin_manager.getBinIndices(bin_id)
        contig_groups.append(list(bin_manager.profile.contigNames[rows]))

    # every remaining option is forwarded to the extractor by keyword
    extractor_options = {
        "groupNames": labels,
        "prefix": prefix,
        "outFolder": self._outDir,
        "mixBams": mixBams,
        "mixGroups": mixGroups,
        "mixReads": mixReads,
        "interleaved": interleaved,
        "bigFile": bigFile,
        "headersOnly": headersOnly,
        "minMapQual": minMapQual,
        "maxMisMatches": maxMisMatches,
        "useSuppAlignments": useSuppAlignments,
        "useSecondaryAlignments": useSecondaryAlignments,
    }
    extractor = BMBE(contig_groups, bams, **extractor_options)
    extractor.extract(threads=threads, verbose=verbose)
def extractReads(self,
                 timer,
                 bids=[],
                 bams=[],
                 prefix="",
                 mixBams=False,
                 mixGroups=False,
                 mixReads=False,
                 interleaved=False,
                 bigFile=False,
                 headersOnly=False,
                 minMapQual=0,
                 maxMisMatches=1000,
                 useSuppAlignments=False,
                 useSecondaryAlignments=False,
                 threads=1,
                 verbose=False):
    """Extract the reads belonging to each bin from the given bam files.

    The profile is loaded for the requested bins, one target group of
    contig names (labelled "BIN_<bid>") is built per bin, and the groups
    are handed to a BMBE extractor together with all filtering / output
    options. The extraction itself is delegated to that extractor (BamM).
    """
    # load the data and wrap it in a bin manager
    bin_manager = BinManager(self.loadProfile(timer, bids))

    print("Extracting reads")

    # one list of contig names and one label per bin, in matching order
    contig_groups = []
    labels = []
    for bin_id in bin_manager.getBids():
        labels.append("BIN_%d" % bin_id)
        rows = bin_manager.getBinIndices(bin_id)
        contig_groups.append(list(bin_manager.profile.contigNames[rows]))

    # every remaining option is forwarded to the extractor by keyword
    extractor_options = {
        "groupNames": labels,
        "prefix": prefix,
        "outFolder": self._outDir,
        "mixBams": mixBams,
        "mixGroups": mixGroups,
        "mixReads": mixReads,
        "interleaved": interleaved,
        "bigFile": bigFile,
        "headersOnly": headersOnly,
        "minMapQual": minMapQual,
        "maxMisMatches": maxMisMatches,
        "useSuppAlignments": useSuppAlignments,
        "useSecondaryAlignments": useSecondaryAlignments,
    }
    extractor = BMBE(contig_groups, bams, **extractor_options)
    extractor.extract(threads=threads, verbose=verbose)
def extractContigs(self, timer, bids=[], fasta=[], prefix='', cutoff=0):
    """Extract the contigs of every bin and write them to per-bin files.

    For each bin id returned by the BinManager a file named
    "<prefix>_bin_<bid>.fna" is written into self._outDir, holding one
    ">cid" header line plus one sequence line per contig.

    timer  -- passed through to self.loadProfile
    bids   -- passed through to self.loadProfile
    fasta  -- iterable of contig file paths to read; a file whose name is
              guessed to be gzip-encoded is opened with gzip.open
    prefix -- output file name prefix; when None or empty it is derived
              from the base name of self.dbFileName with ".gm" and ".sm"
              removed
    cutoff -- passed through to self.loadProfile

    Any exception raised while reading or writing is reported on stdout
    and then re-raised unchanged.
    """
    import mimetypes

    if prefix is None or prefix == '':
        prefix = os.path.basename(self.dbFileName) \
                        .replace(".gm", "") \
                        .replace(".sm", "")
    profile = self.loadProfile(timer, bids, cutoff)
    bm = BinManager(profile)

    # load all the contigs which have been assigned to bins
    cp = ContigParser()
    # contigs looks like cid->seq
    contigs = {}
    for fasta_name in fasta:
        # One try per input file so that the error message names the file
        # that actually failed rather than always the first one in the list.
        try:
            gm_open = open
            try:
                # handle gzipped files
                mime = mimetypes.guess_type(fasta_name)
                if mime[1] == 'gzip':
                    # imported lazily: only needed for compressed input
                    import gzip
                    gm_open = gzip.open
            except:
                print("Error when guessing contig file mimetype")
                raise
            with gm_open(fasta_name, "r") as f:
                cp.getWantedSeqs(f, profile.contigNames, out_dict=contigs)
        except:
            print("%s %s %s" % ("Could not parse contig file:",
                                fasta_name,
                                sys.exc_info()[0]))
            raise

    # now print out the sequences
    print("Writing files")
    for bid in bm.getBids():
        out_name = os.path.join(self._outDir,
                                "%s_bin_%d.fna" % (prefix, bid))
        try:
            with open(out_name, 'w') as f:
                for cid in bm.profile.contigNames[bm.getBinIndices(bid)]:
                    if cid in contigs:
                        f.write(">%s\n%s\n" % (cid, contigs[cid]))
                    else:
                        # contig is assigned to the bin but was not found in
                        # any of the supplied contig files
                        print("These are not the contigs you're looking for. ( %s )" % (cid))
        except:
            print("%s %s %s" % ("Could not open file for writing:",
                                out_name,
                                sys.exc_info()[0]))
            raise
def extractMappingInfo(self,
                       timer,
                       bids=[],
                       prefix='',
                       separator='\t',
                       cutoff=0):
    """Write the marker mapping information of each bin to a text file.

    For every bin id returned by the BinManager a file named
    "<prefix>_bin_<bid>.txt" is created in self._outDir. It holds an
    "#info table" section (label, quoted taxonomy string and contig name
    per marker, joined with `separator`) followed by a "#marker tree"
    section produced by MarkerCheckTreePrinter.printTree.

    When `prefix` is None or empty it is derived from the base name of
    self.dbFileName with ".gm" and ".sm" removed. Errors raised while
    writing are reported on stdout and re-raised.
    """
    if prefix is None or prefix == '':
        db_base = os.path.basename(self.dbFileName)
        prefix = db_base.replace(".gm", "").replace(".sm", "")

    profile = self.loadProfile(timer, bids, cutoff)
    bm = BinManager(profile)
    mt = MarkerCheckTreePrinter(profile)

    # emit one report per bin
    print("Writing files")
    for bin_id in bm.getBids():
        out_path = os.path.join(self._outDir,
                                "%s_bin_%d.txt" % (prefix, bin_id))

        members = bm.getBinIndices([bin_id])
        # positions in the mapping arrays whose row index is in this bin
        in_bin = np.in1d(profile.mapping.rowIndices, members)
        hits = np.flatnonzero(in_bin)

        marker_names = profile.mapping.markerNames[hits]
        contig_names = profile.contigNames[profile.mapping.rowIndices[hits]]
        lineages = profile.mapping.taxstrings[hits]

        try:
            with open(out_path, 'w') as out:
                # section 1: labels and lineages
                header = separator.join(['label', 'taxonomy', 'contig_name'])
                out.write('#info table\n%s\n' % header)
                for marker, lineage, contig in zip(marker_names,
                                                   lineages,
                                                   contig_names):
                    fields = [marker, '\'%s\'' % lineage, contig]
                    out.write('%s\n' % separator.join(fields))

                # section 2: marker tree
                out.write('\n#marker tree\n')
                tree_text = mt.printTree(profile.mapping.rowIndices[hits],
                                         leaves_list=members)
                out.write(tree_text)
        except:
            print("%s %s %s" % ("Could not open file for writing:",
                                out_path,
                                sys.exc_info()[0]))
            raise
def extractMappingInfo(self,
                       timer,
                       bids=[],
                       prefix='',
                       separator='\t',
                       cutoff=0):
    """Write the marker mapping information of each bin to a text file.

    For every bin id returned by the BinManager a file named
    "<prefix>_bin_<bid>.txt" is created in self._outDir. It holds an
    "#info table" section (label, quoted taxonomy string and contig name
    per marker, joined with `separator`) followed by a "#marker tree"
    section produced by MarkerCheckTreePrinter.printTree.

    When `prefix` is None or empty it is derived from the base name of
    self.dbFileName with ".gm" and ".sm" removed. Errors raised while
    writing are reported on stdout and re-raised.
    """
    if prefix is None or prefix == '':
        db_base = os.path.basename(self.dbFileName)
        prefix = db_base.replace(".gm", "").replace(".sm", "")

    profile = self.loadProfile(timer, bids, cutoff)
    bm = BinManager(profile)
    mt = MarkerCheckTreePrinter(profile)

    # emit one report per bin
    print("Writing files")
    for bin_id in bm.getBids():
        out_path = os.path.join(self._outDir,
                                "%s_bin_%d.txt" % (prefix, bin_id))

        members = bm.getBinIndices([bin_id])
        # positions in the mapping arrays whose row index is in this bin
        in_bin = np.in1d(profile.mapping.rowIndices, members)
        hits = np.flatnonzero(in_bin)

        marker_names = profile.mapping.markerNames[hits]
        contig_names = profile.contigNames[profile.mapping.rowIndices[hits]]
        lineages = profile.mapping.taxstrings[hits]

        try:
            with open(out_path, 'w') as out:
                # section 1: labels and lineages
                header = separator.join(['label', 'taxonomy', 'contig_name'])
                out.write('#info table\n%s\n' % header)
                for marker, lineage, contig in zip(marker_names,
                                                   lineages,
                                                   contig_names):
                    fields = [marker, '\'%s\'' % lineage, contig]
                    out.write('%s\n' % separator.join(fields))

                # section 2: marker tree
                out.write('\n#marker tree\n')
                tree_text = mt.printTree(profile.mapping.rowIndices[hits],
                                         leaves_list=members)
                out.write(tree_text)
        except:
            print("%s %s %s" % ("Could not open file for writing:",
                                out_path,
                                sys.exc_info()[0]))
            raise