Esempio n. 1
0
    def extractContigs(self,
                       timer,
                       bids=None,
                       fasta=None,
                       prefix='',
                       cutoff=0):
        """Extract the contigs of each bin and write one FASTA file per bin.

        Parameters:
            timer:  passed straight through to self.loadProfile.
            bids:   bin ids handed to self.loadProfile. None (the default)
                    is treated as an empty list.
            fasta:  iterable of contig file names to read. A file whose
                    guessed encoding is 'gzip' is opened with gzip.open,
                    anything else with open. None (the default) is treated
                    as an empty list.
            prefix: leading part of every output file name. When empty or
                    None it is derived from the basename of
                    self.dbFileName with ".gm" and ".sm" removed.
            cutoff: passed straight through to self.loadProfile.

        Output files are named "<prefix>_bin_<bid>.fna" and are created in
        self._outDir. Contigs that belong to a bin but were not found in
        any of the fasta files are reported with print and skipped.

        Errors (Exception subclasses) raised while reading or writing are
        reported with print and then re-raised unchanged.
        """
        # None stands in for "argument not given" so that no list object is
        # shared between calls
        if bids is None:
            bids = []
        if fasta is None:
            fasta = []

        if not prefix:
            # NOTE: str.replace removes every occurrence of ".gm" / ".sm",
            # not only a trailing extension
            prefix = os.path.basename(self.dbFileName) \
                            .replace(".gm", "") \
                            .replace(".sm", "")

        profile = self.loadProfile(timer, bids, cutoff)
        bm = BinManager(profile)

        # load all the contigs which have been assigned to bins
        cp = ContigParser()
        # contigs looks like cid->seq
        contigs = {}
        import gzip
        import mimetypes
        for file_name in fasta:
            # the try block sits inside the loop so that the message names
            # the file that actually failed
            try:
                gm_open = open
                try:
                    # handle gzipped files; guess_type returns
                    # (type, encoding)
                    if mimetypes.guess_type(file_name)[1] == 'gzip':
                        gm_open = gzip.open
                except Exception:
                    # single-argument print(...) gives the same output
                    # under Python 2 and Python 3
                    print("Error when guessing contig file mimetype")
                    raise
                with gm_open(file_name, "r") as f:
                    cp.getWantedSeqs(f, profile.contigNames, out_dict=contigs)
            except Exception:
                print("Could not parse contig file: %s %s" %
                      (file_name, sys.exc_info()[0]))
                raise

        # now print out the sequences
        print("Writing files")
        for bid in bm.getBids():
            file_name = os.path.join(self._outDir,
                                     "%s_bin_%d.fna" % (prefix, bid))
            try:
                with open(file_name, 'w') as f:
                    for cid in bm.profile.contigNames[bm.getBinIndices(bid)]:
                        if cid in contigs:
                            f.write(">%s\n%s\n" % (cid, contigs[cid]))
                        else:
                            print("These are not the contigs you're looking for. ( %s )" %
                                  (cid,))
            except Exception:
                print("Could not open file for writing: %s %s" %
                      (file_name, sys.exc_info()[0]))
                raise
Esempio n. 2
0
    def extractReads(self,
                     timer,
                     bids=None,
                     bams=None,
                     prefix="",
                     mixBams=False,
                     mixGroups=False,
                     mixReads=False,
                     interleaved=False,
                     bigFile=False,
                     headersOnly=False,
                     minMapQual=0,
                     maxMisMatches=1000,
                     useSuppAlignments=False,
                     useSecondaryAlignments=False,
                     threads=1,
                     verbose=False):
        """Extract reads from bam files and write to file

        All logic is handled by BamM <- soon to be wrapped by StoreM

        Parameters:
            timer: passed straight through to self.loadProfile.
            bids:  bin ids handed to self.loadProfile. None (the default)
                   is treated as an empty list.
            bams:  handed to BMBE as its second positional argument. None
                   (the default) is treated as an empty list.
            threads, verbose: forwarded to the extract() call.

        Every other keyword argument is forwarded unchanged to the BMBE
        constructor under the same name. The output folder given to BMBE
        is self._outDir.
        """
        # None stands in for "argument not given" so that no list object is
        # shared between calls
        if bids is None:
            bids = []
        if bams is None:
            bams = []

        # load data
        profile = self.loadProfile(timer, bids)
        bm = BinManager(profile) # bins

        # single-argument print(...) gives the same output under Python 2
        # and Python 3
        print("Extracting reads")

        # work out a set of targets to pass to the parser:
        # one group name and one list of contig names per bin
        targets = []
        group_names = []
        for bid in bm.getBids():
            group_names.append("BIN_%d" % bid)
            row_indices = bm.getBinIndices(bid)
            targets.append(list(bm.profile.contigNames[row_indices]))

        # get something to parse the bams with
        bam_parser = BMBE(targets,
                          bams,
                          groupNames=group_names,
                          prefix=prefix,
                          outFolder=self._outDir,
                          mixBams=mixBams,
                          mixGroups=mixGroups,
                          mixReads=mixReads,
                          interleaved=interleaved,
                          bigFile=bigFile,
                          headersOnly=headersOnly,
                          minMapQual=minMapQual,
                          maxMisMatches=maxMisMatches,
                          useSuppAlignments=useSuppAlignments,
                          useSecondaryAlignments=useSecondaryAlignments)

        bam_parser.extract(threads=threads,
                           verbose=verbose)
Esempio n. 3
0
    def extractReads(self,
                     timer,
                     bids=None,
                     bams=None,
                     prefix="",
                     mixBams=False,
                     mixGroups=False,
                     mixReads=False,
                     interleaved=False,
                     bigFile=False,
                     headersOnly=False,
                     minMapQual=0,
                     maxMisMatches=1000,
                     useSuppAlignments=False,
                     useSecondaryAlignments=False,
                     threads=1,
                     verbose=False):
        """Extract reads from bam files and write to file

        All logic is handled by BamM <- soon to be wrapped by StoreM

        Parameters:
            timer: passed straight through to self.loadProfile.
            bids:  bin ids handed to self.loadProfile. None (the default)
                   is treated as an empty list.
            bams:  handed to BMBE as its second positional argument. None
                   (the default) is treated as an empty list.
            threads, verbose: forwarded to the extract() call.

        Every other keyword argument is forwarded unchanged to the BMBE
        constructor under the same name. The output folder given to BMBE
        is self._outDir.
        """
        # a fresh list per call replaces the shared mutable default
        bids = [] if bids is None else bids
        bams = [] if bams is None else bams

        # load data
        profile = self.loadProfile(timer, bids)
        bm = BinManager(profile)  # bins

        # single-argument print(...) gives the same output under Python 2
        # and Python 3
        print("Extracting reads")

        # work out a set of targets to pass to the parser:
        # one group name and one list of contig names per bin
        targets = []
        group_names = []
        for bid in bm.getBids():
            group_names.append("BIN_%d" % bid)
            row_indices = bm.getBinIndices(bid)
            targets.append(list(bm.profile.contigNames[row_indices]))

        # get something to parse the bams with
        bam_parser = BMBE(targets,
                          bams,
                          groupNames=group_names,
                          prefix=prefix,
                          outFolder=self._outDir,
                          mixBams=mixBams,
                          mixGroups=mixGroups,
                          mixReads=mixReads,
                          interleaved=interleaved,
                          bigFile=bigFile,
                          headersOnly=headersOnly,
                          minMapQual=minMapQual,
                          maxMisMatches=maxMisMatches,
                          useSuppAlignments=useSuppAlignments,
                          useSecondaryAlignments=useSecondaryAlignments)

        bam_parser.extract(threads=threads, verbose=verbose)
Esempio n. 4
0
    def extractContigs(self, timer, bids=None, fasta=None, prefix='', cutoff=0):
        """Extract the contigs of each bin and write one FASTA file per bin.

        Parameters:
            timer:  passed straight through to self.loadProfile.
            bids:   bin ids handed to self.loadProfile. None (the default)
                    is treated as an empty list.
            fasta:  iterable of contig file names to read. A file whose
                    guessed encoding is 'gzip' is opened with gzip.open,
                    anything else with open. None (the default) is treated
                    as an empty list.
            prefix: leading part of every output file name. When empty or
                    None it is derived from the basename of
                    self.dbFileName with ".gm" and ".sm" removed.
            cutoff: passed straight through to self.loadProfile.

        Output files are named "<prefix>_bin_<bid>.fna" and are created in
        self._outDir. Contigs that belong to a bin but were not found in
        any of the fasta files are reported with print and skipped.

        Errors (Exception subclasses) raised while reading or writing are
        reported with print and then re-raised unchanged.
        """
        # a fresh list per call replaces the shared mutable default
        bids = [] if bids is None else bids
        fasta = [] if fasta is None else fasta

        if not prefix:
            # NOTE: str.replace removes every occurrence of ".gm" / ".sm",
            # not only a trailing extension
            prefix = os.path.basename(self.dbFileName) \
                            .replace(".gm", "") \
                            .replace(".sm", "")

        profile = self.loadProfile(timer, bids, cutoff)
        bm = BinManager(profile)

        # load all the contigs which have been assigned to bins
        cp = ContigParser()
        # contigs looks like cid->seq
        contigs = {}
        import gzip
        import mimetypes
        for file_name in fasta:
            # handling errors per file lets the message name the file that
            # actually failed
            try:
                gm_open = open
                try:
                    # handle gzipped files; guess_type returns
                    # (type, encoding)
                    mime = mimetypes.guess_type(file_name)
                    if mime[1] == 'gzip':
                        gm_open = gzip.open
                except Exception:
                    # single-argument print(...) gives the same output
                    # under Python 2 and Python 3
                    print("Error when guessing contig file mimetype")
                    raise
                with gm_open(file_name, "r") as f:
                    cp.getWantedSeqs(f, profile.contigNames, out_dict=contigs)
            except Exception:
                print("Could not parse contig file: %s %s" %
                      (file_name, sys.exc_info()[0]))
                raise

        # now print out the sequences
        print("Writing files")
        for bid in bm.getBids():
            file_name = os.path.join(self._outDir,
                                     "%s_bin_%d.fna" % (prefix, bid))
            try:
                with open(file_name, 'w') as f:
                    for cid in bm.profile.contigNames[bm.getBinIndices(bid)]:
                        if cid in contigs:
                            f.write(">%s\n%s\n" % (cid, contigs[cid]))
                        else:
                            print("These are not the contigs you're looking for. ( %s )" %
                                  (cid,))
            except Exception:
                print("Could not open file for writing: %s %s" %
                      (file_name, sys.exc_info()[0]))
                raise
Esempio n. 5
0
    def extractMappingInfo(self,
                           timer,
                           bids=None,
                           prefix='',
                           separator='\t',
                           cutoff=0):
        """Extract markers from bins and write to file

        For every bin id returned by the BinManager one text file named
        "<prefix>_bin_<bid>.txt" is created in self._outDir. It holds an
        info table (label, quoted taxonomy string and contig name for each
        mapping row that falls in the bin) followed by the text returned
        by MarkerCheckTreePrinter.printTree for the same rows.

        Parameters:
            timer:     passed straight through to self.loadProfile.
            bids:      bin ids handed to self.loadProfile. None (the
                       default) is treated as an empty list.
            prefix:    leading part of every output file name. When empty
                       or None it is derived from the basename of
                       self.dbFileName with ".gm" and ".sm" removed.
            separator: string placed between the columns of the table.
            cutoff:    passed straight through to self.loadProfile.

        Errors (Exception subclasses) raised while writing are reported
        with print and then re-raised unchanged.
        """
        # None stands in for "argument not given" so that no list object is
        # shared between calls
        if bids is None:
            bids = []

        if not prefix:
            # NOTE: str.replace removes every occurrence of ".gm" / ".sm",
            # not only a trailing extension
            prefix = os.path.basename(self.dbFileName) \
                            .replace(".gm", "") \
                            .replace(".sm", "")

        profile = self.loadProfile(timer, bids, cutoff)
        bm = BinManager(profile)
        mt = MarkerCheckTreePrinter(profile)

        # the column header is identical for every bin
        header = separator.join(['label', 'taxonomy', 'contig_name'])

        # now print out the marker info
        # (single-argument print(...) gives the same output under Python 2
        # and Python 3)
        print("Writing files")
        for bid in bm.getBids():
            file_name = os.path.join(self._outDir,
                                     "%s_bin_%d.txt" % (prefix, bid))

            bin_indices = bm.getBinIndices([bid])
            # positions within the mapping arrays whose row index is one
            # of this bin's indices
            idx = np.flatnonzero(
                np.in1d(profile.mapping.rowIndices, bin_indices))
            mapped_rows = profile.mapping.rowIndices[idx]

            labels = profile.mapping.markerNames[idx]
            cnames = profile.contigNames[mapped_rows]
            taxstrings = profile.mapping.taxstrings[idx]

            try:
                with open(file_name, 'w') as f:
                    #labels and lineages
                    f.write('#info table\n%s\n' % header)
                    for (label, taxstring,
                         cname) in zip(labels, taxstrings, cnames):
                        f.write('%s\n' % separator.join(
                            [label, '\'%s\'' % taxstring, cname]))

                    #marker tree
                    f.write('\n#marker tree\n')
                    f.write(
                        mt.printTree(mapped_rows,
                                     leaves_list=bin_indices))
            except Exception:
                print("Could not open file for writing: %s %s" %
                      (file_name, sys.exc_info()[0]))
                raise
Esempio n. 6
0
 def extractMappingInfo(self,
                        timer,
                        bids=None,
                        prefix='',
                        separator='\t',
                        cutoff=0
                        ):
     """Extract markers from bins and write to file

     For every bin id returned by the BinManager one text file named
     "<prefix>_bin_<bid>.txt" is created in self._outDir. It holds an
     info table (label, quoted taxonomy string and contig name for each
     mapping row that falls in the bin) followed by the text returned
     by MarkerCheckTreePrinter.printTree for the same rows.

     Parameters:
         timer:     passed straight through to self.loadProfile.
         bids:      bin ids handed to self.loadProfile. None (the
                    default) is treated as an empty list.
         prefix:    leading part of every output file name. When empty
                    or None it is derived from the basename of
                    self.dbFileName with ".gm" and ".sm" removed.
         separator: string placed between the columns of the table.
         cutoff:    passed straight through to self.loadProfile.

     Errors (Exception subclasses) raised while writing are reported
     with print and then re-raised unchanged.
     """
     # a fresh list per call replaces the shared mutable default
     bids = [] if bids is None else bids

     if not prefix:
         # NOTE: str.replace removes every occurrence of ".gm" / ".sm",
         # not only a trailing extension
         prefix = os.path.basename(self.dbFileName) \
                         .replace(".gm", "") \
                         .replace(".sm", "")

     profile = self.loadProfile(timer, bids, cutoff)
     bm = BinManager(profile)
     mt = MarkerCheckTreePrinter(profile)

     # the column header is identical for every bin
     header = separator.join(['label', 'taxonomy', 'contig_name'])

     # now print out the marker info
     # (single-argument print(...) gives the same output under Python 2
     # and Python 3)
     print("Writing files")
     for bid in bm.getBids():
         file_name = os.path.join(self._outDir, "%s_bin_%d.txt" % (prefix, bid))

         bin_indices = bm.getBinIndices([bid])
         # positions within the mapping arrays whose row index is one of
         # this bin's indices
         idx = np.flatnonzero(np.in1d(profile.mapping.rowIndices, bin_indices))
         mapped_rows = profile.mapping.rowIndices[idx]

         labels = profile.mapping.markerNames[idx]
         cnames = profile.contigNames[mapped_rows]
         taxstrings = profile.mapping.taxstrings[idx]

         try:
             with open(file_name, 'w') as f:
                 #labels and lineages
                 f.write('#info table\n%s\n' % header)
                 for (label, taxstring, cname) in zip(labels, taxstrings, cnames):
                     f.write('%s\n' % separator.join([label, '\'%s\'' % taxstring, cname]))

                 #marker tree
                 f.write('\n#marker tree\n')
                 f.write(mt.printTree(mapped_rows, leaves_list=bin_indices))
         except Exception:
             print("Could not open file for writing: %s %s" %
                   (file_name, sys.exc_info()[0]))
             raise