Exemple #1
0
    def main(self):

        # This looks scary but it's not.  Python uses reference
        # counting and has a secondary, optional garbage collector for
        # collecting garbage cycles.  Unfortunately when a cyclic GC
        # happens when a thread is calling cPickle.dumps, the
        # interpreter crashes sometimes.  See Bug 19704.  Since we
        # don't leak garbage cycles, disabling the cyclic GC is
        # essentially harmless.
        gc.disable()

        parseOptions()
        self._algorithm = self._algorithmByName(options.algorithm)
        self._setupLogging()
        random.seed(42)

        logging.info("h5py version: %s" % h5py.version.version)
        logging.info("hdf5 version: %s" % h5py.version.hdf5_version)
        logging.info("ConsensusCore version: %s" %
                     (consensusCoreVersion() or "ConsensusCore unavailable"))
        logging.info("Starting.")

        atexit.register(self._cleanup)
        if options.doProfiling:
            self._makeTemporaryDirectory()

        with AlignmentSet(options.inputFilename) as peekFile:
            if not peekFile.isCmpH5 and not peekFile.hasPbi:
                logging.warn("'fancyChunking' not yet available for BAM "
                             "files without accompanying .pbi files, "
                             "disabling")
                options.fancyChunking = False
            logging.info("Peeking at file %s" % options.inputFilename)
            logging.info("Input data: numAlnHits=%d" % len(peekFile))
            resolveOptions(peekFile)
            self._loadReference(peekFile)
            self._checkFileCompatibility(peekFile)
            self._configureAlgorithm(options, peekFile)
            options.disableHdf5ChunkCache = True
            #options.disableHdf5ChunkCache = self._shouldDisableChunkCache(peekFile)
            #if options.disableHdf5ChunkCache:
            #    logging.info("Will disable HDF5 chunk cache (large number of datasets)")
            #logging.debug("After peek, # hdf5 objects open: %d" % h5py.h5f.get_obj_count())

        if options.dumpEvidence:
            self._setupEvidenceDumpDirectory(options.evidenceDirectory)

        self._launchSlaves()
        self._readCmpH5Input()

        monitoringThread = threading.Thread(target=monitorSlaves, args=(self,))
        monitoringThread.start()

        try:
            if options.doProfiling:
                cProfile.runctx("self._mainLoop()",
                                globals=globals(),
                                locals=locals(),
                                filename=os.path.join(options.temporaryDirectory,
                                                      "profile-main.out"))

            elif options.doDebugging:
                if not options.threaded:
                    die("Debugging only works with -T (threaded) mode")
                logging.info("PID: %d", os.getpid())
                import ipdb
                with ipdb.launch_ipdb_on_exception():
                    self._mainLoop()

            else:
                self._mainLoop()
        except:
            why = traceback.format_exc()
            self.abortWork(why)

        monitoringThread.join()

        if self._aborting:
            logging.error("Aborting")
            return -1
        else:
            logging.info("Finished.")

        if options.doProfiling:
            self._printProfiles()

        # close h5 file.
        self._inCmpH5.close()
        return 0
Exemple #2
0
    def main(self):

        # This looks scary but it's not.  Python uses reference
        # counting and has a secondary, optional garbage collector for
        # collecting garbage cycles.  Unfortunately when a cyclic GC
        # happens when a thread is calling cPickle.dumps, the
        # interpreter crashes sometimes.  See Bug 19704.  Since we
        # don't leak garbage cycles, disabling the cyclic GC is
        # essentially harmless.
        gc.disable()

        self._algorithm = self._algorithmByName(options.algorithm)
        self._setupLogging()
        random.seed(42)

        logging.info("h5py version: %s" % h5py.version.version)
        logging.info("hdf5 version: %s" % h5py.version.hdf5_version)
        logging.info("ConsensusCore version: %s" %
                     (consensusCoreVersion() or "ConsensusCore unavailable"))
        logging.info("ConsensusCore2 version: %s" %
                     (consensusCore2Version() or "ConsensusCore2 unavailable"))
        logging.info("Starting.")

        atexit.register(self._cleanup)
        if options.doProfiling:
            self._makeTemporaryDirectory()

        with AlignmentSet(options.inputFilename) as peekFile:
            if options.algorithm == "arrow" and peekFile.isCmpH5:
                die("Arrow does not support CmpH5 files")
            if not peekFile.isCmpH5 and not peekFile.hasPbi:
                die("Genomic Consensus only works with cmp.h5 files and BAM "
                    "files with accompanying .pbi files")
            logging.info("Peeking at file %s" % options.inputFilename)
            logging.info("Input data: numAlnHits=%d" % len(peekFile))
            resolveOptions(peekFile)
            self._loadReference(peekFile)
            self._checkFileCompatibility(peekFile)
            self._configureAlgorithm(options, peekFile)
            options.disableHdf5ChunkCache = True
            #options.disableHdf5ChunkCache = self._shouldDisableChunkCache(peekFile)
            #if options.disableHdf5ChunkCache:
            #    logging.info("Will disable HDF5 chunk cache (large number of datasets)")
            #logging.debug("After peek, # hdf5 objects open: %d" % h5py.h5f.get_obj_count())

        if options.dumpEvidence:
            self._setupEvidenceDumpDirectory(options.evidenceDirectory)

        self._launchSlaves()
        self._readAlignmentInput()

        monitoringThread = threading.Thread(target=monitorSlaves,
                                            args=(self, ))
        monitoringThread.start()

        try:
            if options.doProfiling:
                cProfile.runctx("self._mainLoop()",
                                globals=globals(),
                                locals=locals(),
                                filename=os.path.join(
                                    options.temporaryDirectory,
                                    "profile-main.out"))

            elif options.debug:
                if not options.threaded:
                    die("Debugging only works with -T (threaded) mode")
                logging.info("PID: %d", os.getpid())
                import ipdb
                with ipdb.launch_ipdb_on_exception():
                    self._mainLoop()

            else:
                self._mainLoop()
        except:
            why = traceback.format_exc()
            self.abortWork(why)

        monitoringThread.join()

        if self._aborting:
            logging.error("Aborting")
            return -1
        else:
            logging.info("Finished.")

        if options.doProfiling:
            self._printProfiles()

        # close h5 file.
        self._inAlnFile.close()
        return 0
Exemple #3
0
    def main(self):

        # This looks scary but it's not.  Python uses reference
        # counting and has a secondary, optional garbage collector for
        # collecting garbage cycles.  Unfortunately when a cyclic GC
        # happens when a thread is calling cPickle.dumps, the
        # interpreter crashes sometimes.  See Bug 19704.  Since we
        # don't leak garbage cycles, disabling the cyclic GC is
        # essentially harmless.
        gc.disable()

        parseOptions()
        self._algorithm = self._algorithmByName(options.algorithm)
        self._setupLogging()
        random.seed(42)

        logging.info("h5py version: %s" % h5py.version.version)
        logging.info("hdf5 version: %s" % h5py.version.hdf5_version)
        logging.info("ConsensusCore version: %s" %
                     (consensusCoreVersion() or "ConsensusCore unavailable"))
        logging.info("Starting.")

        atexit.register(self._cleanup)
        if options.doProfiling:
            self._makeTemporaryDirectory()

        if options.usingBam:
            logging.warn(
                "'fancyChunking' not yet available for BAM, disabling")
            options.fancyChunking = False

            # Peek at the bam file to build tables
            with BamReader(options.inputFilename) as peekCmpH5:
                logging.info("Peeking at BAM file %s" % options.inputFilename)
                logging.info("Input BAM data: numAlnHits=%d" % len(peekCmpH5))
                resolveOptions(peekCmpH5)
                self._loadReference(peekCmpH5)
                self._checkFileCompatibility(peekCmpH5)
                self._configureAlgorithm(options, peekCmpH5)
        else:
            # We need to peek at the cmp.h5 file to build the The
            # refGroupId<->refGroupFullName mapping, and to determine
            # whether the selected algorithm parameters (Quiver) are
            # compatible with the data.  But we then have to close the
            # file, and let the "real" open happen after the fork.
            with CmpH5Reader(options.inputFilename) as peekCmpH5:
                logging.info("Peeking at CmpH5 file %s" %
                             options.inputFilename)
                logging.info("Input CmpH5 data: numAlnHits=%d" %
                             len(peekCmpH5))
                resolveOptions(peekCmpH5)
                self._loadReference(peekCmpH5)
                self._checkFileCompatibility(peekCmpH5)
                self._configureAlgorithm(options, peekCmpH5)
                options.disableHdf5ChunkCache = self._shouldDisableChunkCache(
                    peekCmpH5)
                if options.disableHdf5ChunkCache:
                    logging.info(
                        "Will disable HDF5 chunk cache (large number of datasets)"
                    )
            logging.debug("After peek, # hdf5 objects open: %d" %
                          h5py.h5f.get_obj_count())

        if options.dumpEvidence:
            self._setupEvidenceDumpDirectory(options.evidenceDirectory)

        self._launchSlaves()
        self._readCmpH5Input()

        monitoringThread = threading.Thread(target=monitorSlaves,
                                            args=(self, ))
        monitoringThread.start()

        try:
            if options.doProfiling:
                cProfile.runctx("self._mainLoop()",
                                globals=globals(),
                                locals=locals(),
                                filename=os.path.join(
                                    options.temporaryDirectory,
                                    "profile-main.out"))

            elif options.doDebugging:
                logging.info("PID: %d", os.getpid())
                try:
                    import ipdb as pdb
                except:
                    import pdb
                return pdb.runeval("self._mainLoop()", globals(), locals())
            else:
                self._mainLoop()
        except:
            why = traceback.format_exc()
            self.abortWork(why)

        monitoringThread.join()

        if self._aborting:
            logging.error("Aborting")
            return -1
        else:
            logging.info("Finished.")

        if options.doProfiling:
            self._printProfiles()

        # close h5 file.
        self._inCmpH5.close()
        return 0
Exemple #4
0
    def main(self):

        # This looks scary but it's not.  Python uses reference
        # counting and has a secondary, optional garbage collector for
        # collecting garbage cycles.  Unfortunately when a cyclic GC
        # happens when a thread is calling cPickle.dumps, the
        # interpreter crashes sometimes.  See Bug 19704.  Since we
        # don't leak garbage cycles, disabling the cyclic GC is
        # essentially harmless.
        gc.disable()

        parseOptions()
        self._algorithm = self._algorithmByName(options.algorithm)
        self._setupLogging()
        random.seed(42)

        logging.info("h5py version: %s" % h5py.version.version)
        logging.info("hdf5 version: %s" % h5py.version.hdf5_version)
        logging.info("ConsensusCore version: %s" %
                     (consensusCoreVersion() or "ConsensusCore unavailable"))
        logging.info("Starting.")

        atexit.register(self._cleanup)
        if options.doProfiling:
            self._makeTemporaryDirectory()

        if options.usingBam:
            logging.warn("'fancyChunking' not yet available for BAM, disabling")
            options.fancyChunking = False

            # Peek at the bam file to build tables
            with BamReader(options.inputFilename) as peekCmpH5:
                logging.info("Peeking at BAM file %s" % options.inputFilename)
                logging.info("Input BAM data: numAlnHits=%d" % len(peekCmpH5))
                resolveOptions(peekCmpH5)
                self._loadReference(peekCmpH5)
                self._checkFileCompatibility(peekCmpH5)
                self._configureAlgorithm(options, peekCmpH5)
        else:
            # We need to peek at the cmp.h5 file to build the The
            # refGroupId<->refGroupFullName mapping, and to determine
            # whether the selected algorithm parameters (Quiver) are
            # compatible with the data.  But we then have to close the
            # file, and let the "real" open happen after the fork.
            with CmpH5Reader(options.inputFilename) as peekCmpH5:
                logging.info("Peeking at CmpH5 file %s" % options.inputFilename)
                logging.info("Input CmpH5 data: numAlnHits=%d" % len(peekCmpH5))
                resolveOptions(peekCmpH5)
                self._loadReference(peekCmpH5)
                self._checkFileCompatibility(peekCmpH5)
                self._configureAlgorithm(options, peekCmpH5)
                options.disableHdf5ChunkCache = self._shouldDisableChunkCache(peekCmpH5)
                if options.disableHdf5ChunkCache:
                    logging.info("Will disable HDF5 chunk cache (large number of datasets)")
            logging.debug("After peek, # hdf5 objects open: %d" % h5py.h5f.get_obj_count())

        if options.dumpEvidence:
            self._setupEvidenceDumpDirectory(options.evidenceDirectory)

        self._launchSlaves()
        self._readCmpH5Input()

        monitoringThread = threading.Thread(target=monitorSlaves, args=(self,))
        monitoringThread.start()

        try:
            if options.doProfiling:
                cProfile.runctx("self._mainLoop()",
                                globals=globals(),
                                locals=locals(),
                                filename=os.path.join(options.temporaryDirectory,
                                                      "profile-main.out"))

            elif options.doDebugging:
                logging.info("PID: %d", os.getpid())
                try:    import ipdb as pdb
                except: import pdb
                return pdb.runeval("self._mainLoop()", globals(), locals())
            else:
                self._mainLoop()
        except:
            why = traceback.format_exc()
            self.abortWork(why)

        monitoringThread.join()

        if self._aborting:
            logging.error("Aborting")
            return -1
        else:
            logging.info("Finished.")

        if options.doProfiling:
            self._printProfiles()

        # close h5 file.
        self._inCmpH5.close()
        return 0
    def main(self):

        # This looks scary but it's not.  Python uses reference
        # counting and has a secondary, optional garbage collector for
        # collecting garbage cycles.  Unfortunately when a cyclic GC
        # happens when a thread is calling cPickle.dumps, the
        # interpreter crashes sometimes.  See Bug 19704.  Since we
        # don't leak garbage cycles, disabling the cyclic GC is
        # essentially harmless.
        gc.disable()

        random.seed(42)

        if options.pdb or options.pdbAtStartup:
            print("Process ID: %d" % os.getpid(), file=sys.stderr)
            try:
                import ipdb
            except ImportError:
                die("Debugging options require 'ipdb' package installed.")

            if not options.threaded:
                die("Debugging only works with -T (threaded) mode")

        if options.pdbAtStartup:
            ipdb.set_trace()

        logging.info("ConsensusCore version: %s" %
                     (consensusCoreVersion() or "ConsensusCore unavailable"))
        logging.info("ConsensusCore2 version: %s" %
                     (consensusCore2Version() or "ConsensusCore2 unavailable"))
        logging.info("Starting.")

        atexit.register(self._cleanup)
        if options.doProfiling:
            self._makeTemporaryDirectory()

        with AlignmentSet(options.inputFilename) as peekFile:
            if options.algorithm == "arrow" and peekFile.isCmpH5:
                die("Arrow does not support CmpH5 files")
            if not peekFile.isCmpH5 and not peekFile.hasPbi:
                die("Genomic Consensus only works with cmp.h5 files and BAM "
                    "files with accompanying .pbi files")
            logging.info("Peeking at file %s" % options.inputFilename)
            logging.info("Input data: numAlnHits=%d" % len(peekFile))
            resolveOptions(peekFile)
            self._loadReference(peekFile)
            self._checkFileCompatibility(peekFile)
            self._algorithm = self._algorithmByName(options.algorithm, peekFile)
            self._configureAlgorithm(options, peekFile)
            options.disableHdf5ChunkCache = True
            #options.disableHdf5ChunkCache = self._shouldDisableChunkCache(peekFile)
            #if options.disableHdf5ChunkCache:
            #    logging.info("Will disable HDF5 chunk cache (large number of datasets)")

        self._launchSlaves()
        self._readAlignmentInput()

        monitoringThread = threading.Thread(target=monitorSlaves, args=(self,))
        monitoringThread.start()

        try:
            if options.doProfiling:
                cProfile.runctx("self._mainLoop()",
                                globals=globals(),
                                locals=locals(),
                                filename=os.path.join(options.temporaryDirectory,
                                                      "profile-main.out"))

            elif options.pdb:
                with ipdb.launch_ipdb_on_exception():
                    self._mainLoop()

            else:
                self._mainLoop()
        except BaseException as exc:
            msg = 'options={}'.format(pprint.pformat(vars(options)))
            logging.exception(msg)
            self.abortWork(repr(exc))

        monitoringThread.join()

        if self._aborting:
            logging.error("Aborting")
            return -1
        else:
            logging.info("Finished.")

        if options.doProfiling:
            self._printProfiles()

        # close h5 file.
        self._inAlnFile.close()
        return 0