예제 #1
0
    def extractMappedRead(self, aln, windowStart):
        """
        Bundle a clipped alignment and its features into a cc.MappedRead.

        The read is taken unaligned and in native orientation.  Template
        coordinates are the alignment's reference coordinates shifted by
        `windowStart`, so the window start becomes position 0.  A base
        feature ("Ipd", "PulseWidth") that the reader does not provide is
        replaced by a zero vector of length `aln.readLength`.
        """
        if isinstance(aln, CmpH5Alignment):
            die("Arrow does not support CmpH5 files!")

        assert aln.referenceSpan > 0

        def featureAsUint8(featureName):
            # Absent features are represented by zeros.
            if not aln.reader.hasBaseFeature(featureName):
                return np.zeros((aln.readLength,), dtype=np.uint8)
            values = aln.baseFeature(featureName, aligned=False, orientation="native")
            # Saturate into 0..255 before narrowing to uint8.
            return values.clip(0, 255).astype(np.uint8)

        readName = aln.readName
        chemistryName = aln.sequencingChemistry
        if aln.isReverseStrand:
            strand = cc.StrandType_REVERSE
        else:
            strand = cc.StrandType_FORWARD

        bases = aln.read(aligned=False, orientation="native")
        ipds = cc.Uint8Vector(featureAsUint8("Ipd").tolist())
        pulseWidths = cc.Uint8Vector(featureAsUint8("PulseWidth").tolist())
        snr = cc.SNR(aln.hqRegionSnr)
        read = cc.Read(readName, bases, ipds, pulseWidths, snr, chemistryName)

        templateStart = int(aln.referenceStart - windowStart)
        templateEnd = int(aln.referenceEnd - windowStart)
        return cc.MappedRead(read, strand, templateStart, templateEnd)
예제 #2
0
    def extractMappedRead(self, aln, windowStart):
        """
        Package a clipped alignment as a cc.MappedRead.

        Coordinates are converted into template space by subtracting
        `windowStart` from the alignment's reference start and end.  The
        read bases and the "Ipd" / "PulseWidth" features are taken
        unaligned, in native orientation; a feature the reader lacks is
        filled with `aln.qLen` zeros.
        """
        if isinstance(aln, CmpH5Alignment):
            die("Arrow does not support CmpH5 files!")

        assert aln.referenceSpan > 0

        def uint8Feature(featureName):
            if aln.reader.hasBaseFeature(featureName):
                raw = aln.baseFeature(featureName, aligned=False, orientation="native")
                # Clamp to the representable range, then narrow the dtype.
                clamped = raw.clip(0, 255)
                return clamped.astype(np.uint8)
            return np.zeros((aln.qLen,), dtype=np.uint8)

        readName = aln.readName
        chemistryName = aln.sequencingChemistry
        strand = cc.StrandType_FORWARD
        if aln.isReverseStrand:
            strand = cc.StrandType_REVERSE

        read = cc.Read(
            readName,
            aln.read(aligned=False, orientation="native"),
            cc.Uint8Vector(uint8Feature("Ipd").tolist()),
            cc.Uint8Vector(uint8Feature("PulseWidth").tolist()),
            cc.SNR(aln.hqRegionSnr),
            chemistryName)

        start = int(aln.referenceStart - windowStart)
        end = int(aln.referenceEnd - windowStart)
        return cc.MappedRead(read, strand, start, end)
예제 #3
0
파일: main.py 프로젝트: lpp1985/lpp_Script
 def _algorithmByName(self, name, peekFile):
     """
     Return the algorithm object selected by `name`.

     Recognized names are "plurality", "quiver", "arrow", "poa" and
     "best".  "best" asks algorithmSelection.bestAlgorithm for a name
     based on the sequencing chemistries of `peekFile` and resolves that
     name instead.  Each algorithm is imported only when requested.
     An unrecognized name, or an algorithm whose `availability` reports
     not-OK, is reported through die().
     """
     if name == "best":
         logging.info("Identifying best algorithm based on input data")
         from GenomicConsensus import algorithmSelection
         chosenName = algorithmSelection.bestAlgorithm(peekFile.sequencingChemistry)
         return self._algorithmByName(chosenName, peekFile)

     if name == "plurality":
         from GenomicConsensus.plurality import plurality as algo
     elif name == "quiver":
         from GenomicConsensus.quiver import quiver as algo
     elif name == "arrow":
         from GenomicConsensus.arrow import arrow as algo
     elif name == "poa":
         from GenomicConsensus.poa import poa as algo
     else:
         die("Failure: unrecognized algorithm %s" % name)

     usable, reason = algo.availability
     if not usable:
         die("Failure: %s" % reason)
     logging.info("Will use {a} algorithm".format(a=name))
     return algo
예제 #4
0
def _buildParameterSet(parameterSetName, nameValuePairs):
    """
    Build a ParameterSet from one named group of parameter values.

    parameterSetName -- "<chemistry>.<modelName>"; only the first two
                        dot-separated fields are used.
    nameValuePairs   -- sequence of pairs; the first elements (via fst)
                        must equal model.parameterNames, and the second
                        elements (via snd) are converted to float.

    Returns the ParameterSet, or None after logging an error when the
    model name is not recognized.  A name mismatch is reported via die().
    """
    chem, modelName = parameterSetName.split(".")[:2]
    if modelName == "AllQVsModel":
        model = AllQVsModel
    elif modelName == "NoMergeQVModel":
        model = NoMergeQVModel
    elif modelName == "NoQVsModel":
        model = NoQVsModel
    elif modelName == "AllQVsMergingByChannelModel":
        model = AllQVsMergingByChannelModel
    elif modelName == "NoQVsMergingByChannelModel":
        model = NoQVsMergingByChannelModel
    else:
        logging.error("Found parameter set for unrecognized model: %s" % modelName)
        return None

    # Compare against a concrete list.  On Python 3 map() yields an
    # iterator, which never compares equal to parameterNames and would
    # make every parameter set look malformed.
    if [fst(pair) for pair in nameValuePairs] != model.parameterNames:
        die("Malformed parameter set file")

    qvModelParams = cc.QvModelParams(chem, modelName,
        *[ float(snd(pair)) for pair in nameValuePairs ])

    #
    # Dirty hack for --diploid support, diploid model is scaled
    # differently.  Needs further work.
    #
    if parameterSetName == "unknown.NoQVsModel":
        bandingOptions     = cc.BandingOptions(4, 24)
        fastScoreThreshold = -50
    else:
        bandingOptions     = cc.BandingOptions(4, 6)
        fastScoreThreshold = -12.5

    quiverConfig = cc.QuiverConfig(qvModelParams,
                                   cc.ALL_MOVES,
                                   bandingOptions,
                                   fastScoreThreshold)
    return ParameterSet(parameterSetName, model, chem, quiverConfig)
예제 #5
0
파일: model.py 프로젝트: lpp1985/lpp_Script
def _buildParameterSet(parameterSetName, nameValuePairs):
    """
    Turn a named list of (name, value) pairs into a ParameterSet.

    `parameterSetName` has the form "<chemistry>.<modelName>" (fields
    after the second are ignored).  The names in `nameValuePairs` must
    equal the chosen model's `parameterNames`; the values are converted
    to float and handed to cc.QvModelParams in order.

    Returns the ParameterSet, or None (with an error logged) if the model
    name is unknown.  Mismatching parameter names are reported via die().
    """
    chem, modelName = parameterSetName.split(".")[:2]
    if modelName == "AllQVsModel":
        model = AllQVsModel
    elif modelName == "NoMergeQVModel":
        model = NoMergeQVModel
    elif modelName == "NoQVsModel":
        model = NoQVsModel
    elif modelName == "AllQVsMergingByChannelModel":
        model = AllQVsMergingByChannelModel
    elif modelName == "NoQVsMergingByChannelModel":
        model = NoQVsMergingByChannelModel
    else:
        logging.error("Found parameter set for unrecognized model: %s" %
                      modelName)
        return None

    # Materialize the names before comparing: a Python 3 map object is
    # never equal to a list, so the original test always failed there.
    parameterNames = [fst(pair) for pair in nameValuePairs]
    if parameterNames != model.parameterNames:
        die("Malformed parameter set file")

    qvModelParams = cc.QvModelParams(
        chem, modelName, *[float(snd(pair)) for pair in nameValuePairs])

    #
    # Dirty hack for --diploid support, diploid model is scaled
    # differently.  Needs further work.
    #
    if parameterSetName == "unknown.NoQVsModel":
        bandingOptions = cc.BandingOptions(4, 24)
        fastScoreThreshold = -50
    else:
        bandingOptions = cc.BandingOptions(4, 6)
        fastScoreThreshold = -12.5

    quiverConfig = cc.QuiverConfig(qvModelParams, cc.ALL_MOVES, bandingOptions,
                                   fastScoreThreshold)
    return ParameterSet(parameterSetName, model, chem, quiverConfig)
예제 #6
0
 def _configureAlgorithm(self, options, alnFile):
     """
     Configure the previously selected algorithm for this run.

     Stores the result of self._algorithm.configure(options, alnFile) in
     self._algorithmConfiguration.  An IncompatibleDataException raised
     by the algorithm is reported through die().
     """
     # Identity test: "!= None" would dispatch to the object's own __ne__.
     assert self._algorithm is not None
     try:
         self._algorithmConfiguration = self._algorithm.configure(
             options, alnFile)
     except IncompatibleDataException as e:
         # Exception.message exists only on Python 2; fall back to str(e)
         # so the handler cannot itself fail with AttributeError.
         die("Failure: %s" % getattr(e, "message", str(e)))
예제 #7
0
 def extractFeatures(aln):
     """
     Return the read of an alignment record as an unaligned,
     native-orientation sequence.

     CmpH5 alignment records are rejected through die().
     """
     if not isinstance(aln, CmpH5Alignment):
         return aln.read(aligned=False, orientation="native")
     die("Arrow does not support CmpH5 files!")
예제 #8
0
 def extractFeatures(aln):
     """
     Extract the gapless, native-orientation read from an alignment
     record.  CmpH5 records are not supported and are reported via die().
     """
     isCmpH5Record = isinstance(aln, CmpH5Alignment)
     if isCmpH5Record:
         die("Arrow does not support CmpH5 files!")
     else:
         nativeRead = aln.read(aligned=False, orientation="native")
         return nativeRead
예제 #9
0
 def _algorithmByName(self, name):
     """
     Map an algorithm name ("plurality" or "quiver") to its algorithm
     object.  An unknown name, or an algorithm whose `availability`
     reports not-OK, is reported through die().
     """
     if name == "quiver":
         algo = quiver
     elif name == "plurality":
         algo = plurality
     else:
         die("Failure: unrecognized algorithm %s" % name)
     usable, reason = algo.availability
     if not usable:
         die("Failure: %s" % reason)
     return algo
예제 #10
0
 def _algorithmByName(self, name):
     """
     Return the algorithm object for `name`.

     Only "plurality" and "quiver" are known here; any other name is
     reported via die().  The algorithm's `availability` pair
     (ok, message) is checked before it is returned.
     """
     if name == "quiver":
         algo = quiver
     elif name == "plurality":
         algo = plurality
     else:
         die("Failure: unrecognized algorithm %s" % name)
     availability = algo.availability
     ok, message = availability
     if not ok:
         die("Failure: %s" % message)
     return algo
예제 #11
0
def loadParameterSets(parametersFile=None, spec=None, cmpH5=None):
    """
    Load the Quiver parameter set(s) to use for a run.

    spec is either:
      - chemName.modelName  (complete spec),
      - chemName
      - None
    If the spec is incomplete, cmpH5 is required to determine the best
    available option.

    Returns a dict of parameter sets.  When `spec` names a chemistry
    (with or without a model) the dict has the single key "*"; when
    `spec` is None it has one entry per sequencing chemistry found in
    `cmpH5`, keyed by chemistry name.
    """
    if spec is None:
        chemistryName, modelName = None, None
    elif "." in spec:
        chemistryName, modelName = spec.split(".")
    else:
        chemistryName, modelName = spec, None
    # Test for presence, not truthiness: an opened alignment file that
    # holds no records may evaluate as false and must not be mistaken
    # for a missing argument.
    assert (cmpH5 is not None) or (chemistryName and modelName)

    parametersFile = _findParametersFile(parametersFile)
    logging.info("Using Quiver parameters file %s" % parametersFile)
    sets = _loadParameterSets(parametersFile)

    if chemistryName and modelName:
        # Only a missing name means "no such parameter set"; any other
        # error should surface instead of being relabelled.
        try:
            p = sets[spec]
        except KeyError:
            die("Quiver: no available parameter set named %s" % \
                spec)
        params = {"*": p}
    elif chemistryName:
        qvsAvailable = cmpH5.pulseFeaturesAvailable()
        p = _bestParameterSet(sets, chemistryName, qvsAvailable)
        if p.chemistry != chemistryName:
            die("Quiver: no parameter set available compatible with this " + \
                "cmp.h5 for chemistry \"%s\" " % chemistryName)
        params = {"*": p}
    else:
        chemistryNames = list(set(cmpH5.sequencingChemistry))  # uniquify
        if not _isChemistryMixSupported(chemistryNames):
            # logging.warn is a deprecated alias of logging.warning.
            logging.warning("Unsupported chemistry mix, results will be undefined: %s" % \
                            ", ".join(chemistryNames))
        qvsAvailable = cmpH5.pulseFeaturesAvailable()
        bestParams = [
            _bestParameterSet(sets, name, qvsAvailable)
            for name in chemistryNames
        ]
        params = dict(zip(chemistryNames, bestParams))

    return params
예제 #12
0
def loadParameterSets(parametersFile=None, spec=None, cmpH5=None):
    """
    Select the Quiver parameter set(s) matching `spec` and the input.

    spec is either:
      - chemName.modelName  (complete spec),
      - chemName
      - None
    If the spec is incomplete, cmpH5 is required to determine the best
    available option.

    Returns a dict of parameter sets: keyed by "*" when a chemistry was
    given in `spec`, otherwise keyed by each sequencing chemistry name
    found in `cmpH5`.
    """
    if spec is None:
        chemistryName, modelName = None, None
    elif "." in spec:
        chemistryName, modelName = spec.split(".")
    else:
        chemistryName, modelName = spec, None
    # "is not None" rather than truthiness: a file object with no records
    # may be falsy, yet it is still a usable argument here.
    assert (cmpH5 is not None) or (chemistryName and modelName)

    parametersFile = _findParametersFile(parametersFile)
    logging.info("Using Quiver parameters file %s" % parametersFile)
    sets = _loadParameterSets(parametersFile)

    if chemistryName and modelName:
        # Catch only the lookup failure the message describes.
        try:
            p = sets[spec]
        except KeyError:
            die("Quiver: no available parameter set named %s" % \
                spec)
        params = { "*" : p }
    elif chemistryName:
        qvsAvailable = cmpH5.pulseFeaturesAvailable()
        p = _bestParameterSet(sets, chemistryName, qvsAvailable)
        if p.chemistry != chemistryName:
            die("Quiver: no parameter set available compatible with this " + \
                "cmp.h5 for chemistry \"%s\" " % chemistryName)
        params = { "*" : p }
    else:
        chemistryNames = list(set(cmpH5.sequencingChemistry))  # uniquify
        if not _isChemistryMixSupported(chemistryNames):
            # logging.warning replaces the deprecated logging.warn alias.
            logging.warning("Unsupported chemistry mix, results will be undefined: %s" % \
                            ", ".join(chemistryNames))
        qvsAvailable = cmpH5.pulseFeaturesAvailable()
        bestParams = [ _bestParameterSet(sets, name, qvsAvailable)
                       for name in chemistryNames ]
        params = dict(zip(chemistryNames, bestParams))

    return params
예제 #13
0
 def _algorithmByName(self, name):
     """
     Return the algorithm object for `name` ("plurality", "quiver" or
     "arrow"), importing it only when requested.

     An unknown name, or an algorithm whose `availability` reports
     not-OK, is reported through die().
     """
     if name == "arrow":
         from GenomicConsensus.arrow import arrow as algo
     elif name == "quiver":
         from GenomicConsensus.quiver import quiver as algo
     elif name == "plurality":
         from GenomicConsensus.plurality import plurality as algo
     else:
         die("Failure: unrecognized algorithm %s" % name)
     usable, reason = algo.availability
     if not usable:
         die("Failure: %s" % reason)
     return algo
예제 #14
0
 def _algorithmByName(self, name):
     """
     Look up a consensus algorithm by name.

     "plurality", "quiver" and "arrow" are imported lazily from
     GenomicConsensus.  Any other name is reported via die(), as is an
     algorithm whose `availability` pair says it cannot be used.
     """
     if name == "plurality":
         from GenomicConsensus.plurality import plurality as algo
     elif name == "quiver":
         from GenomicConsensus.quiver import quiver as algo
     elif name == "arrow":
         from GenomicConsensus.arrow import arrow as algo
     else:
         die("Failure: unrecognized algorithm %s" % name)
     status = algo.availability
     ok, message = status
     if not ok:
         die("Failure: %s" % message)
     return algo
예제 #15
0
파일: arrow.py 프로젝트: lpp1985/lpp_Script
def configure(options, alnFile):
    """
    Validate the input for the Arrow algorithm and build its configuration.

    Raises U.IncompatibleDataException when `alnFile.readType` is not
    "standard".  Other problems (parameters that cannot be loaded, a
    model override that is rejected, unsupported chemistries, missing
    base features) are reported through die().

    Returns an M.ArrowConfig populated from `options`.
    """
    if alnFile.readType != "standard":
        raise U.IncompatibleDataException(
            "The Arrow algorithm requires a BAM file containing standard (non-CCS) reads."
        )

    if options.diploid:
        # logging.warn is a deprecated alias of logging.warning.
        logging.warning("Diploid analysis not yet supported under Arrow model.")

    # load parameters from file
    if options.parametersFile:
        logging.info("Loading model parameters from: ({0})".format(
            options.parametersFile))
        if not cc.LoadModels(options.parametersFile):
            die("Arrow: unable to load parameters from: ({0})".format(
                options.parametersFile))

    # test available chemistries
    supp = set(cc.SupportedChemistries())
    logging.info("Found consensus models for: ({0})".format(", ".join(
        sorted(supp))))

    used = set([])
    if options.parametersSpec != "auto":
        logging.info("Overriding model selection with: ({0})".format(
            options.parametersSpec))
        if not cc.OverrideModel(options.parametersSpec):
            die("Arrow: unable to override model with: ({0})".format(
                options.parametersSpec))
        used.add(options.parametersSpec)
    else:
        used.update(alnFile.sequencingChemistry)
        # Compute the difference once and reuse it for test and message.
        unsupp = used - supp
        if unsupp:
            die("Arrow: unsupported chemistries found: ({0})".format(", ".join(
                sorted(unsupp))))

    # All arrow models require PW except P6 and the first S/P1-C1
    pwExempt = set(["P6-C4", "S/P1-C1/beta"])
    for readGroup in alnFile.readGroupTable:
        if readGroup["SequencingChemistry"] in pwExempt:
            continue
        features = readGroup["BaseFeatures"]
        if "Ipd" not in features or "PulseWidth" not in features:
            die("Arrow model requires missing base feature: IPD or PulseWidth"
                )

    logging.info("Using consensus models for: ({0})".format(", ".join(
        sorted(used))))

    return M.ArrowConfig(minMapQV=options.minMapQV,
                         noEvidenceConsensus=options.noEvidenceConsensusCall,
                         computeConfidence=(not options.fastMode),
                         minReadScore=options.minReadScore,
                         minHqRegionSnr=options.minHqRegionSnr,
                         minZScore=options.minZScore,
                         minAccuracy=options.minAccuracy)
예제 #16
0
파일: model.py 프로젝트: lpp1985/lpp_Script
def loadParameterSets(parametersFile=None, spec=None, cmpH5=None):
    """
    Choose the Quiver parameter set(s) for a run.

    `spec` may be "chemName.modelName" (complete), "chemName", or None.
    Unless the spec is complete, `cmpH5` must be supplied so the best
    available parameter set can be determined from the file.

    Returns a dict of parameter sets: keyed by "*" when `spec` names a
    chemistry, otherwise keyed by each sequencing chemistry name found
    in `cmpH5`.
    """
    chemistryName, modelName = None, None
    if spec is not None:
        if "." in spec:
            chemistryName, modelName = spec.split(".")
        else:
            chemistryName = spec
    assert (cmpH5 is not None) or (chemistryName and modelName)

    parametersFile = _findParametersFile(parametersFile)
    logging.info("Using Quiver parameters file %s" % parametersFile)
    sets = _loadParameterSets(parametersFile)

    if chemistryName and modelName:
        # Complete spec: look the named set up directly.
        try:
            params = {"*": sets[spec]}
        except:
            die("Quiver: no available parameter set named %s" % spec)
    elif chemistryName:
        # Chemistry only: pick the best model the file's features allow.
        available = cmpH5.baseFeaturesAvailable()
        chosen = _bestParameterSet(sets, chemistryName, available)
        if chosen.chemistry != chemistryName:
            message = ("Quiver: no parameter set available compatible with this " +
                       "cmp.h5 for chemistry \"%s\" " % chemistryName)
            die(message)
        params = {"*": chosen}
    else:
        # No spec: one parameter set per chemistry present in the file.
        chemistryNames = list(set(cmpH5.sequencingChemistry))  # uniquify
        if "unknown" in chemistryNames:
            message = ("\"unknown\" chemistry in alignment file: either an unsupported chemistry " +
                       "has been used, the alignment file has been improperly constructed, or " +
                       "this version of SMRTanalysis is too old to recognize a new chemistry.")
            die(message)
        if not _isChemistryMixSupported(chemistryNames):
            die("Unsupported chemistry mix, cannot proceed.")
        available = cmpH5.baseFeaturesAvailable()
        params = {}
        for chem in chemistryNames:
            params[chem] = _bestParameterSet(sets, chem, available)

    return params
예제 #17
0
def configure(options, alnFile):
    """
    Check that the input suits the Arrow algorithm and build its config.

    Raises U.IncompatibleDataException when `alnFile.readType` is not
    "standard".  Failure to load parameters, a rejected model override,
    unsupported chemistries and missing base features are reported via
    die().  Returns an M.ArrowConfig built from `options`.
    """
    if alnFile.readType != "standard":
        raise U.IncompatibleDataException(
            "The Arrow algorithm requires a BAM file containing standard (non-CCS) reads." )

    if options.diploid:
        logging.info(
            "Diploid polishing in the Arrow model is in *BETA* mode.\n"
            "Any multi-base string that appears in annotation files\n"
            "is not phased!")

    # Optional user-supplied model parameters.
    paramsFile = options.parametersFile
    if paramsFile:
        logging.info("Loading model parameters from: ({0})".format(paramsFile))
        if not cc.LoadModels(paramsFile):
            die("Arrow: unable to load parameters from: ({0})".format(paramsFile))

    # Chemistries the loaded models support.
    supported = set(cc.SupportedChemistries())
    logging.info("Found consensus models for: ({0})".format(", ".join(sorted(supported))))

    used = set([])
    if options.parametersSpec == "auto":
        used.update(alnFile.sequencingChemistry)
        unsupported = used - supported
        if unsupported:
            die("Arrow: unsupported chemistries found: ({0})".format(", ".join(sorted(unsupported))))
    else:
        logging.info("Overriding model selection with: ({0})".format(options.parametersSpec))
        if not cc.OverrideModel(options.parametersSpec):
            die("Arrow: unable to override model with: ({0})".format(options.parametersSpec))
        used.add(options.parametersSpec)

    # All arrow models require PW except P6 and the first S/P1-C1
    for readGroup in alnFile.readGroupTable:
        needsPulseFeatures = set([readGroup["SequencingChemistry"]]) - set(["P6-C4", "S/P1-C1/beta"])
        if needsPulseFeatures and not ("Ipd" in readGroup["BaseFeatures"] and
                                       "PulseWidth" in readGroup["BaseFeatures"]):
            die("Arrow model requires missing base feature: IPD or PulseWidth")

    logging.info("Using consensus models for: ({0})".format(", ".join(sorted(used))))

    configArgs = dict(minMapQV=options.minMapQV,
                      noEvidenceConsensus=options.noEvidenceConsensusCall,
                      computeConfidence=(not options.fastMode),
                      minReadScore=options.minReadScore,
                      minHqRegionSnr=options.minHqRegionSnr,
                      minZScore=options.minZScore,
                      minAccuracy=options.minAccuracy,
                      maskRadius=options.maskRadius,
                      maskErrorRate=options.maskErrorRate,
                      polishDiploid=options.diploid)
    return M.ArrowConfig(**configArgs)
예제 #18
0
 def _loadReference(self, cmpH5):
     """
     Load the reference and resolve the requested reference windows.

     Reads options.referenceFilename and
     options.referenceWindowsAsString, and stores the parsed windows in
     options.referenceWindows: an empty tuple when no window spec was
     given, otherwise a list.  With options.skipUnrecognizedContigs set,
     window specs that fail to parse are dropped instead of aborting.
     A failure to load the reference is reported through die().
     """
     logging.info("Loading reference")
     err = reference.loadFromFile(options.referenceFilename, cmpH5)
     if err:
         die("Error loading reference")
     # Grok the referenceWindow spec, if any.
     if options.referenceWindowsAsString is None:
         options.referenceWindows = ()
     elif options.skipUnrecognizedContigs:
         # This is a workaround for smrtpipe scatter/gather.
         options.referenceWindows = []
         for s in options.referenceWindowsAsString.split(","):
             # Kept catch-all on purpose: whatever stringToWindow does for
             # an unrecognized contig, that window is simply skipped.
             try:
                 win = reference.stringToWindow(s)
                 options.referenceWindows.append(win)
             except:
                 pass
     else:
         # Build a real list.  On Python 3 map() returns a one-shot
         # iterator, which would be empty after its first traversal.
         options.referenceWindows = [
             reference.stringToWindow(s)
             for s in options.referenceWindowsAsString.split(",")]
예제 #19
0
def loadParameterSets(parametersFile=None, spec=None, cmpH5=None):
    """
    Load the Quiver parameter set(s) to use for a run.

    spec is either:
      - chemName.modelName  (complete spec),
      - chemName
      - None
    If the spec is incomplete, cmpH5 is required to determine the best
    available option.

    Returns a dict of parameter sets: keyed by "*" when `spec` names a
    chemistry, otherwise keyed by each sequencing chemistry name found
    in `cmpH5`.  An "unknown" chemistry or an unsupported chemistry mix
    is reported through die().
    """
    if spec is None:
        chemistryName, modelName = None, None
    elif "." in spec:
        chemistryName, modelName = spec.split(".")
    else:
        chemistryName, modelName = spec, None
    # Test for presence, not truthiness: a file object with no records
    # may evaluate as false and must not be treated as missing.
    assert (cmpH5 is not None) or (chemistryName and modelName)

    parametersFile = _findParametersFile(parametersFile)
    logging.info("Using Quiver parameters file %s" % parametersFile)
    sets = _loadParameterSets(parametersFile)

    if chemistryName and modelName:
        # Only a missing name means "no such parameter set".
        try:
            p = sets[spec]
        except KeyError:
            die("Quiver: no available parameter set named %s" % \
                spec)
        params = { "*" : p }
    elif chemistryName:
        qvsAvailable = cmpH5.pulseFeaturesAvailable()
        p = _bestParameterSet(sets, chemistryName, qvsAvailable)
        if p.chemistry != chemistryName:
            die("Quiver: no parameter set available compatible with this " + \
                "cmp.h5 for chemistry \"%s\" " % chemistryName)
        params = { "*" : p }
    else:
        chemistryNames = list(set(cmpH5.sequencingChemistry))  # uniquify
        if "unknown" in chemistryNames:
            die("\"unknown\" chemistry in alignment file: either an unsupported chemistry " +
                "has been used, the alignment file has been improperly constructed, or " +
                "this version of SMRTanalysis is too old to recognize a new chemistry.")
        if not _isChemistryMixSupported(chemistryNames):
            die("Unsupported chemistry mix, cannot proceed.")
        qvsAvailable = cmpH5.pulseFeaturesAvailable()
        bestParams = [ _bestParameterSet(sets, name, qvsAvailable)
                       for name in chemistryNames ]
        params = dict(zip(chemistryNames, bestParams))

    return params
예제 #20
0
 def _loadReference(self, cmpH5):
     """
     Load the reference, then parse the reference-window specification.

     options.referenceWindows is set to an empty tuple when
     options.referenceWindowsAsString is None, and to a list of parsed
     windows otherwise.  When options.skipUnrecognizedContigs is set,
     entries that cannot be parsed are left out.  A failure to load the
     reference is reported through die().
     """
     logging.info("Loading reference")
     err = reference.loadFromFile(options.referenceFilename, cmpH5)
     if err:
         die("Error loading reference")
     # Grok the referenceWindow spec, if any.
     if options.referenceWindowsAsString is None:
         options.referenceWindows = ()
     elif options.skipUnrecognizedContigs:
         # This is a workaround for smrtpipe scatter/gather.
         options.referenceWindows = []
         for s in options.referenceWindowsAsString.split(","):
             # Deliberately catch-all: a window that cannot be resolved
             # is skipped, whatever form the failure takes.
             try:
                 win = reference.stringToWindow(s)
                 options.referenceWindows.append(win)
             except:
                 pass
     else:
         # A list comprehension keeps this a concrete list on Python 3,
         # where map() would hand back a single-use iterator.
         windowSpecs = options.referenceWindowsAsString.split(",")
         options.referenceWindows = [
             reference.stringToWindow(s) for s in windowSpecs]
예제 #21
0
 def _algorithmByName(self, name, peekFile):
     """
     Return the algorithm object selected by `name`.

     Recognized names are "plurality", "quiver", "arrow", "poa" and
     "best"; "best" resolves the name chosen by
     algorithmSelection.bestAlgorithm for the chemistries of `peekFile`.
     For "arrow", every read group whose chemistry is not "P6-C4" or
     "S/P1-C1/beta" must list both "Ipd" and "PulseWidth" among its base
     features.  All failures are reported through die().
     """
     if name == "best":
         logging.info("Identifying best algorithm based on input data")
         from GenomicConsensus import algorithmSelection
         chosenName = algorithmSelection.bestAlgorithm(
             peekFile.sequencingChemistry)
         return self._algorithmByName(chosenName, peekFile)

     if name == "plurality":
         from GenomicConsensus.plurality import plurality as algo
     elif name == "quiver":
         from GenomicConsensus.quiver import quiver as algo
     elif name == "arrow":
         from GenomicConsensus.arrow import arrow as algo
         # All arrow models require PW except P6 and the first S/P1-C1
         for readGroup in peekFile.readGroupTable:
             needsPulseFeatures = set([readGroup["SequencingChemistry"]]) - set(
                 ["P6-C4", "S/P1-C1/beta"])
             if needsPulseFeatures and not (
                     "Ipd" in readGroup["BaseFeatures"]
                     and "PulseWidth" in readGroup["BaseFeatures"]):
                 die("Model requires missing base feature: IPD or PulseWidth"
                     )
     elif name == "poa":
         from GenomicConsensus.poa import poa as algo
     else:
         die("Failure: unrecognized algorithm %s" % name)

     usable, reason = algo.availability
     if not usable:
         die("Failure: %s" % reason)
     logging.info("Will use {a} algorithm".format(a=name))
     return algo
예제 #22
0
 def _algorithmByName(self, name, peekFile):
     """
     Resolve `name` to an algorithm object, importing it on demand.

     "best" delegates the choice to algorithmSelection.bestAlgorithm,
     based on the sequencing chemistries of `peekFile`.  For "arrow", if
     the file contains any chemistry other than "P6-C4" or
     "S/P1-C1/beta", it must provide both the "Ipd" and "PulseWidth"
     base features.  All failures are reported through die().
     """
     if name == "best":
         logging.info("Identifying best algorithm based on input data")
         from GenomicConsensus import algorithmSelection
         chosenName = algorithmSelection.bestAlgorithm(peekFile.sequencingChemistry)
         return self._algorithmByName(chosenName, peekFile)

     if name == "plurality":
         from GenomicConsensus.plurality import plurality as algo
     elif name == "quiver":
         from GenomicConsensus.quiver import quiver as algo
     elif name == "arrow":
         from GenomicConsensus.arrow import arrow as algo
         # All arrow models require PW except P6 and the first S/P1-C1
         pwDependent = set(peekFile.sequencingChemistry) - set(["P6-C4", "S/P1-C1/beta"])
         if pwDependent and not (peekFile.hasBaseFeature("Ipd") and
                                 peekFile.hasBaseFeature("PulseWidth")):
             die("Model requires missing base feature: IPD or PulseWidth")
     elif name == "poa":
         from GenomicConsensus.poa import poa as algo
     else:
         die("Failure: unrecognized algorithm %s" % name)

     usable, reason = algo.availability
     if not usable:
         die("Failure: %s" % reason)
     logging.info("Will use {a} algorithm".format(a=name))
     return algo
예제 #23
0
 def _checkFileCompatibility(self, cmpH5):
     """
     Reject input files that are unsorted or empty, reporting each
     problem through die().
     """
     inputIsSorted = cmpH5.isSorted
     if not inputIsSorted:
         die("Input CmpH5 file must be sorted.")
     inputIsEmpty = cmpH5.isEmpty
     if inputIsEmpty:
         die("Input CmpH5 file must be nonempty.")
예제 #24
0
 def _configureAlgorithm(self, options, cmpH5):
     """
     Configure the previously selected algorithm for this run.

     Stores the result of self._algorithm.configure(options, cmpH5) in
     self._algorithmConfiguration.  An IncompatibleDataException raised
     by the algorithm is reported through die().
     """
     # Identity test: "!= None" would dispatch to the object's own __ne__.
     assert self._algorithm is not None
     try:
         self._algorithmConfiguration = self._algorithm.configure(options, cmpH5)
     except IncompatibleDataException as e:
         # Exception.message exists only on Python 2; fall back to str(e)
         # so the handler cannot itself fail with AttributeError.
         die("Failure: %s" % getattr(e, "message", str(e)))
예제 #25
0
    def main(self):
        """
        Run the tool from option parsing through to completion.

        In order: parse options and select the algorithm, set up logging,
        peek at the input file (resolve options, load the reference,
        check compatibility, configure the algorithm), launch the slaves,
        open the input, and run the main loop while a monitoring thread
        watches the slaves.

        Returns 0 on success and -1 when the run was aborted.
        """

        # This looks scary but it's not.  Python uses reference
        # counting and has a secondary, optional garbage collector for
        # collecting garbage cycles.  Unfortunately when a cyclic GC
        # happens when a thread is calling cPickle.dumps, the
        # interpreter crashes sometimes.  See Bug 19704.  Since we
        # don't leak garbage cycles, disabling the cyclic GC is
        # essentially harmless.
        gc.disable()

        parseOptions()
        self._algorithm = self._algorithmByName(options.algorithm)
        self._setupLogging()
        # Fixed seed so that runs are repeatable.
        random.seed(42)

        logging.info("h5py version: %s" % h5py.version.version)
        logging.info("hdf5 version: %s" % h5py.version.hdf5_version)
        logging.info("ConsensusCore version: %s" %
                     (consensusCoreVersion() or "ConsensusCore unavailable"))
        logging.info("Starting.")

        atexit.register(self._cleanup)
        if options.doProfiling:
            self._makeTemporaryDirectory()

        # Open the input once up front to validate it and finish
        # configuration before any slaves are launched.
        with AlignmentSet(options.inputFilename) as peekFile:
            if not peekFile.isCmpH5 and not peekFile.hasPbi:
                # logging.warn is a deprecated alias of logging.warning.
                logging.warning("'fancyChunking' not yet available for BAM "
                                "files without accompanying .pbi files, "
                                "disabling")
                options.fancyChunking = False
            logging.info("Peeking at file %s" % options.inputFilename)
            logging.info("Input data: numAlnHits=%d" % len(peekFile))
            resolveOptions(peekFile)
            self._loadReference(peekFile)
            self._checkFileCompatibility(peekFile)
            self._configureAlgorithm(options, peekFile)
            options.disableHdf5ChunkCache = True
            #options.disableHdf5ChunkCache = self._shouldDisableChunkCache(peekFile)
            #if options.disableHdf5ChunkCache:
            #    logging.info("Will disable HDF5 chunk cache (large number of datasets)")
            #logging.debug("After peek, # hdf5 objects open: %d" % h5py.h5f.get_obj_count())

        if options.dumpEvidence:
            self._setupEvidenceDumpDirectory(options.evidenceDirectory)

        self._launchSlaves()
        self._readCmpH5Input()

        monitoringThread = threading.Thread(target=monitorSlaves, args=(self,))
        monitoringThread.start()

        try:
            if options.doProfiling:
                cProfile.runctx("self._mainLoop()",
                                globals=globals(),
                                locals=locals(),
                                filename=os.path.join(options.temporaryDirectory,
                                                      "profile-main.out"))

            elif options.doDebugging:
                if not options.threaded:
                    die("Debugging only works with -T (threaded) mode")
                logging.info("PID: %d", os.getpid())
                import ipdb
                with ipdb.launch_ipdb_on_exception():
                    self._mainLoop()

            else:
                self._mainLoop()
        except:
            # Catch-all on purpose: any failure in the main loop is turned
            # into an abort carrying the formatted traceback.
            why = traceback.format_exc()
            self.abortWork(why)

        monitoringThread.join()

        if self._aborting:
            logging.error("Aborting")
            return -1
        else:
            logging.info("Finished.")

        if options.doProfiling:
            self._printProfiles()

        # close h5 file.
        self._inCmpH5.close()
        return 0
예제 #26
0
 def _checkFileCompatibility(self, cmpH5):
     """
     Verify that the input file is sorted and contains data; each
     violation is reported through die().
     """
     isSorted = cmpH5.isSorted
     if not isSorted:
         die("Input CmpH5 file must be sorted.")
     isEmpty = cmpH5.isEmpty
     if isEmpty:
         die("Input CmpH5 file must be nonempty.")
예제 #27
0
    def main(self):
        """
        Top-level driver: set up the run, launch the workers, run the
        main loop, and report the outcome.

        Steps, in order:
          - disable the cyclic garbage collector and seed `random` with 42;
          - if a debugging option is set, import ipdb (and optionally
            break into it immediately);
          - log library versions, register self._cleanup with atexit and,
            when profiling, create the temporary directory;
          - open the input as an AlignmentSet to validate it, resolve
            options, load the reference and select/configure the algorithm;
          - launch the slaves, read the alignment input and start a
            thread running monitorSlaves(self);
          - run self._mainLoop() (under cProfile or ipdb when requested),
            calling self.abortWork() if anything is raised;
          - join the monitoring thread.

        Returns -1 if self._aborting is set after the monitoring thread
        has been joined; otherwise prints profiles (when profiling),
        closes self._inAlnFile and returns 0.
        """

        # This looks scary but it's not.  Python uses reference
        # counting and has a secondary, optional garbage collector for
        # collecting garbage cycles.  Unfortunately when a cyclic GC
        # happens when a thread is calling cPickle.dumps, the
        # interpreter crashes sometimes.  See Bug 19704.  Since we
        # don't leak garbage cycles, disabling the cyclic GC is
        # essentially harmless.
        gc.disable()

        # Fixed seed so that any use of `random` is repeatable across runs.
        random.seed(42)

        # Both debugging options need ipdb and threaded mode; these
        # checks run before any worker is launched.
        if options.pdb or options.pdbAtStartup:
            print("Process ID: %d" % os.getpid(), file=sys.stderr)
            try:
                import ipdb
            except ImportError:
                die("Debugging options require 'ipdb' package installed.")

            if not options.threaded:
                die("Debugging only works with -T (threaded) mode")

        # ipdb is bound by the import above whenever pdbAtStartup is set.
        if options.pdbAtStartup:
            ipdb.set_trace()

        # The version helpers may return a falsy value, in which case an
        # "unavailable" placeholder is logged instead.
        logging.info("ConsensusCore version: %s" %
                     (consensusCoreVersion() or "ConsensusCore unavailable"))
        logging.info("ConsensusCore2 version: %s" %
                     (consensusCore2Version() or "ConsensusCore2 unavailable"))
        logging.info("Starting.")

        # Register cleanup before anything that may need cleaning up exists.
        atexit.register(self._cleanup)
        if options.doProfiling:
            self._makeTemporaryDirectory()

        # "Peek" at the input: this handle is only used for validation and
        # configuration, and is closed when the with-block exits.
        with AlignmentSet(options.inputFilename) as peekFile:
            if options.algorithm == "arrow" and peekFile.isCmpH5:
                die("Arrow does not support CmpH5 files")
            if not peekFile.isCmpH5 and not peekFile.hasPbi:
                die("Genomic Consensus only works with cmp.h5 files and BAM "
                    "files with accompanying .pbi files")
            logging.info("Peeking at file %s" % options.inputFilename)
            logging.info("Input data: numAlnHits=%d" % len(peekFile))
            resolveOptions(peekFile)
            self._loadReference(peekFile)
            self._checkFileCompatibility(peekFile)
            self._algorithm = self._algorithmByName(options.algorithm, peekFile)
            self._configureAlgorithm(options, peekFile)
            # The HDF5 chunk cache is disabled unconditionally; the
            # heuristic that used to decide this is kept below, commented out.
            options.disableHdf5ChunkCache = True
            #options.disableHdf5ChunkCache = self._shouldDisableChunkCache(peekFile)
            #if options.disableHdf5ChunkCache:
            #    logging.info("Will disable HDF5 chunk cache (large number of datasets)")

        self._launchSlaves()
        self._readAlignmentInput()

        # monitorSlaves runs on its own thread, with this object as its
        # only argument, while the main loop runs on the current thread.
        monitoringThread = threading.Thread(target=monitorSlaves, args=(self,))
        monitoringThread.start()

        try:
            if options.doProfiling:
                # Profile the main loop and write the stats into the
                # temporary directory created above.
                cProfile.runctx("self._mainLoop()",
                                globals=globals(),
                                locals=locals(),
                                filename=os.path.join(options.temporaryDirectory,
                                                      "profile-main.out"))

            elif options.pdb:
                # Drop into ipdb if the main loop raises.
                with ipdb.launch_ipdb_on_exception():
                    self._mainLoop()

            else:
                self._mainLoop()
        except BaseException as exc:
            # Catches everything, including KeyboardInterrupt and
            # SystemExit.  The full option set is logged together with the
            # traceback, then the run is flagged as aborted.
            msg = 'options={}'.format(pprint.pformat(vars(options)))
            logging.exception(msg)
            self.abortWork(repr(exc))

        monitoringThread.join()

        # NOTE(review): the abort path returns before the profile printout
        # and before self._inAlnFile.close() below.
        if self._aborting:
            logging.error("Aborting")
            return -1
        else:
            logging.info("Finished.")

        if options.doProfiling:
            self._printProfiles()

        # Close the input alignment file (self._inAlnFile).
        self._inAlnFile.close()
        return 0
예제 #28
0
파일: main.py 프로젝트: lpp1985/lpp_Script
 def _checkFileCompatibility(self, alnFile):
     if not alnFile.isSorted:
         die("Input Alignment file must be sorted.")
     if alnFile.isCmpH5 and alnFile.isEmpty:
         die("Input Alignment file must be nonempty.")
예제 #29
0
 def _checkFileCompatibility(self, alnFile):
     if not alnFile.isSorted:
         die("Input Alignment file must be sorted.")
     if alnFile.isEmpty:
         die("Input Alignment file must be nonempty.")
예제 #30
0
    def main(self):
        """
        Top-level driver: set up the run, launch the workers, run the
        main loop, and report the outcome.

        Steps, in order:
          - disable the cyclic garbage collector, select the algorithm,
            set up logging and seed `random` with 42;
          - log library versions, register self._cleanup with atexit and,
            when profiling, create the temporary directory;
          - open the input as an AlignmentSet to validate it, resolve
            options, load the reference and configure the algorithm;
          - optionally set up the evidence dump directory;
          - launch the slaves, read the alignment input and start a
            thread running monitorSlaves(self);
          - run self._mainLoop() (under cProfile or ipdb when requested),
            calling self.abortWork() with the formatted traceback if
            anything is raised;
          - join the monitoring thread.

        Returns -1 if self._aborting is set after the monitoring thread
        has been joined; otherwise prints profiles (when profiling),
        closes self._inAlnFile and returns 0.
        """

        # This looks scary but it's not.  Python uses reference
        # counting and has a secondary, optional garbage collector for
        # collecting garbage cycles.  Unfortunately when a cyclic GC
        # happens when a thread is calling cPickle.dumps, the
        # interpreter crashes sometimes.  See Bug 19704.  Since we
        # don't leak garbage cycles, disabling the cyclic GC is
        # essentially harmless.
        gc.disable()

        self._algorithm = self._algorithmByName(options.algorithm)
        self._setupLogging()
        # Fixed seed so that any use of `random` is repeatable across runs.
        random.seed(42)

        logging.info("h5py version: %s" % h5py.version.version)
        logging.info("hdf5 version: %s" % h5py.version.hdf5_version)
        # The version helpers may return a falsy value, in which case an
        # "unavailable" placeholder is logged instead.
        logging.info("ConsensusCore version: %s" %
                     (consensusCoreVersion() or "ConsensusCore unavailable"))
        logging.info("ConsensusCore2 version: %s" %
                     (consensusCore2Version() or "ConsensusCore2 unavailable"))
        logging.info("Starting.")

        # Register cleanup before anything that may need cleaning up exists.
        atexit.register(self._cleanup)
        if options.doProfiling:
            self._makeTemporaryDirectory()

        # "Peek" at the input: this handle is only used for validation and
        # configuration, and is closed when the with-block exits.
        with AlignmentSet(options.inputFilename) as peekFile:
            if options.algorithm == "arrow" and peekFile.isCmpH5:
                die("Arrow does not support CmpH5 files")
            if not peekFile.isCmpH5 and not peekFile.hasPbi:
                die("Genomic Consensus only works with cmp.h5 files and BAM "
                    "files with accompanying .pbi files")
            logging.info("Peeking at file %s" % options.inputFilename)
            logging.info("Input data: numAlnHits=%d" % len(peekFile))
            resolveOptions(peekFile)
            self._loadReference(peekFile)
            self._checkFileCompatibility(peekFile)
            self._configureAlgorithm(options, peekFile)
            # The HDF5 chunk cache is disabled unconditionally; the
            # heuristic that used to decide this is kept below, commented out.
            options.disableHdf5ChunkCache = True
            #options.disableHdf5ChunkCache = self._shouldDisableChunkCache(peekFile)
            #if options.disableHdf5ChunkCache:
            #    logging.info("Will disable HDF5 chunk cache (large number of datasets)")
            #logging.debug("After peek, # hdf5 objects open: %d" % h5py.h5f.get_obj_count())

        if options.dumpEvidence:
            self._setupEvidenceDumpDirectory(options.evidenceDirectory)

        self._launchSlaves()
        self._readAlignmentInput()

        # monitorSlaves runs on its own thread, with this object as its
        # only argument, while the main loop runs on the current thread.
        monitoringThread = threading.Thread(target=monitorSlaves,
                                            args=(self, ))
        monitoringThread.start()

        try:
            if options.doProfiling:
                # Profile the main loop and write the stats into the
                # temporary directory created above.
                cProfile.runctx("self._mainLoop()",
                                globals=globals(),
                                locals=locals(),
                                filename=os.path.join(
                                    options.temporaryDirectory,
                                    "profile-main.out"))

            elif options.debug:
                # NOTE(review): this check runs after the slaves have been
                # launched; anything die() raises here is handled by the
                # except clause below.
                if not options.threaded:
                    die("Debugging only works with -T (threaded) mode")
                logging.info("PID: %d", os.getpid())
                import ipdb
                # Drop into ipdb if the main loop raises.
                with ipdb.launch_ipdb_on_exception():
                    self._mainLoop()

            else:
                self._mainLoop()
        except BaseException:
            # Deliberately catches BaseException rather than Exception:
            # KeyboardInterrupt and SystemExit must also end in abortWork().
            # This is exactly what the former bare `except:` did, spelled
            # out explicitly.  Presumably the abort is what lets the join
            # below return -- confirm against monitorSlaves.
            why = traceback.format_exc()
            self.abortWork(why)

        monitoringThread.join()

        # NOTE(review): the abort path returns before the profile printout
        # and before self._inAlnFile.close() below.
        if self._aborting:
            logging.error("Aborting")
            return -1
        else:
            logging.info("Finished.")

        if options.doProfiling:
            self._printProfiles()

        # Close the input alignment file (self._inAlnFile).
        self._inAlnFile.close()
        return 0