Esempio n. 1
0
    def loadReferenceAndModel(self, referencePath, cmpH5Path):
        """Load the reference contigs and construct the IPD model.

        Stores the reference info table read from the cmp.h5 in
        ``self.refInfo`` and the constructed ``IpdModel`` in ``self.ipdModel``.

        The model file is chosen in this order of precedence:

        1. Explicit path passed via ``self.args.ipdModel``.
        2. ``<self.args.paramsPath>/<majority chemistry>.h5``, where the
           majority chemistry is the most frequent value of the
           ``SequencingChemistry`` column of the movie info table.
        3. The built-in model (``None`` is passed to ``IpdModel``).

        Logs an error and calls ``sys.exit(1)`` if an explicitly supplied
        model file or params path does not exist.
        """
        # Function-scope import keeps this method independent of the file's
        # top-level import block.
        from collections import Counter

        # Load the reference contigs - annotated with their refID from the cmp.h5
        contigs = ReferenceUtils.loadReferenceContigs(referencePath, cmpH5Path)

        # Read reference info table from cmp.h5
        (refInfoTable, movieInfoTable) = ReferenceUtils.loadCmpH5Tables(cmpH5Path)
        self.refInfo = refInfoTable

        # By default, use built-in model
        ipdModel = None

        if self.args.ipdModel:
            ipdModel = self.args.ipdModel
            logging.info("Using passed in ipd model: %s", self.args.ipdModel)
            if not os.path.exists(self.args.ipdModel):
                logging.error("Couldn't find model file: %s", self.args.ipdModel)
                sys.exit(1)
        elif self.args.paramsPath:
            if not os.path.exists(self.args.paramsPath):
                logging.error("Params path doesn't exist: %s", self.args.paramsPath)
                sys.exit(1)

            # Use the SequencingChemistry data to select an ipd model
            if 'SequencingChemistry' in movieInfoTable.dtype.fields:
                # Pick majority chemistry.  Counter tallies every occurrence;
                # itertools.groupby only groups *consecutive* equal items, so
                # on an unsorted list a later run of a chemistry would
                # overwrite the count of an earlier run and skew the result.
                chemCounts = Counter(movieInfoTable.SequencingChemistry.tolist())
                majorityChem = max(chemCounts, key=chemCounts.get)

                if majorityChem == 'unknown':
                    logging.warning("Chemistry is unknown. Falling back to built-in model")
                else:
                    # Find the appropriate model file:
                    candidate = os.path.join(self.args.paramsPath, majorityChem + ".h5")
                    if os.path.exists(candidate):
                        ipdModel = candidate
                        logging.info("Using Chemistry matched IPD model: %s", ipdModel)
                    else:
                        logging.warning("Model not found: %s", candidate)
                        logging.warning("Falling back to built-in model")

        self.ipdModel = IpdModel(contigs, ipdModel, self.args.modelIters)
Esempio n. 2
0
    def setUp(self):
        """Build a KineticWorker fixture backed by the bundled lambda sample data."""
        # Sample data lives in a 'data' directory next to this file.
        fixtureRoot = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
        lambdaFasta = os.path.join(fixtureRoot, 'lambda', 'sequence', 'lambda.fasta')
        alignmentPath = os.path.join(fixtureRoot, "p4-c2-lambda-mod-decode.cmp.h5")

        # Reference contigs and the model built from them.
        self.contigs = ReferenceUtils.loadReferenceContigs(lambdaFasta, alignmentPath)
        self.ipdModel = IpdModel(self.contigs)

        # A functional KineticWorker that tests can poke at manually.
        worker = KineticWorker(self.ipdModel)
        self.kw = worker

        # Attach the cmp.h5 by hand - normally the Worker superclass supplies it.
        self.cmpH5 = CmpH5Reader(alignmentPath)
        worker.caseCmpH5 = self.cmpH5
        worker.controlCmpH5 = None

        worker.options = self.getOpts()