def loadReferenceAndModel(self, referencePath, cmpH5Path):
    """Load the reference contigs and select the IPD model to use.

    Sets ``self.refInfo`` to the reference info table read from the cmp.h5
    and ``self.ipdModel`` to an ``IpdModel`` built from the loaded contigs.

    There are three different ways the ipdModel can be loaded.
    In order of precedence they are:

    1. Explicit path passed to --ipdModel.
    2. Path to parameter bundle (``self.args.paramsPath``); the model file
       is ``<majority chemistry>.h5`` inside it, selected using the
       /MovieInfo/SequencingChemistry tags.
    3. Fall back to the built-in model (``None`` is handed to ``IpdModel``).

    Calls ``sys.exit(1)`` when an explicitly requested model file, or the
    parameter bundle path, does not exist.

    :param referencePath: path of the reference, forwarded to
        ``ReferenceUtils.loadReferenceContigs``.
    :param cmpH5Path: path of the cmp.h5 file the tables are read from.
    """
    # Function-scope import: the fix below needs Counter, and this keeps the
    # method self-contained regardless of the module's import block.
    from collections import Counter

    # Load the reference contigs - annotated with their refID from the cmp.h5
    contigs = ReferenceUtils.loadReferenceContigs(referencePath, cmpH5Path)

    # Read reference info table from cmp.h5
    (refInfoTable, movieInfoTable) = ReferenceUtils.loadCmpH5Tables(cmpH5Path)
    self.refInfo = refInfoTable

    # By default, use built-in model
    ipdModel = None

    if self.args.ipdModel:
        ipdModel = self.args.ipdModel
        logging.info("Using passed in ipd model: %s", ipdModel)
        if not os.path.exists(ipdModel):
            logging.error("Couldn't find model file: %s", ipdModel)
            sys.exit(1)

    elif self.args.paramsPath:
        paramsPath = self.args.paramsPath
        if not os.path.exists(paramsPath):
            logging.error("Params path doesn't exist: %s", paramsPath)
            sys.exit(1)

        # Use the SequencingChemistry data to select an ipd model
        if 'SequencingChemistry' in movieInfoTable.dtype.fields:
            # Pick majority chemistry. Counter tallies every occurrence;
            # the previous itertools.groupby-based count only saw runs of
            # consecutive equal entries and kept the length of the last run
            # per chemistry, which mis-picks the majority whenever movies
            # of different chemistries are interleaved.
            chemistries = movieInfoTable.SequencingChemistry.tolist()
            chemCounts = Counter(chemistries)
            majorityChem = max(chemCounts, key=chemCounts.get)

            if majorityChem == 'unknown':
                logging.warning("Chemistry is unknown. Falling back to built-in model")
            else:
                # Find the appropriate model file:
                candidate = os.path.join(paramsPath, majorityChem + ".h5")
                if os.path.exists(candidate):
                    logging.info("Using Chemistry matched IPD model: %s", candidate)
                    ipdModel = candidate
                else:
                    logging.warning("Model not found: %s", candidate)
                    logging.warning("Falling back to built-in model")

    self.ipdModel = IpdModel(contigs, ipdModel, self.args.modelIters)
def setUp(self):
    """Prepare a KineticWorker backed by the bundled lambda sample data.

    After this runs the test case exposes:

    * ``self.contigs`` - reference contigs loaded from the lambda FASTA,
      using the sample cmp.h5 for annotation.
    * ``self.ipdModel`` - an ``IpdModel`` built from those contigs.
    * ``self.cmpH5`` - a ``CmpH5Reader`` opened on the sample cmp.h5.
    * ``self.kw`` - a ``KineticWorker`` that can be poked at manually, with
      its case cmp.h5, control cmp.h5 (``None``) and options filled in.
    """
    # The sample data lives in a 'data' directory next to this file.
    thisDir = os.path.dirname(os.path.abspath(__file__))
    sampleRoot = os.path.join(thisDir, 'data')
    fastaPath = os.path.join(sampleRoot, 'lambda', 'sequence', 'lambda.fasta')
    alignmentPath = os.path.join(sampleRoot, "p4-c2-lambda-mod-decode.cmp.h5")

    # Load the lambda genome from our sample data
    self.contigs = ReferenceUtils.loadReferenceContigs(fastaPath, alignmentPath)
    self.ipdModel = IpdModel(self.contigs)

    # Create a functional KineticWorker object that can be poked at manually.
    worker = KineticWorker(self.ipdModel)
    self.kw = worker

    reader = CmpH5Reader(alignmentPath)
    self.cmpH5 = reader

    # Put in our cmp.h5 - this is normally supplied by the Worker superclass
    worker.caseCmpH5 = reader
    worker.controlCmpH5 = None
    worker.options = self.getOpts()