def test_bam(self):
    """Check contig loading and chemistry detection on a BAM-backed AlignmentSet.

    Opens ``Hpyl_1_5000.bam`` against the Helicobacter pylori J99 FASTA and
    expects exactly one contig whose ``cmph5ID`` is 0, and a detected
    chemistry of "P6-C4".
    """
    bamFile = os.path.join(big_data_dir, "Hpyl_1_5000.bam")
    refFile = os.path.join(ref_dir, "Helicobacter_pylori_J99", "sequence",
                           "Helicobacter_pylori_J99.fasta")
    ds = AlignmentSet(bamFile, referenceFastaFname=refFile)

    contigs = ReferenceUtils.loadReferenceContigs(refFile, ds)
    # assertEqual is used instead of the deprecated assertEquals alias,
    # which no longer exists in Python 3.12+.
    self.assertEqual(len(contigs), 1)
    self.assertEqual(contigs[0].cmph5ID, 0)

    chemistry = ReferenceUtils.loadAlignmentChemistry(ds)
    self.assertEqual(chemistry, "P6-C4")
def loadReferenceAndModel(self, referencePath, ipdModelFilename):
    """Load the reference contigs and build the IPD model from a given file.

    Requires ``self.alignments`` and ``self.referenceWindows`` to be set
    already. The contigs are loaded for ``referencePath`` using those two
    attributes, and the resulting model is stored on ``self.ipdModel``.

    referencePath    -- reference passed to ReferenceUtils.loadReferenceContigs
    ipdModelFilename -- model file handed to IpdModel
    """
    # Both the alignments and the windows must have been prepared beforehand.
    assert not (self.alignments is None or self.referenceWindows is None)

    # Load the reference contigs - annotated with their refID from the cmp.h5
    message = "Loading reference contigs {!r}".format(referencePath)
    logging.info(message)
    referenceContigs = ReferenceUtils.loadReferenceContigs(
        referencePath,
        alignmentSet=self.alignments,
        windows=self.referenceWindows)

    self.ipdModel = IpdModel(referenceContigs,
                             ipdModelFilename,
                             self.args.modelIters)
def test_bam(self):
    """Check contig loading and chemistry detection on a BAM-backed AlignmentSet.

    Opens ``Hpyl_1_5000.bam`` against the Helicobacter pylori J99 FASTA and
    expects exactly one contig whose ``cmph5ID`` is 0, and a detected
    chemistry of "P6-C4".
    """
    bamFile = os.path.join(big_data_dir, "Hpyl_1_5000.bam")
    refFile = os.path.join(ref_dir, "Helicobacter_pylori_J99", "sequence",
                           "Helicobacter_pylori_J99.fasta")
    ds = AlignmentSet(bamFile, referenceFastaFname=refFile)

    contigs = ReferenceUtils.loadReferenceContigs(refFile, ds)
    # assertEqual is used instead of the deprecated assertEquals alias,
    # which no longer exists in Python 3.12+.
    self.assertEqual(len(contigs), 1)
    self.assertEqual(contigs[0].cmph5ID, 0)

    chemistry = ReferenceUtils.loadAlignmentChemistry(ds)
    self.assertEqual(chemistry, "P6-C4")
def test_cmph5(self):
    """Check contig loading and chemistry detection on a cmp.h5-backed AlignmentSet.

    Opens ``p4-c2-lambda-mod-decode.cmp.h5`` from the ``data`` directory next
    to this file, against the lambda FASTA, and expects exactly one contig
    whose ``cmph5ID`` is 1, and a detected chemistry of "P4-C2".
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    dataDir = os.path.join(base_dir, 'data')
    # The previously computed resources directory was never used here,
    # so it has been dropped.
    refFile = os.path.join(dataDir, 'lambda', 'sequence', 'lambda.fasta')
    cmpFile = os.path.join(dataDir, "p4-c2-lambda-mod-decode.cmp.h5")
    ds = AlignmentSet(cmpFile, referenceFastaFname=refFile)

    contigs = ReferenceUtils.loadReferenceContigs(refFile, ds)
    # assertEqual is used instead of the deprecated assertEquals alias,
    # which no longer exists in Python 3.12+.
    self.assertEqual(len(contigs), 1)
    self.assertEqual(contigs[0].cmph5ID, 1)

    chemistry = ReferenceUtils.loadAlignmentChemistry(ds)
    self.assertEqual(chemistry, "P4-C2")
def test_cmph5(self):
    """Check contig loading and chemistry detection on a cmp.h5-backed AlignmentSet.

    Opens ``p4-c2-lambda-mod-decode.cmp.h5`` from the ``data`` directory next
    to this file, against the lambda FASTA, and expects exactly one contig
    whose ``cmph5ID`` is 1, and a detected chemistry of "P4-C2".
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    dataDir = os.path.join(base_dir, 'data')
    # The previously computed resources directory was never used here,
    # so it has been dropped.
    refFile = os.path.join(dataDir, 'lambda', 'sequence', 'lambda.fasta')
    cmpFile = os.path.join(dataDir, "p4-c2-lambda-mod-decode.cmp.h5")
    ds = AlignmentSet(cmpFile, referenceFastaFname=refFile)

    contigs = ReferenceUtils.loadReferenceContigs(refFile, ds)
    # assertEqual is used instead of the deprecated assertEquals alias,
    # which no longer exists in Python 3.12+.
    self.assertEqual(len(contigs), 1)
    self.assertEqual(contigs[0].cmph5ID, 1)

    chemistry = ReferenceUtils.loadAlignmentChemistry(ds)
    self.assertEqual(chemistry, "P4-C2")
def loadReferenceAndModel(self, referencePath):
    """Load the reference contigs and choose the IPD model to run with.

    Requires ``self.alignments`` and ``self.referenceWindows`` to be set.
    The chosen model path (or None) is passed to IpdModel and the result is
    stored on ``self.ipdModel``.

    Model selection, in order of precedence:
      1. ``self.args.ipdModel``: an explicit model path; exits with status 1
         if the file does not exist.
      2. ``self.args.paramsPath``: a directory holding ``<chemistry>.h5``
         files; the chemistry comes from the alignments. Exits with status 1
         if the directory is missing, the chemistry is 'unknown', or no
         model file exists for that chemistry.
      3. Neither option given: None is passed to IpdModel (per the original
         comments, this selects the built-in model).
    """
    # Both the alignments and the windows must have been prepared beforehand.
    assert not (self.alignments is None or self.referenceWindows is None)

    # Load the reference contigs - annotated with their refID from the cmp.h5
    logging.info("Loading reference contigs %s" % referencePath)
    referenceContigs = ReferenceUtils.loadReferenceContigs(
        referencePath,
        alignmentSet=self.alignments,
        windows=self.referenceWindows)

    args = self.args
    modelFile = None  # stays None unless one of the options below applies

    if args.ipdModel:
        modelFile = args.ipdModel
        logging.info("Using passed in ipd model: %s" % modelFile)
        if not os.path.exists(modelFile):
            logging.error("Couldn't find model file: %s" % modelFile)
            sys.exit(1)

    elif args.paramsPath:
        bundleDir = args.paramsPath
        if not os.path.exists(bundleDir):
            logging.error("Params path doesn't exist: %s" % bundleDir)
            sys.exit(1)

        chemistry = ReferenceUtils.loadAlignmentChemistry(self.alignments)

        # Temporary solution for Sequel chemistries: there are no trained
        # kinetics models for them yet, but the P5-C3 training has been
        # observed to give fairly good results on Sequel data, so that
        # model is used for now.
        if chemistry.startswith("S/"):
            logging.info("No trained model available yet for Sequel chemistries; modeling as P5-C3")
            chemistry = "P5-C3"

        modelFile = os.path.join(bundleDir, chemistry + ".h5")

        if chemistry == 'unknown':
            logging.error("Chemistry cannot be identified---cannot perform kinetic analysis")
            sys.exit(1)
        elif os.path.exists(modelFile):
            logging.info("Using Chemistry matched IPD model: %s" % modelFile)
        else:
            logging.error("Aborting, no kinetics model available for this chemistry: %s" % modelFile)
            sys.exit(1)

    self.ipdModel = IpdModel(referenceContigs, modelFile, args.modelIters)
def loadReferenceAndModel(self, referencePath, cmpH5Path):
    """Load the reference contigs, reference info table and IPD model.

    Stores the cmp.h5 reference info table on ``self.refInfo`` and the
    constructed model on ``self.ipdModel``.

    Model selection, in order of precedence:
      1. ``self.args.ipdModel``: an explicit model path; exits with status 1
         if the file does not exist.
      2. ``self.args.paramsPath``: a directory holding ``<chemistry>.h5``
         files; exits with status 1 if the directory is missing. If the
         chemistry is 'unknown' or no matching file exists, a warning is
         logged and None is used instead.
      3. Otherwise None is passed to IpdModel (per the original comments,
         this selects the built-in model).

    referencePath -- reference passed to ReferenceUtils.loadReferenceContigs
    cmpH5Path     -- cmp.h5 path used for contigs, tables and chemistry
    """
    # Load the reference contigs - annotated with their refID from the cmp.h5
    contigs = ReferenceUtils.loadReferenceContigs(referencePath, cmpH5Path)

    # Read reference info table from cmp.h5
    (refInfoTable, _) = ReferenceUtils.loadCmpH5Tables(cmpH5Path)
    self.refInfo = refInfoTable

    # By default, use built-in model
    ipdModel = None

    if self.args.ipdModel:
        ipdModel = self.args.ipdModel
        logging.info("Using passed in ipd model: %s" % self.args.ipdModel)
        if not os.path.exists(self.args.ipdModel):
            logging.error("Couldn't find model file: %s" % self.args.ipdModel)
            # An explicitly requested model that is missing is fatal, just
            # like a missing params path below; previously execution
            # continued and handed the bad path to IpdModel.
            sys.exit(1)

    elif self.args.paramsPath:
        if not os.path.exists(self.args.paramsPath):
            logging.error("Params path doesn't exist: %s" % self.args.paramsPath)
            sys.exit(1)

        majorityChem = ReferenceUtils.loadCmpH5Chemistry(cmpH5Path)
        ipdModel = os.path.join(self.args.paramsPath, majorityChem + ".h5")
        if majorityChem == 'unknown':
            logging.warning("Chemistry is unknown. Falling back to built-in model")
            ipdModel = None
        elif not os.path.exists(ipdModel):
            logging.warning("Model not found: %s" % ipdModel)
            logging.warning("Falling back to built-in model")
            ipdModel = None
        else:
            logging.info("Using Chemistry matched IPD model: %s" % ipdModel)

    self.ipdModel = IpdModel(contigs, ipdModel, self.args.modelIters)
def setUp(self):
    """Build a KineticWorker fixture from the subclass-provided inputs.

    The reference and alignment paths come from ``self.getReference()`` and
    ``self.getAlignments()``. The IPD model is built from ``P6-C4.h5`` in
    the ``../kineticsTools/resources`` directory relative to this file.
    """
    self.cmpH5 = None

    thisDir = os.path.dirname(os.path.abspath(__file__))
    resourcesDir = os.path.join(thisDir, '../kineticsTools/resources')

    ref = self.getReference()
    alnFile = self.getAlignments()
    # Both input files have to be present for the fixture to be usable.
    assert os.path.exists(alnFile) and os.path.exists(ref)

    self.ds = AlignmentSet(alnFile, referenceFastaFname=ref)
    self.contigs = ReferenceUtils.loadReferenceContigs(ref, self.ds)

    modelPath = os.path.join(resourcesDir, "P6-C4.h5")
    self.ipdModel = IpdModel(self.contigs, modelPath)

    # Create a functional KineticWorker object that can be poked at
    worker = KineticWorker(self.ipdModel)
    self.kw = worker

    # Put in our cmp.h5 - this is normally supplied by the Worker
    worker.caseCmpH5 = self.ds
    worker.controlCmpH5 = None
    worker.options = self.getOpts()
def loadReferenceAndModel(self, referencePath, cmpH5Path):
    """Load the reference contigs, reference info table and IPD model.

    Stores the cmp.h5 reference info table on ``self.refInfo`` and the
    constructed model on ``self.ipdModel``.

    Model selection, in order of precedence:
      1. ``self.args.ipdModel``: an explicit model path; exits with status 1
         if the file does not exist.
      2. ``self.args.paramsPath``: a directory holding ``<chemistry>.h5``
         files; exits with status 1 if the directory is missing. If the
         chemistry is "unknown" or no matching file exists, a warning is
         logged and None is used instead.
      3. Otherwise None is passed to IpdModel (per the original comments,
         this selects the built-in model).

    referencePath -- reference passed to ReferenceUtils.loadReferenceContigs
    cmpH5Path     -- cmp.h5 path used for contigs, tables and chemistry
    """
    # Load the reference contigs - annotated with their refID from the cmp.h5
    contigs = ReferenceUtils.loadReferenceContigs(referencePath, cmpH5Path)

    # Read reference info table from cmp.h5
    (refInfoTable, _) = ReferenceUtils.loadCmpH5Tables(cmpH5Path)
    self.refInfo = refInfoTable

    # By default, use built-in model
    ipdModel = None

    if self.args.ipdModel:
        ipdModel = self.args.ipdModel
        logging.info("Using passed in ipd model: %s" % self.args.ipdModel)
        if not os.path.exists(self.args.ipdModel):
            logging.error("Couldn't find model file: %s" % self.args.ipdModel)
            # An explicitly requested model that is missing is fatal, just
            # like a missing params path below; previously execution
            # continued and handed the bad path to IpdModel.
            sys.exit(1)

    elif self.args.paramsPath:
        if not os.path.exists(self.args.paramsPath):
            logging.error("Params path doesn't exist: %s" % self.args.paramsPath)
            sys.exit(1)

        majorityChem = ReferenceUtils.loadCmpH5Chemistry(cmpH5Path)
        ipdModel = os.path.join(self.args.paramsPath, majorityChem + ".h5")
        if majorityChem == "unknown":
            logging.warning("Chemistry is unknown. Falling back to built-in model")
            ipdModel = None
        elif not os.path.exists(ipdModel):
            logging.warning("Model not found: %s" % ipdModel)
            logging.warning("Falling back to built-in model")
            ipdModel = None
        else:
            logging.info("Using Chemistry matched IPD model: %s" % ipdModel)

    self.ipdModel = IpdModel(contigs, ipdModel, self.args.modelIters)
def setUp(self):
    """Build a KineticWorker fixture on the bundled lambda cmp.h5 sample.

    Contigs are loaded straight from the cmp.h5 path, the IPD model is
    built from the contigs alone, and the cmp.h5 is opened with
    CmpH5Reader and attached to the worker as its case data.
    """
    # Load the lambda genome from our sample data
    hereDir = os.path.dirname(os.path.abspath(__file__))
    sampleDir = os.path.join(hereDir, 'data')
    lambdaFasta = os.path.join(sampleDir, 'lambda', 'sequence', 'lambda.fasta')
    lambdaCmpH5 = os.path.join(sampleDir, "p4-c2-lambda-mod-decode.cmp.h5")

    self.contigs = ReferenceUtils.loadReferenceContigs(lambdaFasta, lambdaCmpH5)
    self.ipdModel = IpdModel(self.contigs)

    # Create a functional KineticWorker object that can be poked at manually.
    worker = KineticWorker(self.ipdModel)
    self.kw = worker
    self.cmpH5 = CmpH5Reader(lambdaCmpH5)

    # Put in our cmp.h5 - this is normally supplied by the Worker superclass
    worker.caseCmpH5 = self.cmpH5
    worker.controlCmpH5 = None
    worker.options = self.getOpts()
def setUp(self):
    """Build a KineticWorker fixture on the lambda sample via an AlignmentSet.

    The cmp.h5 sample is wrapped in an AlignmentSet, contigs are loaded
    against it, and the IPD model is built from ``P4-C2.h5`` in the
    ``../kineticsTools/resources`` directory relative to this file.
    """
    hereDir = os.path.dirname(os.path.abspath(__file__))

    # Load the lambda genome from our sample data
    sampleDir = os.path.join(hereDir, 'data')
    modelDir = os.path.join(hereDir, '../kineticsTools/resources')
    lambdaFasta = os.path.join(sampleDir, 'lambda', 'sequence', 'lambda.fasta')
    lambdaCmpH5 = os.path.join(sampleDir, "p4-c2-lambda-mod-decode.cmp.h5")

    self.cmpH5 = AlignmentSet(lambdaCmpH5, referenceFastaFname=lambdaFasta)
    self.contigs = ReferenceUtils.loadReferenceContigs(lambdaFasta, self.cmpH5)

    modelPath = os.path.join(modelDir, "P4-C2.h5")
    self.ipdModel = IpdModel(self.contigs, modelPath)

    # Create a functional KineticWorker object that can be poked at manually.
    worker = KineticWorker(self.ipdModel)
    self.kw = worker

    # Put in our cmp.h5 - this is normally supplied by the Worker superclass
    worker.caseCmpH5 = self.cmpH5
    worker.controlCmpH5 = None
    worker.options = self.getOpts()
def loadReferenceAndModel(self, referencePath):
    """Load the reference contigs and choose the IPD model to run with.

    Requires ``self.alignments`` and ``self.referenceWindows`` to be set.
    The chosen model path (or None) is passed to IpdModel and the result is
    stored on ``self.ipdModel``.

    Model selection, in order of precedence:
      1. ``self.args.ipdModel``: an explicit model path; exits with status 1
         if the file does not exist.
      2. ``self.args.paramsPath``: a directory holding ``<chemistry>.h5``
         files; the chemistry comes from the alignments. Exits with status 1
         if the directory is missing, the chemistry is 'unknown', or no
         model file exists for that chemistry.
      3. Neither option given: None is passed to IpdModel (per the original
         comments, this selects the built-in model).
    """
    # Both the alignments and the windows must have been prepared beforehand.
    assert not (self.alignments is None or self.referenceWindows is None)

    # Load the reference contigs - annotated with their refID from the cmp.h5
    logging.info("Loading reference contigs %s" % referencePath)
    referenceContigs = ReferenceUtils.loadReferenceContigs(
        referencePath,
        alignmentSet=self.alignments,
        windows=self.referenceWindows)

    args = self.args
    modelFile = None  # stays None unless one of the options below applies

    if args.ipdModel:
        modelFile = args.ipdModel
        logging.info("Using passed in ipd model: %s" % modelFile)
        if not os.path.exists(modelFile):
            logging.error("Couldn't find model file: %s" % modelFile)
            sys.exit(1)

    elif args.paramsPath:
        bundleDir = args.paramsPath
        if not os.path.exists(bundleDir):
            logging.error("Params path doesn't exist: %s" % bundleDir)
            sys.exit(1)

        chemistry = ReferenceUtils.loadAlignmentChemistry(self.alignments)
        modelFile = os.path.join(bundleDir, chemistry + ".h5")

        if chemistry == 'unknown':
            logging.error("Chemistry cannot be identified---cannot perform kinetic analysis")
            sys.exit(1)
        elif os.path.exists(modelFile):
            logging.info("Using Chemistry matched IPD model: %s" % modelFile)
        else:
            logging.error("Aborting, no kinetics model available for this chemistry: %s" % modelFile)
            sys.exit(1)

    self.ipdModel = IpdModel(referenceContigs, modelFile, args.modelIters)
def loadReferenceAndModel(self, referencePath, cmpH5Path):
    """Load contigs, the (optionally filtered) reference table and the IPD model.

    ``self.refInfo`` receives the cmp.h5 reference info table. When
    ``self.options.refContigs`` (comma-separated) and/or
    ``self.options.refContigIndex`` (anything other than -1) is given, the
    table is restricted to rows whose FullName, Name or RefInfoID is among
    the requested identifiers.

    Model selection, in order of precedence:
      1. ``self.args.ipdModel``: an explicit model path; exits with status 1
         if the file does not exist.
      2. ``self.args.paramsPath``: a directory holding ``<chemistry>.h5``
         files; exits with status 1 if the directory is missing, the
         chemistry is 'unknown', or no model file exists for it.
      3. Otherwise None is passed to IpdModel (per the original comments,
         this selects the built-in model).
    """
    # Load the reference contigs - annotated with their refID from the cmp.h5
    referenceContigs = ReferenceUtils.loadReferenceContigs(referencePath, cmpH5Path)

    # Read reference info table from cmp.h5
    (refInfoTable, _) = ReferenceUtils.loadCmpH5Tables(cmpH5Path)

    contigNames = self.options.refContigs
    contigIndex = self.options.refContigIndex
    namesGiven = contigNames is not None
    indexGiven = contigIndex != -1

    if namesGiven or indexGiven:
        # Gather every identifier the user asked for from both options.
        requestedIds = set()
        if namesGiven:
            requestedIds.update(contigNames.split(','))
        if indexGiven:
            requestedIds.add(contigIndex)

        relevantContigs = []
        for (row, rec) in enumerate(refInfoTable):
            if (rec.FullName in requestedIds or
                    rec.Name in requestedIds or
                    rec.RefInfoID in requestedIds):
                relevantContigs.append(row)
        self.refInfo = refInfoTable[relevantContigs]
    else:
        self.refInfo = refInfoTable

    args = self.args
    modelFile = None  # stays None unless one of the options below applies

    if args.ipdModel:
        modelFile = args.ipdModel
        logging.info("Using passed in ipd model: %s" % modelFile)
        if not os.path.exists(modelFile):
            logging.error("Couldn't find model file: %s" % modelFile)
            sys.exit(1)

    elif args.paramsPath:
        bundleDir = args.paramsPath
        if not os.path.exists(bundleDir):
            logging.error("Params path doesn't exist: %s" % bundleDir)
            sys.exit(1)

        chemistry = ReferenceUtils.loadCmpH5Chemistry(cmpH5Path)
        modelFile = os.path.join(bundleDir, chemistry + ".h5")

        if chemistry == 'unknown':
            logging.error("Chemistry cannot be identified---cannot perform kinetic analysis")
            sys.exit(1)
        elif os.path.exists(modelFile):
            logging.info("Using Chemistry matched IPD model: %s" % modelFile)
        else:
            logging.error("Aborting, no kinetics model available for this chemistry: %s" % modelFile)
            sys.exit(1)

    self.ipdModel = IpdModel(referenceContigs, modelFile, args.modelIters)
def loadReferenceAndModel(self, referencePath, cmpH5Path):
    """Load contigs, the (optionally filtered) reference table and the IPD model.

    ``self.refInfo`` receives the cmp.h5 reference info table. When
    ``self.options.refContigs`` (comma-separated) and/or
    ``self.options.refContigIndex`` (anything other than -1) is given, the
    table is restricted to rows whose FullName, Name or RefInfoID is among
    the requested identifiers.

    Model selection, in order of precedence:
      1. ``self.args.ipdModel``: an explicit model path; exits with status 1
         if the file does not exist.
      2. ``self.args.paramsPath``: a directory holding ``<chemistry>.h5``
         files; exits with status 1 if the directory is missing. If the
         chemistry is 'unknown' or no matching file exists, a warning is
         logged and None is used instead.
      3. Otherwise None is passed to IpdModel (per the original comments,
         this selects the built-in model).
    """
    # Load the reference contigs - annotated with their refID from the cmp.h5
    referenceContigs = ReferenceUtils.loadReferenceContigs(referencePath, cmpH5Path)

    # Read reference info table from cmp.h5
    (refInfoTable, _) = ReferenceUtils.loadCmpH5Tables(cmpH5Path)

    contigNames = self.options.refContigs
    contigIndex = self.options.refContigIndex
    namesGiven = contigNames is not None
    indexGiven = contigIndex != -1

    if namesGiven or indexGiven:
        # Gather every identifier the user asked for from both options.
        requestedIds = set()
        if namesGiven:
            requestedIds.update(contigNames.split(','))
        if indexGiven:
            requestedIds.add(contigIndex)

        relevantContigs = []
        for (row, rec) in enumerate(refInfoTable):
            if (rec.FullName in requestedIds or
                    rec.Name in requestedIds or
                    rec.RefInfoID in requestedIds):
                relevantContigs.append(row)
        self.refInfo = refInfoTable[relevantContigs]
    else:
        self.refInfo = refInfoTable

    args = self.args
    modelFile = None  # stays None unless one of the options below applies

    if args.ipdModel:
        modelFile = args.ipdModel
        logging.info("Using passed in ipd model: %s" % modelFile)
        if not os.path.exists(modelFile):
            logging.error("Couldn't find model file: %s" % modelFile)
            sys.exit(1)

    elif args.paramsPath:
        bundleDir = args.paramsPath
        if not os.path.exists(bundleDir):
            logging.error("Params path doesn't exist: %s" % bundleDir)
            sys.exit(1)

        chemistry = ReferenceUtils.loadCmpH5Chemistry(cmpH5Path)
        modelFile = os.path.join(bundleDir, chemistry + ".h5")

        if chemistry == 'unknown':
            logging.warning("Chemistry is unknown. Falling back to built-in model")
            modelFile = None
        elif os.path.exists(modelFile):
            logging.info("Using Chemistry matched IPD model: %s" % modelFile)
        else:
            logging.warning("Model not found: %s" % modelFile)
            logging.warning("Falling back to built-in model")
            modelFile = None

    self.ipdModel = IpdModel(referenceContigs, modelFile, args.modelIters)