Exemple #1
0
 def test_bam(self):
     # Opens Hpyl_1_5000.bam (under big_data_dir) as an AlignmentSet together
     # with the Helicobacter_pylori_J99 FASTA (under ref_dir), then checks
     # that one contig with cmph5ID 0 is loaded and that the reported
     # chemistry is "P6-C4".
     bamFile = os.path.join(big_data_dir, "Hpyl_1_5000.bam")
     refFile = os.path.join(ref_dir, "Helicobacter_pylori_J99", "sequence",
                            "Helicobacter_pylori_J99.fasta")
     ds = AlignmentSet(bamFile, referenceFastaFname=refFile)
     contigs = ReferenceUtils.loadReferenceContigs(refFile, ds)
     # assertEqual replaces the deprecated assertEquals alias, which is no
     # longer available from Python 3.12 onwards.
     self.assertEqual(len(contigs), 1)
     self.assertEqual(contigs[0].cmph5ID, 0)
     chemistry = ReferenceUtils.loadAlignmentChemistry(ds)
     self.assertEqual(chemistry, "P6-C4")
Exemple #2
0
 def loadReferenceAndModel(self, referencePath, ipdModelFilename):
     """Load the reference contigs and build ``self.ipdModel`` from them.

     ``self.alignments`` and ``self.referenceWindows`` must already be set;
     both are forwarded to ``ReferenceUtils.loadReferenceContigs``.  The
     resulting contigs, ``ipdModelFilename`` and ``self.args.modelIters``
     are handed to ``IpdModel``.
     """
     assert not (self.alignments is None or self.referenceWindows is None)

     # Contigs are annotated with their refID from the cmp.h5
     message = "Loading reference contigs {!r}".format(referencePath)
     logging.info(message)

     loaderOptions = {
         "alignmentSet": self.alignments,
         "windows": self.referenceWindows,
     }
     referenceContigs = ReferenceUtils.loadReferenceContigs(referencePath,
                                                            **loaderOptions)

     modelIterations = self.args.modelIters
     self.ipdModel = IpdModel(referenceContigs, ipdModelFilename,
                              modelIterations)
 def test_bam(self):
     # Loads the Hpyl_1_5000.bam alignments against the
     # Helicobacter_pylori_J99 reference and verifies the number of contigs,
     # the first contig's cmph5ID, and the detected chemistry string.
     bamFile = os.path.join(big_data_dir, "Hpyl_1_5000.bam")
     refFile = os.path.join(ref_dir, "Helicobacter_pylori_J99", "sequence",
                            "Helicobacter_pylori_J99.fasta")
     ds = AlignmentSet(bamFile, referenceFastaFname=refFile)
     contigs = ReferenceUtils.loadReferenceContigs(refFile, ds)
     # The assertEquals alias is deprecated (and gone in Python 3.12), so the
     # canonical assertEqual spelling is used here.
     self.assertEqual(len(contigs), 1)
     self.assertEqual(contigs[0].cmph5ID, 0)
     chemistry = ReferenceUtils.loadAlignmentChemistry(ds)
     self.assertEqual(chemistry, "P6-C4")
Exemple #4
0
 def test_cmph5(self):
     # Opens the lambda cmp.h5 from the local 'data' directory as an
     # AlignmentSet with the lambda FASTA as reference, then checks that one
     # contig with cmph5ID 1 is loaded and that the chemistry is "P4-C2".
     base_dir = os.path.dirname(os.path.abspath(__file__))
     dataDir = os.path.join(base_dir, 'data')
     refFile = os.path.join(dataDir, 'lambda', 'sequence', 'lambda.fasta')
     cmpFile = os.path.join(dataDir, "p4-c2-lambda-mod-decode.cmp.h5")
     ds = AlignmentSet(cmpFile, referenceFastaFname=refFile)
     contigs = ReferenceUtils.loadReferenceContigs(refFile, ds)
     # assertEqual replaces the deprecated assertEquals alias, which is no
     # longer available from Python 3.12 onwards.
     self.assertEqual(len(contigs), 1)
     self.assertEqual(contigs[0].cmph5ID, 1)
     chemistry = ReferenceUtils.loadAlignmentChemistry(ds)
     self.assertEqual(chemistry, "P4-C2")
 def test_cmph5(self):
     # Loads the p4-c2 lambda cmp.h5 against the lambda reference and verifies
     # the number of contigs, the first contig's cmph5ID, and the detected
     # chemistry string.
     base_dir = os.path.dirname(os.path.abspath(__file__))
     dataDir = os.path.join(base_dir, 'data')
     refFile = os.path.join(dataDir, 'lambda', 'sequence', 'lambda.fasta')
     cmpFile = os.path.join(dataDir, "p4-c2-lambda-mod-decode.cmp.h5")
     ds = AlignmentSet(cmpFile, referenceFastaFname=refFile)
     contigs = ReferenceUtils.loadReferenceContigs(refFile, ds)
     # The assertEquals alias is deprecated (and gone in Python 3.12), so the
     # canonical assertEqual spelling is used here.
     self.assertEqual(len(contigs), 1)
     self.assertEqual(contigs[0].cmph5ID, 1)
     chemistry = ReferenceUtils.loadAlignmentChemistry(ds)
     self.assertEqual(chemistry, "P4-C2")
Exemple #6
0
    def loadReferenceAndModel(self, referencePath):
        """Load the reference contigs and construct ``self.ipdModel``.

        The IPD model file is resolved in this order of precedence:

        1. The explicit path given by ``self.args.ipdModel``.
        2. ``<self.args.paramsPath>/<chemistry>.h5``, with the chemistry
           obtained from ``self.alignments``.
        3. ``None``, which stands for the built-in model.

        The process exits with status 1 when a requested model file or the
        params path is missing, or when the chemistry is 'unknown'.
        """
        assert not (self.alignments is None or self.referenceWindows is None)

        # Contigs come back annotated with their refID from the cmp.h5
        logging.info("Loading reference contigs %s" % referencePath)
        referenceContigs = ReferenceUtils.loadReferenceContigs(
            referencePath,
            alignmentSet=self.alignments,
            windows=self.referenceWindows)

        # None selects the built-in model
        modelPath = None

        if self.args.ipdModel:
            modelPath = self.args.ipdModel
            logging.info("Using passed in ipd model: %s" % modelPath)
            if not os.path.exists(modelPath):
                logging.error("Couldn't find model file: %s" % modelPath)
                sys.exit(1)
        elif self.args.paramsPath:
            bundleDir = self.args.paramsPath
            if not os.path.exists(bundleDir):
                logging.error("Params path doesn't exist: %s" % bundleDir)
                sys.exit(1)

            chemistry = ReferenceUtils.loadAlignmentChemistry(self.alignments)

            # Temporary solution for Sequel chemistries: no trained kinetics
            # models exist for them yet, but the P5-C3 training has been
            # observed to give fairly good results on Sequel data, so that
            # model is substituted for now.
            if chemistry.startswith("S/"):
                logging.info("No trained model available yet for Sequel chemistries; modeling as P5-C3")
                chemistry = "P5-C3"

            modelPath = os.path.join(bundleDir, chemistry + ".h5")
            if chemistry == 'unknown':
                logging.error("Chemistry cannot be identified---cannot perform kinetic analysis")
                sys.exit(1)
            elif os.path.exists(modelPath):
                logging.info("Using Chemistry matched IPD model: %s" % modelPath)
            else:
                logging.error("Aborting, no kinetics model available for this chemistry: %s" % modelPath)
                sys.exit(1)

        self.ipdModel = IpdModel(referenceContigs, modelPath,
                                 self.args.modelIters)
    def loadReferenceAndModel(self, referencePath, cmpH5Path):
        """Load reference contigs, the reference info table, and the IPD model.

        Sets ``self.refInfo`` to the reference info table read from the
        cmp.h5 and ``self.ipdModel`` to an ``IpdModel`` built from the loaded
        contigs.

        The model file is chosen in this order of precedence:

        1. The explicit path in ``self.args.ipdModel``.
        2. ``<self.args.paramsPath>/<chemistry>.h5``, where the chemistry is
           read from the cmp.h5.  If the chemistry is 'unknown' or that file
           does not exist, a warning is logged and the built-in model is used.
        3. ``None``, i.e. the built-in model.

        Args:
            referencePath: Reference location passed to
                ``ReferenceUtils.loadReferenceContigs``.
            cmpH5Path: Location of the cmp.h5 file.

        The process exits with status 1 if an explicitly requested model file
        or the params path does not exist.
        """

        # Load the reference contigs - annotated with their refID from the cmp.h5
        contigs = ReferenceUtils.loadReferenceContigs(referencePath, cmpH5Path)

        # Read reference info table from cmp.h5
        (refInfoTable, _) = ReferenceUtils.loadCmpH5Tables(cmpH5Path)
        self.refInfo = refInfoTable

        # By default, use built-in model
        ipdModel = None

        if self.args.ipdModel:
            ipdModel = self.args.ipdModel
            logging.info("Using passed in ipd model: %s" % self.args.ipdModel)
            if not os.path.exists(self.args.ipdModel):
                logging.error("Couldn't find model file: %s" %
                              self.args.ipdModel)
                # Abort instead of handing a path that is known not to exist
                # to IpdModel; a missing params path below is treated the
                # same way.
                sys.exit(1)

        elif self.args.paramsPath:
            if not os.path.exists(self.args.paramsPath):
                logging.error("Params path doesn't exist: %s" %
                              self.args.paramsPath)
                sys.exit(1)

            majorityChem = ReferenceUtils.loadCmpH5Chemistry(cmpH5Path)
            ipdModel = os.path.join(self.args.paramsPath, majorityChem + ".h5")

            if majorityChem == 'unknown':
                logging.warning(
                    "Chemistry is unknown. Falling back to built-in model")
                ipdModel = None
            elif not os.path.exists(ipdModel):
                logging.warning("Model not found: %s" % ipdModel)
                logging.warning("Falling back to built-in model")
                ipdModel = None
            else:
                logging.info("Using Chemistry matched IPD model: %s" %
                             ipdModel)

        self.ipdModel = IpdModel(contigs, ipdModel, self.args.modelIters)
    def setUp(self):
        """Build a KineticWorker backed by the P6-C4 model file.

        Takes the reference and alignment paths from ``self.getReference()``
        and ``self.getAlignments()``, loads them into an AlignmentSet, and
        attaches that dataset to ``self.kw`` as the case data, with no
        control data.
        """
        self.cmpH5 = None

        testDir = os.path.dirname(os.path.abspath(__file__))
        modelFile = os.path.join(testDir, '../kineticsTools/resources',
                                 "P6-C4.h5")

        ref = self.getReference()
        alnFile = self.getAlignments()
        # Both inputs must exist on disk before anything is loaded
        assert os.path.exists(alnFile)
        assert os.path.exists(ref)

        dataset = AlignmentSet(alnFile, referenceFastaFname=ref)
        self.ds = dataset
        self.contigs = ReferenceUtils.loadReferenceContigs(ref, dataset)
        self.ipdModel = IpdModel(self.contigs, modelFile)

        # Create a functional KineticWorker object that can be poked at
        worker = KineticWorker(self.ipdModel)
        self.kw = worker
        # The dataset is normally supplied by the Worker
        worker.caseCmpH5 = dataset
        worker.controlCmpH5 = None

        worker.options = self.getOpts()
Exemple #9
0
    def setUp(self):
        """Prepare ``self.ds``, ``self.contigs``, ``self.ipdModel`` and ``self.kw``.

        The alignment and reference paths come from ``self.getAlignments()``
        and ``self.getReference()``; the IPD model is loaded from the
        ``P6-C4.h5`` file in the kineticsTools resources directory located
        relative to this file.
        """
        self.cmpH5 = None
        thisDir = os.path.dirname(os.path.abspath(__file__))
        resourcesDir = os.path.join(thisDir, '../kineticsTools/resources')

        ref = self.getReference()
        alnFile = self.getAlignments()
        assert all(os.path.exists(path) for path in (alnFile, ref))

        self.ds = AlignmentSet(alnFile, referenceFastaFname=ref)
        self.contigs = ReferenceUtils.loadReferenceContigs(ref, self.ds)

        modelPath = os.path.join(resourcesDir, "P6-C4.h5")
        self.ipdModel = IpdModel(self.contigs, modelPath)

        # A working KineticWorker instance for the tests to exercise
        self.kw = KineticWorker(self.ipdModel)

        # Case data is normally supplied by the Worker; there is no control
        self.kw.caseCmpH5 = self.ds
        self.kw.controlCmpH5 = None

        workerOptions = self.getOpts()
        self.kw.options = workerOptions
    def loadReferenceAndModel(self, referencePath, cmpH5Path):
        """Populate ``self.refInfo`` and ``self.ipdModel`` for an analysis run.

        The reference contigs are loaded from ``referencePath`` and
        ``cmpH5Path``, and the reference info table is read from the cmp.h5
        and stored on ``self.refInfo``.

        There are three ways the IPD model can be selected, in order of
        precedence:

        1. Explicit path passed via ``self.args.ipdModel``.
        2. ``<self.args.paramsPath>/<chemistry>.h5``, using the cmp.h5's
           chemistry info; an 'unknown' chemistry or a missing file logs a
           warning and falls back to the built-in model.
        3. The built-in model (``None``).

        Args:
            referencePath: Reference location passed to
                ``ReferenceUtils.loadReferenceContigs``.
            cmpH5Path: Location of the cmp.h5 file.

        The process exits with status 1 when the explicit model file or the
        params path does not exist.
        """

        # Load the reference contigs - annotated with their refID from the cmp.h5
        contigs = ReferenceUtils.loadReferenceContigs(referencePath, cmpH5Path)

        # Read reference info table from cmp.h5
        (refInfoTable, _) = ReferenceUtils.loadCmpH5Tables(cmpH5Path)
        self.refInfo = refInfoTable

        # By default, use built-in model
        ipdModel = None

        if self.args.ipdModel:
            ipdModel = self.args.ipdModel
            logging.info("Using passed in ipd model: %s" % self.args.ipdModel)
            if not os.path.exists(self.args.ipdModel):
                logging.error("Couldn't find model file: %s" % self.args.ipdModel)
                # Stop here rather than building an IpdModel from a file that
                # is known to be missing, matching the params-path check
                # below.
                sys.exit(1)

        elif self.args.paramsPath:
            if not os.path.exists(self.args.paramsPath):
                logging.error("Params path doesn't exist: %s" % self.args.paramsPath)
                sys.exit(1)

            majorityChem = ReferenceUtils.loadCmpH5Chemistry(cmpH5Path)
            ipdModel = os.path.join(self.args.paramsPath, majorityChem + ".h5")

            if majorityChem == "unknown":
                logging.warning("Chemistry is unknown. Falling back to built-in model")
                ipdModel = None
            elif not os.path.exists(ipdModel):
                logging.warning("Model not found: %s" % ipdModel)
                logging.warning("Falling back to built-in model")
                ipdModel = None
            else:
                logging.info("Using Chemistry matched IPD model: %s" % ipdModel)

        self.ipdModel = IpdModel(contigs, ipdModel, self.args.modelIters)
Exemple #11
0
    def setUp(self):
        """Build a KineticWorker over the lambda sample data.

        Loads the lambda reference contigs and the
        ``p4-c2-lambda-mod-decode.cmp.h5`` alignments from the ``data``
        directory next to this file, using an ``IpdModel`` constructed from
        the contigs alone.
        """
        # The lambda genome and alignments live in our sample data directory
        here = os.path.dirname(os.path.abspath(__file__))
        ref = os.path.join(here, 'data', 'lambda', 'sequence', 'lambda.fasta')
        cmpFile = os.path.join(here, 'data', "p4-c2-lambda-mod-decode.cmp.h5")

        self.contigs = ReferenceUtils.loadReferenceContigs(ref, cmpFile)
        self.ipdModel = IpdModel(self.contigs)

        # A working KineticWorker instance that tests can drive manually
        worker = KineticWorker(self.ipdModel)
        self.kw = worker

        reader = CmpH5Reader(cmpFile)
        self.cmpH5 = reader

        # The cmp.h5 is normally supplied by the Worker superclass
        worker.caseCmpH5 = reader
        worker.controlCmpH5 = None

        worker.options = self.getOpts()
Exemple #12
0
    def setUp(self):
        """Build a KineticWorker over the lambda sample data with the P4-C2 model.

        Opens the ``p4-c2-lambda-mod-decode.cmp.h5`` alignments as an
        AlignmentSet with the lambda FASTA as reference, loads the contigs,
        and creates the ``IpdModel`` from ``P4-C2.h5`` in the kineticsTools
        resources directory located relative to this file.
        """
        # Locate the sample data and the model resources relative to this file
        testDir = os.path.dirname(os.path.abspath(__file__))
        dataDir = os.path.join(testDir, 'data')
        resourcesDir = os.path.join(testDir, '../kineticsTools/resources')

        ref = os.path.join(dataDir, 'lambda', 'sequence', 'lambda.fasta')
        cmpFile = os.path.join(dataDir, "p4-c2-lambda-mod-decode.cmp.h5")
        modelFile = os.path.join(resourcesDir, "P4-C2.h5")

        alignments = AlignmentSet(cmpFile, referenceFastaFname=ref)
        self.cmpH5 = alignments
        self.contigs = ReferenceUtils.loadReferenceContigs(ref, alignments)
        self.ipdModel = IpdModel(self.contigs, modelFile)

        # A working KineticWorker instance that tests can drive manually
        worker = KineticWorker(self.ipdModel)
        self.kw = worker

        # The alignments are normally supplied by the Worker superclass
        worker.caseCmpH5 = alignments
        worker.controlCmpH5 = None

        worker.options = self.getOpts()
Exemple #13
0
    def loadReferenceAndModel(self, referencePath):
        """Load the reference contigs and set ``self.ipdModel``.

        Model selection, highest precedence first:

        1. ``self.args.ipdModel`` - an explicit model path.
        2. ``self.args.paramsPath`` - a parameter bundle directory; the file
           ``<chemistry>.h5`` inside it is used, with the chemistry obtained
           from ``self.alignments``.
        3. Neither set - ``None`` is passed to ``IpdModel`` (built-in model).

        A missing model file, a missing params path, or an 'unknown'
        chemistry is logged as an error and terminates the process with
        status 1.
        """
        assert not (self.alignments is None or self.referenceWindows is None)

        # The loaded contigs carry their refID from the cmp.h5
        logging.info("Loading reference contigs %s" % referencePath)
        loaderOptions = {
            "alignmentSet": self.alignments,
            "windows": self.referenceWindows,
        }
        loadedContigs = ReferenceUtils.loadReferenceContigs(referencePath,
                                                            **loaderOptions)

        # Stays None unless one of the branches below picks a file
        chosenModel = None

        if self.args.ipdModel:
            chosenModel = self.args.ipdModel
            logging.info("Using passed in ipd model: %s" % chosenModel)
            if not os.path.exists(chosenModel):
                logging.error("Couldn't find model file: %s" % chosenModel)
                sys.exit(1)
        elif self.args.paramsPath:
            paramsDir = self.args.paramsPath
            if not os.path.exists(paramsDir):
                logging.error("Params path doesn't exist: %s" % paramsDir)
                sys.exit(1)

            chemistryName = ReferenceUtils.loadAlignmentChemistry(self.alignments)
            chosenModel = os.path.join(paramsDir, chemistryName + ".h5")

            if chemistryName == 'unknown':
                logging.error("Chemistry cannot be identified---cannot perform kinetic analysis")
                sys.exit(1)
            elif os.path.exists(chosenModel):
                logging.info("Using Chemistry matched IPD model: %s" % chosenModel)
            else:
                logging.error("Aborting, no kinetics model available for this chemistry: %s" % chosenModel)
                sys.exit(1)

        self.ipdModel = IpdModel(loadedContigs, chosenModel,
                                 self.args.modelIters)
Exemple #14
0
    def loadReferenceAndModel(self, referencePath, cmpH5Path):
        """Load contigs, select the reference records to use, and build the model.

        ``self.refInfo`` is the full reference info table from the cmp.h5
        unless ``self.options.refContigs`` (comma-separated) and/or
        ``self.options.refContigIndex`` (``-1`` meaning "not set") are given,
        in which case only rows whose ``FullName``, ``Name`` or ``RefInfoID``
        is among the requested ids are kept.

        Model selection, highest precedence first: ``self.args.ipdModel``,
        then ``<self.args.paramsPath>/<chemistry>.h5``, otherwise ``None``
        (built-in model).  A missing model file, a missing params path, or an
        'unknown' chemistry is logged as an error and terminates the process
        with status 1.
        """
        # Contigs are annotated with their refID from the cmp.h5
        contigs = ReferenceUtils.loadReferenceContigs(referencePath, cmpH5Path)

        # Only the first table returned for the cmp.h5 is needed here
        (refInfoTable, _) = ReferenceUtils.loadCmpH5Tables(cmpH5Path)

        contigNames = self.options.refContigs
        contigIndex = self.options.refContigIndex

        if contigNames is None and contigIndex == -1:
            # No restriction requested: keep every reference record
            self.refInfo = refInfoTable
        else:
            # NOTE(review): ids taken from refContigs are strings while
            # refContigIndex is used as given - confirm the RefInfoID
            # comparison below behaves as intended.
            requestedIds = set()
            if contigNames is not None:
                requestedIds.update(contigNames.split(','))
            if contigIndex != -1:
                requestedIds.add(contigIndex)

            relevantContigs = []
            for position, record in enumerate(refInfoTable):
                if (record.FullName in requestedIds
                        or record.Name in requestedIds
                        or record.RefInfoID in requestedIds):
                    relevantContigs.append(position)
            self.refInfo = refInfoTable[relevantContigs]

        # None stands for the built-in model
        modelPath = None

        if self.args.ipdModel:
            modelPath = self.args.ipdModel
            logging.info("Using passed in ipd model: %s" % modelPath)
            if not os.path.exists(modelPath):
                logging.error("Couldn't find model file: %s" % modelPath)
                sys.exit(1)
        elif self.args.paramsPath:
            bundleDir = self.args.paramsPath
            if not os.path.exists(bundleDir):
                logging.error("Params path doesn't exist: %s" % bundleDir)
                sys.exit(1)

            chemistry = ReferenceUtils.loadCmpH5Chemistry(cmpH5Path)
            modelPath = os.path.join(bundleDir, chemistry + ".h5")
            if chemistry == 'unknown':
                logging.error("Chemistry cannot be identified---cannot perform kinetic analysis")
                sys.exit(1)
            elif os.path.exists(modelPath):
                logging.info("Using Chemistry matched IPD model: %s" % modelPath)
            else:
                logging.error("Aborting, no kinetics model available for this chemistry: %s" % modelPath)
                sys.exit(1)

        self.ipdModel = IpdModel(contigs, modelPath, self.args.modelIters)
    def loadReferenceAndModel(self, referencePath, cmpH5Path):
        """Load contigs, filter the reference info table, and build the model.

        When ``self.options.refContigs`` (comma-separated ids) or
        ``self.options.refContigIndex`` (anything other than ``-1``) is set,
        ``self.refInfo`` holds only the rows of the cmp.h5 reference info
        table whose ``FullName``, ``Name`` or ``RefInfoID`` was requested;
        otherwise it holds the whole table.

        Model selection, highest precedence first:

        1. ``self.args.ipdModel``; the process exits with status 1 if that
           file is missing.
        2. ``<self.args.paramsPath>/<chemistry>.h5``; the process exits with
           status 1 if the params path is missing, while an 'unknown'
           chemistry or a missing model file only logs a warning and falls
           back to the built-in model.
        3. ``None``, i.e. the built-in model.
        """
        # Contigs are annotated with their refID from the cmp.h5
        contigs = ReferenceUtils.loadReferenceContigs(referencePath, cmpH5Path)

        # Reference info table from the cmp.h5
        (refInfoTable, _) = ReferenceUtils.loadCmpH5Tables(cmpH5Path)

        byName = self.options.refContigs is not None
        byIndex = self.options.refContigIndex != -1

        if not (byName or byIndex):
            self.refInfo = refInfoTable
        else:
            if byName:
                requestedIds = set(self.options.refContigs.split(','))
            else:
                requestedIds = set()
            if byIndex:
                requestedIds.add(self.options.refContigIndex)

            def isRequested(rec):
                # A record matches when any of its three identifiers was asked for
                return (rec.FullName in requestedIds or
                        rec.Name in requestedIds or
                        rec.RefInfoID in requestedIds)

            relevantContigs = [i for (i, rec) in enumerate(refInfoTable)
                               if isRequested(rec)]
            self.refInfo = refInfoTable[relevantContigs]

        # Start from the built-in model (None)
        selectedModel = None

        if self.args.ipdModel:
            selectedModel = self.args.ipdModel
            logging.info("Using passed in ipd model: %s" % selectedModel)
            if not os.path.exists(selectedModel):
                logging.error("Couldn't find model file: %s" % selectedModel)
                sys.exit(1)
        elif self.args.paramsPath:
            paramsDir = self.args.paramsPath
            if not os.path.exists(paramsDir):
                logging.error("Params path doesn't exist: %s" % paramsDir)
                sys.exit(1)

            chemistryName = ReferenceUtils.loadCmpH5Chemistry(cmpH5Path)
            candidate = os.path.join(paramsDir, chemistryName + ".h5")
            if chemistryName == 'unknown':
                logging.warning("Chemistry is unknown. Falling back to built-in model")
            elif os.path.exists(candidate):
                logging.info("Using Chemistry matched IPD model: %s" % candidate)
                selectedModel = candidate
            else:
                logging.warning("Model not found: %s" % candidate)
                logging.warning("Falling back to built-in model")

        self.ipdModel = IpdModel(contigs, selectedModel, self.args.modelIters)