Esempio n. 1
0
    def setUp(self):
        """Assemble the KineticWorker fixture used by the tests.

        Resolves the reference and alignment paths via ``getReference()`` and
        ``getAlignments()``, asserts that both exist on disk, then builds an
        ``AlignmentSet``, the reference contigs and an ``IpdModel`` (from the
        "P6-C4.h5" file under ``../kineticsTools/resources``) and attaches
        them to a ``KineticWorker`` stored on ``self.kw``.
        """
        self.cmpH5 = None

        reference_path = self.getReference()
        alignment_path = self.getAlignments()
        # Alignment file is checked first, then the reference.
        assert all(os.path.exists(p) for p in (alignment_path, reference_path))

        # The model file is located relative to this test module.
        test_dir = os.path.dirname(os.path.abspath(__file__))
        model_path = os.path.join(test_dir, '../kineticsTools/resources',
                                  "P6-C4.h5")

        self.ds = AlignmentSet(alignment_path,
                               referenceFastaFname=reference_path)
        self.contigs = ReferenceUtils.loadReferenceContigs(reference_path,
                                                           self.ds)
        self.ipdModel = IpdModel(self.contigs, model_path)

        # A functional KineticWorker that the tests can poke at directly.
        # It is bound to self.kw before configuration so that getOpts()
        # overrides can already see it.
        self.kw = KineticWorker(self.ipdModel)
        worker = self.kw
        # Hand the alignments over ourselves; per the original note this is
        # normally supplied by the Worker.
        worker.caseCmpH5 = self.ds
        worker.controlCmpH5 = None

        worker.options = self.getOpts()
    def setUp(self):
        """Prepare ``self.kw``, a KineticWorker wired to the test inputs.

        The reference FASTA and alignment file come from ``getReference()``
        and ``getAlignments()``; both must exist or the assertion fails.
        The worker receives the alignment set as its case data, no control
        data, and the options returned by ``getOpts()``.
        """
        self.cmpH5 = None

        # Directory holding the model files, relative to this module.
        this_file = os.path.abspath(__file__)
        resources = os.path.join(os.path.dirname(this_file),
                                 '../kineticsTools/resources')

        fasta = self.getReference()
        alignments = self.getAlignments()
        alignments_present = os.path.exists(alignments)
        assert alignments_present and os.path.exists(fasta)

        dataset = AlignmentSet(alignments, referenceFastaFname=fasta)
        self.ds = dataset
        self.contigs = ReferenceUtils.loadReferenceContigs(fasta, dataset)

        model_file = os.path.join(resources, "P6-C4.h5")
        self.ipdModel = IpdModel(self.contigs, model_file)

        # Build a working KineticWorker for the tests to exercise.
        self.kw = KineticWorker(self.ipdModel)
        # Inject the alignment data (normally supplied by the Worker,
        # according to the original comment) and leave the control unset.
        self.kw.caseCmpH5 = dataset
        self.kw.controlCmpH5 = None

        self.kw.options = self.getOpts()
Esempio n. 3
0
    def setUp(self):
        """Create a KineticWorker fixture from the lambda sample data.

        Paths are resolved relative to this test module: the lambda FASTA and
        the "p4-c2-lambda-mod-decode.cmp.h5" alignments live under ``data``,
        and the "P4-C2.h5" model under ``../kineticsTools/resources``.
        The resulting worker is stored on ``self.kw``.
        """
        module_dir = os.path.dirname(os.path.abspath(__file__))

        # Load the lambda genome from our sample data.
        sample_dir = os.path.join(module_dir, 'data')
        lambda_fasta = os.path.join(sample_dir, 'lambda', 'sequence',
                                    'lambda.fasta')
        alignment_file = os.path.join(sample_dir,
                                      "p4-c2-lambda-mod-decode.cmp.h5")
        model_file = os.path.join(module_dir, '../kineticsTools/resources',
                                  "P4-C2.h5")

        self.cmpH5 = AlignmentSet(alignment_file,
                                  referenceFastaFname=lambda_fasta)
        self.contigs = ReferenceUtils.loadReferenceContigs(lambda_fasta,
                                                           self.cmpH5)
        self.ipdModel = IpdModel(self.contigs, model_file)

        # A functional KineticWorker object that can be poked at manually.
        self.kw = KineticWorker(self.ipdModel)

        # Supply the alignments ourselves; the original note says the Worker
        # superclass normally does this.
        self.kw.caseCmpH5 = self.cmpH5
        self.kw.controlCmpH5 = None

        self.kw.options = self.getOpts()
Esempio n. 4
0
class _TestBase(object):
    """
    Common test functionality.  All input type tests should inherit from this,
    and yield identical results.

    The test methods call ``self.assertEqual``/``self.assertIn``, which this
    class does not define, so concrete subclasses presumably also inherit
    from ``unittest.TestCase``.  Subclasses must implement
    ``getAlignments()``.
    """

    # Value handed to the worker options as ``maxAlignments``; subclasses may
    # override it.
    MAX_ALIGNMENTS = 1500

    def getOpts(self):
        """Derived tests can override this to customize behaviour"""
        return self.basicOpts()

    def basicOpts(self):
        """Mock up some options for the kinetic worker"""
        # Read through the instance so subclass overrides are honoured.
        max_alignments = self.MAX_ALIGNMENTS

        class opts:
            def __init__(self):
                self.mapQvThreshold = -1
                self.cap_percentile = 99.0
                self.minCoverage = 3
                self.subread_norm = True
                self.maxCoverage = 200
                self.identify = True
                self.methylFraction = False
                self.pvalue = 0.01
                self.modsToCall = ['H', 'J', 'K']
                # Bug 23546: need to set values for these two new flags:
                self.identifyMinCov = 5
                self.methylMinCov = 10
                self.useLDA = False
                self.maxAlignments = max_alignments
                self.randomSeed = None

        return opts()

    def getAlignments(self):
        """Return the path of the alignment file; subclasses must override."""
        raise NotImplementedError()

    def getReference(self):
        """Return the path of the H. pylori J99 reference FASTA."""
        refDir = "/pbi/dept/secondary/siv/references"
        return os.path.join(refDir, "Helicobacter_pylori_J99", "sequence",
                            "Helicobacter_pylori_J99.fasta")

    def setUp(self):
        """Build ``self.kw``, a KineticWorker wired to the test inputs.

        Raises AssertionError when the alignment file or the reference FASTA
        does not exist.
        """
        self.cmpH5 = None
        resourcesDir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                    '../kineticsTools/resources')
        ref = self.getReference()
        alnFile = self.getAlignments()
        # Checked separately so a failure names the missing input.
        assert os.path.exists(alnFile), \
            "alignment file not found: %s" % alnFile
        assert os.path.exists(ref), "reference FASTA not found: %s" % ref

        self.ds = AlignmentSet(alnFile, referenceFastaFname=ref)
        self.contigs = ReferenceUtils.loadReferenceContigs(ref, self.ds)
        self.ipdModel = IpdModel(self.contigs,
                                 os.path.join(resourcesDir, "P6-C4.h5"))
        # Create a functional KineticWorker object that can be poked at
        self.kw = KineticWorker(self.ipdModel)
        # Put in our cmp.h5 - this is normally supplied by the Worker
        self.kw.caseCmpH5 = self.ds
        self.kw.controlCmpH5 = None

        self.kw.options = self.getOpts()

    def test_private_api(self):
        """Exercise the worker's private chunk/IPD helpers on a small window."""
        start = 50
        end = 100
        REF_GROUP_ID = "gi|12057207|gb|AE001439.1|"
        referenceWindow = ReferenceWindow(0, REF_GROUP_ID, start, end)
        rir = list(
            self.kw.caseCmpH5.readsInRange(referenceWindow.refName,
                                           referenceWindow.start,
                                           referenceWindow.end))
        self.assertEqual(len(rir), 301)
        # The result is not inspected; this only checks the call succeeds.
        self.kw._fetchChunks(REF_GROUP_ID, (start, end), self.kw.caseCmpH5)
        factor = 1.0 / self.ds.readGroupTable[0].FrameRate
        rawIpds = self.kw._loadRawIpds(rir, start, end, factor)
        log.info("len(rawIpds) = %d", len(rawIpds))
        # XXX note that this is very dependent on the exact order of reads
        # found by readsInRange(), which may be altered by changes to the
        # implementation of the dataset API.  It should, however, remain
        # consistent across equivalent input types.
        # XXX 2015-08-28 disabling this for now because it will change if the
        # dataset contains multiple .bam files
        #self.assertEqual("%.4f" % rawIpds[0][2], "0.2665")
        log.info(rawIpds)
        # As above, only checks that chunking the raw IPDs does not raise.
        self.kw._chunkRawIpds(rawIpds)

    def test_small_decode(self):
        """Test for known modifications near the start of H. pylori genome"""
        # XXX should have mods on 60- (m4C), 89+ (m6A), 91- (m6A)
        start = 50
        end = 100
        REF_GROUP_ID = "gi|12057207|gb|AE001439.1|"
        referenceWindow = ReferenceWindow(0, REF_GROUP_ID, start, end)
        bounds = (start, end)

        self.kw._prepForReferenceWindow(referenceWindow)
        kinetics = self.kw._summarizeReferenceRegion(bounds, False, True)
        mods = self.kw._decodePositiveControl(kinetics, bounds)
        log.info(mods)

        # Verify that we detect m6A mods at tpl 88 and 90 and an m4C mod at
        # tpl 59 (presumably the 0-based equivalents of the positions above).
        m6AMods = [
            x for x in mods
            if x['modification'] == 'm6A' and x['tpl'] in (88, 90)
        ]
        self.assertEqual(len(m6AMods), 2)
        m4CMods = [
            x for x in mods
            if x['modification'] == 'm4C' and x['tpl'] == 59
        ]
        self.assertEqual(len(m4CMods), 1)
        # Every call must sit on the expected strand.
        for x in mods:
            if x['strand'] == 0:
                self.assertEqual(x['tpl'], 88)
            else:
                self.assertIn(x['tpl'], [59, 90])
class _TestBase(object):
    """
    Common test functionality.  All input type tests should inherit from this,
    and yield identical results.

    The test methods call ``self.assertEqual``/``self.assertIn``, which this
    class does not define, so concrete subclasses presumably also inherit
    from ``unittest.TestCase``.  Subclasses must implement
    ``getAlignments()``.
    """

    # Value handed to the worker options as ``maxAlignments``; subclasses may
    # override it.
    MAX_ALIGNMENTS = 1500

    def getOpts(self):
        """Derived tests can override this to customize behaviour"""
        return self.basicOpts()

    def basicOpts(self):
        """Mock up some options for the kinetic worker"""
        # Read through the instance so subclass overrides are honoured.
        max_alignments = self.MAX_ALIGNMENTS

        class opts:
            def __init__(self):
                self.mapQvThreshold = -1
                self.cap_percentile = 99.0
                self.minCoverage = 3
                self.subread_norm = True
                self.maxCoverage = 200
                self.identify = True
                self.methylFraction = False
                self.pvalue = 0.01
                self.modsToCall = ['H', 'J', 'K']
                # Bug 23546: need to set values for these two new flags:
                self.identifyMinCov = 5
                self.methylMinCov = 10
                self.useLDA = False
                self.maxAlignments = max_alignments
                self.randomSeed = None

        return opts()

    def getAlignments(self):
        """Return the path of the alignment file; subclasses must override."""
        raise NotImplementedError()

    def getReference(self):
        """Return the path of the H. pylori J99 reference FASTA."""
        refDir = "/pbi/dept/secondary/siv/references"
        return os.path.join(refDir, "Helicobacter_pylori_J99", "sequence",
                            "Helicobacter_pylori_J99.fasta")

    def setUp(self):
        """Build ``self.kw``, a KineticWorker wired to the test inputs.

        Raises AssertionError when the alignment file or the reference FASTA
        does not exist.
        """
        self.cmpH5 = None
        resourcesDir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                    '../kineticsTools/resources')
        ref = self.getReference()
        alnFile = self.getAlignments()
        # Checked separately so a failure names the missing input.
        assert os.path.exists(alnFile), \
            "alignment file not found: %s" % alnFile
        assert os.path.exists(ref), "reference FASTA not found: %s" % ref

        self.ds = AlignmentSet(alnFile, referenceFastaFname=ref)
        self.contigs = ReferenceUtils.loadReferenceContigs(ref, self.ds)
        self.ipdModel = IpdModel(self.contigs,
                                 os.path.join(resourcesDir, "P6-C4.h5"))
        # Create a functional KineticWorker object that can be poked at
        self.kw = KineticWorker(self.ipdModel)
        # Put in our cmp.h5 - this is normally supplied by the Worker
        self.kw.caseCmpH5 = self.ds
        self.kw.controlCmpH5 = None

        self.kw.options = self.getOpts()

    def test_private_api(self):
        """Exercise the worker's private chunk/IPD helpers on a small window."""
        start = 50
        end = 100
        REF_GROUP_ID = "gi|12057207|gb|AE001439.1|"
        referenceWindow = ReferenceWindow(0, REF_GROUP_ID, start, end)
        rir = list(
            self.kw.caseCmpH5.readsInRange(referenceWindow.refName,
                                           referenceWindow.start,
                                           referenceWindow.end))
        self.assertEqual(len(rir), 301)
        # The result is not inspected; this only checks the call succeeds.
        self.kw._fetchChunks(REF_GROUP_ID, (start, end), self.kw.caseCmpH5)
        factor = 1.0 / self.ds.readGroupTable[0].FrameRate
        rawIpds = self.kw._loadRawIpds(rir, start, end, factor)
        log.info("len(rawIpds) = %d", len(rawIpds))
        # XXX note that this is very dependent on the exact order of reads
        # found by readsInRange(), which may be altered by changes to the
        # implementation of the dataset API.  It should, however, remain
        # consistent across equivalent input types.
        # XXX 2015-08-28 disabling this for now because it will change if the
        # dataset contains multiple .bam files
        #self.assertEqual("%.4f" % rawIpds[0][2], "0.2665")
        log.info(rawIpds)
        # As above, only checks that chunking the raw IPDs does not raise.
        self.kw._chunkRawIpds(rawIpds)

    def test_small_decode(self):
        """Test for known modifications near the start of H. pylori genome"""
        # XXX should have mods on 60- (m4C), 89+ (m6A), 91- (m6A)
        start = 50
        end = 100
        REF_GROUP_ID = "gi|12057207|gb|AE001439.1|"
        referenceWindow = ReferenceWindow(0, REF_GROUP_ID, start, end)
        bounds = (start, end)

        self.kw._prepForReferenceWindow(referenceWindow)
        kinetics = self.kw._summarizeReferenceRegion(bounds, False, True)
        mods = self.kw._decodePositiveControl(kinetics, bounds)
        log.info(mods)

        # Verify that we detect m6A mods at tpl 88 and 90 and an m4C mod at
        # tpl 59 (presumably the 0-based equivalents of the positions above).
        m6AMods = [
            x for x in mods
            if x['modification'] == 'm6A' and x['tpl'] in (88, 90)
        ]
        self.assertEqual(len(m6AMods), 2)
        m4CMods = [
            x for x in mods
            if x['modification'] == 'm4C' and x['tpl'] == 59
        ]
        self.assertEqual(len(m4CMods), 1)
        # Every call must sit on the expected strand.
        for x in mods:
            if x['strand'] == 0:
                self.assertEqual(x['tpl'], 88)
            else:
                self.assertIn(x['tpl'], [59, 90])