def testSmallDecode(self):
    """
    Test modified fraction estimation in detection mode around a known
    modification in lambda
    """
    # First methylated A in lambda:
    # strand  motif          onTarget  seqid  tpl
    # 0       GCACNNNNNNGTT  On        1      14983
    start = 14900
    end = 15100
    referenceWindow = ReferenceWindow(1, "lambda_NEB3011", start, end)
    bounds = (start, end)

    self.kw._prepForReferenceWindow(referenceWindow)
    kinetics = self.kw._summarizeReferenceRegion(bounds, True, False)

    # Verify that we detect m6A mods at 14982 and 14991.
    # Only rows that carry a 'frac' entry are kept; the other fraction
    # fields are copied so the printed summary is self-contained.
    m6AMods = [{
        'frac': x['frac'],
        'fracLow': x['fracLow'],
        'fracUp': x['fracUp'],
        'tpl': x['tpl'],
        'strand': x['strand']
    } for x in kinetics if 'frac' in x and x['tpl'] in (14982, 14991)]
    print(m6AMods)

    # Without this guard the loop below passes vacuously when nothing was
    # found at the expected positions, hiding a total detection failure.
    self.assertTrue(
        m6AMods,
        "no fraction estimates reported at tpl 14982 / 14991")

    for mod in m6AMods:
        self.assertGreater(mod["frac"], 0.5)
def test_private_api(self):
    """
    Exercise the private helpers of self.kw (_fetchChunks, _loadRawIpds,
    _chunkRawIpds) on a small reference window.

    Only the number of reads returned by readsInRange() is asserted; the
    remaining calls are checked solely for running without raising.
    """
    start = 50
    end = 100
    REF_GROUP_ID = "gi|12057207|gb|AE001439.1|"
    referenceWindow = ReferenceWindow(0, REF_GROUP_ID, start, end)
    bounds = (start, end)

    rir = list(
        self.kw.caseCmpH5.readsInRange(referenceWindow.refName,
                                       referenceWindow.start,
                                       referenceWindow.end))
    self.assertEqual(len(rir), 301)

    # Result intentionally discarded: nothing below inspects it.
    self.kw._fetchChunks(REF_GROUP_ID, bounds, self.kw.caseCmpH5)

    # Reciprocal of the first read group's frame rate
    # (presumably seconds per frame -- confirm against _loadRawIpds).
    factor = 1.0 / self.ds.readGroupTable[0].FrameRate
    rawIpds = self.kw._loadRawIpds(rir, start, end, factor)
    # Diagnostic output only, so it goes through the module logger at INFO
    # rather than the root logger at CRITICAL.
    log.info("len(rawIpds) = %d", len(rawIpds))

    # XXX note that this is very dependent on the exact order of reads
    # found by readsInRange(), which may be altered by changes to the
    # implementation of the dataset API.  It should, however, remain
    # consistent across equivalent input types.
    # XXX 2015-08-28 disabling this for now because it will change if the
    # dataset contains multiple .bam files
    #self.assertEqual("%.4f" % rawIpds[0][2], "0.2665")
    log.info(rawIpds)

    # Result intentionally discarded, as above.
    self.kw._chunkRawIpds(rawIpds)
def test_small_decode(self):
    """Test for known modifications near the start of H. pylori genome"""
    # XXX should have mods on 60- (m4C), 89+ (m6A), 91- (m6A)
    # (the tpl values asserted below are each one less than those positions)
    start = 50
    end = 100
    REF_GROUP_ID = "gi|12057207|gb|AE001439.1|"
    referenceWindow = ReferenceWindow(0, REF_GROUP_ID, start, end)
    bounds = (start, end)

    self.kw._prepForReferenceWindow(referenceWindow)
    kinetics = self.kw._summarizeReferenceRegion(bounds, False, True)
    mods = self.kw._decodePositiveControl(kinetics, bounds)
    log.info(mods)

    # Verify that we detect m6A mods at 88 and 90
    m6AMods = [
        x for x in mods
        if x['modification'] == 'm6A' and x['tpl'] in (88, 90)
    ]
    self.assertEqual(len(m6AMods), 2)

    # Verify that we detect the m4C mod at 59
    m4CMods = [
        x for x in mods
        if x['modification'] == 'm4C' and x['tpl'] in (59, )
    ]
    self.assertEqual(len(m4CMods), 1)

    # Every reported call must sit on the expected strand: 88 on strand 0,
    # 59 and 90 on the other strand.
    for x in mods:
        if x['strand'] == 0:
            self.assertEqual(x['tpl'], 88)
        else:
            # assertIn reports the offending value on failure, unlike
            # assertTrue(... in ...).
            self.assertIn(x['tpl'], [59, 90])
def test_small_decode(self):
    """Decode the 985-1065 reference window and expect four calls."""
    REF_GROUP_ID = "gi|12057207|gb|AE001439.1|"
    window_start, window_end = 985, 1065
    region = (window_start, window_end)

    # Prime the worker for this window before summarizing it.
    self.kw._prepForReferenceWindow(
        ReferenceWindow(0, REF_GROUP_ID, window_start, window_end))

    summary = self.kw._summarizeReferenceRegion(region, False, True)
    decoded = self.kw._decodePositiveControl(summary, region)

    self.assertEqual(len(decoded), 4)
def test_small_decode(self):
    """
    Summarize and decode the 50-100 reference window, pinning the first
    row's ipdRatio (to five decimals) and the number of decoded calls.
    """
    REF_GROUP_ID = "gi|12057207|gb|AE001439.1|"
    window_start, window_end = 50, 100
    window = ReferenceWindow(0, REF_GROUP_ID, window_start, window_end)
    region = (window_start, window_end)

    self.kw._prepForReferenceWindow(window)
    summary = self.kw._summarizeReferenceRegion(region, False, True)

    # XXX note that this is very dependent on the exact order of reads
    # found by readsInRange(), which may be altered by changes to the
    # implementation of the dataset API.  It should be immune to stochastic
    # effects, however.
    first_ratio = "%.5f" % summary[0]['ipdRatio']
    self.assertEqual(first_ratio, "1.06460")

    decoded = self.kw._decodePositiveControl(summary, region)
    self.assertEqual(len(decoded), 3)
def testSmallDecode(self):
    """
    Test a modification decode around a known modification in lambda
    """
    # First methylated A in lambda:
    # strand  motif          onTarget  seqid  tpl
    # 0       GCACNNNNNNGTT  On        1      14983
    windowStart, windowEnd = 14900, 15100
    window = ReferenceWindow(1, "lambda_NEB3011", windowStart, windowEnd)
    region = (windowStart, windowEnd)

    self.kw._prepForReferenceWindow(window)
    summary = self.kw._summarizeReferenceRegion(region, False, True)
    decoded = self.kw._decodePositiveControl(summary, region)
    print(decoded)

    # Verify that we detect m6A mods at 14982 and 14991
    m6AHits = []
    for call in decoded:
        if call['modification'] != 'm6A':
            continue
        if call['tpl'] in (14982, 14991):
            m6AHits.append(call)

    self.assertEqual(len(m6AHits), 2)