def verifyLineageSet(self, markerSetFile, bRequireTaxonomy):
    """Verify output of lineage set command.

    Reads the tab-separated file, skipping its first (header) line, parses
    every non-blank record, and checks the last parsed record against the
    expected values.

    Args:
        markerSetFile: path of the file to check. Columns are read in the
            order: bin ID, marker count, UID, lineage, genome count and a
            marker set expression.
        bRequireTaxonomy: when false, the marker set/marker counts and the
            UID are checked; when true, a different pair of counts and the
            lineage are checked.

    Raises:
        AssertionError: if the file has no data records or any check fails.
    """
    binId = None
    uid = None
    lineage = None
    markerSet = None

    with open(markerSetFile) as f:
        f.readline()  # skip header

        for line in f:
            if line.strip() == '':
                continue

            lineSplit = line.split('\t')
            binId = lineSplit[0]
            # The marker count is not compared below, but a non-numeric
            # value in this column should still fail loudly.
            int(lineSplit[1])
            uid = lineSplit[2]
            lineage = lineSplit[3]
            numGenomes = int(lineSplit[4])
            # NOTE(review): eval() executes whatever expression the file
            # contains; only run this against trusted output files.
            markerSet = MarkerSet(uid, lineage, numGenomes, eval(lineSplit[5].rstrip()))

    # Without this guard an empty file would surface as a NameError on the
    # first check instead of a test failure.
    if markerSet is None:
        raise AssertionError("No marker set records found in %s" % markerSetFile)

    np.testing.assert_almost_equal(int(binId), 637000110, err_msg="Failed bin ID test")
    if not bRequireTaxonomy:
        # this might be a little unstable as it depends on HMMER and prodigal, but
        # we will see how it goes
        np.testing.assert_equal(markerSet.numSets(), 266, err_msg="Failed # marker set test")
        np.testing.assert_equal(markerSet.numMarkers(), 2134, err_msg="Failed # markers test")
        # np.testing is used instead of a bare assert so the check is not
        # stripped when Python runs with -O.
        np.testing.assert_equal(uid, 'UID5199', err_msg="Failed UID test")
    else:
        np.testing.assert_equal(markerSet.numSets(), 282, err_msg="Failed # marker set test")
        np.testing.assert_equal(markerSet.numMarkers(), 1254, err_msg="Failed # markers test")
        np.testing.assert_equal(lineage, 'f__Enterobacteriaceae', err_msg="Failed lineage test")
def testMarkerSet(self):
    """Verify marker set data structure.

    Builds a MarkerSet from two sets holding three genes in total and checks
    the values reported by size(), numMarkers(), numSets() and
    getMarkerGenes().
    """
    geneSets = [{"a", "b"}, {"c"}]
    markerSet = MarkerSet(0, "k__Bacteria", 100, geneSets)

    # size() is unpacked as (number of marker genes, number of marker sets)
    geneCount, setCount = markerSet.size()
    self.assertEqual(geneCount, 3)
    self.assertEqual(setCount, 2)

    self.assertEqual(markerSet.numMarkers(), 3)
    self.assertEqual(markerSet.numSets(), 2)

    self.assertEqual(markerSet.getMarkerGenes(), {"a", "b", "c"})
def testMarkerSet(self):
    """Verify marker set data structure.

    A MarkerSet made of the sets {a, b} and {c} must report three marker
    genes spread over two marker sets, through both size() and the
    individual accessors.
    """
    expectedGenes = 3
    expectedSets = 2
    allGenes = {'a', 'b', 'c'}

    ms = MarkerSet(0, 'k__Bacteria', 100, [{'a', 'b'}, {'c'}])

    # size() yields the gene count first, then the set count
    sizeGenes, sizeSets = ms.size()
    self.assertEqual(sizeGenes, expectedGenes)
    self.assertEqual(sizeSets, expectedSets)
    self.assertEqual(ms.numMarkers(), expectedGenes)
    self.assertEqual(ms.numSets(), expectedSets)
    self.assertEqual(ms.getMarkerGenes(), allGenes)
def run(self):
    """Annotate internal tree nodes with marker set statistics.

    Reads uid/label pairs from ./experiments/classTree.internal_nodes.tsv,
    appends ' [<# genomes>, <# markers>, <# marker sets>]' to each label
    using the per-node statistics returned by
    TreeParser.readNodeMetadata(), and writes the uid/label pairs to
    ./experiments/classTree.internal_nodes.metadata.tsv.

    Raises:
        ValueError: if an input line does not split into exactly two
            tab-separated fields.
        KeyError: if a uid from the input file has no entry in the node
            metadata.
    """
    # read internal nodes file; the context manager closes the handle, which
    # iterating over a bare open() call never did
    metadata = {}
    with open('./experiments/classTree.internal_nodes.tsv') as fin:
        for line in fin:
            uid, label = [x.strip() for x in line.split('\t')]
            metadata[uid] = label

    # read all lineage-specific marker genes
    treeParser = TreeParser()
    uniqueIdToLineageStatistics = treeParser.readNodeMetadata()

    for uid in metadata:
        stats = uniqueIdToLineageStatistics[uid]
        # Convert once and reuse: '%d' rejects a str, so formatting the raw
        # value would fail if the parser ever returned text here.
        numGenomes = int(stats['# genomes'])
        # NOTE(review): eval() executes the stored expression; this is only
        # safe while the metadata comes from a trusted source.
        markerSet = MarkerSet(uid, 'NA', numGenomes, eval(stats['marker set']))
        metadata[uid] += ' [%d, %d, %d]' % (numGenomes, markerSet.numMarkers(), markerSet.numSets())

    # write out results; 'with' closes the file even if a write fails
    with open('./experiments/classTree.internal_nodes.metadata.tsv', 'w') as fout:
        for uid, label in metadata.items():
            fout.write(uid + '\t' + label + '\n')
def run(self):
    """Annotate internal tree nodes with marker set statistics.

    Reads uid/label pairs from ./experiments/classTree.internal_nodes.tsv,
    appends ' [<# genomes>, <# markers>, <# marker sets>]' to each label
    using the per-node statistics returned by
    TreeParser.readNodeMetadata(), and writes the uid/label pairs to
    ./experiments/classTree.internal_nodes.metadata.tsv.

    Raises:
        ValueError: if an input line does not split into exactly two
            tab-separated fields.
        KeyError: if a uid from the input file has no entry in the node
            metadata.
    """
    # read internal nodes file; the context manager closes the handle, which
    # iterating over a bare open() call never did
    metadata = {}
    with open('./experiments/classTree.internal_nodes.tsv') as fin:
        for line in fin:
            uid, label = [x.strip() for x in line.split('\t')]
            metadata[uid] = label

    # read all lineage-specific marker genes
    treeParser = TreeParser()
    uniqueIdToLineageStatistics = treeParser.readNodeMetadata()

    for uid in metadata:
        stats = uniqueIdToLineageStatistics[uid]
        # Convert once and reuse: '%d' rejects a str, so formatting the raw
        # value would fail if the parser ever returned text here.
        numGenomes = int(stats['# genomes'])
        # NOTE(review): eval() executes the stored expression; this is only
        # safe while the metadata comes from a trusted source.
        markerSet = MarkerSet(uid, 'NA', numGenomes, eval(stats['marker set']))
        metadata[uid] += ' [%d, %d, %d]' % (numGenomes, markerSet.numMarkers(), markerSet.numSets())

    # write out results; 'with' closes the file even if a write fails.
    # items() replaces iteritems(), which dict no longer provides on
    # Python 3; items() is available on Python 2 as well.
    with open('./experiments/classTree.internal_nodes.metadata.tsv', 'w') as fout:
        for uid, label in metadata.items():
            fout.write(uid + '\t' + label + '\n')