Esempio n. 1
0
    def verifyLineageSet(self, markerSetFile, bRequireTaxonomy):
        """Verify output of lineage set command.

        Reads the tab-separated file, skipping its first (header) line, and
        builds a MarkerSet from each non-blank row. Columns are read as:
        0 = bin ID, 2 = UID, 3 = lineage, 4 = number of genomes and
        5 = marker set expression. If the file holds several data rows, only
        the values of the last one are checked.

        Parameters
        ----------
        markerSetFile : str
            Path to the file to verify.
        bRequireTaxonomy : bool
            Selects the expected values: when false the marker counts and
            the UID are checked, when true the marker counts and the
            lineage are checked.

        Raises
        ------
        AssertionError
            If the file contains no data rows or any check fails.
        """

        binId = None
        uid = None
        lineage = None
        markerSet = None

        with open(markerSetFile) as f:
            f.readline()  # skip header

            for line in f:
                if not line.strip():
                    continue

                lineSplit = line.split('\t')
                binId = lineSplit[0]
                uid = lineSplit[2]
                lineage = lineSplit[3]
                numGenomes = int(lineSplit[4])

                # NOTE(review): eval() executes whatever this column holds;
                # only run this check against files from a trusted source.
                markerSet = MarkerSet(uid, lineage, numGenomes, eval(lineSplit[5].rstrip()))

        # without this guard an empty file surfaces as an unbound-variable
        # error below instead of as a test failure
        if markerSet is None:
            raise AssertionError("No marker set data found in " + str(markerSetFile))

        np.testing.assert_almost_equal(int(binId), 637000110, err_msg="Failed bin ID test")
        if not bRequireTaxonomy:
            # this might be a little unstable as it depends on HMMER and prodigal, but
            # we will see how it goes
            np.testing.assert_equal(markerSet.numSets(), 266, err_msg="Failed # marker set test")
            np.testing.assert_equal(markerSet.numMarkers(), 2134, err_msg="Failed # markers test")
            # explicit check rather than a bare assert, which is stripped under -O
            np.testing.assert_equal(uid, 'UID5199', err_msg="Failed UID test")
        else:
            np.testing.assert_equal(markerSet.numSets(), 282, err_msg="Failed # marker set test")
            np.testing.assert_equal(markerSet.numMarkers(), 1254, err_msg="Failed # markers test")
            np.testing.assert_equal(lineage, 'f__Enterobacteriaceae', err_msg="Failed lineage test")
Esempio n. 2
0
    def testMarkerSet(self):
        """Check the counts and gene membership reported by a small MarkerSet."""

        # two sets holding three distinct genes in total
        geneSets = [{"a", "b"}, {"c"}]
        markerSet = MarkerSet(0, "k__Bacteria", 100, geneSets)

        # size() is unpacked as (gene count, set count)
        geneCount, setCount = markerSet.size()
        self.assertEqual(geneCount, 3)
        self.assertEqual(setCount, 2)

        # the individual accessors must report the same counts
        self.assertEqual(markerSet.numMarkers(), 3)
        self.assertEqual(markerSet.numSets(), 2)

        allGenes = {"a", "b", "c"}
        self.assertEqual(markerSet.getMarkerGenes(), allGenes)
Esempio n. 3
0
    def testMarkerSet(self):
        """Exercise the MarkerSet accessors on a two-set, three-gene example."""

        firstSet = {'a', 'b'}
        secondSet = {'c'}
        exampleSet = MarkerSet(0, 'k__Bacteria', 100, [firstSet, secondSet])

        # combined size query: genes first, then sets
        totalGenes, totalSets = exampleSet.size()
        self.assertEqual(totalGenes, 3)
        self.assertEqual(totalSets, 2)

        # per-quantity queries
        self.assertEqual(exampleSet.numMarkers(), 3)
        self.assertEqual(exampleSet.numSets(), 2)

        # gene membership is compared against the flattened set of genes
        self.assertEqual(exampleSet.getMarkerGenes(), {'a', 'b', 'c'})
Esempio n. 4
0
 def testMarkerSet(self):
     """Confirm that MarkerSet reports the expected sizes and genes."""

     # input: one set of two genes and one set of a single gene
     inputSets = [{'a', 'b'}, {'c'}]
     testSet = MarkerSet(0, 'k__Bacteria', 100, inputSets)

     # size() yields the gene count followed by the set count
     nGenes, nSets = testSet.size()
     self.assertEqual(nGenes, 3)
     self.assertEqual(nSets, 2)

     # dedicated count accessors
     self.assertEqual(testSet.numMarkers(), 3)
     self.assertEqual(testSet.numSets(), 2)

     # every gene from both sets must be returned
     expectedGenes = {'a', 'b', 'c'}
     self.assertEqual(testSet.getMarkerGenes(), expectedGenes)
    def run(self):
        """Append genome and marker set statistics to internal node labels.

        Reads tab-separated (uid, label) pairs from
        ./experiments/classTree.internal_nodes.tsv, looks each uid up in the
        node metadata returned by TreeParser.readNodeMetadata(), appends
        ' [# genomes, # markers, # marker sets]' to the label, and writes the
        resulting uid/label pairs to
        ./experiments/classTree.internal_nodes.metadata.tsv.

        Raises
        ------
        ValueError
            If a non-blank input line does not have exactly two
            tab-separated fields.
        KeyError
            If a uid from the input file is missing from the node metadata.
        """
        # read internal nodes file
        metadata = {}
        with open('./experiments/classTree.internal_nodes.tsv') as fin:
            for line in fin:
                if not line.strip():
                    # tolerate blank lines (e.g. a trailing newline)
                    continue

                uid, label = [x.strip() for x in line.split('\t')]
                metadata[uid] = label

        # read all lineage-specific marker genes
        treeParser = TreeParser()
        uniqueIdToLineageStatistics = treeParser.readNodeMetadata()
        for uid in metadata:
            stats = uniqueIdToLineageStatistics[uid]

            # convert once so MarkerSet and the %d format see the same integer
            numGenomes = int(stats['# genomes'])

            # NOTE(review): eval() executes the stored marker set expression;
            # the metadata must come from a trusted source.
            markerSet = MarkerSet(uid, 'NA', numGenomes,
                                  eval(stats['marker set']))

            metadata[uid] += ' [%d, %d, %d]' % (numGenomes,
                                                markerSet.numMarkers(),
                                                markerSet.numSets())

        # write out results
        with open('./experiments/classTree.internal_nodes.metadata.tsv', 'w') as fout:
            for uid, label in metadata.items():
                fout.write(uid + '\t' + label + '\n')
    def run(self):
        """Append genome and marker set statistics to internal node labels.

        Reads tab-separated (uid, label) pairs from
        ./experiments/classTree.internal_nodes.tsv, looks each uid up in the
        node metadata returned by TreeParser.readNodeMetadata(), appends
        ' [# genomes, # markers, # marker sets]' to the label, and writes the
        resulting uid/label pairs to
        ./experiments/classTree.internal_nodes.metadata.tsv.

        Raises
        ------
        ValueError
            If a non-blank input line does not have exactly two
            tab-separated fields.
        KeyError
            If a uid from the input file is missing from the node metadata.
        """
        # read internal nodes file
        metadata = {}
        with open('./experiments/classTree.internal_nodes.tsv') as fin:
            for line in fin:
                if not line.strip():
                    # tolerate blank lines (e.g. a trailing newline)
                    continue

                uid, label = [x.strip() for x in line.split('\t')]
                metadata[uid] = label

        # read all lineage-specific marker genes
        treeParser = TreeParser()
        uniqueIdToLineageStatistics = treeParser.readNodeMetadata()
        for uid in metadata:
            stats = uniqueIdToLineageStatistics[uid]

            # convert once so MarkerSet and the %d format see the same integer
            numGenomes = int(stats['# genomes'])

            # NOTE(review): eval() executes the stored marker set expression;
            # the metadata must come from a trusted source.
            markerSet = MarkerSet(uid, 'NA', numGenomes, eval(stats['marker set']))

            metadata[uid] += ' [%d, %d, %d]' % (numGenomes, markerSet.numMarkers(), markerSet.numSets())

        # write out results
        # items() works on both Python 2 and 3; iteritems() exists only on Python 2
        with open('./experiments/classTree.internal_nodes.metadata.tsv', 'w') as fout:
            for uid, label in metadata.items():
                fout.write(uid + '\t' + label + '\n')