Beispiel #1
0
    def test_write(self):
        text = """\
##types:string	int	float	bool
name	num	real	truth
matt	-123	10.0	True
alex	456	2.5	False
mike	789	-30.0	False
"""

        text_no_comments = """\
name	num	real	truth
matt	-123	10.0	True
alex	456	2.5	False
mike	789	-30.0	False
"""

        # Write
        tab = tablelib.read_table(StringIO(text))
        out = StringIO()
        tab.write(out, comments=True)
        self.assertEqual(out.getvalue(), text)

        # Write no comments
        tab = tablelib.read_table(StringIO(text))
        out = StringIO()
        tab.write(out)
        self.assertEqual(out.getvalue(), text_no_comments)

        # Write string
        tab = tablelib.read_table(StringIO(text))
        out = StringIO()
        out.write(str(tab))
        self.assertEqual(out.getvalue(), text_no_comments)

        expected_repr = ("""\
name  num   real      truth  \n\
matt  -123   10.0000   True  \n\
alex   456    2.5000  False  \n\
mike   789  -30.0000  False  \n\
""")
        # Write repr
        tab = tablelib.read_table(StringIO(text))
        out = StringIO()
        out.write(repr(tab))
        self.assertEqual(out.getvalue(), expected_repr)

        # read with no header and write with header
        expected = ("""\
a	b	c
1	2	3
4	5	6
""")
        data = [
            [1, 2, 3],
            [4, 5, 6],
        ]
        tab = tablelib.Table(data, nheaders=0, headers=['a', 'b', 'c'])
        out = StringIO()
        tab.write(out, nheaders=1)
        self.assertEqual(out.getvalue(), expected)
    def test_table_types(self):

        # Explict types.
        text = """\
##types:string	int	string	bool
name	num	text	truth
john	-10	1a	true
"""
        tab = tablelib.read_table(StringIO(text), guess_types=False)
        self.assertEquals(tab[0], {"text": "1a", "num": -10, "name": "john", "truth": True})

        # Do not guess types, always use string.
        text = """\
name	num	text	truth
john	-10	1a	true
"""
        tab = tablelib.read_table(StringIO(text), guess_types=False)
        self.assertEquals(tab[0], {"text": "1a", "num": "-10", "name": "john", "truth": "true"})

        # Guess types from first row.
        text = """\
name	num	text	truth
john	-10	1a	true
"""
        tab = tablelib.read_table(StringIO(text))
        self.assertEquals(tab[0], {"text": "1a", "num": -10, "name": "john", "truth": True})

        # Only specify some types with no guessing.
        tab = tablelib.read_table(StringIO(text), guess_types=False, types={"truth": bool})
        self.assertEquals(tab[0], {"text": "1a", "num": "-10", "name": "john", "truth": True})

        # Only specify some types with guessing.
        tab = tablelib.read_table(StringIO(text), types={"truth": str})
        self.assertEquals(tab[0], {"text": "1a", "num": -10, "name": "john", "truth": "true"})
    def test_write(self):
        text = """\
##types:string	int	float	bool
name	num	real	truth
matt	-123	10.0	True
alex	456	2.5	False
mike	789	-30.0	False
"""

        text_no_comments = """\
name	num	real	truth
matt	-123	10.0	True
alex	456	2.5	False
mike	789	-30.0	False
"""

        # Write
        tab = tablelib.read_table(StringIO(text))
        out = StringIO()
        tab.write(out, comments=True)
        self.assertEqual(out.getvalue(), text)

        # Write no comments
        tab = tablelib.read_table(StringIO(text))
        out = StringIO()
        tab.write(out)
        self.assertEqual(out.getvalue(), text_no_comments)

        # Write string
        tab = tablelib.read_table(StringIO(text))
        out = StringIO()
        out.write(str(tab))
        self.assertEqual(out.getvalue(), text_no_comments)

        expected_repr = """\
name  num   real      truth  \n\
matt  -123   10.0000   True  \n\
alex   456    2.5000  False  \n\
mike   789  -30.0000  False  \n\
"""
        # Write repr
        tab = tablelib.read_table(StringIO(text))
        out = StringIO()
        out.write(repr(tab))
        self.assertEqual(out.getvalue(), expected_repr)

        # read with no header and write with header
        expected = """\
a	b	c
1	2	3
4	5	6
"""
        data = [[1, 2, 3], [4, 5, 6]]
        tab = tablelib.Table(data, nheaders=0, headers=["a", "b", "c"])
        out = StringIO()
        tab.write(out, nheaders=1)
        self.assertEqual(out.getvalue(), expected)
    def addGoTerms(self, gofile):
        """Insert the GO annotations read from *gofile* into the database.

        Calls ``self.makeGoTermsTable()`` first when ``tableExists`` reports
        no "GoTerms" table.  One row per distinct "goid" goes into GoTerms
        (using the first table row of that group for the term text), and one
        row per (orf, goid) pair goes into GeneGoTerms.

        Values are passed as bound parameters rather than spliced into the
        SQL text, so a term containing a double quote no longer produces a
        malformed statement.

        NOTE(review): under Python 2's sqlite3 default text_factory, bound
        byte strings must be ASCII -- confirm the input files are ASCII.
        """
        if not tableExists(self.cur, "GoTerms"):
            self.makeGoTermsTable()

        util.tic("add go terms")
        goterms = tablelib.read_table(gofile)
        goterms_lookup = goterms.groupby("orf")
        goterms_bygoid = goterms.groupby("goid")

        for goterm in goterms_bygoid:
            term = goterms_bygoid[goterm][0]

            # Diagnostic kept from the original: report terms containing a
            # double quote (these used to break the interpolated SQL).
            if '"' in term["term"]:
                print(term)

            # "%s" formatting keeps the stored values textual, exactly as the
            # former quoted interpolation did.
            self.cur.execute(
                "INSERT INTO GoTerms VALUES (?, ?)",
                ("%s" % (term["goid"],), "%s" % (term["term"],)))

        for gene, terms in goterms_lookup.iteritems():
            for term in terms:
                self.cur.execute(
                    "INSERT INTO GeneGoTerms VALUES (?, ?);",
                    ("%s" % (gene,), "%s" % (term["goid"],)))
        util.toc()
Beispiel #5
0
    def test_read(self):
        text = """\
##types:str	int	float	bool
name	num	real	truth
matt	-123	10.0	true
alex	456	2.5	false
mike	789	-30.0	false
"""
        expected = [
            {
                'real': 10.0,
                'num': -123,
                'name': 'matt',
                'truth': True
            },
            {
                'real': 2.5,
                'num': 456,
                'name': 'alex',
                'truth': False
            },
            {
                'real': -30.0,
                'num': 789,
                'name': 'mike',
                'truth': False
            },
        ]
        tab = tablelib.read_table(StringIO(text))
        self.assertEqual(tab, expected)
Beispiel #6
0
    def addFamilies(self, eventsfile, discard=()):
        """Insert one Families row per family listed in *eventsfile*.

        Calls ``self.makeFamiliesTable()`` first when ``tableExists`` reports
        no "Families" table.  For every row of the events table whose
        "partid" is not in *discard*, the family's tree and FASTA files are
        read to compute the total branch length and three times the median
        sequence length; these are stored together with the row's "famrate",
        "dup", "loss" and "genes" columns and the two name strings from
        ``self.makeFamilyGeneNames()``.

        Args:
            eventsfile: table file passed to ``tablelib.read_table``.
            discard: iterable of family ids to skip (each is logged).

        Values are passed as bound parameters rather than spliced into the
        SQL text, so names containing quotes cannot break the statement.
        Floats are therefore stored at full precision instead of the six
        decimals that "%f" formatting produced.

        NOTE(review): under Python 2's sqlite3 default text_factory, bound
        byte strings must be ASCII -- confirm the input files are ASCII.
        """
        if not tableExists(self.cur, "Families"):
            self.makeFamiliesTable()

        util.tic("add families")
        events_tab = tablelib.read_table(eventsfile)
        # Result is unused; the call is kept in case lookup() validates the
        # "partid" keys -- TODO confirm and drop if it has no such effect.
        events_lookup = events_tab.lookup("partid")
        familyGeneNames = self.makeFamilyGeneNames()
        discard = set(discard)

        for row in events_tab:
            famid = row["partid"]
            if famid in discard:
                util.logger("discarding '%s'" % famid)
                continue

            tree = treelib.read_tree(self.getTreeFile(famid))
            treelen = sum(x.dist for x in tree)
            seqs = fasta.read_fasta(self.getFastaFile(famid))
            seqlen = stats.median(map(len, seqs.values()))

            # Look the names up once; families without an entry get blanks.
            names = familyGeneNames.get(famid, ("", ""))

            self.cur.execute(
                "INSERT INTO Families VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);",
                ("%s" % (famid,),
                 "%s" % (names[0],),
                 float(row["famrate"]), float(treelen), float(seqlen * 3),
                 int(row["dup"]), int(row["loss"]), int(row["genes"]),
                 "%s" % (names[1],)))
        util.toc()
Beispiel #7
0
    def addEvents(self, eventsfile):
        """Insert per-species event counts for every family in the database.

        Calls ``self.makeEventsTable()`` first when ``tableExists`` reports no
        "Events" table.  Family ids are read back from the Families table;
        ids missing from *eventsfile* are skipped.  For each remaining family
        and each node of ``self.stree``, the "<node>-genes", "<node>-dup",
        "<node>-loss" and "<node>-appear" columns of the family's row are
        inserted into Events.

        Values are passed as bound parameters rather than spliced into the
        SQL text, so ids containing quotes cannot break the statement.

        NOTE(review): under Python 2's sqlite3 default text_factory, bound
        byte strings must be ASCII -- confirm the input files are ASCII.
        """
        if not tableExists(self.cur, "Events"):
            self.makeEventsTable()

        util.tic("add events")
        events_tab = tablelib.read_table(eventsfile)
        events_lookup = events_tab.lookup("partid")

        self.cur.execute("SELECT famid FROM Families;")
        # Materialize the ids before the cursor is reused for the inserts.
        famids = [x[0] for x in self.cur]

        for famid in famids:
            if famid not in events_lookup:
                continue
            row = events_lookup[famid]

            for sp in self.stree.nodes:
                sp = str(sp)

                self.cur.execute(
                    "INSERT INTO Events VALUES (?, ?, ?, ?, ?, ?);",
                    ("%s" % (famid,), sp,
                     int(row[sp + "-genes"]), int(row[sp + "-dup"]),
                     int(row[sp + "-loss"]), int(row[sp + "-appear"])))
        util.toc()
    def addEvents(self, eventsfile):
        """Fill the Events table from *eventsfile* for all known families.

        Calls ``self.makeEventsTable()`` first when ``tableExists`` reports no
        "Events" table.  The family ids come from ``SELECT famid FROM
        Families``; an id without a row in the events table is skipped.  One
        Events row is inserted per (family, species-tree node), holding the
        node's "-genes", "-dup", "-loss" and "-appear" columns.

        The insert uses bound parameters instead of string interpolation, so
        ids containing quotes cannot corrupt the SQL.

        NOTE(review): under Python 2's sqlite3 default text_factory, bound
        byte strings must be ASCII -- confirm the input files are ASCII.
        """
        if not tableExists(self.cur, "Events"):
            self.makeEventsTable()

        util.tic("add events")
        events_tab = tablelib.read_table(eventsfile)
        events_lookup = events_tab.lookup("partid")

        self.cur.execute("SELECT famid FROM Families;")
        # Read all ids up front; the same cursor runs the inserts below.
        famids = [x[0] for x in self.cur]

        insert_sql = "INSERT INTO Events VALUES (?, ?, ?, ?, ?, ?);"
        suffixes = ("-genes", "-dup", "-loss", "-appear")

        for famid in famids:
            if famid not in events_lookup:
                continue
            row = events_lookup[famid]

            for sp in self.stree.nodes:
                sp = str(sp)
                counts = tuple(int(row[sp + suffix]) for suffix in suffixes)
                self.cur.execute(insert_sql,
                                 ("%s" % (famid,), sp) + counts)
        util.toc()
Beispiel #9
0
    def addGoTerms(self, gofile):
        """Load GO terms and gene-to-term links from *gofile*.

        Calls ``self.makeGoTermsTable()`` first when ``tableExists`` reports
        no "GoTerms" table.  The table is grouped by "goid" to insert one
        GoTerms row per id (term text taken from the group's first row), and
        grouped by "orf" to insert one GeneGoTerms row per annotation.

        Both inserts use bound parameters instead of string interpolation, so
        a double quote inside a term no longer breaks the statement.

        NOTE(review): under Python 2's sqlite3 default text_factory, bound
        byte strings must be ASCII -- confirm the input files are ASCII.
        """
        if not tableExists(self.cur, "GoTerms"):
            self.makeGoTermsTable()

        util.tic("add go terms")
        goterms = tablelib.read_table(gofile)
        goterms_lookup = goterms.groupby("orf")
        goterms_bygoid = goterms.groupby("goid")

        for goterm in goterms_bygoid:
            term = goterms_bygoid[goterm][0]

            # Original diagnostic, retained: print terms that contain a
            # double quote.
            if '"' in term["term"]:
                print(term)

            # Format through "%s" so values are stored as text, as before.
            goid_text = "%s" % (term["goid"],)
            term_text = "%s" % (term["term"],)
            self.cur.execute("INSERT INTO GoTerms VALUES (?, ?)",
                             (goid_text, term_text))

        for gene, terms in goterms_lookup.iteritems():
            gene_text = "%s" % (gene,)
            for term in terms:
                self.cur.execute("INSERT INTO GeneGoTerms VALUES (?, ?);",
                                 (gene_text, "%s" % (term["goid"],)))
        util.toc()
    def addFamilies(self, eventsfile, discard=()):
        """Populate the Families table from the rows of *eventsfile*.

        Calls ``self.makeFamiliesTable()`` first when ``tableExists`` reports
        no "Families" table.  Rows whose "partid" is in *discard* are logged
        and skipped.  For the rest, the tree length (sum of ``dist`` over the
        tree) and three times the median sequence length are computed from
        the family's tree and FASTA files and stored with the row's
        "famrate", "dup", "loss" and "genes" values and the pair of strings
        from ``self.makeFamilyGeneNames()``.

        Args:
            eventsfile: table file passed to ``tablelib.read_table``.
            discard: iterable of family ids to leave out.

        The insert uses bound parameters instead of string interpolation, so
        quotes inside names cannot corrupt the SQL; floats are consequently
        stored unrounded rather than with "%f"'s six decimals.

        NOTE(review): under Python 2's sqlite3 default text_factory, bound
        byte strings must be ASCII -- confirm the input files are ASCII.
        """
        if not tableExists(self.cur, "Families"):
            self.makeFamiliesTable()

        util.tic("add families")
        events_tab = tablelib.read_table(eventsfile)
        # Unused result; call retained in case lookup() checks the "partid"
        # keys -- TODO confirm whether it can be removed.
        events_lookup = events_tab.lookup("partid")
        familyGeneNames = self.makeFamilyGeneNames()
        discard = set(discard)

        insert_sql = "INSERT INTO Families VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);"

        for row in events_tab:
            famid = row["partid"]
            if famid in discard:
                util.logger("discarding '%s'" % famid)
                continue

            tree = treelib.read_tree(self.getTreeFile(famid))
            treelen = sum(x.dist for x in tree)
            seqs = fasta.read_fasta(self.getFastaFile(famid))
            seqlen = stats.median(map(len, seqs.values()))

            # Single lookup; blanks when the family has no names recorded.
            first_name, second_name = familyGeneNames.get(famid, ("", ""))[:2]

            self.cur.execute(
                insert_sql,
                ("%s" % (famid,),
                 "%s" % (first_name,),
                 float(row["famrate"]),
                 float(treelen),
                 float(seqlen * 3),
                 int(row["dup"]),
                 int(row["loss"]),
                 int(row["genes"]),
                 "%s" % (second_name,)))
        util.toc()
Beispiel #11
0
    def addPfams(self, pfamfile):
        """Insert the Pfam domain definitions from *pfamfile*.

        Calls ``self.makePfamTable()`` first when ``tableExists`` reports no
        "PfamDomains" table, then inserts the "pfamid", "pfam_name" and
        "pfam_description" columns of every row into PfamDomains.

        Values are passed as bound parameters rather than spliced into the
        SQL text, so a description containing a double quote no longer
        produces a malformed statement.

        NOTE(review): under Python 2's sqlite3 default text_factory, bound
        byte strings must be ASCII -- confirm the input files are ASCII.
        """
        if not tableExists(self.cur, "PfamDomains"):
            self.makePfamTable()

        pfams = tablelib.read_table(pfamfile)

        for row in pfams:
            # "%s" formatting keeps the stored values textual, as before.
            self.cur.execute(
                "INSERT INTO PfamDomains VALUES (?, ?, ?);",
                ("%s" % (row['pfamid'],),
                 "%s" % (row['pfam_name'],),
                 "%s" % (row['pfam_description'],)))
    def test_sort(self):
        text = """\
##types:str	int	int
name	num	num2
matt	123	3
alex	456	0
mike	789	1
"""

        tab = tablelib.read_table(StringIO(text))
        tab.sort()
        self.assertEqual(tab.cget("name", "num"), [["alex", "matt", "mike"], [456, 123, 789]])
Beispiel #13
0
    def test_read_error(self):
        text = """\
##types:str	int	int
name	num	num
matt	123	0
alex	456	2
mike	789	1
"""
        self.assertRaises(tablelib.TableException,
                          lambda: tablelib.read_table(StringIO(text)))

        text = """\
##types:str	int	int
name	num	num
matt	123	0
alex	456	2	extra
mike	789	1
"""
        self.assertRaises(tablelib.TableException,
                          lambda: tablelib.read_table(StringIO(text)))

        text = """\
##types:str	int	int
name	num	num2
matt	123	0
alex	456	not_an_int
mike	789	1
"""
        self.assertRaises(tablelib.TableException,
                          lambda: tablelib.read_table(StringIO(text)))

        text = """\
##types:str	int	int
name	num	num2
matt	123	0
alex	456
mike	789	1
"""
        self.assertRaises(tablelib.TableException,
                          lambda: tablelib.read_table(StringIO(text)))
Beispiel #14
0
    def test_sort(self):
        text = """\
##types:str	int	int
name	num	num2
matt	123	3
alex	456	0
mike	789	1
"""

        tab = tablelib.read_table(StringIO(text))
        tab.sort()
        self.assertEqual(tab.cget('name', 'num'),
                         [['alex', 'matt', 'mike'], [456, 123, 789]])
    def addPfams(self, pfamfile):
        """Load Pfam domain definitions from *pfamfile* into PfamDomains.

        Calls ``self.makePfamTable()`` first when ``tableExists`` reports no
        "PfamDomains" table.  Each row contributes its "pfamid", "pfam_name"
        and "pfam_description" columns.

        The insert uses bound parameters instead of string interpolation, so
        quotes inside a name or description cannot corrupt the SQL.

        NOTE(review): under Python 2's sqlite3 default text_factory, bound
        byte strings must be ASCII -- confirm the input files are ASCII.
        """
        if not tableExists(self.cur, "PfamDomains"):
            self.makePfamTable()

        pfams = tablelib.read_table(pfamfile)

        insert_sql = "INSERT INTO PfamDomains VALUES (?, ?, ?);"
        columns = ('pfamid', 'pfam_name', 'pfam_description')

        for row in pfams:
            # Format through "%s" so values are stored as text, as before.
            values = tuple("%s" % (row[column],) for column in columns)
            self.cur.execute(insert_sql, values)
    def test_read(self):
        text = """\
##types:str	int	float	bool
name	num	real	truth
matt	-123	10.0	true
alex	456	2.5	false
mike	789	-30.0	false
"""
        expected = [
            {"real": 10.0, "num": -123, "name": "matt", "truth": True},
            {"real": 2.5, "num": 456, "name": "alex", "truth": False},
            {"real": -30.0, "num": 789, "name": "mike", "truth": False},
        ]
        tab = tablelib.read_table(StringIO(text))
        self.assertEqual(tab, expected)
    def test_read_error(self):
        text = """\
##types:str	int	int
name	num	num
matt	123	0
alex	456	2
mike	789	1
"""
        self.assertRaises(tablelib.TableException, lambda: tablelib.read_table(StringIO(text)))

        text = """\
##types:str	int	int
name	num	num
matt	123	0
alex	456	2	extra
mike	789	1
"""
        self.assertRaises(tablelib.TableException, lambda: tablelib.read_table(StringIO(text)))

        text = """\
##types:str	int	int
name	num	num2
matt	123	0
alex	456	not_an_int
mike	789	1
"""
        self.assertRaises(tablelib.TableException, lambda: tablelib.read_table(StringIO(text)))

        text = """\
##types:str	int	int
name	num	num2
matt	123	0
alex	456
mike	789	1
"""
        self.assertRaises(tablelib.TableException, lambda: tablelib.read_table(StringIO(text)))
 def __init__(self, filename=None, datadir=None, olddatadir=None):
     """Load the family table and build the lookup dictionaries.

     Args:
         filename: table file to read with ``tablelib.read_table``; when
             None, an empty table with "famid" and "genes" headers is used.
         datadir: stored unchanged as ``self.datadir``.
         olddatadir: stored unchanged as ``self.olddatadir``.
     """
     # Identity test: only a missing filename selects the empty table.
     if filename is not None:
         self.famtab = tablelib.read_table(filename)
     else:
         self.famtab = tablelib.Table(headers=["famid", "genes"])

     self.famlookup = self.famtab.lookup("famid")
     self.genelookup = {}

     # Map every gene back to the family row that contains it.
     for fam in self:
         for gene in self.getGenes(fam["famid"]):
             self.genelookup[gene] = fam

     self.filename = filename
     self.datadir = datadir
     self.olddatadir = olddatadir
Beispiel #19
0
    def __init__(self, filename=None, datadir=None, olddatadir=None):
        """Read the family table and index it by family id and by gene.

        Args:
            filename: table file for ``tablelib.read_table``.  If None, the
                object starts with an empty "famid"/"genes" table.
            datadir: kept as ``self.datadir``.
            olddatadir: kept as ``self.olddatadir``.
        """
        # Compare by identity so only None selects the empty table.
        if filename is not None:
            self.famtab = tablelib.read_table(filename)
        else:
            self.famtab = tablelib.Table(headers=["famid", "genes"])

        self.famlookup = self.famtab.lookup("famid")
        self.genelookup = {}

        # Reverse index: gene -> family row.
        for fam in self:
            for gene in self.getGenes(fam["famid"]):
                self.genelookup[gene] = fam

        self.filename = filename
        self.datadir = datadir
        self.olddatadir = olddatadir
    def __init__(self,
                 dbfile=None,
                 famfile=None,
                 smapfile=None,
                 genenamefile=None,
                 streefile=None,
                 baseDir=None,
                 treeFileExt=None,
                 fastaFileExt=None):
        """Read the input data files and connect to the database.

        The family file, gene-to-species map, gene-name table and species
        tree are each loaded with their respective readers; *baseDir*,
        *treeFileExt* and *fastaFileExt* are stored as given.  The database
        at *dbfile* is opened with ``isolation_level="DEFERRED"`` and a
        cursor is kept in ``self.cur``.
        """
        # Plain settings, stored unchanged.
        self.baseDir = baseDir
        self.treeFileExt = treeFileExt
        self.fastaFileExt = fastaFileExt

        # Data loaded from files.
        self.fams = genecluster.FamilyDb(famfile)
        self.gene2species = phylo.read_gene2species(smapfile)
        name_table = tablelib.read_table(genenamefile)
        self.genenames_tab = name_table
        self.gene2name = name_table.lookup("id")
        self.stree = treelib.read_tree(streefile)

        # Database connection and its cursor.
        connection = sqlite.connect(dbfile, isolation_level="DEFERRED")
        self.con = connection
        self.cur = connection.cursor()
Beispiel #21
0
    def addPfamGenes(self, pfamfile):
        """Insert the per-gene Pfam domain hits from *pfamfile*.

        Calls ``self.makePfamTable()`` first when ``tableExists`` reports no
        "PfamDomains" table.  For each row, everything from the first "." of
        "pfam_acc" onward is stripped, and the result is inserted into
        GenePfamDomains with the row's "locus", "start", "end", "score" and
        "evalue" columns.

        Values are passed as bound parameters rather than spliced into the
        SQL text, so ids containing quotes cannot break the statement.
        Floats are therefore stored at full precision instead of the six
        decimals that "%f" formatting produced.

        NOTE(review): the existence check is on "PfamDomains" while rows go
        into "GenePfamDomains" -- confirm makePfamTable() creates both.
        NOTE(review): under Python 2's sqlite3 default text_factory, bound
        byte strings must be ASCII -- confirm the input files are ASCII.
        """
        if not tableExists(self.cur, "PfamDomains"):
            self.makePfamTable()

        util.tic("add pfam domains")

        pfams = tablelib.read_table(pfamfile)

        for row in pfams:
            # Raw string: "\." is a regex escape, not a string escape.
            name = re.sub(r"\..*$", "", row["pfam_acc"])

            self.cur.execute(
                "INSERT INTO GenePfamDomains VALUES (?, ?, ?, ?, ?, ?);",
                ("%s" % (row["locus"],), name,
                 int(row["start"]), int(row["end"]),
                 float(row["score"]), float(row["evalue"])))
        util.toc()
    def addPfamGenes(self, pfamfile):
        """Load gene-to-Pfam-domain hits from *pfamfile* into GenePfamDomains.

        Calls ``self.makePfamTable()`` first when ``tableExists`` reports no
        "PfamDomains" table.  Each row's "pfam_acc" is cut at its first "."
        and stored with the row's "locus", "start", "end", "score" and
        "evalue" columns.

        The insert uses bound parameters instead of string interpolation, so
        quotes inside an id cannot corrupt the SQL; floats are consequently
        stored unrounded rather than with "%f"'s six decimals.

        NOTE(review): the existence check is on "PfamDomains" while rows go
        into "GenePfamDomains" -- confirm makePfamTable() creates both.
        NOTE(review): under Python 2's sqlite3 default text_factory, bound
        byte strings must be ASCII -- confirm the input files are ASCII.
        """
        if not tableExists(self.cur, "PfamDomains"):
            self.makePfamTable()

        util.tic("add pfam domains")

        pfams = tablelib.read_table(pfamfile)

        insert_sql = "INSERT INTO GenePfamDomains VALUES (?, ?, ?, ?, ?, ?);"

        for row in pfams:
            # Raw string so the backslash reaches the regex engine untouched.
            name = re.sub(r"\..*$", "", row["pfam_acc"])

            values = (
                "%s" % (row["locus"],),
                name,
                int(row["start"]),
                int(row["end"]),
                float(row["score"]),
                float(row["evalue"]),
            )
            self.cur.execute(insert_sql, values)
        util.toc()
Beispiel #23
0
    def test_nheaders(self):

        text = """\
##types:str	int	int
#
# hello
#
name	0	1
matt	123	3
alex	456	2
mike	789	1
"""
        tab = tablelib.read_table(StringIO(text), nheaders=0)

        tab.add_col('extra', bool, False)
        for row in tab:
            row['extra'] = True

        self.assertEquals(set(tab[0].keys()), set([0, 1, 2, 'extra']))
    def test_nheaders(self):

        text = """\
##types:str	int	int
#
# hello
#
name	0	1
matt	123	3
alex	456	2
mike	789	1
"""
        tab = tablelib.read_table(StringIO(text), nheaders=0)

        tab.add_col("extra", bool, False)
        for row in tab:
            row["extra"] = True

        self.assertEquals(set(tab[0].keys()), set([0, 1, 2, "extra"]))
Beispiel #25
0
    def __init__(self, dbfile=None, famfile=None, smapfile=None,
                 genenamefile=None, streefile=None, baseDir=None,
                 treeFileExt=None, fastaFileExt=None):
        """Load the auxiliary data files and open the database.

        Each of *famfile*, *smapfile*, *genenamefile* and *streefile* is
        handed to its reader and the result is kept on the instance; the
        gene-name table is also indexed by its "id" column.  *baseDir*,
        *treeFileExt* and *fastaFileExt* are stored untouched.  *dbfile* is
        opened with ``isolation_level="DEFERRED"``, and both the connection
        and a cursor are retained.
        """
        # Settings that need no processing.
        self.baseDir = baseDir
        self.treeFileExt = treeFileExt
        self.fastaFileExt = fastaFileExt

        # File-backed data.
        self.fams = genecluster.FamilyDb(famfile)
        self.gene2species = phylo.read_gene2species(smapfile)
        genenames = tablelib.read_table(genenamefile)
        self.genenames_tab = genenames
        self.gene2name = genenames.lookup("id")
        self.stree = treelib.read_tree(streefile)

        # Open the database.
        db = sqlite.connect(dbfile, isolation_level="DEFERRED")
        self.con = db
        self.cur = db.cursor()
Beispiel #26
0
    def test_table_types(self):

        # Explict types.
        text = """\
##types:string	int	string	bool
name	num	text	truth
john	-10	1a	true
"""
        tab = tablelib.read_table(StringIO(text), guess_types=False)
        self.assertEquals(tab[0], {
            'text': '1a',
            'num': -10,
            'name': 'john',
            'truth': True
        })

        # Do not guess types, always use string.
        text = """\
name	num	text	truth
john	-10	1a	true
"""
        tab = tablelib.read_table(StringIO(text), guess_types=False)
        self.assertEquals(tab[0], {
            'text': '1a',
            'num': '-10',
            'name': 'john',
            'truth': 'true'
        })

        # Guess types from first row.
        text = """\
name	num	text	truth
john	-10	1a	true
"""
        tab = tablelib.read_table(StringIO(text))
        self.assertEquals(tab[0], {
            'text': '1a',
            'num': -10,
            'name': 'john',
            'truth': True
        })

        # Only specify some types with no guessing.
        tab = tablelib.read_table(StringIO(text),
                                  guess_types=False,
                                  types={'truth': bool})
        self.assertEquals(tab[0], {
            'text': '1a',
            'num': '-10',
            'name': 'john',
            'truth': True
        })

        # Only specify some types with guessing.
        tab = tablelib.read_table(StringIO(text), types={'truth': str})
        self.assertEquals(tab[0], {
            'text': '1a',
            'num': -10,
            'name': 'john',
            'truth': 'true'
        })