# Exercise table output. A typed table is read from `text` and must round-trip
# through write(out, comments=True); write(out) and str(tab) must both produce
# the same text without the "##types" line; repr(tab) must match expected_repr.
# Finally a header-less Table built from a list of rows (nheaders=0, explicit
# headers) is written with nheaders=1 and must come out with the header row.
def test_write(self): text = """\ ##types:string int float bool name num real truth matt -123 10.0 True alex 456 2.5 False mike 789 -30.0 False """ text_no_comments = """\ name num real truth matt -123 10.0 True alex 456 2.5 False mike 789 -30.0 False """ # Write tab = tablelib.read_table(StringIO(text)) out = StringIO() tab.write(out, comments=True) self.assertEqual(out.getvalue(), text) # Write no comments tab = tablelib.read_table(StringIO(text)) out = StringIO() tab.write(out) self.assertEqual(out.getvalue(), text_no_comments) # Write string tab = tablelib.read_table(StringIO(text)) out = StringIO() out.write(str(tab)) self.assertEqual(out.getvalue(), text_no_comments) expected_repr = ("""\ name num real truth \n\ matt -123 10.0000 True \n\ alex 456 2.5000 False \n\ mike 789 -30.0000 False \n\ """) # Write repr tab = tablelib.read_table(StringIO(text)) out = StringIO() out.write(repr(tab)) self.assertEqual(out.getvalue(), expected_repr) # read with no header and write with header expected = ("""\ a b c 1 2 3 4 5 6 """) data = [ [1, 2, 3], [4, 5, 6], ] tab = tablelib.Table(data, nheaders=0, headers=['a', 'b', 'c']) out = StringIO() tab.write(out, nheaders=1) self.assertEqual(out.getvalue(), expected)
# Check how read_table assigns column types: from an explicit "##types" line,
# as plain strings when guess_types=False and no types are given, by guessing
# when called with defaults, and with a partial `types` mapping that overrides
# a single column both with and without guessing.
# NOTE(review): assertEquals is a deprecated alias of assertEqual (removed in
# Python 3.12); other tests in this file already use assertEqual.
def test_table_types(self): # Explicit types. text = """\ ##types:string int string bool name num text truth john -10 1a true """ tab = tablelib.read_table(StringIO(text), guess_types=False) self.assertEquals(tab[0], {"text": "1a", "num": -10, "name": "john", "truth": True}) # Do not guess types, always use string. text = """\ name num text truth john -10 1a true """ tab = tablelib.read_table(StringIO(text), guess_types=False) self.assertEquals(tab[0], {"text": "1a", "num": "-10", "name": "john", "truth": "true"}) # Guess types from first row. text = """\ name num text truth john -10 1a true """ tab = tablelib.read_table(StringIO(text)) self.assertEquals(tab[0], {"text": "1a", "num": -10, "name": "john", "truth": True}) # Only specify some types with no guessing. tab = tablelib.read_table(StringIO(text), guess_types=False, types={"truth": bool}) self.assertEquals(tab[0], {"text": "1a", "num": "-10", "name": "john", "truth": True}) # Only specify some types with guessing. tab = tablelib.read_table(StringIO(text), types={"truth": str}) self.assertEquals(tab[0], {"text": "1a", "num": -10, "name": "john", "truth": "true"})
# Exercise table output. A typed table is read from `text` and must round-trip
# through write(out, comments=True); write(out) and str(tab) must both produce
# the same text without the "##types" line; repr(tab) must match expected_repr.
# Finally a header-less Table built from a list of rows (nheaders=0, explicit
# headers) is written with nheaders=1 and must come out with the header row.
def test_write(self): text = """\ ##types:string int float bool name num real truth matt -123 10.0 True alex 456 2.5 False mike 789 -30.0 False """ text_no_comments = """\ name num real truth matt -123 10.0 True alex 456 2.5 False mike 789 -30.0 False """ # Write tab = tablelib.read_table(StringIO(text)) out = StringIO() tab.write(out, comments=True) self.assertEqual(out.getvalue(), text) # Write no comments tab = tablelib.read_table(StringIO(text)) out = StringIO() tab.write(out) self.assertEqual(out.getvalue(), text_no_comments) # Write string tab = tablelib.read_table(StringIO(text)) out = StringIO() out.write(str(tab)) self.assertEqual(out.getvalue(), text_no_comments) expected_repr = """\ name num real truth \n\ matt -123 10.0000 True \n\ alex 456 2.5000 False \n\ mike 789 -30.0000 False \n\ """ # Write repr tab = tablelib.read_table(StringIO(text)) out = StringIO() out.write(repr(tab)) self.assertEqual(out.getvalue(), expected_repr) # read with no header and write with header expected = """\ a b c 1 2 3 4 5 6 """ data = [[1, 2, 3], [4, 5, 6]] tab = tablelib.Table(data, nheaders=0, headers=["a", "b", "c"]) out = StringIO() tab.write(out, nheaders=1) self.assertEqual(out.getvalue(), expected)
def addGoTerms(self, gofile):
    """Load GO term annotations from `gofile` into the database.

    Creates the GoTerms table through ``makeGoTermsTable`` when it does not
    exist yet.  One GoTerms row is inserted per distinct "goid" (the term
    text is taken from the first row of each goid group), and one
    GeneGoTerms row is inserted per row of the file, keyed by its "orf".

    Values are passed as bound parameters, so term names containing quote
    characters are stored as-is instead of corrupting the SQL statement.

    Args:
        gofile: table readable by ``tablelib.read_table`` with at least the
            columns "orf", "goid" and "term".
    """
    if not tableExists(self.cur, "GoTerms"):
        self.makeGoTermsTable()

    util.tic("add go terms")
    goterms = tablelib.read_table(gofile)
    goterms_lookup = goterms.groupby("orf")
    goterms_bygoid = goterms.groupby("goid")

    # One GoTerms row per GO id; every row of a group carries the same id,
    # so the first row is used as the representative.
    for goid in goterms_bygoid:
        term = goterms_bygoid[goid][0]
        self.cur.execute("INSERT INTO GoTerms VALUES (?, ?)",
                         (term["goid"], term["term"]))

    # One GeneGoTerms row per (gene, GO id) association.
    for gene, terms in goterms_lookup.items():
        for term in terms:
            self.cur.execute("INSERT INTO GeneGoTerms VALUES (?, ?);",
                             (gene, term["goid"]))
    util.toc()
# Read a four-column table whose "##types" line declares str, int, float and
# bool columns, and check that the parsed table compares equal to a list of
# row dicts holding values of those Python types.
def test_read(self): text = """\ ##types:str int float bool name num real truth matt -123 10.0 true alex 456 2.5 false mike 789 -30.0 false """ expected = [ { 'real': 10.0, 'num': -123, 'name': 'matt', 'truth': True }, { 'real': 2.5, 'num': 456, 'name': 'alex', 'truth': False }, { 'real': -30.0, 'num': 789, 'name': 'mike', 'truth': False }, ] tab = tablelib.read_table(StringIO(text)) self.assertEqual(tab, expected)
def addFamilies(self, eventsfile, discard=()):
    """Insert one Families row per family listed in `eventsfile`.

    Creates the Families table through ``makeFamiliesTable`` when it does
    not exist yet.  For every row of the events table the family's tree and
    FASTA file are read to compute the total branch length and the median
    sequence length (multiplied by 3 before being stored).

    Values are passed as bound parameters, so names containing quote
    characters are safe and floats are stored without being rounded to six
    decimals by string formatting.

    Args:
        eventsfile: table readable by ``tablelib.read_table`` with the
            columns "partid", "famrate", "dup", "loss" and "genes".
        discard: iterable of family ids to skip; a message is logged for
            each skipped family.
    """
    if not tableExists(self.cur, "Families"):
        self.makeFamiliesTable()

    util.tic("add families")
    events_tab = tablelib.read_table(eventsfile)
    # NOTE(review): the lookup result is not used below; the call is kept in
    # case lookup() validates the "partid" column (e.g. rejects duplicate
    # ids) -- confirm and drop it if it has no such effect.
    events_tab.lookup("partid")
    familyGeneNames = self.makeFamilyGeneNames()
    discard = set(discard)

    for row in events_tab:
        famid = row["partid"]
        if famid in discard:
            util.logger("discarding '%s'" % famid)
            continue

        tree = treelib.read_tree(self.getTreeFile(famid))
        treelen = sum(x.dist for x in tree)
        seqs = fasta.read_fasta(self.getFastaFile(famid))
        seqlen = stats.median([len(seq) for seq in seqs.values()])

        # Two name fields per family; both default to "" when unknown.
        names = familyGeneNames.get(famid, ("", ""))

        self.cur.execute(
            "INSERT INTO Families VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);",
            (famid,
             names[0],
             float(row["famrate"]),
             float(treelen),
             float(seqlen * 3),
             int(row["dup"]),
             int(row["loss"]),
             int(row["genes"]),
             names[1]))
    util.toc()
def addEvents(self, eventsfile):
    """Insert per-species event counts for every family already in Families.

    Creates the Events table through ``makeEventsTable`` when it does not
    exist yet.  For each famid found in the Families table that also occurs
    in `eventsfile`, one Events row is inserted per node of ``self.stree``,
    holding that node's "-genes", "-dup", "-loss" and "-appear" counts.
    Families missing from the events file are skipped.

    Values are passed as bound parameters instead of being formatted into
    the SQL text.

    Args:
        eventsfile: table readable by ``tablelib.read_table`` with a
            "partid" column and "<node>-genes", "<node>-dup",
            "<node>-loss", "<node>-appear" columns for every species node.
    """
    if not tableExists(self.cur, "Events"):
        self.makeEventsTable()

    util.tic("add events")
    events_tab = tablelib.read_table(eventsfile)
    events_lookup = events_tab.lookup("partid")

    # Materialize the ids first: the same cursor is reused for the INSERTs
    # below, which would otherwise discard the pending SELECT results.
    self.cur.execute("SELECT famid FROM Families;")
    famids = [x[0] for x in self.cur]

    for famid in famids:
        if famid not in events_lookup:
            continue
        row = events_lookup[famid]

        for sp in self.stree.nodes:
            sp = str(sp)
            self.cur.execute(
                "INSERT INTO Events VALUES (?, ?, ?, ?, ?, ?);",
                (famid, sp,
                 int(row[sp + "-genes"]),
                 int(row[sp + "-dup"]),
                 int(row[sp + "-loss"]),
                 int(row[sp + "-appear"])))
    util.toc()
def addEvents(self, eventsfile):
    """Insert per-species event counts for every family already in Families.

    Creates the Events table through ``makeEventsTable`` when it does not
    exist yet.  For each famid found in the Families table that also occurs
    in `eventsfile`, one Events row is inserted per node of ``self.stree``,
    holding that node's "-genes", "-dup", "-loss" and "-appear" counts.
    Families missing from the events file are skipped.

    Values are passed as bound parameters instead of being formatted into
    the SQL text.

    Args:
        eventsfile: table readable by ``tablelib.read_table`` with a
            "partid" column and "<node>-genes", "<node>-dup",
            "<node>-loss", "<node>-appear" columns for every species node.
    """
    if not tableExists(self.cur, "Events"):
        self.makeEventsTable()

    util.tic("add events")
    events_tab = tablelib.read_table(eventsfile)
    events_lookup = events_tab.lookup("partid")

    # Materialize the ids first: the same cursor is reused for the INSERTs
    # below, which would otherwise discard the pending SELECT results.
    self.cur.execute("SELECT famid FROM Families;")
    famids = [x[0] for x in self.cur]

    for famid in famids:
        if famid not in events_lookup:
            continue
        row = events_lookup[famid]

        for sp in self.stree.nodes:
            sp = str(sp)
            self.cur.execute(
                "INSERT INTO Events VALUES (?, ?, ?, ?, ?, ?);",
                (famid, sp,
                 int(row[sp + "-genes"]),
                 int(row[sp + "-dup"]),
                 int(row[sp + "-loss"]),
                 int(row[sp + "-appear"])))
    util.toc()
def addPfams(self, pfamfile):
    """Insert one PfamDomains row per row of `pfamfile`.

    Creates the table through ``makePfamTable`` when PfamDomains does not
    exist yet.  Values are passed as bound parameters, so names and
    descriptions containing quote characters are stored verbatim instead of
    breaking the SQL statement.

    Args:
        pfamfile: table readable by ``tablelib.read_table`` with the columns
            "pfamid", "pfam_name" and "pfam_description".
    """
    if not tableExists(self.cur, "PfamDomains"):
        self.makePfamTable()

    pfams = tablelib.read_table(pfamfile)

    for row in pfams:
        self.cur.execute(
            "INSERT INTO PfamDomains VALUES (?, ?, ?);",
            (row["pfamid"], row["pfam_name"], row["pfam_description"]))
# Sort a three-row table with tab.sort() (no arguments) and check the "name"
# and "num" columns returned by cget(): the expected rows are in ascending
# name order, with each num value still paired with its original name.
def test_sort(self): text = """\ ##types:str int int name num num2 matt 123 3 alex 456 0 mike 789 1 """ tab = tablelib.read_table(StringIO(text)) tab.sort() self.assertEqual(tab.cget("name", "num"), [["alex", "matt", "mike"], [456, 123, 789]])
# read_table must raise TableException for four malformed inputs, in order:
# a repeated header name ("num" twice), a row with an extra field, a value
# that is not an int in a column declared int, and a row with a missing field.
# NOTE(review): the second fixture (extra field) also repeats the "num" header,
# unlike the third and fourth which use "num2", so it would still raise even if
# the extra-field check were broken -- consider renaming the header to "num2".
def test_read_error(self): text = """\ ##types:str int int name num num matt 123 0 alex 456 2 mike 789 1 """ self.assertRaises(tablelib.TableException, lambda: tablelib.read_table(StringIO(text))) text = """\ ##types:str int int name num num matt 123 0 alex 456 2 extra mike 789 1 """ self.assertRaises(tablelib.TableException, lambda: tablelib.read_table(StringIO(text))) text = """\ ##types:str int int name num num2 matt 123 0 alex 456 not_an_int mike 789 1 """ self.assertRaises(tablelib.TableException, lambda: tablelib.read_table(StringIO(text))) text = """\ ##types:str int int name num num2 matt 123 0 alex 456 mike 789 1 """ self.assertRaises(tablelib.TableException, lambda: tablelib.read_table(StringIO(text)))
# Sort a three-row table with tab.sort() (no arguments) and check the "name"
# and "num" columns returned by cget(): the expected rows are in ascending
# name order, with each num value still paired with its original name.
def test_sort(self): text = """\ ##types:str int int name num num2 matt 123 3 alex 456 0 mike 789 1 """ tab = tablelib.read_table(StringIO(text)) tab.sort() self.assertEqual(tab.cget('name', 'num'), [['alex', 'matt', 'mike'], [456, 123, 789]])
# Read a four-column table whose "##types" line declares str, int, float and
# bool columns, and check that the parsed table compares equal to a list of
# row dicts holding values of those Python types.
def test_read(self): text = """\ ##types:str int float bool name num real truth matt -123 10.0 true alex 456 2.5 false mike 789 -30.0 false """ expected = [ {"real": 10.0, "num": -123, "name": "matt", "truth": True}, {"real": 2.5, "num": 456, "name": "alex", "truth": False}, {"real": -30.0, "num": 789, "name": "mike", "truth": False}, ] tab = tablelib.read_table(StringIO(text)) self.assertEqual(tab, expected)
def __init__(self, filename=None, datadir=None, olddatadir=None):
    """Create a family database, optionally loaded from a table file.

    Args:
        filename: path of a family table readable by
            ``tablelib.read_table``.  When None, an empty table with the
            headers "famid" and "genes" is created instead.
        datadir: stored unchanged on the instance as ``self.datadir``.
        olddatadir: stored unchanged on the instance as ``self.olddatadir``.
    """
    # Identity test: only a missing argument selects the empty table, and
    # no custom __ne__ on the filename object can interfere.
    if filename is not None:
        self.famtab = tablelib.read_table(filename)
    else:
        self.famtab = tablelib.Table(headers=["famid", "genes"])

    # famid -> family row
    self.famlookup = self.famtab.lookup("famid")

    # gene -> family row, built by walking every family's gene list
    self.genelookup = {}
    for fam in self:
        for gene in self.getGenes(fam["famid"]):
            self.genelookup[gene] = fam

    self.filename = filename
    self.datadir = datadir
    self.olddatadir = olddatadir
def __init__(self, dbfile=None, famfile=None, smapfile=None,
             genenamefile=None, streefile=None, baseDir=None,
             treeFileExt=None, fastaFileExt=None):
    """Load the input data sets and open the sqlite database.

    Args:
        dbfile: database file handed to ``sqlite.connect``.
        famfile: family file handed to ``genecluster.FamilyDb``.
        smapfile: gene-to-species mapping read by
            ``phylo.read_gene2species``.
        genenamefile: gene name table read by ``tablelib.read_table``; it
            is indexed by its "id" column.
        streefile: species tree read by ``treelib.read_tree``.
        baseDir: stored as ``self.baseDir``.
        treeFileExt: stored as ``self.treeFileExt``.
        fastaFileExt: stored as ``self.fastaFileExt``.
    """
    # Input data.
    self.fams = genecluster.FamilyDb(famfile)
    self.gene2species = phylo.read_gene2species(smapfile)

    gene_names = tablelib.read_table(genenamefile)
    self.genenames_tab = gene_names
    self.gene2name = gene_names.lookup("id")

    self.stree = treelib.read_tree(streefile)

    # File layout settings, kept as given.
    self.baseDir = baseDir
    self.treeFileExt = treeFileExt
    self.fastaFileExt = fastaFileExt

    # Open the database and keep both the connection and a cursor on it.
    connection = sqlite.connect(dbfile, isolation_level="DEFERRED")
    self.con = connection
    self.cur = connection.cursor()
def addPfamGenes(self, pfamfile):
    """Insert one GenePfamDomains row per domain hit listed in `pfamfile`.

    The Pfam accession is stored without its suffix: everything from the
    first "." onwards is removed from the "pfam_acc" value.

    Values are passed as bound parameters, so scores and e-values keep their
    full precision.  Formatting them with "%f" would write very small
    e-values as 0.000000.

    Args:
        pfamfile: table readable by ``tablelib.read_table`` with the columns
            "locus", "pfam_acc", "start", "end", "score" and "evalue".
    """
    # NOTE(review): the guard checks "PfamDomains" while the rows go into
    # "GenePfamDomains"; presumably makePfamTable creates both -- confirm.
    if not tableExists(self.cur, "PfamDomains"):
        self.makePfamTable()

    util.tic("add pfam domains")
    pfams = tablelib.read_table(pfamfile)

    for row in pfams:
        # Raw string so that "\." is a regex escape, not a string escape.
        name = re.sub(r"\..*$", "", row["pfam_acc"])

        self.cur.execute(
            "INSERT INTO GenePfamDomains VALUES (?, ?, ?, ?, ?, ?);",
            (row["locus"], name,
             int(row["start"]), int(row["end"]),
             float(row["score"]), float(row["evalue"])))
    util.toc()
# Read a table with nheaders=0, add a bool column "extra" (default False), set
# it to True on every row, and check that the first row is keyed by the
# integer column indexes 0, 1, 2 plus the added "extra" column.
# NOTE(review): assertEquals is a deprecated alias of assertEqual (removed in
# Python 3.12).
def test_nheaders(self): text = """\ ##types:str int int # # hello # name 0 1 matt 123 3 alex 456 2 mike 789 1 """ tab = tablelib.read_table(StringIO(text), nheaders=0) tab.add_col('extra', bool, False) for row in tab: row['extra'] = True self.assertEquals(set(tab[0].keys()), set([0, 1, 2, 'extra']))
# Read a table with nheaders=0, add a bool column "extra" (default False), set
# it to True on every row, and check that the first row is keyed by the
# integer column indexes 0, 1, 2 plus the added "extra" column.
# NOTE(review): assertEquals is a deprecated alias of assertEqual (removed in
# Python 3.12).
def test_nheaders(self): text = """\ ##types:str int int # # hello # name 0 1 matt 123 3 alex 456 2 mike 789 1 """ tab = tablelib.read_table(StringIO(text), nheaders=0) tab.add_col("extra", bool, False) for row in tab: row["extra"] = True self.assertEquals(set(tab[0].keys()), set([0, 1, 2, "extra"]))
# Check how read_table assigns column types: from an explicit "##types" line,
# as plain strings when guess_types=False and no types are given, by guessing
# when called with defaults, and with a partial `types` mapping that overrides
# a single column both with and without guessing.
# NOTE(review): assertEquals is a deprecated alias of assertEqual (removed in
# Python 3.12); other tests in this file already use assertEqual.
def test_table_types(self): # Explicit types. text = """\ ##types:string int string bool name num text truth john -10 1a true """ tab = tablelib.read_table(StringIO(text), guess_types=False) self.assertEquals(tab[0], { 'text': '1a', 'num': -10, 'name': 'john', 'truth': True }) # Do not guess types, always use string. text = """\ name num text truth john -10 1a true """ tab = tablelib.read_table(StringIO(text), guess_types=False) self.assertEquals(tab[0], { 'text': '1a', 'num': '-10', 'name': 'john', 'truth': 'true' }) # Guess types from first row. text = """\ name num text truth john -10 1a true """ tab = tablelib.read_table(StringIO(text)) self.assertEquals(tab[0], { 'text': '1a', 'num': -10, 'name': 'john', 'truth': True }) # Only specify some types with no guessing. tab = tablelib.read_table(StringIO(text), guess_types=False, types={'truth': bool}) self.assertEquals(tab[0], { 'text': '1a', 'num': '-10', 'name': 'john', 'truth': True }) # Only specify some types with guessing. tab = tablelib.read_table(StringIO(text), types={'truth': str}) self.assertEquals(tab[0], { 'text': '1a', 'num': -10, 'name': 'john', 'truth': 'true' })