def construct_db_of_sister(taxon, DB, outprefix):
    """Dump the sequences for ``taxon`` to temp files and index the table.

    ``mfid`` is called (with ``remove_genomes=True``) to write a sequence
    file ``<outprefix>.tempfa`` and a tab-separated table
    ``<outprefix>.temptable``. The table is then read back line by line and
    two lookups are built from its second and fourth columns.

    Args:
        taxon: Taxon identifier, passed straight through to ``mfid``.
        DB: Database handle/path, passed straight through to ``mfid``.
        outprefix: Prefix for the two temporary output files.

    Returns:
        A 5-tuple ``(outfile, outfiletbl, seq_taxon_dict, taxon_seqs_dict,
        seqs_dict)`` where

        * ``outfile`` / ``outfiletbl`` are the paths that were written,
        * ``seq_taxon_dict`` maps table column 4 (used as the sequence key)
          to table column 2 (used as the taxon key),
        * ``taxon_seqs_dict`` maps column 2 to the set of column-4 values
          seen with it,
        * ``seqs_dict`` is whatever ``seq.read_fasta_file_return_dict``
          returns for ``outfile``.

    Raises:
        IndexError: If a table line has fewer than four tab-separated
            fields.
    """
    outfile = outprefix + ".tempfa"
    outfiletbl = outprefix + ".temptable"
    mfid(taxon, DB, outfile, outfiletbl, remove_genomes=True)

    seq_taxon_dict = {}
    taxon_seqs_dict = {}
    # ``with`` guarantees the handle is released even if a malformed line
    # raises part-way through the file.
    with open(outfiletbl, "r") as table:
        for line in table:
            spls = line.strip().split("\t")
            taxon_key = spls[1]
            seq_key = spls[3]
            seq_taxon_dict[seq_key] = taxon_key
            # setdefault replaces the former try / bare-except
            # initialisation, which would also have hidden unrelated errors.
            taxon_seqs_dict.setdefault(taxon_key, set()).add(seq_key)

    seqs_dict = seq.read_fasta_file_return_dict(outfile)
    return outfile, outfiletbl, seq_taxon_dict, taxon_seqs_dict, seqs_dict
Beispiel #2
0
def add_updated_seqs_to_dir(dirl, tid, DB, outfilehead, internal=False):
    """Write sequences for ``tid`` that are not yet recorded in ``dirl``.

    Every file in ``dirl`` whose name contains ``".table"`` is read and the
    third tab-separated column of each line is collected as an already-known
    id. The current records are then fetched with ``mfid`` (or
    ``mfid_internal`` when ``internal`` is truthy). Records whose id (third
    column of the table row) is not yet known are written to two new files
    next to the main table:

    * ``<main>.table.<outfilehead>`` with the table rows
    * ``<main>.fas.<outfilehead>`` with the matching sequence entries

    where ``<main>.table`` is the file in ``dirl`` whose name *ends* in
    ``".table"``. If there is nothing new, ``"no update"`` is printed to
    stderr and no file is created.

    Args:
        dirl: Directory holding the existing ``*.table*`` files.
        tid: Taxon id, passed through to the fetch function.
        DB: Database handle/path, passed through to the fetch function.
        outfilehead: Suffix appended to the names of the new files.
        internal: Use ``mfid_internal`` instead of ``mfid`` when truthy.

    Raises:
        FileNotFoundError: If new records exist but ``dirl`` contains no
            file ending in ``".table"`` to derive the output names from.
        IndexError: If a table line has fewer than three tab-separated
            fields.
    """
    print(dirl)
    oldids = set()
    maintablename = None
    for fname in os.listdir(dirl):
        # Remember the main table name so the update files can be named
        # after it.
        if fname.endswith(".table"):
            maintablename = fname
        # Read all the tables, including earlier "<main>.table.<suffix>"
        # update files, so previously added ids are not written again.
        if ".table" in fname:
            print("reading:", fname, file=sys.stderr)
            # os.path.join works whether or not dirl ends with a separator.
            with open(os.path.join(dirl, fname), "r") as fl:
                for line in fl:
                    oldids.add(line.strip().split("\t")[2])

    fetch = mfid_internal if internal else mfid
    seqs, tbls = fetch(tid, DB, None, None)

    newid_info = {}  # key is id, value is the full table row
    newid_seq = {}  # key is id, value is the sequence entry
    for seq_entry, row in zip(seqs, tbls):
        rec_id = row.split("\t")[2]
        newid_info[rec_id] = row
        newid_seq[rec_id] = seq_entry

    diff = set(newid_info).difference(oldids)
    if not diff:
        print("no update", file=sys.stderr)
        return

    if maintablename is None:
        # Previously this surfaced as an opaque TypeError from
        # ``dirl + None``.
        raise FileNotFoundError(
            "no file ending in '.table' found in " + repr(dirl) +
            "; cannot name the update files")

    newtable = os.path.join(dirl, maintablename + "." + outfilehead)
    newseqfn = os.path.join(
        dirl, maintablename.replace(".table", ".fas") + "." + outfilehead)
    print("writing:", newtable, file=sys.stderr)
    print("writing:", newseqfn, file=sys.stderr)
    with open(newtable, "w") as newtablef, open(newseqfn, "w") as newseqf:
        # Sorted so the two files have a reproducible, matching row order.
        for rec_id in sorted(diff):
            newtablef.write(newid_info[rec_id] + "\n")
            newseqf.write(newid_seq[rec_id] + "\n")
Beispiel #3
0
    # Nodes that were skipped; anything below a skipped node is skipped too.
    didntmake = set()
    # NOTE(review): the parent checks below only behave as intended if
    # iternodes() yields each parent before its children — confirm.
    for i in tree.iternodes():
        # Skip nodes whose label mentions "unclassified" or "environmental".
        if "unclassified" in i.label:
            didntmake.add(i)
            continue
        if "environmental" in i.label:
            didntmake.add(i)
            continue
        # Skip every node whose parent was skipped.
        if i.parent in didntmake:
            didntmake.add(i)
            continue
        # Keep the untouched label: it supplies the id and the file stem.
        orig = i.label
        # Every node except `tree` itself gets its parent's label prepended,
        # turning the label into a "/"-separated path (presumably `tree` is
        # the root node — verify).
        if i != tree:
            i.label = i.parent.label + "/" + i.label
        # The id is whatever follows the last underscore of the original label.
        tid = orig.split("_")[-1]
        dirr = i.label
        # Nodes without children are fetched with mfid, all others with
        # mfid_in; both write <orig>.fas and <orig>.table under dirl + dirr.
        # NOTE(review): the meaning of the positional True is not visible
        # here — check the mfid / mfid_in signatures.
        if len(i.children) == 0:
            mfid(tid,
                 DB,
                 dirl + dirr + "/" + orig + ".fas",
                 dirl + dirr + "/" + orig + ".table",
                 True,
                 limitlist=taxalist)
        else:
            mfid_in(tid,
                    DB,
                    dirl + dirr + "/" + orig + ".fas",
                    dirl + dirr + "/" + orig + ".table",
                    True,
                    limitlist=taxalist)