Exemple #1
0
def _read_first_line(tar, member):
    """Return the first line of tar *member* as a native ``str``."""
    handle = tar.extractfile(member)
    try:
        line = handle.readline()
    finally:
        handle.close()
    # Python 3 hands back bytes from tar members; Python 2 already gives str,
    # so this branch is only taken where decoding is actually needed.
    if not isinstance(line, str):
        line = line.decode("utf-8")
    return line


def phylomedump_tree_iterator( tarfn,verbose=0 ):
    """PhylomeDB all_trees.tar.gz dump treeobj generator.

    Walks the archive members in order. Every ``*.nw`` member is loaded as a
    ``PhyloTree`` and tagged with ``seedid`` and ``method`` taken from its file
    name (``Phy000CWA9_YEAST.JTT.nw`` --> ``Phy000CWA9_YEAST``, ``JTT``).
    Every ``*.lk`` member is read as a tab-separated ``seedid, method, lk``
    line and matched against the most recently loaded tree; when it matches
    and the likelihood is non-zero, the tree gets an ``lk`` attribute and is
    yielded.

    Args:
        tarfn: path to the tar archive; opened as gzip when it ends with
            ``.gz``, otherwise with tarfile's default read mode.
        verbose: when truthy, progress and a final summary (including
            ``ru_maxrss``) are written to stderr.

    Yields:
        PhyloTree objects carrying ``seedid``, ``method`` and ``lk``.

    Problematic entries (a ``.lk`` with no preceding tree, a malformed ``.lk``
    line, zero likelihood, seedid/method mismatch) are reported on stderr and
    skipped rather than aborting the iteration.
    """
    mode = "r:gz" if tarfn.endswith(".gz") else "r"

    i = k = 0
    # Most recently parsed tree; stays None until the first .nw member is seen.
    t = None
    # The context manager guarantees the archive is closed, also when the
    # consumer stops iterating early and the generator gets closed.
    with tarfile.open(tarfn, mode) as tar:
        for m in tar:
            if not m.isfile():
                continue
            #load tree
            if m.name.endswith(".nw"):
                i += 1
                t = PhyloTree(_read_first_line(tar, m))
                #Phy000CWA9_YEAST.JTT.nw --> Phy000CWA9_YEAST JTT
                seedid, method = os.path.basename(m.name).split(".")[:2]
                t.seedid = seedid
                t.method = method
            #or add lk to the treeobj after checking seedid and method
            elif m.name.endswith(".lk"):
                if t is None:
                    # A likelihood file with no tree before it cannot be paired.
                    sys.stderr.write( " Err: No tree loaded before likelihood file: %s\n" % m.name )
                    continue
                try:
                    seedid, method, lk = _read_first_line(tar, m).split('\t')[:3]
                    lk = float(lk)
                except ValueError:
                    # Fewer than three fields or a non-numeric likelihood.
                    sys.stderr.write( " Err: Malformed likelihood file: %s\n" % m.name )
                    continue
                t.lk = lk
                if not t.lk:
                    sys.stderr.write( " Err: Zero likelihood (%s) for: %s\n" % (t.lk, ", ".join((t.seedid, t.method))))
                    continue
                if seedid!=t.seedid or t.method != method:
                    sys.stderr.write( " Err: Seedid and/or method doesn't match: %s\n" % ", ".join((seedid, t.seedid, method, t.method)))
                    continue
                k += 1
                if verbose and not i%100:
                    sys.stderr.write( "  %6i\r" % i )

                yield t
    if verbose:
        sys.stderr.write( " %s out of %s trees succesfully parsed [memory: %s KB]\n" % (k, i, resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))