Example #1
0
def proctree(r, i=1):
    """
    Merge the source tree rooted at `r` into a taxonomy subgraph, lay the
    result out, and return it as a dict with two lists, `nodes` and
    `links`, holding one plain dict per vertex and per edge.

    `i` is the source-tree identifier passed on to `tg.merge_stree`.
    The taxonomy graph is read from the module-level name `g`.
    """
    tg.map_stree(g, r)

    # gather every taxid that appears on some leaf's rootpath
    rootpath_taxids = set()
    for leaf in r.leaves():
        rootpath_taxids.update(leaf.taxid_rootpath)
    taxg = tg.taxid_new_subgraph(g, rootpath_taxids)

    # boolean property maps handed to merge_stree and later used as the
    # vertex/edge filters of the graph view
    verts = taxg.new_vertex_property('bool')
    edges = taxg.new_edge_property('bool')

    # add stree's nodes and branches into taxonomy graph
    tg.merge_stree(taxg, r, i, verts, edges)

    # connect each 'incertae sedis' leaf to the taxon that contains it,
    # reusing the edge when it is already present
    for leaf in r.leaves():
        if not (leaf.taxid and leaf.taxid in taxg.taxid_vertex
                and leaf.incertae_sedis):
            continue
        container = taxg.taxid_vertex[leaf.taxid]
        found = taxg.edge(container, leaf.v, True)
        if found:
            assert len(found) == 1
            tax_edge = found[0]
        else:
            tax_edge = taxg.add_edge(container, leaf.v)
        taxg.edge_in_taxonomy[tax_edge] = 1

    # view of taxg restricted to what the source tree traces
    gv = tg.graph_view(taxg, vfilt=verts, efilt=edges)
    gv.vertex_strees = taxg.vertex_strees
    gv.edge_strees = taxg.edge_strees

    # --- visual attributes -------------------------------------------
    # edges: colour by (has source trees, is taxonomy edge); wide when
    # at least one source tree uses the edge
    ecolor = taxg.new_edge_property('string')
    ewidth = taxg.new_edge_property('int')
    for e in taxg.edges():
        has_strees = len(taxg.edge_strees[e])
        in_taxonomy = taxg.edge_in_taxonomy[e]
        if not has_strees:
            ecolor[e] = 'yellow'
        elif in_taxonomy:
            ecolor[e] = 'green'
        else:
            ecolor[e] = 'blue'
        ewidth[e] = 3 if has_strees else 1

    # vertices: taxonomy vertices are green, others blue; taxonomy
    # vertices and vertices without out-edges are drawn larger
    vcolor = taxg.new_vertex_property('string')
    vsize = taxg.new_vertex_property('int')
    for v in taxg.vertices():
        in_taxonomy = taxg.vertex_in_taxonomy[v]
        vcolor[v] = 'green' if in_taxonomy else 'blue'
        vsize[v] = 4 if (in_taxonomy or v.out_degree() == 0) else 2

    pos, pin = tg.layout(taxg, gv, gv.root, sfdp=True, deg0=195.0,
                         degspan=150.0, radius=400)

    # pin every vertex that is currently visible in the view
    for v in gv.vertices():
        pin[v] = 1

    # switch on everything that was filtered out so far, marking it red
    # and placing newly shown vertices at the origin
    for e in taxg.edges():
        for endpoint in (e.source(), e.target()):
            if not verts[endpoint]:
                verts[endpoint] = 1
                pos[endpoint] = [0.0, 0.0]
                vcolor[endpoint] = 'red'
        if not edges[e]:
            edges[e] = 1
            ecolor[e] = 'red'
            ewidth[e] = 1.0
            gv.wt[e] = 1.0

    pos = gt.sfdp_layout(gv, pos=pos, pin=pin, eweight=gv.wt, multilevel=False)

    # translate the layout so the smallest x and y become zero
    xmin = min(pos[v][0] for v in gv.vertices())
    ymin = min(pos[v][1] for v in gv.vertices())
    for v in gv.vertices():
        pos[v] = [pos[v][0] - xmin, pos[v][1] - ymin]

    # --- serialise vertices ------------------------------------------
    nodes = []
    idx = {}  # vertex index in the graph -> position in `nodes`
    for position, v in enumerate(gv.vertices()):
        idx[int(v)] = position
        taxid = gv.vertex_taxid[v]
        try:
            name = gv.vertex_name[v]
        except:
            name = gv.taxid_name(taxid) if taxid else ''
        node = {
            'label': name,
            'isleaf': v.out_degree() == 0,
            'strees': list(gv.vertex_strees[v]),
            'altlabel': name,
        }
        if taxid:
            node['taxid'] = taxid
        if pos and pos[v]:
            x, y = pos[v]
            node['x'] = x
            node['y'] = y
            node['fixed'] = True
        node['color'] = vcolor[v]
        node['size'] = vsize[v]
        nodes.append(node)

    # --- serialise edges (endpoints refer to positions in `nodes`) ----
    links = []
    for e in gv.edges():
        link = {
            'source': idx[int(e.source())],
            'target': idx[int(e.target())],
            'strees': list(gv.edge_strees[e]),
            'taxedge': bool(gv.edge_in_taxonomy[e]),
        }
        link['color'] = ecolor[e]
        link['width'] = ewidth[e]
        links.append(link)

    return {'nodes': nodes, 'links': links}
def proc(g, line, merged, probfile, outfile):
    pbtree, s = line.split()
    print 'processing', pbtree
    r = ivy.newick.parse(s) # the root node of the tree of interest
    lvs = r.leaves()
    rps = [] # rootpaths of leaf nodes, where each rootpath is a list
             # of taxids from leaf to root
    leaf_tid_counts = tg.Counter()
    try:
        for lf in lvs:
            # assign/compute attributes of leaves
            w = lf.label.split('_')
            lf.gi = int(w[-2][2:])
            lf.taxid = int(w[-1][2:])
            leaf_tid_counts[lf.taxid] += 1
            if lf.taxid not in g.taxid_vertex and lf.taxid in merged:
                lf.taxid = merged[lf.taxid]
            ## lf.taxv = g.taxid_vertex[lf.taxid]
            taxv = g.taxid_vertex[lf.taxid]
            lf.taxid_next, lf.taxid_back = g.hindex[taxv]
            lf.taxid_rootpath = tg.taxid_rootpath(g, lf.taxid)
            for i, x in enumerate(lf.taxid_rootpath):
                if x not in g.taxid_vertex and x in merged:
                    lf.taxid_rootpath[i] = merged[x]
            rps.append(lf.taxid_rootpath)
    except:
        print '!!! problem assigning leaf taxids'
        probfile.write('%s\n' % pbtree)
        #return []

    r.mrca = tg.rootpath_mrca(rps) # taxid of mrca of all tree's leaves

    taxids = set()
    for rp in rps:
        # trim rootpaths: make them terminate with mrca
        while 1:
            if rp[-1] == r.mrca: break
            else: rp.pop()
        assert rp
        taxids.update(rp)

    # create a taxonomy (sub)graph of only those taxids represented in r
    ## taxidsubg = tg.taxid_subgraph(g, taxids)
    taxidsubg = tg.taxid_new_subgraph(g, taxids)
    taxidsubg.vfilt = taxidsubg.new_vertex_property('bool')

    ## r.taxv = taxidsubg.taxid_vertex[r.mrca]

    # no need to check for convexity for singleton tip taxa
    for x in [ taxidsubg.taxid_vertex[lf.taxid] for lf in lvs
               if leaf_tid_counts[lf.taxid]==1 ]:
        taxidsubg.vfilt[x] = 0
    
    # an undirected graph having the same topology as r, used for
    # checking convexity of taxa
    treegraph = tg.gt.Graph(directed=False)
    treegraph.mrca = r.mrca
    print 'mrca:', g.taxid_name(r.mrca)
    treegraph.vertex_taxid = tg.get_or_create_vp(treegraph, 'taxid', 'int')
    ## treegraph.vertex_taxv = tg.get_or_create_vp(treegraph, 'taxv', 'int')
    v2lf = {}
    N = len(r)
    verts = treegraph.add_vertex(N)
    for n in r: # for each node in r
        # store its treegraph vertex
        n.v = verts.next()
        if not n.children:
            treegraph.vertex_taxid[n.v] = n.taxid
            ## treegraph.vertex_taxv[n.v] = int(n.taxv)
            v2lf[n.v] = n
        if n.parent:
            treegraph.add_edge(n.parent.v, n.v)

    treegraph_leaves = [ x for x in treegraph.vertices() if x.out_degree()==1 ]
    convex = {} # for storing the convex subgraphs
    def traverse(taxv):
        """
        `taxv` is a vertex in the taxonomy graph. This function checks whether
        it is convex in `treegraph`; if yes, stores the info in
        `convex`; if no, it recursively checks descendants of `taxv` for
        convexity
        """
        tid = taxidsubg.vertex_taxid[taxv]
        print 'checking', tid, taxidsubg.vertex_name[taxv]
        p, c = color_vertices(g, treegraph, tid)
        if len(c)==1 and len(c[1])==1: # taxv/tid is convex
            print '...success'
            rv = c[1][0] # rv is the root of the convex subtree
            treegraph.set_vertex_filter(p)
            ## lvs = [ x for x in treegraph.vertices() if x.out_degree()==1 ]
            lvs = [ x for x in treegraph_leaves if p[x] ]
            if len(lvs) > 2:
                # we are only interested in convex subgraphs having
                # more than 2 leaves
                rootpaths = []
                for lf in lvs:
                    ti = treegraph.vertex_taxid[lf]
                    tv = taxidsubg.taxid_vertex[ti]
                    if not taxidsubg.incertae_sedis[tv]:
                        rootpaths.append(tg.taxid_rootpath(taxidsubg, ti))
                if rootpaths:
                    mrca = tg.rootpath_mrca(rootpaths)
                    print 'traverse: mrca', mrca
                    ancv = [taxidsubg.taxid_vertex[mrca]]
                    while ancv[-1] != taxv:
                        # STRANGE EDGE CASES HERE
                        try: ancv.append(ancv[-1].in_neighbours().next())
                        except StopIteration: pass

                    k = '.'.join([ str(taxidsubg.vertex_taxid[x])
                                   for x in ancv ])
                    convex[k] = (rv, p)
            treegraph.set_vertex_filter(None)
        else:
            treegraph.set_vertex_filter(None)
            for n in taxv.out_neighbours():
                traverse(n)

    for v in taxidsubg.root.out_neighbours(): traverse(v)
    ## print 'done'

    def make_newick(root, seen):
        children = [ x for x in root.out_neighbours() if x not in seen ]
        if children:
            seen.update(children)
            s = '(%s)' % ','.join(
                [ make_newick(c, seen) for c in children ]
                )
        else:
            s = v2lf[root].label.replace(',','').replace('(','').replace(')','')
        return s
        
    newicks = []

    for k, (root, p) in convex.items():
        treegraph.set_vertex_filter(p)
        s = make_newick(root, set([root]))
        treegraph.set_vertex_filter(None)
        names = ','.join([ g.taxid_name(int(x)) for x in k.split('.') ])
        outfile.write('%s\t%s\t%s\t%s;\n' % (pbtree, k, names, s))
        print 'wrote subtree:', names

    for n in r.postiter():
        n.parent = None; del n.children
Example #3
0
# Index the tree, then decode every node label.  A leaf label is split
# on '_': the first field is the snode id, the second (when present,
# non-empty and not 'None') is the taxid.  Any other node's label is the
# snode id on its own.
ivy.tree.index(r)
for n in r:
    if not n.isleaf:
        n.snode_id = int(n.label)
        continue
    v = n.label.split('_')
    n.snode_id = int(v[0])
    if len(v) > 1 and v[1] and v[1] != 'None':
        n.taxid = int(v[1])
    else:
        n.taxid = None
r.stree = stree

tg.map_stree(g, r)

# collect the taxids found on the rootpath of any leaf
taxids = set()
for lf in r.leaves():
    taxids |= set(lf.taxid_rootpath)

# taxg: a new graph containing only the taxids in stree
taxg = tg.taxid_new_subgraph(g, taxids)

# boolean properties that will record the vertices and edges traced by r
verts, edges = (taxg.new_vertex_property('bool'),
                taxg.new_edge_property('bool'))

# put stree's nodes and branches into the taxonomy graph; afterwards
# verts and edges filter the paths traced by r in taxg
tg.merge_stree(taxg, r, stree, verts, edges)

# next, add taxonomy edges to taxg connecting 'incertae sedis'
# leaves in stree to their containing taxa
for lf in r.leaves():
    if lf.taxid and lf.taxid in taxg.taxid_vertex and lf.incertae_sedis:
Example #4
0
# Index the tree and read the ids encoded in the node labels: a leaf
# label is '_'-separated with the snode id first and an optional taxid
# second (missing, empty or 'None' means no taxid); the label of any
# other node is just the snode id.
ivy.tree.index(r)
for n in r:
    if not n.isleaf:
        n.snode_id = int(n.label)
        continue
    v = n.label.split('_')
    n.snode_id = int(v[0])
    if len(v) > 1 and v[1] and v[1] != 'None':
        n.taxid = int(v[1])
    else:
        n.taxid = None
r.stree = stree

tg.map_stree(g, r)

# union of the rootpath taxids of all leaves
taxids = set()
for lf in r.leaves():
    taxids |= set(lf.taxid_rootpath)

# new graph containing only the taxids in stree
taxg = tg.taxid_new_subgraph(g, taxids)

# these boolean properties store the vertices and edges traced by r
verts = taxg.new_vertex_property('bool')
edges = taxg.new_edge_property('bool')

# merge stree's nodes and branches into the taxonomy graph, which leaves
# verts and edges filtering the paths traced by r in taxg
tg.merge_stree(taxg, r, stree, verts, edges)

# next, add taxonomy edges to taxg connecting 'incertae sedis'
# leaves in stree to their containing taxa
for lf in r.leaves():
    if lf.taxid and lf.taxid in taxg.taxid_vertex and lf.incertae_sedis:
def build_json(choice):
    """Convert every cached source tree of one taxonomy into a JSON graph file.

    `choice` selects the taxonomy: "1" loads the OTT taxonomy and reads
    'trees/ott-treecache.txt'; "2" loads the NCBI taxonomy and reads
    'trees/ncbi-treecache.txt'.  Each treecache row is split on ':' into
    a tree id and a newick string.

    Rows whose id is listed in 'trees/tree_blacklist.txt' and trees with
    more than 5000 leaves are skipped.  Every other tree is merged into a
    taxonomy subgraph, laid out, serialised with `tg.graph_json`
    (prefixed with a timestamp field) and written to
    'tree_<id>.JSON' inside a directory named after the taxonomy.

    A tree that raises during conversion is recorded and skipped; the
    collected error strings are pickled to 'error_log.txt' at the end.

    Raises ValueError if `choice` is neither "1" nor "2".
    """
    if choice == "1":
        ## Loads a graph with the OTT taxonomy
        taxonomy = "ott"
        print("Loading OTT taxonomy into graph...")
        g = tg.load_taxonomy_graph('taxonomy/ott2.2/ott2.2.xml.gz')
        print("OTT taxonomy Graph loaded successfully.")
        print("Loading ott-treecache file...")
        treecache = 'trees/ott-treecache.txt'
    elif choice == "2":
        ## Loads a graph with the NCBI taxonomy
        taxonomy = "ncbi"
        print("Loading NCBI taxonomy into graph...")
        g = tg.load_taxonomy_graph('taxonomy/ncbi/ncbi.xml.gz')
        print("NCBI taxonomy Graph loaded successfully.")
        print("Loading ncbi-treecache file...")
        treecache = 'trees/ncbi-treecache.txt'
    else:
        # fail early: without a taxonomy there is nothing to load or convert
        raise ValueError('choice must be "1" (OTT) or "2" (NCBI), got %r'
                         % (choice,))

    ## Read all of the entries in the treecache file; the file is closed
    ## before the conversion loop starts.
    with open(treecache, 'r') as datafile:
        data = list(datafile)
    print("Loaded.")

    errors = []

    ## The tree blacklist names problematic trees that cause crashes based on
    ## strange formatting issues; they are ignored until that can be resolved.
    print("Loading tree blacklist...")
    with open('trees/tree_blacklist.txt', 'r') as tree_blacklist:
        # a set, because it is probed once per treecache row
        blacklist = set(tree.strip() for tree in tree_blacklist)
    print("Loaded.")

    for row in data:  # one stree per row: build the graph, write the JSON file
        active_tree = row.split(":")  # tree id, newick string

        if active_tree[0] in blacklist:  ## if a tree is in the blacklist, ignore it.
            print("Tree %s is being ignored as it is black listed." % active_tree[0])
            continue

        stree = int(active_tree[0])  # convert tree id string into int
        # read the tree, dropping any extraneous ? characters
        r = ivy.tree.read(active_tree[1].replace("?", ""))
        leafcount = 0
        r.ladderize()
        ivy.tree.index(r)
        for n in r:
            if n.isleaf:
                leafcount = leafcount + 1
                v = n.label.split('_')
                n.snode_id = int(v[0])
                n.taxid = int(v[1]) if (len(v) > 1 and
                                        v[1] and v[1] != 'None') else None
            else:
                n.snode_id = int(n.label)

        # trees with more than 5000 leaves will not display correctly in graph form
        if leafcount > 5000:
            print("Tree has more than 5000 leaves. No graph will be generated.")
            continue

        try:  # one malformed tree must not stop the whole batch
            r.stree = stree
            tg.map_stree(g, r)
            taxids = set()
            for lf in r.leaves():
                taxids.update(lf.taxid_rootpath)
            taxg = tg.taxid_new_subgraph(g, taxids)
            # taxg is a new graph containing only the taxids in stree

            # these properties will store the vertices and edges that are traced
            # by r
            verts = taxg.new_vertex_property('bool')
            edges = taxg.new_edge_property('bool')

            # add stree's nodes and branches into taxonomy graph
            tg.merge_stree(taxg, r, stree, verts, edges)
            # verts and edges now filter the paths traced by r in taxg

            # next, add taxonomy edges to taxg connecting 'incertae sedis'
            # leaves in stree to their containing taxa
            # NOTE(review): a taxid missing from taxg.taxid_vertex raises
            # KeyError here, which the handler below records as a tree error
            for lf in r.leaves():
                if lf.taxid and lf.incertae_sedis:
                    taxv = taxg.taxid_vertex[lf.taxid]
                    ev = taxg.edge(taxv, lf.v, True)
                    if ev:
                        assert len(ev) == 1
                        e = ev[0]
                    else:
                        e = taxg.add_edge(taxv, lf.v)
                    taxg.edge_in_taxonomy[e] = 1

            # make a view of taxg that keeps only the vertices and edges traced by
            # the source tree
            gv = tg.graph_view(taxg, vfilt=verts, efilt=edges)
            gv.vertex_strees = taxg.vertex_strees
            gv.edge_strees = taxg.edge_strees

            # the following code sets up the visualization
            ecolor = taxg.new_edge_property('string')
            for e in taxg.edges():
                est = taxg.edge_strees[e]
                eit = taxg.edge_in_taxonomy[e]
                if len(est) and not eit: ecolor[e] = 'blue'
                elif len(est) and eit: ecolor[e] = 'green'
                else: ecolor[e] = 'yellow'

            ewidth = taxg.new_edge_property('int')
            for e in taxg.edges():
                est = taxg.edge_strees[e]
                if len(est): ewidth[e] = 3
                else: ewidth[e] = 1

            vcolor = taxg.new_vertex_property('string')
            for v in taxg.vertices():
                if not taxg.vertex_in_taxonomy[v]: vcolor[v] = 'blue'
                else: vcolor[v] = 'green'

            vsize = taxg.new_vertex_property('int')
            for v in taxg.vertices():
                if taxg.vertex_in_taxonomy[v] or v.out_degree() == 0:
                    vsize[v] = 8
                else: vsize[v] = 2

            pos, pin = tg.layout(taxg, gv, gv.root, sfdp=True, deg0=195.0,
                                 degspan=150.0, radius=400)

            for v in gv.vertices(): pin[v] = 1

            # show everything that was filtered out so far, marked in red
            for e in taxg.edges():
                src = e.source()
                tgt = e.target()
                if not verts[src]:
                    verts[src] = 1
                    pos[src] = [0.0, 0.0]
                    vcolor[src] = 'red'
                if not verts[tgt]:
                    verts[tgt] = 1
                    pos[tgt] = [0.0, 0.0]
                    vcolor[tgt] = 'red'
                if not edges[e]:
                    edges[e] = 1
                    ecolor[e] = 'red'
                    ewidth[e] = 1.0
                    gv.wt[e] = 1.0

            pos = gt.sfdp_layout(gv, pos=pos, pin=pin, eweight=gv.wt, multilevel=False)

            ### Use function in TreeGraph.py to parse Graph(gv) into JSON
            print("Generating JSON...")
            result = tg.graph_json(gv, pos=pos, ecolor=ecolor, ewidth=ewidth,
                                   vcolor=vcolor, vsize=vsize)
            result = result[1:]  # strip the original { so the time stamp can be inserted
            # NOTE(review): %I is the 12-hour clock and no AM/PM marker is
            # written, so a morning and an afternoon run can produce the same
            # stamp; %H may have been intended -- confirm with the consumer
            date = time.strftime("%Y%m%d%I%M%S")
            treeid = str(stree)  # convert stree int into a string
            timestamp = "{\"timestamp\": \"%s\", " % date
            final_result = timestamp + result  # date goes first for later parsing
            path = str(os.path.dirname(os.path.realpath(__file__)))
            # drops the last 8 characters of this script's directory;
            # presumably the name of the sub-directory holding the script
            path = path[:-8]
            path = "%s//%s/" % (path, taxonomy)  # directory to write the file to
            filename = "%stree_%s.JSON" % (path, treeid)  # full file name
            if not os.path.exists(path):  ## if directory doesn't exist, create it.
                os.makedirs(path)

            # the context manager guarantees the file is flushed and closed
            with open(filename, 'w') as f:
                f.write(final_result)
            print("Done.")

        except Exception:
            # Exception rather than a bare except, so Ctrl-C can still stop
            # the batch.  Only the exception type is recorded.
            err = str(sys.exc_info()[0])
            treeid = str(stree)
            print("Error: %s</p>" % err)
            # rough hack to store trees with errors and the general error
            errors.append("Error: " + err + " on Tree: " + treeid)
            ## carry on converting the rest of the trees

    print("JSON Generation Complete.")

    ## write the error strings to a log file for review later
    ## (the list is pickled, despite the .txt file name)
    if errors:
        with open("error_log.txt", "w+") as error_log:
            pickle.dump(errors, error_log)
def build_json(choice):
    """Convert every cached source tree of one taxonomy into a JSON graph file.

    `choice` selects the taxonomy: "1" loads the OTT taxonomy and reads
    'trees/ott-treecache.txt'; "2" loads the NCBI taxonomy and reads
    'trees/ncbi-treecache.txt'.  Each treecache row is split on ':' into
    a tree id and a newick string.

    Rows whose id is listed in 'trees/tree_blacklist.txt' and trees with
    more than 5000 leaves are skipped.  Every other tree is merged into a
    taxonomy subgraph, laid out, serialised with `tg.graph_json`
    (prefixed with a timestamp field) and written to
    'tree_<id>.JSON' inside a directory named after the taxonomy.

    A tree that raises during conversion is recorded and skipped; the
    collected error strings are pickled to 'error_log.txt' at the end.

    Raises ValueError if `choice` is neither "1" nor "2".
    """
    if choice == "1":
        ## Loads a graph with the OTT taxonomy
        taxonomy = "ott"
        print("Loading OTT taxonomy into graph...")
        g = tg.load_taxonomy_graph('taxonomy/ott2.2/ott2.2.xml.gz')
        print("OTT taxonomy Graph loaded successfully.")
        print("Loading ott-treecache file...")
        treecache = 'trees/ott-treecache.txt'
    elif choice == "2":
        ## Loads a graph with the NCBI taxonomy
        taxonomy = "ncbi"
        print("Loading NCBI taxonomy into graph...")
        g = tg.load_taxonomy_graph('taxonomy/ncbi/ncbi.xml.gz')
        print("NCBI taxonomy Graph loaded successfully.")
        print("Loading ncbi-treecache file...")
        treecache = 'trees/ncbi-treecache.txt'
    else:
        # fail early: without a taxonomy there is nothing to load or convert
        raise ValueError('choice must be "1" (OTT) or "2" (NCBI), got %r'
                         % (choice,))

    ## Read all of the entries in the treecache file; the file is closed
    ## before the conversion loop starts.
    with open(treecache, 'r') as datafile:
        data = list(datafile)
    print("Loaded.")

    errors = []

    ## The tree blacklist names problematic trees that cause crashes based on
    ## strange formatting issues; they are ignored until that can be resolved.
    print("Loading tree blacklist...")
    with open('trees/tree_blacklist.txt', 'r') as tree_blacklist:
        # a set, because it is probed once per treecache row
        blacklist = set(tree.strip() for tree in tree_blacklist)
    print("Loaded.")

    for row in data:  # one stree per row: build the graph, write the JSON file
        active_tree = row.split(":")  # tree id, newick string

        if active_tree[0] in blacklist:  ## if a tree is in the blacklist, ignore it.
            print("Tree %s is being ignored as it is black listed." %
                  active_tree[0])
            continue

        stree = int(active_tree[0])  # convert tree id string into int
        # read the tree, dropping any extraneous ? characters
        r = ivy.tree.read(active_tree[1].replace("?", ""))
        leafcount = 0
        r.ladderize()
        ivy.tree.index(r)
        for n in r:
            if n.isleaf:
                leafcount = leafcount + 1
                v = n.label.split('_')
                n.snode_id = int(v[0])
                n.taxid = int(v[1]) if (len(v) > 1 and v[1]
                                        and v[1] != 'None') else None
            else:
                n.snode_id = int(n.label)

        # trees with more than 5000 leaves will not display correctly in graph form
        if leafcount > 5000:
            print("Tree has more than 5000 leaves. No graph will be generated.")
            continue

        try:  # one malformed tree must not stop the whole batch
            r.stree = stree
            tg.map_stree(g, r)
            taxids = set()
            for lf in r.leaves():
                taxids.update(lf.taxid_rootpath)
            taxg = tg.taxid_new_subgraph(g, taxids)
            # taxg is a new graph containing only the taxids in stree

            # these properties will store the vertices and edges that are traced
            # by r
            verts = taxg.new_vertex_property('bool')
            edges = taxg.new_edge_property('bool')

            # add stree's nodes and branches into taxonomy graph
            tg.merge_stree(taxg, r, stree, verts, edges)
            # verts and edges now filter the paths traced by r in taxg

            # next, add taxonomy edges to taxg connecting 'incertae sedis'
            # leaves in stree to their containing taxa
            # NOTE(review): a taxid missing from taxg.taxid_vertex raises
            # KeyError here, which the handler below records as a tree error
            for lf in r.leaves():
                if lf.taxid and lf.incertae_sedis:
                    taxv = taxg.taxid_vertex[lf.taxid]
                    ev = taxg.edge(taxv, lf.v, True)
                    if ev:
                        assert len(ev) == 1
                        e = ev[0]
                    else:
                        e = taxg.add_edge(taxv, lf.v)
                    taxg.edge_in_taxonomy[e] = 1

            # make a view of taxg that keeps only the vertices and edges traced by
            # the source tree
            gv = tg.graph_view(taxg, vfilt=verts, efilt=edges)
            gv.vertex_strees = taxg.vertex_strees
            gv.edge_strees = taxg.edge_strees

            # the following code sets up the visualization
            ecolor = taxg.new_edge_property('string')
            for e in taxg.edges():
                est = taxg.edge_strees[e]
                eit = taxg.edge_in_taxonomy[e]
                if len(est) and not eit: ecolor[e] = 'blue'
                elif len(est) and eit: ecolor[e] = 'green'
                else: ecolor[e] = 'yellow'

            ewidth = taxg.new_edge_property('int')
            for e in taxg.edges():
                est = taxg.edge_strees[e]
                if len(est): ewidth[e] = 3
                else: ewidth[e] = 1

            vcolor = taxg.new_vertex_property('string')
            for v in taxg.vertices():
                if not taxg.vertex_in_taxonomy[v]: vcolor[v] = 'blue'
                else: vcolor[v] = 'green'

            vsize = taxg.new_vertex_property('int')
            for v in taxg.vertices():
                if taxg.vertex_in_taxonomy[v] or v.out_degree() == 0:
                    vsize[v] = 8
                else:
                    vsize[v] = 2

            pos, pin = tg.layout(taxg,
                                 gv,
                                 gv.root,
                                 sfdp=True,
                                 deg0=195.0,
                                 degspan=150.0,
                                 radius=400)

            for v in gv.vertices():
                pin[v] = 1

            # show everything that was filtered out so far, marked in red
            for e in taxg.edges():
                src = e.source()
                tgt = e.target()
                if not verts[src]:
                    verts[src] = 1
                    pos[src] = [0.0, 0.0]
                    vcolor[src] = 'red'
                if not verts[tgt]:
                    verts[tgt] = 1
                    pos[tgt] = [0.0, 0.0]
                    vcolor[tgt] = 'red'
                if not edges[e]:
                    edges[e] = 1
                    ecolor[e] = 'red'
                    ewidth[e] = 1.0
                    gv.wt[e] = 1.0

            pos = gt.sfdp_layout(gv,
                                 pos=pos,
                                 pin=pin,
                                 eweight=gv.wt,
                                 multilevel=False)

            ### Use function in TreeGraph.py to parse Graph(gv) into JSON
            print("Generating JSON...")
            result = tg.graph_json(gv,
                                   pos=pos,
                                   ecolor=ecolor,
                                   ewidth=ewidth,
                                   vcolor=vcolor,
                                   vsize=vsize)
            result = result[1:]  # strip the original { so the time stamp can be inserted
            # NOTE(review): %I is the 12-hour clock and no AM/PM marker is
            # written, so a morning and an afternoon run can produce the same
            # stamp; %H may have been intended -- confirm with the consumer
            date = time.strftime("%Y%m%d%I%M%S")
            treeid = str(stree)  # convert stree int into a string
            timestamp = "{\"timestamp\": \"%s\", " % date
            final_result = timestamp + result  # date goes first for later parsing
            path = str(os.path.dirname(os.path.realpath(__file__)))
            # drops the last 8 characters of this script's directory;
            # presumably the name of the sub-directory holding the script
            path = path[:-8]
            path = "%s//%s/" % (path, taxonomy)  # directory to write the file to
            filename = "%stree_%s.JSON" % (path, treeid)  # full file name
            if not os.path.exists(path):  ## if directory doesn't exist, create it.
                os.makedirs(path)

            # the context manager guarantees the file is flushed and closed
            with open(filename, 'w') as f:
                f.write(final_result)
            print("Done.")

        except Exception:
            # Exception rather than a bare except, so Ctrl-C can still stop
            # the batch.  Only the exception type is recorded.
            err = str(sys.exc_info()[0])
            treeid = str(stree)
            print("Error: %s</p>" % err)
            # rough hack to store trees with errors and the general error
            errors.append("Error: " + err + " on Tree: " + treeid)
            ## carry on converting the rest of the trees

    print("JSON Generation Complete.")

    ## write the error strings to a log file for review later
    ## (the list is pickled, despite the .txt file name)
    if errors:
        with open("error_log.txt", "w+") as error_log:
            pickle.dump(errors, error_log)