Example #1
0
def read_dlcoal_recon(filename, stree,
                      exts={"coal_tree": ".coal.tree",
                            "coal_recon": ".coal.recon",
                            "locus_tree": ".locus.tree",
                            "locus_recon": ".locus.recon",
                            "daughters": ".daughters"
                            },
                      filenames={}):
    """Reads a reconciled gene tree from files

    filename  -- common path prefix; an input without an explicit entry in
                 `filenames` is read from filename + exts[key]
    stree     -- species tree passed to phylo.read_recon_events when reading
                 the locus reconciliation
    exts      -- maps each input key to the suffix appended to `filename`
    filenames -- optional explicit path per input key; takes precedence over
                 filename + exts[key]

    Returns (coal_tree, extra), where extra is a dict with the keys
    "locus_tree", "coal_recon", "locus_recon", "locus_events" and
    "daughters".

    The default `exts` and `filenames` dicts are shared between calls; this
    function only reads them.
    """

    def path_for(key):
        # Build the fallback path only when it is needed, so an input that
        # is overridden in `filenames` requires neither exts[key] nor a
        # string `filename`.
        if key in filenames:
            return filenames[key]
        return filename + exts[key]

    extra = {}

    # trees
    coal_tree = treelib.read_tree(path_for("coal_tree"))
    extra["locus_tree"] = treelib.read_tree(path_for("locus_tree"))

    # recons
    # the second value returned for the coal reconciliation is discarded
    extra["coal_recon"], _ = phylo.read_recon_events(
        path_for("coal_recon"), coal_tree, extra["locus_tree"])
    extra["locus_recon"], extra["locus_events"] = phylo.read_recon_events(
        path_for("locus_recon"), extra["locus_tree"], stree)

    # daughters: every entry of the daughters file is used as a key into the
    # locus tree's node table
    locus_nodes = extra["locus_tree"].nodes
    extra["daughters"] = set(
        locus_nodes[x] for x in util.read_strings(path_for("daughters")))

    return coal_tree, extra
Example #2
0
    def read(self, filename, stree,
             exts={"coal_tree": ".coal.tree",
                   "coal_recon": ".coal.recon",
                   "locus_tree": ".locus.tree",
                   "locus_recon": ".locus.recon",
                   "daughters": ".daughters"
                   },
             filenames={},
             check=True):
        """Reads a reconciled gene tree from files

        filename  -- common path prefix; an input without an explicit entry
                     in `filenames` is read from filename + exts[key]
        stree     -- species tree passed to phylo.read_recon_events when
                     reading the locus reconciliation
        exts      -- maps each input key to the suffix appended to `filename`
        filenames -- optional explicit path per input key; takes precedence
                     over filename + exts[key]
        check     -- if true, self.is_valid(coal_tree) must hold after
                     loading, otherwise AssertionError is raised

        Sets self.locus_tree, self.coal_recon, self.locus_recon,
        self.locus_events and self.daughters, and returns
        (coal_tree, self.get_dict()).

        The default `exts` and `filenames` dicts are shared between calls;
        this method only reads them.
        """

        def path_for(key):
            # Build the fallback path only when it is needed, so an input
            # that is overridden in `filenames` requires neither exts[key]
            # nor a string `filename`.
            if key in filenames:
                return filenames[key]
            return filename + exts[key]

        # trees
        coal_tree = treelib.read_tree(path_for("coal_tree"))
        self.locus_tree = treelib.read_tree(path_for("locus_tree"))

        # recons
        # the second value returned for the coal reconciliation is discarded
        self.coal_recon, _ = phylo.read_recon_events(
            path_for("coal_recon"), coal_tree, self.locus_tree)
        self.locus_recon, self.locus_events = phylo.read_recon_events(
            path_for("locus_recon"), self.locus_tree, stree)

        # daughters: every entry of the daughters file is used as a key into
        # the locus tree's node table
        self.daughters = set(
            self.locus_tree.nodes[x]
            for x in util.read_strings(path_for("daughters")))

        # Raise explicitly instead of using an assert statement: assert
        # statements are removed under `python -O`, which would silently
        # disable the check the caller asked for.
        if check and not self.is_valid(coal_tree):
            raise AssertionError("reconciliation read from files is not valid")

        return coal_tree, self.get_dict()
Example #3
0
    def read(
            self,
            filename,
            stree,
            exts={
                "coal_tree": ".coal.tree",
                "coal_recon": ".coal.recon",
                "locus_tree": ".locus.tree",
                "locus_recon": ".locus.recon",
                "daughters": ".daughters"
            },
            filenames={},
            check=True):
        """Reads a reconciled gene tree from files

        Arguments:
            filename: common path prefix. An input that has no explicit
                entry in `filenames` is read from filename + exts[key].
            stree: species tree passed to phylo.read_recon_events when
                reading the locus reconciliation.
            exts: maps each input key to the suffix appended to `filename`.
            filenames: optional explicit path per input key; takes
                precedence over filename + exts[key].
            check: if true, self.is_valid(coal_tree) must hold after
                loading, otherwise AssertionError is raised.

        Returns:
            The tuple (coal_tree, self.get_dict()).

        Side effects:
            Sets self.locus_tree, self.coal_recon, self.locus_recon,
            self.locus_events and self.daughters.

        The default `exts` and `filenames` dicts are shared between calls;
        this method only reads them.
        """

        def resolve(key):
            # Only fall back to filename + exts[key] when no explicit path
            # was given, so overridden inputs need neither an exts entry nor
            # a string `filename`.
            try:
                return filenames[key]
            except KeyError:
                return filename + exts[key]

        # trees
        coal_tree = treelib.read_tree(resolve("coal_tree"))
        locus_tree = treelib.read_tree(resolve("locus_tree"))
        self.locus_tree = locus_tree

        # recons
        # the second value returned for the coal reconciliation is discarded
        self.coal_recon, _ = phylo.read_recon_events(
            resolve("coal_recon"), coal_tree, locus_tree)
        self.locus_recon, self.locus_events = phylo.read_recon_events(
            resolve("locus_recon"), locus_tree, stree)

        # daughters: every entry of the daughters file is used as a key into
        # the locus tree's node table
        daughter_keys = util.read_strings(resolve("daughters"))
        self.daughters = set(locus_tree.nodes[x] for x in daughter_keys)

        # An explicit raise (rather than an assert statement) keeps the
        # requested validation active under `python -O`.
        if check and not self.is_valid(coal_tree):
            raise AssertionError("reconciliation read from files is not valid")

        return coal_tree, self.get_dict()
# Load the species tree and the gene tree.  The gene-to-species map is not
# read here; it is only loaded in the fallback branch below, when neither a
# brecon file nor a recon file was given.
stree = treelib1.read_tree(conf.stree)
tree = treelib1.read_tree(conf.tree)

# optional name table handed through to the drawing call
snames = dict(util.read_delim(conf.names)) if conf.names else None

if conf.brecon:
    # a brecon file was supplied: read it directly
    brecon = phylo.read_brecon(conf.brecon, tree, stree)
else:
    if conf.recon:
        # a recon file was supplied: read the reconciliation and events
        recon, events = phylo.read_recon_events(conf.recon, tree, stree)
    else:
        # nothing supplied: reconcile the gene tree against the species tree
        gene2species = phylo.read_gene2species(conf.smap)
        recon = phylo.reconcile(tree, stree, gene2species)
        events = phylo.label_events(tree, recon)

    # both non-brecon paths are converted to a brecon the same way
    brecon = phylo.recon_events2brecon(recon, events)

phylo.add_implied_spec_nodes_brecon(tree, brecon)

transsvg.draw_tree(
    tree, brecon, stree,
    filename=conf.output,
    snames=snames)


def _locus_fits_hemiplasy_pattern(sps, species, species1, species2):
    """Return True if the species found in one locus match the search pattern.

    The pattern holds when, for some pair (sp1, sp2) taken position-wise from
    species1/species2, exactly one member of the pair occurs in `sps` and at
    least one species from `species` outside that pair also occurs in `sps`.
    """
    present = set(sps)
    for sp1, sp2 in zip(species1, species2):
        # both present or both absent: this pair cannot match
        if (sp1 in present) == (sp2 in present):
            continue
        # the locus must also exist somewhere outside the pair
        for other in species:
            if other != sp1 and other != sp2 and other in present:
                return True
    return False


def _first_hemiplasy_row(famid, dataPath, stree, species, species1, species2):
    """Return the output fields for the first matching locus of one family.

    Returns a list of strings (family id, locus, comma-joined species,
    comma-joined genes, species-tree location of the locus, name of the
    LCA of the genes in the locus tree), or None when the family's dup-recon
    file is empty or no locus matches.
    """
    famFilename = dataPath + '%d/%d-dup.dlcoal.dlcpar.recon' % (famid, famid)

    # an empty file means there is nothing to process for this family
    if os.stat(famFilename).st_size == 0:
        return None

    # read the locus tree and the reconciliation file
    tree_filename = dataPath + '%d/%d.dlcoal.locus.tree' % (famid, famid)
    recon_filename = dataPath + '%d/%d.dlcoal.locus.recon' % (famid, famid)
    tree = treelib.read_tree(tree_filename)  # locus tree
    # NOTE(review): the reconciliation/events returned here are not used
    # below; the call is kept so the file is still read (and any error it
    # raises still surfaces) -- confirm whether it can be dropped.
    phylo.read_recon_events(recon_filename, tree, stree)

    # locus -> location in the species tree where the locus was created
    locus_sname = {}
    dupFilename = dataPath + '%d/%d.dlcoal.dlcpar.dup.rel.txt' % (famid, famid)
    for line in util.open_stream(dupFilename):
        # four tab-separated columns are required; only two are used
        locus, gns1, gns2, sname = line.rstrip().split('\t')
        locus_sname[locus] = sname

    # locus -> list of (gene, species) in that locus; locus "1" is skipped
    locus_dict = collections.defaultdict(list)
    for line in util.open_stream(famFilename):
        gn, sp, locus = line.rstrip().split('\t')
        if locus == "1":
            continue
        locus_dict[locus].append((gn, sp))

    # .items() instead of .iteritems() so the loop runs on Python 2 and 3
    for locus, lst in locus_dict.items():
        sps = [sp for (gn, sp) in lst]
        if not _locus_fits_hemiplasy_pattern(sps, species, species1, species2):
            continue

        # keep only entries whose gene name is not purely numeric
        # (presumably numeric names are internal nodes -- TODO confirm)
        leaves = [(gn, sp) for (gn, sp) in lst if not gn.isdigit()]
        leaf_gns = [gn for (gn, sp) in leaves]
        leaf_sps = [sp for (gn, sp) in leaves]

        gnodes = [tree.nodes[name] for name in leaf_gns]
        lca = treelib.lca(gnodes)

        # only the first matching locus of a family is reported
        return [str(famid), locus, ','.join(leaf_sps), ','.join(leaf_gns),
                locus_sname[locus], lca.name]

    return None


def hemiplasyConditions(numFamilies = 5351,
                        dataPath = '/home/muddcs15/research/work/hemiplasy/data/real-fungi/',
                        outputFile = '/home/muddcs15/research/work/hemiplasy/results/hemiplasy-loss.txt',
                        spectree = '/home/muddcs15/research/work/hemiplasy/data/config/fungi.stree'):
    """Scan gene families for loci that match the hemiplasy search pattern.

    numFamilies -- families with ids 0 .. numFamilies-1 are examined
    dataPath    -- string prefix prepended to '<famid>/<famid>...' file names
                   (used by plain concatenation, so a directory needs its
                   trailing separator)
    outputFile  -- path of the tab-separated report; created/truncated
    spectree    -- path of the species tree file

    For every family with a non-empty '<famid>-dup.dlcoal.dlcpar.recon' file,
    the first locus (other than locus "1") that is present in exactly one
    species of some two-leaf species-tree node and also in a species outside
    that pair is written as one line to `outputFile`.  The number of such
    families is printed at the end.
    """
    # the with-block closes the report even if reading a family raises
    with open(outputFile, 'w') as output:
        count = 0

        # all species, plus the two children of every species-tree node that
        # has exactly two leaves, kept in two parallel lists
        stree = treelib.read_tree(spectree)  # species tree
        species = stree.leaf_names()
        species1 = []
        species2 = []
        for node in stree:
            if len(node.leaves()) == 2:
                species1.append(node.children[0].name)
                species2.append(node.children[1].name)

        # range() instead of xrange() so the loop runs on Python 2 and 3
        for famid in range(numFamilies):
            row = _first_hemiplasy_row(famid, dataPath, stree,
                                       species, species1, species2)
            if row is None:
                continue
            output.write('\t'.join(row))
            output.write('\n')
            count += 1

    # single-argument form prints the same text on Python 2 and Python 3
    print("Total number of true cases = %s" % count)
#=============================================================================
# parse options, load the inputs, obtain a brecon and draw the tree

conf, args = o.parse_args()

# The gene-to-species map is not loaded up front; it is only read in the
# fallback branch further down.
stree = treelib1.read_tree(conf.stree)
tree = treelib1.read_tree(conf.tree)

# optional name table handed through to the drawing call
snames = None
if conf.names:
    snames = dict(util.read_delim(conf.names))

if conf.brecon:
    # a brecon file was supplied: read it directly
    brecon = phylo.read_brecon(conf.brecon, tree, stree)
else:
    if not conf.recon:
        # no recon file either: reconcile the gene tree and label events
        gene2species = phylo.read_gene2species(conf.smap)
        recon = phylo.reconcile(tree, stree, gene2species)
        events = phylo.label_events(tree, recon)
    else:
        # a recon file was supplied: read the reconciliation and events
        recon, events = phylo.read_recon_events(conf.recon, tree, stree)

    # either way, convert reconciliation + events into a brecon
    brecon = phylo.recon_events2brecon(recon, events)

phylo.add_implied_spec_nodes_brecon(tree, brecon)

transsvg.draw_tree(
    tree,
    brecon,
    stree,
    filename=conf.output,
    snames=snames)