if not n.is_leaf(): n.name = n.S else: sp_dict[n.name] = n.S logger.debug("sp_dict") logger.debug(sp_dict) # Let's reconcile our genetree with the species tree recon_tree, events = genetree.reconcile(sptree) # a new "reconcilied tree" is returned. As well as the list of # inferred events. ntrees, ndups, sptrees = genetree.get_speciation_trees() logger.debug( "Found %d species trees and %d duplication nodes", ntrees, ndups) HomologySummary=["DUPLICATIONS : " + str(ndups) ] logger.debug( "Orthology and Paralogy relationships:") for ev in events: if ev.etype == "S": logger.debug("".join(['ORTHOLOGY RELATIONSHIP:', ','.join(ev.inparalogs), " <===> ", ','.join(ev.orthologs)])) HomologySummary.append("".join(['ORTHOLOGY RELATIONSHIP: ', ', '.join(ev.inparalogs), " <===> ", ','.join(ev.orthologs)])) elif ev.etype == "D": logger.debug("".join(['PARALOGY RELATIONSHIP: ', ', '.join(ev.inparalogs), " <===>" , ','.join(ev.outparalogs)])) HomologySummary.append("".join(['PARALOGY RELATIONSHIP: ', ', '.join(ev.inparalogs), " <===> ", ','.join(ev.outparalogs)])) HomologyFile = OutPrefixName + ".orthologs.txt" with open(HomologyFile,"w") as File:
# Report the ortholog sets of every speciation event currently held in `evev`
# (at this point `evev` must already be bound by code outside this fragment).
for ev in evev:
    if ev.etype == "S":
        print(ev.orthologs)

# Find evolutionary events.
evev = phy.get_descendant_evol_events(sos_thr=0.9)

# Dump every event: its type plus both sides of the relationship.
for ev in evev:
    inner_names = ','.join(ev.in_seqs)
    outer_names = ','.join(ev.out_seqs)
    print(ev.etype, inner_names, "<====>", outer_names)


def fseqs(slist):
    """Keep only the names in `slist` that start with "Drer" or "Hsap"."""
    return [s for s in slist if s.startswith(("Drer", "Hsap"))]


# Events restricted to Hsap/Drer sequences: all duplication ("D") events are
# printed first, then all speciation ("S") events.
for etype, label in (("D", 'Paralog: '), ("S", 'Ortholog:')):
    for ev in evev:
        if ev.etype == etype:
            print(label, ','.join(fseqs(ev.in_seqs)), "<====>",
                  ','.join(fseqs(ev.out_seqs)))

# Obtain duplication events and the resulting speciation trees.
ntrees, ndups, sptrees = phy.get_speciation_trees()
print("Found %d species trees and %d duplication nodes" % (ntrees, ndups))
for spt in sptrees:
    print(spt)
def _parse_species_list(value):
    """Split a comma-separated option value into a list of names.

    ``None`` and the empty string both give an empty list, and empty items
    (for example from a trailing comma) are dropped, so callers can test the
    result for emptiness directly.
    """
    if not value:
        return []
    return [name for name in value.split(",") if name]


def _write_cluster(node, cluster_id, out_dir, output_format):
    """Write ``node`` to ``<cluster_id>_genetree.nhx``, under ``out_dir`` if set."""
    outfile = '{}_genetree.nhx'.format(cluster_id)
    if out_dir:
        outfile = os.path.join(out_dir, outfile)
    with open(outfile, 'w') as f:
        f.write(node.write(format=output_format))


def _iter_species_clusters(tree, ingroup, outgroup):
    """Yield the subtrees produced by the "species" splitting strategy.

    A node is split into its children when ``check_ingroup`` accepts it and,
    if an outgroup was given, ``check_outgroup`` accepts it too; otherwise the
    node itself is yielded as one cluster. Subtrees are yielded depth-first,
    children in order. Both predicates are defined elsewhere in this module.

    Nodes without children are always yielded as-is, and nodes with any
    number of children are handled (not only strictly binary ones).
    """
    outgroup_ok = check_outgroup(tree, outgroup) if outgroup else True
    if outgroup_ok and check_ingroup(tree, ingroup) and tree.children:
        for child in tree.children:
            yield from _iter_species_clusters(child, ingroup, outgroup)
    else:
        yield tree


def main():
    """Command-line entry point: split one gene tree into cluster files.

    Reads the gene tree given by ``--genetree``, optionally reconciles it with
    the species tree (``--gainlose``), splits it with the strategy selected by
    ``--split`` and writes every resulting subtree to
    ``<n>_genetree.nhx`` (numbered from 1), inside ``--dir`` when provided.

    Exits through ``parser.error`` (status 2) when a required option is
    missing, and exits silently when the gene tree file is empty.
    """
    usage = "usage: %prog --genetree <genetree-file> --speciestree <speciestree-file> [options]"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('--genetree', help='GeneTree in nhx format')
    parser.add_option('--speciestree', help='Species Tree in nhx format')
    parser.add_option(
        '--ingroup',
        help='Comma-separated list of ingroup species '
        '(required with --split species)')
    parser.add_option(
        '--outgroup',
        help='Comma-separated list of outgroup species '
        '(optional, used with --split species)')
    parser.add_option('--species_format', type='int', default=8,
                      help='Species Tree input format (0-9)')
    parser.add_option('--gene_node', type='int', default=0,
                      help='Gene node format 0=gene_species, 1=species_gene')
    parser.add_option('--gainlose', action='store_true', default=False,
                      help='Find out gene gain/lose')
    parser.add_option('--split', type='choice',
                      choices=['dups', 'treeko', 'species'], dest="split",
                      default='dups',
                      help='Choose GeneTree splitting algorithms')
    parser.add_option('--output_format', type='int', default=9,
                      help='GeneTree output format (0-9)')
    parser.add_option(
        '-d', '--dir', type='string', default="",
        help="Absolute or relative path to output directory. "
        "If directory does not exist it will be created")

    options, _ = parser.parse_args()

    # Validate the option combination before touching the filesystem so that
    # a bad invocation fails with a usage message instead of a traceback.
    if options.genetree is None:
        parser.error(
            "--genetree option must be specified, GeneTree in nhx format")

    ingroup = _parse_species_list(options.ingroup)
    outgroup = _parse_species_list(options.outgroup)
    if options.split == "species" and not ingroup:
        parser.error(
            "--ingroup option must be specified when using --split species")

    if options.dir and not os.path.exists(options.dir):
        os.makedirs(options.dir)

    # An empty gene tree file is not an error: there is simply nothing to do.
    if os.stat(options.genetree).st_size == 0:
        sys.exit()

    with open(options.genetree, 'r') as f:
        contents = f.read()

    # Remove empty NHX features that can be produced by TreeBest but break ete3
    contents = contents.replace('[&&NHX]', '')

    # reads single gene tree
    genetree = PhyloTree(contents)

    # sets species naming function
    if options.gene_node == 0:
        genetree.set_species_naming_function(parse_sp_name)

    # reconcile species tree with gene tree to help find out gene gain/lose
    if options.gainlose:
        if options.speciestree is None:
            parser.error(
                "--speciestree option must be specified, species tree in nhx format"
            )

        # reads species tree
        speciestree = PhyloTree(options.speciestree,
                                format=options.species_format)

        # Removes '*' from species names, which comes from species trees
        # configured for TreeBest
        for leaf in speciestree:
            leaf.name = leaf.name.strip('*')

        # reconcile() also returns the inferred events, which are not needed
        genetree, _events = genetree.reconcile(speciestree)

    if options.split == "dups":
        # list of all subtrees resulting from splitting the tree by its
        # duplication nodes
        clusters = genetree.split_by_dups()
    elif options.split == "treeko":
        # splits tree using the TreeKO algorithm
        _ntrees, _ndups, clusters = genetree.get_speciation_trees()
    else:
        # "species" is the only remaining value allowed by the option parser
        clusters = _iter_species_clusters(genetree, ingroup, outgroup)

    for cluster_id, node in enumerate(clusters, start=1):
        _write_cluster(node, cluster_id, options.dir, options.output_format)