예제 #1
0
    if not n.is_leaf():
        n.name = n.S
    else:
        sp_dict[n.name] = n.S


logger.debug("sp_dict")
logger.debug(sp_dict)

# Reconcile the gene tree with the species tree. A new reconciled tree is
# returned together with the list of inferred events.
recon_tree, events = genetree.reconcile(sptree)

ntrees, ndups, sptrees = genetree.get_speciation_trees()
logger.debug("Found %d species trees and %d duplication nodes", ntrees, ndups)

# One line per finding; the first entry is the duplication count.
HomologySummary = ["DUPLICATIONS : " + str(ndups)]

logger.debug("Orthology and Paralogy relationships:")
for ev in events:
    # "S" events relate the in-paralogs to their orthologs, "D" events to
    # their out-paralogs; any other event type is not reported.
    if ev.etype == "S":
        label, partners = "ORTHOLOGY RELATIONSHIP: ", ev.orthologs
    elif ev.etype == "D":
        label, partners = "PARALOGY RELATIONSHIP: ", ev.outparalogs
    else:
        continue

    # Build the line once so the logged text and the stored summary line
    # are always identical.
    relationship = "".join(
        [label, ', '.join(ev.inparalogs), " <===> ", ','.join(partners)])
    logger.debug(relationship)
    HomologySummary.append(relationship)

HomologyFile = OutPrefixName + ".orthologs.txt"
with open(HomologyFile,"w") as File:
예제 #2
0
# Print the orthologs of every speciation ("S") event.
# NOTE(review): `evev` is read here before the assignment further down, so
# it must already be bound by code that precedes this excerpt -- confirm.
for ev in evev:
    if ev.etype == "S":
        print(ev.orthologs)

# find evolutionary events
evev = phy.get_descendant_evol_events(sos_thr=0.9)

# all events
for ev in evev:
    print(ev.etype, ','.join(ev.in_seqs), "<====>", ','.join(ev.out_seqs))


# all events involving either Hsap or Drer
def fseqs(slist):
    """Return the entries of *slist* that start with "Drer" or "Hsap"."""
    return [s for s in slist if s.startswith(("Drer", "Hsap"))]


# duplication events first ...
for ev in evev:
    if ev.etype == "D":
        print('Paralog: ', ','.join(fseqs(ev.in_seqs)), "<====>",
              ','.join(fseqs(ev.out_seqs)))

# ... then speciation events
for ev in evev:
    if ev.etype == "S":
        print('Ortholog:', ','.join(fseqs(ev.in_seqs)), "<====>",
              ','.join(fseqs(ev.out_seqs)))

# split the tree into speciation trees and count the duplication nodes
ntrees, ndups, sptrees = phy.get_speciation_trees()
print("Found %d species trees and %d duplication nodes" % (ntrees, ndups))
for spt in sptrees:
    print(spt)
예제 #3
0
def _build_option_parser():
    """Build the command-line parser for the gene tree splitting script.

    Returns:
        An ``optparse.OptionParser`` declaring every option ``main`` reads.
    """
    usage = "usage: %prog --genetree <genetree-file> --speciestree <speciestree-file> [options]"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('--genetree', help='GeneTree in nhx format')
    parser.add_option('--speciestree', help='Species Tree in nhx format')
    parser.add_option(
        '--ingroup',
        help='Comma-separated list of ingroup species '
        '(required with --split species)')
    parser.add_option(
        '--outgroup',
        help='Comma-separated list of outgroup species '
        '(optional, used with --split species)')
    parser.add_option('--species_format',
                      type='int',
                      default=8,
                      help='Species Tree input format (0-9)')
    parser.add_option('--gene_node',
                      type='int',
                      default=0,
                      help='Gene node format 0=gene_species, 1=species_gene')
    parser.add_option('--gainlose',
                      action='store_true',
                      default=False,
                      help='Find out gene gain/lose')
    parser.add_option('--split',
                      type='choice',
                      choices=['dups', 'treeko', 'species'],
                      dest="split",
                      default='dups',
                      help='Choose GeneTree splitting algorithms')
    parser.add_option('--output_format',
                      type='int',
                      default=9,
                      help='GeneTree output format (0-9)')
    parser.add_option(
        '-d',
        '--dir',
        type='string',
        default="",
        help=
        "Absolute or relative path to output directory. If directory does not exist it will be created"
    )
    return parser


def _write_cluster(tree, cluster_id, out_dir, output_format):
    """Write *tree* to ``<cluster_id>_genetree.nhx``, inside *out_dir* if set."""
    outfile = '{}_genetree.nhx'.format(cluster_id)
    if out_dir:
        outfile = os.path.join(out_dir, outfile)
    with open(outfile, 'w') as f:
        f.write(tree.write(format=output_format))


def main():
    """Split a gene tree into subtrees and write each one to its own file.

    The gene tree is read from ``--genetree``, optionally reconciled with
    ``--speciestree`` (``--gainlose``), and then split according to
    ``--split``:

    * ``dups``    -- the subtrees returned by ``split_by_dups()``;
    * ``treeko``  -- the trees returned by ``get_speciation_trees()``;
    * ``species`` -- recursive descent driven by ``check_ingroup`` and
      ``check_outgroup`` on the ``--ingroup`` / ``--outgroup`` lists.

    Every resulting subtree is written to ``<n>_genetree.nhx`` (numbered
    from 1), inside ``--dir`` when that option is given.

    Exits with a usage error when a required option is missing, and exits
    silently when the gene tree file is empty.
    """
    parser = _build_option_parser()
    options, _ = parser.parse_args()

    if options.dir:
        # exist_ok avoids the race between checking for and creating the
        # directory.
        os.makedirs(options.dir, exist_ok=True)

    if options.genetree is None:
        parser.error(
            "--genetree option must be specified, GeneTree in nhx format")

    # An empty gene tree file produces no output and is not an error.
    if os.stat(options.genetree).st_size == 0:
        sys.exit()

    # Checked before any tree is parsed so that a bad invocation fails with
    # a usage error instead of an AttributeError on None later on.
    if options.split == "species" and options.ingroup is None:
        parser.error(
            "--ingroup option must be specified when using --split species")

    with open(options.genetree, 'r') as f:
        contents = f.read()

    # Remove empty NHX features that can be produced by TreeBest but break ete3
    contents = contents.replace('[&&NHX]', '')

    # reads single gene tree
    genetree = PhyloTree(contents)

    # sets species naming function (only for the gene_species node format)
    if options.gene_node == 0:
        genetree.set_species_naming_function(parse_sp_name)

    # reconcile species tree with gene tree to help find out gene gain/lose
    if options.gainlose:
        if options.speciestree is None:
            parser.error(
                "--speciestree option must be specified, species tree in nhx format"
            )

        # reads species tree
        speciestree = PhyloTree(options.speciestree,
                                format=options.species_format)

        # Removes '*' from species names, which comes from a species tree
        # configured for TreeBest
        for leaf in speciestree:
            leaf.name = leaf.name.strip('*')

        # Only the reconciled tree is used below; the event list is ignored.
        genetree, _ = genetree.reconcile(speciestree)

    if options.split == "dups":
        # splits tree by duplication events, which returns the list of all
        # subtrees resulting from splitting the tree by its duplication nodes.
        for cluster_id, node in enumerate(genetree.split_by_dups(), start=1):
            _write_cluster(node, cluster_id, options.dir,
                           options.output_format)
    elif options.split == "treeko":
        # splits tree using the TreeKO algorithm.
        _, _, sptrees = genetree.get_speciation_trees()

        for cluster_id, spt in enumerate(sptrees, start=1):
            _write_cluster(spt, cluster_id, options.dir,
                           options.output_format)
    elif options.split == "species":
        ingroup = options.ingroup.split(",")
        # --outgroup is optional. A missing or empty value becomes an empty
        # list, which disables the outgroup test below.
        outgroup = options.outgroup.split(",") if options.outgroup else []
        cluster_id = 0

        def split_tree_by_species(tree, ingroup, outgroup):
            """Recurse into *tree* while both checks pass, else write it out."""
            nonlocal cluster_id

            outgroup_ok = check_outgroup(tree, outgroup) if outgroup else True

            if outgroup_ok and check_ingroup(tree, ingroup):
                # NOTE(review): assumes the node has exactly two children;
                # anything else raises ValueError here -- confirm the checks
                # only pass for bifurcating internal nodes.
                child1, child2 = tree.children
                split_tree_by_species(child1, ingroup, outgroup)
                split_tree_by_species(child2, ingroup, outgroup)
            else:
                cluster_id += 1
                _write_cluster(tree, cluster_id, options.dir,
                               options.output_format)

        split_tree_by_species(genetree, ingroup, outgroup)