예제 #1
0
def get_evol_events_from_root(node, sos_thr):
    """ Returns a list of **all** duplication and speciation
    events detected in the tree that contains this node.

    The tree is browsed from its root towards the leaves, level by
    level. Every internal node gets a species overlap score (species
    shared by its two child lineages divided by all species found in
    both lineages). Nodes are assumed to be duplications when that
    score is greater than `sos_thr`, and speciations otherwise. The
    method is described in more detail in:

    "The Human Phylome." Huerta-Cepas J, Dopazo H, Dopazo J, Gabaldon
    T. Genome Biol. 2007;8(6):R109.

    Arguments:
      node:    any node of the tree; the analysis always starts at the
               node returned by `node.get_tree_root()`.
      sos_thr: species overlap score threshold. A node is tagged as a
               duplication only when its score is strictly greater
               than this value.

    Returns:
      A list with one EvolEvent per internal node, in visiting order.

    Raises:
      TypeError: if the root does not have exactly two children, or
                 if any internal node does not have exactly two
                 children.

    Side effects:
      The "evoltype" feature is deleted from every node of the tree
      and then set to "D" or "S" on each internal node.
    """
    # Local import keeps this block self-contained.
    from collections import deque

    # Get the tree's root
    root = node.get_tree_root()

    # Checks that the tree is actually rooted
    outgroups = root.get_children()
    if len(outgroups) != 2:
        raise TypeError("Tree is not rooted")

    # Catch the smaller outgroup (will be stored as the tree outgroup).
    # Sizes are compared by number of distinct leaf names; on a tie the
    # first child is kept.
    o1 = set(n.name for n in outgroups[0].get_leaves())
    o2 = set(n.name for n in outgroups[1].get_leaves())
    if len(o2) < len(o1):
        smaller_outg = outgroups[1]
    else:
        smaller_outg = outgroups[0]

    # Get family size (number of leaves under the root)
    fam_size = len(list(root.get_leaves()))

    # Clean data from previous analyses
    for n in root.get_descendants():
        n.del_feature("evoltype")
    root.del_feature("evoltype")

    # Browse the tree from root to leaves. A deque gives O(1) pops from
    # the left, and looping on the queue (rather than on the node
    # itself) keeps the traversal independent of node truthiness.
    to_visit = deque([root])
    all_events = []
    while to_visit:
        current = to_visit.popleft()
        childs = current.get_children()
        to_visit.extend(childs)

        if not childs:
            continue  # leaf
        if len(childs) != 2:
            # Also covers single-child nodes, which would otherwise fail
            # with an unrelated IndexError further down.
            raise TypeError("nodes are expected to have two childs.")

        # Get leaves and species at both sides of the event
        sideA_leaves = set(childs[0].get_leaves())
        sideB_leaves = set(childs[1].get_leaves())
        sideA_spcs = set(n.species for n in sideA_leaves)
        sideB_spcs = set(n.species for n in sideB_leaves)
        sideA_names = set(n.name for n in sideA_leaves)
        sideB_names = set(n.name for n in sideB_leaves)

        # Calculates species overlap
        overlaped_spcs = sideA_spcs & sideB_spcs
        all_spcs = sideA_spcs | sideB_spcs
        score = float(len(overlaped_spcs)) / len(all_spcs)

        # Creates a new evolEvent
        event = EvolEvent()
        event.fam_size = fam_size
        event.branch_supports = [current.support,
                                 current.children[0].support,
                                 current.children[1].support]
        event.sos = score
        event.outgroup_spcs = smaller_outg.get_species()
        event.node = current
        # Each attribute gets its own set object so that changing one
        # of them later never alters another.
        event.in_seqs = sideA_names
        event.out_seqs = sideB_names
        event.inparalogs = set(sideA_names)

        if score > sos_thr:
            # Species overlap above the threshold: duplication
            event.etype = "D"
            event.outparalogs = set(sideB_names)
            event.orthologs = set([])
        else:
            # Otherwise: speciation
            event.etype = "S"
            event.orthologs = set(sideB_names)
            event.outparalogs = set([])
        current.add_feature("evoltype", event.etype)

        all_events.append(event)
    return all_events
예제 #2
0
파일: spoverlap.py 프로젝트: tarah28/ete
def get_evol_events_from_root(node, sos_thr):
    """ Returns a list of **all** duplication and speciation
    events detected in the tree that contains this node.

    The tree is browsed from its root towards the leaves, level by
    level. Every internal node gets a species overlap score (species
    shared by its two child lineages divided by all species found in
    both lineages). Nodes are assumed to be duplications when that
    score is greater than `sos_thr`, and speciations otherwise. The
    method is described in more detail in:

    "The Human Phylome." Huerta-Cepas J, Dopazo H, Dopazo J, Gabaldon
    T. Genome Biol. 2007;8(6):R109.

    Arguments:
      node:    any node of the tree; the analysis always starts at the
               node returned by `node.get_tree_root()`.
      sos_thr: species overlap score threshold. A node is tagged as a
               duplication only when its score is strictly greater
               than this value.

    Returns:
      A list with one EvolEvent per internal node, in visiting order.

    Raises:
      TypeError: if the root does not have exactly two children, or
                 if any internal node does not have exactly two
                 children.

    Side effects:
      The "evoltype" feature is deleted from every node of the tree
      and then set to "D" or "S" on each internal node.
    """
    # Local import keeps this block self-contained.
    from collections import deque

    # Get the tree's root
    root = node.get_tree_root()

    # Checks that the tree is actually rooted
    outgroups = root.get_children()
    if len(outgroups) != 2:
        raise TypeError("Tree is not rooted")

    # Catch the smaller outgroup (will be stored as the tree outgroup).
    # Sizes are compared by number of distinct leaf names; on a tie the
    # first child is kept.
    o1 = set(n.name for n in outgroups[0].get_leaves())
    o2 = set(n.name for n in outgroups[1].get_leaves())
    if len(o2) < len(o1):
        smaller_outg = outgroups[1]
    else:
        smaller_outg = outgroups[0]

    # Get family size (number of leaves under the root)
    fam_size = len(list(root.get_leaves()))

    # Clean data from previous analyses
    for n in root.get_descendants():
        n.del_feature("evoltype")
    root.del_feature("evoltype")

    # Browse the tree from root to leaves. A deque gives O(1) pops from
    # the left, and looping on the queue (rather than on the node
    # itself) keeps the traversal independent of node truthiness.
    to_visit = deque([root])
    all_events = []
    while to_visit:
        current = to_visit.popleft()
        childs = current.get_children()
        to_visit.extend(childs)

        if not childs:
            continue  # leaf
        if len(childs) != 2:
            # Also covers single-child nodes, which would otherwise fail
            # with an unrelated IndexError further down.
            raise TypeError("nodes are expected to have two childs.")

        # Get leaves and species at both sides of the event
        sideA_leaves = set(childs[0].get_leaves())
        sideB_leaves = set(childs[1].get_leaves())
        sideA_spcs = set(n.species for n in sideA_leaves)
        sideB_spcs = set(n.species for n in sideB_leaves)
        sideA_names = set(n.name for n in sideA_leaves)
        sideB_names = set(n.name for n in sideB_leaves)

        # Calculates species overlap
        overlaped_spcs = sideA_spcs & sideB_spcs
        all_spcs = sideA_spcs | sideB_spcs
        score = float(len(overlaped_spcs)) / len(all_spcs)

        # Creates a new evolEvent
        event = EvolEvent()
        event.fam_size = fam_size
        event.branch_supports = [
            current.support, current.children[0].support,
            current.children[1].support
        ]
        event.sos = score
        event.outgroup_spcs = smaller_outg.get_species()
        event.node = current
        # Each attribute gets its own set object so that changing one
        # of them later never alters another.
        event.in_seqs = sideA_names
        event.out_seqs = sideB_names
        event.inparalogs = set(sideA_names)

        if score > sos_thr:
            # Species overlap above the threshold: duplication
            event.etype = "D"
            event.outparalogs = set(sideB_names)
            event.orthologs = set([])
        else:
            # Otherwise: speciation
            event.etype = "S"
            event.orthologs = set(sideB_names)
            event.outparalogs = set([])
        current.add_feature("evoltype", event.etype)

        all_events.append(event)
    return all_events