예제 #1
0
def compute_median(dtl_recon_graph, event_scores, postorder_mapping_nodes,
                   mpr_roots):
    """
    :param dtl_recon_graph: A dictionary representing a DTL Recon Graph.
    :param event_scores: A dictionary with event nodes as keys and values corresponding to the frequency of
    that events in MPR space for the recon graph
    :param postorder_mapping_nodes: A list of the mapping nodes in a possible MPR, except sorted first in
    postorder by species node and postorder by gene node
    :param mpr_roots: A list of mapping nodes that could act as roots to an MPR for the species and
    gene trees in question, output from the findBestRoots function in DTLReconGraph.py
    :return: A new dictionary which is has the same form as a DTL reconciliation graph except every
    mapping node only has one event node, along with the number of median reconciliations for the given DTL
    reconciliation graph, as well as the root of the median MPR for the given graph. Thus, this graph will
    represent a single reconciliation: the median reconciliation.
    """

    # Note that for a symmetric median reconciliation, each frequency must have 0.5 subtracted from it

    # Initialize a dict that will store the running total frequency sum incurred up to the given mapping node,
    # and the event node that directly gave it that frequency sum. Keys are mapping nodes, values are tuples
    # consisting of a list of event nodes that maximize the frequency - 0.5 sum score for the lower level,
    # and the corresponding running total frequency - 0.5 sum up to that mapping node
    sum_freqs = dict()

    # Loop over all mapping nodes for the gene tree
    for map_node in postorder_mapping_nodes:

        # Contemporaneous events need to be caught from the get-go
        if dtl_recon_graph[map_node] == [('C', (None, None), (None, None))]:
            sum_freqs[map_node] = ([('C', (None, None), (None, None))], 0.5
                                   )  # C events have freq 1, so 1 - 0.5 = 0.5
            continue  # Contemporaneous events should be a lone event in a list, so we move to the next mapping node

        # Get the events for the current mapping node and their running (frequency - 0.5) sums, in a list
        events = list()
        for event in dtl_recon_graph[map_node]:

            # Note that 'event' is of the form: ('event ID', 'Child 1', 'Child 2'), so the 0th element is the event
            # ID and the 1st and 2nd elements are the children produced by the event
            if event[
                    0] == 'L':  # Losses produce only one child, so we only need to look to one lower mapping node
                events.append(
                    (event,
                     sum_freqs[event[1]][1] + event_scores[event] - 0.5))
            else:  # Only other options are T, S, and D, which produce two children
                events.append(
                    (event, sum_freqs[event[1]][1] + sum_freqs[event[2]][1] +
                     event_scores[event] - 0.5))

        # Find and save the max (frequency - 0.5) sum
        max_sum = max(events, key=itemgetter(1))[1]

        # Initialize list to find all events that gives the current mapping node the best (freq - 0.5) sum
        best_events = list()

        # Check to see which event(s) produce the max (frequency - 0.5) sum
        for event in events:
            if event[1] == max_sum:
                best_events.append(event[0])

        # Help out the garage collector by discarding the now-useless non-optimal events list
        del events

        # Save the result for this mapping node so it can be used in higher mapping nodes in the graph
        sum_freqs[map_node] = (best_events[:], max_sum)

    # Get all possible roots of the graph and their running frequency scores, in a list, for later use
    possible_root_combos = [(root, sum_freqs[root][1]) for root in mpr_roots]

    # Find the best frequency - 0.5 sum for all of the potential roots for the median
    best_sum = max(possible_root_combos, key=itemgetter(1))[1]

    # Find all of the root combos for a median by filtering out the roots that don't give the best freq - 0.5 sum
    best_root_combos = list(
        [x for x in possible_root_combos if x[1] == best_sum])

    # Extract just the roots from the previously filtered out list
    best_roots = [root[0] for root in best_root_combos]

    # Adjust the sum_freqs dictionary so we can use it with the buildDTLReconGraph function from DTLReconGraph.py
    for map_node in sum_freqs:

        # We place the event tuples into lists so they work well with the diameter algorithm
        sum_freqs[map_node] = sum_freqs[map_node][
            0]  # Only use the events, no longer the associated frequency sum

    # Use the buildDTLReconGraph function from DTLReconGraph.py to find the median recon graph
    # Note that build_dtl... requires a list of the best roots for a reconciliation graph, the events for each
    # mapping node that are viable for an MPR (in our case, the median), and an empty dicitonary to populate
    # as the final return value
    med_recon_graph = DTLReconGraph.build_dtl_recon_graph(
        best_roots, sum_freqs, {})

    # Check to make sure the median is a subgraph of the DTL reconciliation
    assert check_subgraph(
        dtl_recon_graph,
        med_recon_graph), 'Median is not a subgraph of the recon graph!'

    # We can use this function to find the number of medians once we've got the final median recon graph
    n_med_recons = DTLReconGraph.count_mprs_wrapper(best_roots,
                                                    med_recon_graph)

    return med_recon_graph, n_med_recons, best_roots
예제 #2
0
 def count_mprs(g):
     # Find the mapping nodes involving the gene root
     roots = [k for k in list(g.keys()) if k[0] == gene_root]
     return DTLReconGraph.count_mprs_wrapper(roots, g)