Example #1
0
def main():
    """
    Command line entry point: parse the arguments, load the input named by
    ``args.filename`` and run the functionality the user selected.
    :return: nothing. A functionality value that matches none of the
        branches below falls through without doing anything.
    """
    args = process_arg()
    newick_data = getInput(args.filename)
    selected = args.functionality

    # Each branch reads only the options that belong to its own functionality,
    # so the attribute lookups stay inside the branch that needs them.
    if selected == "costscape":
        costscape.solve(newick_data, args.dl, args.dh, args.tl, args.th, args)
        return
    if selected == "reconcile":
        DTLReconGraph.reconcile_noninter(newick_data, args.d, args.t, args.l)
        return
    if selected == "histogram":
        HistogramMain.compute_pdv(args.filename, newick_data, args.d, args.t,
                                  args.l, args)
        return
    if selected == "clumpr":
        ClusterMain.perform_clustering(newick_data, args.d, args.t, args.l,
                                       args.k, args)
        return
Example #2
0
 def test_topological_order(self):
     """
     Test topological_order by generating host and parasite trees of different sizes and going through
     each reconciliation in the reconciliation graph to see if the order generated by topological_order
     is a topological order. We skip the reconciliations that are temporally inconsistent.
     """
     count = 0
     for tree_size in self.size_range:
         tree_size_Folder = '%s/size%d' % (self.generated_dir_path,
                                           tree_size)
         for newick in os.listdir(tree_size_Folder):
             # Stop scanning this folder once enough examples have been tested
             if count >= self.num_examples_to_test:
                 break
             # Ignore directory entries whose name starts with a dot
             if newick.startswith('.'):
                 continue
             recon_input = newickFormatReader.getInput(
                 os.path.join(tree_size_Folder, newick))
             host_tree = recon_input.host_tree
             parasite_tree = recon_input.parasite_tree
             # Try every combination of D, T, L costs in {1, 2, 3, 4}
             for d, t, l in itertools.product(range(1, 5), repeat=3):
                 recon_graph, _, _, best_roots = DTLReconGraph.DP(
                     recon_input, d, t, l)
                 for reconciliation, _ in HistogramAlgTools.BF_enumerate_MPRs(
                         recon_graph, best_roots):
                     temporal_graph = recon_builder.build_temporal_graph(
                         host_tree, parasite_tree, reconciliation)
                     ordering_dict = recon_builder.topological_order(
                         temporal_graph)
                     # None marks a temporally inconsistent reconciliation;
                     # use an identity test so that an empty (falsy) ordering
                     # is still checked
                     if ordering_dict is not None:
                         self.check_topological_order(
                             temporal_graph, ordering_dict)
             count += 1
Example #3
0
def calc_histogram(tree_data,
                   d,
                   t,
                   l,
                   time_it,
                   normalize=False,
                   zero_loss=False):
    """
    Compute the PDV from a .newick file
    :param tree_data <ReconInput> - Output of newickFormatReader.getInput()
    :param d <float> - the cost of a duplication
    :param t <float> - ^^ transfer
    :param l <float> - ^^ loss
    :param time_it <bool> - collect timing info
    :param normalize <bool> - normalize the histogram by the size of the gene tree
    :param zero_loss <bool> - ignore loss events
    :return diameter_alg_hist <Histogram> - the PDV for the given .newick
    :return elapsed <float> - the time in seconds it took to run the histogram
        algorithm (reconciliation and tree reformatting are not included).
        None if time_it is False
    """
    # From the newick tree create the reconciliation graph. The MPR count and
    # the best roots are also returned but are not needed here.
    edge_species_tree, edge_gene_tree, dtl_recon_graph, _mpr_count, _best_roots \
        = DTLReconGraph.reconcile(tree_data, d, t, l)

    # Reformat the host and parasite tree to use it with the histogram algorithm
    gene_tree, gene_tree_root, _ = Diameter.reformat_tree(
        edge_gene_tree, "pTop")
    species_tree, _, _ = Diameter.reformat_tree(edge_species_tree, "hTop")

    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # interval cannot be distorted by system clock adjustments
    start = time.perf_counter() if time_it else None
    # Calculate the histogram via histogram algorithm; the same reconciliation
    # graph is supplied for both graph arguments
    diameter_alg_hist = HistogramAlg.diameter_algorithm(
        species_tree, gene_tree, gene_tree_root, dtl_recon_graph,
        dtl_recon_graph, False, zero_loss)
    elapsed = (time.perf_counter() - start) if time_it else None

    if normalize:
        # Number of internal gene tree nodes
        gene_tree_nodes = int(math.ceil(len(gene_tree) / 2.0))
        diameter_alg_hist = diameter_alg_hist.xscale(1.0 /
                                                     (2 * gene_tree_nodes))
    return diameter_alg_hist, elapsed
Example #4
0
def get_tree_info(newick, d, t, l):
    """
    Run the DTL reconciliation once and collect everything derived from it.
    :param newick <ReconInput>: Output of newickFormatReader.getInput()
    :params d,t,l <float> - the relative DTL costs
    :return gene_tree <tree>
    :return species_tree <tree>
    :return gene_root <node>
    :return dtl_recon_graph <recon_graph>
    :return mpr_count <int> - the number of MPRs for the recon graph
    :return best_roots [<mapping_node>] - the sources of the recon graph
    """
    # Build the reconciliation graph from the parsed newick input
    species_edges, gene_edges, recon_graph, n_mprs, roots = \
        DTLReconGraph.reconcile(newick, d, t, l)

    # Convert both edge-based trees into the format used by the histogram
    # algorithm; only the gene tree's root is passed on to the caller
    gene_tree, gene_root, _ = Diameter.reformat_tree(gene_edges, "pTop")
    species_tree, _, _ = Diameter.reformat_tree(species_edges, "hTop")

    return gene_tree, species_tree, gene_root, recon_graph, n_mprs, roots
Example #5
0
def main():
    """
    Run the main loop for the command line interface.

    Expects exactly four positional arguments: the input file name followed by
    the duplication, transfer and loss costs. The median reconciliation graph
    is always printed; ``--count`` additionally prints the number of medians
    and ``--random`` a randomly chosen single median. The usage text is
    printed instead when the argument count is wrong or a ValueError is
    raised while processing.
    :return: nothing.
    """

    p = optparse.OptionParser(usage=usage())

    p.add_option('-r',
                 '--random',
                 dest='random',
                 help='Add a random median reconciliation from the full median'
                 ' reconciliation graph of the given file to the output',
                 action='store_true',
                 default=False)
    p.add_option('-c',
                 '--count',
                 dest='count',
                 help='Add the number of median reconciliations to'
                 ' the output',
                 action='store_true',
                 default=False)

    options, args = p.parse_args()

    if len(args) != 4:
        print(usage())
        return

    try:
        # Save arg values; float() raises ValueError on a malformed cost
        filename = args[0]
        dup = float(args[1])
        transfer = float(args[2])
        loss = float(args[3])

        # Get basic info just about the dtl recon graph
        species_tree, gene_tree, dtl_recon_graph, _mpr_count, best_roots = \
            DTLReconGraph.reconcile(filename, dup, transfer, loss)

        # Reformat the gene tree and the species tree for the median computation
        postorder_gene_tree, gene_tree_root, _ = Diameter.reformat_tree(
            gene_tree, "pTop")
        postorder_species_tree, _, _ = Diameter.reformat_tree(
            species_tree, "hTop")

        # Compute the median reconciliation graph
        median_reconciliation, n_meds, roots_for_median = get_median_graph(
            dtl_recon_graph, postorder_gene_tree, postorder_species_tree,
            gene_tree_root, best_roots)

        # These are the outputs we eventually print; the median is always
        # included
        output = [median_reconciliation]

        # Check if the user wants the number of medians
        if options.count:
            output.append(n_meds)

        # Check if the user wants a random median
        if options.random:
            med_counts = get_med_counts(median_reconciliation,
                                        roots_for_median)
            # Calculate a random, uniformly sampled single-path median from the median recon
            random_median = choose_random_median_wrapper(
                median_reconciliation, roots_for_median, med_counts)
            output.append(random_median)

        # Print all of the requested output, with one blank line between
        # consecutive items
        print('\n\n'.join(str(item) for item in output))

    except ValueError:
        # NOTE(review): this also catches a ValueError raised by any of the
        # calls above, not only by the float() conversions
        print(usage())
Example #6
0
def compute_median(dtl_recon_graph, event_scores, postorder_mapping_nodes,
                   mpr_roots):
    """
    Build the median reconciliation graph of a DTL reconciliation graph.

    Every mapping node is given a score: the largest sum of (frequency - 0.5)
    over the events used at or below that node. Only the events and roots
    that reach the largest score are kept.
    :param dtl_recon_graph: A dictionary representing a DTL Recon Graph.
    :param event_scores: A dictionary with event nodes as keys and, as values, the frequency of
    each event in MPR space for the recon graph
    :param postorder_mapping_nodes: A list of the mapping nodes in a possible MPR, sorted in
    postorder by species node and postorder by gene node, so that the children of a node are
    always processed before the node itself
    :param mpr_roots: A list of mapping nodes that could act as roots to an MPR for the species and
    gene trees in question, output from the findBestRoots function in DTLReconGraph.py
    :return: A dictionary in the form of a DTL reconciliation graph restricted to the
    score-maximizing events, the number of median reconciliations for the given DTL
    reconciliation graph, and the list of best roots of the median graph.
    """

    # Note that for a symmetric median reconciliation, each frequency must have 0.5 subtracted from it

    # mapping node -> (events achieving the best score, best score)
    best_by_node = {}

    for node in postorder_mapping_nodes:
        node_events = dtl_recon_graph[node]

        # A contemporaneous event is the only event of its mapping node and has
        # frequency 1, so its score is 1 - 0.5 = 0.5
        if node_events == [('C', (None, None), (None, None))]:
            best_by_node[node] = ([('C', (None, None), (None, None))], 0.5)
            continue

        # An event has the form ('event ID', 'Child 1', 'Child 2'). Losses have
        # a single child; T, S and D events have two.
        scored = []
        for event in node_events:
            below = best_by_node[event[1]][1]
            if event[0] != 'L':
                below = below + best_by_node[event[2]][1]
            scored.append((event, below + event_scores[event] - 0.5))

        # Keep every event that attains the best score for this mapping node
        top_score = max(score for _, score in scored)
        top_events = [event for event, score in scored if score == top_score]
        best_by_node[node] = (top_events, top_score)

    # Score every candidate root and keep those that attain the overall best
    root_scores = [(root, best_by_node[root][1]) for root in mpr_roots]
    best_sum = max(score for _, score in root_scores)
    best_roots = [root for root, score in root_scores if score == best_sum]

    # build_dtl_recon_graph expects mapping node -> list of events, so drop the
    # scores that were only needed while searching for the maxima
    events_by_node = {
        node: events
        for node, (events, _) in best_by_node.items()
    }

    # build_dtl_recon_graph takes the best roots, the viable events of every
    # mapping node and an empty dictionary to populate as its return value
    med_recon_graph = DTLReconGraph.build_dtl_recon_graph(
        best_roots, events_by_node, {})

    # Check to make sure the median is a subgraph of the DTL reconciliation
    assert check_subgraph(
        dtl_recon_graph,
        med_recon_graph), 'Median is not a subgraph of the recon graph!'

    # The number of medians can be counted on the final median recon graph
    n_med_recons = DTLReconGraph.count_mprs_wrapper(best_roots,
                                                    med_recon_graph)

    return med_recon_graph, n_med_recons, best_roots
Example #7
0
# Script entry point: for every sample tree and every D, T, L cost combination,
# build the reconciliation graph, cross-check its MPR count against brute-force
# enumeration and prepare the inputs of the histogram algorithm.
# NOTE(review): min_size, get_tree_paths and stdout are not defined in this
# block; presumably they are module-level names - confirm.
if __name__ == '__main__':
    # Find the path to each tree sample file
    tree_dir = "newickSample"
    tree_paths = get_tree_paths(tree_dir, min_size)
    n_trees = len(tree_paths)
    # 1-based position of the current tree, used only for the progress message
    tree_index = 0
    for (tree_file, tree_size, tree_id) in tree_paths:
        tree_index += 1
        print(tree_file)
        # The progress message ends with a carriage return instead of a newline
        stdout.write("Processing tree: %d / %d\r" % (tree_index, n_trees))
        stdout.flush()
        # Test different D, T, L values in {1, 2, 3, 4}
        for D, T, L in itertools.product([1, 2, 3, 4], repeat=3):
            # From the newick tree create the reconciliation graph
            edge_species_tree, edge_gene_tree, dtl_recon_graph, mpr_count, best_roots \
                = DTLReconGraph.reconcile(tree_file, D, T, L)

            # Sanity check: the mpr_count returned is equal to the count generated via brute force
            assert (mpr_count == sum(
                1 for _ in HistogramAlgTools.BF_enumerate_MPRs(
                    dtl_recon_graph, best_roots)))

            # Calculate the histogram via brute force
            # NOTE(review): brute_force_hist is not used in the visible part of
            # this script
            brute_force_hist = HistogramAlgTools.BF_find_histogram(
                dtl_recon_graph, best_roots)

            # Reformat the host and parasite tree to use it with the histogram algorithm
            gene_tree, gene_tree_root, gene_node_count = Diameter.reformat_tree(
                edge_gene_tree, "pTop")
            species_tree, species_tree_root, species_node_count \
                = Diameter.reformat_tree(edge_species_tree, "hTop")
'''
This demo demonstrates how you would use the reconciliation graph visualizer
written by me and Dennis.
'''

from empress.clumpr import DTLReconGraph  # for creating a reconciliation graph
from empress.clumpr import ReconciliationVisualization  # for visualization

# Since we currently have no way of storing a reconciliation graph in a file,
# we regenerate it every time the demo is run.
# Create a reconciliation graph from the sample file; the three numbers are
# the duplication, transfer and loss costs passed to reconcile.
result = DTLReconGraph.reconcile(
    "./newickSample/size5/test-size5-no700.newick", 2, 4, 2)
# The result is a five-tuple: host, paras, graph, num_recon, best_roots.
# Only the reconciliation graph, the third item of the tuple, is used below.
host, paras, graph, num_recon, best_roots = result
# Visualize the graph and save the picture at './sampleVis700.png'
ReconciliationVisualization.visualizeAndSave(graph, './sampleVis700.png')
Example #9
0
 def test_postorder(self):
     # Starting from the ('Top', 'A') edge, the edges below an edge must be
     # listed before the edge itself.
     expected_order = [
         ('B', 'D'),
         ('B', 'E'),
         ('A', 'B'),
         ('A', 'C'),
         ('Top', 'A'),
     ]
     actual_order = list(DTLReconGraph.postorder(self.tree, ('Top', 'A')))
     self.assertEqual(actual_order, expected_order)
Example #10
0
 def count_mprs(g):
     # Collect the mapping nodes whose first component is the gene root
     # (gene_root comes from the enclosing scope) and count the MPRs of g
     # that start from them.
     root_nodes = []
     for mapping_node in g:
         if mapping_node[0] == gene_root:
             root_nodes.append(mapping_node)
     return DTLReconGraph.count_mprs_wrapper(root_nodes, g)