Beispiel #1
0
def calc_histogram(tree_data,
                   d,
                   t,
                   l,
                   time_it,
                   normalize=False,
                   zero_loss=False):
    """
    Build the PDV histogram for an already-parsed .newick input.

    The input is reconciled with the given DTL costs, both trees are
    reformatted, and the histogram algorithm is run on the result. Only the
    histogram algorithm call itself is timed; reconciliation and tree
    reformatting happen before the clock starts.

    :param tree_data <ReconInput> - Output of newickFormatReader.getInput()
    :param d <float> - duplication cost
    :param t <float> - transfer cost
    :param l <float> - loss cost
    :param time_it <bool> - when truthy, measure the wall-clock time of the
        histogram algorithm call
    :param normalize <bool> - when truthy, rescale the histogram's x axis by
        1 / (2 * ceil(len(gene_tree) / 2))
    :param zero_loss <bool> - forwarded unchanged as the last positional
        argument of HistogramAlg.diameter_algorithm (described by the original
        author as "ignore loss events")
    :return diameter_alg_hist <Histogram> - the PDV for the given input
    :return elapsed <float> - seconds spent in the histogram algorithm,
        or None if time_it is falsy
    """
    # Reconcile first. Only the two edge-format trees and the reconciliation
    # graph are used below; the MPR count and the root list are discarded.
    recon_result = DTLReconGraph.reconcile(tree_data, d, t, l)
    edge_species_tree, edge_gene_tree, dtl_recon_graph, _, _ = recon_result

    # Convert both trees into the layout the histogram algorithm consumes.
    # The gene tree is reformatted before the species tree, as in the
    # original statement order.
    gene_tree, gene_tree_root, _ = Diameter.reformat_tree(
        edge_gene_tree, "pTop")
    species_tree, _, _ = Diameter.reformat_tree(edge_species_tree, "hTop")

    # The clock is only read when timing was requested.
    started_at = time.time() if time_it else None

    # The same reconciliation graph is deliberately passed for both graph
    # arguments. NOTE(review): the meaning of the positional False is not
    # visible here - confirm against HistogramAlg.diameter_algorithm.
    diameter_alg_hist = HistogramAlg.diameter_algorithm(
        species_tree, gene_tree, gene_tree_root,
        dtl_recon_graph, dtl_recon_graph,
        False, zero_loss)

    elapsed = (time.time() - started_at) if time_it else None

    if not normalize:
        return diameter_alg_hist, elapsed

    # Half the gene tree's length, rounded up. The original author labels
    # this "number of internal gene tree nodes" - presumably because
    # gene_tree has about two entries per internal node; verify against
    # Diameter.reformat_tree.
    gene_tree_nodes = int(math.ceil(len(gene_tree) / 2.0))
    x_factor = 1.0 / (2 * gene_tree_nodes)
    return diameter_alg_hist.xscale(x_factor), elapsed
Beispiel #2
0
def get_tree_info(newick, d, t, l):
    """
    Reconcile the input and hand back everything callers need afterwards.

    :param newick <ReconInput>: Output of newickFormatReader.getInput()
    :params d,t,l <float> - the relative duplication / transfer / loss costs,
        passed straight through to DTLReconGraph.reconcile
    :return gene_tree <tree> - gene tree as produced by Diameter.reformat_tree
    :return species_tree <tree> - species tree as produced by
        Diameter.reformat_tree
    :return gene_root <node> - root reported for the reformatted gene tree
    :return dtl_recon_graph <recon_graph>
    :return mpr_count <int> - the number of MPRs for the recon graph
    :return best_roots [<mapping_node>] - the sources of the recon graph
    """
    # Build the reconciliation graph from the parsed newick input.
    reconciliation = DTLReconGraph.reconcile(newick, d, t, l)
    (edge_species_tree, edge_gene_tree,
     dtl_recon_graph, mpr_count, best_roots) = reconciliation

    # Reformat the gene tree first, then the species tree. The node counts
    # and the species root are not part of this function's result.
    gene_tree, gene_root, _ = Diameter.reformat_tree(edge_gene_tree, "pTop")
    species_tree, _, _ = Diameter.reformat_tree(edge_species_tree, "hTop")

    tree_info = (gene_tree, species_tree, gene_root,
                 dtl_recon_graph, mpr_count, best_roots)
    return tree_info
Beispiel #3
0
def main():
    """
    :return: nothing. This function will run the main loop for the command line interface.
    """

    p = optparse.OptionParser(usage=usage())

    p.add_option('-r',
                 '--random',
                 dest='random',
                 help='Add a random median reconciliation from the full median'
                 ' reconciliation graph of the given file to the output',
                 action='store_true',
                 default=False)
    p.add_option('-c',
                 '--count',
                 dest='count',
                 help='Add the number of median reconciliations to'
                 'the output',
                 action='store_true',
                 default=False)

    options, args = p.parse_args()

    if len(args) == 4:
        try:

            # These will be the outputs we eventually return
            output = []

            # Save arg values
            filename = args[0]
            dup = float(args[1])
            transfer = float(args[2])
            loss = float(args[3])

            # Get basic info just about the dtl recon graph
            species_tree, gene_tree, dtl_recon_graph, mpr_count, best_roots = DTLReconGraph.reconcile(
                filename, dup, transfer, loss)

            # Reformat gene tree and get info on it, as well as for the species tree in the following line
            postorder_gene_tree, gene_tree_root, gene_node_count = Diameter.reformat_tree(
                gene_tree, "pTop")
            postorder_species_tree, species_tree_root, species_node_count = Diameter.reformat_tree(
                species_tree, "hTop")

            # Compute the median reconciliation graph
            median_reconciliation, n_meds, roots_for_median = get_median_graph(
                dtl_recon_graph, postorder_gene_tree, postorder_species_tree,
                gene_tree_root, best_roots)

            # We'll always want to output the median
            output.append(median_reconciliation)

            # Check if the user wants the number of medians
            if options.count:
                output.append(n_meds)

            # Check if the user wants a random median
            if options.random:
                med_counts = get_med_counts(median_reconciliation,
                                            roots_for_median)
                # Calculate a random, uniformly sampled single-path median from the median recon
                random_median = choose_random_median_wrapper(
                    median_reconciliation, roots_for_median, med_counts)
                output.append(random_median)

            # Now print all of the output requested by the user
            for i in range(len(output)):
                if i != (len(output) - 1):
                    print((str(output[i]) + '\n'))
                else:
                    print((str(output[i])))

        except ValueError:
            print((usage()))
    else:
        print(usage())
        for D, T, L in itertools.product([1, 2, 3, 4], repeat=3):
            # From the newick tree create the reconciliation graph
            edge_species_tree, edge_gene_tree, dtl_recon_graph, mpr_count, best_roots \
                = DTLReconGraph.reconcile(tree_file, D, T, L)

            # Sanity check: the mpr_count returned is equal to the count generated via brute force
            assert (mpr_count == sum(
                1 for _ in HistogramAlgTools.BF_enumerate_MPRs(
                    dtl_recon_graph, best_roots)))

            # Calculate the histogram via brute force
            brute_force_hist = HistogramAlgTools.BF_find_histogram(
                dtl_recon_graph, best_roots)

            # Reformat the host and parasite tree to use it with the histogram algorithm
            gene_tree, gene_tree_root, gene_node_count = Diameter.reformat_tree(
                edge_gene_tree, "pTop")
            species_tree, species_tree_root, species_node_count \
                = Diameter.reformat_tree(edge_species_tree, "hTop")

            # Calculate the histogram via histogram algorithm
            diameter_alg_hist = HistogramAlg.diameter_algorithm(
                species_tree,
                gene_tree,
                gene_tree_root,
                dtl_recon_graph,
                dtl_recon_graph,
                False,
                False,
                verify=True)

            # If there is a mismatch, print the details and save the tree that causes