Beispiel #1
0
def calc_histogram(tree_data,
                   d,
                   t,
                   l,
                   time_it,
                   normalize=False,
                   zero_loss=False):
    """
    Compute the PDV from a .newick file
    :param tree_data <ReconInput> - Output of newickFormatReader.getInput()
    :param d <float> - the cost of a duplication
    :param t <float> - the cost of a transfer
    :param l <float> - the cost of a loss
    :param time_it <bool> - collect timing info
    :param normalize <bool> - rescale the x axis of the histogram by
        1 / (2 * ceil(len(gene_tree) / 2))
    :param zero_loss <bool> - forwarded unchanged as the last positional
        argument of HistogramAlg.diameter_algorithm (ignore loss events)
    :return diameter_alg_hist <Histogram> - the PDV for the given .newick
    :return elapsed <float> - seconds spent inside
        HistogramAlg.diameter_algorithm only (reconciliation and tree
        reformatting are not timed); None if time_it is False
    """
    # From the newick tree create the reconciliation graph. reconcile()
    # yields five values; the MPR count and best roots are not needed here.
    edge_species_tree, edge_gene_tree, dtl_recon_graph, _mpr_count, _best_roots \
        = DTLReconGraph.reconcile(tree_data, d, t, l)

    # Reformat the host and parasite tree to use it with the histogram
    # algorithm. Only the trees and the gene tree root are used below.
    gene_tree, gene_tree_root, _ = Diameter.reformat_tree(
        edge_gene_tree, "pTop")
    species_tree, _, _ = Diameter.reformat_tree(edge_species_tree, "hTop")

    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter() if time_it else None

    # Calculate the histogram via histogram algorithm. The reconciliation
    # graph is intentionally passed for both graph arguments.
    diameter_alg_hist = HistogramAlg.diameter_algorithm(
        species_tree, gene_tree, gene_tree_root, dtl_recon_graph,
        dtl_recon_graph, False, zero_loss)

    elapsed = time.perf_counter() - start if time_it else None

    if normalize:
        # Presumably the number of internal gene tree nodes; computed as half
        # of len(gene_tree), rounded up.
        gene_tree_nodes = int(math.ceil(len(gene_tree) / 2.0))
        diameter_alg_hist = diameter_alg_hist.xscale(1.0 /
                                                     (2 * gene_tree_nodes))
    return diameter_alg_hist, elapsed
            # Calculate the histogram via brute force
            brute_force_hist = HistogramAlgTools.BF_find_histogram(
                dtl_recon_graph, best_roots)

            # Reformat the host and parasite tree to use it with the histogram algorithm
            gene_tree, gene_tree_root, gene_node_count = Diameter.reformat_tree(
                edge_gene_tree, "pTop")
            species_tree, species_tree_root, species_node_count \
                = Diameter.reformat_tree(edge_species_tree, "hTop")

            # Calculate the histogram via histogram algorithm
            diameter_alg_hist = HistogramAlg.diameter_algorithm(
                species_tree,
                gene_tree,
                gene_tree_root,
                dtl_recon_graph,
                dtl_recon_graph,
                False,
                False,
                verify=True)

            # If there is a mismatch, print the details and save the tree that causes
            # the error to a folder called errorTrees.
            if brute_force_hist != diameter_alg_hist:
                outname = './errorTrees/no%d-id%d-%d%d%d.png' % (
                    tree_size, tree_id, D, T, L)
                ReconciliationVisualization.visualizeAndSave(
                    dtl_recon_graph, outname)
                expected_n_pairs = HistogramAlgTools.calculate_n_pairs(
                    mpr_count)
                brute_force_n_pairs = HistogramAlgTools.count_mpr_pairs(
Beispiel #3
0
 def get_hist(g):
     """
     Run the histogram algorithm on g and return its histogram dict.

     g is passed for both graph arguments of diameter_algorithm.
     species_tree, gene_tree and gene_root are not defined here and must be
     available from the surrounding scope.
     """
     histogram = HistogramAlg.diameter_algorithm(
         species_tree, gene_tree, gene_root, g, g, False, False)
     return histogram.histogram_dict
Beispiel #4
0
 def score(g):
     """
     Run the histogram algorithm on g and return the mean of the result.

     g is passed for both graph arguments of diameter_algorithm.
     species_tree, gene_tree and gene_root are not defined here and must be
     available from the surrounding scope.
     """
     return HistogramAlg.diameter_algorithm(
         species_tree, gene_tree, gene_root, g, g, False, False).mean()