Exemple #1
0
def pll_task(alignment_file, partition_string, guidetree=None, tree_search=True, threads=1, seed=RANDOM_SEED, frequencies=None,
             write_to_file=None):
    """
    Run a pllpy optimisation on one alignment and return the result as a dict.

    alignment_file    -- passed to pllpy.pll as the alignment
    partition_string  -- passed to pllpy.pll as the partition description
    guidetree         -- passed to pllpy.pll as the starting tree; when None,
                         True is passed in its place
    tree_search       -- if true, instance.optimise_tree_search(True) is run;
                         otherwise instance.optimise(True, True, True, True)
    threads, seed     -- passed straight through to pllpy.pll
    frequencies       -- optional sequence with one entry per partition; it is
                         applied only when its length equals the number of
                         partitions reported by the pll instance
    write_to_file     -- optional destination; when given, the result is also
                         written there (best effort, gzip-enabled writer)

    Returns the dict built by pllpy.helpers.pll_to_dict, or an empty dict when
    pllpy cannot be imported.
    """
    try:
        import pllpy
    except ImportError:
        logger.error("Couldn't import pllpy: returning empty dict")
        return {}

    if guidetree is None:
        guidetree = True
    instance = pllpy.pll(alignment_file, partition_string, guidetree, threads, seed)

    if frequencies is not None:
        if len(frequencies) == instance.get_number_of_partitions():
            for i, partition_frequencies in enumerate(frequencies):
                instance.set_frequencies(partition_frequencies, i, False)
        else:
            # Previously a silent no-op; make the skipped input visible.
            logger.warning("Ignoring frequencies: got %d entries for %d partitions",
                           len(frequencies), instance.get_number_of_partitions())

    if tree_search:
        instance.optimise_tree_search(True)
    else:
        instance.optimise(True, True, True, True)

    result = pllpy.helpers.pll_to_dict(instance)

    if write_to_file is not None:
        # Writing is best effort: a failure must not discard the computed
        # result, but it is logged rather than swallowed silently.
        try:
            parameters = Parameters()
            parameters.construct_from_dict(result)
            with fileIO.fwriter(write_to_file, gz=True) as fl:
                parameters.write(fl)
        except Exception:
            logger.warning("Couldn't write pll result to %s", write_to_file, exc_info=True)

    return result
Exemple #2
0
def create_instance(alignment,
                    partitions,
                    tree,
                    threads=1,
                    rns=int("0xCA55E77E", 16)):
    """
    Build a pllpy.pll instance for an alignment.

    alignment   -- path to the alignment file; it is opened once up front so
                   an unreadable path raises IOError before pllpy is invoked
    partitions  -- passed straight through to pllpy.pll
    tree        -- 'random' (pllpy.pll receives False), 'parsimony'
                   (pllpy.pll receives True), or any other value, which is
                   handed to pllpy.pll unchanged
    threads     -- passed straight through to pllpy.pll
    rns         -- passed to pllpy.pll as its final argument

    Returns the new pllpy.pll instance.
    """
    # Readability probe only; any IOError from open() propagates as-is.
    with open(alignment):
        pass

    if tree == 'random':
        start_tree = False
    elif tree == 'parsimony':
        start_tree = True
    else:
        start_tree = tree
    return pllpy.pll(alignment, partitions, start_tree, threads, rns)
Exemple #3
0
def prob_alignment_nooptimize(alnfile,
                              partfile,
                              coal_tree,
                              rates,
                              freqs,
                              alphas,
                              threads=1,
                              seed=ALIGNMENT_SEED,
                              eps=0.1):
    """
    This function implements the pll function. It computes the log likelihood
    of alignment data given the coal_tree without optimizing parameters.
    Mathematically, it computes: P(A | T^G, t^G).

    rates, freqs, alphas  -- parameters in pll computation, indexed by
                             partition number (rates are only applied to
                             partitions for which pll.is_dna is true)
    alnfile               -- alignment file
    partfile              -- partition file
    coal_tree             -- coalescent tree; an unrooted copy is written to
                             a temporary file for pll
    threads, seed         -- passed straight through to pllpy.pll
    eps                   -- value given to pll.set_epsilon

    Returns the value of pll.get_likelihood(). The temporary tree file is
    removed whether or not the computation succeeds.
    """

    # unrooted tree required for ML
    tree = treelib.unroot(coal_tree, newCopy=True)

    # pll takes the tree as a filename, so write it to a temporary file
    tree_temp = tempfile.NamedTemporaryFile(delete=False)
    tree_filename = tree_temp.name
    try:
        with tree_temp:
            tree.write(tree_temp, oneline=True)

        # initialize pll instance
        pll = pllpy.pll(alnfile, partfile, tree_filename, threads, seed)

        # initialize pll with previously optimized parameters
        for i in range(pll.get_number_of_partitions()):
            pll.set_alpha(alphas[i], i, True)
            pll.set_frequencies(freqs[i], i, True)
            if pll.is_dna(i):
                pll.set_rates(rates[i], i, True)

        # set likelihood convergence
        pll.set_epsilon(eps)

        # NOTE: pll.optimise is deliberately not called here; the supplied
        # parameters are used as they are.

        # get (log) likelihood
        return pll.get_likelihood()
    finally:
        # delete=False above means cleanup is our job, including on errors
        os.remove(tree_filename)
Exemple #4
0
def prob_alignment(alnfile,
                   partfile,
                   coal_tree,
                   threads=1,
                   seed=int("0xDEADBEEF", 16),
                   eps=0.1,
                   opt_branches=False):
    """
    This function implements the pll function. It optimizes the alphas, rates
    and frequencies, and uses these parameters to compute the log likelihood
    of alignment data given the coal_tree. This function is not used because
    it is computationally inefficient to optimize the parameters.

    alnfile        -- alignment file
    partfile       -- partition file
    coal_tree      -- coalescent tree; an unrooted copy is written to a
                      temporary file for pll
    threads, seed  -- passed straight through to pllpy.pll
    eps            -- value given to pll.set_epsilon
    opt_branches   -- passed as the fourth (branches) flag of pll.optimise

    Returns the value of pll.get_likelihood(). The temporary tree file is
    removed whether or not the computation succeeds.
    """

    # unrooted tree required for ML
    tree = treelib.unroot(coal_tree, newCopy=True)

    # pll takes the tree as a filename, so write it to a temporary file
    tree_temp = tempfile.NamedTemporaryFile(delete=False)
    tree_filename = tree_temp.name
    try:
        with tree_temp:
            tree.write(tree_temp, oneline=True)

        # initialize pll instance
        pll = pllpy.pll(alnfile, partfile, tree_filename, threads, seed)

        # tell pll to optimize all model parameters
        for i in range(pll.get_number_of_partitions()):
            pll.set_optimisable_alpha(i, True)
            pll.set_optimisable_frequencies(i, True)
            if pll.is_dna(i):
                pll.set_optimisable_rates(i, True)

        # set likelihood convergence
        pll.set_epsilon(eps)

        # optimize the model: rates, freqs, alphas, branches
        pll.optimise(True, True, True, opt_branches)

        # get (log) likelihood
        return pll.get_likelihood()
    finally:
        # delete=False above means cleanup is our job, including on errors
        os.remove(tree_filename)
Exemple #5
0
def optimize_parameters(alnfile,
                        partfile,
                        coal_tree,
                        threads=1,
                        seed=ALIGNMENT_SEED,
                        eps=0.1):
    """
    The function takes in alignment file, partitions file, coal_tree,
    and returns the rates, freqs, alphas after optimization. These
    parameters are used when the alignment probability is calculated.

    alnfile        -- alignment file
    partfile       -- partition file
    coal_tree      -- coalescent tree; an unrooted copy is written to a
                      temporary file for pll
    threads, seed  -- passed straight through to pllpy.pll
    eps            -- value given to pll.set_epsilon

    Returns a (rates, freqs, alphas) tuple of lists, each holding one entry
    per partition. The temporary tree file is removed whether or not the
    optimization succeeds.
    """
    rates = []
    freqs = []
    alphas = []

    # unrooted tree required for ML
    tree = treelib.unroot(coal_tree, newCopy=True)

    # pll takes the tree as a filename, so write it to a temporary file
    tree_temp = tempfile.NamedTemporaryFile(delete=False)
    tree_filename = tree_temp.name
    try:
        with tree_temp:
            tree.write(tree_temp, oneline=True)

        # initialize pll instance
        pll = pllpy.pll(alnfile, partfile, tree_filename, threads, seed)

        # set likelihood convergence
        pll.set_epsilon(eps)

        # optimize rates, freqs, alphas, and branches
        pll.optimise(True, True, True, True)

        # store optimal parameters
        for i in range(pll.get_number_of_partitions()):
            rates.append(pll.get_rates_vector(i))
            freqs.append(pll.get_frequencies_vector(i))
            alphas.append(pll.get_alpha(i))
    finally:
        # delete=False above means cleanup is our job, including on errors
        os.remove(tree_filename)

    return rates, freqs, alphas
Exemple #6
-1
def create_instance(alignment, partitions, tree, threads=1, rns=int("0xCA55E77E", 16)):
    """
    Build a pllpy.pll instance for an alignment.

    alignment   -- path to the alignment file; it is opened once up front so
                   an unreadable path raises IOError before pllpy is invoked
    partitions  -- passed straight through to pllpy.pll
    tree        -- "random" (pllpy.pll receives False), "parsimony"
                   (pllpy.pll receives True), or any other value, which is
                   handed to pllpy.pll unchanged
    threads     -- passed straight through to pllpy.pll
    rns         -- passed to pllpy.pll as its final argument

    Returns the new pllpy.pll instance.
    """
    # Readability probe only; any IOError from open() propagates as-is.
    with open(alignment):
        pass

    if tree == "random":
        start_tree = False
    elif tree == "parsimony":
        start_tree = True
    else:
        start_tree = tree
    return pllpy.pll(alignment, partitions, start_tree, threads, rns)