コード例 #1
0
    if not node.is_leaf():
        children = node.get_children()
        if outgroup in children:
            children.remove(outgroup)
        left_child, right_child = children
        op_list = [
            node.id, bg.BEAGLE_OP_NONE, bg.BEAGLE_OP_NONE, left_child.id,
            left_child.id, right_child.id, right_child.id
        ]
        op = bg.make_operation(op_list)
        bg.BeagleOperationArray_setitem(operations, op_index, op)
        op_index += 1

nodeIndices = bg.make_intarray(node_list)
dervIndices = bg.make_intarray(derv_list)
edgeLengths = bg.make_doublearray(edge_list)

for node in tree.traverse("preorder"):
    if not node.is_root():
        parent = node.up
        if not parent.is_root():
            sibling = node.get_sisters()[0]
            op_list = [
                node.id + n_edges, bg.BEAGLE_OP_NONE, bg.BEAGLE_OP_NONE,
                parent.id + n_edges, parent.id, sibling.id, sibling.id
            ]
            op = bg.make_operation(op_list)
            bg.BeagleOperationArray_setitem(operations, op_index, op)
            op_index += 1
        else:
            children = parent.get_children()
コード例 #2
0
def loglikelihood_beagle_evaluate(instance,
                                  tree,
                                  tip_name_to_address,
                                  id_attr="id",
                                  leaf_attr="name",
                                  scaling=False):
    """Evaluate the log-likelihood of ``tree`` on an existing BEAGLE instance.

    The function does not create or finalize ``instance``; it only updates
    transition matrices and partials on it and then evaluates the likelihood
    across the edge between the root and its first child (the "outgroup").

    ``refresh_ids`` is called on ``tree`` itself (no copy is made), so any
    relabelling it performs is visible to the caller.

    :param instance: BEAGLE instance number to operate on.
    :param tree: Tree whose root has the outgroup as its first child and
        exactly two other children (the remaining children are unpacked into
        a left/right pair after the outgroup is removed).
    :param tip_name_to_address: Mapping from a leaf's ``leaf_attr`` value to
        the buffer index used for that leaf on ``instance`` (presumably the
        index its tip data was loaded under -- confirm against the caller).
    :param id_attr: Node attribute holding the integer index refreshed by
        ``refresh_ids``; used for internal partials buffers and for all
        transition matrices.
    :param leaf_attr: Leaf attribute used as key into ``tip_name_to_address``.
    :param scaling: If true, operation ``k`` writes its scale factors to
        scale buffer ``k + 1`` and scale buffer 0 is reset and used as the
        cumulative scale buffer.
    :return: The log-likelihood value reported by BEAGLE.
    """
    n_taxa = len(tree)
    n_internals = n_taxa - 2

    outgroup = tree.children[0]
    refresh_ids(tree, attr=id_attr)

    # Sanity check(s): leaves must carry ids 0..n-1 and all nodes together
    # must carry ids 0..2n-3, since the ids are used as BEAGLE buffer indices.
    assert set(getattr(node, id_attr)
               for node in tree.get_leaves()) == set(range(len(tree)))
    assert set(getattr(node, id_attr)
               for node in tree.traverse()) == set(range(2 * len(tree) - 2))

    def partials_address(node):
        """Return the buffer index holding ``node``'s tip data or partials."""
        if node.is_leaf():
            return tip_name_to_address[getattr(node, leaf_attr)]
        return getattr(node, id_attr)

    # Collect (matrix index, edge length) pairs for every non-root node and
    # one partial-likelihood update operation per internal node. Reversed
    # level order guarantees children are scheduled before their parent.
    node_list = []
    edge_list = []
    # NOTE(review): SWIG-allocated arrays/pointers created below are never
    # explicitly freed here; check whether `bg` exposes matching delete_*
    # helpers.
    operations = bg.new_BeagleOperationArray(n_internals)
    op_index = 0
    for node in reversed(list(tree.traverse("levelorder"))):
        if not node.is_root():
            node_list.append(getattr(node, id_attr))
            edge_list.append(node.dist)

        if node.is_leaf():
            continue

        children = node.get_children()
        if outgroup in children:
            # The outgroup edge is handled by the final edge likelihood call.
            children.remove(outgroup)
        left_child, right_child = children

        scaling_index = op_index + 1 if scaling else bg.BEAGLE_OP_NONE

        # Partials are read from the tip address for leaves, but transition
        # matrices are always read from the node id, because that is the
        # index they are stored under by beagleUpdateTransitionMatrices.
        op_list = [
            getattr(node, id_attr),  # destination partials
            scaling_index,  # destination scale write
            bg.BEAGLE_OP_NONE,  # destination scale read
            partials_address(left_child),  # child 1 partials
            getattr(left_child, id_attr),  # child 1 transition matrix
            partials_address(right_child),  # child 2 partials
            getattr(right_child, id_attr),  # child 2 transition matrix
        ]
        op = bg.make_operation(op_list)
        bg.BeagleOperationArray_setitem(operations, op_index, op)
        op_index += 1
    nodeIndices = bg.make_intarray(node_list)
    edgeLengths = bg.make_doublearray(edge_list)

    # tell BEAGLE to populate the transition matrices for the above edge lengths
    bg.beagleUpdateTransitionMatrices(
        instance,  # instance
        0,  # eigenIndex
        nodeIndices,  # probabilityIndices
        None,  # firstDerivativeIndices
        None,  # secondDerivativeIndices
        edgeLengths,  # edgeLengths
        len(node_list))  # count

    # this invokes all the math to carry out the likelihood calculation
    cumulative_scale_index = bg.BEAGLE_OP_NONE
    if scaling:
        cumulative_scale_index = 0
        bg.beagleResetScaleFactors(instance, cumulative_scale_index)
    bg.beagleUpdatePartials(
        instance,  # instance
        operations,  # operations
        n_internals,  # operationCount
        cumulative_scale_index)  # cumulative scale index

    logLp = bg.new_doublep()
    categoryWeightIndex = bg.make_intarray([0])
    stateFrequencyIndex = bg.make_intarray([0])
    cumulativeScaleIndex = bg.make_intarray([cumulative_scale_index])

    indexFocalParent = bg.make_intarray([getattr(tree, id_attr)])
    # Same split as in the operations: buffer by address, matrix by id.
    indexFocalChild = bg.make_intarray([partials_address(outgroup)])
    indexFocalMatrix = bg.make_intarray([getattr(outgroup, id_attr)])

    bg.beagleCalculateEdgeLogLikelihoods(
        instance,  # instance number
        indexFocalParent,  # indices of parent partialsBuffers
        indexFocalChild,  # indices of child partialsBuffers
        indexFocalMatrix,  # transition probability matrices for this edge
        None,  # first derivative matrices
        None,  # second derivative matrices
        categoryWeightIndex,  # weights to apply to each partialsBuffer
        stateFrequencyIndex,  # state frequencies for each partialsBuffer
        cumulativeScaleIndex,  # scaleBuffers containing accumulated factors
        1,  # Number of partialsBuffer
        logLp,  # destination for log likelihood
        None,  # destination for first derivative
        None  # destination for second derivative
    )

    return bg.doublep_value(logLp)
コード例 #3
0
def loglikelihood_beagle(tree,
                         seqs,
                         model=JC,
                         id_attr=None,
                         leaf_attr=None,
                         scaling=False):
    """Compute the log-likelihood of a tree using a temporary BEAGLE instance.

    A copy of ``tree`` is rerooted on one of its leaves (the "outgroup") and
    passed through ``refresh_ids``. A BEAGLE instance is then created, loaded
    with the tip states and substitution model, used to evaluate the
    likelihood across the outgroup edge, and finalized before returning.

    :param tree: Tree object (presumably an ete3 ``Tree``); only a copy is
        modified.
    :param seqs: Sequence data accepted by ``convert_to_dict_lists``. The
        converted dictionary is indexed by each leaf's ``leaf_attr`` value
        and its entries are handed to BEAGLE as integer tip states.
    :param model: Substitution model providing ``pi``, ``U``, ``U_inv`` and
        ``D``.
    :param id_attr: Node attribute holding the integer index refreshed by
        ``refresh_ids`` and used as BEAGLE buffer/matrix index.
        Default: ``"id"``.
    :param leaf_attr: Leaf attribute used to look up the leaf's sequence.
        Default: ``"name"``.
    :param scaling: If true, the instance is created with manual scaling,
        operation ``k`` writes its scale factors to scale buffer ``k + 1``
        and scale buffer 0 accumulates them.
    :return: The log-likelihood value reported by BEAGLE.
    """
    if id_attr is None:
        id_attr = "id"
    if leaf_attr is None:
        leaf_attr = "name"

    tree = tree.copy()
    seq_dict = convert_to_dict_lists(seqs)
    val = next(iter(seq_dict.values()))
    n_taxa = len(seq_dict)
    n_patterns = len(val)
    n_states = len(model.pi)
    n_internals = n_taxa - 2
    n_transition_probs = 2 * n_taxa - 3

    n_scale_buffers = 0
    if scaling:
        n_scale_buffers = n_internals + 1

    outgroup = next(iter(tree.get_leaves()))
    reroot(tree, outgroup)
    refresh_ids(tree, attr=id_attr)

    # Sanity check(s): leaves must carry ids 0..n-1 and all nodes together
    # must carry ids 0..2n-3, since the ids are used as BEAGLE buffer indices.
    assert set(getattr(node, id_attr)
               for node in tree.get_leaves()) == set(range(len(tree)))
    assert set(getattr(node, id_attr)
               for node in tree.traverse()) == set(range(2 * len(tree) - 2))

    # Instantiate Beagle

    requirementFlags = 0

    if scaling:
        requirementFlags |= bg.BEAGLE_FLAG_SCALING_MANUAL

    returnInfo = bg.BeagleInstanceDetails()
    instance = bg.beagleCreateInstance(
        n_taxa,  # tips
        n_internals,  # partials
        n_taxa,  # sequences
        n_states,  # states
        n_patterns,  # patterns
        1,  # models
        n_transition_probs,  # transition matrices
        1,  # rate categories
        n_scale_buffers,  # scale buffers
        None,  # resource restrictions
        0,  # length of resource list
        0,  # preferred flags
        requirementFlags,  # required flags
        returnInfo  # output details
    )

    assert instance >= 0

    # From here on the instance exists, so make sure it is released again
    # even if one of the BEAGLE calls or tree operations below raises.
    # NOTE(review): SWIG-allocated arrays/pointers created below are never
    # explicitly freed; check whether `bg` exposes matching delete_* helpers.
    try:
        # Set tip states block
        for leaf in tree.get_leaves():
            states = bg.make_intarray(seq_dict[getattr(leaf, leaf_attr)])
            bg.beagleSetTipStates(instance, getattr(leaf, id_attr), states)

        # Every pattern gets weight 1.
        patternWeights = bg.createPatternWeights([1] * n_patterns)
        bg.beagleSetPatternWeights(instance, patternWeights)

        # create array of state background frequencies
        freqs = bg.createPatternWeights(model.pi)
        bg.beagleSetStateFrequencies(instance, 0, freqs)

        # create an array containing site category weights and rates
        weights = bg.createPatternWeights([1.0])
        rates = bg.createPatternWeights([1.0])
        bg.beagleSetCategoryWeights(instance, 0, weights)
        bg.beagleSetCategoryRates(instance, rates)

        # set the Eigen decomposition
        eigvec = bg.createPatternWeights(model.U.ravel())
        invvec = bg.createPatternWeights(model.U_inv.ravel())
        eigval = bg.createPatternWeights(model.D)
        bg.beagleSetEigenDecomposition(instance, 0, eigvec, invvec, eigval)

        # Collect (matrix index, edge length) pairs and one partial-likelihood
        # update operation per internal node. Reversed level order guarantees
        # children are scheduled before their parent.
        node_list = []
        edge_list = []
        operations = bg.new_BeagleOperationArray(n_internals)
        op_index = 0
        for node in reversed(list(tree.traverse("levelorder"))):
            if node is outgroup:
                continue

            if node.is_root():
                # The root has no edge of its own; its turn is used to
                # register the outgroup edge, which was skipped above.
                node_list.append(getattr(outgroup, id_attr))
                edge_list.append(outgroup.dist)
            else:
                node_list.append(getattr(node, id_attr))
                edge_list.append(node.dist)

            if node.is_leaf():
                continue

            children = node.get_children()
            if outgroup in children:
                # The outgroup edge is handled by the final edge
                # likelihood call.
                children.remove(outgroup)
            left_child, right_child = children

            scaling_index = op_index + 1 if scaling else bg.BEAGLE_OP_NONE

            op_list = [
                getattr(node, id_attr),  # destination partials
                scaling_index,  # destination scale write
                bg.BEAGLE_OP_NONE,  # destination scale read
                getattr(left_child, id_attr),  # child 1 partials
                getattr(left_child, id_attr),  # child 1 transition matrix
                getattr(right_child, id_attr),  # child 2 partials
                getattr(right_child, id_attr),  # child 2 transition matrix
            ]
            op = bg.make_operation(op_list)
            bg.BeagleOperationArray_setitem(operations, op_index, op)
            op_index += 1
        nodeIndices = bg.make_intarray(node_list)
        edgeLengths = bg.make_doublearray(edge_list)

        # tell BEAGLE to populate the transition matrices for the above edge
        # lengths
        bg.beagleUpdateTransitionMatrices(
            instance,  # instance
            0,  # eigenIndex
            nodeIndices,  # probabilityIndices
            None,  # firstDerivativeIndices
            None,  # secondDerivativeIndices
            edgeLengths,  # edgeLengths
            len(node_list))  # count

        # this invokes all the math to carry out the likelihood calculation
        cumulative_scale_index = bg.BEAGLE_OP_NONE
        if scaling:
            cumulative_scale_index = 0
            bg.beagleResetScaleFactors(instance, cumulative_scale_index)
        bg.beagleUpdatePartials(
            instance,  # instance
            operations,  # operations
            n_internals,  # operationCount
            cumulative_scale_index)  # cumulative scale index

        logLp = bg.new_doublep()
        categoryWeightIndex = bg.make_intarray([0])
        stateFrequencyIndex = bg.make_intarray([0])
        cumulativeScaleIndex = bg.make_intarray([cumulative_scale_index])

        indexFocalParent = bg.make_intarray([getattr(tree, id_attr)])
        indexFocalChild = bg.make_intarray([getattr(outgroup, id_attr)])

        bg.beagleCalculateEdgeLogLikelihoods(
            instance,  # instance number
            indexFocalParent,  # indices of parent partialsBuffers
            indexFocalChild,  # indices of child partialsBuffers
            indexFocalChild,  # transition probability matrices for this edge
            None,  # first derivative matrices
            None,  # second derivative matrices
            categoryWeightIndex,  # weights to apply to each partialsBuffer
            stateFrequencyIndex,  # state frequencies for each partialsBuffer
            cumulativeScaleIndex,  # scaleBuffers containing accumulated factors
            1,  # Number of partialsBuffer
            logLp,  # destination for log likelihood
            None,  # destination for first derivative
            None  # destination for second derivative
        )

        return bg.doublep_value(logLp)
    finally:
        bg.beagleFinalizeInstance(instance)
コード例 #4
0
def gradient_loglikelihood_beagle(tree,
                                  seqs,
                                  model=JC,
                                  id_attr=None,
                                  leaf_attr=None,
                                  scaling=False):
    """Calculate branch length gradient of a tree and sequences.

    A temporary BEAGLE instance is created, used, and finalized before the
    function returns. For every non-root node the "lower" partials (data
    below the node) live in buffer ``id`` and the "upper" partials (data on
    the other side of the node's edge) in buffer ``id + n_edges``; the
    derivative of the node's transition matrix is likewise stored at matrix
    index ``id + n_edges``.

    :param tree: Ete3 Tree object representing the tree topology and branch
        lengths. Only a copy is modified. The root's first child is treated
        as the outgroup and the root must have exactly two other children.
    :param seqs: Array-like or dictionary of lists of sequence characters or
        integers.
    :param model: Substitution model in namedtuple "Model". Default:
        Jukes-Cantor.
    :param id_attr: Attribute/feature of each node that uniquely identifies
        it. Default: ``"id"``.
    :param leaf_attr: Attribute/feature of each leaf that uniquely identifies
        it, and uniquely identifies the sequence data row/entry in seqs.
        Default: ``"name"``.
    :param scaling: Not supported; a true value prints a notice and is
        otherwise ignored.
    :return: dict mapping each non-root node's ``id_attr`` value to the first
        derivative that BEAGLE reports for the edge above that node.
    """

    if id_attr is None:
        id_attr = "id"
    if leaf_attr is None:
        leaf_attr = "name"

    tree = tree.copy()
    seq_dict = convert_to_dict_lists(seqs)
    val = next(iter(seq_dict.values()))

    n_taxa = len(seq_dict)
    n_patterns = len(val)
    n_states = len(model.pi)

    n_internals = n_taxa - 2
    n_edges = 2 * n_taxa - 3

    # One lower-partials buffer per internal node plus one upper-partials
    # buffer per edge; one probability and one derivative matrix per edge.
    n_partials = n_internals + n_edges
    n_matrices = 2 * n_edges
    n_operations = n_internals + n_edges

    if scaling:
        print("Scaling not currently supported.")
    # Scaling is always disabled here: no scale buffers are requested, no
    # scaling flag is required and every scale index is BEAGLE_OP_NONE.
    n_scale_buffers = 0
    requirementFlags = 0
    no_scale = bg.BEAGLE_OP_NONE

    outgroup = tree.children[0]
    refresh_ids(tree, attr=id_attr)

    # Sanity check(s): leaves must carry ids 0..n-1 and all nodes together
    # must carry ids 0..2n-3, since the ids are used as BEAGLE buffer indices.
    assert set(getattr(node, id_attr)
               for node in tree.get_leaves()) == set(range(len(tree)))
    assert set(getattr(node, id_attr)
               for node in tree.traverse()) == set(range(2 * len(tree) - 2))

    # Instantiate Beagle

    returnInfo = bg.BeagleInstanceDetails()
    instance = bg.beagleCreateInstance(
        n_taxa,  # tips
        n_partials,  # partials
        n_taxa,  # sequences
        n_states,  # states
        n_patterns,  # patterns
        1,  # models
        n_matrices,  # transition matrices
        1,  # rate categories
        n_scale_buffers,  # scale buffers
        None,  # resource restrictions
        0,  # length of resource list
        0,  # preferred flags
        requirementFlags,  # required flags
        returnInfo  # output details
    )

    assert instance >= 0

    # From here on the instance exists, so make sure it is released again
    # even if one of the BEAGLE calls or tree operations below raises.
    # NOTE(review): SWIG-allocated arrays/pointers created below are never
    # explicitly freed; check whether `bg` exposes matching delete_* helpers.
    try:
        # Set tip states block
        for leaf in tree.get_leaves():
            states = bg.make_intarray(seq_dict[getattr(leaf, leaf_attr)])
            bg.beagleSetTipStates(instance, getattr(leaf, id_attr), states)

        # Every pattern gets weight 1.
        patternWeights = bg.createPatternWeights([1] * n_patterns)
        bg.beagleSetPatternWeights(instance, patternWeights)

        # create array of state background frequencies
        freqs = bg.createPatternWeights(model.pi)
        bg.beagleSetStateFrequencies(instance, 0, freqs)

        # create an array containing site category weights and rates
        weights = bg.createPatternWeights([1.0])
        rates = bg.createPatternWeights([1.0])
        bg.beagleSetCategoryWeights(instance, 0, weights)
        bg.beagleSetCategoryRates(instance, rates)

        # set the Eigen decomposition
        eigvec = bg.createPatternWeights(model.U.ravel())
        invvec = bg.createPatternWeights(model.U_inv.ravel())
        eigval = bg.createPatternWeights(model.D)
        bg.beagleSetEigenDecomposition(instance, 0, eigvec, invvec, eigval)

        # Matrix indices, derivative-matrix indices and edge lengths for
        # every non-root node, plus the list of partials operations.
        node_list = []
        derv_list = []
        edge_list = []
        operations = bg.new_BeagleOperationArray(n_operations)
        op_index = 0

        # Pass 1 (post-order): lower partials, children before parents.
        for node in tree.traverse("postorder"):
            node_id = getattr(node, id_attr)
            if not node.is_root():
                node_list.append(node_id)
                derv_list.append(node_id + n_edges)  # derivative indices
                edge_list.append(node.dist)

            if node.is_leaf():
                continue

            children = node.get_children()
            if outgroup in children:
                # Leave the outgroup out so the root combines two children.
                children.remove(outgroup)
            left_child, right_child = children

            op_list = [
                node_id,  # destination partials
                no_scale,  # destination scale write
                no_scale,  # destination scale read
                getattr(left_child, id_attr),  # child 1 partials
                getattr(left_child, id_attr),  # child 1 transition matrix
                getattr(right_child, id_attr),  # child 2 partials
                getattr(right_child, id_attr),  # child 2 transition matrix
            ]
            op = bg.make_operation(op_list)
            bg.BeagleOperationArray_setitem(operations, op_index, op)
            op_index += 1

        nodeIndices = bg.make_intarray(node_list)
        dervIndices = bg.make_intarray(derv_list)
        edgeLengths = bg.make_doublearray(edge_list)

        # Pass 2 (pre-order): upper partials, parents before children.
        for node in tree.traverse("preorder"):
            if node.is_root():
                continue

            parent = node.up
            upper_address = getattr(node, id_attr) + n_edges
            if not parent.is_root():
                # Combine the parent's upper partials (through the parent's
                # edge) with the sibling's lower partials.
                sibling = node.get_sisters()[0]
                op_list = [
                    upper_address,  # destination partials
                    no_scale,  # destination scale write
                    no_scale,  # destination scale read
                    getattr(parent, id_attr) + n_edges,  # child 1 partials
                    getattr(parent, id_attr),  # child 1 transition matrix
                    getattr(sibling, id_attr),  # child 2 partials
                    getattr(sibling, id_attr),  # child 2 transition matrix
                ]
            else:
                # The root has no upper partials of its own: combine the
                # lower partials of the node's siblings instead.
                children = parent.get_children()
                children.remove(node)

                # TODO: Do I add scaling factors here?
                op_list = [
                    upper_address,  # destination partials
                    no_scale,  # destination scale write
                    no_scale,  # destination scale read
                    getattr(children[0], id_attr),  # child 1 partials
                    getattr(children[0], id_attr),  # child 1 matrix
                    getattr(children[1], id_attr),  # child 2 partials
                    getattr(children[1], id_attr),  # child 2 matrix
                ]
            op = bg.make_operation(op_list)
            bg.BeagleOperationArray_setitem(operations, op_index, op)
            op_index += 1

        # tell BEAGLE to populate the transition matrices (and their first
        # derivatives) for the above edge lengths
        bg.beagleUpdateTransitionMatrices(
            instance,  # instance
            0,  # eigenIndex
            nodeIndices,  # probabilityIndices
            dervIndices,  # firstDerivativeIndices
            None,  # secondDerivativeIndices
            edgeLengths,  # edgeLengths
            len(node_list))  # count

        # this invokes all the math to carry out the likelihood calculation
        bg.beagleUpdatePartials(
            instance,  # instance
            operations,  # operations
            n_operations,  # operationCount
            no_scale)  # cumulative scale index

        categoryWeightIndex = bg.make_intarray([0])
        stateFrequencyIndex = bg.make_intarray([0])
        cumulativeScaleIndex = bg.make_intarray([no_scale])

        logLp = bg.new_doublep()
        dlogLp = bg.new_doublep()
        result = dict()

        for node in tree.traverse('preorder'):
            if node.is_root():
                continue

            node_id = getattr(node, id_attr)
            node_index = bg.make_intarray([node_id])
            # Upper partials and the derivative matrix share the same
            # offset index (id + n_edges) in their respective buffers.
            offset_index = bg.make_intarray([node_id + n_edges])
            bg.beagleCalculateEdgeLogLikelihoods(
                instance,  # instance number
                offset_index,  # indices of parent partialsBuffers
                node_index,  # indices of child partialsBuffers
                node_index,  # transition probability matrices for this edge
                offset_index,  # first derivative matrices
                None,  # second derivative matrices
                categoryWeightIndex,  # weights to apply to each partialsBuffer
                stateFrequencyIndex,  # state frequencies for each partialsBuffer
                cumulativeScaleIndex,  # scaleBuffers containing accumulated factors
                1,  # Number of partialsBuffer
                logLp,  # destination for log likelihood
                dlogLp,  # destination for first derivative
                None  # destination for second derivative
            )

            result[node_id] = bg.doublep_value(dlogLp)

        return result
    finally:
        bg.beagleFinalizeInstance(instance)