예제 #1
0
 def test_invariant_selection_transition(self):
     """
     The selection transition matrix built from fixed small parameters
     should pass MatrixUtil.assert_transition_matrix.
     """
     # positional arguments: selection, nchromosomes, npositions
     params = (1.1, 3, 2)
     transition = get_selection_transition_matrix(*params)
     MatrixUtil.assert_transition_matrix(transition)
예제 #2
0
def get_absorption_variance(P, plain, absorbing):
    """
    Get the variance of the time to absorption from each state.
    Note that if an index is indicated as absorbing by its presence
    in the sequence of absorbing state indices,
    then it will be treated as absorbing
    even if the transition matrix P indicates otherwise.
    @param P: transition matrix
    @param plain: sequence of plain state indices
    @param absorbing: sequence of absorbing state indices
    @return: variance of times to absorption or 0 from absorbing states
    @raise ValueError: if the plain and absorbing indices together
    are not exactly the indices 0..len(P)-1
    """
    # check that P is really a transition matrix
    MatrixUtil.assert_transition_matrix(P)
    # define some state lists
    states = np.hstack((plain, absorbing))
    # check that the index sequences match the size of P;
    # range() is wrapped in list() so that the comparison against
    # the sorted list is meaningful even where range() is not a list
    if sorted(states) != list(range(len(P))):
        raise ValueError('P is not conformant with the index sequences')
    # compute the expected time to absorption from each plain state
    # by solving (I - Q) t = 1, where Q is P restricted to plain states
    Q = P[plain, :][:, plain]
    c = np.ones_like(plain)
    I = np.eye(len(plain))
    t = linalg.solve(I - Q, c)
    # compute the variance from each plain state
    # as 2 * inverse(I - Q) * t - t * (t + 1)
    vplain = 2*linalg.solve(I - Q, t) - t*(t+1)
    # the absorbing states contribute zeros
    v = np.hstack((vplain, np.zeros_like(absorbing)))
    # presumably this reorders the entries from (plain, absorbing) order
    # back to state index order -- confirm against inverse_permutation
    return v[inverse_permutation(states)]
def main(args):
    alpha = args.alpha
    N = args.N
    k = 3
    print 'alpha:', alpha
    print 'N:', N
    print 'k:', k
    print
    M = np.array(list(multinomstate.gen_states(N, k)), dtype=int)
    T = multinomstate.get_inverse_map(M)
    R_mut = wrightcore.create_mutation_abc(M, T)
    R_drift = wrightcore.create_moran_drift_rate_k3(M, T)
    Q = alpha * R_mut + R_drift
    # pick out the correct eigenvector
    W, V = scipy.linalg.eig(Q.T)
    w, v = min(zip(np.abs(W), V.T))
    print 'rate matrix:'
    print Q
    print
    print 'transpose of rate matrix:'
    print Q.T
    print
    print 'eigendecomposition of transpose of rate matrix as integers:'
    print scipy.linalg.eig(Q.T)
    print
    print 'transpose of rate matrix in mathematica notation:'
    print MatrixUtil.m_to_mathematica_string(Q.T.astype(int))
    print
    print 'abs eigenvector corresponding to smallest abs eigenvalue:'
    print np.abs(v)
    print
예제 #4
0
파일: pgmfancy.py 프로젝트: BIGtigr/xgcode
 def test_invariant_selection_transition(self):
     """
     The selection transition matrix built from fixed small parameters
     should pass MatrixUtil.assert_transition_matrix.
     """
     # positional arguments: selection, nchromosomes, npositions
     params = (1.1, 3, 2)
     transition = get_selection_transition_matrix(*params)
     MatrixUtil.assert_transition_matrix(transition)
예제 #5
0
파일: mrate.py 프로젝트: BIGtigr/xgcode
def R_to_distn_nonspectral(R):
    """
    Compute the stationary distribution of a reversible rate matrix.
    The rate matrix must be irreducible and reversible.
    It is not necessarily symmetric.
    If the rate matrix is symmetric then this function is overkill
    because the stationary distribution would be uniform.
    The weights are found by propagating detailed balance ratios
    from state to state rather than by an eigendecomposition.
    @param R: a square rate matrix that supports R[i, j] indexing
    @return: a numpy array of weights normalized to sum to one
    @raise MatrixUtil.MatrixError: if exactly one of R[i, j] and R[j, i]
    is zero for some pair of states, or if not all states are reachable
    from state 0 through the nonzero rates
    """
    nstates = len(R)
    V = set(range(nstates))
    E = set()
    for i in range(nstates):
        for j in range(i):
            # detailed balance with positive weights requires that
            # the two opposing rates are either both zero or both nonzero,
            # so check the pair in both directions
            if R[i, j] or R[j, i]:
                if not (R[i, j] and R[j, i]):
                    raise MatrixUtil.MatrixError(
                        'the matrix is not reversible')
                E.add(frozenset((i, j)))
    nd = graph.g_to_nd(V, E)
    # construct an arbitrary rooted spanning tree of the states
    V_component, D_component = graph.nd_to_dag_component(nd, 0)
    if V_component != V:
        raise MatrixUtil.MatrixError('the matrix is not irreducible')
    # compute the stationary probabilities relative to the first state
    weights = [None] * nstates
    v_to_children = graph.dag_to_cd(V_component, D_component)
    preorder_states = graph.topo_sort(V_component, D_component)
    weights[preorder_states[0]] = 1.0
    for parent in preorder_states:
        for child in v_to_children[parent]:
            # true division keeps the ratio exact for integer rate matrices
            # even when the / operator would floor
            ratio = np.true_divide(R[parent, child], R[child, parent])
            weights[child] = weights[parent] * ratio
    total = sum(weights)
    return np.array(weights) / total
예제 #6
0
def get_type_2_info(P):
    """
    Get the expectation and variance of the time for a type 2 event.
    The expected time for a type 2 event is computed as follows.
    It is the expected number of steps from AB to ab
    conditional on not entering the states AB, Ab, or aB.
    It also includes the delay that it takes
    to leave the final fixed AB state before embarking;
    this delay is added as a geometric random variable.
    @param P: a huge transition matrix which is not modified
    @return: expectation and variance of compensatory substitution time
    """
    MatrixUtil.assert_transition_matrix(P)
    nstates = len(P)
    # define index sequences;
    # plain is built as a real list so that it can be concatenated
    # with the forbidden list below even where range() is not a list
    plain = list(range(4, nstates))
    forbidden = [0, 1, 2]
    target = [3]
    # condition the chain on reaching the target before a forbidden state
    H = hittingtime.get_conditional_transition_matrix(
            P, plain, forbidden, target)
    t = hittingtime.get_absorption_time(
            H, plain + forbidden, target)
    v = hittingtime.get_absorption_variance(
            H, plain + forbidden, target)
    # use the entries at index 0
    t0 = t[0]
    v0 = v[0]
    # add a geometric rv that depends on probability of leaving fixed AB;
    # NOTE: p is zero when P[0, 0] is 1, which makes these divisions by zero
    p = 1 - P[0, 0]
    t0 += (1 - p) / p
    v0 += (1 - p) / (p * p)
    #
    return t0, v0
예제 #7
0
def get_response_content(fs):
    """
    Report two matrices derived from the inverse of the input matrix.
    @param fs: an object whose matrix attribute is the input matrix
    @return: the response text
    """
    # read the matrix
    D = fs.matrix
    n = len(D)
    if n < 3:
        raise HandlingError('the matrix should have at least three rows')
    # invert the matrix and summarize the inverse
    D_inv = np.linalg.inv(D)
    axis_sums = np.sum(D_inv, 0)
    total = np.sum(D_inv)
    as_column = axis_sums[:, np.newaxis]
    as_row = axis_sums[np.newaxis, :]
    # A[i][j] = s[i] + s[j] - total
    A = np.zeros((n, n))
    A[...] = as_column + as_row - total
    # B[i][j] = s[i] * s[j] / total
    B = np.zeros((n, n))
    B[...] = as_column * as_row / total
    # C is the difference between the inverse and B
    C = np.zeros((n, n))
    C[...] = D_inv - B
    # define the response
    out = StringIO()
    print >> out, 'additive:'
    print >> out, MatrixUtil.m_to_string(A)
    print >> out, 'multiplicative:'
    print >> out, MatrixUtil.m_to_string(B)
    # report the sum of each row of C
    for c_row in C:
        print >> out, sum(c_row)
    # return the response
    return out.getvalue()
예제 #8
0
파일: 20090401c.py 프로젝트: BIGtigr/xgcode
def get_response_content(fs):
    """
    Report principal coordinate projections for a tree.
    The projections are computed from the tip distance matrix
    and from a dissimilarity matrix derived from it.
    @param fs: an object whose tree attribute is a newick tree string
    @return: the response text
    """
    # arbitrarily define the size of the alphabet
    alphabet_size = 4
    out = StringIO()
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # the tips are reported in sorted name order
    tip_names = sorted(node.get_name() for node in tree.gen_tips())
    # distance matrix among the tips, and its principal coordinate
    D = np.array(tree.get_distance_matrix(tip_names))
    D_vector = get_principal_coordinate(D)
    # dissimilarity matrix from the distance matrix, and its coordinate
    dissimilarity = np.array([
        [distance_to_dissimilarity(d, alphabet_size) for d in row]
        for row in D])
    dissimilarity_vector = get_principal_coordinate(dissimilarity)
    # each section shows a matrix followed by one projection per tip
    sections = (
        (
            'original distance matrix:',
            D,
            'projections onto the principal coordinate using the original distance matrix:',
            D_vector),
        (
            'dissimilarity matrix:',
            dissimilarity,
            'projections onto the principal coordinate using the dissimilarity matrix:',
            dissimilarity_vector))
    for matrix_title, matrix, vector_title, vector in sections:
        print >> out, matrix_title
        print >> out, MatrixUtil.m_to_string(matrix)
        print >> out
        print >> out, vector_title
        for name, value in zip(tip_names, vector):
            print >> out, '\t'.join((name, str(value)))
        print >> out
    # return the response
    return out.getvalue()
예제 #9
0
def get_two_allele_distribution(N_big, N_small, f0, f1, f_subsample):
    """
    Assumes small genic selection.
    Assumes small mutation.
    The mutational bias does not affect the distribution.
    @param N_big: total number of alleles in the population
    @param N_small: number of alleles sampled from the population
    @param f0: fitness of allele 0
    @param f1: fitness of allele 1
    @param f_subsample: subsampling function
    @return: distribution over all non-fixed population states
    """
    # construct a transition matrix
    nstates = N_big + 1
    P = np.zeros((nstates, nstates))
    for i in range(nstates):
        p0, p1 = wrightfisher.genic_diallelic(f0, f1, i, N_big - i)
        if i == 0:
            P[i, 1] = 1.0
        elif i == N_big:
            P[i, N_big - 1] = 1.0
        else:
            for j in range(nstates):
                logp = StatsUtil.binomial_log_pmf(j, N_big, p0)
                P[i, j] = math.exp(logp)
    # find the stationary distribution
    v = MatrixUtil.get_stationary_distribution(P)
    MatrixUtil.assert_distribution(v)
    if not np.allclose(v, np.dot(v, P)):
        raise ValueError('expected a left eigenvector with eigenvalue 1')
    # return the stationary distribution conditional on dimorphism
    print v
    distn = f_subsample(v, N_small)
    return distn[1:-1] / np.sum(distn[1:-1])
예제 #10
0
def get_type_2_info(P):
    """
    Get the expectation and variance of the time for a type 2 event.
    The expected time for a type 2 event is computed as follows.
    It is the expected number of steps from AB to ab
    conditional on not entering the states AB, Ab, or aB.
    It also includes the delay that it takes
    to leave the final fixed AB state before embarking;
    this delay is added as a geometric random variable.
    @param P: a huge transition matrix which is not modified
    @return: expectation and variance of compensatory substitution time
    """
    MatrixUtil.assert_transition_matrix(P)
    nstates = len(P)
    # define index sequences;
    # plain is built as a real list so that it can be concatenated
    # with the forbidden list below even where range() is not a list
    plain = list(range(4, nstates))
    forbidden = [0, 1, 2]
    target = [3]
    # condition the chain on reaching the target before a forbidden state
    H = hittingtime.get_conditional_transition_matrix(
            P, plain, forbidden, target)
    t = hittingtime.get_absorption_time(
            H, plain + forbidden, target)
    v = hittingtime.get_absorption_variance(
            H, plain + forbidden, target)
    # use the entries at index 0
    t0 = t[0]
    v0 = v[0]
    # add a geometric rv that depends on probability of leaving fixed AB;
    # NOTE: p is zero when P[0, 0] is 1, which makes these divisions by zero
    p = 1 - P[0, 0]
    t0 += (1 - p) / p
    v0 += (1 - p) / (p*p)
    #
    return t0, v0
예제 #11
0
파일: mrate.py 프로젝트: argriffing/xgcode
def get_endpoint_conditioned_expected_occupancy(R, v, a, b, T):
    """
    Compute endpoint conditioned expected occupancy times.
    This follows Holmes and Rubin 2002.
    @param R: rate matrix
    @param v: stationary distribution
    @param a: integer state index of initial state
    @param b: integer state index of final state
    @param T: elapsed time
    @return: endpoint conditioned expected amount of time spent in each state
    @raise Exception: if one of the internal consistency checks fails
    """
    nstates = len(v)
    psi = np.sqrt(v)
    # rescale the rate matrix by square roots of stationary probabilities;
    # the result is asserted to be symmetric
    S = (R.T * psi).T / psi
    MatrixUtil.assert_symmetric(S)
    w, U = scipy.linalg.eigh(S)
    gram = np.dot(U, U.T)
    if not np.allclose(gram, np.eye(nstates)):
        raise Exception('U should be orthogonal')
    # cross-check the spectral transition probability
    # against the matrix exponential
    P = scipy.linalg.expm(T*R)
    endpoint_ratio = psi[b] / psi[a]
    # the Mab is Holmes and Rubin 2002 notation
    Mab = endpoint_ratio * np.sum(U[a] * U[b] * np.exp(T*w))
    if not np.allclose(P[a,b], Mab):
        raise Exception('not close: %s %s' % (P[a,b], Mab))
    coeff = endpoint_ratio / Mab
    K = _holmes_rubin_2002_kernel(w, T)
    summations = [
            _holmes_rubin_2002_summation(U, a, b, state, K)
            for state in range(nstates)]
    occupancy = coeff * np.array(summations)
    # the occupancy times should account for all of the elapsed time
    if not np.allclose(T, np.sum(occupancy)):
        msg = (
                'the expectected occupancy times should add up '
                'to the total time')
        raise Exception(msg)
    return occupancy
예제 #12
0
파일: 20080618b.py 프로젝트: BIGtigr/xgcode
def hard_coded_analysis_a():
    tree_string = '(a:1, (b:2, d:5):1, c:4);'
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    states = []
    id_list = []
    for state, id_ in sorted((node.name, id(node))
            for node in tree.gen_tips()):
        id_list.append(id_)
        states.append(state)
    for node in tree.gen_internal_nodes():
        id_list.append(id(node))
        states.append('')
    n = len(states)
    for method in ('tips', 'full'):
        # get the distance matrix from the tree
        if method == 'tips':
            print 'leaves only:'
            distance_matrix = tree.get_distance_matrix(states)
        else:
            print 'leaves and internal nodes:'
            distance_matrix = tree.get_full_distance_matrix(id_list)
        print 'distance matrix from the tree:'
        print MatrixUtil.m_to_string(distance_matrix)
        # get the equivalent euclidean points
        z_points = list(gen_euclidean_points(distance_matrix))
        for state, point in zip(states, z_points):
            print state, point
        # get the distance matrix from the transformed points
        print 'distance matrix from the transformed points:'
        distance_matrix = get_euclidean_distance_matrix(z_points)
        print MatrixUtil.m_to_string(distance_matrix)
        print
예제 #13
0
def get_response_content(fs):
    """
    Report a split criterion computed from four tips of a tree.
    @param fs: an object with the tree string, the four tip labels
    lhs_a, lhs_b, rhs_a, rhs_b, and the show_* display flags
    @return: the response text
    @raise HandlingError: if a label is not the name of a tip
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # every requested label must be the name of a tip of the tree
    tip_name_set = set(node.get_name() for node in tree.gen_tips())
    requested = set([fs.lhs_a, fs.lhs_b, fs.rhs_a, fs.rhs_b])
    bad_names = requested - tip_name_set
    if bad_names:
        raise HandlingError(
                'these labels are not valid tips: %s' % ', '.join(bad_names))
    # get the distance matrix with tips in sorted name order
    ordered_names = sorted(node.get_name() for node in tree.gen_tips())
    D = np.array(tree.get_distance_matrix(ordered_names))
    # get the response matrix
    R = Clustering.get_R_stone(D)
    # look up the matrix index of each requested label
    name_to_index = dict((name, i) for i, name in enumerate(ordered_names))
    la = name_to_index[fs.lhs_a]
    lb = name_to_index[fs.lhs_b]
    ra = name_to_index[fs.rhs_a]
    rb = name_to_index[fs.rhs_b]
    # the two by two matrix has lhs labels as rows and rhs labels as columns
    R_reduced = np.zeros((2, 2))
    for r, left in enumerate((la, lb)):
        for c, right in enumerate((ra, rb)):
            R_reduced[r][c] = R[left][right]
    # determinants of tiny magnitude are reported as zero
    criterion = np.linalg.det(R_reduced)
    if abs(criterion) < 1e-13:
        criterion = 0
    # in analogy to the four point condition,
    # use two different ways of calculating the distance
    blen_a = (D[la][rb] + D[lb][ra] - D[la][lb] - D[ra][rb]) / 2.0
    blen_b = (D[la][ra] + D[lb][rb] - D[la][lb] - D[ra][rb]) / 2.0
    blen = min(blen_a, blen_b)
    # define the response as paragraphs of lines
    paragraphs = []
    if fs.show_response:
        paragraphs.append([
            'response matrix with rows ordered alphabetically by leaf label:',
            MatrixUtil.m_to_string(R)])
    if fs.show_reduced_response:
        paragraphs.append([
            '2x2 submatrix of the response matrix:',
            MatrixUtil.m_to_string(R_reduced)])
    # the determinant is always reported
    paragraphs.append([
        'determinant of the 2x2 submatrix of the response matrix:',
        str(criterion)])
    if fs.show_blen:
        paragraphs.append([
            'branch length defined by the split:',
            str(blen)])
    # return the response
    return '\n\n'.join('\n'.join(p) for p in paragraphs) + '\n'
예제 #14
0
def get_response_content(fs):
    """
    Plot a sampled endpoint conditioned path of mutant counts.
    @param fs: an object with the population, mutation, endpoint,
    generation count, and image format settings
    @return: the image data produced by the R plotter
    @raise RUtil.RError: if the plotter returns a nonzero code
    """
    # precompute and check the component transition matrices
    drift_selection = pgmsinglesite.create_drift_selection_transition_matrix(
            fs.npop, fs.selection_ratio)
    MatrixUtil.assert_transition_matrix(drift_selection)
    mutation = pgmsinglesite.create_mutation_transition_matrix(
            fs.npop, fs.mutation_ab, fs.mutation_ba)
    MatrixUtil.assert_transition_matrix(mutation)
    # sample a path using the product of the two matrices
    P = np.dot(drift_selection, mutation)
    sampled_path = PathSampler.sample_endpoint_conditioned_path(
            fs.nmutants_initial, fs.nmutants_final, fs.ngenerations, P)
    # each table row is a generation index and a mutant count
    rows = [[generation, nmutants]
            for generation, nmutants in enumerate(sampled_path)]
    # get the R table and the R script
    column_names = ['generation', 'number.of.mutants']
    table_string = RUtil.get_table_string(rows, column_names)
    script = get_ggplot()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    plot_result = RUtil.run_plotter(table_string, script, device_name)
    retcode, r_out, r_err, image_data = plot_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
예제 #15
0
def get_response_content(fs):
    """
    Report a contrast matrix for the tips of a tree.
    @param fs: an object with the tree string, the ordered labels,
    the epsilon threshold, and the output format flags
    @return: the response text
    @raise HandlingError: if the labels are not exactly the tip names
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    # the labels must be exactly the names of the tips of the tree
    tip_names = set(node.get_name() for node in tree.gen_tips())
    if set(ordered_labels) != tip_names:
        raise HandlingError(
                'the labels should match the labels of the leaves of the tree')
    # go from the tip distance matrix to the contrast matrix
    D = np.array(tree.get_distance_matrix(ordered_labels))
    L = Euclid.edm_to_laplacian(D)
    w, v = get_eigendecomposition(L)
    C = get_contrast_matrix(w, v)
    # set elements with small absolute value to zero
    C[abs(C) < fs.epsilon] = 0
    # pick the formatter for the first format flag that is set, if any
    if fs.plain_format:
        formatter = MatrixUtil.m_to_string
    elif fs.matlab_format:
        formatter = MatrixUtil.m_to_matlab_string
    elif fs.r_format:
        formatter = MatrixUtil.m_to_R_string
    else:
        formatter = None
    # write the response
    out = StringIO()
    if formatter is not None:
        print >> out, formatter(C)
    return out.getvalue()
예제 #16
0
파일: Euclid.py 프로젝트: BIGtigr/xgcode
def laplacian_to_adjacency(L):
    """
    Subtract the matrix from the diagonal matrix of its own diagonal.
    @param L: a laplacian matrix
    @return: an adjacency matrix
    """
    MatrixUtil.assert_square(L)
    diagonal_entries = np.diag(L)
    diagonal_matrix = np.diag(diagonal_entries)
    return diagonal_matrix - L
예제 #17
0
파일: mrate.py 프로젝트: BIGtigr/xgcode
def get_endpoint_conditioned_expected_occupancy(R, v, a, b, T):
    """
    Compute endpoint conditioned expected occupancy times.
    This follows Holmes and Rubin 2002.
    @param R: rate matrix
    @param v: stationary distribution
    @param a: integer state index of initial state
    @param b: integer state index of final state
    @param T: elapsed time
    @return: endpoint conditioned expected amount of time spent in each state
    @raise Exception: if one of the internal consistency checks fails
    """
    nstates = len(v)
    psi = np.sqrt(v)
    # rescale the rate matrix by square roots of stationary probabilities;
    # the result is asserted to be symmetric
    S = (R.T * psi).T / psi
    MatrixUtil.assert_symmetric(S)
    w, U = scipy.linalg.eigh(S)
    gram = np.dot(U, U.T)
    if not np.allclose(gram, np.eye(nstates)):
        raise Exception('U should be orthogonal')
    # cross-check the spectral transition probability
    # against the matrix exponential
    P = scipy.linalg.expm(T * R)
    endpoint_ratio = psi[b] / psi[a]
    # the Mab is Holmes and Rubin 2002 notation
    Mab = endpoint_ratio * np.sum(U[a] * U[b] * np.exp(T * w))
    if not np.allclose(P[a, b], Mab):
        raise Exception('not close: %s %s' % (P[a, b], Mab))
    coeff = endpoint_ratio / Mab
    K = _holmes_rubin_2002_kernel(w, T)
    summations = [
        _holmes_rubin_2002_summation(U, a, b, state, K)
        for state in range(nstates)]
    occupancy = coeff * np.array(summations)
    # the occupancy times should account for all of the elapsed time
    if not np.allclose(T, np.sum(occupancy)):
        msg = ('the expectected occupancy times should add up '
               'to the total time')
        raise Exception(msg)
    return occupancy
예제 #18
0
파일: Euclid.py 프로젝트: BIGtigr/xgcode
def laplacian_to_edm(L):
    """
    Convert by way of laplacian_to_dccov followed by dccov_to_edm.
    @param L: a laplacian matrix
    @return: a Euclidean distance matrix
    """
    MatrixUtil.assert_square(L)
    dccov = laplacian_to_dccov(L)
    return dccov_to_edm(dccov)
예제 #19
0
파일: Euclid.py 프로젝트: BIGtigr/xgcode
def adjacency_to_laplacian(A):
    """
    Subtract the matrix from the diagonal matrix of its sums along axis 0.
    @param A: an adjacency matrix
    @return: a laplacian matrix
    """
    MatrixUtil.assert_square(A)
    axis_sums = np.sum(A, 0)
    diagonal_matrix = np.diag(axis_sums)
    return diagonal_matrix - A
예제 #20
0
 def test_row_sums(self):
     """
     The test mutation matrix and the transition matrix built from it
     should both pass MatrixUtil.assert_transition_matrix.
     """
     N, k = 20, 4
     mutation, fitness = get_test_mutation_fitness()
     P = get_transition_matrix(N, k, mutation, fitness)
     # check the mutation matrix first and then the full matrix
     for matrix in (mutation, P):
         MatrixUtil.assert_transition_matrix(matrix)
예제 #21
0
def bott_duffin(M, v):
    """
    Compute a constrained generalized inverse.
    Specifically, this is the Bott-Duffin inverse of M
    constrained to the orthogonal complement of v.
    This function assumes that v has rank 1,
    although Bott-Duffin inverses are also defined
    for inverses constrained to orthogonal complements
    of higher dimensional subspaces.
    Maybe this could be a separate python function
    where v is replaced by a shape-2 numpy array.
    @param M: a matrix
    @param v: a vector
    @return: the constrained generalized inverse of M
    @raise ValueError: if the shapes are incompatible or v is zero
    """
    # check the shapes of the input matrix and vector
    MatrixUtil.assert_1d(v)
    n = len(v)
    if M.shape != (n, n):
        raise ValueError('M and v have incompatible shapes')
    # check that v is nonzero
    v_dot_v = np.inner(v, v)
    if not v_dot_v:
        raise ValueError('expected nonzero v')
    # compute the orthogonal projection onto v;
    # true division is requested explicitly so that an integer vector
    # is not subject to floor division by the / operator
    P = np.true_divide(np.outer(v, v), v_dot_v)
    # compute the orthogonal projection onto the orthogonal complement of v
    I = np.eye(n)
    C = I - P
    # compute the constrained generalized inverse as C * inverse(M*C + P)
    B = np.dot(C, np.linalg.inv(np.dot(M, C) + P))
    return B
예제 #22
0
파일: Euclid.py 프로젝트: BIGtigr/xgcode
def dccov_to_edm(HSH):
    """
    Check that the input is square and then delegate to cov_to_edm.
    @param HSH: a double centered covariance matrix
    @return: a Euclidean distance matrix
    """
    MatrixUtil.assert_square(HSH)
    edm = cov_to_edm(HSH)
    return edm
예제 #23
0
def get_response_content(fs):
    """
    Report principal coordinate projections for a tree.
    The projections are computed from the tip distance matrix
    and from a dissimilarity matrix derived from it.
    @param fs: an object whose tree attribute is a newick tree string
    @return: the response text
    """
    # arbitrarily define the size of the alphabet
    alphabet_size = 4
    out = StringIO()
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # the tips are reported in sorted name order
    tip_names = sorted(node.get_name() for node in tree.gen_tips())
    # distance matrix among the tips, and its principal coordinate
    D = np.array(tree.get_distance_matrix(tip_names))
    D_vector = get_principal_coordinate(D)
    # dissimilarity matrix from the distance matrix, and its coordinate
    dissimilarity = np.array([
        [distance_to_dissimilarity(d, alphabet_size) for d in row]
        for row in D])
    dissimilarity_vector = get_principal_coordinate(dissimilarity)
    # each section shows a matrix followed by one projection per tip
    sections = (
        (
            'original distance matrix:',
            D,
            'projections onto the principal coordinate using the original distance matrix:',
            D_vector),
        (
            'dissimilarity matrix:',
            dissimilarity,
            'projections onto the principal coordinate using the dissimilarity matrix:',
            dissimilarity_vector))
    for matrix_title, matrix, vector_title, vector in sections:
        print >> out, matrix_title
        print >> out, MatrixUtil.m_to_string(matrix)
        print >> out
        print >> out, vector_title
        for name, value in zip(tip_names, vector):
            print >> out, '\t'.join((name, str(value)))
        print >> out
    # return the response
    return out.getvalue()
예제 #24
0
def get_response_content(fs):
    """
    Report a contrast matrix for the tips of a tree.
    @param fs: an object with the tree string, the ordered labels,
    the epsilon threshold, and the output format flags
    @return: the response text
    @raise HandlingError: if the labels are not exactly the tip names
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    # the labels must be exactly the names of the tips of the tree
    tip_names = set(node.get_name() for node in tree.gen_tips())
    if set(ordered_labels) != tip_names:
        raise HandlingError(
                'the labels should match the labels of the leaves of the tree')
    # go from the tip distance matrix to the contrast matrix
    D = np.array(tree.get_distance_matrix(ordered_labels))
    L = Euclid.edm_to_laplacian(D)
    w, v = get_eigendecomposition(L)
    C = get_contrast_matrix(w, v)
    # set elements with small absolute value to zero
    C[abs(C) < fs.epsilon] = 0
    # pick the formatter for the first format flag that is set, if any
    if fs.plain_format:
        formatter = MatrixUtil.m_to_string
    elif fs.matlab_format:
        formatter = MatrixUtil.m_to_matlab_string
    elif fs.r_format:
        formatter = MatrixUtil.m_to_R_string
    else:
        formatter = None
    # write the response
    out = StringIO()
    if formatter is not None:
        print >> out, formatter(C)
    return out.getvalue()
def main(args):
    alpha = args.alpha
    N = args.N
    k = 3
    print 'alpha:', alpha
    print 'N:', N
    print 'k:', k
    print
    M = np.array(list(multinomstate.gen_states(N, k)), dtype=int)
    T = multinomstate.get_inverse_map(M)
    R_mut = wrightcore.create_mutation_abc(M, T)
    R_drift = wrightcore.create_moran_drift_rate_k3(M, T)
    Q = alpha * R_mut + R_drift
    # pick out the correct eigenvector
    W, V = scipy.linalg.eig(Q.T)
    w, v = min(zip(np.abs(W), V.T))
    print 'rate matrix:'
    print Q
    print
    print 'transpose of rate matrix:'
    print Q.T
    print
    print 'eigendecomposition of transpose of rate matrix as integers:'
    print scipy.linalg.eig(Q.T)
    print
    print 'transpose of rate matrix in mathematica notation:'
    print MatrixUtil.m_to_mathematica_string(Q.T.astype(int))
    print
    print 'abs eigenvector corresponding to smallest abs eigenvalue:'
    print np.abs(v)
    print
예제 #26
0
def get_response_content(fs):
    """
    Report two matrices derived from the inverse of the input matrix.
    @param fs: an object whose matrix attribute is the input matrix
    @return: the response text
    """
    # read the matrix
    D = fs.matrix
    n = len(D)
    if n < 3:
        raise HandlingError('the matrix should have at least three rows')
    # invert the matrix and summarize the inverse
    D_inv = np.linalg.inv(D)
    axis_sums = np.sum(D_inv, 0)
    total = np.sum(D_inv)
    as_column = axis_sums[:, np.newaxis]
    as_row = axis_sums[np.newaxis, :]
    # A[i][j] = s[i] + s[j] - total
    A = np.zeros((n, n))
    A[...] = as_column + as_row - total
    # B[i][j] = s[i] * s[j] / total
    B = np.zeros((n, n))
    B[...] = as_column * as_row / total
    # C is the difference between the inverse and B
    C = np.zeros((n, n))
    C[...] = D_inv - B
    # define the response
    out = StringIO()
    print >> out, 'additive:'
    print >> out, MatrixUtil.m_to_string(A)
    print >> out, 'multiplicative:'
    print >> out, MatrixUtil.m_to_string(B)
    # report the sum of each row of C
    for c_row in C:
        print >> out, sum(c_row)
    # return the response
    return out.getvalue()
예제 #27
0
 def test_mutation(self):
     """
     The mutation transition matrix built from fixed small parameters
     should pass MatrixUtil.assert_transition_matrix.
     """
     # positional arguments: npop, mutation_ab, mutation_ba
     params = (10, 0.1, 0.2)
     transition = create_mutation_transition_matrix(*params)
     MatrixUtil.assert_transition_matrix(transition)
예제 #28
0
파일: 20120711a.py 프로젝트: BIGtigr/xgcode
def get_response_content(fs):
    """
    Plot a sampled endpoint conditioned path of mutant counts.
    @param fs: an object with the population, mutation, endpoint,
    generation count, and image format settings
    @return: the image data produced by the R plotter
    @raise RUtil.RError: if the plotter returns a nonzero code
    """
    # precompute and check the component transition matrices
    drift_selection = pgmsinglesite.create_drift_selection_transition_matrix(
        fs.npop, fs.selection_ratio)
    MatrixUtil.assert_transition_matrix(drift_selection)
    mutation = pgmsinglesite.create_mutation_transition_matrix(
        fs.npop, fs.mutation_ab, fs.mutation_ba)
    MatrixUtil.assert_transition_matrix(mutation)
    # sample a path using the product of the two matrices
    P = np.dot(drift_selection, mutation)
    sampled_path = PathSampler.sample_endpoint_conditioned_path(
        fs.nmutants_initial, fs.nmutants_final, fs.ngenerations, P)
    # each table row is a generation index and a mutant count
    rows = [[generation, nmutants]
            for generation, nmutants in enumerate(sampled_path)]
    # get the R table and the R script
    column_names = ['generation', 'number.of.mutants']
    table_string = RUtil.get_table_string(rows, column_names)
    script = get_ggplot()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    plot_result = RUtil.run_plotter(table_string, script, device_name)
    retcode, r_out, r_err, image_data = plot_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
예제 #29
0
파일: Euclid.py 프로젝트: BIGtigr/xgcode
def edm_to_dccov(D):
    """
    Scale the result of MatrixUtil.double_centered by minus one half.
    @param D: a Euclidean distance matrix
    @return: a double centered covariance matrix
    """
    MatrixUtil.assert_square(D)
    centered = MatrixUtil.double_centered(D)
    return -(0.5) * centered
예제 #30
0
 def test_row_sums(self):
     """
     The test mutation matrix and the transition matrix built from it
     should both pass MatrixUtil.assert_transition_matrix.
     """
     N, k = 20, 4
     mutation, fitness = get_test_mutation_fitness()
     P = get_transition_matrix(N, k, mutation, fitness)
     # check the mutation matrix first and then the full matrix
     for matrix in (mutation, P):
         MatrixUtil.assert_transition_matrix(matrix)
예제 #31
0
def get_response_content(fs):
    """
    Report a split criterion computed from four tips of a tree.
    @param fs: an object with the tree string, the four tip labels
    lhs_a, lhs_b, rhs_a, rhs_b, and the show_* display flags
    @return: the response text
    @raise HandlingError: if a label is not the name of a tip
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # every requested label must be the name of a tip of the tree
    tip_name_set = set(node.get_name() for node in tree.gen_tips())
    requested = set([fs.lhs_a, fs.lhs_b, fs.rhs_a, fs.rhs_b])
    bad_names = requested - tip_name_set
    if bad_names:
        raise HandlingError(
                'these labels are not valid tips: %s' % ', '.join(bad_names))
    # get the distance matrix with tips in sorted name order
    ordered_names = sorted(node.get_name() for node in tree.gen_tips())
    D = np.array(tree.get_distance_matrix(ordered_names))
    # get the response matrix
    R = Clustering.get_R_stone(D)
    # look up the matrix index of each requested label
    name_to_index = dict((name, i) for i, name in enumerate(ordered_names))
    la = name_to_index[fs.lhs_a]
    lb = name_to_index[fs.lhs_b]
    ra = name_to_index[fs.rhs_a]
    rb = name_to_index[fs.rhs_b]
    # the two by two matrix has lhs labels as rows and rhs labels as columns
    R_reduced = np.zeros((2,2))
    for r, left in enumerate((la, lb)):
        for c, right in enumerate((ra, rb)):
            R_reduced[r][c] = R[left][right]
    # determinants of tiny magnitude are reported as zero
    criterion = np.linalg.det(R_reduced)
    if abs(criterion) < 1e-13:
        criterion = 0
    # in analogy to the four point condition,
    # use two different ways of calculating the distance
    blen_a = (D[la][rb] + D[lb][ra] - D[la][lb] - D[ra][rb]) / 2.0
    blen_b = (D[la][ra] + D[lb][rb] - D[la][lb] - D[ra][rb]) / 2.0
    blen = min(blen_a, blen_b)
    # define the response as paragraphs of lines
    paragraphs = []
    if fs.show_response:
        paragraphs.append([
            'response matrix with rows ordered alphabetically by leaf label:',
            MatrixUtil.m_to_string(R)])
    if fs.show_reduced_response:
        paragraphs.append([
            '2x2 submatrix of the response matrix:',
            MatrixUtil.m_to_string(R_reduced)])
    # the determinant is always reported
    paragraphs.append([
        'determinant of the 2x2 submatrix of the response matrix:',
        str(criterion)])
    if fs.show_blen:
        paragraphs.append([
            'branch length defined by the split:',
            str(blen)])
    # return the response
    return '\n\n'.join('\n'.join(p) for p in paragraphs) + '\n'
예제 #32
0
파일: Euclid.py 프로젝트: BIGtigr/xgcode
def q_to_cov(Q):
    """
    Scale the negated Q matrix by 1 / (2*(n-2)) where n is its order.
    Note that the scaling factor is undefined when n is 2.
    @param Q: a neighbor joining Q matrix
    @return: something like a covariance matrix
    """
    MatrixUtil.assert_square(Q)
    n = len(Q)
    # true division is requested explicitly so that an integer matrix
    # is not subject to floor division by the / operator
    S = np.true_divide(-Q, 2*(n-2))
    return S
예제 #33
0
파일: 20090511a.py 프로젝트: BIGtigr/xgcode
def get_response_content(fs):
    """
    Compare two ways of computing the full distance matrix of a tree.
    The first matrix is requested directly from the tree object.
    The second is twice the inverse of a matrix assembled from
    the weighted adjacency matrix, the degree matrix and a rank one term.
    @param fs: form data providing the newick string fs.tree and the flags
    fs.show_direct_d, fs.show_clever_d and fs.show_closeness
    @return: the response text with one paragraph per requested item
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # get the arbitrarily ordered names
    ordered_names = set(node.get_name() for node in tree.preorder())
    # get the corresponding ordered ids
    name_to_id = dict((node.get_name(), id(node)) for node in tree.preorder())
    ordered_ids = [name_to_id[name] for name in ordered_names]
    # get the full distance matrix
    D_direct = np.array(tree.get_full_distance_matrix(ordered_ids))
    # get the full weighted adjacency matrix
    A = np.array(tree.get_affinity_matrix(ordered_ids))
    # get the full degree matrix
    degree_matrix = np.diag(np.sum(A, 0))
    n = len(ordered_names)
    # Add up the reciprocals of the nonzero entries strictly above
    # the diagonal of A, visiting each unordered pair of nodes once.
    # The original author describes this as the sum of the branch lengths.
    gamma_inv = 0
    for i in range(n):
        for j in range(i + 1, n):
            if A[i][j]:
                gamma_inv += 1.0 / A[i][j]
    # NOTE(review): this division fails when A has no nonzero entry
    # above the diagonal, for example for a single node tree.
    gamma = 1.0 / gamma_inv
    # get the delta vector, which is two minus the number of nonzero
    # entries in each row of A
    d = np.array([2 - sum(1 for x in row if x) for row in A])
    # get the full distance matrix using the clever formula
    D_clever = 2*np.linalg.inv(A + gamma * np.outer(d, d) - degree_matrix)
    # check whether the distance matrices are close
    if np.allclose(D_direct, D_clever):
        closeness_string = 'the distance matrices are close'
    else:
        closeness_string = 'the distance matrices are not close'
    # define the response
    paragraphs = []
    if fs.show_direct_d:
        paragraphs.append([
                'directly calculated distance matrix:',
                MatrixUtil.m_to_string(D_direct)])
    if fs.show_clever_d:
        paragraphs.append([
                'cleverly calculated distance matrix:',
                MatrixUtil.m_to_string(D_clever)])
    if fs.show_closeness:
        paragraphs.append([
                'closeness:',
                closeness_string])
    # return the response
    return '\n\n'.join('\n'.join(p) for p in paragraphs) + '\n'
예제 #34
0
def get_response_content(fs):
    """
    Plot state probabilities conditioned on both endpoints of a path.
    For each generation strictly between the first and the last,
    the probability of each state is proportional to the probability
    of reaching it from the initial state times the probability
    of reaching the final state from it in the remaining generations.
    @param fs: form data; this function reads npop, selection_ratio,
    mutation_ab, mutation_ba, ngenerations, nmutants_initial,
    nmutants_final and imageformat
    @return: the image data returned by the R plotter
    @raise RUtil.RError: if the plotter gives a nonzero return code
    """
    # column names of the R table
    headers = [
            'generation',
            'number.of.mutants',
            'probability',
            'log.prob',
            ]
    # build and validate the two component transition matrices
    drift_selection = pgmsinglesite.create_drift_selection_transition_matrix(
            fs.npop, fs.selection_ratio)
    MatrixUtil.assert_transition_matrix(drift_selection)
    mutation = pgmsinglesite.create_mutation_transition_matrix(
            fs.npop, fs.mutation_ab, fs.mutation_ba)
    MatrixUtil.assert_transition_matrix(mutation)
    # the single generation transition matrix is their product
    P = np.dot(drift_selection, mutation)
    # M[k, g] is the endpoint conditioned probability of state k
    # at generation g; the first and last columns are point masses.
    nstates = fs.npop + 1
    ngen = fs.ngenerations
    first = fs.nmutants_initial
    last = fs.nmutants_final
    M = np.zeros((nstates, ngen))
    M[first, 0] = 1.0
    M[last, ngen - 1] = 1.0
    for g in range(1, ngen - 1):
        # g steps lead from the start, the rest lead to the end
        head = np.linalg.matrix_power(P, g)
        tail = np.linalg.matrix_power(P, ngen - 1 - g)
        weights = np.array(
                [head[first, k] * tail[k, last] for k in range(nstates)],
                dtype=float)
        M[:, g] = weights / np.sum(weights)
    # one table row per generation and state, generation varying slowest
    arr = []
    for g in range(ngen):
        for k in range(nstates):
            p = M[k, g]
            # a zero probability gets a log probability of minus infinity
            logp = math.log(p) if p else float('-inf')
            arr.append([g, k, p, logp])
    # get the R table and the R script
    table_string = RUtil.get_table_string(arr, headers)
    script = get_ggplot()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
예제 #35
0
def bott_duffin_const(M):
    """
    Compute a constrained generalized inverse of a square matrix.
    This is the Bott-Duffin inverse of M constrained to
    the orthogonal complement of the constant vector.
    The work is delegated to bott_duffin, which is given M together
    with a vector of ones that has one entry per row of M.
    @param M: a square matrix
    @return: whatever bott_duffin returns for M and the vector of ones
    """
    MatrixUtil.assert_square(M)
    ones = np.ones(len(M))
    return bott_duffin(M, ones)
예제 #36
0
 def test_invariant_mutation_transition_s(self):
     """
     Check the matrix from get_mutation_transition_matrix_s.
     The matrix is built for three chromosomes, two positions and
     a mutation parameter of 0.01, and it must pass
     MatrixUtil.assert_transition_matrix.
     """
     nchromosomes, npositions = 3, 2
     mutation = 0.01
     # describe the state space before building the matrix
     state_space_info = get_state_space_info(nchromosomes, npositions)
     ci_to_short, short_to_count, sorted_chrom_lists = state_space_info
     transition = get_mutation_transition_matrix_s(
             ci_to_short, short_to_count, sorted_chrom_lists,
             mutation, nchromosomes, npositions)
     MatrixUtil.assert_transition_matrix(transition)
예제 #37
0
파일: pgmfancy.py 프로젝트: BIGtigr/xgcode
 def test_invariant_mutation_transition_s(self):
     """
     Validate the output of get_mutation_transition_matrix_s.
     The state space is the one reported by get_state_space_info
     for three chromosomes and two positions, the mutation parameter
     is 0.01, and the result is checked with
     MatrixUtil.assert_transition_matrix.
     """
     mutation = 0.01
     nchromosomes = 3
     npositions = 2
     info = get_state_space_info(nchromosomes, npositions)
     ci_to_short, short_to_count, sorted_chrom_lists = info
     # the state space description comes first in the argument list
     matrix = get_mutation_transition_matrix_s(
         ci_to_short, short_to_count, sorted_chrom_lists,
         mutation, nchromosomes, npositions)
     MatrixUtil.assert_transition_matrix(matrix)
예제 #38
0
파일: 20090413a.py 프로젝트: BIGtigr/xgcode
def get_response_content(fs):
    """
    Reduce a laplacian matrix with respect to a selection of taxa.
    The reduced laplacian comes from SchurAlgebra.mschur applied to
    the indices of the selected labels, and the reduced distance matrix
    comes from Euclid.laplacian_to_edm applied to the reduced laplacian.
    NOTE(review): the reported labels are those of the unselected taxa,
    so mschur presumably eliminates the selected indices -- confirm.
    @param fs: form data providing the matrix fs.laplacian and
    the multi-line strings fs.labels and fs.selection
    @return: text showing the new laplacian matrix, the new distance
    matrix and the labels of the remaining taxa
    @raise HandlingError: if the labels are missing or not unique,
    if fewer than two taxa are selected, if no taxon is left unselected,
    if a selected taxon is not among the labels, or if the number
    of labels does not match the number of rows of the laplacian matrix
    """
    # read the matrix
    L = fs.laplacian
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    if not ordered_labels:
        raise HandlingError('no ordered taxa were provided')
    if len(ordered_labels) != len(set(ordered_labels)):
        raise HandlingError('the ordered taxa should be unique')
    # get the label selection and its complement
    min_selected_labels = 2
    min_unselected_labels = 1
    selected_labels = set(Util.get_stripped_lines(StringIO(fs.selection)))
    if len(selected_labels) < min_selected_labels:
        raise HandlingError(
            'at least %d taxa should be selected to be grouped' %
            min_selected_labels)
    # get the set of labels in the complement
    unselected_labels = set(ordered_labels) - selected_labels
    if len(unselected_labels) < min_unselected_labels:
        raise HandlingError(
            'at least %d taxa should remain outside the selected group' %
            min_unselected_labels)
    # assert that no bizarre labels were selected
    weird_labels = selected_labels - set(ordered_labels)
    if weird_labels:
        raise HandlingError('some selected taxa are invalid: ' +
                            str(weird_labels))
    # assert that the size of the laplacian matrix is compatible
    # with the number of ordered labels
    if len(L) != len(ordered_labels):
        raise HandlingError(
            'the number of listed taxa does not match the number of rows in the laplacian matrix'
        )
    # get the set of selected indices and its complement in sorted order
    n = len(L)
    index_selection = set(i for i, label in enumerate(ordered_labels)
                          if label in selected_labels)
    index_complement = [i for i in range(n) if i not in index_selection]
    # calculate the new laplacian matrix and its distance matrix
    L_small = SchurAlgebra.mschur(L, index_selection)
    D_small = Euclid.laplacian_to_edm(L_small)
    # list the matrices and the labels of their rows, one item per line,
    # with an empty line between consecutive sections
    lines = [
        'new laplacian matrix:',
        MatrixUtil.m_to_string(L_small),
        '',
        'new distance matrix:',
        MatrixUtil.m_to_string(D_small),
        '',
        'new taxon labels:',
    ]
    lines.extend(ordered_labels[index] for index in index_complement)
    # write the response, ending with a newline
    return '\n'.join(lines) + '\n'
예제 #39
0
파일: Euclid.py 프로젝트: BIGtigr/xgcode
def laplacian_to_dccov(L):
    """
    Convert a laplacian matrix to a double centered covariance matrix.
    The constant matrix whose entries are all 1/n is subtracted before
    the pseudoinversion and is added back to the result afterwards.
    @param L: a laplacian matrix
    @return: a double centered covariance matrix
    """
    MatrixUtil.assert_square(L)
    n = len(L)
    const = np.ones_like(L) / float(n)
    # According to the original author, np.linalg.pinv(L) should give
    # the same matrix but is perhaps not as numerically stable.
    shifted_pinv = np.linalg.pinv(L - const)
    return shifted_pinv + const
예제 #40
0
파일: Euclid.py 프로젝트: BIGtigr/xgcode
def cov_to_edm(S):
    """
    Convert a covariance matrix to a Euclidean distance matrix.
    Entry (i, j) of the result is S[i][i] + S[j][j] - 2*S[i][j].
    @param S: a covariance matrix
    @return: a Euclidean distance matrix
    """
    MatrixUtil.assert_square(S)
    d = np.diag(S)
    e = np.ones_like(d)
    # The two outer products spread the diagonal of S over rows and columns.
    D = np.outer(d, e) + np.outer(e, d) - 2*S
    return D
예제 #41
0
파일: Euclid.py 프로젝트: BIGtigr/xgcode
def edm_to_q(D):
    """
    Compute the neighbor joining Q matrix of a distance matrix.
    Entry (i, j) of the result is (n-2)*D[i][j] minus the sum of
    column j of D minus the sum of column i of D,
    where n is the number of rows of D.
    @param D: a treelike distance matrix
    @return: the neighbor joining Q matrix
    """
    MatrixUtil.assert_square(D)
    n = len(D)
    column_sums = np.sum(D, 0)
    ones = np.ones_like(column_sums)
    # column_sums[j] in every row, then column_sums[i] in every column
    by_column = np.outer(ones, column_sums)
    by_row = np.outer(column_sums, ones)
    return (n-2)*D - by_column - by_row
예제 #42
0
파일: kmeans.py 프로젝트: BIGtigr/xgcode
def get_wcss(sqdists, labels):
    """
    Compute the within-cluster sum of squares.
    For each point, the squared distance to the center named by
    the label of the point is picked out, and these are added up.
    @param sqdists: for each point, the squared distance to each center
    @param labels: cluster labels, one per point
    @return: within-cluster sum of squares
    @raise ValueError: if the number of rows of sqdists differs
    from the number of labels
    """
    MatrixUtil.assert_2d(sqdists)
    MatrixUtil.assert_1d(labels)
    if len(sqdists) != len(labels):
        raise ValueError('array incompatibility')
    total = 0
    for point_sqdists, label in zip(sqdists, labels):
        total += point_sqdists[label]
    return total
예제 #43
0
파일: Euclid.py 프로젝트: BIGtigr/xgcode
def dccov_to_laplacian(HSH):
    """
    Pseudoinvert a double centered matrix in a stable way.
    The constant matrix with entries 1/n is removed from the input,
    the difference is pseudoinverted, and the constant matrix
    is then restored.
    @param HSH: a double centered covariance matrix
    @return: a laplacian matrix
    """
    MatrixUtil.assert_square(HSH)
    size = float(len(HSH))
    mean_matrix = np.ones_like(HSH) / size
    # The original author notes that np.linalg.pinv(HSH) should be
    # the same but is perhaps not as numerically stable.
    return np.linalg.pinv(HSH - mean_matrix) + mean_matrix