Python EigUtil.eigh 예제들

예제 #1

0

파일 보기

파일: 20100816b.py 프로젝트: BIGtigr/xgcode

def get_response_content(fs):
    """
    Report spectral summaries of a tree Laplacian and an augmented Laplacian.
    The report shows the Schur complement of the tree Laplacian together
    with the eigendecomposition of its pseudoinverse, followed by the
    same kind of summary for the Laplacian of the augmented adjacency matrix.
    @param fs: user input providing tree_string, ndups, and strength
    @return: the text of the report
    """
    # parse the newick string and count the vertices and the tips
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    # the matrices follow the vertex order given by get_ordered_ids
    ordered_ids = get_ordered_ids(tree)
    A = np.array(tree.get_affinity_matrix(ordered_ids))
    A_aug = get_augmented_adjacency(A, nleaves, fs.ndups, fs.strength)
    # convert both adjacency matrices to laplacian matrices
    L = Euclid.adjacency_to_laplacian(A)
    L_aug = Euclid.adjacency_to_laplacian(A_aug)
    # take the schur complement with respect to the trailing vertices
    trailing = set(range(nleaves, nvertices))
    R = SchurAlgebra.mschur(L, trailing)
    R_pinv = np.linalg.pinv(R)
    vals, vecs = EigUtil.eigh(R_pinv)
    # scale the principal eigenvector by the square root of its eigenvalue
    w, v = EigUtil.principal_eigh(R_pinv)
    fiedler = v * math.sqrt(w)
    # repeat the spectral analysis for the augmented laplacian
    L_aug_pinv = np.linalg.pinv(L_aug)
    vals_aug, vecs_aug = EigUtil.eigh(L_aug_pinv)
    w_aug, v_aug = EigUtil.principal_eigh(L_aug_pinv)
    fiedler_aug = v_aug * math.sqrt(w_aug)
    # list the titled sections of each half of the report
    np.set_printoptions(linewidth=300)
    schur_sections = [
            ('Laplacian matrix:', L),
            ('Schur complement of Laplacian matrix:', R),
            ('scaled Fiedler vector of Schur complement:', fiedler),
            ('eigenvalues of pinv of Schur complement:', vals),
            ('corresponding eigenvectors of pinv of Schur complement:',
                np.array(vecs).T),
            ]
    aug_sections = [
            ('augmented Laplacian matrix:', L_aug),
            ('scaled Fiedler vector of augmented Laplacian:', fiedler_aug),
            ('eigenvalues of pinv of augmented Laplacian:', vals_aug),
            ]
    out = StringIO()
    for title, value in schur_sections:
        print >> out, title
        print >> out, value
        print >> out
    # an extra blank line separates the two halves of the report
    print >> out
    for title, value in aug_sections:
        print >> out, title
        print >> out, value
        print >> out
    # the final section is not followed by a blank line
    print >> out, 'rows are eigenvectors of pinv of augmented Laplacian:'
    print >> out, np.array(vecs_aug)
    return out.getvalue()

예제 #2

0

파일 보기

파일: 20100816b.py 프로젝트: argriffing/xgcode

def get_response_content(fs):
    """
    Report spectral summaries of a tree Laplacian and an augmented Laplacian.
    The report shows the Schur complement of the tree Laplacian together
    with the eigendecomposition of its pseudoinverse, followed by the
    same kind of summary for the Laplacian of the augmented adjacency matrix.
    @param fs: user input providing tree_string, ndups, and strength
    @return: the text of the report
    """
    # parse the newick string and count the vertices and the tips
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    # the matrices follow the vertex order given by get_ordered_ids
    ordered_ids = get_ordered_ids(tree)
    A = np.array(tree.get_affinity_matrix(ordered_ids))
    A_aug = get_augmented_adjacency(A, nleaves, fs.ndups, fs.strength)
    # convert both adjacency matrices to laplacian matrices
    L = Euclid.adjacency_to_laplacian(A)
    L_aug = Euclid.adjacency_to_laplacian(A_aug)
    # take the schur complement with respect to the trailing vertices
    trailing = set(range(nleaves, nvertices))
    R = SchurAlgebra.mschur(L, trailing)
    R_pinv = np.linalg.pinv(R)
    vals, vecs = EigUtil.eigh(R_pinv)
    # scale the principal eigenvector by the square root of its eigenvalue
    w, v = EigUtil.principal_eigh(R_pinv)
    fiedler = v * math.sqrt(w)
    # repeat the spectral analysis for the augmented laplacian
    L_aug_pinv = np.linalg.pinv(L_aug)
    vals_aug, vecs_aug = EigUtil.eigh(L_aug_pinv)
    w_aug, v_aug = EigUtil.principal_eigh(L_aug_pinv)
    fiedler_aug = v_aug * math.sqrt(w_aug)
    # list the titled sections of each half of the report
    np.set_printoptions(linewidth=300)
    schur_sections = [
            ('Laplacian matrix:', L),
            ('Schur complement of Laplacian matrix:', R),
            ('scaled Fiedler vector of Schur complement:', fiedler),
            ('eigenvalues of pinv of Schur complement:', vals),
            ('corresponding eigenvectors of pinv of Schur complement:',
                np.array(vecs).T),
            ]
    aug_sections = [
            ('augmented Laplacian matrix:', L_aug),
            ('scaled Fiedler vector of augmented Laplacian:', fiedler_aug),
            ('eigenvalues of pinv of augmented Laplacian:', vals_aug),
            ]
    out = StringIO()
    for title, value in schur_sections:
        print >> out, title
        print >> out, value
        print >> out
    # an extra blank line separates the two halves of the report
    print >> out
    for title, value in aug_sections:
        print >> out, title
        print >> out, value
        print >> out
    # the final section is not followed by a blank line
    print >> out, 'rows are eigenvectors of pinv of augmented Laplacian:'
    print >> out, np.array(vecs_aug)
    return out.getvalue()

예제 #3

0

파일 보기

파일: 20100817a.py 프로젝트: argriffing/xgcode

def get_response_content(fs):
    """
    Report spectral summaries from a tree and its augmented distance matrix.
    The first half of the report covers the Schur complement of the
    Laplacian derived from the tree distance matrix, and the second half
    covers the matrix that edm_to_dccov derives from the augmented distances.
    @param fs: user input providing tree_string and ndups
    @return: the text of the report
    """
    # parse the newick string and count the vertices and the tips
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    # the matrices follow the vertex order given by get_ordered_ids
    ordered_ids = get_ordered_ids(tree)
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    D_aug = get_augmented_distance(D, nleaves, fs.ndups)
    # derive the laplacian and reduce it by a schur complement
    L = Euclid.edm_to_laplacian(D)
    trailing = set(range(nleaves, nvertices))
    R = SchurAlgebra.mschur(L, trailing)
    R_pinv = np.linalg.pinv(R)
    vals, vecs = EigUtil.eigh(R_pinv)
    # scale the principal eigenvector by the square root of its eigenvalue
    w, v = EigUtil.principal_eigh(R_pinv)
    fiedler = v * math.sqrt(w)
    # repeat the spectral analysis for the augmented distance matrix
    dccov_aug = Euclid.edm_to_dccov(D_aug)
    vals_aug, vecs_aug = EigUtil.eigh(dccov_aug)
    w_aug, v_aug = EigUtil.principal_eigh(dccov_aug)
    fiedler_aug = v_aug * math.sqrt(w_aug)
    # list the titled sections of each half of the report
    np.set_printoptions(linewidth=300, threshold=10000)
    schur_sections = [
            ('Laplacian matrix:', L),
            ('Schur complement of Laplacian matrix:', R),
            ('scaled Fiedler vector of Schur complement:', fiedler),
            ('eigenvalues of pinv of Schur complement:', vals),
            ('corresponding eigenvectors of pinv of Schur complement:',
                np.array(vecs).T),
            ]
    aug_sections = [
            ('augmented distance matrix:', D_aug),
            ('scaled Fiedler vector of augmented Laplacian limit:',
                fiedler_aug),
            ('eigenvalues of pinv of augmented Laplacian limit:', vals_aug),
            ]
    out = StringIO()
    for title, value in schur_sections:
        print >> out, title
        print >> out, value
        print >> out
    # an extra blank line separates the two halves of the report
    print >> out
    for title, value in aug_sections:
        print >> out, title
        print >> out, value
        print >> out
    # the final section is not followed by a blank line
    print >> out, 'rows are eigenvectors of pinv of augmented Laplacian limit:'
    print >> out, np.array(vecs_aug)
    return out.getvalue()

예제 #4

0

파일 보기

파일: eigenpop.py 프로젝트: BIGtigr/xgcode

def get_eval_evec_pairs(C_full, diploid_and_biallelic):
    """
    Get the eigendecomposition of a sample covariance matrix of counts.
    Input rows are OTUs and columns are loci.
    Each element of the input data is a count.
    Invariant columns are dropped and the remaining columns are centered.
    For diploid biallelic data each centered column is also divided by
    sqrt(p*(1-p)) where p is half of the column mean.
    @param C_full: matrix of float counts where each row represents an OTU
    @param diploid_and_biallelic: a flag
    @return: (eigenvalues, eigenvectors)
    @raise ValueError: if the input is not a two dimensional matrix,
    if a count exceeds two for diploid data, or if every column is invariant
    """
    if C_full.ndim != 2:
        raise ValueError('the count matrix should have two dimensions')
    # check compatibility of counts and ploidy
    if diploid_and_biallelic and np.max(C_full) > 2:
        raise ValueError(
                'no count should be greater than two for diploid data')
    # remove invariant columns
    informative = [v for v in C_full.T if len(set(v)) > 1]
    if not informative:
        # without this check np.vstack fails with an unrelated message
        raise ValueError(
                'every column is invariant so there is nothing to decompose')
    C = np.vstack(informative).T
    # the number of informative columns
    n = C.shape[1]
    # center the counts by the column means
    u = C.mean(axis=0)
    M = C - u
    # normalize if diploid and biallelic
    if diploid_and_biallelic:
        p = u/2
        M /= np.sqrt(p * (1 - p))
    # construct the sample covariance matrix
    # FIXME this should probably use a singular value decomposition instead
    X = np.dot(M, M.T) / n
    # get the eigendecomposition of the covariance matrix
    return EigUtil.eigh(X)

예제 #5

0

파일 보기

def get_grant_proposal_points_b(lfdi):
    """
    Get three coordinate vectors for the leaves and imputed internal points.
    The leaf block of the matrix G = -M/2 is eigendecomposed to place
    the leaves, and the internal points are imputed from the GX block.
    @param lfdi: an object providing the attributes M, p, and q
    @return: the first three coordinate vectors of the stacked points
    @raise ValueError: if the eigendecomposition fails to reconstruct the
    leaf block, or if the matrix shapes are incompatible
    """
    M, p, q = lfdi.M, lfdi.p, lfdi.q
    G = -.5 * M
    GQ, GX, GXT, GP = ProofDecoration.get_corners(G, q, p)
    # Decompose the leaf-only Gower matrix and verify the reconstruction.
    eigenvalues, eigenvectors = EigUtil.eigh(GQ)
    S = np.diag(eigenvalues)
    U = np.vstack(eigenvectors).T
    US = np.dot(U, S)
    if not np.allclose(np.dot(US, U.T), GQ):
        raise ValueError('eigenfail')
    # Place the leaves using the square roots of the eigenvalues.
    root = np.diag(np.sqrt(eigenvalues))
    leaf_points = np.dot(U, root)
    # Find the imputed internal points.
    root_pinv = np.linalg.pinv(root)
    try:
        internal_points = np.dot(np.dot(GX.T, U), root_pinv)
    except ValueError:
        # report the shapes that could not be multiplied
        shapes = (GX.shape, U.shape, root_pinv.shape)
        raise ValueError(', '.join(map(str, shapes)))
    # Stack the points and keep only the first three coordinates.
    axes = np.vstack([leaf_points, internal_points]).T
    return axes[0], axes[1], axes[2]

예제 #6

0

파일 보기

파일: 20110112a.py 프로젝트: argriffing/xgcode

def get_grant_proposal_points_b(lfdi):
    """
    Get planar points for the leaves and the imputed internal points.
    The leaf block of the matrix G = -M/2 is eigendecomposed to place
    the leaves, and the internal points are imputed from the GX block.
    @param lfdi: an object providing the attributes M, p, and q
    @return: an array whose rows are points and whose two columns are
    the first two coordinates
    @raise ValueError: if the eigendecomposition fails to reconstruct the
    leaf block, or if the matrix shapes are incompatible
    """
    M, p, q = lfdi.M, lfdi.p, lfdi.q
    G = -.5 * M
    GQ, GX, GXT, GP = ProofDecoration.get_corners(G, q, p)
    # Decompose the leaf-only Gower matrix and verify the reconstruction.
    eigenvalues, eigenvectors = EigUtil.eigh(GQ)
    S = np.diag(eigenvalues)
    U = np.vstack(eigenvectors).T
    US = np.dot(U, S)
    if not np.allclose(np.dot(US, U.T), GQ):
        raise ValueError('eigenfail')
    # Place the leaves using the square roots of the eigenvalues.
    root = np.diag(np.sqrt(eigenvalues))
    leaf_points = np.dot(U, root)
    # Find the imputed internal points.
    root_pinv = np.linalg.pinv(root)
    try:
        internal_points = np.dot(np.dot(GX.T, U), root_pinv)
    except ValueError:
        # report the shapes that could not be multiplied
        shapes = (GX.shape, U.shape, root_pinv.shape)
        raise ValueError(', '.join(map(str, shapes)))
    # Stack the points and keep only the first two coordinates.
    stacked = np.vstack([leaf_points, internal_points])
    return stacked[:, :2]

예제 #7

0

파일 보기

파일: 20100608b.py 프로젝트: argriffing/xgcode

def do_pca(hud_lines):
    """
    Compute eigenvalue-scaled eigenvectors of a covariance matrix.
    The covariance matrix is built from the informative columns
    of the data decoded from the .hud lines.
    @param hud_lines: lines of a .hud file
    @return: names, scaled vectors
    """
    # decode the ordered names and the data rows
    names, data = hud.decode(hud_lines)
    counts = np.array(data)
    # the decoded data is expected to be a matrix
    nrows_full, ncols_full = counts.shape
    # keep only the columns that take more than one value
    varying = []
    for column in counts.T:
        if len(set(column)) > 1:
            varying.append(column)
    C = np.vstack(varying).T
    nrows, ncols = C.shape
    # center by the column means and normalize by sqrt(u*(1-u))
    u = C.mean(axis=0)
    M = (C - u) / np.sqrt(u * (1 - u))
    # the sample covariance matrix across rows
    X = np.dot(M, M.T) / ncols
    evals, evecs = EigUtil.eigh(X)
    # multiply each eigenvector by its eigenvalue
    pcs = []
    for w, v in zip(evals, evecs):
        pcs.append(w * v)
    return names, pcs

예제 #8

0

파일 보기

def do_pca(hud_lines):
    """
    Compute eigenvalue-scaled eigenvectors of a covariance matrix.
    The covariance matrix is built from the informative columns
    of the data decoded from the .hud lines.
    @param hud_lines: lines of a .hud file
    @return: names, scaled vectors
    """
    # decode the ordered names and the data rows
    names, data = hud.decode(hud_lines)
    counts = np.array(data)
    # the decoded data is expected to be a matrix
    nrows_full, ncols_full = counts.shape
    # keep only the columns that take more than one value
    varying = []
    for column in counts.T:
        if len(set(column)) > 1:
            varying.append(column)
    C = np.vstack(varying).T
    nrows, ncols = C.shape
    # center by the column means and normalize by sqrt(u*(1-u))
    u = C.mean(axis=0)
    M = (C - u) / np.sqrt(u * (1 - u))
    # the sample covariance matrix across rows
    X = np.dot(M, M.T) / ncols
    evals, evecs = EigUtil.eigh(X)
    # multiply each eigenvector by its eigenvalue
    pcs = []
    for w, v in zip(evals, evecs):
        pcs.append(w * v)
    return names, pcs

예제 #9

0

파일 보기

파일: 20110202a.py 프로젝트: BIGtigr/xgcode

def get_response_content(fs):
    """
    Compare path Laplacian eigenvectors with sinusoidal approximations.
    For each requested axis the report shows the scaled eigenvector,
    the predicted values, and their elementwise ratio.
    @param fs: user input providing nvertices and naxes
    @return: the text of the report
    @raise ValueError: if more axes are requested than the vertices allow
    """
    # check input compatibility
    if fs.nvertices < fs.naxes + 1:
        raise ValueError(
                'attempting to plot too many eigenvectors '
                'for the given number of vertices')
    # build and decompose the path Laplacian matrix
    N = fs.nvertices
    L = create_laplacian_matrix(N)
    ws, vs = EigUtil.eigh(L)
    # drop the last eigenpair and reverse the order of the rest
    ws = ws[:-1][::-1]
    vs = vs[:-1][::-1]
    # every eigenvector is scaled by the same factor
    scale = math.sqrt(N * 0.5)
    np.set_printoptions(linewidth=200, threshold=10000)
    out = StringIO()
    for i in range(fs.naxes):
        scaled_eigenvector = vs[i] * scale
        prediction = np.array(
                [sinusoidal_approximation_b(N, i + 1, k) for k in range(N)])
        ratio = scaled_eigenvector / prediction
        for row in (scaled_eigenvector, prediction, ratio):
            print >> out, row
        print >> out
    return out.getvalue()

예제 #10

0

파일 보기

def get_response_content(fs):
    """
    Draw sampled points inside the africa polygon colored by an MDS axis.
    @param fs: user input providing seed, npoints, axis, total_width,
    total_height, border, and imageformat
    @return: the image string produced by the image helper
    @raise ValueError: if the requested axis is negative or too large
    @raise HandlingError: if the drawable area is empty or drawing fails
    """
    # use a fixed seed if requested
    if fs.seed:
        random.seed(fs.seed)
    # cap the number of rejection sampling iterations
    limit = fs.npoints * 100
    if fs.axis < 0:
        raise ValueError('the mds axis must be nonnegative')
    # connect consecutive boundary points into a closed polygon
    nafrica = len(g_africa_poly)
    africa_edges = []
    for i in range(nafrica):
        africa_edges.append((i, (i + 1) % nafrica))
    # sample interior points and triangulate them
    points = sample_with_rejection(fs.npoints, g_africa_poly, limit)
    x_list, y_list = zip(*points)
    tri = Triangulation(x_list, y_list)
    # shift the triangulation indices past the boundary points
    tri_edges = []
    for i, j in tri.edge_db.tolist():
        tri_edges.append((i + nafrica, j + nafrica))
    allpoints = g_africa_poly + points
    # keep the noncrossing edges and then reduce them with get_mst
    # (presumably a minimum spanning tree; see get_mst)
    noncrossing = list(
            gen_noncrossing_edges(tri_edges, africa_edges, allpoints))
    tri_edges = get_mst(noncrossing, allpoints)
    alledges = africa_edges + tri_edges
    # weight each remaining edge by its inverse euclidean length
    npoints = len(points)
    A = np.zeros((npoints, npoints))
    for ia, ib in tri_edges:
        xa, ya = allpoints[ia]
        xb, yb = allpoints[ib]
        weight = 1 / math.hypot(xb - xa, yb - ya)
        row, col = ia - nafrica, ib - nafrica
        A[row, col] = weight
        A[col, row] = weight
    L = Euclid.adjacency_to_laplacian(A)
    ws, vs = EigUtil.eigh(np.linalg.pinv(L))
    if fs.axis >= len(ws):
        raise ValueError('choose a smaller mds axis')
    # normalize the selected eigenvector in place by its largest magnitude
    v = vs[fs.axis]
    v /= max(np.abs(v))
    # boundary points are small and black, sampled points are colored
    colors = [(0, 0, 0)] * nafrica + [get_color(x) for x in v]
    radii = [2] * nafrica + [5] * len(points)
    # make sure that the border leaves some drawable area
    width = fs.total_width - 2 * fs.border
    height = fs.total_height - 2 * fs.border
    if width < 1 or height < 1:
        raise HandlingError(
                'the image dimensions do not allow for enough drawable area')
    # draw the image
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    try:
        helper = ImgHelper(
                allpoints, alledges,
                fs.total_width, fs.total_height, fs.border)
        return helper.get_image_string(colors, radii, ext)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)

예제 #11

0

파일 보기

파일: 20100817b.py 프로젝트: argriffing/xgcode

def get_response_content(fs):
    """
    Draw sampled points inside the africa polygon colored by an MDS axis.
    @param fs: user input providing seed, npoints, axis, total_width,
    total_height, border, and imageformat
    @return: the image string produced by the image helper
    @raise ValueError: if the requested axis is negative or too large
    @raise HandlingError: if the drawable area is empty or drawing fails
    """
    # use a fixed seed if requested
    if fs.seed:
        random.seed(fs.seed)
    # cap the number of rejection sampling iterations
    limit = fs.npoints * 100
    if fs.axis < 0:
        raise ValueError('the mds axis must be nonnegative')
    # connect consecutive boundary points into a closed polygon
    nafrica = len(g_africa_poly)
    africa_edges = []
    for i in range(nafrica):
        africa_edges.append((i, (i + 1) % nafrica))
    # sample interior points and triangulate them
    points = sample_with_rejection(fs.npoints, g_africa_poly, limit)
    x_list, y_list = zip(*points)
    tri = Triangulation(x_list, y_list)
    # shift the triangulation indices past the boundary points
    tri_edges = []
    for i, j in tri.edge_db.tolist():
        tri_edges.append((i + nafrica, j + nafrica))
    allpoints = g_africa_poly + points
    # keep the noncrossing edges and then reduce them with get_mst
    # (presumably a minimum spanning tree; see get_mst)
    noncrossing = list(
            gen_noncrossing_edges(tri_edges, africa_edges, allpoints))
    tri_edges = get_mst(noncrossing, allpoints)
    alledges = africa_edges + tri_edges
    # weight each remaining edge by its inverse euclidean length
    npoints = len(points)
    A = np.zeros((npoints, npoints))
    for ia, ib in tri_edges:
        xa, ya = allpoints[ia]
        xb, yb = allpoints[ib]
        weight = 1 / math.hypot(xb - xa, yb - ya)
        row, col = ia - nafrica, ib - nafrica
        A[row, col] = weight
        A[col, row] = weight
    L = Euclid.adjacency_to_laplacian(A)
    ws, vs = EigUtil.eigh(np.linalg.pinv(L))
    if fs.axis >= len(ws):
        raise ValueError('choose a smaller mds axis')
    # normalize the selected eigenvector in place by its largest magnitude
    v = vs[fs.axis]
    v /= max(np.abs(v))
    # boundary points are small and black, sampled points are colored
    colors = [(0, 0, 0)] * nafrica + [get_color(x) for x in v]
    radii = [2] * nafrica + [5] * len(points)
    # make sure that the border leaves some drawable area
    width = fs.total_width - 2 * fs.border
    height = fs.total_height - 2 * fs.border
    if width < 1 or height < 1:
        raise HandlingError(
                'the image dimensions do not allow for enough drawable area')
    # draw the image
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    try:
        helper = ImgHelper(
                allpoints, alledges,
                fs.total_width, fs.total_height, fs.border)
        return helper.get_image_string(colors, radii, ext)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)

예제 #12

0

파일 보기

def main(fs):
    """
    Draw a contour plot of an MDS axis over points sampled inside africa.
    @param fs: user input providing seed, npoints, axis, total_width,
    total_height, and border
    @raise ValueError: if the requested axis is negative or too large
    """
    # use a fixed seed if requested
    if fs.seed:
        random.seed(fs.seed)
    # cap the number of rejection sampling iterations
    limit = fs.npoints * 100
    if fs.axis < 0:
        raise ValueError('the mds axis must be nonnegative')
    # connect consecutive boundary points into a closed polygon
    nafrica = len(g_africa_poly)
    africa_edges = []
    for i in range(nafrica):
        africa_edges.append((i, (i + 1) % nafrica))
    # sample interior points and triangulate them
    points = sample_with_rejection(fs.npoints, g_africa_poly, limit)
    x_list, y_list = zip(*points)
    tri = Triangulation(x_list, y_list)
    # shift the triangulation indices past the boundary points
    tri_edges = []
    for i, j in tri.edge_db.tolist():
        tri_edges.append((i + nafrica, j + nafrica))
    allpoints = g_africa_poly + points
    # keep the noncrossing edges and then reduce them with get_mst
    # (presumably a minimum spanning tree; see get_mst)
    noncrossing = list(
            gen_noncrossing_edges(tri_edges, africa_edges, allpoints))
    tri_edges = get_mst(noncrossing, allpoints)
    alledges = africa_edges + tri_edges
    # weight each remaining edge by its inverse euclidean length
    npoints = len(points)
    A = np.zeros((npoints, npoints))
    for ia, ib in tri_edges:
        xa, ya = allpoints[ia]
        xb, yb = allpoints[ib]
        weight = 1 / math.hypot(xb - xa, yb - ya)
        row, col = ia - nafrica, ib - nafrica
        A[row, col] = weight
        A[col, row] = weight
    L = Euclid.adjacency_to_laplacian(A)
    ws, vs = EigUtil.eigh(np.linalg.pinv(L))
    if fs.axis >= len(ws):
        raise ValueError('choose a smaller mds axis')
    # normalize the selected eigenvector in place by its largest magnitude
    v = vs[fs.axis]
    v /= max(np.abs(v))
    # draw the picture
    helper = ImgHelper(
            allpoints, alledges, fs.total_width, fs.total_height, fs.border)
    helper.draw_contour_plot(v, nafrica)

예제 #13

0

파일 보기

파일: 20100817b.py 프로젝트: argriffing/xgcode

def main(fs):
    """
    Draw a contour plot of an MDS axis over points sampled inside africa.
    @param fs: user input providing seed, npoints, axis, total_width,
    total_height, and border
    @raise ValueError: if the requested axis is negative or too large
    """
    # use a fixed seed if requested
    if fs.seed:
        random.seed(fs.seed)
    # cap the number of rejection sampling iterations
    limit = fs.npoints * 100
    if fs.axis < 0:
        raise ValueError('the mds axis must be nonnegative')
    # connect consecutive boundary points into a closed polygon
    nafrica = len(g_africa_poly)
    africa_edges = []
    for i in range(nafrica):
        africa_edges.append((i, (i + 1) % nafrica))
    # sample interior points and triangulate them
    points = sample_with_rejection(fs.npoints, g_africa_poly, limit)
    x_list, y_list = zip(*points)
    tri = Triangulation(x_list, y_list)
    # shift the triangulation indices past the boundary points
    tri_edges = []
    for i, j in tri.edge_db.tolist():
        tri_edges.append((i + nafrica, j + nafrica))
    allpoints = g_africa_poly + points
    # keep the noncrossing edges and then reduce them with get_mst
    # (presumably a minimum spanning tree; see get_mst)
    noncrossing = list(
            gen_noncrossing_edges(tri_edges, africa_edges, allpoints))
    tri_edges = get_mst(noncrossing, allpoints)
    alledges = africa_edges + tri_edges
    # weight each remaining edge by its inverse euclidean length
    npoints = len(points)
    A = np.zeros((npoints, npoints))
    for ia, ib in tri_edges:
        xa, ya = allpoints[ia]
        xb, yb = allpoints[ib]
        weight = 1 / math.hypot(xb - xa, yb - ya)
        row, col = ia - nafrica, ib - nafrica
        A[row, col] = weight
        A[col, row] = weight
    L = Euclid.adjacency_to_laplacian(A)
    ws, vs = EigUtil.eigh(np.linalg.pinv(L))
    if fs.axis >= len(ws):
        raise ValueError('choose a smaller mds axis')
    # normalize the selected eigenvector in place by its largest magnitude
    v = vs[fs.axis]
    v /= max(np.abs(v))
    # draw the picture
    helper = ImgHelper(
            allpoints, alledges, fs.total_width, fs.total_height, fs.border)
    helper.draw_contour_plot(v, nafrica)

예제 #14

0

파일 보기

def process(args, hud_lines):
    """
    Summarize the structure found by a PCA of the data in a .hud file.
    The report includes Tracy-Widom statistics with and without a
    linkage correction and the projection onto the principal axis.
    @param args: options providing the flags sum_to_n and sum_to_1
    @param hud_lines: lines of a .hud file
    @return: results in convenient text form
    """
    out = StringIO()
    # decode the ordered names and the data rows
    names, data = hud.decode(hud_lines)
    C_full = np.array(data)
    m_full, n_full = C_full.shape
    # keep only the columns that take more than one value
    varying = [column for column in C_full.T if len(set(column)) > 1]
    C = np.vstack(varying).T
    m, n = C.shape
    # center by the column means and normalize by sqrt(u*(1-u))
    u = C.mean(axis=0)
    M = (C - u) / np.sqrt(u * (1 - u))
    # decompose the sample covariance matrix
    X = np.dot(M, M.T) / n
    evals, evecs = EigUtil.eigh(X)
    # the sum and the sum of squares of the eigenvalues
    L1 = evals.sum()
    L2 = np.dot(evals, evals)
    proportion = evals[0] / L1
    # the linkage-naive statistic uses the relative size of evals[0]
    x = get_tracy_widom_statistic(m, n, m * proportion)
    # do linkage correction
    n_prime = ((m + 1) * L1 * L1) / ((m - 1) * L2 - L1 * L1)
    corrected = n_prime < n
    # detect additional structure using alpha level of 0.05
    crit = 0.9794
    if corrected:
        x_prime = get_tracy_widom_statistic(
                m, n_prime, (m - 1) * proportion)
        n_structure = n_prime
    else:
        n_structure = n
    sigs, insig = get_corrected_structure(crit, evals, m, n_structure)
    # the corrected values are reported only when the correction applies
    degenerate = '[sample is too degenerate for estimation]'
    sections = [
            ('number of isolates:', m_full),
            ('total number of SNPs:', n_full),
            ('number of informative SNPs:', n),
            ('effective number of linkage-corrected SNPs:',
                n_prime if corrected else degenerate),
            ('Tracy-Widom statistic (linkage-naive):', x),
            ('Tracy-Widom statistic (linkage-corrected):',
                x_prime if corrected else degenerate),
            ('proportion of variance explained by principal axis:',
                proportion),
            ('number of significant axes of variation:', len(sigs)),
            ]
    for title, value in sections:
        print >> out, title
        print >> out, value
        print >> out
    print >> out, 'significant Tracy-Widom statistics:'
    for sig in sigs:
        print >> out, sig
    print >> out
    print >> out, 'first insignificant Tracy-Widom statistic:'
    print >> out, insig
    print >> out
    # show the names ordered by their loading on the principal axis
    print >> out, 'principal axis projection:'
    for loading, name in sorted(zip(evecs[0] * evals[0], names)):
        print >> out, '\t'.join([name, str(loading)])
    print >> out
    # optionally show the eigenvalues under one of two normalizations
    evals_sum = sum(evals)
    if args.sum_to_n:
        print >> out, 'eigenvalues normalized to sum to the number of OTUs:'
        for w in evals:
            print >> out, m_full * w / float(evals_sum)
    elif args.sum_to_1:
        print >> out, 'eigenvalues normalized to sum to 1.0:'
        for w in evals:
            print >> out, w / float(evals_sum)
    return out.getvalue().rstrip()

예제 #15

0

파일 보기

파일: 20110208a.py 프로젝트: argriffing/xgcode

def get_response_content(fs):
    """
    Report how a Laplacian eigenvector behaves at the ends of three branches.
    The graph has N = 1 + lena + lenb + lenc vertices.
    Vertex 0 is shared by all three branches, and the following vertices
    are assigned to the branches in order.
    Finite differences of the eigenvector estimate its derivatives
    at both ends of each branch.
    @param fs: user input providing lena, lenb, lenc, eigk, sparse,
    showv, and showmatrix
    @return: the text of the report
    @raise ValueError: if eigk is too large for the number of vertices
    or if the tree has fewer than two vertices
    """
    # define the number of nodes
    widths = (fs.lena, fs.lenb, fs.lenc)
    N = 1 + fs.lena + fs.lenb + fs.lenc
    # check input compatibility
    if not (fs.eigk+1 <= N):
        raise ValueError(
                'attempting to find a too highly indexed eigenvector '
                'for the number of vertices in the graph')
    if N < 2:
        raise ValueError('the tree has no length')
    # the spacing between adjacent vertices when the total length is one
    h = 1 / float(N-1)
    # construct the studded tree Laplacian matrix and decompose it
    if fs.sparse:
        v0 = np.ones(N, dtype=float)
        L_csr = create_laplacian_csr_matrix(fs.lena, fs.lenb, fs.lenc)
        arpack_k = fs.eigk+1
        ncv = 3*arpack_k + 3
        ws, vs = scipy.sparse.linalg.eigsh(
                L_csr, arpack_k, which='SM',
                v0=v0,
                ncv=ncv, return_eigenvectors=True)
        # skip the first eigenpair and use the eigenvectors as rows
        ws = ws[1:]
        vs = vs.T[1:]
    else:
        L = create_laplacian_matrix(fs.lena, fs.lenb, fs.lenc)
        ws, vs = EigUtil.eigh(L)
        # drop the last eigenpair and reverse the order of the rest
        ws = ws[:-1][::-1]
        vs = vs[:-1][::-1]
    # get the eigenvector of interest
    eigenvalue = ws[fs.eigk-1]
    v = vs[fs.eigk-1]
    # init the branch info
    binfos = [BranchInfo() for i in range(3)]
    offset = 1
    for i, binfo in enumerate(binfos):
        binfo.k = i+1
        binfo.width = widths[i]
        # each branch begins at the shared vertex and continues through
        # its own consecutive run of vertices
        w = np.array([v[0]] + v[offset:offset+binfo.width].tolist())
        offset += binfo.width
        # forward differences at the shared end of the branch
        if len(w) >= 1:
            binfo.p0 = w[0]
        if len(w) >= 2:
            binfo.p1 = (w[1] - w[0]) / h
        if len(w) >= 3:
            binfo.p2 = (w[0] - 2*w[1] + w[2]) / (h*h)
        # backward differences at the pendant end of the branch
        if len(w) >= 1:
            binfo.q0 = w[-1]
        if len(w) >= 2:
            binfo.q1 = (w[-1] - w[-2]) / h
        if len(w) >= 3:
            binfo.q2 = (w[-3] - 2*w[-2] + w[-1]) / (h*h)
    # begin writing the report
    np.set_printoptions(linewidth=200, threshold=10000)
    out = StringIO()
    # summarize global properties
    print >> out, 'total branch length:'
    print >> out, N - 1
    print >> out
    print >> out, 'total number of graph vertices including degree 2 vertices:'
    print >> out, N
    print >> out
    # show the sum of first derivatives near the hub;
    # N >= 2 was validated above so no fallback is needed,
    # and branches with an unset or zero derivative add nothing
    p1sum = sum(binfo.p1 for binfo in binfos if binfo.p1)
    p1sum_string = str(p1sum)
    print >> out, "sum of f'(x) on all branches near the hub:", p1sum_string
    print >> out
    # summarize properties per branch per eigenvector
    for binfo in binfos:
        print >> out, 'summary of eigenvector', fs.eigk, 'on branch', binfo.k
        print >> out, 'unscaled branch length:', binfo.width
        if binfo.width:
            print >> out, 'internal', ''.join(['-']*binfo.width), 'pendant'
            print >> out, "internal f(x):  ", value_to_string(binfo.p0)
            print >> out, "internal f'(x): ", value_to_string(binfo.p1)
            print >> out, "internal f''(x):", value_to_string(binfo.p2)
            print >> out, "pendant  f(x):  ", value_to_string(binfo.q0)
            print >> out, "pendant  f'(x): ", value_to_string(binfo.q1)
            print >> out, "pendant  f''(x):", value_to_string(binfo.q2)
        print >> out
    if fs.showv:
        print >> out, 'the eigenvalue:'
        print >> out, eigenvalue
        print >> out
        print >> out, 'the whole eigenvector:'
        print >> out, v
        print >> out
    if fs.showmatrix:
        if fs.sparse:
            print >> out, 'Laplacian matrix (from sparse internal repr):'
            print >> out, L_csr.toarray()
            print >> out
        else:
            print >> out, 'Laplacian matrix (from dense internal repr):'
            print >> out, L
            print >> out
    return out.getvalue()

예제 #16

0

파일 보기

def get_response_content(fs):
    """
    Report how a Laplacian eigenvector behaves at the ends of three branches.
    The graph has N = 1 + lena + lenb + lenc vertices.
    Vertex 0 is shared by all three branches, and the following vertices
    are assigned to the branches in order.
    Finite differences of the eigenvector estimate its derivatives
    at both ends of each branch.
    @param fs: user input providing lena, lenb, lenc, eigk, sparse,
    showv, and showmatrix
    @return: the text of the report
    @raise ValueError: if eigk is too large for the number of vertices
    or if the tree has fewer than two vertices
    """
    # define the number of nodes
    widths = (fs.lena, fs.lenb, fs.lenc)
    N = 1 + fs.lena + fs.lenb + fs.lenc
    # check input compatibility
    if not (fs.eigk + 1 <= N):
        raise ValueError('attempting to find a too highly indexed eigenvector '
                         'for the number of vertices in the graph')
    if N < 2:
        raise ValueError('the tree has no length')
    # the spacing between adjacent vertices when the total length is one
    h = 1 / float(N - 1)
    # construct the studded tree Laplacian matrix and decompose it
    if fs.sparse:
        v0 = np.ones(N, dtype=float)
        L_csr = create_laplacian_csr_matrix(fs.lena, fs.lenb, fs.lenc)
        arpack_k = fs.eigk + 1
        ncv = 3 * arpack_k + 3
        ws, vs = scipy.sparse.linalg.eigsh(L_csr,
                                           arpack_k,
                                           which='SM',
                                           v0=v0,
                                           ncv=ncv,
                                           return_eigenvectors=True)
        # skip the first eigenpair and use the eigenvectors as rows
        ws = ws[1:]
        vs = vs.T[1:]
    else:
        L = create_laplacian_matrix(fs.lena, fs.lenb, fs.lenc)
        ws, vs = EigUtil.eigh(L)
        # drop the last eigenpair and reverse the order of the rest
        ws = ws[:-1][::-1]
        vs = vs[:-1][::-1]
    # get the eigenvector of interest
    eigenvalue = ws[fs.eigk - 1]
    v = vs[fs.eigk - 1]
    # init the branch info
    binfos = [BranchInfo() for i in range(3)]
    offset = 1
    for i, binfo in enumerate(binfos):
        binfo.k = i + 1
        binfo.width = widths[i]
        # each branch begins at the shared vertex and continues through
        # its own consecutive run of vertices
        w = np.array([v[0]] + v[offset:offset + binfo.width].tolist())
        offset += binfo.width
        # forward differences at the shared end of the branch
        if len(w) >= 1:
            binfo.p0 = w[0]
        if len(w) >= 2:
            binfo.p1 = (w[1] - w[0]) / h
        if len(w) >= 3:
            binfo.p2 = (w[0] - 2 * w[1] + w[2]) / (h * h)
        # backward differences at the pendant end of the branch
        if len(w) >= 1:
            binfo.q0 = w[-1]
        if len(w) >= 2:
            binfo.q1 = (w[-1] - w[-2]) / h
        if len(w) >= 3:
            binfo.q2 = (w[-3] - 2 * w[-2] + w[-1]) / (h * h)
    # begin writing the report
    np.set_printoptions(linewidth=200, threshold=10000)
    out = StringIO()
    # summarize global properties
    print >> out, 'total branch length:'
    print >> out, N - 1
    print >> out
    print >> out, 'total number of graph vertices including degree 2 vertices:'
    print >> out, N
    print >> out
    # show the sum of first derivatives near the hub;
    # N >= 2 was validated above so no fallback is needed,
    # and branches with an unset or zero derivative add nothing
    p1sum = sum(binfo.p1 for binfo in binfos if binfo.p1)
    p1sum_string = str(p1sum)
    print >> out, "sum of f'(x) on all branches near the hub:", p1sum_string
    print >> out
    # summarize properties per branch per eigenvector
    for binfo in binfos:
        print >> out, 'summary of eigenvector', fs.eigk, 'on branch', binfo.k
        print >> out, 'unscaled branch length:', binfo.width
        if binfo.width:
            print >> out, 'internal', ''.join(['-'] * binfo.width), 'pendant'
            print >> out, "internal f(x):  ", value_to_string(binfo.p0)
            print >> out, "internal f'(x): ", value_to_string(binfo.p1)
            print >> out, "internal f''(x):", value_to_string(binfo.p2)
            print >> out, "pendant  f(x):  ", value_to_string(binfo.q0)
            print >> out, "pendant  f'(x): ", value_to_string(binfo.q1)
            print >> out, "pendant  f''(x):", value_to_string(binfo.q2)
        print >> out
    if fs.showv:
        print >> out, 'the eigenvalue:'
        print >> out, eigenvalue
        print >> out
        print >> out, 'the whole eigenvector:'
        print >> out, v
        print >> out
    if fs.showmatrix:
        if fs.sparse:
            print >> out, 'Laplacian matrix (from sparse internal repr):'
            print >> out, L_csr.toarray()
            print >> out
        else:
            print >> out, 'Laplacian matrix (from dense internal repr):'
            print >> out, L
            print >> out
    return out.getvalue()

예제 #17

0

파일 보기

파일: 20100819a.py 프로젝트: argriffing/xgcode

def get_response_content(fs):
    """
    Build a text report comparing leaf-only and all-vertex distance matrices.
    @param fs: object whose tree_string attribute holds a newick tree string
    @return: the report as a single string
    """
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    # count the vertices of each kind
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    ninternal = nvertices - nleaves
    # ids for the full matrix; the code below treats the first ninternal
    # rows as internal vertices and the last nleaves rows as leaves
    ordered_ids = get_ordered_ids(tree)
    leaf_ids = [id(tip) for tip in tree.gen_tips()]
    # distance matrices restricted to the leaves and over all ordered ids
    D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # NOTE(review): X_leaf is not read again in this function;
    # confirm whether this call is still needed
    X_leaf = Euclid.edm_to_points(D_leaf)
    # eigendecomposition of the matrix derived from the leaf distances;
    # the eigenvectors are transposed so that they become columns
    w_leaf, v_leaf = EigUtil.eigh(Euclid.edm_to_dccov(D_leaf))
    V_leaf = np.array(v_leaf).T
    # explicitly compute the limiting points as the number of dups increases:
    # embed all vertices, then recenter on the mean of the leaf rows
    X = Euclid.edm_to_points(D)
    X -= np.mean(X[-nleaves:], axis=0)
    # rotate every point by the right singular vectors of the leaf rows
    Vt = np.linalg.svd(X[-nleaves:])[2]
    Z = np.dot(X, Vt.T)
    # keep the first nleaves - 1 columns and rescale them by the square
    # roots of all but the last entry of w_leaf
    WY = Z[:, :nleaves - 1] / np.sqrt(w_leaf[:-1])
    # product of the alpha multiplier with the first ninternal rows of WY
    W = WY[:ninternal]
    M_alpha = get_alpha_multiplier(D, nleaves)
    MW_alpha = np.dot(M_alpha, W)
    # product of the beta multiplier with the leaf eigenvector columns
    M_beta = get_beta_multiplier(D, nleaves)
    MY_beta = np.dot(M_beta, V_leaf)
    # each report section is a heading, a value, and a blank line
    np.set_printoptions(linewidth=300, threshold=10000)
    sections = (
        ("leaf distance matrix:", D_leaf),
        ("eigenvalues derived from the leaf distance matrix", w_leaf),
        ("corresponding eigenvectors (as columns)", V_leaf),
        ("candidates for [W' Y']':", WY),
        ("candidates for W:", W),
        ("left multiplier of W:", M_alpha),
        ("each column is a (left multiplier, W) product:", MW_alpha),
        ("left multiplier of Y:", M_beta),
        ("each column is a (left multiplier, Y) product:", MY_beta),
        ("the above matrix divided by 2*eigenvalue:",
            MY_beta / (2 * np.array(w_leaf))),
    )
    out = StringIO()
    for heading, payload in sections:
        print >> out, heading
        print >> out, payload
        print >> out
    return out.getvalue()

예제 #18

0

파일 보기

파일: 20100819a.py 프로젝트: BIGtigr/xgcode

def get_response_content(fs):
    """
    Report how leaf-only eigenvectors relate to an all-vertex embedding.
    @param fs: object whose tree_string attribute holds a newick tree string
    @return: the report text
    """
    # parse the tree and count its vertices
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nleaves = len(list(tree.gen_tips()))
    nvertices = len(list(tree.preorder()))
    ninternal = nvertices - nleaves
    # vertex orderings: one for all vertices and one for the tips only;
    # the slicing below takes the last nleaves rows to be the leaves
    ordered_ids = get_ordered_ids(tree)
    leaf_ids = [id(node) for node in tree.gen_tips()]
    # the leaf distance matrix and the distance matrix over the ordered ids
    D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # analyze the leaf distance matrix
    # NOTE(review): X_leaf is never read below; confirm it can be dropped
    X_leaf = Euclid.edm_to_points(D_leaf)
    dccov_leaf = Euclid.edm_to_dccov(D_leaf)
    w_leaf, v_leaf = EigUtil.eigh(dccov_leaf)
    V_leaf = np.array(v_leaf).T
    # explicitly compute the limiting points as the number of dups increases
    X = Euclid.edm_to_points(D)
    leaf_centroid = np.mean(X[-nleaves:], axis=0)
    X -= leaf_centroid
    _, _, Vt = np.linalg.svd(X[-nleaves:])
    rotated = np.dot(X, Vt.T)
    # only the first nleaves - 1 columns are kept, each divided by the
    # square root of the matching entry of w_leaf
    WY = rotated[:, :nleaves - 1] / np.sqrt(w_leaf[:-1])
    # the first ninternal rows of WY, left-multiplied by the alpha multiplier
    W = WY[:ninternal]
    M_alpha = get_alpha_multiplier(D, nleaves)
    MW_alpha = np.dot(M_alpha, W)
    # the leaf eigenvector columns, left-multiplied by the beta multiplier
    M_beta = get_beta_multiplier(D, nleaves)
    MY_beta = np.dot(M_beta, V_leaf)
    # report the results
    np.set_printoptions(linewidth=300, threshold=10000)
    out = StringIO()

    def emit(heading, payload):
        # write one report section: heading, value, blank line
        print >> out, heading
        print >> out, payload
        print >> out

    emit('leaf distance matrix:', D_leaf)
    emit('eigenvalues derived from the leaf distance matrix', w_leaf)
    emit('corresponding eigenvectors (as columns)', V_leaf)
    emit("candidates for [W' Y']':", WY)
    emit('candidates for W:', W)
    emit('left multiplier of W:', M_alpha)
    emit('each column is a (left multiplier, W) product:', MW_alpha)
    emit('left multiplier of Y:', M_beta)
    emit('each column is a (left multiplier, Y) product:', MY_beta)
    emit('the above matrix divided by 2*eigenvalue:',
            MY_beta / (2 * np.array(w_leaf)))
    return out.getvalue()

예제 #19

0

파일 보기

파일: 20100607a.py 프로젝트: argriffing/xgcode

def process(args, hud_lines):
    """
    Summarize an eigenanalysis of the count data decoded from a .hud file.
    @param args: object whose sum_to_n and sum_to_1 attributes select an
    optional listing of normalized eigenvalues
    @param hud_lines: lines of a .hud file
    @return: results in convenient text form
    """
    # decode the ordered names and the count rows
    names, data = hud.decode(hud_lines)
    C_full = np.array(data)
    m_full, n_full = C_full.shape
    # keep only the columns that take more than one distinct value
    varying_columns = [col for col in C_full.T if len(set(col)) > 1]
    C = np.vstack(varying_columns).T
    m, n = C.shape
    # center each column on its mean u and divide by sqrt(u * (1 - u))
    u = C.mean(axis=0)
    M = (C - u) / np.sqrt(u * (1 - u))
    # eigendecomposition of the sample covariance matrix
    evals, evecs = EigUtil.eigh(np.dot(M, M.T) / n)
    eval_total = evals.sum()
    eval_sumsq = np.dot(evals, evals)
    # share of the eigenvalue total taken by evals[0]
    proportion = evals[0] / eval_total
    # the linkage-naive Tracy-Widom statistic
    x = get_tracy_widom_statistic(m, n, m * proportion)
    # do linkage correction
    n_prime = ((m + 1) * eval_total * eval_total) / (
            (m - 1) * eval_sumsq - eval_total * eval_total)
    # the corrected count is only used when it is below the raw count
    use_correction = n_prime < n
    # detect additional structure using alpha level of 0.05
    crit = 0.9794
    if use_correction:
        x_prime = get_tracy_widom_statistic(m, n_prime, (m - 1) * proportion)
        sigs, insig = get_corrected_structure(crit, evals, m, n_prime)
        n_prime_report, x_prime_report = n_prime, x_prime
    else:
        sigs, insig = get_corrected_structure(crit, evals, m, n)
        n_prime_report = '[sample is too degenerate for estimation]'
        x_prime_report = '[sample is too degenerate for estimation]'
    # sections made of a heading, a single value, and a blank line
    scalar_sections = (
        ('number of isolates:', m_full),
        ('total number of SNPs:', n_full),
        ('number of informative SNPs:', n),
        ('effective number of linkage-corrected SNPs:', n_prime_report),
        ('Tracy-Widom statistic (linkage-naive):', x),
        ('Tracy-Widom statistic (linkage-corrected):', x_prime_report),
        ('proportion of variance explained by principal axis:', proportion),
        ('number of significant axes of variation:', len(sigs)),
    )
    out = StringIO()
    for heading, value in scalar_sections:
        print >> out, heading
        print >> out, value
        print >> out
    # one line per significant statistic
    print >> out, 'significant Tracy-Widom statistics:'
    for sig in sigs:
        print >> out, sig
    print >> out
    print >> out, 'first insignificant Tracy-Widom statistic:'
    print >> out, insig
    print >> out
    # name and loading pairs, sorted by loading
    print >> out, 'principal axis projection:'
    loadings = evecs[0] * evals[0]
    for loading, name in sorted(zip(loadings, names)):
        print >> out, '\t'.join([name, str(loading)])
    print >> out
    # evals should sum to the number of OTUs
    denominator = float(sum(evals))
    if args.sum_to_n:
        print >> out, 'eigenvalues normalized to sum to the number of OTUs:'
        for w in evals:
            print >> out, m_full * w / denominator
    elif args.sum_to_1:
        print >> out, 'eigenvalues normalized to sum to 1.0:'
        for w in evals:
            print >> out, w / denominator
    # trailing blank lines are stripped from the report
    return out.getvalue().rstrip()