def get_procrustes_results(coords_f1,coords_f2,sample_id_map=None,
    randomize=None,max_dimensions=None,
    get_eigenvalues=get_mean_eigenvalues,
    get_percent_variation_explained=get_mean_percent_variation):
    """Run procrustes() on two coordinate files and format both results.

    coords_f1, coords_f2: inputs handed to parse_coords(); each must yield
     (sample ids, coords, eigenvalues, percent variation).
    sample_id_map: optional mapping applied to both sample id lists via
     map_sample_ids() before the two files are matched up.
    randomize: optional callable applied to the second coordinate matrix
     (used for random trials).
    max_dimensions: if truthy, both matrices, the eigenvalues and the
     percent variation values are cut down to this many dimensions;
     otherwise the shorter eigenvalue / percent variation vectors are
     zero-padded to the length of the longer ones.
    get_eigenvalues, get_percent_variation_explained: callables that
     combine the two eigenvalue / percent variation vectors into one.

    Returns (formatted coords 1, formatted coords 2, m_squared), where
     m_squared is the third value returned by procrustes().
    """
    ids_a, matrix_a, eigs_a, pcts_a = parse_coords(coords_f1)
    ids_b, matrix_b, eigs_b, pcts_b = parse_coords(coords_f2)

    if sample_id_map:
        ids_a = map_sample_ids(ids_a, sample_id_map)
        ids_b = map_sample_ids(ids_b, sample_id_map)

    # Only samples present in both files are kept; both matrices are put
    # into the same row order so that rows can be compared pairwise.
    order = list(set(ids_a) & set(ids_b))
    matrix_a = reorder_coords(matrix_a, ids_a, order)
    matrix_b = reorder_coords(matrix_b, ids_b, order)

    # Random trial: shuffle the second matrix with the supplied function.
    if randomize:
        matrix_b = randomize(matrix_b)

    matrix_a, matrix_b = pad_coords_matrices(matrix_a, matrix_b)

    if max_dimensions:
        matrix_a = filter_coords_matrix(matrix_a, max_dimensions)
        matrix_b = filter_coords_matrix(matrix_b, max_dimensions)
        eigs_a, eigs_b = eigs_a[:max_dimensions], eigs_b[:max_dimensions]
        pcts_a, pcts_b = pcts_a[:max_dimensions], pcts_b[:max_dimensions]
    else:
        n_pcts_a = len(pcts_a)
        n_pcts_b = len(pcts_b)
        if n_pcts_a > n_pcts_b:
            # second file has fewer axes: zero-pad its vectors
            pcts_b = append(pcts_b, zeros(n_pcts_a - n_pcts_b))
            eigs_b = append(eigs_b, zeros(len(eigs_a) - len(eigs_b)))
        elif n_pcts_a < n_pcts_b:
            # first file has fewer axes: zero-pad its vectors
            pcts_a = append(pcts_a, zeros(n_pcts_b - n_pcts_a))
            eigs_a = append(eigs_a, zeros(len(eigs_b) - len(eigs_a)))

    fitted_a, fitted_b, m_squared = procrustes(matrix_a, matrix_b)

    # Both output tables share the same combined summary vectors.
    summary = dict(eigvals=get_eigenvalues(eigs_a, eigs_b),
                   pct_var=get_percent_variation_explained(pcts_a, pcts_b))

    transformed_coords1 = format_coords(coord_header=order,
                                        coords=fitted_a,
                                        **summary)
    transformed_coords2 = format_coords(coord_header=order,
                                        coords=fitted_b,
                                        **summary)

    return transformed_coords1, transformed_coords2, m_squared
Ejemplo n.º 2
0
def get_procrustes_results(coords_f1,coords_f2,sample_id_map=None,
    randomize=None,max_dimensions=None,
    get_eigenvalues=get_mean_eigenvalues,
    get_percent_variation_explained=get_mean_percent_variation):
    """Run procrustes() on two coordinate files and format both results.

    coords_f1, coords_f2: inputs handed to parse_coords(); each must yield
     (sample ids, coords, eigenvalues, percent variation).
    sample_id_map: optional mapping applied to both sample id lists via
     map_sample_ids() before the two files are matched up.
    randomize: optional callable applied to the second coordinate matrix
     (used for random trials).
    max_dimensions: if truthy, both matrices, the eigenvalues and the
     percent variation values are cut down to this many dimensions.
    get_eigenvalues, get_percent_variation_explained: callables that
     combine the two eigenvalue / percent variation vectors into one.

    Returns (formatted coords 1, formatted coords 2, m_squared), where
     m_squared is the third value returned by procrustes().
    """
    first = parse_coords(coords_f1)
    second = parse_coords(coords_f2)
    sample_ids1, coords1, eigvals1, pct_var1 = first
    sample_ids2, coords2, eigvals2, pct_var2 = second

    if sample_id_map:
        sample_ids1 = map_sample_ids(sample_ids1, sample_id_map)
        sample_ids2 = map_sample_ids(sample_ids2, sample_id_map)

    # Keep the samples found in both files and give both matrices the
    # same row order.
    shared = set(sample_ids1) & set(sample_ids2)
    order = list(shared)
    coords1 = reorder_coords(coords1, sample_ids1, order)
    coords2 = reorder_coords(coords2, sample_ids2, order)

    # Random trial: shuffle the second matrix with the supplied function.
    if randomize:
        coords2 = randomize(coords2)

    coords1, coords2 = pad_coords_matrices(coords1, coords2)

    if max_dimensions:
        limit = slice(None, max_dimensions)
        coords1 = filter_coords_matrix(coords1, max_dimensions)
        coords2 = filter_coords_matrix(coords2, max_dimensions)
        pct_var1, pct_var2 = pct_var1[limit], pct_var2[limit]
        eigvals1, eigvals2 = eigvals1[limit], eigvals2[limit]

    procrustes_output = procrustes(coords1, coords2)
    transformed_coords_m1, transformed_coords_m2, m_squared = \
        procrustes_output

    eigvals = get_eigenvalues(eigvals1, eigvals2)
    pct_var = get_percent_variation_explained(pct_var1, pct_var2)

    # Both output tables carry the same combined summary vectors.
    formatted = [format_coords(coord_header=order,
                               coords=matrix,
                               eigvals=eigvals,
                               pct_var=pct_var)
                 for matrix in (transformed_coords_m1, transformed_coords_m2)]

    return formatted[0], formatted[1], m_squared
Ejemplo n.º 3
0
 def test_format_coords(self):
     """format_coords should return tab-delimited table of coords"""
     sample_ids = list('abc')
     coords = array([[1,2,3],[4,5,6],[7,8,9]])
     # one literal per output line; adjacent literals are concatenated
     expected = ("pc vector number\t1\t2\t3\n"
                 "a\t1\t2\t3\n"
                 "b\t4\t5\t6\n"
                 "c\t7\t8\t9\n"
                 "\n"
                 "\n"
                 "eigvals\t2\t4\t6\n"
                 "% variation explained\t3\t2\t1")
     observed = format_coords(sample_ids, coords, [2,4,6], [3,2,1])
     self.assertEqual(observed, expected)
Ejemplo n.º 4
0
 def test_format_coords(self):
     """format_coords should return tab-delimited table of coords"""
     matrix = array([[1,2,3],[4,5,6],[7,8,9]])
     row_labels = list('abc')
     # expected output, one entry per line (two blank lines separate the
     # coordinates from the eigvals / percent variation rows)
     expected_lines = ["pc vector number\t1\t2\t3",
                       "a\t1\t2\t3",
                       "b\t4\t5\t6",
                       "c\t7\t8\t9",
                       "",
                       "",
                       "eigvals\t2\t4\t6",
                       "% variation explained\t3\t2\t1"]
     self.assertEqual(format_coords(row_labels, matrix, [2,4,6], [3,2,1]),
                      "\n".join(expected_lines))
Ejemplo n.º 5
0
def pcoa(file):
    """Run principal coordinates analysis on a distance matrix file.

    file is handed to parse_distmat(); the resulting axes are sorted by
    descending share of the summed absolute eigenvalues and the result is
    returned as formatted by format_coords().
    """
    samples, distmtx = parse_distmat(file)
    # each row of axes is one principal coordinate axis
    axes, eigvals = ms.principal_coordinates_analysis(distmtx)

    magnitudes = numpy.abs(eigvals)
    pcnts = (magnitudes / sum(magnitudes)) * 100

    # largest percentage first
    ranking = pcnts.argsort()[::-1]

    return format_coords(samples,
                         axes[ranking].T,
                         eigvals[ranking],
                         pcnts[ranking])
Ejemplo n.º 6
0
def pcoa(file):
    """Compute principal coordinates for the distance matrix in file.

    The distance matrix is read with parse_distmat(). Axes, eigenvalues
    and percentages are reordered so the axis with the largest share of
    the summed absolute eigenvalues comes first, then passed to
    format_coords(), whose result is returned.
    """
    samples, distmtx = parse_distmat(file)
    pcoa_result = ms.principal_coordinates_analysis(distmtx)
    # first element: coords with one axis per row; second: eigenvalues
    coords, eigvals = pcoa_result

    abs_eigvals = numpy.abs(eigvals)
    total = sum(abs_eigvals)
    pcnts = (abs_eigvals / total) * 100

    idxs_descending = pcnts.argsort()[::-1]
    sorted_coords = coords[idxs_descending]
    sorted_eigvals = eigvals[idxs_descending]
    sorted_pcnts = pcnts[idxs_descending]

    # format_coords receives the transpose of the axis-per-row matrix
    return format_coords(samples, sorted_coords.T, sorted_eigvals,
                         sorted_pcnts)
Ejemplo n.º 7
0
def generate_3d_plots_invue(prefs, data, dir_path, filename, intp_pts, polyh_pts, offset):
    """ Make files to be imported to inVUE
        http://sourceforge.net/projects/invue/

        prefs: iterable of column names; each one must be present in the
         first row of data['map'].
        data: dict holding at least the 'map' entry; it is passed on to
         make_3d_plots_invue() unchanged.
        dir_path, filename: directory and file name prefix of the files
         that get written.
        intp_pts, polyh_pts, offset: forwarded to make_3d_plots_invue().

        Files written (all inside dir_path):
         <filename>_<label>_<group>.txt  one per group of every label
         <filename>_<label>.txt          one per label
         tetraVertices.txt               one tab-separated row per entry
                                         of the returned polypts

        Raises ValueError if a column in prefs is missing from the first
        row of data['map'].
    """

    # Validating existence of all columns
    for col in prefs:
        if col not in data['map'][0]:
            # Call syntax works on Python 2 and 3; the message is built on
            # one logical line so no indentation leaks into it.
            raise ValueError(
                'Column given "%s" does not exist in mapping file' % col)

    # Split matrix by labelname, groups & give colors
    groups_and_colors = list(iter_color_groups(data['map'], prefs))

    smp_lbl, smp_lbl_grp, polypts = make_3d_plots_invue(
        data, groups_and_colors, intp_pts, polyh_pts, offset)

    # Looping to binning result to write full and binned files
    for lbl in smp_lbl:
        for grp in smp_lbl_grp[lbl]:
            # writing individual files
            ind_path = "%s/%s_%s_%s.txt" % (dir_path, filename, lbl, grp)
            smp = smp_lbl_grp[lbl][grp]
            # 'with' closes the file even if formatting or writing fails
            with open(ind_path, 'w') as outfile:
                outfile.write(format_coords(smp['headrs'], smp['coords'],
                                            [], [], False))
        # writing full file
        full_path = "%s/%s_%s.txt" % (dir_path, filename, lbl)
        with open(full_path, 'w') as outfile:
            outfile.write(format_coords(smp_lbl[lbl]['headrs'],
                                        smp_lbl[lbl]['coords'],
                                        [], [], False))

    # Writing tetraVertices.txt
    ind_path = "%s/tetraVertices.txt" % (dir_path)
    with open(ind_path, 'w') as outfile:
        outfile.write('\n'.join(['\t'.join(map(str, row)) for row in polypts]))
        outfile.write('\n')
Ejemplo n.º 8
0
# For every supported algorithm: the name of the estimator class on the
# ``manifold`` module and the constructor arguments used when the caller
# does not override them. Exactly these argument names are passed to the
# estimator, so estimator and arguments are always defined side by side.
_MANIFOLD_ALGORITHMS = {
    "isomap": ("Isomap", {
        "n_neighbors":5,"n_components":3,"eigen_solver":"auto",
        "tol":0,"max_iter":None,"path_method":"auto",
        "neighbors_algorithm":"auto"}),
    "lle": ("LocallyLinearEmbedding", {
        "n_neighbors":5,"n_components":3,"reg":0.001,"eigen_solver":"auto",
        "tol":1e-06,"max_iter":100,"method":"standard","hessian_tol":0.0001,
        "modified_tol":1e-12,"neighbors_algorithm":"auto",
        "random_state":None}),
    "spectral": ("SpectralEmbedding", {
        "n_components":3,"affinity":"nearest_neighbors","gamma":None,
        "random_state":None,"eigen_solver":None,"n_neighbors":None}),
    # LTSA is LocallyLinearEmbedding with method="ltsa"
    "ltsa": ("LocallyLinearEmbedding", {
        "n_neighbors":5,"n_components":3,"reg":0.001,"eigen_solver":"auto",
        "tol":1e-06,"max_iter":100,"method":"ltsa","hessian_tol":0.0001,
        "modified_tol":1e-12,"neighbors_algorithm":"auto",
        "random_state":None}),
    # These arguments belong to MDS; the previous code handed them to
    # Isomap, whose constructor does not accept them.
    "mds": ("MDS", {
        "n_components":3,"metric":True,"n_init":4,"max_iter":300,
        "verbose":0,"eps":0.001,"n_jobs":1,"random_state":None,
        "dissimilarity":"euclidean"}),
}


def compute_manifold(in_file,alg,params=None):

    """compute the specified manifold on the specified file

    in_file: input handed to parse_biom_table().
    alg: one of "isomap", "lle", "spectral", "ltsa" or "mds".
    params: optional dict of estimator arguments that is merged with the
     algorithm's defaults by fill_args(). (The original comment states
     that entries not valid for the chosen algorithm are ignored; that
     depends on fill_args -- confirm there.)

    Returns the output of format_coords() for the embedding, scaled so
    that the largest absolute coordinate is 1.

    An unknown alg prints an error message and calls exit(1).
    """

    otu_table = parse_biom_table(in_file)

    samples = otu_table.SampleIds

    #Dense tables already have all values available
    #For sparse tables we have to more or less generate missing points
    if isinstance(otu_table, DenseTable):
        otumtx = otu_table._data.T
    else:
        otumtx = asarray([v for v in otu_table.iterSampleData()])

    if alg not in _MANIFOLD_ALGORITHMS:
        print("arg in error, unknown algorithm '"+alg+"'")
        exit(1)

    #Setup the mapping algorithm from sklearn using the specified parameters
    estimator_name, defaults = _MANIFOLD_ALGORITHMS[alg]
    #fill_args gets a copy so the shared defaults table can never be modified
    params = fill_args(dict(defaults),params)
    estimator_args = dict((name, params[name]) for name in defaults)
    mapper = getattr(manifold, estimator_name)(**estimator_args)

    #compute the fit and scale from -1 to 1
    fit = mapper.fit_transform(otumtx)
    fit /= abs(fit).max()

    #dummy eigenvalues and percent variation explained
    #"make_emperor.py" does not work if these are not supplied
    #NOTE(review): always three values, even if n_components is overridden
    eigvals = [3.0,2.0,1.0]
    pcnts = [30.0,20.0,10.0]

    return format_coords(samples, fit, eigvals, pcnts)
def get_procrustes_results(
        coords_f1,
        coords_f2,
        sample_id_map=None,
        randomize=None,
        max_dimensions=None,
        get_eigenvalues=get_mean_eigenvalues,
        get_percent_variation_explained=get_mean_percent_variation):
    """Run procrustes() on two coordinate files and format the results.

    coords_f1, coords_f2: inputs handed to parse_coords(); each must yield
     (sample ids, coords, eigenvalues, percent variation).
    sample_id_map: optional mapping applied to both sample id lists via
     map_sample_ids() before the two files are matched up.
    randomize: optional callable applied to the second coordinate matrix
     (used for random trials).
    max_dimensions: if truthy, both matrices, the eigenvalues and the
     percent variation values are cut down to this many dimensions;
     otherwise the shorter eigenvalue / percent variation vectors are
     zero-padded to the length of the longer ones.
    get_eigenvalues, get_percent_variation_explained: callables that
     combine the two eigenvalue / percent variation vectors into one.

    Returns (formatted coords 1, formatted coords 2, m_squared,
     randomized_coords2). m_squared is the third value returned by
     procrustes(); randomized_coords2 is the formatted shuffled second
     matrix, or None when randomize is not given.

    Raises ValueError if the two files have no sample in common.
    """
    ids_first, matrix_first, eig_first, pct_first = parse_coords(coords_f1)
    ids_second, matrix_second, eig_second, pct_second = \
        parse_coords(coords_f2)

    if sample_id_map:
        ids_first = map_sample_ids(ids_first, sample_id_map)
        ids_second = map_sample_ids(ids_second, sample_id_map)

    # Keep the samples found in both files and give both matrices the
    # same row order.
    order = list(set(ids_first) & set(ids_second))
    matrix_first = reorder_coords(matrix_first, ids_first, order)
    matrix_second = reorder_coords(matrix_second, ids_second, order)
    if len(order) == 0:
        raise ValueError('No overlapping samples in the two files')

    # Random trial: shuffle the second matrix and keep a formatted copy
    # of the shuffled (not yet padded or truncated) coordinates.
    randomized_coords2 = None
    if randomize:
        matrix_second = randomize(matrix_second)
        randomized_coords2 = format_coords(coord_header=order,
                                           coords=matrix_second,
                                           eigvals=eig_second,
                                           pct_var=pct_second)

    matrix_first, matrix_second = pad_coords_matrices(matrix_first,
                                                      matrix_second)

    if max_dimensions:
        matrix_first = filter_coords_matrix(matrix_first, max_dimensions)
        matrix_second = filter_coords_matrix(matrix_second, max_dimensions)
        eig_first = eig_first[:max_dimensions]
        eig_second = eig_second[:max_dimensions]
        pct_first = pct_first[:max_dimensions]
        pct_second = pct_second[:max_dimensions]
    elif len(pct_first) > len(pct_second):
        # second file has fewer axes: zero-pad its vectors
        pct_second = append(pct_second,
                            zeros(len(pct_first) - len(pct_second)))
        eig_second = append(eig_second,
                            zeros(len(eig_first) - len(eig_second)))
    elif len(pct_first) < len(pct_second):
        # first file has fewer axes: zero-pad its vectors
        pct_first = append(pct_first,
                           zeros(len(pct_second) - len(pct_first)))
        eig_first = append(eig_first,
                           zeros(len(eig_second) - len(eig_first)))

    fitted_first, fitted_second, m_squared = procrustes(matrix_first,
                                                        matrix_second)

    eigvals = get_eigenvalues(eig_first, eig_second)
    pct_var = get_percent_variation_explained(pct_first, pct_second)

    # Both output tables carry the same combined summary vectors.
    transformed_coords1 = format_coords(coord_header=order,
                                        coords=fitted_first,
                                        eigvals=eigvals,
                                        pct_var=pct_var)
    transformed_coords2 = format_coords(coord_header=order,
                                        coords=fitted_second,
                                        eigvals=eigvals,
                                        pct_var=pct_var)

    return (transformed_coords1, transformed_coords2, m_squared,
            randomized_coords2)