def get_procrustes_results(
        coords_f1, coords_f2, sample_id_map=None,
        randomize=None, max_dimensions=None,
        get_eigenvalues=get_mean_eigenvalues,
        get_percent_variation_explained=get_mean_percent_variation):
    """Run a Procrustes analysis on two coordinate (PCoA) files.

    coords_f1, coords_f2: the two inputs handed to parse_coords
    sample_id_map: optional mapping; when truthy both sample id lists are
     passed through map_sample_ids before being compared
    randomize: optional callable applied to the second coords matrix
     (used for random trials)
    max_dimensions: when truthy, the coords matrices are passed through
     filter_coords_matrix and the eigenvalue / percent variation vectors
     are truncated to their first max_dimensions entries
    get_eigenvalues: callable combining the two eigenvalue vectors
    get_percent_variation_explained: callable combining the two percent
     variation vectors

    Returns (transformed_coords1, transformed_coords2, m_squared), where
     the first two items are the output of format_coords.

    Raises ValueError if the two files have no sample ids in common.
    """
    # Parse the PCoA files
    sample_ids1, coords1, eigvals1, pct_var1 = parse_coords(coords_f1)
    sample_ids2, coords2, eigvals2, pct_var2 = parse_coords(coords_f2)

    if sample_id_map:
        sample_ids1 = map_sample_ids(sample_ids1, sample_id_map)
        sample_ids2 = map_sample_ids(sample_ids2, sample_id_map)

    # Only samples present in both files can be compared. Both matrices
    # are reordered to this shared order so that rows line up.
    order = list(set(sample_ids1) & set(sample_ids2))
    if not order:
        # Fail early with a clear message instead of feeding empty
        # matrices to the analysis.
        raise ValueError('No overlapping samples in the two files')
    coords1 = reorder_coords(coords1, sample_ids1, order)
    coords2 = reorder_coords(coords2, sample_ids2, order)

    # If this is a random trial, apply the shuffling function passed as
    # randomize()
    if randomize:
        coords2 = randomize(coords2)

    coords1, coords2 = pad_coords_matrices(coords1, coords2)
    if max_dimensions:
        coords1 = filter_coords_matrix(coords1, max_dimensions)
        coords2 = filter_coords_matrix(coords2, max_dimensions)
        pct_var1 = pct_var1[:max_dimensions]
        pct_var2 = pct_var2[:max_dimensions]
        eigvals1 = eigvals1[:max_dimensions]
        eigvals2 = eigvals2[:max_dimensions]

    # Zero-pad the shorter eigenvalue / percent variation vectors so both
    # inputs have the same length. This is done after the optional
    # truncation as well, because an input with fewer than max_dimensions
    # axes is still shorter than the other one after slicing.
    if len(pct_var1) > len(pct_var2):
        pct_var2 = append(pct_var2, zeros(len(pct_var1) - len(pct_var2)))
        eigvals2 = append(eigvals2, zeros(len(eigvals1) - len(eigvals2)))
    elif len(pct_var1) < len(pct_var2):
        pct_var1 = append(pct_var1, zeros(len(pct_var2) - len(pct_var1)))
        eigvals1 = append(eigvals1, zeros(len(eigvals2) - len(eigvals1)))

    # Run the Procrustes analysis
    transformed_coords_m1, transformed_coords_m2, m_squared = \
        procrustes(coords1, coords2)

    # Both outputs share the same combined eigenvalues / percent variation
    eigvals = get_eigenvalues(eigvals1, eigvals2)
    pct_var = get_percent_variation_explained(pct_var1, pct_var2)

    transformed_coords1 = format_coords(coord_header=order,
                                        coords=transformed_coords_m1,
                                        eigvals=eigvals,
                                        pct_var=pct_var)
    transformed_coords2 = format_coords(coord_header=order,
                                        coords=transformed_coords_m2,
                                        eigvals=eigvals,
                                        pct_var=pct_var)

    # Return the results
    return transformed_coords1, transformed_coords2, m_squared
def get_procrustes_results(
        coords_f1, coords_f2, sample_id_map=None,
        randomize=None, max_dimensions=None,
        get_eigenvalues=get_mean_eigenvalues,
        get_percent_variation_explained=get_mean_percent_variation):
    """Run a Procrustes analysis on two coordinate (PCoA) files.

    coords_f1, coords_f2: the two inputs handed to parse_coords
    sample_id_map: optional mapping; when truthy both sample id lists are
     passed through map_sample_ids before being compared
    randomize: optional callable applied to the second coords matrix
     (used for random trials)
    max_dimensions: when truthy, the coords matrices are passed through
     filter_coords_matrix and the eigenvalue / percent variation vectors
     are truncated to their first max_dimensions entries
    get_eigenvalues: callable combining the two eigenvalue vectors
    get_percent_variation_explained: callable combining the two percent
     variation vectors

    Returns (transformed_coords1, transformed_coords2, m_squared), where
     the first two items are the output of format_coords.

    Raises ValueError if the two files have no sample ids in common.
    """
    # Imported locally so this block does not depend on the module-level
    # import list providing these two names.
    from numpy import append, zeros

    # Parse the PCoA files
    sample_ids1, coords1, eigvals1, pct_var1 = parse_coords(coords_f1)
    sample_ids2, coords2, eigvals2, pct_var2 = parse_coords(coords_f2)

    if sample_id_map:
        sample_ids1 = map_sample_ids(sample_ids1, sample_id_map)
        sample_ids2 = map_sample_ids(sample_ids2, sample_id_map)

    # Only samples present in both files can be compared. Both matrices
    # are reordered to this shared order so that rows line up.
    order = list(set(sample_ids1) & set(sample_ids2))
    if not order:
        # Fail early with a clear message instead of feeding empty
        # matrices to the analysis.
        raise ValueError('No overlapping samples in the two files')
    coords1 = reorder_coords(coords1, sample_ids1, order)
    coords2 = reorder_coords(coords2, sample_ids2, order)

    # If this is a random trial, apply the shuffling function passed as
    # randomize()
    if randomize:
        coords2 = randomize(coords2)

    coords1, coords2 = pad_coords_matrices(coords1, coords2)
    if max_dimensions:
        coords1 = filter_coords_matrix(coords1, max_dimensions)
        coords2 = filter_coords_matrix(coords2, max_dimensions)
        pct_var1 = pct_var1[:max_dimensions]
        pct_var2 = pct_var2[:max_dimensions]
        eigvals1 = eigvals1[:max_dimensions]
        eigvals2 = eigvals2[:max_dimensions]

    # The coords matrices were padded above, but the eigenvalue / percent
    # variation vectors were not. Zero-pad the shorter ones so the two
    # inputs can be combined element-wise.
    if len(pct_var1) > len(pct_var2):
        pct_var2 = append(pct_var2, zeros(len(pct_var1) - len(pct_var2)))
        eigvals2 = append(eigvals2, zeros(len(eigvals1) - len(eigvals2)))
    elif len(pct_var1) < len(pct_var2):
        pct_var1 = append(pct_var1, zeros(len(pct_var2) - len(pct_var1)))
        eigvals1 = append(eigvals1, zeros(len(eigvals2) - len(eigvals1)))

    # Run the Procrustes analysis
    transformed_coords_m1, transformed_coords_m2, m_squared = \
        procrustes(coords1, coords2)

    # Both outputs share the same combined eigenvalues / percent variation
    eigvals = get_eigenvalues(eigvals1, eigvals2)
    pct_var = get_percent_variation_explained(pct_var1, pct_var2)

    transformed_coords1 = format_coords(coord_header=order,
                                        coords=transformed_coords_m1,
                                        eigvals=eigvals,
                                        pct_var=pct_var)
    transformed_coords2 = format_coords(coord_header=order,
                                        coords=transformed_coords_m2,
                                        eigvals=eigvals,
                                        pct_var=pct_var)

    # Return the results
    return transformed_coords1, transformed_coords2, m_squared
def test_format_coords(self):
    """format_coords output is a tab-delimited coords table plus summary rows"""
    sample_names = ['a', 'b', 'c']
    coord_matrix = array([[1, 2, 3],
                          [4, 5, 6],
                          [7, 8, 9]])
    # Header row, one row per sample, two blank lines, then the
    # eigenvalue and percent variation rows (no trailing newline).
    expected = ("pc vector number\t1\t2\t3\n"
                "a\t1\t2\t3\n"
                "b\t4\t5\t6\n"
                "c\t7\t8\t9\n"
                "\n"
                "\n"
                "eigvals\t2\t4\t6\n"
                "% variation explained\t3\t2\t1")
    observed = format_coords(sample_names, coord_matrix, [2, 4, 6], [3, 2, 1])
    self.assertEqual(observed, expected)
def pcoa(file):
    """Run principal coordinates analysis on a distance matrix file.

    file: input handed to parse_distmat

    Returns the output of format_coords, with the axes ordered by
     decreasing percentage of the summed absolute eigenvalues.
    """
    samples, distmtx = parse_distmat(file)

    # each row of axes_by_row is an axis
    axes_by_row, eigvals = ms.principal_coordinates_analysis(distmtx)

    # Share of each axis, based on eigenvalue magnitude
    magnitudes = numpy.abs(eigvals)
    pcnts = (magnitudes / sum(magnitudes)) * 100

    # Largest share first
    ranking = pcnts.argsort()[::-1]

    # format_coords receives the coords transposed relative to axes_by_row
    return format_coords(samples,
                         axes_by_row[ranking].T,
                         eigvals[ranking],
                         pcnts[ranking])
def generate_3d_plots_invue(prefs, data, dir_path, filename, intp_pts,
                            polyh_pts, offset):
    """Make files to be imported to inVUE

    http://sourceforge.net/projects/invue/

    prefs: iterable of column names; each must appear in data['map'][0]
    data: dict; data['map'] is read here and the whole dict is passed on
     to make_3d_plots_invue
    dir_path: directory the output files are written into
    filename: prefix used for the per-label and per-group file names
    intp_pts, polyh_pts, offset: passed through to make_3d_plots_invue

    Writes one "<filename>_<label>_<group>.txt" file per group, one
     "<filename>_<label>.txt" file per label and a "tetraVertices.txt"
     file, all inside dir_path. Returns None.

    Raises ValueError if a column in prefs is missing from data['map'][0].
    """
    # Validating existence of all columns
    for col in prefs:
        if col not in data['map'][0]:
            raise ValueError(
                'Column given "%s" does not exist in mapping file' % col)

    # Split matrix by labelname, groups & give colors
    groups_and_colors = list(iter_color_groups(data['map'], prefs))

    smp_lbl, smp_lbl_grp, polypts = make_3d_plots_invue(
        data, groups_and_colors, intp_pts, polyh_pts, offset)

    # Looping over the binned results to write full and binned files.
    # Files are opened with "with" so the handle is released even when
    # formatting or writing fails.
    for lbl in smp_lbl:
        for grp in smp_lbl_grp[lbl]:
            # writing individual files
            ind_path = "%s/%s_%s_%s.txt" % (dir_path, filename, lbl, grp)
            smp = smp_lbl_grp[lbl][grp]
            with open(ind_path, 'w') as outfile:
                outfile.write(format_coords(smp['headrs'], smp['coords'],
                                            [], [], False))

        # writing full file
        full_path = "%s/%s_%s.txt" % (dir_path, filename, lbl)
        with open(full_path, 'w') as outfile:
            outfile.write(format_coords(smp_lbl[lbl]['headrs'],
                                        smp_lbl[lbl]['coords'],
                                        [], [], False))

    # Writing tetraVertices.txt: one tab-separated row per entry of polypts
    ind_path = "%s/tetraVertices.txt" % (dir_path)
    with open(ind_path, 'w') as outfile:
        outfile.write('\n'.join(['\t'.join(map(str, row))
                                 for row in polypts]))
        outfile.write('\n')
def compute_manifold(in_file, alg, params=None):
    """compute the specified manifold on the specified file

    in_file: input handed to parse_biom_table
    alg: one of "isomap", "lle", "spectral", "ltsa" or "mds"
    params: optional user parameters, merged with the per-algorithm
     defaults by fill_args; only the keys listed in the defaults of the
     chosen algorithm are passed to its constructor

    Returns the output of format_coords for the embedded samples. The
     embedding is divided by its largest absolute value, and placeholder
     eigenvalues / percent variation values are supplied.

    For an unknown alg a message is printed and exit(1) is called.
    """
    otu_table = parse_biom_table(in_file)
    samples = otu_table.SampleIds

    # Dense tables already have all values available
    # For sparse tables we have to more or less generate missing points
    if isinstance(otu_table, DenseTable):
        otumtx = otu_table._data.T
    else:
        otumtx = asarray(list(otu_table.iterSampleData()))

    # Default parameters shared by the two LocallyLinearEmbedding flavours
    # ("lle" and "ltsa" differ only in "method").
    lle_defaults = {"n_neighbors": 5, "n_components": 3, "reg": 0.001,
                    "eigen_solver": "auto", "tol": 1e-06, "max_iter": 100,
                    "method": "standard", "hessian_tol": 0.0001,
                    "modified_tol": 1e-12, "neighbors_algorithm": "auto",
                    "random_state": None}
    ltsa_defaults = dict(lle_defaults, method="ltsa")

    # alg -> (name of the class in sklearn's manifold module, defaults).
    # The class is looked up by name only after alg is known, so only the
    # requested class has to exist in the installed sklearn version.
    algorithms = {
        "isomap": ("Isomap",
                   {"n_neighbors": 5, "n_components": 3,
                    "eigen_solver": "auto", "tol": 0, "max_iter": None,
                    "path_method": "auto", "neighbors_algorithm": "auto"}),
        "lle": ("LocallyLinearEmbedding", lle_defaults),
        "spectral": ("SpectralEmbedding",
                     {"n_components": 3, "affinity": "nearest_neighbors",
                      "gamma": None, "random_state": None,
                      "eigen_solver": None, "n_neighbors": None}),
        "ltsa": ("LocallyLinearEmbedding", ltsa_defaults),
        # MDS has its own class; these keywords are MDS parameters and
        # are not accepted by Isomap.
        "mds": ("MDS",
                {"n_components": 3, "metric": True, "n_init": 4,
                 "max_iter": 300, "verbose": 0, "eps": 0.001, "n_jobs": 1,
                 "random_state": None, "dissimilarity": "euclidean"}),
    }

    if alg not in algorithms:
        print("arg in error, unknown algorithm '"+alg+"'")
        exit(1)

    class_name, defaults = algorithms[alg]
    # Remember the accepted keyword names before merging, so that extra
    # user-supplied keys are never forwarded to the constructor.
    accepted = tuple(defaults)
    params = fill_args(defaults, params)
    mapper_class = getattr(manifold, class_name)
    mapper = mapper_class(**{name: params[name] for name in accepted})

    # compute the fit and scale from -1 to 1
    fit = mapper.fit_transform(otumtx)
    fit /= abs(fit).max()

    # dummy eigenvalues and percent variation explained
    # "make_emperor.py" does not work if these are not supplied
    eigvals = [3.0, 2.0, 1.0]
    pcnts = [30.0, 20.0, 10.0]

    return format_coords(samples, fit, eigvals, pcnts)
def get_procrustes_results(
        coords_f1,
        coords_f2,
        sample_id_map=None,
        randomize=None,
        max_dimensions=None,
        get_eigenvalues=get_mean_eigenvalues,
        get_percent_variation_explained=get_mean_percent_variation):
    """Run a Procrustes analysis on two coordinate (PCoA) files.

    coords_f1, coords_f2: the two inputs handed to parse_coords
    sample_id_map: optional mapping; when truthy both sample id lists are
     passed through map_sample_ids before being compared
    randomize: optional callable applied to the second coords matrix
     (used for random trials)
    max_dimensions: when truthy, the coords matrices are passed through
     filter_coords_matrix and the eigenvalue / percent variation vectors
     are truncated to their first max_dimensions entries
    get_eigenvalues: callable combining the two eigenvalue vectors
    get_percent_variation_explained: callable combining the two percent
     variation vectors

    Returns (transformed_coords1, transformed_coords2, m_squared,
     randomized_coords2). The first two items are the output of
     format_coords; randomized_coords2 is the formatted randomized second
     matrix, or None when randomize is not given.

    Raises ValueError if the two files have no sample ids in common.
    """
    # Parse the PCoA files
    sample_ids1, coords1, eigvals1, pct_var1 = parse_coords(coords_f1)
    sample_ids2, coords2, eigvals2, pct_var2 = parse_coords(coords_f2)

    if sample_id_map:
        sample_ids1 = map_sample_ids(sample_ids1, sample_id_map)
        sample_ids2 = map_sample_ids(sample_ids2, sample_id_map)

    # Only samples present in both files can be compared. Both matrices
    # are reordered to this shared order so that rows line up.
    order = list(set(sample_ids1) & set(sample_ids2))
    if not order:
        # Checked before reordering so that no work is done on inputs
        # that cannot be compared.
        raise ValueError('No overlapping samples in the two files')
    coords1 = reorder_coords(coords1, sample_ids1, order)
    coords2 = reorder_coords(coords2, sample_ids2, order)

    # If this is a random trial, apply the shuffling function passed as
    # randomize()
    if randomize:
        coords2 = randomize(coords2)
        # Formatted before padding / truncation, with the second file's
        # own eigenvalues and percent variation.
        randomized_coords2 = format_coords(coord_header=order,
                                           coords=coords2,
                                           eigvals=eigvals2,
                                           pct_var=pct_var2)
    else:
        randomized_coords2 = None

    coords1, coords2 = pad_coords_matrices(coords1, coords2)
    if max_dimensions:
        coords1 = filter_coords_matrix(coords1, max_dimensions)
        coords2 = filter_coords_matrix(coords2, max_dimensions)
        pct_var1 = pct_var1[:max_dimensions]
        pct_var2 = pct_var2[:max_dimensions]
        eigvals1 = eigvals1[:max_dimensions]
        eigvals2 = eigvals2[:max_dimensions]

    # Zero-pad the shorter eigenvalue / percent variation vectors so both
    # inputs have the same length. This is done after the optional
    # truncation as well, because an input with fewer than max_dimensions
    # axes is still shorter than the other one after slicing.
    if len(pct_var1) > len(pct_var2):
        pct_var2 = append(pct_var2, zeros(len(pct_var1) - len(pct_var2)))
        eigvals2 = append(eigvals2, zeros(len(eigvals1) - len(eigvals2)))
    elif len(pct_var1) < len(pct_var2):
        pct_var1 = append(pct_var1, zeros(len(pct_var2) - len(pct_var1)))
        eigvals1 = append(eigvals1, zeros(len(eigvals2) - len(eigvals1)))

    # Run the Procrustes analysis
    transformed_coords_m1, transformed_coords_m2, m_squared = \
        procrustes(coords1, coords2)

    # Both outputs share the same combined eigenvalues / percent variation
    eigvals = get_eigenvalues(eigvals1, eigvals2)
    pct_var = get_percent_variation_explained(pct_var1, pct_var2)

    transformed_coords1 = format_coords(coord_header=order,
                                        coords=transformed_coords_m1,
                                        eigvals=eigvals,
                                        pct_var=pct_var)
    transformed_coords2 = format_coords(coord_header=order,
                                        coords=transformed_coords_m2,
                                        eigvals=eigvals,
                                        pct_var=pct_var)

    # Return the results
    return (transformed_coords1, transformed_coords2,
            m_squared, randomized_coords2)