def _create_ordination_results(self):
    """Write a small, fixed PCoA ordination to a temp file and return its path.

    The ordination has five samples on four axes (PC1..PC4) with hard-coded
    eigenvalues, proportions explained and sample coordinates. The file is
    created with a ``.txt`` suffix inside ``self.out_dir``.

    Returns
    -------
    str
        Path of the file the ordination was written to.
    """
    axes = ['PC1', 'PC2', 'PC3', 'PC4']
    ids = [
        '1.SKM4.640180',
        '1.SKB8.640193',
        '1.SKD8.640184',
        '1.SKM9.640192',
        '1.SKB7.640196',
    ]

    # One row per sample id, one column per axis.
    coordinates = np.asarray([
        [-2.584, 1.739, 3.828, -1.944],
        [-2.710, -1.859, -8.648, 1.180],
        [2.350, 9.625, -3.457, -3.208],
        [2.614, -1.114, 1.476, 2.908],
        [2.850, -1.925, 6.232, 1.381],
    ])
    sample_frame = pd.DataFrame(coordinates, index=ids, columns=axes)

    eigenvalues = pd.Series(
        [0.51236726, 0.30071909, 0.26791207, 0.20898868], index=axes)
    explained = pd.Series(
        [0.2675738328, 0.157044696, 0.1399118638, 0.1091402725], index=axes)

    ordination = OrdinationResults(
        short_method_name='PCoA',
        long_method_name='Principal Coordinate Analysis',
        eigvals=eigenvalues,
        samples=sample_frame,
        proportion_explained=explained)

    # mkstemp returns an open OS-level descriptor; close it right away
    # because the ordination is written through the path instead.
    descriptor, path = mkstemp(suffix='.txt', dir=self.out_dir)
    close(descriptor)
    ordination.write(path)
    return path
def rpca(in_biom: str, output_dir: str,
         min_sample_depth: int, rank: int) -> None:
    """Run RPCA with an rclr preprocessing step and write the results.

    Three files are written into ``output_dir``:

    * ``rclr_OTUtable.txt`` - tab-separated table rebuilt from the OptSpace
      factors of the rclr-transformed data (index label ``OTU_ID``).
    * ``RPCA_Ordination.txt`` - the ordination results.
    * ``RPCA_distance.txt`` - the sample distance matrix.

    Parameters
    ----------
    in_biom : str
        Path of the table passed to ``load_table``.
    output_dir : str
        Directory the output files are written to.
        NOTE(review): nothing here creates it - presumably it must exist.
    min_sample_depth : int
        Samples are kept only when their total count is strictly greater
        than this value.
    rank : int
        Rank given to ``OptSpace`` for the ordination; also the number of
        ``PC<i>`` axis labels generated.
    """
    # import table
    table = load_table(in_biom)

    # filter sample to min depth
    def sample_filter(val, id_, md):
        return sum(val) > min_sample_depth

    table = table.filter(sample_filter, axis='sample')
    # Transpose the table's DataFrame and drop duplicated rows; from here on
    # the index is used for sample labels and the columns for feature labels.
    table = table.to_dataframe().T.drop_duplicates()

    # rclr for saving the transformed OTU table (RSC edited)
    tablefit = rclr().fit_transform(table.copy())
    # This pass uses OptSpace's default settings, not ``rank``.
    U, s, V = OptSpace().fit_transform(tablefit)
    # Rebuild the table from the returned factors, then transpose so that
    # rows are labelled by table.columns and columns by table.index.
    tablefit = np.dot(np.dot(U, s), V.T)
    tablefit = pd.DataFrame(tablefit.T,
                            index=table.columns,
                            columns=table.index)
    # Hand the path straight to pandas: the previous code also opened the
    # same file in a ``with open(..., 'w')`` whose handle was never used, so
    # the file was opened twice for writing.
    tablefit.to_csv(os.path.join(output_dir, 'rclr_OTUtable.txt'),
                    sep='\t', index_label='OTU_ID')

    # rclr preprocessing and OptSpace (RPCA)
    opt = OptSpace(rank=rank).fit(rclr().fit_transform(table.copy()))
    # Map the default integer column labels 0..rank-1 to PC1..PC<rank>.
    rename_cols = {i - 1: 'PC' + str(i) for i in range(1, rank + 1)}

    # Feature Loadings
    feature_loading = pd.DataFrame(opt.feature_weights, index=table.columns)
    feature_loading = feature_loading.rename(columns=rename_cols)
    feature_loading.sort_values('PC1', inplace=True, ascending=True)

    # Sample Loadings
    sample_loading = pd.DataFrame(opt.sample_weights, index=table.index)
    sample_loading = sample_loading.rename(columns=rename_cols)

    proportion_explained = pd.Series(opt.explained_variance_ratio,
                                     index=list(rename_cols.values()))
    eigvals = pd.Series(opt.eigenvalues,
                        index=list(rename_cols.values()))

    # save ordination results
    # The method names are recorded as PCoA even though this is RPCA;
    # NOTE(review): presumably for compatibility with downstream tools.
    ord_res = OrdinationResults(
        'PCoA',
        'Principal Coordinate Analysis',
        eigvals.copy(),
        sample_loading.copy(),
        features=feature_loading.copy(),
        proportion_explained=proportion_explained.copy())
    # write files to output folder
    ord_res.write(os.path.join(output_dir, 'RPCA_Ordination.txt'))

    # save distance matrix
    dist_res = skbio.stats.distance.DistanceMatrix(
        opt.distance, ids=sample_loading.index)
    dist_res.write(os.path.join(output_dir, 'RPCA_distance.txt'))
def _1(data: skbio.OrdinationResults) -> OrdinationFormat:
    """Serialize ordination results into a new ``OrdinationFormat`` object.

    A fresh ``OrdinationFormat`` is created, ``data`` is written to the
    location given by its string form using the ``'ordination'`` format,
    and the format object is returned.
    NOTE(review): presumably registered as a format transformer - the
    decorator is not visible here.
    """
    ordination_file = OrdinationFormat()
    destination = str(ordination_file)
    data.write(destination, format='ordination')
    return ordination_file