def augment(table: biom.Table, sampling_depth: int, augment_times: int,
            output_path_metadata: str, raw_metadata: qiime2.Metadata,
            with_replacement: bool = False,
            rarefy_start: bool = True) -> biom.Table:
    """Augment a feature table with repeated random subsamples of itself.

    The table is subsampled ``augment_times`` times to ``sampling_depth``
    counts per sample. The samples of round ``k`` (rounds are numbered from
    1) are renamed ``<sample-id>_<k>`` and merged into the output table.
    The sample metadata is replicated under the same renamed IDs, its index
    is named ``sample-id``, and it is saved to ``output_path_metadata``.

    Parameters
    ----------
    table
        Feature table to augment.
    sampling_depth
        Depth passed to ``biom.Table.subsample`` for every subsample.
    augment_times
        Number of subsampling rounds appended to the starting table.
    output_path_metadata
        Destination path for the augmented sample metadata.
    raw_metadata
        Sample metadata for ``table``.
    with_replacement
        Forwarded to ``biom.Table.subsample``.
    rarefy_start
        If true, the output starts from one subsample of ``table``;
        otherwise it starts from the (axis-sorted) input table itself.

    Returns
    -------
    biom.Table
        The starting table merged with every renamed subsample.

    Raises
    ------
    ValueError
        If the resulting table is empty. The metadata file has already
        been written by the time this is raised.
    """
    def _to_biom(df):
        # The frames used here come from ``Table.to_dataframe()``: rows are
        # observations, columns are samples.
        return biom.Table(df.values, df.index.to_list(),
                          df.columns.to_list())

    def _draw():
        return table.subsample(sampling_depth, axis='sample', by_id=False,
                               with_replacement=with_replacement)

    metadata = raw_metadata.to_dataframe().sort_index()

    # Sort both axes, then rebuild the biom table from the sorted frame so
    # every table derived below shares the same ordering.
    all_df = table.to_dataframe().sort_index().sort_index(axis=1)
    table = _to_biom(all_df)

    if rarefy_start:
        # All-zero table with the shape and IDs of the input. Merging the
        # subsample into it presumably keeps every original ID in the
        # output even if the subsample dropped it (biom's ``merge`` takes
        # the union of both axes by default) -- TODO confirm intent.
        zero_df = all_df[all_df == 0].fillna(0)
        output_table = _to_biom(zero_df).merge(_draw())
    else:
        output_table = table

    # NOTE(review): metadata rows are replicated for every sample ID, also
    # for samples that a subsample may have dropped from the table.
    sample_ids = metadata.index.to_list()
    metadata_frames = [metadata]
    for round_no in range(1, augment_times + 1):
        suffix = '_' + str(round_no)

        sub_df = _draw().to_dataframe().sort_index().sort_index(axis=1)
        sub_df.columns = [name + suffix for name in sub_df.columns.to_list()]
        output_table = output_table.merge(_to_biom(sub_df))

        round_metadata = metadata.copy()
        round_metadata.index = [name + suffix for name in sample_ids]
        metadata_frames.append(round_metadata)

    # Concatenate once instead of re-copying the growing frame every round.
    output_metadata = pd.concat(metadata_frames)
    output_metadata.index.name = 'sample-id'
    qiime2.metadata.Metadata(output_metadata).save(output_path_metadata)

    if output_table.is_empty():
        raise ValueError('The output table contains no features.')

    return output_table
def subsample(table: biom.Table, subsampling_depth: int,
              axis: str) -> biom.Table:
    """Randomly subsample IDs of ``table`` along one axis.

    Parameters
    ----------
    table
        Feature table to subsample.
    subsampling_depth
        Number of IDs to keep on the chosen axis.
    axis
        ``'feature'`` subsamples features; any other value leaves the table
        untransposed and subsamples along its sample axis.

    Returns
    -------
    biom.Table
        The subsampled table, with entries on the opposite axis that sum
        to zero removed.

    Raises
    ------
    ValueError
        If ``subsampling_depth`` exceeds the number of IDs on the chosen
        axis, or if the result is empty.
    """
    by_feature = axis == 'feature'

    # biocore/biom-format#759: the subsampling below is always done on the
    # 'sample' axis, so a feature request works on the transposed table.
    working = table.transpose() if by_feature else table

    available = len(working.ids())
    if available < subsampling_depth:
        raise ValueError('The subsampling depth exceeds the number of '
                         'elements on the desired axis. The maximum depth '
                         'is: %d.' % available)

    working = working.subsample(subsampling_depth, axis='sample', by_id=True)

    def _has_counts(v, i, m):
        return v.sum() > 0

    # Because of the transpose above, the opposite axis is always
    # 'observation'; drop its entries that are now all zero.
    working.filter(_has_counts, axis='observation')

    if by_feature:
        # Undo the transpose that biocore/biom-format#759 made necessary.
        working = working.transpose()

    if working.is_empty():
        raise ValueError('The subsampled table contains no samples or features'
                         ' (samples/features that sum to zero after filtering'
                         ' are automatically removed). It may be a good idea'
                         ' to double check that your table is valid/nonempty.')

    return working
def rarefy(table: biom.Table, sampling_depth: int) -> biom.Table:
    """Subsample the counts of every sample in ``table``.

    Parameters
    ----------
    table
        Feature table to rarefy.
    sampling_depth
        Depth passed to ``biom.Table.subsample`` (count-based, since
        ``by_id=False``).

    Returns
    -------
    biom.Table
        The rarefied table.

    Raises
    ------
    ValueError
        If the rarefied table is empty.
    """
    rarefied = table.subsample(sampling_depth, axis='sample', by_id=False)

    if not rarefied.is_empty():
        return rarefied

    raise ValueError('The rarefied table contains no samples or features. '
                     'Verify your table is valid and that you provided a '
                     'shallow enough sampling depth.')
def rarefy(table: biom.Table, sampling_depth: int) -> biom.Table:
    """Subsample the counts of every sample in ``table``.

    Parameters
    ----------
    table
        Feature table to rarefy.
    sampling_depth
        Depth passed to ``biom.Table.subsample`` (count-based, since
        ``by_id=False``).

    Returns
    -------
    biom.Table
        The rarefied table.

    Raises
    ------
    ValueError
        If the rarefied table is empty.
    """
    # NOTE(review): a later ``def rarefy`` in a module rebinds the name, so
    # this is the definition callers get; it therefore has to validate its
    # own result instead of silently handing back an empty table.
    result = table.subsample(sampling_depth, axis='sample', by_id=False)

    if result.is_empty():
        raise ValueError('The rarefied table contains no samples or features. '
                         'Verify your table is valid and that you provided a '
                         'shallow enough sampling depth.')

    return result