# Filter an OTU table by maximum abundance: the threshold is given either as
# a raw value (-m) or as a negative log10 exponent (-l), and the filtered
# table is written to the path given with -o.

import pysurvey as ps
import numpy as np
import argparse

if __name__ == '__main__':
    p = argparse.ArgumentParser(
        description='filter otus based on their maximum abundance')
    p.add_argument('table', help='input otu table')
    p.add_argument('-o', '--output', help='output otu table', required=True)

    # exactly one of -l / -m must be supplied
    g = p.add_mutually_exclusive_group(required=True)
    g.add_argument('-l', '--log', type=float, help='minimum max (negative log10)')
    g.add_argument('-m', '--min', type=float, help='minimum max (raw value)')
    args = p.parse_args()

    # load up the table
    table = ps.read_txt(args.table)

    # convert to raw minimum max.
    # Compare against None rather than relying on truthiness: `-l 0` is a
    # valid threshold (10**0 == 1.0), but 0.0 is falsy, so `if args.log:`
    # would skip the conversion and leave args.min as None.
    if args.log is not None:
        args.min = 10.0**(-args.log)

    # filter columns if the maximum is below the specified min; the criterion
    # tuple is (reducer, comparison, threshold) as handed to ps.filter_by_vals
    criterion = (np.max, lambda x, y: x > y, args.min)
    table = ps.filter_by_vals(table, criterion)

    # write out the filtered table
    ps.write_txt(table, args.output)
# Tail of a table-summary script. `p` (the argument parser) and `shannon`
# (the function applied along axis 1 below) are defined outside this span.
p.add_argument('-o', '--output_dir',
               default='proc',
               help='output directory (default: proc)')
args = p.parse_args()

# Every output lives at <output_dir>/<name>.txt
path_template = "{}/{}.txt"
out_dir = args.output_dir
otu_sum_fn = path_template.format(out_dir, 'otu_sum')
sample_sum_fn = path_template.format(out_dir, 'sample_sum')
shannon_fn = path_template.format(out_dir, 'shannon')
abund_fn = path_template.format(out_dir, 'abund')
log_fn = path_template.format(out_dir, 'log')

table = ps.read_txt(args.table)

# Sums along axis 0, saved as plain text.
otu_sum = table.apply(np.sum, axis=0)
np.savetxt(otu_sum_fn, otu_sum)

# Sums along axis 1, saved as plain text.
sample_sum = table.apply(np.sum, axis=1)
np.savetxt(sample_sum_fn, sample_sum)

# `shannon` applied along axis 1, saved as plain text.
shannon_vals = table.apply(shannon, axis=1)
np.savetxt(shannon_fn, shannon_vals)

# Normalized table, written through pysurvey.
abund = ps.normalize(table)
ps.write_txt(abund, abund_fn)

# log10 of the fractions computed with a pseudocount of 1
# (method='pseudo', p_counts=1), written through pysurvey.
abund_with_pseudocount = ps.to_fractions(table, method='pseudo', p_counts=1)
log_table = np.log10(abund_with_pseudocount)
ps.write_txt(log_table, log_fn)
#!/usr/bin/env python

'''
author: scott w olesen ([email protected])

convert relative abundances to log10
'''

import pysurvey as ps
import numpy as np
import pandas as pd
import argparse

if __name__ == '__main__':
    p = argparse.ArgumentParser(description='convert to log10')
    p.add_argument('table', help='input otu table')
    # The output path is mandatory: without it args.output would be None and
    # that None would be handed to ps.write_txt only after all the work is done.
    p.add_argument('-o', '--output', help='output table', required=True)
    p.add_argument('-z', '--zero', type=float, default=10.0,
                   help='zero values become what negative log? (default: 10)')
    args = p.parse_args()

    # zeros have no logarithm, so swap them for 10**(-zero) first; after the
    # log10 below those cells come out as -zero
    new_zero = 10.0**(-args.zero)

    table = ps.read_txt(args.table, verbose=False)
    table.replace(to_replace=0.0, value=new_zero, inplace=True)

    # apply the ufunc to the whole frame at once instead of cell by cell
    logs = np.log10(table)

    ps.write_txt(logs, args.output, verbose=False)
#!/usr/bin/env python

'''
author: scott w olesen ([email protected])

this script grabs medians from an otu table
'''

import pysurvey as ps
import numpy as np
import pandas as pd
import argparse


def main():
    '''Read the input table, apply np.median to it, and write the result.'''
    parser = argparse.ArgumentParser(description='get medians by otu')
    parser.add_argument('table', help='input otu table')
    parser.add_argument('out', help='output medians list')
    opts = parser.parse_args()

    otu_table = ps.read_txt(opts.table, verbose=False)

    # DataFrame.apply with its default axis hands each column to np.median
    per_column = otu_table.apply(np.median)

    # single-column frame labelled 'median', transposed before writing
    medians = pd.DataFrame({'median': per_column}).T
    ps.write_txt(medians, opts.out)


if __name__ == '__main__':
    main()
#!/usr/bin/env python

'''
author: scott w olesen ([email protected])

combine two otu tables, etc.
'''

import pysurvey as ps
import numpy as np
import pandas as pd
import argparse

if __name__ == '__main__':
    # description previously read 'get medians' (copied from another script);
    # it now says what this script actually does
    p = argparse.ArgumentParser(description='combine two otu tables')
    p.add_argument('table1', help='input otu table')
    p.add_argument('table2', help='other input otu table')
    p.add_argument('out', help='output table')
    args = p.parse_args()

    # read tables. don't transpose, since that would cause joining samples, not OTU IDs
    table1 = ps.read_txt(args.table1, verbose=False, T=False)
    table2 = ps.read_txt(args.table2, verbose=False, T=False)

    # outer join on the index keeps rows present in either table; cells
    # missing from one side are filled with 0.
    # NOTE(review): DataFrame.join raises when both frames share a column
    # label and no suffix is given -- confirm the two inputs never overlap.
    table = table1.join(table2, how='outer').fillna(0)

    # written untransposed, matching how the inputs were read
    ps.write_txt(table, args.out, T=False)