def parse_biom_table(fp, input_is_dense=False):
    """Parse a BIOM table from ``fp``, trying HDF5 first, then JSON.

    Parameters
    ----------
    fp : file-like object, list of str, or str
        The BIOM table: an open file (HDF5 or JSON), a list of JSON lines,
        or a JSON string.
    input_is_dense : bool, optional
        Whether a JSON table's data are stored dense. Ignored for HDF5.

    Returns
    -------
    Table
        The parsed BIOM table.
    """
    # Attempt HDF5 first.  BUG FIX: the original used a bare `except:`,
    # which also swallowed KeyboardInterrupt/SystemExit and genuine bugs.
    # The sibling parsers in this file treat ValueError/RuntimeError as
    # "not an HDF5 file", so catch exactly those here as well.
    try:
        return Table.from_hdf5(fp)
    except (ValueError, RuntimeError):
        pass

    # Fall back to JSON, dispatching on how the input was supplied.
    if hasattr(fp, 'read'):
        return Table.from_json(json.load(fp), input_is_dense=input_is_dense)
    elif isinstance(fp, list):
        return Table.from_json(json.loads(''.join(fp)),
                               input_is_dense=input_is_dense)
    else:
        return Table.from_json(json.loads(fp),
                               input_is_dense=input_is_dense)
def load_BIOM(table, informat='json', v=1):
    """Load a BIOM table from BIOM format. Default format is 'json'.

    Parameters
    ----------
    table : str
        Path to the BIOM table on disk.
    informat : {'json', 'tsv'}, optional
        Input format of the table.
    v : int, optional
        Verbosity flag; non-zero prints a confirmation message.

    Returns
    -------
    Table
        The parsed BIOM table.

    Raises
    ------
    ValueError
        If `informat` is not one of the supported formats.
    """
    from biom.table import Table
    import json

    informats = ['json', 'tsv']
    if informat not in informats:
        # BUG FIX: the original only printed a warning and then fell
        # through to `return t` with `t` undefined (NameError).  Fail
        # loudly instead.
        raise ValueError(
            "Please specify a valid BIOM input format. "
            "Currently we support: '%s'." % "', '".join(informats))

    if v:
        # print() with a single argument is valid in both Python 2 and 3;
        # the original used Python-2-only print statements.
        print("\nSpecified BIOM input format '%s' - ok!" % (informat))

    if informat == 'json':
        with open(table) as data_file:
            data = json.load(data_file)
        t = Table.from_json(data)
    elif informat == 'tsv':
        # BUG FIX: the original opened undefined name `in_tsv`; the path
        # to read is the `table` argument.  Use a context manager so the
        # handle is closed even if parsing raises.
        with open(table) as tsv:
            t = Table.from_tsv(tsv, obs_mapping=None, sample_mapping=None,
                               process_func=lambda x: x)
    return t
def parse_biom_table(fp, ids=None, axis='sample', input_is_dense=False):
    r"""Parses the biom table stored in the filepath `fp`

    Parameters
    ----------
    fp : file like
        File alike object storing the BIOM table
    ids : iterable
        The sample/observation ids of the samples/observations that we need
        to retrieve from the biom table
    axis : {'sample', 'observation'}, optional
        The axis to subset on
    input_is_dense : boolean
        Indicates if the BIOM table is dense or sparse. Valid only for JSON
        tables.

    Returns
    -------
    Table
        The BIOM table stored at fp

    Raises
    ------
    UnknownAxisError
        If `axis` is neither 'sample' nor 'observation'.
    ValueError
        If `samples` and `observations` are provided.

    Notes
    -----
    Subsetting from the BIOM table is only supported in one axis

    Examples
    --------
    Parse a hdf5 biom table

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f) # doctest: +SKIP

    Parse a hdf5 biom table subsetting observations

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f, ids=["GG_OTU_1"],
    ...                      axis='observation') # doctest: +SKIP
    """
    if axis not in ['observation', 'sample']:
        # BUG FIX: the original constructed the exception but never raised
        # it, so invalid axis values were silently accepted.
        raise UnknownAxisError(axis)

    # Attempt HDF5 first; from_hdf5 signals non-HDF5 input with
    # ValueError/RuntimeError, in which case we fall through to JSON/TSV.
    try:
        return Table.from_hdf5(fp, ids=ids, axis=axis)
    except ValueError:
        pass
    except RuntimeError:
        pass

    if hasattr(fp, 'read'):
        old_pos = fp.tell()
        # Read in characters until first non-whitespace.
        # If it is a {, then this is (most likely) JSON.
        c = fp.read(1)
        while c.isspace():
            c = fp.read(1)
        fp.seek(old_pos)
        if c == '{':
            t = Table.from_json(json.load(fp, object_pairs_hook=OrderedDict),
                                input_is_dense=input_is_dense)
        else:
            t = Table.from_tsv(fp, None, None, lambda x: x)
    elif isinstance(fp, list):
        try:
            t = Table.from_json(json.loads(''.join(fp),
                                           object_pairs_hook=OrderedDict),
                                input_is_dense=input_is_dense)
        except ValueError:
            t = Table.from_tsv(fp, None, None, lambda x: x)
    else:
        t = Table.from_json(json.loads(fp, object_pairs_hook=OrderedDict),
                            input_is_dense=input_is_dense)

    def subset_ids(data, id_, md):
        # Keep only the requested ids on the subsetting axis.
        return id_ in ids

    def gt_zero(vals, id_, md):
        # Drop ids on the opposite axis that became all-zero after subsetting.
        return np.any(vals)

    if ids is not None:
        t.filter(subset_ids, axis=axis)
        axis = 'observation' if axis == 'sample' else 'sample'
        t.filter(gt_zero, axis=axis)

    return t
from __future__ import division import argparse from biom.table import Table import json parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', type=argparse.FileType('r'), nargs='+', help='profile list separated by space') parser.add_argument('-biom_out', default='combined.biom', help='Output biom file name') args = parser.parse_args() biomFile = args.biom_out biomList = [] for f in args.input: biomList.append(f) if len(biomList) <= 1: print('Found only one biom profile, will still give you a (brand) new one.') biomProfile = Table.from_json(json.load(biomList[0])) with open(biomFile, 'w') as f: biomProfile.to_json('Generated_by_almighty_metaSeq', f) else: print('Found {0} biom profiles under.'.format(len(biomList))) biomProfile = Table.from_json(json.load(biomList[0])) for f in biomList[1:]: biomProfile = biomProfile.concat([Table.from_json(json.load(f))]) with open(biomFile, 'w') as f: biomProfile = biomProfile.sort() biomProfile.to_json('Generated_by_almighty_metaSeq', f) print('Concatenated {0} profiles into {1}.'.format(len(biomList), biomFile))
def parse_biom_table(fp, ids=None, axis='sample', input_is_dense=False):
    r"""Parses the biom table stored in the filepath `fp`

    Parameters
    ----------
    fp : file like
        File alike object storing the BIOM table
    ids : iterable
        The sample/observation ids of the samples/observations that we need
        to retrieve from the biom table
    axis : {'sample', 'observation'}, optional
        The axis to subset on
    input_is_dense : boolean
        Indicates if the BIOM table is dense or sparse. Valid only for JSON
        tables.

    Returns
    -------
    Table
        The BIOM table stored at fp

    Raises
    ------
    UnknownAxisError
        If `axis` is neither 'sample' nor 'observation'.
    ValueError
        If `samples` and `observations` are provided.

    Notes
    -----
    Subsetting from the BIOM table is only supported in one axis

    Examples
    --------
    Parse a hdf5 biom table

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f) # doctest: +SKIP

    Parse a hdf5 biom table subsetting observations

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f, ids=["GG_OTU_1"],
    ...                      axis='observation') # doctest: +SKIP
    """
    if axis not in ['observation', 'sample']:
        # BUG FIX: the original constructed the exception but never raised
        # it, so invalid axis values were silently accepted.
        raise UnknownAxisError(axis)

    try:
        return Table.from_hdf5(fp, ids=ids, axis=axis)
    except (ValueError, RuntimeError):
        # BUG FIX: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit and real bugs.  The sibling parsers
        # in this file treat ValueError/RuntimeError as "not HDF5".
        pass

    if hasattr(fp, 'read'):
        old_pos = fp.tell()
        # Try JSON first; on parse failure rewind and treat as TSV.
        try:
            t = Table.from_json(json.load(fp), input_is_dense=input_is_dense)
        except ValueError:
            fp.seek(old_pos)
            t = Table.from_tsv(fp, None, None, lambda x: x)
    elif isinstance(fp, list):
        try:
            t = Table.from_json(json.loads(''.join(fp)),
                                input_is_dense=input_is_dense)
        except ValueError:
            t = Table.from_tsv(fp, None, None, lambda x: x)
    else:
        t = Table.from_json(json.loads(fp), input_is_dense=input_is_dense)

    if ids is not None:
        # Keep only the requested ids, then drop all-zero vectors on the
        # opposite axis.
        f = lambda data, id_, md: id_ in ids
        t.filter(f, axis=axis)
        axis = 'observation' if axis == 'sample' else 'sample'
        f = lambda vals, id_, md: np.any(vals)
        t.filter(f, axis=axis)

    return t
def parse_biom_table(file_obj, ids=None, axis='sample', input_is_dense=False):
    r"""Parses the biom table stored in `file_obj`

    Parameters
    ----------
    file_obj : file-like object, or list
        file-like object storing the BIOM table (tab-delimited or JSON), or
        a list of lines of the BIOM table in tab-delimited or JSON format
    ids : iterable
        The sample/observation ids of the samples/observations that we need
        to retrieve from the biom table
    axis : {'sample', 'observation'}, optional
        The axis to subset on
    input_is_dense : boolean
        Indicates if the BIOM table is dense or sparse. Valid only for JSON
        tables.

    Returns
    -------
    Table
        The BIOM table stored at file_obj

    Raises
    ------
    UnknownAxisError
        If `axis` is neither 'sample' nor 'observation'.
    ValueError
        If `samples` and `observations` are provided.

    Notes
    -----
    Subsetting from the BIOM table is only supported in one axis

    Examples
    --------
    Parse a hdf5 biom table

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f) # doctest: +SKIP

    Parse a hdf5 biom table subsetting observations

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f, ids=["GG_OTU_1"],
    ...                      axis='observation') # doctest: +SKIP
    """
    if axis not in ['observation', 'sample']:
        # BUG FIX: the original constructed the exception but never raised
        # it, so invalid axis values were silently accepted.
        raise UnknownAxisError(axis)

    # Attempt HDF5 first; from_hdf5 signals non-HDF5 input with
    # ValueError/RuntimeError, in which case we fall through to JSON/TSV.
    try:
        return Table.from_hdf5(file_obj, ids=ids, axis=axis)
    except ValueError:
        pass
    except RuntimeError:
        pass

    if hasattr(file_obj, 'read'):
        old_pos = file_obj.tell()
        # Read in characters until first non-whitespace.
        # If it is a {, then this is (most likely) JSON.
        c = file_obj.read(1)
        while c.isspace():
            c = file_obj.read(1)
        file_obj.seek(old_pos)
        if c == '{':
            t = Table.from_json(json.load(file_obj,
                                          object_pairs_hook=OrderedDict),
                                input_is_dense=input_is_dense)
        else:
            t = Table.from_tsv(file_obj, None, None, lambda x: x)
    elif isinstance(file_obj, list):
        try:
            t = Table.from_json(json.loads(''.join(file_obj),
                                           object_pairs_hook=OrderedDict),
                                input_is_dense=input_is_dense)
        except ValueError:
            t = Table.from_tsv(file_obj, None, None, lambda x: x)
    else:
        t = Table.from_json(json.loads(file_obj,
                                       object_pairs_hook=OrderedDict),
                            input_is_dense=input_is_dense)

    def subset_ids(data, id_, md):
        # Keep only the requested ids on the subsetting axis.
        return id_ in ids

    def gt_zero(vals, id_, md):
        # Drop ids on the opposite axis that became all-zero after subsetting.
        return np.any(vals)

    if ids is not None:
        t.filter(subset_ids, axis=axis)
        axis = 'observation' if axis == 'sample' else 'sample'
        t.filter(gt_zero, axis=axis)

    return t