def main(args):
    """Look up an M5NR record by md5 checksum or accession ID and stream the
    matching UniProt flat-file record to stdout.

    Returns 0 on success, 1 on missing arguments or no M5NR match.
    """
    # Migrated from the deprecated optparse to argparse, matching the other
    # tools in this file. argparse ignores these optparse-era hooks; they are
    # kept only for consistency with the sibling tools.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="API url")
    parser.add_argument("--md5", dest="md5", default=None, help="sequence md5")
    parser.add_argument("--id", dest="id", default=None, help="accession ID")
    parser.add_argument("--source", dest="source", default='SwissProt', help="datasource to get record from, one of: SwissProt, TreMBL, InterPro")
    parser.add_argument("--version", dest="version", default='1', help="M5NR version to use, one of 1 or 9")
    # get inputs
    opts = parser.parse_args()
    # build url for m5nr query; md5 takes precedence over accession
    params = [ ('limit', '1'), ('version', opts.version), ('source', opts.source) ]
    if opts.md5:
        url = opts.url+'/m5nr/md5/'+opts.md5+'?'+urlencode(params, True)
    elif opts.id:
        url = opts.url+'/m5nr/accession/'+opts.id+'?'+urlencode(params, True)
    else:
        sys.stderr.write("ERROR: no md5 checksum or accession given\n")
        return 1
    # retrieve data
    result = obj_from_url(url)
    if len(result['data']) == 0:
        sys.stderr.write("ERROR: no match in M5NR version %s\n"%opts.version)
        return 1
    # output data: fetch the UniProt text record for the matched accession
    stdout_from_url(UNIPROT_URL+result['data'][0]['accession']+'.txt')
    return 0
def main(args):
    """Print one "<id>\t<alpha diversity>" line per requested metagenome.

    Returns 0 on success, 1 if no IDs were supplied.
    """
    # Migrated from the deprecated optparse to argparse, matching the other
    # tools in this file. argparse ignores these optparse-era hooks.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--ids", dest="ids", default=None, help="comma seperated list of KBase Metagenome IDs")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='species', help="taxon level to retrieve abundances for, default is species")
    parser.add_argument("--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")
    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    # get auth
    token = get_auth_token(opts)
    # build url / retrieve data / output data, one API call per metagenome
    id_list = opts.ids.split(',')
    params = [ ('level', opts.level), ('source', opts.source) ]
    for i in id_list:
        url = opts.url+'/compute/alphadiversity/'+i+'?'+urlencode(params, True)
        data = obj_from_url(url, auth=token)
        safe_print("%s\t%s\n" %(i, data['data']))
    return 0
def main(args):
    """Resolve an M5NR md5 checksum or accession ID and stream the matching
    UniProt flat-file record to stdout.

    Returns 0 on success, 1 on missing arguments or no M5NR match.
    """
    # argparse ignores these optparse-era hooks; kept for consistency with
    # the other tools in this file.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    cli = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    cli.add_argument("--url", dest="url", default=API_URL, help="API url")
    cli.add_argument("--md5", dest="md5", default=None, help="sequence md5")
    cli.add_argument("--id", dest="id", default=None, help="accession ID")
    cli.add_argument("--source", dest="source", default='SwissProt',
                     help="datasource to get record from, one of: SwissProt, TreMBL, InterPro")
    cli.add_argument("--version", dest="version", default='1', help="M5NR version to use, one of 1 or 9")
    opts = cli.parse_args()

    # pick the m5nr resource to query; md5 wins when both are given
    query = [('limit', '1'), ('version', opts.version), ('source', opts.source)]
    if opts.md5:
        resource = '/m5nr/md5/' + opts.md5
    elif opts.id:
        resource = '/m5nr/accession/' + opts.id
    else:
        sys.stderr.write("ERROR: no md5 checksum or accession given\n")
        return 1

    result = obj_from_url(opts.url + resource + '?' + urlencode(query, True))
    if not result['data']:
        sys.stderr.write("ERROR: no match in M5NR version %s\n" % opts.version)
        return 1

    # fetch the UniProt text record for the matched accession
    stdout_from_url(UNIPROT_URL + result['data'][0]['accession'] + '.txt')
    return 0
def main(args):
    """Search metagenomes and print matches as a tab-separated table.

    Builds a /search query from the command-line options, prints a header
    row, then pages through results (following server 'next' links) until
    the requested limit is reached. Returns 0 on success, including the
    "no results" case.
    """
    # argparse ignores these optparse-era hooks; kept for consistency with
    # the other tools in this file.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%(VERSION, search_opts), epilog=posthelp%AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--limit", dest="limit", type=int, default=15, help="Number of results to show, if > 50 will use paginated queries to get all, default 15")
    parser.add_argument("--order", dest="order", default=None, help="field metagenomes are ordered by, default is no ordering")
    parser.add_argument("--direction", dest="direction", default="asc", help="direction of order. 'asc' for ascending order, 'desc' for descending order, default is asc")
    parser.add_argument("--public", dest="public", action="store_true", default=False, help="return both private and pubulic data if using authenticated search, default is private only. Non-authenticated search only returns public.")
    parser.add_argument("--match", dest="match", default="all", help="search logic. 'all' for metagenomes that match all search parameters, 'any' for metagenomes that match any search parameters, default is all")
    parser.add_argument("--status", dest="status", default="public", help="types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public")
    parser.add_argument("--verbosity", dest="verbosity", default='minimal', help="amount of information to display. \nuse keyword 'minimal' for id and name, use keyword 'full' for MIxS GSC metadata, default is minimal")
    # one free-text query option per searchable field
    for sfield in SEARCH_FIELDS:
        parser.add_argument("--"+sfield, dest=sfield, default=None, help="search parameter: query string for "+sfield)
    # get inputs
    # NOTE(review): --match, --status and --verbosity are parsed but never
    # added to the query parameters below — confirm whether that is intended.
    opts = parser.parse_args()
    # get auth
    token = get_auth_token(opts)
    # build call url; per-request limit is capped, larger limits are
    # satisfied by following pagination links below
    total = 0
    maxLimit = 50
    params = [ ('limit', opts.limit if opts.limit < maxLimit else maxLimit),
               ('public', 'yes' if opts.public or (not token) else 'no') ]
    for sfield in SEARCH_FIELDS:
        if hasattr(opts, sfield) and getattr(opts, sfield):
            params.append((sfield, getattr(opts, sfield)))
    # direction is only sent when an order field is given
    if opts.order:
        params.append(('order', opts.order))
        params.append(('direction', opts.direction))
    url = opts.url+'/search?'+urlencode(params, True)
    # retrieve data
    fields = ['metagenome_id', 'public'] + SEARCH_FIELDS
    result = obj_from_url(url, auth=token)
    found = len(result['data'])
    if found == 0:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0
    total += found
    # output header
    safe_print("\t".join(fields)+"\n")
    # output rows
    display_search(result['data'], fields)
    # follow server-provided 'next' links until opts.limit rows were shown
    while ('next' in result) and result['next'] and (total < opts.limit):
        url = result['next']
        result = obj_from_url(url, auth=token)
        total += len(result['data'])
        display_search(result['data'], fields)
    return 0
def main(args):
    """Print one "<id>\t<alpha diversity>" line per requested metagenome.

    Returns 0 on success, 1 if no IDs were supplied.
    """
    # argparse ignores these optparse-era hooks; kept for consistency with
    # the other tools in this file.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    cli = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    cli.add_argument("--ids", dest="ids", default=None, help="comma seperated list of KBase Metagenome IDs")
    cli.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    cli.add_argument("--user", dest="user", default=None, help="OAuth username")
    cli.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    cli.add_argument("--token", dest="token", default=None, help="OAuth token")
    cli.add_argument("--level", dest="level", default='species', help="taxon level to retrieve abundances for, default is species")
    cli.add_argument("--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")
    opts = cli.parse_args()

    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1

    token = get_auth_token(opts)
    # the query string is identical for every ID, so build it once
    query = urlencode([('level', opts.level), ('source', opts.source)], True)
    for mgid in opts.ids.split(','):
        result = obj_from_url(opts.url + '/compute/alphadiversity/' + mgid + '?' + query, auth=token)
        safe_print("%s\t%s\n" % (mgid, result['data']))
    return 0
def main(args):
    """Stream annotated sequences for one metagenome to stdout, filtered by
    function name/level, datasource and similarity cutoffs.

    Returns 0 on success, 1 if no metagenome ID was supplied.
    """
    # argparse ignores these optparse-era hooks; kept for consistency with
    # the other tools in this file.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    cli = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    cli.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    cli.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    cli.add_argument("--user", dest="user", default=None, type=str, help="OAuth username")
    cli.add_argument("--passwd", dest="passwd", default=None, type=str, help="OAuth password")
    cli.add_argument("--token", dest="token", default=None, type=str, help="OAuth token")
    cli.add_argument("--name", dest="name", default=None, type=str, help="function name to filter by")
    cli.add_argument("--level", dest="level", default='function', help="function level to filter by")
    cli.add_argument("--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    cli.add_argument("--evalue", dest="evalue", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    cli.add_argument("--identity", dest="identity", default=60, help="percent value for minimum percent identity cutoff, default is 60")
    cli.add_argument("--length", dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    opts = cli.parse_args()

    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1

    token = get_auth_token(opts)
    query = [('source', opts.source), ('evalue', opts.evalue),
             ('identity', opts.identity), ('length', opts.length)]
    # ontology-typed query only for hierarchical sources above leaf level
    is_ontology = (opts.source in ['Subsystems', 'KO', 'NOG', 'COG']) and (opts.level != 'function')
    query.append(('type', 'ontology' if is_ontology else 'function'))
    if opts.name:
        query.append(('filter', opts.name))
    # NOTE: --level defaults to 'function', so filter_level is normally sent
    if opts.level:
        query.append(('filter_level', opts.level))

    stdout_from_url(opts.url + '/annotation/sequence/' + opts.id + '?' + urlencode(query, True), auth=token)
    return 0
def main(args):
    """Stream similarity annotations for one metagenome to stdout, filtered
    by taxon name/level, datasource and similarity cutoffs.

    Returns 0 on success, 1 if no metagenome ID was supplied.
    """
    # Migrated from the deprecated optparse to argparse, matching the other
    # tools in this file. argparse ignores these optparse-era hooks.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--name", dest="name", default=None, help="taxon name to filter by")
    parser.add_argument("--level", dest="level", default=None, help="taxon level to filter by")
    parser.add_argument("--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")
    parser.add_argument("--evalue", dest="evalue", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    # '%' is doubled because argparse applies %-formatting to help strings
    parser.add_argument("--identity", dest="identity", default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    # get inputs
    opts = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    # get auth
    token = get_auth_token(opts)
    # build url
    params = [ ('source', opts.source), ('evalue', opts.evalue), ('identity', opts.identity), ('length', opts.length), ('type', 'organism') ]
    if opts.name:
        params.append(('filter', opts.name))
    if opts.level:
        params.append(('filter_level', opts.level))
    url = opts.url+'/annotation/similarity/'+opts.id+'?'+urlencode(params, True)
    # output data
    stdout_from_url(url, auth=token)
    return 0
def main(args):
    """Search metagenomes and print matches as a tab-separated table.

    Builds a /search query from the command-line options, prints a header
    row, then pages through results (following server 'next' links) until
    the requested limit is reached. Returns 0 on success, including the
    "no results" case.
    """
    # argparse ignores these optparse-era hooks; kept for consistency with
    # the other tools in this file.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % (VERSION, search_opts),
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--limit", dest="limit", type=int, default=15,
                        help="Number of results to show, if > 50 will use paginated queries to get all, default 15")
    parser.add_argument("--order", dest="order", default=None,
                        help="field metagenomes are ordered by, default is no ordering")
    parser.add_argument("--direction", dest="direction", default="asc",
                        help="direction of order. 'asc' for ascending order, 'desc' for descending order, default is asc")
    parser.add_argument("--public", dest="public", action="store_true", default=False,
                        help="return both private and pubulic data if using authenticated search, default is private only. Non-authenticated search only returns public.")
    parser.add_argument("--match", dest="match", default="all",
                        help="search logic. 'all' for metagenomes that match all search parameters, 'any' for metagenomes that match any search parameters, default is all")
    parser.add_argument("--status", dest="status", default="public",
                        help="types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public")
    parser.add_argument("--verbosity", dest="verbosity", default='minimal',
                        help="amount of information to display. \nuse keyword 'minimal' for id and name, use keyword 'full' for MIxS GSC metadata, default is minimal")
    # one free-text query option per searchable field
    for sfield in SEARCH_FIELDS:
        parser.add_argument("--" + sfield, dest=sfield, default=None,
                            help="search parameter: query string for " + sfield)
    # get inputs
    # NOTE(review): --match, --status and --verbosity are parsed but never
    # added to the query parameters below — confirm whether that is intended.
    opts = parser.parse_args()
    # get auth
    token = get_auth_token(opts)
    # build call url; per-request limit is capped, larger limits are
    # satisfied by following pagination links below
    total = 0
    maxLimit = 50
    params = [('limit', opts.limit if opts.limit < maxLimit else maxLimit),
              ('public', 'yes' if opts.public or (not token) else 'no')]
    for sfield in SEARCH_FIELDS:
        if hasattr(opts, sfield) and getattr(opts, sfield):
            params.append((sfield, getattr(opts, sfield)))
    # direction is only sent when an order field is given
    if opts.order:
        params.append(('order', opts.order))
        params.append(('direction', opts.direction))
    url = opts.url + '/search?' + urlencode(params, True)
    # retrieve data
    fields = ['metagenome_id', 'public'] + SEARCH_FIELDS
    result = obj_from_url(url, auth=token)
    found = len(result['data'])
    if found == 0:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0
    total += found
    # output header
    safe_print("\t".join(fields) + "\n")
    # output rows
    display_search(result['data'], fields)
    # follow server-provided 'next' links until opts.limit rows were shown
    while ('next' in result) and result['next'] and (total < opts.limit):
        url = result['next']
        result = obj_from_url(url, auth=token)
        total += len(result['data'])
        display_search(result['data'], fields)
    return 0
def main(args):
    """Build an organism abundance matrix for one or more metagenomes and
    write it as BIOM JSON or a tab-separated table.

    IDs may be given as a comma separated string, a file of newline
    separated IDs, or a JSON file containing an 'elements' map or a
    'members' list. Large ID sets are fetched in chunks of 50 and merged.
    Returns 0 on success, 1 on bad arguments.
    """
    # argparse ignores these optparse-era hooks; kept for consistency with
    # the other tools in this file.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--ids", dest="ids", default=None, help="comma seperated list or file of KBase Metagenome IDs")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='genus', help="taxon level to retrieve abundances for, default is genus")
    parser.add_argument("--source", dest="source", default='SEED', help="taxon datasource to filter results by, default is SEED")
    parser.add_argument("--hit_type", dest="hit_type", default='lca', help="Set of organisms to search results by, one of: all, single, lca")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="taxon level to filter by")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="taxon name to filter by, file or comma seperated list")
    parser.add_argument("--intersect_source", dest="intersect_source", default='Subsystems', help="function datasource for insersection, default is Subsystems")
    parser.add_argument("--intersect_level", dest="intersect_level", default=None, help="function level for insersection")
    parser.add_argument("--intersect_name", dest="intersect_name", default=None, help="function name(s) for insersection, file or comma seperated list")
    parser.add_argument("--output", dest="output", default='-', help="output: filename or stdout (-), default is stdout")
    parser.add_argument("--format", dest="format", default='biom', help="output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--evalue", type=int, dest="evalue", default=15, help="negative exponent value for maximum e-value cutoff, default is 15")
    parser.add_argument("--identity", type=int, dest="identity", default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", type=int, dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")
    parser.add_argument("--temp", dest="temp", default=None, help="filename to temporarly save biom output at each iteration")
    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    if (opts.intersect_name and (not opts.intersect_level)) or ((not opts.intersect_name) and opts.intersect_level):
        sys.stderr.write("ERROR: both --intersect_level and --intersect_name need to be used together\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    # get auth
    token = get_auth_token(opts)
    # build id list: --ids is either a file (JSON object or newline
    # separated list) or a comma separated string
    id_list = []
    if os.path.isfile(opts.ids):
        id_str = open(opts.ids, 'r').read()
        try:
            id_obj = json.loads(id_str)
            if 'elements' in id_obj:
                # Py3 fix: .keys() is a view — materialize so it can be
                # sliced / len()ed below
                id_list = list(id_obj['elements'].keys())
            elif 'members' in id_obj:
                # Py3 fix: map() is lazy — build a real list instead
                id_list = [m['ID'] for m in id_obj['members']]
        except Exception:
            # not JSON: treat the file as a newline separated ID list
            id_list = id_str.strip().split('\n')
    else:
        id_list = opts.ids.strip().split(',')
    params = [ ('group_level', opts.level), ('source', opts.source), ('hit_type', opts.hit_type), ('evalue', opts.evalue), ('identity', opts.identity), ('length', opts.length), ('version', opts.version), ('result_type', 'abundance'), ('asynchronous', '1') ]
    if opts.intersect_level and opts.intersect_name:
        params.append(('filter_source', opts.intersect_source))
        params.append(('filter_level', opts.intersect_level))
        if os.path.isfile(opts.intersect_name):
            with open(opts.intersect_name) as file_:
                for f in file_:
                    params.append(('filter', f.strip()))
        else:
            for f in opts.intersect_name.strip().split(','):
                params.append(('filter', f))
    # retrieve data: chunk large ID sets and merge the partial BIOM results
    biom = None
    size = 50
    if len(id_list) > size:
        # Py3 fix: xrange() no longer exists — use range()
        for start in range(0, len(id_list), size):
            sub_ids = id_list[start:start+size]
            cur_params = copy.deepcopy(params)
            for mgid in sub_ids:
                cur_params.append(('id', mgid))
            cur_url = opts.url+'/matrix/organism?'+urlencode(cur_params, True)
            cur_biom = async_rest_api(cur_url, auth=token)
            biom = merge_biom(biom, cur_biom)
            if opts.temp:
                json.dump(biom, open(opts.temp, 'w'))
    else:
        for mgid in id_list:
            params.append(('id', mgid))
        url = opts.url+'/matrix/organism?'+urlencode(params, True)
        biom = async_rest_api(url, auth=token)
        if opts.temp:
            json.dump(biom, open(opts.temp, 'w'))
    # get sub annotations: names at opts.level that fall under the
    # requested filter taxon
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        # get input filter list
        filter_list = []
        if os.path.isfile(opts.filter_name):
            with open(opts.filter_name) as file_:
                for f in file_:
                    filter_list.append(f.strip())
        else:
            for f in opts.filter_name.strip().split(','):
                filter_list.append(f)
        # annotation mapping from m5nr
        params = [ ('version', opts.version), ('min_level', opts.level) ]
        url = opts.url+'/m5nr/taxonomy?'+urlencode(params, True)
        data = obj_from_url(url)
        for ann in data['data']:
            if (opts.filter_level in ann) and (opts.level in ann) and (ann[opts.filter_level] in filter_list):
                sub_ann.add(ann[opts.level])
    # output data
    if (not opts.output) or (opts.output == '-'):
        out_hdl = sys.stdout
    else:
        out_hdl = open(opts.output, 'w')
    if opts.format == 'biom':
        out_hdl.write(json.dumps(biom)+"\n")
    else:
        biom_to_tab(biom, out_hdl, rows=sub_ann)
    # don't close the process-wide stdout handle
    if out_hdl is not sys.stdout:
        out_hdl.close()
    return 0
def main(args):
    """Print the top N functional annotations of a metagenome as
    "name<TAB>abundance" lines, optionally restricted to functions that
    fall under a named higher-level category.

    Returns 0 on success, 1 on bad arguments or an unexpected API response.
    """
    # argparse ignores these optparse-era hooks; kept for consistency with
    # the other tools in this file.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='function', help="functional level to retrieve abundances for, default is function")
    parser.add_argument("--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="function name to filter by")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="function level to filter by")
    parser.add_argument("--top", dest="top", type=int, default=10, help="display only the top N taxa, default is 10")
    parser.add_argument("--evalue", dest="evalue", type=int, default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", type=int, default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", type=int, default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")
    # get inputs (opts.top is already an int via type=int; the old
    # redundant re-cast was removed)
    opts = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    # get auth
    token = get_auth_token(opts)
    # build url
    params = [ ('id', opts.id), ('group_level', opts.level), ('source', opts.source), ('evalue', opts.evalue), ('identity', opts.identity), ('length', opts.length), ('version', opts.version), ('result_type', 'abundance'), ('asynchronous', '1'), ('hide_metadata', '1') ]
    url = opts.url+'/matrix/function?'+urlencode(params, True)
    # retrieve data
    top_ann = {}
    result = async_rest_api(url, auth=token)
    # get sub annotations: names at opts.level under the filter category
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [ ('filter', opts.filter_name), ('filter_level', opts.filter_level), ('min_level', opts.level), ('version', opts.version), ('source', opts.source) ]
        url = opts.url+'/m5nr/ontology?'+urlencode(params, True)
        data = obj_from_url(url)
        # the ontology endpoint labels the leaf level 'level4'
        level = 'level4' if opts.level == 'function' else opts.level
        sub_ann = set(map(lambda x: x[level], data['data']))
    # the matrix endpoint wraps the BIOM object in a {'data': ...} envelope
    biom = result["data"]
    # was an assert; made an explicit check so it survives `python -O`
    # and exits cleanly instead of raising AssertionError
    if "matrix_type" not in biom:
        sys.stderr.write("ERROR: unexpected matrix response, no matrix_type: %s\n" % repr(biom))
        return 1
    # sort data; rows are annotation names, the single column is abundance
    if biom["matrix_type"] == "sparse":
        # sparse entries are [row_index, col_index, value]; sort by value
        for d in sorted(biom['data'], key=itemgetter(2), reverse=True):
            name = biom['rows'][d[0]]['id'] # if opts.source != 'Subsystems' else biom['rows'][d[0]]['metadata']['ontology'][-1]
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = d[2]
    if biom["matrix_type"] == "dense":
        # sort row indices by row contents (lists compare element-wise, so
        # this orders by the first column's abundance)
        sortindex = sorted(range(len(biom['data'])), key=biom['data'].__getitem__, reverse=True)
        for n in sortindex:
            name = biom['rows'][n]['id'] # if opts.source != 'Subsystems' else biom['rows'][n]['metadata']['ontology'][-1]
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = biom['data'][n][0]
    # output data
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" %(k, v))
    return 0
def main(args):
    """Print the top N functional annotations of a metagenome as
    "name<TAB>abundance" lines, optionally restricted to functions that
    fall under a named higher-level category.

    Returns 0 on success, 1 on bad arguments.
    """
    # argparse ignores these optparse-era hooks; kept for consistency with
    # the other tools in this file.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='function', help="functional level to retrieve abundances for, default is function")
    parser.add_argument("--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="function name to filter by")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="function level to filter by")
    parser.add_argument("--top", dest="top", type=int, default=10, help="display only the top N taxa, default is 10")
    parser.add_argument("--evalue", dest="evalue", type=int, default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", type=int, default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", type=int, default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")
    # get inputs
    opts = parser.parse_args()
    opts.top = int(opts.top)  # redundant given type=int above, but harmless
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    # get auth
    token = get_auth_token(opts)
    # build url for the single-column abundance matrix query
    params = [ ('id', opts.id), ('group_level', opts.level), ('source', opts.source), ('evalue', opts.evalue), ('identity', opts.identity), ('length', opts.length), ('version', opts.version), ('result_type', 'abundance'), ('asynchronous', '1'), ('hide_metadata', '1') ]
    url = opts.url+'/matrix/function?'+urlencode(params, True)
    # retrieve data
    top_ann = {}
    # NOTE(review): this assumes async_rest_api returns the BIOM object
    # directly; a sibling tool in this file unpacks a {'data': ...} envelope
    # first — confirm which shape the matrix endpoint really returns.
    biom = async_rest_api(url, auth=token)
    # get sub annotations: names at opts.level under the filter category
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [ ('filter', opts.filter_name), ('filter_level', opts.filter_level), ('min_level', opts.level), ('version', opts.version), ('source', opts.source) ]
        url = opts.url+'/m5nr/ontology?'+urlencode(params, True)
        data = obj_from_url(url)
        # the ontology endpoint labels the leaf level 'level4'
        level = 'level4' if opts.level == 'function' else opts.level
        sub_ann = set(map(lambda x: x[level], data['data']))
    # sort data; rows are annotation names, the single column is abundance
    if biom["matrix_type"] == "sparse":
        # sparse entries are [row_index, col_index, value]; sort by value
        for d in sorted(biom['data'], key=itemgetter(2), reverse=True):
            name = biom['rows'][d[0]]['id'] # if opts.source != 'Subsystems' else biom['rows'][d[0]]['metadata']['ontology'][-1]
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = d[2]
    if biom["matrix_type"] == "dense":
        # sort row indices by row contents (lists compare element-wise, so
        # this orders by the first column's abundance)
        sortindex = sorted(range(len(biom['data'])), key=biom['data'].__getitem__, reverse=True)
        for n in sortindex:
            name = biom['rows'][n]['id'] # if opts.source != 'Subsystems' else biom['rows'][n]['metadata']['ontology'][-1]
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = biom['data'][n][0]
    # output data
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" %(k, v))
    return 0
def main(args):
    """Print the top N taxa of a metagenome as "name<TAB>abundance" lines,
    optionally restricted to taxa under a named higher-level taxon.

    Returns 0 on success, 1 on bad arguments.
    """
    # argparse ignores these optparse-era hooks; kept for consistency with
    # the other tools in this file.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='species', help="taxon level to retrieve abundances for, default is species")
    parser.add_argument("--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="taxon name to filter by")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="taxon level to filter by")
    parser.add_argument("--top", dest="top", type=int, default=10, help="display only the top N taxa, default is 10")
    parser.add_argument("--evalue", dest="evalue", type=int, default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", type=int, default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", type=int, default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")
    # get inputs (opts.top is already an int via type=int; the old
    # redundant re-cast was removed)
    opts = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    # get auth
    token = get_auth_token(opts)
    # build url
    params = [('id', opts.id), ('group_level', opts.level), ('source', opts.source),
              ('evalue', opts.evalue), ('identity', opts.identity), ('length', opts.length),
              ('version', opts.version), ('result_type', 'abundance'),
              ('asynchronous', '1'), ('hide_metadata', '1')]
    url = opts.url + '/matrix/organism?' + urlencode(params, True)
    # retrieve data
    top_ann = {}
    biom = async_rest_api(url, auth=token)
    # get sub annotations: taxa at opts.level under the filter taxon
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [('filter', opts.filter_name), ('filter_level', opts.filter_level),
                  ('min_level', opts.level), ('version', opts.version)]
        url = opts.url + '/m5nr/taxonomy?' + urlencode(params, True)
        data = obj_from_url(url)
        sub_ann = set(map(lambda x: x[opts.level], data['data']))
    # densify the single-column abundance matrix
    if biom['matrix_type'] == "dense":
        data = biom['data']
    else:
        data = sparse_to_dense(biom['data'], len(biom['rows']), len(biom['cols']))
    rows = [biom['rows'][i]['id'] for i in range(len(biom['rows']))]
    # BUG FIX: abundances must be read from the densified matrix. The old
    # code read biom['data'][i][0], which for a sparse matrix is the row
    # index of the i-th [row, col, value] triple, not an abundance.
    datalist = [data[i][0] for i in range(len(biom['rows']))]
    data2 = zip(rows, datalist)
    # sort data: keep the top N names, honoring the optional taxon filter
    for d in sorted(data2, key=itemgetter(1), reverse=True):
        name = d[0]
        if len(top_ann) >= opts.top:
            break
        if sub_ann and (name not in sub_ann):
            continue
        top_ann[name] = d[1]
    # output data
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" % (k, v))
    return 0
def main(args):
    """Print the top N most abundant taxa for a single metagenome (optparse variant).

    Queries the communities API organism abundance matrix for one
    metagenome, optionally restricts taxa to those beneath a named parent
    taxon (--filter_level/--filter_name via the m5nr taxonomy), and writes
    "taxon<TAB>abundance" lines for the --top most abundant taxa.

    Returns 0 on success, 1 on a usage error.
    """
    # optparse calls these when rendering --help; overridden so the
    # description and epilog are printed verbatim instead of re-wrapped
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_option("", "--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--level", dest="level", default='species',
                      help="taxon level to retrieve abundances for, default is species")
    parser.add_option("", "--source", dest="source", default='SEED',
                      help="datasource to filter results by, default is SEED")
    parser.add_option("", "--filter_name", dest="filter_name", default=None,
                      help="taxon name to filter by")
    parser.add_option("", "--filter_level", dest="filter_level", default=None,
                      help="taxon level to filter by")
    parser.add_option("", "--top", dest="top", type="int", default=10,
                      help="display only the top N taxa, default is 10")
    parser.add_option("", "--evalue", dest="evalue", type="int", default=5,
                      help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_option("", "--identity", dest="identity", type="int", default=60,
                      help="percent value for minimum % identity cutoff, default is 60")
    parser.add_option("", "--length", dest="length", type="int", default=15,
                      help="value for minimum alignment length cutoff, default is 15")
    parser.add_option("", "--version", type="int", dest="version", default=1,
                      help="M5NR annotation version to use, default is 1")

    # get inputs (type="int" already guarantees opts.top is an int)
    (opts, args) = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    # the two filter options are only meaningful together
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url for the single-metagenome abundance matrix request
    params = [ ('id', opts.id), ('group_level', opts.level), ('source', opts.source),
               ('evalue', opts.evalue), ('identity', opts.identity), ('length', opts.length),
               ('version', opts.version), ('result_type', 'abundance'),
               ('asynchronous', '1'), ('hide_metadata', '1') ]
    url = opts.url+'/matrix/organism?'+urlencode(params, True)

    # retrieve data
    top_ann = {}
    biom = async_rest_api(url, auth=token)

    # get the set of taxa beneath the requested parent taxon, if filtering
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [ ('filter', opts.filter_name), ('filter_level', opts.filter_level),
                   ('min_level', opts.level), ('version', opts.version) ]
        url = opts.url+'/m5nr/taxonomy?'+urlencode(params, True)
        data = obj_from_url(url)
        sub_ann = set( map(lambda x: x[opts.level], data['data']) )

    # normalize the BIOM matrix to dense (rows x cols) form
    if biom['matrix_type'] == "dense":
        data = biom['data']
    else:
        data = sparse_to_dense(biom['data'], len(biom['rows']), len(biom['cols']))
    rows = [biom['rows'][i]['id'] for i in range(len(biom['rows']))]
    # BUGFIX: read abundances from the densified matrix. The original read
    # biom['data'] directly, which for a sparse matrix holds
    # [row, col, value] triplets, so index [0] returned a row index
    # instead of the abundance (and the computed `data` went unused).
    datalist = [data[i][0] for i in range(len(biom['rows']))]
    data2 = zip(rows, datalist)

    # sort by abundance (descending) and keep the top N, honoring the filter
    for d in sorted(data2, key=itemgetter(1), reverse=True):
        name = d[0]
        if len(top_ann) >= opts.top:
            break
        if sub_ann and (name not in sub_ann):
            continue
        top_ann[name] = d[1]

    # output data
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" %(k, v))
    return 0
key = get_auth_token() # assign parameters limit = 1000 # initial call # construct API call parameters = { "limit": limit, "order": "created_on", "direction": "asc", "public": "1" } API_URL = "https://api.mg-rast.org/" base_url = API_URL + CALL + "?" + urlencode(parameters) # convert the data from a JSON structure to a python data type, a dict of dicts. jsonstructure = obj_from_url(base_url, auth=key) # unpack and display the data table total_count = int(jsonstructure["total_count"]) sys.stderr.write("Total number of records: {:d}\n".format(total_count)) for i in range(0, int(total_count / limit) + 1): sys.stderr.write("Page {:d}\t".format(i)) jsonstructure = obj_from_url(base_url, auth=key) printlist(jsonstructure) try: next_url = jsonstructure["next"] base_url = next_url
def main(args):
    """Search metagenomes and dump annotated sequences for every match.

    The per-field --<search field> options select metagenomes; for each
    match, its annotated sequences (restricted by datasource, function
    level, and similarity cutoffs) are streamed to stdout.

    Returns 0 always (including when the search finds nothing).
    """
    # print description/epilog verbatim in --help output
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%(VERSION, search_opts), epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--url", dest="url", default=API_URL, help="API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--level", dest="level", default='function', help="function level to filter by")
    parser.add_option("", "--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_option("", "--evalue", dest="evalue", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_option("", "--identity", dest="identity", default=60, help="percent value for minimum % identity cutoff, default is 60")
    parser.add_option("", "--length", dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_option("", "--status", dest="status", default="public", help="types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public")
    # one free-text query option per searchable field
    for sfield in SEARCH_FIELDS:
        parser.add_option("", "--"+sfield, dest=sfield, default=None, help="search parameter: query string for "+sfield)

    # get inputs
    (opts, args) = parser.parse_args()

    # get auth
    token = get_auth_token(opts)

    # assemble the metagenome search query from whatever fields were given
    query = [ ('limit', '100'), ('verbosity', 'minimal'), ('match', 'all'), ('status', opts.status) ]
    for sfield in SEARCH_FIELDS:
        value = getattr(opts, sfield, None)
        if value:
            query.append((sfield, value))
    page_url = opts.url+'/metagenome?'+urlencode(query, True)

    # collect matching metagenome ids, following pagination links
    page = obj_from_url(page_url, auth=token)
    if not page['data']:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0
    mgids = set(rec['id'] for rec in page['data'])
    while page['next']:
        page = obj_from_url(page['next'], auth=token)
        if not page['data']:
            break
        mgids.update(rec['id'] for rec in page['data'])

    # stream annotated sequences for every matched metagenome
    for mg in mgids:
        fetch = [ ('source', opts.source), ('evalue', opts.evalue), ('identity', opts.identity), ('length', opts.length) ]
        # ontology datasources need ontology-typed annotations except at
        # the plain 'function' level
        use_ontology = (opts.source in ['Subsystems', 'KO', 'NOG', 'COG']) and (opts.level != 'function')
        fetch.append(('type', 'ontology' if use_ontology else 'function'))
        if opts.function:
            fetch.append(('filter', opts.function))
        if opts.level:
            fetch.append(('filter_level', opts.level))
        seq_url = opts.url+'/annotation/sequence/'+mg+'?'+urlencode(fetch, True)
        # output data
        safe_print('Results from '+mg+":\n")
        stdout_from_url(seq_url, auth=token)
    return 0
repr(public), item["created_on"], mg_name, project_id, project_name]) + "\n")) CALL = "/search" key = get_auth_token() # assign parameters limit = 1000 # initial call # construct API call parameters = {"limit": limit, "order":"created_on", "direction": "asc", "public": "1"} API_URL= "https://api.mg-rast.org/" base_url = API_URL + CALL + "?" + urlencode(parameters) # convert the data from a JSON structure to a python data type, a dict of dicts. jsonstructure = obj_from_url(base_url, auth=key) # unpack and display the data table total_count = int(jsonstructure["total_count"]) sys.stderr.write("Total number of records: {:d}\n".format(total_count)) for i in range(0, int(total_count / limit) +1): sys.stderr.write("Page {:d}\t".format(i)) jsonstructure = obj_from_url(base_url, auth=key) printlist(jsonstructure) try: next_url = jsonstructure["next"] base_url = next_url
def main(args):
    """Build a merged function-abundance BIOM matrix for one or more metagenomes.

    --ids accepts a comma separated list or a file (a plain newline list,
    or a JSON collection carrying 'elements' keys or 'members' records).
    Large ID lists are requested in batches of 50 and merged with
    merge_biom. Output is BIOM JSON or a tabbed table, optionally
    restricted to functions under --filter_name at --filter_level, and
    may be intersected with a taxon (--intersect_*).

    Returns 0 on success, 1 on a usage error.
    """
    # NOTE(review): argparse never calls format_description/format_epilog;
    # kept as no-ops to mirror the optparse-based sibling scripts.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--ids", dest="ids", default=None,
                        help="comma seperated list or file of KBase Metagenome IDs")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='level3',
                        help="functional level to retrieve abundances for, default is level3")
    parser.add_argument("--source", dest="source", default='Subsystems',
                        help="function datasource to filter results by, default is Subsystems")
    parser.add_argument("--filter_level", dest="filter_level", default=None,
                        help="function level to filter by")
    parser.add_argument("--filter_name", dest="filter_name", default=None,
                        help="function name to filter by, file or comma seperated list")
    parser.add_argument("--intersect_source", dest="intersect_source", default='SEED',
                        help="taxon datasource for insersection, default is SEED")
    parser.add_argument("--intersect_level", dest="intersect_level", default=None,
                        help="taxon level for insersection")
    parser.add_argument("--intersect_name", dest="intersect_name", default=None,
                        help="taxon name(s) for insersection, file or comma seperated list")
    parser.add_argument("--output", dest="output", default='-',
                        help="output: filename or stdout (-), default is stdout")
    parser.add_argument("--format", dest="format", default='biom',
                        help="output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--evalue", type=int, dest="evalue", default=15,
                        help="negative exponent value for maximum e-value cutoff, default is 15")
    parser.add_argument("--identity", type=int, dest="identity", default=60,
                        help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", type=int, dest="length", default=15,
                        help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1,
                        help="M5NR annotation version to use, default is 1")
    parser.add_argument("--temp", dest="temp", default=None,
                        help="filename to temporarly save biom output at each iteration")

    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    # paired options are only meaningful together
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    if (opts.intersect_name and (not opts.intersect_level)) or ((not opts.intersect_name) and opts.intersect_level):
        sys.stderr.write("ERROR: both --intersect_level and --intersect_name need to be used together\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # resolve the metagenome ID list: file (JSON collection or plain
    # newline-separated) or a comma separated string
    id_list = []
    if os.path.isfile(opts.ids):
        # BUGFIX: close the file handle (was open(...).read())
        with open(opts.ids, 'r') as ids_hdl:
            id_str = ids_hdl.read()
        try:
            id_obj = json.loads(id_str)
            if 'elements' in id_obj:
                # BUGFIX: materialize as list so len() and slicing work on Python 3
                id_list = list(id_obj['elements'].keys())
            elif 'members' in id_obj:
                # BUGFIX: list comprehension instead of lazy map (py3 map has no len())
                id_list = [x['ID'] for x in id_obj['members']]
        except (ValueError, TypeError):
            # not JSON (or not a mapping): treat as one ID per line
            id_list = id_str.strip().split('\n')
    else:
        id_list = opts.ids.strip().split(',')

    # shared query parameters for every matrix request
    params = [('group_level', opts.level), ('source', opts.source),
              ('evalue', opts.evalue), ('identity', opts.identity),
              ('length', opts.length), ('version', opts.version),
              ('result_type', 'abundance'), ('asynchronous', '1')]
    if opts.intersect_level and opts.intersect_name:
        params.append(('filter_source', opts.intersect_source))
        params.append(('filter_level', opts.intersect_level))
        if os.path.isfile(opts.intersect_name):
            with open(opts.intersect_name) as file_:
                for f in file_:
                    params.append(('filter', f.strip()))
        else:
            for f in opts.intersect_name.strip().split(','):
                params.append(('filter', f))

    # retrieve data, batching large ID lists and merging the results
    biom = None
    size = 50
    if len(id_list) > size:
        # BUGFIX: range instead of Python-2-only xrange; use a distinct
        # inner loop variable instead of shadowing the batch index
        for start in range(0, len(id_list), size):
            sub_ids = id_list[start:start + size]
            cur_params = copy.deepcopy(params)
            for mg_id in sub_ids:
                cur_params.append(('id', mg_id))
            cur_url = opts.url + '/matrix/function?' + urlencode(cur_params, True)
            cur_biom = async_rest_api(cur_url, auth=token)
            biom = merge_biom(biom, cur_biom)
            if opts.temp:
                # checkpoint the merged matrix after each batch
                with open(opts.temp, 'w') as tmp_hdl:  # BUGFIX: close handle
                    json.dump(biom, tmp_hdl)
    else:
        for mg_id in id_list:
            params.append(('id', mg_id))
        url = opts.url + '/matrix/function?' + urlencode(params, True)
        biom = async_rest_api(url, auth=token)
        if opts.temp:
            with open(opts.temp, 'w') as tmp_hdl:  # BUGFIX: close handle
                json.dump(biom, tmp_hdl)

    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        # get input filter list (file or comma separated); use a set for
        # O(1) membership tests below
        filter_set = set()
        if os.path.isfile(opts.filter_name):
            with open(opts.filter_name) as file_:
                for f in file_:
                    filter_set.add(f.strip())
        else:
            for f in opts.filter_name.strip().split(','):
                filter_set.add(f)
        # annotation mapping from m5nr
        params = [('version', opts.version), ('min_level', opts.level),
                  ('source', opts.source)]
        url = opts.url + '/m5nr/ontology?' + urlencode(params, True)
        data = obj_from_url(url)
        # 'function' abundances are keyed by the deepest ontology level
        level = 'level4' if opts.level == 'function' else opts.level
        for ann in data['data']:
            if (opts.filter_level in ann) and (level in ann) and (ann[opts.filter_level] in filter_set):
                sub_ann.add(ann[level])

    # output data
    if (not opts.output) or (opts.output == '-'):
        out_hdl = sys.stdout
    else:
        out_hdl = open(opts.output, 'w')
    if opts.format == 'biom':
        out_hdl.write(json.dumps(biom) + "\n")
    else:
        biom_to_tab(biom, out_hdl, rows=sub_ann)
    # BUGFIX: do not close sys.stdout when writing to stdout
    if out_hdl is not sys.stdout:
        out_hdl.close()
    return 0
def main(args):
    """Search metagenomes and dump annotated sequences for every match (argparse variant).

    The per-field --<search field> options select metagenomes; for each
    match, its annotated sequences (restricted by datasource, function
    level, and similarity cutoffs) are streamed to stdout.

    Returns 0 always (including when the search finds nothing).
    """
    # argparse never calls these; harmless no-ops mirroring the
    # optparse-based siblings of this script
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%(VERSION, search_opts), epilog=posthelp%AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='function', help="function level to filter by")
    parser.add_argument("--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_argument("--evalue", dest="evalue", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--status", dest="status", default="public", help="types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public")
    # one free-text query option per searchable field
    for sfield in SEARCH_FIELDS:
        parser.add_argument("--"+sfield, dest=sfield, default=None, help="search parameter: query string for "+sfield)

    # get inputs
    opts = parser.parse_args()

    # get auth
    token = get_auth_token(opts)

    # assemble the metagenome search query from whatever fields were given
    query = [ ('limit', '100'), ('verbosity', 'minimal'), ('match', 'all'), ('status', opts.status) ]
    for sfield in SEARCH_FIELDS:
        value = getattr(opts, sfield, None)
        if value:
            query.append((sfield, value))
    page_url = opts.url+'/metagenome?'+urlencode(query, True)

    # collect matching metagenome ids, following pagination links
    page = obj_from_url(page_url, auth=token)
    if not page['data']:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0
    mgids = set(rec['id'] for rec in page['data'])
    while page['next']:
        page = obj_from_url(page['next'], auth=token)
        if not page['data']:
            break
        mgids.update(rec['id'] for rec in page['data'])

    # stream annotated sequences for every matched metagenome
    for mg in mgids:
        fetch = [ ('source', opts.source), ('evalue', opts.evalue), ('identity', opts.identity), ('length', opts.length) ]
        # ontology datasources need ontology-typed annotations except at
        # the plain 'function' level
        use_ontology = (opts.source in ['Subsystems', 'KO', 'NOG', 'COG']) and (opts.level != 'function')
        fetch.append(('type', 'ontology' if use_ontology else 'function'))
        if opts.function:
            fetch.append(('filter', opts.function))
        if opts.level:
            fetch.append(('filter_level', opts.level))
        seq_url = opts.url+'/annotation/sequence/'+mg+'?'+urlencode(fetch, True)
        # output data
        safe_print('Results from '+mg+":\n")
        stdout_from_url(seq_url, auth=token)
    return 0