def validate(fformat, files, get_info=False):
    for f in files:
        data = obj_from_url(API_URL + "/inbox/" + f, auth=mgrast_auth['token'], debug=DEBUG)
        if ('data_type' in data) and (data['data_type'] == fformat):
            print("%s (%s) is a valid %s file" % (data['filename'], f, fformat))
        elif fformat == 'sequence':
            if data['stats_info']['file_type'] in ['fasta', 'fastq']:
                info = obj_from_url(API_URL + "/inbox/stats/" + f, auth=mgrast_auth['token'], debug=DEBUG)
                print(info['status'].replace("stats computation", "validation"))
            else:
                sys.stderr.write("ERROR: %s (%s) is not a fastq or fasta file\n" % (data['filename'], f))
        elif fformat == 'metadata':
            if data['stats_info']['file_type'] == 'excel':
                info = obj_from_url(API_URL + "/inbox/validate/" + f, auth=mgrast_auth['token'], debug=DEBUG)
                if get_info:
                    return info
                else:
                    print(info['status'])
                    if info['status'].startswith('invalid'):
                        print(info['error'])
            else:
                sys.stderr.write("ERROR: %s (%s) is not a spreadsheet file\n" % (data['filename'], f))
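# Illustrative usage of validate() (a sketch, not from the original source;
# assumes mgrast_auth has already been populated by a login step and that the
# IDs below are placeholders for real inbox file IDs):
#
#   validate("sequence", ["<inbox-file-id>"])                        # prints per-file validation status
#   info = validate("metadata", ["<inbox-file-id>"], get_info=True)  # returns the validation dict instead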
def upload(files):
    for f in files:
        attr = json.dumps({
            "type": "inbox",
            "id": mgrast_auth['id'],
            "user": mgrast_auth['login'],
            "email": mgrast_auth['email']
        })
        # get format
        if f.endswith(".gz"):
            fformat = "gzip"
        elif f.endswith(".bz2"):
            fformat = "bzip2"
        else:
            fformat = "upload"
        # POST to shock
        data = {"attributes_str": attr}
        result = post_file(SHOCK_URL + "/node", fformat, f, data=data, auth=mgrast_auth['token'], debug=DEBUG)
        # compute file info
        info = obj_from_url(API_URL + "/inbox/info/" + result['data']['id'], auth=mgrast_auth['token'], debug=DEBUG)
        print(info['status'])
        # compute sequence stats
        if info['stats_info']['file_type'] in ['fasta', 'fastq']:
            stats = obj_from_url(API_URL + "/inbox/stats/" + result['data']['id'], auth=mgrast_auth['token'], debug=DEBUG)
            print(stats['status'].replace("stats computation", "validation"))
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % (VERSION, search_opts), epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--limit", dest="limit", type=int, default=15, help="Number of results to show, if > 50 will use paginated queries to get all, default 15")
    parser.add_argument("--order", dest="order", default=None, help="field metagenomes are ordered by, default is no ordering")
    parser.add_argument("--direction", dest="direction", default="asc", help="direction of order. 'asc' for ascending order, 'desc' for descending order, default is asc")
    parser.add_argument("--public", dest="public", action="store_true", default=False, help="return both private and public data if using authenticated search, default is private only. Non-authenticated search only returns public.")
    parser.add_argument("--match", dest="match", default="all", help="search logic. 'all' for metagenomes that match all search parameters, 'any' for metagenomes that match any search parameters, default is all")
    parser.add_argument("--status", dest="status", default="public", help="types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public")
    parser.add_argument("--verbosity", dest="verbosity", default='minimal', help="amount of information to display. use keyword 'minimal' for id and name, use keyword 'full' for MIxS GSC metadata, default is minimal")
    for sfield in SEARCH_FIELDS:
        parser.add_argument("--" + sfield, dest=sfield, default=None, help="search parameter: query string for " + sfield)
    # get inputs
    opts = parser.parse_args()
    # get auth
    token = get_auth_token(opts)
    # build call url
    total = 0
    maxLimit = 50
    params = [('limit', opts.limit if opts.limit < maxLimit else maxLimit),
              ('public', 'yes' if opts.public or (not token) else 'no')]
    for sfield in SEARCH_FIELDS:
        if hasattr(opts, sfield) and getattr(opts, sfield):
            params.append((sfield, getattr(opts, sfield)))
    if opts.order:
        params.append(('order', opts.order))
        params.append(('direction', opts.direction))
    url = opts.url + '/search?' + urlencode(params, True)
    # retrieve data
    fields = ['metagenome_id', 'public'] + SEARCH_FIELDS
    result = obj_from_url(url, auth=token)
    found = len(result['data'])
    if found == 0:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0
    total += found
    # output header
    safe_print("\t".join(fields) + "\n")
    # output rows
    display_search(result['data'], fields)
    while ('next' in result) and result['next'] and (total < opts.limit):
        url = result['next']
        result = obj_from_url(url, auth=token)
        total += len(result['data'])
        display_search(result['data'], fields)
    return 0
def test_heartbeat():
    URI0 = API_URL + "heartbeat"
    obj = obj_from_url(URI0)
    # take the first element (the service name) of each entry in the service parameter list
    SERVICES = [entry[0] for entry in obj["requests"][1]["parameters"]["required"]["service"][1]]
    for service in SERVICES:
        URI = API_URL + "heartbeat/" + service
        obj_detail = obj_from_url(URI)
        assert obj_detail["status"] == 1, "Failed heartbeat on " + service
    return 1
def ko2roles(opts, sshier, koid):
    ko_anno = obj_from_url(opts.url + '/m5nr/accession/' + koid + '?version=1&source=KO&limit=1000')
    ko_md5s = set(map(lambda x: x['md5'], ko_anno['data']))
    if len(ko_md5s) == 0:
        return [], []
    ko_post = {'version': 1, 'source': 'Subsystems', 'data': list(ko_md5s), 'limit': 10000}
    ss_anno = obj_from_url(opts.url + '/m5nr/md5', data=json.dumps(ko_post, separators=(',', ':')))
    roles = set()
    for ss in ss_anno['data']:
        if ss['accession'] in sshier:
            roles.add(sshier[ss['accession']]['level4'])
    return list(roles), list(ko_md5s)
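# Example sketch for ko2roles() (hypothetical values; assumes opts.url points at
# the MG-RAST API and sshier maps Subsystems accessions to hierarchy dicts with
# a 'level4' key, e.g. built from /m5nr/ontology?version=1&source=Subsystems):
#
#   roles, md5s = ko2roles(opts, ss_hier, "K00001")
#   # roles: Subsystems level4 role names sharing md5 checksums with the KO id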
def upload_archive(afile):
    attr = json.dumps({
        "type": "inbox",
        "id": mgrast_auth['id'],
        "user": mgrast_auth['login'],
        "email": mgrast_auth['email']
    })
    # get format
    if afile.endswith(".tar.gz"):
        aformat = "tar.gz"
    elif afile.endswith(".tar.bz2"):
        aformat = "tar.bz2"
    elif afile.endswith(".tar"):
        aformat = "tar"
    elif afile.endswith(".zip"):
        aformat = "zip"
    else:
        sys.stderr.write("ERROR: input file %s is not a supported archive format\n" % afile)
        sys.exit(1)
    # POST to shock / unpack
    data = {"file_name": os.path.basename(afile), "attributes_str": attr}
    result = post_file(SHOCK_URL + "/node", "upload", afile, data=data, auth=mgrast_auth['token'], debug=DEBUG)
    data = {"unpack_node": result['data']['id'], "archive_format": aformat, "attributes_str": attr}
    unpack = obj_from_url(SHOCK_URL + "/node", data=data, auth=mgrast_auth['token'], debug=DEBUG)
    # process new nodes
    for node in unpack['data']:
        # compute file info
        info = obj_from_url(API_URL + "/inbox/info/" + node['id'], auth=mgrast_auth['token'], debug=DEBUG)
        print(info['status'])
        # compute sequence stats
        if info['stats_info']['file_type'] in ['fasta', 'fastq']:
            stats = obj_from_url(API_URL + "/inbox/stats/" + node['id'], auth=mgrast_auth['token'], debug=DEBUG)
            print(stats['status'].replace("stats computation", "validation"))
def rename(fid, fname):
    data = {"name": fname, "file": fid}
    result = obj_from_url(API_URL + "/inbox/rename", data=json.dumps(data), auth=mgrast_auth['token'], debug=DEBUG)
    print(result['status'])
def delete(files):
    for f in files:
        result = obj_from_url(API_URL + "/inbox/" + f, auth=mgrast_auth['token'], method='DELETE', debug=DEBUG)
        print(result['status'])
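# Usage sketch for rename() and delete() (placeholder file ID; assumes a valid
# mgrast_auth token):
#
#   rename("<inbox-file-id>", "sample1.fastq")  # prints rename status
#   delete(["<inbox-file-id>"])                 # prints delete status per file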
def main(args):
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_option("", "--ids", dest="ids", default=None, help="comma separated list of KBase Metagenome IDs")
    parser.add_option("", "--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--level", dest="level", default='species', help="taxon level to retrieve abundances for, default is species")
    parser.add_option("", "--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")
    # get inputs
    (opts, args) = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    # get auth
    token = get_auth_token(opts)
    # build url / retrieve data / output data
    id_list = opts.ids.split(',')
    params = [('level', opts.level), ('source', opts.source)]
    for i in id_list:
        url = opts.url + '/compute/alphadiversity/' + i + '?' + urlencode(params, True)
        data = obj_from_url(url, auth=token)
        safe_print("%s\t%s\n" % (i, data['data']))
    return 0
def listall():
    data = obj_from_url(API_URL + "/submission/list", auth=mgrast_auth['token'])
    submissions = sorted(data['submissions'], key=itemgetter('timestamp'))
    pt = PrettyTable(["ID", "type", "status", "time"])
    for s in submissions:
        pt.add_row([s['id'], s['type'], s['status'], s['timestamp']])
    pt.align = "l"
    print(pt)
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--ids", dest="ids", default=None, help="comma separated list of KBase Metagenome IDs")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='species', help="taxon level to retrieve abundances for, default is species")
    parser.add_argument("--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")
    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    # get auth
    token = get_auth_token(opts)
    # build url / retrieve data / output data
    id_list = opts.ids.split(',')
    params = [('level', opts.level), ('source', opts.source)]
    for i in id_list:
        url = opts.url + '/compute/alphadiversity/' + i + '?' + urlencode(params, True)
        data = obj_from_url(url, auth=token)
        safe_print("%s\t%s\n" % (i, data['data']))
    return 0
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID, required")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_argument("--output", dest="output", default='text', help="output format: 'text' for tabbed table, 'biom' for BIOM format, default is text")
    # get inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.output not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid output format\n")
        return 1
    # get biom
    try:
        indata = sys.stdin.read() if opts.input == '-' else open(opts.input, 'r').read()
        biom = json.loads(indata)
        rows, cols, matrix = biom_to_matrix(biom)
    except:
        sys.stderr.write("ERROR: unable to load input biom data\n")
        return 1
    # get SS hierarchy
    ss_hier = dict([(x['accession'], x) for x in obj_from_url(opts.url + '/m5nr/ontology?version=1&source=Subsystems')['data']])
    # biom KO -> SS
    ssrows = []
    ssmatrix = []
    for r, rid in enumerate(rows):
        roles, md5s = ko2roles(opts, ss_hier, rid)
        if not roles:
            continue
        for role in roles:
            fig_ids = role2figs(opts, role, md5s)
            if opts.output == 'text':
                # text output: feature list, function, abundance for function, avg evalue for function, organism
                safe_print("%s\t%s\t%d\t%.2e\t%s\n" % (",".join(fig_ids), role, matrix[r][0], 0, 'glob'))
            elif opts.output == 'biom':
                ssrows.append({'id': role, 'metadata': {'accession': fig_ids}})
                ssmatrix.append(matrix[r])
    # biom output
    if opts.output == 'biom':
        biom['matrix_type'] = 'sparse'
        biom['shape'][0] = len(ssrows)
        biom['rows'] = ssrows
        biom['data'] = ssmatrix
        safe_print(json.dumps(biom) + "\n")
    return 0
def status(sid):
    data = obj_from_url(API_URL + "/submission/" + sid + '?full=1', auth=mgrast_auth['token'])
    # check for errors
    if ('error' in data) and data['error']:
        sys.stderr.write("ERROR: %s\n" % data['error'])
        sys.exit(1)
    # inputs may be a list or a dict keyed by file ID
    inputs = data['inputs'].values() if isinstance(data['inputs'], dict) else data['inputs']
    fids = [x['id'] for x in inputs]
    fnames = [x['filename'] for x in inputs]
    fsizes = [str(x['filesize']) for x in inputs]
    # submission summary
    pt_summary = PrettyTable(["submission ID", "type", "project", "submit time", "input file ID", "input file name", "input file size", "status"])
    pt_summary.add_row([data['id'], data['type'], data['project'], data['info']['submittime'], "\n".join(fids), "\n".join(fnames), "\n".join(fsizes), data['state']])
    pt_summary.align = "l"
    print(pt_summary)
    # submission status
    if ('preprocessing' in data) and data['preprocessing']:
        pt_status = PrettyTable(["submission step", "step name", "step status", "step inputs"])
        for i, p in enumerate(data['preprocessing']):
            pstatus = p['status']
            if ('error' in p) and p['error']:
                pstatus += "\n" + p['error']
            pt_status.add_row([i, p['stage'], pstatus, "\n".join(p['inputs'])])
        pt_status.align = "l"
        print(pt_status)
    # metagenome info
    if ('metagenomes' in data) and data['metagenomes']:
        pt_mg = PrettyTable(["metagenome ID", "metagenome name", "status", "current steps", "submit time", "complete time", "pipeline ID"])
        for m in data['metagenomes']:
            state = "in-progress"
            if len(m['state']) == 1:
                state = m['state'][0]
            else:
                for s in m['state']:
                    if s == 'suspend':
                        state = 'suspend'
            remain = 0
            if m['task'] and (len(m['task']) > 0):
                remain = len(m['task'])
            pt_mg.add_row([m['userattr']['id'], m['userattr']['name'], state, remain, m['submittime'], m['completedtime'], m['id']])
        pt_mg.align = "l"
        print(pt_mg)
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--verbosity", dest="verbosity", default='mixs', help="amount of metadata to display. use keyword 'mixs' for GSC MIxS metadata, use keyword 'full' for all GSC metadata, default is mixs")
    # get inputs
    opts = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    # get auth
    token = get_auth_token(opts)
    # build call url
    verb = opts.verbosity if opts.verbosity == 'mixs' else 'metadata'
    url = opts.url + '/metagenome/' + opts.id + '?verbosity=' + verb
    # retrieve / output data
    result = obj_from_url(url, auth=token)
    if opts.verbosity == 'mixs':
        for r in sorted(result.keys()):
            if r not in ['project', 'library', 'sample']:
                safe_print("%s\t%s\n" % (r, result[r]))
    elif opts.verbosity == 'full':
        md = result['metadata']
        safe_print("category\tlabel\tvalue\n")
        if ('project' in md) and md['project']['data']:
            for p in sorted(md['project']['data'].keys()):
                safe_print("project\t%s\t%s\n" % (p, md['project']['data'][p]))
        if ('sample' in md) and md['sample']['data']:
            for s in sorted(md['sample']['data'].keys()):
                safe_print("sample\t%s\t%s\n" % (s, md['sample']['data'][s]))
        if ('library' in md) and ('type' in md['library']) and md['library']['data']:
            for l in sorted(md['library']['data'].keys()):
                safe_print("library: %s\t%s\t%s\n" % (md['library']['type'], l, md['library']['data'][l]))
        if ('env_package' in md) and ('type' in md['env_package']) and md['env_package']['data']:
            for e in sorted(md['env_package']['data'].keys()):
                safe_print("env package: %s\t%s\t%s\n" % (md['env_package']['type'], e, md['env_package']['data'][e]))
    else:
        sys.stderr.write("ERROR: invalid verbosity type\n")
        return 1
    return 0
def check_ids(files):
    data = obj_from_url(API_URL + "/inbox", auth=mgrast_auth['token'], debug=DEBUG)
    if len(data['files']) == 0:
        sys.stderr.write("ERROR: Your inbox is empty, please upload first.\n")
        sys.exit(1)
    ids = [x['id'] for x in data['files']]
    for f in files:
        if f not in ids:
            sys.stderr.write("ERROR: File ID '%s' does not exist in your inbox. Did you use File Name by mistake?\n" % f)
            sys.exit(1)
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="API url")
    parser.add_argument("--md5", dest="md5", default=None, help="sequence md5")
    parser.add_argument("--id", dest="id", default=None, help="accession ID")
    parser.add_argument("--source", dest="source", default='SwissProt', help="datasource to get record from, one of: SwissProt, TreMBL, InterPro")
    parser.add_argument("--version", dest="version", default='1', help="M5NR version to use, one of 1 or 9")
    # get inputs
    opts = parser.parse_args()
    # build url for m5nr query
    params = [('limit', '1'), ('version', opts.version), ('source', opts.source)]
    if opts.md5:
        url = opts.url + '/m5nr/md5/' + opts.md5 + '?' + urlencode(params, True)
    elif opts.id:
        url = opts.url + '/m5nr/accession/' + opts.id + '?' + urlencode(params, True)
    else:
        sys.stderr.write("ERROR: no md5 checksum or accession given\n")
        return 1
    # retrieve data
    result = obj_from_url(url)
    if len(result['data']) == 0:
        sys.stderr.write("ERROR: no match in M5NR version %s\n" % opts.version)
        return 1
    # output data
    stdout_from_url(UNIPROT_URL + result['data'][0]['accession'] + '.txt')
    return 0
def upload(files, verbose):
    fids = []
    attr = json.dumps({
        "type": "inbox",
        "id": mgrast_auth['id'],
        "user": mgrast_auth['login'],
        "email": mgrast_auth['email']
    })
    for i, f in enumerate(files):
        # get format
        if f.endswith(".gz"):
            fformat = "gzip"
            fname = os.path.basename(f[:-3])
        elif f.endswith(".bz2"):
            fformat = "bzip2"
            fname = os.path.basename(f[:-4])
        else:
            fformat = "upload"
            fname = os.path.basename(f)
        # POST to shock
        data = {"file_name": fname, "attributes_str": attr}
        if verbose:
            if len(files) > 1:
                print("Uploading file %d of %d (%s) to MG-RAST Shock" % (i + 1, len(files), f))
            else:
                print("Uploading file %s to MG-RAST Shock" % (f))
        result = post_file(SHOCK_URL + "/node", fformat, f, data=data, auth=mgrast_auth['token'], debug=verbose)
        if verbose:
            print(json.dumps(result['data']))
            if len(files) > 1:
                print("Setting info for file %d of %d (%s) in MG-RAST inbox" % (i + 1, len(files), f))
            else:
                print("Setting info for file %s in MG-RAST inbox" % (f))
        # compute file info
        info = obj_from_url(API_URL + "/inbox/info/" + result['data']['id'], auth=mgrast_auth['token'], debug=verbose)
        if verbose:
            print(json.dumps(info))
        else:
            print(info['status'])
        fids.append(result['data']['id'])
    return fids
def archive_upload(afile, verbose):
    attr = json.dumps({
        "type": "inbox",
        "id": mgrast_auth['id'],
        "user": mgrast_auth['login'],
        "email": mgrast_auth['email']
    })
    # get format
    if afile.endswith(".tar.gz"):
        aformat = "tar.gz"
    elif afile.endswith(".tar.bz2"):
        aformat = "tar.bz2"
    elif afile.endswith(".tar"):
        aformat = "tar"
    elif afile.endswith(".zip"):
        aformat = "zip"
    else:
        sys.stderr.write("ERROR: input file %s is not a supported archive format\n" % afile)
        sys.exit(1)
    # POST to shock / unpack
    if verbose:
        print("Uploading file %s to MG-RAST Shock" % (afile))
    data = {"file_name": os.path.basename(afile), "attributes_str": attr}
    result = post_file(SHOCK_URL + "/node", "upload", afile, data=data, auth=mgrast_auth['token'], debug=verbose)
    if verbose:
        print(json.dumps(result['data']))
        print("Unpacking archive file %s" % (afile))
    data = {"unpack_node": result['data']['id'], "archive_format": aformat, "attributes_str": attr}
    unpack = obj_from_url(SHOCK_URL + "/node", data=data, auth=mgrast_auth['token'], debug=verbose)
    if verbose:
        print(json.dumps(unpack['data']))
    # return a real list: a lazy map() object would not survive reuse or JSON encoding
    fids = [x['id'] for x in unpack['data']]
    return fids
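# Usage sketch for archive_upload() (assumes "reads.tar.gz" exists locally and
# mgrast_auth is set; the return value is a list of Shock node IDs for the
# unpacked files):
#
#   fids = archive_upload("reads.tar.gz", verbose=True)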
def compute(action, files, retain, joinfile, rc_index):
    if action == "sff2fastq":
        data = {"sff_file": files[0]}
    elif action == "demultiplex":
        data = {"seq_file": files[0], "barcode_file": files[1], "rc_index": 1 if rc_index else 0}
        if len(files) == 3:
            data["index_file"] = files[2]
    elif action == "pairjoin":
        data = {"pair_file_1": files[0], "pair_file_2": files[1], "retain": 1 if retain else 0}
        if joinfile:
            data['output'] = joinfile
    elif action == "pairjoin_demultiplex":
        data = {
            "pair_file_1": files[0],
            "pair_file_2": files[1],
            "index_file": files[2],
            "barcode_file": files[3],
            "retain": 1 if retain else 0,
            "rc_index": 1 if rc_index else 0
        }
    else:
        sys.stderr.write("ERROR: invalid compute action. use one of: %s\n" % ", ".join(compute_actions))
        # exit here: falling through would reference an undefined 'data'
        sys.exit(1)
    info = obj_from_url(API_URL + "/inbox/" + action, data=json.dumps(data), auth=mgrast_auth['token'], debug=DEBUG)
    print(info['status'])
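# compute() action sketches (placeholder inbox file IDs; it is wise to verify
# them with check_ids() first):
#
#   compute("pairjoin", ["<r1-id>", "<r2-id>"], True, "joined.fastq", False)
#   compute("sff2fastq", ["<sff-id>"], False, None, False)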
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--ids", dest="ids", default=None, help="comma separated list or file of KBase Metagenome IDs")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='level3', help="functional level to retrieve abundances for, default is level3")
    parser.add_argument("--source", dest="source", default='Subsystems', help="function datasource to filter results by, default is Subsystems")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="function level to filter by")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="function name to filter by, file or comma separated list")
    parser.add_argument("--intersect_source", dest="intersect_source", default='SEED', help="taxon datasource for intersection, default is SEED")
    parser.add_argument("--intersect_level", dest="intersect_level", default=None, help="taxon level for intersection")
    parser.add_argument("--intersect_name", dest="intersect_name", default=None, help="taxon name(s) for intersection, file or comma separated list")
    parser.add_argument("--output", dest="output", default='-', help="output: filename or stdout (-), default is stdout")
    parser.add_argument("--format", dest="format", default='biom', help="output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--evalue", type=int, dest="evalue", default=15, help="negative exponent value for maximum e-value cutoff, default is 15")
    parser.add_argument("--identity", type=int, dest="identity", default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", type=int, dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")
    parser.add_argument("--temp", dest="temp", default=None, help="filename to temporarily save biom output at each iteration")
    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    if (opts.intersect_name and (not opts.intersect_level)) or ((not opts.intersect_name) and opts.intersect_level):
        sys.stderr.write("ERROR: both --intersect_level and --intersect_name need to be used together\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    # get auth
    token = get_auth_token(opts)
    # build url
    id_list = []
    if os.path.isfile(opts.ids):
        id_str = open(opts.ids, 'r').read()
        try:
            id_obj = json.loads(id_str)
            if 'elements' in id_obj:
                id_list = list(id_obj['elements'].keys())
            elif 'members' in id_obj:
                id_list = [x['ID'] for x in id_obj['members']]
        except:
            id_list = id_str.strip().split('\n')
    else:
        id_list = opts.ids.strip().split(',')
    params = [('group_level', opts.level), ('source', opts.source), ('evalue', opts.evalue),
              ('identity', opts.identity), ('length', opts.length), ('version', opts.version),
              ('result_type', 'abundance'), ('asynchronous', '1')]
    if opts.intersect_level and opts.intersect_name:
        params.append(('filter_source', opts.intersect_source))
        params.append(('filter_level', opts.intersect_level))
        if os.path.isfile(opts.intersect_name):
            with open(opts.intersect_name) as file_:
                for f in file_:
                    params.append(('filter', f.strip()))
        else:
            for f in opts.intersect_name.strip().split(','):
                params.append(('filter', f))
    # retrieve data
    biom = None
    size = 50
    if len(id_list) > size:
        for i in range(0, len(id_list), size):
            sub_ids = id_list[i:i + size]
            cur_params = copy.deepcopy(params)
            for mgid in sub_ids:
                cur_params.append(('id', mgid))
            cur_url = opts.url + '/matrix/function?' + urlencode(cur_params, True)
            cur_biom = async_rest_api(cur_url, auth=token)
            biom = merge_biom(biom, cur_biom)
            if opts.temp:
                json.dump(biom, open(opts.temp, 'w'))
    else:
        for i in id_list:
            params.append(('id', i))
        url = opts.url + '/matrix/function?' + urlencode(params, True)
        biom = async_rest_api(url, auth=token)
        if opts.temp:
            json.dump(biom, open(opts.temp, 'w'))
    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        # get input filter list
        filter_list = []
        if os.path.isfile(opts.filter_name):
            with open(opts.filter_name) as file_:
                for f in file_:
                    filter_list.append(f.strip())
        else:
            for f in opts.filter_name.strip().split(','):
                filter_list.append(f)
        # annotation mapping from m5nr
        params = [('version', opts.version), ('min_level', opts.level), ('source', opts.source)]
        url = opts.url + '/m5nr/ontology?' + urlencode(params, True)
        data = obj_from_url(url)
        level = 'level4' if opts.level == 'function' else opts.level
        for ann in data['data']:
            if (opts.filter_level in ann) and (level in ann) and (ann[opts.filter_level] in filter_list):
                sub_ann.add(ann[level])
    # output data
    if (not opts.output) or (opts.output == '-'):
        out_hdl = sys.stdout
    else:
        out_hdl = open(opts.output, 'w')
    if opts.format == 'biom':
        out_hdl.write(json.dumps(biom) + "\n")
    else:
        biom_to_tab(biom, out_hdl, rows=sub_ann)
    out_hdl.close()
    return 0
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp.format(VERSION, RO_VERSION), epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="MG-RAST API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--metagenome", dest="metagenome", default=None, help="metagenome ID")
    parser.add_argument("--dir", dest="dir", default=".", help="directory to export to")
    parser.add_argument("--list", dest="list", action="store_true", default=False, help="list files in manifest")
    # get inputs
    opts = parser.parse_args()
    if not opts.metagenome:
        sys.stderr.write("ERROR: a metagenome id is required\n")
        return 1
    if not os.path.isdir(opts.dir):
        sys.stderr.write("ERROR: dir '%s' does not exist\n" % opts.dir)
        return 1
    # get auth
    token = get_auth_token(opts)
    # get mg info
    url = opts.url + '/metagenome/' + opts.metagenome
    mg = obj_from_url(url, auth=token)
    # get manifest
    url = opts.url + '/researchobject/manifest/' + opts.metagenome
    data = obj_from_url(url, auth=token)
    # just list
    if opts.list:
        pt = PrettyTable(["File Name", "Folder", "Media Type"])
        for info in data["aggregates"]:
            pt.add_row([info["bundledAs"]["filename"], info["bundledAs"]["folder"], info["mediatype"]])
        pt.align = "l"
        print(pt)
        return 0
    # get cwl files
    temp_name = random_str(10)
    pipeline_dir = os.path.join(opts.dir, temp_name)
    git_clone = "git clone https://github.com/MG-RAST/pipeline.git " + pipeline_dir
    os.system(git_clone)
    # download manifest
    sha1s = []
    base = data["@context"][0]["@base"].strip('/')
    manifest_dir = os.path.join(opts.dir, base)
    os.mkdir(manifest_dir)
    data_str = json.dumps(data)
    open(os.path.join(manifest_dir, data["manifest"]), 'w').write(data_str)
    # hashlib requires bytes, so encode the text before hashing
    sha1s.append([hashlib.sha1(data_str.encode('utf-8')).hexdigest(), os.path.join(base, data["manifest"])])
    # download aggregates
    for info in data["aggregates"]:
        sys.stdout.write("Downloading %s ... " % (info["bundledAs"]["filename"]))
        folder = info["bundledAs"]["folder"].strip('/')
        folder_dir = os.path.join(opts.dir, folder)
        if not os.path.isdir(folder_dir):
            os.mkdir(folder_dir)
        if "githubusercontent" in info["uri"]:
            pos = info["uri"].find("CWL")
            src = os.path.join(pipeline_dir, info["uri"][pos:])
            dst = os.path.join(folder_dir, info["bundledAs"]["filename"])
            text = open(src, 'r').read().replace('../Inputs/', '').replace('../Tools/', '').replace('../Workflows/', '')
            if dst.endswith('job.yaml'):
                text = edit_input(text, mg)
            open(dst, 'w').write(text)
            sha1s.append([hashlib.sha1(text.encode('utf-8')).hexdigest(), os.path.join(folder, info["bundledAs"]["filename"])])
        else:
            fh = open(os.path.join(folder_dir, info["bundledAs"]["filename"]), 'w')
            s1 = file_from_url(info["uri"], fh, auth=token, sha1=True)
            fh.close()
            sha1s.append([s1, os.path.join(folder, info["bundledAs"]["filename"])])
        sys.stdout.write("Done\n")
    # output sha1
    mansha1 = open(os.path.join(opts.dir, "manifest-sha1.txt"), 'w')
    tagsha1 = open(os.path.join(opts.dir, "tagmanifest-sha1.txt"), 'w')
    sha1s.sort(key=lambda x: x[1])
    for s1 in sha1s:
        if s1[1].startswith('data'):
            mansha1.write("%s\t%s\n" % (s1[0], s1[1]))
        else:
            tagsha1.write("%s\t%s\n" % (s1[0], s1[1]))
    mansha1.close()
    tagsha1.close()
    # cleanup
    shutil.rmtree(pipeline_dir)
    return 0
def test_large_01():
    URI = API_URL + '/matrix/organism?group_level=phylum&source=RDP&hit_type=single&result_type=abundance&evalue=1&identity=60&length=15&taxid=0&id=mgm4510219.3'
    obj = obj_from_url(URI)
    with open("5.txt", "w") as o:
        o.write(str(obj))
def test_matrix_04():
    URI = API_URL + '/matrix/organism?id=mgm4440284.3&group_level=phylum&source=RDP&hit_type=single&result_type=abundance&evalue=1&identity=60&length=15&taxid=0'
    obj = obj_from_url(URI)
    with open("4.txt", "w") as o:
        o.write(str(obj))
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--rlib", dest="rlib", default=None, help="R lib path")
    parser.add_argument("--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_argument("--output", dest="output", default='-', help="output: filename or stdout (-), default is stdout")
    parser.add_argument("--outdir", dest="outdir", default=None, help="output is placed in dir as filename.obj, filename.type, only for 'biom' input")
    parser.add_argument("--format", dest="format", default='biom', help="input / output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    # get inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid format\n")
        return 1
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']
    # parse inputs
    biom = None
    rows = []
    cols = []
    data = []
    maxval = 0
    tmp_in = 'tmp_' + random_str() + '.txt'
    tmp_hdl = open(tmp_in, 'w')
    try:
        indata = sys.stdin.read() if opts.input == '-' else open(opts.input, 'r').read()
        if opts.format == 'biom':
            try:
                biom = json.loads(indata)
                if opts.rlib:
                    maxval = biom_to_tab(biom, tmp_hdl)
                else:
                    rows, cols, data = biom_to_matrix(biom)
            except:
                sys.stderr.write("ERROR: input BIOM data not correct format\n")
                return 1
        else:
            rows, cols, data = tab_to_matrix(indata)
            data = [[float(v) for v in row] for row in data]  # floatify it
            if opts.rlib:
                tmp_hdl.write(indata)
    except:
        sys.stderr.write("ERROR: unable to load input data\n")
        return 1
    finally:
        tmp_hdl.close()
    # check values to see if already normalized, otherwise R fails badly
    if len(data) > 0:
        maxval = max(map(max, data))
    if maxval <= 1:
        os.remove(tmp_in)
        sys.stderr.write("ERROR: data is already normalized.\n")
        return 1
    # retrieve data
    norm = None
    if opts.rlib:
        tmp_out = 'tmp_' + random_str() + '.txt'
        r_cmd = """source("%s/preprocessing.r")
suppressMessages( MGRAST_preprocessing(
    file_in="%s",
    file_out="%s"
))""" % (opts.rlib, tmp_in, tmp_out)
        execute_r(r_cmd)
        nrows, ncols, ndata = tab_to_matrix(open(tmp_out, 'r').read())
        num_data = [[float(v) for v in row] for row in ndata]
        norm = {"columns": ncols, "rows": nrows, "data": num_data}
        os.remove(tmp_out)
    else:
        post = {"columns": cols, "rows": rows, "data": data}
        norm = obj_from_url(opts.url + '/compute/normalize', data=json.dumps(post, separators=(',', ':')))
    # output data
    os.remove(tmp_in)
    if (not opts.output) or (opts.output == '-'):
        out_hdl = sys.stdout
    else:
        out_hdl = open(opts.output, 'w')
    if biom and (opts.format == 'biom'):
        # may have rows removed
        new_rows = []
        for r in biom['rows']:
            if r['id'] in norm['rows']:
                new_rows.append(r)
        biom['rows'] = new_rows
        biom['data'] = norm['data']
        biom['shape'][0] = len(biom['rows'])
        biom['id'] = biom['id'] + '_normalized'
        biom['matrix_type'] = 'dense'
        biom['matrix_element_type'] = 'float'
        matrix_type = None
        if biom['type'].startswith('Taxon'):
            matrix_type = "Communities.TaxonomicMatrix"
        elif biom['type'].startswith('Function'):
            matrix_type = "Communities.FunctionalMatrix"
        if opts.outdir and matrix_type:
            if not os.path.isdir(opts.outdir):
                os.mkdir(opts.outdir)
            ohdl = open(os.path.join(opts.outdir, opts.output + '.obj'), 'w')
            thdl = open(os.path.join(opts.outdir, opts.output + '.type'), 'w')
            ohdl.write(json.dumps(biom) + "\n")
            thdl.write(matrix_type)
            ohdl.close()
            thdl.close()
        else:
            out_hdl.write(json.dumps(biom) + "\n")
    else:
        out_hdl.write("\t%s\n" % "\t".join(norm['columns']))
        for i, d in enumerate(norm['data']):
            out_hdl.write("%s\t%s\n" % (norm['rows'][i], "\t".join(map(str, d))))
    out_hdl.close()
    # only a real file can be stat'ed and removed, not stdout
    if opts.output and (opts.output != '-') and (os.stat(opts.output).st_size == 0):
        os.remove(opts.output)
    return 0
def main(args):
    global API_URL
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    # access options
    parser.add_argument("-u", "--url", dest="url", default=API_URL, help="MG-RAST API url")
    parser.add_argument("-t", "--token", dest="token", default=None, help="MG-RAST token")
    # other options
    parser.add_argument("-f", "--file", dest="mdfile", default=None, help="metadata .xlsx file")
    parser.add_argument("--taxa", dest="taxa", default=None, help="metagenome_taxonomy for project: http://www.ebi.ac.uk/ena/data/view/Taxon:408169")
    parser.add_argument("--debug", dest="debug", action="store_true", default=False, help="Run in debug mode")
    parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", default=False, help="Verbose STDOUT")
    parser.add_argument("args", type=str, nargs="+", help="Action (" + ",".join(valid_actions) + ")")
    # get inputs
    opts = parser.parse_args()
    args = opts.args
    API_URL = opts.url
    # validate inputs
    if len(args) < 1:
        sys.stderr.write("ERROR: missing action\n")
        return 1
    action = args[0]
    if action not in valid_actions:
        sys.stderr.write("ERROR: invalid action. use one of: %s\n" % ", ".join(valid_actions))
        return 1
    if len(args) < 2:
        sys.stderr.write("ERROR: missing Project ID\n")
        return 1
    pid = args[1]
    DEBUG = opts.verbose or opts.debug
    # get token
    token = get_auth_token(opts)
    if not token:
        token = input('Enter your MG-RAST auth token: ')
    # actions
    if action == "get-info":
        data = obj_from_url(opts.url + '/project/' + pid + '?verbosity=verbose&nocache=1', auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "get-metadata":
        data = obj_from_url(opts.url + '/metadata/export/' + pid, auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "update-metadata":
        result = post_file(opts.url + '/metadata/update', 'upload', opts.mdfile, auth=token, data=json.dumps({'project': pid}, separators=(',', ':')), debug=DEBUG)
        print(json.dumps(result, sort_keys=True, indent=4))
    elif action == "make-public":
        data = obj_from_url(opts.url + '/project/' + pid + '/makepublic', auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "submit-ebi":
        debug = 1 if opts.debug else 0
        info = {'project_id': pid, 'debug': debug}
        if opts.taxa:
            info['project_taxonomy'] = opts.taxa
        data = obj_from_url(opts.url + '/submission/ebi', auth=token, data=json.dumps(info, separators=(',', ':')))
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "status-ebi":
        data = obj_from_url(opts.url + '/submission/' + pid, auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    return 0
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--project", dest="project", default=None, help="project ID")
    # get inputs
    opts = parser.parse_args()
    if not opts.project or opts.project[0:3] != "mgp":
        sys.stderr.write("ERROR: a project id is required\n")
        return 1
    # get auth
    PROJECT = opts.project
    TOKEN = get_auth_token(opts)
    # export metadata
    outfile = PROJECT + "-export.xlsx"
    metadata = obj_from_url("http://api.mg-rast.org/metadata/export/{project}?verbosity=full".format(project=PROJECT), auth=TOKEN)
    workbook = xlsxwriter.Workbook(outfile)
    print("Creating", outfile)
    worksheet = {}
    worksheet["README"] = workbook.add_worksheet("README")
    row = 0
    for i in range(10):
        worksheet["README"].write_number(row, 0, i)
        row += 1
    worksheet["project"] = workbook.add_worksheet("project")
    project_keys = get_project_keys(metadata)
    col = 0
    for l in project_keys:
        value = metadata["data"][l]["value"]
        definition = metadata["data"][l]["definition"]
        worksheet["project"].write_string(0, col, l)
        worksheet["project"].write_string(1, col, definition)
        worksheet["project"].write_string(2, col, value)
        col += 1
    worksheet["sample"] = workbook.add_worksheet("sample")
    samplekeys = get_sample_keys(metadata)
    col = 0
    row = 2
    for sample in metadata["samples"]:
        for l in samplekeys:
            if l in sample["data"].keys():
                value = sample["data"][l]["value"]
                definition = sample["data"][l]["definition"]
                fmt = sample["data"][l]["type"]
                worksheet["sample"].write_string(0, col, l)
                worksheet["sample"].write_string(1, col, definition)
                write_worksheet_value(worksheet["sample"], row, col, value, fmt)
            col += 1
        col = 0
        row += 1
    try:
        librarytype = metadata["samples"][0]["libraries"][0]["data"]["investigation_type"]["value"]
    except IndexError:
        sys.exit("This metadata bundle does not have any libraries")
    worksheet["library"] = workbook.add_worksheet("library " + librarytype)
    libkeys = get_library_keys(metadata)
    col = 0
    row = 2
    for sample in metadata["samples"]:
        for l in libkeys:
            if l in sample["libraries"][0]["data"].keys():
                value = sample["libraries"][0]["data"][l]["value"]
                definition = sample["libraries"][0]["data"][l]["definition"]
                fmt = sample["libraries"][0]["data"][l]["type"]
                worksheet["library"].write_string(0, col, l)
                worksheet["library"].write_string(1, col, definition)
                write_worksheet_value(worksheet["library"], row, col, value, fmt)
            col += 1
        col = 0
        row += 1
    eps = get_eps(metadata)
    print("eps", " ".join(eps))
    epcol = {}
    eprow = {}
    for ep in eps:
        worksheet[ep] = workbook.add_worksheet("ep " + ep)
        epcol[ep] = 0
        eprow[ep] = 2
    epkeys = get_ep_keys(metadata, eps)
    for sample in metadata["samples"]:
        ep = sample["envPackage"]["type"]
        for l in epkeys[ep]:
            try:
                value = sample["envPackage"]["data"][l]["value"]
                definition = sample["envPackage"]["data"][l]["definition"]
                fmt = sample["envPackage"]["data"][l]["type"]
            except KeyError:
                value = ""
                definition = ""
                fmt = "string"
            worksheet[ep].write_string(0, epcol[ep], l)
            worksheet[ep].write_string(1, epcol[ep], definition)
            write_worksheet_value(worksheet[ep], eprow[ep], epcol[ep], value, fmt)
            epcol[ep] += 1
        epcol[ep] = 0
        eprow[ep] += 1
    workbook.close()
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_argument("--output", dest="output", default='-', help="output: filename or stdout (-), default is stdout")
    parser.add_argument("--format", dest="format", default='biom', help="input format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--cluster", dest="cluster", default='ward', help="cluster function, one of: ward, single, complete, mcquitty, median, centroid, default is ward")
    parser.add_argument("--distance", dest="distance", default='bray-curtis', help="distance function, one of: bray-curtis, euclidean, maximum, manhattan, canberra, minkowski, difference, default is bray-curtis")
    parser.add_argument("--name", dest="name", type=int, default=0, help="label columns by name, default is by id: 1=true, 0=false")
    parser.add_argument("--normalize", dest="normalize", type=int, default=0, help="normalize the input data, default is off: 1=true, 0=false")
    # get inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    # parse inputs
    rows = []
    cols = []
    data = []
    try:
        indata = sys.stdin.read() if opts.input == '-' else open(opts.input, 'r').read()
        if opts.format == 'biom':
            try:
                biom = json.loads(indata)
                rows, cols, data = biom_to_matrix(biom, col_name=opts.name)
            except:
                sys.stderr.write("ERROR: input BIOM data not correct format\n")
                return 1
        else:
            rows, cols, data = tab_to_matrix(indata)
    except:
        sys.stderr.write("ERROR: unable to load input data\n")
        return 1
    # retrieve data
    raw = '0' if opts.normalize else '1'
    post = {"raw": raw, "cluster": opts.cluster, "distance": opts.distance, "columns": cols, "rows": rows, "data": data}
    hmap = obj_from_url(opts.url + '/compute/heatmap', data=json.dumps(post, separators=(',', ':')))
    # output data
    if (not opts.output) or (opts.output == '-'):
        out_hdl = sys.stdout
    else:
        out_hdl = open(opts.output, 'w')
    out_hdl.write(json.dumps(hmap, separators=(', ', ': '), indent=4) + "\n")
    out_hdl.close()
    return 0
def submit(stype, files, opts):
    fids = []
    # post files to shock
    if stype == 'batch':
        fids = archive_upload(files[0], opts.verbose)
    else:
        fids = upload(files, opts.verbose)
    # set POST data
    data = {}
    if opts.debug:
        data['debug'] = 1
    if opts.barcode:
        bids = upload([opts.barcode], opts.verbose)
        data['barcode_file'] = bids[0]
    if opts.metadata:
        mids = upload([opts.metadata], opts.verbose)
        data['metadata_file'] = mids[0]
    elif opts.project_id:
        data['project_id'] = opts.project_id
    elif opts.project_name:
        data['project_name'] = opts.project_name
    # figure out type
    if (stype == 'simple') or (stype == 'batch'):
        data['seq_files'] = fids
    elif stype == 'demultiplex':
        data['multiplex_file'] = fids[0]
        data['rc_index'] = 1 if opts.rc_index else 0
        if len(fids) == 3:
            data["index_file"] = fids[2]
    elif stype == 'pairjoin':
        data['pair_file_1'] = fids[0]
        data['pair_file_2'] = fids[1]
        data['retain'] = 1 if opts.retain else 0
        if opts.mgname:
            data['mg_name'] = opts.mgname
    elif stype == 'pairjoin_demultiplex':
        data['pair_file_1'] = fids[0]
        data['pair_file_2'] = fids[1]
        data['index_file'] = fids[2]
        data['retain'] = 1 if opts.retain else 0
        data['rc_index'] = 1 if opts.rc_index else 0
    # set pipeline flags - assembled is special case
    if opts.assembled:
        data['assembled'] = 1
        data['filter_ln'] = 0
        data['filter_ambig'] = 0
        data['dynamic_trim'] = 0
        data['dereplicate'] = 0
        data['bowtie'] = 0
    else:
        data['assembled'] = 0
        data['filter_ln'] = 0 if opts.no_filter_ln else 1
        data['filter_ambig'] = 0 if opts.no_filter_ambig else 1
        data['dynamic_trim'] = 0 if opts.no_dynamic_trim else 1
        data['dereplicate'] = 0 if opts.no_dereplicate else 1
        data['bowtie'] = 0 if opts.no_bowtie else 1
    # set pipeline options
    data['filter_ln_mult'] = opts.filter_ln_mult
    data['max_ambig'] = opts.max_ambig
    data['max_lqb'] = opts.max_lqb
    data['min_qual'] = opts.min_qual
    if opts.screen_indexes:
        data['screen_indexes'] = opts.screen_indexes
    if opts.priority:
        data['priority'] = opts.priority
    # submit it
    if opts.verbose:
        print("Submitting to MG-RAST with the following parameters:")
        print(json.dumps(data, sort_keys=True, indent=4))
    result = obj_from_url(API_URL + "/submission/submit", data=json.dumps(data), auth=mgrast_auth['token'])
    if opts.verbose and (not opts.debug):
        print(json.dumps(result))
    if opts.debug:
        pprint.pprint(result)
    elif opts.synch or opts.json_out:
        print("Project ID: " + result['project'])
        print("Submission ID: " + result['id'])
        wait_on_complete(result['id'], opts.json_out)
    else:
        print("Project ID: " + result['project'])
        print("Submission ID: " + result['id'])
        status(result['id'])
def delete(sid):
    data = obj_from_url(API_URL + "/submission/" + sid, auth=mgrast_auth['token'], method='DELETE')
    print(data['status'])
def wait_on_complete(sid, json_out):
    listed_mgs = set()
    incomplete = True
    data = None
    total_mg = 0
    while incomplete:
        time.sleep(synch_pause)
        data = obj_from_url(API_URL + "/submission/" + sid, auth=mgrast_auth['token'])
        # check for global errors
        if isinstance(data['status'], str):
            sys.stderr.write("ERROR: %s\n" % data['status'])
            sys.exit(1)
        # check for submission errors
        for task in data['status']['preprocessing']:
            if task['status'] == "suspend":
                sys.stderr.write("ERROR: %s\n" % task['error'])
                sys.exit(1)
        # check for metagenomes
        total_mg = len(data['status']['metagenomes'])
        done_mg = 0
        error_mg = 0
        if total_mg > 0:
            for mg in data['status']['metagenomes']:
                if mg['id'] not in listed_mgs:
                    print("metagenome analysis started: " + mg['id'])
                    listed_mgs.add(mg['id'])
                if mg['status'] == "completed":
                    done_mg += 1
                elif mg['status'] == "suspend":
                    error_mg += 1
            if total_mg == (done_mg + error_mg):
                incomplete = False
    # display completed
    if json_out:
        mgs = []
        jhdl = open(json_out, 'w')
        for mg in data['status']['metagenomes']:
            if mg['status'] == "completed":
                print("metagenome analysis completed: " + mg['id'])
                mgdata = obj_from_url(API_URL + "/metagenome/" + mg['id'] + "?verbosity=full", auth=mgrast_auth['token'])
                mgs.append(mgdata)
            elif mg['status'] == "suspend":
                print("metagenome analysis failed: " + mg['id'])
                if "error" in mg:
                    print("[error] " + mg['error'])
        if len(mgs) == 1:
            # output single dict
            json.dump(mgs[0], jhdl)
        elif len(mgs) > 1:
            # output list of dicts
            json.dump(mgs, jhdl)
        else:
            # nothing completed: report the failure
            sys.stderr.write("ERROR: no metagenome(s) produced in submission %s\n" % sid)
            sys.exit(1)
        jhdl.close()
    else:
        pt_mg = PrettyTable(["metagenome ID", "metagenome name", "total status", "submit time"])
        for mg in data['status']['metagenomes']:
            pt_mg.add_row([mg['id'], mg['name'], mg['status'], mg['timestamp']])
        pt_mg.align = "l"
        print(pt_mg)
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--plot", dest="plot", action="store_true", default=False, help="display plot in ASCII art instead of table of numbers for: bp_profile, drisee, kmer, rarefaction, or taxa level")
    parser.add_argument("--stat", dest="stat", default='sequence', help="type of stat to display, use keyword: 'sequence', 'bp_profile', 'drisee', 'kmer', 'rarefaction', or taxa level name, default is sequence")
    # get inputs
    opts = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    # get auth
    token = get_auth_token(opts)
    # build call url
    url = opts.url + '/metagenome/' + opts.id + '?verbosity=stats&public=1'
    # retrieve / output data
    result = obj_from_url(url, auth=token)
    stats = result['statistics']
    if opts.stat == 'sequence':
        for s in sorted(stats['sequence_stats'].keys()):
            safe_print("%s\t%s\n" % (s, stats['sequence_stats'][s]))
    elif opts.stat == 'bp_profile':
        if not stats['qc']['bp_profile']['percents']['data']:
            sys.stderr.write("ERROR: %s has no bp_profile statistics\n" % opts.id)
            return 1
        if opts.plot:
            cols = stats['qc']['bp_profile']['percents']['columns'][1:5]
            data = [x[1:5] for x in stats['qc']['bp_profile']['percents']['data']]
            plot_histo(cols, data, 20, 80)
        else:
            safe_print("\t".join(stats['qc']['bp_profile']['percents']['columns']) + "\n")
            for d in stats['qc']['bp_profile']['percents']['data']:
                safe_print("\t".join(map(str, d)) + "\n")
    elif opts.stat == 'drisee':
        if not stats['qc']['drisee']['percents']['data']:
            sys.stderr.write("ERROR: %s has no drisee statistics\n" % opts.id)
            return 1
        if opts.plot:
            x, y = [], []
            for d in stats['qc']['drisee']['percents']['data']:
                x.append(d[0])
                y.append(d[7])
            aplotter.plot(x, y, output=sys.stdout, draw_axes=True, plot_slope=True, min_x=0, min_y=0)
        else:
            safe_print("\t".join(stats['qc']['drisee']['percents']['columns']) + "\n")
            for d in stats['qc']['drisee']['percents']['data']:
                safe_print("\t".join(map(str, d)) + "\n")
    elif opts.stat == 'kmer':
        if not stats['qc']['kmer']['15_mer']['data']:
            sys.stderr.write("ERROR: %s has no kmer statistics\n" % opts.id)
            return 1
        if opts.plot:
            x, y = [], []
            for d in stats['qc']['kmer']['15_mer']['data']:
                x.append(math.log(d[3], 10))
                y.append(math.log(d[0], 10))
            aplotter.plot(x, y, output=sys.stdout, draw_axes=True, plot_slope=True, min_x=0, min_y=0)
        else:
            safe_print("\t".join(stats['qc']['kmer']['15_mer']['columns']) + "\n")
            for d in stats['qc']['kmer']['15_mer']['data']:
                safe_print("\t".join(map(str, d)) + "\n")
    elif opts.stat == 'rarefaction':
        if not stats['rarefaction']:
            sys.stderr.write("ERROR: %s has no rarefaction statistics\n" % opts.id)
            return 1
        if opts.plot:
            x, y = [], []
            for r in stats['rarefaction']:
                x.append(int(r[0]))
                y.append(float(r[1]))
            aplotter.plot(x, y, output=sys.stdout, draw_axes=True, plot_slope=True, min_x=0, min_y=0)
        else:
            safe_print("x\ty\n")
            for r in stats['rarefaction']:
                safe_print("%s\t%s\n" % (str(r[0]), str(r[1])))
    elif opts.stat in stats['taxonomy']:
        ranked = sorted(stats['taxonomy'][opts.stat], key=lambda x: (-int(x[1]), x[0]))
        if opts.plot:
            # a list is needed here: map() objects are not subscriptable
            top = [int(x[1]) for x in ranked][:50]
            aplotter.plot(top, output=sys.stdout, draw_axes=True, plot_slope=False, min_x=0, min_y=0)
        else:
            for t in ranked:
                safe_print("%s\t%s\n" % (t[0], str(t[1])))
    else:
        sys.stderr.write("ERROR: invalid stat type\n")
        return 1
    return 0