def file_download(auth, info, dirpath="."):
    """Download one file described by *info* into *dirpath*.

    Parameters:
        auth: auth token forwarded to file_from_url.
        info: dict with 'file_name', 'id' and 'url' keys describing the download.
        dirpath: destination directory (default: current directory).

    Writes progress to stdout. Raises KeyError if a required key is missing
    and propagates any error from file_from_url.
    """
    # 'with' guarantees the handle is closed even when file_from_url raises;
    # the original leaked the handle (and left a partial file open) on error.
    with open(os.path.join(dirpath, info['file_name']), 'w') as fhandle:
        sys.stdout.write("Downloading %s for %s ... " % (info['file_name'], info['id']))
        file_from_url(info['url'], fhandle, auth=auth)
    sys.stdout.write("Done\n")
def file_download(auth, info, dirpath="."):
    """Download one file described by *info* into *dirpath*, tolerating
    entries that have no 'url'.

    Parameters:
        auth: auth token forwarded to file_from_url.
        info: dict with 'file_name' and 'id'; 'url' may be absent (e.g.
              datasets not yet through human screening), in which case a
              warning is printed and no file is created.
        dirpath: destination directory (default: current directory).
    """
    sys.stdout.write("Downloading %s for %s ... " % (info['file_name'], info['id']))
    if "url" in info:  # membership tests directly on the dict; .keys() was redundant
        # 'with' closes the handle even if the download raises mid-stream.
        with open(os.path.join(dirpath, info['file_name']), 'w') as fhandle:
            file_from_url(info['url'], fhandle, auth=auth)
    else:
        # Don't open an empty file if the download info has no url.
        sys.stderr.write("WARNING Download info does not contain url. Possibly datasets pre- human screening?\n" + repr(info) + "\n")
    sys.stdout.write("Done\n")
def seqs_from_json(json_in, tmp_dir):
    """Download the sequence file(s) described by a JSON input object.

    Parameters:
        json_in: path to a JSON file containing either a 'handle' key
                 (simple type) or 'handle_1' + 'handle_2' keys (pairjoin).
        tmp_dir: directory to download the sequence files into.

    Returns:
        (stype, files) where stype is "simple" or "pairjoin" and files is
        the list of local paths downloaded.

    Exits the process (status 1) on unreadable/invalid JSON or on an
    object that matches neither layout.
    """
    def _fetch(handle):
        # Download one node described by a handle dict; return the local path.
        down_url = "%s/node/%s?download" % (handle['url'], handle['id'])
        down_file = os.path.join(tmp_dir, handle['file_name'])
        # 'with' closes the handle even if the transfer fails part-way.
        with open(down_file, 'w') as down_hdl:
            file_from_url(down_url, down_hdl, auth=mgrast_auth['token'])
        return down_file

    try:
        # Close the input file promptly; the original leaked the handle.
        with open(json_in, 'r') as in_hdl:
            seq_obj = json.load(in_hdl)
    except (IOError, OSError, ValueError):
        # Was a bare 'except:' — catch only read/parse failures so that
        # SystemExit/KeyboardInterrupt still propagate.
        sys.stderr.write("ERROR: %s is invalid json\n" % json_in)
        sys.exit(1)
    # simple type
    if 'handle' in seq_obj:
        return "simple", [_fetch(seq_obj['handle'])]
    # pairjoin type
    elif ('handle_1' in seq_obj) and ('handle_2' in seq_obj):
        return "pairjoin", [_fetch(seq_obj['handle_1']), _fetch(seq_obj['handle_2'])]
    else:
        sys.stderr.write("ERROR: input object %s is incorrect format\n" % json_in)
        sys.exit(1)
def main(args):
    """Export a metagenome's research-object bag (manifest + files) to a dir.

    Command-line driven: requires --metagenome and an existing --dir.
    With --list, only prints the manifest contents. Returns 0 on success,
    1 on bad arguments.
    """
    # Let the pre-formatted help text through argparse unmodified.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp.format(VERSION, RO_VERSION), epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="MG-RAST API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--metagenome", dest="metagenome", default=None, help="metagenome ID")
    parser.add_argument("--dir", dest="dir", default=".", help="directory to export to")
    parser.add_argument("--list", dest="list", action="store_true", default=False, help="list files in manifest")
    # get inputs
    opts = parser.parse_args()
    if not opts.metagenome:
        sys.stderr.write("ERROR: a metagenome id is required\n")
        return 1
    if not os.path.isdir(opts.dir):
        sys.stderr.write("ERROR: dir '%s' does not exist\n" % opts.dir)
        return 1
    # get auth
    token = get_auth_token(opts)
    # get mg info
    url = opts.url + '/metagenome/' + opts.metagenome
    mg = obj_from_url(url, auth=token)
    # get manifest
    url = opts.url + '/researchobject/manifest/' + opts.metagenome
    data = obj_from_url(url, auth=token)
    # just list
    if opts.list:
        pt = PrettyTable(["File Name", "Folder", "Media Type"])
        for info in data["aggregates"]:
            pt.add_row([info["bundledAs"]["filename"], info["bundledAs"]["folder"], info["mediatype"]])
        pt.align = "l"
        print(pt)
        return 0
    # get cwl files: clone into a random temp subdir, removed at the end.
    # NOTE(review): shells out via os.system; pipeline_dir is built from a
    # locally generated random string, so no untrusted input reaches the shell.
    temp_name = random_str(10)
    pipeline_dir = os.path.join(opts.dir, temp_name)
    git_clone = "git clone https://github.com/MG-RAST/pipeline.git " + pipeline_dir
    os.system(git_clone)
    # download manifest
    sha1s = []
    base = data["@context"][0]["@base"].strip('/')
    manifest_dir = os.path.join(opts.dir, base)
    # Guard like the aggregate loop below does, so a re-run doesn't crash
    # on an existing directory.
    if not os.path.isdir(manifest_dir):
        os.mkdir(manifest_dir)
    data_str = json.dumps(data)
    with open(os.path.join(manifest_dir, data["manifest"]), 'w') as man_hdl:
        man_hdl.write(data_str)
    # hashlib.sha1 requires bytes on Python 3; json.dumps output is ASCII,
    # so encoding as utf-8 is safe on both 2 and 3.
    sha1s.append([hashlib.sha1(data_str.encode('utf-8')).hexdigest(), os.path.join(base, data["manifest"])])
    # download aggregates
    for info in data["aggregates"]:
        sys.stdout.write("Downloading %s ... " % (info["bundledAs"]["filename"]))
        folder = info["bundledAs"]["folder"].strip('/')
        folder_dir = os.path.join(opts.dir, folder)
        if not os.path.isdir(folder_dir):
            os.mkdir(folder_dir)
        rel_name = os.path.join(folder, info["bundledAs"]["filename"])
        if "githubusercontent" in info["uri"]:
            # CWL files come from the cloned pipeline repo; flatten the
            # relative ../Inputs|Tools|Workflows paths for a self-contained bag.
            pos = info["uri"].find("CWL")
            src = os.path.join(pipeline_dir, info["uri"][pos:])
            dst = os.path.join(folder_dir, info["bundledAs"]["filename"])
            with open(src, 'r') as src_hdl:
                text = src_hdl.read().replace('../Inputs/', '').replace('../Tools/', '').replace('../Workflows/', '')
            if dst.endswith('job.yaml'):
                text = edit_input(text, mg)
            with open(dst, 'w') as dst_hdl:
                dst_hdl.write(text)
            sha1s.append([hashlib.sha1(text.encode('utf-8')).hexdigest(), rel_name])
        else:
            # file_from_url computes the sha1 while streaming (sha1=True).
            with open(os.path.join(folder_dir, info["bundledAs"]["filename"]), 'w') as fh:
                s1 = file_from_url(info["uri"], fh, auth=token, sha1=True)
            sha1s.append([s1, rel_name])
        sys.stdout.write("Done\n")
    # output sha1: payload files (under data/) go to manifest-sha1.txt,
    # everything else to tagmanifest-sha1.txt.
    sha1s.sort(key=lambda x: x[1])
    with open(os.path.join(opts.dir, "manifest-sha1.txt"), 'w') as mansha1, \
         open(os.path.join(opts.dir, "tagmanifest-sha1.txt"), 'w') as tagsha1:
        for s1 in sha1s:
            if s1[1].startswith('data'):
                mansha1.write("%s\t%s\n" % (s1[0], s1[1]))
            else:
                tagsha1.write("%s\t%s\n" % (s1[0], s1[1]))
    # cleanup
    shutil.rmtree(pipeline_dir)
    return 0
def main(args):
    """Export a metagenome's research-object bag (manifest + files) to a dir.

    Command-line driven: requires --metagenome and an existing --dir.
    With --list, only prints the manifest contents. Returns 0 on success,
    1 on bad arguments.
    """
    # Let the pre-formatted help text through argparse unmodified.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp.format(VERSION, RO_VERSION), epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="MG-RAST API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--metagenome", dest="metagenome", default=None, help="metagenome ID")
    parser.add_argument("--dir", dest="dir", default=".", help="directory to export to")
    parser.add_argument("--list", dest="list", action="store_true", default=False, help="list files in manifest")
    # get inputs
    opts = parser.parse_args()
    if not opts.metagenome:
        sys.stderr.write("ERROR: a metagenome id is required\n")
        return 1
    if not os.path.isdir(opts.dir):
        sys.stderr.write("ERROR: dir '%s' does not exist\n" % opts.dir)
        return 1
    # get auth
    token = get_auth_token(opts)
    # get mg info (used later by edit_input to fill in the job template)
    url = opts.url + '/metagenome/' + opts.metagenome
    mg = obj_from_url(url, auth=token)
    # get manifest
    url = opts.url + '/researchobject/manifest/' + opts.metagenome
    data = obj_from_url(url, auth=token)
    # just list: print a table of the manifest entries and stop
    if opts.list:
        pt = PrettyTable(["File Name", "Folder", "Media Type"])
        for info in data["aggregates"]:
            pt.add_row([
                info["bundledAs"]["filename"], info["bundledAs"]["folder"],
                info["mediatype"]
            ])
        pt.align = "l"
        print(pt)
        return 0
    # get cwl files: clone the pipeline repo into a random temp subdir,
    # removed again at the end of the run
    temp_name = random_str(10)
    pipeline_dir = os.path.join(opts.dir, temp_name)
    git_clone = "git clone https://github.com/MG-RAST/pipeline.git " + pipeline_dir
    os.system(git_clone)
    # download manifest
    sha1s = []
    base = data["@context"][0]["@base"].strip('/')
    manifest_dir = os.path.join(opts.dir, base)
    os.mkdir(manifest_dir)
    data_str = json.dumps(data)
    open(os.path.join(manifest_dir, data["manifest"]), 'w').write(data_str)
    # NOTE(review): hashlib.sha1(data_str) requires bytes under Python 3 —
    # this line presumably targets Python 2; confirm before porting.
    sha1s.append([
        hashlib.sha1(data_str).hexdigest(),
        os.path.join(base, data["manifest"])
    ])
    # download aggregates
    for info in data["aggregates"]:
        sys.stdout.write("Downloading %s ... " % (info["bundledAs"]["filename"]))
        folder = info["bundledAs"]["folder"].strip('/')
        folder_dir = os.path.join(opts.dir, folder)
        if not os.path.isdir(folder_dir):
            os.mkdir(folder_dir)
        if "githubusercontent" in info["uri"]:
            # CWL files come from the cloned repo; flatten the relative
            # ../Inputs|Tools|Workflows paths for a self-contained bag
            pos = info["uri"].find("CWL")
            src = os.path.join(pipeline_dir, info["uri"][pos:])
            dst = os.path.join(folder_dir, info["bundledAs"]["filename"])
            text = open(src, 'r').read().replace('../Inputs/', '').replace(
                '../Tools/', '').replace('../Workflows/', '')
            if dst.endswith('job.yaml'):
                text = edit_input(text, mg)
            open(dst, 'w').write(text)
            # NOTE(review): same bytes-vs-str caveat as above for sha1(text)
            sha1s.append([
                hashlib.sha1(text).hexdigest(),
                os.path.join(folder, info["bundledAs"]["filename"])
            ])
        else:
            # file_from_url computes the sha1 while streaming (sha1=True)
            fh = open(os.path.join(folder_dir, info["bundledAs"]["filename"]),
                      'w')
            s1 = file_from_url(info["uri"], fh, auth=token, sha1=True)
            fh.close()
            sha1s.append(
                [s1, os.path.join(folder, info["bundledAs"]["filename"])])
        sys.stdout.write("Done\n")
    # output sha1: payload files (under data/) go to manifest-sha1.txt,
    # everything else to tagmanifest-sha1.txt
    mansha1 = open(os.path.join(opts.dir, "manifest-sha1.txt"), 'w')
    tagsha1 = open(os.path.join(opts.dir, "tagmanifest-sha1.txt"), 'w')
    sha1s.sort(key=lambda x: x[1])
    for s1 in sha1s:
        if s1[1].startswith('data'):
            mansha1.write("%s\t%s\n" % (s1[0], s1[1]))
        else:
            tagsha1.write("%s\t%s\n" % (s1[0], s1[1]))
    mansha1.close()
    tagsha1.close()
    # cleanup
    shutil.rmtree(pipeline_dir)
    return 0
def file_download(auth, info, dirpath="."):
    """Fetch the file named by *info* from info['url'] into *dirpath*.

    Progress is reported on stdout; *auth* is forwarded to file_from_url.
    """
    target = os.path.join(dirpath, info['file_name'])
    out = open(target, 'w')
    sys.stdout.write("Downloading %s for %s ... " % (info['file_name'], info['id']))
    file_from_url(info['url'], out, auth=auth)
    out.close()
    sys.stdout.write("Done\n")