Exemple #1
0
def file_download(auth, info, dirpath="."):
    """Download the file described by ``info`` into ``dirpath``.

    Args:
        auth: Credential forwarded to ``file_from_url`` as ``auth``.
        info: Mapping that provides the keys ``'file_name'``, ``'id'`` and
            ``'url'``.
        dirpath: Directory the file is written to (default: current
            directory).
    """
    target = os.path.join(dirpath, info['file_name'])
    # The context manager guarantees the handle is closed even when the
    # download raises part-way through.
    with open(target, 'w') as fhandle:
        sys.stdout.write("Downloading %s for %s ... " %
                         (info['file_name'], info['id']))
        file_from_url(info['url'], fhandle, auth=auth)
    sys.stdout.write("Done\n")
def file_download(auth, info, dirpath="."):
    """Download the file described by ``info`` into ``dirpath``.

    When ``info`` has no ``'url'`` key nothing is downloaded and no file is
    created; a warning containing ``repr(info)`` is written to stderr instead.

    Args:
        auth: Credential forwarded to ``file_from_url`` as ``auth``.
        info: Mapping that provides ``'file_name'`` and ``'id'``, and
            normally ``'url'``.
        dirpath: Directory the file is written to (default: current
            directory).
    """
    sys.stdout.write("Downloading %s for %s ... "%(info['file_name'], info['id']))
    if "url" in info:  # all is well
        # The context manager closes the handle even if the download raises.
        with open(os.path.join(dirpath, info['file_name']), 'w') as fhandle:
            file_from_url(info['url'], fhandle, auth=auth)
    else:   # Don't open empty file if download doesn't have url
        sys.stderr.write("WARNING Download info does not contain url.  Possibly datasets pre- human screening?\n" + repr(info)+"\n")
    sys.stdout.write("Done\n")
Exemple #3
0
def seqs_from_json(json_in, tmp_dir):
    """Download the sequence file(s) referenced by a JSON description file.

    The JSON object must contain either a ``'handle'`` entry ("simple") or
    both ``'handle_1'`` and ``'handle_2'`` entries ("pairjoin"). Each handle
    provides ``'url'``, ``'id'`` and ``'file_name'``; the file is fetched from
    ``<url>/node/<id>?download`` with ``file_from_url`` using
    ``mgrast_auth['token']`` and stored in ``tmp_dir`` under ``file_name``.

    Args:
        json_in: Path of the JSON file to read.
        tmp_dir: Directory the downloaded files are written to.

    Returns:
        tuple: ``(stype, files)`` where ``stype`` is ``"simple"`` or
        ``"pairjoin"`` and ``files`` lists the downloaded file paths in
        handle order.

    Exits the process with status 1 (after writing an error to stderr) when
    the file cannot be read or parsed, or when it has neither layout.
    """
    try:
        with open(json_in, 'r') as json_hdl:
            seq_obj = json.load(json_hdl)
    except (IOError, OSError, ValueError):
        # ValueError covers malformed JSON; IOError/OSError an unreadable file.
        # A bare except would also swallow KeyboardInterrupt/SystemExit.
        sys.stderr.write("ERROR: %s is invalid json\n"%json_in)
        sys.exit(1)

    if 'handle' in seq_obj:
        # simple type
        stype = "simple"
        handles = [seq_obj['handle']]
    elif ('handle_1' in seq_obj) and ('handle_2' in seq_obj):
        # pairjoin type
        stype = "pairjoin"
        handles = [seq_obj['handle_1'], seq_obj['handle_2']]
    else:
        sys.stderr.write("ERROR: input object %s is incorrect format\n"%json_in)
        sys.exit(1)

    # Resolve every url / destination first so a malformed handle is reported
    # before any download has started.
    targets = []
    for handle in handles:
        down_url = "%s/node/%s?download"%(handle['url'], handle['id'])
        down_file = os.path.join(tmp_dir, handle['file_name'])
        targets.append((down_url, down_file))

    files = []
    for down_url, down_file in targets:
        # The context manager closes the handle even if the download raises.
        with open(down_file, 'w') as down_hdl:
            file_from_url(down_url, down_hdl, auth=mgrast_auth['token'])
        files.append(down_file)
    return stype, files
def main(args):
    """Export the research-object bundle of a metagenome into a directory.

    Parses the command-line options, fetches the metagenome object and its
    research-object manifest from the API, and then either lists the manifest
    entries (``--list``) or downloads them. Entries whose uri contains
    "githubusercontent" are copied from a temporary clone of the MG-RAST
    pipeline repository; all others are fetched with ``file_from_url``.
    SHA-1 checksums are written to ``manifest-sha1.txt`` (paths starting with
    "data") and ``tagmanifest-sha1.txt`` (everything else).

    Args:
        args: Not used by this function; ``parser.parse_args()`` is called
            without arguments.

    Returns:
        int: 0 after listing or a completed export; 1 when the metagenome id
        is missing, the target directory does not exist, or the pipeline
        repository could not be cloned.
    """
    import subprocess  # function-scope import: only the clone step needs it

    def sha1_hex(text):
        # hashlib only accepts bytes; encode text first so hashing also
        # works where str is unicode (Python 3).
        if not isinstance(text, bytes):
            text = text.encode('utf-8')
        return hashlib.sha1(text).hexdigest()

    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp.format(VERSION, RO_VERSION), epilog=posthelp%AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="MG-RAST API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--metagenome", dest="metagenome", default=None, help="metagenome ID")
    parser.add_argument("--dir", dest="dir", default=".", help="directory to export to")
    parser.add_argument("--list", dest="list", action="store_true", default=False, help="list files in manifest")

    # get inputs
    opts = parser.parse_args()
    if not opts.metagenome:
        sys.stderr.write("ERROR: a metagenome id is required\n")
        return 1
    if not os.path.isdir(opts.dir):
        sys.stderr.write("ERROR: dir '%s' does not exist\n"%opts.dir)
        return 1

    # get auth
    token = get_auth_token(opts)

    # get mg info
    url = opts.url+'/metagenome/'+opts.metagenome
    mg  = obj_from_url(url, auth=token)

    # get manifest
    url  = opts.url+'/researchobject/manifest/'+opts.metagenome
    data = obj_from_url(url, auth=token)

    # just list
    if opts.list:
        pt = PrettyTable(["File Name", "Folder", "Media Type"])
        for info in data["aggregates"]:
            pt.add_row([info["bundledAs"]["filename"], info["bundledAs"]["folder"], info["mediatype"]])
        pt.align = "l"
        print(pt)
        return 0

    # get cwl files: clone the pipeline repository into a scratch directory
    temp_name = random_str(10)
    pipeline_dir = os.path.join(opts.dir, temp_name)
    try:
        # Argument list instead of a shell string, so a --dir value with
        # spaces or shell metacharacters reaches git verbatim.
        try:
            status = subprocess.call(["git", "clone", "https://github.com/MG-RAST/pipeline.git", pipeline_dir])
        except OSError as err:
            sys.stderr.write("ERROR: unable to run git: %s\n"%err)
            return 1
        if status != 0:
            sys.stderr.write("ERROR: git clone exited with status %d\n"%status)
            return 1

        # download manifest
        sha1s = []
        base = data["@context"][0]["@base"].strip('/')
        manifest_dir = os.path.join(opts.dir, base)
        os.mkdir(manifest_dir)
        data_str = json.dumps(data)
        with open(os.path.join(manifest_dir, data["manifest"]), 'w') as manifest_hdl:
            manifest_hdl.write(data_str)
        sha1s.append([ sha1_hex(data_str), os.path.join(base, data["manifest"]) ])

        # download aggregates
        for info in data["aggregates"]:
            filename = info["bundledAs"]["filename"]
            sys.stdout.write("Downloading %s ... "%(filename))
            folder = info["bundledAs"]["folder"].strip('/')
            folder_dir = os.path.join(opts.dir, folder)
            if not os.path.isdir(folder_dir):
                os.mkdir(folder_dir)
            dst = os.path.join(folder_dir, filename)
            if "githubusercontent" in info["uri"]:
                # Served from the local clone: take the path from "CWL" onward
                # and drop the relative directory prefixes from the text.
                pos = info["uri"].find("CWL")
                src = os.path.join(pipeline_dir, info["uri"][pos:])
                with open(src, 'r') as src_hdl:
                    text = src_hdl.read()
                text = text.replace('../Inputs/', '').replace('../Tools/', '').replace('../Workflows/', '')
                if dst.endswith('job.yaml'):
                    text = edit_input(text, mg)
                with open(dst, 'w') as dst_hdl:
                    dst_hdl.write(text)
                sha1s.append([ sha1_hex(text), os.path.join(folder, filename) ])
            else:
                with open(dst, 'w') as fh:
                    s1 = file_from_url(info["uri"], fh, auth=token, sha1=True)
                sha1s.append([ s1, os.path.join(folder, filename) ])
            sys.stdout.write("Done\n")

        # output sha1, sorted by bundle path
        sha1s.sort(key=lambda x: x[1])
        with open(os.path.join(opts.dir, "manifest-sha1.txt"), 'w') as mansha1:
            with open(os.path.join(opts.dir, "tagmanifest-sha1.txt"), 'w') as tagsha1:
                for digest, path in sha1s:
                    if path.startswith('data'):
                        mansha1.write("%s\t%s\n"%(digest, path))
                    else:
                        tagsha1.write("%s\t%s\n"%(digest, path))
    finally:
        # cleanup: remove the scratch clone on success and on failure alike
        if os.path.isdir(pipeline_dir):
            shutil.rmtree(pipeline_dir)

    return 0
Exemple #5
0
def main(args):
    """Export the research-object bundle of a metagenome into a directory.

    Parses the command-line options, fetches the metagenome object and its
    research-object manifest from the API, and then either lists the manifest
    entries (``--list``) or downloads them. Entries whose uri contains
    "githubusercontent" are copied from a temporary clone of the MG-RAST
    pipeline repository; all others are fetched with ``file_from_url``.
    SHA-1 checksums are written to ``manifest-sha1.txt`` (paths starting with
    "data") and ``tagmanifest-sha1.txt`` (everything else).

    Args:
        args: Not used by this function; ``parser.parse_args()`` is called
            without arguments.

    Returns:
        int: 0 after listing or a completed export; 1 when the metagenome id
        is missing, the target directory does not exist, or the pipeline
        repository could not be cloned.
    """
    import subprocess  # function-scope import: only the clone step needs it

    def sha1_hex(text):
        # hashlib only accepts bytes; encode text first so hashing also
        # works where str is unicode (Python 3).
        if not isinstance(text, bytes):
            text = text.encode('utf-8')
        return hashlib.sha1(text).hexdigest()

    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp.format(VERSION, RO_VERSION),
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url",
                        dest="url",
                        default=API_URL,
                        help="MG-RAST API url")
    parser.add_argument("--user",
                        dest="user",
                        default=None,
                        help="OAuth username")
    parser.add_argument("--passwd",
                        dest="passwd",
                        default=None,
                        help="OAuth password")
    parser.add_argument("--token",
                        dest="token",
                        default=None,
                        help="OAuth token")
    parser.add_argument("--metagenome",
                        dest="metagenome",
                        default=None,
                        help="metagenome ID")
    parser.add_argument("--dir",
                        dest="dir",
                        default=".",
                        help="directory to export to")
    parser.add_argument("--list",
                        dest="list",
                        action="store_true",
                        default=False,
                        help="list files in manifest")

    # get inputs
    opts = parser.parse_args()
    if not opts.metagenome:
        sys.stderr.write("ERROR: a metagenome id is required\n")
        return 1
    if not os.path.isdir(opts.dir):
        sys.stderr.write("ERROR: dir '%s' does not exist\n" % opts.dir)
        return 1

    # get auth
    token = get_auth_token(opts)

    # get mg info
    url = opts.url + '/metagenome/' + opts.metagenome
    mg = obj_from_url(url, auth=token)

    # get manifest
    url = opts.url + '/researchobject/manifest/' + opts.metagenome
    data = obj_from_url(url, auth=token)

    # just list
    if opts.list:
        pt = PrettyTable(["File Name", "Folder", "Media Type"])
        for info in data["aggregates"]:
            pt.add_row([
                info["bundledAs"]["filename"], info["bundledAs"]["folder"],
                info["mediatype"]
            ])
        pt.align = "l"
        print(pt)
        return 0

    # get cwl files: clone the pipeline repository into a scratch directory
    temp_name = random_str(10)
    pipeline_dir = os.path.join(opts.dir, temp_name)
    try:
        # Argument list instead of a shell string, so a --dir value with
        # spaces or shell metacharacters reaches git verbatim.
        clone_cmd = [
            "git", "clone", "https://github.com/MG-RAST/pipeline.git",
            pipeline_dir
        ]
        try:
            status = subprocess.call(clone_cmd)
        except OSError as err:
            sys.stderr.write("ERROR: unable to run git: %s\n" % err)
            return 1
        if status != 0:
            sys.stderr.write("ERROR: git clone exited with status %d\n" %
                             status)
            return 1

        # download manifest
        sha1s = []
        base = data["@context"][0]["@base"].strip('/')
        manifest_dir = os.path.join(opts.dir, base)
        os.mkdir(manifest_dir)
        data_str = json.dumps(data)
        manifest_path = os.path.join(manifest_dir, data["manifest"])
        with open(manifest_path, 'w') as manifest_hdl:
            manifest_hdl.write(data_str)
        sha1s.append([
            sha1_hex(data_str),
            os.path.join(base, data["manifest"])
        ])

        # download aggregates
        for info in data["aggregates"]:
            filename = info["bundledAs"]["filename"]
            sys.stdout.write("Downloading %s ... " % (filename))
            folder = info["bundledAs"]["folder"].strip('/')
            folder_dir = os.path.join(opts.dir, folder)
            if not os.path.isdir(folder_dir):
                os.mkdir(folder_dir)
            dst = os.path.join(folder_dir, filename)
            if "githubusercontent" in info["uri"]:
                # Served from the local clone: take the path from "CWL"
                # onward and drop the relative directory prefixes.
                pos = info["uri"].find("CWL")
                src = os.path.join(pipeline_dir, info["uri"][pos:])
                with open(src, 'r') as src_hdl:
                    text = src_hdl.read()
                text = text.replace('../Inputs/', '').replace(
                    '../Tools/', '').replace('../Workflows/', '')
                if dst.endswith('job.yaml'):
                    text = edit_input(text, mg)
                with open(dst, 'w') as dst_hdl:
                    dst_hdl.write(text)
                sha1s.append([
                    sha1_hex(text),
                    os.path.join(folder, filename)
                ])
            else:
                with open(dst, 'w') as fh:
                    s1 = file_from_url(info["uri"],
                                       fh,
                                       auth=token,
                                       sha1=True)
                sha1s.append([s1, os.path.join(folder, filename)])
            sys.stdout.write("Done\n")

        # output sha1, sorted by bundle path
        sha1s.sort(key=lambda x: x[1])
        mansha1_path = os.path.join(opts.dir, "manifest-sha1.txt")
        tagsha1_path = os.path.join(opts.dir, "tagmanifest-sha1.txt")
        with open(mansha1_path, 'w') as mansha1:
            with open(tagsha1_path, 'w') as tagsha1:
                for digest, path in sha1s:
                    if path.startswith('data'):
                        mansha1.write("%s\t%s\n" % (digest, path))
                    else:
                        tagsha1.write("%s\t%s\n" % (digest, path))
    finally:
        # cleanup: remove the scratch clone on success and on failure alike
        if os.path.isdir(pipeline_dir):
            shutil.rmtree(pipeline_dir)

    return 0
Exemple #6
0
def file_download(auth, info, dirpath="."):
    """Download the file described by ``info`` into ``dirpath``.

    Args:
        auth: Credential forwarded to ``file_from_url`` as ``auth``.
        info: Mapping that provides the keys ``'file_name'``, ``'id'`` and
            ``'url'``.
        dirpath: Directory the file is written to (default: current
            directory).
    """
    # The context manager closes the handle even if the download raises.
    with open(os.path.join(dirpath, info['file_name']), 'w') as fhandle:
        sys.stdout.write("Downloading %s for %s ... "%(info['file_name'], info['id']))
        file_from_url(info['url'], fhandle, auth=auth)
    sys.stdout.write("Done\n")