Example #1
def validate(fformat, files, get_info=False):
    for f in files:
        data = obj_from_url(API_URL + "/inbox/" + f,
                            auth=mgrast_auth['token'],
                            debug=DEBUG)
        if ('data_type' in data) and (data['data_type'] == fformat):
            print("%s (%s) is a valid %s file" %
                  (data['filename'], f, fformat))
        elif fformat == 'sequence':
            if data['stats_info']['file_type'] in ['fasta', 'fastq']:
                info = obj_from_url(API_URL + "/inbox/stats/" + f,
                                    auth=mgrast_auth['token'],
                                    debug=DEBUG)
                print(info['status'].replace("stats computation",
                                             "validation"))
            else:
                sys.stderr.write(
                    "ERROR: %s (%s) is not a fastq or fasta file\n" %
                    (data['filename'], f))
        elif fformat == 'metadata':
            if data['stats_info']['file_type'] == 'excel':
                info = obj_from_url(API_URL + "/inbox/validate/" + f,
                                    auth=mgrast_auth['token'],
                                    debug=DEBUG)
                if get_info:
                    return info
                else:
                    print(info['status'])
                    if info['status'].startswith('invalid'):
                        print(info['error'])
            else:
                sys.stderr.write("ERROR: %s (%s) is not a spreadsheet file\n" %
                                 (data['filename'], f))
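A minimal usage sketch, assuming the module-level API_URL, mgrast_auth, and DEBUG globals that validate() relies on are already initialized (e.g. by a login step); the file ID is a hypothetical placeholder:

# inbox file IDs (not file names), as returned by an upload step
file_ids = ["<inbox-file-id>"]
validate("sequence", file_ids)
# for Excel metadata files, get_info=True returns the parsed
# validation object instead of printing it
info = validate("metadata", file_ids, get_info=True)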
Example #2
def upload(files):
    for f in files:
        attr = json.dumps({
            "type": "inbox",
            "id": mgrast_auth['id'],
            "user": mgrast_auth['login'],
            "email": mgrast_auth['email']
        })
        # get format
        if f.endswith(".gz"):
            fformat = "gzip"
        elif f.endswith(".bz2"):
            fformat = "bzip2"
        else:
            fformat = "upload"
        # POST to shock
        data = {"attributes_str": attr}
        result = post_file(SHOCK_URL + "/node",
                           fformat,
                           f,
                           data=data,
                           auth=mgrast_auth['token'],
                           debug=DEBUG)
        # compute file info
        info = obj_from_url(API_URL + "/inbox/info/" + result['data']['id'],
                            auth=mgrast_auth['token'],
                            debug=DEBUG)
        print(info['status'])
        # compute sequence stats
        if info['stats_info']['file_type'] in ['fasta', 'fastq']:
            stats = obj_from_url(API_URL + "/inbox/stats/" +
                                 result['data']['id'],
                                 auth=mgrast_auth['token'],
                                 debug=DEBUG)
            print(stats['status'].replace("stats computation", "validation"))
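post_file() is a helper from the surrounding module, not shown here. A rough, non-authoritative sketch of the multipart POST it presumably performs, written with the requests library; the form-field name and the Authorization header format are assumptions, not confirmed by this example:

import requests

def post_file_sketch(url, field, path, data=None, auth=None, debug=False):
    # assumption: Shock expects the token in an Authorization header
    headers = {"Authorization": "mgrast " + auth} if auth else {}
    with open(path, "rb") as fh:
        resp = requests.post(url, data=data or {}, files={field: fh},
                             headers=headers)
    resp.raise_for_status()
    return resp.json()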
Example #3
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%(VERSION, search_opts), epilog=posthelp%AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--limit", dest="limit", type=int, default=15, help="Number of results to show, if > 50 will use paginated queries to get all, default 15")
    parser.add_argument("--order", dest="order", default=None, help="field metagenomes are ordered by, default is no ordering")
    parser.add_argument("--direction", dest="direction", default="asc", help="direction of order. 'asc' for ascending order, 'desc' for descending order, default is asc")
    parser.add_argument("--public", dest="public", action="store_true", default=False, help="return both private and pubulic data if using authenticated search, default is private only.  Non-authenticated search only returns public.")
    parser.add_argument("--match", dest="match", default="all", help="search logic. 'all' for metagenomes that match all search parameters, 'any' for metagenomes that match any search parameters, default is all")
    parser.add_argument("--status", dest="status", default="public", help="types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public")
    parser.add_argument("--verbosity", dest="verbosity", default='minimal', help="amount of information to display. use keyword 'minimal' for id and name, use keyword 'full' for MIxS GSC metadata, default is minimal")
    for sfield in SEARCH_FIELDS:
        parser.add_argument("--"+sfield, dest=sfield, default=None, help="search parameter: query string for "+sfield)
    
    # get inputs
    opts = parser.parse_args()
    
    # get auth
    token = get_auth_token(opts)
    
    # build call url
    total = 0
    maxLimit = 50
    params = [ ('limit', opts.limit if opts.limit < maxLimit else maxLimit),
               ('public', 'yes' if opts.public or (not token) else 'no') ]
    for sfield in SEARCH_FIELDS:
        if hasattr(opts, sfield) and getattr(opts, sfield):
            params.append((sfield, getattr(opts, sfield)))
    if opts.order:
        params.append(('order', opts.order))
        params.append(('direction', opts.direction))
    url = opts.url+'/search?'+urlencode(params, True)
    
    # retrieve data
    fields = ['metagenome_id', 'public'] + SEARCH_FIELDS
    result = obj_from_url(url, auth=token)
    found = len(result['data'])
    if found == 0:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0
    total += found
    
    # output header
    safe_print("\t".join(fields)+"\n")
    # output rows
    display_search(result['data'], fields)
    
    while ('next' in result) and result['next'] and (total < opts.limit):
        url = result['next']
        result = obj_from_url(url, auth=token)
        total += len(result['data'])
        display_search(result['data'], fields)
    
    return 0
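The limit handling above caps each request at 50 results and then follows the API's 'next' links until enough rows are collected; the same cursor-pagination loop, isolated as a generator (obj_from_url as in the example):

def paginated_rows(url, auth=None, limit=15):
    # yield rows across pages until `limit` rows are seen
    # or the API stops offering a 'next' link
    seen = 0
    result = obj_from_url(url, auth=auth)
    while result['data']:
        for row in result['data']:
            yield row
            seen += 1
            if seen >= limit:
                return
        if not result.get('next'):
            return
        result = obj_from_url(result['next'], auth=auth)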
Example #4
def test_heartbeat():
    URI0 = API_URL + "heartbeat"
    obj = obj_from_url(URI0)
    # the heartbeat resource lists its known services as rows of
    # [name, description]; collect just the names
    service_param = obj["requests"][1]["parameters"]["required"]["service"][1]
    SERVICES = [entry[0] for entry in service_param]
    for service in SERVICES:
        URI = API_URL + "heartbeat/" + service
        obj_detail = obj_from_url(URI)
        assert obj_detail["status"] == 1, "Failed heartbeat on " + service
    return 1
Example #6
def ko2roles(opts, sshier, koid):
    ko_anno = obj_from_url(opts.url+'/m5nr/accession/'+koid+'?version=1&source=KO&limit=1000')
    ko_md5s = set( map(lambda x: x['md5'], ko_anno['data']) )
    if len(ko_md5s) == 0:
        return [], []
    ko_post = {'version': 1, 'source': 'Subsystems', 'data': list(ko_md5s), 'limit': 10000}
    ss_anno = obj_from_url(opts.url+'/m5nr/md5', data=json.dumps(ko_post, separators=(',',':')))
    roles   = set()
    for ss in ss_anno['data']:
        if ss['accession'] in sshier:
            roles.add( sshier[ss['accession']]['level4'] )
    return list(roles), list(ko_md5s)
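ko2roles() is a two-hop M5NR lookup: KO accession -> protein md5 checksums -> Subsystems annotations, reduced to level-4 role names. A hypothetical call ('K00001' is an example KEGG Orthology ID; opts and the ss_hier accession-to-hierarchy map are built as in Example #14):

roles, md5s = ko2roles(opts, ss_hier, 'K00001')
print("%d Subsystems roles via %d shared md5s" % (len(roles), len(md5s)))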
Example #8
def upload_archive(afile):
    attr = json.dumps({
        "type": "inbox",
        "id": mgrast_auth['id'],
        "user": mgrast_auth['login'],
        "email": mgrast_auth['email']
    })
    # get format
    if afile.endswith(".tar.gz"):
        aformat = "tar.gz"
    elif afile.endswith(".tar.bz2"):
        aformat = "tar.bz2"
    elif afile.endswith(".tar"):
        aformat = "tar"
    elif afile.endswith(".zip"):
        aformat = "zip"
    else:
        sys.stderr.write("ERROR: input file %s is incorrect archive format\n" %
                         afile)
        sys.exit(1)
    # POST to shock / unpack
    data = {"file_name": os.path.basename(afile), "attributes_str": attr}
    result = post_file(SHOCK_URL + "/node",
                       "upload",
                       afile,
                       data=data,
                       auth=mgrast_auth['token'],
                       debug=DEBUG)
    data = {
        "unpack_node": result['data']['id'],
        "archive_format": aformat,
        "attributes_str": attr
    }
    unpack = obj_from_url(SHOCK_URL + "/node",
                          data=data,
                          auth=mgrast_auth['token'],
                          debug=DEBUG)
    # process new nodes
    for node in unpack['data']:
        # compute file info
        info = obj_from_url(API_URL + "/inbox/info/" + node['id'],
                            auth=mgrast_auth['token'],
                            debug=DEBUG)
        print(info['status'])
        # compute sequence stats
        if info['stats_info']['file_type'] in ['fasta', 'fastq']:
            stats = obj_from_url(API_URL + "/inbox/stats/" + node['id'],
                                 auth=mgrast_auth['token'],
                                 debug=DEBUG)
            print(stats['status'].replace("stats computation", "validation"))
Example #9
def rename(fid, fname):
    data = {"name": fname, "file": fid}
    result = obj_from_url(API_URL + "/inbox/rename",
                          data=json.dumps(data),
                          auth=mgrast_auth['token'],
                          debug=DEBUG)
    print(result['status'])
Example #10
def delete(files):
    for f in files:
        result = obj_from_url(API_URL + "/inbox/" + f,
                              auth=mgrast_auth['token'],
                              method='DELETE',
                              debug=DEBUG)
        print(result['status'])
Example #11
def main(args):
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--ids", dest="ids", default=None, help="comma seperated list of KBase Metagenome IDs")
    parser.add_option("", "--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--level", dest="level", default='species', help="taxon level to retrieve abundances for, default is species")
    parser.add_option("", "--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")
    
    # get inputs
    (opts, args) = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    
    # get auth
    token = get_auth_token(opts)
    
    # build url / retrieve data / output data
    id_list = opts.ids.split(',')
    params  = [ ('level', opts.level), ('source', opts.source) ]
    for i in id_list:
        url  = opts.url+'/compute/alphadiversity/'+i+'?'+urlencode(params, True)
        data = obj_from_url(url, auth=token)
        safe_print("%s\t%s\n" %(i, data['data']))
    
    return 0
Example #12
def listall():
    data = obj_from_url(API_URL+"/submission/list", auth=mgrast_auth['token'])
    submissions = sorted(data['submissions'], key=itemgetter('timestamp'))
    pt = PrettyTable(["ID", "type", "status", "time"])
    for s in submissions:
        row = [ s['id'], s['type'], s['status'], s['timestamp'] ]
        pt.add_row(row)
    pt.align = "l"
    print(pt)
Example #13
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--ids",
                        dest="ids",
                        default=None,
                        help="comma seperated list of KBase Metagenome IDs")
    parser.add_argument("--url",
                        dest="url",
                        default=API_URL,
                        help="communities API url")
    parser.add_argument("--user",
                        dest="user",
                        default=None,
                        help="OAuth username")
    parser.add_argument("--passwd",
                        dest="passwd",
                        default=None,
                        help="OAuth password")
    parser.add_argument("--token",
                        dest="token",
                        default=None,
                        help="OAuth token")
    parser.add_argument(
        "--level",
        dest="level",
        default='species',
        help="taxon level to retrieve abundances for, default is species")
    parser.add_argument(
        "--source",
        dest="source",
        default='SEED',
        help="datasource to filter results by, default is SEED")

    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url / retrieve data / output data
    id_list = opts.ids.split(',')
    params = [('level', opts.level), ('source', opts.source)]
    for i in id_list:
        url = opts.url + '/compute/alphadiversity/' + i + '?' + urlencode(
            params, True)
        data = obj_from_url(url, auth=token)
        safe_print("%s\t%s\n" % (i, data['data']))

    return 0
Example #14
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID, required")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_argument("--output", dest="output", default='text', help="output format: 'text' for tabbed table, 'biom' for BIOM format, default is text")
    
    # get inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.output not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid output format\n")
        return 1
    
    # get biom
    try:
        indata = sys.stdin.read() if opts.input == '-' else open(opts.input, 'r').read()
        biom = json.loads(indata)
        rows, cols, matrix = biom_to_matrix(biom)
    except:
        sys.stderr.write("ERROR: unable to load input biom data\n")
        return 1
    
    # get SS hierarchy
    ss_hier = dict([ (x['accession'], x) for x in obj_from_url(opts.url+'/m5nr/ontology?version=1&source=Subsystems')['data'] ])
    
    # biom KO -> SS
    ssrows = []
    ssmatrix = []
    for r, rid in enumerate(rows):
        roles, md5s = ko2roles(opts, ss_hier, rid)
        if not roles:
            continue
        for role in roles:
            fig_ids = role2figs(opts, role, md5s)
            if opts.output == 'text':
                # text output: feature list, function, abundance for function, avg evalue for function, organism
                safe_print("%s\t%s\t%d\t%.2e\t%s\n" %(",".join(fig_ids), role, matrix[r][0], 0, 'glob'))
            elif opts.output == 'biom':
                ssrows.append({'id': role, 'metadata': {'accession': fig_ids}})
                ssmatrix.append(matrix[r])
    
    # biom output
    if opts.output == 'biom':
        biom['matrix_type'] = 'sparse'
        biom['shape'][0] = len(ssrows)
        biom['rows'] = ssrows
        biom['data'] = ssmatrix
        safe_print(json.dumps(biom)+"\n")
    
    return 0
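biom_to_matrix() and the in-place edits above operate on a BIOM 1.0 JSON document; a reduced sketch of just the fields this code reads and writes (real files carry more metadata):

biom_sketch = {
    "id": "example",
    "type": "Function table",
    "matrix_type": "dense",
    "shape": [2, 1],        # [number of rows, number of columns]
    "rows": [{"id": "K00001", "metadata": None},
             {"id": "K00002", "metadata": None}],
    "columns": [{"id": "mgm4440284.3", "metadata": None}],
    "data": [[10], [3]],    # abundance values, rows x columns
}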
Example #15
def status(sid):
    data = obj_from_url(API_URL+"/submission/"+sid+'?full=1', auth=mgrast_auth['token'])
    
    # check for errors
    if ('error' in data) and data['error']:
        sys.stderr.write("ERROR: %s\n"%data['error'])
        sys.exit(1)
    
    try:
        # list comprehensions are eager, so the fallback below actually
        # triggers in Python 3 (a lazy map() would defer the TypeError)
        fids   = [x['id'] for x in data['inputs']]
        fnames = [x['filename'] for x in data['inputs']]
        fsizes = [str(x['filesize']) for x in data['inputs']]
    except TypeError:
        # 'inputs' may be a dict keyed by file ID rather than a list
        fids   = [x['id'] for x in data['inputs'].values()]
        fnames = [x['filename'] for x in data['inputs'].values()]
        fsizes = [str(x['filesize']) for x in data['inputs'].values()]
    
    # submission summary
    pt_summary = PrettyTable(["submission ID", "type", "project", "submit time", "input file ID", "input file name", "input file size", "status"])
    pt_summary.add_row([data['id'], data['type'], data['project'], data['info']['submittime'], "\n".join(fids), "\n".join(fnames), "\n".join(fsizes), data['state']])
    pt_summary.align = "l"
    print(pt_summary)
    
    # submission status
    if ('preprocessing' in data) and data['preprocessing']:
        pt_status = PrettyTable(["submission step", "step name", "step status", "step inputs"])
        for i, p in enumerate(data['preprocessing']):
            pstatus = p['status']
            if ('error' in p) and p['error']:
                pstatus += "\n"+p['error']
            pt_status.add_row([i, p['stage'], pstatus, "\n".join(p['inputs'])])
        pt_status.align = "l"
        print(pt_status)
    
    # metagenome info
    if ('metagenomes' in data) and data['metagenomes']:
        pt_mg = PrettyTable(["metagenome ID", "metagenome name", "status", "current steps", "submit time", "complete time", "pipeline ID"])
        for m in data['metagenomes']:
            state = "in-progress"
            if len(m['state']) == 1:
                state = m['state'][0]
            else:
                for s in m['state']:
                    if s == 'suspend':
                        state = 'suspend'
            remain = 0
            if m['task'] and (len(m['task']) > 0):
                remain = len(m['task'])
            pt_mg.add_row([m['userattr']['id'], m['userattr']['name'], state, remain, m['submittime'], m['completedtime'], m['id']])
        pt_mg.align = "l"
        print(pt_mg)
Example #16
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--verbosity", dest="verbosity", default='mixs', help="amount of metadata to display. use keyword 'mixs' for GSC MIxS metadata, use keyword 'full' for all GSC metadata, default is mixs")
    
    # get inputs
    opts = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    
    # get auth
    token = get_auth_token(opts)
    
    # build call url
    verb = opts.verbosity if opts.verbosity == 'mixs' else 'metadata'
    url  = opts.url+'/metagenome/'+opts.id+'?verbosity='+verb

    # retrieve / output data
    result = obj_from_url(url, auth=token)
    if opts.verbosity == 'mixs':
        for r in sorted(result.keys()):
            if r not in ['project', 'library', 'sample']:
                safe_print("%s\t%s\n" %(r, result[r]))
    elif opts.verbosity == 'full':
        md = result['metadata']
        safe_print("category\tlabel\tvalue\n")
        if ('project' in md) and md['project']['data']:
            for p in sorted(md['project']['data'].keys()):
                safe_print("project\t%s\t%s\n" %(p, md['project']['data'][p]))
        if ('sample' in md) and md['sample']['data']:
            for s in sorted(md['sample']['data'].keys()):
                safe_print("sample\t%s\t%s\n" %(s, md['sample']['data'][s]))
        if ('library' in md) and ('type' in md['library']) and md['library']['data']:
            for l in sorted(md['library']['data'].keys()):
                safe_print("library: %s\t%s\t%s\n" %(md['library']['type'], l, md['library']['data'][l]))
        if ('env_package' in md) and ('type' in md['env_package']) and md['env_package']['data']:
            for e in sorted(md['env_package']['data'].keys()):
                safe_print("env package: %s\t%s\t%s\n" %(md['env_package']['type'], e, md['env_package']['data'][e]))
    else:
        sys.stderr.write("ERROR: invalid verbosity type\n")
        return 1
    
    return 0
Example #17
def check_ids(files):
    data = obj_from_url(API_URL + "/inbox",
                        auth=mgrast_auth['token'],
                        debug=DEBUG)
    if len(data['files']) == 0:
        sys.stderr.write("ERROR: Your inbox is empty, please upload first.\n")
        sys.exit(1)
    ids = [x['id'] for x in data['files']]
    for f in files:
        if f not in ids:
            sys.stderr.write(
                "ERROR: File ID '%s' does not exist in your inbox. Did you use File Name by mistake?\n"
                % f)
            sys.exit(1)
Example #18
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="API url")
    parser.add_argument("--md5", dest="md5", default=None, help="sequence md5")
    parser.add_argument("--id", dest="id", default=None, help="accession ID")
    parser.add_argument(
        "--source",
        dest="source",
        default='SwissProt',
        help=
        "datasource to get record from, one of: SwissProt, TreMBL, InterPro")
    parser.add_argument("--version",
                        dest="version",
                        default='1',
                        help="M5NR version to use, one of 1 or 9")

    # get inputs
    opts = parser.parse_args()

    # build url for m5nr query
    params = [('limit', '1'), ('version', opts.version),
              ('source', opts.source)]
    if opts.md5:
        url = opts.url + '/m5nr/md5/' + opts.md5 + '?' + urlencode(
            params, True)
    elif opts.id:
        url = opts.url + '/m5nr/accession/' + opts.id + '?' + urlencode(
            params, True)
    else:
        sys.stderr.write("ERROR: no md5 checksum or accession given\n")
        return 1

    # retrieve data
    result = obj_from_url(url)
    if len(result['data']) == 0:
        sys.stderr.write("ERROR: no match in M5NR version %s\n" % opts.version)
        return 1

    # output data
    stdout_from_url(UNIPROT_URL + result['data'][0]['accession'] + '.txt')

    return 0
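urlencode(params, True) (the positional argument is doseq) is what turns the parameter list into the query string; a self-contained check:

from urllib.parse import urlencode  # Python 2 used urllib.urlencode

params = [('limit', '1'), ('version', '1'), ('source', 'SwissProt')]
print(urlencode(params, True))  # limit=1&version=1&source=SwissProt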
Example #19
def upload(files, verbose):
    fids = []
    attr = json.dumps({
        "type": "inbox",
        "id": mgrast_auth['id'],
        "user": mgrast_auth['login'],
        "email": mgrast_auth['email']
    })
    for i, f in enumerate(files):
        # get format
        if f.endswith(".gz"):
            fformat = "gzip"
            fname = os.path.basename(f[:-3])
        elif f.endswith(".bz2"):
            fformat = "bzip2"
            fname = os.path.basename(f[:-4])
        else:
            fformat = "upload"
            fname = os.path.basename(f)
        # POST to shock
        data = {
            "file_name": fname,
            "attributes_str": attr
        }
        if verbose:
            if len(files) > 1:
                print("Uploading file %d of %d (%s) to MG-RAST Shock"%(i+1, len(files), f))
            else:
                print("Uploading file %s to MG-RAST Shock"%(f))
        result = post_file(SHOCK_URL+"/node", fformat, f, data=data, auth=mgrast_auth['token'], debug=verbose)
        if verbose:
            print(json.dumps(result['data']))
            if len(files) > 1:
                print("Setting info for file %d of %d (%s) in MG-RAST inbox"%(i+1, len(files), f))
            else:
                print("Setting info for file %s in MG-RAST inbox"%(f))
        # compute file info
        info = obj_from_url(API_URL+"/inbox/info/"+result['data']['id'], auth=mgrast_auth['token'], debug=verbose)
        if verbose:
            print(json.dumps(info))
        else:
            print(info['status'])
        fids.append(result['data']['id'])
    return fids
Example #20
def archive_upload(afile, verbose):
    attr = json.dumps({
        "type": "inbox",
        "id": mgrast_auth['id'],
        "user": mgrast_auth['login'],
        "email": mgrast_auth['email']
    })
    # get format
    if afile.endswith(".tar.gz"):
        aformat = "tar.gz"
    elif afile.endswith(".tar.bz2"):
        aformat = "tar.bz2"
    elif afile.endswith(".tar"):
        aformat = "tar"
    elif afile.endswith(".zip"):
        aformat = "zip"
    else:
        sys.stderr.write("ERROR: input file %s is incorrect archive format\n"%afile)
        sys.exit(1)
    # POST to shock / unpack
    if verbose:
        print("Uploading file %s to MG-RAST Shock"%(afile))
    data = {
        "file_name": os.path.basename(afile),
        "attributes_str": attr
    }
    result = post_file(SHOCK_URL+"/node", "upload", afile, data=data, auth=mgrast_auth['token'], debug=verbose)
    if verbose:
        print(json.dumps(result['data']))
        print("Unpacking archive file %s"%(afile))
    data = {
        "unpack_node": result['data']['id'],
        "archive_format": aformat,
        "attributes_str": attr
    }
    unpack = obj_from_url(SHOCK_URL+"/node", data=data, auth=mgrast_auth['token'], debug=verbose)
    if verbose:
        print(json.dumps(unpack['data']))
    fids = [node['id'] for node in unpack['data']]  # materialize: map() is lazy in Python 3
    return fids
Example #21
def compute(action, files, retain, joinfile, rc_index):
    if action == "sff2fastq":
        data = {"sff_file": files[0]}
    elif action == "demultiplex":
        data = {
            "seq_file": files[0],
            "barcode_file": files[1],
            "rc_index": 1 if rc_index else 0
        }
        if len(files) == 3:
            data["index_file"] = files[2]
    elif action == "pairjoin":
        data = {
            "pair_file_1": files[0],
            "pair_file_2": files[1],
            "retain": 1 if retain else 0
        }
        if joinfile:
            data['output'] = joinfile
    elif action == "pairjoin_demultiplex":
        data = {
            "pair_file_1": files[0],
            "pair_file_2": files[1],
            "index_file": files[2],
            "barcode_file": files[3],
            "retain": 1 if retain else 0,
            "rc_index": 1 if rc_index else 0
        }
    else:
        sys.stderr.write("ERROR: invalid compute action. use one of: %s\n" %
                         ", ".join(compute_actions))
        sys.exit(1)  # without exiting, the request below would run with undefined data
    info = obj_from_url(API_URL + "/inbox/" + action,
                        data=json.dumps(data),
                        auth=mgrast_auth['token'],
                        debug=DEBUG)
    print(info['status'])
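A hypothetical pairjoin call (the arguments are inbox file IDs in the positional order the action expects; the IDs are placeholders):

# join two mate-pair files, keep the unjoined reads, default output name
compute("pairjoin", ["<pair1-file-id>", "<pair2-file-id>"],
        retain=True, joinfile=None, rc_index=False)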
Example #22
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument(
        "--ids",
        dest="ids",
        default=None,
        help="comma seperated list or file of KBase Metagenome IDs")
    parser.add_argument("--url",
                        dest="url",
                        default=API_URL,
                        help="communities API url")
    parser.add_argument("--user",
                        dest="user",
                        default=None,
                        help="OAuth username")
    parser.add_argument("--passwd",
                        dest="passwd",
                        default=None,
                        help="OAuth password")
    parser.add_argument("--token",
                        dest="token",
                        default=None,
                        help="OAuth token")
    parser.add_argument(
        "--level",
        dest="level",
        default='level3',
        help="functional level to retrieve abundances for, default is level3")
    parser.add_argument(
        "--source",
        dest="source",
        default='Subsystems',
        help="function datasource to filter results by, default is Subsystems")
    parser.add_argument("--filter_level",
                        dest="filter_level",
                        default=None,
                        help="function level to filter by")
    parser.add_argument(
        "--filter_name",
        dest="filter_name",
        default=None,
        help="function name to filter by, file or comma seperated list")
    parser.add_argument(
        "--intersect_source",
        dest="intersect_source",
        default='SEED',
        help="taxon datasource for insersection, default is SEED")
    parser.add_argument("--intersect_level",
                        dest="intersect_level",
                        default=None,
                        help="taxon level for insersection")
    parser.add_argument(
        "--intersect_name",
        dest="intersect_name",
        default=None,
        help="taxon name(s) for insersection, file or comma seperated list")
    parser.add_argument(
        "--output",
        dest="output",
        default='-',
        help="output: filename or stdout (-), default is stdout")
    parser.add_argument(
        "--format",
        dest="format",
        default='biom',
        help=
        "output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom"
    )
    parser.add_argument(
        "--evalue",
        type=int,
        dest="evalue",
        default=15,
        help="negative exponent value for maximum e-value cutoff, default is 15"
    )
    parser.add_argument(
        "--identity",
        type=int,
        dest="identity",
        default=60,
        help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument(
        "--length",
        type=int,
        dest="length",
        default=15,
        help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version",
                        type=int,
                        dest="version",
                        default=1,
                        help="M5NR annotation version to use, default is 1")
    parser.add_argument(
        "--temp",
        dest="temp",
        default=None,
        help="filename to temporarly save biom output at each iteration")

    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    if (opts.filter_name and
        (not opts.filter_level)) or ((not opts.filter_name)
                                     and opts.filter_level):
        sys.stderr.write(
            "ERROR: both --filter_level and --filter_name need to be used together\n"
        )
        return 1
    if (opts.intersect_name and
        (not opts.intersect_level)) or ((not opts.intersect_name)
                                        and opts.intersect_level):
        sys.stderr.write(
            "ERROR: both --intersect_level and --intersect_name need to be used together\n"
        )
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url
    id_list = []
    if os.path.isfile(opts.ids):
        id_str = open(opts.ids, 'r').read()
        try:
            id_obj = json.loads(id_str)
            # materialize the IDs so the list can be len()-ed and sliced below
            if 'elements' in id_obj:
                id_list = list(id_obj['elements'].keys())
            elif 'members' in id_obj:
                id_list = [m['ID'] for m in id_obj['members']]
        except:
            id_list = id_str.strip().split('\n')
    else:
        id_list = opts.ids.strip().split(',')
    params = [('group_level', opts.level), ('source', opts.source),
              ('evalue', opts.evalue), ('identity', opts.identity),
              ('length', opts.length), ('version', opts.version),
              ('result_type', 'abundance'), ('asynchronous', '1')]
    if opts.intersect_level and opts.intersect_name:
        params.append(('filter_source', opts.intersect_source))
        params.append(('filter_level', opts.intersect_level))
        if os.path.isfile(opts.intersect_name):
            with open(opts.intersect_name) as file_:
                for f in file_:
                    params.append(('filter', f.strip()))
        else:
            for f in opts.intersect_name.strip().split(','):
                params.append(('filter', f))

    # retrieve data
    biom = None
    size = 50
    if len(id_list) > size:
        for i in range(0, len(id_list), size):
            sub_ids = id_list[i:i + size]
            cur_params = copy.deepcopy(params)
            for mgid in sub_ids:  # don't shadow the chunk index
                cur_params.append(('id', mgid))
            cur_url = opts.url + '/matrix/function?' + urlencode(
                cur_params, True)
            cur_biom = async_rest_api(cur_url, auth=token)
            biom = merge_biom(biom, cur_biom)
            if opts.temp:
                json.dump(biom, open(opts.temp, 'w'))
    else:
        for i in id_list:
            params.append(('id', i))
        url = opts.url + '/matrix/function?' + urlencode(params, True)
        biom = async_rest_api(url, auth=token)
        if opts.temp:
            json.dump(biom, open(opts.temp, 'w'))

    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        # get input filter list
        filter_list = []
        if os.path.isfile(opts.filter_name):
            with open(opts.filter_name) as file_:
                for f in file_:
                    filter_list.append(f.strip())
        else:
            for f in opts.filter_name.strip().split(','):
                filter_list.append(f)
        # annotation mapping from m5nr
        params = [('version', opts.version), ('min_level', opts.level),
                  ('source', opts.source)]
        url = opts.url + '/m5nr/ontology?' + urlencode(params, True)
        data = obj_from_url(url)
        level = 'level4' if opts.level == 'function' else opts.level
        for ann in data['data']:
            if (opts.filter_level
                    in ann) and (level in ann) and (ann[opts.filter_level]
                                                    in filter_list):
                sub_ann.add(ann[level])

    # output data
    if (not opts.output) or (opts.output == '-'):
        out_hdl = sys.stdout
    else:
        out_hdl = open(opts.output, 'w')

    if opts.format == 'biom':
        out_hdl.write(json.dumps(biom) + "\n")
    else:
        biom_to_tab(biom, out_hdl, rows=sub_ann)

    out_hdl.close()
    return 0
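The 50-id batching above is the standard list-chunking idiom; isolated, it looks like this sketch:

def chunks(items, size):
    # yield successive size-length slices of a list
    for start in range(0, len(items), size):
        yield items[start:start + size]

# one asynchronous /matrix/function request per batch of 50 ids:
# for sub_ids in chunks(id_list, 50): ...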
Example #23
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp.format(VERSION, RO_VERSION),
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url",
                        dest="url",
                        default=API_URL,
                        help="MG-RAST API url")
    parser.add_argument("--user",
                        dest="user",
                        default=None,
                        help="OAuth username")
    parser.add_argument("--passwd",
                        dest="passwd",
                        default=None,
                        help="OAuth password")
    parser.add_argument("--token",
                        dest="token",
                        default=None,
                        help="OAuth token")
    parser.add_argument("--metagenome",
                        dest="metagenome",
                        default=None,
                        help="metagenome ID")
    parser.add_argument("--dir",
                        dest="dir",
                        default=".",
                        help="directory to export to")
    parser.add_argument("--list",
                        dest="list",
                        action="store_true",
                        default=False,
                        help="list files in manifest")

    # get inputs
    opts = parser.parse_args()
    if not opts.metagenome:
        sys.stderr.write("ERROR: a metagenome id is required\n")
        return 1
    if not os.path.isdir(opts.dir):
        sys.stderr.write("ERROR: dir '%s' does not exist\n" % opts.dir)
        return 1

    # get auth
    token = get_auth_token(opts)

    # get mg info
    url = opts.url + '/metagenome/' + opts.metagenome
    mg = obj_from_url(url, auth=token)

    # get manifest
    url = opts.url + '/researchobject/manifest/' + opts.metagenome
    data = obj_from_url(url, auth=token)

    # just list
    if opts.list:
        pt = PrettyTable(["File Name", "Folder", "Media Type"])
        for info in data["aggregates"]:
            pt.add_row([
                info["bundledAs"]["filename"], info["bundledAs"]["folder"],
                info["mediatype"]
            ])
        pt.align = "l"
        print(pt)
        return 0

    # get cwl files
    temp_name = random_str(10)
    pipeline_dir = os.path.join(opts.dir, temp_name)
    git_clone = "git clone https://github.com/MG-RAST/pipeline.git " + pipeline_dir
    os.system(git_clone)

    # download manifest
    sha1s = []
    base = data["@context"][0]["@base"].strip('/')
    manifest_dir = os.path.join(opts.dir, base)
    os.mkdir(manifest_dir)
    data_str = json.dumps(data)
    open(os.path.join(manifest_dir, data["manifest"]), 'w').write(data_str)
    sha1s.append([
        hashlib.sha1(data_str.encode('utf-8')).hexdigest(),  # sha1 needs bytes
        os.path.join(base, data["manifest"])
    ])

    # download aggregates
    for info in data["aggregates"]:
        sys.stdout.write("Downloading %s ... " %
                         (info["bundledAs"]["filename"]))
        folder = info["bundledAs"]["folder"].strip('/')
        folder_dir = os.path.join(opts.dir, folder)
        if not os.path.isdir(folder_dir):
            os.mkdir(folder_dir)
        if "githubusercontent" in info["uri"]:
            pos = info["uri"].find("CWL")
            src = os.path.join(pipeline_dir, info["uri"][pos:])
            dst = os.path.join(folder_dir, info["bundledAs"]["filename"])
            text = open(src, 'r').read().replace('../Inputs/', '').replace(
                '../Tools/', '').replace('../Workflows/', '')
            if dst.endswith('job.yaml'):
                text = edit_input(text, mg)
            open(dst, 'w').write(text)
            sha1s.append([
                hashlib.sha1(text.encode('utf-8')).hexdigest(),
                os.path.join(folder, info["bundledAs"]["filename"])
            ])
        else:
            fh = open(os.path.join(folder_dir, info["bundledAs"]["filename"]),
                      'w')
            s1 = file_from_url(info["uri"], fh, auth=token, sha1=True)
            fh.close()
            sha1s.append(
                [s1, os.path.join(folder, info["bundledAs"]["filename"])])
        sys.stdout.write("Done\n")

    # output sha1
    mansha1 = open(os.path.join(opts.dir, "manifest-sha1.txt"), 'w')
    tagsha1 = open(os.path.join(opts.dir, "tagmanifest-sha1.txt"), 'w')
    sha1s.sort(key=lambda x: x[1])
    for s1 in sha1s:
        if s1[1].startswith('data'):
            mansha1.write("%s\t%s\n" % (s1[0], s1[1]))
        else:
            tagsha1.write("%s\t%s\n" % (s1[0], s1[1]))
    mansha1.close()
    tagsha1.close()

    # cleanup
    shutil.rmtree(pipeline_dir)

    return 0
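The .encode('utf-8') calls above matter because hashlib.sha1 only accepts bytes under Python 3; a quick self-contained check:

import hashlib

digest = hashlib.sha1("manifest text".encode("utf-8")).hexdigest()
print(len(digest))  # 40 hex characters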
Example #24
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % (VERSION, search_opts),
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url",
                        dest="url",
                        default=API_URL,
                        help="communities API url")
    parser.add_argument("--user",
                        dest="user",
                        default=None,
                        help="OAuth username")
    parser.add_argument("--passwd",
                        dest="passwd",
                        default=None,
                        help="OAuth password")
    parser.add_argument("--token",
                        dest="token",
                        default=None,
                        help="OAuth token")
    parser.add_argument(
        "--limit",
        dest="limit",
        type=int,
        default=15,
        help=
        "Number of results to show, if > 50 will use paginated queries to get all, default 15"
    )
    parser.add_argument(
        "--order",
        dest="order",
        default=None,
        help="field metagenomes are ordered by, default is no ordering")
    parser.add_argument(
        "--direction",
        dest="direction",
        default="asc",
        help=
        "direction of order. 'asc' for ascending order, 'desc' for descending order, default is asc"
    )
    parser.add_argument(
        "--public",
        dest="public",
        action="store_true",
        default=False,
        help=
        "return both private and pubulic data if using authenticated search, default is private only.  Non-authenticated search only returns public."
    )
    parser.add_argument(
        "--match",
        dest="match",
        default="all",
        help=
        "search logic. 'all' for metagenomes that match all search parameters, 'any' for metagenomes that match any search parameters, default is all"
    )
    parser.add_argument(
        "--status",
        dest="status",
        default="public",
        help=
        "types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public"
    )
    parser.add_argument(
        "--verbosity",
        dest="verbosity",
        default='minimal',
        help=
        "amount of information to display. use keyword 'minimal' for id and name, use keyword 'full' for MIxS GSC metadata, default is minimal"
    )
    for sfield in SEARCH_FIELDS:
        parser.add_argument("--" + sfield,
                            dest=sfield,
                            default=None,
                            help="search parameter: query string for " +
                            sfield)

    # get inputs
    opts = parser.parse_args()

    # get auth
    token = get_auth_token(opts)

    # build call url
    total = 0
    maxLimit = 50
    params = [('limit', opts.limit if opts.limit < maxLimit else maxLimit),
              ('public', 'yes' if opts.public or (not token) else 'no')]
    for sfield in SEARCH_FIELDS:
        if hasattr(opts, sfield) and getattr(opts, sfield):
            params.append((sfield, getattr(opts, sfield)))
    if opts.order:
        params.append(('order', opts.order))
        params.append(('direction', opts.direction))
    url = opts.url + '/search?' + urlencode(params, True)

    # retrieve data
    fields = ['metagenome_id', 'public'] + SEARCH_FIELDS
    result = obj_from_url(url, auth=token)
    found = len(result['data'])
    if found == 0:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0
    total += found

    # output header
    safe_print("\t".join(fields) + "\n")
    # output rows
    display_search(result['data'], fields)

    while ('next' in result) and result['next'] and (total < opts.limit):
        url = result['next']
        result = obj_from_url(url, auth=token)
        total += len(result['data'])
        display_search(result['data'], fields)

    return 0
Example #25
def test_large_01():
    URI = API_URL + '/matrix/organism?group_level=phylum&source=RDP&hit_type=single&result_type=abundance&evalue=1&identity=60&length=15&taxid=0&id=mgm4510219.3'
    obj = obj_from_url(URI)
    o = open("5.txt", "w")
    o.write(str(obj))
Example #26
def test_matrix_04():
    URI = API_URL + '/matrix/organism?id=mgm4440284.3&group_level=phylum&source=RDP&hit_type=single&result_type=abundance&evalue=1&identity=60&length=15&taxid=0'
    obj = obj_from_url(URI)
    o = open("4.txt", "w")
    o.write(str(obj))
Example #27
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url",
                        dest="url",
                        default=API_URL,
                        help="communities API url")
    parser.add_argument("--rlib", dest="rlib", default=None, help="R lib path")
    parser.add_argument("--input",
                        dest="input",
                        default='-',
                        help="input: filename or stdin (-), default is stdin")
    parser.add_argument(
        "--output",
        dest="output",
        default='-',
        help="output: filename or stdout (-), default is stdout")
    parser.add_argument(
        "--outdir",
        dest="outdir",
        default=None,
        help=
        "ouput is placed in dir as filenmae.obj, fielname.type, only for 'biom' input"
    )
    parser.add_argument(
        "--format",
        dest="format",
        default='biom',
        help=
        "input / output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom"
    )

    # get inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid format\n")
        return 1
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']

    # parse inputs
    biom = None
    rows = []
    cols = []
    data = []
    maxval = 0
    tmp_in = 'tmp_' + random_str() + '.txt'
    tmp_hdl = open(tmp_in, 'w')
    try:
        indata = sys.stdin.read() if opts.input == '-' else open(
            opts.input, 'r').read()
        if opts.format == 'biom':
            try:
                biom = json.loads(indata)
                if opts.rlib:
                    maxval = biom_to_tab(biom, tmp_hdl)
                else:
                    rows, cols, data = biom_to_matrix(biom)
            except:
                sys.stderr.write("ERROR: input BIOM data not correct format\n")
                return 1
        else:
            rows, cols, data = tab_to_matrix(indata)
            data = [[float(v) for v in row] for row in data]  # floatify it (eager, JSON-serializable)
            if opts.rlib:
                tmp_hdl.write(indata)
    except:
        sys.stderr.write("ERROR: unable to load input data\n")
        return 1
    finally:
        tmp_hdl.close()

    # check values to see if already normalized, otherwise R fails badly
    data = list(data)
    if len(data) > 0:
        maxval = max(map(max, data))
    if maxval <= 1:
        os.remove(tmp_in)
        sys.stderr.write("ERROR: data is already normalized.\n")
        return 1

    # retrieve data
    norm = None
    if opts.rlib:
        tmp_out = 'tmp_' + random_str() + '.txt'
        r_cmd = """source("%s/preprocessing.r")
suppressMessages( MGRAST_preprocessing(
    file_in="%s",
    file_out="%s"
))""" % (opts.rlib, tmp_in, tmp_out)
        execute_r(r_cmd)
        nrows, ncols, ndata = tab_to_matrix(open(tmp_out, 'r').read())
        num_data = [[float(v) for v in row] for row in ndata]
        norm = {"columns": ncols, "rows": nrows, "data": num_data}
        os.remove(tmp_out)
    else:
        post = {"columns": cols, "rows": rows, "data": data}
        norm = obj_from_url(opts.url + '/compute/normalize',
                            data=json.dumps(post, separators=(',', ':')))

    # output data
    os.remove(tmp_in)
    if (not opts.output) or (opts.output == '-'):
        out_hdl = sys.stdout
    else:
        out_hdl = open(opts.output, 'w')

    if biom and (opts.format == 'biom'):
        # may have rows removed
        new_rows = []
        for r in biom['rows']:
            if r['id'] in norm['rows']:
                new_rows.append(r)
        biom['rows'] = new_rows
        biom['data'] = norm['data']
        biom['shape'][0] = len(biom['rows'])
        biom['id'] = biom['id'] + '_normalized'
        biom['matrix_type'] = 'dense'
        biom['matrix_element_type'] = 'float'
        matrix_type = None
        if biom['type'].startswith('Taxon'):
            matrix_type = "Communities.TaxonomicMatrix"
        elif biom['type'].startswith('Function'):
            matrix_type = "Communities.FunctionalMatrix"
        if opts.outdir and matrix_type:
            if not os.path.isdir(opts.outdir):
                os.mkdir(opts.outdir)
            ohdl = open(os.path.join(opts.outdir, opts.output + '.obj'), 'w')
            thdl = open(os.path.join(opts.outdir, opts.output + '.type'), 'w')
            ohdl.write(json.dumps(biom) + "\n")
            thdl.write(matrix_type)
            ohdl.close()
            thdl.close()
        else:
            out_hdl.write(json.dumps(biom) + "\n")
    else:
        out_hdl.write("\t%s\n" % "\t".join(norm['columns']))
        for i, d in enumerate(norm['data']):
            out_hdl.write("%s\t%s\n" %
                          (norm['rows'][i], "\t".join(map(str, d))))

    # only close and inspect real files; '-' writes to stdout
    if out_hdl is not sys.stdout:
        out_hdl.close()
        if os.stat(opts.output).st_size == 0:
            os.remove(opts.output)
    return 0
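execute_r() is a helper from the surrounding module; a rough stand-in (an assumption about its behavior, not its actual code) could shell out to Rscript:

import subprocess

def execute_r_sketch(r_cmd):
    # run the R source through Rscript; assumes Rscript is on PATH
    # and raises CalledProcessError if R exits non-zero
    subprocess.run(["Rscript", "-e", r_cmd], check=True)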
Example #28
def main(args):
    global API_URL
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    # access options
    parser.add_argument("-u",
                        "--url",
                        dest="url",
                        default=API_URL,
                        help="MG-RAST API url")
    parser.add_argument("-t",
                        "--token",
                        dest="token",
                        default=None,
                        help="MG-RAST token")
    # other options
    parser.add_argument("-f",
                        "--file",
                        dest="mdfile",
                        default=None,
                        help="metadata .xlsx file")
    parser.add_argument(
        "--taxa",
        dest="taxa",
        default=None,
        help=
        "metagenome_taxonomy for project: http://www.ebi.ac.uk/ena/data/view/Taxon:408169"
    )
    parser.add_argument("--debug",
                        dest="debug",
                        action="store_true",
                        default=False,
                        help="Run in debug mode")
    parser.add_argument("-v",
                        "--verbose",
                        dest="verbose",
                        action="store_true",
                        default=False,
                        help="Verbose STDOUT")
    parser.add_argument("args",
                        type=str,
                        nargs="+",
                        help="Action (" + ",".join(valid_actions) + ")")

    # get inputs
    opts = parser.parse_args()
    args = opts.args
    API_URL = opts.url

    # validate inputs
    if len(args) < 1:
        sys.stderr.write("ERROR: missing action\n")
        return 1
    action = args[0]
    if action not in valid_actions:
        sys.stderr.write("ERROR: invalid action. use one of: %s\n" %
                         ", ".join(valid_actions))
        return 1
    if len(args) < 2:
        sys.stderr.write("ERROR: missing Project ID\n")
        return 1
    pid = args[1]
    DEBUG = opts.verbose + opts.debug
    # get token
    token = get_auth_token(opts)
    if not token:
        token = input('Enter your MG-RAST auth token: ')

    # actions
    if action == "get-info":
        data = obj_from_url(opts.url + '/project/' + pid +
                            '?verbosity=verbose&nocache=1',
                            auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "get-metadata":
        data = obj_from_url(opts.url + '/metadata/export/' + pid, auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "update-metadata":
        result = post_file(opts.url + '/metadata/update',
                           'upload',
                           opts.mdfile,
                           auth=token,
                           data=json.dumps({'project': pid},
                                           separators=(',', ':')),
                           debug=DEBUG)
        print(json.dumps(result, sort_keys=True, indent=4))
    elif action == "make-public":
        data = obj_from_url(opts.url + '/project/' + pid + '/makepublic',
                            auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "submit-ebi":
        debug = 1 if opts.debug else 0
        info = {'project_id': pid, 'debug': debug}
        if opts.taxa:
            info['project_taxonomy'] = opts.taxa
        data = obj_from_url(opts.url + '/submission/ebi',
                            auth=token,
                            data=json.dumps(info, separators=(',', ':')))
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "status-ebi":
        data = obj_from_url(opts.url + '/submission/' + pid, auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))

    return 0
Example #29
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--project",
                        dest="project",
                        default=None,
                        help="project ID")

    # get inputs
    opts = parser.parse_args()
    if not opts.project or not opts.project.startswith("mgp"):
        sys.stderr.write("ERROR: a project ID of the form mgp<number> is required\n")
        return 1
    # get auth
    PROJECT = opts.project

    TOKEN = get_auth_token(opts)

    # export metadata

    outfile = PROJECT + "-export.xlsx"
    metadata = obj_from_url(
        "http://api.mg-rast.org/metadata/export/{project}?verbosity=full".format(
            project=PROJECT),
        auth=TOKEN)

    workbook = xlsxwriter.Workbook(outfile)
    print("Creating", outfile)
    worksheet = {}
    worksheet["README"] = workbook.add_worksheet("README")
    row = 0
    for i in range(10):
        # placeholder README content: write the numbers 0-9 down the first column
        worksheet["README"].write_number(row, 0, i)
        row += 1

    worksheet["project"] = workbook.add_worksheet("project")
    project_keys = get_project_keys(metadata)
    col = 0
    for key in project_keys:
        value = metadata["data"][key]["value"]
        definition = metadata["data"][key]["definition"]
        worksheet["project"].write_string(0, col, key)
        worksheet["project"].write_string(1, col, definition)
        worksheet["project"].write_string(2, col, value)
        col += 1

    worksheet["sample"] = workbook.add_worksheet("sample")

    samplekeys = get_sample_keys(metadata)

    col = 0
    row = 2
    for sample in metadata["samples"]:
        for key in samplekeys:
            if key in sample["data"]:
                value = sample["data"][key]["value"]
                definition = sample["data"][key]["definition"]
                fmt = sample["data"][key]["type"]
                worksheet["sample"].write_string(0, col, key)
                worksheet["sample"].write_string(1, col, definition)
                write_worksheet_value(worksheet["sample"], row, col, value,
                                      fmt)
            col += 1
        col = 0
        row += 1
    try:
        librarytype = metadata["samples"][0]["libraries"][0]["data"][
            "investigation_type"]["value"]
    except (IndexError, KeyError):
        sys.exit("This metadata bundle does not have any libraries")

    worksheet["library"] = workbook.add_worksheet("library " + librarytype)

    libkeys = get_library_keys(metadata)
    col = 0
    row = 2
    for sample in metadata["samples"]:
        for key in libkeys:
            if key in sample["libraries"][0]["data"]:
                value = sample["libraries"][0]["data"][key]["value"]
                definition = sample["libraries"][0]["data"][key]["definition"]
                fmt = sample["libraries"][0]["data"][key]["type"]
                worksheet["library"].write_string(0, col, key)
                worksheet["library"].write_string(1, col, definition)
                write_worksheet_value(worksheet["library"], row, col, value,
                                      fmt)
            col += 1
        col = 0
        row += 1

    eps = get_eps(metadata)
    print("eps", " ".join(eps))
    epcol = {}
    eprow = {}
    for ep in eps:
        worksheet[ep] = workbook.add_worksheet("ep " + ep)
        epcol[ep] = 0
        eprow[ep] = 2
    epkeys = get_ep_keys(metadata, eps)
    for sample in metadata["samples"]:
        ep = sample["envPackage"]["type"]
        for key in epkeys[ep]:
            try:
                value = sample["envPackage"]["data"][key]["value"]
                definition = sample["envPackage"]["data"][key]["definition"]
                fmt = sample["envPackage"]["data"][key]["type"]
            except KeyError:
                # missing keys get blank cells so columns stay aligned
                value = ""
                definition = ""
                fmt = "string"

            worksheet[ep].write_string(0, epcol[ep], key)
            worksheet[ep].write_string(1, epcol[ep], definition)
            write_worksheet_value(worksheet[ep], eprow[ep], epcol[ep], value,
                                  fmt)
            epcol[ep] += 1
        epcol[ep] = 0
        eprow[ep] += 1

    workbook.close()
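
All three export loops above delegate cell writing to write_worksheet_value, which is not defined in this snippet. A minimal sketch of a compatible helper, assuming the metadata "type" field uses names like "int" and "float" for numeric values (those type names are an assumption, not taken from the source):

def write_worksheet_value(sheet, row, col, value, fmt):
    # write numerically typed metadata as numbers so Excel can sort and chart them;
    # fall back to strings for everything else, including unparsable values
    if fmt in ("int", "float") and value not in ("", None):
        try:
            sheet.write_number(row, col, float(value))
            return
        except (TypeError, ValueError):
            pass
    sheet.write_string(row, col, str(value))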
Example #30
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url",
                        dest="url",
                        default=API_URL,
                        help="communities API url")
    parser.add_argument("--input",
                        dest="input",
                        default='-',
                        help="input: filename or stdin (-), default is stdin")
    parser.add_argument(
        "--output",
        dest="output",
        default='-',
        help="output: filename or stdout (-), default is stdout")
    parser.add_argument("--format",
                        dest="format",
                        default='biom',
                        help="input format: 'text' for tab-delimited table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--cluster",
                        dest="cluster",
                        default='ward',
                        help="cluster function, one of: ward, single, complete, mcquitty, median, centroid, default is ward")
    parser.add_argument("--distance",
                        dest="distance",
                        default='bray-curtis',
                        help="distance function, one of: bray-curtis, euclidean, maximum, manhattan, canberra, minkowski, difference, default is bray-curtis")
    parser.add_argument("--name",
                        dest="name",
                        type=int,
                        default=0,
                        help="label columns by name instead of by id: 1=true, 0=false, default is 0")
    parser.add_argument("--normalize",
                        dest="normalize",
                        type=int,
                        default=0,
                        help="normalize the input data: 1=true, 0=false, default is 0")

    # get inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1

    # parse inputs
    rows = []
    cols = []
    data = []
    try:
        if opts.input == '-':
            indata = sys.stdin.read()
        else:
            # use a context manager so the input file is closed promptly
            with open(opts.input, 'r') as in_hdl:
                indata = in_hdl.read()
        if opts.format == 'biom':
            try:
                biom = json.loads(indata)
                rows, cols, data = biom_to_matrix(biom, col_name=opts.name)
            except Exception:
                sys.stderr.write("ERROR: input BIOM data not correct format\n")
                return 1
        else:
            rows, cols, data = tab_to_matrix(indata)
    except Exception:
        sys.stderr.write("ERROR: unable to load input data\n")
        return 1

    # retrieve data
    raw = '0' if opts.normalize else '1'
    post = {
        "raw": raw,
        "cluster": opts.cluster,
        "distance": opts.distance,
        "columns": cols,
        "rows": rows,
        "data": data
    }
    hmap = obj_from_url(opts.url + '/compute/heatmap',
                        data=json.dumps(post, separators=(',', ':')))

    # output data
    if (not opts.output) or (opts.output == '-'):
        out_hdl = sys.stdout
    else:
        out_hdl = open(opts.output, 'w')

    out_hdl.write(json.dumps(hmap, separators=(', ', ': '), indent=4) + "\n")
    out_hdl.close()
    return 0
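
The "text" branch relies on a tab_to_matrix helper that is also not shown here. A minimal sketch of a compatible parser, assuming the first row holds column IDs, the first field of each later row holds the row ID, and the remaining cells are numeric (that contract is inferred from the call site, not stated in the source):

def tab_to_matrix(indata):
    # split a tab-delimited abundance table into (row ids, column ids, matrix)
    lines = [line.split("\t") for line in indata.strip().split("\n")]
    cols = lines[0][1:]                 # header row, minus the corner cell
    rows = [line[0] for line in lines[1:]]
    data = [[float(v) for v in line[1:]] for line in lines[1:]]
    return rows, cols, data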
Example #31
def submit(stype, files, opts):
    fids = []
    # post files to shock
    if stype == 'batch':
        fids = archive_upload(files[0], opts.verbose)
    else:
        fids = upload(files, opts.verbose)
    
    # set POST data
    data = {}
    if opts.debug:
        data['debug'] = 1
    if opts.barcode:
        bids = upload([opts.barcode], opts.verbose)
        data['barcode_file'] = bids[0]
    if opts.metadata:
        mids = upload([opts.metadata], opts.verbose)
        data['metadata_file'] = mids[0]
    elif opts.project_id:
        data['project_id'] = opts.project_id
    elif opts.project_name:
        data['project_name'] = opts.project_name
    # figure out type
    if (stype == 'simple') or (stype == 'batch'):
        data['seq_files'] = fids
    elif stype == 'demultiplex':
        data['multiplex_file'] = fids[0]
        data['rc_index'] = 1 if opts.rc_index else 0
        if len(fids) == 3:
            data["index_file"] = fids[2]
    elif stype == 'pairjoin':
        data['pair_file_1'] = fids[0]
        data['pair_file_2'] = fids[1]
        data['retain'] = 1 if opts.retain else 0
        if opts.mgname:
            data['mg_name'] = opts.mgname
    elif stype == 'pairjoin_demultiplex':
        data['pair_file_1'] = fids[0]
        data['pair_file_2'] = fids[1]
        data['index_file'] = fids[2]
        data['retain'] = 1 if opts.retain else 0
        data['rc_index'] = 1 if opts.rc_index else 0
    
    # set pipeline flags - assembled is a special case
    if opts.assembled:
        data['assembled'] = 1
        data['filter_ln'] = 0
        data['filter_ambig'] = 0
        data['dynamic_trim'] = 0
        data['dereplicate'] = 0
        data['bowtie'] = 0
    else:
        data['assembled'] = 0
        data['filter_ln'] = 0 if opts.no_filter_ln else 1
        data['filter_ambig'] = 0 if opts.no_filter_ambig else 1
        data['dynamic_trim'] = 0 if opts.no_dynamic_trim else 1
        data['dereplicate'] = 0 if opts.no_dereplicate else 1
        data['bowtie'] = 0 if opts.no_bowtie else 1
    # set pipeline options
    data['filter_ln_mult'] = opts.filter_ln_mult
    data['max_ambig'] = opts.max_ambig
    data['max_lqb'] = opts.max_lqb
    data['min_qual'] = opts.min_qual
    if opts.screen_indexes:
        data['screen_indexes'] = opts.screen_indexes
    if opts.priority:
        data['priority'] = opts.priority
    
    # submit it
    if opts.verbose:
        print("Submitting to MG-RAST with the following parameters:")
        print(json.dumps(data, sort_keys=True, indent=4))
    result = obj_from_url(API_URL+"/submission/submit", data=json.dumps(data), auth=mgrast_auth['token'])
    if opts.verbose and (not opts.debug):
        print(json.dumps(result))
    if opts.debug:
        pprint.pprint(result)
    elif opts.synch or opts.json_out:
        print("Project ID: "+result['project'])
        print("Submission ID: "+result['id'])
        wait_on_complete(result['id'], opts.json_out)
    else:
        print("Project ID: "+result['project'])
        print("Submission ID: "+result['id'])
        status(result['id'])
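
A usage sketch for submit(): the Namespace fields mirror the opts attributes the function reads, and every value below (file names, cutoffs, flags) is a placeholder rather than a documented default. It also assumes mgrast_auth was populated by a prior login step:

from argparse import Namespace

opts = Namespace(verbose=True, debug=False, barcode=None, metadata="my_metadata.xlsx",
                 project_id=None, project_name=None, rc_index=False, retain=True,
                 mgname=None, assembled=False, no_filter_ln=False, no_filter_ambig=False,
                 no_dynamic_trim=False, no_dereplicate=False, no_bowtie=False,
                 filter_ln_mult=2.0, max_ambig=5, max_lqb=5, min_qual=15,
                 screen_indexes=None, priority=None, synch=False, json_out=None)
# join a mate pair and submit it as one metagenome
submit("pairjoin", ["run1_R1.fastq", "run1_R2.fastq"], opts)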
Example #32
def delete(sid):
    data = obj_from_url(API_URL+"/submission/"+sid, auth=mgrast_auth['token'], method='DELETE')
    print(data['status'])
Example #33
def wait_on_complete(sid, json_out):
    listed_mgs = set()
    incomplete = True
    data = None
    total_mg = 0
    while incomplete:
        time.sleep(synch_pause)
        data = obj_from_url(API_URL+"/submission/"+sid, auth=mgrast_auth['token'])
        # check for global errors
        if isinstance(data['status'], str):
            sys.stderr.write("ERROR: %s\n"%data['status'])
            sys.exit(1)
        # check for submission errors
        for task in data['status']['preprocessing']:
            if task['status'] == "suspend":
                sys.stderr.write("ERROR: %s\n"%task['error'])
                sys.exit(1)
        # check for metagenomes
        total_mg = len(data['status']['metagenomes'])
        done_mg  = 0
        error_mg = 0
        if total_mg > 0:
            for mg in data['status']['metagenomes']:
                if mg['id'] not in listed_mgs:
                    print("metagenome analysis started: "+mg['id'])
                    listed_mgs.add(mg['id'])
                if mg['status'] == "completed":
                    done_mg += 1
                elif mg['status'] == "suspend":
                    error_mg += 1
            if total_mg == (done_mg + error_mg):
                incomplete = False
    # display completed
    if json_out:
        mgs = []
        jhdl = open(json_out, 'w')
        for mg in data['status']['metagenomes']:
            if mg['status'] == "completed":
                print("metagenome analysis completed: "+mg['id'])
                mgdata = obj_from_url(API_URL+"/metagenome/"+mg['id']+"?verbosity=full", auth=mgrast_auth['token'])
                mgs.append(mgdata)
            elif mg['status'] == "suspend":
                print("metagenome analysis failed: "+mg['id'])
                if "error" in mg:
                    print("[error] "+mg['error'])
        if len(mgs) == 1:
            # output single dict
            json.dump(mgs[0], jhdl)
        elif len(mgs) > 1:
            # output list of dicts
            json.dump(mgs, jhdl)
        else:
            # error here
            sys.stderr.write("ERROR: no metagenome(s) produced in submission %s\n"%sid)
            sys.exit(1)
        jhdl.close()
    else:
        pt_mg = PrettyTable(["metagenome ID", "metagenome name", "total status", "submit time"])
        for mg in data['status']['metagenomes']:
            pt_mg.add_row([mg['id'], mg['name'], mg['status'], mg['timestamp']])
        pt_mg.align = "l"
        print(pt_mg)
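
The polling loop distinguishes two shapes of the status payload: a plain string signals a global error, while a dict carries per-stage detail. A toy illustration of the two cases the isinstance check separates (all field values here are made up):

error_payload = {"status": "submission not found"}  # global error: status is a str
normal_payload = {"status": {                       # normal case: status is a dict
    "preprocessing": [{"status": "completed", "error": None}],
    "metagenomes": [{"id": "mgm0000000.3", "name": "sample1", "status": "completed",
                     "timestamp": "2020-01-01T00:00:00"}],
}}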
Example #34
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--id",
                        dest="id",
                        default=None,
                        help="KBase Metagenome ID")
    parser.add_argument("--url",
                        dest="url",
                        default=API_URL,
                        help="communities API url")
    parser.add_argument("--user",
                        dest="user",
                        default=None,
                        help="OAuth username")
    parser.add_argument("--passwd",
                        dest="passwd",
                        default=None,
                        help="OAuth password")
    parser.add_argument("--token",
                        dest="token",
                        default=None,
                        help="OAuth token")
    parser.add_argument("--plot",
                        dest="plot",
                        action="store_true",
                        default=False,
                        help="display plot in ASCII art instead of table of numbers for: bp_profile, drisee, kmer, rarefaction, or taxa level")
    parser.add_argument("--stat",
                        dest="stat",
                        default='sequence',
                        help="type of stat to display, use keyword: 'sequence', 'bp_profile', 'drisee', 'kmer', 'rarefaction', or taxa level name, default is sequence")

    # get inputs
    opts = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build call url
    url = opts.url + '/metagenome/' + opts.id + '?verbosity=stats&public=1'

    # retrieve / output data
    result = obj_from_url(url, auth=token)
    stats = result['statistics']
    if opts.stat == 'sequence':
        for s in sorted(stats['sequence_stats'].keys()):
            safe_print("%s\t%s\n" % (s, stats['sequence_stats'][s]))
    elif opts.stat == 'bp_profile':
        if not stats['qc']['bp_profile']['percents']['data']:
            sys.stderr.write("ERROR: %s has no bp_profile statistics\n" %
                             opts.id)
            return 1
        if opts.plot:
            cols = stats['qc']['bp_profile']['percents']['columns'][1:5]
            data = [x[1:5] for x in
                    stats['qc']['bp_profile']['percents']['data']]
            plot_histo(cols, data, 20, 80)
        else:
            safe_print(
                "\t".join(stats['qc']['bp_profile']['percents']['columns']) +
                "\n")
            for d in stats['qc']['bp_profile']['percents']['data']:
                safe_print("\t".join(map(str, d)) + "\n")
    elif opts.stat == 'drisee':
        if not stats['qc']['drisee']['percents']['data']:
            sys.stderr.write("ERROR: %s has no drisee statistics\n" % opts.id)
            return 1
        if opts.plot:
            x, y = [], []
            for d in stats['qc']['drisee']['percents']['data']:
                x.append(d[0])
                y.append(d[7])
            aplotter.plot(x,
                          y,
                          output=sys.stdout,
                          draw_axes=True,
                          plot_slope=True,
                          min_x=0,
                          min_y=0)
        else:
            safe_print(
                "\t".join(stats['qc']['drisee']['percents']['columns']) + "\n")
            for d in stats['qc']['drisee']['percents']['data']:
                safe_print("\t".join(map(str, d)) + "\n")
    elif opts.stat == 'kmer':
        if not stats['qc']['kmer']['15_mer']['data']:
            sys.stderr.write("ERROR: %s has no kmer statistics\n" % opts.id)
            return 1
        if opts.plot:
            x, y = [], []
            for d in stats['qc']['kmer']['15_mer']['data']:
                x.append(math.log(d[3], 10))
                y.append(math.log(d[0], 10))
            aplotter.plot(x,
                          y,
                          output=sys.stdout,
                          draw_axes=True,
                          plot_slope=True,
                          min_x=0,
                          min_y=0)
        else:
            safe_print("\t".join(stats['qc']['kmer']['15_mer']['columns']) +
                       "\n")
            for d in stats['qc']['kmer']['15_mer']['data']:
                safe_print("\t".join(map(str, d)) + "\n")
    elif opts.stat == 'rarefaction':
        if not stats['rarefaction']:
            sys.stderr.write("ERROR: %s has no rarefaction statistics\n" %
                             opts.id)
            return 1
        if opts.plot:
            x, y = [], []
            for r in stats['rarefaction']:
                x.append(int(r[0]))
                y.append(float(r[1]))
            aplotter.plot(x,
                          y,
                          output=sys.stdout,
                          draw_axes=True,
                          plot_slope=True,
                          min_x=0,
                          min_y=0)
        else:
            safe_print("x\ty\n")
            for r in stats['rarefaction']:
                safe_print("%s\t%s\n" % (str(r[0]), str(r[1])))
    elif opts.stat in stats['taxonomy']:
        ranked = sorted(stats['taxonomy'][opts.stat],
                        key=lambda x: (-int(x[1]), x[0]))
        if opts.plot:
            top = [int(x[1]) for x in ranked[:50]]  # top 50 by abundance
            aplotter.plot(top,
                          output=sys.stdout,
                          draw_axes=True,
                          plot_slope=False,
                          min_x=0,
                          min_y=0)
        else:
            for t in ranked:
                safe_print("%s\t%s\n" % (t[0], str(t[1])))
    else:
        sys.stderr.write("ERROR: invalid stat type\n")
        return 1

    return 0
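
safe_print is used throughout this snippet but defined elsewhere; presumably it shields stdout from encoding errors raised by non-ASCII taxon names. A minimal sketch under that assumption:

import sys

def safe_print(text):
    # replace characters the console cannot encode instead of crashing on them
    sys.stdout.write(text.encode("ascii", "replace").decode("ascii"))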