コード例 #1
0
def test_500():
    """Calling an unknown API endpoint must terminate with ``SystemExit``.

    The test succeeds only when ``async_rest_api`` raises ``SystemExit``;
    a normal return falls through to the failing assertion.
    """
    bad_endpoint = API_URL + '/nonexistentapicall'
    try:
        async_rest_api(bad_endpoint, auth="")
    except SystemExit:
        return
    assert False
コード例 #2
0
def test_private():
    """An unauthenticated request for a private metagenome must raise ``SystemExit``.

    A normal return from ``async_rest_api`` falls through to the failing
    assertion.
    """
    private_uri = API_URL + '/matrix/organism?id=mgm4454266.3'  # mgm4454266.3 is private
    try:
        async_rest_api(private_uri, auth="")
    except SystemExit:
        return
    assert False
コード例 #3
0
ファイル: test_api.py プロジェクト: MG-RAST/MG-RAST-Tools
def test_500():
    """Calling an unknown API endpoint must terminate with ``SystemExit``.

    The test succeeds only when ``async_rest_api`` raises ``SystemExit``;
    a normal return falls through to the failing assertion.
    """
    bad_endpoint = API_URL + '/nonexistentapicall'
    try:
        async_rest_api(bad_endpoint, auth="")
    except SystemExit:
        return
    assert False
コード例 #4
0
ファイル: test_api.py プロジェクト: MG-RAST/MG-RAST-Tools
def test_private():
    """An unauthenticated request for a private metagenome must raise ``SystemExit``.

    A normal return from ``async_rest_api`` falls through to the failing
    assertion.
    """
    private_uri = API_URL + '/matrix/organism?id=mgm4454266.3'  # mgm4454266.3 is private
    try:
        async_rest_api(private_uri, auth="")
    except SystemExit:
        return
    assert False
コード例 #5
0
def test_nonexist():
    """Query a deleted metagenome without authentication.

    Both outcomes are accepted: a normal return, or ``SystemExit`` raised by
    ``async_rest_api``. Any other exception still fails the test.
    """
    deleted_uri = API_URL + '/matrix/organism?id=mgm4454394.3'  # mgm4454394.3 is deleted
    try:
        async_rest_api(deleted_uri, auth="")
    except SystemExit:
        # Bailing out with SystemExit is tolerated here.
        pass
コード例 #6
0
def test_badkey():
    """A request for a private metagenome with a bogus auth key must raise ``SystemExit``.

    A normal return from ``async_rest_api`` falls through to the failing
    assertion.
    """
    private_uri = API_URL + '/matrix/organism?id=mgm4454266.3'  # mgm4454266.3 is private
    try:
        async_rest_api(private_uri, auth="ABCDEFGThisIsOneNoGoodKey")
    except SystemExit:
        return
    assert False
コード例 #7
0
ファイル: test_api.py プロジェクト: MG-RAST/MG-RAST-Tools
def test_nonexist():
    """Query a deleted metagenome without authentication.

    Both outcomes are accepted: a normal return, or ``SystemExit`` raised by
    ``async_rest_api``. Any other exception still fails the test.
    """
    deleted_uri = API_URL + '/matrix/organism?id=mgm4454394.3'  # mgm4454394.3 is deleted
    try:
        async_rest_api(deleted_uri, auth="")
    except SystemExit:
        # Bailing out with SystemExit is tolerated here.
        pass
コード例 #8
0
ファイル: test_api.py プロジェクト: MG-RAST/MG-RAST-Tools
def test_badkey():
    """A request for a private metagenome with a bogus auth key must raise ``SystemExit``.

    A normal return from ``async_rest_api`` falls through to the failing
    assertion.
    """
    private_uri = API_URL + '/matrix/organism?id=mgm4454266.3'  # mgm4454266.3 is private
    try:
        async_rest_api(private_uri, auth="ABCDEFGThisIsOneNoGoodKey")
    except SystemExit:
        return
    assert False
コード例 #9
0
def test_async0():
    """Run an asynchronous phylum-level organism matrix query and print the outcome.

    Prints the auth token obtained from ``get_auth_token(None)`` and then the
    ``repr`` of whatever ``async_rest_api`` returns; nothing is asserted.
    """
    query = (
        '/matrix/organism'
        '?id=mgm4440275.3&id=mgm4440276.3&id=mgm4440281.3'
        '&group_level=phylum&source=RDP&hit_type=single'
        '&result_type=abundance&evalue=1&identity=60&length=15'
        '&taxid=0&asynchronous=1'
    )
    target = API_URL + query
    auth_token = get_auth_token(None)
    print(auth_token)
    result = async_rest_api(target, auth=auth_token)
    print(repr(result))
コード例 #10
0
def test_async_matrix3():
    """Fetch a family-level RefSeq organism matrix for three metagenomes and print it.

    Nothing is asserted; the test only checks that the request completes.
    """
    # A heavier alternative query used to be assigned to URI first and was
    # overwritten on the very next line, so it never ran (it was annotated
    # "takes too long??" and its path started with a stray "1"). It covered
    # mgm4653781.3, mgm4653783.3, mgm4653789.3, mgm4662211.3, mgm4662212.3,
    # mgm4662235.3 and mgm4662210.3 with group_level=phylum, source=RDP,
    # hit_type=single and asynchronous=1. The dead assignment is removed.
    URI = API_URL + '/matrix/organism?id=mgm4447943.3&id=mgm4447192.3&id=mgm4447102.3&group_level=family&source=RefSeq&evalue=15'
    token = get_auth_token(None)
    response = async_rest_api(URI, auth=token)
    print(response)
コード例 #11
0
def main(args):
    """Print the most abundant functional annotations of one metagenome.

    Options are parsed with ``parser.parse_args()`` (the ``args`` parameter is
    not consulted). An abundance matrix is requested from ``/matrix/function``;
    when both ``--filter_name`` and ``--filter_level`` are given, rows are
    restricted to the annotations returned by ``/m5nr/ontology``. Up to
    ``--top`` entries are emitted through ``safe_print`` as
    ``name<TAB>count`` lines, highest count first.

    Returns 0 on success, 1 when ``--id`` is missing or only one of the two
    filter options is supplied.
    """
    # NOTE(review): these two hooks look like a carry-over from optparse;
    # confirm that argparse consults them at all.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)

    # (flag, add_argument keyword arguments), registered in declaration order
    option_table = [
        ("--id", dict(dest="id", default=None, help="KBase Metagenome ID")),
        ("--url", dict(dest="url", default=API_URL, help="communities API url")),
        ("--user", dict(dest="user", default=None, help="OAuth username")),
        ("--passwd", dict(dest="passwd", default=None, help="OAuth password")),
        ("--token", dict(dest="token", default=None, help="OAuth token")),
        ("--level", dict(dest="level", default='function', help="functional level to retrieve abundances for, default is function")),
        ("--source", dict(dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")),
        ("--filter_name", dict(dest="filter_name", default=None, help="function name to filter by")),
        ("--filter_level", dict(dest="filter_level", default=None, help="function level to filter by")),
        ("--top", dict(dest="top", type=int, default=10, help="display only the top N taxa, default is 10")),
        ("--evalue", dict(dest="evalue", type=int, default=5, help="negative exponent value for maximum e-value cutoff, default is 5")),
        ("--identity", dict(dest="identity", type=int, default=60, help="percent value for minimum %% identity cutoff, default is 60")),
        ("--length", dict(dest="length", type=int, default=15, help="value for minimum alignment length cutoff, default is 15")),
        ("--version", dict(type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")),
    ]
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)

    # get inputs
    opts = parser.parse_args()
    opts.top = int(opts.top)
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    # exactly one of the two filter options supplied -> reject
    if bool(opts.filter_name) != bool(opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url and retrieve data
    matrix_params = [
        ('id', opts.id),
        ('group_level', opts.level),
        ('source', opts.source),
        ('evalue', opts.evalue),
        ('identity', opts.identity),
        ('length', opts.length),
        ('version', opts.version),
        ('result_type', 'abundance'),
        ('asynchronous', '1'),
        ('hide_metadata', '1'),
    ]
    matrix_url = opts.url+'/matrix/function?'+urlencode(matrix_params, True)
    biom = async_rest_api(matrix_url, auth=token)

    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        ontology_params = [
            ('filter', opts.filter_name),
            ('filter_level', opts.filter_level),
            ('min_level', opts.level),
            ('version', opts.version),
            ('source', opts.source),
        ]
        ontology_url = opts.url+'/m5nr/ontology?'+urlencode(ontology_params, True)
        ontology = obj_from_url(ontology_url)
        level_key = 'level4' if opts.level == 'function' else opts.level
        sub_ann = {entry[level_key] for entry in ontology['data']}

    # sort data
    top_ann = {}
    matrix_type = biom["matrix_type"]
    if matrix_type == "sparse":
        # sparse entries are indexed as [row, column, value]
        for triple in sorted(biom['data'], key=itemgetter(2), reverse=True):
            name = biom['rows'][triple[0]]['id']
            if len(top_ann) >= opts.top:
                break
            if (not sub_ann) or (name in sub_ann):
                top_ann[name] = triple[2]
    if matrix_type == "dense":
        dense_rows = biom['data']
        ordering = sorted(range(len(dense_rows)), key=lambda idx: dense_rows[idx], reverse=True)
        for idx in ordering:
            name = biom['rows'][idx]['id']
            if len(top_ann) >= opts.top:
                break
            if (not sub_ann) or (name in sub_ann):
                top_ann[name] = dense_rows[idx][0]

    # output data
    ranked = sorted(top_ann.items(), key=itemgetter(1), reverse=True)
    for name, abundance in ranked:
        safe_print("%s\t%d\n" %(name, abundance))

    return 0
コード例 #12
0
ファイル: test_async2.py プロジェクト: MG-RAST/MG-RAST-Tools
def test_async():
    """Run an asynchronous strain-level RefSeq organism query and print the outcome.

    Prints the auth token obtained from ``get_auth_token(None)`` and then the
    ``repr`` of whatever ``async_rest_api`` returns; nothing is asserted.
    """
    query = (
        '/matrix/organism'
        '?hit_type=single&group_level=strain&evalue=15'
        '&source=RefSeq&result_type=abundance'
        '&id=mgm4653783.3&asynchronous=1'
    )
    target = API_URL + query
    auth_token = get_auth_token(None)
    print("MG-RAST token: ", auth_token)
    result = async_rest_api(target, auth=auth_token)
    print(repr(result))
コード例 #13
0
ファイル: test_async2.py プロジェクト: wltrimbl/MG-RAST-Tools
def test_async():
    """Run an asynchronous strain-level RefSeq organism query and print the outcome.

    Prints the auth token obtained from ``get_auth_token(None)`` and then the
    ``repr`` of whatever ``async_rest_api`` returns; nothing is asserted.
    """
    query = (
        '/matrix/organism'
        '?hit_type=single&group_level=strain&evalue=15'
        '&source=RefSeq&result_type=abundance'
        '&id=mgm4653783.3&asynchronous=1'
    )
    target = API_URL + query
    auth_token = get_auth_token(None)
    print("MG-RAST token: ", auth_token)
    result = async_rest_api(target, auth=auth_token)
    print(repr(result))
コード例 #14
0
ファイル: mg-query.py プロジェクト: teharrison/MG-RAST-Tools
if __name__ == '__main__':
    # Command-line entry point: query an MG-RAST API URI and write the
    # response to stdout.
    #
    # The usage text must be passed by keyword: ArgumentParser's first
    # positional parameter is ``prog``, not ``usage``. argparse also expands
    # ``%(prog)s`` (``%prog`` is optparse syntax) and prepends "usage: "
    # itself.
    usage = "%(prog)s [options]  URI"
    parser = ArgumentParser(usage=usage)
    parser.add_argument("-v", "--verbose", dest="verbose", action="store_true")
    parser.add_argument("-k",
                        "--token",
                        dest="token",
                        type=str,
                        help="Auth token")
    parser.add_argument("URI", type=str, help="URI to query")

    opts = parser.parse_args()
    key = get_auth_token(opts)
    if opts.verbose:
        print("KEY = {}".format(key), file=sys.stderr)

    # assign parameters
    URI = opts.URI

    # echo the URI being queried on stderr so stdout carries only the payload
    print(URI, file=sys.stderr)

    # retrieve the data by sending an HTTP GET request to the MG-RAST API
    jsonstructure = async_rest_api(URI, auth=key)

    # unpack and display the result
    if isinstance(jsonstructure, dict):
        # parsed JSON structure: serialise it back to text
        print(json.dumps(jsonstructure), file=sys.stdout)
    else:
        # raw payload; presumably UTF-8 encoded bytes - verify against
        # async_rest_api's return types
        sys.stdout.write(jsonstructure.decode("utf-8"))
コード例 #15
0
def main(args):
    """Retrieve an organism abundance matrix for a set of metagenomes.

    Options are parsed with ``parser.parse_args()`` (the ``args`` parameter is
    not consulted). ``--ids`` is either a comma separated list or a file; a
    file may hold JSON with an ``elements`` mapping (its keys are the IDs) or
    a ``members`` list (each item's ``ID``), otherwise it is read as one ID
    per line. More than 50 IDs are requested from ``/matrix/organism`` in
    batches of 50 and combined with ``merge_biom``. The result is written as
    BIOM JSON, or as a tab table via ``biom_to_tab`` restricted to the taxa
    selected by ``--filter_name``/``--filter_level``.

    Changes from the previous revision: works on Python 3 (no ``xrange``, ID
    collections are real lists so they can be measured and sliced), files are
    closed deterministically, only parse-related errors trigger the
    plain-text ID fallback, and ``sys.stdout`` is no longer closed.

    Returns 0 on success, 1 on invalid or missing options.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--ids", dest="ids", default=None, help="comma seperated list or file of KBase Metagenome IDs")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='genus', help="taxon level to retrieve abundances for, default is genus")
    parser.add_argument("--source", dest="source", default='SEED', help="taxon datasource to filter results by, default is SEED")
    parser.add_argument("--hit_type", dest="hit_type", default='lca', help="Set of organisms to search results by, one of: all, single, lca")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="taxon level to filter by")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="taxon name to filter by, file or comma seperated list")
    parser.add_argument("--intersect_source", dest="intersect_source", default='Subsystems', help="function datasource for insersection, default is Subsystems")
    parser.add_argument("--intersect_level", dest="intersect_level", default=None, help="function level for insersection")
    parser.add_argument("--intersect_name", dest="intersect_name", default=None, help="function name(s) for insersection, file or comma seperated list")
    parser.add_argument("--output", dest="output", default='-', help="output: filename or stdout (-), default is stdout")
    parser.add_argument("--format", dest="format", default='biom', help="output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--evalue", type=int, dest="evalue", default=15, help="negative exponent value for maximum e-value cutoff, default is 15")
    parser.add_argument("--identity", type=int, dest="identity", default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", type=int, dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")
    parser.add_argument("--temp", dest="temp", default=None, help="filename to temporarly save biom output at each iteration")

    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    if (opts.intersect_name and (not opts.intersect_level)) or ((not opts.intersect_name) and opts.intersect_level):
        sys.stderr.write("ERROR: both --intersect_level and --intersect_name need to be used together\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # collect metagenome ids
    id_list = []
    if os.path.isfile(opts.ids):
        with open(opts.ids, 'r') as id_file:
            id_str = id_file.read()
        try:
            id_obj = json.loads(id_str)
            if 'elements' in id_obj:
                id_list = list(id_obj['elements'].keys())
            elif 'members' in id_obj:
                id_list = [member['ID'] for member in id_obj['members']]
        except (ValueError, TypeError, KeyError, AttributeError):
            # not JSON, or JSON of an unexpected shape: one ID per line
            id_list = id_str.strip().split('\n')
    else:
        id_list = opts.ids.strip().split(',')

    # build url
    params = [ ('group_level', opts.level),
               ('source', opts.source),
               ('hit_type', opts.hit_type),
               ('evalue', opts.evalue),
               ('identity', opts.identity),
               ('length', opts.length),
               ('version', opts.version),
               ('result_type', 'abundance'),
               ('asynchronous', '1') ]
    if opts.intersect_level and opts.intersect_name:
        params.append(('filter_source', opts.intersect_source))
        params.append(('filter_level', opts.intersect_level))
        if os.path.isfile(opts.intersect_name):
            with open(opts.intersect_name) as file_:
                for line in file_:
                    params.append(('filter', line.strip()))
        else:
            for name in opts.intersect_name.strip().split(','):
                params.append(('filter', name))

    # retrieve data
    biom = None
    size = 50
    if len(id_list) > size:
        for start in range(0, len(id_list), size):
            sub_ids = id_list[start:start+size]
            # fresh parameter list per batch so ids do not accumulate
            cur_params = params + [('id', mg_id) for mg_id in sub_ids]
            cur_url = opts.url+'/matrix/organism?'+urlencode(cur_params, True)
            cur_biom = async_rest_api(cur_url, auth=token)
            biom = merge_biom(biom, cur_biom)
            if opts.temp:
                # checkpoint the merged result after every batch
                with open(opts.temp, 'w') as temp_hdl:
                    json.dump(biom, temp_hdl)
    else:
        for mg_id in id_list:
            params.append(('id', mg_id))
        url = opts.url+'/matrix/organism?'+urlencode(params, True)
        biom = async_rest_api(url, auth=token)
        if opts.temp:
            with open(opts.temp, 'w') as temp_hdl:
                json.dump(biom, temp_hdl)

    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        # get input filter list
        if os.path.isfile(opts.filter_name):
            with open(opts.filter_name) as file_:
                filter_list = [line.strip() for line in file_]
        else:
            filter_list = opts.filter_name.strip().split(',')
        # annotation mapping from m5nr
        params = [ ('version', opts.version),
                   ('min_level', opts.level) ]
        url = opts.url+'/m5nr/taxonomy?'+urlencode(params, True)
        data = obj_from_url(url)
        for ann in data['data']:
            if (opts.filter_level in ann) and (opts.level in ann) and (ann[opts.filter_level] in filter_list):
                sub_ann.add(ann[opts.level])

    # output data
    to_stdout = (not opts.output) or (opts.output == '-')
    out_hdl = sys.stdout if to_stdout else open(opts.output, 'w')
    try:
        if opts.format == 'biom':
            out_hdl.write(json.dumps(biom)+"\n")
        else:
            biom_to_tab(biom, out_hdl, rows=sub_ann)
    finally:
        # only close handles opened here; stdout belongs to the interpreter
        if not to_stdout:
            out_hdl.close()
    return 0
コード例 #16
0
def main(args):
    """Print the most abundant functional annotations of one metagenome.

    Options are parsed with ``parser.parse_args()`` (the ``args`` parameter is
    not consulted). An abundance matrix is requested from ``/matrix/function``
    and the BIOM object is taken from the response's ``"data"`` member. When
    both ``--filter_name`` and ``--filter_level`` are given, rows are
    restricted to the annotations returned by ``/m5nr/ontology``. Up to
    ``--top`` entries are emitted through ``safe_print`` as
    ``name<TAB>count`` lines, highest count first.

    Returns 0 on success, 1 when ``--id`` is missing or only one of the two
    filter options is supplied.
    """
    # NOTE(review): these two hooks look like a carry-over from optparse;
    # confirm that argparse consults them at all.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)

    # (flag, add_argument keyword arguments), registered in declaration order
    option_table = [
        ("--id", dict(dest="id", default=None, help="KBase Metagenome ID")),
        ("--url", dict(dest="url", default=API_URL, help="communities API url")),
        ("--user", dict(dest="user", default=None, help="OAuth username")),
        ("--passwd", dict(dest="passwd", default=None, help="OAuth password")),
        ("--token", dict(dest="token", default=None, help="OAuth token")),
        ("--level", dict(dest="level", default='function', help="functional level to retrieve abundances for, default is function")),
        ("--source", dict(dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")),
        ("--filter_name", dict(dest="filter_name", default=None, help="function name to filter by")),
        ("--filter_level", dict(dest="filter_level", default=None, help="function level to filter by")),
        ("--top", dict(dest="top", type=int, default=10, help="display only the top N taxa, default is 10")),
        ("--evalue", dict(dest="evalue", type=int, default=5, help="negative exponent value for maximum e-value cutoff, default is 5")),
        ("--identity", dict(dest="identity", type=int, default=60, help="percent value for minimum %% identity cutoff, default is 60")),
        ("--length", dict(dest="length", type=int, default=15, help="value for minimum alignment length cutoff, default is 15")),
        ("--version", dict(type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")),
    ]
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)

    # get inputs
    opts = parser.parse_args()
    opts.top = int(opts.top)
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    # exactly one of the two filter options supplied -> reject
    if bool(opts.filter_name) != bool(opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url and retrieve data
    matrix_params = [
        ('id', opts.id),
        ('group_level', opts.level),
        ('source', opts.source),
        ('evalue', opts.evalue),
        ('identity', opts.identity),
        ('length', opts.length),
        ('version', opts.version),
        ('result_type', 'abundance'),
        ('asynchronous', '1'),
        ('hide_metadata', '1'),
    ]
    matrix_url = opts.url+'/matrix/function?'+urlencode(matrix_params, True)
    response = async_rest_api(matrix_url, auth=token)

    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        ontology_params = [
            ('filter', opts.filter_name),
            ('filter_level', opts.filter_level),
            ('min_level', opts.level),
            ('version', opts.version),
            ('source', opts.source),
        ]
        ontology_url = opts.url+'/m5nr/ontology?'+urlencode(ontology_params, True)
        ontology = obj_from_url(ontology_url)
        level_key = 'level4' if opts.level == 'function' else opts.level
        sub_ann = {entry[level_key] for entry in ontology['data']}

    # the BIOM object sits under the response's "data" member
    biom = response["data"]
    assert "matrix_type" in biom.keys(), repr(biom)

    # sort data
    top_ann = {}
    matrix_type = biom["matrix_type"]
    if matrix_type == "sparse":
        # sparse entries are indexed as [row, column, value]
        for triple in sorted(biom['data'], key=itemgetter(2), reverse=True):
            name = biom['rows'][triple[0]]['id']
            if len(top_ann) >= opts.top:
                break
            if (not sub_ann) or (name in sub_ann):
                top_ann[name] = triple[2]
    if matrix_type == "dense":
        dense_rows = biom['data']
        ordering = sorted(range(len(dense_rows)), key=lambda idx: dense_rows[idx], reverse=True)
        for idx in ordering:
            name = biom['rows'][idx]['id']
            if len(top_ann) >= opts.top:
                break
            if (not sub_ann) or (name in sub_ann):
                top_ann[name] = dense_rows[idx][0]

    # output data
    ranked = sorted(top_ann.items(), key=itemgetter(1), reverse=True)
    for name, abundance in ranked:
        safe_print("%s\t%d\n" %(name, abundance))

    return 0
コード例 #17
0
ファイル: mg-query.py プロジェクト: MG-RAST/MG-RAST-Tools
from mglib import async_rest_api, get_auth_token

DEBUG = 0

if __name__ == '__main__':
    # Command-line entry point: query an MG-RAST API URI and write the
    # response to stdout.
    #
    # The usage text must be passed by keyword: ArgumentParser's first
    # positional parameter is ``prog``, not ``usage``. argparse also expands
    # ``%(prog)s`` (``%prog`` is optparse syntax) and prepends "usage: "
    # itself.
    usage = "%(prog)s [options]  URI"
    parser = ArgumentParser(usage=usage)
    parser.add_argument("-v", "--verbose", dest="verbose", action="store_true")
    parser.add_argument("-k", "--token", dest="token", type=str,
                        help="Auth token")
    parser.add_argument("URI", type=str, help="URI to query")

    opts = parser.parse_args()
    key = get_auth_token(opts)
    if opts.verbose:
        print("KEY = {}".format(key), file=sys.stderr)

    # assign parameters
    URI = opts.URI

    # echo the URI being queried on stderr so stdout carries only the payload
    print(URI, file=sys.stderr)

    # retrieve the data by sending an HTTP GET request to the MG-RAST API
    jsonstructure = async_rest_api(URI, auth=key)

    # unpack and display the result
    if isinstance(jsonstructure, str):
        # already text: pass it through untouched
        sys.stdout.write(jsonstructure)
    else:
        # anything else is serialised as JSON
        print(json.dumps(jsonstructure), file=sys.stdout)
コード例 #18
0
ファイル: test_api.py プロジェクト: MG-RAST/MG-RAST-Tools
def test_async_matrix3():
    """Fetch a family-level RefSeq organism matrix for three metagenomes and print it.

    Nothing is asserted; the test only checks that the request completes.
    """
    # A heavier alternative query used to be assigned to URI first and was
    # overwritten on the very next line, so it never ran (it was annotated
    # "takes too long??" and its path started with a stray "1"). It covered
    # mgm4653781.3, mgm4653783.3, mgm4653789.3, mgm4662211.3, mgm4662212.3,
    # mgm4662235.3 and mgm4662210.3 with group_level=phylum, source=RDP,
    # hit_type=single and asynchronous=1. The dead assignment is removed.
    URI = API_URL + '/matrix/organism?id=mgm4447943.3&id=mgm4447192.3&id=mgm4447102.3&group_level=family&source=RefSeq&evalue=15'
    token = get_auth_token(None)
    response = async_rest_api(URI, auth=token)
    print(response)
コード例 #19
0
def main(args):
    """Print the most abundant taxa of one metagenome.

    Options are parsed with ``parser.parse_args()`` (the ``args`` parameter is
    not consulted). An abundance matrix is requested from ``/matrix/organism``;
    when both ``--filter_name`` and ``--filter_level`` are given, rows are
    restricted to the taxa returned by ``/m5nr/taxonomy``. Up to ``--top``
    entries are emitted through ``safe_print`` as ``name<TAB>count`` lines,
    highest count first.

    Fix over the previous revision: abundances are now read from the dense
    matrix. Before, a sparse response was converted with ``sparse_to_dense``
    but the values were still taken from the raw ``biom['data']`` triples, so
    the first element of each triple (a row index) was reported as the count.

    Returns 0 on success, 1 when ``--id`` is missing or only one of the two
    filter options is supplied.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='species', help="taxon level to retrieve abundances for, default is species")
    parser.add_argument("--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="taxon name to filter by")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="taxon level to filter by")
    parser.add_argument("--top", dest="top", type=int, default=10, help="display only the top N taxa, default is 10")
    parser.add_argument("--evalue", dest="evalue", type=int, default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", type=int, default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", type=int, default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")

    # get inputs
    opts = parser.parse_args()
    opts.top = int(opts.top)
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    if (opts.filter_name and
        (not opts.filter_level)) or ((not opts.filter_name)
                                     and opts.filter_level):
        sys.stderr.write(
            "ERROR: both --filter_level and --filter_name need to be used together\n"
        )
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url
    params = [('id', opts.id), ('group_level', opts.level),
              ('source', opts.source), ('evalue', opts.evalue),
              ('identity', opts.identity), ('length', opts.length),
              ('version', opts.version), ('result_type', 'abundance'),
              ('asynchronous', '1'), ('hide_metadata', '1')]
    url = opts.url + '/matrix/organism?' + urlencode(params, True)

    # retrieve data
    top_ann = {}
    biom = async_rest_api(url, auth=token)

    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [('filter', opts.filter_name),
                  ('filter_level', opts.filter_level),
                  ('min_level', opts.level), ('version', opts.version)]
        url = opts.url + '/m5nr/taxonomy?' + urlencode(params, True)
        taxonomy = obj_from_url(url)
        sub_ann = {entry[opts.level] for entry in taxonomy['data']}

    # normalise to a dense matrix so every row can be read the same way
    if biom['matrix_type'] == "dense":
        matrix = biom['data']
    else:
        # presumably returns a rows x cols list of lists - verify against
        # sparse_to_dense in mglib
        matrix = sparse_to_dense(biom['data'], len(biom['rows']),
                                 len(biom['cols']))
    names = [row['id'] for row in biom['rows']]
    # a single --id is requested, so the first column holds its abundances
    abundances = [matrix_row[0] for matrix_row in matrix]

    # sort data
    for name, abundance in sorted(zip(names, abundances),
                                  key=itemgetter(1),
                                  reverse=True):
        if len(top_ann) >= opts.top:
            break
        if sub_ann and (name not in sub_ann):
            continue
        top_ann[name] = abundance

    # output data
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" % (k, v))

    return 0
コード例 #20
0
def main(args):
    """Retrieve a functional abundance matrix for a set of metagenomes.

    Options are parsed with ``parser.parse_args()`` (the ``args`` parameter is
    not consulted). ``--ids`` is either a comma separated list or a file; a
    file may hold JSON with an ``elements`` mapping (its keys are the IDs) or
    a ``members`` list (each item's ``ID``), otherwise it is read as one ID
    per line. More than 50 IDs are requested from ``/matrix/function`` in
    batches of 50 and combined with ``merge_biom``. The result is written as
    BIOM JSON, or as a tab table via ``biom_to_tab`` restricted to the
    annotations selected by ``--filter_name``/``--filter_level``.

    Changes from the previous revision: works on Python 3 (no ``xrange``, ID
    collections are real lists so they can be measured and sliced), files are
    closed deterministically, only parse-related errors trigger the
    plain-text ID fallback, and ``sys.stdout`` is no longer closed.

    Returns 0 on success, 1 on invalid or missing options.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--ids", dest="ids", default=None, help="comma seperated list or file of KBase Metagenome IDs")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='level3', help="functional level to retrieve abundances for, default is level3")
    parser.add_argument("--source", dest="source", default='Subsystems', help="function datasource to filter results by, default is Subsystems")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="function level to filter by")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="function name to filter by, file or comma seperated list")
    parser.add_argument("--intersect_source", dest="intersect_source", default='SEED', help="taxon datasource for insersection, default is SEED")
    parser.add_argument("--intersect_level", dest="intersect_level", default=None, help="taxon level for insersection")
    parser.add_argument("--intersect_name", dest="intersect_name", default=None, help="taxon name(s) for insersection, file or comma seperated list")
    parser.add_argument("--output", dest="output", default='-', help="output: filename or stdout (-), default is stdout")
    parser.add_argument("--format", dest="format", default='biom', help="output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--evalue", type=int, dest="evalue", default=15, help="negative exponent value for maximum e-value cutoff, default is 15")
    parser.add_argument("--identity", type=int, dest="identity", default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", type=int, dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")
    parser.add_argument("--temp", dest="temp", default=None, help="filename to temporarly save biom output at each iteration")

    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    if (opts.filter_name and
        (not opts.filter_level)) or ((not opts.filter_name)
                                     and opts.filter_level):
        sys.stderr.write(
            "ERROR: both --filter_level and --filter_name need to be used together\n"
        )
        return 1
    if (opts.intersect_name and
        (not opts.intersect_level)) or ((not opts.intersect_name)
                                        and opts.intersect_level):
        sys.stderr.write(
            "ERROR: both --intersect_level and --intersect_name need to be used together\n"
        )
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # collect metagenome ids
    id_list = []
    if os.path.isfile(opts.ids):
        with open(opts.ids, 'r') as id_file:
            id_str = id_file.read()
        try:
            id_obj = json.loads(id_str)
            if 'elements' in id_obj:
                id_list = list(id_obj['elements'].keys())
            elif 'members' in id_obj:
                id_list = [member['ID'] for member in id_obj['members']]
        except (ValueError, TypeError, KeyError, AttributeError):
            # not JSON, or JSON of an unexpected shape: one ID per line
            id_list = id_str.strip().split('\n')
    else:
        id_list = opts.ids.strip().split(',')

    # build url
    params = [('group_level', opts.level), ('source', opts.source),
              ('evalue', opts.evalue), ('identity', opts.identity),
              ('length', opts.length), ('version', opts.version),
              ('result_type', 'abundance'), ('asynchronous', '1')]
    if opts.intersect_level and opts.intersect_name:
        params.append(('filter_source', opts.intersect_source))
        params.append(('filter_level', opts.intersect_level))
        if os.path.isfile(opts.intersect_name):
            with open(opts.intersect_name) as file_:
                for line in file_:
                    params.append(('filter', line.strip()))
        else:
            for name in opts.intersect_name.strip().split(','):
                params.append(('filter', name))

    # retrieve data
    biom = None
    size = 50
    if len(id_list) > size:
        for start in range(0, len(id_list), size):
            sub_ids = id_list[start:start + size]
            # fresh parameter list per batch so ids do not accumulate
            cur_params = params + [('id', mg_id) for mg_id in sub_ids]
            cur_url = opts.url + '/matrix/function?' + urlencode(
                cur_params, True)
            cur_biom = async_rest_api(cur_url, auth=token)
            biom = merge_biom(biom, cur_biom)
            if opts.temp:
                # checkpoint the merged result after every batch
                with open(opts.temp, 'w') as temp_hdl:
                    json.dump(biom, temp_hdl)
    else:
        for mg_id in id_list:
            params.append(('id', mg_id))
        url = opts.url + '/matrix/function?' + urlencode(params, True)
        biom = async_rest_api(url, auth=token)
        if opts.temp:
            with open(opts.temp, 'w') as temp_hdl:
                json.dump(biom, temp_hdl)

    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        # get input filter list
        if os.path.isfile(opts.filter_name):
            with open(opts.filter_name) as file_:
                filter_list = [line.strip() for line in file_]
        else:
            filter_list = opts.filter_name.strip().split(',')
        # annotation mapping from m5nr
        params = [('version', opts.version), ('min_level', opts.level),
                  ('source', opts.source)]
        url = opts.url + '/m5nr/ontology?' + urlencode(params, True)
        data = obj_from_url(url)
        # the ontology records are keyed 'level4' where the option says 'function'
        level = 'level4' if opts.level == 'function' else opts.level
        for ann in data['data']:
            if (opts.filter_level in ann) and (level in ann) and (
                    ann[opts.filter_level] in filter_list):
                sub_ann.add(ann[level])

    # output data
    to_stdout = (not opts.output) or (opts.output == '-')
    out_hdl = sys.stdout if to_stdout else open(opts.output, 'w')
    try:
        if opts.format == 'biom':
            out_hdl.write(json.dumps(biom) + "\n")
        else:
            biom_to_tab(biom, out_hdl, rows=sub_ann)
    finally:
        # only close handles opened here; stdout belongs to the interpreter
        if not to_stdout:
            out_hdl.close()
    return 0
コード例 #21
0
def main(args):
    """Print the most abundant taxa of one metagenome as tab-separated lines.

    Command-line options are parsed with ``optparse``.  Note that
    ``parse_args()`` is called without arguments, so options are read from
    ``sys.argv`` and the ``args`` parameter itself is not consulted.

    The organism abundance matrix for ``--id`` is fetched through
    ``async_rest_api``; when both ``--filter_name`` and ``--filter_level``
    are given, the set of allowed taxa is fetched from the ``/m5nr/taxonomy``
    resource and only rows whose id is in that set are kept.  The first
    column of the matrix is used as the abundance, and up to ``--top`` rows
    with the largest abundance are written with ``safe_print`` as
    ``"<row id>\\t<abundance>"``.

    Returns:
        0 on success, 1 when ``--id`` is missing or when only one of
        ``--filter_name`` / ``--filter_level`` was supplied.
    """
    # Show description and epilog verbatim instead of letting optparse re-wrap them.
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_option("", "--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--level", dest="level", default='species', help="taxon level to retrieve abundances for, default is species")
    parser.add_option("", "--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")
    parser.add_option("", "--filter_name", dest="filter_name", default=None, help="taxon name to filter by")
    parser.add_option("", "--filter_level", dest="filter_level", default=None, help="taxon level to filter by")
    parser.add_option("", "--top", dest="top", type="int", default=10, help="display only the top N taxa, default is 10")
    parser.add_option("", "--evalue", dest="evalue", type="int", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_option("", "--identity", dest="identity", type="int", default=60, help="percent value for minimum % identity cutoff, default is 60")
    parser.add_option("", "--length", dest="length", type="int", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_option("", "--version", type="int", dest="version", default=1, help="M5NR annotation version to use, default is 1")

    # get inputs (--top is already an int thanks to type="int")
    (opts, args) = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    # the two filter options only make sense as a pair
    if bool(opts.filter_name) != bool(opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url
    params = [
        ('id', opts.id),
        ('group_level', opts.level),
        ('source', opts.source),
        ('evalue', opts.evalue),
        ('identity', opts.identity),
        ('length', opts.length),
        ('version', opts.version),
        ('result_type', 'abundance'),
        ('asynchronous', '1'),
        ('hide_metadata', '1'),
    ]
    url = opts.url+'/matrix/organism?'+urlencode(params, True)

    # retrieve data
    biom = async_rest_api(url, auth=token)

    # get sub annotations: names at opts.level that fall under the requested filter
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [
            ('filter', opts.filter_name),
            ('filter_level', opts.filter_level),
            ('min_level', opts.level),
            ('version', opts.version),
        ]
        url = opts.url+'/m5nr/taxonomy?'+urlencode(params, True)
        taxa = obj_from_url(url)
        sub_ann = {entry[opts.level] for entry in taxa['data']}

    # normalise the matrix to one list per row
    if biom['matrix_type'] == "dense":
        data = biom['data']
    else:
        data = sparse_to_dense(biom['data'], len(biom['rows']), len(biom['cols']))

    # Pair every row id with its first-column abundance.  The value must be
    # read from the densified ``data``: in a sparse BIOM payload the raw
    # biom['data'] entries are [row, col, value] triples, so indexing them
    # by row number would return row indices instead of abundances.
    abundances = [(row['id'], data[i][0]) for i, row in enumerate(biom['rows'])]

    # walk rows from most to least abundant, keeping the first opts.top matches
    top_ann = {}
    for name, count in sorted(abundances, key=itemgetter(1), reverse=True):
        if len(top_ann) >= opts.top:
            break
        if sub_ann and (name not in sub_ann):
            continue
        top_ann[name] = count

    # output data
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" %(k, v))

    return 0