def main(args):
    """List alpha diversity values for one or more KBase metagenomes.

    Requires --ids (comma separated metagenome IDs); writes one
    "<id>\t<alpha diversity>" line per metagenome to stdout.
    Returns 0 on success, 1 when no ids were supplied.
    """
    # emit description/epilog verbatim instead of letting optparse re-wrap them
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    # fixed typo in help text: "seperated" -> "separated"
    parser.add_option("", "--ids", dest="ids", default=None, help="comma separated list of KBase Metagenome IDs")
    parser.add_option("", "--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--level", dest="level", default='species', help="taxon level to retrieve abundances for, default is species")
    parser.add_option("", "--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")
    
    # get inputs
    (opts, args) = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    
    # get auth
    token = get_auth_token(opts)
    
    # build url / retrieve data / output data -- one API call per metagenome
    id_list = opts.ids.split(',')
    params  = [ ('level', opts.level), ('source', opts.source) ]
    for i in id_list:
        url  = opts.url+'/compute/alphadiversity/'+i+'?'+urlencode(params, True)
        data = obj_from_url(url, auth=token)
        safe_print("%s\t%s\n" %(i, data['data']))
    
    return 0
Ejemplo n.º 2
0
def main(args):
    """Search MG-RAST metagenomes and print matching records as a TSV table.

    Builds a /search query from the command line options, pages through
    results when --limit exceeds the per-request maximum, and prints a
    header row followed by one row per metagenome.  Returns 0 always,
    including the "no results" case.
    """
    # emit description/epilog verbatim instead of letting argparse re-wrap them
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%(VERSION, search_opts), epilog=posthelp%AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--limit", dest="limit", type=int, default=15, help="Number of results to show, if > 50 will use paginated queries to get all, default 15")
    parser.add_argument("--order", dest="order", default=None, help="field metagenomes are ordered by, default is no ordering")
    parser.add_argument("--direction", dest="direction", default="asc", help="direction of order. 'asc' for ascending order, 'desc' for descending order, default is asc")
    # fixed typo in help text: "pubulic" -> "public"
    parser.add_argument("--public", dest="public", action="store_true", default=False, help="return both private and public data if using authenticated search, default is private only.  Non-authenticated search only returns public.")
    # NOTE(review): --match and --status are parsed but never added to the
    # query parameters below -- confirm whether they should be forwarded.
    parser.add_argument("--match", dest="match", default="all", help="search logic. 'all' for metagenomes that match all search parameters, 'any' for metagenomes that match any search parameters, default is all")
    parser.add_argument("--status", dest="status", default="public", help="types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public")
    parser.add_argument("--verbosity", dest="verbosity", default='minimal', help="amount of information to display. use keyword 'minimal' for id and name, use keyword 'full' for MIxS GSC metadata, default is minimal")
    for sfield in SEARCH_FIELDS:
        parser.add_argument("--"+sfield, dest=sfield, default=None, help="search parameter: query string for "+sfield)
    
    # get inputs
    opts = parser.parse_args()
    
    # get auth
    token = get_auth_token(opts)
    
    # build call url; per-request limit is capped at the API maximum
    total = 0
    maxLimit = 50
    params = [ ('limit', opts.limit if opts.limit < maxLimit else maxLimit),
               ('public', 'yes' if opts.public or (not token) else 'no') ]
    for sfield in SEARCH_FIELDS:
        if hasattr(opts, sfield) and getattr(opts, sfield):
            params.append((sfield, getattr(opts, sfield)))
    if opts.order:
        params.append(('order', opts.order))
        params.append(('direction', opts.direction))
    url = opts.url+'/search?'+urlencode(params, True)
    
    # retrieve data
    fields = ['metagenome_id', 'public'] + SEARCH_FIELDS
    result = obj_from_url(url, auth=token)
    found = len(result['data'])
    if found == 0:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0
    total += found
    
    # output header
    safe_print("\t".join(fields)+"\n")
    # output rows
    display_search(result['data'], fields)
    
    # follow pagination links until the requested number of rows is shown
    while ('next' in result) and result['next'] and (total < opts.limit):
        url = result['next']
        result = obj_from_url(url, auth=token)
        total += len(result['data'])
        display_search(result['data'], fields)
    
    return 0
Ejemplo n.º 3
0
def main(args):
    """List alpha diversity values for one or more KBase metagenomes.

    Requires --ids (comma separated metagenome IDs); writes one
    "<id>\t<alpha diversity>" line per metagenome to stdout.
    Returns 0 on success, 1 when no ids were supplied.
    """
    # emit description/epilog verbatim instead of letting argparse re-wrap them
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    # fixed typo in help text: "seperated" -> "separated"
    parser.add_argument("--ids",
                        dest="ids",
                        default=None,
                        help="comma separated list of KBase Metagenome IDs")
    parser.add_argument("--url",
                        dest="url",
                        default=API_URL,
                        help="communities API url")
    parser.add_argument("--user",
                        dest="user",
                        default=None,
                        help="OAuth username")
    parser.add_argument("--passwd",
                        dest="passwd",
                        default=None,
                        help="OAuth password")
    parser.add_argument("--token",
                        dest="token",
                        default=None,
                        help="OAuth token")
    parser.add_argument(
        "--level",
        dest="level",
        default='species',
        help="taxon level to retrieve abundances for, default is species")
    parser.add_argument(
        "--source",
        dest="source",
        default='SEED',
        help="datasource to filter results by, default is SEED")

    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url / retrieve data / output data -- one API call per metagenome
    id_list = opts.ids.split(',')
    params = [('level', opts.level), ('source', opts.source)]
    for i in id_list:
        url = opts.url + '/compute/alphadiversity/' + i + '?' + urlencode(
            params, True)
        data = obj_from_url(url, auth=token)
        safe_print("%s\t%s\n" % (i, data['data']))

    return 0
Ejemplo n.º 4
0
def main(args):
    """Print metadata for a single KBase metagenome as tab-separated text.

    --verbosity 'mixs' prints the top-level GSC MIxS fields; 'full' prints
    every metadata category (project, sample, library, env_package) as
    "category\tlabel\tvalue" rows.  Returns 0 on success, 1 on bad input.
    """
    # emit description/epilog verbatim instead of letting argparse re-wrap them
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--verbosity", dest="verbosity", default='mixs', help="amount of metadata to display. use keyword 'mixs' for GSC MIxS metadata, use keyword 'full' for all GSC metadata, default is mixs")
    
    # get inputs
    opts = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    # validate verbosity up front so a bad value never triggers an API call
    # (previously this was only checked after the request had been made)
    if opts.verbosity not in ('mixs', 'full'):
        sys.stderr.write("ERROR: invalid verbosity type\n")
        return 1
    
    # get auth
    token = get_auth_token(opts)
    
    # build call url -- the API keyword for 'full' output is 'metadata'
    verb = opts.verbosity if opts.verbosity == 'mixs' else 'metadata'
    url  = opts.url+'/metagenome/'+opts.id+'?verbosity='+verb

    # retrieve / output data
    result = obj_from_url(url, auth=token)
    if opts.verbosity == 'mixs':
        # MIxS fields live at the top level; skip the nested category blocks
        for r in sorted(result.keys()):
            if r not in ['project', 'library', 'sample']:
                safe_print("%s\t%s\n" %(r, result[r]))
    else:
        # 'full': dump every populated metadata category as TSV rows
        md = result['metadata']
        safe_print("category\tlabel\tvalue\n")
        if ('project' in md) and md['project']['data']:
            for p in sorted(md['project']['data'].keys()):
                safe_print("project\t%s\t%s\n" %(p, md['project']['data'][p]))
        if ('sample' in md) and md['sample']['data']:
            for s in sorted(md['sample']['data'].keys()):
                safe_print("sample\t%s\t%s\n" %(s, md['sample']['data'][s]))
        if ('library' in md) and ('type' in md['library']) and md['library']['data']:
            for l in sorted(md['library']['data'].keys()):
                safe_print("library: %s\t%s\t%s\n" %(md['library']['type'], l, md['library']['data'][l]))
        if ('env_package' in md) and ('type' in md['env_package']) and md['env_package']['data']:
            for e in sorted(md['env_package']['data'].keys()):
                safe_print("env package: %s\t%s\t%s\n" %(md['env_package']['type'], e, md['env_package']['data'][e]))
    
    return 0
def main(args):
    """Print metadata for a single KBase metagenome as tab-separated text.

    --verbosity 'mixs' prints the top-level GSC MIxS fields; 'full' prints
    every metadata category (project, sample, library, env_package) as
    "category\tlabel\tvalue" rows.  Returns 0 on success, 1 on bad input.
    """
    # emit description/epilog verbatim instead of letting optparse re-wrap them
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_option("", "--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--verbosity", dest="verbosity", default='mixs', help="amount of metadata to display. use keyword 'mixs' for GSC MIxS metadata, use keyword 'full' for all GSC metadata, default is mixs")
    
    # get inputs
    (opts, args) = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    # validate verbosity up front so a bad value never triggers an API call
    # (previously this was only checked after the request had been made)
    if opts.verbosity not in ('mixs', 'full'):
        sys.stderr.write("ERROR: invalid verbosity type\n")
        return 1
    
    # get auth
    token = get_auth_token(opts)
    
    # build call url -- the API keyword for 'full' output is 'metadata'
    verb = opts.verbosity if opts.verbosity == 'mixs' else 'metadata'
    url  = opts.url+'/metagenome/'+opts.id+'?verbosity='+verb

    # retrieve / output data
    result = obj_from_url(url, auth=token)
    if opts.verbosity == 'mixs':
        # MIxS fields live at the top level; skip the nested category blocks
        for r in sorted(result.keys()):
            if r not in ['project', 'library', 'sample']:
                safe_print("%s\t%s\n" %(r, result[r]))
    else:
        # 'full': dump every populated metadata category as TSV rows
        md = result['metadata']
        safe_print("category\tlabel\tvalue\n")
        if ('project' in md) and md['project']['data']:
            for p in sorted(md['project']['data'].keys()):
                safe_print("project\t%s\t%s\n" %(p, md['project']['data'][p]))
        if ('sample' in md) and md['sample']['data']:
            for s in sorted(md['sample']['data'].keys()):
                safe_print("sample\t%s\t%s\n" %(s, md['sample']['data'][s]))
        if ('library' in md) and ('type' in md['library']) and md['library']['data']:
            for l in sorted(md['library']['data'].keys()):
                safe_print("library: %s\t%s\t%s\n" %(md['library']['type'], l, md['library']['data'][l]))
        if ('env_package' in md) and ('type' in md['env_package']) and md['env_package']['data']:
            for e in sorted(md['env_package']['data'].keys()):
                safe_print("env package: %s\t%s\t%s\n" %(md['env_package']['type'], e, md['env_package']['data'][e]))
    
    return 0
def main(args):
    """Fetch annotated sequences for a metagenome, optionally filtered by function name.

    Requires --id; builds an /annotation/sequence query from the cutoff and
    filter options and hands the URL to stdout_from_url for output.
    Returns 0 on success, 1 when no id was supplied.
    """
    # render description/epilog as-is rather than letting argparse wrap them
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(
        usage='',
        description=prehelp % VERSION,
        epilog=posthelp % AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, type=str, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, type=str, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, type=str, help="OAuth token")
    parser.add_argument("--name", dest="name", default=None, type=str, help="function name to filter by")
    parser.add_argument("--level", dest="level", default='function', help="function level to filter by")
    parser.add_argument("--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_argument("--evalue", dest="evalue", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", default=60, help="percent value for minimum percent identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")

    # parse and validate command line
    opts = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1

    # authenticate
    token = get_auth_token(opts)

    # query parameters common to every request
    query = [
        ('source', opts.source),
        ('evalue', opts.evalue),
        ('identity', opts.identity),
        ('length', opts.length),
    ]
    # ontology-capable sources query by 'ontology' unless plain functions were asked for
    wants_ontology = (opts.source in ('Subsystems', 'KO', 'NOG', 'COG')) and (opts.level != 'function')
    query.append(('type', 'ontology' if wants_ontology else 'function'))
    if opts.name:
        query.append(('filter', opts.name))
        if opts.level:
            query.append(('filter_level', opts.level))
    url = "%s/annotation/sequence/%s?%s" % (opts.url, opts.id, urlencode(query, True))

    # stream results straight to stdout
    stdout_from_url(url, auth=token)

    return 0
def main(args):
    """Fetch organism similarity annotations for a metagenome.

    Requires --id; builds an /annotation/similarity query from the cutoff
    and taxon-filter options and hands the URL to stdout_from_url for
    output.  Returns 0 on success, 1 when no id was supplied.
    """
    # emit description/epilog verbatim instead of letting optparse re-wrap them
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_option("", "--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--name", dest="name", default=None, help="taxon name to filter by")
    parser.add_option("", "--level", dest="level", default=None, help="taxon level to filter by")
    parser.add_option("", "--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")
    parser.add_option("", "--evalue", dest="evalue", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    # reworded "%" -> "percent" to match the sibling argparse command's help text
    parser.add_option("", "--identity", dest="identity", default=60, help="percent value for minimum percent identity cutoff, default is 60")
    parser.add_option("", "--length", dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    
    # get inputs
    (opts, args) = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    
    # get auth
    token = get_auth_token(opts)
    
    # build url -- cutoffs first, then the fixed result type and any filter
    params = [ ('source', opts.source),
               ('evalue', opts.evalue),
               ('identity', opts.identity),
               ('length', opts.length),
               ('type', 'organism') ]
    if opts.name:
        params.append(('filter', opts.name))
        if opts.level:
            params.append(('filter_level', opts.level))
    url = opts.url+'/annotation/similarity/'+opts.id+'?'+urlencode(params, True)
    
    # output data
    stdout_from_url(url, auth=token)
    
    return 0
Ejemplo n.º 8
0
def main(args):
    """Command-line dispatcher for MG-RAST inbox/submission actions.

    Parses global options plus a positional action (view, upload,
    upload-archive, rename, validate, compute, delete, submit, login, ...),
    validates the action's arguments, authenticates, and dispatches to the
    matching helper.  Returns 0 on success, 1 on any validation or
    authentication failure.
    """
    global mgrast_auth, API_URL, SHOCK_URL
    # emit description/epilog verbatim instead of letting argparse re-wrap them
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("-u",
                        "--mgrast_url",
                        dest="mgrast_url",
                        default=API_URL,
                        help="MG-RAST API url")
    parser.add_argument("-s",
                        "--shock_url",
                        dest="shock_url",
                        default=SHOCK_URL,
                        help="Shock API url")
    parser.add_argument("-t",
                        "--token",
                        dest="token",
                        default=None,
                        help="MG-RAST token")
    parser.add_argument("-p",
                        "--project",
                        dest="project",
                        default=None,
                        help="project ID")
    parser.add_argument("-m",
                        "--metadata",
                        dest="metadata",
                        default=None,
                        help="metadata file ID")
    parser.add_argument(
        "-j",
        "--joinfile",
        dest="joinfile",
        default=None,
        help=
        "name of resulting pair-merge file (without extension), default is <pair 1 filename>_<pair 2 filename>"
    )
    parser.add_argument("--retain",
                        dest="retain",
                        action="store_true",
                        default=False,
                        help="retain non-overlapping sequences in pair-merge")
    parser.add_argument(
        "--rc_index",
        dest="rc_index",
        action="store_true",
        default=False,
        help="barcodes in index file are reverse compliment of mapping file")
    parser.add_argument("-v",
                        "--verbose",
                        dest="verbose",
                        action="store_true",
                        default=False,
                        help="Verbose STDOUT")
    parser.add_argument("action", nargs="+", help="Action")

    global DEBUG
    # get inputs
    opts = parser.parse_args()
    args = opts.action
    # nargs="+" guarantees at least one element, but keep the guard defensive
    if len(args) < 1:
        sys.stderr.write(
            "ERROR: missing action, please check usage with %s -h\n" %
            (sys.argv[0]))
        return 1
    if opts.verbose:
        print("DEBUG")
        DEBUG = 1
    else:
        DEBUG = 0
    action = args[0]
    API_URL = opts.mgrast_url
    SHOCK_URL = opts.shock_url

    # validate inputs -- each action has its own argument-count requirements
    if action not in valid_actions:
        sys.stderr.write("ERROR: invalid action. use one of: %s\n" %
                         ", ".join(valid_actions))
        return 1
    # fixed garbled message: "invalid view ion" -> "invalid view option"
    elif (action == "view") and ((len(args) < 2) or
                                 (args[1] not in view_options)):
        sys.stderr.write("ERROR: invalid view option. use one of: %s\n" %
                         ", ".join(view_options))
        return 1
    elif (action in ["upload", "upload-archive", "delete", "submit"
                     ]) and (len(args) < 2):
        sys.stderr.write("ERROR: %s missing file\n" % action)
        return 1
    elif action == "upload":
        for f in args[1:]:
            if not os.path.isfile(f):
                sys.stderr.write("ERROR: upload file '%s' does not exist\n" %
                                 f)
                return 1
    elif action == "upload-archive":
        if len(args[1:]) > 1:
            sys.stderr.write("ERROR: upload-archive only supports one file\n")
            return 1
        if not os.path.isfile(args[1]):
            sys.stderr.write(
                "ERROR: upload-archive file '%s' does not exist\n" % args[1])
            return 1
    elif (action == "rename") and (len(args) != 3):
        sys.stderr.write("ERROR: %s missing file or name\n" % action)
        return 1
    elif action == "validate":
        # fixed garbled message: "invalid validate ion" -> "invalid validate option"
        if (len(args) < 2) or (args[1] not in validate_options):
            sys.stderr.write("ERROR: invalid validate option. use one of: %s\n" %
                             ", ".join(validate_options))
            return 1
        if len(args) < 3:
            sys.stderr.write("ERROR: validate missing file\n")
            return 1
    elif action == "compute":
        if (len(args) < 2) or (args[1] not in compute_actions):
            sys.stderr.write(
                "ERROR: invalid compute action. use one of: %s\n" %
                ", ".join(compute_actions))
            return 1
        # each compute sub-action takes a fixed number of file arguments
        if (((args[1] == "sff2fastq") and (len(args) != 3))
                or ((args[1] == "demultiplex") and (len(args) < 4))
                or ((args[1] == "pairjoin") and (len(args) != 4))
                or ((args[1] == "pairjoin_demultiplex") and (len(args) != 6))):
            sys.stderr.write("ERROR: compute %s missing file(s)\n" % args[1])
            return 1
    elif (action == "submit") and (not opts.project) and (not opts.metadata):
        sys.stderr.write(
            "ERROR: invalid submit, must have one of project or metadata\n")
        return 1

    # explict login
    token = get_auth_token(opts)
    if action == "login":
        if not token:
            token = input('Enter your MG-RAST auth token: ')
        login(token)
        return 0

    # get auth object, get from token if no login
    mgrast_auth = get_auth(token)
    if not mgrast_auth:
        return 1

    # actions
    if action == "view":
        view(args[1])
    elif action == "upload":
        upload(args[1:])
    elif action == "upload-archive":
        upload_archive(args[1])
    elif action == "rename":
        check_ids([args[1]])
        rename(args[1], args[2])
    elif action == "validate":
        check_ids(args[2:])
        validate(args[1], args[2:])
    elif action == "compute":
        check_ids(args[2:])
        compute(args[1], args[2:], opts.retain, opts.joinfile, opts.rc_index)
    elif action == "delete":
        check_ids(args[1:])
        delete(args[1:])
    elif action == "submit":
        check_ids(args[1:])
        submit(args[1:], opts.project, opts.metadata)

    return 0
Ejemplo n.º 9
0
def test_async_matrix3():
    """Smoke-test the asynchronous matrix endpoint with a small organism query."""
    # A larger 7-metagenome RDP query used to be assigned to URI here, but it
    # was dead code (immediately overwritten by the line below) and the
    # original author flagged it as "takes too long??", so it was removed.
    URI = API_URL + '/matrix/organism?id=mgm4447943.3&id=mgm4447192.3&id=mgm4447102.3&group_level=family&source=RefSeq&evalue=15'
    token = get_auth_token(None)
    response = async_rest_api(URI, auth=token)
    print(response)
Ejemplo n.º 10
0
def main(args):
    """Render a heatmap/dendrogram image from abundance data using R.

    Reads a BIOM or tab-separated abundance table (from a file or stdin),
    writes it to a temporary tab table, and invokes the R routine
    plot_mg_heatdend to produce the image named by --plot.
    Returns 0 on success, 1 on any input/validation error.
    """
    # emit description/epilog verbatim instead of letting argparse re-wrap them
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--user",
                        dest="user",
                        default=None,
                        help="OAuth username")
    parser.add_argument("--passwd",
                        dest="passwd",
                        default=None,
                        help="OAuth password")
    parser.add_argument("--token",
                        dest="token",
                        default=None,
                        help="OAuth token")
    parser.add_argument("--input",
                        dest="input",
                        default='-',
                        help="input: filename or stdin (-), default is stdin")
    parser.add_argument(
        "--format",
        dest="format",
        default='biom',
        help=
        "input format: 'text' for tabbed table, 'biom' for BIOM format, default is biom"
    )
    parser.add_argument("--plot",
                        dest="plot",
                        default=None,
                        help="filename for output plot")
    # NOTE(review): --cluster and --distance are parsed but never forwarded
    # to the R call below -- confirm whether they should be.
    parser.add_argument(
        "--cluster",
        dest="cluster",
        default='ward',
        help=
        "cluster function, one of: ward, single, complete, mcquitty, median, centroid, default is ward"
    )
    parser.add_argument(
        "--distance",
        dest="distance",
        default='bray-curtis',
        help=
        "distance function, one of: bray-curtis, euclidean, maximum, manhattan, canberra, minkowski, difference, default is bray-curtis"
    )
    parser.add_argument("--rlib", dest="rlib", default=None, help="R lib path")
    # fixed help text: the actual defaults below are 10, not 5/4
    parser.add_argument("--height",
                        dest="height",
                        type=float,
                        default=10,
                        help="image height in inches, default is 10")
    parser.add_argument("--width",
                        dest="width",
                        type=float,
                        default=10,
                        help="image width in inches, default is 10")
    parser.add_argument("--dpi",
                        dest="dpi",
                        type=int,
                        default=300,
                        help="image DPI, default is 300")
    parser.add_argument("--order",
                        dest="order",
                        type=int,
                        default=0,
                        help="order columns, default is off: 1=true, 0=false")
    parser.add_argument(
        "--name",
        dest="name",
        type=int,
        default=0,
        help="label columns by name, default is by id: 1=true, 0=false")
    parser.add_argument(
        "--label",
        dest="label",
        type=int,
        default=0,
        help="label image rows, default is off: 1=true, 0=false")

    # get inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    if not opts.plot:
        sys.stderr.write("ERROR: missing output filename\n")
        return 1
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']
    if not opts.rlib:
        sys.stderr.write("ERROR: missing path to R libs\n")
        return 1
    # bugfix: the original list also contained 'reference', which is not a
    # defined option, so getattr(opts, 'reference') raised AttributeError on
    # every invocation
    for o in ['order', 'name', 'label']:
        if getattr(opts, o) not in [0, 1]:
            sys.stderr.write("ERROR: invalid value for '%s'\n" % o)
            return 1

    # get auth
    token = get_auth_token(opts)

    # read input data (context managers close the handles promptly)
    try:
        if opts.input == '-':
            indata = sys.stdin.read()
        else:
            with open(opts.input, 'r') as in_hdl:
                indata = in_hdl.read()
    except Exception:
        sys.stderr.write("ERROR: unable to load input data\n")
        return 1

    # write the table R will read, converting from BIOM if necessary
    tmp_in = 'tmp_' + random_str() + '.txt'
    with open(tmp_in, 'w') as tmp_hdl:
        if opts.format == 'biom':
            try:
                biom = json.loads(indata)
                biom_to_tab(biom, tmp_hdl, col_name=(opts.name == 1))
            except Exception:
                sys.stderr.write("ERROR: input BIOM data not correct format\n")
                return 1
        else:
            tmp_hdl.write(indata)

    # build R cmd
    order = 'TRUE' if opts.order == 1 else 'FALSE'
    label = 'TRUE' if opts.label == 1 else 'FALSE'
    r_cmd = """source("%s/plot_mg_heatdend.r")
suppressMessages( plot_mg_heatdend(
    table_in="%s",
    image_out="%s",
    order_columns=%s,
    label_rows=%s,
    image_height_in=%.1f,
    image_width_in=%.1f,
    image_res_dpi=%d
))""" % (opts.rlib, tmp_in, opts.plot, order, label, opts.height, opts.width,
         opts.dpi)
    execute_r(r_cmd)

    # cleanup
    os.remove(tmp_in)

    return 0
Ejemplo n.º 11
0
    # NOTE(review): this block is a fragment -- the enclosing "def" header is
    # not visible in this file, so its parameters cannot be documented here.
    parser = OptionParser(usage)
#    parser.add_option("-i", "--input", dest="input", default=None, help="Input sequence file.")
    parser.add_option("-s", "--source", dest="source", default="RefSeq", help="Annotation source: RefSeq, GenBank, IMG, SEED, TrEMBL, SwissProt, PATRIC, KEG, RDP, Greengenes, LSU, SSU")
    parser.add_option("-g", "--grouplevel", dest="grouplevel", default="domain", help="Grouping level: strain, species, genus, family, order, class, phylum, domain / function, level1, level2, level3")
    parser.add_option("-l", "--list", dest="targetlist", default="", help="Target list (filename).")
#    parser.add_option("-o", "--output", dest="output", default=None, help="Output file.")
    parser.add_option("-i", "--hittype", dest="hittype", default="single", help="Hit type: all, single, lca")
    parser.add_option("-c", "--call", dest="call", default="organism", help="organism or function")
    # NOTE(review): the -e help text ("organism or function") looks copy-pasted
    # from -c and presumably should describe the e-value cutoff -- confirm.
    parser.add_option("-e", "--evalue", dest="evalue", default="1", help="organism or function")
    # NOTE(review): "abundnace" is misspelled, but it is the literal default
    # value and help text -- correcting it could change runtime behavior, so
    # it is left untouched; verify against whatever consumes resulttype.
    parser.add_option("-t", "--type", dest="resulttype", default="abundnace", help="Result type: abundnace, evalue, identity, or length")
#    parser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=True, help="Verbose [default off]")
    parser.add_option("-k", "--token", dest="token", type="str", help="Auth token")
    parser.add_option("-m", "--metagenomes", dest="metagenomes", default="", type="str", help="Metagenome list")

    (opts, args) = parser.parse_args()
    key = get_auth_token(opts)
# assign parameters
    # precedence: target-list file, then --metagenomes, then built-in demo IDs
    if not opts.targetlist == "":
        metagenomes = get_ids(opts.targetlist)
    elif not opts.metagenomes == "":
       metagenomes = opts.metagenomes.split(",")
    else:
        metagenomes = ["mgm4447943.3", "mgm4447102.3"]
    group_level = opts.grouplevel
    # NOTE(review): result_type is hard-coded to "abundance" and ignores the
    # -t/--type option parsed above -- confirm whether that is intended.
    result_type = "abundance"
    result_call = opts.call
    evalue = opts.evalue
    source = opts.source
    hittype = opts.hittype

# construct API call
Ejemplo n.º 12
0
def main(args):
    """Search MG-RAST metagenomes and print matching records as a tab table.

    Builds a /search query from the CLI options plus any SEARCH_FIELDS
    filters, follows paginated 'next' links until at least opts.limit
    records have been shown, and writes a header row plus one row per
    metagenome via display_search(). Returns 0 (including "no results").
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % (VERSION, search_opts),
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--limit", dest="limit", type=int, default=15,
                        help="Number of results to show, if > 50 will use paginated queries to get all, default 15")
    parser.add_argument("--order", dest="order", default=None,
                        help="field metagenomes are ordered by, default is no ordering")
    parser.add_argument("--direction", dest="direction", default="asc",
                        help="direction of order. 'asc' for ascending order, 'desc' for descending order, default is asc")
    # typo fix in help text: "pubulic" -> "public"
    parser.add_argument("--public", dest="public", action="store_true", default=False,
                        help="return both private and public data if using authenticated search, default is private only.  Non-authenticated search only returns public.")
    parser.add_argument("--match", dest="match", default="all",
                        help="search logic. 'all' for metagenomes that match all search parameters, 'any' for metagenomes that match any search parameters, default is all")
    parser.add_argument("--status", dest="status", default="public",
                        help="types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public")
    parser.add_argument("--verbosity", dest="verbosity", default='minimal',
                        help="amount of information to display. use keyword 'minimal' for id and name, use keyword 'full' for MIxS GSC metadata, default is minimal")
    # one generic query-string option per searchable field
    for sfield in SEARCH_FIELDS:
        parser.add_argument("--" + sfield, dest=sfield, default=None,
                            help="search parameter: query string for " + sfield)

    # get inputs
    opts = parser.parse_args()

    # get auth
    token = get_auth_token(opts)

    # build call url; the API caps page size at maxLimit, larger limits paginate
    total = 0
    maxLimit = 50
    params = [('limit', min(opts.limit, maxLimit)),
              ('public', 'yes' if opts.public or (not token) else 'no')]
    for sfield in SEARCH_FIELDS:
        if getattr(opts, sfield, None):
            params.append((sfield, getattr(opts, sfield)))
    if opts.order:
        params.append(('order', opts.order))
        params.append(('direction', opts.direction))
    url = opts.url + '/search?' + urlencode(params, True)

    # retrieve first page
    fields = ['metagenome_id', 'public'] + SEARCH_FIELDS
    result = obj_from_url(url, auth=token)
    found = len(result['data'])
    if found == 0:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0
    total += found

    # output header
    safe_print("\t".join(fields) + "\n")
    # output rows
    display_search(result['data'], fields)

    # follow pagination until the requested number of rows has been shown
    # or the API reports no further pages
    while ('next' in result) and result['next'] and (total < opts.limit):
        url = result['next']
        result = obj_from_url(url, auth=token)
        total += len(result['data'])
        display_search(result['data'], fields)

    return 0
def main(args):
    """Export a metagenome's BagIt-style research object (manifest + files).

    Downloads the research-object manifest and every aggregate file for a
    metagenome, pulls CWL workflow files from the MG-RAST pipeline git
    repository (rewriting their relative paths), and writes BagIt
    manifest-sha1.txt / tagmanifest-sha1.txt checksum files.
    With --list, only prints the manifest contents as a table.
    Returns 0 on success, 1 on bad arguments.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp.format(VERSION, RO_VERSION), epilog=posthelp%AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="MG-RAST API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--metagenome", dest="metagenome", default=None, help="metagenome ID")
    parser.add_argument("--dir", dest="dir", default=".", help="directory to export to")
    parser.add_argument("--list", dest="list", action="store_true", default=False, help="list files in manifest")

    # get inputs
    opts = parser.parse_args()
    if not opts.metagenome:
        sys.stderr.write("ERROR: a metagenome id is required\n")
        return 1
    if not os.path.isdir(opts.dir):
        sys.stderr.write("ERROR: dir '%s' does not exist\n"%opts.dir)
        return 1

    # get auth
    token = get_auth_token(opts)

    # get mg info (used below to fill in the workflow job template)
    url = opts.url+'/metagenome/'+opts.metagenome
    mg  = obj_from_url(url, auth=token)

    # get manifest
    url  = opts.url+'/researchobject/manifest/'+opts.metagenome
    data = obj_from_url(url, auth=token)

    # --list mode: print manifest as a table and stop
    if opts.list:
        pt = PrettyTable(["File Name", "Folder", "Media Type"])
        for info in data["aggregates"]:
            pt.add_row([info["bundledAs"]["filename"], info["bundledAs"]["folder"], info["mediatype"]])
        pt.align = "l"
        print(pt)
        return 0

    # clone pipeline repo into a throwaway dir to obtain the CWL files
    temp_name = random_str(10)
    pipeline_dir = os.path.join(opts.dir, temp_name)
    git_clone = "git clone https://github.com/MG-RAST/pipeline.git " + pipeline_dir
    os.system(git_clone)

    # write manifest; sha1s collects [hexdigest, relative path] pairs
    sha1s = []
    base = data["@context"][0]["@base"].strip('/')
    manifest_dir = os.path.join(opts.dir, base)
    os.mkdir(manifest_dir)
    data_str = json.dumps(data)
    with open(os.path.join(manifest_dir, data["manifest"]), 'w') as mfh:
        mfh.write(data_str)
    # BUG FIX: hashlib requires bytes in python3 -- encode before hashing
    sha1s.append([ hashlib.sha1(data_str.encode('utf-8')).hexdigest(), os.path.join(base, data["manifest"]) ])

    # download aggregates
    for info in data["aggregates"]:
        sys.stdout.write("Downloading %s ... "%(info["bundledAs"]["filename"]))
        folder = info["bundledAs"]["folder"].strip('/')
        folder_dir = os.path.join(opts.dir, folder)
        if not os.path.isdir(folder_dir):
            os.mkdir(folder_dir)
        if "githubusercontent" in info["uri"]:
            # CWL file: copy from the cloned repo, flattening relative paths
            pos = info["uri"].find("CWL")
            src = os.path.join(pipeline_dir, info["uri"][pos:])
            dst = os.path.join(folder_dir, info["bundledAs"]["filename"])
            text = open(src, 'r').read().replace('../Inputs/', '').replace('../Tools/', '').replace('../Workflows/', '')
            if dst.endswith('job.yaml'):
                text = edit_input(text, mg)
            with open(dst, 'w') as dfh:
                dfh.write(text)
            # BUG FIX: encode str before hashing (python3)
            sha1s.append([ hashlib.sha1(text.encode('utf-8')).hexdigest(), os.path.join(folder, info["bundledAs"]["filename"]) ])
        else:
            # regular file: stream from API; helper returns the sha1 for us
            fh = open(os.path.join(folder_dir, info["bundledAs"]["filename"]), 'w')
            s1 = file_from_url(info["uri"], fh, auth=token, sha1=True)
            fh.close()
            sha1s.append([ s1, os.path.join(folder, info["bundledAs"]["filename"]) ])
        sys.stdout.write("Done\n")

    # write BagIt checksum files: payload ('data/...') entries go to
    # manifest-sha1.txt, everything else to tagmanifest-sha1.txt
    mansha1 = open(os.path.join(opts.dir, "manifest-sha1.txt"), 'w')
    tagsha1 = open(os.path.join(opts.dir, "tagmanifest-sha1.txt"), 'w')
    sha1s.sort(key=lambda x: x[1])
    for s1 in sha1s:
        if s1[1].startswith('data'):
            mansha1.write("%s\t%s\n"%(s1[0], s1[1]))
        else:
            tagsha1.write("%s\t%s\n"%(s1[0], s1[1]))
    mansha1.close()
    tagsha1.close()

    # cleanup the temporary git clone
    shutil.rmtree(pipeline_dir)

    return 0
Ejemplo n.º 14
0
def test_async_matrix3():
    """Smoke test: run an asynchronous organism-matrix query and print it.

    Only checks that the async REST round trip completes; no assertions.
    """
    # NOTE: a larger 7-metagenome phylum/RDP query was previously assigned
    # here and immediately overwritten ("takes too long"); the dead
    # assignment has been removed.
    URI = API_URL + '/matrix/organism?id=mgm4447943.3&id=mgm4447192.3&id=mgm4447102.3&group_level=family&source=RefSeq&evalue=15'
    token = get_auth_token(None)
    response = async_rest_api(URI, auth=token)
    print(response)
Ejemplo n.º 15
0
def main(args):
    """Command-line dispatcher for the MG-RAST submission tool.

    Parses one positional 'action' (must be in valid_actions: login, list,
    status, delete, submit, ...) plus access/metadata/pipeline options,
    validates the combination, then routes to the matching handler.
    Returns 0 on success, 1 on any validation or authentication failure.
    """
    global mgrast_auth, API_URL, SHOCK_URL
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    # access options
    parser.add_argument("-u", "--mgrast_url", dest="mgrast_url", default=API_URL, help="MG-RAST API url")
    parser.add_argument("-s", "--shock_url", dest="shock_url", default=SHOCK_URL, help="Shock API url")
    parser.add_argument("-t", "--token", dest="token", default=None, help="MG-RAST token")
    # required options
    parser.add_argument("-m", "--metadata", dest="metadata", default=None, help="metadata .xlsx file")
    parser.add_argument("--project_id", dest="project_id", default=None, help="project ID")
    parser.add_argument("--project_name", dest="project_name", default=None, help="project name")
    # pairjoin / demultiplex options
    parser.add_argument("--mg_name", dest="mgname", default=None, help="name of pair-merge metagenome if not in metadata, default is UUID")
    parser.add_argument("--barcode", dest="barcode", default=None, help="barcode file: metagenome_name \\t barcode_sequence")
    parser.add_argument("--retain", dest="retain", action="store_true", default=False, help="retain non-overlapping sequences in pair-merge")
    parser.add_argument("--rc_index", dest="rc_index", action="store_true", default=False, help="barcodes in index file are reverse compliment of mapping file")
    # pipeline flags
    parser.add_argument("--assembled", dest="assembled", action="store_true", default=False, help="if true sequences are assembeled, default is false")
    parser.add_argument("--no_filter_ln", dest="no_filter_ln", action="store_true", default=False, help="if true skip sequence length filtering, default is on")
    parser.add_argument("--no_filter_ambig", dest="no_filter_ambig", action="store_true", default=False, help="if true skip sequence ambiguous bp filtering, default is on")
    parser.add_argument("--no_dynamic_trim", dest="no_dynamic_trim", action="store_true", default=False, help="if true skip qual score dynamic trimmer, default is on")
    parser.add_argument("--no_dereplicate", dest="no_dereplicate", action="store_true", default=False, help="if true skip dereplication, default is on")
    parser.add_argument("--no_bowtie", dest="no_bowtie", action="store_true", default=False, help="if true skip bowtie screening, default is on")
    # pipeline options
    # NOTE(review): the help strings of the next four options look shuffled
    # relative to their dest names (e.g. filter_ln_mult described as
    # "maximum ambiguous bps") -- confirm against the pipeline defaults.
    parser.add_argument("--filter_ln_mult", dest="filter_ln_mult", type=int, default=5, help="maximum ambiguous bps to allow through per sequence, default is 5")
    parser.add_argument("--max_ambig", dest="max_ambig", type=int, default=5, help="maximum number of low-quality bases per read, default is 5")
    parser.add_argument("--max_lqb", dest="max_lqb", type=int, default=15, help="quality threshold for low-quality bases, default is 15")
    parser.add_argument("--min_qual", dest="min_qual", type=float, default=2.0, help="sequence length filtering multiplier, default is 2.0")
    parser.add_argument("--screen_indexes", dest="screen_indexes", default=None, help="host organism to filter sequences by")
    parser.add_argument("--priority", dest="priority", default=None, help="indicate when making data public, influences analysis run time")
    # extra modes
    parser.add_argument("--synch", dest="synch", action="store_true", default=False, help="Run submit action in synchronious mode")
    parser.add_argument("--json_out", dest="json_out", default=None, help="Output final metagenome product as json object to this file, synch mode only")
    parser.add_argument("--json_in", dest="json_in", default=None, help="Input sequence file(s) encoded as shock handle in json file, simple or pairjoin types only")
    parser.add_argument("--tmp_dir", dest="tmp_dir", default="", help="Temp dir to download too if using json_in option, default is current working dir")
    parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", default=False, help="Verbose STDOUT")
    parser.add_argument("--debug", dest="debug", action="store_true", default=False, help="Submit in debug mode")
    parser.add_argument("action",  type=str, default=False, help="Action")
#    parser.add_argument("subaction", type=str, default=False, help="Action word 2", default=None)
    
    # get inputs
    opts = parser.parse_args()
    
    # special case: a shock-handle json input file implies the submit action
    json_submit = True if opts.json_in and os.path.isfile(opts.json_in) else False
    if json_submit:
        action = "submit"
    else:
        if len(opts.action) < 1:
            sys.stderr.write("ERROR: missing action\n")
            return 1
        action = opts.action
    # NOTE(review): 'args' is rebound here to the action STRING, so every
    # len(args) / args[1] / args[2:] expression below indexes single
    # characters of that string, not additional command-line words. This
    # looks like a leftover from an argv-list interface (optparse-style);
    # confirm that sub-action handling (e.g. "submit simple <file>")
    # actually works as intended before relying on it.
    args = opts.action
    API_URL = opts.mgrast_url
    SHOCK_URL = opts.shock_url

    if opts.verbose and opts.debug:
        print("##### Running in Debug Mode #####")
    
    # validate inputs
    if action not in valid_actions:
        sys.stderr.write("ERROR: invalid action. use one of: %s\n"%", ".join(valid_actions))
        return 1
    elif (action in ["status", "delete"]) and (len(args) < 2):
        sys.stderr.write("ERROR: %s missing submission ID\n"%action)
        return 1
    elif (action == "submit") and (not json_submit):
        if not (opts.project_id or opts.project_name or opts.metadata):
            sys.stderr.write("ERROR: invalid submit, must have one of project_id, project_name, or metadata\n")
            return 1
        if (len(args) < 2) or (args[1] not in submit_types):
            sys.stderr.write("ERROR: invalid submit option. use one of: %s\n"%", ".join(submit_types))
            return 1
        # each submit type requires a specific number of file arguments
        if ((args[1] == "simple") and (len(args) < 3) or
             ((args[1] == "batch") and (len(args) != 3)) or
             ((args[1] == "demultiplex") and (len(args) < 3)) or
             ((args[1] == "pairjoin") and (len(args) != 4)) or
             ((args[1] == "pairjoin_demultiplex") and (len(args) != 5))):
            sys.stderr.write("ERROR: submit %s missing file(s)\n"%args[1])
            return 1
        if ((args[1] == "demultiplex") or (args[1] == "pairjoin_demultiplex")) and (not (opts.metadata or opts.barcode)):
            sys.stderr.write("ERROR: submit %s requires either metadata or barcode file\n"%args[1])
            return 1
    
    # explicit login: prompt for a token if none was supplied
    token = get_auth_token(opts)
    if action == "login":
        if not token:
            token = input('Enter your MG-RAST auth token: ')
        login(token)
        return 0
    
    # get auth object, get from token if no login
    mgrast_auth = get_auth(token)
    if not mgrast_auth:
        return 1
    
    # actions: dispatch to the matching handler
    if action == "list":
        if opts.verbose:
            print("Listing all submissions for "+mgrast_auth['login'])
        listall()
    elif action == "status":
        if opts.verbose:
            print("Status for submission"+args[1])
        status(args[1])
    elif action == "delete":
        if opts.verbose:
            print("Deleting submission"+args[1])
        delete(args[1])
    elif action == "submit":
        # process input json if exists
        if json_submit:
            stype, infiles = seqs_from_json(opts.json_in, opts.tmp_dir)
        else:
            stype, infiles = args[1], args[2:]
        # get name from output json if used
        if opts.json_out and (stype == "pairjoin") and (not opts.mgname):
            opts.mgname = os.path.splitext(opts.json_out)[0]
        # submit it
        if opts.verbose:
            print("Starting submission %s for %d files"%(stype, len(infiles)))
        submit(stype, infiles, opts)

    return 0
Ejemplo n.º 16
0
def main(args):
    """Render a PCoA plot of an abundance matrix via R.

    Reads a BIOM or tab-delimited table (file or stdin), optionally maps
    metagenomes to colored groups (from a BIOM metadata field, a JSON
    string/file, or a tabbed group table), writes temporary input files,
    and invokes plot_mg_pcoa.r through execute_r().
    Returns 0 on success, 1 on any input error.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--input", dest="input", default='-',
                        help="input: filename or stdin (-), default is stdin")
    parser.add_argument("--format", dest="format", default='biom',
                        help="input format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--plot", dest="plot", default=None,
                        help="filename for output plot")
    parser.add_argument("--distance", dest="distance", default='bray-curtis',
                        help="distance metric, one of: bray-curtis, euclidean, maximum, manhattan, canberra, minkowski, difference, default is bray-curtis")
    parser.add_argument("--metadata", dest="metadata", default=None,
                        help="metadata field to color by, only for 'biom' input")
    parser.add_argument("--groups", dest="groups", default=None,
                        help="list of groups in JSON or tabbed format - either as input string or filename")
    parser.add_argument("--group_pos", dest="group_pos", type=int, default=1,
                        help="position of group to use, default is 1 (first)")
    parser.add_argument("--color_auto", dest="color_auto", type=int, default=0,
                        help="auto-create colors based on like group names, default is use group name as color: 1=true, 0=false")
    parser.add_argument("--rlib", dest="rlib", default=None, help="R lib path")
    # help text fixed: the actual defaults below are 10, not 6
    parser.add_argument("--height", dest="height", type=float, default=10,
                        help="image height in inches, default is 10")
    parser.add_argument("--width", dest="width", type=float, default=10,
                        help="image width in inches, default is 10")
    parser.add_argument("--dpi", dest="dpi", type=int, default=300,
                        help="image DPI, default is 300")
    parser.add_argument("--three", dest="three", type=int, default=0,
                        help="create 3-D PCoA, default is 2-D: 1=true, 0=false")
    parser.add_argument("--name", dest="name", type=int, default=0,
                        help="label columns by name, default is by id: 1=true, 0=false")
    parser.add_argument("--label", dest="label", type=int, default=0,
                        help="label image rows, default is off: 1=true, 0=false")

    # get and validate inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    if not opts.plot:
        sys.stderr.write("ERROR: missing output filename\n")
        return 1
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']
    if not opts.rlib:
        sys.stderr.write("ERROR: missing path to R libs\n")
        return 1
    if opts.metadata:
        opts.color_auto = 1
    # BUG FIX: the original list also contained 'reference', which is not a
    # defined option, so getattr() raised AttributeError on every run.
    for o in ['color_auto', 'three', 'name', 'label']:
        if getattr(opts, o) not in [0, 1]:
            sys.stderr.write("ERROR: invalid value for '%s'\n" % o)
            return 1

    # get auth
    token = get_auth_token(opts)

    # parse input into a temp tab-delimited table for R
    tmp_in = 'tmp_' + random_str() + '.txt'
    tmp_hdl = open(tmp_in, 'w')
    mg_list = []
    groups = []
    try:
        indata = sys.stdin.read() if opts.input == '-' else open(
            opts.input, 'r').read()
        if opts.format == 'biom':
            try:
                indata = json.loads(indata)
                # BUG FIX: materialize as a list -- map() is a one-shot
                # iterator in python3, so len(mg_list) below raised
                # TypeError and re-iteration silently yielded nothing
                mg_list = [col['id'] for col in indata['columns']]
                col_name = True if opts.name == 1 else False
                biom_to_tab(indata, tmp_hdl, col_name=col_name)
                if opts.metadata:
                    groups = metadata_from_biom(indata, opts.metadata)
            except Exception:
                sys.stderr.write("ERROR: input BIOM data not correct format\n")
                return 1
        else:
            tmp_hdl.write(indata)
            mg_list = indata.split('\n')[0].strip().split('\t')
    except Exception:
        sys.stderr.write("ERROR: unable to load input data\n")
        return 1
    tmp_hdl.close()

    # get groups if not in BIOM metadata and option used
    if (len(groups) == 0) and opts.groups:
        # try JSON first
        ## example of 2 group sets in json format
        ## [ {"group1": ["mg_id_1", "mg_id_2"], "group2": ["mg_id_3", "mg_id_4", "mg_id_5"]},
        ##   {"group1": ["mg_id_1", "mg_id_2", "mg_id_3"], "group2": ["mg_id_4", "mg_id_5"]} ]
        try:
            gdata = json.load(open(opts.groups, 'r')) if os.path.isfile(
                opts.groups) else json.loads(opts.groups)
            if opts.group_pos > len(gdata):
                sys.stderr.write(
                    "ERROR: position (%d) of group is out of bounds\n" %
                    opts.group_pos)
                return 1
            for m in mg_list:
                found_g = None
                for g, mgs in gdata[opts.group_pos - 1].items():
                    if m in mgs:
                        found_g = g
                        break
                if found_g:
                    groups.append(found_g)
                else:
                    sys.stderr.write("ERROR: metagenome %s not in a group\n" %
                                     m)
                    return 1
        # not JSON -- fall back to a tabbed group table
        except Exception:
            gtext = open(opts.groups, 'r').read() if os.path.isfile(
                opts.groups) else opts.groups
            grows, gcols, gdata = tab_to_matrix(gtext)
            if opts.group_pos > len(gdata[0]):
                sys.stderr.write(
                    "ERROR: position (%d) of group is out of bounds\n" %
                    opts.group_pos)
                # BUG FIX: bail out like the JSON branch does instead of
                # falling through to an index error below
                return 1
            for m in mg_list:
                try:
                    midx = gcols.index(m)
                    groups.append(gdata[midx][opts.group_pos - 1])
                except Exception:
                    sys.stderr.write("ERROR: metagenome %s not in a group\n" %
                                     m)
                    return 1

    # print groups to file for R input
    tmp_group = None
    if len(groups) == len(mg_list):
        tmp_group = 'tmp_' + random_str() + '.txt'
        hdl_group = open(tmp_group, 'w')
        hdl_group.write("\tgroup\n")
        for i, m in enumerate(mg_list):
            # map non-ASCII chars to '?' -- presumably the downstream R
            # script expects ASCII-only input (TODO confirm)
            hdl_group.write(
                "%s\t%s\n" %
                (m, ''.join([x if ord(x) < 128 else '?' for x in groups[i]])))
        hdl_group.close()
    elif len(groups) > 0:
        sys.stderr.write("Warning: Not all metagenomes in a group\n")

    # build R cmd
    three = 'c(1,2,3)' if opts.three == 1 else 'c(1,2)'
    label = 'TRUE' if opts.label == 1 else 'FALSE'
    table = '"%s"' % tmp_group if tmp_group else 'NA'
    color = 'TRUE' if opts.color_auto == 1 else 'FALSE'
    r_cmd = """source("%s/plot_mg_pcoa.r")
suppressMessages( plot_mg_pcoa(
    table_in="%s",
    image_out="%s",
    plot_pcs=%s,
    dist_metric="%s",
    label_points=%s,
    color_table=%s,
    color_column=1,
    auto_colors=%s,
    image_height_in=%.1f,
    image_width_in=%.1f,
    image_res_dpi=%d
))""" % (opts.rlib, tmp_in, opts.plot, three, opts.distance, label, table,
         color, opts.height, opts.width, opts.dpi)
    execute_r(r_cmd)

    # cleanup temp files
    os.remove(tmp_in)
    if tmp_group:
        os.remove(tmp_group)

    return 0
Ejemplo n.º 17
0
def main(args):
    """Print the top N functional annotations for one metagenome.

    Queries the MG-RAST matrix/function endpoint (asynchronous mode),
    optionally restricts results to a functional subtree via the m5nr
    ontology, and prints up to --top annotations as name<TAB>abundance,
    sorted by abundance descending. Returns 0 on success, 1 on bad input.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='function', help="functional level to retrieve abundances for, default is function")
    parser.add_argument("--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="function name to filter by")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="function level to filter by")
    parser.add_argument("--top", dest="top", type=int, default=10, help="display only the top N taxa, default is 10")
    parser.add_argument("--evalue", dest="evalue", type=int, default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", type=int, default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", type=int, default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")

    # get inputs (--top already arrives as int via type=int; the old
    # redundant re-cast was dropped)
    opts = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    # the two filter options are an all-or-nothing pair (XOR check)
    if bool(opts.filter_name) != bool(opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build matrix url
    params = [ ('id', opts.id),
               ('group_level', opts.level),
               ('source', opts.source),
               ('evalue', opts.evalue),
               ('identity', opts.identity),
               ('length', opts.length),
               ('version', opts.version),
               ('result_type', 'abundance'),
               ('asynchronous', '1'),
               ('hide_metadata', '1') ]
    url = opts.url+'/matrix/function?'+urlencode(params, True)

    # retrieve abundance matrix (BIOM)
    top_ann = {}
    biom = async_rest_api(url, auth=token)

    # get sub annotations: the set of names under the requested branch
    # of the functional ontology
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [ ('filter', opts.filter_name),
                   ('filter_level', opts.filter_level),
                   ('min_level', opts.level),
                   ('version', opts.version),
                   ('source', opts.source) ]
        url = opts.url+'/m5nr/ontology?'+urlencode(params, True)
        data = obj_from_url(url)
        # the ontology API labels the 'function' level as 'level4'
        level = 'level4' if opts.level == 'function' else opts.level
        sub_ann = set(d[level] for d in data['data'])

    # collect top annotations by abundance, honoring the ontology filter
    if biom["matrix_type"] == "sparse":
        # sparse entries are [row_idx, col_idx, value]; sort by value desc
        for d in sorted(biom['data'], key=itemgetter(2), reverse=True):
            name = biom['rows'][d[0]]['id']
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = d[2]
    if biom["matrix_type"] == "dense":
        # NOTE(review): rows are compared as lists here, which orders by
        # abundance only when each row holds a single column -- true for a
        # one-metagenome matrix; confirm if multi-column input is possible
        sortindex = sorted(range(len(biom['data'])), key=biom['data'].__getitem__, reverse=True)
        for n in sortindex:
            name = biom['rows'][n]['id']
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = biom['data'][n][0]

    # output data, highest abundance first
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" %(k, v))

    return 0
Ejemplo n.º 18
0
def main(args):
    """Display statistics for one metagenome.

    --stat selects sequence stats, a QC table/plot (bp_profile, drisee,
    kmer), rarefaction data, or counts at a taxonomy level; --plot
    renders ASCII-art plots instead of tab-delimited tables.
    Returns 0 on success, 1 on any error.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--id",
                        dest="id",
                        default=None,
                        help="KBase Metagenome ID")
    parser.add_argument("--url",
                        dest="url",
                        default=API_URL,
                        help="communities API url")
    parser.add_argument("--user",
                        dest="user",
                        default=None,
                        help="OAuth username")
    parser.add_argument("--passwd",
                        dest="passwd",
                        default=None,
                        help="OAuth password")
    parser.add_argument("--token",
                        dest="token",
                        default=None,
                        help="OAuth token")
    parser.add_argument(
        "--plot",
        dest="plot",
        action="store_true",
        default=False,
        help=
        "display plot in ASCII art instead of table of numbers for: bp_profile, drisee, kmer, rarefaction, or taxa level"
    )
    parser.add_argument(
        "--stat",
        dest="stat",
        default='sequence',
        help=
        "type of stat to display, use keyword: 'sequence', 'bp_profile', 'drisee', 'kmer', 'rarefaction', or taxa level name, default is sequence"
    )

    # get inputs
    opts = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build call url
    url = opts.url + '/metagenome/' + opts.id + '?verbosity=stats&public=1'

    # retrieve / output data
    result = obj_from_url(url, auth=token)
    stats = result['statistics']
    if opts.stat == 'sequence':
        for s in sorted(stats['sequence_stats'].keys()):
            safe_print("%s\t%s\n" % (s, stats['sequence_stats'][s]))
    elif opts.stat == 'bp_profile':
        if not stats['qc']['bp_profile']['percents']['data']:
            sys.stderr.write("ERROR: %s has no bp_profile statistics\n" %
                             opts.id)
            return 1
        if opts.plot:
            cols = stats['qc']['bp_profile']['percents']['columns'][1:5]
            # BUGFIX: materialize the rows; a Python 3 map object is a
            # single-pass iterator, which breaks consumers that index or
            # iterate more than once
            data = [x[1:5]
                    for x in stats['qc']['bp_profile']['percents']['data']]
            plot_histo(cols, data, 20, 80)
        else:
            safe_print(
                "\t".join(stats['qc']['bp_profile']['percents']['columns']) +
                "\n")
            for d in stats['qc']['bp_profile']['percents']['data']:
                safe_print("\t".join(map(str, d)) + "\n")
    elif opts.stat == 'drisee':
        if not stats['qc']['drisee']['percents']['data']:
            sys.stderr.write("ERROR: %s has no drisee statistics\n" % opts.id)
            return 1
        if opts.plot:
            # plot total drisee error (column 7) against read position
            x, y = [], []
            for d in stats['qc']['drisee']['percents']['data']:
                x.append(d[0])
                y.append(d[7])
            aplotter.plot(x,
                          y,
                          output=sys.stdout,
                          draw_axes=True,
                          plot_slope=True,
                          min_x=0,
                          min_y=0)
        else:
            safe_print(
                "\t".join(stats['qc']['drisee']['percents']['columns']) + "\n")
            for d in stats['qc']['drisee']['percents']['data']:
                safe_print("\t".join(map(str, d)) + "\n")
    elif opts.stat == 'kmer':
        if not stats['qc']['kmer']['15_mer']['data']:
            sys.stderr.write("ERROR: %s has no kmer statistics\n" % opts.id)
            return 1
        if opts.plot:
            # log-log plot of the 15-mer spectrum
            x, y = [], []
            for d in stats['qc']['kmer']['15_mer']['data']:
                x.append(math.log(d[3], 10))
                y.append(math.log(d[0], 10))
            aplotter.plot(x,
                          y,
                          output=sys.stdout,
                          draw_axes=True,
                          plot_slope=True,
                          min_x=0,
                          min_y=0)
        else:
            safe_print("\t".join(stats['qc']['kmer']['15_mer']['columns']) +
                       "\n")
            for d in stats['qc']['kmer']['15_mer']['data']:
                safe_print("\t".join(map(str, d)) + "\n")
    elif opts.stat == 'rarefaction':
        if not stats['rarefaction']:
            sys.stderr.write("ERROR: %s has no rarefaction statistics\n" %
                             opts.id)
            return 1
        if opts.plot:
            x, y = [], []
            for r in stats['rarefaction']:
                x.append(int(r[0]))
                y.append(float(r[1]))
            aplotter.plot(x,
                          y,
                          output=sys.stdout,
                          draw_axes=True,
                          plot_slope=True,
                          min_x=0,
                          min_y=0)
        else:
            safe_print("x\ty\n")
            for r in stats['rarefaction']:
                safe_print("%s\t%s\n" % (str(r[0]), str(r[1])))
    elif opts.stat in stats['taxonomy']:
        # rank descending by count, ties broken alphabetically by name
        ranked = sorted(stats['taxonomy'][opts.stat],
                        key=lambda x: (-int(x[1]), x[0]))
        if opts.plot:
            # BUGFIX: map objects cannot be sliced in Python 3; slice the
            # ranked list first, then extract the counts
            top = [int(x[1]) for x in ranked[:50]]
            aplotter.plot(top,
                          output=sys.stdout,
                          draw_axes=True,
                          plot_slope=False,
                          min_x=0,
                          min_y=0)
        else:
            for t in ranked:
                safe_print("%s\t%s\n" % (t[0], str(t[1])))
    else:
        sys.stderr.write("ERROR: invalid stat type\n")
        return 1

    return 0
Ejemplo n.º 19
0
def main(args):
    """Administer an MG-RAST project.

    Positional args: action (one of valid_actions) followed by the
    project ID. Actions cover info/metadata dumps, metadata update from
    an .xlsx file, making the project public, and EBI submission/status.
    Returns 0 on success, 1 on bad input.
    """
    global API_URL
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    # access options
    parser.add_argument("-u", "--url", dest="url", default=API_URL, help="MG-RAST API url")
    parser.add_argument("-t", "--token", dest="token", default=None, help="MG-RAST token")
    # other options
    parser.add_argument("-f", "--file", dest="mdfile", default=None, help="metadata .xlsx file")
    parser.add_argument("--taxa", dest="taxa", default=None, help="metagenome_taxonomy for project: http://www.ebi.ac.uk/ena/data/view/Taxon:408169")
    parser.add_argument("--debug", dest="debug", action="store_true", default=False, help="Run in debug mode")
    parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", default=False, help="Verbose STDOUT")
    parser.add_argument("args",type=str, nargs="+", help="Action (" + ",".join(valid_actions)+")" )
    
    # get inputs
    opts = parser.parse_args()
    args = opts.args
    API_URL = opts.url
    
    # validate inputs
    if len(args) < 1:
        sys.stderr.write("ERROR: missing action\n")
        return 1
    action = args[0]
    if action not in valid_actions:
        sys.stderr.write("ERROR: invalid action. use one of: %s\n"%", ".join(valid_actions))
        return 1
    if len(args) < 2:
        sys.stderr.write("ERROR: missing Project ID\n")
        return 1
    pid = args[1]
    # non-zero when either --verbose or --debug was given
    DEBUG = opts.verbose + opts.debug 
    # get token
    token = get_auth_token(opts)
    if not token:
        token = input('Enter your MG-RAST auth token: ')
    
    # actions
    if action == "get-info":
        data = obj_from_url(opts.url+'/project/'+pid+'?verbosity=verbose&nocache=1', auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "get-metadata":
        data = obj_from_url(opts.url+'/metadata/export/'+pid, auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "update-metadata":
        result = post_file(opts.url+'/metadata/update', 'upload', opts.mdfile, auth=token, data=json.dumps({'project': pid}, separators=(',',':')), debug=DEBUG)
        # BUGFIX: print the upload response; this branch previously
        # referenced the undefined name 'data', raising NameError
        print(json.dumps(result, sort_keys=True, indent=4))
    elif action == "make-public":
        data = obj_from_url(opts.url+'/project/'+pid+'/makepublic', auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "submit-ebi":
        debug = 1 if opts.debug else 0
        info  = {
            'project_id': pid,
            'debug': debug
        }
        if opts.taxa:
            info['project_taxonomy'] = opts.taxa
        data = obj_from_url(opts.url+'/submission/ebi', auth=token, data=json.dumps(info, separators=(',',':')))
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "status-ebi":
        data = obj_from_url(opts.url+'/submission/'+pid, auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    
    return 0
Ejemplo n.º 20
0
def main(args):
    """Print the top N most abundant taxa for a metagenome at a given
    taxonomic level, optionally restricted to the children of one taxon
    (--filter_name/--filter_level must be used together).
    Returns 0 on success, 1 on error.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--id",
                        dest="id",
                        default=None,
                        help="KBase Metagenome ID")
    parser.add_argument("--url",
                        dest="url",
                        default=API_URL,
                        help="communities API url")
    parser.add_argument("--user",
                        dest="user",
                        default=None,
                        help="OAuth username")
    parser.add_argument("--passwd",
                        dest="passwd",
                        default=None,
                        help="OAuth password")
    parser.add_argument("--token",
                        dest="token",
                        default=None,
                        help="OAuth token")
    parser.add_argument(
        "--level",
        dest="level",
        default='species',
        help="taxon level to retrieve abundances for, default is species")
    parser.add_argument(
        "--source",
        dest="source",
        default='SEED',
        help="datasource to filter results by, default is SEED")
    parser.add_argument("--filter_name",
                        dest="filter_name",
                        default=None,
                        help="taxon name to filter by")
    parser.add_argument("--filter_level",
                        dest="filter_level",
                        default=None,
                        help="taxon level to filter by")
    parser.add_argument("--top",
                        dest="top",
                        type=int,
                        default=10,
                        help="display only the top N taxa, default is 10")
    parser.add_argument(
        "--evalue",
        dest="evalue",
        type=int,
        default=5,
        help="negative exponent value for maximum e-value cutoff, default is 5"
    )
    parser.add_argument(
        "--identity",
        dest="identity",
        type=int,
        default=60,
        help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument(
        "--length",
        dest="length",
        type=int,
        default=15,
        help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version",
                        type=int,
                        dest="version",
                        default=1,
                        help="M5NR annotation version to use, default is 1")

    # get inputs
    opts = parser.parse_args()
    opts.top = int(opts.top)
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    if (opts.filter_name and
        (not opts.filter_level)) or ((not opts.filter_name)
                                     and opts.filter_level):
        sys.stderr.write(
            "ERROR: both --filter_level and --filter_name need to be used together\n"
        )
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url for the (asynchronous) abundance matrix call
    params = [('id', opts.id), ('group_level', opts.level),
              ('source', opts.source), ('evalue', opts.evalue),
              ('identity', opts.identity), ('length', opts.length),
              ('version', opts.version), ('result_type', 'abundance'),
              ('asynchronous', '1'), ('hide_metadata', '1')]
    url = opts.url + '/matrix/organism?' + urlencode(params, True)

    # retrieve data
    top_ann = {}
    biom = async_rest_api(url, auth=token)

    # get sub annotations: the taxa names that fall under the filter
    # taxon at the requested level
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [('filter', opts.filter_name),
                  ('filter_level', opts.filter_level),
                  ('min_level', opts.level), ('version', opts.version)]
        url = opts.url + '/m5nr/taxonomy?' + urlencode(params, True)
        data = obj_from_url(url)
        sub_ann = set(map(lambda x: x[opts.level], data['data']))
    # normalize the matrix to dense form before reading abundances
    if biom['matrix_type'] == "dense":
        data = biom['data']
    else:
        data = sparse_to_dense(biom['data'], len(biom['rows']),
                               len(biom['cols']))
    rows = [biom['rows'][i]['id'] for i in range(len(biom['rows']))]
    # BUGFIX: read abundances from the densified matrix; the original
    # indexed biom['data'] directly, which for a sparse matrix yields
    # row indices instead of abundance values
    datalist = [data[i][0] for i in range(len(biom['rows']))]
    data2 = zip(rows, datalist)
    # sort by abundance, keep the top N (honoring the taxon filter)
    for d in sorted(data2, key=itemgetter(1), reverse=True):
        name = d[0]
        if len(top_ann) >= opts.top:
            break
        if sub_ann and (name not in sub_ann):
            continue
        top_ann[name] = d[1]

    # output data
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" % (k, v))

    return 0
Ejemplo n.º 21
0
def main(args):
    """Print the top N most abundant taxa for a metagenome at a given
    taxonomic level, optionally restricted to the children of one taxon
    (--filter_name/--filter_level must be used together).
    Returns 0 on success, 1 on error.
    """
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_option("", "--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--level", dest="level", default='species', help="taxon level to retrieve abundances for, default is species")
    parser.add_option("", "--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")
    parser.add_option("", "--filter_name", dest="filter_name", default=None, help="taxon name to filter by")
    parser.add_option("", "--filter_level", dest="filter_level", default=None, help="taxon level to filter by")
    parser.add_option("", "--top", dest="top", type="int", default=10, help="display only the top N taxa, default is 10")
    parser.add_option("", "--evalue", dest="evalue", type="int", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_option("", "--identity", dest="identity", type="int", default=60, help="percent value for minimum % identity cutoff, default is 60")
    parser.add_option("", "--length", dest="length", type="int", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_option("", "--version", type="int", dest="version", default=1, help="M5NR annotation version to use, default is 1")
    
    # get inputs
    (opts, args) = parser.parse_args()
    opts.top = int(opts.top)
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    
    # get auth
    token = get_auth_token(opts)
    
    # build url for the (asynchronous) abundance matrix call
    params = [ ('id', opts.id),
               ('group_level', opts.level),
               ('source', opts.source),
               ('evalue', opts.evalue),
               ('identity', opts.identity),
               ('length', opts.length),
               ('version', opts.version),
               ('result_type', 'abundance'),
               ('asynchronous', '1'),
               ('hide_metadata', '1') ]
    url = opts.url+'/matrix/organism?'+urlencode(params, True)

    # retrieve data
    top_ann = {}
    biom = async_rest_api(url, auth=token)
    
    # get sub annotations: the taxa names that fall under the filter
    # taxon at the requested level
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [ ('filter', opts.filter_name),
                   ('filter_level', opts.filter_level),
                   ('min_level', opts.level),
                   ('version', opts.version) ]
        url = opts.url+'/m5nr/taxonomy?'+urlencode(params, True)
        data = obj_from_url(url)
        sub_ann = set( map(lambda x: x[opts.level], data['data']) )
    # normalize the matrix to dense form before reading abundances
    if biom['matrix_type'] == "dense":
        data = biom['data']
    else:
        data = sparse_to_dense(biom['data'], len(biom['rows']), len(biom['cols']))
    rows = [biom['rows'][i]['id'] for i in range(len(biom['rows']))]
    # BUGFIX: read abundances from the densified matrix; the original
    # indexed biom['data'] directly, which for a sparse matrix yields
    # row indices instead of abundance values
    datalist = [data[i][0] for i in range(len(biom['rows']))]
    data2 = zip( rows, datalist)
    # sort by abundance, keep the top N (honoring the taxon filter)
    for d in sorted(data2, key=itemgetter(1), reverse=True):
        name = d[0]
        if len(top_ann) >= opts.top:
            break
        if sub_ann and (name not in sub_ann):
            continue
        top_ann[name] = d[1]

    # output data
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" %(k, v))
    
    return 0
Ejemplo n.º 22
0
            sys.stderr.write(repr(item))
        sys.stdout.write((
            "\t".join([
                item["metagenome_id"],
                #                        str(len(item.keys())),
                repr(public),
                item["created_on"],
                mg_name,
                project_id,
                project_name
            ]) + "\n"))


CALL = "/search"  # search endpoint of the MG-RAST API

key = get_auth_token()  # auth token for the API session

# assign parameters
limit = 1000  # result page size for the initial call

# query parameters: oldest-first listing of public metagenomes
parameters = dict(limit=limit,
                  order="created_on",
                  direction="asc",
                  public="1")
API_URL = "https://api.mg-rast.org/"

# construct API call: full URL for the first page of results
base_url = "".join([API_URL, CALL, "?", urlencode(parameters)])
def main(args):
    """Search metagenomes by metadata fields and stream the matching
    annotated sequences to STDOUT. Returns 0 on success."""
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='',
                          description=prehelp%(VERSION, search_opts),
                          epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--url", dest="url", default=API_URL,
                      help="API url")
    parser.add_option("", "--user", dest="user", default=None,
                      help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None,
                      help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None,
                      help="OAuth token")
    parser.add_option("", "--level", dest="level", default='function',
                      help="function level to filter by")
    parser.add_option("", "--source", dest="source", default='Subsystems',
                      help="datasource to filter results by, default is Subsystems")
    parser.add_option("", "--evalue", dest="evalue", default=5,
                      help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_option("", "--identity", dest="identity", default=60,
                      help="percent value for minimum % identity cutoff, default is 60")
    parser.add_option("", "--length", dest="length", default=15,
                      help="value for minimum alignment length cutoff, default is 15")
    parser.add_option("", "--status", dest="status", default="public",
                      help="types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public")
    for sfield in SEARCH_FIELDS:
        parser.add_option("", "--"+sfield, dest=sfield, default=None,
                          help="search parameter: query string for "+sfield)

    # get inputs
    (opts, args) = parser.parse_args()

    # get auth
    token = get_auth_token(opts)

    # metagenome query: paged 100 at a time, minimal detail, every
    # supplied search term must match
    query = [('limit', '100'),
             ('verbosity', 'minimal'),
             ('match', 'all'),
             ('status', opts.status)]
    query.extend((sfield, getattr(opts, sfield))
                 for sfield in SEARCH_FIELDS
                 if getattr(opts, sfield, None))
    result = obj_from_url(opts.url+'/metagenome?'+urlencode(query, True),
                          auth=token)
    if not result['data']:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0

    # follow the pagination links, collecting every matching metagenome id
    mgids = set(d['id'] for d in result['data'])
    while result['next']:
        result = obj_from_url(result['next'], auth=token)
        if not result['data']:
            break
        mgids.update(d['id'] for d in result['data'])

    # stream annotated sequences for each matching metagenome
    for mg in mgids:
        fetch = [('source', opts.source),
                 ('evalue', opts.evalue),
                 ('identity', opts.identity),
                 ('length', opts.length)]
        # ontology-typed retrieval only for hierarchical sources above
        # the plain function level
        use_ontology = (opts.source in ['Subsystems', 'KO', 'NOG', 'COG']) and (opts.level != 'function')
        fetch.append(('type', 'ontology' if use_ontology else 'function'))
        if opts.function:
            fetch.append(('filter', opts.function))
            if opts.level:
                fetch.append(('filter_level', opts.level))
        seq_url = opts.url+'/annotation/sequence/'+mg+'?'+urlencode(fetch, True)
        safe_print('Results from '+mg+":\n")
        stdout_from_url(seq_url, auth=token)

    return 0
Ejemplo n.º 24
0
DEBUG = 0

if __name__ == '__main__':
    usage = "usage: %prog [options]  URI"
    parser = ArgumentParser(usage)
    parser.add_argument("-v", "--verbose", dest="verbose", action="store_true")
    parser.add_argument("-k",
                        "--token",
                        dest="token",
                        type=str,
                        help="Auth token")
    parser.add_argument("URI", type=str, help="URI to query")

    opts = parser.parse_args()
    key = get_auth_token(opts)
    if opts.verbose:
        print("KEY = {}".format(key), file=sys.stderr)
# assign parameters
    URI = opts.URI

    # construct API call
    print(URI, file=sys.stderr)

    # retrieve the data by sending at HTTP GET request to the MG-RAST API
    jsonstructure = async_rest_api(URI, auth=key)

    # unpack and display the data table
    if type(jsonstructure) == dict:  # If we have data, not json structure
        print(json.dumps(jsonstructure), file=sys.stdout)
    else:
Ejemplo n.º 25
0
def main(args):
    """Download (or, with --list, tabulate) the files attached to an
    MG-RAST project or metagenome into per-metagenome subdirectories.
    Returns 0 on success, 1 on bad input."""
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--project", dest="project", default=None, help="project ID")
    parser.add_argument("--metagenome", dest="metagenome", default=None, help="metagenome ID")
    parser.add_argument("--file", dest="file", default=None, help="file ID for given project or metagenome")
    parser.add_argument("--dir", dest="dir", default=".", help="directory to do downloads")
    parser.add_argument("--list", dest="list", action="store_true", default=False, help="list files and their info for given ID")

    # parse and validate inputs
    opts = parser.parse_args()
    if not (opts.project or opts.metagenome):
        sys.stderr.write("ERROR: a project or metagenome id is required\n")
        return 1
    if not os.path.isdir(opts.dir):
        sys.stderr.write("ERROR: dir '%s' does not exist\n"%opts.dir)
        return 1
    downdir = opts.dir

    # get auth
    token = get_auth_token(opts)

    # resolve the set of metagenome ids to operate on
    mgs = []
    if opts.project:
        project = obj_from_url(opts.url+'/project/'+opts.project+'?verbosity=full', auth=token)
        mgs = [m["metagenome_id"] for m in project['metagenomes']]
    elif opts.metagenome:
        mgs = [opts.metagenome]

    # one download listing per metagenome
    all_files = {m: obj_from_url(opts.url+'/download/'+m, auth=token)['data'] for m in mgs}

    # --list: tabulate file info instead of downloading
    if opts.list:
        pt = PrettyTable(["Metagenome", "File Name", "File ID", "Checksum", "Byte Size"])
        for mg, files in all_files.items():
            for f in files:
                pt.add_row([mg, f['file_name'], f['file_id'], f['file_md5'],
                            f['file_size'] if f['file_size'] else 0])
        pt.align = "l"
        pt.align['Byte Size'] = "r"
        print(pt)
        return 0

    # download into <dir>[/<project>]/<metagenome>/
    if opts.project:
        downdir = os.path.join(downdir, opts.project)
        if not os.path.isdir(downdir):
            os.mkdir(downdir)
    for mg, files in all_files.items():
        mgdir = os.path.join(downdir, mg)
        if not os.path.isdir(mgdir):
            os.mkdir(mgdir)
        for f in files:
            # with --file only the matching id/name is fetched;
            # otherwise every file is
            if (not opts.file) or (opts.file in (f['file_id'], f['file_name'])):
                file_download(token, f, dirpath=mgdir)

    return 0
Ejemplo n.º 26
0
def test_async():
    """Smoke test: fetch an organism abundance matrix through the
    asynchronous REST endpoint and dump the raw response."""
    uri = (API_URL
           + '/matrix/organism?hit_type=single&group_level=strain&evalue=15'
             '&source=RefSeq&result_type=abundance&id=mgm4653783.3&asynchronous=1')
    token = get_auth_token(None)
    print("MG-RAST token: ", token)
    print(repr(async_rest_api(uri, auth=token)))
Ejemplo n.º 27
0
def main(args):
    """Print the top N most abundant functional annotations for a
    metagenome at a given functional level, optionally restricted to the
    children of one category (--filter_name/--filter_level must be used
    together). Returns 0 on success, 1 on error.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='function', help="functional level to retrieve abundances for, default is function")
    parser.add_argument("--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="function name to filter by")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="function level to filter by")
    parser.add_argument("--top", dest="top", type=int, default=10, help="display only the top N taxa, default is 10")
    parser.add_argument("--evalue", dest="evalue", type=int, default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", type=int, default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", type=int, default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")
    
    # get inputs
    opts = parser.parse_args()
    opts.top = int(opts.top)
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    
    # get auth
    token = get_auth_token(opts)
    
    # build url for the (asynchronous) abundance matrix call
    params = [ ('id', opts.id),
               ('group_level', opts.level), 
               ('source', opts.source),
               ('evalue', opts.evalue),
               ('identity', opts.identity),
               ('length', opts.length),
               ('version', opts.version),
               ('result_type', 'abundance'),
               ('asynchronous', '1'),
               ('hide_metadata', '1') ]
    url = opts.url+'/matrix/function?'+urlencode(params, True)
    
    # retrieve data
    top_ann = {}
    biom = async_rest_api(url, auth=token)
    
    # get sub annotations: the function names that fall under the filter
    # category at the requested level
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [ ('filter', opts.filter_name),
                   ('filter_level', opts.filter_level),
                   ('min_level', opts.level),
                   ('version', opts.version),
                   ('source', opts.source) ]
        url = opts.url+'/m5nr/ontology?'+urlencode(params, True)
        data = obj_from_url(url)
        # the 'function' level is stored as 'level4' in the ontology result
        level = 'level4' if opts.level == 'function' else opts.level
        sub_ann = set(map(lambda x: x[level], data['data']))
    # async_rest_api returns a wrapper document; the BIOM matrix itself
    # lives under its 'data' key
    biomorig = biom
    biom = biomorig["data"]
    # BUGFIX: validate with an explicit check instead of assert, which is
    # stripped under `python -O`; fail like the other errors in this script
    if "matrix_type" not in biom:
        sys.stderr.write("ERROR: unexpected matrix response: %s\n" % repr(biom))
        return 1
    # sort data: keep the top N annotations (honoring the filter)
    if biom["matrix_type"] == "sparse":
        # sparse rows are [row_index, col_index, value]
        for d in sorted(biom['data'], key=itemgetter(2), reverse=True):
            name = biom['rows'][d[0]]['id']  # if opts.source != 'Subsystems' else biom['rows'][d[0]]['metadata']['ontology'][-1]
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = d[2]
    if biom["matrix_type"] == "dense":
        sortindex = sorted(range(len(biom['data'])), key=biom['data'].__getitem__, reverse=True)
        for n in sortindex:
            name = biom['rows'][n]['id'] # if opts.source != 'Subsystems' else biom['rows'][n]['metadata']['ontology'][-1]
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = biom['data'][n][0]

    # output data
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" %(k, v))
    
    return 0
def main(args):
    """Display statistics for a single KBase Metagenome.

    Fetches the metagenome object (verbosity=stats) from the communities API
    and prints the section selected by --stat as a tab-separated table, or,
    with --plot, as an ASCII-art chart.

    Returns 0 on success, 1 on missing id, missing statistics, or an
    invalid --stat keyword.
    """
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_option("", "--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--plot", dest="plot", action="store_true", default=False, help="display plot in ASCII art instead of table of numbers for: bp_profile, drisee, kmer, rarefaction, or taxa level")
    parser.add_option("", "--stat", dest="stat", default='sequence', help="type of stat to display, use keyword: 'sequence', 'bp_profile', 'drisee', 'kmer', 'rarefaction', or taxa level name, default is sequence")
    
    # get inputs
    (opts, args) = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    
    # get auth
    token = get_auth_token(opts)
    
    # build call url
    url = opts.url+'/metagenome/'+opts.id+'?verbosity=stats'

    # retrieve / output data
    result = obj_from_url(url, auth=token)
    stats  = result['statistics']
    if opts.stat == 'sequence':
        for s in sorted(stats['sequence_stats'].keys()):
            safe_print("%s\t%s\n" %(s, stats['sequence_stats'][s]))
    elif opts.stat == 'bp_profile':
        if not stats['qc']['bp_profile']['percents']['data']:
            sys.stderr.write("ERROR: %s has no bp_profile statistics\n"%opts.id)
            return 1
        if opts.plot:
            # columns/rows 1:5 are the A/C/G/T percent columns (column 0 is position)
            cols = stats['qc']['bp_profile']['percents']['columns'][1:5]
            # BUGFIX: use a list comprehension — under Python 3 map() returns a
            # one-shot iterator, which plot_histo cannot safely consume
            data = [x[1:5] for x in stats['qc']['bp_profile']['percents']['data']]
            plot_histo(cols, data, 20, 80)
        else:
            safe_print("\t".join(stats['qc']['bp_profile']['percents']['columns'])+"\n")
            for d in stats['qc']['bp_profile']['percents']['data']:
                safe_print("\t".join(map(str, d))+"\n")
    elif opts.stat == 'drisee':
        if not stats['qc']['drisee']['percents']['data']:
            sys.stderr.write("ERROR: %s has no drisee statistics\n"%opts.id)
            return 1
        if opts.plot:
            # plot total drisee error (column 7) against read position (column 0)
            x, y = [], []
            for d in stats['qc']['drisee']['percents']['data']:
                x.append(d[0])
                y.append(d[7])
            aplotter.plot(x, y, output=sys.stdout, draw_axes=True, plot_slope=True, min_x=0, min_y=0)
        else:
            safe_print("\t".join(stats['qc']['drisee']['percents']['columns'])+"\n")                
            for d in stats['qc']['drisee']['percents']['data']:
                safe_print("\t".join(map(str, d))+"\n")
    elif opts.stat == 'kmer':
        if not stats['qc']['kmer']['15_mer']['data']:
            sys.stderr.write("ERROR: %s has no kmer statistics\n"%opts.id)
            return 1
        if opts.plot:
            # log-log plot of kmer coverage vs. abundance
            x, y = [], []
            for d in stats['qc']['kmer']['15_mer']['data']:
                x.append( math.log(d[3], 10) )
                y.append( math.log(d[0], 10) )
            aplotter.plot(x, y, output=sys.stdout, draw_axes=True, plot_slope=True, min_x=0, min_y=0)
        else:
            safe_print("\t".join(stats['qc']['kmer']['15_mer']['columns'])+"\n")
            for d in stats['qc']['kmer']['15_mer']['data']:
                safe_print("\t".join(map(str, d))+"\n")
    elif opts.stat == 'rarefaction':
        if not stats['rarefaction']:
            sys.stderr.write("ERROR: %s has no rarefaction statistics\n"%opts.id)
            return 1
        if opts.plot:
            x, y = [], []
            for r in stats['rarefaction']:
                x.append(int(r[0]))
                y.append(float(r[1]))
            aplotter.plot(x, y, output=sys.stdout, draw_axes=True, plot_slope=True, min_x=0, min_y=0)
        else:
            safe_print("x\ty\n")
            for r in stats['rarefaction']:
                safe_print("%s\t%s\n" %(str(r[0]), str(r[1])))
    elif opts.stat in stats['taxonomy']:
        # sort by descending abundance, ties broken alphabetically by name
        ranked = sorted(stats['taxonomy'][opts.stat], key=lambda x: (-int(x[1]), x[0]))
        if opts.plot:
            # BUGFIX: map objects are not sliceable in Python 3 — take the top
            # 50 entries first, then convert their abundances to int
            top = [int(x[1]) for x in ranked[:50]]
            aplotter.plot(top, output=sys.stdout, draw_axes=True, plot_slope=False, min_x=0, min_y=0)
        else:
            for t in ranked:
                safe_print("%s\t%s\n" %(t[0], str(t[1])))
    else:
        sys.stderr.write("ERROR: invalid stat type\n")
        return 1
    
    return 0
Ejemplo n.º 29
0
def main(args):
    """Export a metagenome's MG-RAST research object (BagIt-style bundle).

    Downloads the research-object manifest for --metagenome into --dir,
    fetches every aggregate file it lists (CWL files are taken from a fresh
    clone of the MG-RAST/pipeline repository with relative paths flattened),
    and writes the bag's sha1 manifests (manifest-sha1.txt for payload files
    under data/, tagmanifest-sha1.txt for the rest).

    With --list, only prints the manifest's file table and exits.

    Returns 0 on success, 1 on missing metagenome id or missing directory.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp.format(VERSION, RO_VERSION),
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url",
                        dest="url",
                        default=API_URL,
                        help="MG-RAST API url")
    parser.add_argument("--user",
                        dest="user",
                        default=None,
                        help="OAuth username")
    parser.add_argument("--passwd",
                        dest="passwd",
                        default=None,
                        help="OAuth password")
    parser.add_argument("--token",
                        dest="token",
                        default=None,
                        help="OAuth token")
    parser.add_argument("--metagenome",
                        dest="metagenome",
                        default=None,
                        help="metagenome ID")
    parser.add_argument("--dir",
                        dest="dir",
                        default=".",
                        help="directory to export to")
    parser.add_argument("--list",
                        dest="list",
                        action="store_true",
                        default=False,
                        help="list files in manifest")

    # get inputs
    opts = parser.parse_args()
    if not opts.metagenome:
        sys.stderr.write("ERROR: a metagenome id is required\n")
        return 1
    if not os.path.isdir(opts.dir):
        sys.stderr.write("ERROR: dir '%s' does not exist\n" % opts.dir)
        return 1

    # get auth
    token = get_auth_token(opts)

    # get mg info (used below to fill in the workflow job.yaml template)
    url = opts.url + '/metagenome/' + opts.metagenome
    mg = obj_from_url(url, auth=token)

    # get manifest
    url = opts.url + '/researchobject/manifest/' + opts.metagenome
    data = obj_from_url(url, auth=token)

    # just list
    if opts.list:
        pt = PrettyTable(["File Name", "Folder", "Media Type"])
        for info in data["aggregates"]:
            pt.add_row([
                info["bundledAs"]["filename"], info["bundledAs"]["folder"],
                info["mediatype"]
            ])
        pt.align = "l"
        print(pt)
        return 0

    # get cwl files: clone the pipeline repo into a temp dir under --dir
    temp_name = random_str(10)
    pipeline_dir = os.path.join(opts.dir, temp_name)
    git_clone = "git clone https://github.com/MG-RAST/pipeline.git " + pipeline_dir
    os.system(git_clone)

    # download manifest
    sha1s = []
    base = data["@context"][0]["@base"].strip('/')
    manifest_dir = os.path.join(opts.dir, base)
    os.mkdir(manifest_dir)
    data_str = json.dumps(data)
    # BUGFIX: close the handle deterministically via a context manager, and
    # encode to bytes before hashing — hashlib rejects str in Python 3
    with open(os.path.join(manifest_dir, data["manifest"]), 'w') as man_hdl:
        man_hdl.write(data_str)
    sha1s.append([
        hashlib.sha1(data_str.encode('utf-8')).hexdigest(),
        os.path.join(base, data["manifest"])
    ])

    # download aggregates
    for info in data["aggregates"]:
        sys.stdout.write("Downloading %s ... " %
                         (info["bundledAs"]["filename"]))
        folder = info["bundledAs"]["folder"].strip('/')
        folder_dir = os.path.join(opts.dir, folder)
        if not os.path.isdir(folder_dir):
            os.mkdir(folder_dir)
        if "githubusercontent" in info["uri"]:
            # CWL entries come from the local pipeline clone; flatten the
            # relative ../Inputs|Tools|Workflows references
            pos = info["uri"].find("CWL")
            src = os.path.join(pipeline_dir, info["uri"][pos:])
            dst = os.path.join(folder_dir, info["bundledAs"]["filename"])
            with open(src, 'r') as src_hdl:
                text = src_hdl.read().replace('../Inputs/', '').replace(
                    '../Tools/', '').replace('../Workflows/', '')
            if dst.endswith('job.yaml'):
                text = edit_input(text, mg)
            with open(dst, 'w') as dst_hdl:
                dst_hdl.write(text)
            sha1s.append([
                # BUGFIX: encode to bytes for Python 3's hashlib
                hashlib.sha1(text.encode('utf-8')).hexdigest(),
                os.path.join(folder, info["bundledAs"]["filename"])
            ])
        else:
            # stream the file from its URI; file_from_url computes the sha1
            with open(os.path.join(folder_dir, info["bundledAs"]["filename"]),
                      'w') as fh:
                s1 = file_from_url(info["uri"], fh, auth=token, sha1=True)
            sha1s.append(
                [s1, os.path.join(folder, info["bundledAs"]["filename"])])
        sys.stdout.write("Done\n")

    # output sha1: payload files (under data/) vs tag files
    with open(os.path.join(opts.dir, "manifest-sha1.txt"), 'w') as mansha1, \
         open(os.path.join(opts.dir, "tagmanifest-sha1.txt"), 'w') as tagsha1:
        sha1s.sort(key=lambda x: x[1])
        for s1 in sha1s:
            if s1[1].startswith('data'):
                mansha1.write("%s\t%s\n" % (s1[0], s1[1]))
            else:
                tagsha1.write("%s\t%s\n" % (s1[0], s1[1]))

    # cleanup
    shutil.rmtree(pipeline_dir)

    return 0
Ejemplo n.º 30
0
def test_async():
    """Smoke-test the asynchronous matrix/organism endpoint for a single
    metagenome and print the raw response."""
    query = ('/matrix/organism?hit_type=single&group_level=strain&evalue=15'
             '&source=RefSeq&result_type=abundance&id=mgm4653783.3&asynchronous=1')
    auth = get_auth_token(None)
    print("MG-RAST token: ", auth)
    print(repr(async_rest_api(API_URL + query, auth=auth)))
Ejemplo n.º 31
0
def main(args):
    """Export the metadata of an MG-RAST project to an Excel workbook.

    Fetches the full metadata bundle for --project and writes one worksheet
    each for README, project, sample, library, and every environmental
    package ("ep") type found, as <project>-export.xlsx in the current
    directory. Returns 1 on bad input; calls sys.exit() if the bundle has
    no libraries.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--project", dest="project", default=None, help="project ID")

    # get inputs: project ids are expected to carry the "mgp" prefix
    opts = parser.parse_args()
    if not opts.project or opts.project[0:3] != "mgp":
        sys.stderr.write("ERROR: a project id is required\n")
        return 1
    # get auth
    PROJECT = opts.project

    # NOTE(review): the parser defines no --user/--passwd/--token options;
    # get_auth_token(opts) presumably falls back to the environment — confirm
    TOKEN = get_auth_token(opts)

    # export metadata

    outfile = PROJECT + "-export.xlsx"
#
    # NOTE(review): API host is hardcoded here rather than configurable
    k = obj_from_url("http://api.mg-rast.org/metadata/export/{project}?verbosity=full".format(project=PROJECT), auth=TOKEN)
    metadata = k # json.loads(open(infile).read())

    workbook = xlsxwriter.Workbook(outfile)
    print("Creating", outfile)
    worksheet = {}
    # README sheet: writes the numbers 0-9 down the first column
    worksheet["README"] = workbook.add_worksheet("README")
    row = 0
    for i in range(10):
        worksheet["README"].write_number(row, 0, i)
        row += 1

    # project sheet: one column per project key; rows are key / definition / value
    worksheet["project"] = workbook.add_worksheet("project")
    project_keys = get_project_keys(metadata)
    col = 0
    for l in project_keys:
        value = metadata["data"][l]["value"]
        definition = metadata["data"][l]["definition"]
        worksheet["project"].write_string(0, col, l)
        worksheet["project"].write_string(1, col, definition)
        worksheet["project"].write_string(2, col, value)
        col += 1

    # sample sheet: header rows 0-1 (key, definition), one data row per
    # sample starting at row 2; columns follow samplekeys order
    worksheet["sample"] = workbook.add_worksheet("sample")

    samplekeys = get_sample_keys(metadata)

    col = 0
    row = 2
    for sample in metadata["samples"]:
        for l in samplekeys:
            if l in sample["data"].keys():
                value = sample["data"][l]["value"]
                definition = sample["data"][l]["definition"]
                fmt = sample["data"][l]["type"]
                worksheet["sample"].write_string(0, col, l)
                worksheet["sample"].write_string(1, col, definition)
                write_worksheet_value(worksheet["sample"], row, col, value, fmt)
            col += 1
        col = 0
        row += 1
    # the library sheet is named after the investigation type of the first
    # sample's first library
    try:
        librarytype = metadata["samples"][0]["libraries"][0]["data"]["investigation_type"]["value"]
    except IndexError:
        sys.exit("This metadata bundle does not have any libraries")

    worksheet["library"] = workbook.add_worksheet("library "+librarytype)

    libkeys = get_library_keys(metadata)
    col = 0
    row = 2
    # NOTE(review): only the first library of each sample is exported
    for sample in metadata["samples"]:
        for l in libkeys:
            if l in sample["libraries"][0]["data"].keys():
                value = sample["libraries"][0]["data"][l]["value"]
                definition = sample["libraries"][0]["data"][l]["definition"]
                fmt = sample["libraries"][0]["data"][l]["type"]
                worksheet["library"].write_string(0, col, l)
                worksheet["library"].write_string(1, col, definition)
                write_worksheet_value(worksheet["library"], row, col, value, fmt)
            col += 1
        col = 0
        row += 1

    # one worksheet per environmental package type, each with its own
    # column/row cursor so samples of different ep types don't collide
    eps = get_eps(metadata)
    print("eps", " ".join(eps))
    epcol = {}
    eprow = {}
    for ep in eps:
        worksheet[ep] = workbook.add_worksheet("ep " + ep)
        epcol[ep] = 0
        eprow[ep] = 2
    epkeys = get_ep_keys(metadata, eps)
    for sample in metadata["samples"]:
        ep = sample["envPackage"]["type"]
        for l in epkeys[ep]:
            # keys absent from this sample's ep data become empty string cells
            try:
                value = sample["envPackage"]["data"][l]["value"]
                definition = sample["envPackage"]["data"][l]["definition"]
                fmt = sample["envPackage"]["data"][l]["type"]
            except KeyError:
                value = "" ; definition = ""; fmt = "string"

            worksheet[ep].write_string(0, epcol[ep], l)
            worksheet[ep].write_string(1, epcol[ep], definition)
            write_worksheet_value(worksheet[ep], eprow[ep], epcol[ep], value, fmt)
            epcol[ep] += 1
        epcol[ep] = 0
        eprow[ep] += 1

    workbook.close()
Ejemplo n.º 32
0
def main(args):
    """Generate a PCoA plot (via an external R script) from abundance data.

    Reads a BIOM or tab-delimited abundance table from --input (or stdin),
    optionally assigns each metagenome to a group (from BIOM metadata or
    --groups in JSON/tabbed form), writes temp files, and invokes
    plot_mg_pcoa.r to render --plot.

    Returns 0 on success, 1 on any input/validation error.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_argument("--format", dest="format", default='biom', help="input format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--plot", dest="plot", default=None, help="filename for output plot")
    parser.add_argument("--distance", dest="distance", default='bray-curtis', help="distance metric, one of: bray-curtis, euclidean, maximum, manhattan, canberra, minkowski, difference, default is bray-curtis")
    parser.add_argument("--metadata", dest="metadata", default=None, help="metadata field to color by, only for 'biom' input")
    parser.add_argument("--groups", dest="groups", default=None, help="list of groups in JSON or tabbed format - either as input string or filename")
    parser.add_argument("--group_pos", dest="group_pos", type=int, default=1, help="position of group to use, default is 1 (first)")
    parser.add_argument("--color_auto", dest="color_auto", type=int, default=0, help="auto-create colors based on like group names, default is use group name as color: 1=true, 0=false")
    parser.add_argument("--rlib", dest="rlib", default=None, help="R lib path")
    # BUGFIX: help text claimed "default is 6" while the actual default is 10
    parser.add_argument("--height", dest="height", type=float, default=10, help="image height in inches, default is 10")
    parser.add_argument("--width", dest="width", type=float, default=10, help="image width in inches, default is 10")
    parser.add_argument("--dpi", dest="dpi", type=int, default=300, help="image DPI, default is 300")
    parser.add_argument("--three", dest="three", type=int, default=0, help="create 3-D PCoA, default is 2-D: 1=true, 0=false")
    parser.add_argument("--name", dest="name", type=int, default=0, help="label columns by name, default is by id: 1=true, 0=false")
    parser.add_argument("--label", dest="label", type=int, default=0, help="label image rows, default is off: 1=true, 0=false")
    
    # get inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    if not opts.plot:
        sys.stderr.write("ERROR: missing output filename\n")
        return 1
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']
    if not opts.rlib:
        sys.stderr.write("ERROR: missing path to R libs\n")
        return 1
    if opts.metadata:
        opts.color_auto = 1
    # BUGFIX: this list previously also contained 'reference', which is not a
    # defined option, so getattr() raised AttributeError on every invocation
    for o in ['color_auto', 'three', 'name', 'label']:
        if getattr(opts, o) not in [0, 1]:
            sys.stderr.write("ERROR: invalid value for '%s'\n"%o)
            return 1
    
    # get auth
    token = get_auth_token(opts)
    
    # parse inputs
    tmp_in  = 'tmp_'+random_str()+'.txt'
    tmp_hdl = open(tmp_in, 'w')
    mg_list = []
    groups  = []
    try:
        indata = sys.stdin.read() if opts.input == '-' else open(opts.input, 'r').read()
        if opts.format == 'biom':
            try:
                indata  = json.loads(indata)
                # BUGFIX: materialize a list — a Python 3 map object has no
                # len() and can only be iterated once, but mg_list is reused
                mg_list = [c['id'] for c in indata['columns']]
                col_name = True if opts.name == 1 else False
                biom_to_tab(indata, tmp_hdl, col_name=col_name)
                if opts.metadata:
                    groups = metadata_from_biom(indata, opts.metadata)
            except Exception:
                sys.stderr.write("ERROR: input BIOM data not correct format\n")
                return 1
        else:
            tmp_hdl.write(indata)
            mg_list = indata.split('\n')[0].strip().split('\t')
    except Exception:
        sys.stderr.write("ERROR: unable to load input data\n")
        return 1
    tmp_hdl.close()
    
    # get groups if not in BIOM metadata and option used
    if (len(groups) == 0) and opts.groups:
        # is it json ?
        ## example of 2 group sets in json format
        ## [ {"group1": ["mg_id_1", "mg_id_2"], "group2": ["mg_id_3", "mg_id_4", "mg_id_5"]},
        ##   {"group1": ["mg_id_1", "mg_id_2", "mg_id_3"], "group2": ["mg_id_4", "mg_id_5"]} ]
        try:
            gdata = json.load(open(opts.groups, 'r')) if os.path.isfile(opts.groups) else json.loads(opts.groups)
            if opts.group_pos > len(gdata):
                sys.stderr.write("ERROR: position (%d) of group is out of bounds\n"%opts.group_pos)
                return 1
            for m in mg_list:
                found_g = None
                for g, mgs in gdata[opts.group_pos-1].items():
                    if m in mgs:
                        found_g = g
                        break
                if found_g:
                    groups.append(found_g)
                else:
                    sys.stderr.write("ERROR: metagenome %s not in a group\n"%m)
                    return 1                  
        # no - its tabbed
        except Exception:
            gtext = open(opts.groups, 'r').read() if os.path.isfile(opts.groups) else opts.groups
            grows, gcols, gdata = tab_to_matrix(gtext)
            if opts.group_pos > len(gdata[0]):
                sys.stderr.write("ERROR: position (%d) of group is out of bounds\n"%opts.group_pos)
                # BUGFIX: previously fell through and continued with the
                # invalid position, producing a misleading follow-up error
                return 1
            for m in mg_list:
                try:
                    midx = gcols.index(m)
                    groups.append(gdata[midx][opts.group_pos-1])
                except Exception:
                    sys.stderr.write("ERROR: metagenome %s not in a group\n"%m)
                    return 1
    
    # print groups to file for R input; non-ASCII group chars become '?'
    tmp_group = None
    if len(groups) == len(mg_list):
        tmp_group = 'tmp_'+random_str()+'.txt'
        hdl_group = open(tmp_group, 'w')
        hdl_group.write("\tgroup\n")
        for i, m in enumerate(mg_list):
            hdl_group.write("%s\t%s\n"%(m, ''.join([x if ord(x) < 128 else '?' for x in groups[i]])))
        hdl_group.close()
    elif len(groups) > 0:
        sys.stderr.write("Warning: Not all metagenomes in a group\n")
    
    # build R cmd
    three = 'c(1,2,3)' if opts.three == 1 else 'c(1,2)'
    label = 'TRUE' if opts.label == 1 else 'FALSE'
    table = '"%s"'%tmp_group if tmp_group else 'NA'
    color = 'TRUE' if opts.color_auto == 1 else 'FALSE'
    r_cmd = """source("%s/plot_mg_pcoa.r")
suppressMessages( plot_mg_pcoa(
    table_in="%s",
    image_out="%s",
    plot_pcs=%s,
    dist_metric="%s",
    label_points=%s,
    color_table=%s,
    color_column=1,
    auto_colors=%s,
    image_height_in=%.1f,
    image_width_in=%.1f,
    image_res_dpi=%d
))"""%(opts.rlib, tmp_in, opts.plot, three, opts.distance, label, table, color, opts.height, opts.width, opts.dpi)
    execute_r(r_cmd)
    
    # cleanup
    os.remove(tmp_in)
    if tmp_group:
        os.remove(tmp_group)
    
    return 0
Ejemplo n.º 33
0
def main(args):
    """Administer an MG-RAST project: inspect it, update or export its
    metadata, make it public, or submit it to EBI.

    Positional args: <action> <project-id>, where action is one of
    valid_actions. Prints the API response as pretty JSON.

    Returns 0 on success, 1 on invalid input.
    """
    global API_URL
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    # access options
    parser.add_argument("-u",
                        "--url",
                        dest="url",
                        default=API_URL,
                        help="MG-RAST API url")
    parser.add_argument("-t",
                        "--token",
                        dest="token",
                        default=None,
                        help="MG-RAST token")
    # other options
    parser.add_argument("-f",
                        "--file",
                        dest="mdfile",
                        default=None,
                        help="metadata .xlsx file")
    parser.add_argument(
        "--taxa",
        dest="taxa",
        default=None,
        help=
        "metagenome_taxonomy for project: http://www.ebi.ac.uk/ena/data/view/Taxon:408169"
    )
    parser.add_argument("--debug",
                        dest="debug",
                        action="store_true",
                        default=False,
                        help="Run in debug mode")
    parser.add_argument("-v",
                        "--verbose",
                        dest="verbose",
                        action="store_true",
                        default=False,
                        help="Verbose STDOUT")
    parser.add_argument("args",
                        type=str,
                        nargs="+",
                        help="Action (" + ",".join(valid_actions) + ")")

    # get inputs
    opts = parser.parse_args()
    args = opts.args
    API_URL = opts.url

    # validate inputs
    if len(args) < 1:
        sys.stderr.write("ERROR: missing action\n")
        return 1
    action = args[0]
    if action not in valid_actions:
        sys.stderr.write("ERROR: invalid action. use one of: %s\n" %
                         ", ".join(valid_actions))
        return 1
    if len(args) < 2:
        sys.stderr.write("ERROR: missing Project ID\n")
        return 1
    pid = args[1]
    # sum of the two boolean flags (0, 1 or 2), passed to post_file as debug
    DEBUG = opts.verbose + opts.debug
    # get token; prompt interactively when none was supplied
    token = get_auth_token(opts)
    if not token:
        token = input('Enter your MG-RAST auth token: ')

    # actions
    if action == "get-info":
        data = obj_from_url(opts.url + '/project/' + pid +
                            '?verbosity=verbose&nocache=1',
                            auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "get-metadata":
        data = obj_from_url(opts.url + '/metadata/export/' + pid, auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "update-metadata":
        result = post_file(opts.url + '/metadata/update',
                           'upload',
                           opts.mdfile,
                           auth=token,
                           data=json.dumps({'project': pid},
                                           separators=(',', ':')),
                           debug=DEBUG)
        # BUGFIX: previously printed the undefined name 'data' (NameError);
        # print the upload response instead
        print(json.dumps(result, sort_keys=True, indent=4))
    elif action == "make-public":
        data = obj_from_url(opts.url + '/project/' + pid + '/makepublic',
                            auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "submit-ebi":
        debug = 1 if opts.debug else 0
        info = {'project_id': pid, 'debug': debug}
        if opts.taxa:
            info['project_taxonomy'] = opts.taxa
        data = obj_from_url(opts.url + '/submission/ebi',
                            auth=token,
                            data=json.dumps(info, separators=(',', ':')))
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "status-ebi":
        data = obj_from_url(opts.url + '/submission/' + pid, auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))

    return 0
Ejemplo n.º 34
0
def test_async0():
    """Smoke-test the asynchronous matrix/organism endpoint across several
    metagenomes and print the raw response."""
    query = ('/matrix/organism?id=mgm4440275.3&id=mgm4440276.3&id=mgm4440281.3'
             '&group_level=phylum&source=RDP&hit_type=single&result_type=abundance'
             '&evalue=1&identity=60&length=15&taxid=0&asynchronous=1')
    auth = get_auth_token(None)
    print(auth)
    print(repr(async_rest_api(API_URL + query, auth=auth)))
Ejemplo n.º 35
0
        else:
            public = "False"
        try:
            mg_name= item["name"]
            project_id = item["project_id"]
            project_name = item["project_name"]
        except KeyError:
            sys.stderr.write(repr(item))
        sys.stdout.write(("\t".join([item["metagenome_id"],
#                        str(len(item.keys())),
                         repr(public), item["created_on"],
                         mg_name, project_id, project_name]) + "\n"))

# Script body: query the MG-RAST /search endpoint for public metagenomes,
# oldest first, in pages of `limit`.
CALL = "/search"

# NOTE(review): called with no arguments — presumably reads the token from
# the environment; confirm against get_auth_token's definition
key = get_auth_token()

# assign parameters
limit = 1000 # initial call

# construct API call

parameters = {"limit": limit, "order":"created_on", "direction": "asc", "public": "1"}
# NOTE(review): this rebinds the module-level API_URL for anything that runs after it
API_URL= "https://api.mg-rast.org/"

base_url = API_URL + CALL + "?" + urlencode(parameters)

# convert the data from a JSON structure to a python data type, a dict of dicts.
jsonstructure = obj_from_url(base_url, auth=key)

# unpack and display the data table
Ejemplo n.º 36
0
def main(args):
    """Search for metagenomes matching the given field filters and stream
    the annotated sequences of every match to stdout.

    Returns 0 on success, including when the search matches nothing.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%(VERSION, search_opts), epilog=posthelp%AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='function', help="function level to filter by")
    parser.add_argument("--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_argument("--evalue", dest="evalue", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--status", dest="status", default="public", help="types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public")
    # one optional query-string argument per supported search field
    for sfield in SEARCH_FIELDS:
        parser.add_argument("--"+sfield, dest=sfield, default=None, help="search parameter: query string for "+sfield)

    # get inputs
    opts = parser.parse_args()

    # get auth
    token = get_auth_token(opts)

    # metagenome query: fixed paging/match options plus any search fields set
    search_params = [('limit', '100'),
                     ('verbosity', 'minimal'),
                     ('match', 'all'),
                     ('status', opts.status)]
    search_params.extend((f, getattr(opts, f)) for f in SEARCH_FIELDS
                         if hasattr(opts, f) and getattr(opts, f))
    query_url = opts.url+'/metagenome?'+urlencode(search_params, True)

    # retrieve the first page of results
    page = obj_from_url(query_url, auth=token)
    if not page['data']:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0

    # collect matching metagenome ids, following pagination links
    mg_ids = set(rec['id'] for rec in page['data'])
    while page['next']:
        page = obj_from_url(page['next'], auth=token)
        if not page['data']:
            break
        mg_ids.update(rec['id'] for rec in page['data'])

    # stream annotated sequences for each matching metagenome
    for mg in mg_ids:
        seq_params = [('source', opts.source),
                      ('evalue', opts.evalue),
                      ('identity', opts.identity),
                      ('length', opts.length)]
        # ontology-type lookup only for hierarchical sources above 'function'
        use_ontology = (opts.source in ['Subsystems', 'KO', 'NOG', 'COG']) and (opts.level != 'function')
        seq_params.append(('type', 'ontology' if use_ontology else 'function'))
        # opts.function presumably exists via SEARCH_FIELDS — confirm
        if opts.function:
            seq_params.append(('filter', opts.function))
            if opts.level:
                seq_params.append(('filter_level', opts.level))
        seq_url = opts.url+'/annotation/sequence/'+mg+'?'+urlencode(seq_params, True)
        # output data
        safe_print('Results from '+mg+":\n")
        stdout_from_url(seq_url, auth=token)

    return 0
Ejemplo n.º 37
0
def main(args):
    """Download a taxonomic abundance matrix for one or more metagenomes.

    IDs come from --ids as a comma separated list, a newline separated
    file, or a JSON collection file (with 'elements' or 'members' keys).
    The merged result is written as BIOM JSON or a tabbed table.
    Returns 0 on success, 1 on invalid input.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--ids", dest="ids", default=None, help="comma seperated list or file of KBase Metagenome IDs")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='genus', help="taxon level to retrieve abundances for, default is genus")
    parser.add_argument("--source", dest="source", default='SEED', help="taxon datasource to filter results by, default is SEED")
    parser.add_argument("--hit_type", dest="hit_type", default='lca', help="Set of organisms to search results by, one of: all, single, lca")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="taxon level to filter by")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="taxon name to filter by, file or comma seperated list")
    parser.add_argument("--intersect_source", dest="intersect_source", default='Subsystems', help="function datasource for insersection, default is Subsystems")
    parser.add_argument("--intersect_level", dest="intersect_level", default=None, help="function level for insersection")
    parser.add_argument("--intersect_name", dest="intersect_name", default=None, help="function name(s) for insersection, file or comma seperated list")
    parser.add_argument("--output", dest="output", default='-', help="output: filename or stdout (-), default is stdout")
    parser.add_argument("--format", dest="format", default='biom', help="output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--evalue", type=int, dest="evalue", default=15, help="negative exponent value for maximum e-value cutoff, default is 15")
    parser.add_argument("--identity", type=int, dest="identity", default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", type=int, dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")
    parser.add_argument("--temp", dest="temp", default=None, help="filename to temporarly save biom output at each iteration")
    
    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    # filter and intersect options are pairs: both or neither must be given
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    if (opts.intersect_name and (not opts.intersect_level)) or ((not opts.intersect_name) and opts.intersect_level):
        sys.stderr.write("ERROR: both --intersect_level and --intersect_name need to be used together\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    
    # get auth
    token = get_auth_token(opts)
    
    # build id list: a file may hold a JSON collection or a newline list,
    # otherwise --ids itself is a comma separated string
    id_list = []
    if os.path.isfile(opts.ids):
        with open(opts.ids, 'r') as ids_hdl:
            id_str = ids_hdl.read()
        try:
            id_obj = json.loads(id_str)
            if 'elements' in id_obj:
                id_list = list(id_obj['elements'].keys())
            elif 'members' in id_obj:
                # was map(...): a py3 map object has no len(), which the
                # batching check below requires
                id_list = [m['ID'] for m in id_obj['members']]
        except ValueError:
            # not JSON: treat the file as a plain newline separated id list
            id_list = id_str.strip().split('\n')
    else:
        id_list = opts.ids.strip().split(',')
    params = [ ('group_level', opts.level), 
               ('source', opts.source),
               ('hit_type', opts.hit_type),
               ('evalue', opts.evalue),
               ('identity', opts.identity),
               ('length', opts.length),
               ('version', opts.version),
               ('result_type', 'abundance'),
               ('asynchronous', '1') ]
    if opts.intersect_level and opts.intersect_name:
        params.append(('filter_source', opts.intersect_source))
        params.append(('filter_level', opts.intersect_level))
        if os.path.isfile(opts.intersect_name):
            with open(opts.intersect_name) as file_:
                for f in file_:
                    params.append(('filter', f.strip()))
        else:
            for f in opts.intersect_name.strip().split(','):
                params.append(('filter', f))

    # retrieve data, batching ids in chunks to keep request urls bounded
    biom = None
    size = 50
    if len(id_list) > size:
        # range, not py2-only xrange; 'start' avoids shadowing by inner loop
        for start in range(0, len(id_list), size):
            sub_ids = id_list[start:start+size]
            cur_params = copy.deepcopy(params)
            for mgid in sub_ids:
                cur_params.append(('id', mgid))
            cur_url  = opts.url+'/matrix/organism?'+urlencode(cur_params, True)
            cur_biom = async_rest_api(cur_url, auth=token)
            biom = merge_biom(biom, cur_biom)
            if opts.temp:
                # checkpoint the merged matrix after each chunk
                with open(opts.temp, 'w') as temp_hdl:
                    json.dump(biom, temp_hdl)
    else:
        for mgid in id_list:
            params.append(('id', mgid))
        url = opts.url+'/matrix/organism?'+urlencode(params, True)
        biom = async_rest_api(url, auth=token)
        if opts.temp:
            with open(opts.temp, 'w') as temp_hdl:
                json.dump(biom, temp_hdl)
    
    # get sub annotations: taxon names at opts.level whose ancestor at
    # opts.filter_level matches one of the requested filter names
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        # get input filter list (file or comma separated string)
        filter_list = []
        if os.path.isfile(opts.filter_name):
            with open(opts.filter_name) as file_:
                for f in file_:
                    filter_list.append(f.strip())
        else:
            for f in opts.filter_name.strip().split(','):
                filter_list.append(f)
        # annotation mapping from m5nr
        params = [ ('version', opts.version),
                   ('min_level', opts.level) ]
        url = opts.url+'/m5nr/taxonomy?'+urlencode(params, True)
        data = obj_from_url(url)
        for ann in data['data']:
            if (opts.filter_level in ann) and (opts.level in ann) and (ann[opts.filter_level] in filter_list):
                sub_ann.add(ann[opts.level])
    
    # output data
    if (not opts.output) or (opts.output == '-'):
        out_hdl = sys.stdout
    else:
        out_hdl = open(opts.output, 'w')
    
    if opts.format == 'biom':
        out_hdl.write(json.dumps(biom)+"\n")
    else:
        biom_to_tab(biom, out_hdl, rows=sub_ann)
    
    # only close real files; closing sys.stdout would break later writes
    if out_hdl is not sys.stdout:
        out_hdl.close()
    return 0
Ejemplo n.º 38
0
def main(args):
    """Export a project's metadata from the MG-RAST API into an xlsx workbook.

    Requires --project with an id beginning "mgp". Writes the workbook to
    <project>-export.xlsx with sheets: README, project, sample, one library
    sheet named for the investigation type, and one sheet per environmental
    package type found. Returns 0 on success, 1 on bad input; exits via
    sys.exit if the project has no libraries.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--project",
                        dest="project",
                        default=None,
                        help="project ID")

    # get inputs
    opts = parser.parse_args()
    if not opts.project or opts.project[0:3] != "mgp":
        sys.stderr.write("ERROR: a project id is required\n")
        return 1

    # get auth
    # NOTE(review): this parser defines no --user/--passwd/--token options;
    # get_auth_token presumably falls back to the environment -- confirm.
    PROJECT = opts.project
    TOKEN = get_auth_token(opts)

    # export metadata
    outfile = PROJECT + "-export.xlsx"
    # NOTE(review): the API host is hardcoded here instead of honoring a
    # --url option like the sibling tools.
    metadata = obj_from_url(
        "http://api.mg-rast.org/metadata/export/{project}?verbosity=full".
        format(project=PROJECT),
        auth=TOKEN)

    workbook = xlsxwriter.Workbook(outfile)
    print("Creating", outfile)
    worksheet = {}

    # README sheet: placeholder content (the numbers 0..9, one per row)
    worksheet["README"] = workbook.add_worksheet("README")
    row = 0
    for i in range(10):
        worksheet["README"].write_number(row, 0, i)
        row += 1

    # project sheet: one column per key -- rows are key, definition, value
    worksheet["project"] = workbook.add_worksheet("project")
    project_keys = get_project_keys(metadata)
    col = 0
    for l in project_keys:
        value = metadata["data"][l]["value"]
        definition = metadata["data"][l]["definition"]
        worksheet["project"].write_string(0, col, l)
        worksheet["project"].write_string(1, col, definition)
        worksheet["project"].write_string(2, col, value)
        col += 1

    # sample sheet: header rows 0-1 (key, definition), one data row per sample
    worksheet["sample"] = workbook.add_worksheet("sample")
    samplekeys = get_sample_keys(metadata)
    col = 0
    row = 2
    for sample in metadata["samples"]:
        for l in samplekeys:
            if l in sample["data"]:
                value = sample["data"][l]["value"]
                definition = sample["data"][l]["definition"]
                fmt = sample["data"][l]["type"]
                worksheet["sample"].write_string(0, col, l)
                worksheet["sample"].write_string(1, col, definition)
                write_worksheet_value(worksheet["sample"], row, col, value,
                                      fmt)
            col += 1
        col = 0
        row += 1

    # library sheet, named by the first library's investigation type
    try:
        librarytype = metadata["samples"][0]["libraries"][0]["data"][
            "investigation_type"]["value"]
    except IndexError:
        sys.exit("This metadata bundle does not have any libraries")

    worksheet["library"] = workbook.add_worksheet("library " + librarytype)

    libkeys = get_library_keys(metadata)
    col = 0
    row = 2
    for sample in metadata["samples"]:
        # only the first library of each sample is exported
        for l in libkeys:
            if l in sample["libraries"][0]["data"]:
                value = sample["libraries"][0]["data"][l]["value"]
                definition = sample["libraries"][0]["data"][l]["definition"]
                fmt = sample["libraries"][0]["data"][l]["type"]
                worksheet["library"].write_string(0, col, l)
                worksheet["library"].write_string(1, col, definition)
                write_worksheet_value(worksheet["library"], row, col, value,
                                      fmt)
            col += 1
        col = 0
        row += 1

    # one sheet per environmental package type; row/col cursors tracked
    # per sheet since samples of different types interleave
    eps = get_eps(metadata)
    print("eps", " ".join(eps))
    epcol = {}
    eprow = {}
    for ep in eps:
        worksheet[ep] = workbook.add_worksheet("ep " + ep)
        epcol[ep] = 0
        eprow[ep] = 2
    epkeys = get_ep_keys(metadata, eps)
    for sample in metadata["samples"]:
        ep = sample["envPackage"]["type"]
        for l in epkeys[ep]:
            try:
                value = sample["envPackage"]["data"][l]["value"]
                definition = sample["envPackage"]["data"][l]["definition"]
                fmt = sample["envPackage"]["data"][l]["type"]
            except KeyError:
                # key absent for this sample: emit an empty string cell
                value = ""
                definition = ""
                fmt = "string"

            worksheet[ep].write_string(0, epcol[ep], l)
            worksheet[ep].write_string(1, epcol[ep], definition)
            write_worksheet_value(worksheet[ep], eprow[ep], epcol[ep], value,
                                  fmt)
            epcol[ep] += 1
        epcol[ep] = 0
        eprow[ep] += 1

    workbook.close()
    # explicit success status, consistent with the other tools' main()
    return 0
Ejemplo n.º 39
0
def main(args):
    """List or download the files belonging to a metagenome, or to every
    metagenome in a project.

    With --list, print a table of available files and exit; otherwise
    download into <dir>[/<project>]/<metagenome>/, optionally restricted
    to a single file by --file (matched against file id or file name).
    Returns 0 on success, 1 on invalid input.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--project", dest="project", default=None, help="project ID")
    parser.add_argument("--metagenome", dest="metagenome", default=None, help="metagenome ID")
    parser.add_argument("--file", dest="file", default=None, help="file ID for given project or metagenome")
    parser.add_argument("--dir", dest="dir", default=".", help="directory to do downloads")
    parser.add_argument("--list", dest="list", action="store_true", default=False, help="list files and their info for given ID")

    # validate inputs
    opts = parser.parse_args()
    if not (opts.project or opts.metagenome):
        sys.stderr.write("ERROR: a project or metagenome id is required\n")
        return 1
    if not os.path.isdir(opts.dir):
        sys.stderr.write("ERROR: dir '%s' does not exist\n" % opts.dir)
        return 1
    downdir = opts.dir

    # authenticate
    token = get_auth_token(opts)

    # resolve the metagenome ids to operate on: all of a project's, or one
    mg_ids = []
    if opts.project:
        project_info = obj_from_url(opts.url + '/project/' + opts.project + '?verbosity=full', auth=token)
        mg_ids = [entry["metagenome_id"] for entry in project_info['metagenomes']]
    elif opts.metagenome:
        mg_ids = [opts.metagenome]

    # fetch the downloadable-file manifest of each metagenome
    manifests = {mg: obj_from_url(opts.url + '/download/' + mg, auth=token)['data'] for mg in mg_ids}

    # --list: print a summary table and stop
    if opts.list:
        table = PrettyTable(
            ["Metagenome", "File Name", "File ID", "Checksum", "Byte Size"])
        for mg, files in manifests.items():
            for info in files:
                nbytes = info['file_size'] if info['file_size'] else 0
                table.add_row(
                    [mg, info['file_name'], info['file_id'], info['file_md5'], nbytes])
        table.align = "l"
        table.align['Byte Size'] = "r"
        print(table)
        return 0

    # download, one sub-directory per metagenome (and per project if given)
    if opts.project:
        downdir = os.path.join(downdir, opts.project)
        if not os.path.isdir(downdir):
            os.mkdir(downdir)
    for mg, files in manifests.items():
        mgdir = os.path.join(downdir, mg)
        if not os.path.isdir(mgdir):
            os.mkdir(mgdir)
        for info in files:
            # no --file means everything; otherwise match id or name
            if (not opts.file) or (opts.file in (info['file_id'], info['file_name'])):
                file_download(token, info, dirpath=mgdir)

    return 0
def main(args):
    """Download a functional abundance matrix for one or more metagenomes.

    IDs come from --ids as a comma separated list, a newline separated
    file, or a JSON collection file (with 'elements' or 'members' keys).
    The merged result is written as BIOM JSON or a tabbed table.
    Returns 0 on success, 1 on invalid input.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--ids", dest="ids", default=None,
                        help="comma seperated list or file of KBase Metagenome IDs")
    parser.add_argument("--url", dest="url", default=API_URL,
                        help="communities API url")
    parser.add_argument("--user", dest="user", default=None,
                        help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None,
                        help="OAuth password")
    parser.add_argument("--token", dest="token", default=None,
                        help="OAuth token")
    parser.add_argument("--level", dest="level", default='level3',
                        help="functional level to retrieve abundances for, default is level3")
    parser.add_argument("--source", dest="source", default='Subsystems',
                        help="function datasource to filter results by, default is Subsystems")
    parser.add_argument("--filter_level", dest="filter_level", default=None,
                        help="function level to filter by")
    parser.add_argument("--filter_name", dest="filter_name", default=None,
                        help="function name to filter by, file or comma seperated list")
    parser.add_argument("--intersect_source", dest="intersect_source", default='SEED',
                        help="taxon datasource for insersection, default is SEED")
    parser.add_argument("--intersect_level", dest="intersect_level", default=None,
                        help="taxon level for insersection")
    parser.add_argument("--intersect_name", dest="intersect_name", default=None,
                        help="taxon name(s) for insersection, file or comma seperated list")
    parser.add_argument("--output", dest="output", default='-',
                        help="output: filename or stdout (-), default is stdout")
    parser.add_argument("--format", dest="format", default='biom',
                        help="output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--evalue", type=int, dest="evalue", default=15,
                        help="negative exponent value for maximum e-value cutoff, default is 15")
    parser.add_argument("--identity", type=int, dest="identity", default=60,
                        help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", type=int, dest="length", default=15,
                        help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1,
                        help="M5NR annotation version to use, default is 1")
    parser.add_argument("--temp", dest="temp", default=None,
                        help="filename to temporarly save biom output at each iteration")

    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    # filter and intersect options are pairs: both or neither must be given
    if (opts.filter_name and
        (not opts.filter_level)) or ((not opts.filter_name)
                                     and opts.filter_level):
        sys.stderr.write(
            "ERROR: both --filter_level and --filter_name need to be used together\n"
        )
        return 1
    if (opts.intersect_name and
        (not opts.intersect_level)) or ((not opts.intersect_name)
                                        and opts.intersect_level):
        sys.stderr.write(
            "ERROR: both --intersect_level and --intersect_name need to be used together\n"
        )
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build id list: a file may hold a JSON collection or a newline list,
    # otherwise --ids itself is a comma separated string
    id_list = []
    if os.path.isfile(opts.ids):
        with open(opts.ids, 'r') as ids_hdl:
            id_str = ids_hdl.read()
        try:
            id_obj = json.loads(id_str)
            if 'elements' in id_obj:
                id_list = list(id_obj['elements'].keys())
            elif 'members' in id_obj:
                # was map(...): a py3 map object has no len(), which the
                # batching check below requires
                id_list = [m['ID'] for m in id_obj['members']]
        except ValueError:
            # not JSON: treat the file as a plain newline separated id list
            id_list = id_str.strip().split('\n')
    else:
        id_list = opts.ids.strip().split(',')
    params = [('group_level', opts.level), ('source', opts.source),
              ('evalue', opts.evalue), ('identity', opts.identity),
              ('length', opts.length), ('version', opts.version),
              ('result_type', 'abundance'), ('asynchronous', '1')]
    if opts.intersect_level and opts.intersect_name:
        params.append(('filter_source', opts.intersect_source))
        params.append(('filter_level', opts.intersect_level))
        if os.path.isfile(opts.intersect_name):
            with open(opts.intersect_name) as file_:
                for f in file_:
                    params.append(('filter', f.strip()))
        else:
            for f in opts.intersect_name.strip().split(','):
                params.append(('filter', f))

    # retrieve data, batching ids in chunks to keep request urls bounded
    biom = None
    size = 50
    if len(id_list) > size:
        # range, not py2-only xrange; 'start' avoids shadowing by inner loop
        for start in range(0, len(id_list), size):
            sub_ids = id_list[start:start + size]
            cur_params = copy.deepcopy(params)
            for mgid in sub_ids:
                cur_params.append(('id', mgid))
            cur_url = opts.url + '/matrix/function?' + urlencode(
                cur_params, True)
            cur_biom = async_rest_api(cur_url, auth=token)
            biom = merge_biom(biom, cur_biom)
            if opts.temp:
                # checkpoint the merged matrix after each chunk
                with open(opts.temp, 'w') as temp_hdl:
                    json.dump(biom, temp_hdl)
    else:
        for mgid in id_list:
            params.append(('id', mgid))
        url = opts.url + '/matrix/function?' + urlencode(params, True)
        biom = async_rest_api(url, auth=token)
        if opts.temp:
            with open(opts.temp, 'w') as temp_hdl:
                json.dump(biom, temp_hdl)

    # get sub annotations: function names at opts.level whose ancestor at
    # opts.filter_level matches one of the requested filter names
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        # get input filter list (file or comma separated string)
        filter_list = []
        if os.path.isfile(opts.filter_name):
            with open(opts.filter_name) as file_:
                for f in file_:
                    filter_list.append(f.strip())
        else:
            for f in opts.filter_name.strip().split(','):
                filter_list.append(f)
        # annotation mapping from m5nr
        params = [('version', opts.version), ('min_level', opts.level),
                  ('source', opts.source)]
        url = opts.url + '/m5nr/ontology?' + urlencode(params, True)
        data = obj_from_url(url)
        # the ontology hierarchy labels the leaf function level 'level4'
        level = 'level4' if opts.level == 'function' else opts.level
        for ann in data['data']:
            if (opts.filter_level in ann) and (level in ann) and (
                    ann[opts.filter_level] in filter_list):
                sub_ann.add(ann[level])

    # output data
    if (not opts.output) or (opts.output == '-'):
        out_hdl = sys.stdout
    else:
        out_hdl = open(opts.output, 'w')

    if opts.format == 'biom':
        out_hdl.write(json.dumps(biom) + "\n")
    else:
        biom_to_tab(biom, out_hdl, rows=sub_ann)

    # only close real files; closing sys.stdout would break later writes
    if out_hdl is not sys.stdout:
        out_hdl.close()
    return 0
def main(args):
    """Render a boxplot image from an abundance table via an external R script.

    Input is BIOM JSON or a tabbed table, read from a file or stdin; the
    data is staged into a temp file and handed to plot_mg_boxplot.r.
    Returns 0 on success, 1 on invalid input.
    """
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_option("", "--format", dest="format", default='biom', help="input format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_option("", "--plot", dest="plot", default=None, help="filename for output plot")
    parser.add_option("", "--rlib", dest="rlib", default=None, help="R lib path")
    parser.add_option("", "--height", dest="height", type="float", default=8.5, help="image height in inches, default is 8.5")
    parser.add_option("", "--width", dest="width", type="float", default=11, help="image width in inches, default is 11")
    parser.add_option("", "--dpi", dest="dpi", type="int", default=300, help="image DPI, default is 300")
    parser.add_option("", "--name", dest="name", type="int", default=0, help="label columns by name, default is by id: 1=true, 0=false")
    parser.add_option("", "--label", dest="label", type="int", default=0, help="label image rows, default is off: 1=true, 0=false")
    
    # get inputs
    (opts, args) = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    if not opts.plot:
        sys.stderr.write("ERROR: missing output filename\n")
        return 1
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']
    if not opts.rlib:
        sys.stderr.write("ERROR: missing path to R libs\n")
        return 1
    # only validate flags this parser actually defines: 'reference' was a
    # copy-paste leftover from another tool and raised AttributeError
    for o in ['name', 'label']:
        if getattr(opts, o) not in [0, 1]:
            sys.stderr.write("ERROR: invalid value for '%s'\n"%o)
            return 1
    # (a stray 'return 0' here previously made everything below dead code)
    
    # get auth (token is not passed on; kept for parity with the other tools)
    token = get_auth_token(opts)
    
    # parse input into a tabbed temp file for R
    tmp_in  = 'tmp_'+random_str()+'.txt'
    tmp_hdl = open(tmp_in, 'w')
    try:
        indata = sys.stdin.read() if opts.input == '-' else open(opts.input, 'r').read()
        if opts.format == 'biom':
            try:
                indata = json.loads(indata)
                col_name = True if opts.name == 1 else False
                biom_to_tab(indata, tmp_hdl, col_name=col_name)
            except (ValueError, KeyError, TypeError):
                sys.stderr.write("ERROR: input BIOM data not correct format\n")
                tmp_hdl.close()
                os.remove(tmp_in)  # don't leave the temp file behind on error
                return 1
        else:
            tmp_hdl.write(indata)
    except IOError:
        sys.stderr.write("ERROR: unable to load input data\n")
        tmp_hdl.close()
        os.remove(tmp_in)
        return 1
    tmp_hdl.close()
    
    # build R cmd
    label = 'TRUE' if opts.label == 1 else 'FALSE'
    r_cmd = """source("%s/plot_mg_boxplot.r")
suppressMessages( plot_mg_boxplot(
    table_in="%s",
    image_out="%s",
    label_rows=%s,
    image_height_in=%.1f,
    image_width_in=%.1f,
    image_res_dpi=%d
))"""%(opts.rlib, tmp_in, opts.plot, label, opts.height, opts.width, opts.dpi)
    execute_r(r_cmd)
    
    # cleanup
    os.remove(tmp_in)
    
    return 0