def main():
    """Download the allele FASTA file of every locus in one scheme.

    Command-line options:
        --scheme_name / -s: scheme name (required); it is embedded in the
            database name as ``pubmlst_<scheme_name>_seqdef``.
        --scheme_id / -i: scheme id (default ``"1"``).
        --outdir / -o: output directory (default ``"."``); created,
            including missing parents, when it does not exist.

    For each URL listed under ``loci`` in the scheme record, the locus record
    is fetched, the document behind its ``alleles_fasta`` URL is downloaded
    and written to ``<outdir>/<locus id>.fasta``.  One JSON log line per
    written file is printed to stderr.

    Raises:
        KeyError: if a response lacks ``loci``, ``alleles_fasta`` or ``id``.
        OSError: if the output directory or a file cannot be created.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--scheme_name", "-s", dest="scheme_name",
                        help="scheme name", required=True)
    parser.add_argument("--scheme_id", "-i", dest="scheme_id",
                        default="1", help="scheme id")
    parser.add_argument("--outdir", "-o", dest="outdir",
                        default='.', help="output directory")
    args = parser.parse_args()

    # makedirs(exist_ok=True) creates missing parent directories too and does
    # not fail when the directory appears between a check and the creation.
    os.makedirs(args.outdir, exist_ok=True)

    api_url_base = 'http://rest.pubmlst.org/db'
    scheme_url = '/'.join([
        api_url_base,
        'pubmlst_' + args.scheme_name + '_seqdef',
        'schemes',
        args.scheme_id,
    ])

    # NOTE(review): `get` is defined elsewhere in this file; it is assumed to
    # return the raw response body (bytes) -- confirm against its definition.
    scheme_response = json.loads(get(scheme_url))

    plaintext_header = {'Content-Type': 'text/plain'}
    for locus_url in scheme_response['loci']:
        locus = json.loads(get(locus_url))
        alleles_fasta = get(
            locus['alleles_fasta'], headers=plaintext_header).decode('utf-8')

        output_filename = os.path.join(args.outdir, locus['id'] + '.fasta')
        # The payload was decoded as UTF-8, so write it back as UTF-8 instead
        # of relying on the platform's default encoding.
        with open(output_filename, 'w', encoding='utf-8') as f:
            f.write(alleles_fasta)

        log_msg = {
            'timestamp': datetime.datetime.now().isoformat(),
            'event': 'file_downloaded',
            'filename': output_filename,
        }
        print(json.dumps(log_msg), file=sys.stderr)
def main():
    """Download the profiles table and all locus FASTA files of one scheme.

    Command-line options:
        --scheme_name / -s: scheme name (required); it is embedded in the
            database name as ``pubmlst_<scheme_name>_seqdef``.
        --scheme_id / -i: scheme id (default ``"1"``).
        --outdir / -o: output directory (default ``"."``); created,
            including missing parents, when it does not exist.
        --base-url / -b: base URL of the API.
        --proxy / -p: proxy URL; the empty default means "no proxy".

    Output:
        * ``<outdir>/<name of outdir>.txt`` -- the table behind the scheme's
          ``profiles_csv`` URL, re-written tab-separated without a row index.
        * ``<outdir>/<locus id>.tfa`` -- one file per locus listed under
          ``loci``, holding the document behind its ``alleles_fasta`` URL.
        * One JSON log line per locus file on stderr.

    Raises:
        KeyError: if a response lacks an expected key.
        OSError: if the output directory or a file cannot be created.
        requests.HTTPError: if the proxied profiles download returns an
            HTTP error status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--scheme_name", "-s", dest="scheme_name",
                        help="scheme name", required=True)
    parser.add_argument("--scheme_id", "-i", dest="scheme_id",
                        default="1", help="scheme id")
    parser.add_argument("--outdir", "-o", dest="outdir",
                        default='.', help="output directory")
    parser.add_argument("--base-url", "-b", dest="base_url",
                        default='http://rest.pubmlst.org/db',
                        help="Base URL for the API. Suggested values are: "
                             "http://rest.pubmlst.org/db (default), "
                             "https://bigsdb.pasteur.fr/api/db")
    parser.add_argument("--proxy", "-p", dest="proxy", default='',
                        help="Proxy URL: http://yourProxy:8080")
    args = parser.parse_args()

    # makedirs(exist_ok=True) creates missing parent directories too and does
    # not fail when the directory appears between a check and the creation.
    os.makedirs(args.outdir, exist_ok=True)

    scheme_url = '/'.join([
        args.base_url,
        'pubmlst_' + args.scheme_name + '_seqdef',
        'schemes',
        args.scheme_id,
    ])

    # NOTE(review): `get` is defined elsewhere in this file; it is assumed to
    # return the raw response body (bytes) and to treat proxy='' as "no
    # proxy" -- confirm against its definition.
    scheme_response = json.loads(get(api_url=scheme_url, proxy=args.proxy))

    profiles_url = scheme_response['profiles_csv']
    if args.proxy:
        # pandas cannot be told about the proxy, so fetch the table through
        # requests and hand pandas an in-memory text buffer.
        proxy_dict = {"http": args.proxy, "https": args.proxy}
        profiles_response = requests.get(profiles_url, proxies=proxy_dict)
        # Fail loudly instead of parsing an HTTP error page as a table.
        profiles_response.raise_for_status()
        profiles_source = io.StringIO(profiles_response.text)
    else:
        profiles_source = profiles_url

    # The profiles file is named after the output directory.  Resolving the
    # absolute path first keeps the name meaningful for "dir/" (whose plain
    # basename is empty) and for the default "." (whose basename is ".").
    outdir_name = os.path.basename(os.path.abspath(args.outdir))
    profiles_filename = os.path.join(args.outdir, outdir_name + ".txt")
    profiles = pd.read_csv(profiles_source, sep='\t', index_col=False)
    profiles.to_csv(profiles_filename, sep='\t', index=None)

    plaintext_header = {'Content-Type': 'text/plain'}
    for locus_url in scheme_response['loci']:
        # Route the per-locus requests through the proxy as well; previously
        # only the scheme request honoured --proxy.
        locus = json.loads(get(locus_url, proxy=args.proxy))
        alleles_fasta = get(
            locus['alleles_fasta'],
            headers=plaintext_header,
            proxy=args.proxy,
        ).decode('utf-8')

        output_filename = os.path.join(args.outdir, locus['id'] + '.tfa')
        # The payload was decoded as UTF-8, so write it back as UTF-8 instead
        # of relying on the platform's default encoding.
        with open(output_filename, 'w', encoding='utf-8') as f:
            f.write(alleles_fasta)

        log_msg = {
            'timestamp': datetime.datetime.now().isoformat(),
            'event': 'file_downloaded',
            'filename': output_filename,
        }
        print(json.dumps(log_msg), file=sys.stderr)
def main():
    """Print a tab-separated listing of the schemes offered by PubMLST.

    Command-line options:
        --pattern / -p: regular expression; only databases named
            ``pubmlst_<...pattern...>_seqdef`` are listed (default: all).
        --exclude_pattern / -e: regular expression; databases whose name
            matches it in the same way are skipped (default: none).
        --names_only / -n: print only the scheme names, without requesting
            the per-scheme details.

    Output (stdout): a header line, then either one scheme name per line
    (names-only mode) or one line per scheme with the name followed by the
    fields in ``details_fields``.  A field missing from a scheme record is
    printed as ``None``.

    Raises:
        re.error: if either pattern is not a valid regular expression.
        KeyError: if a response lacks an expected structural key.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--pattern', '-p', default="",
                        help="regex pattern to filter scheme names")
    parser.add_argument('--exclude_pattern', '-e', default="",
                        help="regex pattern to exclude scheme names")
    parser.add_argument('--names_only', '-n', action='store_true',
                        help="Only show scheme names")
    args = parser.parse_args()

    # Build the name matchers once instead of once per database.  The user
    # pattern is wrapped in a non-capturing group so that an alternation such
    # as "a|b" stays confined to the scheme-name part, and group(1) is always
    # the full scheme name.
    include_re = re.compile(
        'pubmlst_(.*(?:' + args.pattern + ').*)_seqdef$')
    exclude_re = None
    if args.exclude_pattern != "":
        exclude_re = re.compile(
            'pubmlst_(.*(?:' + args.exclude_pattern + ').*)_seqdef$')

    details_fields = [
        'id',
        'description',
        'locus_count',
        'records',
        'last_added',
        'last_updated',
    ]

    if args.names_only:
        print('name')
    else:
        print('\t'.join(['name'] + details_fields))

    api_url_base = 'http://rest.pubmlst.org/db'
    # NOTE(review): `get` is defined elsewhere in this file; the checks below
    # assume it returns the response body, or a falsy value when there is
    # nothing usable -- confirm against its definition.
    url_base_response = json.loads(get(api_url_base))

    for db in url_base_response:
        for database in db['databases']:
            database_name = database['name']
            if exclude_re is not None and exclude_re.search(database_name):
                continue
            scheme_match = include_re.search(database_name)
            if not scheme_match:
                continue
            scheme_name = scheme_match.group(1)

            if args.names_only:
                print(scheme_name)
                # Keep scanning this group: a `break` here would hide any
                # further matching database that detailed mode would list.
                continue

            seqdef_response = get(database['href'])
            if not seqdef_response:
                continue
            schemes_response = get(json.loads(seqdef_response)['schemes'])
            if not schemes_response:
                continue

            for scheme in json.loads(schemes_response)['schemes']:
                scheme_details_response = get(scheme['scheme'])
                if not scheme_details_response:
                    continue
                # Parse the record once rather than once per field.
                scheme_details = json.loads(scheme_details_response)
                row = [scheme_name] + [
                    scheme_details.get(field) for field in details_fields
                ]
                # Flush per row so output appears while the remaining
                # requests are still in flight.
                print('\t'.join(map(str, row)), flush=True)