def download(dataset, path='./', quiet=False, subdir=False, debug=False):
    """Download the raw data files for a dataset without installing them.

    Args:
        dataset: name of the dataset to download.
        path: directory the files are written to (defaults to cwd).
        quiet: suppress engine console output.
        subdir: keep the dataset's subdirectory structure.
        debug: re-raise download errors instead of only printing them.

    Returns:
        The download engine that was used. (Previously the function
        returned ``None``; returning the engine is backward-compatible
        and matches the other download/install entry points.)

    Raises:
        ValueError: if no script matches ``dataset``.
    """
    args = {
        'dataset': dataset,
        'command': 'download',
        'path': path,
        'subdir': subdir,
        'quiet': quiet
    }
    engine = choose_engine(args)
    script_list = SCRIPT_LIST()
    # An empty catalog or script directory means the dataset scripts were
    # never fetched; pull them before trying to match the dataset name.
    if not script_list or not os.listdir(SCRIPT_WRITE_PATH):
        check_for_updates()
        script_list = SCRIPT_LIST()
    scripts = name_matches(script_list, args['dataset'])
    if scripts:
        for script in scripts:
            print("=> Downloading", script.name)
            try:
                script.download(engine, debug=debug)
                script.engine.final_cleanup()
            except Exception as e:
                # Best-effort: report and continue unless debugging.
                print(e)
                if debug:
                    raise
    else:
        message = "Run retriever.datasets() to see a list of currently " \
                  "available datasets."
        raise ValueError(message)
    return engine
def _install(args, use_cache, debug):
    """Install datasets for retriever.

    Args:
        args: dict of parsed CLI options; must contain 'dataset'.
        use_cache: reuse previously downloaded raw files when True.
        debug: re-raise installation errors instead of only printing them.

    Raises:
        ValueError: if no script matches ``args['dataset']``.
    """
    engine = choose_engine(args)
    engine.use_cache = use_cache
    script_list = SCRIPT_LIST()
    # First run: the catalog or the script directory is empty, so fetch
    # the dataset scripts and rebuild the catalog.
    if not script_list or not os.listdir(SCRIPT_WRITE_PATH):
        check_for_updates()
        script_list = SCRIPT_LIST(force_compile=False)
    data_sets_scripts = name_matches(script_list, args['dataset'])
    if data_sets_scripts:
        for data_sets_script in data_sets_scripts:
            print("=> Installing", data_sets_script.name)
            try:
                data_sets_script.download(engine, debug=debug)
                data_sets_script.engine.final_cleanup()
            except Exception as e:
                print(e)
                if debug:
                    raise
    else:
        # BUG FIX: added the missing space ("datasets()to" -> "datasets() to")
        # in the user-facing hint.
        message = "The dataset \"{}\" isn't available in the Retriever. " \
                  "Run retriever.datasets() to list the currently available " \
                  "datasets".format(args['dataset'])
        raise ValueError(message)
def _install(args, use_cache, debug):
    """Install datasets for retriever.

    Args:
        args: dict of parsed CLI options; must contain 'dataset'.
        use_cache: reuse previously downloaded raw files when True.
        debug: re-raise installation errors instead of only printing them.

    Returns:
        The engine used, so callers can inspect what was loaded.

    Raises:
        ValueError: if no script matches ``args['dataset']``.
    """
    engine = choose_engine(args)
    engine.use_cache = use_cache
    script_list = SCRIPT_LIST()
    # First run: nothing cached locally, so fetch the dataset scripts.
    if not (script_list or os.listdir(SCRIPT_WRITE_PATH)):
        check_for_updates()
        script_list = SCRIPT_LIST()
    data_sets_scripts = name_matches(script_list, args['dataset'])
    if data_sets_scripts:
        for data_sets_script in data_sets_scripts:
            try:
                # Give each script a fresh (empty) table registry.
                engine.script_table_registry = OrderedDict()
                data_sets_script.download(engine, debug=debug)
                data_sets_script.engine.final_cleanup()
            except Exception as e:
                print(e)
                if debug:
                    raise
    else:
        # BUG FIX: added the missing space ("datasets()to" -> "datasets() to")
        # in the user-facing hint.
        message = "Run retriever.datasets() to list the currently available " \
                  "datasets."
        raise ValueError(message)
    return engine
def download(dataset, path='./', quiet=False, sub_dir='', debug=False, use_cache=True):
    """Download scripts for retriever.

    Handles regular catalog datasets plus dynamically created
    ``socrata-<id>`` and ``rdataset-<package>-<name>`` datasets.

    Returns:
        The engine used for the download (``None`` for an empty Socrata
        resource, matching the original early-return behavior).

    Raises:
        ValueError: if the name matches no script and is not a
        socrata/rdataset name.
    """
    args = {
        'dataset': dataset,
        'command': 'download',
        'path': path,
        'sub_dir': sub_dir,
        'quiet': quiet
    }
    engine = choose_engine(args)
    engine.use_cache = use_cache
    script_list = SCRIPT_LIST()
    if not script_list or not os.listdir(SCRIPT_WRITE_PATH):
        check_for_updates()
        script_list = SCRIPT_LIST()
    scripts = name_matches(script_list, args['dataset'])
    if scripts:
        for script in scripts:
            print("=> Downloading", script.name)
            try:
                script.download(engine, debug=debug)
                script.engine.final_cleanup()
            except Exception as e:
                print(e)
                if debug:
                    raise
    # BUG FIX: name_matches() yields an empty list (not None) when nothing
    # matches, so the original `scripts is None` guards made the socrata and
    # rdataset branches unreachable; `not scripts` covers both None and [].
    elif args['dataset'].startswith('socrata') and not scripts:
        socrata_id = args['dataset'].split('-', 1)[1]
        resource = find_socrata_dataset_by_id(socrata_id)
        if "error" in resource.keys():
            if resource["datatype"][0] == "map":
                print("{} because map type datasets are not supported".format(
                    resource["error"]))
            else:
                print("{} because it is of type {} and not tabular".format(
                    resource["error"], resource["datatype"][1]))
        elif len(resource.keys()) == 0:
            # Empty resource: nothing to download (returns None as before).
            return
        else:
            print("=> Downloading", args['dataset'])
            name = f"socrata-{socrata_id}"
            create_socrata_dataset(engine, name, resource)
    elif not scripts and args['dataset'].startswith('rdataset'):
        print("=> Downloading", args['dataset'])
        rdataset = args['dataset'].split('-')
        update_rdataset_catalog()
        package, dataset_name = rdataset[1], rdataset[2]
        create_rdataset(engine, package, dataset_name)
    else:
        message = "Run retriever.datasets() to see the list of currently " \
                  "available datasets."
        raise ValueError(message)
    return engine
def _install(args, use_cache, debug):
    """Install datasets for retriever.

    Dispatches between two install paths:
      * committed-archive installs -- a ``.zip`` dataset path or a
        provenance ``hash_value`` -- handled by ``install_committed``;
      * regular script-based installs matched via ``name_matches``.

    Returns the engine used, so callers can inspect what was loaded.
    Raises ValueError when no script matches ``args['dataset']``.
    """
    engine = choose_engine(args)
    engine.use_cache = use_cache

    if args['dataset'].endswith('.zip') or args['hash_value']:
        # Committed-archive path: install from an archive file instead of
        # running a dataset script.
        path_to_archive = args['dataset']
        if args['hash_value']:
            # Resolve the archive inside the provenance directory:
            # <PROVENANCE_DIR>/<dataset>/<dataset>-<hash>.zip
            path_to_archive = os.path.join(
                PROVENANCE_DIR, args['dataset'],
                '{}-{}.zip'.format(args['dataset'], args['hash_value'][0]))
        if not os.path.exists(path_to_archive):
            # NOTE(review): this only warns; install_committed is still
            # called with the missing path -- confirm this is intentional.
            print('The committed file does not exist.')
        engine = install_committed(path_to_archive,
                                   engine,
                                   force=args.get('force', False))
        return engine
    script_list = SCRIPT_LIST()
    if not (script_list or os.listdir(SCRIPT_WRITE_PATH)):
        # First run: fetch the dataset scripts, then rebuild the catalog.
        check_for_updates()
        script_list = SCRIPT_LIST()
    data_sets_scripts = name_matches(script_list, args['dataset'])
    if data_sets_scripts:
        for data_sets_script in data_sets_scripts:
            try:
                # Give each script a fresh (empty) table registry.
                engine.script_table_registry = OrderedDict()
                data_sets_script.download(engine, debug=debug)
                data_sets_script.engine.final_cleanup()
            except Exception as e:
                # Best-effort: report and continue unless debugging.
                print(e)
                if debug:
                    raise
    else:
        message = "Run retriever.datasets() to list the currently available " \
                  "datasets."
        raise ValueError(message)
    return engine
def download(dataset, path='./', quiet=False, sub_dir='', debug=False, use_cache=True):
    """Download scripts for retriever.

    Resolves ``dataset`` against the script catalog (fetching the catalog
    first if it is empty), downloads every matching script with the chosen
    engine, and returns that engine. Raises ValueError when nothing matches.
    """
    options = {
        'dataset': dataset,
        'command': 'download',
        'path': path,
        'sub_dir': sub_dir,
        'quiet': quiet
    }
    engine = choose_engine(options)
    engine.use_cache = use_cache

    catalog = SCRIPT_LIST()
    catalog_missing = not catalog or not os.listdir(SCRIPT_WRITE_PATH)
    if catalog_missing:
        check_for_updates()
        catalog = SCRIPT_LIST()

    matches = name_matches(catalog, options['dataset'])
    if not matches:
        raise ValueError("Run retriever.datasets() to see the list of currently "
                         "available datasets.")

    for script in matches:
        print("=> Downloading", script.name)
        try:
            script.download(engine, debug=debug)
            script.engine.final_cleanup()
        except Exception as error:
            print(error)
            if debug:
                raise
    return engine
# Post-install hook: append a shell snippet so argcomplete registers the
# retriever CLI for tab-completion in new shells, then activate it for the
# current session. Best-effort: failures here must never break installation.

# if platform is OS X use "~/.bash_profile"
if current_platform == "darwin":
    bash_file = "~/.bash_profile"
# if platform is Linux use "~/.bashrc"
elif current_platform == "linux":
    bash_file = "~/.bashrc"
# else write and discard
else:
    bash_file = "/dev/null"

argcomplete_command = 'eval "$(register-python-argcomplete retriever)"'
with open(os.path.expanduser(bash_file), "a+") as bashrc:
    bashrc.seek(0)
    # register retriever for arg-completion if not already registered
    # whenever a new shell is spawned (keeps the rc file idempotent)
    if argcomplete_command not in bashrc.read():
        bashrc.write(argcomplete_command + "\n")
# NOTE: the context manager closes the file; the old explicit close() call
# after the with-block was redundant and has been removed.
os.system("activate-global-python-argcomplete")
# register for the current shell
os.system(argcomplete_command)

try:
    from retriever.compile import compile
    from retriever.lib.repository import check_for_updates
    check_for_updates(False)
    compile()
except Exception:
    # Best-effort script update/compile; ignore any failure (e.g. offline).
    pass
def main():
    """This function launches the Data Retriever."""
    if len(sys.argv) == 1:
        # if no command line args are passed, show the help options
        parser.parse_args(['-h'])
    else:
        # otherwise, parse them
        args = parser.parse_args()

        # Fetch dataset scripts on first run. Skipped for reset/update
        # (they manage the scripts themselves) and when the script search
        # path already contains files.
        if args.command not in ['reset', 'update'] \
                and not os.path.isdir(SCRIPT_SEARCH_PATHS[1]) \
                and not [f for f in os.listdir(SCRIPT_SEARCH_PATHS[-1])
                         if os.path.exists(SCRIPT_SEARCH_PATHS[-1])]:
            check_for_updates()
            reload_scripts()

        script_list = SCRIPT_LIST()

        # 'install' requires an engine sub-command; show install help if absent.
        if args.command == "install" and not args.engine:
            parser.parse_args(['install', '-h'])

        if args.quiet:
            # Silence all further console output.
            sys.stdout = open(os.devnull, 'w')

        if args.command == 'help':
            parser.parse_args(['-h'])

        if hasattr(args, 'compile') and args.compile:
            script_list = reload_scripts()

        if args.command == 'defaults':
            # Print each engine's required options and their defaults.
            for engine_item in engine_list:
                print("Default options for engine ", engine_item.name)
                for default_opts in engine_item.required_opts:
                    print(default_opts[0], " ", default_opts[2])
                print()
            return

        if args.command == 'update':
            check_for_updates()
            reload_scripts()
            return

        elif args.command == 'citation':
            if args.dataset is None:
                print("\nCitation for retriever:\n")
                print(CITATION)
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print("\nDataset: {}".format(dataset.name))
                    print("Citation: {}".format(dataset.citation))
                    print("Description: {}\n".format(dataset.description))
            return

        elif args.command == 'license':
            if args.dataset is None:
                print(LICENSE)
            else:
                dataset_license = license(args.dataset)
                if dataset_license:
                    print(dataset_license)
                else:
                    print("There is no license information for {}".format(
                        args.dataset))
            return

        elif args.command == 'new':
            # Create a new sample dataset script at the given filename.
            f = open(args.filename, 'w')
            f.write(sample_script)
            f.close()
            return

        elif args.command == 'reset':
            reset_retriever(args.scope)
            return

        elif args.command == 'new_json':
            # create new JSON script
            create_json()
            return

        elif args.command == 'edit_json':
            # edit existing JSON script
            json_file = get_script_filename(args.dataset.lower())
            edit_json(json_file)
            return

        elif args.command == 'autocreate':
            # Exactly one of -f (file) / -d (directory) must be given.
            if sum([args.f, args.d]) == 1:
                file_flag = True if args.f else False
                create_package(args.path, args.dt, file_flag, args.o,
                               args.skip_lines)
            else:
                print('Please use one and only one of the flags -f -d')
            return

        elif args.command == 'delete_json':
            # delete existing JSON script from home directory and or script
            # directory if exists in current dir
            confirm = input("Really remove " + args.dataset.lower() +
                            " and all its contents? (y/N): ")
            if confirm.lower().strip() in ['y', 'yes']:
                json_file = get_script_filename(args.dataset.lower())
                delete_json(json_file)
            return

        if args.command == 'ls':
            # scripts should never be empty because check_for_updates is run
            # on SCRIPT_LIST init
            if not (args.l or args.k or isinstance(args.v, list)):
                # Plain listing of every dataset name.
                all_scripts = dataset_names()
                print("Available datasets : {}\n".format(len(all_scripts)))
                from retriever import lscolumns
                lscolumns.printls(all_scripts)
            elif isinstance(args.v, list):
                # Verbose listing (-v), optionally limited to named datasets.
                if args.v:
                    try:
                        all_scripts = [
                            get_script(dataset) for dataset in args.v
                        ]
                    except KeyError:
                        all_scripts = []
                        print("Dataset(s) is not found.")
                else:
                    all_scripts = datasets()
                count = 1
                for script in all_scripts:
                    print("{count}. {title}\n {name}\n"
                          "{keywords}\n{description}\n"
                          "{licenses}\n{citation}\n"
                          "".format(
                              count=count,
                              title=script.title,
                              name=script.name,
                              keywords=script.keywords,
                              description=script.description,
                              licenses=str(script.licenses[0]['name']),
                              citation=script.citation,
                          ))
                    count += 1
            else:
                # Filtered listing by license (-l) and/or keyword (-k).
                param_licenses = args.l if args.l else None
                keywords = args.k if args.k else None
                # search
                searched_scripts = datasets(keywords, param_licenses)
                if not searched_scripts:
                    print("No available datasets found")
                else:
                    print("Available datasets : {}\n".format(
                        len(searched_scripts)))
                    count = 1
                    for script in searched_scripts:
                        print("{count}. {title}\n{name}\n"
                              "{keywords}\n{licenses}\n".format(
                                  count=count,
                                  title=script.title,
                                  name=script.name,
                                  keywords=script.keywords,
                                  licenses=str(script.licenses[0]['name']),
                              ))
                        count += 1
            return

        # Remaining commands (e.g. install) need a database engine.
        engine = choose_engine(args.__dict__)

        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
            # Suppress tracebacks for end users when not debugging.
            sys.tracebacklimit = 0

        if hasattr(args, 'debug') and args.not_cached:
            engine.use_cache = False
        else:
            engine.use_cache = True

        if args.dataset is not None:
            scripts = name_matches(script_list, args.dataset)
        else:
            raise Exception("no dataset specified.")
        if scripts:
            for dataset in scripts:
                print("=> Installing", dataset.name)
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # Ctrl-C skips the current dataset without a traceback.
                    pass
                except Exception as e:
                    print(e)
                    if debug:
                        raise
            print("Done!")
        else:
            print(
                "Run 'retriever ls' to see a list of currently available datasets."
            )
def main():
    """This function launches the Data Retriever."""
    if len(sys.argv) == 1:
        # if no command line args are passed, show the help options
        parser.parse_args(['-h'])
    else:
        # otherwise, parse them
        args = parser.parse_args()
        reset_or_update = args.command in ["reset", "update"]
        # Fetch dataset scripts on first run; reset/update manage the
        # scripts themselves, so they skip this bootstrap.
        if (not reset_or_update and
                not os.path.isdir(SCRIPT_SEARCH_PATHS[1]) and not [
                    f for f in os.listdir(SCRIPT_SEARCH_PATHS[-1])
                    if os.path.exists(SCRIPT_SEARCH_PATHS[-1])
                ]):
            check_for_updates()
            reload_scripts()
        script_list = SCRIPT_LIST()
        # 'install' requires an engine sub-command; show install help if absent.
        if args.command == "install" and not args.engine:
            parser.parse_args(["install", "-h"])
        if args.quiet:
            # Silence all further console output.
            sys.stdout = open(os.devnull, "w")
        if args.command == "help":
            parser.parse_args(["-h"])
        if hasattr(args, "compile") and args.compile:
            script_list = reload_scripts()
        if args.command == "defaults":
            # Print each engine's required options and their defaults.
            for engine_item in engine_list:
                print("Default options for engine ", engine_item.name)
                for default_opts in engine_item.required_opts:
                    print(default_opts[0], " ", default_opts[2])
                print()
            return
        if args.command == "update":
            check_for_updates()
            reload_scripts()
            return
        if args.command == "citation":
            if args.dataset is None:
                print("\nCitation for retriever:\n")
                print(CITATION)
            else:
                citations = get_script_citation(args.dataset)
                for citation in citations:
                    print("Citation: {}".format(citation))
            return
        if args.command == 'license':
            if args.dataset is None:
                print(LICENSE)
            else:
                dataset_license = license(args.dataset)
                if dataset_license:
                    print(dataset_license)
                else:
                    print("There is no license information for {}".format(
                        args.dataset))
            return
        if args.command == 'new':
            # Create a new sample dataset script at the given filename.
            f = open(args.filename, 'w')
            f.write(sample_script)
            f.close()
            return
        if args.command == 'reset':
            reset_retriever(args.scope)
            return
        if args.command == 'autocreate':
            if args.c:
                # -c: check whether any known dataset already wraps this URL.
                url = args.path
                script_list = SCRIPT_LIST()
                flag = 0
                for script in script_list:
                    for dataset in script.tables:
                        if script.tables[dataset].url == url:
                            flag = 1
                            break
                if flag == 1:
                    print("File already exist in dataset " + str(script.name))
                else:
                    print("Dataset is not avaliable, Please download")
                return
            # Exactly one of -f (file) / -d (directory) must be given.
            if sum([args.f, args.d]) == 1:
                file_flag = bool(args.f)
                create_package(args.path, args.dt, file_flag, args.o,
                               args.skip_lines, args.e)
            else:
                print('Please use one and only one of the flags -f -d')
            return
        if args.command == 'ls':
            # scripts should never be empty because check_for_updates is run
            # on SCRIPT_LIST init
            if not any([args.l, args.k, args.v, args.s, args.rdataset]):
                # Combined offline/online listing; online-only datasets are
                # tagged False so the printer can mark them with *.
                all_scripts = dataset_names()
                from retriever import lscolumns
                all_scripts_combined = []
                for dataset in all_scripts['offline']:
                    all_scripts_combined.append((dataset, True))
                for dataset in all_scripts['online']:
                    if dataset in all_scripts['offline']:
                        continue
                    all_scripts_combined.append((dataset, False))
                all_scripts_combined = sorted(all_scripts_combined,
                                              key=lambda x: x[0])
                print("Available datasets : {}\n".format(
                    len(all_scripts_combined)))
                lscolumns.printls(all_scripts_combined)
                print("\nThe symbol * denotes the online datasets.")
                print(
                    "To see the full list of available online datasets, visit\n"
                    "https://github.com/weecology/retriever-recipes.")
            elif isinstance(args.s, list):
                # -s: interactive Socrata name search (needs inquirer).
                try:
                    theme = INQUIRER_THEME
                except NameError:
                    print("To use retriever ls -s, install inquirer")
                    exit()
                name_list = socrata_autocomplete_search(args.s)
                print("Autocomplete suggestions : Total {} results\n".format(
                    len(name_list)))
                if len(name_list):
                    question = [
                        inquirer.List('dataset name',
                                      message='Select the dataset name',
                                      choices=name_list)
                    ]
                    answer = inquirer.prompt(question,
                                             theme=INQUIRER_THEME,
                                             raise_keyboard_interrupt=True)
                    dataset_name = answer['dataset name']
                    metadata = socrata_dataset_info(dataset_name)
                    print(
                        "Dataset Information of {}: Total {} results\n".format(
                            dataset_name, len(metadata)))
                    for i in range(len(metadata)):
                        print("{}. {}\n \tID : {}\n"
                              "\tType : {}\n"
                              "\tDescription : {}\n"
                              "\tDomain : {}\n \tLink : {}\n".format(
                                  i + 1, metadata[i]["name"],
                                  metadata[i]["id"], metadata[i]["type"],
                                  metadata[i]["description"][:50] + "...",
                                  metadata[i]["domain"], metadata[i]["link"]))
            elif args.rdataset:
                # Rdatasets listing, optionally filtered by package (-p).
                if not isinstance(args.p, list) and not args.all:
                    display_all_rdataset_names()
                elif not isinstance(args.p, list) and args.all:
                    display_all_rdataset_names(package_name='all')
                else:
                    display_all_rdataset_names(package_name=args.p)
            elif isinstance(args.v, list):
                dataset_verbose_list(args.v)
            else:
                # Filtered listing by license (-l) and/or keyword (-k).
                param_licenses = args.l if args.l else None
                keywords = args.k if args.k else None
                # search
                searched_scripts = datasets(keywords, param_licenses)
                offline_mesg = "Available offline datasets : {}\n"
                online_mesg = "Available online datasets : {}\n"
                if not searched_scripts:
                    print("No available datasets found")
                else:
                    print(offline_mesg.format(len(
                        searched_scripts['offline'])))
                    count = 1
                    for script in searched_scripts['offline']:
                        print("{count}. {title}\n{name}\n"
                              "{keywords}\n{licenses}\n".format(
                                  count=count,
                                  title=script.title,
                                  name=script.name,
                                  keywords=script.keywords,
                                  licenses=str(script.licenses[0]['name'])
                                  if script.licenses and len(script.licenses)
                                  else str('N/A'),
                              ))
                        count += 1
                    count = 1
                    searched_scripts_offline = [
                        script.name for script in searched_scripts["offline"]
                    ]
                    searched_scripts_online = []
                    for script in searched_scripts['online']:
                        if script in searched_scripts_offline:
                            continue
                        searched_scripts_online.append(script)
                    print(online_mesg.format(len(searched_scripts_online)))
                    for script in searched_scripts_online:
                        print("{count}. {name}".format(count=count,
                                                       name=script))
                        count += 1
            return
        if args.command == 'commit':
            commit(
                dataset=args.dataset,
                path=os.path.normpath(args.path) if args.path else None,
                commit_message=args.message,
            )
            return
        if args.command == 'log':
            commit_log(dataset=args.dataset)
            return

        # Remaining commands (e.g. install) need a database engine.
        engine = choose_engine(args.__dict__)
        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
            # Suppress tracebacks for end users when not debugging.
            sys.tracebacklimit = 0
        if hasattr(args, 'debug') and args.not_cached:
            use_cache = False
        else:
            use_cache = True
        engine.use_cache = use_cache

        if args.dataset is not None:
            # Dynamic socrata/rdataset names bypass the catalog match and
            # are handled inside _install.
            if args.dataset.startswith(('socrata', 'rdataset')):
                scripts = True
            else:
                scripts = name_matches(script_list, args.dataset)
        else:
            raise Exception("no dataset specified.")
        if scripts:
            _install(vars(args), debug=debug, use_cache=use_cache)
            print("Done!")
        else:
            print(
                "Run 'retriever ls' to see a list of currently available datasets."
            )
def main():
    """This function launches the Data Retriever."""
    if len(sys.argv) == 1:
        # if no command line args are passed, show the help options
        parser.parse_args(['-h'])
    else:
        # otherwise, parse them
        args = parser.parse_args()
        reset_or_update = args.command in ["reset", "update"]
        # Fetch dataset scripts on first run; reset/update manage the
        # scripts themselves, so they skip this bootstrap.
        if (not reset_or_update and
                not os.path.isdir(SCRIPT_SEARCH_PATHS[1]) and not [
                    f for f in os.listdir(SCRIPT_SEARCH_PATHS[-1])
                    if os.path.exists(SCRIPT_SEARCH_PATHS[-1])
                ]):
            check_for_updates()
            reload_scripts()
        script_list = SCRIPT_LIST()
        # 'install' requires an engine sub-command; show install help if absent.
        if args.command == "install" and not args.engine:
            parser.parse_args(["install", "-h"])
        if args.quiet:
            # Silence all further console output.
            sys.stdout = open(os.devnull, "w")
        if args.command == "help":
            parser.parse_args(["-h"])
        if hasattr(args, "compile") and args.compile:
            script_list = reload_scripts()
        if args.command == "defaults":
            # Print each engine's required options and their defaults.
            for engine_item in engine_list:
                print("Default options for engine ", engine_item.name)
                for default_opts in engine_item.required_opts:
                    print(default_opts[0], " ", default_opts[2])
                print()
            return
        if args.command == "update":
            check_for_updates()
            reload_scripts()
            return
        if args.command == "citation":
            if args.dataset is None:
                print("\nCitation for retriever:\n")
                print(CITATION)
            else:
                citations = get_script_citation(args.dataset)
                for citation in citations:
                    print("Citation: {}".format(citation))
            return
        if args.command == 'license':
            if args.dataset is None:
                print(LICENSE)
            else:
                dataset_license = license(args.dataset)
                if dataset_license:
                    print(dataset_license)
                else:
                    print("There is no license information for {}".format(
                        args.dataset))
            return
        if args.command == 'new':
            # Create a new sample dataset script at the given filename.
            f = open(args.filename, 'w')
            f.write(sample_script)
            f.close()
            return
        if args.command == 'reset':
            reset_retriever(args.scope)
            return
        if args.command == 'autocreate':
            # Exactly one of -f (file) / -d (directory) must be given.
            if sum([args.f, args.d]) == 1:
                file_flag = bool(args.f)
                create_package(args.path, args.dt, file_flag, args.o,
                               args.skip_lines, args.e)
            else:
                print('Please use one and only one of the flags -f -d')
            return
        if args.command == 'ls':
            # scripts should never be empty because check_for_updates is run
            # on SCRIPT_LIST init
            if not (args.l or args.k or isinstance(args.v, list)):
                # Combined offline/online listing; online-only datasets are
                # tagged False so the printer can mark them with *.
                all_scripts = dataset_names()
                from retriever import lscolumns
                all_scripts_combined = []
                for dataset in all_scripts['offline']:
                    all_scripts_combined.append((dataset, True))
                for dataset in all_scripts['online']:
                    if dataset in all_scripts['offline']:
                        continue
                    all_scripts_combined.append((dataset, False))
                all_scripts_combined = sorted(all_scripts_combined,
                                              key=lambda x: x[0])
                print("Available datasets : {}\n".format(
                    len(all_scripts_combined)))
                lscolumns.printls(all_scripts_combined)
                print("\nThe symbol * denotes the online datasets.")
                print(
                    "To see the full list of available online datasets, visit\n"
                    "https://github.com/weecology/retriever-recipes.")
            elif isinstance(args.v, list):
                # Verbose listing (-v), optionally limited to named datasets.
                online_scripts = []
                if args.v:
                    try:
                        all_scripts = [
                            get_script(dataset) for dataset in args.v
                        ]
                    except KeyError:
                        all_scripts = []
                        print("Dataset(s) is not found.")
                else:
                    scripts = datasets()
                    all_scripts = scripts['offline']
                    online_scripts = scripts['online']
                count = 1
                if not args.v:
                    print("Offline datasets : {}\n".format(len(all_scripts)))
                for script in all_scripts:
                    print("{count}. {title}\n {name}\n"
                          "{keywords}\n{description}\n"
                          "{licenses}\n{citation}\n"
                          "".format(
                              count=count,
                              title=script.title,
                              name=script.name,
                              keywords=script.keywords,
                              description=script.description,
                              licenses=str(script.licenses[0]['name']),
                              citation=script.citation,
                          ))
                    count += 1
                count = 1
                # Online names that also exist offline are not repeated.
                offline_scripts = [script.name for script in all_scripts]
                set_online_scripts = []
                for script in online_scripts:
                    if script in offline_scripts:
                        continue
                    set_online_scripts.append(script)
                if not args.v:
                    print("Online datasets : {}\n".format(
                        len(set_online_scripts)))
                for script in set_online_scripts:
                    print("{count}. {name}".format(count=count, name=script))
                    count += 1
            else:
                # Filtered listing by license (-l) and/or keyword (-k).
                param_licenses = args.l if args.l else None
                keywords = args.k if args.k else None
                # search
                searched_scripts = datasets(keywords, param_licenses)
                offline_mesg = "Available offline datasets : {}\n"
                online_mesg = "Available online datasets : {}\n"
                if not searched_scripts:
                    print("No available datasets found")
                else:
                    print(offline_mesg.format(len(
                        searched_scripts['offline'])))
                    count = 1
                    for script in searched_scripts['offline']:
                        print("{count}. {title}\n{name}\n"
                              "{keywords}\n{licenses}\n".format(
                                  count=count,
                                  title=script.title,
                                  name=script.name,
                                  keywords=script.keywords,
                                  licenses=str(script.licenses[0]['name']),
                              ))
                        count += 1
                    count = 1
                    searched_scripts_offline = [
                        script.name for script in searched_scripts["offline"]
                    ]
                    searched_scripts_online = []
                    for script in searched_scripts['online']:
                        if script in searched_scripts_offline:
                            continue
                        searched_scripts_online.append(script)
                    print(online_mesg.format(len(searched_scripts_online)))
                    for script in searched_scripts_online:
                        print("{count}. {name}".format(count=count,
                                                       name=script))
                        count += 1
            return
        if args.command == 'commit':
            commit(
                dataset=args.dataset,
                path=os.path.normpath(args.path) if args.path else None,
                commit_message=args.message,
            )
            return
        if args.command == 'log':
            commit_log(dataset=args.dataset)
            return

        # Remaining commands (e.g. install) need a database engine.
        engine = choose_engine(args.__dict__)
        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
            # Suppress tracebacks for end users when not debugging.
            sys.tracebacklimit = 0
        if hasattr(args, 'debug') and args.not_cached:
            use_cache = False
        else:
            use_cache = True
        engine.use_cache = use_cache

        if args.dataset is not None:
            scripts = name_matches(script_list, args.dataset)
        else:
            raise Exception("no dataset specified.")
        if scripts:
            # Committed archives (.zip path or hash value) are delegated
            # to _install instead of the per-script loop below.
            if args.dataset.endswith('.zip') or hasattr(args, 'hash_value'):
                _install(vars(args), debug=debug, use_cache=use_cache)
                return
            for dataset in scripts:
                print("=> Installing", dataset.name)
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # Ctrl-C skips the current dataset without a traceback.
                    pass
                except Exception as e:
                    print(e)
                    if debug:
                        raise
            print("Done!")
        else:
            print(
                "Run 'retriever ls' to see a list of currently available datasets."
            )
# Sphinx build-time helper: generate "datasets.rst", a reST page listing
# every dataset script the retriever knows about.
from retriever import VERSION,COPYRIGHT
from retriever.lib.repository import check_for_updates
from retriever import SCRIPT_LIST

datasetfile_title = """
==================
Datasets Available
==================


"""

# Refresh the script catalog before generating the page.
check_for_updates(graphical=False)
script_list = SCRIPT_LIST()

# Create the .rst file for the available datasets; the context manager
# guarantees the file is closed even if a script's metadata raises.
with open("datasets.rst", "w") as datasetfile:
    # write the title of dataset rst file
    datasetfile.write(datasetfile_title)
    # get info from the scripts: number, name, shortname and a reference
    # link derived from the script metadata.
    for script_num, script in enumerate(script_list, start=1):
        if script.ref.strip():
            reference_link = script.ref
        elif bool(script.urls.values()):
            # BUG FIX: dict.values() is a view in Python 3 and does not
            # support indexing; materialize it before taking the first URL.
            reference_link = list(script.urls.values())[0].rpartition('/')[0]
        else:
            reference_link = ""
        datasetfile.write("| " + str(script_num) +
                          ". **{}** \n| shortname: {}\n| reference: {}\n\n".format(
                              script.name, script.shortname, reference_link))

needs_sphinx = '1.3'
from retriever.lib.repository import check_for_updates def to_str(object, object_encoding=encoding): return str(object).encode('UTF-8').decode(encoding) # Create the .rst file for the available datasets datasetfile = open_fw("datasets_list.rst") datasetfile_title = """================== Datasets Available ================== """ check_for_updates() reload_scripts() script_list = SCRIPT_LIST() # write the title of dataset rst file # ref:http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html datasetfile.write(datasetfile_title) # get info from the scripts using specified encoding for script_num, script in enumerate(script_list, start=1): reference_link = '' if script.ref.strip(): reference_link = script.ref elif hasattr(script, 'homepage'): reference_link = script.homepage elif not reference_link.strip():
def main():
    """This function launches the Data Retriever."""
    if len(sys.argv) == 1:
        # if no command line args are passed, show the help options
        parser.parse_args(['-h'])
    else:
        # otherwise, parse them
        script_list = SCRIPT_LIST()
        args = parser.parse_args()
        # 'install' requires an engine sub-command; show install help if absent.
        if args.command == "install" and not args.engine:
            parser.parse_args(['install','-h'])
        if args.quiet:
            # Silence all further console output.
            sys.stdout = open(os.devnull, 'w')
        if args.command == 'help':
            parser.parse_args(['-h'])
        if hasattr(args, 'compile') and args.compile:
            script_list = SCRIPT_LIST(force_compile=True)
        if args.command == 'defaults':
            # Print each engine's required options and their defaults.
            for engine_item in engine_list:
                print("Default options for engine ", engine_item.name)
                for default_opts in engine_item.required_opts:
                    print(default_opts[0], " ", default_opts[2])
                print()
            return
        if args.command == 'update':
            check_for_updates()
            script_list = SCRIPT_LIST()
            return
        elif args.command == 'citation':
            if args.dataset is None:
                print("\nCitation for retriever:\n")
                print(CITATION)
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print("\nDataset: {}".format(dataset.name))
                    print("Citation: {}".format(dataset.citation))
                    print("Description: {}\n".format(dataset.description))
            return
        elif args.command == 'new':
            # Create a new sample dataset script at the given filename.
            f = open(args.filename, 'w')
            f.write(sample_script)
            f.close()
            return
        elif args.command == 'reset':
            reset_retriever(args.scope)
            return
        elif args.command == 'new_json':
            # create new JSON script
            create_json()
            return
        elif args.command == 'edit_json':
            # edit existing JSON script
            # Substring match of the given name against the .json scripts
            # in the home script directory.
            for json_file in [filename
                              for filename in os.listdir(
                                  os.path.join(HOME_DIR, 'scripts'))
                              if filename[-5:] == '.json']:
                if json_file.lower().find(args.filename.lower()) != -1:
                    edit_json(json_file)
                    return
            raise Exception("File not found")
        elif args.command == 'delete_json':
            # delete existing JSON script
            for json_file in [filename
                              for filename in os.listdir(
                                  os.path.join(HOME_DIR, 'scripts'))
                              if filename[-5:] == '.json']:
                if json_file.lower().find(args.dataset.lower()) != -1:
                    confirm = input("Really remove " + json_file +
                                    " and all its contents? (y/N): ")
                    if confirm.lower().strip() in ['y', 'yes']:
                        # raise Exception(json_file)
                        os.remove(os.path.join(HOME_DIR, 'scripts', json_file))
                        try:
                            # Also remove the compiled .py twin if present.
                            os.remove(os.path.join(
                                HOME_DIR, 'scripts', json_file[:-4] + 'py'))
                        except:
                            # Not compiled yet
                            pass
                    return
            raise Exception("File not found")
        if args.command == 'ls':
            # If scripts have never been downloaded there is nothing to list
            if not script_list:
                print("No scripts are currently available. Updating scripts now...")
                check_for_updates()
                print("\n\nScripts downloaded.\n")
                script_list = SCRIPT_LIST()
            all_scripts = []
            for script in script_list:
                if script.shortname:
                    if args.l is not None:
                        # -l: every search term must appear in the combined
                        # name/shortname/tags text for the script to match.
                        script_name = script.name + "\nShortname: " + \
                            script.shortname + "\n"
                        if script.tags:
                            script_name += "Tags: " + \
                                str([tag for tag in script.tags]) + "\n"
                        not_found = 0
                        for term in args.l:
                            if script_name.lower().find(term.lower()) == -1:
                                not_found = 1
                                break
                        if not_found == 0:
                            all_scripts.append(script_name)
                    else:
                        script_name = script.shortname
                        all_scripts.append(script_name)
            all_scripts = sorted(all_scripts, key=lambda s: s.lower())
            print("Available datasets : {}\n".format(len(all_scripts)))
            if args.l is None:
                from retriever import lscolumns
                lscolumns.printls(sorted(all_scripts, key=lambda s: s.lower()))
            else:
                count = 1
                for script in all_scripts:
                    print("%d. %s" % (count, script))
                    count += 1
            return

        # Remaining commands (e.g. install) need a database engine.
        engine = choose_engine(args.__dict__)
        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
            # Suppress tracebacks for end users when not debugging.
            sys.tracebacklimit = 0
        if hasattr(args, 'debug') and args.not_cached:
            use_cache = False
        else:
            use_cache = True
        if args.dataset is not None:
            scripts = name_matches(script_list, args.dataset)
        else:
            raise Exception("no dataset specified.")
        if scripts:
            for dataset in scripts:
                print("=> Installing", dataset.name)
                try:
                    dataset.download(engine, debug=debug, use_cache=use_cache)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # Ctrl-C skips the current dataset without a traceback.
                    pass
                except Exception as e:
                    print(e)
                    if debug:
                        raise
            print("Done!")
        else:
            print("The dataset {} isn't currently available in the Retriever".format(
                args.dataset))
            # NOTE(review): the closing quote after 'retriever ls is missing
            # in this message.
            print("Run 'retriever ls to see a list of currently available datasets")
def main():
    """This function launches the Data Retriever."""
    if len(sys.argv) == 1:
        # if no command line args are passed, show the help options
        parser.parse_args(['-h'])
    else:
        # otherwise, parse them
        script_list = SCRIPT_LIST()
        args = parser.parse_args()
        if args.quiet:
            # Silence all further console output.
            sys.stdout = open(os.devnull, 'w')
        if args.command == 'help':
            parser.parse_args(['-h'])
        if hasattr(args, 'compile') and args.compile:
            script_list = SCRIPT_LIST(force_compile=True)
        if args.command == 'update':
            check_for_updates()
            script_list = SCRIPT_LIST()
            return
        elif args.command == 'citation':
            if args.dataset is None:
                # Read the project-level CITATION file shipped next to the
                # package directory.
                citation_path = os.path.join(os.path.split(__file__)[0],
                                             '../CITATION')
                print("\nCitation for retriever:\n")
                with open(citation_path) as citation_file:
                    print(citation_file.read())
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print("\nDataset: {}".format(dataset.name))
                    print("Citation: {}".format(dataset.citation))
                    print("Description: {}\n".format(dataset.description))
            return
        elif args.command == 'new':
            # Create a new sample dataset script at the given filename.
            f = open(args.filename, 'w')
            f.write(sample_script)
            f.close()
            return
        elif args.command == 'reset':
            reset_retriever(args.scope)
            return
        elif args.command == 'new_json':
            # create new JSON script
            create_json()
            return
        elif args.command == 'edit_json':
            # edit existing JSON script
            for json_file in [filename
                              for filename in os.listdir(
                                  os.path.join(HOME_DIR, 'scripts'))
                              if filename[-5:] == '.json']:
                if json_file.lower().find(args.filename.lower()) != -1:
                    edit_json(json_file)
                    return
            raise Exception("File not found")
        elif args.command == 'delete_json':
            # delete existing JSON script
            for json_file in [filename
                              for filename in os.listdir(
                                  os.path.join(HOME_DIR, 'scripts'))
                              if filename[-5:] == '.json']:
                if json_file.lower().find(args.dataset.lower()) != -1:
                    confirm = input("Really remove " + json_file +
                                    " and all its contents? (y/N): ")
                    if confirm.lower().strip() in ['y', 'yes']:
                        # raise Exception(json_file)
                        os.remove(os.path.join(HOME_DIR, 'scripts', json_file))
                        try:
                            # Also remove the compiled .py twin if present.
                            os.remove(os.path.join(
                                HOME_DIR, 'scripts', json_file[:-4] + 'py'))
                        except:
                            # Not compiled yet
                            pass
                    return
            raise Exception("File not found")
        if args.command == 'ls':
            # If scripts have never been downloaded there is nothing to list
            if not script_list:
                print("No scripts are currently available. Updating scripts now...")
                check_for_updates()
                print("\n\nScripts downloaded.\n")
                script_list = SCRIPT_LIST()
            all_scripts = []
            for script in script_list:
                if script.shortname:
                    if args.l is not None:
                        # -l: every search term must appear in the combined
                        # name/shortname/tags text for the script to match.
                        script_name = script.name + "\nShortname: " + \
                            script.shortname + "\n"
                        if script.tags:
                            script_name += "Tags: " + \
                                str([tag for tag in script.tags]) + "\n"
                        not_found = 0
                        for term in args.l:
                            if script_name.lower().find(term.lower()) == -1:
                                not_found = 1
                                break
                        if not_found == 0:
                            all_scripts.append(script_name)
                    else:
                        script_name = script.shortname
                        all_scripts.append(script_name)
            all_scripts = sorted(all_scripts, key=lambda s: s.lower())
            print("Available datasets : {}\n".format(len(all_scripts)))
            if args.l is None:
                from retriever import lscolumns
                lscolumns.printls(sorted(all_scripts, key=lambda s: s.lower()))
            else:
                count = 1
                for script in all_scripts:
                    print("%d. %s" % (count, script))
                    count += 1
            return

        # Remaining commands (e.g. install) need a database engine.
        engine = choose_engine(args.__dict__)
        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
            # Suppress tracebacks for end users when not debugging.
            sys.tracebacklimit = 0
        if args.dataset is not None:
            scripts = name_matches(script_list, args.dataset)
        else:
            raise Exception("no dataset specified.")
        if scripts:
            for dataset in scripts:
                print("=> Installing", dataset.name)
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # Ctrl-C skips the current dataset without a traceback.
                    pass
                except Exception as e:
                    print(e)
                    if debug:
                        raise
            print("Done!")
        else:
            print("The dataset {} isn't currently available in the Retriever".format(
                args.dataset))
            # NOTE(review): the closing quote after 'retriever ls is missing
            # in this message.
            print("Run 'retriever ls to see a list of currently available datasets")
def main():
    """This function launches the EcoData Retriever.

    Python 2 entry point. With no arguments (or 'gui') it launches the
    wx GUI; otherwise it dispatches on the parsed sub-command
    (update/citation/gui/new/reset/ls) and finally installs the named
    dataset(s) with the chosen engine.
    """
    if len(sys.argv) == 1 or (len(sys.argv) > 1 and sys.argv[1] == 'gui'):
        # if no command line args are passed, launch GUI
        # NOTE(review): update dialogs are suppressed on macOS
        # (current_platform == 'darwin') — presumably a wx quirk; confirm.
        check_for_updates(graphical=False if current_platform == 'darwin' else True)
        lists = get_lists()
        from retriever.app.main import launch_app
        launch_app(lists)
    else:
        # otherwise, parse them
        script_list = SCRIPT_LIST()
        args = parser.parse_args()

        if args.quiet:
            # silence all further console output for the rest of the run
            sys.stdout = open(os.devnull, 'w')

        if args.command == 'help':
            parser.parse_args(['-h'])

        if hasattr(args, 'compile') and args.compile:
            # force recompilation of the dataset scripts
            script_list = SCRIPT_LIST(force_compile=True)

        if args.command == 'update':
            check_for_updates(graphical=False)
            script_list = SCRIPT_LIST()
            return

        elif args.command == 'citation':
            if args.dataset is None:
                # citation for the retriever itself, read from the CITATION file
                citation_path = os.path.join(os.path.split(__file__)[0], '../CITATION')
                print "\nCitation for retriever:\n"
                with open(citation_path) as citation_file:
                    print citation_file.read()
            else:
                # citation(s) for the matching dataset script(s)
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print ("\nCitation: {}".format(dataset.citation))
                    print ("Description: {}\n".format(dataset.description))
            return

        elif args.command == 'gui':
            lists = get_lists()
            from retriever.app.main import launch_app
            launch_app(lists)
            return

        elif args.command == 'new':
            # write a boilerplate script template to the given filename
            f = open(args.filename, 'w')
            f.write(sample_script)
            f.close()
            return

        elif args.command == 'reset':
            reset_retriever(args.scope)
            return

        if args.command == 'ls' or args.dataset is None:
            # If scripts have never been downloaded there is nothing to list
            if not script_list:
                print "No scripts are currently available. Updating scripts now..."
                check_for_updates(graphical=False)
                print "\n\nScripts downloaded.\n"
                script_list = SCRIPT_LIST()
            all_scripts = []
            for script in script_list:
                if script.name:
                    if args.l != None:
                        # -l given: build a searchable blob of name/shortname/tags
                        # and keep the script only if every term matches
                        script_name = script.name + "\nShortname: " + script.shortname + "\n"
                        if script.tags:
                            script_name += "Tags: " + str([tag for tag in script.tags]) + "\n"
                        not_found = 0
                        for term in args.l:
                            if script_name.lower().find(term.lower()) == -1:
                                not_found = 1
                                break
                        if not_found == 0:
                            all_scripts.append(script_name)
                    else:
                        # no search terms: list shortnames only
                        script_name = script.shortname
                        all_scripts.append(script_name)
            all_scripts = sorted(all_scripts, key=lambda s: s.lower())
            print "Available datasets : {}\n".format(len(all_scripts))
            if args.l == None:
                import lscolumns
                lscolumns.printls(sorted(all_scripts, key=lambda s: s.lower()))
            else:
                count = 1
                for script in all_scripts:
                    print ("%d. %s" % (count, script))
                    count += 1
            return

        # install path: build the engine and download each matching dataset
        engine = choose_engine(args.__dict__)
        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
        scripts = name_matches(script_list, args.dataset)
        if scripts:
            for dataset in scripts:
                print "=> Installing", dataset.name
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # allow the user to skip a dataset without aborting the run
                    pass
                except Exception as e:
                    print e
                    if debug:
                        raise
            print "Done!"
        else:
            print "The dataset {} isn't currently available in the Retriever".format(args.dataset)
            print "Run 'retriever ls to see a list of currently available datasets"
from retriever import VERSION, COPYRIGHT from retriever.lib.repository import check_for_updates from retriever import SCRIPT_LIST # Create the .rst file for the available datasets datasetfile = open("datasets.rst", "w") datasetfile_title = """ ================== Datasets Available ================== """ check_for_updates(graphical=False) script_list = SCRIPT_LIST() # write the title of dataset rst file datasetfile.write(datasetfile_title) # get info from the scripts for script_num, script in enumerate(script_list, start=1): if script.ref.strip(): reference_link = script.ref elif bool(script.urls.values()): reference_link = script.urls.values()[0].rpartition('/')[0] else: reference_link = "" datasetfile.write("| " + str(script_num) + ". **{}** \n| shortname: {}\n| reference: {}\n\n".format( script.name, script.shortname, reference_link)) datasetfile.close()
def main():
    """This function launches the Data Retriever.

    Dispatches on the sub-command parsed from ``sys.argv`` (defaults,
    update, citation, license, new, reset, new_json, edit_json,
    delete_json, ls) and otherwise installs/downloads the named
    dataset(s) with the chosen engine.
    """
    # sub-commands and dataset names are matched case-insensitively
    sys.argv[1:] = [arg.lower() for arg in sys.argv[1:]]
    if len(sys.argv) == 1:
        # if no command line args are passed, show the help options
        parser.parse_args(['-h'])
    else:
        # otherwise, parse them
        # bootstrap the script catalog if it has never been downloaded
        if not os.path.isdir(SCRIPT_SEARCH_PATHS[1]) and not \
                [f for f in os.listdir(SCRIPT_SEARCH_PATHS[-1])
                 if os.path.exists(SCRIPT_SEARCH_PATHS[-1])]:
            check_for_updates()
        script_list = SCRIPT_LIST()
        args = parser.parse_args()

        if args.command == "install" and not args.engine:
            parser.parse_args(['install', '-h'])

        if args.quiet:
            # silence all further console output for the rest of the run
            sys.stdout = open(os.devnull, 'w')

        if args.command == 'help':
            parser.parse_args(['-h'])

        if hasattr(args, 'compile') and args.compile:
            script_list = SCRIPT_LIST(force_compile=True)

        if args.command == 'defaults':
            for engine_item in engine_list:
                print("Default options for engine ", engine_item.name)
                for default_opts in engine_item.required_opts:
                    print(default_opts[0], " ", default_opts[2])
                print()
            return

        if args.command == 'update':
            check_for_updates(False)
            script_list = SCRIPT_LIST()
            return

        elif args.command == 'citation':
            if args.dataset is None:
                print("\nCitation for retriever:\n")
                print(CITATION)
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print("\nDataset: {}".format(dataset.name))
                    print("Citation: {}".format(dataset.citation))
                    print("Description: {}\n".format(dataset.description))
            return

        elif args.command == 'license':
            dataset_license = license(args.dataset)
            if dataset_license:
                print(dataset_license)
            else:
                print("There is no license information for {}".format(args.dataset))
            return

        elif args.command == 'new':
            # 'with' ensures the template file is closed even on a write error
            with open(args.filename, 'w') as f:
                f.write(sample_script)
            return

        elif args.command == 'reset':
            reset_retriever(args.scope)
            return

        elif args.command == 'new_json':
            # create new JSON script
            create_json()
            return

        elif args.command == 'edit_json':
            # edit existing JSON script
            json_file = get_script_filename(args.dataset.lower())
            edit_json(json_file)
            return

        elif args.command == 'delete_json':
            # delete existing JSON script from home directory and or
            # script directory if exists in current dir
            confirm = input("Really remove " + args.dataset.lower() +
                            " and all its contents? (y/N): ")
            if confirm.lower().strip() in ['y', 'yes']:
                json_file = get_script_filename(args.dataset.lower())
                delete_json(json_file)
            return

        if args.command == 'ls':
            # If scripts have never been downloaded there is nothing to list
            if not script_list:
                print("No scripts are currently available. Updating scripts now...")
                check_for_updates(False)
                print("\n\nScripts downloaded.\n")
            if args.l is None:
                all_scripts = datasets()
                print("Available datasets : {}\n".format(len(all_scripts)))
                from retriever import lscolumns
                lscolumns.printls(dataset_names())
            else:
                all_scripts = datasets(args.l[0])
                print("Available datasets : {}\n".format(len(all_scripts)))
                count = 1
                for script in all_scripts:
                    print("{}. {}".format(count, script.title))
                    print(script.name)
                    print(script.keywords)
                    print()
                    count += 1
            return

        engine = choose_engine(args.__dict__)

        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
            sys.tracebacklimit = 0

        # BUG FIX: guard on the attribute actually read. The old code
        # checked hasattr(args, 'debug') and then read args.not_cached,
        # raising AttributeError for commands without a --not-cached flag.
        if hasattr(args, 'not_cached') and args.not_cached:
            engine.use_cache = False
        else:
            engine.use_cache = True

        if args.dataset is not None:
            scripts = name_matches(script_list, args.dataset)
        else:
            raise Exception("no dataset specified.")
        if scripts:
            for dataset in scripts:
                print("=> Installing", dataset.name)
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # allow the user to skip a dataset without aborting the run
                    pass
                except Exception as e:
                    print(e)
                    if debug:
                        raise
            print("Done!")
        else:
            print("The dataset {} isn't currently available in the Retriever".format(
                args.dataset))
            # BUG FIX: closed the unbalanced quote around 'retriever ls'
            print("Run 'retriever ls' to see a list of currently available datasets")
def main():
    """This function launches the EcoData Retriever.

    Early Python 2 entry point: launches the GUI when called with no
    arguments (or 'gui'); otherwise handles update/citation/gui/new/ls
    and installs the named dataset(s) with the chosen engine.
    """
    if len(sys.argv) == 1 or (len(sys.argv) > 1 and sys.argv[1] == 'gui'):
        # if no command line args are passed, launch GUI
        # NOTE(review): graphical update prompts are disabled on macOS — confirm why.
        check_for_updates(graphical=False if 'darwin' in platform.platform().lower() else True)
        lists = get_lists()
        from retriever.app.main import launch_app
        launch_app(lists)
    else:
        # otherwise, parse them
        script_list = SCRIPT_LIST()
        args = parser.parse_args()

        if args.quiet:
            # silence all further console output for the rest of the run
            sys.stdout = open(os.devnull, 'w')

        if args.command == 'help':
            parser.parse_args(['-h'])

        if hasattr(args, 'compile') and args.compile:
            script_list = SCRIPT_LIST(force_compile=True)

        if args.command == 'update':
            check_for_updates(graphical=False)
            script_list = SCRIPT_LIST()
            return

        elif args.command == 'citation':
            if args.dataset is None:
                # NOTE(review): this prints the CITATION file's *path* before
                # its contents — possibly intended to print a header instead.
                citation_path = os.path.join(os.path.split(__file__)[0], '../CITATION')
                print citation_path
                with open(citation_path) as citation_file:
                    print citation_file.read()
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print dataset.description
            return

        elif args.command == 'gui':
            lists = get_lists()
            from retriever.app.main import launch_app
            launch_app(lists)
            return

        elif args.command == 'new':
            # write a boilerplate script template to the given filename
            f = open(args.filename, 'w')
            f.write(sample_script)
            f.close()
            return

        if args.command == 'ls' or args.dataset is None:
            import lscolumns
            # If scripts have never been downloaded there is nothing to list
            if not script_list:
                print "No scripts are currently available. Updating scripts now..."
                check_for_updates(graphical=False)
                print "\n\nScripts downloaded.\n"
                script_list = SCRIPT_LIST()
            all_scripts = set([script.shortname for script in script_list])
            # tags may be '>'-separated hierarchies; flatten to upper-case groups
            all_tags = set(["ALL"] +
                           [tag.strip().upper()
                            for script in script_list
                            for tagset in script.tags
                            for tag in tagset.split('>')])
            print "Available datasets (%s):" % len(all_scripts)
            lscolumns.printls(sorted(list(all_scripts), key=lambda s: s.lower()))
            print "Groups:"
            lscolumns.printls(sorted(list(all_tags)))
            return

        # install path: build the engine and download each matching dataset
        engine = choose_engine(args.__dict__)
        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
        scripts = name_matches(script_list, args.dataset)
        if scripts:
            for dataset in scripts:
                print "=> Installing", dataset.name
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # allow the user to skip a dataset without aborting the run
                    pass
                except Exception as e:
                    print e
                    if debug:
                        raise
            print "Done!"
        else:
            print "The dataset %s isn't currently available in the Retriever" % (args.dataset)
            print "Run 'retriever -ls to see a list of currently available datasets"
def to_str(object, object_encoding=encoding): if sys.version_info >= (3, 0, 0): return str(object).encode('UTF-8').decode(encoding) return object # Create the .rst file for the available datasets datasetfile = open_fw("datasets_list.rst") datasetfile_title = """================== Datasets Available ================== """ check_for_updates() reload_scripts() script_list = SCRIPT_LIST() # write the title of dataset rst file # ref:http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html datasetfile.write(datasetfile_title) # get info from the scripts using specified encoding for script_num, script in enumerate(script_list, start=1): reference_link = '' if script.ref.strip(): reference_link = script.ref elif hasattr(script, 'homepage'): reference_link = script.homepage elif not reference_link.strip():
def main():
    """This function launches the Data Retriever.

    Parses the sub-command from ``sys.argv`` (defaults, update,
    citation, license, new, reset, new_json, edit_json, delete_json,
    ls) and, for install/download, runs the matching dataset script(s)
    on the selected engine.
    """
    # commands and dataset names are matched case-insensitively
    sys.argv[1:] = [arg.lower() for arg in sys.argv[1:]]
    if len(sys.argv) == 1:
        # if no command line args are passed, show the help options
        parser.parse_args(['-h'])
    else:
        # otherwise, parse them
        script_list = SCRIPT_LIST()
        args = parser.parse_args()

        if args.command == "install" and not args.engine:
            parser.parse_args(['install', '-h'])

        if args.quiet:
            # silence all further console output for the rest of the run
            sys.stdout = open(os.devnull, 'w')

        if args.command == 'help':
            parser.parse_args(['-h'])

        if hasattr(args, 'compile') and args.compile:
            script_list = SCRIPT_LIST(force_compile=True)

        if args.command == 'defaults':
            for engine_item in engine_list:
                print("Default options for engine ", engine_item.name)
                for default_opts in engine_item.required_opts:
                    print(default_opts[0], " ", default_opts[2])
                print()
            return

        if args.command == 'update':
            check_for_updates(False)
            script_list = SCRIPT_LIST()
            return

        elif args.command == 'citation':
            if args.dataset is None:
                print("\nCitation for retriever:\n")
                print(CITATION)
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print("\nDataset: {}".format(dataset.name))
                    print("Citation: {}".format(dataset.citation))
                    print("Description: {}\n".format(dataset.description))
            return

        elif args.command == 'license':
            dataset_license = license(args.dataset)
            if dataset_license:
                print(dataset_license)
            else:
                print("There is no license information for {}".format(args.dataset))
            return

        elif args.command == 'new':
            # 'with' ensures the template file is closed even on a write error
            with open(args.filename, 'w') as f:
                f.write(sample_script)
            return

        elif args.command == 'reset':
            reset_retriever(args.scope)
            return

        elif args.command == 'new_json':
            # create new JSON script
            create_json()
            return

        elif args.command == 'edit_json':
            # edit existing JSON script
            json_file = get_script_filename(args.dataset.lower())
            edit_json(json_file)
            return

        elif args.command == 'delete_json':
            # delete existing JSON script from home directory and or
            # script directory if exists in current dir
            confirm = input("Really remove " + args.dataset.lower() +
                            " and all its contents? (y/N): ")
            if confirm.lower().strip() in ['y', 'yes']:
                json_file = get_script_filename(args.dataset.lower())
                delete_json(json_file)
            return

        if args.command == 'ls':
            # If scripts have never been downloaded there is nothing to list
            if not script_list:
                print("No scripts are currently available. Updating scripts now...")
                check_for_updates(False)
                print("\n\nScripts downloaded.\n")
            if args.l is None:
                all_scripts = datasets()
                print("Available datasets : {}\n".format(len(all_scripts)))
                from retriever import lscolumns
                lscolumns.printls(dataset_names())
            else:
                all_scripts = datasets(args.l[0])
                print("Available datasets : {}\n".format(len(all_scripts)))
                count = 1
                for script in all_scripts:
                    print("{}. {}".format(count, script.title))
                    print(script.name)
                    print(script.keywords)
                    print()
                    count += 1
            return

        engine = choose_engine(args.__dict__)

        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
            sys.tracebacklimit = 0

        # BUG FIX: test for the attribute that is actually accessed; the
        # previous guard checked 'debug' and then read 'not_cached', which
        # raised AttributeError for commands lacking a --not-cached flag.
        if hasattr(args, 'not_cached') and args.not_cached:
            engine.use_cache = False
        else:
            engine.use_cache = True

        if args.dataset is not None:
            scripts = name_matches(script_list, args.dataset)
        else:
            raise Exception("no dataset specified.")
        if scripts:
            for dataset in scripts:
                print("=> Installing", dataset.name)
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # allow the user to skip a dataset without aborting the run
                    pass
                except Exception as e:
                    print(e)
                    if debug:
                        raise
            print("Done!")
        else:
            print("The dataset {} isn't currently available in the Retriever".format(
                args.dataset))
            # BUG FIX: closed the unbalanced quote around 'retriever ls'
            print("Run 'retriever ls' to see a list of currently available datasets")
def main():
    """This function launches the Data Retriever.

    Modern entry point: dispatches on the parsed sub-command (defaults,
    update, citation, license, new, reset, new_json, edit_json,
    autocreate, delete_json, ls) and otherwise installs/downloads the
    named dataset(s) with the chosen engine.
    """
    if len(sys.argv) == 1:
        # if no command line args are passed, show the help options
        parser.parse_args(['-h'])
    else:
        # otherwise, parse them
        args = parser.parse_args()

        # Bootstrap the script catalog unless the command manages it itself
        # ('reset'/'update') or scripts already exist on disk.
        if args.command not in ['reset', 'update'] \
                and not os.path.isdir(SCRIPT_SEARCH_PATHS[1]) \
                and not [f for f in os.listdir(SCRIPT_SEARCH_PATHS[-1])
                         if os.path.exists(SCRIPT_SEARCH_PATHS[-1])]:
            check_for_updates()
            reload_scripts()
        script_list = SCRIPT_LIST()

        if args.command == "install" and not args.engine:
            parser.parse_args(['install', '-h'])

        if args.quiet:
            # silence all further console output for the rest of the run
            sys.stdout = open(os.devnull, 'w')

        if args.command == 'help':
            parser.parse_args(['-h'])

        if hasattr(args, 'compile') and args.compile:
            script_list = reload_scripts()

        if args.command == 'defaults':
            for engine_item in engine_list:
                print("Default options for engine ", engine_item.name)
                for default_opts in engine_item.required_opts:
                    print(default_opts[0], " ", default_opts[2])
                print()
            return

        if args.command == 'update':
            check_for_updates()
            reload_scripts()
            return

        elif args.command == 'citation':
            if args.dataset is None:
                print("\nCitation for retriever:\n")
                print(CITATION)
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print("\nDataset: {}".format(dataset.name))
                    print("Citation: {}".format(dataset.citation))
                    print("Description: {}\n".format(dataset.description))
            return

        elif args.command == 'license':
            if args.dataset is None:
                print(LICENSE)
            else:
                dataset_license = license(args.dataset)
                if dataset_license:
                    print(dataset_license)
                else:
                    print("There is no license information for {}".format(args.dataset))
            return

        elif args.command == 'new':
            # 'with' ensures the template file is closed even on a write error
            with open(args.filename, 'w') as f:
                f.write(sample_script)
            return

        elif args.command == 'reset':
            reset_retriever(args.scope)
            return

        elif args.command == 'new_json':
            # create new JSON script
            create_json()
            return

        elif args.command == 'edit_json':
            # edit existing JSON script
            json_file = get_script_filename(args.dataset.lower())
            edit_json(json_file)
            return

        elif args.command == 'autocreate':
            # exactly one of -f (single file) / -d (directory) must be given
            if sum([args.f, args.d]) == 1:
                file_flag = True if args.f else False
                create_package(args.path, args.dt, file_flag, args.o, args.skip_lines)
            else:
                print('Please use one and only one of the flags -f -d')
            return

        elif args.command == 'delete_json':
            # delete existing JSON script from home directory and or
            # script directory if exists in current dir
            confirm = input("Really remove " + args.dataset.lower() +
                            " and all its contents? (y/N): ")
            if confirm.lower().strip() in ['y', 'yes']:
                json_file = get_script_filename(args.dataset.lower())
                delete_json(json_file)
            return

        if args.command == 'ls':
            # scripts should never be empty because check_for_updates is run on SCRIPT_LIST init
            if not (args.l or args.k or isinstance(args.v, list)):
                # plain listing of dataset names
                all_scripts = dataset_names()
                print("Available datasets : {}\n".format(len(all_scripts)))
                from retriever import lscolumns
                lscolumns.printls(all_scripts)
            elif isinstance(args.v, list):
                # verbose listing; -v with names restricts to those datasets
                if args.v:
                    try:
                        all_scripts = [get_script(dataset) for dataset in args.v]
                    except KeyError:
                        all_scripts = []
                        print("Dataset(s) is not found.")
                else:
                    all_scripts = datasets()
                count = 1
                for script in all_scripts:
                    print(
                        "{count}. {title}\n {name}\n"
                        "{keywords}\n{description}\n"
                        "{licenses}\n{citation}\n"
                        "".format(
                            count=count,
                            title=script.title,
                            name=script.name,
                            keywords=script.keywords,
                            description=script.description,
                            licenses=str(script.licenses[0]['name']),
                            citation=script.citation,
                        )
                    )
                    count += 1
            else:
                # filtered search by license (-l) and/or keyword (-k)
                param_licenses = args.l if args.l else None
                keywords = args.k if args.k else None

                # search
                searched_scripts = datasets(keywords, param_licenses)
                if not searched_scripts:
                    print("No available datasets found")
                else:
                    print("Available datasets : {}\n".format(len(searched_scripts)))
                    count = 1
                    for script in searched_scripts:
                        print(
                            "{count}. {title}\n{name}\n"
                            "{keywords}\n{licenses}\n".format(
                                count=count,
                                title=script.title,
                                name=script.name,
                                keywords=script.keywords,
                                licenses=str(script.licenses[0]['name']),
                            )
                        )
                        count += 1
            return

        engine = choose_engine(args.__dict__)

        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
            sys.tracebacklimit = 0

        # BUG FIX: guard on the attribute that is actually read. The old
        # guard checked hasattr(args, 'debug') before reading
        # args.not_cached, raising AttributeError for commands that do not
        # define a --not-cached flag.
        if hasattr(args, 'not_cached') and args.not_cached:
            engine.use_cache = False
        else:
            engine.use_cache = True

        if args.dataset is not None:
            scripts = name_matches(script_list, args.dataset)
        else:
            raise Exception("no dataset specified.")
        if scripts:
            for dataset in scripts:
                print("=> Installing", dataset.name)
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # allow the user to skip a dataset without aborting the run
                    pass
                except Exception as e:
                    print(e)
                    if debug:
                        raise
            print("Done!")
        else:
            print("Run 'retriever ls' to see a list of currently available datasets.")
def _install(args, use_cache, debug):
    """Install datasets for retriever.

    args: parsed option dict; must contain 'dataset' and 'command', may
        contain 'hash_value', 'force', and engine-specific keys.
    use_cache: whether the engine should reuse previously downloaded files.
    debug: when True, re-raise download errors instead of only printing them.
    Returns the engine used for the installation (None in one early-exit
    Socrata case — see NOTE below).
    """
    engine = choose_engine(args)
    engine.use_cache = use_cache

    if args['dataset'].endswith('.zip') or args.get('hash_value'):
        # Installing from a committed archive rather than a named script.
        path_to_archive = args['dataset']
        if args.get('hash_value'):
            path_to_archive = os.path.join(
                PROVENANCE_DIR, args['dataset'],
                '{}-{}.zip'.format(args['dataset'], args['hash_value']))
        if not os.path.exists(path_to_archive):
            print('The committed file does not exist.')
        engine = install_committed(path_to_archive,
                                   engine,
                                   force=args.get('force', False))
        return engine

    script_list = SCRIPT_LIST()
    if not (script_list or os.listdir(SCRIPT_WRITE_PATH)):
        # No compiled scripts anywhere yet: fetch the catalog once.
        check_for_updates()
        script_list = SCRIPT_LIST()
    data_sets_scripts = name_matches(script_list, args['dataset'])

    if data_sets_scripts:
        for data_sets_script in data_sets_scripts:
            print("=> Installing", data_sets_script.name)
            try:
                if engine.name == "HDF5":
                    # HDF5 is built by first materializing the data in a
                    # SQLite database, then converting.
                    sqlite_opts = {
                        'command': 'install',
                        'dataset': data_sets_script,
                        'engine': 'sqlite',
                        'file': (args["file"].split("."))[0] + ".db",
                        'table_name': args["table_name"],
                        'data_dir': args["data_dir"]
                    }
                    sqlite_engine = choose_engine(sqlite_opts)
                    data_sets_script.download(sqlite_engine, debug=debug)
                    data_sets_script.engine.final_cleanup()
                # Start each dataset with a clean table registry so tables
                # from a previous script are not carried over.
                engine.script_table_registry = OrderedDict()
                data_sets_script.download(engine, debug=debug)
                data_sets_script.engine.final_cleanup()
            except Exception as e:
                print(e)
                if debug:
                    raise
    elif args['dataset'].startswith('socrata') and not data_sets_scripts:
        socrata_id = args['dataset'].split('-', 1)[1]
        resource = find_socrata_dataset_by_id(socrata_id)

        if "error" in resource.keys():
            if resource["datatype"][0] == "map":
                print("{} because map type datasets are not supported".format(
                    resource["error"]))
            else:
                print("{} because it is of type {} and not tabular".format(
                    resource["error"], resource["datatype"][1]))
        elif len(resource.keys()) == 0:
            # NOTE(review): returns None here while every other path returns
            # the engine — confirm callers tolerate this.
            return
        else:
            print("=> Installing", args['dataset'])
            name = f"socrata-{socrata_id}"
            create_socrata_dataset(engine, name, resource)
            if args['command'] == 'download':
                return engine
            else:
                script_list = SCRIPT_LIST()
                script = get_script(args['dataset'])
                script.download(engine, debug=debug)
                script.engine.final_cleanup()
    elif args['dataset'].startswith('rdataset') and not data_sets_scripts:
        print("=> Installing", args['dataset'])
        # BUG FIX: split at most twice so dataset names that themselves
        # contain '-' (e.g. rdataset-pkg-some-name) keep their full name;
        # an unbounded split silently truncated them at the next hyphen.
        rdataset = args['dataset'].split('-', 2)
        update_rdataset_catalog()
        package, dataset_name = rdataset[1], rdataset[2]
        create_rdataset(engine, package, dataset_name)
        if args['command'] == 'download':
            return engine
        else:
            script_list = SCRIPT_LIST()
            script = get_script(args['dataset'])
            script.download(engine, debug=debug)
            script.engine.final_cleanup()
    else:
        message = "Run retriever.datasets() to list the currently available " \
                  "datasets."
        raise ValueError(message)
    return engine