def export(ctx, filepath):
    """Exports a dataset to an input set json."""
    try:
        ds = get_dataset(ctx)
        ds.export_inputset(filepath)
    except Exception as e:
        print_error(e, DEBUG)


def backup(ctx, filepath):
    """Pickles the complete dataset."""
    try:
        ds = get_dataset(ctx)
        ds.backup(filepath)
    except Exception as e:
        print_error(e, DEBUG)


def sample(ctx, n, on_versions, seed):
    """Samples projects or versions from a dataset."""
    backup_ds = None
    try:
        ds = get_dataset(ctx)
        backup_ds = deepcopy(ds)
        ds.sample(n, on_versions, seed)
    except Exception as e:
        print_error(e, DEBUG)

        # roll back the db
        ctx.obj['dataset'] = backup_ds
        print(' The dataset was not modified.')


def sort(ctx, keywords_string):
    """Sorts a dataset."""
    backup_ds = None
    try:
        ds = get_dataset(ctx)
        backup_ds = deepcopy(ds)
        ds.sort(keywords_string.split())
    except Exception as e:
        print_error(e, DEBUG)

        # roll back the db
        ctx.obj['dataset'] = backup_ds
        print(' The dataset was not modified.')


def show(ctx):
    """Opens the complete dataset as a json for easier review."""
    try:
        ds = get_dataset(ctx)
        data_dict = ds.to_json()

        # save temp file
        filepath = TEMP_DIR + 'jsonify.json'
        with open(filepath, 'w') as file:
            json.dump(data_dict, file, indent=4)
        fullpath = os.path.realpath(filepath)

        # open in system's default json viewer
        webbrowser.open_new('file://' + fullpath)
    except Exception as e:
        print_error(e, DEBUG)


def restore(ctx, filepath):
    """Restores a pickled dataset file."""
    backup_ds = None
    try:
        backup_ds = deepcopy(ctx.obj.get('dataset', None))
        ds = Dataset.restore(filepath)
        ctx.obj['dataset'] = ds

        # reset the temporary api/metadata dict
        global TEMP_SETTINGS
        TEMP_SETTINGS = dict()
    except Exception as e:
        print_error(e, DEBUG)

        # silently restore the dataset
        ctx.obj['dataset'] = backup_ds


def set_meta(ctx, name, version, description, readme, author, email):
    """Sets dataset metadata."""
    backup_ds = None
    try:
        ds = ctx.obj.get('dataset', None)
        backup_ds = deepcopy(ds)

        if ds:
            # update dataset's metadata
            ds.update(name=name, version=version, description=description,
                      readme=readme, author=author, email=email)
        else:
            # no dataset yet; save the settings for when there is one
            global TEMP_SETTINGS
            if name: TEMP_SETTINGS['name'] = name
            if version: TEMP_SETTINGS['version'] = version
            if description: TEMP_SETTINGS['description'] = description
            if readme: TEMP_SETTINGS['readme'] = readme
            if author: TEMP_SETTINGS['author'] = author
            if email: TEMP_SETTINGS['email'] = email

        # print the outcome
        settings = []
        if name: settings.append('name')
        if version: settings.append('version')
        if description: settings.append('description')
        if readme: settings.append('readme')
        if author: settings.append('author')
        if email: settings.append('email')
        set_str = ', '.join(settings)
        print(" Set the dataset's %s." % set_str)
    except Exception as e:
        print_error(e, DEBUG)

        # silently restore the dataset
        ctx.obj['dataset'] = backup_ds


def set_api(ctx, cache_dir, cache_timeout, nocache, github_pat):
    """Sets API settings."""
    backup_ds = None
    try:
        ds = ctx.obj.get('dataset', None)
        backup_ds = deepcopy(ds)

        # convert cache timeout (in days) to a timedelta
        if cache_timeout:
            cache_timeout = timedelta(days=cache_timeout)

        if ds and ds.api:
            # configure the api
            ds.api.configure(cache_dir=cache_dir, cache_timeout=cache_timeout,
                             nocache=nocache, github_pat=github_pat)
        else:
            # no ds/api; save the settings for when there is one
            global TEMP_SETTINGS
            if cache_dir: TEMP_SETTINGS['cache_dir'] = cache_dir
            if cache_timeout: TEMP_SETTINGS['cache_timeout'] = cache_timeout
            if nocache: TEMP_SETTINGS['nocache'] = nocache
            if github_pat: TEMP_SETTINGS['github_pat'] = github_pat

        # print the outcome
        settings = []
        if cache_dir: settings.append('cache_dir')
        if cache_timeout: settings.append('cache_timeout')
        if nocache: settings.append('nocache')
        if github_pat: settings.append('github_pat')
        set_str = ', '.join(settings)
        print(" Set the api's %s." % set_str)
    except Exception as e:
        print_error(e, DEBUG)

        # silently restore the dataset
        ctx.obj['dataset'] = backup_ds


def import_(ctx, registry, filepath):
    """Imports an input set json file."""
    backup_ds = None
    try:
        backup_ds = deepcopy(ctx.obj.get('dataset', None))

        if registry == 'noreg':
            registry = None

        global TEMP_SETTINGS
        ds = Dataset.import_inputset(filepath, registry, **TEMP_SETTINGS)
        ctx.obj['dataset'] = ds

        # reset the temporary api/metadata dict
        TEMP_SETTINGS = dict()
    except Exception as e:
        print_error(e, DEBUG)

        # silently restore the dataset
        ctx.obj['dataset'] = backup_ds


def get(ctx, metadata, versions):
    """Downloads project and version information."""
    backup_ds = None
    rolled_back = False

    # load project metadata
    if metadata:
        try:
            ds = get_dataset(ctx)
            backup_ds = deepcopy(ds)
            ds.get_projects_meta()
        except Exception as e:
            print_error(e, DEBUG)

            # roll back the db
            ctx.obj['dataset'] = backup_ds
            rolled_back = True

    # load project versions
    if versions:
        try:
            ds = get_dataset(ctx)
            backup_ds = deepcopy(ds)
            ds.get_project_versions(historical=versions)
        except Exception as e:
            print_error(e, DEBUG)

            # roll back the db
            ctx.obj['dataset'] = backup_ds
            rolled_back = True

    if rolled_back:
        print(' The dataset was not modified.')


def load(ctx, registry, from_type, name_or_path, fileargs):
    """Generates a dataset from a weblist name or file path."""
    backup_ds = None
    try:
        backup_ds = deepcopy(ctx.obj.get('dataset', None))

        if registry == 'noreg':
            registry = None

        global TEMP_SETTINGS
        if from_type == 'file':
            # read in a file (fileargs is either a header string for csv
            # or a parser handle for json)
            ds = Dataset.load_file(name_or_path, registry,
                                   fileargs=fileargs, **TEMP_SETTINGS)
        else:
            # download a weblist or organization repo list
            ds = Dataset.load_web(name_or_path, registry,
                                  from_type=from_type, **TEMP_SETTINGS)

        ctx.obj['dataset'] = ds

        # reset the temporary api/metadata dict
        TEMP_SETTINGS = dict()
    except Exception as e:
        print_error(e, DEBUG)

        # silently restore the dataset
        ctx.obj['dataset'] = backup_ds
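
# Note: the commands above all receive a click-style `ctx` whose `obj` dict
# carries the active dataset. A minimal sketch of how one of them might be
# registered, assuming click is the CLI framework in use (the decorators and
# command group are not shown in this section); the group and argument names
# below are illustrative only:
#
#   import click
#
#   @click.group()
#   @click.pass_context
#   def cli(ctx):
#       # shared mutable state for all subcommands
#       ctx.ensure_object(dict)
#
#   @cli.command('sort')
#   @click.argument('keywords_string')
#   @click.pass_context
#   def sort_command(ctx, keywords_string):
#       sort(ctx, keywords_string)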