Esempio n. 1
0
def export(ctx, filepath):
    """Export the current dataset as an input set json file.

    Any failure is reported via print_error rather than propagated.
    """
    try:
        # fetch the active dataset and write it out
        get_dataset(ctx).export_inputset(filepath)
    except Exception as e:
        print_error(e, DEBUG)
Esempio n. 2
0
def backup(ctx, filepath):
    """Pickle the complete dataset to the given file path.

    Any failure is reported via print_error rather than propagated.
    """
    try:
        # fetch the active dataset and pickle it
        get_dataset(ctx).backup(filepath)
    except Exception as e:
        print_error(e, DEBUG)
Esempio n. 3
0
def sample(ctx, n, on_versions, seed):
    """Sample n projects or versions from the dataset.

    A deep copy of the dataset is taken first; if sampling fails the
    copy is put back so the dataset is left untouched.
    """
    snapshot = None

    try:
        dataset = get_dataset(ctx)
        snapshot = deepcopy(dataset)

        dataset.sample(n, on_versions, seed)

    except Exception as e:
        print_error(e, DEBUG)

        # put the pre-sample snapshot back
        ctx.obj['dataset'] = snapshot
        print('         The dataset was not modified.')
Esempio n. 4
0
def sort(ctx, keywords_string):
    """Sort the dataset by the whitespace-separated keywords given.

    A deep copy of the dataset is taken first; if sorting fails the
    copy is put back so the dataset is left untouched.
    """
    snapshot = None

    try:
        dataset = get_dataset(ctx)
        snapshot = deepcopy(dataset)

        dataset.sort(keywords_string.split())

    except Exception as e:
        print_error(e, DEBUG)

        # put the pre-sort snapshot back
        ctx.obj['dataset'] = snapshot
        print('         The dataset was not modified.')
Esempio n. 5
0
def show(ctx):
    """Open the complete dataset as a json file for easier review.

    Dumps the dataset to a temp json file and hands it to the
    system's default json viewer via the webbrowser module.
    """
    try:
        dataset = get_dataset(ctx)

        # write the dataset out to a temp json file
        # NOTE(review): assumes TEMP_DIR ends with a path separator
        temp_path = TEMP_DIR + 'jsonify.json'
        with open(temp_path, 'w') as outfile:
            json.dump(dataset.to_json(), outfile, indent=4)
        resolved = os.path.realpath(temp_path)

        # open in the system's default json viewer
        webbrowser.open_new('file://' + resolved)

    except Exception as e:
        print_error(e, DEBUG)
Esempio n. 6
0
def restore(ctx, filepath):
    """Restore a pickled dataset file into the context.

    On failure the previously loaded dataset (if any) is quietly
    put back.
    """
    previous = None

    try:
        previous = deepcopy(ctx.obj.get('dataset', None))

        ctx.obj['dataset'] = Dataset.restore(filepath)

        # drop any pending api/metadata overrides
        global TEMP_SETTINGS
        TEMP_SETTINGS = dict()

    except Exception as e:
        print_error(e, DEBUG)

        # silently restore the dataset
        ctx.obj['dataset'] = previous
Esempio n. 7
0
def set_meta(ctx, name, version, description, readme, author, email):
    """Sets dataset metadata.

    If a dataset is loaded, updates it in place; otherwise stashes the
    truthy values in the global TEMP_SETTINGS dict for the next dataset.
    On any error, the dataset is silently restored from a pre-update
    deep copy.
    """
    backup_ds = None

    # single source of truth for the metadata fields — the original
    # hand-wrote this list three times (update kwargs, TEMP_SETTINGS
    # assignments, and the outcome printout)
    meta = {
        'name': name,
        'version': version,
        'description': description,
        'readme': readme,
        'author': author,
        'email': email,
    }

    try:
        ds = ctx.obj.get('dataset', None)
        backup_ds = deepcopy(ds)

        if ds:
            # update the dataset's metadata (all values passed through,
            # falsy ones included, matching the original call)
            ds.update(**meta)

        else:
            # no dataset yet; save the provided settings for when there is one
            global TEMP_SETTINGS
            TEMP_SETTINGS.update({k: v for k, v in meta.items() if v})

        # print the outcome; dict insertion order preserves the original
        # name, version, description, readme, author, email ordering
        set_str = ', '.join(k for k, v in meta.items() if v)
        print("         Set the dataset's %s." % set_str)

    except Exception as e:
        print_error(e, DEBUG)

        # silently restore the dataset
        ctx.obj['dataset'] = backup_ds
Esempio n. 8
0
def set_api(ctx, cache_dir, cache_timeout, nocache, github_pat):
    """Sets API settings.

    If a dataset with an api is loaded, configures it directly;
    otherwise stashes the truthy values in the global TEMP_SETTINGS
    dict for the next dataset. On any error, the dataset is silently
    restored from a pre-update deep copy.
    """
    backup_ds = None

    try:
        ds = ctx.obj.get('dataset', None)
        backup_ds = deepcopy(ds)

        # convert cache timeout (given in days) to a timedelta
        if cache_timeout:
            cache_timeout = timedelta(days=cache_timeout)

        # single source of truth for the api settings — the original
        # hand-wrote this list three times (configure kwargs,
        # TEMP_SETTINGS assignments, and the outcome printout)
        settings = {
            'cache_dir': cache_dir,
            'cache_timeout': cache_timeout,
            'nocache': nocache,
            'github_pat': github_pat,
        }

        if ds and ds.api:
            # configure the api (all values passed through, falsy ones
            # included, matching the original call)
            ds.api.configure(**settings)

        else:
            # no ds/api; save the settings for when there is one
            global TEMP_SETTINGS
            TEMP_SETTINGS.update({k: v for k, v in settings.items() if v})

        # print the outcome; dict insertion order preserves the original
        # cache_dir, cache_timeout, nocache, github_pat ordering
        set_str = ', '.join(k for k, v in settings.items() if v)
        print("         Set the api's %s." % set_str)

    except Exception as e:
        print_error(e, DEBUG)

        # silently restore the dataset
        ctx.obj['dataset'] = backup_ds
Esempio n. 9
0
def import_(ctx, registry, filepath):
    """Import an input set json file into the context.

    On failure the previously loaded dataset (if any) is quietly
    put back.
    """
    previous = None

    try:
        previous = deepcopy(ctx.obj.get('dataset', None))

        # 'noreg' is the sentinel for "no registry"
        if registry == 'noreg':
            registry = None

        global TEMP_SETTINGS

        ctx.obj['dataset'] = Dataset.import_inputset(
            filepath, registry, **TEMP_SETTINGS)

        # drop the pending api/metadata overrides now that they've been applied
        TEMP_SETTINGS = dict()

    except Exception as e:
        print_error(e, DEBUG)

        # silently restore the dataset
        ctx.obj['dataset'] = previous
Esempio n. 10
0
def get(ctx, metadata, versions):
    """Downloads project and version information.

    The metadata and version fetches each run against a backup-protected
    dataset; if either fails, the dataset is rolled back to its state
    before that fetch and a notice is printed once at the end.
    """
    rolled_back = False

    # load project metadata
    if metadata:
        if not _guarded_fetch(ctx, lambda ds: ds.get_projects_meta()):
            rolled_back = True

    # load project versions
    if versions:
        if not _guarded_fetch(
                ctx, lambda ds: ds.get_project_versions(historical=versions)):
            rolled_back = True

    if rolled_back:
        print('         The dataset was not modified.')


def _guarded_fetch(ctx, action):
    """Run action(ds) on the current dataset with rollback protection.

    Deep-copies the dataset before calling action; on any exception,
    prints the error and restores the copy (None if the dataset itself
    could not be fetched — the original could restore a stale backup
    from the previous branch here). Returns True on success.
    """
    backup_ds = None
    try:
        ds = get_dataset(ctx)
        backup_ds = deepcopy(ds)
        action(ds)
        return True
    except Exception as e:
        print_error(e, DEBUG)

        # roll back the db
        ctx.obj['dataset'] = backup_ds
        return False
Esempio n. 11
0
def load(ctx, registry, from_type, name_or_path, fileargs):
    """Generate a dataset from a weblist name or a file path.

    On failure the previously loaded dataset (if any) is quietly
    put back.
    """
    previous = None

    try:
        previous = deepcopy(ctx.obj.get('dataset', None))

        # 'noreg' is the sentinel for "no registry"
        if registry == 'noreg':
            registry = None

        global TEMP_SETTINGS

        if from_type == 'file':
            # read in a file (fileargs is either a header string for csv
            # or a parser handle for json)
            new_ds = Dataset.load_file(
                name_or_path, registry, fileargs=fileargs, **TEMP_SETTINGS)
        else:
            # download a weblist or organization repo list
            new_ds = Dataset.load_web(
                name_or_path, registry, from_type=from_type, **TEMP_SETTINGS)

        ctx.obj['dataset'] = new_ds

        # drop the pending api/metadata overrides now that they've been applied
        TEMP_SETTINGS = dict()

    except Exception as e:
        print_error(e, DEBUG)

        # silently restore the dataset
        ctx.obj['dataset'] = previous