예제 #1
0
def test_load_file():
    """Round-trip every supported file loader through backup/restore/export.

    For each (input file, registry) combination this loads a dataset from
    disk, sets its metadata, pickles it to ../test.p, restores it, and
    exports the input set to ../test.json. Scratch files are removed even
    when a step fails partway through.
    """

    def _roundtrip(ds):
        # Exercise the full update -> backup -> restore -> export cycle
        # that was previously copy-pasted once per registry.
        ds.update(**{'name': 'test', 'version': '1.0'})
        ds.backup('../test.p')
        ds = Dataset.restore('../test.p')
        ds.export_inputset('../test.json')

    try:
        # test github (uses a personal access token for authenticated calls)
        ds = Dataset.load_file('files/git_urls_commits.csv',
                               registry='github',
                               cache_dir=CACHE_DIR,
                               debug=True,
                               github_pat=os.getenv('GITHUB_PAT'))
        _roundtrip(ds)

        # test npm and pypi (both read the same name/version csv)
        for registry in ('npm', 'pypi'):
            ds = Dataset.load_file('files/names_versions.csv',
                                   registry=registry,
                                   cache_dir=CACHE_DIR,
                                   debug=True)
            _roundtrip(ds)

        # test vanilla (no registry at all)
        ds = Dataset.load_file('files/urls.csv', cache_dir=CACHE_DIR,
                               debug=True)
        _roundtrip(ds)

    finally:
        # cleanup files -- previously skipped whenever an earlier step
        # raised, leaking the scratch files between test runs
        for path in ('../test.p', '../test.json'):
            if os.path.exists(path):
                os.remove(path)
예제 #2
0
def load(ctx, registry, from_type, name_or_path, fileargs):
    """Generates a dataset from a weblist name or file path."""
    # Declared up front: this function both reads and rebinds the
    # module-level temporary settings dict.
    global TEMP_SETTINGS

    # Snapshot the current dataset (if any) so a failed load can be
    # rolled back without the caller noticing.
    backup_ds = None

    try:
        backup_ds = deepcopy(ctx.obj.get('dataset', None))

        # 'noreg' is the cli sentinel for "no registry"
        if registry == 'noreg':
            registry = None

        if from_type == 'file':
            # read in a file (fileargs is either a header string for csv
            # or a parser handle for json)
            loaded = Dataset.load_file(name_or_path,
                                       registry,
                                       fileargs=fileargs,
                                       **TEMP_SETTINGS)
        else:
            # download a weblist or organization repo list
            loaded = Dataset.load_web(name_or_path,
                                      registry,
                                      from_type=from_type,
                                      **TEMP_SETTINGS)

        ctx.obj['dataset'] = loaded

        # reset the temporary api/metadata dict
        TEMP_SETTINGS = dict()

    except Exception as e:
        print_error(e, DEBUG)

        # silently restore the dataset
        ctx.obj['dataset'] = backup_ds