Beispiel #1
0
def list_repository(index):
    """List repository index content."""
    # Load the index either from the given file/Url reference or, when no
    # index is given, from the location configured in the environment.
    if index is not None:
        loader = DictLoader(util.read_index(index))
    else:
        loader = UrlLoader()
    datasets = RepositoryManager(doc=loader.load()).find()
    headers = ['Identifier', 'Name', 'Description']
    # Track the maximum width for each output column, seeded with the
    # header widths.
    widths = [len(h) + 1 for h in headers]
    data = []
    # Output rows are sorted by dataset name.
    for ds in sorted(datasets, key=lambda d: d.name):
        row = [
            ds.identifier,
            ds.name,
            ds.description if ds.description is not None else ''
        ]
        for col, value in enumerate(row):
            widths[col] = max(widths[col], len(value) + 1)
        data.append(row)
    tp.table(data, headers=headers, width=widths, style='grid', out=util.TPrinter())
Beispiel #2
0
def test_get_dataset(mock_response):
    """Test getting a dataset from the default repository."""
    # The mocked response serves the content of the `index.json` file in the
    # test files directory instead of downloading the default repository.
    repo = RepositoryManager(doc=UrlLoader().load())
    dataset = repo.get(key='DS1')
    assert dataset.identifier == 'DS1'
    # Unknown keys yield None rather than raising an error.
    assert repo.get(key='UNKNOWN') is None
Beispiel #3
0
def list_datasets(basedir, db, index):
    """List local store content."""
    # Use the given index file/Url reference if present; otherwise fall back
    # to the default Url loader.
    if index is not None:
        loader = DictLoader(util.read_index(index))
    else:
        loader = UrlLoader()
    store = RefStore(basedir=basedir, loader=loader, connect_url=db)
    headers = ['Name', 'Size', 'Downloaded', 'Package']
    # Track the maximum width for each output column, seeded with the
    # header widths.
    widths = [len(h) + 1 for h in headers]
    data = []
    # Output rows are sorted by dataset name.
    for ds in sorted(store.list(), key=lambda d: d.name):
        row = [
            ds.identifier,
            '{:.2a}'.format(DataSize(ds.filesize)),
            ' '.join(ds.created_at.isoformat()[:19].split('T')),
            '{} {}'.format(ds.package_name, ds.package_version)
        ]
        for col, value in enumerate(row):
            widths[col] = max(widths[col], len(value) + 1)
        data.append(row)
    tp.table(data, headers=headers, width=widths, style='grid', out=util.TPrinter())
Beispiel #4
0
def download_dataset(basedir, db, index, key):
    """Download the dataset with the given key into the local store."""
    # Read the repository index from the optional file or Url reference. By
    # default, the index that is specified in the environment is loaded.
    loader = DictLoader(
        util.read_index(index)) if index is not None else UrlLoader()
    store = RefStore(basedir=basedir, loader=loader, connect_url=db)
    store.download(key)
Beispiel #5
0
def show_dataset(basedir, db, index, raw, key):
    """Show descriptor for downloaded dataset."""
    # Load the repository index from the optional file or Url reference,
    # falling back to the default Url loader.
    if index is None:
        loader = UrlLoader()
    else:
        loader = DictLoader(util.read_index(index))
    store = RefStore(basedir=basedir, loader=loader, connect_url=db)
    util.print_dataset(dataset=store.load(key), raw=raw)
Beispiel #6
0
def show_dataset(index, raw, key):
    """Show dataset descriptor from repository index."""
    # Load the repository index from the optional file or Url reference. By
    # default, the index specified in the environment is used.
    if index is None:
        loader = UrlLoader()
    else:
        loader = DictLoader(util.read_index(index))
    dataset = RepositoryManager(doc=loader.load()).get(key)
    util.print_dataset(dataset=dataset, raw=raw)
Beispiel #7
0
def remove_dataset(basedir, db, index, force, key):
    """Remove dataset from local store."""
    # Unless the force flag was given, ask the user to confirm the removal;
    # aborting the confirmation cancels the command.
    if not force:  # pragma: no cover
        click.confirm(
            "Do you really want to remove dataset '{}'".format(key),
            default=True,
            abort=True
        )
    # Load the repository index from the optional file or Url reference.
    if index is None:
        loader = UrlLoader()
    else:
        loader = DictLoader(util.read_index(index))
    store = RefStore(basedir=basedir, loader=loader, connect_url=db)
    store.remove(key)
def read_index(filename: str) -> Dict:
    """Read a repository index file.

    The given filename may either point to a file on the local file system
    or be an Url. A file read is attempted first; if that fails with an I/O
    error the filename is treated as an Url instead.

    Parameters
    ----------
    filename: string
        Path to file on the local file system or Url.

    Returns
    -------
    dict
    """
    try:
        return FileLoader(filename).load()
    except (IOError, OSError):
        # Not readable as a local file; fall through to the Url loader.
        pass
    return UrlLoader(url=filename).load()
def test_url_loader(url, ftype, mock_response):
    """Test loading the repository index from a Url in different formats."""
    # Regardless of the file format, the loaded document is expected to
    # contain exactly three datasets.
    index = UrlLoader(url=url, ftype=ftype).load()
    assert len(index['datasets']) == 3
Beispiel #10
0
def test_read_linked_index(mock_response):
    """Test reading a federated repository index."""
    # The multi-index document links additional repositories; the combined
    # index is expected to contain four datasets in total.
    manager = RepositoryManager(doc=UrlLoader(url='multi-index.json').load())
    assert len(manager.find()) == 4
    # Datasets from both the main and the linked index are resolvable.
    assert manager.get('us_cities') is not None
    assert manager.get('cities') is not None
def test_repository_query(query, result, mock_response):
    """Test querying the text repository."""
    manager = RepositoryManager(doc=UrlLoader().load())
    # The identifiers of the matched datasets must equal the expected result.
    matches = [ds.identifier for ds in manager.find(filter=query)]
    assert matches == result
def store(mock_response, tmpdir):
    # Local store fixture that reads its index from the mocked `index.json`
    # response and does not download datasets automatically.
    loader = UrlLoader(url='index.json')
    return LocalStore(
        package_name='refdata_test',
        package_version=__version__,
        basedir=tmpdir,
        loader=loader,
        auto_download=False
    )
Beispiel #13
0
    def __init__(self,
                 package_name: str,
                 package_version: str,
                 basedir: Optional[str] = None,
                 loader: Optional[RepositoryIndexLoader] = None,
                 auto_download: Optional[bool] = None,
                 connect_url: Optional[str] = None):
        """Initialize the base directory on the file system where downloaded
        datasets are stored, the database for storing information about the
        downloaded datasets, the repository manager, and set the auto download
        option.

        Parameters
        ----------
        package_name: string
            Name of the package that created the instance of the local store.
            This name is used to associate downloaded datasets in the local
            database with the packages that downloaded them.
        package_version: string
            Version information for the package that created the local store
            instance.
        basedir: string, default=None
            Path to the directory for downloaded datasets. By default, the
            directory that is specified in the environment variable
            REFDATA_BASEDIR is used. If the environment variable is not set a
            directory under the OS-specific user cache data directory is used.
        loader: refdata.repo.loader.RepositoryIndexLoader, default=None
            Loader for a dataset repository index. The loaded index is used to
            create the repository manager instance that is associated with the
            local data store for downloading datasets.
        auto_download: bool, default=None
            If auto download is enabled (True) datasets are downloaded
            automatically when they are first accessed via `.open()`. If this
            option is not enabled and an attempt is made to open a dataset that
            has not yet been downloaded to the local file system, an error is
            raised. If this argument is not given the value from the
            environment variable REFDATA_AUTODOWNLOAD is used or False if the
            variable is not set.
        connect_url: string, default=None
            SQLAlchemy database connect Url string. If a value is given it is
            assumed that the database exists and has been initialized. If no
            value is given the default SQLite database is used. If the
            respective database file does not exist a new database will be
            created.
        """
        self.package_name = package_name
        self.package_version = package_version
        # Resolve the base directory (environment-based default if none was
        # given) and make sure that it exists.
        self.basedir = basedir if basedir else config.BASEDIR()
        os.makedirs(self.basedir, exist_ok=True)
        # Keep the index loader (default Url loader if none was given). The
        # repository manager itself is instantiated lazily on first access.
        if loader is None:
            loader = UrlLoader()
        self.loader = loader
        self.repo = None
        # Auto-download flag; the environment setting (default False) is the
        # fallback when no argument value was given.
        if auto_download is None:
            auto_download = config.AUTO_DOWNLOAD()
        self.auto_download = auto_download
        # Connect to the given database or to the default SQLite database in
        # the base directory. A fresh SQLite database is initialized if its
        # file does not exist yet.
        if connect_url is not None:
            self.db = DB(connect_url=connect_url)
        else:
            dbfile = os.path.join(self.basedir, 'refdata.db')
            is_new_db = not os.path.isfile(dbfile)
            self.db = DB(connect_url='sqlite:///{}'.format(dbfile))
            if is_new_db:
                self.db.init()