Exemple #1
0
def from_yaml(fh,
              ignore=(ValueError, NotImplementedError),
              followlinks=True,
              hidden=False,
              relative_to_yaml_dir=False):
    """Construct a dictionary of resources from a YAML specification.

    Every top level key of the YAML document names one resource.  Its value
    is a mapping that must contain a ``source`` entry and may contain an
    ``imports`` list of module names, each of which is imported before the
    resource is loaded.  All remaining entries are forwarded to the loader
    as keyword arguments.

    Parameters
    ----------
    fh : file
        File object referring to the YAML specification of resources to load.
        Its ``name`` attribute is used to locate the YAML file's directory.
    ignore : tuple of Exception, optional
        Forwarded to ``data_spider`` when ``source`` is a directory.  It is
        not applied when ``source`` is loaded directly.
    followlinks : bool, optional
        Follow symbolic links (directory sources only).
    hidden : bool, optional
        Load hidden files (directory sources only).
    relative_to_yaml_dir: bool, optional, default False
        Load paths relative to yaml file's directory.  Default is to load
        relative to process' CWD.

    Returns
    -------
    dict
        A dictionary mapping top level keys in a YAML file to resources.
        An empty YAML document yields an empty dictionary.

    Raises
    ------
    ValueError
        If an entry of the specification has no ``source`` key.

    See Also
    --------
    data_spider : Traverse a directory tree for resources
    """
    resources = {}
    yaml_dir = os.path.dirname(os.path.abspath(fh.name))
    # ``yaml.load`` without an explicit ``Loader`` is rejected by PyYAML >= 6
    # and can construct arbitrary Python objects on older releases.  A
    # resource specification only needs plain mappings, lists and scalars, so
    # the safe loader is used instead.
    # NOTE(review): specifications relying on ``!!python/...`` tags will no
    # longer load -- confirm none exist.
    # An empty document parses to ``None``; treat it as "no resources".
    spec = yaml.safe_load(fh.read()) or {}
    for name, info in spec.items():
        with pushd(yaml_dir if relative_to_yaml_dir else os.getcwd()):
            try:
                source = info.pop('source')
            except KeyError:
                raise ValueError(
                    'source key not found for data source named %r' % name)
            for mod in info.pop('imports', []):
                importlib.import_module(mod)
            # Expand ``~`` *before* testing for a directory; otherwise a
            # source such as ``~/data`` is never recognised as one.
            expanded = os.path.expanduser(source)
            if os.path.isdir(expanded):
                resources[name] = data_spider(expanded,
                                              ignore=ignore,
                                              followlinks=followlinks,
                                              hidden=hidden,
                                              extra_kwargs=info)
            else:
                resources[name] = bz_data(source, **info)
    return resources
Exemple #2
0
def from_yaml(fh,
              ignore=(ValueError, NotImplementedError),
              followlinks=True,
              hidden=False,
              relative_to_yaml_dir=False):
    """Construct a dictionary of resources from a YAML specification.

    The YAML document is expected to be a mapping of resource names to
    per-resource mappings.  Each per-resource mapping must provide a
    ``source`` key, may provide an ``imports`` key (a list of module names
    that are imported first), and passes every other key on to the loader as
    a keyword argument.

    Parameters
    ----------
    fh : file
        File object referring to the YAML specification of resources to load.
        ``fh.name`` determines the directory used by ``relative_to_yaml_dir``.
    ignore : tuple of Exception, optional
        Passed through to ``data_spider`` for directory sources; sources that
        are not directories are loaded without it.
    followlinks : bool, optional
        Follow symbolic links when a source is a directory.
    hidden : bool, optional
        Load hidden files when a source is a directory.
    relative_to_yaml_dir: bool, optional, default False
        Load paths relative to yaml file's directory.  Default is to load
        relative to process' CWD.

    Returns
    -------
    dict
        A dictionary mapping top level keys in a YAML file to resources.
        Empty if the YAML document itself is empty.

    Raises
    ------
    ValueError
        If a resource entry does not define ``source``.

    See Also
    --------
    data_spider : Traverse a directory tree for resources
    """
    yaml_dir = os.path.dirname(os.path.abspath(fh.name))

    # Use the safe loader: calling ``yaml.load`` with no ``Loader`` argument
    # raises ``TypeError`` on PyYAML >= 6 and is the unsafe loader on old
    # releases.  NOTE(review): this drops support for ``!!python/...`` tags
    # in the specification -- confirm nothing depends on them.
    document = yaml.safe_load(fh.read())
    if document is None:
        # Empty file: nothing to load.
        return {}

    resources = {}
    for name, info in document.items():
        with pushd(yaml_dir if relative_to_yaml_dir else os.getcwd()):
            try:
                source = info.pop('source')
            except KeyError:
                raise ValueError('source key not found for data source named %r' %
                                 name)
            for mod in info.pop('imports', []):
                importlib.import_module(mod)
            # The directory test has to see the ``~``-expanded path; testing
            # the raw string makes ``~/some/dir`` fall through to ``bz_data``.
            directory = os.path.expanduser(source)
            if os.path.isdir(directory):
                resources[name] = data_spider(directory,
                                              ignore=ignore,
                                              followlinks=followlinks,
                                              hidden=hidden,
                                              extra_kwargs=info)
            else:
                resources[name] = bz_data(source, **info)
    return resources
Exemple #3
0
def _spider(resource_path, ignore, followlinks, hidden, extra_kwargs):
    """Recursively collect the resources found beneath ``resource_path``.

    Parameters
    ----------
    resource_path : str
        Directory whose entries are listed with ``os.listdir``.
    ignore : iterable of Exception
        Unpacked into ``ignoring(...)`` around each ``bz_data`` call;
        presumably those exceptions are suppressed so that a file which
        cannot be loaded is simply left out -- confirm against ``ignoring``.
    followlinks : bool
        When false, entries that are symbolic links are skipped.
    hidden : bool
        When false, entries whose name starts with ``os.curdir`` are skipped.
    extra_kwargs : dict or None
        Keyword arguments forwarded to ``bz_data`` for every file.

    Returns
    -------
    dict
        Maps entry names to loaded resources (files) or to nested
        dictionaries (sub-directories).  Sub-directories that produce an
        empty dictionary are omitted.
    """
    loader_kwargs = extra_kwargs or {}
    found = {}
    for entry in os.listdir(resource_path):
        full_path = os.path.join(resource_path, entry)

        # Filter out hidden entries and symlinks the caller opted out of.
        if not hidden and entry.startswith(os.curdir):
            continue
        if not followlinks and os.path.islink(full_path):
            continue

        if not os.path.isdir(full_path):
            with ignoring(*ignore):
                found[entry] = bz_data(full_path, **loader_kwargs)
            continue

        children = _spider(full_path,
                           ignore=ignore,
                           followlinks=followlinks,
                           hidden=hidden,
                           extra_kwargs=extra_kwargs)
        if children:
            found[entry] = children
    return found
Exemple #4
0
def from_yaml(path, ignore=(ValueError, NotImplementedError), followlinks=True,
              hidden=False):
    """Construct a dictionary of resources from a YAML specification.

    Each top level key of the YAML document names a resource.  Its value is
    a mapping with a required ``source`` entry, an optional ``imports`` list
    of module names to import before loading, and any further entries, which
    are passed to the loader as keyword arguments.

    Parameters
    ----------
    path : file-like
        Open file-like object holding the YAML specification of resources to
        load.  Despite its name this is not a path string: the specification
        is obtained by calling ``path.read()``.
    ignore : tuple of Exception, optional
        Forwarded to ``data_spider`` when ``source`` is a directory; not
        applied when ``source`` is loaded directly.
    followlinks : bool, optional
        Follow symbolic links
    hidden : bool, optional
        Load hidden files

    Returns
    -------
    dict
        A dictionary mapping top level keys in a YAML file to resources.
        An empty YAML document yields an empty dictionary.

    Raises
    ------
    ValueError
        If an entry of the specification has no ``source`` key.

    See Also
    --------
    data_spider : Traverse a directory tree for resources
    """
    resources = {}
    # ``yaml.load`` with no ``Loader`` fails on PyYAML >= 6 and is unsafe on
    # older versions, so parse with the safe loader.  NOTE(review): this
    # rejects ``!!python/...`` tags -- confirm no specification uses them.
    # ``or {}`` covers an empty document, which parses to ``None``.
    spec = yaml.safe_load(path.read()) or {}
    for name, info in spec.items():
        try:
            source = info.pop('source')
        except KeyError:
            raise ValueError('source key not found for data source named %r' %
                             name)
        for mod in info.pop('imports', []):
            importlib.import_module(mod)
        # Expand ``~`` first so that e.g. ``~/data`` is detected as a
        # directory instead of being handed to ``bz_data`` unexpanded.
        expanded = os.path.expanduser(source)
        if os.path.isdir(expanded):
            resources[name] = data_spider(expanded,
                                          ignore=ignore,
                                          followlinks=followlinks,
                                          hidden=hidden,
                                          extra_kwargs=info)
        else:
            resources[name] = bz_data(source, **info)
    return resources
Exemple #5
0
def _spider(resource_path, ignore, followlinks, hidden, extra_kwargs):
    """Build a nested dictionary of resources for a directory tree.

    Lists ``resource_path`` and, for each entry that passes the filters:

    * a directory is traversed recursively and stored under its name only if
      the recursive call returned a non-empty dictionary;
    * anything else is passed to ``bz_data`` (with ``extra_kwargs`` as
      keyword arguments) inside ``ignoring(*ignore)`` and stored under its
      name.

    An entry is filtered out when its name starts with ``os.curdir`` and
    ``hidden`` is false, or when it is a symbolic link and ``followlinks`` is
    false.

    Returns
    -------
    dict
        The collected resources keyed by entry name.
    """
    tree = {}
    for child in os.listdir(resource_path):
        child_path = os.path.join(resource_path, child)

        visible = hidden or not child.startswith(os.curdir)
        # The link check only runs for entries that survived the hidden check.
        if not (visible and (followlinks or not os.path.islink(child_path))):
            continue

        if os.path.isdir(child_path):
            subtree = _spider(child_path, ignore, followlinks, hidden,
                              extra_kwargs)
            if subtree:
                tree[child] = subtree
        else:
            with ignoring(*ignore):
                tree[child] = bz_data(child_path, **(extra_kwargs or {}))
    return tree
Exemple #6
0
def create_index(uri, column_name_or_names, name=None, **kwargs):
    """Load the resource at ``uri`` and create an index on it.

    Parameters
    ----------
    uri
        Passed to ``bz_data`` together with ``**kwargs`` to obtain the
        resource.
    column_name_or_names
        Forwarded unchanged to ``create_index`` for the loaded resource.
    name : optional
        Forwarded as the ``name`` keyword of that call.

    Returns
    -------
    The object returned by ``bz_data``.

    NOTE(review): as written here, the inner ``create_index`` call resolves
    to this very function.  Presumably a dispatch decorator that is not part
    of this excerpt routes it to a different implementation -- confirm.
    """
    resource = bz_data(uri, **kwargs)
    create_index(resource, column_name_or_names, name=name)
    return resource
Exemple #7
0
def create_index(uri, column_name_or_names, name=None, **kwargs):
    """Create an index on the data that ``uri`` refers to and return it.

    ``uri`` and ``**kwargs`` are handed to ``bz_data``; the result is then
    passed to ``create_index`` along with ``column_name_or_names`` and
    ``name``, and finally returned to the caller.
    """
    # NOTE(review): the call below looks up the global ``create_index``,
    # which in this excerpt is this function itself; it presumably relies on
    # type-based dispatch declared outside the visible code -- confirm.
    loaded = bz_data(uri, **kwargs)
    create_index(loaded, column_name_or_names, name=name)
    return loaded