Exemple #1
0
def _get_denpendencies(dataset, all_datasets, include_indirect=False):
    try:
        etl_type, fn = all_datasets[dataset]
    except KeyError:  # not open_numbers datasets
        return list()

    if etl_type == 'recipe':
        dataset_path = osp.join(datasets_dir, dataset)
        etl_dir = osp.join(dataset_path, 'etl/scripts')
        recipe = osp.join(etl_dir, fn)
        logging.info("using recipe file: " + fn)
        chef = Chef.from_recipe(recipe, ddf_dir=datasets_dir)
        dependencies = list()
        for i in chef.ingredients:
            if i.dataset is not None:
                dependencies.append(i.dataset)
                if include_indirect:
                    for d in _get_denpendencies(i.dataset,
                                                all_datasets,
                                                include_indirect=True):
                        dependencies.append(d)
        dependencies = list(set(dependencies))
        logging.info("dependencies: {}".format(dependencies))
        return dependencies
    else:
        return list()
Exemple #2
0
def test_chef_load_recipe():
    """Smoke test: the flatten recipe can be loaded into a Chef.

    ``validate()`` is invoked as well, but a ``ChefRuntimeError`` raised by
    it is tolerated; any other exception fails the test.
    """
    path = os.path.join(wd, 'recipes/test_flatten.yml')
    loaded = Chef.from_recipe(path)
    try:
        loaded.validate()
    except ChefRuntimeError:
        # a validation failure is deliberately not treated as a test failure
        pass
    # reaching this line means loading succeeded
    assert 1
Exemple #3
0
def build_recipe(recipe, format):
    """create a complete recipe by expanding all includes in the input recipe."""
    # recipe: passed to Chef.from_recipe and also serialized below.
    # format: 'json' or 'yaml'; any other value produces no output.
    from ddf_utils.chef.api import Chef

    # Loading the recipe through Chef happens first, so a load error
    # surfaces before anything is written.
    chef = Chef.from_recipe(recipe)

    # '-' makes click open standard output
    out = click.open_file('-', 'w')

    # NOTE(review): the object dumped below is the `recipe` argument itself,
    # not anything derived from `chef` - confirm this is intended.
    if format == 'json':
        import json
        json.dump(recipe, out, indent=4, ensure_ascii=False)
        return
    if format == 'yaml':
        import yaml
        yaml.dump(recipe, out)
Exemple #4
0
def run_recipe(recipe, outdir, ddf_dir, update, dry_run, gen_dp, show_tree):
    """generate new ddf dataset with recipe"""
    # Parameters, as used below:
    #   recipe    - recipe passed to Chef.from_recipe
    #   outdir    - output path handed to chef.run; datapackage.json is
    #               written here as well
    #   ddf_dir   - when truthy, forwarded to Chef.from_recipe as ddf_dir
    #   update    - accepted but currently has no effect
    #   dry_run   - when truthy, chef.run is called with serve=False and no
    #               datapackage is generated
    #   gen_dp    - when truthy (and not dry_run), (re)create datapackage.json
    #   show_tree - when truthy, only print the recipe's DAG tree and return
    from ddf_utils.chef.api import Chef
    from ddf_utils.package import create_datapackage
    from ddf_utils.io import dump_json
    import json

    coloredlogs.install(logger=logging.getLogger('Chef'),
                        fmt='%(asctime)s %(name)s %(levelname)s %(message)s',
                        level=LOG_LEVEL)

    click.echo('building recipe...')
    if ddf_dir:
        chef = Chef.from_recipe(recipe, ddf_dir=ddf_dir)
    else:
        chef = Chef.from_recipe(recipe)

    if show_tree:
        chef.dag.tree_view()
        return

    serve = not dry_run
    chef.run(serve=serve, outpath=outdir)

    if serve and gen_dp:
        click.echo('creating datapackage file...')
        datapackage_path = os.path.join(outdir, 'datapackage.json')
        if os.path.exists(datapackage_path):
            click.echo('backup old datapackage.json to datapackage.json.bak')
            shutil.copyfile(datapackage_path, os.path.join(outdir, 'datapackage.json.bak'))
            # Read the previous datapackage inside a context manager so the
            # file handle is closed before the file is overwritten below.
            with open(datapackage_path) as dp_file:
                dp_old = json.load(dp_file)
            # copy translations info. other info should be in the recipe.
            if 'translations' in dp_old:
                chef = chef.add_metadata(translations=dp_old['translations'])
        dump_json(datapackage_path,
                  create_datapackage(outdir, gen_schema=True, **chef.metadata))
    click.echo("Done.")
# -*- coding: utf-8 -*-

import os
from ddf_utils.chef.api import Chef

# Recipe to run and the directory the result is written to (relative paths).
recipe_file = '../recipes/recipe_main.yaml'
out_dir = '../../'

# Directory handed to Chef as ddf_dir: the DATASETS_DIR environment variable
# when it is set, otherwise a relative default.
datasets_dir = os.environ.get('DATASETS_DIR', '../../../')

if __name__ == '__main__':
    # Load the recipe and run it with serve=True, writing to out_dir.
    Chef.from_recipe(recipe_file, ddf_dir=datasets_dir).run(serve=True,
                                                            outpath=out_dir)
Exemple #6
0
# -*- coding: utf-8 -*-

import os

from ddf_utils.chef.api import Chef

# Recipe executed when this file is run as a script (relative path).
recipe_file = '../recipes/etl.yml'

if __name__ == '__main__':

    # Look up DATASETS_DIR separately from building the Chef. Wrapping
    # Chef.from_recipe in `except KeyError` would also swallow a KeyError
    # raised while loading the recipe and silently retry without ddf_dir.
    ddf_dir = os.environ.get('DATASETS_DIR')
    if ddf_dir is None:
        chef = Chef.from_recipe(recipe_file)
    else:
        chef = Chef.from_recipe(recipe_file, ddf_dir=ddf_dir)

    chef.run(serve=True, outpath='../../')
Exemple #7
0
# coding: utf8

import os
from ddf_utils.chef.api import Chef

# Output directory (relative path) passed to chef.run as outpath.
out_dir = '../../'
# Empty here - presumably a placeholder to be filled in; TODO confirm.
recipe_file = ''

# Value for Chef's ddf_dir config: the DATASETS_DIR environment variable
# when it is set, otherwise a relative default.
datasets_dir = os.environ.get('DATASETS_DIR', '../../../')

if __name__ == '__main__':
    chef = Chef.from_recipe(recipe_file)
    # ddf_dir is supplied through add_config after loading, not at load time
    chef.add_config(ddf_dir=datasets_dir)
    chef.run(serve=True, outpath=out_dir)
Exemple #8
0
def chef_fn(fn):
    """Return a Chef loaded from the recipe file ``fn`` under ``wd/recipes``.

    ``wd/datasets`` is passed as ``ddf_dir`` and ``wd/procedures`` as
    ``procedure_dir``.
    """
    recipe_path = os.path.join(wd, 'recipes', fn)
    datasets_path = os.path.join(wd, 'datasets')
    procedures_path = os.path.join(wd, 'procedures')
    return Chef.from_recipe(recipe_path,
                            ddf_dir=datasets_path,
                            procedure_dir=procedures_path)