def _get_denpendencies(dataset, all_datasets, include_indirect=False): try: etl_type, fn = all_datasets[dataset] except KeyError: # not open_numbers datasets return list() if etl_type == 'recipe': dataset_path = osp.join(datasets_dir, dataset) etl_dir = osp.join(dataset_path, 'etl/scripts') recipe = osp.join(etl_dir, fn) logging.info("using recipe file: " + fn) chef = Chef.from_recipe(recipe, ddf_dir=datasets_dir) dependencies = list() for i in chef.ingredients: if i.dataset is not None: dependencies.append(i.dataset) if include_indirect: for d in _get_denpendencies(i.dataset, all_datasets, include_indirect=True): dependencies.append(d) dependencies = list(set(dependencies)) logging.info("dependencies: {}".format(dependencies)) return dependencies else: return list()
def test_chef_load_recipe():
    """Smoke test: the flatten recipe can be loaded and validated without crashing."""
    path = os.path.join(wd, 'recipes/test_flatten.yml')
    loaded = Chef.from_recipe(path)
    try:
        loaded.validate()
    except ChefRuntimeError:
        # a validation failure is acceptable here — we only check that
        # loading and running the validator do not blow up unexpectedly
        pass
    assert 1
def build_recipe(recipe, format):
    """create a complete recipe by expanding all includes in the input recipe."""
    from ddf_utils.chef.api import Chef

    chef = Chef.from_recipe(recipe)
    fp = click.open_file('-', 'w')
    # BUG FIX: the original dumped the raw *recipe* argument (a file path
    # string), so the command printed just the path instead of the expanded
    # recipe. Serialize the recipe built by the Chef instead.
    # NOTE(review): assumes Chef.to_recipe() returns the expanded recipe
    # structure — confirm against the ddf_utils Chef API.
    if format == 'json':
        import json
        json.dump(chef.to_recipe(), fp, indent=4, ensure_ascii=False)
    elif format == 'yaml':
        import yaml
        yaml.dump(chef.to_recipe(), fp)
def run_recipe(recipe, outdir, ddf_dir, update, dry_run, gen_dp, show_tree):
    """generate new ddf dataset with recipe"""
    from ddf_utils.chef.api import Chef
    from ddf_utils.package import create_datapackage
    from ddf_utils.io import dump_json
    import json

    coloredlogs.install(logger=logging.getLogger('Chef'),
                        fmt='%(asctime)s %(name)s %(levelname)s %(message)s',
                        level=LOG_LEVEL)
    click.echo('building recipe...')
    if ddf_dir:
        chef = Chef.from_recipe(recipe, ddf_dir=ddf_dir)
    else:
        chef = Chef.from_recipe(recipe)

    # only display the dependency tree, don't build anything
    if show_tree:
        chef.dag.tree_view()
        return

    if update:
        # TODO: the --update flag is accepted but not implemented yet
        pass

    serve = not dry_run
    chef.run(serve=serve, outpath=outdir)

    if serve and gen_dp:
        click.echo('creating datapackage file...')
        datapackage_path = os.path.join(outdir, 'datapackage.json')
        if os.path.exists(datapackage_path):
            click.echo('backup old datapackage.json to datapackage.json.bak')
            shutil.copyfile(datapackage_path,
                            os.path.join(outdir, 'datapackage.json.bak'))
            # fix: use a context manager so the file handle is closed
            # (was json.load(open(...)), which leaks the handle)
            with open(datapackage_path) as f:
                dp_old = json.load(f)
            # copy translations info. other info should be in the recipe.
            if 'translations' in dp_old.keys():
                chef = chef.add_metadata(translations=dp_old['translations'])
        dump_json(os.path.join(outdir, 'datapackage.json'),
                  create_datapackage(outdir, gen_schema=True, **chef.metadata))
    click.echo("Done.")
# -*- coding: utf-8 -*-
"""Build the main recipe and serve the result into the dataset directory."""

import os

from ddf_utils.chef.api import Chef

out_dir = '../../'
recipe_file = '../recipes/recipe_main.yaml'

# fall back to the relative repo layout when DATASETS_DIR is not set
datasets_dir = os.environ.get('DATASETS_DIR', '../../../')

if __name__ == '__main__':
    main_chef = Chef.from_recipe(recipe_file, ddf_dir=datasets_dir)
    main_chef.run(serve=True, outpath=out_dir)
# -*- coding: utf-8 -*-
"""Run the etl recipe, using DATASETS_DIR as the ddf search path when set."""

import os

from ddf_utils.chef.api import Chef

recipe_file = '../recipes/etl.yml'

if __name__ == '__main__':
    # Keep the try body minimal: previously Chef.from_recipe() was also
    # inside the try, so a KeyError raised while building the chef would
    # be silently misread as "DATASETS_DIR not set".
    try:
        d = os.environ['DATASETS_DIR']
    except KeyError:
        chef = Chef.from_recipe(recipe_file)
    else:
        chef = Chef.from_recipe(recipe_file, ddf_dir=d)
    chef.run(serve=True, outpath='../../')
# coding: utf8
"""Template etl script: fill in recipe_file, then run to build the dataset."""

import os

from ddf_utils.chef.api import Chef

recipe_file = ''
out_dir = '../../'

# default to the relative repo layout when DATASETS_DIR is not set
datasets_dir = os.environ.get('DATASETS_DIR', '../../../')

if __name__ == '__main__':
    chef = Chef.from_recipe(recipe_file)
    chef.add_config(ddf_dir=datasets_dir)
    chef.run(serve=True, outpath=out_dir)
def chef_fn(fn):
    """Build a Chef from recipe file *fn* inside the test workspace *wd*."""
    recipe_path = os.path.join(wd, 'recipes', fn)
    return Chef.from_recipe(recipe_path,
                            ddf_dir=os.path.join(wd, 'datasets'),
                            procedure_dir=os.path.join(wd, 'procedures'))