# Pipeline processor: runs a one-step Flow that applies `update_resource`
# using the values found in the step parameters.
from dataflows import Flow, update_resource
from datapackage_pipelines.wrapper import ingest
from datapackage_pipelines.utilities.flow_utils import spew_flow


def flow(parameters):
    """Build a single-step Flow around ``update_resource``.

    ``parameters['resources']`` (``None`` when the key is absent) is passed
    as the first positional argument, and the ``'metadata'`` mapping
    (``{}`` when absent) is expanded into keyword arguments.

    Note that ``'metadata'`` is popped, so the caller's ``parameters`` dict
    is modified by this call.
    """
    selector = parameters.get('resources')
    props = parameters.pop('metadata', {})
    step = update_resource(selector, **props)
    return Flow(step)


if __name__ == '__main__':
    with ingest() as ctx:
        pipeline = flow(ctx.parameters)
        spew_flow(pipeline, ctx)
# Generic processor: imports the module named by the `__flow` parameter
# (after appending the `__path` parameter to `sys.path`), asks it to build a
# flow, and hands that flow to `spew_flow`.
import sys
from importlib import import_module

from datapackage_pipelines.wrapper import ingest
from datapackage_pipelines.utilities.flow_utils import spew_flow

with ingest() as ctx:
    parameters, datapackage, resources = ctx

    # `__path` and `__flow` are popped, so the user module's `flow()` does not
    # see these two bookkeeping keys among its parameters.
    sys.path.append(parameters.pop('__path'))
    flow_module = import_module(parameters.pop('__flow'))

    # The context's own stats dict is passed on; the previous standalone
    # `stats = {}` local was never read and has been removed.
    flow = flow_module.flow(parameters, datapackage, resources, ctx.stats)

    spew_flow(flow, ctx)
self.stats = {} def process_resource(self, resource): resource_path = resource.res.infer().get('path', '.') out_file = os.path.join(self.out_path, resource_path) out_file, _ = os.path.splitext(out_file) if not os.path.exists(os.path.dirname(out_file)): try: os.makedirs(os.path.dirname(out_file)) except OSError: pass with RollingJSONFile(out_file + '.json', self.max_rows) as f: for row in resource: f.write(row) yield row def flow(parameters: dict, stats: dict): out_path = parameters.pop('out-path', '.') max_rows = parameters.get('max-rows', 0) stats.setdefault(STATS_DPP_KEY, {})[STATS_OUT_DP_URL_KEY] = os.path.join( out_path, 'datapackage.json') return Flow(DumpToJson(out_path, max_rows)) if __name__ == '__main__': with ingest() as ctx: spew_flow(flow(ctx.parameters, ctx.stats), ctx)
# Pipeline processor: runs the `data_gov_il_resource` flow once per entry of
# the `batch` parameter, with all entries sharing one Chrome driver object.
from dataflows import Flow
from datapackage_pipelines.wrapper import ingest
from datapackage_pipelines.utilities.flow_utils import spew_flow
from datapackage_pipelines_budgetkey.processors.data_gov_il_resource import flow
from datapackage_pipelines_budgetkey.common.google_chrome import google_chrome_driver


def batch_flow(parameters):
    """Combine one ``flow(...)`` per item of ``parameters['batch']`` in a Flow.

    Each batch item is copied into a new dict with an extra ``gcd`` key
    holding the shared driver. An item that already has a ``gcd`` key makes
    the ``dict(**item, gcd=...)`` call raise ``TypeError``.
    """
    # NOTE(review): nothing in this function releases the driver - confirm
    # that its teardown is handled elsewhere.
    gcd = google_chrome_driver()

    steps = []
    for entry in parameters['batch']:
        entry_parameters = dict(**entry, gcd=gcd)
        steps.append(flow(entry_parameters))
    return Flow(*steps)


if __name__ == '__main__':
    with ingest() as ctx:
        pipeline = batch_flow(ctx.parameters)
        spew_flow(pipeline, ctx)