def test_nested_does_not_short_circuit_crawl():
    """Regression test: a nested List variable must not prevent the coverage
    crawl from visiting sibling subtrees of the schema.

    Bug history:
      - Detected around 9/20/2019
      - Isolated minimum reproducible case on 9/24/2019
      - Caused by commit e23b825 (8/27/2019)
      - Regression test based on minimum reproducible case
    """
    # Minimal schema reproducing the bug: a List ("application_submissions")
    # with a child, followed by a sibling Folder ("filer") with a child.
    spec: Dict = {
        "root": {
            "name": "return",
            "data_type": "Folder",
            "sort_order": 0
        },
        "application_submissions": {
            "name": "application_submissions",
            "data_type": "List",
            "parent": "root",
            "sort_order": 0
        },
        "award_restrict": {
            "name": "award_restrict",
            "data_type": "Text",
            "parent": "application_submissions",
            "sort_order": 0
        },
        "filer": {
            "name": "filer",
            "data_type": "Folder",
            "parent": "root",
            "sort_order": 1
        },
        "name_org": {
            "name": "name_org",
            "data_type": "Text",
            "parent": "filer",
            "sort_order": 0
        }
    }
    temporal: Track = Track.build(spec, None, "temporal")
    immutable: Track = Track.build({}, None, "immutable")
    schema: Schema = Schema(temporal, immutable, name="semantic")

    basepath: str = os.path.dirname(os.path.abspath(__file__))
    composite_path: str = os.path.join(basepath, "data")

    # NOTE(review): `output_path` is not defined in this function — presumably
    # a module-level constant; confirm it exists at module scope.
    shutil.rmtree(output_path, ignore_errors=True)
    os.makedirs(output_path)

    with Context.build(conf_dir="dummy", data_dir="dummy") as context:
        coverage: CoverageFile = CoverageFile(
            context, schema, output_path + "/semantic", None, None)
        coverage(composite_path, "dummy")

    expected_path: str = os.path.join(basepath, "expected.csv")
    actual_path: str = os.path.join(output_path, "semantic_temporal.csv")
    with open(expected_path) as expected_fh, open(actual_path) as actual_fh:
        expected: csv.DictReader = csv.DictReader(expected_fh)
        actual: csv.DictReader = csv.DictReader(actual_fh)
        # list(reader) instead of a pass-through comprehension (PERF402).
        e_rows = list(expected)
        a_rows = list(actual)
        assert a_rows == e_rows
def _run_task(scenario, task_name, expected_location, output_dir: Optional[str] = None):
    """Run the named task for a scenario and compare every produced composite,
    JSON document by JSON document, to the expected fixtures.

    :param scenario: example scenario directory name under ../examples.
    :param task_name: name of the task to build and run.
    :param expected_location: directory (relative to the entities input dir)
        holding the expected composites.
    :param output_dir: optional override for the task's output directory.
    """
    polytropos.actions.register_all()
    conf = os.path.join(basepath, '../examples', scenario, 'conf')
    data = os.path.join(basepath, '../examples', scenario, 'data')
    with Context.build(conf, data, output_dir=output_dir) as context:
        task = Task.build(context, task_name)
        task.run()

    actual_path = os.path.join(task.context.entities_output_dir, task.target_data)
    expected_path = os.path.join(task.context.entities_input_dir, expected_location)

    composite_ids: List = list(find_all_composites(expected_path))
    # BUG FIX: the original asserted list(find_all_composites(expected_path))
    # == composite_ids — comparing the expected directory to itself, a
    # tautology. The intent is clearly that the task produced exactly the
    # expected set of composites; sorted() makes the check order-insensitive.
    assert sorted(find_all_composites(actual_path)) == sorted(composite_ids)

    for composite_id in composite_ids:
        relpath: str = relpath_for(composite_id)
        actual_fn = os.path.join(actual_path, relpath, "%s.json" % composite_id)
        expected_fn = os.path.join(expected_path, relpath, "%s.json" % composite_id)
        with open(actual_fn) as f, open(expected_fn) as g:
            actual_data = json.load(f)
            expected_data = json.load(g)
        if not compare(actual_data, expected_data):
            # Only build the (expensive) line diff when the comparison fails;
            # the original computed it unconditionally for every composite.
            diff = Differ().compare(
                json.dumps(actual_data, indent=4).split('\n'),
                json.dumps(expected_data, indent=4).split('\n'))
            assert False, 'Diff: ' + '\n'.join(diff)
def setup_teardown(schema):
    """Fixture: copy the input tree, run Quantile over it, then remove the
    working directory once the dependent test has finished."""
    shutil.copytree(os.path.join(base_path, "input"), input_dir)
    ctx_args = ("", "", "", "", "", "", "", False, 1, False, True)
    with Context(*ctx_args) as context:
        action: Quantile = Quantile(context, schema, source, target)
        action(input_dir, actual_dir)
    yield
    shutil.rmtree(working_path)
def translate(schemas_dir: str, source_schema: str, target_schema: str, input_dir: str, output_dir: str) -> None:
    """Translate composites in input_dir from the source schema to the target
    schema, writing results to output_dir."""
    context_manager = Context.build(
        "",
        "",
        input_dir=input_dir,
        output_dir=output_dir,
        schemas_dir=schemas_dir,
    )
    with context_manager as context:
        Translate.standalone(context, source_schema, target_schema)
def _do_run():
    """Run a SourceCoverageFile report over this module's fixture data."""
    fixture_dirs = {
        "translate": os.path.join(module_basepath, "translate"),
        "trace": os.path.join(module_basepath, "trace"),
    }
    with Context.build(conf_dir="dummy", data_dir="dummy",
                       output_dir=output_basepath) as context:
        report = SourceCoverageFile(
            context, module_target_schema,
            fixture_dirs["translate"], fixture_dirs["trace"])
        report("dummy", "dummy")
def trace(schemas_dir: str, source_schema: str, target_schema: str, input_dir: str, output_dir: str, chunk_size: Optional[int]) -> None:
    """Trace the provenance of each target-schema value back to its source."""
    context_manager = Context.build(
        "",
        "",
        input_dir=input_dir,
        output_dir=output_dir,
        schemas_dir=schemas_dir,
        process_pool_chunk_size=chunk_size,
    )
    with context_manager as context:
        Trace.standalone(context, source_schema, target_schema)
def translate(source_schema, target_schema) -> Translate:
    """Build a Translate action wired with one Translator per track of the
    target schema."""
    context: Context = Context("", "", "", "", "", "", "", False, 1, False, True)
    provider = Translate.create_document_value_provider
    immutable_translator: Translator = Translator(target_schema.immutable, provider)
    temporal_translator: Translator = Translator(target_schema.temporal, provider)
    return Translate(context, target_schema, immutable_translator, temporal_translator)
def _do_run(data_type: str):
    """Run a SourceCoverageFile report for the fixtures of one data type."""
    # Hoist the repeated lower-casing of the data-type subdirectory name.
    subdir = data_type.lower()
    translate_dir: str = os.path.join(module_basepath, subdir, "translate")
    trace_dir: str = os.path.join(module_basepath, subdir, "trace")
    output_dir: str = os.path.join(output_basepath, subdir)
    with Context.build(conf_dir="dummy", data_dir="dummy",
                       output_dir=output_dir) as context:
        report = SourceCoverageFile(
            context, target_schema(data_type), translate_dir, trace_dir)
        report("dummy", "dummy")
def source_coverage(schemas_dir: str, source_schema_name: str, target_schema_name: str, translate_dir: str, trace_dir: str, output_path: str) -> None:
    """Produce a source coverage report at output_path."""
    # Split the report path into the directory the context writes to and the
    # file name the report is saved under.
    output_dir, output_filename = os.path.split(output_path)
    context_manager = Context.build(
        "",
        "",
        output_dir=output_dir,
        schemas_dir=schemas_dir,
        clean_output_directory=False,
    )
    with context_manager as context:
        SourceCoverageFile.standalone(
            context, translate_dir, trace_dir,
            source_schema_name, target_schema_name, output_filename)
def coverage(schema_basepath: str, schema_name: str, data_path: str, output_prefix: str, t_group: Optional[str], i_group: Optional[str], exclude_trivial: bool) -> None:
    """Produce a coverage report consisting of four files: coverage and groups
    for each of immutable and temporal tracks."""
    # Narrow the raw CLI strings to the VariableId type the report expects.
    temporal_group = cast(Optional[VariableId], t_group)
    immutable_group = cast(Optional[VariableId], i_group)
    with Context.build("", "", input_dir=data_path,
                       schemas_dir=schema_basepath) as context:
        CoverageFile.standalone(context, schema_name, output_prefix,
                                temporal_group, immutable_group,
                                exclude_trivial)
def setup_and_teardown():
    """Run every task defined in the fixture's tasks directory, then clean up.

    Yields once so this functions as a pytest setup/teardown fixture: the
    working directory is wiped both before the tasks run and after the
    dependent tests complete.
    """
    shutil.rmtree(WORKING_PATH, ignore_errors=True)
    polytropos.actions.register_all()
    data_dir: str = os.path.join(FIXTURE_PATH, "data")
    conf_dir: str = os.path.join(FIXTURE_PATH, "conf")
    task_dir: str = os.path.join(conf_dir, "tasks")
    with Context.build(conf_dir, data_dir, output_dir=WORKING_PATH) as context:
        for file in os.scandir(task_dir):
            # FIX: derive the task name with splitext rather than the original
            # hard-coded file.name[:-5] slice, which silently mangled any file
            # whose extension was not exactly four characters (e.g. ".yml").
            task_name: str = os.path.splitext(file.name)[0]
            task = Task.build(context, task_name)
            task.run()
    yield
    shutil.rmtree(WORKING_PATH, ignore_errors=True)
def _do_run(output_dir: str) -> None:
    """Exercise SourceCoverage end-to-end against this module's fixtures,
    writing the report into output_dir."""
    src: Schema = source_schema("Text")
    tgt: Schema = target_schema(src, "Text")
    fixtures = os.path.join(MODULE_BASEPATH, "fixtures")
    translate_dir: str = os.path.join(fixtures, "translate")
    trace_dir: str = os.path.join(fixtures, "trace")
    with Context.build(conf_dir="/tmp/dummy", data_dir="/tmp/dummy",
                       output_dir=output_dir) as context:
        action: SourceCoverage = SourceCoverage(context, tgt, translate_dir, trace_dir)
        action("/tmp/dummy", "/tmp/dummy")
def task(data_path: str, config_path: str, task_name: str, input_path: Optional[str], output_path: Optional[str], temp_path: Optional[str], no_cleanup: bool, chunk_size: Optional[int]) -> None:
    """Perform a Polytropos task."""
    context_manager = Context.build(
        config_path,
        data_path,
        input_dir=input_path,
        output_dir=output_path,
        temp_dir=temp_path,
        no_cleanup=no_cleanup,
        process_pool_chunk_size=chunk_size,
    )
    with context_manager as context:
        Task.build(context, task_name).run()
def _do_export(task_name: str) -> Tuple[str, str]:
    """Run an export task for the s_5_tr_export example and return the
    (actual, expected) paths of the exported file."""
    polytropos.actions.register_all()
    example_root = os.path.join(BASEPATH, '../../examples', "s_5_tr_export")
    conf = os.path.join(example_root, 'conf')
    data = os.path.join(example_root, 'data')
    with Context.build(conf, data) as context:
        task = Task.build(context, task_name)
        task.run()
    # The export step writes its file one level above the conf directory;
    # the expected fixture lives in a sibling "expected" directory.
    filename: str = task.steps[-1].filename
    actual_fn: str = os.path.join(task.context.conf_dir, '../', filename)
    expected_fn: str = os.path.join(task.context.conf_dir, '../', "expected", filename)
    return actual_fn, expected_fn
def context() -> Context:
    """Provide a Context built with placeholder conf/data directories."""
    return Context.build(data_dir="dummy", conf_dir="dummy")