def test_datamapline_repository_single_file_repo(datamap, datamapline_list_objects):
    """A repo built from a single datamap file exposes lines as objects and as JSON."""
    repo = InMemorySingleDatamapRepository(datamap)
    first_line = repo.list_as_objs()[0]
    expected_line = datamapline_list_objects[0]
    assert first_line.key == expected_line.key
    assert first_line.sheet == expected_line.sheet
    parsed = json.loads(repo.list_as_json())
    assert parsed[0]["key"] == "Project/Programme Name"
def test_incorrect_validation_type_is_na(
        mock_config, datamap_match_test_template_incorrect_type_descriptor, template):
    """An unrecognised wanted type appears verbatim in the validation report.

    We want to show incorrect wanted types in the validation report so the
    user can fix them.
    """
    mock_config.initialise()
    shutil.copy2(template, Path(mock_config.PLATFORM_DOCS_DIR) / "input")
    template_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    datamap_repo = InMemorySingleDatamapRepository(
        datamap_match_test_template_incorrect_type_descriptor)
    use_case = CreateMasterUseCaseWithValidation(
        datamap_repo, template_repo, MasterOutputRepository)
    use_case.execute("master.xlsx")
    # The report filename includes a timestamp, so locate it with a glob.
    report_files = list(mock_config.FULL_PATH_OUTPUT.glob("*.csv"))
    with open(report_files[0]) as csvfile:
        reader = csv.DictReader(csvfile)
        next(reader)
        next(reader)
        third_row = next(reader)  # we need the third row
    assert third_row["Expected Type"] == "BUTTER"
def test_validation_csv_report_with_mixture_of_included_types(
        mock_config, datamap_missing_one_type, template):
    """A datamap line without a declared type is reported as UNTYPED with NA expected type."""
    mock_config.initialise()
    shutil.copy2(template, Path(mock_config.PLATFORM_DOCS_DIR) / "input")
    template_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    datamap_repo = InMemorySingleDatamapRepository(datamap_missing_one_type)
    use_case = CreateMasterUseCaseWithValidation(
        datamap_repo, template_repo, MasterOutputRepository)
    use_case.execute("master.xlsx")
    # The report filename includes a timestamp, so locate it with a glob.
    report_files = list(mock_config.FULL_PATH_OUTPUT.glob("*.csv"))
    with open(report_files[0]) as csvfile:
        reader = csv.DictReader(csvfile)
        next(reader)
        second_row = next(reader)  # we need the second row
    assert (
        second_row["Filename"]
        == f"{str(mock_config.PLATFORM_DOCS_DIR / 'input')}/test_template.xlsx"
    )
    assert second_row["Pass Status"] == "UNTYPED"
    assert second_row["Key"] == "String Key"
    assert second_row["Sheet Name"] == "Summary"
    assert second_row["Expected Type"] == "NA"
def test_zip_with_directory_raises_exception(mock_config, datamap,
                                             templates_zipped_containing_dir):
    """A zip archive containing a directory entry is rejected with NestedZipError."""
    mock_config.initialise()
    shutil.copy2(datamap, Path(mock_config.PLATFORM_DOCS_DIR) / "input")
    zip_repo = InMemoryPopulatedTemplatesZip(templates_zipped_containing_dir)
    datamap_repo = InMemorySingleDatamapRepository(
        Path(mock_config.PLATFORM_DOCS_DIR) / "input" / "datamap.csv")
    use_case = ApplyDatamapToExtractionUseCase(datamap_repo, zip_repo)
    with pytest.raises(NestedZipError):
        use_case.execute()
def test_in_extract_files_from_zipfile_with_deep_structure_raises_exception(
        mock_config, datamap, templates_zipped_deep_structure):
    """A zip archive with a deeply nested structure is rejected with NestedZipError."""
    mock_config.initialise()
    shutil.copy2(datamap, Path(mock_config.PLATFORM_DOCS_DIR) / "input")
    template_repo = InMemoryPopulatedTemplatesZip(templates_zipped_deep_structure)
    datamap_repo = InMemorySingleDatamapRepository(
        Path(mock_config.PLATFORM_DOCS_DIR) / "input" / "datamap.csv")
    use_case = ApplyDatamapToExtractionUseCase(datamap_repo, template_repo)
    with pytest.raises(NestedZipError):
        use_case.execute()
def execute(self) -> None:
    """
    Writes a master file to multiple templates using blank_template,
    based on the blank_template and the datamap.

    Walks the master sheet column by column (column A holds the keys,
    each subsequent column holds one template's data), resolves each key
    to a sheet/cellref via the datamap lines, and hands the collected
    data to the output repository.

    Raises:
        RuntimeError: if the datamap contains keys that are absent from
            the master — processing does not continue in that case.
    """
    master_data: MASTER_DATA_FOR_FILE = []
    # Parse the datamap up front so keys can be cross-checked against the master.
    self.parse_dm_repo = InMemorySingleDatamapRepository(str(self._datamap))
    self._parse_dm_uc = ParseDatamapUseCase(self.parse_dm_repo)
    if not self._check_datamap_matches_cola():
        _missing_keys = self._get_keys_in_datamap_not_in_master()
        # You shall not pass if this is a problem
        if _missing_keys:
            for m in _missing_keys:
                logger.critical(
                    f"Key {m} in the datamap but not in the master. Not continuing."
                )
            raise RuntimeError(
                "Not continuing. Ensure all keys from datamap are in the master."
            )
    # Column A (minus its header cell) supplies the ordered list of keys.
    cola = [x.value for x in list(self._master_sheet.columns)[0]][1:]
    # Every remaining column represents one template file.
    for col in list(self._master_sheet.columns)[1:]:
        tups = []
        try:
            # Header cell holds the file name; strip the extension.
            file_name = col[0].value.split(".")[0]
        except AttributeError:
            # A None header means we have run past the last populated column.
            logger.warning(f"Found values in cells beyond end of expected end column. "
                           "For most reliable results, use a clean master file.")
            break
        logger.info(f"Extracting data for {file_name} from {self._master_path}")
        # start=1 because row 0 of the column is the file-name header.
        for i, key in enumerate(cola, start=1):
            if key is not None:
                key = key.strip()
            else:
                # TODO - create a log register so this does not have to be repeated for every
                # column of data in the master ().
                logger.warning(f"Found values in cells beyond end of expected end row. "
                               "For most reliable results, use a clean master file.")
                break
            try:
                # dml tuples look like (key, sheet, cellref) — take the first match.
                sheet = [dml[1] for dml in self._dml_line_tup if dml[0] == key][0]
            except IndexError:
                # Key present in the master but not in the datamap: skip it.
                continue
            cd = ColData(
                key=key,
                sheet=sheet,
                cellref=[dml[2] for dml in self._dml_line_tup if dml[0] == key][0],
                value=col[i].value,
                file_name=file_name,
            )
            tups.append(cd)
        master_data.append(tups)
    self.output_repo.write(master_data, from_json=False)
def test_create_master_spreadsheet_with_validation(mock_config,
                                                   datamap_match_test_template,
                                                   template):
    """Running the validating use case records a PASS for a matching template."""
    mock_config.initialise()
    shutil.copy2(template, Path(mock_config.PLATFORM_DOCS_DIR) / "input")
    template_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    datamap_repo = InMemorySingleDatamapRepository(datamap_match_test_template)
    use_case = CreateMasterUseCaseWithValidation(
        datamap_repo, template_repo, MasterOutputRepository)
    use_case.execute("master.xlsx")
    # FIXME - this is not a good test; no assurance about ordering in a list
    assert use_case.final_validation_checks[0].passes == "PASS"
def test_in_memory_datamap_application_to_extracted_data_raises_exception(
        mock_config, datamap, template):
    """Raise exception when the key provided is not in the datamap."""
    mock_config.initialise()
    shutil.copy2(template, Path(mock_config.PLATFORM_DOCS_DIR) / "input")
    template_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    datamap_repo = InMemorySingleDatamapRepository(datamap)
    use_case = ApplyDatamapToExtractionUseCase(datamap_repo, template_repo)
    # note the extra space in the key name
    with pytest.raises(KeyError):
        use_case.query_key("test_template.xlsx", "Funny Date ", "Another Sheet")
    # note the extra space in the sheet name
    with pytest.raises(KeyError):
        use_case.query_key("test_template.xlsx", "Funny Date", "Another Sheet ")
def test_in_extract_files_from_zipfile(mock_config, datamap, templates_zipped):
    """Templates inside a zip are extracted and queryable, including with
    upper-cased file extensions."""
    mock_config.initialise()
    shutil.copy2(datamap, Path(mock_config.PLATFORM_DOCS_DIR) / "input")
    template_repo = InMemoryPopulatedTemplatesZip(templates_zipped)
    datamap_repo = InMemorySingleDatamapRepository(
        Path(mock_config.PLATFORM_DOCS_DIR) / "input" / "datamap.csv")
    use_case = ApplyDatamapToExtractionUseCase(datamap_repo, template_repo)
    use_case.execute()
    expectations = [
        (("test_template_with_introduction_sheet.xlsm", "String Key", "Summary"),
         "This is a string"),
        (("test_template_with_introduction_sheet.xlsm", "Big Float", "Another Sheet"),
         7.2),
        (("test_template_with_introduction_sheet.XLSM", "Big Float", "Another Sheet"),
         7.2),
        (("test_template_with_introduction_sheet.XLSX", "Big Float", "Another Sheet"),
         7.2),
    ]
    for query_args, expected in expectations:
        assert use_case.query_key(*query_args) == expected
def test_skips_type_validation_report_if_no_type_col_in_dm(
        mock_config, datamap_no_type_col_matches_test_template, template):
    """No validation CSV report is written when the datamap lacks a type column."""
    mock_config.initialise()
    shutil.copy2(template, Path(mock_config.PLATFORM_DOCS_DIR) / "input")
    template_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    datamap_repo = InMemorySingleDatamapRepository(
        datamap_no_type_col_matches_test_template)
    use_case = CreateMasterUseCase(datamap_repo, template_repo, MasterOutputRepository)
    use_case.execute("master.xlsx")
    # Report filenames carry a timestamp, so glob for any CSV output.
    report_files = list(mock_config.FULL_PATH_OUTPUT.glob("*.csv"))
    assert len(report_files) == 0
def test_in_memory_datamap_application_to_extracted_data(
        mock_config, datamap, template_with_introduction_sheet):
    """Applying the datamap to extracted template data makes values queryable by key."""
    mock_config.initialise()
    input_dir = Path(mock_config.PLATFORM_DOCS_DIR) / "input"
    shutil.copy2(template_with_introduction_sheet, input_dir)
    shutil.copy2(datamap, input_dir)
    template_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    datamap_repo = InMemorySingleDatamapRepository(
        Path(mock_config.PLATFORM_DOCS_DIR) / "input" / "datamap.csv")
    use_case = ApplyDatamapToExtractionUseCase(datamap_repo, template_repo)
    use_case.execute()
    assert use_case.query_key("test_template_with_introduction_sheet.xlsm",
                              "String Key", "Summary") == "This is a string"
    assert use_case.query_key("test_template_with_introduction_sheet.xlsm",
                              "Big Float", "Another Sheet") == 7.2
def test_create_master_spreadsheet(mock_config, datamap_match_test_template, template):
    """The generated master workbook has the expected header and extracted values."""
    mock_config.initialise()
    shutil.copy2(template, Path(mock_config.PLATFORM_DOCS_DIR) / "input")
    template_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    datamap_repo = InMemorySingleDatamapRepository(datamap_match_test_template)
    use_case = CreateMasterUseCase(datamap_repo, template_repo, MasterOutputRepository)
    use_case.execute("master.xlsx")
    master_path = Path(mock_config.PLATFORM_DOCS_DIR) / "output" / "master.xlsx"
    sheet = load_workbook(master_path).active
    expected_cells = {
        "A1": "file name",
        "B1": "test_template",
        "B2": "2019-10-20T00:00:00",
        "B3": "This is a string",
    }
    for cellref, expected_value in expected_cells.items():
        assert sheet[cellref].value == expected_value
def test_create_master_spreadsheet_from_org_templates(mock_config, org_test_files_dir):
    """The master is built correctly from the organisation's real template files.

    Renamed from ``test_create_master_spreadsheet``: that name duplicated an
    earlier test in this module, so the later definition shadowed the earlier
    one and only one of the two was collected and run by pytest.
    """
    mock_config.initialise()
    # Stage every organisation template into the platform input directory.
    for fl in os.listdir(org_test_files_dir):
        shutil.copy(
            Path.cwd() / "tests" / "resources" / "org_templates" / fl,
            (Path(mock_config.PLATFORM_DOCS_DIR) / "input"),
        )
    tmpl_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    dm_file = mock_config.PLATFORM_DOCS_DIR / "input" / "dft_datamap.csv"
    dm_repo = InMemorySingleDatamapRepository(str(dm_file))
    output_repo = MasterOutputRepository
    uc = CreateMasterUseCase(dm_repo, tmpl_repo, output_repo)
    uc.execute("master.xlsx")
    wb = load_workbook(
        Path(mock_config.PLATFORM_DOCS_DIR) / "output" / "master.xlsx")
    ws = wb.active
    assert ws["A1"].value == "file name"
    # Column B header is derived from the (timestamped) template filename.
    assert "dft1_tmp" in ws["B1"].value
def test_empty_cells_in_template_expected_by_dm_go_into_val_report(
    mock_config,
    datamap_match_test_template_with_missing_val_match_template_equiv,
    template_with_empty_cells_expected_by_datamap,
):
    """Cells the datamap expects but the template leaves empty are reported
    (FAIL/EMPTY when typed, NA when untyped)."""
    mock_config.initialise()
    shutil.copy2(
        template_with_empty_cells_expected_by_datamap,
        Path(mock_config.PLATFORM_DOCS_DIR) / "input",
    )
    template_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    datamap_repo = InMemorySingleDatamapRepository(
        datamap_match_test_template_with_missing_val_match_template_equiv)
    use_case = CreateMasterUseCaseWithValidation(
        datamap_repo, template_repo, MasterOutputRepository)
    use_case.execute("master.xlsx")
    # The report filename includes a timestamp, so locate it with a glob.
    report_files = list(mock_config.FULL_PATH_OUTPUT.glob("*.csv"))
    with open(report_files[0]) as csvfile:
        reader = csv.DictReader(csvfile)
        for _ in range(4):
            next(reader)
        fifth_row = next(reader)  # we want the fifth row
        next(reader)
        seventh_row = next(reader)  # now we want Missing Value 3
    assert fifth_row["Key"] == "Missing Value"
    assert fifth_row["Value"] == "NO VALUE RETURNED"
    assert (
        fifth_row["Filename"]
        == f"{str(mock_config.PLATFORM_DOCS_DIR / 'input')}/test_template_with_empty_cells_expected_by_datamap.xlsm"
    )
    assert fifth_row["Pass Status"] == "FAIL"
    assert fifth_row["Sheet Name"] == "Summary"
    assert fifth_row["Expected Type"] == "TEXT"
    assert fifth_row["Got Type"] == "EMPTY"
    assert seventh_row["Key"] == "Missing Value 3"
    assert seventh_row["Expected Type"] == "NA"
def test_in_memory_datamap_generator(mock_config, datamap_match_test_template,
                                     template):
    """get_values() yields one {(file, key, sheet, cellref): value} dict per line.

    Doesn't really need a generator because it's already in memory, but whatever...
    """
    mock_config.initialise()
    shutil.copy2(template, Path(mock_config.PLATFORM_DOCS_DIR) / "input")
    template_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    datamap_repo = InMemorySingleDatamapRepository(datamap_match_test_template)
    use_case = ApplyDatamapToExtractionUseCase(datamap_repo, template_repo)
    use_case.execute()
    values = use_case.get_values()
    expected_items = [
        {("test_template.xlsx", "Date Key", "Summary", "B2"): "2019-10-20T00:00:00"},
        {("test_template.xlsx", "String Key", "Summary", "B3"): "This is a string"},
        {("test_template.xlsx", "Big Float", "Another Sheet", "F17"): 7.2},
    ]
    for expected in expected_items:
        assert next(values) == expected
def import_and_create_master(echo_funcs, datamap=None):
    """Import all spreadsheet files from input directory and process with datamap.

    echo_funcs - a dict of functions sent from the front-end interface allowing
    for suitable output (stdout, etc), keyed by colour name.
    datamap - optional datamap filename; when omitted, falls back to the
    configured "datamap file name".

    Create master spreadsheet immediately.

    Raises:
        FileNotFoundError: if a required input file is missing.
        DatamapNotCSVException: if the datamap file is not a CSV.
    """
    # patch ECHO_FUNC for datamap creation - hack!
    setattr(engine.use_cases.parsing, "ECHO_FUNC_GREEN", echo_funcs["click_echo_green"])
    setattr(engine.use_cases.parsing, "ECHO_FUNC_RED", echo_funcs["click_echo_red"])
    setattr(engine.use_cases.parsing, "ECHO_FUNC_YELLOW", echo_funcs["click_echo_yellow"])
    setattr(engine.use_cases.parsing, "ECHO_FUNC_WHITE", echo_funcs["click_echo_white"])
    tmpl_repo = InMemoryPopulatedTemplatesRepository(Config.PLATFORM_DOCS_DIR / "input")
    master_fn = Config.config_parser["DEFAULT"]["master file name"]
    if datamap:
        dm_fn = datamap
    else:
        dm_fn = Config.config_parser["DEFAULT"]["datamap file name"]
    dm = Path(tmpl_repo.directory_path) / dm_fn
    dm_repo = InMemorySingleDatamapRepository(dm)
    output_repo = MasterOutputRepository
    uc = CreateMasterUseCase(dm_repo, tmpl_repo, output_repo)
    try:
        uc.execute(master_fn)
    except FileNotFoundError:
        # Bare re-raise preserves the original traceback and message;
        # previously this wrapped the exception as FileNotFoundError(e),
        # discarding both.
        raise
    except DatamapNotCSVException:
        raise
    logger.info("{} successfully created in {}\n".format(
        master_fn, Path(Config.PLATFORM_DOCS_DIR / "output")))
def test_parse_datamap_to_in_memory_use_case(datamap, datamapline_list_objects,
                                             mock_config):
    """ParseDatamapUseCase returns JSON whose first key matches the fixture's."""
    datamap_repo = InMemorySingleDatamapRepository(datamap)
    use_case = ParseDatamapUseCase(datamap_repo)
    parsed = json.loads(use_case.execute())
    assert parsed[0]["key"] == datamapline_list_objects[0].key
def import_and_create_master(echo_funcs, datamap=None, **kwargs):
    """Import all spreadsheet files from input directory and process with datamap.

    echo_funcs - a dict of functions sent from the front-end interface allowing
    for suitable output (stdout, etc), keyed by colour name.
    datamap - optional datamap filename; when omitted, falls back to the
    configured "datamap file name".

    Recognised kwargs:
        rowlimit: override Config.TEMPLATE_ROW_LIMIT.
        inputdir: directory of template files (default: PLATFORM_DOCS_DIR/input).
        validationonly: write only the validation report, no master file.
        zipinput: path to a zip archive of templates instead of a directory.

    Create master spreadsheet immediately.

    Raises:
        FileNotFoundError: if a required input file is missing.
        DatamapNotCSVException: if the datamap file is not a CSV.
        SystemExit: if validation-only output is requested with an untyped datamap.
    """
    # patch ECHO_FUNC for datamap creation - hack!
    setattr(engine.use_cases.parsing, "ECHO_FUNC_GREEN", echo_funcs["click_echo_green"])
    setattr(engine.use_cases.parsing, "ECHO_FUNC_RED", echo_funcs["click_echo_red"])
    setattr(
        engine.use_cases.parsing, "ECHO_FUNC_YELLOW", echo_funcs["click_echo_yellow"]
    )
    setattr(engine.use_cases.parsing, "ECHO_FUNC_WHITE", echo_funcs["click_echo_white"])
    master_fn = Config.config_parser["DEFAULT"]["master file name"]
    # Hoist kwargs lookups: each was previously fetched twice.
    rowlimit = kwargs.get("rowlimit")
    if rowlimit:
        Config.TEMPLATE_ROW_LIMIT = rowlimit
    inputdir = kwargs.get("inputdir")
    if not inputdir:
        inputdir = Config.PLATFORM_DOCS_DIR / "input"
    if kwargs.get("validationonly"):
        output_repo = ValidationOnlyRepository
        master_fn = ""  # no master file is produced in validation-only mode
    else:
        output_repo = MasterOutputRepository
    zipinput = kwargs.get("zipinput")
    if zipinput:
        tmpl_repo = InMemoryPopulatedTemplatesZip(zipinput)
    else:
        tmpl_repo = InMemoryPopulatedTemplatesRepository(inputdir)
    if Config.TEMPLATE_ROW_LIMIT < 50:
        logger.warning(
            f"Row limit is set to {Config.TEMPLATE_ROW_LIMIT} (default is 500). "
            "This may be unintentionally low. Check datamaps import templates --help"
        )
    else:
        logger.info(f"Row limit is set to {Config.TEMPLATE_ROW_LIMIT}.")
    if datamap:
        dm_fn = datamap
    else:
        dm_fn = Config.config_parser["DEFAULT"]["datamap file name"]
    dm = Path(tmpl_repo.directory_path) / dm_fn
    dm_repo = InMemorySingleDatamapRepository(dm)
    if dm_repo.is_typed:
        uc = CreateMasterUseCaseWithValidation(dm_repo, tmpl_repo, output_repo)
    else:
        # Identity check: output_repo holds a class object, not an instance.
        if output_repo is ValidationOnlyRepository:
            logger.critical(
                "Cannot validate data. The datamap needs to have a 'type' column."
            )
            sys.exit(1)
        uc = CreateMasterUseCase(dm_repo, tmpl_repo, output_repo)
    try:
        uc.execute(master_fn)
    except FileNotFoundError:
        # Bare re-raise preserves the original traceback and message;
        # previously this wrapped the exception as FileNotFoundError(e),
        # discarding both.
        raise
    except DatamapNotCSVException:
        raise
def test_datamapline_repository_non_existant_file(datamapline_list_objects):
    """Constructing the repository from a non-CSV path raises DatamapNotCSVException.

    The previous version followed the constructor call with a bare
    ``repo.list_as_objs()[0].key == ...`` comparison — it had no ``assert``
    (so it checked nothing) and was unreachable anyway, since the constructor
    raises inside the ``pytest.raises`` block. That dead line is removed.
    """
    with pytest.raises(DatamapNotCSVException):
        InMemorySingleDatamapRepository("non-file.txt")