def test_datamapline_repository_single_file_repo(datamap,
                                                 datamapline_list_objects):
    """The repo exposes datamap lines both as objects and as JSON."""
    repository = InMemorySingleDatamapRepository(datamap)
    first_line = repository.list_as_objs()[0]
    expected_line = datamapline_list_objects[0]
    assert first_line.key == expected_line.key
    assert repository.list_as_objs()[0].sheet == expected_line.sheet
    parsed = json.loads(repository.list_as_json())
    assert parsed[0]["key"] == "Project/Programme Name"
def test_incorrect_validation_type_is_na(
        mock_config, datamap_match_test_template_incorrect_type_descriptor,
        template):
    """
    We want to show incorrect wanted types in the validation report so the
    user can fix them.
    """
    mock_config.initialise()
    shutil.copy2(template, Path(mock_config.PLATFORM_DOCS_DIR) / "input")
    template_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    datamap_repo = InMemorySingleDatamapRepository(
        datamap_match_test_template_incorrect_type_descriptor)
    use_case = CreateMasterUseCaseWithValidation(
        datamap_repo, template_repo, MasterOutputRepository)
    use_case.execute("master.xlsx")

    # The report filename embeds a timestamp, so locate it via glob.
    report_files = list(mock_config.FULL_PATH_OUTPUT.glob("*.csv"))

    with open(report_files[0]) as csvfile:
        rows = csv.DictReader(csvfile)
        next(rows)
        next(rows)
        third_row = next(rows)  # the third data row carries the bad type
        assert third_row["Expected Type"] == "BUTTER"
def test_validation_csv_report_with_mixture_of_included_types(
        mock_config, datamap_missing_one_type, template):
    """A datamap line without a type is reported as UNTYPED / NA."""
    mock_config.initialise()
    shutil.copy2(template, Path(mock_config.PLATFORM_DOCS_DIR) / "input")
    template_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    datamap_repo = InMemorySingleDatamapRepository(datamap_missing_one_type)
    use_case = CreateMasterUseCaseWithValidation(
        datamap_repo, template_repo, MasterOutputRepository)
    use_case.execute("master.xlsx")

    # The report filename embeds a timestamp, so locate it via glob.
    report_files = list(mock_config.FULL_PATH_OUTPUT.glob("*.csv"))

    with open(report_files[0]) as csvfile:
        rows = csv.DictReader(csvfile)
        next(rows)
        second_row = next(rows)  # the second data row is the untyped key
        expected_filename = (
            f"{str(mock_config.PLATFORM_DOCS_DIR / 'input')}/test_template.xlsx"
        )
        assert second_row["Filename"] == expected_filename
        assert second_row["Pass Status"] == "UNTYPED"
        assert second_row["Key"] == "String Key"
        assert second_row["Sheet Name"] == "Summary"
        assert second_row["Expected Type"] == "NA"
# Ejemplo n.º 4 (0) — scraped-example separator, commented out so the file parses
def test_zip_with_directory_raises_exception(mock_config, datamap,
                                             templates_zipped_containing_dir):
    """A zip archive that contains a directory must raise NestedZipError."""
    mock_config.initialise()
    docs_dir = Path(mock_config.PLATFORM_DOCS_DIR)
    shutil.copy2(datamap, docs_dir / "input")
    zip_repo = InMemoryPopulatedTemplatesZip(templates_zipped_containing_dir)
    datamap_repo = InMemorySingleDatamapRepository(
        docs_dir / "input" / "datamap.csv")
    use_case = ApplyDatamapToExtractionUseCase(datamap_repo, zip_repo)
    with pytest.raises(NestedZipError):
        use_case.execute()
# Ejemplo n.º 5 (0) — scraped-example separator, commented out so the file parses
def test_in_extract_files_from_zipfile_with_deep_structure_raises_exception(
        mock_config, datamap, templates_zipped_deep_structure):
    """A zip with nested directory structure must raise NestedZipError."""
    mock_config.initialise()
    docs_dir = Path(mock_config.PLATFORM_DOCS_DIR)
    shutil.copy2(datamap, docs_dir / "input")
    template_repo = InMemoryPopulatedTemplatesZip(
        templates_zipped_deep_structure)
    datamap_repo = InMemorySingleDatamapRepository(
        docs_dir / "input" / "datamap.csv")
    use_case = ApplyDatamapToExtractionUseCase(datamap_repo, template_repo)
    with pytest.raises(NestedZipError):
        use_case.execute()
# Ejemplo n.º 6 (0) — scraped-example separator, commented out so the file parses
    def execute(self) -> None:
        """
        Writes a master file to multiple templates using blank_template,
        based on the blank_template and the datamap.

        Walks the master sheet column by column: column A holds the
        datamap keys; each later column holds one template's values, with
        the template filename in its header cell. Raises RuntimeError if
        the datamap has keys missing from the master. Collected rows are
        handed to ``self.output_repo.write`` at the end.
        """

        master_data: MASTER_DATA_FOR_FILE = []

        self.parse_dm_repo = InMemorySingleDatamapRepository(str(self._datamap))
        self._parse_dm_uc = ParseDatamapUseCase(self.parse_dm_repo)
        if not self._check_datamap_matches_cola():
            _missing_keys = self._get_keys_in_datamap_not_in_master()
            # You shall not pass if this is a problem
            if _missing_keys:
                for m in _missing_keys:
                    logger.critical(
                        f"Key {m} in the datamap but not in the master. Not continuing."
                    )
                raise RuntimeError(
                    "Not continuing. Ensure all keys from datamap are in the master."
                )
        # Column A minus its header cell is the ordered list of datamap keys.
        cola = [x.value for x in list(self._master_sheet.columns)[0]][1:]
        for col in list(self._master_sheet.columns)[1:]:
            tups = []
            try:
                # Header cell holds "<filename>.<ext>"; keep only the stem.
                file_name = col[0].value.split(".")[0]
            except AttributeError:
                # A None header means we have run past the populated columns.
                logger.warning(f"Found values in cells beyond end of expected end column. "
                               "For most reliable results, use a clean master file.")
                break
            logger.info(f"Extracting data for {file_name} from {self._master_path}")
            # enumerate from 1: row 0 of the column is the filename header.
            for i, key in enumerate(cola, start=1):
                if key is not None:
                    key = key.strip()
                else:
                    # TODO - create a log register so this does not have to be repeated for every
                    #   column of data in the master ().
                    logger.warning(f"Found values in cells beyond end of expected end row. "
                                   "For most reliable results, use a clean master file.")
                    break
                try:
                    # Look up the sheet for this key in the datamap line tuples;
                    # a key absent from the datamap is simply skipped.
                    sheet = [dml[1] for dml in self._dml_line_tup if dml[0] == key][0]
                except IndexError:
                    continue
                cd = ColData(
                    key=key,
                    sheet=sheet,
                    cellref=[dml[2] for dml in self._dml_line_tup if dml[0] == key][0],
                    value=col[i].value,
                    file_name=file_name,
                )
                tups.append(cd)
            master_data.append(tups)

        self.output_repo.write(master_data, from_json=False)
def test_create_master_spreadsheet_with_validation(mock_config,
                                                   datamap_match_test_template,
                                                   template):
    """A matching datamap and template should produce a PASS validation."""
    mock_config.initialise()
    shutil.copy2(template, Path(mock_config.PLATFORM_DOCS_DIR) / "input")
    template_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    datamap_repo = InMemorySingleDatamapRepository(datamap_match_test_template)
    use_case = CreateMasterUseCaseWithValidation(
        datamap_repo, template_repo, MasterOutputRepository)
    use_case.execute("master.xlsx")
    # FIXME - this is not a good test; no assurance about ordering in a list
    assert use_case.final_validation_checks[0].passes == "PASS"
# Ejemplo n.º 8 (0) — scraped-example separator, commented out so the file parses
def test_in_memory_datamap_application_to_extracted_data_raises_exception(
        mock_config, datamap, template):
    "Raise exception when the key provided is not in the datamap"
    mock_config.initialise()
    shutil.copy2(template, Path(mock_config.PLATFORM_DOCS_DIR) / "input")
    template_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    datamap_repo = InMemorySingleDatamapRepository(datamap)
    use_case = ApplyDatamapToExtractionUseCase(datamap_repo, template_repo)
    # Trailing whitespace in either the key or the sheet name must not match.
    for bad_key, bad_sheet in (("Funny Date ", "Another Sheet"),
                               ("Funny Date", "Another Sheet ")):
        with pytest.raises(KeyError):
            use_case.query_key("test_template.xlsx", bad_key, bad_sheet)
# Ejemplo n.º 9 (0) — scraped-example separator, commented out so the file parses
def test_in_extract_files_from_zipfile(mock_config, datamap, templates_zipped):
    """Zipped templates are extracted and queryable; the file extension is
    matched case-insensitively (and even across xlsm/xlsx)."""
    mock_config.initialise()
    shutil.copy2(datamap, Path(mock_config.PLATFORM_DOCS_DIR) / "input")
    template_repo = InMemoryPopulatedTemplatesZip(templates_zipped)
    datamap_repo = InMemorySingleDatamapRepository(
        Path(mock_config.PLATFORM_DOCS_DIR) / "input" / "datamap.csv")
    use_case = ApplyDatamapToExtractionUseCase(datamap_repo, template_repo)
    use_case.execute()
    stem = "test_template_with_introduction_sheet"
    assert use_case.query_key(f"{stem}.xlsm", "String Key",
                              "Summary") == "This is a string"
    for extension in ("xlsm", "XLSM", "XLSX"):
        assert use_case.query_key(f"{stem}.{extension}", "Big Float",
                                  "Another Sheet") == 7.2
def test_skips_type_validation_report_if_no_type_col_in_dm(
        mock_config, datamap_no_type_col_matches_test_template, template):
    """Without a 'type' column in the datamap, no validation CSV is written."""
    mock_config.initialise()
    shutil.copy2(template, Path(mock_config.PLATFORM_DOCS_DIR) / "input")
    template_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    datamap_repo = InMemorySingleDatamapRepository(
        datamap_no_type_col_matches_test_template)
    use_case = CreateMasterUseCase(datamap_repo, template_repo,
                                   MasterOutputRepository)
    use_case.execute("master.xlsx")

    # Report names carry a timestamp, so glob for any CSV output at all.
    report_files = list(mock_config.FULL_PATH_OUTPUT.glob("*.csv"))
    assert len(report_files) == 0
# Ejemplo n.º 11 (0) — scraped-example separator, commented out so the file parses
def test_in_memory_datamap_application_to_extracted_data(
        mock_config, datamap, template_with_introduction_sheet):
    """Extracted template values can be queried by file, key and sheet."""
    mock_config.initialise()
    input_dir = Path(mock_config.PLATFORM_DOCS_DIR) / "input"
    shutil.copy2(template_with_introduction_sheet, input_dir)
    shutil.copy2(datamap, input_dir)
    template_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    datamap_repo = InMemorySingleDatamapRepository(
        Path(mock_config.PLATFORM_DOCS_DIR) / "input" / "datamap.csv")
    use_case = ApplyDatamapToExtractionUseCase(datamap_repo, template_repo)
    use_case.execute()
    template_file = "test_template_with_introduction_sheet.xlsm"
    assert use_case.query_key(template_file, "String Key",
                              "Summary") == "This is a string"
    assert use_case.query_key(template_file, "Big Float",
                              "Another Sheet") == 7.2
# Ejemplo n.º 12 (0) — scraped-example separator, commented out so the file parses
def test_create_master_spreadsheet(mock_config, datamap_match_test_template,
                                   template):
    """The generated master workbook holds the expected header and values."""
    mock_config.initialise()
    shutil.copy2(template, Path(mock_config.PLATFORM_DOCS_DIR) / "input")
    template_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    datamap_repo = InMemorySingleDatamapRepository(datamap_match_test_template)
    use_case = CreateMasterUseCase(datamap_repo, template_repo,
                                   MasterOutputRepository)
    use_case.execute("master.xlsx")
    workbook = load_workbook(
        Path(mock_config.PLATFORM_DOCS_DIR) / "output" / "master.xlsx")
    active_sheet = workbook.active
    expected_cells = {
        "A1": "file name",
        "B1": "test_template",
        "B2": "2019-10-20T00:00:00",
        "B3": "This is a string",
    }
    for cellref, expected_value in expected_cells.items():
        assert active_sheet[cellref].value == expected_value
# Ejemplo n.º 13 (0) — scraped-example separator, commented out so the file parses
def test_create_master_spreadsheet_from_org_templates(mock_config,
                                                      org_test_files_dir):
    """Build a master from the organisation's real template files.

    Renamed from ``test_create_master_spreadsheet``: a second module-level
    function with that name would shadow the earlier test, so pytest would
    collect and run only one of the two.
    """
    mock_config.initialise()
    # Stage every organisation template into the working input directory.
    for fl in os.listdir(org_test_files_dir):
        shutil.copy(
            Path.cwd() / "tests" / "resources" / "org_templates" / fl,
            (Path(mock_config.PLATFORM_DOCS_DIR) / "input"),
        )
    tmpl_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    dm_file = mock_config.PLATFORM_DOCS_DIR / "input" / "dft_datamap.csv"
    dm_repo = InMemorySingleDatamapRepository(str(dm_file))
    output_repo = MasterOutputRepository
    uc = CreateMasterUseCase(dm_repo, tmpl_repo, output_repo)
    uc.execute("master.xlsx")
    wb = load_workbook(
        Path(mock_config.PLATFORM_DOCS_DIR) / "output" / "master.xlsx")
    ws = wb.active
    assert ws["A1"].value == "file name"
    assert "dft1_tmp" in ws["B1"].value
def test_empty_cells_in_template_expected_by_dm_go_into_val_report(
    mock_config,
    datamap_match_test_template_with_missing_val_match_template_equiv,
    template_with_empty_cells_expected_by_datamap,
):
    """Cells the datamap expects but the template leaves empty are reported."""
    mock_config.initialise()
    shutil.copy2(
        template_with_empty_cells_expected_by_datamap,
        (Path(mock_config.PLATFORM_DOCS_DIR) / "input"),
    )
    template_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    datamap_repo = InMemorySingleDatamapRepository(
        datamap_match_test_template_with_missing_val_match_template_equiv)
    use_case = CreateMasterUseCaseWithValidation(
        datamap_repo, template_repo, MasterOutputRepository)
    use_case.execute("master.xlsx")

    # The report filename embeds a timestamp, so locate it via glob.
    report_files = list(mock_config.FULL_PATH_OUTPUT.glob("*.csv"))

    with open(report_files[0]) as csvfile:
        rows = csv.DictReader(csvfile)
        for _ in range(4):
            next(rows)
        fifth_row = next(rows)  # fifth data row: the missing TEXT value
        assert fifth_row["Key"] == "Missing Value"
        assert fifth_row["Value"] == "NO VALUE RETURNED"
        assert (
            fifth_row["Filename"] ==
            f"{str(mock_config.PLATFORM_DOCS_DIR / 'input')}/test_template_with_empty_cells_expected_by_datamap.xlsm"
        )
        assert fifth_row["Pass Status"] == "FAIL"
        assert fifth_row["Sheet Name"] == "Summary"
        assert fifth_row["Expected Type"] == "TEXT"
        assert fifth_row["Got Type"] == "EMPTY"
        next(rows)  # skip one row to reach Missing Value 3
        seventh_row = next(rows)
        assert seventh_row["Key"] == "Missing Value 3"
        assert seventh_row["Expected Type"] == "NA"
# Ejemplo n.º 15 (0) — scraped-example separator, commented out so the file parses
def test_in_memory_datamap_generator(mock_config, datamap_match_test_template,
                                     template):
    "Doesn't really need a generator because its already in memory, but whatever..."
    mock_config.initialise()
    shutil.copy2(template, Path(mock_config.PLATFORM_DOCS_DIR) / "input")
    template_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    datamap_repo = InMemorySingleDatamapRepository(datamap_match_test_template)
    use_case = ApplyDatamapToExtractionUseCase(datamap_repo, template_repo)
    use_case.execute()
    values = use_case.get_values()
    # Each yielded item maps (file, key, sheet, cellref) -> extracted value.
    expected_items = [
        {("test_template.xlsx", "Date Key", "Summary", "B2"):
         "2019-10-20T00:00:00"},
        {("test_template.xlsx", "String Key", "Summary", "B3"):
         "This is a string"},
        {("test_template.xlsx", "Big Float", "Another Sheet", "F17"): 7.2},
    ]
    for expected in expected_items:
        assert next(values) == expected
# Ejemplo n.º 16 (0) — scraped-example separator, commented out so the file parses
def import_and_create_master(echo_funcs, datamap=None):
    """Import all spreadsheet files from input directory and process with datamap.

    Create the master spreadsheet immediately.

    Args:
        echo_funcs: mapping of echo-function names ("click_echo_green",
            "click_echo_red", "click_echo_yellow", "click_echo_white") to
            front-end callables used for coloured terminal output.
        datamap: optional datamap filename; when omitted, the
            "datamap file name" entry from the config is used.

    Raises:
        FileNotFoundError: when a required input file is missing.
        DatamapNotCSVException: when the datamap is not a CSV file.
    """
    # patch ECHO_FUNC for datamap creation - hack!
    setattr(engine.use_cases.parsing, "ECHO_FUNC_GREEN",
            echo_funcs["click_echo_green"])
    setattr(engine.use_cases.parsing, "ECHO_FUNC_RED",
            echo_funcs["click_echo_red"])
    setattr(engine.use_cases.parsing, "ECHO_FUNC_YELLOW",
            echo_funcs["click_echo_yellow"])
    setattr(engine.use_cases.parsing, "ECHO_FUNC_WHITE",
            echo_funcs["click_echo_white"])

    tmpl_repo = InMemoryPopulatedTemplatesRepository(Config.PLATFORM_DOCS_DIR /
                                                     "input")
    master_fn = Config.config_parser["DEFAULT"]["master file name"]
    if datamap:
        dm_fn = datamap
    else:
        dm_fn = Config.config_parser["DEFAULT"]["datamap file name"]
    dm = Path(tmpl_repo.directory_path) / dm_fn
    dm_repo = InMemorySingleDatamapRepository(dm)
    output_repo = MasterOutputRepository
    uc = CreateMasterUseCase(dm_repo, tmpl_repo, output_repo)
    # Let FileNotFoundError and DatamapNotCSVException propagate unchanged.
    # The previous ``raise FileNotFoundError(e)`` re-wrapped the exception,
    # discarding the original traceback and the filename/errno attributes
    # that callers may inspect.
    uc.execute(master_fn)
    logger.info("{} successfully created in {}\n".format(
        master_fn, Path(Config.PLATFORM_DOCS_DIR / "output")))
# Ejemplo n.º 17 (0) — scraped-example separator, commented out so the file parses
def test_parse_datamap_to_in_memory_use_case(datamap, datamapline_list_objects,
                                             mock_config):
    """The parse use case returns the datamap lines serialised as JSON."""
    repository = InMemorySingleDatamapRepository(datamap)
    use_case = ParseDatamapUseCase(repository)
    parsed = json.loads(use_case.execute())
    assert parsed[0]["key"] == datamapline_list_objects[0].key
# Ejemplo n.º 18 (0) — scraped-example separator, commented out so the file parses
def import_and_create_master(echo_funcs, datamap=None, **kwargs):
    """Import all spreadsheet files from input directory and process with datamap.

    Create the master spreadsheet immediately.

    Args:
        echo_funcs: mapping of echo-function names ("click_echo_green",
            "click_echo_red", "click_echo_yellow", "click_echo_white") to
            front-end callables used for coloured terminal output.
        datamap: optional datamap filename; when omitted, the
            "datamap file name" entry from the config is used.
        **kwargs: optional behaviour switches —
            rowlimit: override Config.TEMPLATE_ROW_LIMIT;
            inputdir: alternative template input directory;
            validationonly: write only the validation report, no master;
            zipinput: path to a zip archive of templates.

    Raises:
        FileNotFoundError: when a required input file is missing.
        DatamapNotCSVException: when the datamap is not a CSV file.
        SystemExit: when validation-only is requested but the datamap has
            no 'type' column to validate against.
    """
    # patch ECHO_FUNC for datamap creation - hack!
    setattr(engine.use_cases.parsing, "ECHO_FUNC_GREEN", echo_funcs["click_echo_green"])
    setattr(engine.use_cases.parsing, "ECHO_FUNC_RED", echo_funcs["click_echo_red"])
    setattr(
        engine.use_cases.parsing, "ECHO_FUNC_YELLOW", echo_funcs["click_echo_yellow"]
    )
    setattr(engine.use_cases.parsing, "ECHO_FUNC_WHITE", echo_funcs["click_echo_white"])

    master_fn = Config.config_parser["DEFAULT"]["master file name"]
    if kwargs.get("rowlimit"):
        Config.TEMPLATE_ROW_LIMIT = kwargs.get("rowlimit")

    if kwargs.get("inputdir"):
        inputdir = kwargs.get("inputdir")
    else:
        inputdir = Config.PLATFORM_DOCS_DIR / "input"
    if kwargs.get("validationonly"):
        output_repo = ValidationOnlyRepository
        master_fn = ""
    else:
        output_repo = MasterOutputRepository

    if kwargs.get("zipinput"):
        tmpl_repo = InMemoryPopulatedTemplatesZip(kwargs.get("zipinput"))
    else:
        tmpl_repo = InMemoryPopulatedTemplatesRepository(inputdir)

    if Config.TEMPLATE_ROW_LIMIT < 50:
        logger.warning(
            f"Row limit is set to {Config.TEMPLATE_ROW_LIMIT} (default is 500). This may be unintentionally low. Check datamaps import templates --help"
        )
    else:
        logger.info(f"Row limit is set to {Config.TEMPLATE_ROW_LIMIT}.")

    if datamap:
        dm_fn = datamap
    else:
        dm_fn = Config.config_parser["DEFAULT"]["datamap file name"]
    dm = Path(tmpl_repo.directory_path) / dm_fn
    dm_repo = InMemorySingleDatamapRepository(dm)
    if dm_repo.is_typed:
        uc = CreateMasterUseCaseWithValidation(dm_repo, tmpl_repo, output_repo)
    else:
        if output_repo == ValidationOnlyRepository:
            logger.critical(
                "Cannot validate data. The datamap needs to have a 'type' column."
            )
            sys.exit(1)
        uc = CreateMasterUseCase(dm_repo, tmpl_repo, output_repo)
    # Let FileNotFoundError and DatamapNotCSVException propagate unchanged.
    # The previous ``raise FileNotFoundError(e)`` re-wrapped the exception,
    # discarding the original traceback and the filename/errno attributes
    # that callers may inspect.
    uc.execute(master_fn)
# Ejemplo n.º 19 (0) — scraped-example separator, commented out so the file parses
def test_datamapline_repository_non_existant_file(datamapline_list_objects):
    """Constructing the repo from a non-CSV path raises DatamapNotCSVException.

    Fixes: the old ``# noqua`` comment was a typo for ``# noqa``, and the
    trailing bare comparison (``==`` with no ``assert``) was dead code —
    construction raises, so nothing after it could ever execute.
    """
    with pytest.raises(DatamapNotCSVException):
        InMemorySingleDatamapRepository("non-file.txt")  # noqa