def test_incorrect_validation_type_is_na(
        mock_config, datamap_match_test_template_incorrect_type_descriptor,
        template):
    """
    Check that an incorrect wanted type appears in the validation report.

    We want to show incorrect wanted types in the validation report so the
    user can fix them. The third data row of the CSV report must carry
    "BUTTER" in its "Expected Type" column.

    NOTE(review): the test name says "is_na" but the assertion expects
    "BUTTER" (presumably the bad descriptor from the fixture datamap) -
    confirm which behaviour is intended.
    """
    mock_config.initialise()
    docs_dir = mock_config.PLATFORM_DOCS_DIR
    shutil.copy2(template, (Path(docs_dir) / "input"))
    tmpl_repo = InMemoryPopulatedTemplatesRepository(docs_dir / "input")
    dm_repo = InMemorySingleDatamapRepository(
        datamap_match_test_template_incorrect_type_descriptor)
    uc = CreateMasterUseCaseWithValidation(dm_repo, tmpl_repo,
                                           MasterOutputRepository)
    uc.execute("master.xlsx")

    # The report filename includes a timestamp, so find it by extension.
    reports = list(mock_config.FULL_PATH_OUTPUT.glob("*.csv"))
    # Fail with a readable message instead of an IndexError below.
    assert reports, "no validation CSV report was written to the output dir"

    # The csv module requires files to be opened with newline="" so that
    # line endings and embedded newlines are handled by the reader itself.
    with open(reports[0], newline="") as csvfile:
        reader = csv.DictReader(csvfile)
        for _ in range(2):  # skip the first two data rows
            next(reader)
        row = next(reader)  # we need the third row
        assert row["Expected Type"] == "BUTTER"
def test_validation_csv_report_with_mixture_of_included_types(
        mock_config, datamap_missing_one_type, template):
    """
    Check the validation CSV row for a datamap line that has no type.

    The second data row of the report must describe "String Key" on the
    "Summary" sheet with pass status "UNTYPED" and expected type "NA".
    """
    mock_config.initialise()
    docs_dir = mock_config.PLATFORM_DOCS_DIR
    shutil.copy2(template, (Path(docs_dir) / "input"))
    tmpl_repo = InMemoryPopulatedTemplatesRepository(docs_dir / "input")
    dm_repo = InMemorySingleDatamapRepository(datamap_missing_one_type)
    uc = CreateMasterUseCaseWithValidation(dm_repo, tmpl_repo,
                                           MasterOutputRepository)
    uc.execute("master.xlsx")

    # The report filename includes a timestamp, so find it by extension.
    reports = list(mock_config.FULL_PATH_OUTPUT.glob("*.csv"))
    # Fail with a readable message instead of an IndexError below.
    assert reports, "no validation CSV report was written to the output dir"

    expected_filename = (
        f"{str(mock_config.PLATFORM_DOCS_DIR / 'input')}/test_template.xlsx"
    )

    # The csv module requires files to be opened with newline="" so that
    # line endings and embedded newlines are handled by the reader itself.
    with open(reports[0], newline="") as csvfile:
        reader = csv.DictReader(csvfile)
        next(reader)  # skip the first data row
        row = next(reader)  # we need the second row
        assert row["Filename"] == expected_filename
        assert row["Pass Status"] == "UNTYPED"
        assert row["Key"] == "String Key"
        assert row["Sheet Name"] == "Summary"
        assert row["Expected Type"] == "NA"
def test_create_master_spreadsheet_with_validation(mock_config,
                                                   datamap_match_test_template,
                                                   template):
    """
    Run the validating master use case and check the first validation
    result is a pass.
    """
    mock_config.initialise()
    docs_dir = mock_config.PLATFORM_DOCS_DIR
    shutil.copy2(template, (Path(docs_dir) / "input"))

    templates = InMemoryPopulatedTemplatesRepository(docs_dir / "input")
    datamap_repo = InMemorySingleDatamapRepository(datamap_match_test_template)
    use_case = CreateMasterUseCaseWithValidation(datamap_repo, templates,
                                                 MasterOutputRepository)
    use_case.execute("master.xlsx")

    # FIXME - this is not a good test; no assurance about ordering in a list
    first_check = use_case.final_validation_checks[0]
    assert first_check.passes == "PASS"
Exemple #4
0
def test_in_memory_datamap_application_to_extracted_data_raises_exception(
        mock_config, datamap, template):
    """Raise exception when the key provided is not in the datamap."""
    mock_config.initialise()
    docs_dir = mock_config.PLATFORM_DOCS_DIR
    shutil.copy2(template, (Path(docs_dir) / "input"))

    templates = InMemoryPopulatedTemplatesRepository(docs_dir / "input")
    datamap_repo = InMemorySingleDatamapRepository(datamap)
    use_case = ApplyDatamapToExtractionUseCase(datamap_repo, templates)

    # Each lookup differs from a valid one only by a trailing space.
    bad_lookups = (
        ("Funny Date ", "Another Sheet"),  # extra space in the key name
        ("Funny Date", "Another Sheet "),  # extra space in the sheet name
    )
    for key, sheet in bad_lookups:
        with pytest.raises(KeyError):
            use_case.query_key("test_template.xlsx", key, sheet)
def test_skips_type_validation_report_if_no_type_col_in_dm(
        mock_config, datamap_no_type_col_matches_test_template, template):
    """
    Running the plain master use case must leave no CSV file in the
    output directory.
    """
    mock_config.initialise()
    docs_dir = mock_config.PLATFORM_DOCS_DIR
    shutil.copy2(template, (Path(docs_dir) / "input"))

    templates = InMemoryPopulatedTemplatesRepository(docs_dir / "input")
    datamap_repo = InMemorySingleDatamapRepository(
        datamap_no_type_col_matches_test_template)
    use_case = CreateMasterUseCase(datamap_repo, templates,
                                   MasterOutputRepository)
    use_case.execute("master.xlsx")

    # Report filenames include a timestamp, so search by extension.
    csv_reports = list(mock_config.FULL_PATH_OUTPUT.glob("*.csv"))
    assert csv_reports == []
Exemple #6
0
def test_in_memory_datamap_application_to_extracted_data(
        mock_config, datamap, template_with_introduction_sheet):
    """
    Apply a datamap copied into the input directory to the extracted
    template data and query two known keys.
    """
    mock_config.initialise()
    docs_dir = mock_config.PLATFORM_DOCS_DIR

    # Both the template and the datamap live in the input directory.
    for source in (template_with_introduction_sheet, datamap):
        shutil.copy2(source, (Path(docs_dir) / "input"))

    templates = InMemoryPopulatedTemplatesRepository(docs_dir / "input")
    datamap_repo = InMemorySingleDatamapRepository(
        Path(docs_dir) / "input" / "datamap.csv")
    use_case = ApplyDatamapToExtractionUseCase(datamap_repo, templates)
    use_case.execute()

    workbook = "test_template_with_introduction_sheet.xlsm"
    string_value = use_case.query_key(workbook, "String Key", "Summary")
    assert string_value == "This is a string"
    float_value = use_case.query_key(workbook, "Big Float", "Another Sheet")
    assert float_value == 7.2
Exemple #7
0
def test_create_master_spreadsheet(mock_config, datamap_match_test_template,
                                   template):
    """
    Create master.xlsx from a single template and check the first cells
    of the active worksheet.
    """
    mock_config.initialise()
    docs_dir = mock_config.PLATFORM_DOCS_DIR
    shutil.copy2(template, (Path(docs_dir) / "input"))

    templates = InMemoryPopulatedTemplatesRepository(docs_dir / "input")
    datamap_repo = InMemorySingleDatamapRepository(datamap_match_test_template)
    use_case = CreateMasterUseCase(datamap_repo, templates,
                                   MasterOutputRepository)
    use_case.execute("master.xlsx")

    master_path = Path(mock_config.PLATFORM_DOCS_DIR) / "output" / "master.xlsx"
    sheet = load_workbook(master_path).active

    expected_cells = (
        ("A1", "file name"),
        ("B1", "test_template"),
        ("B2", "2019-10-20T00:00:00"),
        ("B3", "This is a string"),
    )
    for ref, expected in expected_cells:
        assert sheet[ref].value == expected
Exemple #8
0
def test_create_master_spreadsheet(mock_config, org_test_files_dir):
    """
    Create master.xlsx from the organisation test files and check the
    header cells of the active worksheet.

    Every entry of ``org_test_files_dir`` is copied into the input
    directory. The files are copied from that same fixture directory (not
    from a path relative to the current working directory), so the test
    does not depend on where pytest is launched from.

    NOTE(review): this assumes the fixture points at the directory that
    holds the org templates, including "dft_datamap.csv" - confirm.
    """
    mock_config.initialise()
    source_dir = Path(org_test_files_dir)
    input_dir = Path(mock_config.PLATFORM_DOCS_DIR) / "input"
    for fl in os.listdir(org_test_files_dir):
        shutil.copy(source_dir / fl, input_dir)

    tmpl_repo = InMemoryPopulatedTemplatesRepository(
        mock_config.PLATFORM_DOCS_DIR / "input")
    dm_file = mock_config.PLATFORM_DOCS_DIR / "input" / "dft_datamap.csv"
    dm_repo = InMemorySingleDatamapRepository(str(dm_file))
    uc = CreateMasterUseCase(dm_repo, tmpl_repo, MasterOutputRepository)
    uc.execute("master.xlsx")

    wb = load_workbook(
        Path(mock_config.PLATFORM_DOCS_DIR) / "output" / "master.xlsx")
    ws = wb.active
    assert ws["A1"].value == "file name"
    assert "dft1_tmp" in ws["B1"].value
def test_empty_cells_in_template_expected_by_dm_go_into_val_report(
    mock_config,
    datamap_match_test_template_with_missing_val_match_template_equiv,
    template_with_empty_cells_expected_by_datamap,
):
    """
    Check that cells the datamap expects but the template leaves empty
    are listed in the validation CSV report.

    The fifth data row must be the failing "Missing Value" entry
    (expected TEXT, got EMPTY); the seventh must be "Missing Value 3"
    with expected type "NA".
    """
    mock_config.initialise()
    docs_dir = mock_config.PLATFORM_DOCS_DIR
    shutil.copy2(
        template_with_empty_cells_expected_by_datamap,
        (Path(docs_dir) / "input"),
    )
    tmpl_repo = InMemoryPopulatedTemplatesRepository(docs_dir / "input")
    dm_repo = InMemorySingleDatamapRepository(
        datamap_match_test_template_with_missing_val_match_template_equiv)
    uc = CreateMasterUseCaseWithValidation(dm_repo, tmpl_repo,
                                           MasterOutputRepository)
    uc.execute("master.xlsx")

    # The report filename includes a timestamp, so find it by extension.
    reports = list(mock_config.FULL_PATH_OUTPUT.glob("*.csv"))
    # Fail with a readable message instead of an IndexError below.
    assert reports, "no validation CSV report was written to the output dir"

    expected_filename = (
        f"{str(mock_config.PLATFORM_DOCS_DIR / 'input')}/test_template_with_empty_cells_expected_by_datamap.xlsm"
    )

    # The csv module requires files to be opened with newline="" so that
    # line endings and embedded newlines are handled by the reader itself.
    with open(reports[0], newline="") as csvfile:
        reader = csv.DictReader(csvfile)
        for _ in range(4):  # skip the first four data rows
            next(reader)
        row = next(reader)  # we want the fifth row
        assert row["Key"] == "Missing Value"
        assert row["Value"] == "NO VALUE RETURNED"
        assert row["Filename"] == expected_filename
        assert row["Pass Status"] == "FAIL"
        assert row["Sheet Name"] == "Summary"
        assert row["Expected Type"] == "TEXT"
        assert row["Got Type"] == "EMPTY"

        next(reader)  # skip the sixth row
        row = next(reader)  # now we want Missing Value 3
        assert row["Key"] == "Missing Value 3"
        assert row["Expected Type"] == "NA"
Exemple #10
0
def test_in_memory_datamap_generator(mock_config, datamap_match_test_template,
                                     template):
    """
    Check the first three items yielded by ``get_values()``.

    Doesn't really need a generator because it's already in memory, but
    whatever...
    """
    mock_config.initialise()
    docs_dir = mock_config.PLATFORM_DOCS_DIR
    shutil.copy2(template, (Path(docs_dir) / "input"))

    templates = InMemoryPopulatedTemplatesRepository(docs_dir / "input")
    datamap_repo = InMemorySingleDatamapRepository(datamap_match_test_template)
    use_case = ApplyDatamapToExtractionUseCase(datamap_repo, templates)
    use_case.execute()
    values = use_case.get_values()

    # Items are single-entry dicts keyed by (file, key, sheet, cell ref).
    expected_in_order = [
        {("test_template.xlsx", "Date Key", "Summary", "B2"):
         "2019-10-20T00:00:00"},
        {("test_template.xlsx", "String Key", "Summary", "B3"):
         "This is a string"},
        {("test_template.xlsx", "Big Float", "Another Sheet", "F17"): 7.2},
    ]
    for expected in expected_in_order:
        assert next(values) == expected
Exemple #11
0
def import_and_create_master(echo_funcs, datamap=None):
    """Import all spreadsheet files from input directory and process with datamap.

    Create master spreadsheet immediately.

    Args:
        echo_funcs: mapping sent from the front-end interface allowing for
            suitable output (stdout, etc). It must provide the keys
            "click_echo_green", "click_echo_red", "click_echo_yellow" and
            "click_echo_white".
        datamap: optional datamap file name, resolved relative to the
            templates repository directory. When falsy, the
            "datamap file name" entry of the DEFAULT config section is used.

    Raises:
        KeyError: if ``echo_funcs`` lacks one of the required keys.
        FileNotFoundError: propagated unchanged from the use case.
        DatamapNotCSVException: propagated unchanged from the use case.
    """
    # patch ECHO_FUNC for datamap creation - hack!
    parsing = engine.use_cases.parsing
    parsing.ECHO_FUNC_GREEN = echo_funcs["click_echo_green"]
    parsing.ECHO_FUNC_RED = echo_funcs["click_echo_red"]
    parsing.ECHO_FUNC_YELLOW = echo_funcs["click_echo_yellow"]
    parsing.ECHO_FUNC_WHITE = echo_funcs["click_echo_white"]

    tmpl_repo = InMemoryPopulatedTemplatesRepository(Config.PLATFORM_DOCS_DIR /
                                                     "input")
    master_fn = Config.config_parser["DEFAULT"]["master file name"]
    dm_fn = datamap or Config.config_parser["DEFAULT"]["datamap file name"]
    dm_repo = InMemorySingleDatamapRepository(
        Path(tmpl_repo.directory_path) / dm_fn)
    uc = CreateMasterUseCase(dm_repo, tmpl_repo, MasterOutputRepository)

    # Errors propagate as raised: re-wrapping FileNotFoundError in a new
    # instance would drop its errno/filename attributes, and catching an
    # exception only to re-raise it adds nothing.
    uc.execute(master_fn)

    output_dir = Path(Config.PLATFORM_DOCS_DIR / "output")
    logger.info(f"{master_fn} successfully created in {output_dir}\n")
Exemple #12
0
def test_template_parser_use_case(resources):
    """
    Parse the templates in ``resources`` and check the JSON result holds
    the expected value for cell B3 of the Summary sheet.
    """
    use_case = ParsePopulatedTemplatesUseCase(
        InMemoryPopulatedTemplatesRepository(resources))
    parsed = json.loads(use_case.execute())
    cell = parsed["test_template.xlsx"]["data"]["Summary"]["B3"]
    assert cell["value"] == "This is a string"
Exemple #13
0
def import_and_create_master(echo_funcs, datamap=None, **kwargs):
    """Import all spreadsheet files from input directory and process with datamap.

    Create master spreadsheet immediately. If the datamap repository
    reports ``is_typed``, the validating use case is run instead of the
    plain one.

    Args:
        echo_funcs: mapping sent from the front-end interface allowing for
            suitable output (stdout, etc). It must provide the keys
            "click_echo_green", "click_echo_red", "click_echo_yellow" and
            "click_echo_white".
        datamap: optional datamap file name, resolved relative to the
            templates repository directory. When falsy, the
            "datamap file name" entry of the DEFAULT config section is used.
        **kwargs: optional settings; each is ignored when missing or falsy:
            rowlimit - value assigned to ``Config.TEMPLATE_ROW_LIMIT``.
            inputdir - directory to read templates from, instead of the
                "input" directory under ``Config.PLATFORM_DOCS_DIR``.
            validationonly - use ``ValidationOnlyRepository`` and an empty
                master file name.
            zipinput - passed to ``InMemoryPopulatedTemplatesZip`` to read
                templates instead of a directory.

    Raises:
        KeyError: if ``echo_funcs`` lacks one of the required keys.
        SystemExit: exit status 1 when validation only was requested but
            the datamap is not typed.
        FileNotFoundError: propagated unchanged from the use case.
        DatamapNotCSVException: propagated unchanged from the use case.
    """
    # patch ECHO_FUNC for datamap creation - hack!
    parsing = engine.use_cases.parsing
    parsing.ECHO_FUNC_GREEN = echo_funcs["click_echo_green"]
    parsing.ECHO_FUNC_RED = echo_funcs["click_echo_red"]
    parsing.ECHO_FUNC_YELLOW = echo_funcs["click_echo_yellow"]
    parsing.ECHO_FUNC_WHITE = echo_funcs["click_echo_white"]

    master_fn = Config.config_parser["DEFAULT"]["master file name"]

    row_limit = kwargs.get("rowlimit")
    if row_limit:
        Config.TEMPLATE_ROW_LIMIT = row_limit

    inputdir = kwargs.get("inputdir") or Config.PLATFORM_DOCS_DIR / "input"

    validation_only = bool(kwargs.get("validationonly"))
    if validation_only:
        output_repo = ValidationOnlyRepository
        master_fn = ""
    else:
        output_repo = MasterOutputRepository

    zip_input = kwargs.get("zipinput")
    if zip_input:
        tmpl_repo = InMemoryPopulatedTemplatesZip(zip_input)
    else:
        tmpl_repo = InMemoryPopulatedTemplatesRepository(inputdir)

    if Config.TEMPLATE_ROW_LIMIT < 50:
        logger.warning(
            f"Row limit is set to {Config.TEMPLATE_ROW_LIMIT} (default is 500). This may be unintentionally low. Check datamaps import templates --help"
        )
    else:
        logger.info(f"Row limit is set to {Config.TEMPLATE_ROW_LIMIT}.")

    dm_fn = datamap or Config.config_parser["DEFAULT"]["datamap file name"]
    dm_repo = InMemorySingleDatamapRepository(
        Path(tmpl_repo.directory_path) / dm_fn)

    if dm_repo.is_typed:
        uc = CreateMasterUseCaseWithValidation(dm_repo, tmpl_repo, output_repo)
    else:
        # Test the flag that selected the repository rather than comparing
        # repository classes.
        if validation_only:
            logger.critical(
                "Cannot validate data. The datamap needs to have a 'type' column."
            )
            sys.exit(1)
        uc = CreateMasterUseCase(dm_repo, tmpl_repo, output_repo)

    # Errors propagate as raised: re-wrapping FileNotFoundError in a new
    # instance would drop its errno/filename attributes, and catching an
    # exception only to re-raise it adds nothing.
    uc.execute(master_fn)