Ejemplo n.º 1
0
    def config_loader(self):
        """
        Load the driver's config file, then check and parse it.

        Progress and failure messages are emitted through ``self.printer``.
        On success ``self.config`` holds the parsed config, ``self.general``
        its ``'general'`` entry and ``self.report`` a new report object.
        On failure a message is printed and the method returns early,
        leaving ``self.config`` at whatever was last assigned to it.

        :return: Nothing. Updates the internal config object of the driver.
        """
        self.printer("Loading config file...")
        # The open result is indexed below: [0] is tested for success and
        # [1] is taken as the loaded content.
        self.config = config_utils.config_open(self.config_path)
        if not self.config[0]:
            self.printer("Couldn't load config file. It's badly formatted.")
            return

        self.printer("Checking and parsing config file...")
        self.config = self.config[1]
        if not config_utils.config_checker(self.config):
            self.printer("Check the config file, some mandatory fields "
                         "are either missing or misformatted.")
            return

        self.config = config_utils.config_parser(self.config)
        self.general = self.config['general']
        # The report object is built from the parsed config.
        self.report = report.Report(self.config)
        self.printer("Config file checked and parsed. "
                     "Starting QC pipeline...")
Ejemplo n.º 2
0
import pytest

from paqc.connectors import csv
from paqc.utils.config_utils import config_open

# Config objects loaded once at import time for the parametrized tests.
# Only element [1] of config_open's result is kept; element [0] (presumably
# a load-success flag -- confirm against config_open) is not checked here.
DICT_CONFIG = config_open("paqc/tests/data/driver_dict_output.yml")[1]
DICT_CONFIG_16 = config_open("paqc/tests/data/qc16_driver_dict_output.yml")[1]
DICT_CONFIG_17TO19 = config_open(
    "paqc/tests/data/qc17to19_driver_dict_output.yml")[1]
DICT_CONFIG_20TO21 = config_open(
    "paqc/tests/data/qc20to21_driver_dict_output.yml")[1]


# 14
@pytest.mark.parametrize("dict_config", [DICT_CONFIG])
@pytest.mark.parametrize(
    "df, expected, ls_faults",
    [
        # Subset from data/qc_data.csv
        (csv.read_csv(DICT_CONFIG,
                      "paqc/tests/data/qc14_check1.csv"), True, None),
        # row 3 is missing
        (csv.read_csv(DICT_CONFIG,
                      "paqc/tests/data/qc14_check2.csv"), False, [2])
    ])
def test_qc14(df, expected, ls_faults, dict_config):
    """
    Run qc14 on each parametrized dataframe and check its result.

    :param df: dataframe read from one of the qc14 check files.
    :param expected: expected value of the result's ``passed`` attribute.
    :param ls_faults: expected value of the result's ``extra`` attribute.
    :param dict_config: config passed through to qc14.
    """
    rpi = qc14(df, dict_config)
    # Two separate asserts (instead of one joined with bitwise `&`) so a
    # failure shows which of the two comparisons did not hold.
    assert rpi.passed == expected
    assert rpi.extra == ls_faults


# 15
Ejemplo n.º 3
0
import pytest

from paqc.connectors import csv
from paqc.utils.config_utils import config_open

# Config object loaded once at import time for the parametrized tests.
# Only element [1] of config_open's result is kept; element [0] is not
# checked here.
DICT_CONFIG_FLAGPROP = config_open(
    "paqc/tests/data/driver_dict_output_flagprop.yml")[1]


# 40
@pytest.mark.parametrize("dict_config", [DICT_CONFIG_FLAGPROP])
@pytest.mark.parametrize(
    "df, expected, ls_faults",
    [
        # Altered subset from v02.csv
        (csv.read_csv(DICT_CONFIG_FLAGPROP,
                      "paqc/tests/data/qc40_check1.csv"), True, None),
        # Case expected to fail, reporting the two columns listed below
        (csv.read_csv(DICT_CONFIG_FLAGPROP, "paqc/tests/data/qc40_check2.csv"),
         False, ['C_11402COUNT', 'J_27370_DATE_FIRST_INDEX']),
    ])
def test_qc40(df, expected, ls_faults, dict_config):
    """
    Run qc40 on each parametrized dataframe and check its result.

    :param df: dataframe read from one of the qc40 check files.
    :param expected: expected value of the result's ``passed`` attribute.
    :param ls_faults: expected value of the result's ``extra`` attribute.
    :param dict_config: config passed to qc40; its ``['qc']['qc_params']``
        entry supplies the ``ls_metrictypes`` argument.
    """
    qc_params = dict_config['qc']['qc_params']
    rpi = qc40(df, dict_config, qc_params['ls_metrictypes'])
    # Two separate asserts (instead of one joined with bitwise `&`) so a
    # failure shows which of the two comparisons did not hold.
    assert rpi.passed == expected
    assert rpi.extra == ls_faults


# 41
@pytest.mark.parametrize("dict_config", [DICT_CONFIG_FLAGPROP])
@pytest.mark.parametrize(
    "df, expected, ls_faults",
Ejemplo n.º 4
0
import pytest

from paqc.connectors import csv
from paqc.utils.config_utils import config_open

# Config object loaded once at import time for the parametrized tests.
# Only element [1] of config_open's result is kept; element [0] is not
# checked here.
DICT_CONFIG_CN01 = config_open(
    "paqc/tests/data/driver_dict_output_CN01.yml")[1]


# 27
@pytest.mark.parametrize("dict_config", [DICT_CONFIG_CN01])
@pytest.mark.parametrize(
    "df, expected",
    [
        # Original subset from data/initial_negative.csv
        (csv.read_csv(DICT_CONFIG_CN01, "paqc/tests/data/qc27_check1.csv"),
         True),
        # Deleted one row
        (csv.read_csv(DICT_CONFIG_CN01, "paqc/tests/data/qc27_check2.csv"),
         False),
    ])
def test_qc27(df, expected, dict_config):
    """
    Run qc27 on each parametrized dataframe and check its ``passed`` flag.

    The three extra qc27 arguments are read from the config's
    ``['qc']['qc_params']`` entry.
    """
    params = dict_config['qc']['qc_params']
    cp02_path = params['path_file_cp02']
    cp02_id_col = params['pat_id_col_cp02']
    n01_match = params['n01_match']
    result = qc27(df, dict_config, cp02_path, cp02_id_col, n01_match)
    assert result.passed == expected


# 28
@pytest.mark.parametrize("dict_config", [DICT_CONFIG_CN01])
@pytest.mark.parametrize(
Ejemplo n.º 5
0
import pytest

from paqc.connectors import csv
from paqc.utils.config_utils import config_open

# Config objects loaded once at import time for the parametrized tests.
# Only element [1] of config_open's result is kept; element [0] is not
# checked here.
DICT_CONFIG_1TO8 = config_open("paqc/tests/data/driver_dict_output.yml")[1]
DICT_CONFIG_9TO13 = config_open(
    "paqc/tests/data/qc9to13_driver_dict_output.yml")[1]


# 1
@pytest.mark.parametrize("dict_config", [DICT_CONFIG_1TO8])
@pytest.mark.parametrize(
    "df, expected, ls_faults",
    [
        # Original column names from data/qc_data.csv
        (csv.read_csv(DICT_CONFIG_1TO8,
                      "paqc/tests/data/qc1_check1.csv"), True, None),
        # GENDER has trailing space, D_7931_AVG_CLAIM_CNT leading space
        (csv.read_csv(DICT_CONFIG_1TO8, "paqc/tests/data/qc1_check2.csv"),
         False, ['GENDER ', ' D_7931_AVG_CLAIM_CNT']),
        # Second column name is empty
        (csv.read_csv(DICT_CONFIG_1TO8, "paqc/tests/data/qc1_check3.csv"),
         False, ['Unnamed: 1']),
        # Created column name with single $
        (csv.read_csv(DICT_CONFIG_1TO8,
                      "paqc/tests/data/qc1_check4.csv"), False, ['$']),
        # First column name is lab*el
        (csv.read_csv(DICT_CONFIG_1TO8,
                      "paqc/tests/data/qc1_check5.csv"), False, ['lab*el'])
    ])
Ejemplo n.º 6
0
import pytest

from paqc.connectors import csv
from paqc.utils.config_utils import config_open

# Config objects loaded once at import time for the parametrized tests.
# Only element [1] of config_open's result is kept; element [0] is not
# checked here.
DICT_CONFIG_9TO13 = config_open(
    "paqc/tests/data/qc9to13_driver_dict_output.yml")[1]
DICT_CONFIG_48 = config_open("paqc/tests/data/qc48_driver_dict_output.yml")[1]
DICT_CONFIG_50 = config_open("paqc/tests/data/qc50_driver_dict_output.yml")[1]


# 46
@pytest.mark.parametrize("dict_config", [DICT_CONFIG_9TO13])
@pytest.mark.parametrize(
    "df_old",
    [csv.read_csv(DICT_CONFIG_9TO13, "paqc/tests/data/suite2_df_old.csv")])
@pytest.mark.parametrize(
    "df_new, expected, ls_faults",
    [
        # identical to suite2_df_old.csv
        (csv.read_csv(DICT_CONFIG_9TO13,
                      "paqc/tests/data/qc46_check1.csv"), True, None),
        # A_last_exp_dt and A_first_exp_dt are changed in position
        (csv.read_csv(DICT_CONFIG_9TO13,
                      "paqc/tests/data/qc46_check2.csv"), False, None),
        # column C_count is gone in the new dataframe, column D_count is new
        (csv.read_csv(DICT_CONFIG_9TO13,
                      "paqc/tests/data/qc46_check3.csv"), False, {
                          'missing columns': ['C_count'],
                          'new columns': ['D_count']
                      })