Exemple #1
0
def test_flatten_with_exclude(spec, releases):
    """Check that a table named in ``exclude`` is absent from the flattened rows.

    The releases are flattened twice with ``tenders`` split: first with
    ``tenders_items`` excluded, then without any exclusion.
    """
    # Repeat the first release's tender items six times before processing.
    first_tender = releases[0]["tender"]
    first_tender["items"] = first_tender["items"] * 6
    # Exhaust process_items(); the yielded values themselves are not needed.
    for _ in spec.process_items(releases):
        pass

    def collect(flatten_options):
        """Flatten ``releases`` and gather the produced rows per table name."""
        flattener = Flattener(flatten_options, spec.tables)
        gathered = defaultdict(list)
        for _count, flat in flattener.flatten(releases):
            for table_name, table_rows in flat.items():
                gathered[table_name].extend(table_rows)
        return gathered

    with_exclude = collect(
        FlattenOptions(
            selection={"tenders": {"split": True}},
            exclude=["tenders_items"],
        )
    )
    assert "tenders" in with_exclude
    assert "tenders_items" not in with_exclude

    without_exclude = collect(
        FlattenOptions(selection={"tenders": {"split": True}})
    )
    assert "tenders" in without_exclude
    assert "tenders_items" in without_exclude
Exemple #2
0
def test_flatten_should_split_with_child(spec, releases, options):
    """Check that tender items are emitted in ``tenders_items`` and not inlined in ``tenders``."""
    # Repeat the first release's tender items six times before processing.
    first_tender = releases[0]["tender"]
    first_tender["items"] = first_tender["items"] * 6
    for _ in spec.process_items(releases):
        pass

    collected = defaultdict(list)
    for _count, flat in Flattener(options, spec.tables).flatten(releases):
        for table_name, table_rows in flat.items():
            collected[table_name].extend(table_rows)

    assert "tenders_items" in collected
    assert "tenders_items_addit" not in collected

    # Index-based item columns must not show up in the parent rows.
    inlined_columns = (
        "/tender/items/0/id",
        "/tender/items/0/description",
        "/tender/items/1/id",
        "/tender/items/1/description",
    )
    for tender_row, release in zip(collected["tenders"], releases):
        if not release.get("tender", {}).get("items"):
            continue
        for column in inlined_columns:
            assert column not in tender_row

    # Every child row carries the un-indexed item columns instead.
    for item_row in collected["tenders_items"]:
        assert "/tender/items/id" in item_row
        assert "/tender/items/description" in item_row
def test_csv_writer(spec_analyzed, releases, flatten_options, tmpdir, schema):
    """Write flattened rows with ``CSVWriter`` and compare each CSV line with its row.

    The releases are flattened once to write ``<table name>.csv`` files into
    ``tmpdir`` and a second time to produce the expected rows. Expected values
    are stringified and empty CSV cells are ignored before comparison.
    """
    flattener = Flattener(flatten_options, spec_analyzed.tables)
    flatten_options.selection["parties"].split = True
    tables = prepare_tables(spec_analyzed, flatten_options)
    workdir = Path(tmpdir)
    with CSVWriter(workdir, tables, flatten_options, schema) as writer:
        # Writing CSV files
        for _count, flat in flattener.flatten(releases):
            for name, rows in flat.items():
                for row in rows:
                    writer.writerow(name, row)

    # Reading CSV files: each file is parsed once and cached, instead of being
    # re-opened and re-scanned for every single row.
    parsed = {}
    counter = {}
    for _count, flat in flattener.flatten(releases):
        for name, rows in flat.items():
            counter.setdefault(name, 0)
            for row in rows:
                str_row = {k: str(v) for (k, v) in row.items()}
                if name not in parsed:
                    path = workdir / (name + ".csv")
                    with open(path, newline="", encoding="utf-8") as csv_file:
                        parsed[name] = list(csv.DictReader(csv_file))
                lines = parsed[name]
                position = counter[name]
                # A missing line used to be skipped silently, letting the
                # test pass without comparing anything.
                assert position < len(lines), (
                    f"{name}.csv has {len(lines)} data lines, "
                    f"row {position} is missing"
                )
                clean_line = {
                    k: v
                    for (k, v) in lines[position].items() if v != ""
                }
                assert clean_line == str_row
                counter[name] += 1
def test_xlsx_writer(spec_analyzed, releases, flatten_options, tmpdir, schema):
    """Write flattened rows with ``XlsxWriter`` and compare each sheet line with its row.

    Sheet row 1 is treated as the header; data rows are read from row 2
    onwards, one per flattened row, and values are compared as strings.
    """
    flattener = Flattener(flatten_options, spec_analyzed.tables)
    tables = prepare_tables(spec_analyzed, flatten_options)
    workdir = Path(tmpdir)
    with XlsxWriter(workdir, tables, flatten_options, schema) as writer:
        # Writing XLSX file
        for _count, flat in flattener.flatten(releases):
            for table_name, table_rows in flat.items():
                for table_row in table_rows:
                    writer.writerow(table_name, table_row)

    # Reading XLSX file
    workbook = openpyxl.load_workbook(workdir / "result.xlsx")
    next_line = {}  # table name -> sheet row number to compare next

    for _count, flat in flattener.flatten(releases):
        for table_name, table_rows in flat.items():
            next_line.setdefault(table_name, 2)
            sheet = workbook[table_name]
            titles = {}
            for header_cell in sheet[1]:
                titles[header_cell.column_letter] = header_cell.value
            for table_row in table_rows:
                sheet_line = {}
                for cell in sheet[next_line[table_name]]:
                    sheet_line[titles[cell.column_letter]] = cell.value
                expected = table_row.as_dict()
                # Every expected column must exist in the sheet line.
                missing = set(expected.keys()).difference(set(sheet_line.keys()))
                assert not missing
                for column, value in expected.items():
                    assert str(value) == str(sheet_line[column])
                next_line[table_name] += 1
Exemple #5
0
def test_flatten_with_counters(spec, releases):
    """Check the ``...Count`` columns produced when flattening with ``count=True``.

    ``tenders`` rows must report the number of tender items, and
    ``tenders_items`` rows the number of additional classifications of the
    item at the same position.
    """
    # Repeat the items, then the first item's classifications, six times each.
    first_tender = releases[0]["tender"]
    first_tender["items"] = first_tender["items"] * 6
    first_item = first_tender["items"][0]
    first_item["additionalClassifications"] = (
        first_item["additionalClassifications"] * 6)
    for _ in spec.process_items(releases):
        pass

    options = FlattenOptions(selection={"tenders": {"split": True}}, count=True)
    flattener = Flattener(options, spec.tables)
    items_count = "/tender/itemsCount"
    classifications_count = "/tender/items/additionalClassificationsCount"

    for count, flat in flattener.flatten(releases):
        for name, rows in flat.items():
            if name == "tenders":
                for row in rows:
                    items = search(f"[{count}].tender.items", releases)
                    if not items:
                        continue
                    assert items_count in row
                    assert len(items) == row[items_count]
            elif name == "tenders_items":
                for index, row in enumerate(rows):
                    additional = search(
                        f"[{count}].tender.items[{index}].additionalClassifications",
                        releases,
                    )
                    if not additional:
                        continue
                    assert classifications_count in row
                    assert len(additional) == row[classifications_count]
def test_flatten_string_arrays(spec_analyzed, releases):
    """Compare flattened values with the source release, joining string arrays.

    Every column whose name contains ``/`` is resolved as a JSON pointer in
    the originating release. Values of columns ending in ``submissionMethod``
    or ``roles`` are joined with ``JOINABLE_SEPARATOR`` before comparison.
    """
    options = FlattenOptions(
        selection={
            "tenders": {"split": True},
            "parties": {"split": True},
        },
        exclude=["tenders_items", "parties_ids", "tenders_tenderers"],
    )
    flattener = Flattener(options, spec_analyzed.tables)
    joined_suffixes = ("submissionMethod", "roles")
    for count, flat in flattener.flatten(releases):
        for name, rows in flat.items():
            seen = defaultdict(int)
            for row in reversed(rows):
                for column, value in row.items():
                    if "/" not in column:
                        continue
                    # Insert the running index after "parties" to build the pointer.
                    pointer = column.replace("parties",
                                             f"parties/{seen['parties']}")
                    actual = resolve_pointer(releases[count], pointer)
                    if pointer.endswith(joined_suffixes):
                        actual = JOINABLE_SEPARATOR.join(actual)
                    assert actual == value
                seen[name] += 1
Exemple #7
0
def test_flatten_fields_compare(spec_analyzed, releases):
    """Compare every flattened ``/``-column with the value in the source release.

    ``tenders`` is split and ``parties`` is not. Values of columns ending in
    ``submissionMethod`` or ``roles`` are joined with ``JOINABLE_SEPARATOR``
    before comparison.
    """
    selection = {
        "tenders": {"split": True},
        "parties": {"split": False},
    }
    flattener = Flattener(FlattenOptions(selection=selection),
                          spec_analyzed.tables)
    joined_suffixes = ("submissionMethod", "roles")
    for count, flat in flattener.flatten(releases):
        for name, rows in flat.items():
            processed = defaultdict(int)
            for row in reversed(rows):
                for column, value in row.items():
                    if "/" not in column:
                        continue
                    # str.replace is a no-op for columns without "parties".
                    pointer = column.replace(
                        "parties", f"parties/{processed['parties']}")
                    expected = resolve_pointer(releases[count], pointer)
                    if pointer.endswith(joined_suffixes):
                        expected = JOINABLE_SEPARATOR.join(expected)
                    assert expected == value
                processed[name] += 1
Exemple #8
0
 def __init__(
     self,
     workdir,
     options,
     tables,
     root_key="releases",
     csv=None,
     xlsx="result.xlsx",
     language=LOCALE,
 ):
     """Create the flattener and store the output settings.

     :param workdir: directory path; stored as a ``Path``
     :param options: passed to ``Flattener``
     :param tables: passed to ``Flattener``
     :param root_key: stored unchanged (default ``"releases"``)
     :param csv: stored unchanged; presumably the CSV output setting - confirm
     :param xlsx: stored unchanged; presumably the XLSX file name - confirm
     :param language: passed to ``Flattener`` as ``language``
     """
     self.flattener = Flattener(options, tables, language=language)
     self.workdir = Path(workdir)
     # TODO: detect package, where?
     self.root_key = root_key
     self.csv, self.xlsx = csv, xlsx
     # Starts out empty; nothing is added to it here.
     self.writers = []
def test_flatten_with_only(spec_analyzed, releases):
    """Check that ``only`` restricts ``tenders`` rows to the chosen and service columns.

    The same check runs for a split selection (with ``parties`` unsplit) and
    for an unsplit selection of ``tenders`` alone.
    """
    allowed = ["/tender/id", "rowID", "ocid", "parentID", "id"]
    selections = (
        {
            "tenders": {"split": True, "only": ["/tender/id"]},
            "parties": {"split": False},
        },
        {
            "tenders": {"split": False, "only": ["/tender/id"]},
        },
    )
    for selection in selections:
        options = FlattenOptions(selection=selection)
        flattener = Flattener(options, spec_analyzed.tables)
        collected = defaultdict(list)
        for _count, flat in flattener.flatten(releases):
            for table_name, table_rows in flat.items():
                collected[table_name].extend(table_rows)

        # There must be rows, and none may carry a column outside ``allowed``.
        assert collected["tenders"]
        for row in collected["tenders"]:
            assert not set(row).difference(allowed)
Exemple #10
0
def test_flattener_generate_count_columns(spec, releases):
    """Check which ``...Count`` columns exist on the flattener's tables.

    With ``tenders`` unsplit no count columns are expected on ``tenders``;
    with ``tenders`` split, ``/tender/itemsCount`` is expected on ``tenders``
    and the classification count on ``tenders_items``.
    """
    first_tender = releases[0]["tender"]
    first_tender["items"] = first_tender["items"] * 6
    for _ in spec.process_items(releases):
        pass

    classifications = "/tender/items/additionalClassifications"

    # Unsplit: no count columns on the parent table.
    unsplit = FlattenOptions(selection={"tenders": {"split": False}}, count=True)
    tenders = Flattener(unsplit, spec.tables).tables["tenders"]
    assert "/tender/itemsCount" not in tenders
    for index in range(tenders.arrays[classifications]):
        assert f"/tender/items/{index}/additionalClassificationsCount" not in tenders

    # Split: items count on the parent, classification count on the child.
    split = FlattenOptions(
        selection={
            "tenders": {"split": True},
            "tenders_items": {"split": False},
        },
        count=True,
    )
    flattener = Flattener(split, spec.tables)
    tenders = flattener.tables["tenders"]
    tenders_items = flattener.tables["tenders_items"]
    assert "/tender/itemsCount" in tenders
    for index in range(tenders.arrays[classifications]):
        assert f"/tender/items/{index}/additionalClassificationsCount" not in tenders
    assert "/tender/items/additionalClassificationsCount" in tenders_items
def test_flatten_only_no_default_columns(spec_analyzed, releases):
    """Check that with ``only=["/tender/id"]`` no flattened row has any other column."""
    selection = {"tenders": {"split": False, "only": ["/tender/id"]}}
    flattener = Flattener(FlattenOptions(selection=selection),
                          spec_analyzed.tables)
    permitted = {"/tender/id"}
    for _count, flat in flattener.flatten(releases):
        for _name, rows in flat.items():
            for row in rows:
                unexpected = set(row.keys()) - permitted
                assert not unexpected
Exemple #12
0
def test_less_five_arrays_csv(spec_analyzed, releases, flatten_options, tmpdir):
    """Check that no CSV file is written for three specific child tables.

    NOTE(review): the test name suggests these tables hold fewer than five
    array entries in the fixture data - confirm against the fixtures.
    """
    flattener = Flattener(flatten_options, spec_analyzed.tables)
    tables = prepare_tables(spec_analyzed, flatten_options)
    workdir = Path(tmpdir)
    with CSVWriter(workdir, tables, flatten_options) as writer:
        for _count, flat in flattener.flatten(releases):
            for table_name, table_rows in flat.items():
                for table_row in table_rows:
                    writer.writerow(table_name, table_row)

    for absent in ("tenders_items", "tenders_items_addit", "tenders_tende"):
        assert not (workdir / f"{absent}.csv").is_file()
Exemple #13
0
def test_less_five_arrays_xlsx(spec_analyzed, releases, flatten_options, tmpdir):
    """Check that ``result.xlsx`` has no sheet for three specific child tables.

    NOTE(review): the test name suggests these tables hold fewer than five
    array entries in the fixture data - confirm against the fixtures.
    """
    flattener = Flattener(flatten_options, spec_analyzed.tables)
    tables = prepare_tables(spec_analyzed, flatten_options)
    workdir = Path(tmpdir)
    with XlsxWriter(workdir, tables, flatten_options) as writer:
        for _count, flat in flattener.flatten(releases):
            for table_name, table_rows in flat.items():
                for table_row in table_rows:
                    writer.writerow(table_name, table_row)

    workbook = openpyxl.load_workbook(workdir / "result.xlsx")
    for absent in ("tenders_items", "tenders_items_addit", "tenders_tende"):
        assert absent not in workbook
def test_flatten_buyer(spec_analyzed, releases):
    """Check that rows of a release with a buyer carry ``/buyer/id`` and ``/buyer/name``."""
    options = FlattenOptions(
        selection={"parties": {"split": True}},
        exclude=["parties_ids"],
    )
    flattener = Flattener(options, spec_analyzed.tables)
    for count, flat in flattener.flatten(releases):
        # Releases without a buyer have nothing to check.
        if not search(f"[{count}].buyer", releases):
            continue
        for _name, rows in flat.items():
            for row in rows:
                assert "/buyer/id" in row
                assert "/buyer/name" in row
Exemple #15
0
def test_flatten_should_not_split(spec_analyzed, releases):
    """Check that with ``split=False`` tender items stay inline in ``tenders`` rows.

    No child item tables may be produced, and each ``tenders`` row must hold
    index-based columns for the first item (and the second, when the release
    has more than one).
    """
    options = FlattenOptions(**{"selection": {"tenders": {"split": False}}})
    flattener = Flattener(options, spec_analyzed.tables)
    all_rows = defaultdict(list)
    for _count, flat in flattener.flatten(releases):
        for name, rows in flat.items():
            all_rows[name].extend(rows)
    # The sibling tests name the child table "tenders_items"; this check used
    # the misspelled "tender_items", which made the assertion vacuous.
    assert "tenders_items" not in all_rows
    assert "tenders_items_addit" not in all_rows
    tenders = all_rows["tenders"]

    for tender, release in zip(tenders, releases):
        items = release.get("tender", {}).get("items")
        if not items:
            continue
        assert "/tender/items/0/id" in tender
        assert "/tender/items/0/description" in tender
        if len(items) > 1:
            assert "/tender/items/1/id" in tender
            assert "/tender/items/1/description" in tender
def test_flatten_row_id_parent_id_relation(spec, releases):
    """Check that each ``tenders_items_class`` row points at an existing item row.

    A child row's ``parentID`` must equal the ``rowID`` of some row in
    ``tenders_items``.
    """
    # NOTE(review): the multiplication is applied twice (36x in total) -
    # confirm this is intended and not a duplicated line.
    releases[0]["tender"]["items"] = releases[0]["tender"]["items"] * 6
    releases[0]["tender"]["items"] = releases[0]["tender"]["items"] * 6
    releases[0]["tender"]["items"][0]["additionalClassifications"] = (
        releases[0]["tender"]["items"][0]["additionalClassifications"] * 6)
    for _ in spec.process_items(releases):
        pass
    options = FlattenOptions(**{"selection": {"tenders": {"split": True}}})
    flattener = Flattener(options, spec.tables)
    all_rows = defaultdict(list)
    for _count, flat in flattener.flatten(releases):
        for name, rows in flat.items():
            all_rows[name].extend(rows)

    # Build the lookup once instead of scanning every item row per child row.
    item_row_ids = {item["rowID"] for item in all_rows["tenders_items"]}

    # NOTE(review): all_rows is a defaultdict, so if no table named
    # "tenders_items_class" is produced this loop runs zero times and the
    # test passes without checking anything - confirm the table name.
    for row in all_rows["tenders_items_class"]:
        assert row["parentID"] in item_row_ids
Exemple #17
0
def test_flatten_with_repeat(spec_analyzed, releases):
    """Check that a column listed in ``repeat`` is copied into every non-``tenders`` row.

    Each such row must carry the service columns and a ``/tender/id`` equal
    to the tender id of the originating release.
    """
    selection = {"tenders": {"split": True, "repeat": ["/tender/id"]}}
    flattener = Flattener(FlattenOptions(selection=selection),
                          spec_analyzed.tables)
    required_columns = ("id", "ocid", "rowID", "/tender/id")
    for count, flat in flattener.flatten(releases):
        for name, rows in flat.items():
            if name == "tenders":
                continue
            for row in rows:
                for column in required_columns:
                    assert column in row
                tender_id = search(f"[{count}].tender.id", releases)
                assert row["/tender/id"] == tender_id
Exemple #18
0
def test_flatten_with_unnest(spec_analyzed, releases):
    """Check that an ``unnest`` column appears only in ``tenders`` rows.

    When the release has a first item id, the ``tenders`` row must hold it
    under ``/tender/items/0/id``; rows of every other table must not have
    that column.
    """
    field = "/tender/items/0/id"
    selection = {"tenders": {"split": True, "unnest": [field]}}
    flattener = Flattener(FlattenOptions(selection=selection),
                          spec_analyzed.tables)
    for count, flat in flattener.flatten(releases):
        for name, rows in flat.items():
            for row in rows:
                if name == "tenders":
                    item_id = search(f"[{count}].tender.items[0].id", releases)
                    if item_id:
                        assert field in row
                        assert item_id == row[field]
                else:
                    assert field not in row
 def __init__(
     self,
     workdir,
     options,
     analyzer=None,
     tables=None,
     pkg_type="releases",
     csv=None,
     xlsx="result.xlsx",
     language=LOCALE,
     multiple_values=False,
     schema=None,
 ):
     """Create the flattener and store the output settings.

     Explicit arguments win; when one is falsy, the matching value from
     ``analyzer`` is used if an analyzer was given.

     :param workdir: directory path; stored as a ``Path``
     :param options: passed to ``Flattener``
     :param analyzer: optional source of ``spec.tables``, ``spec.schema``,
         ``multiple_values`` and ``pkg_type`` fallbacks
     :param tables: passed to ``Flattener``; falls back to
         ``analyzer.spec.tables``
     :param pkg_type: falls back to ``analyzer.pkg_type``, then ``"releases"``
     :param csv: stored unchanged
     :param xlsx: stored unchanged
     :param language: passed to ``Flattener`` as ``language``
     :param multiple_values: falls back to ``analyzer.multiple_values``,
         then ``False``
     :param schema: falls back to ``analyzer.spec.schema``, then ``None``
     :raises ValueError: if neither ``tables`` nor ``analyzer`` is given
     """
     # Fail with a clear message instead of an AttributeError on ``None``.
     if not tables and analyzer is None:
         raise ValueError("either 'tables' or 'analyzer' must be provided")
     self.tables = tables if tables else analyzer.spec.tables
     self.flattener = Flattener(options, self.tables, language=language)
     self.workdir = Path(workdir)
     # TODO: detect package, where?
     self.writers = []
     self.csv = csv
     self.xlsx = xlsx
     self.multiple_values = multiple_values or (
         analyzer.multiple_values if analyzer else False
     )
     self.pkg_type = pkg_type or (
         analyzer.pkg_type if analyzer else "releases"
     )
     # Guarded like the two fallbacks above, so ``tables=...`` without an
     # analyzer or schema no longer crashes.
     self.schema = schema or (
         analyzer.spec.schema if analyzer is not None else None
     )
Exemple #20
0
def test_xlsx_writer(spec_analyzed, releases, flatten_options, tmpdir):
    """Write flattened rows with ``XlsxWriter`` and compare each sheet line with its row.

    Sheet row 1 is treated as the header and data starts at row 2. Cells
    with falsy values are dropped before comparison, and
    ``/tender/hasEnquiries`` is compared as a string.
    """
    flattener = Flattener(flatten_options, spec_analyzed.tables)
    tables = prepare_tables(spec_analyzed, flatten_options)
    workdir = Path(tmpdir)
    with XlsxWriter(workdir, tables, flatten_options) as writer:
        # Writing XLSX file
        for _count, flat in flattener.flatten(releases):
            for name, rows in flat.items():
                for row in rows:
                    writer.writerow(name, row)

    # Reading XLSX files
    counter = {}
    path = workdir / "result.xlsx"
    # Load the workbook once; it was previously re-parsed from disk for
    # every table of every release although the file never changes here.
    xlsx_reader = openpyxl.load_workbook(path)
    for _count, flat in flattener.flatten(releases):
        for name, rows in flat.items():
            if name not in counter:
                counter[name] = 2
            sheet = xlsx_reader[name]
            headers = {cell.column_letter: cell.value for cell in sheet[1]}
            for row in rows:
                cells = sheet[counter[name]]
                line = {headers[cell.column_letter]: cell.value for cell in cells}
                # Cleaning empty cells
                line = {k: v for (k, v) in line.items() if v}

                if "/tender/hasEnquiries" in row:
                    str_row = dict(row.items())
                    str_row["/tender/hasEnquiries"] = str(row["/tender/hasEnquiries"])
                    assert line == str_row
                else:
                    assert line == row
                counter[name] += 1
def test_xlsx_only_no_default_columns(spec_analyzed, releases, tmpdir, schema):
    """Check that with ``only=["/tender/id"]`` the ``tenders`` sheet has that single column."""
    selection = {"tenders": {"split": True, "only": ["/tender/id"]}}
    flatten_options = FlattenOptions(selection=selection)
    flattener = Flattener(flatten_options, spec_analyzed.tables)
    tables = prepare_tables(spec_analyzed, flatten_options)
    workdir = Path(tmpdir)
    with XlsxWriter(workdir, tables, flatten_options, schema) as writer:
        for _count, flat in flattener.flatten(releases):
            for table_name, table_rows in flat.items():
                for table_row in table_rows:
                    writer.writerow(table_name, table_row)

    sheet = openpyxl.load_workbook(workdir / "result.xlsx")["tenders"]
    # First cell of every sheet row; entry 0 is the header cell.
    first_column = [cells[0].value for cells in sheet.rows]
    assert first_column[0] == "/tender/id"
    assert sheet.max_column == 1
Exemple #22
0
def test_flatten(spec_analyzed, releases):
    """Check service columns on every row and ids against ``ID_ITEMS``.

    Each row must have ``id``, ``ocid`` and ``rowID``. For tables listed in
    ``ID_ITEMS`` the row's id column is compared with the entry at the same
    running position.
    """
    selection = {
        "tenders": {"split": True},
        "parties": {"split": False},
    }
    flattener = Flattener(FlattenOptions(selection=selection),
                          spec_analyzed.tables)
    seen = {"tenders": 0, "parties": 0}
    service_columns = ("id", "ocid", "rowID")
    for _count, flat in flattener.flatten(releases):
        for name, rows in flat.items():
            for row in rows:
                for column in service_columns:
                    assert column in row
                if name not in ID_ITEMS:
                    continue
                key = "tender" if name == "tenders" else "parties"
                path = f"/{key}/id"
                assert ID_ITEMS[name][seen[name]][path] == row.get(path)
                seen[name] += 1