Exemple #1
0
    def test_enrich_previously_assigned(self):
        """
        A record that was assigned an autoid in a previous run keeps that id,
        and newly generated ids continue after the last value in the database.

        It can happen that in an import batch some records had already been
        assigned an autoid. Here the storage mock returns an existing record
        for the first entity only and reports "1234" as the last column value.
        """
        Record = namedtuple('Record', ['id', 'code'])
        previously_assigned = Record(id="01232", code="0")

        msg = self.mock_msg
        msg["header"]["enrich"]["id"]["template"] = "0123X"
        msg["contents"] = [{"id": None, "code": code} for code in ("0", "1", "2")]

        storage = self.mock_storage
        # Only the lookup for the first entity finds a record from a previous run
        storage.get_column_values_for_key_value.side_effect = [
            [previously_assigned],
            None,
            None,
        ]
        # The database contains records with higher values
        storage.get_last_column_value.return_value = "1234"

        enricher = Enricher(storage, msg)
        for entity in msg["contents"]:
            enricher.enrich(entity)

        expected_ids = ("01232", "01235", "01236")
        for entity, expected_id in zip(msg["contents"], expected_ids):
            self.assertEqual(entity["id"], expected_id)
Exemple #2
0
    def test_enrich_reuse_value(self):
        """
        Entities with the same code within one message get the same id.

        The storage mock reports neither existing values nor a last value.
        """
        msg = self.mock_msg
        msg["contents"] = [{"id": None, "code": code} for code in ("A", "B", "A", "B")]

        storage = self.mock_storage
        storage.get_column_values_for_key_value.return_value = None
        storage.get_last_column_value.return_value = None

        enricher = Enricher(storage, msg)
        for entity in msg["contents"]:
            enricher.enrich(entity)

        # The second occurrence of a code reuses the id of the first occurrence
        expected_ids = ("01230", "01231", "01230", "01231")
        for entity, expected_id in zip(msg["contents"], expected_ids):
            self.assertEqual(entity["id"], expected_id)
Exemple #3
0
    def test_enrich_existing_contents(self):
        """
        An entity that already holds a "geo" value is left as it is and the
        storage is not queried.
        """
        msg = self.mock_msg
        msg["contents"] = [{"geo": "aap"}]

        enricher = Enricher(self.mock_storage, msg)
        for entity in msg["contents"]:
            enricher.enrich(entity)

        self.mock_storage.get_query_value.assert_not_called()
        first_entity = msg["contents"][0]
        self.assertEqual(first_entity["geo"], "aap")
Exemple #4
0
    def test_enrich_empty_contents(self):
        """
        A message without contents triggers no storage query and stays empty.
        """
        msg = self.mock_msg
        msg["contents"] = []

        enricher = Enricher(self.mock_storage, msg)
        for entity in msg["contents"]:
            enricher.enrich(entity)

        self.mock_storage.get_query_value.assert_not_called()
        self.assertEqual(msg["contents"], [])
Exemple #5
0
    def test_enrich_id_already_filled(self):
        """
        An entity that already has an id keeps that id after enrichment.
        """
        msg = self.mock_msg
        msg["contents"] = [{"id": "123", "code": "A"}]

        storage = self.mock_storage
        storage.get_column_values_for_key_value.return_value = None
        storage.get_last_column_value.return_value = None

        enricher = Enricher(storage, msg)
        for entity in msg["contents"]:
            enricher.enrich(entity)

        first_entity = msg["contents"][0]
        self.assertEqual(first_entity["id"], "123")
Exemple #6
0
    def test_enrich_empty_contents(self):
        """
        Enriching a message without contents leaves the contents empty.
        """
        msg = self.mock_msg
        msg["contents"] = []

        storage = self.mock_storage
        storage.get_column_values_for_key_value.return_value = None
        storage.get_last_column_value.return_value = None

        enricher = Enricher(storage, msg)
        for entity in msg["contents"]:
            enricher.enrich(entity)

        self.assertEqual(msg["contents"], [])
Exemple #7
0
    def test_enrich_with_last_value(self):
        """
        A new id is derived from the last value reported by the storage.
        """
        msg = self.mock_msg
        msg["contents"] = [{"id": None, "code": "A"}]

        storage = self.mock_storage
        storage.get_column_values_for_key_value.return_value = None
        storage.get_last_column_value.return_value = "123"

        enricher = Enricher(storage, msg)
        for entity in msg["contents"]:
            enricher.enrich(entity)

        # The last value is incremented by one (123 => 124) and the result is
        # padded with zeroes to the expected length
        first_entity = msg["contents"][0]
        self.assertEqual(first_entity["id"], "00124")
Exemple #8
0
    def test_enrich_max_contents(self):
        """
        Ten entities with distinct codes can all be assigned an id.

        Only the first and the last assigned id are checked.
        """
        msg = self.mock_msg
        # Codes "0" up to and including "9"
        msg["contents"] = [{"id": None, "code": str(digit)} for digit in range(10)]

        storage = self.mock_storage
        storage.get_column_values_for_key_value.return_value = None
        storage.get_last_column_value.return_value = None

        enricher = Enricher(storage, msg)
        for entity in msg["contents"]:
            enricher.enrich(entity)

        for index, expected_id in ((0, "01230"), (9, "01239")):
            self.assertEqual(msg["contents"][index]["id"], expected_id)
Exemple #9
0
 def test_enrich_with_mulitple_current_values(self):
     """
     An AssertionError is raised when the storage returns more than one
     current record for the same entity.
     """
     Record = namedtuple('Record', ['id', 'code'])
     current_records = [
         Record(id="123", code="A"),
         Record(id="456", code="A"),
     ]

     msg = self.mock_msg
     msg["contents"] = [{"id": None, "code": "A"}]

     storage = self.mock_storage
     storage.get_column_values_for_key_value.return_value = current_records
     storage.get_last_column_value.return_value = None

     # Both the construction and the enrichment are inside the expectation
     with self.assertRaises(AssertionError):
         enricher = Enricher(storage, msg)
         for entity in msg["contents"]:
             enricher.enrich(entity)
Exemple #10
0
    def test_enrich_with_current_value(self):
        """
        When the storage already holds a single record for the entity, the id
        of that record is used.
        """
        Record = namedtuple('Record', ['id', 'code'])
        current_record = Record(id="123", code="A")

        msg = self.mock_msg
        msg["contents"] = [{"id": None, "code": "A"}]

        storage = self.mock_storage
        storage.get_column_values_for_key_value.return_value = [current_record]
        storage.get_last_column_value.return_value = None

        enricher = Enricher(storage, msg)
        for entity in msg["contents"]:
            enricher.enrich(entity)

        # The stored id is taken over as is: no increment and no zero padding
        first_entity = msg["contents"][0]
        self.assertEqual(first_entity["id"], "123")
Exemple #11
0
    def test_enrich_dry_run(self):
        """
        With "dry_run" set in the enrich specification the id stays None.
        """
        msg = self.mock_msg
        msg["contents"] = [{"id": None, "code": "0"}]
        msg["header"]["enrich"]["id"]["dry_run"] = True

        storage = self.mock_storage
        storage.get_column_values_for_key_value.return_value = None
        storage.get_last_column_value.return_value = None

        enricher = Enricher(storage, msg)
        for entity in msg["contents"]:
            enricher.enrich(entity)
            # Checked directly after each enrichment
            self.assertIsNone(entity["id"])
Exemple #12
0
    def test_enrich_simple_contents(self):
        self.mock_storage.get_query_value.return_value = "POINT (1 2)"
        msg = self.mock_msg
        msg["contents"] = [{"x": [1, 2]}]
        enricher = Enricher(self.mock_storage, msg)
        for content in msg["contents"]:
            enricher.enrich(content)

        self.mock_storage.get_query_value.assert_called_with("""
SELECT
      ST_AsText(
          ST_Union(geometrie)
      )
FROM  cat_col
WHERE fld in ('1', '2')
AND   eind_geldigheid IS NULL
""")
        self.assertEqual(msg["contents"][0]["geo"], "POINT (1.000 2.000)")
Exemple #13
0
    def test_enrich_overflow_contents(self):
        """
        An AssertionError is raised when eleven entities with distinct codes
        need an id; ten such entities are handled by test_enrich_max_contents.
        """
        msg = self.mock_msg
        # Codes "0" up to and including "9", followed by "A"
        msg["contents"] = [{"id": None, "code": code} for code in "0123456789A"]

        storage = self.mock_storage
        storage.get_column_values_for_key_value.return_value = None
        storage.get_last_column_value.return_value = None

        # Both the construction and the enrichment are inside the expectation
        with self.assertRaises(AssertionError):
            enricher = Enricher(storage, msg)
            for entity in msg["contents"]:
                enricher.enrich(entity)
Exemple #14
0
def compare(msg):
    """Compare the new data in msg (contents) with the current data

    Every entity in the contents is counted, enriched, populated and
    collected. When the collection has no entities yet, the entities are
    written directly as ADD events; otherwise they are collected in a
    temporary table that is compared with the stored data afterwards.

    :param msg: The new data, including header and summary
    :return: result message; when the dependencies are not met it holds the
        header, the logger summary and contents None, otherwise it holds the
        header, the summary, contents_ref and confirms
    """
    logger.configure(msg, "COMPARE")
    mode = msg.get('header', {}).get('mode', FULL_UPLOAD)
    logger.info(
        f"Compare (mode = {mode}) to GOB Database {GOBStorageHandler.user_name} started"
    )

    # The metadata is parsed from the message header
    metadata = ImportMessage(msg).metadata

    # Model of the collection that is compared
    entity_model = GOBModel().get_collection(metadata.catalogue, metadata.entity)

    # Storage handler for the collection
    storage = GOBStorageHandler(metadata)
    model = f"{metadata.source} {metadata.catalogue} {metadata.entity}"
    logger.info(f"Compare {model}")

    stats = CompareStatistics()

    # Only set when the entities are collected in a temporary table
    tmp_table_name = None
    with storage.get_session(), ProgressTicker("Collect compare events", 10000) as progress:
        # Stop when the dependencies are not met
        if not meets_dependencies(storage, msg):
            return {
                "header": msg["header"],
                "summary": logger.get_summary(),
                "contents": None
            }

        enricher = Enricher(storage, msg)
        populator = Populator(entity_model, msg)

        # Without any records in the database all data are ADD events
        initial_add = not storage.has_any_entity()
        if not initial_add:
            # Entities are collected in a temporary table
            collector = EntityCollector(storage)
            collect = collector.collect
            tmp_table_name = collector.tmp_table_name
        else:
            logger.info("Initial load of new collection detected")
            # ADD events are written directly, no temporary table is used
            contents_writer = ContentsWriter()
            contents_writer.open()
            # No confirms writer is passed because only ADD events are written
            collector = EventCollector(contents_writer,
                                       confirms_writer=None,
                                       version=entity_model['version'])
            collect = collector.collect_initial_add

        for entity in msg["contents"]:
            progress.tick()
            stats.collect(entity)
            enricher.enrich(entity)
            populator.populate(entity)
            collect(entity)

        collector.close()

    if not initial_add:
        # The entities in the temporary table are compared in a new session
        with storage.get_session():
            diff = storage.compare_temporary_data(tmp_table_name, mode)
            filename, confirms = _process_compare_results(
                storage, entity_model, diff, stats)
    else:
        filename, confirms = contents_writer.filename, None
        contents_writer.close()

    # The statistics are logged first and then extended with the logger summary
    results = stats.results()
    logger.info(f"Compare {model} completed", {'data': results})
    results.update(logger.get_summary())

    return {
        "header": msg["header"],
        "summary": results,
        "contents_ref": filename,
        "confirms": confirms
    }