Example 1
    def test_returns_default_on_dict_list(self):
        extras = Extras([
            {
                'key': 'foo',
                'value': 'foo-value'
            },
            {
                'key': 'bar',
                'value': 'baz'
            },
        ])
        self.assertEqual('OhNo', extras.value('baz', 'OhNo'))
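
All of these examples exercise an Extras helper whose import and implementation are not shown on this page. For orientation, here is a minimal sketch that is consistent with the behaviour the tests assert; the class and method names are taken from the examples, but the body is an assumed reconstruction, not the project's actual code:

# Assumed reconstruction, for reading purposes only; the real Extras class
# lives in the project under test and may differ in detail.
_MISSING = object()


class Extras(object):

    def __init__(self, extras=None):
        # Accepts either a CKAN-style list of {'key': ..., 'value': ...}
        # dicts or a plain dict (the "flat list" cases in the tests).
        extras = extras or []
        if isinstance(extras, dict):
            self._extras = [{'key': k, 'value': v} for k, v in extras.items()]
        else:
            self._extras = list(extras)

    def key(self, key):
        # True if an entry with this key exists.
        return any(entry.get('key') == key for entry in self._extras)

    def value(self, key, default=_MISSING):
        # Return the value stored under key, fall back to default, else raise.
        for entry in self._extras:
            if entry.get('key') == key:
                return entry['value']
        if default is _MISSING:
            raise KeyError(key)
        return default

    def update(self, key, value, upsert=False):
        # Replace the value of an existing key; append the pair when upserting.
        for entry in self._extras:
            if entry.get('key') == key:
                entry['value'] = value
                return True
        if upsert:
            self._extras.append({'key': key, 'value': value})
            return True
        return False

    def len(self):
        return len(self._extras)

    def get(self):
        return self._extras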
Example 2
    def test_returns_value_on_dict_list(self):
        extras = Extras([
            {
                'key': 'foo',
                'value': 'foo-value'
            },
            {
                'key': 'baz',
                'value': 'baz-value'
            },
        ])
        self.assertEqual('foo-value', extras.value('foo'))
Example 3
    def test_upsert_on_empty_dict_list_works_as_expected(self):
        extras = Extras()

        expected_extras = [{
            'key': 'three',
            'value': 3,
        }]

        self.assertTrue(extras.update('three', 3, True))
        self.assertEqual(3, extras.value('three'))
        self.assertEqual(1, extras.len())

        self.assertEqual(expected_extras, extras.get())
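
The third positional argument to update() turns the update into an upsert: if the key is not already present, the key/value pair is appended (Examples 6 and 10 use the same flag on non-empty inputs; Example 4 updates an existing key without it).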
Example 4
    def test_update_on_dict_list_works_as_expected(self):
        extras = Extras([
            {
                'key': 'hash',
                'value': 'tag'
            },
            {
                'key': 'label',
                'value': 'dot'
            },
        ])
        self.assertTrue(extras.update('label', 'doubledot'))
        self.assertEqual('doubledot', extras.value('label'))
Example 5
    def test_alternates_structure_as_expected(self):
        extras = Extras([
            {
                'key': 'terms_of_use',
                'value': [{
                    'licence_id': 'some-id',
                    'licence_url': 'some-url',
                }]
            },
        ])

        expected_value = [{
            'license_id': 'some-id',
            'license_url': 'some-url',
        }]

        extras.update(
            'terms_of_use',
            expected_value,
        )

        self.assertEqual(expected_value, extras.value('terms_of_use'))
        self.assertEqual(1, len(extras.value('terms_of_use')))
Example 6
    def test_upsert_on_dict_list_works_as_expected(self):
        extras = Extras([
            {
                'key': 'one',
                'value': 1
            },
            {
                'key': 'two',
                'value': 2
            },
        ])
        self.assertTrue(extras.update('three', 3, True))
        self.assertEqual(3, extras.value('three'))
        self.assertEqual(3, extras.len())
Example 7
    def test_returns_modified_sector(self):
        extras = Extras([
            {
                'key': 'metadata_original_portal',
                'value': None
            },
            {
                'key': 'sector',
                'value': None
            },
        ])

        self.assertTrue(extras.update('sector', 'privat'))
        self.assertEqual('privat', extras.value('sector'))
Example 8
    def test_original_groups_are_updated_as_expected(self):
        extras_in = [{
            "key": "contacts",
            "value": "[{'url': 'www.open.nrw.de', 'role': 'vertrieb', 'name': 'Gesch\\u00e4ftsstelle Open.NRW', 'email': '*****@*****.**'}]"
        }, {
            "key": "dates",
            "value": "[{'date': '2016-06-08T12:31:11+02:00', 'role': 'erstellt'}, {'date': '2014-05-26T12:39:03+02:00', 'role': 'veroeffentlicht'}, {'date': '2016-06-08T12:31:11+02:00', 'role': 'aktualisiert'}]"
        }, {
            "key": "images",
            "value": "['https://open.nrw/profiles/nrw_ressort/themes/custom/nrw_base/images/grayish-blue/files/koeln_klein.png']"
        }, {
            "key": "metadata_original_portal",
            "value": "http://open.nrw/"
        }, {
            "key": "metadata_transformer",
            "value": "boo"
        }, {
            "key": "non_open",
            "value": "false"
        }, {
            "key": "opennrw_spatial",
            "value": "Stadt Köln"
        }, {
            "key": "original_groups",
            "value": "['Politik und Wahlen']"
        }, {
            "key": "spatial",
            "value": "{'type': 'polygon', 'coordinates': [[[6.7838099999999999, 50.825465999999999], [7.1533170000000004, 50.825465999999999], [7.1533170000000004, 51.090167999999998], [6.7838099999999999, 51.090167999999998], [6.7838099999999999, 50.825465999999999]]]}"
        }]

        extras = Extras(extras_in)

        self.assertTrue(
            extras.update('original_groups', ['group one', 'group two']))
        self.assertEqual(2, len(extras.value('original_groups')))
Example 9
    def test_returns_value_on_dict_list_nested(self):
        extras = Extras([
            {
                'key': 'foo',
                'value': {
                    'nested': 'nested-value',
                    'zoo': 'zoo-value',
                }
            },
            {
                'key': 'baz',
                'value': 'baz-value'
            },
        ])

        expected_value = {
            'nested': 'nested-value',
            'zoo': 'zoo-value',
        }

        self.assertEqual(expected_value, extras.value('foo'))
Example 10
    def test_upsert_on_flat_list_works_as_expected(self):
        extras = Extras({'dates': 'foo', 'bar': 'baz', 'some': 'thing'})
        self.assertTrue(extras.update('new', 'kid', True))
        self.assertEqual('kid', extras.value('new'))
        self.assertEqual(4, extras.len())
Example 11
    def test_raises_error_when_list_empty(self):
        extras = Extras([])
        # the exact exception type is not shown in this excerpt
        with self.assertRaises(Exception):
            extras.value('raiser')
Example 12
    def test_raises_error_when_key_not_found_on_dict_list(self):
        extras = Extras([{'dates': 'foo'}, {'bar': 'baz'}])
        # the exact exception type is not shown in this excerpt
        with self.assertRaises(Exception):
            extras.value('raiser')
Example 13
    def test_raises_error_when_key_not_found_on_flat_list(self):
        extras = Extras({'dates': 'foo', 'bar': 'baz'})
        # the exact exception type is not shown in this excerpt
        with self.assertRaises(Exception):
            extras.value('raiser')
Example 14
    def test_returns_default_on_flat_list(self):
        extras = Extras({'dates': 'foo', 'bar': 'baz'})
        self.assertEqual('Default', extras.value('foo', 'Default'))
Example 15
    def test_returns_value_on_flat_list_with_dict(self):
        extras = Extras({'terms_of_use': {'license_id': 'some-license'}})
        self.assertEqual({'license_id': 'some-license'},
                         extras.value('terms_of_use'))
Example 16
    def test_update_on_flat_list_works_as_expected(self):
        extras = Extras({'dates': 'foo', 'bar': 'baz', 'some': 'thing'})
        self.assertTrue(extras.update('some', 'one'))
        self.assertEqual('one', extras.value('some'))
Example 17
    @classmethod
    def handle_duplicates(cls, harvest_object_content):
        '''Compares the new dataset with existing ones and checks whether the dataset should be imported.'''

        method_prefix = 'handle_duplicates: '
        context = cls.build_context()

        remote_dataset = json.loads(harvest_object_content)
        remote_dataset_extras = Extras(remote_dataset['extras'])
        remote_dataset_name = remote_dataset.get('name', '')

        has_orig_id = remote_dataset_extras.key(EXTRAS_KEY_DCT_IDENTIFIER)
        if has_orig_id:
            orig_id = remote_dataset_extras.value(EXTRAS_KEY_DCT_IDENTIFIER)
            if orig_id:
                try:
                    data_dict = {
                        "q": EXTRAS_KEY_DCT_IDENTIFIER + ':"' + orig_id + '"'
                    }
                    # Add filter that local dataset guid is not equal to guid of the remote dataset
                    if remote_dataset_extras.key('guid'):
                        data_dict['fq'] = '-guid:"' + \
                            remote_dataset_extras.value('guid') + '"'
                    local_search_result = p.toolkit.get_action(
                        "package_search")(context, data_dict)
                    if local_search_result['count'] == 0:
                        LOGGER.debug('%sDid not find any existing dataset in the database. ' \
                            'Import accepted for %s.', method_prefix, remote_dataset_name)
                        return True
                    elif local_search_result['count'] == 1:
                        LOGGER.debug('%sFound duplicate entry for dataset %s.',
                                     method_prefix, remote_dataset_name)
                        local_dataset = local_search_result['results'][0]
                        local_dataset_extras = Extras(local_dataset['extras'])

                        # TODO: if in doubt, use the CKAN field "metadata_modified" of the
                        # local dataset when "modified" is not present?
                        if remote_dataset_extras.key(EXTRAS_KEY_DCT_MODIFIED) and \
                                local_dataset_extras.key(EXTRAS_KEY_DCT_MODIFIED):
                            return cls.compare_metadata_modified(
                                remote_dataset_extras.value(
                                    EXTRAS_KEY_DCT_MODIFIED),
                                local_dataset_extras.value(
                                    EXTRAS_KEY_DCT_MODIFIED))
                        else:
                            LOGGER.info(
                                '%sFound duplicate entry with the value "%s" in field "identifier", but ' \
                                'remote and/or local dataset does not contain a modified date. ' \
                                '-> Skipping import for %s!',
                                method_prefix, orig_id, remote_dataset_name)
                    else:
                        LOGGER.info('%sFound multiple duplicates with the value "%s" in field ' \
                            '"identifier". -> Skipping import for %s!', method_prefix, orig_id,
                            remote_dataset_name)
                except Exception as exception:
                    LOGGER.error(exception)
            else:
                LOGGER.debug(
                    '%sNo original id in field identifier found. Import accepted for %s.',
                    method_prefix, remote_dataset_name)
                return True
        else:
            LOGGER.debug(
                '%sNo field identifier found. Import accepted for %s.',
                method_prefix, remote_dataset_name)
            return True

        return False
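
What appears to be a second, extended revision of the same method follows; it additionally resolves cases with multiple local duplicates and deletes the superseded packages: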
    @staticmethod
    def handle_duplicates(harvest_object_content):
        '''Compares the new dataset with existing ones and checks whether the dataset should be imported.'''

        method_prefix = 'handle_duplicates: '
        context = HarvestUtils.build_context()

        remote_dataset = json.loads(harvest_object_content)
        remote_dataset_extras = Extras(remote_dataset['extras'])
        remote_dataset_name = remote_dataset.get('name', '')

        has_orig_id = remote_dataset_extras.key(EXTRAS_KEY_DCT_IDENTIFIER)
        if has_orig_id:
            orig_id = remote_dataset_extras.value(EXTRAS_KEY_DCT_IDENTIFIER)
            # remote dataset contains identifier
            if orig_id:
                try:
                    data_dict = {
                        "q": EXTRAS_KEY_DCT_IDENTIFIER + ':"' + orig_id + '"'
                    }
                    # Add filter that local dataset guid is not equal to guid of the remote dataset
                    if remote_dataset_extras.key('guid'):
                        data_dict['fq'] = '-guid:"' + \
                            remote_dataset_extras.value('guid') + '"'
                    # search for other datasets with the same identifier
                    local_search_result = p.toolkit.get_action(
                        "package_search")(context, data_dict)
                    if local_search_result['count'] == 0:
                        # no other dataset with the same identifier was found, import accepted
                        LOGGER.debug(u'%sDid not find any existing dataset in the database with ' \
                                     u'Identifier %s. Import accepted for dataset %s.', method_prefix,
                                     orig_id, remote_dataset_name)
                        return True
                    else:
                        # other dataset with the same identifier was found
                        LOGGER.debug(
                            u'%sFound duplicate entries with Identifier %s for dataset %s.',
                            method_prefix, orig_id, remote_dataset_name)
                        remote_is_latest = True
                        local_dataset_has_modified = False
                        latest_local_dataset = {}
                        if not remote_dataset_extras.key(
                                EXTRAS_KEY_DCT_MODIFIED):
                            remote_is_latest = False

                        # compare modified date with all local datasets
                        for local_dataset in local_search_result['results']:
                            local_dataset_extras = Extras(
                                local_dataset['extras'])

                            if local_dataset_extras.key(
                                    EXTRAS_KEY_DCT_MODIFIED):
                                local_dataset_has_modified = True
                                # record the local dataset with the latest date
                                _set_or_update_latest_dataset(
                                    latest_local_dataset,
                                    local_dataset_extras.value(
                                        EXTRAS_KEY_DCT_MODIFIED),
                                    local_dataset['id'])
                                # compare dct:modified if remote and local dataset contain the field
                                # "modified" and remote dataset is still not detected as older
                                if remote_is_latest and remote_dataset_extras.key(
                                        EXTRAS_KEY_DCT_MODIFIED):
                                    remote_is_latest = HarvestUtils.compare_metadata_modified(
                                        remote_dataset_extras.value(
                                            EXTRAS_KEY_DCT_MODIFIED),
                                        local_dataset_extras.value(
                                            EXTRAS_KEY_DCT_MODIFIED))

                        if remote_is_latest:
                            # Import accepted. Delete all local datasets with the same identifier.
                            LOGGER.debug(u'%sRemote dataset with Identifier %s is the latest. '\
                                         u'Modified date: %s. Import accepted for dataset %s.',
                                         method_prefix, orig_id,
                                         remote_dataset_extras.value(EXTRAS_KEY_DCT_MODIFIED),
                                         remote_dataset_name)
                            packages_deleted = _delete_packages_keep(
                                local_search_result['results'])
                            LOGGER.debug(u'%sDeleted packages: %s',
                                         method_prefix,
                                         ','.join(packages_deleted))
                            return True
                        elif local_dataset_has_modified:
                            # Skip import. Delete local datasets, but keep the one with the
                            # latest date in the field "modified".
                            LOGGER.info(u'%sRemote dataset with Identifier %s is NOT the latest. '\
                                        u'Modified date: %s. Keep local dataset with ' \
                                        u'latest date in field "modified". Skipping import for dataset %s!',
                                        method_prefix, orig_id,
                                        remote_dataset_extras.value(EXTRAS_KEY_DCT_MODIFIED, 'n/a'),
                                        remote_dataset_name)
                            packages_deleted = _delete_packages_keep(
                                local_search_result['results'],
                                latest_local_dataset)
                            LOGGER.debug(u'%sDeleted packages: %s',
                                         method_prefix,
                                         ','.join(packages_deleted))
                        else:
                            # Skip import, because neither the remote dataset nor any local
                            # dataset contains the field "modified". Delete local datasets,
                            # but keep the one last modified in the database.
                            LOGGER.info(
                                u'%sFound duplicate entries with the value "%s" in field "identifier", but ' \
                                u'neither the remote nor the local datasets contain a modified date. ' \
                                u'Keeping the local dataset last modified in the database. Skipping import for %s!',
                                method_prefix, orig_id, remote_dataset_name)
                            last_modified_local_dataset = {}
                            for local_dataset in local_search_result[
                                    'results']:
                                # record the local dataset with the latest date
                                _set_or_update_latest_dataset(
                                    last_modified_local_dataset,
                                    local_dataset.get('metadata_modified',
                                                      None),
                                    local_dataset['id'])
                            packages_deleted = _delete_packages_keep(
                                local_search_result['results'],
                                last_modified_local_dataset)
                            LOGGER.debug(u'%sDeleted packages: %s',
                                         method_prefix,
                                         ','.join(packages_deleted))
                except Exception as exception:
                    LOGGER.error(exception)
            else:
                LOGGER.debug(
                    u'%sNo original id in field identifier found. Import accepted for dataset %s.',
                    method_prefix, remote_dataset_name)
                return True
        else:
            LOGGER.debug(
                u'%sNo field identifier found. Import accepted for dataset %s.',
                method_prefix, remote_dataset_name)
            return True

        return False
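
Both revisions above also call the module-level helpers _set_or_update_latest_dataset and _delete_packages_keep, which are not shown on this page. The sketches below are inferred from the call sites alone; the signatures, the string-based date comparison, and the use of CKAN's package_delete action are assumptions, not the project's actual code:

def _set_or_update_latest_dataset(latest_dataset, modified_value, dataset_id):
    # Assumed helper: remember the dataset with the most recent "modified"
    # value by mutating the dict in place (callers pass {} and rely on side
    # effects). Plain string comparison of the ISO 8601 dates is a
    # simplification; the real helper may parse them.
    if modified_value is None:
        return
    if not latest_dataset or modified_value > latest_dataset['modified']:
        latest_dataset['id'] = dataset_id
        latest_dataset['modified'] = modified_value


def _delete_packages_keep(local_datasets, dataset_to_keep=None):
    # Assumed helper: delete every local duplicate except the one to keep and
    # return the ids of the deleted packages (callers join the result with ',').
    deleted = []
    keep_id = dataset_to_keep.get('id') if dataset_to_keep else None
    context = HarvestUtils.build_context()
    for dataset in local_datasets:
        if dataset['id'] == keep_id:
            continue
        p.toolkit.get_action('package_delete')(context, {'id': dataset['id']})
        deleted.append(dataset['id'])
    return deleted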