Exemple #1
0
def test_ckan_organization_creation():
    """Build a ``CkanOrganization`` from ``dummy_org`` and check its fields.

    Verifies the ``name``, ``title`` and ``description`` attributes, then
    checks that the output of ``serialize()`` is accepted by ``json.dumps``.
    """
    org = CkanOrganization(dummy_org)

    expected_fields = (
        ('name', 'my-organization'),
        ('title', 'My Organization'),
        ('description', 'My org description'),
    )
    for field_name, expected_value in expected_fields:
        assert getattr(org, field_name) == expected_value

    # json.dumps() raises if the serialized form is not JSON-compatible
    json.dumps(org.serialize())
Exemple #2
0
def test_organization_read(ckan_client_hl):
    """Create an organization and compare it with the object submitted.

    ``is_equivalent`` is asserted in both directions between the
    submitted object and the one returned by ``create_organization``.
    """
    submitted_org = CkanOrganization(generate_organization())
    created_org = ckan_client_hl.create_organization(submitted_org)

    # Equivalence must hold whichever side it is asked from
    assert submitted_org.is_equivalent(created_org)
    assert created_org.is_equivalent(submitted_org)
Exemple #3
0
def test_organization_get_by_name(ckan_client_hl):
    """Check organization lookup by id and by name.

    An organization with a known name is created; it must then be
    retrievable through ``get_organization`` (by id) and
    ``get_organization_by_name`` (by name). Passing the wrong kind of
    identifier to either call is expected to raise an ``HTTPError``
    with status code 404.
    """
    org_name = 'example-organization-name'

    org_dict = generate_organization()
    org_dict['name'] = org_name
    submitted = CkanOrganization(org_dict)

    created = ckan_client_hl.create_organization(submitted)
    assert created.is_equivalent(submitted)
    org_id = created.id

    # Lookup by id returns the created object
    assert created == ckan_client_hl.get_organization(org_id)

    # Lookup by name returns the created object
    assert created == ckan_client_hl.get_organization_by_name(org_name)

    # A name is not accepted where an id is expected
    with pytest.raises(HTTPError) as excinfo:
        ckan_client_hl.get_organization(org_name)
    assert excinfo.value.status_code == 404

    # An id is not accepted where a name is expected
    with pytest.raises(HTTPError) as excinfo:
        ckan_client_hl.get_organization_by_name(org_id)
    assert excinfo.value.status_code == 404
Exemple #4
0
def test_organization_list(ckan_client_hl):
    """Create several organizations and check they are all listed.

    Ten organizations are created through the client; each created
    object must have an id, and that id must appear in the result of
    ``list_organizations()``.
    """
    client = ckan_client_hl

    # Create a bunch of organizations.
    # ``range`` (not the Python 2-only ``xrange``) keeps this runnable
    # on both Python 2 and Python 3.
    obj_dicts = [generate_organization() for _ in range(10)]
    objs = [CkanOrganization.from_dict(d) for d in obj_dicts]
    created_objs = [client.create_organization(o) for o in objs]

    # Make sure all the organizations are in the list
    obj_ids = client.list_organizations()
    for obj in created_objs:
        assert obj.id is not None
        assert obj.id in obj_ids
Exemple #5
0
def test_merge_organizations(ckan_client_arguments):
    """Exercise the ``organization_merge_strategy`` setting of the sync client.

    Two organizations (``tmo-1``, ``tmo-2``) are created up front, then
    two synchronizations are run:

    - with strategy ``'create'``: the existing ``tmo-2`` keeps its title
      and the missing ``tmo-3`` is created;
    - with strategy ``'update'``: the existing ``tmo-2`` gets the new
      title and the missing ``tmo-4`` is created.

    Organizations not mentioned in a sync are expected to stay untouched.
    """
    positional = ckan_client_arguments[0]
    keyword = ckan_client_arguments[1]
    client = CkanHighlevelClient(*positional, **keyword)
    sync_client = SynchronizationClient(*positional, **keyword)

    def sync_organizations(strategy, organizations):
        # Run one sync carrying only organizations (no groups/datasets)
        payload = {
            'organization': organizations,
            'group': {},
            'dataset': {},
        }
        sync_client._conf['organization_merge_strategy'] = strategy
        sync_client.sync('test_merge_organizations', payload)

    def assert_titles(expected):
        # ``expected`` is a sequence of (organization name, title) pairs
        for org_name, title in expected:
            assert client.get_organization_by_name(org_name).title == title

    # Initial organizations
    # ------------------------------------------------------------

    for org_name, title in (('tmo-1', 'TMO 1'), ('tmo-2', 'TMO 2')):
        client.create_organization(
            CkanOrganization({'name': org_name, 'title': title}))

    # "create" strategy
    # ------------------------------------------------------------

    sync_organizations('create', {
        'tmo-2': {'name': 'tmo-2', 'title': 'TMO 2.1'},
        'tmo-3': {'name': 'tmo-3', 'title': 'TMO 3.1'},
    })

    assert_titles([
        ('tmo-1', 'TMO 1'),
        ('tmo-2', 'TMO 2'),
        ('tmo-3', 'TMO 3.1'),
    ])

    # "update" strategy
    # ------------------------------------------------------------

    sync_organizations('update', {
        'tmo-2': {'name': 'tmo-2', 'title': 'TMO 2.2'},
        'tmo-4': {'name': 'tmo-4', 'title': 'TMO 4.2'},
    })

    assert_titles([
        ('tmo-1', 'TMO 1'),
        ('tmo-2', 'TMO 2.2'),
        ('tmo-3', 'TMO 3.1'),
        ('tmo-4', 'TMO 4.2'),
    ])
Exemple #6
0
    def sync(self, source_name, data):
        """
        Synchronize data from a source into Ckan.

        - datasets are matched by _harvest_source
        - groups and organizations are matched by name

        :param source_name:
            String identifying the source of the data. Used to build
            ids that will be used in further synchronizations.
        :param data:
            Data to be synchronized. Should be a dict (or dict-like)
            with top level keys corresponding to the object type,
            mapping to dictionaries of ``{'id': <object>}``.
        """

        groups = dict(
            (key, CkanGroup(val))
            for key, val in data['group'].iteritems())

        organizations = dict(
            (key, CkanOrganization(val))
            for key, val in data['organization'].iteritems())

        # Upsert groups and organizations
        groups_map = self._upsert_groups(groups)
        orgs_map = self._upsert_organizations(organizations)

        # Create list of datasets to be synced
        source_datasets = {}
        for source_id, dataset_dict in data['dataset'].iteritems():
            _dataset_dict = copy.deepcopy(dataset_dict)

            # We need to make sure "source" datasets
            # don't have (otherwise misleading) ids
            _dataset_dict.pop('id', None)

            # We need to update groups and organizations,
            # to map their name from the source into a
            # ckan id
            _dataset_dict['groups'] = [
                groups_map.to_ckan(grp_id)
                for grp_id in _dataset_dict['groups']
            ]
            _dataset_dict['owner_org'] = \
                orgs_map.to_ckan(_dataset_dict['owner_org'])

            dataset = CkanDataset(_dataset_dict)

            # We also want to add the "source id", used for further
            # synchronizations to find stuff
            dataset.extras[HARVEST_SOURCE_ID_FIELD] = \
                self._join_source_id(source_name, source_id)

            source_datasets[source_id] = dataset

        # Retrieve list of datasets from Ckan
        ckan_datasets = self._find_datasets_by_source(source_name)

        # Compare collections to find differences
        differences = self._compare_collections(
            ckan_datasets, source_datasets)

        # ------------------------------------------------------------
        # We now need to create/update/delete datasets.

        # todo: we need to make sure dataset names are not
        # already used by another dataset. The only
        # way is to randomize resource names and hope
        # a 409 response indicates duplicate name..

        # We delete first, in order to (possibly) deallocate
        # some already-used names..
        for source_id in differences['left']:
            ckan_id = ckan_datasets[source_id].id
            logger.info('Deleting dataset {0}'.format(ckan_id))
            self._client.delete_dataset(ckan_id)

        def force_dataset_operation(operation, dataset, retry=5):
            """
            Run ``operation(dataset)``, renaming the dataset on HTTP 409.

            The dataset name is first trimmed to 80 characters (the
            dataset object is modified in place). If the operation
            raises an ``HTTPError`` with status code 409, a random
            six-digit suffix is appended to the trimmed name and the
            operation is attempted again.

            :param operation:
                Callable accepting the dataset as its only argument.
            :param dataset:
                Object with a ``name`` attribute; passed to ``operation``.
            :param retry:
                Maximum number of retries after the first attempt.
            :return:
                Whatever ``operation`` returned on the successful attempt.
            :raises HTTPError:
                Immediately if the status code is not 409, or with the
                last 409 error once retries are exhausted.
            """
            # Maximum dataset name length is 100 characters
            # We trim it down to 80 just to be safe.

            # Note: we generally want to preserve the original name
            #       and there should *never* be problems with that
            #       when updating..

            _orig_name = dataset.name[:80]
            dataset.name = _orig_name

            while True:
                try:
                    result = operation(dataset)
                # "except X as e" is valid on Python 2.6+ and Python 3,
                # unlike the Python 2-only "except X, e" form.
                except HTTPError as e:
                    if e.status_code != 409:
                        raise
                    retry -= 1
                    if retry < 0:
                        raise
                    # Suffix is always built from the trimmed original
                    # name, so suffixes never pile up across retries.
                    dataset.name = '{0}-{1:06d}'.format(
                        _orig_name,
                        random.randint(0, 999999))
                    logger.debug('Got 409: trying to rename dataset to {0}'
                                 .format(dataset.name))
                else:
                    return result