def test_ckan_organization_creation():
    """Build a CkanOrganization from ``dummy_org`` and check its fields.

    Also verifies that the value returned by ``serialize()`` can be
    passed to ``json.dumps`` without raising.
    """
    org = CkanOrganization(dummy_org)

    expected_fields = (
        ('name', 'my-organization'),
        ('title', 'My Organization'),
        ('description', 'My org description'),
    )
    for field, value in expected_fields:
        assert getattr(org, field) == value

    # json.dumps raises if the serialized form contains anything
    # that is not JSON-encodable; the output itself is not inspected.
    json.dumps(org.serialize())
def test_organization_read(ckan_client_hl):
    """Create an organization and compare it with the object sent.

    :param ckan_client_hl: client fixture exposing
        ``create_organization()``.
    """
    source = CkanOrganization(generate_organization())
    stored = ckan_client_hl.create_organization(source)

    # Equivalence is checked in both directions.
    assert source.is_equivalent(stored)
    assert stored.is_equivalent(source)
def test_organization_get_by_name(ckan_client_hl):
    """Check organization lookup by id and by name.

    Each lookup must return the created object when given the matching
    kind of identifier, and fail with an ``HTTPError`` carrying status
    code 404 when given the other kind.

    :param ckan_client_hl: client fixture exposing
        ``create_organization()``, ``get_organization()`` and
        ``get_organization_by_name()``.
    """
    org_name = 'example-organization-name'

    payload = generate_organization()
    payload['name'] = org_name
    wanted = CkanOrganization(payload)

    stored = ckan_client_hl.create_organization(wanted)
    assert stored.is_equivalent(wanted)
    org_id = stored.id

    # Lookup by id returns the created organization
    fetched_by_id = ckan_client_hl.get_organization(org_id)
    assert stored == fetched_by_id

    # Lookup by name returns the created organization
    fetched_by_name = ckan_client_hl.get_organization_by_name(org_name)
    assert stored == fetched_by_name

    # A name passed to the by-id lookup must not match anything.
    # The status check sits after the ``with`` block: statements that
    # follow the raising call inside the block would never run.
    with pytest.raises(HTTPError) as excinfo:
        ckan_client_hl.get_organization(org_name)
    assert excinfo.value.status_code == 404

    # An id passed to the by-name lookup must not match anything
    with pytest.raises(HTTPError) as excinfo:
        ckan_client_hl.get_organization_by_name(org_id)
    assert excinfo.value.status_code == 404
def test_organization_list(ckan_client_hl):
    """Create ten organizations and check they all appear in the listing.

    :param ckan_client_hl: client fixture exposing
        ``create_organization()`` and ``list_organizations()``.
    """
    client = ckan_client_hl

    # Create a bunch of organizations.
    # ``range`` is used instead of ``xrange`` so the test runs on both
    # Python 2 and Python 3; for ten items the two are equivalent.
    obj_dicts = [generate_organization() for _ in range(10)]
    objs = [CkanOrganization.from_dict(d) for d in obj_dicts]
    created_objs = [client.create_organization(o) for o in objs]

    # Make sure all the organizations are in the list
    obj_ids = client.list_organizations()
    for obj in created_objs:
        assert obj.id is not None
        assert obj.id in obj_ids
def test_merge_organizations(ckan_client_arguments):
    """Exercise the "create" and "update" organization merge strategies.

    Two organizations are created up front, then two synchronizations
    are run under different ``organization_merge_strategy`` settings.
    After each run the titles stored in Ckan are compared with the
    expected ones.

    :param ckan_client_arguments: fixture whose item ``0`` holds the
        positional and item ``1`` the keyword arguments passed to both
        client constructors.
    """
    positional = ckan_client_arguments[0]
    keyword = ckan_client_arguments[1]
    client = CkanHighlevelClient(*positional, **keyword)
    sync_client = SynchronizationClient(*positional, **keyword)

    def title_of(org_name):
        # Title currently stored in Ckan for the named organization
        return client.get_organization_by_name(org_name).title

    def run_sync(strategy, organizations):
        # Select the merge strategy, then sync organizations only
        payload = {
            'organization': organizations,
            'group': {},
            'dataset': {},
        }
        sync_client._conf['organization_merge_strategy'] = strategy
        sync_client.sync('test_merge_organizations', payload)

    # ------------------------------------------------------------
    # Initial state: two pre-existing organizations
    # ------------------------------------------------------------
    for org_name, org_title in (('tmo-1', 'TMO 1'), ('tmo-2', 'TMO 2')):
        client.create_organization(
            CkanOrganization({'name': org_name, 'title': org_title}))

    # ------------------------------------------------------------
    # "create" strategy: the existing tmo-2 keeps its title,
    # the missing tmo-3 gets created
    # ------------------------------------------------------------
    run_sync('create', {
        'tmo-2': {'name': 'tmo-2', 'title': 'TMO 2.1'},
        'tmo-3': {'name': 'tmo-3', 'title': 'TMO 3.1'},
    })

    assert title_of('tmo-1') == 'TMO 1'
    assert title_of('tmo-2') == 'TMO 2'
    assert title_of('tmo-3') == 'TMO 3.1'

    # ------------------------------------------------------------
    # "update" strategy: the existing tmo-2 takes the new title,
    # the missing tmo-4 gets created
    # ------------------------------------------------------------
    run_sync('update', {
        'tmo-2': {'name': 'tmo-2', 'title': 'TMO 2.2'},
        'tmo-4': {'name': 'tmo-4', 'title': 'TMO 4.2'},
    })

    assert title_of('tmo-1') == 'TMO 1'
    assert title_of('tmo-2') == 'TMO 2.2'
    assert title_of('tmo-3') == 'TMO 3.1'
    assert title_of('tmo-4') == 'TMO 4.2'
def sync(self, source_name, data): """ Synchronize data from a source into Ckan. - datasets are matched by _harvest_source - groups and organizations are matched by name :param source_name: String identifying the source of the data. Used to build ids that will be used in further synchronizations. :param data: Data to be synchronized. Should be a dict (or dict-like) with top level keys coresponding to the object type, mapping to dictionaries of ``{'id': <object>}``. """ groups = dict( (key, CkanGroup(val)) for key, val in data['group'].iteritems()) organizations = dict( (key, CkanOrganization(val)) for key, val in data['organization'].iteritems()) # Upsert groups and organizations groups_map = self._upsert_groups(groups) orgs_map = self._upsert_organizations(organizations) # Create list of datasets to be synced source_datasets = {} for source_id, dataset_dict in data['dataset'].iteritems(): _dataset_dict = copy.deepcopy(dataset_dict) # We need to make sure "source" datasets # don't have (otherwise misleading) ids _dataset_dict.pop('id', None) # We need to update groups and organizations, # to map their name from the source into a # ckan id _dataset_dict['groups'] = [ groups_map.to_ckan(grp_id) for grp_id in _dataset_dict['groups'] ] _dataset_dict['owner_org'] = \ orgs_map.to_ckan(_dataset_dict['owner_org']) dataset = CkanDataset(_dataset_dict) # We also want to add the "source id", used for further # synchronizations to find stuff dataset.extras[HARVEST_SOURCE_ID_FIELD] = \ self._join_source_id(source_name, source_id) source_datasets[source_id] = dataset # Retrieve list of datasets from Ckan ckan_datasets = self._find_datasets_by_source(source_name) # Compare collections to find differences differences = self._compare_collections( ckan_datasets, source_datasets) # ------------------------------------------------------------ # We now need to create/update/delete datasets. # todo: we need to make sure dataset names are not # already used by another dataset. 
The only # way is to randomize resource names and hope # a 409 response indicates duplicate name.. # We delete first, in order to (possibly) deallocate # some already-used names.. for source_id in differences['left']: ckan_id = ckan_datasets[source_id].id logger.info('Deleting dataset {0}'.format(ckan_id)) self._client.delete_dataset(ckan_id) def force_dataset_operation(operation, dataset, retry=5): # Maximum dataset name length is 100 characters # We trim it down to 80 just to be safe. # Note: we generally want to preserve the original name # and there should *never* be problems with that # when updating.. _orig_name = dataset.name[:80] dataset.name = _orig_name while True: try: result = operation(dataset) except HTTPError, e: if e.status_code != 409: raise retry -= 1 if retry < 0: raise dataset.name = '{0}-{1:06d}'.format( _orig_name, random.randint(0, 999999)) logger.debug('Got 409: trying to rename dataset to {0}' .format(dataset.name)) else: return result