def __call__(self):
    """Fetch metadata for every distinct entity id through the configured action.

    The action named by ``self.action`` is looked up on ``self.api.action``
    and called once per unique id in ``self.entity_ids``. Ids for which the
    call raises ``errors.NotFound`` are logged and left out of the result.

    Returns:
        dict: Maps each id that was found to the value returned by the action.
    """
    metadata = {}
    try:
        endpoint = getattr(self.api.action, self.action)
        # set() avoids querying the same id more than once.
        for item_id in set(self.entity_ids):
            try:
                metadata[item_id] = endpoint(id=item_id)
            except errors.NotFound:
                url = f"action={self.action} id={item_id}"
                logging.error(f"CKAN API NotFound error: {url}")
    finally:
        # Close the connection even when an unexpected error aborts the loop.
        RemoteCKAN.close(self.api)
    return metadata
def push_dataset_to_ckan(catalog, owner_org, dataset_origin_identifier,
                         portal_url, apikey, catalog_id=None,
                         demote_superThemes=True, demote_themes=True):
    """Write the metadata of a dataset to the given CKAN portal.

    Args:
        catalog (DataJson): The source catalog that contains the dataset.
        owner_org (str): The organization the dataset belongs to.
        dataset_origin_identifier (str): The id of the dataset to federate.
        portal_url (str): The URL of the destination CKAN portal.
        apikey (str): API key of a user with permission to create or
            update the dataset.
        catalog_id (str): Prefix prepended to the dataset's id and name in
            the destination catalog.
        demote_superThemes (bool): If true, the ids of the dataset's super
            themes are propagated as groups.
        demote_themes (bool): If true, the labels of the dataset's themes
            become tags; otherwise they are passed as groups.

    Returns:
        str: The id of the dataset in the destination catalog.
    """
    dataset = catalog.get_dataset(dataset_origin_identifier)
    ckan_portal = RemoteCKAN(portal_url, apikey=apikey)
    try:
        package = map_dataset_to_package(catalog, dataset, owner_org,
                                         catalog_id, demote_superThemes,
                                         demote_themes)

        # Resolve the license id: match the dataset's license against the
        # portal's license list by title or by url.
        if dataset.get('license'):
            license_list = ckan_portal.call_action('license_list')
            ckan_license = next(
                (license_item for license_item in license_list
                 if license_item['title'] == dataset['license']
                 or license_item['url'] == dataset['license']),
                None)
            if ckan_license is None:
                package['license_id'] = 'notspecified'
            else:
                package['license_id'] = ckan_license['id']
        else:
            package['license_id'] = 'notspecified'

        # Try to update first; create the package when it is not found.
        try:
            pushed_package = ckan_portal.call_action(
                'package_update', data_dict=package)
        except NotFound:
            pushed_package = ckan_portal.call_action(
                'package_create', data_dict=package)
    finally:
        # Release the connection even when one of the API calls fails.
        ckan_portal.close()

    return pushed_package['id']
def get_activity_detail(self, activity_id):
    """Describe an activity as "<activity_type> <object_type>".

    Requests the detail list for ``activity_id`` and picks the entry whose
    ``data["resource"]`` (or, when there is no resource, ``data["package"]``)
    has the greatest ``last_modified`` value. If the entries cannot be
    compared that way, the first entry is used instead.

    Args:
        activity_id: Id passed to the ``activity_detail_list`` action.

    Returns:
        str | bool: The activity type followed by the lower-cased object
        type, or ``False`` when the detail list is empty.
    """
    try:
        details = self.ckan_api.action.activity_detail_list(id=activity_id)
    finally:
        # Close the connection whether or not the request succeeded.
        RemoteCKAN.close(self.ckan_api)

    if not details:
        return False

    try:
        # max() returns the first of the greatest entries, which is the same
        # element a stable descending sort would put first.
        detail = max(
            details,
            key=lambda i: i["data"].get("resource", i["data"].get(
                "package"))["last_modified"] or "",
        )
    except (AttributeError, KeyError, TypeError):
        # Entries without the expected structure: fall back to the first one.
        detail = details[0]

    object_type = detail["object_type"]
    activity_type = "%s %s" % (detail["activity_type"], object_type.lower())
    return activity_type
def handler(event, context):
    """Tweet about the first violation that is not yet stored in Dynamo.

    Violations are obtained from ``get_viols`` using a RemoteCKAN client for
    https://data.boston.gov. They are walked in order; each one already
    known to the database only produces a message, and the first unknown
    one is tweeted, saved, and ends the loop.

    Args:
        event: Not used by the body.
        context: Not used by the body.
    """
    config = KMS()
    db = Dynamo(config)
    foursquare = Fs(config)
    twitter = Twitter(config)

    client = RemoteCKAN('https://data.boston.gov')
    try:
        viols = get_viols(client)
    finally:
        # Release the connection even if fetching the violations fails.
        client.close()

    for viol in viols:
        if db.query(viol['_id']):
            print('Violation already known to Dynamo')
            continue

        count = db.count(viol['LICENSENO'])
        url = format_url(viol)
        text = format_msg(viol, count, url)
        (lat, lon) = extract_geo(viol)

        place = foursquare.place_search(name=viol['businessName'],
                                        lat=lat, lon=lon)
        # A photo is attached only when a matching place was found.
        photo_url = None
        if place:
            photo_url = foursquare.random_photo_url(place)

        twitter.tweet(text, photo_url, lat, lon)
        db.save(viol['_id'], viol['LICENSENO'])
        # Only one new violation is handled per invocation.
        break
# -----------------------------------------------------------------------
# MAIN

# CSV filename is first parameter
if len(sys.argv) > 1:
    csv_filename = sys.argv[1]

# Read the configuration; the context managers make sure the files are closed.
with open("ckan_ip.txt") as ip_file:
    ckan_ip = ip_file.read().replace('\n','')
with open("ckan_api_key.txt") as key_file:
    api_key = key_file.read().replace('\n','')

# Read in the CSV file
with open(csv_filename) as fp:
    reader = csv.DictReader(fp)

    # Open the connection to the CKAN server
    ckan = RemoteCKAN('http://%s' % ckan_ip, apikey=api_key, user_agent=user_agent)
    try:
        organisations_data = get_existing_organisations_dict()
        organisations_names = get_existing_organisations_names(organisations_data)

        # Process each row
        for row in reader:
            add_organisation(row)
    finally:
        # Close the connection even when processing a row fails
        RemoteCKAN.close(ckan)

sys.exit(0)
def test_good_oldstyle(self):
    """An explicitly closed client returns the expected organization list."""
    ckan = RemoteCKAN(TEST_CKAN)
    try:
        self.assertEqual(
            ckan.action.organization_list(),
            ['aa', 'bb', 'cc'])
    finally:
        # Release the session even when the assertion fails.
        ckan.close()
# Purge a dataset using the CKAN API
# Project: Protect SLR
# JB Barré - 9/12/2020
from ckanapi import RemoteCKAN
import requests

# NOTE(review): certificate verification is switched off for this session,
# so the server's TLS certificate is not checked. Confirm this is intended.
session = requests.Session()
session.verify = False

protect = RemoteCKAN('https://data-protect-slr.univ-grenoble-alpes.fr/',
                     apikey='xxxxxxxx',
                     user_agent='xxxxxxxxxxxxx',
                     session=session)

# Parameters: Id (name or Id)
dataset_id = "dataset_8"

try:
    # Purging a dataset cannot be undone!
    protect.action.dataset_purge(id=dataset_id)

    # or use the function package_delete to put the package in the trash
    # protect.action.package_delete(id=dataset_id)
finally:
    # Close the connection whether or not the purge succeeded.
    RemoteCKAN.close(protect)
class PushTestCase(unittest.TestCase):
    """Tests for push_dataset_to_ckan, run through VCR cassettes."""

    CKAN_VCR = vcr.VCR(path_transformer=vcr.VCR.ensure_suffix('.yaml'),
                       cassette_library_dir=os.path.join(
                           "tests", "cassetes", "ckan_integration",
                           "push_dataset"),
                       filter_headers=['Authorization', 'X-CKAN-API-Key'],
                       record_mode='once')

    portal_url = 'http://localhost:8080'
    apikey = "<apikey>"

    @classmethod
    def get_sample(cls, sample_filename):
        """Return the path of a sample file inside SAMPLES_DIR."""
        return os.path.join(SAMPLES_DIR, sample_filename)

    def _push(self, catalog, dataset_id, catalog_id):
        """Push one dataset to the test portal under 'oficina-de-muestra'."""
        return push_dataset_to_ckan(
            catalog, "oficina-de-muestra", dataset_id,
            self.portal_url, self.apikey, catalog_id=catalog_id,
        )

    @CKAN_VCR.use_cassette()
    def setUp(self):
        self.portal = RemoteCKAN(self.portal_url, apikey=self.apikey)
        self.full_catalog = pydatajson.DataJson(
            self.get_sample('full_data.json'))
        self.justice_catalog = pydatajson.DataJson(
            self.get_sample('catalogo_justicia.json'))

    @CKAN_VCR.use_cassette()
    def tearDown(self):
        # Purge the first dataset of each catalog; a dataset that was never
        # pushed is reported as NotFound and simply skipped.
        names = [
            title_to_name(catalog.datasets[0]['title'])
            for catalog in (self.full_catalog, self.justice_catalog)
        ]
        try:
            for name in names:
                try:
                    self.portal.call_action('dataset_purge',
                                            data_dict={'id': name})
                except NotFound:
                    pass
        finally:
            # Close the portal even when a purge fails unexpectedly.
            self.portal.close()

    @CKAN_VCR.use_cassette()
    def test_dataset_is_created_correctly(self):
        """The returned id is the catalog id and dataset id joined by '_'."""
        catalog = self.full_catalog
        catalog_id = title_to_name(catalog['title'])
        dataset = catalog.datasets[0]
        dataset_id = dataset['identifier']

        return_id = self._push(catalog, dataset_id, catalog_id)

        self.assertEqual(return_id, catalog_id + '_' + dataset_id)

    @CKAN_VCR.use_cassette()
    def test_dataset_is_updated_correctly(self):
        """Pushing a dataset twice updates the notes of the stored package."""
        catalog = self.full_catalog
        catalog_id = title_to_name(catalog['title'])
        dataset_id = catalog.datasets[0]['identifier']

        self._push(catalog, dataset_id, catalog_id)
        catalog.datasets[0]['description'] = 'updated description'
        return_id = self._push(catalog, dataset_id, catalog_id)

        data_dict = {'id': catalog_id + '_' + dataset_id}
        package = self.portal.call_action('package_show', data_dict=data_dict)
        self.assertEqual(return_id, catalog_id + '_' + dataset_id)
        self.assertEqual('updated description', package['notes'])

    @CKAN_VCR.use_cassette()
    def test_resources_swapped_correctly(self):
        """Swapping distributions between two datasets swaps their resources."""
        catalog_id = 'same-catalog-id'
        full_dataset = self.full_catalog.datasets[0]
        full_dataset_id = full_dataset['identifier']
        self._push(self.full_catalog, full_dataset_id, catalog_id)

        justice_dataset = self.justice_catalog.datasets[0]
        justice_dataset_id = justice_dataset['identifier']
        self._push(self.justice_catalog, justice_dataset_id, catalog_id)

        # Switch them and update
        full_dataset['distribution'], justice_dataset['distribution'] = \
            justice_dataset['distribution'], full_dataset['distribution']

        full_package_id = self._push(
            self.full_catalog, full_dataset_id, catalog_id)
        justice_package_id = self._push(
            self.justice_catalog, justice_dataset_id, catalog_id)

        # Switch them back
        full_dataset['distribution'], justice_dataset['distribution'] = \
            justice_dataset['distribution'], full_dataset['distribution']

        data_dict = {'id': full_package_id}
        full_package = self.portal.call_action('package_show',
                                               data_dict=data_dict)
        data_dict = {'id': justice_package_id}
        justice_package = self.portal.call_action('package_show',
                                                  data_dict=data_dict)

        self.assertEqual(len(full_package['resources']),
                         len(justice_dataset['distribution']))
        self.assertEqual(len(justice_package['resources']),
                         len(full_dataset['distribution']))

        # After the swap each package holds the other dataset's distributions.
        for resource, justice_distribution in zip(
                full_package['resources'], justice_dataset['distribution']):
            self.assertEqual(
                'same-catalog-id_' + justice_distribution['identifier'],
                resource['id'])

        for resource, full_distribution in zip(
                justice_package['resources'], full_dataset['distribution']):
            self.assertEqual(
                'same-catalog-id_' + full_distribution['identifier'],
                resource['id'])
class CkanBaseHandler(object):
    """Wrapper around a RemoteCKAN connection exposing common CKAN actions.

    The connection is opened in ``__init__`` and checked with a
    ``site_read`` call. Instances can be used as context managers; leaving
    the ``with`` block closes the connection.
    """

    def __init__(self, url, apikey=None):
        """Open the connection and verify that the site can be read.

        Raises:
            CkanReadError: If the ``site_read`` call raises.
            CkanApiError: If ``site_read`` returns a falsy result.
        """
        self.apikey = apikey
        self.remote = RemoteCKAN(url, apikey=self.apikey)
        try:
            res = self.call_action('site_read')
        except Exception as exc:
            # Do not leak the session when the portal cannot be read.
            self.remote.close()
            raise CkanReadError() from exc
        # NOTE(review): the API key is written to the log in clear text;
        # confirm this is acceptable for the deployment.
        logger.info('Open CKAN connection with api key: {}'.format(apikey))
        if not res:
            self.close()
            raise CkanApiError()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def close(self):
        """Close the underlying connection and log it."""
        self.remote.close()
        logger.info('Close CKAN connection')

    # @timeout
    def call_action(self, action, **kwargs):
        """Call ``action`` on the remote, passing ``kwargs`` as its data dict."""
        return self.remote.call_action(action, kwargs)

    @CkanExceptionsHandler()
    def get_all_categories(self, *args, **kwargs):
        """Return the result of ``group_list`` as a list (ordered by name by default)."""
        kwargs.setdefault('order_by', 'name')
        return list(self.call_action('group_list', **kwargs))

    @CkanExceptionsHandler()
    def get_all_licenses(self, *args, **kwargs):
        """Return the portal's licenses as a list."""
        try:
            action_result = self.call_action('license_list', **kwargs)
        except CkanError.CKANAPIError:
            # Retry with the 'licence_list' spelling of the action name.
            action_result = self.call_action('licence_list', **kwargs)
        return list(action_result)

    @CkanExceptionsHandler()
    def get_all_organisations(self, *args, **kwargs):
        """Return the result of ``organization_list`` as a list."""
        return list(self.call_action('organization_list', **kwargs))

    @CkanExceptionsHandler(ignore=[CkanError.NotFound])
    def get_organisation(self, id, **kwargs):
        """Return the organisation ``id``, or ``None`` when it is not found."""
        try:
            return self.call_action('organization_show', id=id, **kwargs)
        except CkanError.NotFound:
            return None

    @CkanExceptionsHandler(ignore=[CkanError.NotFound])
    def get_package(self, id, **kwargs):
        """Return the package ``id``, or ``False`` when it is not found.

        ``include_tracking`` defaults to ``True`` unless the caller sets it.
        """
        kwargs.setdefault('include_tracking', True)
        try:
            return self.call_action('package_show', id=id, **kwargs)
        except CkanError.NotFound:
            return False

    def is_package_exists(self, id):
        """Return ``True`` when ``get_package(id)`` yields a truthy result."""
        return bool(self.get_package(id))

    def is_package_name_already_used(self, name):
        """Return ``True`` when ``get_package(name)`` yields a truthy result."""
        return bool(self.get_package(name))

    @CkanExceptionsHandler()
    # @timeout
    def push_resource(self, package, **kwargs):
        """Update the resource of ``package`` matching ``kwargs['id']``, or create one.

        When a resource of the package has the given id, it is merged with
        ``kwargs`` and sent to ``resource_update``; otherwise ``kwargs`` is
        sent to ``resource_create``.
        """
        kwargs['package_id'] = package['id']
        kwargs['created'] = datetime.now().isoformat()
        # A missing 'id' simply matches no existing resource.
        resource_id = kwargs.get('id')
        for resource in package['resources']:
            if resource['id'] != resource_id:
                continue
            # On update the timestamp is sent as 'last_modified' instead
            # of 'created'.
            kwargs['last_modified'] = kwargs.pop('created')
            # An empty url is dropped so it does not replace the stored one.
            if 'url' in kwargs and not kwargs['url']:
                del kwargs['url']
            resource.update(kwargs)
            # 'tracking_summary' is not sent back; tolerate its absence.
            resource.pop('tracking_summary', None)
            return self.remote.action.resource_update(**resource)
        return self.remote.action.resource_create(**kwargs)

    @CkanExceptionsHandler()
    def push_resource_view(self, **kwargs):
        """Update the resource's view of the given ``view_type``, or create it.

        ``title`` and ``description`` get default values when not supplied.
        """
        kwargs.setdefault('title', 'Aperçu')
        kwargs.setdefault('description', 'Aperçu du jeu de données')

        views = self.call_action('resource_view_list',
                                 id=kwargs['resource_id'])
        for view in views:
            if view['view_type'] == kwargs['view_type']:
                return self.call_action('resource_view_update',
                                        id=view['id'], **kwargs)
        return self.call_action('resource_view_create', **kwargs)

    @CkanExceptionsHandler()
    def update_resource(self, id, **kwargs):
        """Fetch resource ``id``, apply ``kwargs`` on top and send the update."""
        resource = self.call_action('resource_show', id=id)
        resource.update(kwargs)
        return self.call_action('resource_update', **resource)

    def check_dataset_integrity(self, name):
        """Raise ``CkanConflictError`` when a package named ``name`` exists."""
        if self.is_package_name_already_used(name):
            raise CkanConflictError('Dataset already exists')

    @CkanExceptionsHandler()
    def publish_dataset(self, id=None, resources=None, **kwargs):
        """Update the package ``id`` when it exists, otherwise create one.

        On update the stored package is merged with ``kwargs``, the latter
        taking precedence. ``resources`` is accepted but not used here.
        """
        if id and self.is_package_exists(id):
            package = self.call_action('package_update', **{
                **self.get_package(id), **kwargs
            })
        else:
            package = self.call_action('package_create', **kwargs)
        return package

    @CkanExceptionsHandler()
    def publish_resource(self, package, **kwargs):
        """Push a resource and, when ``view_type`` is given, its view.

        Returns:
            tuple: ``(resource, view)``; ``view`` is ``None`` when no view
            type was requested.
        """
        # 'view_type' is optional: a missing value means "no view".
        resource_view_type = kwargs.pop('view_type', None)
        resource = self.push_resource(package, **kwargs)

        view = None
        if resource_view_type:
            view = self.push_resource_view(resource_id=resource['id'],
                                           view_type=resource_view_type)
        return resource, view

    @CkanExceptionsHandler(ignore=[CkanError.NotFound])
    def delete_resource(self, id):
        """Delete resource ``id``; return ``None`` when it is not found."""
        try:
            return self.call_action('resource_delete', id=id, force=True)
        except CkanError.NotFound:
            return None

    @CkanExceptionsHandler(ignore=[CkanError.NotFound])
    def delete_dataset(self, id):
        """Delete package ``id``; return ``None`` when it is not found."""
        try:
            return self.call_action('package_delete', id=id)
        except CkanError.NotFound:
            return None
def test_good_oldstyle(self):
    """An explicitly closed client returns the expected organization list."""
    ckan = RemoteCKAN(TEST_CKAN)
    try:
        self.assertEqual(ckan.action.organization_list(), ['aa', 'bb', 'cc'])
    finally:
        # Release the session even when the assertion fails.
        ckan.close()