def push_theme_to_ckan(catalog, portal_url, apikey, identifier=None, label=None):
    """Write a theme's metadata to the given CKAN portal as a group.

    Args:
        catalog (DataJson): Source catalog containing the theme.
        portal_url (str): URL of the target CKAN portal.
        apikey (str): API key of a user with permission to create or
            update the group.
        identifier (str): Identifier used to look the theme up in the taxonomy.
        label (str): Label used to look the theme up in the taxonomy.

    Returns:
        str: The name of the theme in the target catalog.
    """
    ckan_portal = RemoteCKAN(portal_url, apikey=apikey)
    try:
        theme = catalog.get_theme(identifier=identifier, label=label)
        group = map_theme_to_group(theme)
        pushed_group = ckan_portal.call_action('group_create', data_dict=group)
    finally:
        # Release the HTTP session even on failure (mirrors
        # push_dataset_to_ckan, which closes its portal).
        ckan_portal.close()
    return pushed_group['name']
def create(self, apikey=apikey):
    """Create this resource on the remote CKAN instance.

    Args:
        apikey (str): API key for the CKAN call; defaults to the
            module-level ``apikey`` captured at definition time.

    Returns:
        The result of ``show(self.id, 'resource')`` on success, otherwise
        ``None`` (errors are printed rather than raised).
    """
    if not hasattr(self, 'url'):
        # CKAN requires a url field even for uploaded files.
        setattr(self, 'url', 'dummy-url')
    upload_fh = None
    if hasattr(self, 'upload'):
        # Replace the stored path with an open handle for the multipart upload.
        upload_fh = open(self.upload, 'rb')
        setattr(self, 'upload', upload_fh)
    d = self.check()
    try:
        # NOTE(review): this uses the module-level ``url``, not ``self.url``
        # set above — confirm that is intentional.
        new_resource = RemoteCKAN(
            url, apikey).action.resource_create(**d)  # make CKAN API call
        setattr(self, 'id', new_resource['id'])
        return (show(self.id, 'resource'))
    except NotAuthorized:
        print('Denied. Check your apikey.')  # print 'denied' if call not authorised
    except Exception:
        # Was a bare ``except:`` — narrowed so KeyboardInterrupt/SystemExit
        # still propagate.
        print('Failed to create %s' % self.name)
    finally:
        # Close the upload handle to avoid leaking a file descriptor
        # (previously it was never closed).
        if upload_fh is not None:
            upload_fh.close()
def push_dataset_to_ckan(catalog, owner_org, dataset_origin_identifier,
                         portal_url, apikey, catalog_id=None,
                         demote_superThemes=True, demote_themes=True):
    """Write a dataset's metadata to the given CKAN portal.

    Args:
        catalog (DataJson): Source catalog containing the dataset.
        owner_org (str): Organization the dataset belongs to.
        dataset_origin_identifier (str): Id of the dataset to federate.
        portal_url (str): URL of the target CKAN portal.
        apikey (str): API key of a user with permission to create or
            update the dataset.
        catalog_id (str): Prefix prepended to the dataset id in the
            target catalog.
        demote_superThemes (bool): If True, the dataset's superTheme ids
            are propagated as groups.
        demote_themes (bool): If True, the dataset's theme labels become
            tags; otherwise they are passed as groups.

    Returns:
        str: The dataset id in the target catalog.
    """
    dataset = catalog.get_dataset(dataset_origin_identifier)
    ckan_portal = RemoteCKAN(portal_url, apikey=apikey)
    # try/finally so the HTTP session is released even when an exception
    # other than NotFound escapes (previously close() was skipped then).
    try:
        package = map_dataset_to_package(catalog, dataset, owner_org,
                                         catalog_id, demote_superThemes,
                                         demote_themes)
        # Resolve the license id by matching title or url against the
        # portal's license list; fall back to 'notspecified'.
        if dataset.get('license'):
            license_list = ckan_portal.call_action('license_list')
            try:
                ckan_license = next(
                    license_item for license_item in license_list
                    if license_item['title'] == dataset['license'] or
                    license_item['url'] == dataset['license'])
                package['license_id'] = ckan_license['id']
            except StopIteration:
                package['license_id'] = 'notspecified'
        else:
            package['license_id'] = 'notspecified'
        # EAFP: try an update first; create the package if it doesn't exist.
        try:
            pushed_package = ckan_portal.call_action(
                'package_update', data_dict=package)
        except NotFound:
            pushed_package = ckan_portal.call_action(
                'package_create', data_dict=package)
    finally:
        ckan_portal.close()
    return pushed_package['id']
def new_user(username, email=None, fullname=None, apikey=apikey):
    """Create a CKAN user unless one with this username already exists.

    Prompts interactively for any missing email/fullname. Returns the
    (pre-existing) search result on a successful create; prints a message
    and returns None when the user exists or the apikey is rejected.
    """
    existing = search(username, 'user')
    if existing is not None:
        # Guard clause: nothing to create.
        print('This user already exists\n', existing)
        return
    # Prompt until non-empty values are supplied.
    while email is None:
        email = input("Enter new user's email address:\n") or None
    while fullname is None:
        fullname = input("Enter new user's full name:\n") or None
    payload = {
        'name': username.lower().strip(),
        'email': email,
        'password': '******',
        'fullname': fullname,
    }
    try:
        RemoteCKAN(url, apikey).action.user_create(**payload)
        return (existing)
    except NotAuthorized:
        print('\nDenied. Check your apikey.')
def update_ckan_database_resource(domain, entity_id):
    """Refresh the gas-stations data and upload it as a new CKAN resource.

    Args:
        domain (str): Host/domain used to build the public url of the file.
        entity_id: Entity whose data is refreshed before export.

    Returns:
        None
    """
    ua = 'ckanapiexample/1.0 (+http://example.com/my/website)'
    # SECURITY(review): portal address and API key are hard-coded in source;
    # they should live in configuration or environment variables.
    client = RemoteCKAN('http://104.236.54.23',
                        apikey='eb5346ab-0c53-4b1f-a0a4-f4be797db23b',
                        user_agent=ua)
    # file_name = save_gas_stations_as_csv(get_orion_client())
    file_name = update_json_and_save_csv(get_orion_client(), entity_id)
    file_path_csv = 'friendlygas_project/core/media/' + file_name
    date = str(datetime.datetime.now(pytz.timezone('America/Recife')))
    # ``with`` closes the CSV handle after the upload (it was leaked before).
    with open(file_path_csv, 'rb') as csv_file:
        client.action.resource_create(
            package_id='ac335d1a-098c-4f18-882c-03950ddc5d7c',
            name='Postos de Combustíveis - Natal - RN - ' + date,
            description=
            'Este arquivo contém as informações dos Postos de Combustíveis de Natal - RN.',
            format='csv',
            upload=csv_file,
            url='http://' + domain + '/' + 'media' + '/' + file_name)
def run():
    """Interactive entry point: ask whether to create datasets or resources,
    then exercise a handful of CKAN API calls against the beta portal."""
    print("OpenSTL-DataExchange")
    usr_in = input(
        "Do you wish to create 'datasets' or 'resources'? Leave blank to exit\n"
    )
    # Decide if user wants to make new Datasets or Resources
    if usr_in == "datasets":
        update_datasets()
    elif usr_in == "resources":
        update_resources()
    # NOTE(review): this second call appears to run unconditionally and looks
    # like a duplicate of the branch above — confirm it is intentional.
    update_resources()
    ua = 'ckanapiexample/1.0 (+http://example.com/my/website)'
    demo = RemoteCKAN('http://beta.stlouisdata.org', apikey=secret)
    groups = demo.action.package_list(id='test_data')
    print(groups)
    # try:
    #     pkg = demo.action.package_create(name='ya_set', title='not going to work')
    # except:
    #     print("create_error")
    pkg = demo.action.package_show(id='ya_set')
    groups = demo.action.package_list(id='test_data')
    print(groups)
    print("\nPackage_Create {}".format(pkg))
    pkg['title'] = "WORKING!!"
    #pkg['tags'] = ['a_tag', 'b_tag']
    pkg['notes'] = 'Just playing around really...'  # this is the description field
    #print("\nPackage_Create {}".format(pkg))
    #pkg = demo.action.package_update(**pkg)
    #print("\nPackage_Create {}".format(pkg))
    npkg = demo.action.package_show(id='test_data')
    print("\nPackage_Create {}".format(npkg))
def crawl(ctx, workflow, url, engine):
    """Search a CKAN instance for CSV resources and run the validation
    engine's workflow over each one, printing a report per resource."""
    printer = ctx.obj['printer']
    click.echo(_("Engine: %s" % engine))
    # Instantiate the selected engine from the registry.
    engine = engines[engine]()
    from ckanapi import RemoteCKAN
    client = RemoteCKAN(url, user_agent='lintol-doorstep-crawl/1.0 (+http://lintol.io)')
    search_hits = client.action.resource_search(query='format:csv')
    if 'results' in search_hits:
        for hit in search_hits['results']:
            # Download the raw CSV and stage it as a local file.
            response = requests.get(hit['url'])
            with make_file_manager(content={'data.csv': response.text}) as file_manager:
                staged_path = file_manager.get('data.csv')
                loop = asyncio.get_event_loop()
                outcome = loop.run_until_complete(engine.run(staged_path, workflow))
                printer.print_report(outcome)
    print(printer.get_output())
def get_ids():
    '''
    called by main()

    Queries the registry's package_search endpoint for records modified in
    the last 48 hours, then keeps only records whose open-government
    checklist criteria all pass.

    :return: list of package names, or [] on extraction error
    '''
    site = os.getenv("registry_url")
    # NOTE(review): ``rckan`` is created but never used — kept for parity.
    rckan = RemoteCKAN(site)
    # query for last 48 hours
    apicall = "api/3/action/package_search"
    # q_param = "?q=metadata_modified:[2019-10-10T21:15:00Z TO *]&fq=publication:open_government"
    hours_ago = 48
    two_days_ago = datetime.now() - timedelta(hours=hours_ago)
    str_2days_ago = two_days_ago.strftime('%Y-%m-%dT%H:%M:%SZ')
    # '%20' is a url-encoded space around the Solr range operator "TO".
    q_param1 = "?q=metadata_modified:[%s%sTO%s*]" % (str_2days_ago, '%20', '%20')
    res = query_with_get(site, apicall, q_param1)
    # Renamed from ``dict`` — the old name shadowed the builtin.
    results = json.loads(res)['result']['results']
    # additionally filter only records where open checklist criteria passes.
    # Note: 'elegible_for_release' is the key's actual (misspelled) name in
    # the registry data — do not "fix" it here.
    checklist_fields = (
        'ready_to_publish', 'elegible_for_release', 'access_to_information',
        'authority_to_release', 'formats', 'privacy', 'official_language',
        'security', 'other', 'imso_approval')
    filtered_records = [
        x for x in results
        if all(x[field] == 'true' for field in checklist_fields)
        and x['license_id'] == 'ca-ogl-lgo'
        and x['restrictions'] == 'unrestricted'
    ]
    print(filtered_records)
    # process the result to get filtered ids
    try:
        return [record['name'] for record in filtered_records]
    except Exception:
        # A record without a 'name' key: return no ids rather than crash
        # (preserves the original best-effort behaviour).
        return []
def delete_ckan_record(package_id):
    """
    Remove a dataset and its associated resource from CKAN
    :param package_id: id of the package to delete
    :return: Nothing
    """
    # First, verify and get the resource ID
    package_record = get_ckan_record(package_id)
    if len(package_record) == 0:
        logger.warn("Cannot find record {0} to delete".format(package_id))
        return

    remote_ckan_url = Config.get('ckan', 'remote_url')
    remote_ckan_api = Config.get('ckan', 'remote_api_key')
    user_agent = Config.get('web', 'user_agent')

    # Delete the local file if it exists
    gcdocs_file = os.path.join(
        doc_intake_dir,
        munge_filename(os.path.basename(
            package_record['resources'][0]['name'])))
    if os.path.exists(gcdocs_file):
        os.remove(gcdocs_file)

    with RemoteCKAN(remote_ckan_url, user_agent=user_agent,
                    apikey=remote_ckan_api) as ckan_instance:
        try:
            # Remove the blob first, then delete and purge the package.
            delete_blob(
                ckan_container,
                'resources/{0}/{1}'.format(
                    package_record['resources'][0]['id'],
                    package_record['resources'][0]['name'].lower()))
            ckan_instance.action.package_delete(id=package_record['id'])
            ckan_instance.action.dataset_purge(id=package_record['id'])
            logger.info("Deleted expired CKAN record {0}".format(
                package_record['id']))
        except Exception as ex:
            # str(ex) rather than ex.message: Exception.message does not
            # exist in Python 3 and raised AttributeError here.
            logger.error("Unexpected error when deleting record {0}".format(
                str(ex)))
def main(args):
    """List the users of a CKAN instance's organizations, optionally
    filtered by member capacity, and write them via ``output``."""

    def has_wanted_capacity(capacity):
        # No -s filter means every capacity matches.
        if not args['-s']:
            return True
        return capacity[0] in args['-s']

    host = args['HOST']
    apikey = args['-a'] or os.environ.get('CKAN_APIKEY')
    ckan = RemoteCKAN(host, apikey=apikey)
    organizations = ckan.call_action('organization_list', data_dict={
        'all_fields': True,
        'include_users': True
    })
    # Collect the ids of members whose capacity passes the filter.
    member_ids = []
    for org in organizations:
        for member in org['users']:
            if has_wanted_capacity(member['capacity']):
                member_ids.append(member['id'])
    # Fetch the full user record for every id, then encode for output.
    full_users = []
    for member_id in member_ids:
        full_users.append(
            ckan.call_action('user_show', data_dict={'id': member_id}))
    full_users = [encode(u) for u in full_users]
    output(full_users, args['-f'])
def update_resource(package_id, resource_file):
    """
    Add or update the resource file for the dataset
    :param package_id: OBD dataset ID
    :param resource_file: path to the resource file
    :return: Nothing
    """
    remote_ckan_url = Config.get('ckan', 'remote_url')
    remote_ckan_api = Config.get('ckan', 'remote_api_key')
    user_agent = Config.get('web', 'user_agent')
    with RemoteCKAN(remote_ckan_url, user_agent=user_agent,
                    apikey=remote_ckan_api) as ckan_instance:
        try:
            package_record = ckan_instance.action.package_show(id=package_id)
        except NotFound:
            # Log the id we looked up; the old ``nf.message`` raised
            # AttributeError on Python 3.
            logger.error("Unable to find record {0} to update".format(
                package_id))
            return
        try:
            if len(package_record['resources']) == 0:
                # ``with`` closes the upload handle (it was leaked before).
                with open(resource_file, 'rb') as upload_fh:
                    ckan_instance.action.resource_create(package_id=package_id,
                                                         url='',
                                                         upload=upload_fh)
                logger.info("Added new resource to {0}".format(package_id))
            else:
                with open(resource_file, 'rb') as upload_fh:
                    ckan_instance.action.resource_patch(
                        id=package_record['resources'][0]['id'],
                        url='',
                        upload=upload_fh)
                # Log only on the patch path: after a create the fetched
                # record has no resources entry, so the old unconditional
                # log raised IndexError.
                logger.info("Updated resource {0}".format(
                    package_record['resources'][0]['id']))
        except CKANAPIError as ce:
            # str(ce) rather than ce.message (removed in Python 3).
            logger.error(
                "Unexpected error when updating a record {0}: ".format(
                    str(ce)))
            logger.error(traceback.format_exc())
def remove_datasets_from_ckan(portal_url, apikey, filter_in=None,
                              filter_out=None, only_time_series=False,
                              organization=None):
    """Purge datasets from the given CKAN portal.

    Args:
        portal_url (str): URL of the target CKAN portal.
        apikey (str): API key of a user with permission to delete datasets.
        filter_in (dict): Positive filter dict, as in search.get_datasets.
        filter_out (dict): Negative filter dict, as in search.get_datasets.
        only_time_series (bool): Keep only datasets with time-series
            resources.
        organization (str): Keep only datasets of this organization.

    Returns:
        None
    """
    ckan_portal = RemoteCKAN(portal_url, apikey=apikey)
    # try/finally so the HTTP session is released (it was never closed).
    try:
        identifiers = []
        datajson_filters = filter_in or filter_out or only_time_series
        if datajson_filters:
            identifiers += get_datasets(
                portal_url + '/data.json',
                filter_in=filter_in, filter_out=filter_out,
                only_time_series=only_time_series, meta_field='identifier')
        if organization:
            query = 'organization:"' + organization + '"'
            search_result = ckan_portal.call_action(
                'package_search',
                data_dict={'q': query, 'rows': 500, 'start': 0})
            org_identifiers = [dataset['id']
                               for dataset in search_result['results']]
            # Page through the search results 500 rows at a time.
            start = 500
            while search_result['count'] > start:
                search_result = ckan_portal.call_action(
                    'package_search',
                    data_dict={'q': query, 'rows': 500, 'start': start})
                org_identifiers += [dataset['id']
                                    for dataset in search_result['results']]
                start += 500
            if datajson_filters:
                # Both filter kinds given: purge only the intersection.
                identifiers = set(identifiers).intersection(
                    set(org_identifiers))
            else:
                identifiers = org_identifiers
        for identifier in identifiers:
            ckan_portal.call_action('dataset_purge',
                                    data_dict={'id': identifier})
    finally:
        ckan_portal.close()
def update_ckan_record(package_dict):
    """
    Patch an existing dataset on the Open by Default Portal
    :param package_dict: JSON dict of the fields to patch
    :return: The updated package, or None on failure
    """
    remote_ckan_url = Config.get('ckan', 'remote_url')
    remote_ckan_api = Config.get('ckan', 'remote_api_key')
    user_agent = Config.get('web', 'user_agent')
    new_package = None
    with RemoteCKAN(remote_ckan_url, user_agent=user_agent,
                    apikey=remote_ckan_api) as ckan_instance:
        try:
            new_package = ckan_instance.action.package_patch(**package_dict)
        except Exception as ex:
            # str(ex) rather than ex.message: Exception.message does not
            # exist in Python 3 and raised AttributeError here.
            logger.error("Unable to update existing portal record: {0}".format(
                str(ex)))
    return new_package
def put(self, request, publisher_id, dataset_id):
    """Update a dataset's source_url and flip the publisher's
    ready-to-publish organisations to published.

    Raises APIException when no source_url is supplied; otherwise returns
    the serialized Dataset.
    """
    # NOTE(review): ``user`` is reassigned below, so this first value is
    # never read — confirm whether the attribute access is meant as a
    # permission/existence check.
    user = request.user.organisationuser
    publisher = Publisher.objects.get(pk=publisher_id)
    # NOTE(review): ``admin_group`` is never read; the .get() presumably
    # serves only to raise if the admin group is missing — confirm.
    admin_group = OrganisationAdminGroup.objects.get(
        publisher_id=publisher_id)
    source_url = request.data.get('source_url', None)
    # TODO: call package_update to update source_url for registry as well - 2017-02-20
    if not source_url:
        raise exceptions.APIException(detail="no source_url provided")
    user = request.user
    organisationuser = user.organisationuser
    api_key = organisationuser.iati_api_key
    # NOTE(review): the CKAN client is constructed but never used in this
    # method — possibly dead code left for the TODO above.
    client = RemoteCKAN(settings.CKAN_URL, apikey=api_key)
    dataset = Dataset.objects.get(id=dataset_id)
    dataset.date_updated = datetime.now()
    dataset.source_url = source_url
    dataset.save()
    # get all ready to publish organisations
    organisations = Organisation.objects.filter(ready_to_publish=True,
                                                publisher=publisher)
    non_r2p_organisations = Organisation.objects.filter(
        ready_to_publish=False, publisher=publisher)
    # update the affected organisations flags
    organisations.update(
        published=True,
        modified=False,
        ready_to_publish=True,
        last_updated_datetime=datetime.now().isoformat(' '))
    non_r2p_organisations.update(published=False)
    # return Dataset object
    serializer = DatasetSerializer(dataset, context={'request': request})
    return Response(serializer.data)
async def do_crawl(component, url, workflow, printer, publish, update=False): """ gets all the datasets on the ckan instance """ # is it worth using datastore to create te client here? from ckanapi import RemoteCKAN client = RemoteCKAN(url, user_agent='lintol-doorstep-crawl/1.0 (+http://lintol.io)') # gets the packages to iterate through using the retry method packages = ckan_retry(client.action.package_list) for package in packages: # creates package metadata package_metadata = ckan_retry(client.action.package_show, id=package) ini = DoorstepIni(context_package=package_metadata) # classes = studley case for resource in ini.package['resources']: # checks if the resource is either CSV or geoJson (why geojson but not json?? is it more standarised re: columns) if resource['format'] in ALLOWED_FORMATS: if workflow: # if workflow is initisialised # creates response oject from the url column r = requests.get(resource['url']) with make_file_manager(content={'data.csv': r.text}) as file_manager: # makes file etc filename = file_manager.get('data.csv') # calls async function to exec the workflow? result = await execute_workflow(component, filename, workflow, ini) print(result) if result: printer.build_report(result) if publish: # what is publish in this context? # probably, if there is something to publish that is returned from the component, then do whatevs result = await announce_resource(component, resource, ini, url, update) else: if not resource['format']: print(resource) logging.warn("Not allowed format: {}".format(resource['format'])) printer.print_output()