def __call__(self):
        """Fetch metadata for every distinct entity id via the configured action.

        The action named by ``self.action`` is looked up on ``self.api.action``
        and called once per unique value in ``self.entity_ids``.  Ids for which
        the API raises ``errors.NotFound`` are logged and left out of the
        result.

        Returns:
            dict: Maps each id that was found to the action's response.
        """
        endpoint = getattr(self.api.action, self.action)
        metadata = {}
        try:
            for item_id in set(self.entity_ids):
                try:
                    metadata[item_id] = endpoint(id=item_id)
                except errors.NotFound:
                    url = f"action={self.action} id={item_id}"
                    logging.error(f"CKAN API NotFound error: {url}")
        finally:
            # Close once, after all lookups, instead of after every single
            # request; ``finally`` also releases the connection when an
            # unexpected exception escapes the loop.
            RemoteCKAN.close(self.api)

        return metadata
Ejemplo n.º 2
0
def push_dataset_to_ckan(catalog,
                         owner_org,
                         dataset_origin_identifier,
                         portal_url,
                         apikey,
                         catalog_id=None,
                         demote_superThemes=True,
                         demote_themes=True):
    """Write the metadata of a dataset to the given CKAN portal.

    The dataset is first updated with ``package_update``; if the portal
    answers ``NotFound`` it is created with ``package_create`` instead.

    Args:
        catalog (DataJson): Source catalog that contains the dataset.
        owner_org (str): Organization the dataset belongs to.
        dataset_origin_identifier (str): Id of the dataset to federate.
        portal_url (str): URL of the destination CKAN portal.
        apikey (str): API key of a user allowed to create or update the
            dataset.
        catalog_id (str): Prefix that precedes the dataset's id and name in
            the destination catalog.
        demote_superThemes (bool): If true, the ids of the dataset's super
            themes are propagated as groups.
        demote_themes (bool): If true, the labels of the dataset's themes
            become tags; otherwise they are passed as groups.

    Returns:
        str: The id of the dataset in the destination catalog.
    """
    dataset = catalog.get_dataset(dataset_origin_identifier)
    ckan_portal = RemoteCKAN(portal_url, apikey=apikey)

    # Everything that talks to the portal runs under try/finally so the
    # connection is released even when mapping or an API call fails.
    try:
        package = map_dataset_to_package(catalog, dataset, owner_org,
                                         catalog_id, demote_superThemes,
                                         demote_themes)

        # Resolve the license id: match the dataset's license against the
        # title or url of the portal's licenses, defaulting to 'notspecified'.
        package['license_id'] = 'notspecified'
        dataset_license = dataset.get('license')
        if dataset_license:
            license_list = ckan_portal.call_action('license_list')
            # ``.get`` keeps a license entry without 'title' or 'url' from
            # aborting the whole push with a KeyError.
            ckan_license = next(
                (license_item for license_item in license_list
                 if dataset_license in (license_item.get('title'),
                                        license_item.get('url'))),
                None)
            if ckan_license is not None:
                package['license_id'] = ckan_license['id']

        try:
            pushed_package = ckan_portal.call_action('package_update',
                                                     data_dict=package)
        except NotFound:
            pushed_package = ckan_portal.call_action('package_create',
                                                     data_dict=package)

        return pushed_package['id']
    finally:
        ckan_portal.close()
    def get_activity_detail(self, activity_id):
        """Describe the most recently modified detail of an activity.

        Fetches the activity's detail list through ``self.ckan_api``, closes
        the connection, and picks the detail whose resource (or, when there
        is no resource, package) has the greatest ``last_modified`` value.
        If the details cannot be ranked, the first one is used instead.

        Args:
            activity_id: Id passed to the ``activity_detail_list`` action.

        Returns:
            ``"<activity_type> <object_type lower-cased>"`` for the chosen
            detail, or ``False`` when the activity has no details.
        """
        try:
            details = self.ckan_api.action.activity_detail_list(id=activity_id)
        finally:
            # Release the connection whether or not the request succeeded.
            self.ckan_api.close()

        if not details:
            return False

        def last_modified(entry):
            # Prefer the resource payload; fall back to the package payload.
            data = entry["data"]
            target = data.get("resource", data.get("package"))
            return target["last_modified"] or ""

        try:
            # max() returns the first entry with the greatest key, which is
            # the same entry a stable descending sort would put first.
            detail = max(details, key=last_modified)
        except Exception:
            # Best effort: malformed payloads must not hide the activity,
            # but interpreter-exit signals are no longer swallowed.
            detail = details[0]

        object_type = detail["object_type"]
        return "%s %s" % (detail["activity_type"], object_type.lower())
Ejemplo n.º 4
0
def handler(event, context):
    """Tweet the first violation that has not been recorded yet.

    Violations are fetched from the Boston open-data CKAN portal.  They are
    scanned in order: already-recorded ones are reported on stdout, and the
    first unrecorded one is tweeted (with a venue photo when a matching
    place is found), saved, and ends the scan.

    Args:
        event: Unused.
        context: Unused.
    """
    config = KMS()
    db = Dynamo(config)
    foursquare = Fs(config)
    twitter = Twitter(config)

    client = RemoteCKAN('https://data.boston.gov')
    try:
        viols = get_viols(client)
    finally:
        # Release the connection even when the fetch fails.
        client.close()

    for viol in viols:
        if db.query(viol['_id']):
            print('Violation already known to Dynamo')
            continue

        count = db.count(viol['LICENSENO'])
        url = format_url(viol)
        text = format_msg(viol, count, url)
        (lat, lon) = extract_geo(viol)

        place = foursquare.place_search(name=viol['businessName'],
                                        lat=lat,
                                        lon=lon)
        photo_url = foursquare.random_photo_url(place) if place else None

        twitter.tweet(text, photo_url, lat, lon)
        db.save(viol['_id'], viol['LICENSENO'])

        # At most one new violation is published per invocation.
        break
# -----------------------------------------------------------------------
# MAIN

# CSV filename is first parameter
# NOTE(review): without an argument, csv_filename must already be defined
# earlier in the file -- confirm a default exists.
if len(sys.argv) > 1:
    csv_filename = sys.argv[1]

# Read the configuration (context managers close the files right away)
with open("ckan_ip.txt") as config_file:
    ckan_ip = config_file.read().replace('\n', '')
with open("ckan_api_key.txt") as config_file:
    api_key = config_file.read().replace('\n', '')

# Read in the CSV file
with open(csv_filename) as fp:
    reader = csv.DictReader(fp)

    # Open the connection to the CKAN server
    ckan = RemoteCKAN('http://%s' % ckan_ip, apikey=api_key, user_agent=user_agent)
    try:
        organisations_data = get_existing_organisations_dict()
        organisations_names = get_existing_organisations_names(organisations_data)

        # Process each row
        for row in reader:
            add_organisation(row)
    finally:
        # Close the connection even if a row fails to import
        RemoteCKAN.close(ckan)

# sys.exit is always available, unlike the site-provided exit() helper
sys.exit(0)
Ejemplo n.º 6
0
 def test_good_oldstyle(self):
     """List organizations through a client that is opened and closed by hand."""
     client = RemoteCKAN(TEST_CKAN)
     organizations = client.action.organization_list()
     self.assertEqual(organizations, ['aa', 'bb', 'cc'])
     client.close()
Ejemplo n.º 7
0
# Create new Dataset using Ckan API
# Project: Protect SLR
# JB Barré - 9/12/2020

from ckanapi import RemoteCKAN
import requests

session = requests.Session()
# NOTE(review): TLS certificate verification is disabled for every request
# sent through this session. Confirm this is intentional before running the
# script against a production portal.
session.verify = False

protect = RemoteCKAN('https://data-protect-slr.univ-grenoble-alpes.fr/',
                     apikey='xxxxxxxx',
                     user_agent='xxxxxxxxxxxxx',
                     session=session)

# Parameters: name or id of the dataset to remove
# (named dataset_id so the builtin ``id`` is not shadowed)
dataset_id = "dataset_8"

try:
    # Purging a dataset cannot be undone!
    protect.action.dataset_purge(id=dataset_id)

    # or use the function package_delete to put the package in the trash
    # protect.action.package_delete(id=dataset_id)
finally:
    # Close the connection even when the purge request fails
    protect.close()
Ejemplo n.º 8
0
class PushTestCase(unittest.TestCase):
    """Integration tests for ``push_dataset_to_ckan``.

    Every fixture and test method is wrapped with ``CKAN_VCR.use_cassette()``,
    so HTTP traffic goes through VCR cassettes stored under
    ``tests/cassetes/ckan_integration/push_dataset``.
    """

    # Shared VCR configuration: cassettes get a '.yaml' suffix, the
    # authentication headers listed in filter_headers are filtered, and
    # record_mode is 'once'.
    CKAN_VCR = vcr.VCR(path_transformer=vcr.VCR.ensure_suffix('.yaml'),
                       cassette_library_dir=os.path.join(
                           "tests", "cassetes", "ckan_integration",
                           "push_dataset"),
                       filter_headers=['Authorization', 'X-CKAN-API-Key'],
                       record_mode='once')

    # Portal address and API key handed to RemoteCKAN and push_dataset_to_ckan.
    portal_url = 'http://localhost:8080'
    apikey = "<apikey>"

    @classmethod
    def get_sample(cls, sample_filename):
        """Return the path of ``sample_filename`` inside ``SAMPLES_DIR``."""
        return os.path.join(SAMPLES_DIR, sample_filename)

    @CKAN_VCR.use_cassette()
    def setUp(self):
        """Open a portal connection and load the two sample catalogs."""
        self.portal = RemoteCKAN(self.portal_url, apikey=self.apikey)
        self.full_catalog = pydatajson.DataJson(
            self.get_sample('full_data.json'))
        self.justice_catalog = pydatajson.DataJson(
            self.get_sample('catalogo_justicia.json'))

    @CKAN_VCR.use_cassette()
    def tearDown(self):
        """Purge the sample datasets by name and close the connection.

        The names are derived from the title of the first dataset of each
        sample catalog; a ``NotFound`` answer from the portal is ignored.
        """
        full_dataset = self.full_catalog.datasets[0]
        full_name = title_to_name(full_dataset['title'])
        justice_dataset = self.justice_catalog.datasets[0]
        justice_name = title_to_name(justice_dataset['title'])
        try:
            self.portal.call_action('dataset_purge',
                                    data_dict={'id': full_name})
        except NotFound:
            pass
        try:
            self.portal.call_action('dataset_purge',
                                    data_dict={'id': justice_name})
        except NotFound:
            pass

        self.portal.close()

    @CKAN_VCR.use_cassette()
    def test_dataset_is_created_correctly(self):
        """The returned id is ``<catalog_id>_<dataset identifier>``."""
        catalog = self.full_catalog
        catalog_id = title_to_name(catalog['title'])
        dataset = catalog.datasets[0]
        dataset_id = dataset['identifier']
        return_id = push_dataset_to_ckan(
            catalog,
            "oficina-de-muestra",
            dataset_id,
            self.portal_url,
            self.apikey,
            catalog_id=catalog_id,
        )
        self.assertEqual(return_id, catalog_id + '_' + dataset_id)

    @CKAN_VCR.use_cassette()
    def test_dataset_is_updated_correctly(self):
        """Pushing twice keeps the id and stores the changed description."""
        catalog = self.full_catalog
        catalog_id = title_to_name(catalog['title'])
        dataset_id = catalog.datasets[0]['identifier']
        # First push.
        push_dataset_to_ckan(
            catalog,
            "oficina-de-muestra",
            dataset_id,
            self.portal_url,
            self.apikey,
            catalog_id=catalog_id,
        )

        # Second push, after changing the description locally.
        catalog.datasets[0]['description'] = 'updated description'
        return_id = push_dataset_to_ckan(
            catalog,
            "oficina-de-muestra",
            dataset_id,
            self.portal_url,
            self.apikey,
            catalog_id=catalog_id,
        )

        # The description is read back from the package's 'notes' field.
        data_dict = {'id': catalog_id + '_' + dataset_id}
        package = self.portal.call_action('package_show', data_dict=data_dict)
        self.assertEqual(return_id, catalog_id + '_' + dataset_id)
        self.assertEqual('updated description', package['notes'])

    @CKAN_VCR.use_cassette()
    def test_resources_swapped_correctly(self):
        """Swapping distributions between two datasets swaps their resources.

        Both datasets are pushed under the same catalog id, their
        distributions are exchanged, and both are pushed again.  Each
        package must then hold the other dataset's distributions, with
        resource ids of the form ``same-catalog-id_<identifier>``.
        """
        catalog_id = 'same-catalog-id'
        full_dataset = self.full_catalog.datasets[0]
        full_dataset_id = full_dataset['identifier']
        push_dataset_to_ckan(
            self.full_catalog,
            'oficina-de-muestra',
            full_dataset_id,
            self.portal_url,
            self.apikey,
            catalog_id=catalog_id,
        )

        justice_dataset = self.justice_catalog.datasets[0]
        justice_dataset_id = justice_dataset['identifier']
        push_dataset_to_ckan(
            self.justice_catalog,
            'oficina-de-muestra',
            justice_dataset_id,
            self.portal_url,
            self.apikey,
            catalog_id=catalog_id,
        )
        # Switch them and update
        full_dataset['distribution'], justice_dataset['distribution'] = \
            justice_dataset['distribution'], full_dataset['distribution']

        full_package_id = push_dataset_to_ckan(
            self.full_catalog,
            'oficina-de-muestra',
            full_dataset_id,
            self.portal_url,
            self.apikey,
            catalog_id=catalog_id,
        )
        justice_package_id = push_dataset_to_ckan(
            self.justice_catalog,
            'oficina-de-muestra',
            justice_dataset_id,
            self.portal_url,
            self.apikey,
            catalog_id=catalog_id,
        )
        # Switch them back
        full_dataset['distribution'], justice_dataset['distribution'] = \
            justice_dataset['distribution'], full_dataset['distribution']

        data_dict = {'id': full_package_id}
        full_package = self.portal.call_action('package_show',
                                               data_dict=data_dict)
        data_dict = {'id': justice_package_id}
        justice_package = self.portal.call_action('package_show',
                                                  data_dict=data_dict)

        # The local distributions were switched back above, so each package
        # is compared against the *other* dataset's distributions.
        self.assertEqual(len(full_package['resources']),
                         len(justice_dataset['distribution']))
        self.assertEqual(len(justice_package['resources']),
                         len(full_dataset['distribution']))

        for resource, justice_distribution in zip(
                full_package['resources'], justice_dataset['distribution']):
            self.assertEqual(
                'same-catalog-id_' + justice_distribution['identifier'],
                resource['id'])

        for resource, full_distribution in zip(justice_package['resources'],
                                               full_dataset['distribution']):
            self.assertEqual(
                'same-catalog-id_' + full_distribution['identifier'],
                resource['id'])
Ejemplo n.º 9
0
class CkanBaseHandler(object):
    """Convenience wrapper around a ``RemoteCKAN`` connection.

    The connection is opened and checked with the ``site_read`` action on
    construction.  Release it with :meth:`close`, or use the instance as a
    context manager so it is closed automatically.
    """

    def __init__(self, url, apikey=None):
        """Open the connection and verify that the site can be read.

        Args:
            url: URL of the CKAN instance.
            apikey: Optional API key passed to ``RemoteCKAN``.

        Raises:
            CkanReadError: If the ``site_read`` action raises.
            CkanApiError: If ``site_read`` returns a falsy result.
        """
        self.apikey = apikey
        self.remote = RemoteCKAN(url, apikey=self.apikey)
        try:
            res = self.call_action('site_read')
        except Exception:
            # Do not leak the freshly opened session when the check fails.
            self.remote.close()
            raise CkanReadError()
        # NOTE(review): this writes the API key to the log in clear text;
        # consider masking it.
        logger.info('Open CKAN connection with api key: {}'.format(apikey))
        if not res:
            self.close()
            raise CkanApiError()

    def __enter__(self):
        """Return the handler itself for use in a ``with`` block."""
        return self

    def __exit__(self, type, value, traceback):
        """Close the connection when leaving the ``with`` block."""
        self.close()

    def close(self):
        """Close the underlying connection and log it."""
        self.remote.close()
        logger.info('Close CKAN connection')

    # @timeout
    def call_action(self, action, **kwargs):
        """Call ``action`` on the remote, sending ``kwargs`` as its data dict."""
        return self.remote.call_action(action, kwargs)

    @CkanExceptionsHandler()
    def get_all_categories(self, *args, **kwargs):
        """Return the result of ``group_list`` as a list.

        ``order_by`` defaults to ``'name'``; positional arguments are ignored.
        """
        kwargs.setdefault('order_by', 'name')
        return list(self.call_action('group_list', **kwargs))

    @CkanExceptionsHandler()
    def get_all_licenses(self, *args, **kwargs):
        """Return the available licenses as a list.

        Tries the ``license_list`` action first and falls back to
        ``licence_list`` when the former raises ``CkanError.CKANAPIError``.
        """
        try:
            action_result = self.call_action('license_list', **kwargs)
        except CkanError.CKANAPIError:
            action_result = self.call_action('licence_list', **kwargs)
        return list(action_result)

    @CkanExceptionsHandler()
    def get_all_organisations(self, *args, **kwargs):
        """Return the result of ``organization_list`` as a list."""
        return list(self.call_action('organization_list', **kwargs))

    @CkanExceptionsHandler(ignore=[CkanError.NotFound])
    def get_organisation(self, id, **kwargs):
        """Return the organization ``id``, or ``None`` if it is not found."""
        try:
            return self.call_action('organization_show', id=id, **kwargs)
        except CkanError.NotFound:
            return None

    @CkanExceptionsHandler(ignore=[CkanError.NotFound])
    def get_package(self, id, **kwargs):
        """Return the package ``id``, or ``False`` if it is not found.

        ``include_tracking`` defaults to ``True``.
        """
        kwargs.setdefault('include_tracking', True)
        try:
            return self.call_action('package_show', id=id, **kwargs)
        except CkanError.NotFound:
            return False

    def is_package_exists(self, id):
        """Return ``True`` when :meth:`get_package` finds ``id``."""
        return bool(self.get_package(id))

    def is_package_name_already_used(self, name):
        """Return ``True`` when :meth:`get_package` finds ``name``."""
        return bool(self.get_package(name))

    @CkanExceptionsHandler()
    # @timeout
    def push_resource(self, package, **kwargs):
        """Update the resource of ``package`` matching ``kwargs['id']``, or create it.

        When a resource with that id exists in ``package['resources']`` it is
        updated in place with ``kwargs`` (an empty ``url`` is not applied)
        and sent with ``resource_update``, stamped with ``last_modified``.
        Otherwise a new resource is sent with ``resource_create``, stamped
        with ``created``.

        Args:
            package: Package dict providing ``'id'`` and ``'resources'``.
            **kwargs: Resource fields; ``'id'`` is required as soon as the
                package has at least one resource.

        Returns:
            The result of the ``resource_update`` or ``resource_create``
            action.
        """
        kwargs['package_id'] = package['id']
        kwargs['created'] = datetime.now().isoformat()
        for resource in package['resources']:
            if resource['id'] != kwargs['id']:
                continue
            # Existing resource: the timestamp is a modification date.
            kwargs['last_modified'] = kwargs.pop('created')
            if 'url' in kwargs and not kwargs['url']:
                del kwargs['url']
            resource.update(kwargs)
            # Not every package dict carries tracking data; do not fail
            # when the key is absent.
            resource.pop('tracking_summary', None)
            # Ugly, for testing
            # if resource['datastore_active']:
            #     self.remote.action.resource_update(**resource)
            #     if 'upload' in resource:
            #         del resource['upload']
            # End of 'Ugly, for testing'
            return self.remote.action.resource_update(**resource)
        return self.remote.action.resource_create(**kwargs)

    @CkanExceptionsHandler()
    def push_resource_view(self, **kwargs):
        """Update the resource's view of the given type, or create one.

        ``title`` and ``description`` get default values when omitted.
        ``resource_id`` and ``view_type`` are required.

        Returns:
            The result of ``resource_view_update`` for the first existing
            view whose ``view_type`` matches, else of ``resource_view_create``.
        """
        kwargs.setdefault('title', 'Aperçu')
        kwargs.setdefault('description', 'Aperçu du jeu de données')

        views = self.call_action('resource_view_list',
                                 id=kwargs['resource_id'])
        for view in views:
            if view['view_type'] == kwargs['view_type']:
                return self.call_action('resource_view_update',
                                        id=view['id'],
                                        **kwargs)
        return self.call_action('resource_view_create', **kwargs)

    @CkanExceptionsHandler()
    def update_resource(self, id, **kwargs):
        """Fetch resource ``id``, overlay ``kwargs`` and send it back."""
        resource = self.call_action('resource_show', id=id)
        resource.update(kwargs)
        return self.call_action('resource_update', **resource)

    def check_dataset_integrity(self, name):
        """Raise ``CkanConflictError`` when a package named ``name`` exists."""
        if self.is_package_name_already_used(name):
            raise CkanConflictError('Dataset already exists')

    @CkanExceptionsHandler()
    def publish_dataset(self, id=None, resources=None, **kwargs):
        """Update the package ``id`` when it exists, otherwise create one.

        On update, the stored package is overlaid with ``kwargs``.
        ``resources`` is accepted but not used.

        Returns:
            The result of ``package_update`` or ``package_create``.
        """
        # Fetch once: the same lookup answers both "does it exist?" and
        # "what is its current content?".
        existing = self.get_package(id) if id else None
        if existing:
            return self.call_action('package_update', **{
                **existing,
                **kwargs
            })
        return self.call_action('package_create', **kwargs)

    @CkanExceptionsHandler()
    def publish_resource(self, package, **kwargs):
        """Push a resource and, when a view type is given, its view.

        Args:
            package: Package dict forwarded to :meth:`push_resource`.
            **kwargs: Resource fields; ``view_type`` is removed from them
                and, when truthy, used to push a view for the resource.

        Returns:
            tuple: ``(resource, view)``; ``view`` is ``None`` when no view
            was pushed.
        """
        # Defaulting to None lets callers omit view_type altogether.
        resource_view_type = kwargs.pop('view_type', None)
        resource = self.push_resource(package, **kwargs)

        view = None
        if resource_view_type:
            view = self.push_resource_view(resource_id=resource['id'],
                                           view_type=resource_view_type)

        return resource, view

    @CkanExceptionsHandler(ignore=[CkanError.NotFound])
    def delete_resource(self, id):
        """Force-delete resource ``id``; return ``None`` if it is not found."""
        try:
            return self.call_action('resource_delete', id=id, force=True)
        except CkanError.NotFound:
            return None

    @CkanExceptionsHandler(ignore=[CkanError.NotFound])
    def delete_dataset(self, id):
        """Delete package ``id``; return ``None`` if it is not found."""
        try:
            return self.call_action('package_delete', id=id)
        except CkanError.NotFound:
            return None
Ejemplo n.º 10
0
 def test_good_oldstyle(self):
     """Check ``organization_list`` via a manually closed ``RemoteCKAN``."""
     expected = ['aa', 'bb', 'cc']
     remote = RemoteCKAN(TEST_CKAN)
     self.assertEqual(remote.action.organization_list(), expected)
     remote.close()