Ejemplo n.º 1
0
def remove_datasets_from_ckan(portal_url,
                              apikey,
                              filter_in=None,
                              filter_out=None,
                              only_time_series=False,
                              organization=None):
    """Purge the datasets of a CKAN portal that match the given filters.

    Candidate identifiers come from two optional sources:

    * the portal's ``/data.json`` catalog, filtered with ``filter_in``,
      ``filter_out`` and ``only_time_series``;
    * a ``package_search`` restricted to ``organization``.

    When both sources are used only identifiers present in both are purged.
    When neither a data.json filter nor an organization is given, nothing is
    purged.

    Args:
        portal_url (str): URL of the target CKAN portal.
        apikey (str): API key of a user allowed to purge the datasets.
        filter_in (dict): Positive filter, as in search.get_datasets.
        filter_out (dict): Negative filter, as in search.get_datasets.
        only_time_series (bool): Keep only datasets that have time series
            resources.
        organization (str): Keep only datasets that belong to this
            organization.

    Returns:
        None
    """
    page_size = 500
    ckan_portal = RemoteCKAN(portal_url, apikey=apikey)
    try:
        identifiers = []
        datajson_filters = filter_in or filter_out or only_time_series
        if datajson_filters:
            identifiers += get_datasets(portal_url + '/data.json',
                                        filter_in=filter_in,
                                        filter_out=filter_out,
                                        only_time_series=only_time_series,
                                        meta_field='identifier')
        if organization:
            query = 'organization:"' + organization + '"'
            org_identifiers = []
            start = 0
            # package_search is paginated: keep requesting pages until the
            # offset reaches the total reported by the portal.
            while True:
                search_result = ckan_portal.call_action(
                    'package_search',
                    data_dict={
                        'q': query,
                        'rows': page_size,
                        'start': start,
                    })
                org_identifiers += [
                    dataset['id'] for dataset in search_result['results']
                ]
                start += page_size
                if search_result['count'] <= start:
                    break

            if datajson_filters:
                identifiers = set(identifiers).intersection(org_identifiers)
            else:
                identifiers = org_identifiers

        for identifier in identifiers:
            ckan_portal.call_action('dataset_purge',
                                    data_dict={'id': identifier})
    finally:
        # Release the HTTP session even when a request fails.
        ckan_portal.close()
Ejemplo n.º 2
0
def get_n_post(package_id):
    """Copy a registry dataset to the Open Government (OG) registry.

    Called by main(). Fetches the dataset with get_data_from_reg(), applies
    the module-level ``to_replace`` overrides, builds the bilingual
    ``metadata_contact`` field, drops the ``to_remove`` fields and posts the
    result to the OG site configured through the ``open_gov_registry_url``
    and ``open_gov_registry_api_key`` environment variables.

    :param package_id: id of the registry package to copy.
    :return: True when the package was created or updated on OG, False when
        both ``package_create`` and ``package_update`` failed.
    :raises IndexError: when the organization description has no ``|``
        separating the English and French branch names.
    """
    og_data = get_data_from_reg(package_id)

    # replace
    og_data.update(to_replace)

    # Use branch and data steward values to populate metadata_contact field
    # (necessary in sync_with_og script).
    data_steward = og_data["data_steward_email"].strip()
    # The description is expected to look like "<English name>|<French name>".
    branch = og_data["organization"]["description"].split('|')
    branch_en = branch[0].strip()
    branch_fr = branch[1].strip()
    og_data["metadata_contact"] = {
        "fr":
        "Gouvernement du Canada; Agriculture et Agroalimentaire Canada, " +
        branch_fr + ", " + data_steward,
        "en":
        "Government of Canada; Agriculture and Agri-Food Canada, " +
        branch_en + ", " + data_steward
    }

    # remove; a field that is already absent needs no action
    for k in to_remove:
        og_data.pop(k, None)

    og_site = os.getenv("open_gov_registry_url")
    og_key = os.getenv("open_gov_registry_api_key")
    rckan = RemoteCKAN(og_site, apikey=og_key)

    # First try to create new package.
    # If package create fails, it's possible that the package already exists
    # Try to update the package. If both of these actions fail, return false.
    try:
        rckan.call_action("package_create", data_dict=og_data)
    except Exception as e1:
        print(e1)
        try:
            rckan.call_action("package_update", data_dict=og_data)
        except Exception as e2:
            print(e2)
            return False

    return True
Ejemplo n.º 3
0
def read_ckan_catalog(portal_url):
    """Convert the metadata a portal exposes through the CKAN Action API v3
    to the data.json standard.

    Args:
        portal_url (str): URL of a CKAN data portal that supports API v3.

    Returns:
        dict: Internal representation of a catalog, for use by the functions
            of this library.

    Raises:
        NonParseableCatalog: when a request fails with CKANAPIError or
            RequestException.
    """
    portal = RemoteCKAN(portal_url)

    def fetch(action, data_dict=None):
        # Every request is sent with TLS certificate verification disabled.
        return portal.call_action(
            action, data_dict, requests_kwargs={"verify": False})

    try:
        status = fetch('status_show')
        packages_list = fetch('package_list')
        groups_list = fetch('group_list')

        # read every dataset of the portal, one request per package
        total = len(packages_list)
        packages = []
        for position, pkg in enumerate(packages_list, 1):
            # progress report (needed when there are many datasets)
            logger.info("Leyendo dataset {} de {}".format(position, total))
            packages.append(fetch('package_show', {'id': pkg}))
            # short pause between requests to avoid being banned
            time.sleep(0.2)

        # read every theme (group) of the portal
        groups = [fetch('group_show', {'id': grp}) for grp in groups_list]

        catalog = map_status_to_catalog(status)
        catalog["dataset"] = map_packages_to_datasets(packages, portal_url)
        catalog["themeTaxonomy"] = map_groups_to_themes(groups)

    except (CKANAPIError, RequestException) as e:
        logger.exception(
            'Error al procesar el portal %s', portal_url, exc_info=True)
        raise NonParseableCatalog(portal_url, e)

    return catalog
Ejemplo n.º 4
0
def push_dataset_to_ckan(catalog,
                         owner_org,
                         dataset_origin_identifier,
                         portal_url,
                         apikey,
                         catalog_id=None,
                         demote_superThemes=True,
                         demote_themes=True):
    """Write the metadata of a dataset to the given CKAN portal.

    The dataset is first sent with ``package_update``; if the portal answers
    NotFound it is created with ``package_create`` instead.

    Args:
        catalog (DataJson): Source catalog that contains the dataset.
        owner_org (str): Organization the dataset belongs to.
        dataset_origin_identifier (str): Id of the dataset to federate.
        portal_url (str): URL of the target CKAN portal.
        apikey (str): API key of a user allowed to create or update the
            dataset.
        catalog_id (str): Prefix prepended to the dataset id and name in the
            target catalog.
        demote_superThemes (bool): If true, the ids of the dataset's super
            themes are propagated as groups.
        demote_themes (bool): If true, the labels of the dataset's themes
            become tags; otherwise they are passed as groups.

    Returns:
        str: Id of the dataset in the target catalog.
    """
    dataset = catalog.get_dataset(dataset_origin_identifier)
    ckan_portal = RemoteCKAN(portal_url, apikey=apikey)
    try:
        package = map_dataset_to_package(catalog, dataset, owner_org,
                                         catalog_id, demote_superThemes,
                                         demote_themes)

        # Resolve the portal's license id from the dataset license, which may
        # be given either as the license title or as its URL.
        license_id = 'notspecified'
        dataset_license = dataset.get('license')
        if dataset_license:
            license_list = ckan_portal.call_action('license_list')
            ckan_license = next(
                (license_item for license_item in license_list
                 if license_item.get('title') == dataset_license
                 or license_item.get('url') == dataset_license),
                None)
            if ckan_license is not None:
                license_id = ckan_license['id']
        package['license_id'] = license_id

        try:
            pushed_package = ckan_portal.call_action('package_update',
                                                     data_dict=package)
        except NotFound:
            pushed_package = ckan_portal.call_action('package_create',
                                                     data_dict=package)

        return pushed_package['id']
    finally:
        # Close the session on every path, including failed requests.
        ckan_portal.close()
Ejemplo n.º 5
0
def test_post():
    """Manual smoke test: try to create a package on the registry.

    A fixed package is fetched from the registry and tweaked (random title
    suffix, ids removed), but the payload actually posted is the one read
    from ``Data//fromAlexis.json``. Any error raised by ``package_create``
    is ignored.
    """
    site = os.getenv("registry_url")
    api_key = os.getenv("registry_api_key")
    raw_response = query_with_get(
        site,
        "api/3/action/package_show",
        "?id=" + "e328838f-3bfc-4d86-9cc5-23de0b549c91",
        apikey=api_key)

    # Tweak the fetched record so it could be posted as a new package.
    fetched = json.loads(raw_response)['result']
    fetched['title'] = fetched['title'] + str(randint(0, 99))
    for server_field in ('id', 'revision_id'):
        del fetched[server_field]
    fetched.pop('aafc_subject', None)

    with open("Data//fromAlexis.json") as json_fp:
        payload = json.load(json_fp)

    # do a post
    api_key = os.getenv("registry_api_key")
    client = RemoteCKAN(site, apikey=api_key)
    try:
        client.call_action("package_create", data_dict=payload)
    except Exception:
        # failures are deliberately ignored here
        pass
Ejemplo n.º 6
0
class Action(ActionRuntime):
    """Call a configured CKAN API action and emit what it returns."""

    category = "Data Sourcing"

    # Configuration keys that must be supplied.
    required_config = """
        url:     # URL for the CKAN endpoint
        action:  # CKAN action to perform
    """

    # Configuration keys that may be supplied, shown with their defaults.
    optional_config = """
        ignore_not_found: False     # Ignore collection not found errors
    """

    def on_start(self, config):
        """Open the CKAN client for the configured ``url``."""
        self.ckan = RemoteCKAN(config["url"])

    def on_input(self, item):
        """Run the configured action and emit its result.

        A list result is emitted element by element; any other result is
        emitted as a single item. NotFound errors are swallowed when
        ``ignore_not_found`` is set and re-raised otherwise.
        """
        action_name = self.config["action"]
        action_args = self.config.get("arguments", {})
        try:
            result = self.ckan.call_action(action_name, action_args)
        except errors.NotFound:
            if not self.config["ignore_not_found"]:
                raise
            return
        outputs = result if isinstance(result, list) else [result]
        for output_item in outputs:
            self.put(output_item)
Ejemplo n.º 7
0
def create_to_registry(package_id):
    """Create a new dataset in the registry with the values from OG.

    Called by:
    * main()
    Calls:
    * get_data_from_url()
    * replace_branch_and_data_steward()
    * replace_regions()
    * default_resource_date_published()

    :param package_id: id of the Open Government package to copy.
    :return: True when the package was created in the registry, False when
        the OG data could not be fetched or ``package_create`` failed.
    """
    with open("Data//fieldsAdded.json") as json_fp:
        add_fields = json.load(json_fp)

    og_data = get_data_from_url(package_id, "open_gov_url")
    if not og_data:
        # get_data_from_url returns an empty value when the lookup fails;
        # there is nothing to post in that case.
        print("no data found for package {}".format(package_id))
        return False

    og_data.update(add_fields)
    replace_branch_and_data_steward(og_data)
    replace_regions(og_data)
    default_resource_date_published(og_data)

    # Post to registry
    reg_site = os.getenv("registry_url")
    registry_key = os.getenv("registry_api_key")
    rckan = RemoteCKAN(reg_site, apikey=registry_key)
    try:
        rckan.call_action("package_create", data_dict=og_data)
    except Exception as e:
        print(e)
        return False
    return True
class CkanClient:
    """Small convenience wrapper around a ``RemoteCKAN`` connection."""

    def __init__(self, url: str, apikey: str):
        """Connect to the CKAN site at ``url`` using ``apikey``."""
        self.ckan = RemoteCKAN(url, apikey)

    def get_dataset(self, name):
        """Return the ``package_show`` result for the package ``name``."""
        lookup = {'id': name}
        return self.ckan.call_action('package_show', lookup)

    def create_dataset(self, dataset_params, fields_definition, primary_key):
        """Create a package and a datastore resource inside it.

        The package is created from ``dataset_params``; the datastore
        resource reuses the package name and is given the supplied field
        definitions and primary key. Returns the ``datastore_create`` result.
        """
        package = self.ckan.call_action('package_create', dataset_params)
        resource = {
            'package_id': package['id'],
            'resource_type': 'datastore',
            'name': dataset_params['name'],
        }
        payload = {
            'resource': resource,
            'fields': fields_definition,
            'primary_key': primary_key,
        }
        return self.ckan.call_action('datastore_create', payload)

    def upsert_dataset(self, resource_id, records):
        """Send ``records`` to ``datastore_upsert`` for ``resource_id``."""
        payload = {'resource_id': resource_id, 'records': records}
        self.ckan.call_action('datastore_upsert', payload)

    def delete_dataset(self, name):
        """Call ``dataset_purge`` for the package ``name``."""
        target = {'id': name}
        self.ckan.call_action('dataset_purge', target)
Ejemplo n.º 9
0
def export_to_ckan(arguments):
    """Export every data file of a directory, with its metadata, to CKAN.

    The input directory is scanned for ``*meta.json`` files. For each one
    the companion data file (same name without ``.meta.json``) is exported
    through ``export_file``; a metadata file whose data file is missing is
    deleted. The CKAN site, API key and organization are prompted for when
    they are not supplied.

    Args:
        arguments (dict): Parsed command-line options. Keys read:
            ``--input``, ``--site``, ``--organization``, ``--api-key`` and
            ``--public``.

    Raises:
        ValueError: when the requested organization does not exist on the
            CKAN site.
    """
    input_dir = arguments['--input']
    if not path.isabs(input_dir):
        input_dir = path.abspath(input_dir)
    ckan_host = arguments['--site']
    organization = arguments['--organization']
    api_key = arguments['--api-key']
    public = arguments['--public']

    if not ckan_host:
        ckan_host = prompt({
            'type': 'input',
            'name': 'ckan_host',
            'message': 'Inform your CKAN API Site: '
        })['ckan_host']

    if not api_key:
        api_key = prompt({
            'type': 'password',
            'name': 'api_key',
            'message': 'Inform your CKAN API Key:'
        })['api_key']

    ckan = RemoteCKAN(ckan_host, apikey=api_key)
    organizations = ckan.call_action("organization_list", {"all_fields": True})

    if not organization:
        organization = prompt({
            'type': 'list',
            'name': 'organization',
            'message': 'Select the organization:',
            'choices': [o['name'] for o in organizations]
        })['organization']

    # Fail with a clear message instead of a bare StopIteration when the
    # organization (e.g. a mistyped --organization) is not on the site.
    selected_org = next(
        (o for o in organizations if o['name'] == organization), None)
    if selected_org is None:
        raise ValueError(
            f"Organization '{organization}' not found on {ckan_host}")

    packages_cache = {}
    for entry in tqdm(os.listdir(input_dir)):
        if not entry.endswith("meta.json"):
            continue

        original_file = path.join(input_dir, entry.replace('.meta.json', ''))
        meta_file = path.join(input_dir, entry)

        if not path.exists(original_file):
            print(f"{original_file} not found. Deleting meta the data file.")
            os.remove(meta_file)
            continue

        with open(meta_file, 'r', encoding='latin-1') as mfp:
            meta_data = json.load(fp=mfp)

        export_file(ckan, original_file, meta_data, selected_org, public,
                    packages_cache)
Ejemplo n.º 10
0
def main(args):
    """List the users of every organization of a CKAN site.

    Reads ``HOST``, ``-a`` (API key, falling back to the ``CKAN_APIKEY``
    environment variable), ``-s`` (capacity filter) and ``-f`` (passed to
    ``output``) from ``args``.
    """
    def capac(capacity):
        # Without a -s filter every capacity is accepted; otherwise the
        # first character of the capacity must appear in the filter.
        wanted = args['-s']
        if not wanted:
            return True
        return capacity[0] in wanted

    apikey = args['-a'] or os.environ.get('CKAN_APIKEY')
    ckan = RemoteCKAN(args['HOST'], apikey=apikey)
    organizations = ckan.call_action(
        'organization_list',
        data_dict={'all_fields': True, 'include_users': True})

    # Collect the ids of all members whose capacity passes the filter.
    member_ids = []
    for organization in organizations:
        for member in organization['users']:
            if capac(member['capacity']):
                member_ids.append(member['id'])

    # Fetch every user record first, then encode them all.
    records = [
        ckan.call_action('user_show', data_dict={'id': member_id})
        for member_id in member_ids
    ]
    encoded = [encode(record) for record in records]
    output(encoded, args['-f'])
Ejemplo n.º 11
0
def get_data_from_url(package_id, url):
    """Fetch a package from the CKAN site named by an environment variable.

    :param package_id: id of the package to fetch with ``package_show``.
    :param url: name of the environment variable that holds the site URL.
    :return: the ``package_show`` result, or an empty list when the call
        fails for any reason.
    """
    client = RemoteCKAN(os.getenv(url))
    try:
        return client.call_action("package_show",
                                  data_dict={"id": package_id})
    except Exception:
        # if no data exists yet, return empty
        return []
Ejemplo n.º 12
0
def read_ckan_catalog(portal_url):
    """Convert the metadata a portal exposes through the CKAN Action API v3
    to the data.json standard.

    Errors raised while reading or mapping the portal are logged and an
    empty catalog is returned instead.

    Args:
        portal_url (str): URL of a CKAN data portal that supports API v3.

    Returns:
        dict: Internal representation of a catalog, for use by the functions
        of this library. Empty when the portal could not be processed.
    """
    portal = RemoteCKAN(portal_url)
    catalog = {}

    try:
        status = portal.call_action('status_show')
        packages_list = portal.call_action('package_list')
        groups_list = portal.call_action('group_list')

        packages = [
            portal.call_action('package_show', {'name_or_id': pkg})
            for pkg in packages_list
        ]

        groups = [
            portal.call_action('group_show', {'id': grp})
            for grp in groups_list
        ]

        catalog = map_status_to_catalog(status)
        catalog["dataset"] = map_packages_to_datasets(packages, portal_url)
        catalog["themeTaxonomy"] = map_groups_to_themes(groups)
    except Exception:
        # Only ordinary errors are swallowed; KeyboardInterrupt and
        # SystemExit must still be able to stop the program.
        logging.error('Error al procesar el portal %s',
                      portal_url,
                      exc_info=True)

    return catalog
Ejemplo n.º 13
0
def scidm_resources_download(token, package_id):
    """Return the name and URL of every resource of a SCIDM package.

    Args:
        token: API key used to authenticate against scidm.nchc.org.tw.
        package_id: id of the package whose resources are listed.

    Returns:
        pandas.DataFrame with one row per resource and the columns
        ``name`` and ``url``.
    """
    scidm = RemoteCKAN('https://scidm.nchc.org.tw', apikey=token)
    package = scidm.call_action('package_show', {'id': package_id})

    # Walk the resources contained in the package.
    pairs = [(res['name'], res['url']) for res in package['resources']]

    return pd.DataFrame({
        'name': [name for name, _ in pairs],
        'url': [url for _, url in pairs],
    })
Ejemplo n.º 14
0
def push_theme_to_ckan(catalog, portal_url, apikey, identifier=None, label=None):
    """Write the metadata of a theme to the given CKAN portal as a group.

    Args:
        catalog (DataJson): Source catalog that contains the theme.
        portal_url (str): URL of the target CKAN portal.
        apikey (str): API key of a user allowed to create the group.
        identifier (str): Identifier used to look the theme up in the
            taxonomy.
        label (str): Label used to look the theme up in the taxonomy.

    Returns:
        str: Name of the theme in the target catalog.
    """
    ckan_portal = RemoteCKAN(portal_url, apikey=apikey)
    try:
        theme = catalog.get_theme(identifier=identifier, label=label)
        group = map_theme_to_group(theme)
        pushed_group = ckan_portal.call_action('group_create',
                                               data_dict=group)
        return pushed_group['name']
    finally:
        # Release the HTTP session whether or not the push succeeded.
        ckan_portal.close()
Ejemplo n.º 15
0
def get_data_from_reg(package_id):
    """
    Called by get_n_post
    Get data from registry (site read from the ``registry_url`` environment
    variable).

    :param package_id: id of the package to fetch with ``package_show``.
    :return: the ``package_show`` result.
    :raises Exception: whatever ``package_show`` raised, after printing
        "failed". Previously the failure surfaced as an unrelated
        UnboundLocalError on the return statement.
    """
    site = os.getenv("registry_url")
    rckan = RemoteCKAN(site)

    try:
        return rckan.call_action("package_show",
                                 data_dict={"id": package_id})
    except Exception:
        print("failed")
        # There is no result to return; let the caller see the real cause.
        raise
Ejemplo n.º 16
0
def push_new_themes(catalog, portal_url, apikey):
    """Push the taxonomy themes of a catalog that the portal does not have.

    Args:
        catalog (DataJson): Source catalog that contains the taxonomy.
        portal_url (str): URL of the target CKAN portal.
        apikey (str): API key of a user allowed to create the groups.

    Returns:
        list: Names of the themes created, in taxonomy order.
    """
    ckan_portal = RemoteCKAN(portal_url, apikey=apikey)
    try:
        # A set gives constant-time membership checks below.
        existing_themes = set(ckan_portal.call_action('group_list'))
    finally:
        # This connection is only needed for the listing.
        ckan_portal.close()

    new_themes = [
        theme['id'] for theme in catalog['themeTaxonomy']
        if theme['id'] not in existing_themes
    ]
    return [
        push_theme_to_ckan(catalog, portal_url, apikey, identifier=new_theme)
        for new_theme in new_themes
    ]
Ejemplo n.º 17
0
    def post(self, request):
        """Verify a user's registry API key and link them to their publisher.

        Reads ``apiKey`` and ``userId`` from the request body, checks them
        against the CKAN registry at ``settings.CKAN_URL``, takes the first
        organization of the user as primary organization, creates or updates
        the matching Publisher and its organisation groups, adds the user to
        those groups and stores the key and user id on the user.

        Returns:
            Response: the serialized organisation user.

        Raises:
            exceptions.ParseError: when ``apiKey`` or ``userId`` is missing.
            exceptions.APIException: when a registry lookup fails, the user
                has no organizations, or the publisher IATI id is unknown.
        """
        # TODO: If verifying for the first time, OrganisationGroup and
        # OrganisationAdminGroup don't exist yet. - 2016-10-25

        user = request.user.organisationuser
        api_key = request.data.get('apiKey')
        user_id = request.data.get('userId')

        if not api_key or not user_id:
            raise exceptions.ParseError(
                detail="apiKey or userId not specified")

        client = RemoteCKAN(settings.CKAN_URL, apikey=api_key)

        # Catch Exception rather than BaseException so that interpreter
        # shutdown signals are not turned into API errors.
        try:
            client.call_action('user_show', {
                "id": user_id,
                "include_datasets": True,
            })
        except Exception:
            raise exceptions.APIException(
                detail="user with id {} not found".format(user_id))

        try:
            orgList = client.call_action('organization_list_for_user', {})
        except Exception:
            raise exceptions.APIException(
                detail="Can't get organisation list for user")

        if not orgList:
            raise exceptions.APIException(
                detail="This user has no organisations yet")

        # The first organization returned is treated as the primary one.
        primary_org_id = orgList[0]['id']
        org_show_error = (
            "Can't call organization_show for organization with id "
            "{}".format(primary_org_id))

        try:
            primary_org = client.call_action(
                'organization_show', {"id": primary_org_id})
        except Exception:
            raise exceptions.APIException(detail=org_show_error)

        if not primary_org:
            raise exceptions.APIException(detail=org_show_error)

        primary_org_iati_id = primary_org.get('publisher_iati_id')
        publisher_org = get_or_none(
            Organisation, organisation_identifier=primary_org_iati_id)

        if not publisher_org:
            raise exceptions.APIException(
                detail="publisher_iati_id of {} not found in Organisation "
                       "standard, correct this in the IATI registry".format(
                           primary_org_iati_id))

        org_name = primary_org.get('name')

        # TODO: add organisation foreign key - 2016-10-25
        publisher, publisher_created = Publisher.objects.update_or_create(
            iati_id=primary_org_id,
            publisher_iati_id=primary_org_iati_id,
            defaults={
                "name": org_name,
                "display_name": primary_org.get('display_name'),
                "organisation": publisher_org,
            }
        )

        organisation_group, _ = OrganisationGroup.objects.get_or_create(
            publisher=publisher,
            defaults={
                "name": "{} Organisation Group".format(org_name)
            }
        )
        organisation_group.organisationuser_set.add(user)

        # The verifying user only becomes owner of the admin group when the
        # publisher was created by this request.
        admin_group_defaults = {
            "name": "{} Organisation Admin Group".format(org_name),
        }
        if publisher_created:
            admin_group_defaults["owner"] = user
        organisation_admin_group, _ = \
            OrganisationAdminGroup.objects.get_or_create(
                publisher=publisher,
                defaults=admin_group_defaults,
            )
        organisation_admin_group.organisationuser_set.add(user)

        user.iati_api_key = api_key
        user.iati_user_id = user_id
        user.save()

        serializer = OrganisationUserSerializer(
            user,
            context={
                'request': request,
            }
        )

        return Response(serializer.data)
Ejemplo n.º 18
0
    def copy_datasets(self, remote, package_ids=None):
        """
        a process that accepts package ids on stdin which are passed to
        the package_show API on the remote CKAN instance and compared
        to the local version of the same package.  The local package is
        then created, updated, deleted or left unchanged.  This process
        outputs that action as a string 'created', 'updated', 'deleted'
        or 'unchanged'

        remote is the address of the other CKAN instance.  package_ids is
        an optional iterable of ids; when it is empty or None the ids are
        read line by line from stdin.

        NOTE(review): the code visible here only loads the source and
        target versions of each package; the compare/create/update/delete
        step described above is not part of this excerpt.
        """
        # Pick the copy direction: with a push API key (and no fetch option)
        # the local instance is the source and the remote one the target;
        # with the fetch option the roles are reversed.
        if self.options.push_apikey and not self.options.fetch:
            registry = LocalCKAN()
            portal = RemoteCKAN(remote, apikey=self.options.push_apikey)
        elif self.options.fetch:
            registry = RemoteCKAN(remote)
            portal = LocalCKAN()
        else:
            print "exactly one of -f or -a options must be specified"
            return

        # Reference time for the release-date check below.
        now = datetime.now()

        if not package_ids:
            # Read lines from stdin until readline returns '' (end of input).
            package_ids = iter(sys.stdin.readline, '')

        for package_id in package_ids:
            package_id = package_id.strip()
            reason = None
            target_deleted = False
            # Load the source version; a package the caller may not read is
            # treated as missing, other API/URL errors are reported as a JSON
            # line on stdout and then re-raised.
            # NOTE(review): NotFound is not handled separately for the source;
            # if it derives from CKANAPIError it is reported and re-raised -
            # confirm that this is intended.
            try:
                source_pkg = registry.action.package_show(id=package_id)
            except NotAuthorized:
                source_pkg = None
            except (CKANAPIError, urllib2.URLError), e:
                sys.stdout.write(json.dumps([package_id, 'source error',
                    unicode(e.args)]) + '\n')
                raise
            # Deleted packages count as missing.
            if source_pkg and source_pkg['state'] == 'deleted':
                source_pkg = None

            if source_pkg and source_pkg['type'] != 'dataset':
                # non-default dataset types ignored
                source_pkg = None

            # NOTE(review): called even when source_pkg is None; presumably
            # the helper tolerates that - confirm.
            _trim_package(source_pkg)

            if source_pkg and not self.options.mirror:
                # treat unpublished packages same as deleted packages
                if not source_pkg['portal_release_date']:
                    source_pkg = None
                    reason = 'release date not set'
                elif isodate(source_pkg['portal_release_date'], None) > now:
                    source_pkg = None
                    reason = 'release date in future'

            # Load the target version; missing or unreadable packages become
            # None, other API/URL errors are reported and re-raised.
            try:
                target_pkg = portal.call_action('package_show',
                    {'id':package_id})
            except (NotFound, NotAuthorized):
                target_pkg = None
            except (CKANAPIError, urllib2.URLError), e:
                sys.stdout.write(json.dumps([package_id, 'target error',
                    unicode(e.args)]) + '\n')
                raise
Ejemplo n.º 19
0
    def copy_datasets(self, remote, package_ids=None):
        """
        a process that accepts package ids on stdin which are passed to
        the package_show API on the remote CKAN instance and compared
        to the local version of the same package.  The local package is
        then created, updated, deleted or left unchanged.  This process
        outputs that action as a string 'created', 'updated', 'deleted'
        or 'unchanged'

        remote is the address of the other CKAN instance.  package_ids is
        an optional iterable of ids; when it is empty or None the ids are
        read line by line from stdin.

        NOTE(review): the code visible here only loads the source and
        target versions of each package; the compare/create/update/delete
        step described above is not part of this excerpt.
        """
        # Pick the copy direction: with a push API key (and no fetch option)
        # the local instance is the source and the remote one the target;
        # with the fetch option the roles are reversed.
        if self.options.push_apikey and not self.options.fetch:
            registry = LocalCKAN()
            portal = RemoteCKAN(remote, apikey=self.options.push_apikey)
        elif self.options.fetch:
            registry = RemoteCKAN(remote)
            portal = LocalCKAN()
        else:
            print "exactly one of -f or -a options must be specified"
            return

        # Reference time for the release-date check below.
        now = datetime.now()

        if not package_ids:
            # Read lines from stdin until readline returns '' (end of input).
            package_ids = iter(sys.stdin.readline, '')

        for package_id in package_ids:
            package_id = package_id.strip()
            reason = None
            target_deleted = False
            # Load the source version; a package the caller may not read is
            # treated as missing, other API/URL errors are reported as a JSON
            # line on stdout and then re-raised.
            # NOTE(review): NotFound is not handled separately for the source;
            # if it derives from CKANAPIError it is reported and re-raised -
            # confirm that this is intended.
            try:
                source_pkg = registry.action.package_show(id=package_id)
            except NotAuthorized:
                source_pkg = None
            except (CKANAPIError, urllib2.URLError), e:
                sys.stdout.write(json.dumps([package_id, 'source error',
                    unicode(e.args)]) + '\n')
                raise
            # Deleted packages count as missing.
            if source_pkg and source_pkg['state'] == 'deleted':
                source_pkg = None

            if source_pkg and source_pkg['type'] != 'dataset':
                # non-default dataset types ignored
                source_pkg = None

            # NOTE(review): called even when source_pkg is None; presumably
            # the helper tolerates that - confirm.
            _trim_package(source_pkg)

            if source_pkg and not self.options.mirror:
                # treat unpublished packages same as deleted packages
                if not source_pkg['portal_release_date']:
                    source_pkg = None
                    reason = 'release date not set'
                elif isodate(source_pkg['portal_release_date'], None) > now:
                    source_pkg = None
                    reason = 'release date in future'

            # Load the target version; missing or unreadable packages become
            # None, other API/URL errors are reported and re-raised.
            try:
                target_pkg = portal.call_action('package_show',
                    {'id':package_id})
            except (NotFound, NotAuthorized):
                target_pkg = None
            except (CKANAPIError, urllib2.URLError), e:
                sys.stdout.write(json.dumps([package_id, 'target error',
                    unicode(e.args)]) + '\n')
                raise
        # NOTE(review): this is the tail of a branch whose condition (and the
        # enclosing ``try:``) lies outside this excerpt; it turns TLS
        # certificate verification on.
        session.verify = True
    elif SSL_VERIFY == "false":
        # SSL_VERIFY is the string "false": turn verification off.
        session.verify = False
    else:
        # Otherwise follow the --no-verify command-line flag.
        session.verify = not arguments['--no-verify']
    ckan = RemoteCKAN(BASE_URL, session=session, apikey=API_KEY)

    # meta.xml: read the file given with --file and map it to CKAN fields
    meta_xml_path = arguments['--file']
    meta = read_meta_xml.read_meta_xml(meta_xml_path)
    ckan_metadata = map_metadata_to_ckan(meta)

    dataset = arguments['--dataset']
    # Today's date in the Europe/Berlin time zone, formatted as dd.mm.YYYY.
    now_utc = pytz.utc.localize(datetime.utcnow())
    now_cet = now_utc.astimezone(pytz.timezone("Europe/Berlin"))
    data = {
        'id': dataset,
        'dateLastUpdated': now_cet.date().strftime('%d.%m.%Y')
    }
    # Mapped metadata is merged last, so it overrides 'id' or
    # 'dateLastUpdated' if it contains those keys.
    data.update(ckan_metadata)
    # NOTE(review): if ``pprint`` here is pprint.pprint, it prints ``data``
    # itself and returns None, so this message would end in "None";
    # pprint.pformat may have been intended - confirm.
    print(f"Updating metadata on dataset {dataset} to {pprint(data)}")
    try:
        ckan.call_action('package_patch', data)
    except NotFound:
        # Report the missing dataset, then let the outer handler exit.
        print('Dataset %s not found!' % dataset, file=sys.stderr)
        raise
except Exception as e:
    # Any failure is reported on stderr with its traceback and the script
    # exits with status 1.
    print("Error: %s" % e, file=sys.stderr)
    print(traceback.format_exc(), file=sys.stderr)
    sys.exit(1)
Ejemplo n.º 21
0
    def post(self, request, publisher_id):
        """Publish a publisher's activities dataset to the CKAN registry.

        Creates the ``<publisher name>-activities`` package on the registry;
        when creation fails, falls back to updating the existing package with
        the same name. The result is stored locally as a ``Dataset`` and the
        publisher's ``ready_to_publish`` activities are flagged as published.

        Args:
            request: Request whose ``data`` must contain ``source_url`` and
                whose user has an ``organisationuser`` holding the API key.
            publisher_id: Primary key of the ``Publisher`` to publish for.

        Returns:
            Response wrapping the serialized ``Dataset``.

        Raises:
            exceptions.APIException: ``source_url`` is missing, the user's
                organisation list cannot be fetched or is empty, or the
                package can neither be created nor found for an update.
        """
        organisationuser = request.user.organisationuser
        iati_user_id = organisationuser.iati_user_id
        publisher = Publisher.objects.get(pk=publisher_id)
        # The result is unused; the lookup is kept because ``get`` raises when
        # no admin group matches.
        # NOTE(review): presumably an existence check - confirm it is wanted.
        OrganisationAdminGroup.objects.get(publisher_id=publisher_id)

        source_url = request.data.get('source_url', None)

        if not source_url:
            raise exceptions.APIException(detail="no source_url provided")

        client = RemoteCKAN(settings.CKAN_URL,
                            apikey=organisationuser.iati_api_key)

        # TODO: should this be the name? - 2017-02-20
        source_name = '{}-activities'.format(publisher.name)

        # get all published activities, except for the ones that are just modified
        activities = Activity.objects.filter(ready_to_publish=True, publisher=publisher)

        try:
            org_list = client.call_action('organization_list_for_user', {})
        except Exception:
            # The original handler referenced an undefined ``user_id`` and so
            # raised NameError instead of the intended API error.
            raise exceptions.APIException(
                detail="Can't get organisation list for user {}".format(
                    iati_user_id))

        if not org_list:
            # Without this guard ``org_list[0]`` fails with an IndexError.
            raise exceptions.APIException(
                detail="This user has no organisations yet")

        primary_org_id = org_list[0]['id']

        # Payload shared by the create call and the update fallback.
        package = {
            "resources": [
                {"url": source_url}
            ],
            "name": source_name,
            "filetype": "activity",
            "date_updated": datetime.now().strftime('%Y-%m-%d %H:%M'),
            "activity_count": activities.count(),
            "title": source_name,
            "owner_org": primary_org_id,
            "url": source_url,
        }

        try:
            # sync main datasets to IATI registry
            registry_dataset = client.call_action('package_create', package)
        except Exception as e:
            # try to recover from case when the dataset already exists (just update it instead)
            old_package = client.call_action('package_show', {
                "name_or_id": source_name,
            })

            if not old_package:
                # Not every exception carries ``error_dict``; read it safely so
                # the diagnostic print cannot raise AttributeError.
                print('exception raised in client_call_action', e,
                      getattr(e, 'error_dict', None))
                raise exceptions.APIException(detail="Failed publishing dataset")

            registry_dataset = client.call_action(
                'package_update', dict(package, id=old_package.get('id')))

        # 0. create_or_update Dataset object
        # NOTE(review): ``create`` is used although the step is described as
        # create-or-update; confirm behaviour when the row already exists.
        dataset = Dataset.objects.create(
            id=registry_dataset['id'],
            name=source_name,
            title=source_name,
            filetype=1,
            publisher=publisher,
            source_url=source_url,  # TODO: store in OIPA somewhere, or let user define this? - 2017-01-13
            is_parsed=False,
            iati_version="2.02",
        )

        #  update the affected activities flags
        activities.update(published=True, modified=False, ready_to_publish=True)

        # remove the old datasets from the registry
        # TODO: query the registry to remove a dataset - 2017-01-16
        # TODO: remove old datasets locally as well - 2017-01-16
        # TODO: Or just ask the user to remove the old datasets by hand? - 2017-02-20

        # return Dataset object
        serializer = DatasetSerializer(dataset, context={'request': request})
        return Response(serializer.data)
Ejemplo n.º 22
0
# ==== Data import  ==== #

create.action.organization_create(name='statens-vegvesen', title='Statens vegvesen')
print 'Organisasjonen Statens vegvesen opprettet'

file_name = 'package_create.csv'

csv_file = open(file_name, 'rb')
raw_data = csv.DictReader(csv_file, delimiter=';', skipinitialspace=True)

for row in raw_data:
    dataset_dict=row
    print dataset_dict
    # Remember: In create, package name = name, in patch it's id
    create.call_action('package_create',dataset_dict) 
    #create.call_action('package_patch',dataset_dict)

print '------------ Datasett opprettet --------------------'

file_name = 'resource_create.csv'

csv_file = open(file_name, 'rb')
raw_data = csv.DictReader(csv_file, delimiter=';', skipinitialspace=True)

for row in raw_data:
    dataset_dict=row
    print dataset_dict
    create.call_action('resource_create',dataset_dict) 
    
print '------------------ Ressurser opprettet --------------------'
try:
    BASE_URL = os.getenv('CKAN_BASE_URL')
    API_KEY = os.getenv('CKAN_API_KEY')
    ckan = RemoteCKAN(BASE_URL, apikey=API_KEY)


    # CKAN metadata
    dataset = arguments['--dataset']
    data = {
        'id': dataset,
    }
    print(f"Extracting metadata from dataset {dataset}.")
    try:
        if arguments['--no-verify']:
            ckan_metadata = ckan.call_action('package_show', data, requests_kwargs={'verify': False})
        else:
            ckan_metadata = ckan.call_action('package_show', data)
    except NotFound:
         print('Dataset %s not found!' % dataset, file=sys.stderr)
         raise
    
    # XLS file
    template_path = os.path.join(__location__, 'OGD-Metadaten_Template.xlsx')
    wb = openpyxl.load_workbook(template_path)
    ws = wb['metadata']
    # set all values
    xls_metadata = map_metadata_to_xls(ckan_metadata)
    for cell, value in xls_metadata.items():
        ws[cell] = value
    
Ejemplo n.º 24
0
def update_to_registry(package_id):
    '''
    Update registry dataset with the values from OG

    The Open Government (OG) record is used as the base; the fields that are
    specific to the AAFC Registry are copied over it from the current
    registry record, and the result is sent to the registry with
    ``package_update``.

    Called by:
    * main()
    Call:
    * get_data_from_url()
    * replace_branch_and_data_steward()
    * replace_regions()

    Args:
        package_id: Identifier of the dataset to synchronise.

    Returns:
        bool: True when ``package_update`` succeeded, False when it raised.

    Raises:
        KeyError: The registry record lacks one of the registry-only fields.
    '''
    # Fields whose registry value must survive the sync.
    registry_only_fields = (
        'ready_to_publish',
        'drf_program_inventory',
        'official_language',
        'aafc_owner_org',
        'owner_org',
        'data_steward_email',
        'procured_data',
        'elegible_for_release',
        'authority_to_release',
        'mint_a_doi',
        'procured_data_organization_name',
        'privacy',
        'data_source_repository',
        'drf_core_responsibilities',
        'authoritative_source',
        'formats',
        'aafc_sector',
        'security',
        'ineligibility_reason',
        'access_to_information',
        'other',
        'publication',
        'data_released',
        'open_government_portal_record_e',
        'open_government_portal_record_f',
        'groups',
    )

    og_data = get_data_from_url(package_id, "open_gov_url")
    # query for registry data and remove shared fields (aafc registry exclusive fields will be kept i.e. ODI reference number, DRF core responsibilties)
    reg_data = get_data_from_url(package_id, "registry_url")
    if reg_data != []:
        # strip all fields except we will keep the values AAFC Registry specific fields
        reg_keep = {field: reg_data[field] for field in registry_only_fields}

        # Keep ODI number if it exists, some datasets may not have an ODI.
        if 'odi_reference_number' in reg_data:
            og_data['odi_reference_number'] = reg_data['odi_reference_number']

        # Update the Open Gov dataset
        og_data.update(reg_keep)

    ready_to_publish = None
    if 'ready_to_publish' in reg_data:
        ready_to_publish = reg_data['ready_to_publish']

    # Only map branch and data steward for external (FGP) datasets, for AAFC Registry datasets, this is not required.
    if ready_to_publish == "false":
        replace_branch_and_data_steward(og_data)

    # Ensure that we reset ready to publish back to false after AAFC Registry dataset is posted to OG
    if ready_to_publish == "true":
        # This was a no-op ``==`` comparison, so the flag was never reset.
        og_data['ready_to_publish'] = "false"

    replace_regions(og_data)
    default_resource_date_published(og_data)
    reg_site = os.getenv("registry_url")
    registry_key = os.getenv("registry_api_key")
    rckan = RemoteCKAN(reg_site, apikey=registry_key)
    try:
        rckan.call_action("package_update", data_dict=og_data)
    except Exception as e:
        # Best effort: report the failure and let the caller decide.
        print(e)
        return False
    return True
Ejemplo n.º 25
0
    def post(self, request, publisher_id):
        """Publish a publisher's organisations dataset to the CKAN registry.

        Creates the ``<publisher name>-organisations`` package on the
        registry; when creation fails, falls back to updating the existing
        package with the same name. The publisher's manually added
        organisation ``Dataset`` is then updated with the registry id, and
        the ``ready_to_publish`` organisations are flagged as published.

        Args:
            request: Request whose ``data`` must contain ``source_url`` and
                whose user has an ``organisationuser`` holding the API key.
            publisher_id: Primary key of the ``Publisher`` to publish for.

        Returns:
            Response wrapping the serialized ``Dataset``.

        Raises:
            exceptions.APIException: ``source_url`` is missing, the user's
                organisation list cannot be fetched or is empty, or the
                package can neither be created nor found for an update.
        """
        organisationuser = request.user.organisationuser
        iati_user_id = organisationuser.iati_user_id
        publisher = Publisher.objects.get(pk=publisher_id)

        source_url = request.data.get('source_url', None)

        if not source_url:
            raise exceptions.APIException(detail="no source_url provided")

        client = RemoteCKAN(settings.CKAN_URL,
                            apikey=organisationuser.iati_api_key)

        # TODO: should this be the name? - 2017-02-20
        source_name = '{}-organisations'.format(publisher.name)

        # get all published organisations, except for the ones that are just
        # modified
        organisations = Organisation.objects.filter(ready_to_publish=True,
                                                    publisher=publisher)

        try:
            org_list = client.call_action('organization_list_for_user', {})
        except Exception:
            # ``Exception`` rather than ``BaseException`` so KeyboardInterrupt
            # and SystemExit are not converted into API errors. The message
            # now has the placeholder the ``format`` call was missing.
            raise exceptions.APIException(
                detail="Can't get organisation list for user {}".format(
                    iati_user_id))

        if not org_list:
            # Without this guard ``org_list[0]`` fails with an IndexError.
            raise exceptions.APIException(
                detail="This user has no organisations yet")

        primary_org_id = org_list[0]['id']

        # Payload shared by the create call and the update fallback.
        package = {
            "resources": [{
                "url": source_url
            }],
            "name": source_name,
            "filetype": "organisation",
            "date_updated": datetime.now().strftime('%Y-%m-%d %H:%M'),
            "organisation_count": organisations.count(),
            "title": source_name,
            "owner_org": primary_org_id,
            "url": source_url,
        }

        try:
            # sync main datasets to IATI registry
            registry_dataset = client.call_action('package_create', package)
        except Exception:
            # try to recover from case when the dataset already exists (just
            # update it instead)
            old_package = client.call_action('package_show', {
                "name_or_id": source_name,
            })

            if not old_package:
                raise exceptions.APIException(
                    detail="Failed publishing dataset")

            registry_dataset = client.call_action(
                'package_update', dict(package, id=old_package.get('id')))

        # 0. create_or_update Dataset object
        dataset = Dataset.objects.get(
            filetype=2,
            publisher=publisher,
            added_manually=True,
        )

        dataset.iati_id = registry_dataset['id']
        dataset.name = source_name
        dataset.title = source_name
        dataset.source_url = source_url
        dataset.is_parsed = False
        dataset.save()

        #  update the affected organisations flags
        organisations.update(published=True,
                             modified=False,
                             ready_to_publish=True)

        # remove the old datasets from the registry
        # TODO: query the registry to remove a dataset - 2017-01-16
        # TODO: remove old datasets locally as well - 2017-01-16

        # TODO: Or just ask the user to remove the old datasets by hand?
        # - 2017-02-20

        # return Dataset object
        serializer = DatasetSerializer(dataset, context={'request': request})
        return Response(serializer.data)
Ejemplo n.º 26
0
BASE_URL = os.getenv('CKAN_BASE_URL')
API_KEY = os.getenv('CKAN_API_KEY')

import requests
import urllib3
urllib3.disable_warnings()

site = RemoteCKAN(BASE_URL, apikey=API_KEY)

for dataset in sys.stdin:
    data = {"id": dataset.strip()}
    log.info('Reordering %s...' % data['id'])
    try:
        verify = not arguments['--no-verify']
        ckan_dataset = site.call_action('package_show',
                                        data,
                                        requests_kwargs={'verify': verify})

        resources = [{
            key: value
            for (key, value) in r.items() if key in ['name', 'id']
        } for r in ckan_dataset['resources']]
        sorted_resources = sorted(resources,
                                  key=lambda r: r['name'],
                                  reverse=True)
        sorted_ids = [r['id'] for r in sorted_resources]
        reorder = {'id': ckan_dataset['id'], 'order': sorted_ids}
        site.call_action('package_resource_reorder',
                         reorder,
                         requests_kwargs={'verify': verify})
    except NotFound:
Ejemplo n.º 27
0
class PushTestCase(unittest.TestCase):
    """Tests for ``push_dataset_to_ckan`` run through VCR cassettes.

    Every fixture and test method is wrapped by ``CKAN_VCR.use_cassette()``.
    """

    CKAN_VCR = vcr.VCR(
        path_transformer=vcr.VCR.ensure_suffix('.yaml'),
        cassette_library_dir=os.path.join(
            "tests", "cassetes", "ckan_integration", "push_dataset"),
        filter_headers=['Authorization', 'X-CKAN-API-Key'],
        record_mode='once',
    )

    portal_url = 'http://localhost:8080'
    apikey = "<apikey>"

    @classmethod
    def get_sample(cls, sample_filename):
        """Return the path of ``sample_filename`` inside ``SAMPLES_DIR``."""
        return os.path.join(SAMPLES_DIR, sample_filename)

    def _push_dataset(self, catalog, dataset_id, catalog_id):
        """Push one dataset to the test portal and return the pushed id."""
        return push_dataset_to_ckan(
            catalog,
            "oficina-de-muestra",
            dataset_id,
            self.portal_url,
            self.apikey,
            catalog_id=catalog_id,
        )

    def _purge_dataset(self, name):
        """Purge ``name`` from the portal; a missing dataset is ignored."""
        try:
            self.portal.call_action('dataset_purge', data_dict={'id': name})
        except NotFound:
            pass

    def _show_package(self, package_id):
        """Return the portal's ``package_show`` result for ``package_id``."""
        return self.portal.call_action('package_show',
                                       data_dict={'id': package_id})

    @staticmethod
    def _swap_distributions(first, second):
        """Exchange the ``distribution`` entries of two datasets in place."""
        first['distribution'], second['distribution'] = \
            second['distribution'], first['distribution']

    @CKAN_VCR.use_cassette()
    def setUp(self):
        self.portal = RemoteCKAN(self.portal_url, apikey=self.apikey)
        self.full_catalog = pydatajson.DataJson(
            self.get_sample('full_data.json'))
        self.justice_catalog = pydatajson.DataJson(
            self.get_sample('catalogo_justicia.json'))

    @CKAN_VCR.use_cassette()
    def tearDown(self):
        # Resolve both names before the first purge request is sent.
        names = [
            title_to_name(catalog.datasets[0]['title'])
            for catalog in (self.full_catalog, self.justice_catalog)
        ]
        for name in names:
            self._purge_dataset(name)

        self.portal.close()

    @CKAN_VCR.use_cassette()
    def test_dataset_is_created_correctly(self):
        catalog = self.full_catalog
        catalog_id = title_to_name(catalog['title'])
        dataset_id = catalog.datasets[0]['identifier']

        pushed_id = self._push_dataset(catalog, dataset_id, catalog_id)

        self.assertEqual(pushed_id, catalog_id + '_' + dataset_id)

    @CKAN_VCR.use_cassette()
    def test_dataset_is_updated_correctly(self):
        catalog = self.full_catalog
        catalog_id = title_to_name(catalog['title'])
        dataset_id = catalog.datasets[0]['identifier']
        self._push_dataset(catalog, dataset_id, catalog_id)

        # Change the dataset and push it a second time.
        catalog.datasets[0]['description'] = 'updated description'
        pushed_id = self._push_dataset(catalog, dataset_id, catalog_id)

        expected_id = catalog_id + '_' + dataset_id
        package = self._show_package(expected_id)
        self.assertEqual(pushed_id, catalog_id + '_' + dataset_id)
        self.assertEqual('updated description', package['notes'])

    @CKAN_VCR.use_cassette()
    def test_resources_swapped_correctly(self):
        catalog_id = 'same-catalog-id'
        full_dataset = self.full_catalog.datasets[0]
        full_dataset_id = full_dataset['identifier']
        self._push_dataset(self.full_catalog, full_dataset_id, catalog_id)

        justice_dataset = self.justice_catalog.datasets[0]
        justice_dataset_id = justice_dataset['identifier']
        self._push_dataset(self.justice_catalog, justice_dataset_id,
                           catalog_id)

        # Switch them and update
        self._swap_distributions(full_dataset, justice_dataset)
        full_package_id = self._push_dataset(
            self.full_catalog, full_dataset_id, catalog_id)
        justice_package_id = self._push_dataset(
            self.justice_catalog, justice_dataset_id, catalog_id)

        # Switch them back
        self._swap_distributions(full_dataset, justice_dataset)

        full_package = self._show_package(full_package_id)
        justice_package = self._show_package(justice_package_id)

        # Each package must now carry the other dataset's distributions.
        self.assertEqual(len(full_package['resources']),
                         len(justice_dataset['distribution']))
        self.assertEqual(len(justice_package['resources']),
                         len(full_dataset['distribution']))

        full_pairs = zip(full_package['resources'],
                         justice_dataset['distribution'])
        for resource, justice_distribution in full_pairs:
            self.assertEqual(
                'same-catalog-id_' + justice_distribution['identifier'],
                resource['id'])

        justice_pairs = zip(justice_package['resources'],
                            full_dataset['distribution'])
        for resource, full_distribution in justice_pairs:
            self.assertEqual(
                'same-catalog-id_' + full_distribution['identifier'],
                resource['id'])
Ejemplo n.º 28
0
class RemoveTestCase(unittest.TestCase):
    """Tests for ``remove_datasets_from_ckan`` run through VCR cassettes.

    ``setUp`` purges and recreates the six datasets in ``test_datasets``;
    ``tearDown`` purges them again.
    """

    CKAN_VCR = vcr.VCR(
        path_transformer=vcr.VCR.ensure_suffix('.yaml'),
        cassette_library_dir=os.path.join(
            "tests", "cassetes", "ckan_integration", "remove_dataset"),
        filter_headers=['Authorization', 'X-CKAN-API-Key'],
        record_mode='once',
    )

    # One row per dataset: three organizations and three authors.
    test_datasets = [
        {'id': '1.1', 'owner_org': 'org-1', 'author': 'author_a', 'name': 'data1_1'},
        {'id': '2.1', 'owner_org': 'org-2', 'author': 'author_a', 'name': 'data2_1'},
        {'id': '2.2', 'owner_org': 'org-2', 'author': 'author_b', 'name': 'data2_2'},
        {'id': '3.1', 'owner_org': 'org-3', 'author': 'author_a', 'name': 'data3_1'},
        {'id': '3.2', 'owner_org': 'org-3', 'author': 'author_b', 'name': 'data3_2'},
        {'id': '3.3', 'owner_org': 'org-3', 'author': 'author_c', 'name': 'data3_3'},
    ]

    portal_url = 'http://localhost:8080'
    apikey = "<apikey>"

    def _purge_test_datasets(self):
        """Purge every test dataset, skipping the ones that do not exist."""
        for entry in self.test_datasets:
            try:
                self.ckan_portal.call_action('dataset_purge',
                                             data_dict={'id': entry['id']})
            except NotFound:
                pass

    def _remove_datasets(self, **filters):
        """Call ``remove_datasets_from_ckan`` on the test portal."""
        remove_datasets_from_ckan(self.portal_url, self.apikey, **filters)

    def _list_packages(self):
        """Return the portal's current ``package_list`` result."""
        return self.ckan_portal.call_action('package_list')

    @staticmethod
    def _publisher_filter(author):
        """Build a dataset filter for the publisher named ``author``."""
        return {'dataset': {'publisher': {'name': author, 'mbox': None}}}

    @CKAN_VCR.use_cassette()
    def setUp(self):
        self.ckan_portal = RemoteCKAN(self.portal_url, apikey=self.apikey)
        self._purge_test_datasets()
        for entry in self.test_datasets:
            self.ckan_portal.call_action('package_create', data_dict=entry)

    @CKAN_VCR.use_cassette()
    def tearDown(self):
        self._purge_test_datasets()

    @CKAN_VCR.use_cassette()
    def test_remove_dataset_by_id(self):
        self._remove_datasets(filter_in={'dataset': {'identifier': '1.1'}})
        self.assertNotIn('data1_1', self._list_packages())

    @CKAN_VCR.use_cassette()
    def test_remove_dataset_by_title(self):
        self._remove_datasets(filter_in={'dataset': {'title': 'data3_3'}})
        self.assertNotIn('data3_3', self._list_packages())

    @CKAN_VCR.use_cassette()
    def test_remove_dataset_by_organization(self):
        self._remove_datasets(organization='org-2')
        remaining = self._list_packages()
        self.assertNotIn('data2_1', remaining)
        self.assertNotIn('data2_2', remaining)

    @CKAN_VCR.use_cassette()
    def test_remove_dataset_by_publisher_and_organization(self):
        self._remove_datasets(filter_in=self._publisher_filter('author_b'),
                              organization='org-3')
        self.assertNotIn('data3_2', self._list_packages())

    @CKAN_VCR.use_cassette()
    def test_remove_dataset_by_filter_out(self):
        self._remove_datasets(filter_out=self._publisher_filter('author_b'))
        remaining = self._list_packages()
        self.assertIn('data2_2', remaining)
        self.assertIn('data3_2', remaining)
        self.assertNotIn('data2_1', remaining)
        self.assertNotIn('data3_3', remaining)

    @CKAN_VCR.use_cassette()
    def test_remove_dataset_by_filter_out_and_organization(self):
        self._remove_datasets(filter_out=self._publisher_filter('author_b'),
                              organization='org-3')
        remaining = self._list_packages()
        self.assertNotIn('data3_1', remaining)
        self.assertNotIn('data3_3', remaining)

    @CKAN_VCR.use_cassette()
    def test_empty_query_result(self):
        before = self._list_packages()
        self._remove_datasets(filter_in={'dataset': {'identifier': '4.4'}},
                              organization='org-4')
        after = self._list_packages()
        self.assertEqual(len(before), len(after))

    @CKAN_VCR.use_cassette()
    def test_with_no_parametres(self):
        before = self._list_packages()

        self._remove_datasets()

        after = self._list_packages()
        self.assertEqual(len(before), len(after))
Ejemplo n.º 29
0
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())
arguments = docopt(__doc__, version='Upload resource to CKAN 1.0')

try:
    BASE_URL = os.getenv('CKAN_BASE_URL')
    API_KEY = os.getenv('CKAN_API_KEY')
    ckan = RemoteCKAN(BASE_URL, apikey=API_KEY)

    path = arguments['--file']
    dataset = arguments['--dataset']

    filename = os.path.basename(path).lower()
    try:
        print("Getting dataset %s..." % dataset)
        ckan_dataset = ckan.call_action('package_show', {'id': dataset})
    except NotFound:
        print('Dataset %s not found!' % dataset, file=sys.stderr)
        sys.exit(1)

    resources = ckan_dataset['resources']
    existing = list(filter(lambda r: r['name'].lower() == filename, resources))
    if existing:
        res = existing[0]
        print("Updating existing resource %s" % res['name'])
        ckan.action.resource_update(id=res['id'], upload=open(path, "rb"))
    else:
        print("Create new resource %s" % filename)
        ckan.action.resource_create(package_id=ckan_dataset['id'],
                                    upload=open(path, "rb"),
                                    url='upload',
import json
import pprint
import sys
from ckanapi import RemoteCKAN

user_agent = 'ckanapiexample/1.0 (+http://example.com/my/website)'

# Dataset to show. Left empty, the script runs a package search instead.
# Example dataset name:
# 'dietary-dna-metabarcoding-in-black-browed-albatrosses-derived-from-scats-collected-at-new-island-ste'
dataset = ''


def _read_config_value(path):
    """Return the contents of the file at *path* with newlines removed."""
    # ``with`` closes the handle; the bare ``open(...).read()`` left it open.
    with open(path) as config_file:
        return config_file.read().replace('\n', '')


# Read the configuration
ckan_ip = _read_config_value("/usr/lib/ckan/default/src/ckanext-saerischema/ckanext/saerischema/tools/ckan_ip.txt")
api_key = _read_config_value("/usr/lib/ckan/default/src/ckanext-saerischema/ckanext/saerischema/tools/ckan_api_key.txt")

# The first command-line argument, when given, selects the dataset.
if len(sys.argv) > 1:
    dataset = sys.argv[1]

# Open the connection to the CKAN server
ckan = RemoteCKAN('http://%s' % ckan_ip, apikey=api_key, user_agent=user_agent)

try:
    # Show the dataset,
    # can pass either name or id as the id parameter
    if dataset:
        result = ckan.call_action('package_show', {'id': dataset})
    else:
        result = ckan.call_action('package_search', {'q': ''})
finally:
    # Close the client even when the action call raises.
    ckan.close()

# ``json`` was used here without being imported, which raised NameError.
print(json.dumps(result))

sys.exit(0)
Ejemplo n.º 31
0
    def post(self, request):
        """Verify a user's registry API key and link the user to a publisher.

        Looks the user up on the CKAN registry with the supplied key, takes
        the first organisation returned for that key as the primary one,
        creates or updates the matching ``Publisher``, adds the user to the
        publisher's organisation group and admin group, and stores the API
        key and user id on the user.

        Args:
            request: Request whose ``data`` must contain ``apiKey`` and
                ``userId`` and whose user has an ``organisationuser``.

        Returns:
            Response wrapping the serialized organisation user.

        Raises:
            exceptions.ParseError: ``apiKey`` or ``userId`` is missing.
            exceptions.APIException: A registry call fails, the user has no
                organisations, or the organisation's ``publisher_iati_id``
                does not match a known ``Organisation``.
        """
        # TODO: If verifying for the first time, OrganisationGroup and
        # OrganisationAdminGroup don't exist yet. - 2016-10-25

        user = request.user.organisationuser
        api_key = request.data.get('apiKey')
        user_id = request.data.get('userId')

        if not api_key or not user_id:
            raise exceptions.ParseError(
                detail="apiKey or userId not specified")

        client = RemoteCKAN(settings.CKAN_URL, apikey=api_key)

        # ``Exception`` rather than ``BaseException`` below, so that
        # KeyboardInterrupt and SystemExit are not turned into API errors.
        try:
            # Only success matters here; the returned user record is unused.
            client.call_action('user_show', {
                "id": user_id,
                "include_datasets": True,
            })
        except Exception:
            raise exceptions.APIException(
                detail="user with id {} not found".format(user_id))

        try:
            org_list = client.call_action('organization_list_for_user', {})
        except Exception:
            # The message now has the placeholder ``format`` was missing.
            raise exceptions.APIException(
                detail="Can't get organisation list for user {}".format(
                    user_id))

        if not org_list:
            raise exceptions.APIException(
                detail="This user has no organisations yet")

        primary_org_id = org_list[0]['id']

        try:
            primary_org = client.call_action('organization_show',
                                             {"id": primary_org_id})
        except Exception:
            raise exceptions.APIException(
                detail="Can't call organization_show for organization with id {}"
                .format(primary_org_id))

        if not primary_org:
            raise exceptions.APIException(
                detail="Can't call organization_show for organization with id {}"
                .format(primary_org_id))

        primary_org_iati_id = primary_org.get('publisher_iati_id')
        publisher_org = get_or_none(
            Organisation, organisation_identifier=primary_org_iati_id)

        if not publisher_org:
            raise exceptions.APIException(
                detail=
                "publisher_iati_id of {} not found in Organisation standard, correct this in the IATI registry"
                .format(primary_org_iati_id))

        # TODO: add organisation foreign key - 2016-10-25
        publisher, publisher_created = Publisher.objects.update_or_create(
            iati_id=primary_org_id,
            publisher_iati_id=primary_org_iati_id,
            defaults={
                "name": primary_org.get('name'),
                "display_name": primary_org.get('display_name'),
                "organisation": publisher_org,
            })

        organisation_group, _ = OrganisationGroup.objects.get_or_create(
            publisher=publisher,
            defaults={
                "name": "{} Organisation Group".format(primary_org.get('name'))
            })
        organisation_group.organisationuser_set.add(user)

        admin_group_defaults = {
            "name":
            "{} Organisation Admin Group".format(primary_org.get('name')),
        }
        if publisher_created:
            # The user who first registers a publisher owns its admin group.
            admin_group_defaults["owner"] = user

        organisation_admin_group, _ = OrganisationAdminGroup.objects.get_or_create(
            publisher=publisher, defaults=admin_group_defaults)
        organisation_admin_group.organisationuser_set.add(user)

        user.iati_api_key = api_key
        user.iati_user_id = user_id
        user.save()

        serializer = OrganisationUserSerializer(user,
                                                context={
                                                    'request': request,
                                                })

        return Response(serializer.data)
def checkFGP(uuid):
    """Print ``<uuid> [Orphaned]`` when the CSW catalogue has no such record.

    Requests the brief record for ``uuid`` from the csw.open.canada.ca
    GeoNetwork CSW endpoint (``GetRecordById``) and parses the reply as XML.
    If the root element has no ``csw:BriefRecord`` child, the uuid is
    reported on stdout; otherwise nothing is printed.

    Args:
        uuid: Record identifier, interpolated into the request URL.

    Returns:
        None
    """
    csw_namespaces = {'csw': 'http://www.opengis.net/cat/csw/2.0.2'}
    record_url = 'https://csw.open.canada.ca/geonetwork/srv/csw?service=CSW&version=2.0.2&request=GetRecordById&id=%s&elementsetname=brief' % uuid

    response = requests.get(record_url)
    document = ET.fromstring(response.text.encode('utf-8'))

    brief_records = document.findall('csw:BriefRecord', csw_namespaces)
    if len(brief_records) == 0:
        print(uuid + ' [Orphaned]')

# Collect the id of every package in the 'fgp' collection of the
# open.canada.ca CKAN portal, one result page at a time.
# `i` counts the records seen so far and doubles as the `start` offset of
# the next page request.
i = 0
ogp_ids = []
ogp = RemoteCKAN('https://open.canada.ca/data')
search = ogp.call_action('package_search', {'fq':'collection:fgp', 'start': i})

print('\nSearching for UUId\'s\n')
# An empty result page means the offset has moved past the last record.
while search['results']:
    # One dot per fetched page as a progress indicator.
    sys.stdout.write('.')
    sys.stdout.flush()

    for record in search['results']:
        ogp_ids.append(record['id'])
        i += 1

    search = ogp.call_action('package_search', {'fq':'collection:fgp', 'start': i})
Ejemplo n.º 33
0
    def post(self, request, publisher_id):
        """Publish a publisher's organisation file as a registry dataset.

        Creates a CKAN package named ``<publisher name>-organisations`` that
        points at the supplied ``source_url``. When creation fails, the
        existing package with that name is looked up and updated instead.
        Afterwards the local manually-added ``Dataset`` of the publisher is
        synced with the registry id, and the publisher's ready-to-publish
        organisations are flagged as published.

        Args:
            request: Request whose ``data`` carries ``source_url`` and whose
                user has an ``organisationuser`` holding the registry API
                key.
            publisher_id: Primary key of the ``Publisher`` to publish for.

        Returns:
            Response: The serialized ``Dataset`` record.

        Raises:
            exceptions.APIException: If ``source_url`` is missing, if the
                user's organisation list cannot be fetched or is empty, or
                if the fallback lookup returns no existing package.

        The ``Publisher`` and ``Dataset`` ``.get()`` lookups are not guarded,
        so their lookup errors propagate to the caller unchanged.
        """
        organisationuser = request.user.organisationuser
        iati_user_id = organisationuser.iati_user_id
        publisher = Publisher.objects.get(pk=publisher_id)

        source_url = request.data.get('source_url', None)

        if not source_url:
            raise exceptions.APIException(detail="no source_url provided")

        client = RemoteCKAN(settings.CKAN_URL,
                            apikey=organisationuser.iati_api_key)

        # TODO: should this be the name? - 2017-02-20
        source_name = '{}-organisations'.format(publisher.name)

        # get all published organisations, except for the ones that are just
        # modified
        organisations = Organisation.objects.filter(
            ready_to_publish=True,
            publisher=publisher
        )

        # Catch Exception rather than BaseException so KeyboardInterrupt and
        # SystemExit are not converted into an API error.
        try:
            org_list = client.call_action('organization_list_for_user', {})
        except Exception:
            raise exceptions.APIException(
                detail="Can't get organisation list for user {}".format(
                    iati_user_id
                ))

        # Without at least one organisation there is no owner for the
        # package; fail with an API error instead of a bare IndexError.
        if not org_list:
            raise exceptions.APIException(
                detail="No organisations found for user {}".format(
                    iati_user_id
                ))

        primary_org_id = org_list[0]['id']

        # Payload shared by package_create and package_update.
        package_data = {
            "resources": [
                {"url": source_url}
            ],
            "name": source_name,
            "filetype": "organisation",
            "date_updated": datetime.now().strftime('%Y-%m-%d %H:%M'),
            "organisation_count": organisations.count(),
            "title": source_name,
            "owner_org": primary_org_id,
            "url": source_url,
        }

        try:
            # sync main datasets to IATI registry
            registry_dataset = client.call_action(
                'package_create', package_data)
        except Exception:
            # try to recover from case when the dataset already exists (just
            # update it instead)
            # NOTE(review): package_show presumably raises on its own when
            # the package is missing; such an error propagates unchanged.
            old_package = client.call_action('package_show', {
                "name_or_id": source_name,
            })

            if not old_package:
                raise exceptions.APIException(
                    detail="Failed publishing dataset"
                )

            registry_dataset = client.call_action(
                'package_update',
                dict(package_data, id=old_package.get('id')))

        # Sync the existing local Dataset object with the registry.
        # NOTE(review): this uses .get(), so the Dataset must already exist;
        # filetype=2 is presumably the organisation filetype - confirm
        # against the Dataset model.
        dataset = Dataset.objects.get(
            filetype=2,
            publisher=publisher,
            added_manually=True,
        )

        dataset.iati_id = registry_dataset['id']
        dataset.name = source_name
        dataset.title = source_name
        dataset.source_url = source_url
        dataset.is_parsed = False
        dataset.save()

        #  update the affected organisations flags
        organisations.update(
            published=True, modified=False, ready_to_publish=True)

        # remove the old datasets from the registry
        # TODO: query the registry to remove a dataset - 2017-01-16
        # TODO: remove old datasets locally as well - 2017-01-16

        # TODO: Or just ask the user to remove the old datasets by hand?
        # - 2017-02-20

        # return Dataset object
        serializer = DatasetSerializer(dataset, context={'request': request})
        return Response(serializer.data)