예제 #1
0
def push_theme_to_ckan(catalog, portal_url, apikey, identifier=None, label=None):
    """Write the metadata of a theme to the given CKAN portal.

    The theme is fetched from the catalog with ``identifier`` and ``label``,
    mapped to a group with ``map_theme_to_group`` and sent to the portal
    through the ``group_create`` action.

    Args:
        catalog (DataJson): Source catalog that contains the theme.
        portal_url (str): URL of the destination CKAN portal.
        apikey (str): API key of a user with the permissions needed to
            create the group.
        identifier (str): Identifier used to look the theme up in the
            taxonomy.
        label (str): Label used to look the theme up in the taxonomy.

    Returns:
        str: The name of the theme in the destination catalog.
    """
    # The context manager closes the portal session even if a call raises.
    with RemoteCKAN(portal_url, apikey=apikey) as ckan_portal:
        theme = catalog.get_theme(identifier=identifier, label=label)
        group = map_theme_to_group(theme)
        pushed_group = ckan_portal.call_action('group_create', data_dict=group)
    return pushed_group['name']
예제 #2
0
 def create(self, apikey=apikey):
     """Create this resource through the CKAN API.

     Sets ``self.url`` to ``'dummy-url'`` when the instance has no ``url``
     attribute, opens ``self.upload`` for binary reading when that attribute
     exists, and passes the mapping returned by ``self.check()`` as keyword
     arguments to the ``resource_create`` action. On success the new id is
     stored in ``self.id``.

     :param apikey: API key used for the call
     :return: the result of ``show(self.id, 'resource')`` on success,
         ``None`` when the call is denied or fails (a message is printed)
     """
     if not hasattr(self, 'url'):
         self.url = 'dummy-url'

     upload_handle = None
     if hasattr(self, 'upload'):
         # self.upload holds a path on entry and the open file afterwards.
         upload_handle = open(self.upload, 'rb')
         self.upload = upload_handle

     try:
         d = self.check()
         try:
             new_resource = RemoteCKAN(
                 url, apikey).action.resource_create(**d)  # make CKAN API call
             self.id = new_resource['id']
             return show(self.id, 'resource')
         except NotAuthorized:
             print('Denied. Check your apikey.')
         except Exception:
             # Best effort: report the failure, but let KeyboardInterrupt and
             # SystemExit propagate instead of swallowing them.
             print('Failed to create %s' % self.name)
     finally:
         # Release the upload file handle whether or not the call succeeded.
         if upload_handle is not None:
             upload_handle.close()
예제 #3
0
def push_dataset_to_ckan(catalog, owner_org, dataset_origin_identifier, portal_url, apikey,
                         catalog_id=None, demote_superThemes=True, demote_themes=True):
    """Write the metadata of a dataset to the given CKAN portal.

    The dataset is mapped to a CKAN package, its license is resolved against
    the portal's ``license_list`` and the package is updated, or created when
    the portal reports ``NotFound``.

    Args:
        catalog (DataJson): Source catalog that contains the dataset.
        owner_org (str): Organization the dataset belongs to.
        dataset_origin_identifier (str): Id of the dataset to federate.
        portal_url (str): URL of the destination CKAN portal.
        apikey (str): API key of a user with the permissions needed to
            create or update the dataset.
        catalog_id (str): Prefix that precedes the dataset id in the
            destination catalog.
        demote_superThemes (bool): If true, the ids of the dataset's super
            themes are propagated as groups.
        demote_themes (bool): If true, the labels of the dataset's themes
            become tags; otherwise they are passed as groups.

    Returns:
        str: The id of the dataset in the destination catalog.
    """
    dataset = catalog.get_dataset(dataset_origin_identifier)

    # The context manager closes the portal session even if a call raises.
    with RemoteCKAN(portal_url, apikey=apikey) as ckan_portal:
        package = map_dataset_to_package(catalog, dataset, owner_org, catalog_id,
                                         demote_superThemes, demote_themes)

        # Resolve the license id: match the dataset's license against the
        # title or url of the portal's licenses, else use 'notspecified'.
        license_id = 'notspecified'
        if dataset.get('license'):
            license_list = ckan_portal.call_action('license_list')
            ckan_license = next(
                (license_item for license_item in license_list
                 if license_item['title'] == dataset['license'] or
                 license_item['url'] == dataset['license']),
                None)
            if ckan_license is not None:
                license_id = ckan_license['id']
        package['license_id'] = license_id

        try:
            pushed_package = ckan_portal.call_action(
                'package_update', data_dict=package)
        except NotFound:
            # The package does not exist on the portal yet: create it.
            pushed_package = ckan_portal.call_action(
                'package_create', data_dict=package)

    return pushed_package['id']
예제 #4
0
def new_user(username, email=None, fullname=None, apikey=apikey):
    """Create a CKAN user unless ``search`` already finds one with this name.

    Missing ``email`` and ``fullname`` values are requested interactively
    until a non-empty answer is given. The user name is lower-cased and
    stripped before it is sent.

    :param username: name of the user to create
    :param email: email address; prompted for when ``None``
    :param fullname: full name; prompted for when ``None``
    :param apikey: API key used for the ``user_create`` call
    :return: the value returned by ``user_create`` when the user is created;
        ``None`` when the user already exists or the call is not authorised
    """
    s = search(username, 'user')
    if s is not None:
        print('This user already exists\n', s)
        return None

    while email is None:
        email = input('Enter new user\'s email address:\n') or None
    while fullname is None:
        fullname = input('Enter new user\'s full name:\n') or None

    # NOTE(review): the password is a fixed literal in this source; confirm
    # how real credentials are meant to be supplied.
    d = dict(name=username.lower().strip(),
             email=email,
             password='******',
             fullname=fullname)
    try:
        # Return the created user; the earlier search result is always None
        # on this path, so returning it carried no information.
        return RemoteCKAN(url, apikey).action.user_create(**d)
    except NotAuthorized:
        print('\nDenied. Check your apikey.')
    return None
예제 #5
0
def update_ckan_database_resource(domain, entity_id):
    """Upload the regenerated gas-station CSV as a new CKAN resource.

    Calls ``update_json_and_save_csv`` to produce the CSV file, then creates
    a resource in a fixed package with the file attached and a name that
    carries the current America/Recife timestamp.

    :param domain: host name used to build the resource's public ``url``
    :param entity_id: passed through to ``update_json_and_save_csv``
    :return: None
    """
    ua = 'ckanapiexample/1.0 (+http://example.com/my/website)'
    # NOTE(review): the portal address and API key are hard-coded in source;
    # they should come from configuration and the key should be rotated.
    client = RemoteCKAN('http://104.236.54.23',
                        apikey='eb5346ab-0c53-4b1f-a0a4-f4be797db23b',
                        user_agent=ua)
    file_name = update_json_and_save_csv(get_orion_client(), entity_id)
    file_path_csv = 'friendlygas_project/core/media/' + file_name
    date = str(datetime.datetime.now(pytz.timezone('America/Recife')))

    # Keep the file open only for the duration of the upload.
    with open(file_path_csv, 'rb') as csv_file:
        client.action.resource_create(
            package_id='ac335d1a-098c-4f18-882c-03950ddc5d7c',
            name='Postos de Combustíveis - Natal - RN - ' + date,
            description=
            'Este arquivo contém as informações dos Postos de Combustíveis de Natal - RN.',
            format='csv',
            upload=csv_file,
            url='http://' + domain + '/' + 'media' + '/' + file_name)
예제 #6
0
파일: pyckan.py 프로젝트: OpenSTL/pyckan
def run():
    """Interactive entry point: run the chosen update, then query the portal.

    Asks whether to create 'datasets' or 'resources'. A blank answer exits
    immediately, as the prompt states. Afterwards a few read-only calls are
    made against the portal and their results printed.
    """
    print("OpenSTL-DataExchange")

    usr_in = input(
        "Do you wish to create 'datasets' or 'resources'?  Leave blank to exit\n"
    )

    # Honour the prompt: blank input means exit.
    if not usr_in:
        return

    # Run only the update the user asked for.
    if usr_in == "datasets":
        update_datasets()
    elif usr_in == "resources":
        update_resources()

    demo = RemoteCKAN('http://beta.stlouisdata.org', apikey=secret)

    groups = demo.action.package_list(id='test_data')
    print(groups)

    pkg = demo.action.package_show(id='ya_set')

    groups = demo.action.package_list(id='test_data')
    print(groups)
    print("\nPackage_Create {}".format(pkg))

    # These edits stay local: no update action is sent to the portal.
    pkg['title'] = "WORKING!!"
    pkg['notes'] = 'Just playing around really...'  # this is the description field

    npkg = demo.action.package_show(id='test_data')
    print("\nPackage_Create {}".format(npkg))
예제 #7
0
def crawl(ctx, workflow, url, engine):
    """Run a workflow over every CSV resource found on a CKAN instance.

    Searches the instance at ``url`` for resources matching ``format:csv``,
    downloads each one, stores it as ``data.csv`` through
    ``make_file_manager`` and runs the selected engine on it. Each result is
    passed to the printer, whose accumulated output is printed at the end.

    :param ctx: context object; ``ctx.obj['printer']`` supplies the printer
    :param workflow: passed to ``engine.run`` for every file
    :param url: address of the CKAN instance
    :param engine: key into ``engines`` selecting the engine class
    """
    printer = ctx.obj['printer']

    # Look the template up first and interpolate afterwards; formatting
    # before the lookup would make the message id depend on the engine name.
    click.echo(_("Engine: %s") % engine)

    engine = engines[engine]()

    from ckanapi import RemoteCKAN
    client = RemoteCKAN(url, user_agent='lintol-doorstep-crawl/1.0 (+http://lintol.io)')
    resources = client.action.resource_search(query='format:csv')
    if 'results' in resources:
        for resource in resources['results']:
            r = requests.get(resource['url'])
            with make_file_manager(content={'data.csv': r.text}) as file_manager:
                filename = file_manager.get('data.csv')
                loop = asyncio.get_event_loop()
                result = loop.run_until_complete(engine.run(filename, workflow))
                printer.print_report(result)
    print(printer.get_output())
예제 #8
0
def get_ids():
    '''
    Return the names of recently modified records that pass the open checklist.

    Queries ``package_search`` on the site named by the ``registry_url``
    environment variable for records modified in the last 48 hours, keeps
    those whose checklist fields all hold the required values, prints the
    kept records and returns their ``name`` values.

    :return: list of record names; an empty list if a kept record has no
        ``name``
    '''
    from datetime import timezone

    site = os.getenv("registry_url")

    # query for last 48 hours
    apicall = "api/3/action/package_search"

    hours_ago = 48
    # The timestamp is sent with a 'Z' suffix, so it must be computed in UTC.
    cutoff = datetime.now(timezone.utc) - timedelta(hours=hours_ago)
    cutoff_str = cutoff.strftime('%Y-%m-%dT%H:%M:%SZ')
    q_param1 = "?q=metadata_modified:[%s%sTO%s*]" % (cutoff_str, '%20',
                                                     '%20')
    res = query_with_get(site, apicall, q_param1)
    records = json.loads(res)['result']['results']

    # additionally filter only records where open checklist criteria passes
    required_values = {
        'ready_to_publish': 'true',
        'elegible_for_release': 'true',
        'access_to_information': 'true',
        'authority_to_release': 'true',
        'formats': 'true',
        'privacy': 'true',
        'official_language': 'true',
        'security': 'true',
        'other': 'true',
        'imso_approval': 'true',
        'license_id': 'ca-ogl-lgo',
        'restrictions': 'unrestricted',
    }
    # A record that lacks a checklist field does not pass; it is skipped
    # rather than aborting the whole run with a KeyError.
    filtered_records = [
        record for record in records
        if all(record.get(field) == value
               for field, value in required_values.items())
    ]

    print(filtered_records)

    # process the result to get filtered ids
    try:
        return [record['name'] for record in filtered_records]
    except (KeyError, TypeError):
        return []
def delete_ckan_record(package_id):
    """
    Remove a dataset and its associated resource from CKAN

    Looks the record up with ``get_ckan_record``, removes the local copy of
    the first resource's file from ``doc_intake_dir`` if present, then
    deletes the resource blob and deletes and purges the dataset on the
    remote CKAN. Errors raised while deleting remotely are logged, not
    raised.

    :param package_id: ID of the dataset to delete
    :return: Nothing
    """

    # First, verify and get the resource ID
    package_record = get_ckan_record(package_id)
    if len(package_record) == 0:
        logger.warning("Cannot find record {0} to delete".format(package_id))
        return

    # Get rid of the resource
    remote_ckan_url = Config.get('ckan', 'remote_url')
    remote_ckan_api = Config.get('ckan', 'remote_api_key')
    user_agent = Config.get('web', 'user_agent')

    # Only the first resource of the record is handled.
    resource = package_record['resources'][0]

    # Delete the local file if it exists
    gcdocs_file = os.path.join(
        doc_intake_dir,
        munge_filename(os.path.basename(resource['name'])))
    if os.path.exists(gcdocs_file):
        os.remove(gcdocs_file)

    with RemoteCKAN(remote_ckan_url,
                    user_agent=user_agent,
                    apikey=remote_ckan_api) as ckan_instance:
        try:
            delete_blob(
                ckan_container, 'resources/{0}/{1}'.format(
                    resource['id'], resource['name'].lower()))
            ckan_instance.action.package_delete(id=package_record['id'])
            ckan_instance.action.dataset_purge(id=package_record['id'])
            logger.info("Deleted expired CKAN record {0}".format(
                package_record['id']))
        except Exception as ex:
            # Format the exception itself: Python 3 exceptions have no
            # ``message`` attribute, so reading it would raise in the handler.
            logger.error(
                "Unexpected error when deleting record {0}".format(ex))
예제 #10
0
def main(args):
    """List the users that belong to the organizations of a CKAN portal.

    Reads the host from ``args['HOST']`` and the API key from ``args['-a']``
    or the ``CKAN_APIKEY`` environment variable, fetches every organization
    with its users, keeps the users accepted by the ``-s`` filter, loads
    each user's details and hands the encoded records to ``output`` together
    with ``args['-f']``.
    """
    def capac(capacity):
        # Without a '-s' value every capacity is accepted; otherwise the
        # first character of the capacity must occur in the '-s' value.
        selected = args['-s']
        if not selected:
            return True
        return capacity[0] in selected

    host = args['HOST']
    apikey = args['-a'] or os.environ.get('CKAN_APIKEY')
    ckan = RemoteCKAN(host, apikey=apikey)

    list_options = {'all_fields': True, 'include_users': True}
    orgas = ckan.call_action('organization_list', data_dict=list_options)

    # Collect the ids of all members whose capacity passes the filter.
    userids = []
    for orga in orgas:
        for member in orga['users']:
            if capac(member['capacity']):
                userids.append(member['id'])

    # Fetch the full record of every selected user.
    users = []
    for uid in userids:
        users.append(ckan.call_action('user_show', data_dict={'id': uid}))

    output([encode(user) for user in users], args['-f'])
def update_resource(package_id, resource_file):
    """
    Add or update the resource file for the dataset

    If the dataset has no resources, a new one is created with the file
    attached; otherwise the dataset's first resource is patched with the
    file. CKAN API errors are logged, not raised.

    :param package_id: OBD dataset ID
    :param resource_file: path to the resource file
    :return: Nothing
    """

    remote_ckan_url = Config.get('ckan', 'remote_url')
    remote_ckan_api = Config.get('ckan', 'remote_api_key')
    user_agent = Config.get('web', 'user_agent')
    with RemoteCKAN(remote_ckan_url,
                    user_agent=user_agent,
                    apikey=remote_ckan_api) as ckan_instance:
        try:
            package_record = ckan_instance.action.package_show(id=package_id)
        except NotFound as nf:
            # Format the exception itself: Python 3 exceptions have no
            # ``message`` attribute.
            logger.error("Unable to find record {0} to update".format(nf))
            return

        try:
            # The file is closed as soon as the upload call returns.
            with open(resource_file, 'rb') as upload:
                if len(package_record['resources']) == 0:
                    ckan_instance.action.resource_create(package_id=package_id,
                                                         url='',
                                                         upload=upload)
                    logger.info("Added new resource to {0}".format(package_id))
                else:
                    resource_id = package_record['resources'][0]['id']
                    ckan_instance.action.resource_patch(id=resource_id,
                                                        url='',
                                                        upload=upload)
                    # Logged only after a successful patch. The fetched
                    # record has no resources on the create path, so this
                    # must not index into it there.
                    logger.info("Updated resource {0}".format(resource_id))
        except CKANAPIError as ce:
            logger.error(
                "Unexpected error when updating a record {0}: ".format(ce))
            logger.error(traceback.format_exc())
예제 #12
0
def remove_datasets_from_ckan(portal_url, apikey, filter_in=None, filter_out=None,
                              only_time_series=False, organization=None):
    """Purge datasets from the given CKAN portal.

    Candidates come from two sources: the portal's ``/data.json`` (when any
    of ``filter_in``, ``filter_out`` or ``only_time_series`` is given) and a
    ``package_search`` by organization (when ``organization`` is given).
    When both are used, only identifiers present in both are purged. When
    neither is used, nothing is purged.

    Args:
        portal_url (str): URL of the destination CKAN portal.
        apikey (str): API key of a user with the permissions needed to
            delete the datasets.
        filter_in (dict): Positive filter dictionary, similar to the one of
            search.get_datasets.
        filter_out (dict): Negative filter dictionary, similar to the one of
            search.get_datasets.
        only_time_series (bool): Keep only datasets that have resources with
            time series.
        organization (str): Keep only datasets that belong to this
            organization.

    Returns:
        None
    """
    page_size = 500

    # The context manager closes the portal session even if a call raises.
    with RemoteCKAN(portal_url, apikey=apikey) as ckan_portal:
        identifiers = []
        datajson_filters = filter_in or filter_out or only_time_series
        if datajson_filters:
            identifiers += get_datasets(portal_url + '/data.json', filter_in=filter_in,
                                        filter_out=filter_out,
                                        only_time_series=only_time_series,
                                        meta_field='identifier')
        if organization:
            query = 'organization:"' + organization + '"'
            org_identifiers = []
            start = 0
            # Page through the search results until the reported count is
            # covered; the first page is always requested.
            while True:
                search_result = ckan_portal.call_action(
                    'package_search',
                    data_dict={'q': query, 'rows': page_size, 'start': start})
                org_identifiers += [dataset['id']
                                    for dataset in search_result['results']]
                start += page_size
                if search_result['count'] <= start:
                    break

            if datajson_filters:
                identifiers = set(identifiers).intersection(set(org_identifiers))
            else:
                identifiers = org_identifiers

        for identifier in identifiers:
            ckan_portal.call_action('dataset_purge', data_dict={'id': identifier})
def update_ckan_record(package_dict):
    """
    Patch an existing dataset on the remote CKAN portal

    The dict is passed as keyword arguments to the ``package_patch`` action.
    Any error is logged rather than raised.

    :param package_dict: JSON dict of the package fields to patch
    :return: The patched package, or None if the update failed
    """

    remote_ckan_url = Config.get('ckan', 'remote_url')
    remote_ckan_api = Config.get('ckan', 'remote_api_key')
    user_agent = Config.get('web', 'user_agent')
    new_package = None

    with RemoteCKAN(remote_ckan_url,
                    user_agent=user_agent,
                    apikey=remote_ckan_api) as ckan_instance:
        try:
            new_package = ckan_instance.action.package_patch(**package_dict)
        except Exception as ex:
            # Format the exception itself: Python 3 exceptions have no
            # ``message`` attribute, so reading it would raise in the handler.
            logger.error(
                "Unable to update existing portal record: {0}".format(ex))
    return new_package
예제 #14
0
    def put(self, request, publisher_id, dataset_id):
        """Store a new source_url on a dataset and refresh organisation flags.

        Raises ``APIException`` when the request carries no ``source_url``.
        Otherwise the dataset's ``source_url`` and ``date_updated`` are
        saved, the publisher's ready-to-publish organisations are marked as
        published and unmodified, the remaining ones as unpublished, and the
        serialized dataset is returned.
        """
        # These lookups raise when the related objects are missing.
        request.user.organisationuser
        owning_publisher = Publisher.objects.get(pk=publisher_id)
        OrganisationAdminGroup.objects.get(publisher_id=publisher_id)

        new_source_url = request.data.get('source_url')

        # TODO: call package_update to update source_url for registry as well - 2017-02-20

        if not new_source_url:
            raise exceptions.APIException(detail="no source_url provided")

        # The registry client is built but not used yet (see TODO above).
        registry_key = request.user.organisationuser.iati_api_key
        RemoteCKAN(settings.CKAN_URL, apikey=registry_key)

        target = Dataset.objects.get(id=dataset_id)
        target.date_updated = datetime.now()
        target.source_url = new_source_url
        target.save()

        # Split the publisher's organisations by their ready-to-publish flag.
        ready = Organisation.objects.filter(ready_to_publish=True,
                                            publisher=owning_publisher)
        not_ready = Organisation.objects.filter(ready_to_publish=False,
                                                publisher=owning_publisher)

        # Update the flags of the affected organisations.
        ready.update(
            published=True,
            modified=False,
            ready_to_publish=True,
            last_updated_datetime=datetime.now().isoformat(' '))
        not_ready.update(published=False)

        # Return the serialized Dataset object.
        payload = DatasetSerializer(target, context={'request': request}).data
        return Response(payload)
예제 #15
0
async def do_crawl(component, url, workflow, printer, publish, update=False):
    """Process the resources of every package on a CKAN instance.

    For each package returned by ``package_list`` the metadata is fetched
    and wrapped in a ``DoorstepIni``. For every resource whose format is in
    ``ALLOWED_FORMATS``:

    * if ``workflow`` is truthy, the resource is downloaded, stored as
      ``data.csv`` and run through ``execute_workflow``; a truthy result is
      printed and added to the printer's report;
    * if ``publish`` is truthy, ``announce_resource`` is awaited for it.

    Resources in any other format are logged as a warning. The printer's
    output is emitted once all packages have been visited.

    :param component: passed to ``execute_workflow`` and ``announce_resource``
    :param url: address of the CKAN instance
    :param workflow: workflow to execute; skipped when falsy
    :param printer: report builder exposing ``build_report``/``print_output``
    :param publish: whether to announce each allowed resource
    :param update: passed through to ``announce_resource``
    """
    from ckanapi import RemoteCKAN
    client = RemoteCKAN(url, user_agent='lintol-doorstep-crawl/1.0 (+http://lintol.io)')

    # Both CKAN calls go through ckan_retry.
    packages = ckan_retry(client.action.package_list)

    for package in packages:
        package_metadata = ckan_retry(client.action.package_show, id=package)

        ini = DoorstepIni(context_package=package_metadata)
        for resource in ini.package['resources']:
            if resource['format'] in ALLOWED_FORMATS:
                if workflow:
                    # Download the resource and expose it as a local file.
                    r = requests.get(resource['url'])
                    with make_file_manager(content={'data.csv': r.text}) as file_manager:
                        filename = file_manager.get('data.csv')
                        result = await execute_workflow(component, filename, workflow, ini)
                        print(result)
                        if result:
                            printer.build_report(result)
                if publish:
                    await announce_resource(component, resource, ini, url, update)
            else:
                if not resource['format']:
                    # Dump resources that carry no format at all.
                    print(resource)
                logging.warning("Not allowed format: {}".format(resource['format']))
    printer.print_output()