# Example #1
# 0
def get_current_ckan_resources_from_api(harvest_source_id):
    """Yield every dataset found in the given harvest source.

    Walks the CKAN search API one page of packages at a time, keeping a
    running count of resources for logging.  After the iteration is
    exhausted, the accumulated package list is written to the configured
    CKAN-results cache path.
    """
    results_json_path = config.get_ckan_results_cache_path()
    logger.info(f'Extracting from harvest source id: {harvest_source_id}')
    cpa = CKANPortalAPI(base_url=config.CKAN_CATALOG_URL)

    resources = 0
    page = 0
    for datasets in cpa.search_harvest_packages(
            harvest_source_id=harvest_source_id):
        # Each iteration of the outer loop is one page of packages.
        page += 1
        logger.info('PAGE {} from harvest source id: {}'.format(
            page, harvest_source_id))
        for dataset in datasets:
            resources += len(dataset['resources'])
            yield dataset

    logger.info('{} total resources in harvest source id: {}'.format(
        resources, harvest_source_id))
    cpa.save_packages_list(path=results_json_path)
# Ensure the local datapackages folder exists.  makedirs(exist_ok=True)
# replaces the previous isdir()-then-makedirs() sequence, which was
# race-prone (TOCTOU): the directory could appear between the check and
# the create and raise FileExistsError.
packages_folder_path = os.path.join(local_folder, 'datapackages')
os.makedirs(packages_folder_path, exist_ok=True)

api_results_path = os.path.join(local_folder, 'api_results.json')

# ----------------------------------------------------
# Get data.json if not here (or force)
# ----------------------------------------------------
if not os.path.isfile(api_results_path) or args.force_download:
    # No cached API results on disk (or a forced refresh was requested):
    # fetch the full package list from the remote CKAN and cache it.
    logger.info('Downloading')
    cpa = CKANPortalAPI(base_url=args.ckan_base_url)
    cpa.get_all_packages(harvest_source_id=args.harvest_source_id)
    cpa.save_packages_list(path=api_results_path)
else:
    # Reuse the package list cached by a previous run.
    # NOTE: fixed typo in the log message ("prevously" -> "previously").
    logger.info(f'Using data.json previously downloaded: {api_results_path}')
    cpa = CKANPortalAPI()
    cpa.read_local_packages(path=api_results_path)

# Summary totals over the package list just loaded into `cpa`
# (these module-level names are consumed by the summary log below).
packages = cpa.package_list
total_datasets = len(packages)
total_resources = cpa.count_resources()

logger.info('cleaning datasets')
# remove_duplicated_identifiers() appears to return the removed entries
# (only their count is used here) -- TODO confirm against CKANPortalAPI.
duplicates = cpa.remove_duplicated_identifiers()
total_duplicates = len(duplicates)
logger.info(
    f'Readed {total_datasets} datasets including {total_resources} resources. {total_duplicates} duplicated identifiers removed'