def test_load_from_name():
    """Test importing a single harvest source by name, checking that its
    ``force_all`` config survives the import as a real boolean."""
    ckan = RemoteCKAN(url='https://catalog.data.gov')
    ckan.set_destination(ckan_url='http://ckan:5000',
                         ckan_api_key='0602d7ed-1517-40a0-a92f-049d724962df')

    print('Getting harvest source ...')
    name = 'doi-open-data'
    full_hs = ckan.get_full_harvest_source(hs={'name': name})
    ckan.create_harvest_source(data=full_hs)

    # A freshly imported source must be flagged "created" (not "updated")
    # and must carry no error.
    result = ckan.harvest_sources[name]
    assert 'created' in result
    assert result['created']
    assert 'updated' in result
    assert not result['updated']
    assert 'error' in result
    assert not result['error']
    print(result)

    # check the force_all config: stored as a JSON string on the package,
    # and the decoded value must be an actual bool, not a truthy string
    cfg = result['ckan_package']['config']
    cfg_data = json.loads(cfg)
    assert isinstance(cfg_data['force_all'], bool)
    assert cfg_data['force_all']
# NOTE(review): this test body appears truncated/corrupted. The call to
# ckan.set_destination() is cut off mid-argument (credentials were redacted
# as 'http://*****:*****@...') and the code that defines `extras`, `total`,
# `created`, `updated` and `errors` is missing, so the line below does not
# parse as written. Restore the full body from version control before use.
# Also note `assert len(ckan.groups), 1` asserts a truthy tuple — it was
# presumably meant to be `assert len(ckan.groups) == 1`; confirm upstream.
def test_load_from_url(): """ Test with some previous harvester already saved Use a pytest cassette so real requests are not required. We import 3 harvest sources (so they already exists) and then run this test with 6 sources. """ ckan = RemoteCKAN(url='https://catalog.data.gov') ckan.set_destination(ckan_url='http://*****:*****@fdic.gov\r\[email protected]' assert expected_email_list in [ extra['value'] for extra in extras if extra['key'] == 'email_list' ] extras = ckan.organizations['fcc-gov'].get('extras', []) expected_email_list = '[email protected]\r\[email protected]' assert expected_email_list in [ extra['value'] for extra in extras if extra['key'] == 'email_list' ] assert len(ckan.groups), 1 assert 'local' in ckan.groups assert ckan.groups['local']['display_name'] == 'Local Government' print( 'Finished: {} harvest sources. {} Added, {} already exists, {} failed'. format(total, created, updated, errors)) assert total == len(ckan.harvest_sources) assert created == 4 assert updated == 3 assert errors == 0
parser.add_argument(
    "--wait_for_create",
    type=int,
    default=5,
    help="Wait this number of seconds between API calls to prevent timeout")
args = parser.parse_args()

# Fall back to the repo-local api.key file when no key is given on the CLI.
if args.destination_api_key is None:
    api_key_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                '../../api.key')
    # context manager so the key file handle is always closed
    with open(api_key_file) as key_file:
        args.destination_api_key = key_file.read().rstrip()

ckan = RemoteCKAN(url=args.origin_url, user_agent=args.user_agent)
ckan.set_destination(ckan_url=args.destination_url,
                     ckan_api_key=args.destination_api_key)

# define the final list of sources to import (from a file or a list)
sources_to_import = []
if args.names is not None:
    # we get a list of names from a file or a comma-separated list of
    # source names
    if os.path.isfile(args.names):
        with open(args.names) as names_file:
            names = names_file.read().splitlines()
    else:
        names = args.names.split(',')
    if args.offset > 0:
        names = names[args.offset:]
def import_groups(origin_url, user_agent, destination_url, destination_api_key,
                  groups='ALL', skip_groups=''):
    """Copy CKAN groups from an origin portal to a destination portal and
    re-attach each group to the datasets that belong to it.

    Args:
        origin_url: base URL of the CKAN instance to read groups from.
        user_agent: user-agent string for requests to the origin.
        destination_url: base URL of the CKAN instance to write to.
        destination_api_key: API key for the destination instance.
        groups: 'ALL' to copy every group from the origin, or a
            comma-separated list of group names.
        skip_groups: comma-separated list of group names to skip.

    Returns:
        dict summarizing groups processed/skipped and datasets
        not found / already in group / added / failed.
    """
    ckan = RemoteCKAN(url=origin_url, user_agent=user_agent)
    ckan.set_destination(ckan_url=destination_url,
                         ckan_api_key=destination_api_key)

    groups_processed = []
    groups_skipped = []
    not_found = []
    already_in_group = []
    added_to_group = []
    failed_to_add = []

    if groups == 'ALL':
        groups = ckan.get_group_list()
    else:
        groups = groups.split(',')

    # hoisted: the skip list is loop-invariant
    skip_list = skip_groups.split(',')

    for group in groups:
        print('Group Found {}'.format(group))
        if group in skip_list:
            print('Skipping group')
            groups_skipped.append(group)
            continue
        groups_processed.append(group)
        # create this group at destination
        ckan.create_group(group)

        # get all datasets from this group and (if they exist at the
        # destination) add each dataset to this group
        packages = ckan.get_datasets_in_group(group_name=group)
        for package in packages:
            name = package['name']
            # if this dataset exists in the new CKAN instance we need to
            # update it to add this group
            package = ckan.get_full_package(name_or_id=name,
                                            url=destination_url)
            if package is None:
                print('Package not found {}'.format(name))
                not_found.append({'group': group, 'dataset_name': name})
                continue
            # check if the group already exists at the destination package
            if group in [grp['name'] for grp in package.get('groups', [])]:
                print('Group {} already exists for {}'.format(group, name))
                already_in_group.append(package['name'])
                continue
            # update the dataset at the new environment to set the group
            package_update_url = f'{destination_url}/api/3/action/package_update'
            print(' ** Updating package {}'.format(name))
            # setdefault guards against packages that have no 'groups' key
            # at all (the membership check above already tolerates that)
            package.setdefault('groups', []).append({'name': group})
            updated, status, error = ckan.request_ckan(url=package_update_url,
                                                       method='POST',
                                                       data=package)
            if updated:
                added_to_group.append(package['name'])
            else:
                failed_to_add.append(package['name'])
            print(' ** Updated ** Status {} ** Error {} **'.format(status,
                                                                   error))

    if len(ckan.errors) > 0:
        print('*******\nWITH ERRORS\n*******')
        print('\n\t'.join(ckan.errors))

    print('Datasets not found: {}'.format(len(not_found)))
    for nf in not_found:
        print('\tDataset {} at group {}'.format(nf['dataset_name'],
                                                nf['group']))

    print('Final results:')
    ret = {
        "groups_processed": groups_processed,
        "groups_skipped": groups_skipped,
        "not_found": not_found,
        "already_in_group": already_in_group,
        "added_to_group": added_to_group,
        "failed_to_add": failed_to_add,
    }
    print(ret)
    return ret