Ejemplo n.º 1
0
def test_load_from_name():
    """Import one harvest source by name and check its force_all config.

    Fetches the full 'doi-open-data' harvest source through a RemoteCKAN
    pointed at https://catalog.data.gov, creates it on the configured
    destination, and verifies that the entry recorded in
    ``ckan.harvest_sources`` is flagged as created (not updated, no error).
    The stored package config is then parsed as JSON and its ``force_all``
    value must be the boolean ``True``.
    """

    ckan = RemoteCKAN(url='https://catalog.data.gov')
    ckan.set_destination(ckan_url='http://ckan:5000',
                         ckan_api_key='0602d7ed-1517-40a0-a92f-049d724962df')

    print('Getting harvest source ...')

    name = 'doi-open-data'
    full_hs = ckan.get_full_harvest_source(hs={'name': name})
    ckan.create_harvest_source(data=full_hs)

    # Look the result entry up once instead of on every assertion.
    result = ckan.harvest_sources[name]

    # Each status key must exist AND carry the expected truthiness:
    # a fresh import is "created", not "updated", and has no error.
    assert 'created' in result
    assert result['created']
    assert 'updated' in result
    assert not result['updated']
    assert 'error' in result
    assert not result['error']

    print(result)

    # check the force_all config: the config is stored as a JSON string,
    # and force_all must be a real boolean (not e.g. the string "true").
    cfg_data = json.loads(result['ckan_package']['config'])
    assert isinstance(cfg_data['force_all'], bool)
    assert cfg_data['force_all']
Ejemplo n.º 2
0
def test_load_from_url():
    """ Test with some previous harvester already saved
        Use a pytest cassette so real requests are not required. 
        We import 3 harvest sources (so they already exists) 
        and then run this test with 6 sources. """

    ckan = RemoteCKAN(url='https://catalog.data.gov')
    ckan.set_destination(ckan_url='http://*****:*****@fdic.gov\r\[email protected]'
    assert expected_email_list in [
        extra['value'] for extra in extras if extra['key'] == 'email_list'
    ]

    extras = ckan.organizations['fcc-gov'].get('extras', [])
    expected_email_list = '[email protected]\r\[email protected]'
    assert expected_email_list in [
        extra['value'] for extra in extras if extra['key'] == 'email_list'
    ]

    assert len(ckan.groups), 1
    assert 'local' in ckan.groups
    assert ckan.groups['local']['display_name'] == 'Local Government'

    print(
        'Finished: {} harvest sources. {} Added, {} already exists, {} failed'.
        format(total, created, updated, errors))

    assert total == len(ckan.harvest_sources)
    assert created == 4
    assert updated == 3
    assert errors == 0
Ejemplo n.º 3
0
    for hs in ckan.list_harvest_sources(source_type=args.source_type,
                                        start=args.offset,
                                        limit=args.limit):
        sources_to_import.append(hs)

# Push every collected harvest source to the destination CKAN, one by one,
# reporting progress as "<position> of <total>".
source_list_position = 0
for hs in sources_to_import:
    source_list_position += 1
    print(' ****** creating {}: {} of {} sources'.format(
        hs['name'],
        source_list_position,
        len(sources_to_import),
    ))

    # Sources already flagged with an error are reported and left out.
    if hs.get('error', False):
        print('Skipping failed source: {}'.format(hs['name']))
        continue

    # Pause for the configured delay before each create call.
    time.sleep(args.wait_for_create)
    ckan.create_harvest_source(data=hs)

    # The client must have recorded all three status flags for this source.
    for status_key in ('created', 'updated', 'error'):
        assert status_key in ckan.harvest_sources[hs['name']].keys()

# Tally the outcome of the run from the per-source status flags.
created = sum(1 for v in ckan.harvest_sources.values() if v['created'])
updated = sum(1 for v in ckan.harvest_sources.values() if v['updated'])
errors = sum(1 for v in ckan.harvest_sources.values() if v['error'])
total = sum((created, updated, errors))

# Each recorded source should fall into exactly one of the three buckets.
assert total == len(ckan.harvest_sources)

if len(ckan.errors) > 0:
    print('*******\nWITH ERRORS\n*******')
    print('\n\t'.join(ckan.errors))
    # NOTE(review): `name` is read before it is assigned here and a
    # `continue` appears below, so this body presumably runs inside a loop
    # over source names whose header is not visible in this excerpt.
    # Result row for this source: its name plus the current timestamp;
    # a 'status' entry is added below when the source cannot be fetched.
    row = {'name': name, 'time': time.time()}

    # check if already exists locally
    hs = local_ckan.get_full_harvest_source(hs={'name': name})
    if hs is None:  # some error (None is treated as "not available locally")
        # does not exist locally: fetch it from the remote CKAN instead
        rhs = remote_ckan.get_full_harvest_source(hs={'name': name})
        if rhs is None:
            # The remote lookup failed too: record the failure and move on.
            print(f'ERROR GETTING EXTERNAL SOURCE: {name}')
            row['status'] = 'Failed to get external source'
            writer.writerow(row)
            continue

        # save it locally
        # NOTE(review): the create call goes through `remote_ckan`,
        # presumably because its destination is the local instance -- confirm.
        remote_ckan.create_harvest_source(data=rhs)
        # get this new source data
        # NOTE(review): if this second lookup also returns None, the
        # subscripts below raise TypeError -- there is no guard for that.
        hs = local_ckan.get_full_harvest_source(hs={'name': name})

    # Pull out the fields used in the report below; `name` is overwritten
    # with the value stored on the source itself.
    title = hs['title']
    url = hs['url']
    sid = hs['id']
    name = hs['name']
    # 'config' is optional; fall back to an empty dict when it is missing.
    config = hs.get('config', {})

    # Human-readable summary of the source about to be checked.
    info = f'Running check for...\n\nTitle: {title}' \
           f'\n\tURL: {url}\n\tID: {sid}\n\t' \
           f'Config: {config}'
    print(info)

    command = ([