def test_list_harvest_sources_with_pagination(mock_requests):
    """Test list_harvest_sources pagination with multiple source types.

    Drives the generator returned by ``list_harvest_sources`` through two
    pages (page_size=1, count=2) and checks that each ``next()`` triggers
    exactly one ``package_search`` request with the correct ``start`` offset.
    """
    ckan = RemoteCKAN(mock_url)
    expected_harvest_source_1 = mock.sentinel.harvest_source_1
    expected_harvest_source_2 = mock.sentinel.harvest_source_2

    # Grab the generator; no HTTP request happens until next() is called,
    # which is why the mocks below can be (re)configured per page.
    harvest_sources = ckan.list_harvest_sources(start=0, page_size=1)

    # First page
    ckan.get_full_harvest_source = mock.Mock(return_value=expected_harvest_source_1) # stub the per-source detail fetch
    mock_requests.return_value = mock_response(data={
        'success': True,
        'result': {
            'count': 2,
            'results': [
                {
                    'title': 'dataset 1',
                    'name': 'dataset-1',
                    'state': 'active',
                    'type': 'harest',  # NOTE(review): looks like a typo for 'harvest' — confirm fixture intent
                    'source_type': 'waf',
                },
            ],
        },
    })
    assert next(harvest_sources) == expected_harvest_source_1
    # Exactly one search request (start=0, rows=page_size) plus its .json() decode.
    assert mock_requests.mock_calls == [
        api_call('/api/3/action/package_search', params=dict(start=0, rows=1, q='(type:harvest)', fq='+dataset_type:harvest', sort='metadata_created asc')),
        mock.call().json(),
    ]
    mock_requests.reset_mock()

    # Second page
    ckan.get_full_harvest_source = mock.Mock(return_value=expected_harvest_source_2)
    mock_requests.return_value = mock_response(data={
        'success': True,
        'result': {
            'count': 2,
            'results': [
                {
                    'title': 'dataset 2',
                    'name': 'dataset-2',
                    'state': 'active',
                    'source_type': 'ckan',
                },
            ],
        },
    })
    assert next(harvest_sources) == expected_harvest_source_2
    # The offset advanced by page_size: second request uses start=1.
    assert mock_requests.mock_calls == [
        api_call('/api/3/action/package_search', params=dict(start=1, rows=1, q='(type:harvest)', fq='+dataset_type:harvest', sort='metadata_created asc')),
        mock.call().json(),
    ]
# Example #2
def test_list_all_sources():
    """Test the list of sources against the live catalog."""

    ckan = RemoteCKAN(url='https://catalog.data.gov')

    results = {}
    count = 0
    # skip_full_source_info avoids one extra request per source.
    for source in ckan.list_harvest_sources(skip_full_source_info=True):
        count += 1
        results[source['name']] = source

    assert 'doi-open-data' in results
    assert count == 1083
    
# Example #3
def test_list_ckan_sources():
    """Test the list of sources filtered to the 'ckan' type."""

    ckan = RemoteCKAN(url='https://catalog.data.gov')
    expected_names = ['doi-open-data', 'test-2016']

    results = {}
    total = 0
    for source in ckan.list_harvest_sources(source_type='ckan'):
        total += 1
        # Every yielded source must match the requested type and name set.
        assert source['source_type'] == 'ckan'
        assert source['name'] in expected_names
        results[source['name']] = source

    assert total == 2
    doi = results['doi-open-data']
    assert doi['url'] == 'https://data.doi.gov'
    assert doi['status']['job_count'] == 1
# Example #4
def test_list_datajson_sources():
    """Test the list of sources filtered to the 'datajson' type."""

    ckan = RemoteCKAN(url='https://catalog.data.gov')

    results = {}
    total = 0
    for source in ckan.list_harvest_sources(source_type='datajson'):
        total += 1
        # Some production sources fail to return the full source info,
        # so fall back to the expected type when the key is missing.
        assert source.get('source_type', 'datajson') == 'datajson'
        results[source['name']] = source

    assert total == 152
    doj = results['doj-json']
    assert doj['url'] == 'http://www.justice.gov/data.json'
    assert doj['frequency'] == 'DAILY'
    assert doj['status']['job_count'] == 235
    assert doj['status']['total_datasets'] == 1236
# Example #5
    # Branch body: the opening `if` is above this excerpt; here a list of
    # bare names is expanded into minimal source dicts and each one is
    # fetched in full from the origin CKAN.
    source_list_position = 0
    for hs in [{'name': name} for name in names]:
        # Throttle requests against the origin CKAN.
        time.sleep(args.wait_for_show)
        source_list_position = source_list_position + 1
        print('****** collecting {}: {} of {} sources'.format(
            hs['name'], source_list_position, len(names)))
        rhs = ckan.get_full_harvest_source(hs)
        if rhs is None:
            # Best-effort: report the failure and keep collecting the rest.
            print('ERROR GETTING EXTERNAL SOURCE: {}'.format(hs['name']))
            continue
        sources_to_import.append(rhs)

else:
    # No explicit name list: page through sources by type/offset/limit.
    for hs in ckan.list_harvest_sources(source_type=args.source_type,
                                        start=args.offset,
                                        limit=args.limit):
        sources_to_import.append(hs)

# Re-create each collected source on the destination CKAN.
source_list_position = 0
for hs in sources_to_import:
    # save to destination CKAN
    source_list_position = source_list_position + 1
    print(' ****** creating {}: {} of {} sources'.format(
        hs['name'], source_list_position, len(sources_to_import)))
    if hs.get('error', False):
        # Sources that errored during collection are skipped, not created.
        print('Skipping failed source: {}'.format(hs['name']))
        continue
    # Throttle creation calls against the destination CKAN.
    time.sleep(args.wait_for_create)
    ckan.create_harvest_source(data=hs)
    # create_harvest_source records its outcome in ckan.harvest_sources.
    assert 'created' in ckan.harvest_sources[hs['name']].keys()
# Example #6
# Script setup: `parser` is defined above this excerpt.
args = parser.parse_args()

ckan = RemoteCKAN(url=args.origin_url, user_agent=args.user_agent)

# Write the CSV report next to this script, named from the CLI argument.
csv_output = os.path.join(os.path.dirname(os.path.realpath(__file__)), args.file_name + '.csv')
csvfile = open(csv_output, 'w')
# One row per harvest source: identity, schedule, and last-job statistics.
fieldnames = ['title', 'name', 'type', 'url', 'frequency',
              'job_count', 'total_datasets', 'last_job_errored', 'last_job_created',
              'last_job_finished', 'last_job_status']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

writer.writeheader()

harvest_sources = []
total = 0
for hs in ckan.list_harvest_sources(source_type=args.source_type):

    if args.limit > 0:
        if total >= args.limit:
            break

    harvest_sources.append(hs)
    status = hs.get('status', {})
    last_job = status.get('last_job', {})
    if last_job is None:
        last_job = {}
    stats = last_job.get('stats', {})

    row = {'title': hs.get('title', 'undefined'),
           'name': hs['name'],
           'type': hs.get('source_type', 'undefined'),