Example #1
    def test_author_check(self):
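        # Author check: dataset1 is synced while the mocked remote user_show
        # returns the matching author, so it should gain a syndicated id extra;
        # dataset2 is synced with a non-matching author and should not.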

        context = {'user': self.user['name']}
        dataset1 = helpers.call_action('package_create',
                                       context=context,
                                       name='syndicated_dataset1',
                                       extras=[{
                                           'key': 'syndicate',
                                           'value': 'true'
                                       }])

        dataset2 = helpers.call_action('package_create',
                                       context=context,
                                       name='syndicated_dataset2',
                                       extras=[{
                                           'key': 'syndicate',
                                           'value': 'true'
                                       }])

        with patch('ckanext.syndicate.tasks.get_target') as mock_target:
            # Mock API

            mock_target.return_value = ckanapi.TestAppCKAN(
                self._get_test_app(), apikey=self.user['apikey'])

            # Syndicate to our Test CKAN instance
            ckan = mock_target()
            mock_user_show = mock.Mock()
            mock_user_show.return_value = self.user
            ckan.action.user_show = mock_user_show

            sync_package(dataset1['id'], 'dataset/create')
            helpers.call_action('package_patch',
                                id=dataset1['id'],
                                extras=[{
                                    'key': 'syndicate',
                                    'value': 'true'
                                }])

            sync_package(dataset1['id'], 'dataset/update')
            mock_user_show.assert_called_once_with(id='test_author')
            updated1 = helpers.call_action('package_show', id=dataset1['id'])
            assert_is_not_none(
                get_pkg_dict_extra(updated1, get_syndicated_id()))

            mock_user_show = mock.Mock()
            mock_user_show.return_value = {'name': 'random-name', 'id': ''}
            ckan.action.user_show = mock_user_show

            sync_package(dataset2['id'], 'dataset/create')
            helpers.call_action('package_patch',
                                id=dataset2['id'],
                                extras=[{
                                    'key': 'syndicate',
                                    'value': 'true'
                                }])
            sync_package(dataset2['id'], 'dataset/update')
            updated2 = helpers.call_action('package_show', id=dataset2['id'])
            assert_false(get_pkg_dict_extra(updated2, get_syndicated_id()))
            del Session.revision
Example #2
def retrieve_ueb_run_output_packages():
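    # Poll the UEB app server for each model-package dataset whose run has
    # succeeded, download its run output package, and merge it into the
    # originating dataset; record a failure status if the download fails.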
    source = 'uebpackage.tasks.retrieve_ueb_run_output_packages():'
    global service_host_address
    #service_request_api_url = '/api/UEBModelRunOutput'
    service_request_api_url = uebhelper.StringSettings.app_server_api_get_ueb_run_output
    connection = httplib.HTTPConnection(service_host_address)

    # get all datasets of type model-package
    model_pkg_datasets = uebhelper.get_packages_by_dataset_type('model-package')
    for dataset in model_pkg_datasets:
        pkg_run_job_id = h.get_pkg_dict_extra(dataset, 'package_run_job_id')
        if pkg_run_job_id is None:
            continue

        # to get the package_type value, which is a tag, use get_package() from the helper module
        pkg_dict = uebhelper.get_package(dataset['id'])
        # TODO: Before using pkg_dict check that it is not None
        pkg_type = pkg_dict['package_type'][0]
        if len(pkg_run_job_id) == 0:
            continue
        if pkg_type == u'Complete':
            continue

        pkg_run_status = h.get_pkg_dict_extra(dataset, 'package_run_status')
        if pkg_run_status != uebhelper.StringSettings.app_server_job_status_success:
            continue

        dataset_id = dataset['id']
        service_request_url = service_request_api_url + '?uebRunJobID=' + pkg_run_job_id
        connection.request('GET', service_request_url)
        service_call_results = connection.getresponse()

        if service_call_results.status == httplib.OK:
            log.info(source + 'UEB model output package was received from App '
                              'server for model pkg dataset ID:%s and Run Job ID:%s' % (dataset_id, pkg_run_job_id))
            _merge_ueb_output_pkg_with_input_pkg(service_call_results, dataset_id)
        else:
            log.error(source + 'HTTP status %d returned from App server when '
                               'retrieving UEB model output package for '
                               'model pkg dataset ID:%s and Run Job ID:%s' %
                      (service_call_results.status, dataset_id, pkg_run_job_id))

            ueb_run_status = 'Failed to retrieve output package'

            # update the dataset
            data_dict = {'package_run_status': ueb_run_status}
            try:
                uebhelper.update_package(dataset_id, data_dict, backgroundTask=True)
                log.info(source + 'UEB model package dataset run status was updated to %s for '
                                  'dataset ID:%s' % (ueb_run_status, dataset_id))
            except Exception as e:
                log.error(source + 'Failed to update run status for UEB model package dataset '
                                   'with dataset ID:%s\nException:%s' % (dataset_id, e))

        connection.close()

    return
Example #3
def get_harvest_source_link(package_dict):
    harvest_source_id = h.get_pkg_dict_extra(package_dict, 'harvest_source_id', None)
    harvest_source_title = h.get_pkg_dict_extra(package_dict, 'harvest_source_title', None)

    if harvest_source_id and harvest_source_title:
       msg = p.toolkit._('Harvested from')
       url = h.url_for('harvest_read', id=harvest_source_id)
       link = '{msg} <a href="{url}">{title}</a>'.format(url=url, msg=msg, title=harvest_source_title)
       return p.toolkit.literal(link)

    return ''
Example #4
def get_harvest_source_link(package_dict):
    harvest_source_id = h.get_pkg_dict_extra(package_dict, 'harvest_source_id', None)
    harvest_source_title = h.get_pkg_dict_extra(package_dict, 'harvest_source_title', None)

    if harvest_source_id and harvest_source_title:
       msg = p.toolkit._('Harvested from')
       url = h.url_for('harvest_read', id=harvest_source_id)
       link = '{msg} <a href="{url}">{title}</a>'.format(url=url, msg=msg, title=harvest_source_title)
       return p.toolkit.literal(link)

    return ''
Example #5
    def _set_context_to_user_input_model_packages(self):
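        # Build the list of model package zip resources the current user may
        # select as UEB model input, and expose it on the template context.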
        # get all datasets of type model-package
        model_pkg_datasets = uebhelper.get_packages_by_dataset_type(
            'model-package')

        # for each resource we need only the id (used as the selection value) and the name for display
        file_resources = []

        for dataset in model_pkg_datasets:
            pkg_run_job_id = h.get_pkg_dict_extra(dataset,
                                                  'package_run_job_id')
            if pkg_run_job_id is None:
                continue

            # skip the dataset if it does not have pkg_model_name = 'UEB'
            pkg_model_name = h.get_pkg_dict_extra(dataset, 'pkg_model_name')
            if pkg_model_name.upper() != 'UEB':
                continue

            # to get the package_type value, which is a tag, use get_package() from the helper module
            pkg_dict = uebhelper.get_package(dataset['id'])
            pkg_type = pkg_dict['package_type'][0]
            if len(pkg_run_job_id.strip()) != 0:
                continue
            if pkg_type == u'Complete':
                continue

            # check if the dataset is owned by the current user
            dataset_id = dataset['id']
            if not uebhelper.is_user_owns_package(dataset_id, tk.c.user) and \
                    not uebhelper.is_user_owns_package(dataset_id, 'default'):
                continue

            # get the model package zip file resource from the dataset; we assume the dataset has only one resource
            model_pkg_resource = dataset['resources'][0]
            dataset_title = dataset['title']
            max_len = 50
            if len(dataset_title) > max_len:
                dataset_title = dataset_title[:max_len] + '...'

            dataset_title = ' (' + dataset_title + ')'
            resource = {}
            resource['id'] = model_pkg_resource['id']
            resource['url'] = model_pkg_resource['url']
            resource['name'] = model_pkg_resource['name'] + dataset_title
            resource['description'] = model_pkg_resource['description']
            file_resources.append(resource)

        tk.c.ueb_input_model_packages = file_resources
Example #6
    def test_get_pkg_dict_extra(self):

        from ckan.lib.create_test_data import CreateTestData
        from ckan import model
        from ckan.logic import get_action

        CreateTestData.create()

        pkg_dict = get_action("package_show")({"model": model, "user": u"tester"}, {"id": "annakarenina"})

        assert_equal(h.get_pkg_dict_extra(pkg_dict, "genre"), "romantic novel")

        assert_equal(h.get_pkg_dict_extra(pkg_dict, "extra_not_found"), None)

        assert_equal(h.get_pkg_dict_extra(pkg_dict, "extra_not_found", "default_value"), "default_value")
Example #7
    def test_get_pkg_dict_extra(self):

        from ckan.lib.create_test_data import CreateTestData
        from ckan import model
        from ckan.logic import get_action

        CreateTestData.create()

        pkg_dict = get_action('package_show')({'model': model, 'user': u'tester'}, {'id': 'annakarenina'})

        assert_equal(h.get_pkg_dict_extra(pkg_dict, 'genre'), '"romantic novel"')

        assert_equal(h.get_pkg_dict_extra(pkg_dict, 'extra_not_found'), None)

        assert_equal(h.get_pkg_dict_extra(pkg_dict, 'extra_not_found', 'default_value'), 'default_value')
Example #8
def _update_package(package):
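    # Push the local package to the remote CKAN; if it has never been
    # syndicated (no syndicated id extra) or the remote copy is gone,
    # fall back to creating it remotely.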
    syndicated_id = get_pkg_dict_extra(package, get_syndicated_id())

    if syndicated_id is None:
        _create_package(package)
        return

    ckan = get_target()

    try:
        updated_package = dict(package)
        # Keep the existing remote ID and Name
        del updated_package['id']
        del updated_package['name']

        updated_package['extras'] = filter_extras(package['extras'])
        updated_package['resources'] = filter_resources(package['resources'])
        updated_package['owner_org'] = get_syndicated_organization()

        try:
            # TODO: No automated test
            updated_package = toolkit.get_action(
                'update_dataset_for_syndication')(
                {}, {'dataset_dict': updated_package})
        except KeyError:
            pass

        ckan.action.package_update(
            id=syndicated_id,
            **updated_package
        )
    except ckanapi.NotFound:
        _create_package(package)
Example #9
def _update_package(package):
    syndicated_id = get_pkg_dict_extra(package, get_syndicated_id())

    if syndicated_id is None:
        _create_package(package)
        return

    ckan = get_target()

    try:
        updated_package = dict(package)
        # Keep the existing remote ID and Name
        del updated_package['id']
        del updated_package['name']

        updated_package['extras'] = filter_extras(package['extras'])
        updated_package['resources'] = filter_resources(package['resources'])
        updated_package['owner_org'] = get_syndicated_organization()

        try:
            # TODO: No automated test
            updated_package = toolkit.get_action(
                'update_dataset_for_syndication')(
                    {}, {
                        'dataset_dict': updated_package
                    })
        except KeyError:
            pass

        ckan.action.package_update(id=syndicated_id, **updated_package)
    except ckanapi.NotFound:
        _create_package(package)
Example #10
def _get_group_ids(dataset_dict):
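    # Map the comma-separated 'countries' extra to group ids (lower-cased ISO
    # alpha-3 codes via pycountry), defaulting to the 'world' group.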
    group_ids = []

    countries = get_pkg_dict_extra(dataset_dict, 'countries')

    if countries is not None:
        for country_name in countries.split(','):
            cleaned_name = country_name.strip().title()
            country = None

            try:
                country = pycountry.countries.get(
                    name=cleaned_name)
            except KeyError:
                try:
                    country = pycountry.countries.get(
                        common_name=cleaned_name)
                except KeyError:
                    pass

            if country is not None:
                group_ids.append(
                    {'id': country.alpha3.lower()})

    if group_ids == []:
        group_ids.append({'id': 'world'})

    return group_ids
Example #11
    def _set_context_to_user_input_model_packages(self):
        # get all datasets of type model-package
        model_pkg_datasets = uebhelper.get_packages_by_dataset_type('model-package')

        # for each resource we need only the id (used as the selection value) and the name for display
        file_resources = []

        for dataset in model_pkg_datasets:
            pkg_run_job_id = h.get_pkg_dict_extra(dataset, 'package_run_job_id')
            if pkg_run_job_id is None:
                continue

            # skip the dataset if it does not have pkg_model_name = 'UEB'
            pkg_model_name = h.get_pkg_dict_extra(dataset, 'pkg_model_name')
            if pkg_model_name.upper() != 'UEB':
                continue

            # to get the package_type value, which is a tag, use get_package() from the helper module
            pkg_dict = uebhelper.get_package(dataset['id'])
            pkg_type = pkg_dict['package_type'][0]
            if len(pkg_run_job_id.strip()) != 0:
                continue
            if pkg_type == u'Complete':
                continue

            # check if the dataset is owned by the current user
            dataset_id = dataset['id']
            if not uebhelper.is_user_owns_package(dataset_id, tk.c.user) and \
                    not uebhelper.is_user_owns_package(dataset_id, 'default'):
                continue

            # get the model package zip file resource from the dataset; we assume the dataset has only one resource
            model_pkg_resource = dataset['resources'][0]
            dataset_title = dataset['title']
            max_len = 50
            if len(dataset_title) > max_len:
                dataset_title = dataset_title[:max_len] + '...'

            dataset_title = ' (' + dataset_title + ')'
            resource = {}
            resource['id'] = model_pkg_resource['id']
            resource['url'] = model_pkg_resource['url']
            resource['name'] = model_pkg_resource['name'] + dataset_title
            resource['description'] = model_pkg_resource['description']
            file_resources.append(resource)

        tk.c.ueb_input_model_packages = file_resources
Example #12
def test_get_pkg_dict_extra():
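    # The 'genre' extra exists on the annakarenina fixture; a missing key
    # returns None, or the supplied default value.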

    from ckan.lib.create_test_data import CreateTestData
    from ckan import model

    CreateTestData.create()

    pkg_dict = helpers.call_action("package_show", id="annakarenina")

    assert h.get_pkg_dict_extra(pkg_dict, "genre") == "romantic novel"

    assert h.get_pkg_dict_extra(pkg_dict, "extra_not_found") is None

    assert (h.get_pkg_dict_extra(pkg_dict, "extra_not_found",
                                 "default_value") == "default_value")

    model.repo.rebuild_db()
Example #13
    def test_get_pkg_dict_extra(self):

        from ckan.lib.create_test_data import CreateTestData
        from ckan import model
        from ckan.logic import get_action

        CreateTestData.create()

        pkg_dict = get_action('package_show')({'model': model, 'user': u'tester'}, {'id': 'annakarenina'})

        assert_equal(h.get_pkg_dict_extra(pkg_dict, 'genre'), 'romantic novel')

        assert_equal(h.get_pkg_dict_extra(pkg_dict, 'extra_not_found'), None)

        assert_equal(h.get_pkg_dict_extra(pkg_dict, 'extra_not_found', 'default_value'), 'default_value')

        model.repo.rebuild_db()
Example #14
def check_ueb_run_status():
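    # Ask the UEB app server for the current run status of every model package
    # dataset that is still processing or queued, and store the answer back on
    # the dataset.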
    source = 'uebpackage.tasks.check_ueb_run_status():'
    global service_host_address

    service_request_api_url = uebhelper.StringSettings.app_server_api_check_ueb_run_status_url
    connection = httplib.HTTPConnection(service_host_address)
    job_status_processing = uebhelper.StringSettings.app_server_job_status_processing
    job_status_in_queue = uebhelper.StringSettings.app_server_job_status_in_queue

    model_pkg_datasets_with_run_status_processing = _get_model_pkg_datasets_by_run_status(job_status_processing)
    model_pkg_datasets_with_run_status_in_queue = _get_model_pkg_datasets_by_run_status(job_status_in_queue)

    model_pkg_datasets_need_run_status_update = model_pkg_datasets_with_run_status_processing + \
                                                model_pkg_datasets_with_run_status_in_queue

    if len(model_pkg_datasets_need_run_status_update) == 0:
        log.info(source + "No UEB model package dataset has a run status of %s at this time" % job_status_processing)
    else:
        log.info(source + "Number of UEB model package datatsets with run status of %s or %s at this time is:%s"
                 % (job_status_processing, job_status_in_queue, len(model_pkg_datasets_need_run_status_update)))

    for dataset in model_pkg_datasets_need_run_status_update:
        pkg_run_job_id = h.get_pkg_dict_extra(dataset, 'package_run_job_id')
        if pkg_run_job_id is None:
            continue

        dataset_id = dataset['id']
        service_request_url = service_request_api_url + '?uebRunJobID=' + pkg_run_job_id
        connection.request('GET', service_request_url)
        service_call_results = connection.getresponse()

        if service_call_results.status == httplib.OK:
            request_processing_status = service_call_results.read()
            log.info(source + 'UEB model package run status as returned from App '
                              'server for dataset ID: %s and Run Job ID:%s is %s' %
                     (dataset_id, pkg_run_job_id, request_processing_status))
        else:
            request_processing_status = uebhelper.StringSettings.app_server_job_status_error
            log.error(source + 'HTTP status %d returned from App server when checking '
                               'run status for Run Job ID:%s and model pkg dataset ID:%s' %
                      (service_call_results.status, pkg_run_job_id, dataset_id))

        connection.close()
        # update the dataset
        data_dict = {'package_run_status': request_processing_status}
        try:
            uebhelper.update_package(dataset_id, data_dict, backgroundTask=True)
            log.info(source + 'UEB model package dataset run status was updated to %s for '
                              'dataset ID:%s' % (request_processing_status, dataset_id))
        except Exception as e:
            log.error(source + 'Failed to update run status for UEB model package dataset '
                               'with dataset ID:%s\nException:%s' % (dataset_id, e))
Example #15
    def test_get_pkg_dict_extra(self):

        from ckan.lib.create_test_data import CreateTestData
        from ckan import model
        from ckan.logic import get_action

        CreateTestData.create()

        pkg_dict = get_action("package_show")({
            "model": model,
            "user": u"tester"
        }, {
            "id": "annakarenina"
        })

        assert h.get_pkg_dict_extra(pkg_dict, "genre") == "romantic novel"

        assert h.get_pkg_dict_extra(pkg_dict, "extra_not_found") is None

        assert (h.get_pkg_dict_extra(pkg_dict, "extra_not_found",
                                     "default_value") == "default_value")

        model.repo.rebuild_db()
Example #16
def update_dataset_for_hdx_syndication(context, data_dict):
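    # Reshape a local dataset dict into the fields expected by the remote HDX
    # instance before syndication (methodology, source, groups, update
    # frequency), dropping tags and extras.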
    dataset_dict = data_dict['dataset_dict']

    dataset_dict['dataset_date'] = _get_dataset_date(dataset_dict)

    dataset_dict['methodology'] = 'Other'
    methodology = get_pkg_dict_extra(dataset_dict, 'methodology')
    if methodology is None:
        dataset_dict['methodology_other'] = 'Not specified'
    else:
        dataset_dict['methodology_other'] = methodology

    dataset_dict['dataset_source'] = get_pkg_dict_extra(
        dataset_dict, 'datasource')

    dataset_dict['groups'] = _get_group_ids(dataset_dict)

    dataset_dict['data_update_frequency'] = '0'  # Never

    dataset_dict.pop('tags', None)
    dataset_dict.pop('extras', None)

    return dataset_dict
Example #17
def format_frequency(package):
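    # Build a human-readable collection frequency, e.g. '5 days', from the
    # 'frequency-of-collection' and 'frequency-of-collection-units' extras.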
    freq = h.get_pkg_dict_extra(package, 'frequency-of-collection')
    unit = h.get_pkg_dict_extra(package, 'frequency-of-collection-units')
    # Remove the surrounding curly braces from both the strings
    freq_num = run_format_regex(freq)
    freq_float = None
    try:
        freq_int = int(freq_num)
    except ValueError:
        freq_float = float(freq_num)
    # Most values are ints, but some are floats and some of these floats are
    # just the same number as the int. This complicated and ugly logic makes
    # sure floats are used *only* when needed.
    if freq_float is not None:
        if freq_float == int(freq_float):
            freq_num = int(freq_float)
        else:
            freq_num = freq_float
    else:
        freq_num = freq_int
    unit_str = run_format_regex(unit)
    if freq_num > 0:
        unit_str = '{0}s'.format(unit_str)
    return '{0} {1}'.format(freq_num, unit_str)
Example #18
def format_frequency(package):
    freq = h.get_pkg_dict_extra(package, 'frequency-of-collection')
    unit = h.get_pkg_dict_extra(package, 'frequency-of-collection-units')
    # Remove the surrounding curly braces from both the strings
    freq_num = run_format_regex(freq)
    freq_float = None
    try:
        freq_int = int(freq_num)
    except ValueError:
        freq_float = float(freq_num)
    # Most values are ints, but some are floats and some of these floats are
    # just the same number as the int. This complicated and ugly logic makes
    # sure floats are used *only* when needed.
    if freq_float is not None:
        if freq_float == int(freq_float):
            freq_num = int(freq_float)
        else:
            freq_num = freq_float
    else:
        freq_num = freq_int
    unit_str = run_format_regex(unit)
    if freq_num > 0:
        unit_str = '{0}s'.format(unit_str)
    return '{0} {1}'.format(freq_num, unit_str)
Example #19
    def test_syndicate_existing_package(self):
        context = {
            'user': self.user['name'],
        }

        existing = helpers.call_action(
            'package_create',
            context=_get_context(context),
            name='existing-dataset',
            notes=
            'The MapAction PowerPoint Map Pack contains a set of country level reference maps'
        )

        existing['extras'] = [
            {
                'key': 'syndicate',
                'value': 'true'
            },
        ]

        helpers.call_action('package_update',
                            context=_get_context(context),
                            **existing)

        with patch('ckanext.syndicate.tasks.get_target') as mock_target:
            mock_target.return_value = ckanapi.TestAppCKAN(
                self._get_test_app(), apikey=self.user['apikey'])

            sync_package(existing['id'], 'dataset/update')

        updated = helpers.call_action(
            'package_show',
            context=_get_context(context),
            id=existing['id'],
        )

        syndicated_id = get_pkg_dict_extra(updated, 'syndicated_id')

        syndicated = helpers.call_action(
            'package_show',
            context=_get_context(context),
            id=syndicated_id,
        )

        # Expect the id of the syndicated package to match the metadata
        # syndicated_id in the source package.
        assert_equal(syndicated['notes'], updated['notes'])
Example #20
def _get_dataset_date(dataset_dict):
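    # Parse the 'createdate' extra in either of two known formats and return it
    # as MM/DD/YYYY, defaulting to 01/01/2003 when missing or unparseable.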
    created = get_pkg_dict_extra(dataset_dict, 'createdate')

    created_date = datetime(2003, 1, 1)

    if created is not None:
        try:
            created_date = datetime.strptime(created,
                                             '%Y-%m-%d %H:%M:%S')
        except ValueError:
            try:
                created_date = datetime.strptime(created,
                                                 '%d/%m/%Y %H:%M')
            except ValueError:
                pass

    return created_date.strftime('%m/%d/%Y')
Example #21
    def test_syndicate_existing_package(self):
        context = {
            'user': self.user['name'],
        }

        existing = helpers.call_action(
            'package_create',
            context=_get_context(context),
            name='existing-dataset',
            notes='The MapAction PowerPoint Map Pack contains a set of country level reference maps'
        )

        existing['extras'] = [
            {'key': 'syndicate', 'value': 'true'},
        ]

        helpers.call_action(
            'package_update',
            context=_get_context(context),
            **existing)

        with patch('ckanext.syndicate.tasks.get_target') as mock_target:
            mock_target.return_value = ckanapi.TestAppCKAN(
                self._get_test_app(), apikey=self.user['apikey'])

            sync_package(existing['id'], 'dataset/update')

        updated = helpers.call_action(
            'package_show',
            context=_get_context(context),
            id=existing['id'],
        )

        syndicated_id = get_pkg_dict_extra(updated, 'syndicated_id')

        syndicated = helpers.call_action(
            'package_show',
            context=_get_context(context),
            id=syndicated_id,
        )

        # Expect the id of the syndicated package to match the metadata
        # syndicated_id in the source package.
        assert_equal(syndicated['notes'], updated['notes'])
Example #22
    def test_syndicate_existing_package_with_stale_syndicated_id(self):
        context = {
            'user': self.user['name'],
        }

        existing = helpers.call_action(
            'package_create',
            context=_get_context(context),
            name='existing-dataset',
            notes=
            'The MapAction PowerPoint Map Pack contains a set of country level reference maps',
            extras=[{
                'key': 'syndicate',
                'value': 'true'
            }, {
                'key': 'syndicated_id',
                'value': '87f7a229-46d0-4171-bfb6-048c622adcdc'
            }])

        with patch('ckanext.syndicate.tasks.get_target') as mock_target:
            mock_target.return_value = ckanapi.TestAppCKAN(
                self._get_test_app(), apikey=self.user['apikey'])

            sync_package(existing['id'], 'dataset/update')

        updated = helpers.call_action(
            'package_show',
            context=_get_context(context),
            id=existing['id'],
        )

        syndicated_id = get_pkg_dict_extra(updated, 'syndicated_id')

        syndicated = helpers.call_action(
            'package_show',
            context=_get_context(context),
            id=syndicated_id,
        )

        assert_equal(syndicated['notes'], updated['notes'])
Example #23
    def before_view(context, pkg_dict):
        '''
        Adds any additional data fields to the package dictionary for custom display
        '''

        # Attach URL "download bucket" endpoint to package
        pkg_dict['preview_url'] = helpers.get_pkg_dict_extra(
            pkg_dict, 'download-url', '')

        # Turn the stored string of credits back into a list
        credits = helpers.get_pkg_dict_extra(pkg_dict, 'credits', '')
        if credits:
            pkg_dict['credits'] = ast.literal_eval(credits)

        # If temporal extent, format the dates.
        temporal_start = helpers.get_pkg_dict_extra(pkg_dict,
                                                    'temporal-extent-begin')
        temporal_end = helpers.get_pkg_dict_extra(pkg_dict,
                                                  'temporal-extent-end')
        if temporal_start and temporal_end:
            # Unfortunately, the datetime library won't handle years before 1900, so a bit
            # of manual parsing is needed here.
            start_year = temporal_start[:4]
            start_month = temporal_start[5:7]
            end_year = temporal_end[:4]
            end_month = temporal_end[5:7]
            try:
                pkg_dict['temporal_start'] = "{0} {1}".format(
                    datetime.strptime(start_month, '%m').strftime('%B'),
                    start_year)
                pkg_dict['temporal_end'] = "{0} {1}".format(
                    datetime.strptime(end_month, '%m').strftime('%B'),
                    end_year)
            except ValueError:
                # Swallow and ignore if the date parsing failed.
                pass

        spatial_resolution = helpers.get_pkg_dict_extra(
            pkg_dict, 'spatial-resolution')
        spatial_resolution_units = helpers.get_pkg_dict_extra(
            pkg_dict, 'spatial-resolution-units')
        if spatial_resolution and spatial_resolution_units:
            pkg_dict['spatial_resolution'] = "{0} {1}".format(
                spatial_resolution, spatial_resolution_units)

        return pkg_dict
Example #24
    def test_syndicate_existing_package_with_stale_syndicated_id(self):
        context = {
            'user': self.user['name'],
        }

        existing = helpers.call_action(
            'package_create',
            context=_get_context(context),
            name='existing-dataset',
            notes='The MapAction PowerPoint Map Pack contains a set of country level reference maps',
            extras=[
                {'key': 'syndicate', 'value': 'true'},
                {'key': 'syndicated_id',
                 'value': '87f7a229-46d0-4171-bfb6-048c622adcdc'}
            ]
        )

        with patch('ckanext.syndicate.tasks.get_target') as mock_target:
            mock_target.return_value = ckanapi.TestAppCKAN(
                self._get_test_app(), apikey=self.user['apikey'])

            sync_package(existing['id'], 'dataset/update')

        updated = helpers.call_action(
            'package_show',
            context=_get_context(context),
            id=existing['id'],
        )

        syndicated_id = get_pkg_dict_extra(updated, 'syndicated_id')

        syndicated = helpers.call_action(
            'package_show',
            context=_get_context(context),
            id=syndicated_id,
        )

        assert_equal(syndicated['notes'], updated['notes'])
Example #25
class SearchfedPlugin(plugins.SingletonPlugin):
    plugins.implements(plugins.IConfigurer)
    plugins.implements(plugins.IPackageController, inherit=True)

    search_fed_dict = dict(zip(*[iter(toolkit.aslist(
        config.get('ckan.search_federation', [])))] * 2))
    search_fed_this_label = config.get('ckan.search_federation.label', '')
    search_fed_keys = toolkit.aslist(
        config.get('ckan.search_federation.extra_keys', 'harvest_portal'))
    search_fed_labels = search_fed_dict.keys() + [search_fed_this_label]
    use_remote_facets = toolkit.asbool(config.get(
        'ckan.search_federation.use_remote_facet_results', False))
    search_fed_label_blacklist = toolkit.aslist(config.get(
        'ckan.search_federation.label_blacklist',
        'owner_org harvest_source_id user_id'))
    search_fed_dataset_whitelist = toolkit.aslist(config.get(
        'ckan.search_federation.dataset_whitelist', 'dataset'))

    # IConfigurer

    def update_config(self, config_):
        toolkit.add_template_directory(config_, 'templates')
        toolkit.add_public_directory(config_, 'public')
        toolkit.add_resource('fanstatic', 'searchfed')

    # IPackageController

    def before_search(self, search_params):
        limit = int(config.get(
            'ckan.search_federation.min_search_results', 20))
        rows = search_params.get('rows', None)
        search_params['rows'] = rows if rows is not None else limit
        return search_params

    def after_search(self, search_results, search_params):
        limit = int(config.get(
            'ckan.search_federation.min_search_results', 20))

        def _append_remote_search(search_keys, remote_org_label,
                                  remote_org_url, fed_labels, type_whitelist):
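            # Query the federated remote CKAN's package_search API and merge
            # its results (and optionally its facets) into the local results.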

            local_results_num = len(search_results['results'])
            # query.run increases rows by 1, so we need to reduce the limit by 1
            limit = search_params.get('rows') - 1
            current_page = request.params.get('page', 1)
            try:
                current_page = int(current_page)
                if current_page < 1:
                    raise ValueError("Negative number not allowed")
            except ValueError, e:
                abort(400, ('"page" parameter must be a positive integer'))

            fq = " ".join(g for g in
                          map(lambda sk: " ".join(e for e in map(
                              lambda x: "-" + sk + ":" + str(x), fed_labels)),
                              search_keys))
            for fq_entry in toolkit.aslist(search_params['fq'][0]):
                fq_entry = fq_entry.replace('/"', '"').replace("//", "")
                fq_split = fq_entry.split(':', 1)
                if len(fq_split) == 2:
                    fq_key = fq_split[0]
                    fq_value = fq_split[1]
                    fq_monop = ""
                    if fq_key[0] in ['+', '-']:
                        fq_monop = fq_entry[:1]
                        fq_key = fq_key[1:]

                    # Dataset whitelist check
                    if (fq_key == 'dataset_type' and
                            fq_monop != "-" and
                            fq_value not in type_whitelist):
                        return
                    fq += " " + fq_monop + fq_key + ":" + fq_value
                else:
                    fq += fq_entry
            count_only = False
            start = search_params.get('start', 0)
            if local_results_num > start:
                remote_limit = limit - local_results_num
                if remote_limit <= 0:
                    count_only = True
                remote_start = 0
            else:
                remote_limit = limit
                if not used_controller:
                    remote_start = start - toolkit.c.local_item_count
                else:
                    if current_page == 1:
                        remote_start = 0
                    elif current_page == 2:
                        remote_start = limit - toolkit.c.local_item_count
                    else:
                        remote_start = limit - toolkit.c.local_item_count + limit * (current_page - 2)

            @beaker_cache(expire=3600, query_args=True)
            def _fetch_data(fetch_start, fetch_num):
                data = urllib.quote(json.dumps({
                    'q': search_params['q'],
                    'fq': fq,
                    'facet.field': search_params.get('facet.field', []),
                    'rows': fetch_num,
                    'start': fetch_start,
                    'sort': search_params['sort'],
                    'extras': search_params['extras']
                }))

                try:
                    req = urllib2.Request(
                        remote_org_url + '/api/3/action/package_search', data)
                    rsp = urllib2.urlopen(req)
                except urllib2.URLError, err:
                    log.warn('Unable to connect to %r: %r' % (
                        remote_org_url + '/api/3/action/package_search', err))
                    return None
                content = rsp.read()
                return json.loads(content)

            remote_results = _fetch_data(remote_start, remote_limit)

            # Only continue if the remote fetch was successful
            if remote_results is None:
                return search_results

            if count_only:
                remote_results['result']['results'] = []
            else:
                remote_results_num = len(remote_results['result']['results'])
                if remote_results_num <= remote_limit + remote_start:
                    if remote_results['result']['count'] > remote_results_num:
                        # While the result count reports all remote matches, the number of results may be limited
                        # by the CKAN install. Here our query has extended beyond the actual returned results, so
                        # we re-issue a more refined query starting and ending at precisely where we want (since
                        # we have already acquired the total count)
                        temp_results = _fetch_data(remote_start, min(
                            remote_results['result']['count'] - remote_start,
                            remote_limit))
                        if temp_results:
                            remote_results['result']['results'] = temp_results[
                                'result']['results']

            for dataset in remote_results['result']['results']:
                extras = dataset.get('extras', [])
                if not h.get_pkg_dict_extra(dataset, 'harvest_url'):
                    extras += [
                        {
                            'key': 'harvest_url',
                            'value': remote_org_url + '/dataset/' + dataset[
                                'id']
                        }
                    ]
                for k in search_keys:
                    if not h.get_pkg_dict_extra(dataset, k):
                        extras += [{'key': k, 'value': remote_org_label}]
                if not h.get_pkg_dict_extra(dataset, 'federation_source'):
                    extras += [{'key': 'federation_source',
                                'value': remote_org_url}]
                dataset.update(
                    extras=extras, harvest_source_title=remote_org_label)
            search_results['count'] += remote_results['result']['count']
            if not count_only:
                if not limit or start > search_results['count']:
                    search_results['results'] = []
                elif toolkit.c.local_item_count < limit + start:
                    search_results['results'] += remote_results['result'][
                                                                'results']
                if ('search_facets' in remote_results['result'] and
                        self.use_remote_facets):
                    search_results['search_facets'] = remote_results['result'][
                                                            'search_facets']
Example #26
def retrieve_ueb_run_output_packages():
    source = 'uebpackage.tasks.retrieve_ueb_run_output_packages():'
    global service_host_address
    #service_request_api_url = '/api/UEBModelRunOutput'
    service_request_api_url = uebhelper.StringSettings.app_server_api_get_ueb_run_output
    connection = httplib.HTTPConnection(service_host_address)

    # get all datasets of type model-package
    model_pkg_datasets = uebhelper.get_packages_by_dataset_type(
        'model-package')
    for dataset in model_pkg_datasets:
        pkg_run_job_id = h.get_pkg_dict_extra(dataset, 'package_run_job_id')
        if pkg_run_job_id is None:
            continue

        # to get the package_type value, which is a tag, use get_package() from the helper module
        pkg_dict = uebhelper.get_package(dataset['id'])
        # TODO: Before using pkg_dict check that it is not None
        pkg_type = pkg_dict['package_type'][0]
        if len(pkg_run_job_id) == 0:
            continue
        if pkg_type == u'Complete':
            continue

        pkg_run_status = h.get_pkg_dict_extra(dataset, 'package_run_status')
        if pkg_run_status != uebhelper.StringSettings.app_server_job_status_success:
            continue

        dataset_id = dataset['id']
        service_request_url = service_request_api_url + '?uebRunJobID=' + pkg_run_job_id
        connection.request('GET', service_request_url)
        service_call_results = connection.getresponse()

        if service_call_results.status == httplib.OK:
            log.info(source + 'UEB model output package was received from App '
                     'server for model pkg dataset ID:%s and Run Job ID:%s' %
                     (dataset_id, pkg_run_job_id))
            _merge_ueb_output_pkg_with_input_pkg(service_call_results,
                                                 dataset_id)
        else:
            log.error(
                source + 'HTTP status %d returned from App server when '
                'retrieving UEB model output package for '
                'model pkg dataset ID:%s and Run Job ID:%s' %
                (service_call_results.status, dataset_id, pkg_run_job_id))

            ueb_run_status = 'Failed to retrieve output package'

            # update the dataset
            data_dict = {'package_run_status': ueb_run_status}
            try:
                uebhelper.update_package(dataset_id,
                                         data_dict,
                                         backgroundTask=True)
                log.info(
                    source +
                    'UEB model package dataset run status was updated to %s for '
                    'dataset ID:%s' % (ueb_run_status, dataset_id))
            except Exception as e:
                log.error(
                    source +
                    'Failed to update run status for UEB model package dataset '
                    'with dataset ID:%s\nException:%s' % (dataset_id, e))

        connection.close()

    return
Example #27
def retrieve_ueb_packages():
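    # Download completed UEB model input packages from the app server, save
    # each one as a new dataset, and record package availability back on the
    # model configuration dataset.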
    source = 'uebpackage.tasks.retrieve_ueb_packages():'
    global service_host_address
    service_request_api_url = uebhelper.StringSettings.app_server_api_get_ueb_package_url
    connection = httplib.HTTPConnection(service_host_address)
    job_status_complete = uebhelper.StringSettings.app_server_job_status_success
    model_config_datasets_with_status_complete = _get_model_configuration_datasets_by_processing_status(
        job_status_complete)

    if len(model_config_datasets_with_status_complete) == 0:
        log.info(
            source +
            "No UEB model configuration dataset has a status of %s at this time"
            % job_status_complete)
    else:
        log.info(
            source +
            "Number of UEB model configuration datasets with build status of %s at this time is:%s"
            % (job_status_complete,
               len(model_config_datasets_with_status_complete)))

    for dataset in model_config_datasets_with_status_complete:
        pkg_availability_status = h.get_pkg_dict_extra(dataset,
                                                       'package_availability')
        if pkg_availability_status == uebhelper.StringSettings.app_server_job_status_package_available:
            continue

        pkg_process_job_id = h.get_pkg_dict_extra(
            dataset, 'package_build_request_job_id')
        dataset_id = dataset['id']
        package_availability_status = h.get_pkg_dict_extra(
            dataset, 'package_availability')

        # if package is already available or error has been logged for package retrieval then skip this dataset
        if package_availability_status == uebhelper.StringSettings.app_server_job_status_package_available or \
                package_availability_status == uebhelper.StringSettings.app_server_job_status_error:
            continue

        service_request_url = service_request_api_url + '?packageID=' + pkg_process_job_id
        connection.request('GET', service_request_url)
        service_call_results = connection.getresponse()

        if service_call_results.status == httplib.OK:
            log.info(
                source +
                'UEB model package was received from App server for PackageJobID:%s'
                % pkg_process_job_id)
            try:
                _save_ueb_package_as_dataset(service_call_results, dataset_id)
                pkg_availability_status = uebhelper.StringSettings.app_server_job_status_package_available
            except Exception as e:
                log.error(
                    source +
                    'Failed to save ueb model package as a new dataset '
                    'for model configuration dataset ID:%s\nException:%s' %
                    (dataset_id, e))
                pkg_availability_status = uebhelper.StringSettings.app_server_job_status_error
        else:
            log.error(
                source +
                'HTTP status %d returned from App server when retrieving '
                'UEB model package for PackageJobID:'
                '%s' % (service_call_results.status, pkg_process_job_id))
            pkg_availability_status = uebhelper.StringSettings.app_server_job_status_error

        connection.close()

        # update the resource processing status
        # update the related dataset
        data_dict = {'package_availability': pkg_availability_status}
        update_msg = 'system auto updated ueb package dataset'
        background_task = True
        try:
            updated_package = uebhelper.update_package(dataset_id, data_dict,
                                                       update_msg,
                                                       background_task)
            log.info(
                source +
                'UEB model configuration dataset was updated as a result of '
                'receiving model input package for dataset:%s' %
                updated_package['name'])
        except Exception as e:
            log.error(source +
                      'Failed to update UEB model configuration dataset after '
                      'receiving model input package for dataset ID:%s \n'
                      'Exception: %s' % (dataset_id, e))
            pass

    return
Example #28
def check_ueb_run_status():
    source = 'uebpackage.tasks.check_ueb_run_status():'
    global service_host_address

    service_request_api_url = uebhelper.StringSettings.app_server_api_check_ueb_run_status_url
    connection = httplib.HTTPConnection(service_host_address)
    job_status_processing = uebhelper.StringSettings.app_server_job_status_processing
    job_status_in_queue = uebhelper.StringSettings.app_server_job_status_in_queue

    model_pkg_datasets_with_run_status_processing = _get_model_pkg_datasets_by_run_status(
        job_status_processing)
    model_pkg_datasets_with_run_status_in_queue = _get_model_pkg_datasets_by_run_status(
        job_status_in_queue)

    model_pkg_datasets_need_run_status_update = model_pkg_datasets_with_run_status_processing + \
                                                model_pkg_datasets_with_run_status_in_queue

    if len(model_pkg_datasets_need_run_status_update) == 0:
        log.info(
            source +
            "No UEB model package dataset has a run status of %s at this time"
            % job_status_processing)
    else:
        log.info(
            source +
            "Number of UEB model package datatsets with run status of %s or %s at this time is:%s"
            % (job_status_processing, job_status_in_queue,
               len(model_pkg_datasets_need_run_status_update)))

    for dataset in model_pkg_datasets_need_run_status_update:
        pkg_run_job_id = h.get_pkg_dict_extra(dataset, 'package_run_job_id')
        if pkg_run_job_id is None:
            continue

        dataset_id = dataset['id']
        service_request_url = service_request_api_url + '?uebRunJobID=' + pkg_run_job_id
        connection.request('GET', service_request_url)
        service_call_results = connection.getresponse()

        if service_call_results.status == httplib.OK:
            request_processing_status = service_call_results.read()
            log.info(source +
                     'UEB model package run status as returned from App '
                     'server for dataset ID: %s and Run Job ID:%s is %s' %
                     (dataset_id, pkg_run_job_id, request_processing_status))
        else:
            request_processing_status = uebhelper.StringSettings.app_server_job_status_error
            log.error(
                source +
                'HTTP status %d returned from App server when checking '
                'run status for Run Job ID:%s and model pkg dataset ID:%s' %
                (service_call_results.status, pkg_run_job_id, dataset_id))

        connection.close()
        # update the dataset
        data_dict = {'package_run_status': request_processing_status}
        try:
            uebhelper.update_package(dataset_id,
                                     data_dict,
                                     backgroundTask=True)
            log.info(
                source +
                'UEB model package dataset run status was updated to %s for '
                'dataset ID:%s' % (request_processing_status, dataset_id))
        except Exception as e:
            log.error(
                source +
                'Failed to update run status for UEB model package dataset '
                'with dataset ID:%s\nException:%s' % (dataset_id, e))
Example #29
    def _crawl_results(self, harvest_url, limit=100, timeout=5, username=None, password=None, provider=None):  # noqa: E501
        """
        Iterate through the results, create harvest objects,
        and return the ids.
        """
        ids = []
        new_counter = 0
        update_counter = 0
        while len(ids) < limit and harvest_url:
            # We'll limit ourselves to one request per second
            start_request = time.time()

            # Make a request to the website
            timestamp = str(datetime.utcnow())
            log_message = '{:<12} | {} | {} | {}s'
            try:
                r = requests.get(harvest_url,
                                 auth=HTTPBasicAuth(username, password),
                                 verify=False, timeout=timeout)
            except Timeout as e:
                self._save_gather_error('Request timed out: {}'.format(e), self.job)  # noqa: E501
                status_code = 408
                elapsed = 9999
                if hasattr(self, 'provider_logger'):
                    self.provider_logger.info(log_message.format(self.provider,
                        timestamp, status_code, timeout))  # noqa: E128
                return ids
            if r.status_code != 200:
                self._save_gather_error('{} error: {}'.format(r.status_code, r.text), self.job)  # noqa: E501
                elapsed = 9999
                if hasattr(self, 'provider_logger'):
                    self.provider_logger.info(log_message.format(self.provider,
                        timestamp, r.status_code, elapsed))  # noqa: E128
                return ids

            if hasattr(self, 'provider_logger'):
                self.provider_logger.info(log_message.format(self.provider,
                    timestamp, r.status_code, r.elapsed.total_seconds()))  # noqa: E128, E501

            soup = Soup(r.content, 'lxml')

            # Get the URL for the next loop, or None to break the loop
            harvest_url = self._get_next_url(soup)

            # Get the entries from the results
            entries = self._get_entries_from_results(soup)

            # Create a harvest object for each entry
            for entry in entries:
                entry_guid = entry['guid']
                entry_name = entry['identifier']
                entry_restart_date = entry['restart_date']

                package = Session.query(Package) \
                    .filter(Package.name == entry_name).first()

                if package:
                    # Meaning we've previously harvested this,
                    # but we may want to reharvest it now.
                    # We need package_show to ensure that all the conversions
                    # are carried out.
                    context = {"user": "******", "ignore_auth": True,
                               "model": model, "session": Session}
                    pkg_dict = logic.get_action('package_show')(context, {"id": package.name})  # noqa: E501
                    previous_obj = model.Session.query(HarvestObject) \
                        .filter(HarvestObject.guid == entry_guid) \
                        .filter(HarvestObject.current == True) \
                        .first()  # noqa: E712
                    if previous_obj:
                        previous_obj.current = False
                        previous_obj.save()

                    if self.update_all:
                        log.debug('{} already exists and will be updated.'.format(entry_name))  # noqa: E501
                        status = 'change'
                        update_counter += 1
                    # E.g., a Sentinel dataset exists,
                    # but doesn't have a NOA resource yet.
                    elif self.flagged_extra and not get_pkg_dict_extra(pkg_dict, self.flagged_extra):  # noqa: E501
                        log.debug('{} already exists and will be extended.'.format(entry_name))  # noqa: E501
                        status = 'change'
                        update_counter += 1
                    else:
                        log.debug('{} will not be updated.'.format(entry_name))  # noqa: E501
                        status = 'unchanged'

                    obj = HarvestObject(guid=entry_guid, job=self.job,
                                        extras=[HOExtra(key='status',
                                                value=status),
                                                HOExtra(key='restart_date',
                                                value=entry_restart_date)])
                    obj.content = entry['content']
                    obj.package = package
                    obj.save()
                    ids.append(obj.id)
                elif not package:
                    # It's a product we haven't harvested before.
                    log.debug('{} has not been harvested before. Creating a new harvest object.'.format(entry_name))  # noqa: E501
                    obj = HarvestObject(guid=entry_guid, job=self.job,
                                        extras=[HOExtra(key='status',
                                                value='new'),
                                                HOExtra(key='restart_date',
                                                value=entry_restart_date)])
                    new_counter += 1
                    obj.content = entry['content']
                    obj.package = None
                    obj.save()
                    ids.append(obj.id)

            end_request = time.time()
            request_time = end_request - start_request
            if request_time < 1.0:
                time.sleep(1 - request_time)

        harvester_msg = '{:<12} | {} | jobID:{} | {} | {}'
        if hasattr(self, 'harvester_logger'):
            timestamp = str(datetime.utcnow())
            self.harvester_logger.info(harvester_msg.format(self.provider,
                                       timestamp, self.job.id, new_counter, update_counter))  # noqa: E128, E501
        return ids
Example #30
def retrieve_ueb_packages():
    source = 'uebpackage.tasks.retrieve_ueb_packages():'
    global service_host_address
    service_request_api_url = uebhelper.StringSettings.app_server_api_get_ueb_package_url
    connection = httplib.HTTPConnection(service_host_address)
    job_status_complete = uebhelper.StringSettings.app_server_job_status_success
    model_config_datasets_with_status_complete = _get_model_configuration_datasets_by_processing_status(
        job_status_complete)

    if len(model_config_datasets_with_status_complete) == 0:
        log.info(source + "No UEB model configuration dataset has a status of %s at this time" % job_status_complete)
    else:
        log.info(source + "Number of UEB model configuration datasets with build status of %s at this time is:%s"
                 % (job_status_complete, len(model_config_datasets_with_status_complete)))

    for dataset in model_config_datasets_with_status_complete:
        # skip this dataset if the package is already available or an error has
        # already been logged for a previous retrieval attempt
        pkg_availability_status = h.get_pkg_dict_extra(dataset, 'package_availability')
        if pkg_availability_status in (uebhelper.StringSettings.app_server_job_status_package_available,
                                       uebhelper.StringSettings.app_server_job_status_error):
            continue

        pkg_process_job_id = h.get_pkg_dict_extra(dataset, 'package_build_request_job_id')
        dataset_id = dataset['id']

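        # ask the app server for the model package generated for this build job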
        service_request_url = service_request_api_url + '?packageID=' + pkg_process_job_id
        connection.request('GET', service_request_url)
        service_call_results = connection.getresponse()

        if service_call_results.status == httplib.OK:
            log.info(source + 'UEB model package was received from App server for PackageJobID:%s' % pkg_process_job_id)
            try:
                _save_ueb_package_as_dataset(service_call_results, dataset_id)
                pkg_availability_status = uebhelper.StringSettings.app_server_job_status_package_available
            except Exception as e:
                log.error(source + 'Failed to save ueb model package as a new dataset '
                                   'for model configuration dataset ID:%s\nException:%s' % (dataset_id, e))
                pkg_availability_status = uebhelper.StringSettings.app_server_job_status_error
        else:
            log.error(source + 'HTTP status %d returned from App server when retrieving '
                               'UEB model package for PackageJobID:'
                               '%s' % (service_call_results.status, pkg_process_job_id))
            pkg_availability_status = uebhelper.StringSettings.app_server_job_status_error

        connection.close()

        # record the package availability status on the related model configuration dataset
        data_dict = {'package_availability': pkg_availability_status}
        update_msg = 'system auto updated ueb package dataset'
        background_task = True
        try:
            updated_package = uebhelper.update_package(dataset_id, data_dict, update_msg, background_task)
            log.info(source + 'UEB model configuration dataset was updated as a result of '
                              'receiving model input package for dataset:%s' % updated_package['name'])
        except Exception as e:
            log.error(source + 'Failed to update UEB model configuration dataset after '
                               'receiving model input package for dataset ID:%s \n'
                               'Exception: %s' % (dataset_id, e))

    return
Example No. 31
0
def format_data_costs(package):
    # 'access_constraints' is stored as a JSON-encoded list of cost strings;
    # fall back to an empty JSON list if the extra is missing.
    data = h.get_pkg_dict_extra(package, 'access_constraints', '[]')
    data_list = json.loads(data)
    return ', '.join(data_list)
Example No. 32
0
    def test_create_package(self):
        local_org = factories.Organization(user=self.user,
                                           name='local-org')
        remote_org = factories.Organization(user=self.user,
                                            name='remote-org')

        helpers.call_action(
            'member_create',
            id=local_org['id'],
            object=self.user['id'],
            object_type='user',
            capacity='editor')

        helpers.call_action(
            'member_create',
            id=remote_org['id'],
            object=self.user['id'],
            object_type='user',
            capacity='editor')

        context = {
            'user': self.user['name'],
        }

        dataset = helpers.call_action(
            'package_create',
            context=context,
            name='syndicated_dataset',
            owner_org=local_org['id'],
            extras=[
                {'key': 'syndicate', 'value': 'true'},
            ],
            resources=[{
                'upload': test_upload_file,
                'url': 'test_file.txt',
                'url_type': 'upload',
                'format': 'txt',
                'name': 'test_file.txt',
            }, {
                'upload': test_upload_file,
                'url': 'test_file1.txt',
                'url_type': 'upload',
                'format': 'txt',
                'name': 'test_file1.txt',
            }],
        )
        assert_equal(dataset['name'], 'syndicated_dataset')

        with patch('ckanext.syndicate.tasks.get_target') as mock_target:
            # Mock API
            mock_target.return_value = ckanapi.TestAppCKAN(
                self._get_test_app(), apikey=self.user['apikey'])

            # Syndicate to our Test CKAN instance
            sync_package(dataset['id'], 'dataset/create')

        # Reload our local package, to read the syndicated ID
        source = helpers.call_action(
            'package_show',
            context=context,
            id=dataset['id'],
        )

        # The source package should have a syndicated_id set pointing to the
        # new syndicated package.
        syndicated_id = get_pkg_dict_extra(source, 'syndicated_id')
        assert_is_not_none(syndicated_id)

        # Expect a new package to be created
        syndicated = helpers.call_action(
            'package_show',
            context=context,
            id=syndicated_id,
        )

        # Expect the id of the syndicated package to match the metadata
        # syndicated_id in the source package.
        assert_equal(syndicated['id'], syndicated_id)
        assert_equal(syndicated['name'], 'test-syndicated_dataset')
        assert_equal(syndicated['owner_org'], remote_org['id'])

        # Test links to resources on the source CKAN instance have been added
        resources = syndicated['resources']
        assert_equal(len(resources), 2)
        remote_resource_url = resources[0]['url']
        local_resource_url = source['resources'][0]['url']
        assert_equal(local_resource_url, remote_resource_url)

        remote_resource_url = resources[1]['url']
        local_resource_url = source['resources'][1]['url']
        assert_equal(local_resource_url, remote_resource_url)
Example No. 33
0
    def after_search(self, search_results, search_params):
        limit = int(config.get('ckan.search_federation.min_search_results',
                               20))

        def _append_remote_search(search_keys, remote_org_label,
                                  remote_org_url, fed_labels, type_whitelist):

            local_results_num = len(search_results['results'])
            facet_fields = search_params.get('facet.field', [])

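            # Build negative filter queries so the remote search skips datasets
            # that are already tagged with one of the federation labels.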
            fq = " ".join(g for g in map(
                lambda sk: " ".join(e for e in map(
                    lambda x: "-" + sk + ":" + str(x), fed_labels)),
                search_keys))
            for fq_entry in toolkit.aslist(search_params['fq'][0]):
                fq_entry = fq_entry.replace('/"', '"').replace("//", "")
                fq_split = fq_entry.split(':', 1)
                if len(fq_split) == 2:
                    fq_key = fq_split[0]
                    fq_value = fq_split[1]
                    fq_monop = ""
                    if fq_key[0] in ['+', '-']:
                        fq_monop = fq_entry[:1]
                        fq_key = fq_key[1:]

                    # Dataset whitelist check
                    if (fq_key == 'dataset_type' and fq_monop != "-"
                            and fq_value not in type_whitelist):
                        return

                    if fq_key.lower() in facet_fields:
                        fq += " " + fq_monop + fq_key + ":" + fq_value
                else:
                    fq += " " + fq_entry

            count_only = False
            start = search_params.get('start', 0)

            if local_results_num >= start:
                remote_limit = limit - local_results_num + start
                if remote_limit <= 0:
                    count_only = True
                remote_start = 0
            else:
                remote_limit = limit
                remote_start = start - local_results_num

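            # Cache remote responses for an hour so repeated local searches do
            # not hammer the federated CKAN instance.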
            @beaker_cache(expire=3600)
            def _fetch_data(fetch_start, fetch_num):
                data = urllib.quote(
                    json.dumps({
                        'q': search_params['q'],
                        'fq': fq,
                        'facet.field': search_params.get('facet.field', []),
                        'rows': fetch_num,
                        'start': fetch_start,
                        'sort': search_params['sort'],
                        'extras': search_params['extras'],
                    }))

                try:
                    req = urllib2.Request(
                        remote_org_url + '/api/3/action/package_search', data)
                    rsp = urllib2.urlopen(req)
                except urllib2.URLError as err:
                    log.warn(
                        'Unable to connect to %r: %r' %
                        (remote_org_url + '/api/3/action/package_search', err))
                    return None
                content = rsp.read()
                return json.loads(content)

            remote_results = _fetch_data(0, 99999)

            # Only continue if the remote fetch was successful
            if remote_results is None:
                return search_results

            if count_only:
                remote_results['result']['results'] = []
            else:
                use_temp = False
                remote_results_num = len(remote_results['result']['results'])
                if remote_results_num <= remote_limit + remote_start:
                    if remote_results['result']['count'] > remote_results_num:
                        # While the result count reports all remote matches, the number of results may be limited
                        # by the CKAN install. Here our query has extended beyond the actual returned results, so
                        # we re-issue a more refined query starting and ending at precisely where we want (since
                        # we have already acquired the total count)
                        temp_results = _fetch_data(
                            remote_start,
                            min(
                                remote_results['result']['count'] -
                                remote_start, remote_limit))
                        if temp_results:
                            use_temp = True
                            remote_results['result']['results'] = temp_results[
                                'result']['results']

                if not use_temp:
                    remote_results['result']['results'] = \
                        remote_results['result']['results'][
                            remote_start:remote_start + remote_limit]

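            # Link each remote dataset back to its source catalogue and label
            # it with the remote organisation before merging the results.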
            for dataset in remote_results['result']['results']:
                extras = dataset.get('extras', [])
                if not h.get_pkg_dict_extra(dataset, 'harvest_url'):
                    extras.append({
                        'key': 'harvest_url',
                        'value': remote_org_url + '/dataset/' + dataset['id'],
                    })
                for k in search_keys:
                    if not h.get_pkg_dict_extra(dataset, k):
                        extras.append({'key': k, 'value': remote_org_label})
                dataset.update(extras=extras,
                               harvest_source_title=remote_org_label)
            search_results['count'] += remote_results['result']['count']
            if not count_only:
                search_results['results'] += remote_results['result'][
                    'results']
                if ('search_facets' in remote_results['result']
                        and self.use_remote_facets):
                    search_results['search_facets'] = remote_results['result'][
                        'search_facets']