Example #1
0
    def command(self):
        """Dispatch a search-index sub-command taken from ``self.args``.

        Recognised sub-commands are ``rebuild`` (an optional second argument
        is passed through to ``rebuild``), ``check``, ``show <dataset-name>``
        and ``clear``. With no arguments the usage text is printed; an
        unrecognised sub-command is reported on stdout.
        """
        self._load_config()
        # Imported here, after the configuration has been loaded.
        from ckan.lib.search import rebuild, check, show, clear

        if not self.args:
            # default to printing help
            print(self.usage)
            return

        cmd = self.args[0]
        if cmd == 'rebuild':
            if len(self.args) > 1:
                rebuild(self.args[1])
            else:
                rebuild()
        elif cmd == 'check':
            check()
        elif cmd == 'show':
            if len(self.args) != 2:
                # Report the usage error instead of dropping into a debugger
                # and then indexing past the end of self.args.
                print('Missing parameter: dataset-name')
                return
            show(self.args[1])
        elif cmd == 'clear':
            clear()
        else:
            print('Command %s not recognized' % cmd)
Example #2
0
    def command(self):
        """Run the search-index sub-command named by ``self.args[0]``.

        Recognised sub-commands are ``rebuild`` (an optional second argument
        is passed through to ``rebuild``), ``check``, ``show <dataset-name>``
        and ``clear``. With no arguments the usage text is printed; an
        unrecognised sub-command is reported on stdout.
        """
        self._load_config()
        # Imported here, after the configuration has been loaded.
        from ckan.lib.search import rebuild, check, show, clear

        if not self.args:
            # default to printing help
            print(self.usage)
            return

        cmd = self.args[0]
        if cmd == 'rebuild':
            if len(self.args) > 1:
                rebuild(self.args[1])
            else:
                rebuild()
        elif cmd == 'check':
            check()
        elif cmd == 'show':
            if len(self.args) != 2:
                # A wrong argument count is a usage error, not a reason to
                # open an interactive debugger.
                print('Missing parameter: dataset-name')
                return
            show(self.args[1])
        elif cmd == 'clear':
            clear()
        else:
            print('Command %s not recognized' % cmd)
Example #3
0
    def test_package_update_race_condition(self, lc_mock, dae_mock):
        """
        A package edit made while extraction runs is kept and indexed.
        """
        resource = factories.Resource(**RES_DICT)
        admin = factories.Sysadmin()

        def patch_package_during_extraction(*args, **kwargs):
            # Side effect for the mocked download/extract step: another
            # party changes the package title while extraction is running.
            package_patch = toolkit.get_action('package_patch')
            package_patch(
                {'user': admin['name']},
                {'id': resource['package_id'], 'title': 'A changed title'}
            )
            return {'fulltext': 'foobar'}

        dae_mock.side_effect = patch_package_during_extraction
        extract(config['__file__'], resource)

        # The concurrent title change must survive in the package itself ...
        package_show = toolkit.get_action('package_show')
        stored_pkg = package_show({}, {'id': resource['package_id']})
        assert_equal(stored_pkg['title'], 'A changed title')

        # ... and in the search-index entry for that package.
        indexed_pkg = search.show(resource['package_id'])
        assert_equal(indexed_pkg['title'], 'A changed title')
Example #4
0
File: cli.py Project: arkka/ckan
    def show(self):
        """Pretty-print the search-index entry for the dataset in ``self.args[1]``.

        Prints a "missing parameter" message and returns when ``self.args``
        does not hold exactly two items.
        """
        from ckan.lib.search import show as show_index_entry

        if len(self.args) != 2:
            print('Missing parameter: dataset-name')
            return

        dataset_name = self.args[1]
        pprint(show_index_entry(dataset_name))
Example #5
0
    def test_indexed_package_stores_resource_type(self):
        """Index a package with resources and check its ``res_type`` field."""
        package_index = search.index.PackageSearchIndex()
        dataset = self._get_pkg_dict_with_resources()
        package_index.index_package(dataset)

        stored = search.show(dataset['name'])

        # Resource types are indexed
        expected_types = ['doc', 'file']
        assert_equal(stored['res_type'], expected_types)
Example #6
0
    def test_index_package_stores_unvalidated_data_dict_without_validated_data_dict(self):
        """Regression test for #2208.

        The ``data_dict`` stored in the index must not itself contain a
        ``validated_data_dict`` key.
        """
        package_index = search.index.PackageSearchIndex()
        dataset = self._get_pkg_dict()
        package_index.index_package(dataset)

        indexed = search.show(dataset['name'])
        stored_data_dict = json.loads(indexed['data_dict'])

        assert_not_in('validated_data_dict', stored_data_dict)
Example #7
0
    def test_indexed_package_stores_resource_type(self):
        """The indexed package exposes its resource types under ``res_type``."""
        package_index = search.index.PackageSearchIndex()
        dataset = self._get_pkg_dict_with_resources()
        package_index.index_package(dataset)

        stored = search.show(dataset['name'])

        # Resource types are indexed
        expected_types = ['doc', 'file']
        assert_equal(stored['res_type'], expected_types)
Example #8
0
    def test_index_package_stores_unvalidated_data_dict_without_validated_data_dict(self):
        """Regression test for #2208.

        After indexing, the stored ``data_dict`` JSON must not carry a
        ``validated_data_dict`` key.
        """
        package_index = search.index.PackageSearchIndex()
        dataset = self._get_pkg_dict()
        package_index.index_package(dataset)

        indexed = search.show(dataset['name'])
        stored_data_dict = json.loads(indexed['data_dict'])

        assert_not_in('validated_data_dict', stored_data_dict)
Example #9
0
    def test_indexed_package_stores_resource_type(self):
        """Index a package with resources and check its ``res_type`` field."""
        package_index = search.index.PackageSearchIndex()
        dataset = self._get_pkg_dict_with_resources()
        package_index.index_package(dataset)

        stored = search.show(dataset["name"])

        # Resource types are indexed
        expected_types = ["doc", "file"]
        assert stored["res_type"] == expected_types
Example #10
0
    def test_index_package_stores_unvalidated_data_dict_without_validated_data_dict(self):
        """Regression test for #2208.

        The ``data_dict`` stored in the index must not itself contain a
        ``validated_data_dict`` key.
        """
        package_index = search.index.PackageSearchIndex()
        dataset = self._get_pkg_dict()
        package_index.index_package(dataset)

        indexed = search.show(dataset["name"])
        stored_data_dict = json.loads(indexed["data_dict"])

        assert "validated_data_dict" not in stored_data_dict
Example #11
0
    def command(self):
        """Run a search-index sub-command, defaulting to ``rebuild``.

        ``self.args[0]`` selects ``rebuild``, ``check`` or
        ``show <dataset-name>``. When no arguments are given, ``rebuild``
        is run. An unrecognised sub-command is reported on stdout.
        """
        self._load_config()
        # Imported here, after the configuration has been loaded.
        from ckan.lib.search import rebuild, check, show

        if not self.args:
            # default to run
            cmd = 'rebuild'
        else:
            cmd = self.args[0]

        if cmd == 'rebuild':
            rebuild()
        elif cmd == 'check':
            check()
        elif cmd == 'show':
            if len(self.args) != 2:
                # Report the usage error instead of dropping into a debugger
                # and then indexing past the end of self.args.
                print('Missing parameter: dataset-name')
                return
            show(self.args[1])
        else:
            print('Command %s not recognized' % cmd)
Example #12
0
    def test_index_package_stores_basic_solr_fields(self):
        """The indexed document carries the basic root-level fields."""
        package_index = search.index.PackageSearchIndex()
        dataset = self._get_pkg_dict()
        package_index.index_package(dataset)

        stored = search.show(dataset["name"])

        # At root level are the fields that SOLR uses
        expected_fields = (
            ("name", "river-quality"),
            ("metadata_modified", "2014-06-10T08:24:12.782Z"),
            ("entity_type", "package"),
            ("dataset_type", "dataset"),
        )
        for field, expected in expected_fields:
            assert stored[field] == expected
Example #13
0
    def test_index_package_stores_validated_data_dict(self):
        """The index entry holds a JSON ``validated_data_dict`` for the package."""
        package_index = search.index.PackageSearchIndex()
        dataset = self._get_pkg_dict()
        package_index.index_package(dataset)

        stored = search.show(dataset["name"])

        # validated_data_dict is the result of package_show, validated
        validated = json.loads(stored["validated_data_dict"])
        assert validated["name"] == "river-quality"

        # title is inserted (copied from the name) during validation
        # so its presence shows it is validated
        assert "title" in validated
Example #14
0
    def test_index_package_stores_validated_data_dict(self):
        """The index entry holds a JSON ``validated_data_dict`` for the package."""
        package_index = search.index.PackageSearchIndex()
        dataset = self._get_pkg_dict()
        package_index.index_package(dataset)

        stored = search.show(dataset['name'])

        # validated_data_dict is the result of package_show, validated
        validated = json.loads(stored['validated_data_dict'])
        assert_equal(validated['name'], 'river-quality')

        # title is inserted (copied from the name) during validation
        # so its presence shows it is validated
        assert_in('title', validated)
Example #15
0
    def test_index_package_stores_basic_solr_fields(self):
        """The indexed document carries the basic root-level fields."""
        package_index = search.index.PackageSearchIndex()
        dataset = self._get_pkg_dict()
        package_index.index_package(dataset)

        stored = search.show(dataset['name'])

        # At root level are the fields that SOLR uses
        expected_fields = (
            ('name', 'river-quality'),
            ('metadata_modified', '2014-06-10T08:24:12.782Z'),
            ('entity_type', 'package'),
            ('dataset_type', 'dataset'),
        )
        for field, expected in expected_fields:
            assert_equal(stored[field], expected)
Example #16
0
    def test_index_package_stores_basic_solr_fields(self):
        """Check the root-level fields of the indexed package document."""
        package_index = search.index.PackageSearchIndex()
        dataset = self._get_pkg_dict()
        package_index.index_package(dataset)

        stored = search.show(dataset['name'])

        # At root level are the fields that SOLR uses
        expected_fields = (
            ('name', 'river-quality'),
            ('metadata_modified', '2014-06-10T08:24:12.782Z'),
            ('entity_type', 'package'),
            ('dataset_type', 'dataset'),
        )
        for field, expected in expected_fields:
            assert_equal(stored[field], expected)
Example #17
0
    def test_index_package_stores_validated_data_dict(self):
        """Check the JSON ``validated_data_dict`` stored with the indexed package."""
        package_index = search.index.PackageSearchIndex()
        dataset = self._get_pkg_dict()
        package_index.index_package(dataset)

        stored = search.show(dataset['name'])

        # validated_data_dict is the result of package_show, validated
        validated = json.loads(stored['validated_data_dict'])
        assert_equal(validated['name'], 'river-quality')

        # title is inserted (copied from the name) during validation
        # so its presence shows it is validated
        assert_in('title', validated)
Example #18
0
    def test_index_package_stores_resource_extras_in_config_file(self):
        """Only the configured extra resource fields end up in the index."""
        package_index = search.index.PackageSearchIndex()
        dataset = self._get_pkg_dict_with_resources()
        package_index.index_package(dataset)

        stored = search.show(dataset['name'])

        # Resource fields given by ckan.extra_resource_fields are indexed
        expected_alt_urls = [
            'http://www.bar.com/riverquality.pdf',
            'http://www.bar.com/riverquality.csv',
        ]
        assert_equal(stored['res_extras_alt_url'], expected_alt_urls)

        # Other resource fields are ignored
        for ignored_field in ('res_extras_institution', 'res_extras_city'):
            assert_equal(stored.get(ignored_field, None), None)
Example #19
0
    def test_index_package_stores_resource_extras_in_config_file(self):
        """Configured extra resource fields are indexed; others are not."""
        package_index = search.index.PackageSearchIndex()
        dataset = self._get_pkg_dict_with_resources()
        package_index.index_package(dataset)

        stored = search.show(dataset['name'])

        # Resource fields given by ckan.extra_resource_fields are indexed
        expected_alt_urls = [
            'http://www.bar.com/riverquality.pdf',
            'http://www.bar.com/riverquality.csv',
        ]
        assert_equal(stored['res_extras_alt_url'], expected_alt_urls)

        # Other resource fields are ignored
        for ignored_field in ('res_extras_institution', 'res_extras_city'):
            assert_equal(stored.get(ignored_field, None), None)
Example #20
0
    def test_index_package_stores_resource_extras_in_config_file(self):
        """Only the configured extra resource fields end up in the index."""
        package_index = search.index.PackageSearchIndex()
        dataset = self._get_pkg_dict_with_resources()
        package_index.index_package(dataset)

        stored = search.show(dataset["name"])

        # Resource fields given by ckan.extra_resource_fields are indexed
        expected_alt_urls = [
            "http://www.bar.com/riverquality.pdf",
            "http://www.bar.com/riverquality.csv",
        ]
        assert stored["res_extras_alt_url"] == expected_alt_urls

        # Other resource fields are ignored
        for ignored_field in ("res_extras_institution", "res_extras_city"):
            assert stored.get(ignored_field, None) is None
    def test_package_update_race_condition(self, lc_mock, dae_mock):
        """
        A package edit made while extraction runs is kept and indexed.
        """
        resource = factories.Resource(**RES_DICT)
        admin = factories.Sysadmin()

        def patch_package_during_extraction(*args, **kwargs):
            # Side effect for the mocked download/extract step: another
            # party changes the package title while extraction is running.
            package_patch = toolkit.get_action('package_patch')
            package_patch(
                {'user': admin['name']},
                {'id': resource['package_id'], 'title': 'A changed title'}
            )
            return {'fulltext': 'foobar'}

        dae_mock.side_effect = patch_package_during_extraction
        extract(config['__file__'], resource)

        # The concurrent title change must survive in the package itself ...
        package_show = toolkit.get_action('package_show')
        stored_pkg = package_show({}, {'id': resource['package_id']})
        assert_equal(stored_pkg['title'], 'A changed title')

        # ... and in the search-index entry for that package.
        indexed_pkg = search.show(resource['package_id'])
        assert_equal(indexed_pkg['title'], 'A changed title')
def _modification_time_in_the_search_index(pkg):
    """Return the indexed ``last_major_modification`` of ``pkg`` as a naive datetime.

    The index entry is looked up by ``pkg.name``; the timestamp string is
    parsed with ``dateutil`` and its tzinfo is dropped.
    """
    indexed_entry = show(pkg.name)
    parsed = dateutil.parser.parse(indexed_entry['last_major_modification'])
    return parsed.replace(tzinfo=None)
 def test_dataset_properties_in_solr(self):
     """Looking up ``test_private_dataset_1`` in the search index must not raise."""
     try:
         search.show('test_private_dataset_1')
     except Exception:
         # Raise explicitly rather than ``assert False`` so the failure is
         # still reported when assertions are disabled (``python -O``).
         raise AssertionError('search query needs to succeed')
Example #24
0
def show(dataset_name: str):
    """Echo the search-index entry stored for ``dataset_name``."""
    # Aliased so the imported helper does not shadow this function's name.
    from ckan.lib.search import show as show_index_entry

    click.echo(show_index_entry(dataset_name))
Example #25
0
def _modification_time_in_the_search_index(pkg):
    """Return the indexed ``last_major_modification`` of ``pkg`` as a naive datetime.

    The index entry is looked up by ``pkg.name``; the timestamp string is
    parsed with ``dateutil`` and its tzinfo is dropped.
    """
    indexed_entry = show(pkg.name)
    parsed = dateutil.parser.parse(indexed_entry['last_major_modification'])
    return parsed.replace(tzinfo=None)
Example #26
0
def show(dataset_name):
    """Echo the search-index entry stored for ``dataset_name``."""
    # Aliased so the imported helper does not shadow this function's name.
    from ckan.lib.search import show as show_index_entry

    click.echo(show_index_entry(dataset_name))
Example #27
0
def package_show(context, data_dict):
    '''Return the metadata of a dataset (package) and its resources.

    :param id: the id or name of the dataset
    :type id: string
    :param use_default_schema: use default package schema instead of
        a custom schema defined with an IDatasetForm plugin (default: False)
    :type use_default_schema: bool
    :param include_tracking: add tracking information to dataset and
        resources (default: False)
    :type include_tracking: bool
    :rtype: dictionary
    :raises NotFound: if ``model.Package.get`` finds no package for the
        given id or name

    '''
    model = context['model']
    context['session'] = model.Session
    # 'id' is preferred; 'name_or_id' is the fallback key
    # (presumably _get_or_bust raises if it is missing too -- confirm).
    name_or_id = data_dict.get("id") or _get_or_bust(data_dict, 'name_or_id')

    pkg = model.Package.get(name_or_id)

    if pkg is None:
        raise NotFound

    # Stored on the context before the access check runs.
    context['package'] = pkg

    _check_access('package_show', context, data_dict)

    if data_dict.get('use_default_schema', False):
        context['schema'] = ckan.logic.schema.default_show_package_schema()
    include_tracking = asbool(data_dict.get('include_tracking', False))

    package_dict = None
    # The search index is used as a cache unless the caller switches it off
    # through context['use_cache'] or asks for a specific revision.
    use_cache = (context.get('use_cache', True)
                 and not 'revision_id' in context
                 and not 'revision_date' in context)
    if use_cache:
        try:
            search_result = search.show(name_or_id)
        except (search.SearchError, socket.error):
            # Lookup failed: package_dict stays None, so the package is
            # dictized from the model object below.
            pass
        else:
            # The stored validated dict is only reused when no schema is
            # present in the context.
            use_validated_cache = 'schema' not in context
            if use_validated_cache and 'validated_data_dict' in search_result:
                package_json = search_result['validated_data_dict']
                package_dict = json.loads(package_json)
                package_dict_validated = True
            else:
                package_dict = json.loads(search_result['data_dict'])
                package_dict_validated = False
            metadata_modified = pkg.metadata_modified.isoformat()
            search_metadata_modified = search_result['metadata_modified']
            # solr stores less precise datetime,
            # truncate to 22 characters to get good enough match
            if metadata_modified[:22] != search_metadata_modified[:22]:
                # Timestamps differ: discard the indexed copy.
                package_dict = None

    if not package_dict:
        # No usable indexed copy: build the dict from the model object.
        package_dict = model_dictize.package_dictize(pkg, context)
        package_dict_validated = False

    if include_tracking:
        # page-view tracking summary data
        package_dict['tracking_summary'] = (
            model.TrackingSummary.get_for_package(package_dict['id']))

        for resource_dict in package_dict['resources']:
            _add_tracking_summary_to_resource_dict(resource_dict, model)

    if context.get('for_view'):
        # before_view may replace the dict, so its return value is kept.
        for item in plugins.PluginImplementations(plugins.IPackageController):
            package_dict = item.before_view(package_dict)

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.read(pkg)

    for item in plugins.PluginImplementations(plugins.IResourceController):
        for resource_dict in package_dict['resources']:
            item.before_show(resource_dict)

    # A dict taken from the index's validated copy skips validation; any
    # other dict is validated here, unless there is no schema or
    # context['validate'] is falsy.
    if not package_dict_validated:
        package_plugin = lib_plugins.lookup_package_plugin(
            package_dict['type'])
        if 'schema' in context:
            schema = context['schema']
        else:
            schema = package_plugin.show_package_schema()
        if schema and context.get('validate', True):
            # NOTE(review): the returned errors are not inspected here.
            package_dict, errors = lib_plugins.plugin_validate(
                package_plugin, context, package_dict, schema, 'package_show')

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.after_show(context, package_dict)

    return package_dict