def command(self):
    """Dispatch a search-index sub-command given in ``self.args``.

    Supported sub-commands (first positional argument):

    * ``rebuild [arg]`` -- call ``rebuild``, forwarding the optional
      second argument when one is given.
    * ``check`` -- call ``check``.
    * ``show <dataset-name>`` -- call ``show`` with the dataset name.
    * ``clear`` -- call ``clear``.

    With no arguments the usage text is printed. An unknown sub-command
    prints an error message.
    """
    self._load_config()
    from ckan.lib.search import rebuild, check, show, clear

    if not self.args:
        # default to printing help
        print(self.usage)
        return

    cmd = self.args[0]
    if cmd == 'rebuild':
        if len(self.args) > 1:
            rebuild(self.args[1])
        else:
            rebuild()
    elif cmd == 'check':
        check()
    elif cmd == 'show':
        # Report a usage error instead of dropping into the debugger and
        # then indexing past the end of ``self.args``.
        if len(self.args) != 2:
            print('Missing parameter: dataset-name')
            return
        show(self.args[1])
    elif cmd == 'clear':
        clear()
    else:
        print('Command %s not recognized' % cmd)
def command(self):
    """Run the search-index sub-command named by ``self.args[0]``.

    Recognised sub-commands are ``rebuild`` (with an optional extra
    argument that is passed through), ``check``, ``show <dataset-name>``
    and ``clear``. Without any argument the usage text is printed; an
    unrecognised sub-command prints an error message.
    """
    self._load_config()
    from ckan.lib.search import rebuild, check, show, clear

    if not self.args:
        # default to printing help
        print(self.usage)
        return

    cmd = self.args[0]
    if cmd == 'rebuild':
        if len(self.args) > 1:
            rebuild(self.args[1])
        else:
            rebuild()
    elif cmd == 'check':
        check()
    elif cmd == 'show':
        # A wrong argument count used to trigger a pdb breakpoint; tell the
        # user what is missing and stop instead.
        if len(self.args) != 2:
            print('Missing parameter: dataset-name')
            return
        show(self.args[1])
    elif cmd == 'clear':
        clear()
    else:
        print('Command %s not recognized' % cmd)
def test_package_update_race_condition(self, lc_mock, dae_mock):
    """
    Handling of package updates during extraction.
    """
    resource = factories.Resource(**RES_DICT)
    admin = factories.Sysadmin()
    changed_title = 'A changed title'

    def patch_package_then_extract(*args, **kwargs):
        # Stand-in for the download/extraction step: another party edits
        # the package while the extraction is still in progress.
        patch_action = toolkit.get_action('package_patch')
        patch_action(
            {'user': admin['name']},
            {'id': resource['package_id'], 'title': changed_title},
        )
        return {'fulltext': 'foobar'}

    dae_mock.side_effect = patch_package_then_extract
    extract(config['__file__'], resource)

    # The concurrent title change must survive in the package itself ...
    show_action = toolkit.get_action('package_show')
    shown = show_action({}, {'id': resource['package_id']})
    assert_equal(shown['title'], changed_title)

    # ... and in the search index entry.
    indexed = search.show(resource['package_id'])
    assert_equal(indexed['title'], changed_title)
def show(self):
    """Pretty-print the search index entry for the dataset in ``self.args[1]``.

    Prints a "missing parameter" message and returns when the argument
    list does not have exactly two entries.
    """
    from ckan.lib.search import show as show_index_entry

    if len(self.args) != 2:
        print('Missing parameter: dataset-name')
        return

    pprint(show_index_entry(self.args[1]))
def test_indexed_package_stores_resource_type(self):
    """Indexing a package with resources stores the resource types."""
    package_index = search.index.PackageSearchIndex()
    dataset = self._get_pkg_dict_with_resources()
    package_index.index_package(dataset)

    # Read the entry back and check the indexed resource types.
    stored = search.show(dataset['name'])
    expected_types = ['doc', 'file']
    assert_equal(stored['res_type'], expected_types)
def test_index_package_stores_unvalidated_data_dict_without_validated_data_dict(self):
    """Regression test for #2208.

    The ``data_dict`` stored in the index must not itself contain a
    ``validated_data_dict`` key.
    """
    package_index = search.index.PackageSearchIndex()
    dataset = self._get_pkg_dict()
    package_index.index_package(dataset)

    stored = search.show(dataset['name'])
    unvalidated = json.loads(stored['data_dict'])
    assert_not_in('validated_data_dict', unvalidated)
def test_indexed_package_stores_resource_type(self):
    """Indexing a package with resources stores the resource types."""
    package_index = search.index.PackageSearchIndex()
    dataset = self._get_pkg_dict_with_resources()
    package_index.index_package(dataset)

    # Read the entry back and check the indexed resource types.
    stored = search.show(dataset["name"])
    expected_types = ["doc", "file"]
    assert stored["res_type"] == expected_types
def test_index_package_stores_unvalidated_data_dict_without_validated_data_dict(
    self,
):
    """Regression test for #2208.

    The ``data_dict`` stored in the index must not itself contain a
    ``validated_data_dict`` key.
    """
    package_index = search.index.PackageSearchIndex()
    dataset = self._get_pkg_dict()
    package_index.index_package(dataset)

    stored = search.show(dataset["name"])
    unvalidated = json.loads(stored["data_dict"])
    assert "validated_data_dict" not in unvalidated
def command(self):
    """Run a search-index sub-command, defaulting to ``rebuild``.

    The sub-command is ``self.args[0]`` when arguments are present and
    ``'rebuild'`` otherwise. Recognised sub-commands are ``rebuild``,
    ``check`` and ``show <dataset-name>``; anything else prints an error
    message.
    """
    self._load_config()
    from ckan.lib.search import rebuild, check, show

    # default to run
    cmd = self.args[0] if self.args else 'rebuild'

    if cmd == 'rebuild':
        rebuild()
    elif cmd == 'check':
        check()
    elif cmd == 'show':
        # Report the missing dataset name rather than stopping in pdb and
        # then failing on ``self.args[1]``.
        if len(self.args) != 2:
            print('Missing parameter: dataset-name')
            return
        show(self.args[1])
    else:
        print('Command %s not recognized' % cmd)
def test_index_package_stores_basic_solr_fields(self):
    """The indexed entry exposes the basic SOLR fields at its root level."""
    package_index = search.index.PackageSearchIndex()
    dataset = self._get_pkg_dict()
    package_index.index_package(dataset)

    stored = search.show(dataset["name"])

    # At root level are the fields that SOLR uses; check them in order.
    expected_fields = (
        ("name", "river-quality"),
        ("metadata_modified", "2014-06-10T08:24:12.782Z"),
        ("entity_type", "package"),
        ("dataset_type", "dataset"),
    )
    for field, expected in expected_fields:
        assert stored[field] == expected
def test_index_package_stores_validated_data_dict(self):
    """The indexed entry carries a JSON-encoded ``validated_data_dict``."""
    package_index = search.index.PackageSearchIndex()
    dataset = self._get_pkg_dict()
    package_index.index_package(dataset)

    stored = search.show(dataset["name"])

    # validated_data_dict is the result of package_show, validated
    validated_json = stored["validated_data_dict"]
    validated = json.loads(validated_json)
    assert validated["name"] == "river-quality"

    # title is inserted (copied from the name) during validation
    # so its presence shows it is validated
    assert "title" in validated
def test_index_package_stores_validated_data_dict(self):
    """The indexed entry carries a JSON-encoded ``validated_data_dict``."""
    package_index = search.index.PackageSearchIndex()
    dataset = self._get_pkg_dict()
    package_index.index_package(dataset)

    stored = search.show(dataset['name'])

    # validated_data_dict is the result of package_show, validated
    validated_json = stored['validated_data_dict']
    validated = json.loads(validated_json)
    assert_equal(validated['name'], 'river-quality')

    # title is inserted (copied from the name) during validation
    # so its presence shows it is validated
    assert_in('title', validated)
def test_index_package_stores_basic_solr_fields(self):
    """The indexed entry exposes the basic SOLR fields at its root level."""
    package_index = search.index.PackageSearchIndex()
    dataset = self._get_pkg_dict()
    package_index.index_package(dataset)

    stored = search.show(dataset['name'])

    # At root level are the fields that SOLR uses; check them in order.
    expected_fields = (
        ('name', 'river-quality'),
        ('metadata_modified', '2014-06-10T08:24:12.782Z'),
        ('entity_type', 'package'),
        ('dataset_type', 'dataset'),
    )
    for field, expected in expected_fields:
        assert_equal(stored[field], expected)
def test_index_package_stores_resource_extras_in_config_file(self):
    """Only the configured extra resource fields end up in the index."""
    package_index = search.index.PackageSearchIndex()
    dataset = self._get_pkg_dict_with_resources()
    package_index.index_package(dataset)

    stored = search.show(dataset['name'])

    # Resource fields given by ckan.extra_resource_fields are indexed
    expected_alt_urls = [
        'http://www.bar.com/riverquality.pdf',
        'http://www.bar.com/riverquality.csv',
    ]
    assert_equal(stored['res_extras_alt_url'], expected_alt_urls)

    # Other resource fields are ignored
    for ignored_field in ('res_extras_institution', 'res_extras_city'):
        assert_equal(stored.get(ignored_field), None)
def test_index_package_stores_resource_extras_in_config_file(self):
    """Only the configured extra resource fields end up in the index."""
    package_index = search.index.PackageSearchIndex()
    dataset = self._get_pkg_dict_with_resources()
    package_index.index_package(dataset)

    stored = search.show(dataset["name"])

    # Resource fields given by ckan.extra_resource_fields are indexed
    expected_alt_urls = [
        "http://www.bar.com/riverquality.pdf",
        "http://www.bar.com/riverquality.csv",
    ]
    assert stored["res_extras_alt_url"] == expected_alt_urls

    # Other resource fields are ignored
    for ignored_field in ("res_extras_institution", "res_extras_city"):
        assert stored.get(ignored_field) is None
def test_package_update_race_condition(self, lc_mock, dae_mock):
    """
    Handling of package updates during extraction.
    """
    resource = factories.Resource(**RES_DICT)
    admin = factories.Sysadmin()
    changed_title = 'A changed title'

    def concurrent_edit_during_extraction(*args, **kwargs):
        # Replaces the real download/extraction: while it "runs", someone
        # else patches the package title.
        patch_context = {'user': admin['name']}
        patch_action = toolkit.get_action('package_patch')
        patch_action(patch_context,
                     {'id': resource['package_id'],
                      'title': changed_title})
        return {'fulltext': 'foobar'}

    dae_mock.side_effect = concurrent_edit_during_extraction
    extract(config['__file__'], resource)

    # Make sure that the changed package metadata is kept ...
    show_action = toolkit.get_action('package_show')
    shown = show_action({}, {'id': resource['package_id']})
    assert_equal(shown['title'], changed_title)

    # ... and indexed.
    indexed = search.show(resource['package_id'])
    assert_equal(indexed['title'], changed_title)
def _modification_time_in_the_search_index(pkg):
    """Return the indexed ``last_major_modification`` of ``pkg`` as a datetime.

    The value is looked up by ``pkg.name`` via ``show``, parsed with
    ``dateutil`` and returned with its ``tzinfo`` set to ``None``.
    """
    indexed = show(pkg.name)
    parsed = dateutil.parser.parse(indexed['last_major_modification'])
    # Strip the timezone so the result is a naive datetime.
    return parsed.replace(tzinfo=None)
def test_dataset_properties_in_solr(self):
    """Check that looking up 'test_private_dataset_1' in the index succeeds.

    Any exception raised by ``search.show`` fails the test; the original
    error is included in the failure message so the cause is not lost.
    """
    try:
        search.show('test_private_dataset_1')
    except Exception as e:
        # Raise explicitly: a bare ``assert False`` is stripped under -O.
        raise AssertionError('search query needs to succeed: %r' % (e,))
def show(dataset_name: str):
    """Echo the search index entry stored for ``dataset_name``."""
    # Imported under an alias so it does not shadow this command's name.
    from ckan.lib.search import show as show_index_entry

    click.echo(show_index_entry(dataset_name))
def show(dataset_name):
    """Look up ``dataset_name`` in the search index and echo the entry."""
    # Imported under an alias so it does not shadow this command's name.
    from ckan.lib.search import show as lookup_index_entry

    entry = lookup_index_entry(dataset_name)
    click.echo(entry)
def package_show(context, data_dict):
    '''Return the metadata of a dataset (package) and its resources.

    The package dict is taken from the search index when caching is
    allowed and the indexed ``metadata_modified`` matches the database
    value; otherwise it is built from the model with
    ``model_dictize.package_dictize``. Unless an already validated dict
    came from the index, the result is run through the show schema.

    Context keys read here: ``model`` (required), ``use_cache`` (default
    True), ``revision_id`` / ``revision_date`` (their presence disables the
    cache), ``schema``, ``validate`` (default True) and ``for_view``.
    ``session``, ``package`` and possibly ``schema`` are written to the
    context.

    :param id: the id or name of the dataset (``name_or_id`` is used as a
        fallback key when ``id`` is missing or empty)
    :type id: string
    :param use_default_schema: use default package schema instead of
        a custom schema defined with an IDatasetForm plugin (default: False)
    :type use_default_schema: bool
    :param include_tracking: add tracking information to dataset and
        resources (default: False)
    :type include_tracking: bool

    :raises NotFound: if no package matches the given id or name

    :rtype: dictionary

    '''
    model = context['model']
    context['session'] = model.Session
    # Prefer 'id'; otherwise fall back to the 'name_or_id' key.
    # NOTE(review): _get_or_bust presumably raises when the key is absent
    # -- confirm against its definition.
    name_or_id = data_dict.get("id") or _get_or_bust(data_dict, 'name_or_id')

    pkg = model.Package.get(name_or_id)

    if pkg is None:
        raise NotFound

    # Expose the package object on the context before the access check.
    context['package'] = pkg

    _check_access('package_show', context, data_dict)

    if data_dict.get('use_default_schema', False):
        context['schema'] = ckan.logic.schema.default_show_package_schema()
    include_tracking = asbool(data_dict.get('include_tracking', False))

    package_dict = None
    # The index is only consulted when the caller has not opted out and no
    # specific revision is being requested.
    use_cache = (context.get('use_cache', True)
                 and not 'revision_id' in context
                 and not 'revision_date' in context)
    if use_cache:
        try:
            search_result = search.show(name_or_id)
        except (search.SearchError, socket.error):
            # Index lookup failed: package_dict stays None, so the package
            # is dictized from the model below.
            pass
        else:
            # The validated copy is only used when no explicit schema is
            # set on the context.
            use_validated_cache = 'schema' not in context
            if use_validated_cache and 'validated_data_dict' in search_result:
                package_json = search_result['validated_data_dict']
                package_dict = json.loads(package_json)
                package_dict_validated = True
            else:
                package_dict = json.loads(search_result['data_dict'])
                package_dict_validated = False
            metadata_modified = pkg.metadata_modified.isoformat()
            search_metadata_modified = search_result['metadata_modified']
            # solr stores less precise datetime,
            # truncate to 22 characters to get good enough match
            if metadata_modified[:22] != search_metadata_modified[:22]:
                # Timestamps differ: discard the indexed copy.
                package_dict = None

    if not package_dict:
        # No usable indexed copy: build the dict from the model; it still
        # needs validating.
        package_dict = model_dictize.package_dictize(pkg, context)
        package_dict_validated = False

    if include_tracking:
        # page-view tracking summary data
        package_dict['tracking_summary'] = (
            model.TrackingSummary.get_for_package(package_dict['id']))

        for resource_dict in package_dict['resources']:
            _add_tracking_summary_to_resource_dict(resource_dict, model)

    if context.get('for_view'):
        # Each plugin's before_view returns the dict passed to the next one.
        for item in plugins.PluginImplementations(plugins.IPackageController):
            package_dict = item.before_view(package_dict)

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.read(pkg)

    for item in plugins.PluginImplementations(plugins.IResourceController):
        for resource_dict in package_dict['resources']:
            item.before_show(resource_dict)

    if not package_dict_validated:
        package_plugin = lib_plugins.lookup_package_plugin(
            package_dict['type'])
        # A schema on the context takes precedence over the plugin's own
        # show schema.
        if 'schema' in context:
            schema = context['schema']
        else:
            schema = package_plugin.show_package_schema()
        if schema and context.get('validate', True):
            # The returned errors are not inspected in this function.
            package_dict, errors = lib_plugins.plugin_validate(
                package_plugin, context, package_dict, schema,
                'package_show')

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.after_show(context, package_dict)

    return package_dict