def before_index(self, pkg_dict):
    """
    Prepare ``pkg_dict`` for the search index and return it.

    The dict is handed to a fixed sequence of ``SearchIndexing`` helpers
    (their return values are ignored, so they are called purely for
    their effect on ``pkg_dict``). Afterwards the space-separated values
    of ``theme-primary`` and ``theme-secondary`` are merged into a
    de-duplicated list stored under ``all_themes``.
    """
    indexing = SearchIndexing

    # The call order below is kept exactly as it always was; whether the
    # helpers depend on each other's output is not visible from here.
    indexing.clean_title_string(pkg_dict)
    indexing.add_field__is_ogl(pkg_dict)
    indexing.resource_format_cleanup(pkg_dict)
    indexing.add_field__group_titles(pkg_dict)
    indexing.add_field__publisher(pkg_dict)

    # The harvest document field is only added when the harvest plugin
    # is enabled.
    if is_plugin_enabled('harvest'):
        indexing.add_field__harvest_document(pkg_dict)

    indexing.add_field__openness(pkg_dict)
    indexing.add_popularity(pkg_dict)
    indexing.add_field__group_abbreviation(pkg_dict)
    indexing.add_inventory(pkg_dict)

    # Each theme field may hold several values joined with ' '. Flatten
    # both fields into a single multi-value entry, dropping the empty
    # tokens that splitting on a single space can produce.
    theme_fields = (
        pkg_dict.get('theme-primary', ''),
        pkg_dict.get('theme-secondary', ''),
    )
    unique_themes = {
        token
        for field in theme_fields
        for token in field.split(' ')
        if token
    }
    pkg_dict['all_themes'] = list(unique_themes)

    return pkg_dict
def before_index(self, pkg_dict):
    """
    Apply the DGU-specific changes to ``pkg_dict`` before it is indexed.

    The main purpose is to add the fields that back the search facets.
    Every step is a ``SearchIndexing`` helper that receives ``pkg_dict``;
    return values are ignored, so the helpers are called purely for
    their effect on the dict. ``pkg_dict`` itself is returned.
    """
    log.info('Indexing: %s', pkg_dict['name'])

    # Ordered (required plugin, step) pairs. A step whose plugin entry is
    # not None only runs when that plugin is enabled; the check happens
    # when the step is reached, not up front.
    pipeline = (
        (None, SearchIndexing.clean_title_string),
        (None, SearchIndexing.add_field__is_ogl),
        (None, SearchIndexing.resource_format_cleanup),
        (None, SearchIndexing.add_field__publisher),
        (None, SearchIndexing.add_field__organization_title_and_abbreviation),
        ('harvest', SearchIndexing.add_field__harvest_document),
        (None, SearchIndexing.add_field__openness),
        (None, SearchIndexing.add_popularity),
        (None, SearchIndexing.add_inventory),
        (None, SearchIndexing.add_its),
        (None, SearchIndexing.add_register),
        (None, SearchIndexing.add_api_flag),
        (None, SearchIndexing.add_theme),
        ('dgu_schema', SearchIndexing.add_schema),
        (None, SearchIndexing.add_collections),
    )

    for required_plugin, step in pipeline:
        if required_plugin is not None and not is_plugin_enabled(required_plugin):
            continue
        step(pkg_dict)

    return pkg_dict
def before_index(self, pkg_dict):
    """
    Prepare ``pkg_dict`` for the search index and return it.

    Logs the package name, then hands the dict to a fixed sequence of
    ``SearchIndexing`` helpers. Their return values are ignored, so they
    are called purely for their effect on ``pkg_dict``. Two of the steps
    are optional and only run when the ``harvest`` / ``dgu_schema``
    plugin is enabled.
    """
    log.info('Indexing: %s', pkg_dict['name'])

    # Ordered (required plugin, step) pairs. None means the step always
    # runs; otherwise the named plugin is checked at the moment the step
    # is reached.
    pipeline = (
        (None, SearchIndexing.clean_title_string),
        (None, SearchIndexing.add_field__is_ogl),
        (None, SearchIndexing.resource_format_cleanup),
        (None, SearchIndexing.add_field__group_titles),
        (None, SearchIndexing.add_field__publisher),
        ('harvest', SearchIndexing.add_field__harvest_document),
        (None, SearchIndexing.add_field__openness),
        (None, SearchIndexing.add_popularity),
        (None, SearchIndexing.add_field__group_abbreviation),
        (None, SearchIndexing.add_inventory),
        (None, SearchIndexing.add_theme),
        ('dgu_schema', SearchIndexing.add_schema),
    )

    for required_plugin, step in pipeline:
        if required_plugin is not None and not is_plugin_enabled(required_plugin):
            continue
        step(pkg_dict)

    return pkg_dict