def harvest_source_index_clear(context, data_dict): ''' Clears all datasets, jobs and objects related to a harvest source, but keeps the source itself. This is useful to clean history of long running harvest sources to start again fresh. :param id: the id of the harvest source to clear :type id: string ''' check_access('harvest_source_clear', context, data_dict) harvest_source_id = data_dict.get('id') source = HarvestSource.get(harvest_source_id) if not source: log.error('Harvest source %s does not exist', harvest_source_id) raise NotFound('Harvest source %s does not exist' % harvest_source_id) harvest_source_id = source.id conn = make_connection() query = ''' +%s:"%s" +site_id:"%s" ''' % ( 'harvest_source_id', harvest_source_id, config.get('ckan.site_id')) solr_commit = toolkit.asbool(config.get('ckan.search.solr_commit', 'true')) if toolkit.check_ckan_version(max_version='2.5.99'): # conn is solrpy try: conn.delete_query(query) if solr_commit: conn.commit() except Exception, e: log.exception(e) raise SearchIndexError(e) finally:
def index_resource_file(self, data_dict, file_index_field, file_path,
                        defer_commit=False):
    """
    Full-text index the given file along with its package, so that a
    text search matching content in the file returns the package.

    The package's index id is looked up in Solr, the file content is
    extracted via Solr and appended to the data dictionary, and the
    package is re-indexed.  The Solr version in use cannot update a
    single field of a document in place, which is why the entire
    package content is sent again.

    :param data_dict: data dictionary of the package that contains the
        file as part of one of its resources
    :param file_index_field: field name under which the extracted file
        content is stored in the indexed dictionary
    :param file_path: path of the file to extract and index
    :param defer_commit: if True, do not commit to Solr immediately
    :raises SearchIndexError: if querying, extraction or re-indexing fails
    """
    try:
        conn = make_connection()
        # Commit immediately unless the caller deferred it; the global
        # 'ckan.search.solr_commit' setting can disable commits outright.
        commit = not defer_commit
        if not asbool(ckan_config.get('ckan.search.solr_commit', 'true')):
            commit = False
        query = "%s:%s" % ('id', data_dict['id'])
        response = conn.query(query)
        results = response.results
        if results:
            index_id = results[0]['index_id']
            # Extract the file's text through Solr and re-index the whole
            # package with the content attached.
            file_content = conn._extract_content(file_path)
            data_dict[file_index_field] = file_content
            data_dict['index_id'] = index_id
            conn.update_fields(data_dict, [file_index_field], commit=commit)
    except Exception as e:
        log.exception(e)
        raise SearchIndexError(e)
def harvest_source_index_clear(context, data_dict):
    '''
    Deletes all search-index entries for the datasets belonging to a
    harvest source.  The harvest source itself is not modified.

    :param id: the id of the harvest source whose index entries to clear
    :type id: string
    :raises NotFound: if the harvest source does not exist
    :raises SearchIndexError: if the Solr delete or commit fails
    '''
    check_access('harvest_source_clear', context, data_dict)
    harvest_source_id = data_dict.get('id')

    source = HarvestSource.get(harvest_source_id)
    if not source:
        log.error('Harvest source %s does not exist', harvest_source_id)
        raise NotFound('Harvest source %s does not exist' %
                       harvest_source_id)

    # Use the canonical id from the source record (HarvestSource.get may
    # have resolved a name or partial id).
    harvest_source_id = source.id

    conn = make_connection()
    query = ''' +%s:"%s" +site_id:"%s" ''' % (
        'harvest_source_id', harvest_source_id,
        config.get('ckan.site_id'))
    try:
        conn.delete_query(query)
        if asbool(config.get('ckan.search.solr_commit', 'true')):
            conn.commit()
    except Exception as e:
        log.exception(e)
        raise SearchIndexError(e)

    return {'id': harvest_source_id}
try: conn.delete_query(query) if solr_commit: conn.commit() except Exception, e: log.exception(e) raise SearchIndexError(e) finally: conn.close() else: # conn is pysolr try: conn.delete(q=query, commit=solr_commit) except Exception, e: log.exception(e) raise SearchIndexError(e) return {'id': harvest_source_id} def harvest_objects_import(context, data_dict): ''' Reimports the existing harvest objects, specified by either source_id, harvest_object_id or package_id. It performs the import stage with the last fetched objects, optionally belonging to a certain source. Please note that no objects will be fetched from the remote server. It will only affect the last fetched objects already present in the