def harvest_source_index_clear(context, data_dict):
    '''
    Clears all datasets, jobs and objects related to a harvest source, but
    keeps the source itself.  This is useful to clean history of long running
    harvest sources to start again fresh.

    :param id: the id of the harvest source to clear
    :type id: string

    :returns: dict with the id of the cleared harvest source
    :raises NotFound: if no harvest source exists for the given id
    :raises SearchIndexError: if the Solr delete/commit fails
    '''
    check_access('harvest_source_clear', context, data_dict)
    harvest_source_id = data_dict.get('id')

    source = HarvestSource.get(harvest_source_id)
    if not source:
        log.error('Harvest source %s does not exist', harvest_source_id)
        raise NotFound('Harvest source %s does not exist' % harvest_source_id)

    # HarvestSource.get() may have been given a name; use the canonical id.
    harvest_source_id = source.id

    conn = make_connection()
    query = ''' +%s:"%s" +site_id:"%s" ''' % (
        'harvest_source_id', harvest_source_id, config.get('ckan.site_id'))

    solr_commit = toolkit.asbool(config.get('ckan.search.solr_commit', 'true'))
    if toolkit.check_ckan_version(max_version='2.5.99'):
        # conn is solrpy (CKAN <= 2.5.x)
        try:
            conn.delete_query(query)
            if solr_commit:
                conn.commit()
        except Exception as e:
            log.exception(e)
            raise SearchIndexError(e)
        finally:
            # solrpy connections must be closed explicitly.
            conn.close()
    else:
        # conn is pysolr (CKAN >= 2.6)
        try:
            conn.delete(q=query, commit=solr_commit)
        except Exception as e:
            log.exception(e)
            raise SearchIndexError(e)

    return {'id': harvest_source_id}
    def index_resource_file(self, data_dict, file_index_field, file_path, defer_commit=False):
        """
        Full text indexes the input file along with the package, so that any
        text search matching content in the file will return the package.

        The index id of the package is looked up in Solr, the file content is
        extracted via Solr's extract handler and stored under
        ``file_index_field``, and the whole document is re-sent for indexing
        (the Solr version in use does not support partial field updates).

        :param data_dict: data dictionary of the package which contains the
            file as part of one of its resources
        :param file_index_field: field that holds the extracted file content
            in the indexed document
        :param file_path: path of the file to extract and index
        :param defer_commit: if True, do not commit the Solr update
        :raises SearchIndexError: if any Solr operation fails
        """
        try:
            conn = make_connection()

            # Honour both the caller's defer_commit flag and the site-wide
            # solr_commit setting.  (Previously `commit = True` was assigned
            # unconditionally, making both checks dead code.)
            commit = not defer_commit
            if not asbool(ckan_config.get('ckan.search.solr_commit', 'true')):
                commit = False

            query = "%s:%s" % ('id', data_dict['id'])
            response = conn.query(query)
            results = response.results

            if results:
                index_id = results[0]['index_id']

                # Extract the file's text content through Solr.
                file_content = conn._extract_content(file_path)

                data_dict[file_index_field] = file_content
                data_dict['index_id'] = index_id

                conn.update_fields(data_dict, [file_index_field], commit=commit)
        except Exception as e:
            log.exception(e)
            raise SearchIndexError(e)
def harvest_source_index_clear(context, data_dict):
    '''
    Clears the search index for all datasets of a harvest source.

    :param id: the id (or name) of the harvest source to clear
    :type id: string

    :returns: dict with the id of the cleared harvest source
    :raises NotFound: if no harvest source exists for the given id
    :raises SearchIndexError: if the Solr delete/commit fails
    '''
    check_access('harvest_source_clear', context, data_dict)
    harvest_source_id = data_dict.get('id')

    source = HarvestSource.get(harvest_source_id)
    if not source:
        log.error('Harvest source %s does not exist', harvest_source_id)
        raise NotFound('Harvest source %s does not exist' % harvest_source_id)

    # HarvestSource.get() may have been given a name; use the canonical id.
    harvest_source_id = source.id

    conn = make_connection()
    query = ''' +%s:"%s" +site_id:"%s" ''' % (
        'harvest_source_id', harvest_source_id, config.get('ckan.site_id'))

    # NOTE(review): the original body contained an unreachable second
    # delete attempt pasted inside the except handler and a try/else branch
    # referencing an undefined `solr_commit` — removed as scrape corruption.
    try:
        conn.delete_query(query)
        if asbool(config.get('ckan.search.solr_commit', 'true')):
            conn.commit()
    except Exception as e:
        log.exception(e)
        raise SearchIndexError(e)

    return {'id': harvest_source_id}


def harvest_objects_import(context, data_dict):
    '''
    Reimports the existing harvest objects, specified by either source_id,
    harvest_object_id or package_id.

    It performs the import stage with the last fetched objects, optionally
    belonging to a certain source.

    Please note that no objects will be fetched from the remote server.

    It will only affect the last fetched objects already present in the