Exemple #1
0
 def get_solr_data(self):
     """Return a dictionary of this object's fields to be indexed by SOLR.

     Every name yielded by ``self._field_names()`` is looked up on the
     instance; attributes that do not exist are stored as ``None``.
     ``datetime.datetime`` and ``datetime.date`` values are converted with
     ``datetime_to_string_zulu``; all other values are stored unchanged.

     ``sequenceNumber`` is added on top of those fields, but only when the
     instance has a non-``None`` value for it.
     """
     data = {}
     for name in self._field_names():
         value = getattr(self, name, None)
         if isinstance(value, (datetime.datetime, datetime.date)):
             value = datetime_to_string_zulu(value)
         data[name] = value

     # Only add 'sequenceNumber' if we have calculated it (this allows us to
     # update separate components without recalculating the index).
     # This is checked once, after the loop: the check does not depend on the
     # field being iterated, and it must also apply when there are no fields.
     sequence_number = getattr(self, 'sequenceNumber', None)
     if sequence_number is not None:
         data['sequenceNumber'] = sequence_number
     return data
Exemple #2
0
 def maybe(date):
     """Format ``date`` with ``datetime_to_string_zulu`` unless it is falsy.

     Falsy inputs (``None``, ``''``, ...) are handed back untouched so that
     missing dates stay missing instead of being passed to the formatter.
     """
     return datetime_to_string_zulu(date) if date else date
Exemple #3
0
def search_archivefiles(context, **kwargs):
    """Search ``context.solr_archivefile`` and return the matches with metadata.

    possible arguments:
        start : offset of the first result (default 0)
        limit : maximum number of results, sent as ``rows`` (default 1000)
        date_from
        date_to
            when either is given, only documents whose
            [date_from, date_to] range overlaps the requested range match;
            a missing bound is treated as open ('*')
        archive
        archive_id
        archiveFile
        country
        contains_text : matched (lower-cased) against ``search_source``;
            also switches on highlighting and adds a 'snippet' to each result
        ead_id
        findingaid
        institution
        is_archiveFile : NOTE(review): listed here historically, but this
            function never reads it - confirm whether it should be a filter
        is_component : only used as a filter when truthy
        parent
        show_in_tree
        language
        xpath
        with_facets : return the argument with facets, default is True

    Returns a dict with the keys 'results', 'query_used', 'total_results',
    'start', 'end' and 'facets'.
    """
    start = kwargs.get('start', 0)
    limit = kwargs.get('limit', 1000)
    with_facets = kwargs.get('with_facets', True)

    # Paging applies to every search; previously it was only sent together
    # with the facet parameters, so non-faceted searches ignored start/limit.
    query = {
        'start': start,
        'rows': limit,
    }
    if with_facets:
        query['facet'] = 'true'
        query['facet.field'] = ['country', 'language']

    conditions = []
    query_used = {}

    date_from = kwargs.get('date_from')
    date_to = kwargs.get('date_to')
    if date_from or date_to:
        lower_bound = datetime_to_string_zulu(date_from) if date_from else '*'
        upper_bound = datetime_to_string_zulu(date_to) if date_to else '*'
        # Overlap test: the document must start before the requested end and
        # end after the requested start.
        conditions.append('date_from:[* TO %s]' % upper_bound)
        conditions.append('date_to:[%s TO *]' % lower_bound)

    filter_fields = (
        'archive',
        'archive_id',
        'archiveFile',
        'country',
        'contains_text',
        'ead_id',
        'findingaid',
        'institution',
        'is_component',
        'language',
        'parent',
        'show_in_tree',
        'xpath',
    )
    for k in filter_fields:
        value = kwargs.get(k)
        if k == 'is_component' and not value:
            # a falsy is_component means "do not filter on it"
            value = None
        if value is None:
            continue

        query_used[k] = value
        value = solr_escape(value)
        # always send the value as a single quoted phrase
        value = '"%s"' % value.strip('"')
        if k == 'contains_text':
            value = value.lower()
            text_condition = 'search_source:%s' % value
            conditions.append(text_condition)
            # cf. http://wiki.apache.org/solr/HighlightingParameters
            query['hl'] = True
            query['hl.fl'] = 'search_source'
            query['hl.q'] = text_condition
        else:
            conditions.append('%s:%s' % (k, value))
    query['q'] = ' AND '.join(conditions) if conditions else '*:*'

    response = context.solr_archivefile.search(**query)
    results = response.documents
    for result in results:
        # breadcrumbs are stored as a JSON string in the index
        if 'breadcrumbs' in result:
            result['breadcrumbs'] = json.loads(result['breadcrumbs'])
        else:
            result['breadcrumbs'] = []

    total_results = response.total_results
    start = response.start
    end = start + len(results)

    if query.get('hl'):
        # include highlighted phrases in the results
        for component in results:
            component_id = 'archivefile=%s' % component['id']
            component['snippet'] = response.highlighting[component_id].get(
                'search_source', '')

    if with_facets:
        facets = response.facets['facet_fields'] or {}
    else:
        facets = {}

    return {
        'results': results,
        'query_used': query_used,
        'total_results': total_results,
        'start': start,
        'end': end,
        'facets': facets,
    }
Exemple #4
0
    def get_solr_data(self, partial_update_keys=None):
        """Return a dictionary that can be indexed by solr.

        partial_update_keys is a list of keys.

        Without partial_update_keys the full document is returned as plain
        ``{field: value}`` pairs.

        With partial_update_keys the cheap fields are still all computed, but
        the more expensive groups are only included when requested:

        * 'archive_id' adds archive_id, archive, country and institution
        * 'file' adds default_image_id, images_ids and images_filenames

        In that case every value is wrapped as ``{'set': value}`` (which
        instructs solr to do a partial document update), except 'number',
        which is left as a plain value.
        This can be used for optimizing updates.
        """
        def maybe(date):
            # falsy dates (None, '') are passed through unformatted
            return datetime_to_string_zulu(date) if date else date

        def archive_fields():
            # fields that are derived from the archive this object belongs to
            return dict(
                archive_id=self.archive_id,
                archive=self.get_archive(),
                country=self.get_country(),
                institution=self.get_institution(),
            )

        def image_fields():
            # The following three fields will be collapsed in an array
            # of dicts before being returned to the client.
            # collapse_images_array is the function responsible of the conversion.
            default_image = self.get_default_image()  # looked up only once
            return dict(
                default_image_id=default_image.id
                if default_image else default_image,
                images_ids=[image.id for image in self.images],
                images_filenames=[image.filename for image in self.images],
            )

        # we first calculate the cheap keys
        solr_data = dict(
            number=self.number,
            sequenceNumber=self.sequenceNumber,
            URI=getattr(self, 'URI', None),
            status=self.status,
            dateLastModified=datetime_to_string_zulu(self.last_modified),
            date=maybe(self.date),
            folioNumber=getattr(self, 'folioNumber', None),
            originalFolioNumber=getattr(self, 'originalFolioNumber', None),
            title=getattr(self, 'title', None),
            subjectEN=getattr(self, 'subjectEN', None),
            transcription=getattr(self, 'transcription', None),
            transcriptionAuthor=getattr(self, 'transcriptionAuthor', None),
            transcriptionDate=maybe(getattr(self, 'transcriptionDate', None)),
            translationEN=getattr(self, 'translationEN', None),
            translationENDate=maybe(getattr(self, 'translationENDate', None)),
            translationENAuthor=getattr(self, 'translationENAuthor', None),
            translationID=getattr(self, 'translationID', None),
            translationIDAuthor=getattr(self, 'translationIDAuthor', None),
            type=getattr(self, 'type', None),
            language=getattr(self, 'language', None),
            relation=getattr(self, 'relation', None),
            source=getattr(self, 'source', None),
            creator=getattr(self, 'creator', None),
            format=getattr(self, 'format', None),
            contributor=getattr(self, 'contributor', None),
            publisher=getattr(self, 'publisher', None),
            rights=getattr(self, 'rights', None),
            user=getattr(self, 'user', None),
            timeFrameFrom=datetime_to_string(self.timeFrameFrom),
            timeFrameTo=datetime_to_string(self.timeFrameTo),
            translationIDDate=maybe(self.translationIDDate),
            archiveFile=self.archiveFile,
            text=self.get_solr_text(),
        )

        if not partial_update_keys:
            # we calculate all the data
            solr_data.update(archive_fields())
            solr_data.update(image_fields())
            return solr_data

        if 'archive_id' in partial_update_keys:
            solr_data.update(archive_fields())
        if 'file' in partial_update_keys:
            solr_data.update(image_fields())

        number = solr_data['number']
        solr_data = {k: {'set': v} for k, v in solr_data.items()}
        # 'number' is sent as a plain value rather than a 'set' instruction
        # (presumably it identifies the document to update - TODO confirm)
        solr_data['number'] = number
        return solr_data
Exemple #5
0
 def dateLastModified(self):
     """Return ``self.last_modified`` formatted by ``datetime_to_string_zulu``."""
     last_modified = self.last_modified
     return datetime_to_string_zulu(last_modified)