def get_solr_data(self):
    """Build the dictionary of field values that is to be indexed by SOLR.

    Every name returned by ``self._field_names()`` becomes a key; attributes
    that are missing on the object are stored as None. Date and datetime
    values are converted with ``datetime_to_string_zulu``; all other values
    are stored unchanged.
    """
    date_types = (datetime.datetime, datetime.date)
    solr_doc = {}
    for name in self._field_names():
        value = getattr(self, name, None)
        solr_doc[name] = (
            datetime_to_string_zulu(value)
            if isinstance(value, date_types)
            else value
        )

    # 'sequenceNumber' is only added once it has been calculated (this allows
    # us to update separate components without recalculating the index).
    if getattr(self, 'sequenceNumber', None) is None:
        return solr_doc
    solr_doc['sequenceNumber'] = self.sequenceNumber
    return solr_doc
def maybe(date):
    """Return ``date`` converted by ``datetime_to_string_zulu``.

    Falsy values (for example None) are returned unchanged, so that an
    unset date does not have to be special-cased by the caller.
    """
    if not date:
        return date
    return datetime_to_string_zulu(date)
def search_archivefiles(context, **kwargs):
    """Search the archive file index in SOLR and return one page of results.

    Possible keyword arguments:

        start
            offset of the first result to return (default 0)
        limit
            maximum number of results to return (default 1000)
        date_from, date_to
            restrict the results to documents whose own date_from/date_to
            range overlaps the given range; a missing bound is open-ended
        archive, archive_id, archiveFile, country, ead_id, findingaid,
        institution, is_component, language, parent, show_in_tree, xpath
            filters on the SOLR field of the same name; the escaped value is
            wrapped in double quotes. A false value for is_component is
            treated as "not given".
        contains_text
            lower-cased and matched against the search_source field; this
            also switches on highlighting, which adds a 'snippet' to every
            result
        with_facets
            return facet counts for country and language (default True)

    Any other keyword argument is ignored. Note that is_archiveFile used to
    be listed here, but it is not read by this function.

    Returns a dictionary with the keys 'results', 'query_used',
    'total_results', 'start', 'end' and 'facets'. 'query_used' holds the
    unescaped filter values that were applied; 'facets' is an empty
    dictionary when with_facets is false.
    """
    start = kwargs.get('start', 0)
    limit = kwargs.get('limit', 1000)
    with_facets = kwargs.get('with_facets', True)

    # Paging applies to every search, not only to the faceted ones.
    query = {
        'start': start,
        'rows': limit,
    }
    if with_facets:
        query['facet'] = 'true'
        query['facet.field'] = ['country', 'language']

    conditions = []
    query_used = {}

    date_from = kwargs.get('date_from')
    date_to = kwargs.get('date_to')
    if date_from:
        date_from = datetime_to_string_zulu(date_from)
    if date_to:
        date_to = datetime_to_string_zulu(date_to)
    if date_from or date_to:
        # Two ranges overlap when each one starts before the other one ends.
        conditions.append('date_from:[* TO %s]' % (date_to or '*',))
        conditions.append('date_to:[%s TO *]' % (date_from or '*',))

    filter_fields = (
        'archive',
        'archive_id',
        'archiveFile',
        'country',
        'contains_text',
        'ead_id',
        'findingaid',
        'institution',
        'is_component',
        'language',
        'parent',
        'show_in_tree',
        'xpath',
    )
    for field in filter_fields:
        value = kwargs.get(field)
        if field == 'is_component' and not value:
            # is_component=False means "do not filter", not "non-components only"
            value = None
        if value is None:
            continue

        query_used[field] = value
        value = solr_escape(value)
        value = '"%s"' % value.strip('"')
        if field == 'contains_text':
            text_condition = 'search_source:%s' % value.lower()
            conditions.append(text_condition)
            # cf. http://wiki.apache.org/solr/HighlightingParameters
            query['hl'] = True
            query['hl.fl'] = 'search_source'
            query['hl.q'] = text_condition
        else:
            conditions.append('%s:%s' % (field, value))

    query['q'] = ' AND '.join(conditions) if conditions else '*:*'

    response = context.solr_archivefile.search(**query)
    results = response.documents
    for result in results:
        # breadcrumbs are stored in the index as a JSON string
        if 'breadcrumbs' in result:
            result['breadcrumbs'] = json.loads(result['breadcrumbs'])
        else:
            result['breadcrumbs'] = []

    total_results = response.total_results
    start = response.start
    end = start + len(results)

    if query.get('hl'):
        # include highlighted phrases in the results; a result without a
        # highlighting entry gets an empty snippet
        highlighting = response.highlighting or {}
        for component in results:
            component_id = 'archivefile=%s' % component['id']
            component['snippet'] = highlighting.get(component_id, {}).get(
                'search_source', '')

    if with_facets:
        facets = response.facets['facet_fields'] or {}
    else:
        facets = {}

    return {
        'results': results,
        'query_used': query_used,
        'total_results': total_results,
        'start': start,
        'end': end,
        'facets': facets,
    }
def get_solr_data(self, partial_update_keys=None):
    """Return a dictionary describing this object that can be indexed by solr.

    partial_update_keys is an optional list of keys.

    If it is not given (or empty), the complete document is returned: the
    cheap values plus the archive fields (archive_id, archive, country,
    institution) and the image fields (default_image_id, images_ids,
    images_filenames).

    If it is given, the cheap values are still all computed, but the archive
    fields are only computed when 'archive_id' is in partial_update_keys and
    the image fields only when 'file' is. Every value is then wrapped as
    {'set': value}, which instructs solr to do a partial document update.
    'number' is the one exception and is returned unwrapped (presumably
    because it identifies the document that is to be updated).

    This can be used for optimizing updates.
    """

    def maybe(date):
        # format a date for solr; falsy values (no date set) pass through
        if date:
            date = datetime_to_string_zulu(date)
        return date

    # we first calculate the cheap keys
    solr_data = dict(
        number=self.number,
        sequenceNumber=self.sequenceNumber,
        URI=getattr(self, 'URI', None),
        status=self.status,
        dateLastModified=datetime_to_string_zulu(self.last_modified),
        date=maybe(self.date),
        folioNumber=getattr(self, 'folioNumber', None),
        originalFolioNumber=getattr(self, 'originalFolioNumber', None),
        title=getattr(self, 'title', None),
        subjectEN=getattr(self, 'subjectEN', None),
        transcription=getattr(self, 'transcription', None),
        transcriptionAuthor=getattr(self, 'transcriptionAuthor', None),
        transcriptionDate=maybe(getattr(self, 'transcriptionDate', None)),
        translationEN=getattr(self, 'translationEN', None),
        translationENDate=maybe(getattr(self, 'translationENDate', None)),
        translationENAuthor=getattr(self, 'translationENAuthor', None),
        translationID=getattr(self, 'translationID', None),
        translationIDAuthor=getattr(self, 'translationIDAuthor', None),
        type=getattr(self, 'type', None),
        language=getattr(self, 'language', None),
        relation=getattr(self, 'relation', None),
        source=getattr(self, 'source', None),
        creator=getattr(self, 'creator', None),
        format=getattr(self, 'format', None),
        contributor=getattr(self, 'contributor', None),
        publisher=getattr(self, 'publisher', None),
        rights=getattr(self, 'rights', None),
        user=getattr(self, 'user', None),
        timeFrameFrom=datetime_to_string(self.timeFrameFrom),
        timeFrameTo=datetime_to_string(self.timeFrameTo),
        translationIDDate=maybe(self.translationIDDate),
        archiveFile=self.archiveFile,
        text=self.get_solr_text(),
    )

    is_partial = bool(partial_update_keys)

    # A full update calculates all the data; a partial update only the
    # groups whose key was asked for.
    if not is_partial or 'archive_id' in partial_update_keys:
        solr_data.update(
            archive_id=self.archive_id,
            archive=self.get_archive(),
            country=self.get_country(),
            institution=self.get_institution(),
        )

    if not is_partial or 'file' in partial_update_keys:
        default_image = self.get_default_image()
        solr_data.update(
            # The following three fields will be collapsed into an array
            # of dicts before being returned to the client.
            # collapse_images_array is the function responsible for the
            # conversion.
            default_image_id=default_image and default_image.id,
            images_ids=[image.id for image in self.images],
            images_filenames=[image.filename for image in self.images],
        )

    if is_partial:
        number = solr_data['number']
        solr_data = dict(
            (key, {'set': value}) for key, value in solr_data.items())
        # 'number' is passed as a plain value, not as a 'set' instruction
        solr_data['number'] = number

    return solr_data
def dateLastModified(self):
    """Return ``self.last_modified`` converted by ``datetime_to_string_zulu``."""
    last_modified = self.last_modified
    return datetime_to_string_zulu(last_modified)