def archiveFile_escape(s):
    """Escape an archiveFile query value for use in a solr query.

    If *s* is a solr range expression of the form ``[a TO b]``, each
    endpoint is escaped individually and quoted so the range syntax
    survives; otherwise the whole string is passed to ``solr_escape``.
    """
    # raw string: '\[' in a plain string literal is an invalid escape
    # sequence and raises a SyntaxWarning on modern Python
    m = re.match(r'\[(.*?) TO (.*?)\]$', s)
    if m is None:
        return solr_escape(s)
    archiveFile_from = solr_escape(m.groups()[0])
    archiveFile_to = solr_escape(m.groups()[1])
    # quote each endpoint so escaped values stay intact inside the range
    return '["{archiveFile_from}" TO "{archiveFile_to}"]'.format(
        archiveFile_from=archiveFile_from,
        archiveFile_to=archiveFile_to)
def cast_scan_as_archivefile(context, scandata):
    """Build an ArchiveFile object from a scan dictionary.

    Looks up an already-indexed archivefile document in solr matching the
    scan's ``archive_id``/``archiveFile`` pair; if one exists its fields
    are copied onto a fresh ``ArchiveFile``, otherwise the object is
    seeded from the scan itself.  ``scandata`` is a dictionary
    representing a scan.
    """
    # todo: cleanup code so we don't need to import here
    from restrepo.db.archivefile import ArchiveFile

    query = 'archive_id:{archive_id} AND archiveFile:{archiveFile}'.format(
        archive_id=solr_escape(scandata['archive_id']),
        archiveFile=solr_escape(scandata['archiveFile']),
    )
    matches = context.solr_archivefile.search(q=query).documents

    result = ArchiveFile()
    if matches:
        # an indexed document exists: copy every field from the first hit
        for field, value in matches[0].items():
            setattr(result, field, value)
    else:
        # nothing indexed yet: take the identifying fields from the scan
        result.archive_id = scandata['archive_id']
        result.archiveFile = scandata['archiveFile']
    return result
def update_archivefile(request):
    """Update information about this archivefile.

    parameters:
        * **status**: a value among :ref:`status_values` (except 0)
        * **user**: a string that identifies the user responsible for
          the update

    Returns the (possibly updated) archivefile dictionary from the
    request.  Side effects when data changed: the database record is
    created/updated, the solr archivefile and eadcomponent indexes are
    refreshed, an event is logged, and the pagebrowser books for the
    related EADs are refreshed or deleted depending on the new status.
    """
    _data_changed = False
    archivefile = request._dbentity['archivefile']

    # find a database record
    qry = request.db.query(ArchiveFile)
    qry = qry.filter(ArchiveFile.archive_id == archivefile['archive_id'])
    qry = qry.filter(ArchiveFile.archiveFile == archivefile['archiveFile'])
    try:
        archivefile_db = qry.one()
    except sqlalchemy.orm.exc.NoResultFound:
        # if we do not find this archivefile in the database
        # we create a new db record
        archivefile_db = ArchiveFile()
        for key in archivefile:
            setattr(archivefile_db, key, archivefile[key])
        request.db.add(archivefile_db)
        _data_changed = True

    data = request.validated
    for k in data:
        if k in request.POST and k not in ['user']:
            # only update with data that is explicitly given
            if data[k] != getattr(archivefile_db, k):
                setattr(archivefile_db, k, data[k])
                # also update the object that we are about to return
                archivefile[k] = data[k]
                _data_changed = True

    if _data_changed:
        request.db.flush()
        # update the index
        request.solr_archivefile.update(
            [archivefile_db.get_solr_data(request)])

        # propagate the new status to the eadcomponent index; guard on
        # 'status' explicitly -- the original read data['status'] under a
        # bare `if data:` and raised KeyError when other fields were
        # updated without a status value
        if 'status' in data:
            q = 'archive_id:{archive_id} AND archiveFile:{archiveFile}'.format(
                archive_id=solr_escape(archivefile['archive_id']),
                archiveFile=solr_escape(archivefile['archiveFile']),
            )
            documents = request.solr_eadcomponent.search(q=q).documents
            if documents:
                for document in documents:
                    document['status'] = data['status']
                request.solr_eadcomponent.update(documents)

        user = get_user(request)
        log_events(request.db, user, [{
            'message': 'update',
            'object_id': archivefile['id'],
            'object_type': 'archivefile',
        }])

        # refresh or remove the pagebrowser book for each related EAD,
        # depending on whether the archivefile is now published
        for ead_id in archivefile_db.ead_ids:
            if archivefile_db.status == STATUS_PUBLISHED:
                pagebrowser.update.refresh_book(
                    request, ead_id=ead_id, archivefile_id=archivefile_db.id)
            else:
                pagebrowser.update.delete_book(
                    request, ead_id=ead_id, archivefile_id=archivefile_db.id)

    return archivefile
def search_archivefiles(context, **kwargs):
    """Search the solr archivefile index and return a result dictionary.

    possible arguments:
        start
        limit
        date_from
        date_to
        archive
        archive_id
        archiveFile
        country
        contains_text
        ead_id
        findingaid
        institution
        is_archiveFile
        is_component
        parent
        show_in_tree
        language
        xpath
        with_facets : return the argument with facets, default is True

    Returns a dict with keys ``results``, ``query_used``,
    ``total_results``, ``start``, ``end`` and ``facets``.
    """
    start = kwargs.get('start', 0)
    limit = kwargs.get('limit', 1000)
    conditions = []
    query_used = {}
    with_facets = kwargs.get('with_facets', True)
    if with_facets:
        query = {
            'facet': 'true',
            'facet.field': ['country', 'language'],
            'start': start,
            'rows': limit,
        }
    else:
        # NOTE(review): without facets the query carries no start/rows,
        # so solr's own defaults apply -- confirm this is intentional
        query = {}
    date_from = kwargs.get('date_from')
    date_to = kwargs.get('date_to')
    if date_from:
        date_from = datetime_to_string_zulu(date_from)
    if date_to:
        date_to = datetime_to_string_zulu(date_to)
    if date_from or date_to:
        # overlap test: a document matches when its date range
        # intersects [date_from, date_to]
        date_from = date_from or '*'
        date_to = date_to or '*'
        conditions.append('date_from:[* TO %(date_to)s]' % locals())
        conditions.append('date_to:[%(date_from)s TO *]' % locals())
    for k in [
        'archive',
        'archive_id',
        'archiveFile',
        'country',
        'contains_text',
        'ead_id',
        'findingaid',
        'institution',
        'is_component',
        'language',
        'parent',
        'show_in_tree',
        'xpath',
    ]:
        value = kwargs.get(k, None)
        # is_component is only filtered on when explicitly truthy
        if k == 'is_component' and not value:
            value = None
        if value is not None:
            query_used[k] = value
            value = solr_escape(value)
            # normalize to a single pair of surrounding quotes
            value = '"%s"' % value.strip('"')
            if k == 'contains_text':
                value = value.lower()
                conditions.append('search_source:%s' % value)
                # cf. http://wiki.apache.org/solr/HighlightingParameters
                query['hl'] = True
                query['hl.fl'] = 'search_source'
                query['hl.q'] = 'search_source:%s' % value
            else:
                conditions.append('%s:%s' % (k, value))
    query['q'] = ' AND '.join(conditions) if conditions else '*:*'
    response = context.solr_archivefile.search(**query)
    results = response.documents
    for result in results:
        # breadcrumbs are stored in solr as a JSON string
        if 'breadcrumbs' in result:
            result['breadcrumbs'] = json.loads(result['breadcrumbs'])
        else:
            result['breadcrumbs'] = []
    total_results = response.total_results
    start = response.start
    end = start + len(results)
    if query.get('hl'):
        # include highlighted phrases in the results
        for component in results:
            component_id = 'archivefile=%s' % component['id']
            component['snippet'] = response.highlighting[component_id].get(
                'search_source', '')
    return {
        'results': results,
        'query_used': query_used,
        'total_results': total_results,
        'start': start,
        'end': end,
        'facets': with_facets and response.facets['facet_fields'] or {},
    }
def get_archivefiles(
        context,
        ead_id=None,
        archive_id=None,
        archivefile_id=None,
        archiveFile=None,
        archiveFiles=(),  # was a mutable default ([]); () is equivalent here
        has_scans=None,
        status=None,
        start=0,
        limit=10000,
        sort='sort_field asc',
        ):
    """Search the solr archivefile index with the given filters.

    ``archiveFile`` and the items of ``archiveFiles`` may be plain values
    or solr range expressions of the form ``[a TO b]``.

    Returns a tuple ``(total_results, results)`` where ``results`` are
    the matching documents converted via ``archivefile_solr_to_json``.
    """
    q = ['*:*']
    if ead_id:
        q.append('ead_ids:%s' % solr_escape(ead_id))
    if archive_id:
        archive_id = int(archive_id)
        # NOTE(review): solr_escape receives an int here -- confirm it
        # accepts non-string input
        q.append('archive_id:%s' % solr_escape(archive_id))
    if archivefile_id:
        q.append('archivefile_id:%s' % solr_escape(archivefile_id))

    # use the module-level archiveFile_escape helper instead of
    # redefining an identical copy inside this function
    if archiveFile:
        q.append('archiveFile:%s' % archiveFile_escape(archiveFile))
    if archiveFiles:
        q.append('archiveFile:(%s)' % ' OR '.join(
            archiveFile_escape(s) for s in archiveFiles))
    if status is not None:
        q.append('status:%s' % status)
    if has_scans is not None:
        if has_scans:
            # at least one scan attached
            q.append('number_of_scans:[1 TO *]')
        else:
            q.append('number_of_scans:0')

    q = ' AND '.join(q)
    response = context.solr_archivefile.search(
        q=q, rows=limit, start=start, sort=sort)
    results = [archivefile_solr_to_json(context, x)
               for x in response.documents]
    total_results = response.total_results
    return total_results, results