Example #1
0
 def archiveFile_escape(s):
     """Escape an archiveFile search value for use in a solr query.

     A value of the form ``[a TO b]`` is treated as a range: both bounds are
     escaped separately and returned as a quoted solr range expression.
     Any other value is passed through ``solr_escape`` unchanged in shape.
     """
     # raw string: '\[' and '\]' are regex escapes, not string escapes
     m = re.match(r'\[(.*?) TO (.*?)\]$', s)
     if m is None:
         return solr_escape(s)
     archiveFile_from = solr_escape(m.group(1))
     archiveFile_to = solr_escape(m.group(2))
     return '["{archiveFile_from}" TO "{archiveFile_to}"]'.format(
         archiveFile_from=archiveFile_from,
         archiveFile_to=archiveFile_to)
Example #2
0
def cast_scan_as_archivefile(context, scandata):
    """Build an ArchiveFile object for the archive file a scan belongs to.

    ``scandata`` is a dictionary representing a scan. Its ``archive_id`` and
    ``archiveFile`` values are used to look the archive file up in the
    archivefile solr index. If a document is found, every field of the first
    hit is copied onto the returned object; otherwise only the two
    identifying values are set.
    """
    archive_id = scandata['archive_id']
    archive_file = scandata['archiveFile']
    solr_query = 'archive_id:{archive_id} AND archiveFile:{archiveFile}'.format(
        archive_id=solr_escape(archive_id),
        archiveFile=solr_escape(archive_file),
    )
    hits = context.solr_archivefile.search(q=solr_query).documents

    # todo: cleanup code so we don't need to import here
    from restrepo.db.archivefile import ArchiveFile
    result = ArchiveFile()

    if not hits:
        # nothing indexed yet: return a minimal object
        result.archive_id = archive_id
        result.archiveFile = archive_file
        return result

    first_hit = hits[0]
    for field in first_hit:
        setattr(result, field, first_hit[field])
    return result
Example #3
0
def update_archivefile(request):
    """

    update information about this archivefile

    parameters:
        * **status:**
            a status: a value among :ref:`status_values` (except 0)
        * **user**:  a string that identifies the user responsible for the update
    """
    _data_changed = False

    # the archivefile as a dictionary; this is also the object that is returned
    archivefile = request._dbentity['archivefile']

    # find a database record
    qry = request.db.query(ArchiveFile)
    qry = qry.filter(ArchiveFile.archive_id == archivefile['archive_id'])
    qry = qry.filter(ArchiveFile.archiveFile == archivefile['archiveFile'])
    try:
        archivefile_db = qry.one()
    except sqlalchemy.orm.exc.NoResultFound:
        # if we do not find this archivefile in the database
        # we create a new db record
        archivefile_db = ArchiveFile()
        for key in archivefile:
            setattr(archivefile_db, key, archivefile[key])

        request.db.add(archivefile_db)
        _data_changed = True

    data = request.validated
    for k in data:
        # only update with data that is explicitly given;
        # 'user' is never written to the record itself
        if k not in request.POST or k == 'user':
            continue
        if data[k] != getattr(archivefile_db, k):
            setattr(archivefile_db, k, data[k])
            # also update the object that we are about to return
            archivefile[k] = data[k]
            _data_changed = True

    if _data_changed:
        request.db.flush()
        # update the index
        request.solr_archivefile.update([archivefile_db.get_solr_data(request)])

        # propagate the status to the indexed ead components of this
        # archivefile; only possible when a status was actually validated
        # (reading data['status'] unconditionally raised KeyError otherwise)
        if 'status' in data:
            q = 'archive_id:{archive_id} AND archiveFile:{archiveFile}'.format(
                archive_id=solr_escape(archivefile['archive_id']),
                archiveFile=solr_escape(archivefile['archiveFile']),
                )

            documents = request.solr_eadcomponent.search(q=q).documents
            if documents:
                for document in documents:
                    document['status'] = data['status']
                request.solr_eadcomponent.update(documents)

        user = get_user(request)
        log_events(request.db, user, [{
            'message': 'update', 'object_id': archivefile['id'], 'object_type': 'archivefile'
        }])

        # published archivefiles get their pagebrowser book refreshed,
        # all others get it deleted
        for ead_id in archivefile_db.ead_ids:
            if archivefile_db.status == STATUS_PUBLISHED:
                pagebrowser.update.refresh_book(request, ead_id=ead_id, archivefile_id=archivefile_db.id)
            else:
                pagebrowser.update.delete_book(request, ead_id=ead_id, archivefile_id=archivefile_db.id)

    return archivefile
Example #4
0
def search_archivefiles(context, **kwargs):
    """
    Search the archivefile solr index and return a dictionary with the keys
    ``results``, ``query_used``, ``total_results``, ``start``, ``end`` and
    ``facets``.

    possible arguments:
        start : offset of the first result, default is 0
        limit : maximum number of results, default is 1000
        date_from
        date_to
        archive
        archive_id
        archiveFile
        country
        contains_text
        ead_id
        findingaid
        institution
        is_archiveFile : (not used as a filter by this function)
        is_component : only used as a filter when it has a true value
        parent
        show_in_tree
        language
        xpath
        with_facets : return the argument with facets, default is True
    """
    start = kwargs.get('start', 0)
    limit = kwargs.get('limit', 1000)
    with_facets = kwargs.get('with_facets', True)

    conditions = []
    query_used = {}

    # paging applies whether or not facets are requested
    query = {
        'start': start,
        'rows': limit,
    }
    if with_facets:
        query['facet'] = 'true'
        query['facet.field'] = ['country', 'language']

    date_from = kwargs.get('date_from')
    date_to = kwargs.get('date_to')
    if date_from:
        date_from = datetime_to_string_zulu(date_from)
    if date_to:
        date_to = datetime_to_string_zulu(date_to)
    if date_from or date_to:
        # a missing bound is left open ('*'); a document matches when its
        # own [date_from, date_to] interval overlaps the requested one
        conditions.append('date_from:[* TO %s]' % (date_to or '*'))
        conditions.append('date_to:[%s TO *]' % (date_from or '*'))

    for k in (
            'archive',
            'archive_id',
            'archiveFile',
            'country',
            'contains_text',
            'ead_id',
            'findingaid',
            'institution',
            'is_component',
            'language',
            'parent',
            'show_in_tree',
            'xpath',
    ):
        value = kwargs.get(k)
        if k == 'is_component' and not value:
            value = None
        if value is None:
            continue
        query_used[k] = value
        value = solr_escape(value)
        value = '"%s"' % value.strip('"')
        if k == 'contains_text':
            value = value.lower()
            conditions.append('search_source:%s' % value)
            # cf. http://wiki.apache.org/solr/HighlightingParameters
            query['hl'] = True
            query['hl.fl'] = 'search_source'
            query['hl.q'] = 'search_source:%s' % value
        else:
            conditions.append('%s:%s' % (k, value))
    query['q'] = ' AND '.join(conditions) if conditions else '*:*'

    response = context.solr_archivefile.search(**query)
    results = response.documents
    for result in results:
        # breadcrumbs are stored in the index as a JSON string
        if 'breadcrumbs' in result:
            result['breadcrumbs'] = json.loads(result['breadcrumbs'])
        else:
            result['breadcrumbs'] = []

    total_results = response.total_results
    start = response.start
    end = start + len(results)

    if query.get('hl'):
        # include highlighted phrases in the results
        for component in results:
            component_id = 'archivefile=%s' % component['id']
            component['snippet'] = response.highlighting[component_id].get(
                'search_source', '')

    if with_facets:
        facets = response.facets['facet_fields'] or {}
    else:
        facets = {}

    return {
        'results': results,
        'query_used': query_used,
        'total_results': total_results,
        'start': start,
        'end': end,
        'facets': facets,
    }
Example #5
0
def get_archivefiles(
    context,
    ead_id=None,
    archive_id=None,
    archivefile_id=None,
    archiveFile=None,
    archiveFiles=None,
    has_scans=None,
    status=None,
    start=0,
    limit=10000,
    sort='sort_field asc',
):
    """Query the archivefile solr index.

    All given filters are combined with AND:

    * ``ead_id``: matched against ``ead_ids``
    * ``archive_id``: converted to an integer before matching
    * ``archivefile_id``: matched against ``archivefile_id``
    * ``archiveFile``: a single value, or a range of the form ``[a TO b]``
    * ``archiveFiles``: an iterable of such values, combined with OR
    * ``status``: matched against ``status`` (``None`` means no filter)
    * ``has_scans``: true selects ``number_of_scans`` >= 1, false selects 0,
      ``None`` means no filter

    ``start``, ``limit`` and ``sort`` are passed on to solr as ``start``,
    ``rows`` and ``sort``.

    Returns a tuple ``(total_results, results)`` where each result has been
    converted with ``archivefile_solr_to_json``.
    """

    def archiveFile_escape(s):
        # raw string: '\[' and '\]' are regex escapes, not string escapes
        m = re.match(r'\[(.*?) TO (.*?)\]$', s)
        if m is None:
            return solr_escape(s)
        return '["{archiveFile_from}" TO "{archiveFile_to}"]'.format(
            archiveFile_from=solr_escape(m.group(1)),
            archiveFile_to=solr_escape(m.group(2)))

    q = ['*:*']

    if ead_id:
        q.append('ead_ids:%s' % solr_escape(ead_id))

    if archive_id:
        archive_id = int(archive_id)
        q.append('archive_id:%s' % solr_escape(archive_id))

    if archivefile_id:
        q.append('archivefile_id:%s' % solr_escape(archivefile_id))

    if archiveFile:
        q.append('archiveFile:%s' % archiveFile_escape(archiveFile))

    if archiveFiles:
        q.append('archiveFile:(%s)' %
                 ' OR '.join(archiveFile_escape(s) for s in archiveFiles))

    if status is not None:
        q.append('status:%s' % status)

    if has_scans is not None:
        if has_scans:
            q.append('number_of_scans:[1 TO *]')
        else:
            q.append('number_of_scans:0')

    q = ' AND '.join(q)

    response = context.solr_archivefile.search(q=q,
                                               rows=limit,
                                               start=start,
                                               sort=sort)

    results = [archivefile_solr_to_json(context, x) for x in response.documents]
    total_results = response.total_results

    return total_results, results