Example #1
0
def attach_tags_to_record(uid, list_of_tags, record_id):
    """Attach a list of tags to a record.

    The tags are processed in sorted order; the caller's ``list_of_tags``
    is left untouched.

    :param uid: a user id; when falsy, the id of ``current_user`` is used
    :param list_of_tags: an iterable of tag names to be attached to a record
    :param record_id: record identifier
    :return: list of the truthy values returned by ``attach_tag_to_record``
        for each tag name, in sorted tag-name order
    :raises tags_errors.RecordNotFoundError: if ``record_exists`` does not
        report ``1`` for ``record_id``
    """
    from invenio.legacy.search_engine import record_exists
    # find record
    if record_exists(record_id) != 1:
        raise tags_errors.RecordNotFoundError(
            "Tag error: Record with id={0} does not exist".
            format(record_id))
    if not uid:
        uid = current_user.get_id()
    tags_to_return = []
    # sorted() iterates over a sorted copy, so the caller's list is not
    # reordered as a side effect of this call
    for tag_name in sorted(list_of_tags):
        tag = attach_tag_to_record(uid, tag_name, record_id)
        # only report tags for which attach_tag_to_record returned something
        if tag:
            tags_to_return.append(tag)
    return tags_to_return
def solr_add_range(lower_recid, upper_recid, tags_to_index,
                   next_commit_counter):
    """
    Adds the regarding field values of all records from the lower recid to the upper one to Solr.
    It preserves the fulltext information.

    Every recid in the inclusive range is visited; for those where
    record_exists() is truthy, the abstract, author, keyword and title
    contents plus the fulltext are handed to solr_add(). If the fulltext
    cannot be fetched or decoded, an empty string is indexed instead.

    Returns the commit counter as last returned by
    solr_commit_if_necessary() (the value passed in if nothing was added).
    """
    for recid in range(lower_recid, upper_recid + 1):
        # NOTE(review): any truthy status passes here, while other callers of
        # record_exists() compare against 1 explicitly -- confirm intended.
        if not record_exists(recid):
            continue

        abstract = get_field_content_in_utf8(recid, 'abstract',
                                             tags_to_index)
        author = get_field_content_in_utf8(recid, 'author', tags_to_index)
        keyword = get_field_content_in_utf8(recid, 'keyword',
                                            tags_to_index)
        title = get_field_content_in_utf8(recid, 'title', tags_to_index)
        try:
            bibrecdocs = BibRecDocs(recid)
            fulltext = unicode(bibrecdocs.get_text(), 'utf-8')
        except Exception:
            # Best effort: index without fulltext, but do not swallow
            # KeyboardInterrupt/SystemExit as a bare except would.
            fulltext = ''

        solr_add(recid, abstract, author, fulltext, keyword, title)
        next_commit_counter = solr_commit_if_necessary(next_commit_counter,
                                                       recid=recid)

    return next_commit_counter
Example #3
0
def search_unit(query, f, m, wl=None):
    """Search for similar records.

    ``query`` is either an intbitset of recids or a value convertible to a
    single recid with ``int()``; a single recid is only used when
    ``record_exists`` reports 1 for it. ``f``, ``m`` and ``wl`` are accepted
    but not used here. Returns an intbitset with the union of the first
    element of each ``find_similar`` result.
    """
    from invenio.legacy.search_engine import record_exists
    from invenio.legacy.bibrank.record_sorter import METHODS
    from invenio.legacy.bibrank.word_searcher import find_similar

    similar = intbitset([])
    if not query:
        return similar

    if isinstance(query, intbitset):
        seeds = query
    else:
        seed = int(query)
        seeds = []
        if record_exists(seed) == 1:
            seeds = [seed]

    if not len(seeds):
        return similar

    for seed in seeds:
        ranked = find_similar('jif',
                              seed,
                              intbitset([]),
                              rank_limit_relevance=0,
                              verbose=0,
                              methods=METHODS)
        similar |= intbitset(ranked[0])

    return similar
Example #4
0
    def get(self, record_id):
        """Return the formatted record together with status code 200.

        Raises RecordNotFoundError when ``record_exists`` reports 0,
        RecordDeletedError when it reports -1, and RecordForbiddenViewError
        when ``check_user_can_view_record`` yields auth code 1 for the
        current user.
        """
        from invenio.legacy.search_engine import record_exists, \
            check_user_can_view_record

        # Output format is resolved first, before any record lookup
        requested_format = self.get_output_format()

        # Existence check
        status = record_exists(record_id)
        if status == 0:
            raise RecordNotFoundError(
                message="Record {} does not exist.".format(record_id),
            )
        if status == -1:
            raise RecordDeletedError(
                message="Record {} was deleted.".format(record_id),
            )

        # Access check; only the auth code is inspected
        auth_code, _auth_message = check_user_can_view_record(
            current_user, record_id)
        if auth_code == 1:
            raise RecordForbiddenViewError(
                message="Access to record {} is forbidden.".format(record_id),
            )

        # Render the record in the requested output format
        return (format_record(recID=record_id, of=requested_format), 200)
Example #5
0
def get_bibrecord(recid):
    """Return record in BibRecord wrapping.

    Returns None when record_exists() is falsy for ``recid``. When revision
    ids are available, the record is built from the MARCXML of the largest
    revision id; otherwise get_record() is used.
    """
    if not record_exists(recid):
        return None

    revision_ids = get_record_revision_ids(recid)
    if not revision_ids:
        return get_record(recid)

    newest_revision = max(revision_ids)
    marcxml = get_marcxml_of_revision_id(newest_revision)
    return create_record(marcxml)[0]
Example #6
0
def get_bibrecord(recid):
    """Return record in BibRecord wrapping.

    Yields None if record_exists() is falsy. Prefers the MARCXML of the
    largest revision id when any revision ids exist, and falls back to
    get_record() otherwise.
    """
    if record_exists(recid):
        revisions = get_record_revision_ids(recid)
        if not revisions:
            return get_record(recid)
        latest_marcxml = get_marcxml_of_revision_id(max(revisions))
        parsed = create_record(latest_marcxml)
        return parsed[0]
    return None
Example #7
0
def attach_tag_to_record(uid, tag_name, record_id):
    """Attach a tag to a record.

    The tag named ``tag_name`` owned by the user is created if it does not
    exist yet, and then associated with the record unless that association
    is already present.

    :param uid: user identifier; when falsy, the id of ``current_user`` is
        used
    :param tag_name: name of tag to be attached to record
    :param record_id: record identifier
    :return: the tag when a new association was stored, ``None`` when the
        tag was already attached to the record
    :raises tags_errors.RecordNotFoundError: if ``record_exists`` does not
        report ``1`` for ``record_id``
    :raises tags_errors.TagNotCreatedError: if storing a new tag fails with
        ``DBAPIError``
    :raises tags_errors.TagRecordAssociationError: if storing the
        association fails with ``DBAPIError``
    """
    from invenio.legacy.search_engine import record_exists
    if not uid:
        uid = current_user.get_id()
    if record_exists(record_id) != 1:
        raise tags_errors.RecordNotFoundError(
            "Tag error: Record with id={0} does not exist".format(record_id))
    tag = WtgTAG.query.filter(WtgTAG.name == tag_name,
                              WtgTAG.id_user == uid).first()
    if not tag:
        # the user has no such tag yet: create it
        tag = WtgTAG(name=tag_name, id_user=uid)
        try:
            db.session.add(tag)
            db.session.commit()
        except DBAPIError:
            db.session.rollback()
            raise tags_errors.TagNotCreatedError(
                "Error while saving the new tag '{0}'".format(tag_name))
    else:
        # tag already exists: .first() is required here, because the bare
        # query object is always truthy and would hide a missing association
        existing_association = WtgTAGRecord.query.filter(
            WtgTAGRecord.id_tag == tag.id,
            WtgTAGRecord.id_bibrec == record_id).first()
        if existing_association:
            # tag exists and is attached to the record
            return None
    # attach the tag to the record
    association = WtgTAGRecord(id_tag=tag.id, id_bibrec=record_id)
    try:
        db.session.add(association)
        db.session.commit()
    except DBAPIError:
        db.session.rollback()
        raise tags_errors.TagRecordAssociationError(
            "Error when saving association between "
            "tag '{0}' and record with id={1}".format(tag_name, record_id))
    return tag
Example #8
0
def oai_get_recid(identifier):
    """Returns the recid corresponding to the OAI identifier.

    The identifier is looked up in the CFG_OAI_ID_FIELD field and the first
    match for which record_exists() is positive is returned. Returns None
    for a falsy identifier or when no such match exists.
    """
    if not identifier:
        return None

    oai_query = '{f}:"{p}"'.format(f=CFG_OAI_ID_FIELD, p=identifier)
    matches = Query(oai_query).search()
    if not matches:
        return None

    for candidate in matches:
        if record_exists(candidate) > 0:
            return candidate
    return None
Example #9
0
def oai_get_recid(identifier):
    """Returns the recid corresponding to the OAI identifier.

    Matches for the identifier in CFG_OAI_ID_FIELD are narrowed down with
    get_records_that_can_be_displayed() for the current user; the first of
    those for which record_exists() is positive is returned. Returns None
    for a falsy identifier or when nothing qualifies.
    """
    if not identifier:
        return None

    matches = search_pattern(p=identifier, f=CFG_OAI_ID_FIELD, m='e', ap=-9)
    if not matches:
        return None

    viewable = get_records_that_can_be_displayed(current_user, matches)
    for candidate in viewable:
        if record_exists(candidate) > 0:
            return candidate
    return None
Example #10
0
def oai_get_recid(identifier):
    """Returns the recid corresponding to the OAI identifier.

    Searches CFG_OAI_ID_FIELD for the identifier and hands back the first
    hit with a positive record_exists() status. None is returned when the
    identifier is falsy, the search is empty, or no hit qualifies.
    """
    found = None
    if identifier:
        hits = Query('{f}:"{p}"'.format(f=CFG_OAI_ID_FIELD,
                                        p=identifier)).search()
        if hits:
            for hit in hits:
                if record_exists(hit) > 0:
                    found = hit
                    break
    return found
Example #11
0
def oai_get_recid(identifier):
    """Returns the recid corresponding to the OAI identifier.

    Hits for the identifier in CFG_OAI_ID_FIELD are filtered through
    get_records_that_can_be_displayed() for the current user, and the first
    remaining hit with a positive record_exists() status is returned. None
    is returned for a falsy identifier or when nothing qualifies.
    """
    found = None
    if identifier:
        hits = search_pattern(p=identifier, f=CFG_OAI_ID_FIELD, m='e', ap=-9)
        if hits:
            shown = get_records_that_can_be_displayed(current_user, hits)
            for hit in shown:
                if record_exists(hit) > 0:
                    found = hit
                    break
    return found
Example #12
0
def _get_record_slave(recid, result, mode=None, uid=None):
    """Check if record exists and return it in dictionary format.
       If any kind of error occurs returns None.
       If mode=='revision' then recid parameter is considered as revid."""
    record = None
    if recid == 'none':
        mode = 'none'
    if mode == 'recid':
        record_status = record_exists(recid)
        #check for errors
        if record_status == 0:
            result['resultCode'], result[
                'resultText'] = 1, 'Non-existent record: %s' % recid
        elif record_status == -1:
            result['resultCode'], result[
                'resultText'] = 1, 'Deleted record: %s' % recid
        elif record_locked_by_queue(recid):
            result['resultCode'], result[
                'resultText'] = 1, 'Record %s locked by queue' % recid
        else:
            record = create_record(print_record(recid, 'xm'))[0]

    elif mode == 'tmpfile':
        file_path = '%s_%s.xml' % (_get_file_path(
            recid, uid), cfg['CFG_BIBEDIT_TO_MERGE_SUFFIX'])
        if not os.path.isfile(file_path):  #check if file doesn't exist
            result['resultCode'], result[
                'resultText'] = 1, 'Temporary file doesnt exist'
        else:  #open file
            tmpfile = open(file_path, 'r')
            record = create_record(tmpfile.read())[0]
            tmpfile.close()

    elif mode == 'revision':
        if revision_format_valid_p(recid):
            marcxml = get_marcxml_of_revision_id(recid)
            if marcxml:
                record = create_record(marcxml)[0]
            else:
                result['resultCode'], result[
                    'resultText'] = 1, 'The specified revision does not exist'
        else:
            result['resultCode'], result[
                'resultText'] = 1, 'Invalid revision id'

    elif mode == 'none':
        return {}

    else:
        result['resultCode'], result[
            'resultText'] = 1, 'Invalid record mode for record2'
    record_order_subfields(record)
    return record
Example #13
0
def oai_get_recid(identifier):
    """Returns the recid corresponding to the OAI identifier.

    Among the matches for the identifier in CFG_OAI_ID_FIELD, the first one
    that has a positive record_exists() status and is not in
    get_all_restricted_recids() is returned. If none qualifies, the last
    match is returned provided it is not restricted. Returns None
    otherwise, and for a falsy identifier.
    """
    if not identifier:
        return None

    matches = search_pattern(p=identifier, f=CFG_OAI_ID_FIELD, m='e', ap=-9)
    if not matches:
        return None

    restricted = get_all_restricted_recids()
    for candidate in matches:
        if record_exists(candidate) > 0 and candidate not in restricted:
            return candidate

    # Fallback: ``candidate`` still holds the last match from the loop above
    if candidate not in restricted:
        return candidate
    return None
Example #14
0
def oai_get_recid(identifier):
    """Returns the recid corresponding to the OAI identifier.

    The first hit in CFG_OAI_ID_FIELD with a positive record_exists() status
    that is not listed by get_all_restricted_recids() wins. Failing that,
    the last hit is returned when it is not restricted. None is returned
    in every other case, including a falsy identifier.
    """
    if identifier:
        hits = search_pattern(p=identifier, f=CFG_OAI_ID_FIELD, m='e', ap=-9)
        if hits:
            off_limits = get_all_restricted_recids()
            for hit in hits:
                live = record_exists(hit) > 0
                if live and hit not in off_limits:
                    return hit
            # ``hit`` is the last element iterated above
            if hit not in off_limits:
                return hit
    return None
Example #15
0
def oai_list_metadata_formats(argd):
    """Generates response to oai_list_metadata_formats verb.

    When argd carries a truthy 'identifier', an 'idDoesNotExist' oai_error
    is returned unless record_exists() reports 1, or reports -1 while
    CFG_OAI_DELETED_POLICY is not "no". Otherwise one metadataFormat
    element per entry of CFG_OAI_METADATA_FORMATS is emitted between the
    ListMetadataFormats header and footer.
    """
    if argd.get("identifier"):
        status = record_exists(oai_get_recid(argd["identifier"]))
        acceptable = status == 1 or (
            status == -1 and CFG_OAI_DELETED_POLICY != "no")
        if not acceptable:
            return oai_error(argd, [("idDoesNotExist", "invalid record Identifier: %s" % argd["identifier"])])

    formats_xml = ""
    for prefix, (_unused, schema, namespace) in CFG_OAI_METADATA_FORMATS.items():
        element = X.metadataFormat()(
            X.metadataPrefix(prefix),
            X.schema(schema),
            X.metadataNamespace(namespace))
        formats_xml += element

    return oai_header(argd, "ListMetadataFormats") + formats_xml + oai_footer("ListMetadataFormats")
Example #16
0
def _get_record_slave(recid, result, mode=None, uid=None):
    """Check if record exists and return it in dictionary format.
       If any kind of error occurs returns None.
       If mode=='revision' then recid parameter is considered as revid."""
    record = None
    if recid == 'none':
        mode = 'none'
    if mode == 'recid':
        record_status = record_exists(recid)
        #check for errors
        if record_status == 0:
            result['resultCode'], result['resultText'] = 1, 'Non-existent record: %s' % recid
        elif record_status == -1:
            result['resultCode'], result['resultText'] = 1, 'Deleted record: %s' % recid
        elif record_locked_by_queue(recid):
            result['resultCode'], result['resultText'] = 1, 'Record %s locked by queue' % recid
        else:
            record = create_record( print_record(recid, 'xm') )[0]
            record_order_subfields(record)

    elif mode == 'tmpfile':
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid),
                                       CFG_BIBEDIT_TO_MERGE_SUFFIX)
        if not os.path.isfile(file_path): #check if file doesn't exist
            result['resultCode'], result['resultText'] = 1, 'Temporary file doesnt exist'
        else: #open file
            tmpfile = open(file_path, 'r')
            record = create_record( tmpfile.read() )[0]
            tmpfile.close()

    elif mode == 'revision':
        if revision_format_valid_p(recid):
            marcxml = get_marcxml_of_revision_id(recid)
            if marcxml:
                record = create_record(marcxml)[0]
            else:
                result['resultCode'], result['resultText'] = 1, 'The specified revision does not exist'
        else:
            result['resultCode'], result['resultText'] = 1, 'Invalid revision id'

    elif mode == 'none':
        return {}

    else:
        result['resultCode'], result['resultText'] = 1, 'Invalid record mode for record2'
    return record
Example #17
0
def get_existing_records_for_reportnumber(reportnum):
    """Given a report number, return a list of recids of real (live) records
       that are associated with it.
       That is to say, if a record does not exist (perhaps it was deleted,
       for example) its recid will not be returned in the list.

       @param reportnum: the report number for which recids are to be returned.
       @type reportnum: string
       @return: list of recids.
       @rtype: list
       @note: If reportnum was not found in phrase indexes, the function searches
           directly in bibxxx tables via MARC tags, so that the record does not
           have to be phrase-indexed.
    """
    ## First attempt: look the report number up in the phrase indexes
    candidates = list(search_pattern(req=None,
                                     p=reportnum,
                                     f="reportnumber",
                                     m="e"))
    if not candidates:
        ## Nothing found: search each tag of the "reportnumber" field instead
        for marc_tag in get_field_tags("reportnumber"):
            candidates.extend(list(search_pattern(req=None,
                                                  p=reportnum,
                                                  f=marc_tag,
                                                  m="e")))

        ## The same recid may have been found via several tags
        candidates = dict.fromkeys(candidates).keys()

    ## Keep only the recids for which record_exists() reports 1
    return [recid for recid in candidates if record_exists(recid) == 1]
Example #18
0
def _get_record(recid, uid, result, fresh_record=False):
    """Retrieve record structure.

    Fills ``result`` with 'resultCode' and 'resultText': 1 plus a reason
    when the record is missing, deleted or locked; 0 and 'Record OK' plus
    'cacheDirty' and 'cacheMTime' on success. 'cacheOutdated' is set to
    True when an existing cache is reused but latest_record_revision() is
    falsy for its revision. Returns the record, or None when it could not
    be loaded.
    """
    record = None
    mtime = None
    cache_dirty = None
    status = record_exists(recid)
    has_cache = cache_exists(recid, uid)

    # Determine the first blocking condition, if any; the order of these
    # checks is significant.
    failure = None
    if status == 0:
        failure = 'Non-existent record: %s' % recid
    elif status == -1:
        failure = 'Deleted record: %s' % recid
    elif not has_cache and record_locked_by_other_user(recid, uid):
        failure = 'Record %s locked by user' % recid
    elif has_cache and cache_expired(recid, uid) and \
            record_locked_by_other_user(recid, uid):
        failure = 'Record %s locked by user' % recid
    elif record_locked_by_queue(recid):
        failure = 'Record %s locked by queue' % recid

    if failure is not None:
        result['resultCode'] = 1
        result['resultText'] = failure
    else:
        if fresh_record:
            # Drop the cache so a new one is built below
            delete_cache(recid, uid)
            has_cache = False
        if has_cache:
            cached = get_cache_contents(recid, uid)
            cache_dirty = cached[0]
            record_revision = cached[1]
            record = cached[2]
            touch_cache(recid, uid)
            mtime = get_cache_mtime(recid, uid)
            if not latest_record_revision(recid, record_revision):
                result['cacheOutdated'] = True
        else:
            record_revision, record = create_cache(recid, uid)
            mtime = get_cache_mtime(recid, uid)
            cache_dirty = False
        result['resultCode'] = 0
        result['resultText'] = 'Record OK'
        result['cacheDirty'] = cache_dirty
        result['cacheMTime'] = mtime
    record_order_subfields(record)
    return record
def _get_breaking_news(lang, journal_name):
    """
    Gets the 'Breaking News' articles that are currently active according to
    start and end dates.
    """
    # CERN Bulletin only
    if not journal_name.lower() == 'cernbulletin':
        return ''
    # Look for active breaking news
    breaking_news_recids = [recid for recid in search_pattern(p='980__a:BULLETINBREAKING') \
                            if record_exists(recid) == 1]
    today = time.mktime(time.localtime())
    breaking_news = ""
    for recid in breaking_news_recids:
        temp_rec = BibFormatObject(recid)
        try:
            end_date = time.mktime(time.strptime(temp_rec.field("925__b"),
                                                 "%m/%d/%Y"))
        except:
            end_date = time.mktime(time.strptime("01/01/1970", "%m/%d/%Y"))
        if end_date < today:
            continue
        try:
            start_date = time.mktime(time.strptime(temp_rec.field("925__a"),
                                                   "%m/%d/%Y"))
        except:
            start_date = time.mktime(time.strptime("01/01/2050", "%m/%d/%Y"))
        if start_date > today:
            continue
        publish_date = temp_rec.field("269__c")
        if lang == 'fr':
            title = temp_rec.field("246_1a")
        else:
            title = temp_rec.field("245__a")
        breaking_news += '''
<h2 class="%s">%s<br/>
    <strong>
        <a href="%s/journal/popup?name=%s&amp;type=breaking_news&amp;record=%s&amp;ln=%s" target="_blank">%s</a>
    </strong>
</h2>
''' % ("", publish_date, CFG_SITE_URL, journal_name, recid, lang, title)
    if breaking_news:
        breaking_news = '<li>%s</li>' % breaking_news

    return breaking_news
def _get_breaking_news(lang, journal_name):
    """
    Gets the 'Breaking News' articles that are currently active according to
    start and end dates.
    """
    # CERN Bulletin only
    if not journal_name.lower() == 'cernbulletin':
        return ''
    # Look for active breaking news
    breaking_news_recids = [recid for recid in search_pattern(p='980__a:BULLETINBREAKING') \
                            if record_exists(recid) == 1]
    today = time.mktime(time.localtime())
    breaking_news = ""
    for recid in breaking_news_recids:
        temp_rec = BibFormatObject(recid)
        try:
            end_date = time.mktime(time.strptime(temp_rec.field("925__b"),
                                                 "%m/%d/%Y"))
        except:
            end_date = time.mktime(time.strptime("01/01/1970", "%m/%d/%Y"))
        if end_date < today:
            continue
        try:
            start_date = time.mktime(time.strptime(temp_rec.field("925__a"),
                                                   "%m/%d/%Y"))
        except:
            start_date = time.mktime(time.strptime("01/01/2050", "%m/%d/%Y"))
        if start_date > today:
            continue
        publish_date = temp_rec.field("269__c")
        if lang == 'fr':
            title = temp_rec.field("246_1a")
        else:
            title = temp_rec.field("245__a")
        breaking_news += '''
<h2 class="%s">%s<br/>
    <strong>
        <a href="%s/journal/popup?name=%s&amp;type=breaking_news&amp;record=%s&amp;ln=%s" target="_blank">%s</a>
    </strong>
</h2>
''' % ("", publish_date, CFG_SITE_URL, journal_name, recid, lang, title)
    if breaking_news:
        breaking_news = '<li>%s</li>' % breaking_news

    return breaking_news
Example #21
0
def oai_list_metadata_formats(argd):
    """Generates response to oai_list_metadata_formats verb.

    If argd has a truthy 'identifier', the record behind it must either
    exist (record_exists() == 1) or be deleted (-1) while
    CFG_OAI_DELETED_POLICY differs from "no"; otherwise an 'idDoesNotExist'
    oai_error is returned. The normal response lists every entry of
    CFG_OAI_METADATA_FORMATS between the ListMetadataFormats header and
    footer.
    """
    if argd.get('identifier'):
        recid = oai_get_recid(argd['identifier'])
        status = record_exists(recid)
        if not (status == 1 or
                (status == -1 and CFG_OAI_DELETED_POLICY != "no")):
            return oai_error(argd, [("idDoesNotExist", "invalid record Identifier: %s" % argd['identifier'])])

    listing = ""
    for prefix, (_ignored, schema, namespace) in CFG_OAI_METADATA_FORMATS.items():
        listing += X.metadataFormat()(X.metadataPrefix(prefix),
                                      X.schema(schema),
                                      X.metadataNamespace(namespace))

    return oai_header(argd, "ListMetadataFormats") + listing + oai_footer("ListMetadataFormats")
Example #22
0
def get_attached_tags_on_record(record_id):
    """Return the tags attached to the record, sorted by tag name.

    :param record_id: record identifier
    :raises tags_errors.RecordNotFoundError: if ``record_exists`` does not
        report ``1`` for ``record_id``
    """
    from invenio.legacy.search_engine import record_exists
    # the record must exist
    if record_exists(record_id) != 1:
        raise tags_errors.RecordNotFoundError(
            "Tag error: Record with id={0} does not exist".format(record_id))
    links = WtgTAGRecord.query.filter(
        WtgTAGRecord.id_bibrec == record_id).all()
    # one tag lookup per association row
    tags = [WtgTAG.query.filter(WtgTAG.id == link.id_tag).first()
            for link in links]
    return sorted(tags, key=lambda tag: tag.name)
Example #23
0
def detach_tag_from_record(uid, tag_name, record_id):
    """Detach a tag from a record.

    :param uid: user identifier; when falsy, the id of ``current_user`` is
        used
    :param tag_name: name of the tag to be detached from the record
    :param record_id: record identifier
    :raises tags_errors.RecordNotFoundError: if ``record_exists`` does not
        report ``1`` for ``record_id``
    :raises tags_errors.TagNotFoundError: if the user has no tag named
        ``tag_name``
    :raises tags_errors.TagRecordAssociationError: if the tag is not
        attached to the record, or if deleting the association fails with
        ``DBAPIError``
    """
    from invenio.legacy.search_engine import record_exists
    if not uid:
        uid = current_user.get_id()
    # find record
    if record_exists(record_id) != 1:
        raise tags_errors.RecordNotFoundError(
            "Tag error: Record with id={0} does not exist".
            format(record_id))
    # find tag
    retrieved_tag = WtgTAG.query.filter(
        WtgTAG.name == tag_name,
        WtgTAG.id_user == uid
    ).first()
    if not retrieved_tag:
        raise tags_errors.TagNotFoundError(
            "Tag '{0}' cannot be detached because it was not found".
            format(tag_name))
    association = WtgTAGRecord.query.filter(
        WtgTAGRecord.id_bibrec == record_id,
        WtgTAGRecord.id_tag == retrieved_tag.id
    ).first()
    # nothing to detach if the tag is not attached to this record
    if not association:
        raise tags_errors.TagRecordAssociationError(
            "Tag '{0}' is not attached to record with id={1}".
            format(tag_name, record_id))
    # remove association
    try:
        db.session.delete(association)
        db.session.commit()
    except DBAPIError:
        db.session.rollback()
        raise tags_errors.TagRecordAssociationError(
            "Error while detaching tag '{0}' from record with id={1}".
            format(tag_name, record_id))
Example #24
0
def get_attached_tags_on_record(record_id):
    """Return the tags attached to the record, ordered by tag name.

    :param record_id: record identifier
    :raises tags_errors.RecordNotFoundError: if ``record_exists`` does not
        report ``1`` for ``record_id``
    """
    from invenio.legacy.search_engine import record_exists
    # refuse to continue for a record that is not reported as existing
    if record_exists(record_id) != 1:
        raise tags_errors.RecordNotFoundError(
            "Tag error: Record with id={0} does not exist".
            format(record_id))
    association_rows = WtgTAGRecord.query.filter(
        WtgTAGRecord.id_bibrec == record_id
    ).all()
    found_tags = []
    for row in association_rows:
        # resolve each association row to its tag
        tag_query = WtgTAG.query.filter(WtgTAG.id == row.id_tag)
        found_tags.append(tag_query.first())
    return sorted(found_tags, key=lambda tag: tag.name)
Example #25
0
def oai_get_record(argd):
    """Returns record 'identifier' according to 'metadataPrefix' format for OAI metadata harvesting.

    - if record does not exist, return oai_error 'idDoesNotExist'.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is anything
      but 'no', the record is still printed, with the deleted status
      passed on to print_record.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is 'no',
      then return oai_error 'idDoesNotExist'.
    """
    recid = oai_get_recid(argd["identifier"])
    status = record_exists(recid)

    # Guard: neither a live record nor a deleted one that may be exposed
    if status != 1 and (status != -1 or CFG_OAI_DELETED_POLICY == "no"):
        return oai_error(argd, [("idDoesNotExist", "invalid record Identifier: %s" % argd["identifier"])])

    record_xml = print_record(recid, argd["metadataPrefix"], status)
    return oai_header(argd, "GetRecord") + record_xml + oai_footer("GetRecord")
def solr_add_range(lower_recid, upper_recid, tags_to_index, next_commit_counter):
    """
    Adds the regarding field values of all records from the lower recid to the upper one to Solr.
    It preserves the fulltext information.

    Every recid in the inclusive range is visited; for those where
    record_exists() is truthy, the abstract, author, keyword and title
    contents plus the fulltext are handed to solr_add(). If the fulltext
    cannot be fetched or decoded, an empty string is indexed instead.

    Returns the commit counter as last returned by
    solr_commit_if_necessary() (the value passed in if nothing was added).
    """
    for recid in range(lower_recid, upper_recid + 1):
        # NOTE(review): any truthy status passes here, while other callers of
        # record_exists() compare against 1 explicitly -- confirm intended.
        if not record_exists(recid):
            continue

        abstract = get_field_content_in_utf8(recid, 'abstract', tags_to_index)
        author = get_field_content_in_utf8(recid, 'author', tags_to_index)
        keyword = get_field_content_in_utf8(recid, 'keyword', tags_to_index)
        title = get_field_content_in_utf8(recid, 'title', tags_to_index)
        try:
            bibrecdocs = BibRecDocs(recid)
            fulltext = unicode(bibrecdocs.get_text(), 'utf-8')
        except Exception:
            # Best effort: index without fulltext, but do not swallow
            # KeyboardInterrupt/SystemExit as a bare except would.
            fulltext = ''

        solr_add(recid, abstract, author, fulltext, keyword, title)
        next_commit_counter = solr_commit_if_necessary(next_commit_counter, recid=recid)

    return next_commit_counter
Example #27
0
def search_unit(query, f, m, wl=None):
    """Search for records in citation index.

    ``query`` is either an intbitset of recids or a value convertible to a
    single recid with ``int()``; a single recid is only used when
    ``record_exists`` reports 1 for it. ``f``, ``m`` and ``wl`` are accepted
    but not used here. Returns an intbitset holding the first element of
    every entry that ``calculate_co_cited_with_list`` yields for the recids.
    """
    from invenio.legacy.search_engine import record_exists
    from invenio.legacy.bibrank.citation_searcher import \
        calculate_co_cited_with_list

    co_cited = intbitset([])
    if not query:
        return co_cited

    if isinstance(query, intbitset):
        seeds = query
    else:
        seed = int(query)
        seeds = []
        if record_exists(seed) == 1:
            seeds = [seed]

    if not len(seeds):
        return co_cited

    for seed in seeds:
        entries = calculate_co_cited_with_list(seed)
        co_cited |= intbitset([entry[0] for entry in entries])

    return co_cited
Example #28
0
def oai_get_record(argd):
    """Returns record 'identifier' according to 'metadataPrefix' format for OAI metadata harvesting.

    - if record does not exist, return oai_error 'idDoesNotExist'.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is anything
      but 'no', the record is still printed, with the deleted status
      passed on to print_record.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is 'no',
      then return oai_error 'idDoesNotExist'.
    """
    recid = oai_get_recid(argd['identifier'])
    existence = record_exists(recid)

    exposable = existence == 1
    if not exposable and existence == -1:
        # deleted records are exposed unless the policy forbids it
        exposable = CFG_OAI_DELETED_POLICY != 'no'

    if not exposable:
        return oai_error(argd, [("idDoesNotExist", "invalid record Identifier: %s" % argd['identifier'])])

    body = print_record(recid, argd['metadataPrefix'], existence)
    return oai_header(argd, "GetRecord") + body + oai_footer("GetRecord")
Example #29
0
def search_unit(query, f, m, wl=None):
    """Search for similar records.

    Accepts an intbitset of recids, or a single value that is converted
    with ``int()`` and kept only if ``record_exists`` reports 1. The
    arguments ``f``, ``m`` and ``wl`` are not used. The returned intbitset
    is the union of the first element of each ``find_similar`` result.
    """
    from invenio.legacy.search_engine import record_exists
    from invenio.legacy.bibrank.record_sorter import METHODS
    from invenio.legacy.bibrank.word_searcher import find_similar

    matches = intbitset([])

    if query:
        if not isinstance(query, intbitset):
            single_recid = int(query)
            if record_exists(single_recid) == 1:
                sources = [single_recid]
            else:
                sources = []
        else:
            sources = query

        if len(sources):
            for source_recid in sources:
                similar = find_similar(
                    "jif", source_recid, intbitset([]),
                    rank_limit_relevance=0, verbose=0, methods=METHODS)
                matches |= intbitset(similar[0])

    return matches
Example #30
0
def detach_tag_from_record(uid, tag_name, record_id):
    """Detach a tag from a record.

    :param uid: user identifier; when falsy the id of ``current_user`` is used
    :param tag_name: name of the tag (owned by the user) to detach
    :param record_id: record identifier
    :raises tags_errors.RecordNotFoundError: ``record_exists`` does not
        report the record as existing (return value other than 1)
    :raises tags_errors.TagNotFoundError: the user owns no tag with this name
    :raises tags_errors.TagRecordAssociationError: the tag is not attached
        to the record, or the database refused to delete the association
    """
    from invenio.legacy.search_engine import record_exists
    if not uid:
        uid = current_user.get_id()
    # find record
    if record_exists(record_id) != 1:
        raise tags_errors.RecordNotFoundError(
            "Tag error: Record with id={0} does not exist".format(record_id))
    # find tag
    retrieved_tag = WtgTAG.query.filter(WtgTAG.name == tag_name,
                                        WtgTAG.id_user == uid).first()
    if not retrieved_tag:
        raise tags_errors.TagNotFoundError(
            "Tag '{0}' cannot be detached because it was not found".format(
                tag_name))
    association = WtgTAGRecord.query.filter(
        WtgTAGRecord.id_bibrec == record_id,
        WtgTAGRecord.id_tag == retrieved_tag.id).first()
    # nothing to remove when the tag was never attached to this record
    if not association:
        # The closing quote after the tag name was missing in this message.
        raise tags_errors.TagRecordAssociationError(
            "Tag '{0}' is not attached to record with id={1}".format(
                tag_name, record_id))
    # remove association
    try:
        db.session.delete(association)
        db.session.commit()
    except DBAPIError:
        # leave the session usable before reporting the failure
        db.session.rollback()
        raise tags_errors.TagRecordAssociationError(
            "Error while detaching tag '{0}' from record with id={1}".
            format(tag_name, record_id))
Example #31
0
def _get_record(recid, uid, result, fresh_record=False):
    """Fetch the record structure for `recid`, going through the user's cache.

    `result` is filled in place: 'resultCode' and 'resultText' are always
    set (code 1 with an explanation on failure, 0 / 'Record OK' on success);
    on success 'cacheDirty' and 'cacheMTime' are set too, and 'cacheOutdated'
    is set to True when the cached revision is not the latest one.

    With `fresh_record` true any existing cache is deleted and rebuilt.
    Returns the record, or None when it could not be retrieved.
    """
    record = None
    status = record_exists(recid)
    has_cache = cache_exists(recid, uid)

    # Work out why the record cannot be served, if at all.
    if status == 0:
        problem = 'Non-existent record: %s' % recid
    elif status == -1:
        problem = 'Deleted record: %s' % recid
    elif (not has_cache or cache_expired(recid, uid)) and \
            record_locked_by_other_user(recid, uid):
        # Someone else's lock only matters without a still-valid own cache.
        problem = 'Record %s locked by user' % recid
    elif record_locked_by_queue(recid):
        problem = 'Record %s locked by queue' % recid
    else:
        problem = None

    if problem is not None:
        result['resultCode'] = 1
        result['resultText'] = problem
    else:
        if fresh_record:
            delete_cache(recid, uid)
            has_cache = False
        if has_cache:
            contents = get_cache_contents(recid, uid)
            cache_dirty = contents[0]
            record_revision = contents[1]
            record = contents[2]
            touch_cache(recid, uid)
            mtime = get_cache_mtime(recid, uid)
            if not latest_record_revision(recid, record_revision):
                result['cacheOutdated'] = True
        else:
            record_revision, record = create_cache(recid, uid)
            mtime = get_cache_mtime(recid, uid)
            cache_dirty = False
        result['resultCode'] = 0
        result['resultText'] = 'Record OK'
        result['cacheDirty'] = cache_dirty
        result['cacheMTime'] = mtime

    record_order_subfields(record)
    return record
Example #32
0
def attach_tags_to_record(uid, list_of_tags, record_id):
    """Attach several tags to one record.

    :param uid: a user id; when falsy the id of ``current_user`` is used
    :param list_of_tags: tag names to attach (sorted in place before use)
    :param record_id: record identifier
    :return: the truthy results of ``attach_tag_to_record``, in sorted
        tag-name order
    :raises tags_errors.RecordNotFoundError: ``record_exists`` does not
        return 1 for `record_id`
    """
    from invenio.legacy.search_engine import record_exists

    if record_exists(record_id) != 1:
        raise tags_errors.RecordNotFoundError(
            "Tag error: Record with id={0} does not exist".format(record_id))

    uid = uid or current_user.get_id()

    # Tags are attached in alphabetical order.
    list_of_tags.sort()
    attached = (attach_tag_to_record(uid, name, record_id)
                for name in list_of_tags)
    # Only truthy results are reported back to the caller.
    return [tag for tag in attached if tag]
Example #33
0
def get_low_level_recIDs_from_control_no(control_no):
    """
    Return the list of EXISTING record ID(s) of the authority record(s)
    matching the given (INVENIO) MARC control_no
    (e.g. 'AUTHOR:(XYZ)abc123').

    Normally the list holds exactly one element; the result is handed to
    _assert_unique_control_no before being returned.

    @param control_no: a (INVENIO) MARC internal control_no to an authority record
    @type control_no: string

    @return: list containing the record ID(s) of the referenced authority
        record (should be only one)
    """
    candidates = _get_low_level_recIDs_intbitset_from_control_no(control_no)

    # keep only records for which record_exists reports a positive status
    # (this drops the "DELETED" ones)
    recIDs = [recID for recID in candidates if record_exists(recID) > 0]

    # normally there should be exactly 1 authority record per control_number
    _assert_unique_control_no(recIDs, control_no)

    return recIDs
Example #34
0
    def test_record_creation(self):
        """Run every record-creating deposition workflow and check its record.

        For each deposition type whose workflow contains the
        ``create_record_from_marc`` step, the workflow is driven to
        completion with dummy text-area values, then the resulting record,
        the scheduled task and the exported MARC are checked.
        """
        import os
        from wtforms import TextAreaField
        from datetime import datetime

        from invenio.legacy.search_engine import record_exists
        from invenio.cache import cache
        from invenio.config import CFG_PREFIX
        from invenio.modules.workflows.models import Workflow
        from invenio.modules.workflows.config import CFG_WORKFLOW_STATUS
        from invenio.modules.scheduler.models import SchTASK

        from invenio.webdeposit_utils import get_form, create_workflow, \
            set_form_status, CFG_DRAFT_STATUS
        from invenio_deposit.loader import \
            deposition_metadata
        from invenio.webdeposit_workflow_utils import \
            create_record_from_marc
        from invenio.modules.record.api import get_record

        def maps_to_marc(candidate):
            # Text-area fields tied to a marc field end up in the record.
            return isinstance(candidate, TextAreaField) and \
                (candidate.has_recjson_key() or candidate.has_cook_function())

        user_id = self.login_user()
        for deposition_type in deposition_metadata.keys():

            deposition = create_workflow(deposition_type, user_id)
            assert deposition is not None

            # Depositions that never create a record are skipped.
            create_rec = create_record_from_marc()
            record_steps = [step for step in deposition.workflow
                            if create_rec.func_code == step.func_code]
            if not record_steps:
                continue

            uuid = deposition.get_uuid()

            cache.delete_many("1:current_deposition_type", "1:current_uuid")
            cache.add("1:current_deposition_type", deposition_type)
            cache.add("1:current_uuid", uuid)

            # Run the workflow
            deposition.run()

            # Dummy form values, keyed by field name
            form = get_form(user_id, uuid=uuid)
            webdeposit_json = {}
            for field in form:
                if maps_to_marc(field):
                    webdeposit_json[field.name] = "test " + field.name

            draft = {
                'form_type': form.__class__.__name__,
                'form_values': webdeposit_json,
                'step': 0,  # dummy step
                'status': CFG_DRAFT_STATUS['finished'],
                'timestamp': str(datetime.now()),
            }

            # Add a draft for the first step
            Workflow.set_extra_data(user_id=user_id,
                                    uuid=uuid,
                                    key='drafts',
                                    value={0: draft})

            # Keep resuming the workflow until it reports completion.
            workflow_status = CFG_WORKFLOW_STATUS.RUNNING
            while workflow_status != CFG_WORKFLOW_STATUS.COMPLETED:
                deposition.run()
                set_form_status(user_id, uuid, CFG_DRAFT_STATUS['finished'])
                workflow_status = deposition.get_status()

            # Workflow is finished: a record id must exist
            recid = deposition.get_data('recid')
            assert recid is not None
            assert record_exists(recid) == 1

            # ... and so must the scheduled task
            task_id = deposition.get_data('task_id')
            assert task_id is not None

            bibtask = SchTASK.query.filter(SchTASK.id == task_id).first()
            assert bibtask is not None

            # Run bibupload manually for the scheduled task
            cmd = "%s/bin/bibupload %s" % (CFG_PREFIX, task_id)
            assert not os.system(cmd)

            # Every dummy value must show up in the exported MARC.
            marc = get_record(recid).legacy_export_as_marc()
            for field in form:
                if maps_to_marc(field):
                    assert "test " + field.name in marc
Example #35
0
def check_user_can_view_record(user_info, recid):
    """Tell whether the user may view the record `recid`.

    Access is granted when the user owns or is a registered viewer of the
    record, when the record is public and in no restricted collection, or
    when the user is authorized for the restricted collections the record
    belongs to (all of them, or any one of them when the
    CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY setting is 'ANY').

    :param user_info: the user_info dictionary that describe the user.
    :type user_info: user_info dictionary
    :param recid: the record identifier (a ``str`` is converted with int()).
    :type recid: positive integer
    :return: (0, ''), when authorization is granted, (>0, 'message') when
    authorization is not granted
    """
    from invenio.modules.access.engine import acc_authorize_action
    from invenio.modules.access.local_config import VIEWRESTRCOLL
    from invenio.modules.collections.cache import is_record_in_any_collection
    from invenio.legacy.search_engine import record_public_p, record_exists

    policy = cfg['CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY'].strip().upper()
    granted = (0, '')

    if isinstance(recid, str):
        recid = int(recid)

    # Owners and explicit viewers are always let in.
    if is_user_owner_of_record(user_info, recid) or \
            is_user_viewer_of_record(user_info, recid):
        return granted

    restricted_collections = get_restricted_collections_for_recid(
        recid, recreate_cache_if_needed=False)

    if not restricted_collections and record_public_p(recid):
        # Public record outside of any restricted collection.
        return granted

    if restricted_collections:
        # The user must be authorized to all/any of the restricted
        # collections, depending on the policy.
        auth_code, auth_msg = 0, ''
        for collection in restricted_collections:
            (auth_code, auth_msg) = acc_authorize_action(user_info,
                                                         VIEWRESTRCOLL,
                                                         collection=collection)
            if policy == 'ANY':
                if auth_code == 0:
                    # One authorized collection is enough.
                    return granted
            elif auth_code:
                # One refused collection is enough to deny.
                return (auth_code, auth_msg)
        # Outcome of the last collection checked decides.
        return auth_code, auth_msg

    if is_record_in_any_collection(recid, recreate_cache_if_needed=False):
        # the record is not in any restricted collection
        return granted

    if not (record_exists(recid) > 0):
        # The record either does not exists or has been deleted.
        # Let's handle these situations outside of this code.
        return granted

    # We are in the case where webcoll has not run.
    # Let's authorize SUPERADMIN
    (auth_code, auth_msg) = acc_authorize_action(user_info,
                                                 VIEWRESTRCOLL,
                                                 collection=None)
    if auth_code == 0:
        return granted
    # Too bad. Let's print a nice message:
    return (
        1, "The record you are trying to access has just been "
        "submitted to the system and needs to be assigned to the "
        "proper collections. It is currently restricted for security "
        "reasons until the assignment will be fully completed. Please "
        "come back later to properly access this record.")
Example #36
0
    def decorated(recid, *args, **kwargs):
        """Prepare the request context for a record page, then call the view.

        Loads the record and its primary collection, checks that the
        current user may view it, handles deleted/merged records, registers
        the template context used by record pages, and finally delegates to
        the wrapped view ``f``.

        NOTE(review): ``f`` and ``blueprint`` are not defined in this block;
        presumably they come from the enclosing decorator scope -- confirm.

        :param recid: record identifier (converted with ``int()``)
        :return: the 404 template when ``get_record`` returns None, a
            redirect to the merged record when the record was deleted and
            merged, otherwise whatever ``f(recid, *args, **kwargs)`` returns
        """
        from invenio.modules.access.mailcookie import \
            mail_cookie_create_authorize_action
        from invenio.modules.access.local_config import VIEWRESTRCOLL
        from invenio.legacy.search_engine import \
            guess_primary_collection_of_a_record, \
            check_user_can_view_record
        # ensure recid to be integer
        recid = int(recid)
        # expose the primary collection and the Bibrec row on the Flask `g`
        # object; `.one()` is called before the record is known to exist
        g.collection = collection = Collection.query.filter(
            Collection.name == guess_primary_collection_of_a_record(recid)).\
            one()
        g.bibrec = Bibrec.query.get(recid)

        record = get_record(recid)
        if record is None:
            return render_template('404.html')

        (auth_code, auth_msg) = check_user_can_view_record(current_user, recid)

        # only superadmins can use verbose parameter for obtaining debug
        # information
        if not current_user.is_super_admin and 'verbose' in kwargs:
            kwargs['verbose'] = 0

        # a non-zero auth_code means access was refused
        if auth_code:
            flash(auth_msg, 'error')
            abort(apache.HTTP_UNAUTHORIZED)

        from invenio.legacy.search_engine import record_exists, \
            get_merged_recid
        # check if the current record has been deleted
        # and has been merged, case in which the deleted record
        # will be redirect to the new one
        record_status = record_exists(recid)
        merged_recid = get_merged_recid(recid)
        if record_status == -1 and merged_recid:
            return redirect(url_for('record.metadata', recid=merged_recid))
        elif record_status == -1:
            abort(apache.HTTP_GONE)  # The record is gone!

        title = record.get(cfg.get('RECORDS_BREADCRUMB_TITLE_KEY'), '')
        # starts empty here; handed to the templates through record_context
        tabs = []

        if cfg.get('CFG_WEBLINKBACK_TRACKBACK_ENABLED'):

            @register_template_context_processor
            def trackback_context():
                """Provide the trackback auto-discovery tag to the templates."""
                from invenio.legacy.weblinkback.templates import \
                    get_trackback_auto_discovery_tag
                return {
                    'headerLinkbackTrackbackLink':
                    get_trackback_auto_discovery_tag(recid)
                }

        def _format_record(recid,
                           of='hd',
                           user_info=current_user,
                           *args,
                           **kwargs):
            """Call ``format_record`` with 'hd' and the current user as defaults."""
            from invenio.modules.formatter import format_record
            return format_record(recid,
                                 of,
                                 user_info=user_info,
                                 *args,
                                 **kwargs)

        @register_template_context_processor
        def record_context():
            """Provide the record-related variables to the templates."""
            from invenio.modules.comments.api import get_mini_reviews
            return dict(recid=recid,
                        record=record,
                        tabs=tabs,
                        title=title,
                        get_mini_reviews=get_mini_reviews,
                        collection=collection,
                        format_record=_format_record)

        # signal sender is "<blueprint name>.<view function name>"
        pre_template_render.send(
            "%s.%s" % (blueprint.name, f.__name__),
            recid=recid,
        )
        return f(recid, *args, **kwargs)
Example #37
0
def record_get_xml(recID, format='xm', decompress=zlib.decompress,
                   on_the_fly=False):
    """
    Returns an XML string of the record given by recID.

    The function builds the XML directly from the database,
    without using the standard formatting process.

    'format' allows to define the flavour of XML:
        - 'xm' for standard XML
        - 'marcxml' for MARC XML
        - 'oai_dc' for OAI Dublin Core
        - 'xd' for XML Dublin Core

    If record does not exist, returns empty string.
    If the record is deleted, returns an empty MARCXML (with recid
    controlfield, OAI ID fields and 980__c=DELETED)

    All values coming from the caller (recID, format) are handed to
    run_sql as bound parameters; only the internally generated bibXXx
    table names are interpolated into the SQL text.

    @param recID: the id of the record to retrieve
    @param format: the format to use
    @param on_the_fly: if False, try to fetch precreated one in database
    @param decompress: the library to use to decompress cache from DB
    @return: the xml string of the record
    """
    from invenio.legacy.search_engine import record_exists

    def get_creation_date(recID, fmt="%Y-%m-%d"):
        "Returns the creation date of the record 'recID'."
        out = ""
        res = run_sql("SELECT DATE_FORMAT(creation_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1)
        if res:
            out = res[0][0]
        return out

    def get_modification_date(recID, fmt="%Y-%m-%d"):
        "Returns the date of last modification for the record 'recID'."
        out = ""
        res = run_sql("SELECT DATE_FORMAT(modification_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1)
        if res:
            out = res[0][0]
        return out

    out = ""

    # sanity check:
    record_exist_p = record_exists(recID)
    if record_exist_p == 0: # doesn't exist
        return out

    # print record opening tags, if needed:
    if format == "marcxml" or format == "oai_dc":
        out += "  <record>\n"
        out += "   <header>\n"

        for identifier in get_fieldvalues(recID, CFG_OAI_ID_FIELD):
            out += "    <identifier>%s</identifier>\n" % identifier
        out += "    <datestamp>%s</datestamp>\n" % get_modification_date(recID)
        out += "   </header>\n"
        out += "   <metadata>\n"

    if format.startswith("xm") or format == "marcxml":
        res = None
        if on_the_fly is False:
            # look for cached format existence; 'format' only has to start
            # with "xm", so it must never be pasted into the SQL text:
            res = run_sql("SELECT value FROM bibfmt WHERE "
                          "id_bibrec=%s AND format=%s", (recID, format), 1)
        if res and record_exist_p == 1:
            # record 'recID' is formatted in 'format', so print it
            out += "%s" % decompress(res[0][0])
        else:
            # record 'recID' is not formatted in 'format' -- they are
            # not in "bibfmt" table; so fetch all the data from
            # "bibXXx" tables:
            if format == "marcxml":
                out += """    <record xmlns="http://www.loc.gov/MARC21/slim">\n"""
                out += "        <controlfield tag=\"001\">%d</controlfield>\n" % int(recID)
            elif format.startswith("xm"):
                out += """    <record>\n"""
                out += "        <controlfield tag=\"001\">%d</controlfield>\n" % int(recID)
            if record_exist_p == -1:
                # deleted record, so display only OAI ID and 980:
                oai_ids = get_fieldvalues(recID, CFG_OAI_ID_FIELD)
                if oai_ids:
                    out += "<datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\"><subfield code=\"%s\">%s</subfield></datafield>\n" % \
                           (CFG_OAI_ID_FIELD[0:3],
                            CFG_OAI_ID_FIELD[3:4],
                            CFG_OAI_ID_FIELD[4:5],
                            CFG_OAI_ID_FIELD[5:6],
                            oai_ids[0])
                out += "<datafield tag=\"980\" ind1=\" \" ind2=\" \"><subfield code=\"c\">DELETED</subfield></datafield>\n"
                from invenio.legacy.search_engine import get_merged_recid
                merged_recid = get_merged_recid(recID)
                if merged_recid: # record was deleted but merged to other record, so display this information:
                    out += "<datafield tag=\"970\" ind1=\" \" ind2=\" \"><subfield code=\"d\">%d</subfield></datafield>\n" % merged_recid
            else:
                # controlfields ('%%' is a literal percent sign once the
                # query goes through parameter substitution)
                query = "SELECT b.tag,b.value,bb.field_number FROM bib00x AS b, bibrec_bib00x AS bb "\
                        "WHERE bb.id_bibrec=%s AND b.id=bb.id_bibxxx AND b.tag LIKE '00%%' "\
                        "ORDER BY bb.field_number, b.tag ASC"
                res = run_sql(query, (recID,))
                for row in res:
                    field, value = row[0], row[1]
                    value = encode_for_xml(value)
                    out += """        <controlfield tag="%s">%s</controlfield>\n""" % \
                           (encode_for_xml(field[0:3]), value)
                # datafields
                i = 1 # Do not process bib00x and bibrec_bib00x, as
                      # they are controlfields. So start at bib01x and
                      # bibrec_bib01x (and set i = 0 at the end of
                      # first loop)
                for digit1 in range(0, 10):
                    for digit2 in range(i, 10):
                        bx = "bib%d%dx" % (digit1, digit2)
                        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
                        # Table names cannot be bound, but they are built
                        # from loop digits only; the '%%s' placeholders
                        # survive this formatting step as '%s'.
                        query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\
                                "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s "\
                                "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx)
                        tag_pattern = "%d%d%%" % (digit1, digit2)
                        res = run_sql(query, (recID, tag_pattern))
                        field_number_old = -999
                        field_old = ""
                        for row in res:
                            field, value, field_number = row[0], row[1], row[2]
                            ind1, ind2 = field[3], field[4]
                            if ind1 == "_" or ind1 == "":
                                ind1 = " "
                            if ind2 == "_" or ind2 == "":
                                ind2 = " "
                            # print field tag
                            if field_number != field_number_old or \
                                   field[:-1] != field_old[:-1]:
                                if field_number_old != -999:
                                    out += """        </datafield>\n"""
                                out += """        <datafield tag="%s" ind1="%s" ind2="%s">\n""" % \
                                       (encode_for_xml(field[0:3]),
                                        encode_for_xml(ind1),
                                        encode_for_xml(ind2))
                                field_number_old = field_number
                                field_old = field
                            # print subfield value
                            value = encode_for_xml(value)
                            out += """            <subfield code="%s">%s</subfield>\n""" % \
                                   (encode_for_xml(field[-1:]), value)

                        # all fields/subfields printed in this run, so close the tag:
                        if field_number_old != -999:
                            out += """        </datafield>\n"""
                    i = 0 # Next loops should start looking at bibX0x and bibrec_bibX0x
            # we are at the end of printing the record:
            out += "    </record>\n"

    elif format == "xd" or format == "oai_dc":
        # XML Dublin Core format, possibly OAI -- select only some bibXXx fields:
        out += """    <dc xmlns="http://purl.org/dc/elements/1.1/"
                         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                         xsi:schemaLocation="http://purl.org/dc/elements/1.1/
                                             http://www.openarchives.org/OAI/1.1/dc.xsd">\n"""
        # a deleted record gets an empty <dc> element
        if record_exist_p != -1:
            # NOTE(review): unlike the other elements, the language value is
            # written without encode_for_xml -- confirm this is intended.
            for f in get_fieldvalues(recID, "041__a"):
                out += "        <language>%s</language>\n" % f

            for f in get_fieldvalues(recID, "100__a"):
                out += "        <creator>%s</creator>\n" % encode_for_xml(f)

            for f in get_fieldvalues(recID, "700__a"):
                out += "        <creator>%s</creator>\n" % encode_for_xml(f)

            for f in get_fieldvalues(recID, "245__a"):
                out += "        <title>%s</title>\n" % encode_for_xml(f)

            for f in get_fieldvalues(recID, "65017a"):
                out += "        <subject>%s</subject>\n" % encode_for_xml(f)

            for f in get_fieldvalues(recID, "8564_u"):
                out += "        <identifier>%s</identifier>\n" % encode_for_xml(f)

            for f in get_fieldvalues(recID, "520__a"):
                out += "        <description>%s</description>\n" % encode_for_xml(f)

            out += "        <date>%s</date>\n" % get_creation_date(recID)
        out += "    </dc>\n"


    # print record closing tags, if needed:
    if format == "marcxml" or format == "oai_dc":
        out += "   </metadata>\n"
        out += "  </record>\n"

    return out
Example #38
0
    def __call__(self, req, form):
        """Serve the page of the record ``self.recid``.

        Washes the URL arguments, applies the user's preferences and the
        limits on 'rg' / 'wl' / 'verbose', checks that the user may view
        the record (guests are sent to the login page, other refused users
        get a not-authorized page), deals with deleted or merged records
        and finally hands over to ``perform_request_search``.
        """
        argd = wash_search_urlargd(form)
        argd['recid'] = self.recid
        argd['tab'] = self.tab

        # do we really enter here ?
        if self.format is not None:
            argd['of'] = self.format
        req.argd = argd

        uid = getUid(req)
        if uid == -1:
            return page_not_authorized(
                req, "../",
                text="You are not authorized to view this record.",
                navmenuid='search')
        if uid > 0:
            pref = get_user_preferences(uid)
            try:
                # the user's rg preference applies unless the URL sets one
                if 'rg' not in form:
                    argd['rg'] = int(pref['websearch_group_records'])
            except (KeyError, ValueError):
                pass

        user_info = collect_user_info(req)
        (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid)

        over_group_limit = argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS
        if over_group_limit and acc_authorize_action(req, 'runbibedit')[0] != 0:
            argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS

        # users without the 'runbibedit' right cannot raise (or disable)
        # the wildcard limit; they fall back to the configured one
        if CFG_WEBSEARCH_WILDCARD_LIMIT > 0:
            beyond_limit = argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0
            if beyond_limit and acc_authorize_action(req, 'runbibedit')[0] != 0:
                argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

        # only superadmins can use verbose parameter for obtaining debug information
        if not isUserSuperAdmin(user_info):
            argd['verbose'] = 0

        if auth_code:
            if user_info['email'] == 'guest':
                # guests get a chance to log in and come back
                cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
                login_args = make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
                target = CFG_SITE_SECURE_URL + '/youraccount/login' + login_args
                return redirect_to_url(req, target, norobot=True)
            return page_not_authorized(req, "../",
                                       text=auth_msg,
                                       navmenuid='search')

        from invenio.legacy.search_engine import record_exists, get_merged_recid
        # a deleted record that was merged into another one redirects
        # there; a plainly deleted record is flagged as gone
        record_status = record_exists(argd['recid'])
        merged_recid = get_merged_recid(argd['recid'])
        if record_status == -1:
            if merged_recid:
                url = (CFG_SITE_URL + '/' + CFG_SITE_RECORD + '/%s?ln=%s') % \
                    (str(merged_recid), argd['ln'])
                redirect_to_url(req, url)
            else:
                req.status = apache.HTTP_GONE ## The record is gone!

        # mod_python does not like to return [] in case when of=id:
        out = perform_request_search(req, **argd)
        if isinstance(out, intbitset):
            return out.fastdump()
        if out == []:
            return str(out)
        return out
Example #39
0
def main():
    """Core loop.

    Opens a timestamped log file in CFG_LOGDIR, backs up the database
    tables, then runs fix_bibdoc_bibdoc on every (id_bibdoc1, id_bibdoc2)
    pair returned by retrieve_bibdoc_bibdoc, logging failures.  Records
    that exist and are linked to the processed documents are collected and
    handed to cli_fix_marc at the end, whether or not the loop succeeded.

    The process exits with status 1 when the log file cannot be created,
    when the table backup fails, or when the migration loop is interrupted.
    The log file is closed on every way out of the function.
    """
    check_running_process_user()
    logfilename = '%s/fulltext_files_migration_kit-%s.log' % (
        CFG_LOGDIR, datetime.today().strftime('%Y%m%d%H%M%S'))
    try:
        logfile = open(logfilename, 'w')
    except IOError as e:
        print(
            wrap_text_in_a_box(
                'NOTE: it\'s impossible to create the log:\n\n  %s\n\nbecause of:\n\n  %s\n\nPlease run this migration kit as the same user who runs Invenio (e.g. Apache)'
                % (logfilename, e),
                style='conclusion',
                break_long=False))
        sys.exit(1)

    bibdoc_bibdoc = retrieve_bibdoc_bibdoc()

    print(
        wrap_text_in_a_box(
            """This script migrate the filesystem structure used to store icons files to the new stricter structure.
This script must not be run during normal Invenio operations.
It is safe to run this script. No file will be deleted.
Anyway it is recommended to run a backup of the filesystem structure just in case.
A backup of the database tables involved will be automatically performed.""",
            style='important'))
    if not bibdoc_bibdoc:
        # nothing will be written: do not leave the handle open
        logfile.close()
        print(wrap_text_in_a_box("No need for migration", style='conclusion'))
        return
    print("%s icons will be migrated/fixed." % len(bibdoc_bibdoc))
    wait_for_user()
    print("Backing up database tables")
    try:
        if not backup_tables():
            print(
                wrap_text_in_a_box(
                    """It appears that is not the first time that you run this script.
Backup tables have been already created by a previous run.
In order for the script to go further they need to be removed.""",
                    style='important'))

            wait_for_user()
            print(
                "Backing up database tables (after dropping previous backup)",
                end=' ')
            backup_tables(drop=True)
            print("-> OK")
        else:
            print("-> OK")
    except Exception as e:
        logfile.close()
        print(
            wrap_text_in_a_box(
                "Unexpected error while backing up tables. Please, do your checks: %s"
                % e,
                style='conclusion'))
        sys.exit(1)

    to_fix_marc = intbitset()
    print("Created a complete log file into %s" % logfilename)
    try:
        try:
            for id_bibdoc1, id_bibdoc2 in bibdoc_bibdoc:
                try:
                    record_does_exist = True
                    recids = get_recid_from_docid(id_bibdoc1)
                    if not recids:
                        print("Skipping %s" % id_bibdoc1)
                        continue
                    for recid in recids:
                        if record_exists(recid[0]) > 0:
                            to_fix_marc.add(recid[0])
                        else:
                            record_does_exist = False
                    if not fix_bibdoc_bibdoc(id_bibdoc1, id_bibdoc2, logfile):
                        # a failed fix only counts as an error when every
                        # linked record exists
                        if record_does_exist:
                            # RuntimeError exists on Python 2 and 3 alike
                            # (StandardError is Python 2 only); it is
                            # logged by the handler just below.
                            raise RuntimeError(
                                "Error when correcting document ID %s" %
                                id_bibdoc1)
                except Exception as err:
                    # per-document failures are logged, the loop goes on
                    print("ERROR: %s" % err, file=logfile)
            print(wrap_text_in_a_box("DONE", style='conclusion'))
        except:
            # deliberately broad: anything that breaks out of the loop
            # (including an interruption) is reported before exiting
            logfile.close()
            register_exception()
            print(
                wrap_text_in_a_box(
                    title="INTERRUPTED BECAUSE OF ERROR!",
                    body=
                    """Please see the log file %s for what was the status prior to the error. Contact %s in case of problems, attaching the log."""
                    % (logfilename, CFG_SITE_SUPPORT_EMAIL),
                    style='conclusion'))
            sys.exit(1)
    finally:
        # closing twice is harmless; this covers the success path
        logfile.close()
        print(
            "Scheduling FIX-MARC to synchronize MARCXML for updated records.")
        cli_fix_marc(options={}, explicit_recid_set=to_fix_marc)
Example #40
0
def Ask_For_Record_Details_Confirmation(parameters, \
                                        curdir, \
                                        form, \
                                        user_info=None):
    """
       Display the details of a record on which some operation is to be carried
       out and prompt for the user's confirmation that it is the correct record.
       Upon the clicking of the confirmation button, augment step by one.

       Given the "recid" (001) of a record, retrieve the basic metadata
       (title, report-number(s) and author(s)) and display them in the
       user's browser along with a prompt asking them to confirm that
       it is indeed the record that they expected to see.

       The function depends upon the presence of the "sysno" global and the
       presence of the "step" field in the "form" parameter.
       When the user clicks on the "confirm" button, step will be augmented by
       1 and the form will be submitted.
       @param parameters: not used by this function.
       @param curdir: not used by this function.
       @param form: mapping holding the submission form fields; its "step"
        entry must be convertible to an integer.
       @param user_info: not used by this function.
       @return: None (the function always terminates by raising).
       @Exceptions raise: InvenioWebSubmitFunctionError if the step or the
        record ID is missing/invalid, or if the record does not exist;
        InvenioWebSubmitFunctionStop in order to display the details of the
        record and the confirmation message.
    """
    global sysno

    def _escaped_lines(text):
        ## HTML-escape every line of "text" and terminate it with "<br />".
        return "".join(["%s<br />\n" % cgi.escape(line.strip()) \
                        for line in text.split("\n")])

    ## Make sure that we know the current step.
    ## A missing "step" raises KeyError and a non-numeric one ValueError;
    ## both must be reported as a submission error, just like a None value.
    try:
        current_step = int(form['step'])
    except (KeyError, TypeError, ValueError):
        ## Can't determine step.
        msg = "Unable to determine submission step. Cannot continue."
        raise InvenioWebSubmitFunctionError(msg)
    else:
        newstep = current_step + 1

    ## Make sure that the sysno is valid (int() raises ValueError for
    ## non-numeric strings and TypeError for None):
    try:
        working_recid = int(sysno)
    except (TypeError, ValueError):
        ## Unable to find the details of this record - cannot query the database
        msg = "Unable to retrieve details of record - record id was invalid."
        raise InvenioWebSubmitFunctionError(msg)

    if not record_exists(working_recid):
        ## Record doesn't exist.
        msg = "Unable to retrieve details of record [%s] - record does not " \
              "exist." % working_recid
        raise InvenioWebSubmitFunctionError(msg)

    ## Retrieve the details to be displayed:
    ##
    ## Author(s): first author (100__a) followed by the others (700__a).
    rec_authors = ""
    rec_first_author = print_record(working_recid, 'tm', "100__a")
    rec_other_authors = print_record(working_recid, 'tm', "700__a")
    if rec_first_author != "":
        rec_authors += _escaped_lines(rec_first_author)
    if rec_other_authors != "":
        rec_authors += _escaped_lines(rec_other_authors)

    ## Title (always rendered, even when the field is empty):
    rec_title = _escaped_lines(print_record(working_recid, 'tm', "245__a"))

    ## Report numbers: primary (037__a) followed by the others (088__a).
    rec_reportnums = ""
    rec_reportnum = print_record(working_recid, 'tm', "037__a")
    rec_other_reportnums = print_record(working_recid, 'tm', "088__a")
    if rec_reportnum != "":
        rec_reportnums += _escaped_lines(rec_reportnum)
    if rec_other_reportnums != "":
        rec_reportnums += _escaped_lines(rec_other_reportnums)

    ## Stop the submission flow here and show the confirmation page:
    raise InvenioWebSubmitFunctionStop(CFG_DOCUMENT_DETAILS_MESSAGE % \
                                  { 'report-numbers' : rec_reportnums, \
                                    'title'          : rec_title, \
                                    'author'         : rec_authors, \
                                    'newstep'        : newstep, \
                                    'admin-email'    : CFG_SITE_ADMIN_EMAIL, \
                                  }   )
Example #41
0
def print_record(recid,
                 prefix='marcxml',
                 verb='ListRecords',
                 set_spec=None,
                 set_last_updated=None):
    """Return record 'recid' formatted according to 'prefix'.

    A record counts as available when it exists and, if 'set_spec' is
    given, belongs to that set or to one of its subsets ("<set_spec>:...").

    - if the record is not available and CFG_OAI_DELETED_POLICY is
      neither 'persistent' nor 'transient', return an empty string.

    - if the record has no OAI identifier, return an empty string.

    - if the record is not available but the policy is 'persistent' or
      'transient', the header carries the status 'deleted' and no
      metadata is attached.

    - if 'verb' is 'ListIdentifiers', only the header is returned;
      otherwise a full record element is returned.

    When 'set_last_updated' is given, the datestamp is the later of the
    record modification date and 'set_last_updated'.
    """
    is_available = record_exists(recid) == 1
    if is_available:
        record_sets = get_field(recid, CFG_OAI_SET_FIELD)
        if set_spec is not None and set_spec not in record_sets:
            in_subset = any(
                name.startswith("%s:" % set_spec) for name in record_sets)
            if not in_subset:
                ## the record is not in the requested set, and is not
                ## in any subset
                is_available = False

    if not is_available and CFG_OAI_DELETED_POLICY not in ('persistent',
                                                            'transient'):
        return ""

    status = None if is_available else 'deleted'

    idents = get_field(recid, CFG_OAI_ID_FIELD)
    if not idents:
        return ""
    ## FIXME: Move the OAI ID sanity checks (exactly one identifier per
    ## record, alerting the admin otherwise) in a bibtask. For now only
    ## the first identifier is considered.
    ident = idents[0]

    header_body = EscapedXMLString('')
    header_body += X.identifier()(ident)

    datestamp = get_modification_date(recid)
    if set_last_updated:
        datestamp = max(datestamp, set_last_updated)
    header_body += X.datestamp()(datestamp)

    for spec in get_field(recid, CFG_OAI_SET_FIELD):
        # Print only if field not empty
        if spec and spec != CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC:
            header_body += X.setSpec()(spec)

    header = X.header(status=status)(header_body)

    if verb == 'ListIdentifiers':
        return header

    if not is_available:
        # Unavailable records only expose their header.
        return X.record()(header, '', '', '')

    metadata_body = format_record(recid, CFG_OAI_METADATA_FORMATS[prefix][0])
    metadata = X.metadata(body=metadata_body)

    provenance_body = get_record_provenance(recid)
    provenance = X.about(body=provenance_body) if provenance_body else ''

    rights_body = get_record_rights(recid)
    rights = X.about(body=rights_body) if rights_body else ''

    return X.record()(header, metadata, provenance, rights)
Example #42
0
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    The batch description is decoded from the given file and sanitised,
    then the following steps are carried out in order:
      1. check that the target record exists,
      2. optionally locate the video master among the files of the record
         ("update_from_master") and use it as input,
      3. get or create the BibDoc of the video and optionally attach the
         master file to it ("add_master"),
      4. run every job of the batch ("encode" and "extract" modes),
      5. rebuild the file list of the BibDoc and fix the MARC of the record,
      6. optionally upload MARCXML ("collection", "add_master_metadata",
         "marc_snippet"),
      7. optionally delete the input file ("delete_input") and send the
         notification emails.

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1. NOTE: as written, the function returns 1 whenever it runs
             to completion, including when the video master is not found or
             when a job failed; failures are reported through the
             notification emails and through exceptions.
    @rtype: int
    @raise Exception: if the record is not found or if an encoding job has
                      no container/extension defined
    """
    global _BATCH_STEPS
    global _BATCH_STEP

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = "bibencode_" + str(batch_job["recid"]) + "_" + str(uuid.uuid4()) + ".xml"
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
        ## The context manager closes the file even if the write fails
        with open(xml_filename, "w") as xml_file:
            xml_file.write(marcxml)
        targs = ["-c", xml_filename]
        task_low_level_submission("bibupload", "bibencode", *targs)

    # ---------#
    # GENERAL #
    # ---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    if record_exists(batch_job["recid"]) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job["recid"])

    # --------------------#
    # UPDATE FROM MASTER #
    # --------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, "update_from_master"):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                ## A criterion missing from the batch description defaults
                ## to the value of the file itself, i.e. it always matches
                m_comment = getval(batch_job, "bibdoc_master_comment", comment)
                m_description = getval(batch_job, "bibdoc_master_description", description)
                m_subformat = getval(batch_job, "bibdoc_master_subformat", subformat)
                if comment == m_comment and description == m_description and subformat == m_subformat:
                    found_master = True
                    batch_job["input"] = bibdocfile.get_full_path()
                    ## Get the aspect ratio from the record being processed
                    ## (this used to query the hard-coded record 124)
                    try:
                        ## Assumes pbcore metadata mapping
                        batch_job["aspect"] = get_fieldvalues(
                            batch_job["recid"], CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD
                        )[0]
                    except IndexError:
                        ## No aspect ratio stored in the record
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found" % batch_job["recid"])
            task_update_progress("Video master for record %d not found" % batch_job["recid"])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, "assure_quality"):
        batch_job = clean_job_for_quality(batch_job)

    _BATCH_STEPS = len(batch_job["jobs"])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(batch_job["input"])[1:]
    if not bibdoc_video_extension or getval(batch_job, "bibdoc_master_extension"):
        bibdoc_video_extension = getval(batch_job, "bibdoc_master_extension")
    if getval(batch_job, "bibdoc_master_docname"):
        bibdoc_video_docname = getval(batch_job, "bibdoc_master_docname")

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory of the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    # --------#
    # MASTER #
    # --------#
    if not getval(batch_job, "update_from_master"):
        if getval(batch_job, "add_master"):
            ## Generate the right name for the master
            ## The master should be hidden first an then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc" % bibdoc_video_docname)
            master_format = compose_format(
                bibdoc_video_extension, getval(batch_job, "bibdoc_master_subformat", "master")
            )
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                batch_job["input"],
                version=1,
                description=getval(batch_job, "bibdoc_master_description"),
                comment=getval(batch_job, "bibdoc_master_comment"),
                docformat=master_format,
            )

    # -----------#
    # JOBS LOOP #
    # -----------#

    ## Stays 1 only as long as every job reports success (results are AND-ed)
    return_code = 1

    for job in batch_job["jobs"]:

        _task_write_message("----------- Job %s of %s -----------" % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, "bibdoc_docname"):
            job["bibdoc_docname"] = Template(job["bibdoc_docname"]).safe_substitute(
                {"bibdoc_master_docname": bibdoc_video_docname}
            )

        # -------------#
        # TRANSCODING #
        # -------------#

        if job["mode"] == "encode":

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, "assure_quality") and getval(job, "fallback"):
                continue

            if getval(job, "profile"):
                profile = get_encoding_profile(job["profile"])
            else:
                profile = None
            ## We need an extension defined for the video container
            bibdoc_video_extension = getval(job, "extension", getval(profile, "extension"))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, "bibdoc_subformat")
            bibdoc_slave_video_docname = getval(job, "bibdoc_docname", bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(bibdoc_video_directory, bibdoc_video_extension)
            _task_write_message(
                "Transcoding %s to %s;%s" % (bibdoc_slave_video_docname, bibdoc_video_extension, bibdoc_video_subformat)
            )
            ## We encode now directly into the bibdocs directory.
            ## The audio bitrate comes from "audiobitrate" and the video
            ## bitrate from "videobitrate" (the two keys used to be swapped).
            encoding_result = encode_video(
                input_file=batch_job["input"],
                output_file=bibdoc_video_fullpath,
                acodec=getval(job, "audiocodec"),
                vcodec=getval(job, "videocodec"),
                abitrate=getval(job, "audiobitrate"),
                vbitrate=getval(job, "videobitrate"),
                resolution=getval(job, "resolution"),
                passes=getval(job, "passes", 1),
                special=getval(job, "special"),
                specialfirst=getval(job, "specialfirst"),
                specialsecond=getval(job, "specialsecond"),
                metadata=getval(job, "metadata"),
                width=getval(job, "width"),
                height=getval(job, "height"),
                aspect=getval(batch_job, "aspect"),  # Aspect for every job
                profile=getval(job, "profile"),
                update_fnc=_task_update_overall_status,
                message_fnc=_task_write_message,
            )
            return_code &= encoding_result
            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(
                    bibdoc_video_fullpath,
                    compose_file(
                        bibdoc_video_directory,
                        bibdoc_video_extension,
                        bibdoc_video_subformat,
                        1,
                        bibdoc_slave_video_docname,
                    ),
                )
                # bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension, bibdoc_video_subformat)
                if getval(job, "bibdoc_comment"):
                    bibdoc_video.set_comment(getval(job, "bibdoc_comment"), bibdoc_video_format)
                if getval(job, "bibdoc_description"):
                    bibdoc_video.set_description(getval(job, "bibdoc_description"), bibdoc_video_format)

        # ------------#
        # EXTRACTION #
        # ------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job["mode"] == "extract":
            if getval(job, "profile"):
                profile = get_extract_profile(job["profile"])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, "bibdoc_subformat")
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            ## The temporary directory is removed in the "finally" clause so
            ## that it does not leak when the extraction or the attachment of
            ## the frames raises
            try:
                # Move this to the batch description
                bibdoc_frame_docname = getval(job, "bibdoc_docname", bibdoc_video_docname)
                tmpfname = (
                    tmpdir
                    + "/"
                    + bibdoc_frame_docname
                    + "."
                    + getval(profile, "extension", getval(job, "extension", "jpg"))
                )
                extraction_result = extract_frames(
                    input_file=batch_job["input"],
                    output_file=tmpfname,
                    size=getval(job, "size"),
                    positions=getval(job, "positions"),
                    numberof=getval(job, "numberof"),
                    width=getval(job, "width"),
                    height=getval(job, "height"),
                    aspect=getval(batch_job, "aspect"),
                    profile=getval(job, "profile"),
                    update_fnc=_task_update_overall_status,
                )
                return_code &= extraction_result

                ## only on success:
                if extraction_result:
                    ## for every filename in the directory, create a bibdoc that contains
                    ## all sizes of the frame from the two directories
                    files = os.listdir(tmpdir)
                    for filename in files:
                        ## The docname was altered by BibEncode extract through substitution
                        ## Retrieve it from the filename again
                        bibdoc_frame_docname, bibdoc_frame_extension = os.path.splitext(filename)
                        _task_write_message("Creating new bibdoc for %s" % bibdoc_frame_docname)
                        ## If the bibdoc exists, receive it
                        if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                            bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                        ## Create a new bibdoc if it does not exist
                        else:
                            bibdoc_frame = recdoc.add_bibdoc(docname=bibdoc_frame_docname)

                        ## The filename including path from tmpdir
                        fname = os.path.join(tmpdir, filename)

                        bibdoc_frame_format = compose_format(bibdoc_frame_extension, bibdoc_frame_subformat)
                        ## Same as with the master, if the format already exists,
                        ## override it, because something went wrong before
                        if bibdoc_frame.format_already_exists_p(bibdoc_frame_format):
                            bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                        _task_write_message(
                            "Adding %s jpg;%s to BibDoc" % (bibdoc_frame_docname, getval(job, "bibdoc_subformat"))
                        )
                        bibdoc_frame.add_file_new_format(
                            fname,
                            version=1,
                            description=getval(job, "bibdoc_description"),
                            comment=getval(job, "bibdoc_comment"),
                            docformat=bibdoc_frame_format,
                        )
            finally:
                ## Remove the temporary folders
                _task_write_message("Removing temporary directory")
                shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    # -----------------#
    # FIX BIBDOC/MARC #
    # -----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job["recid"]], False)

    if getval(batch_job, "collection"):
        ## Make the record visible by moving in from the collection
        marcxml = (
            '<record><controlfield tag="001">%d</controlfield>'
            '<datafield tag="980" ind1=" " ind2=" ">'
            '<subfield code="a">%s</subfield></datafield></record>'
        ) % (batch_job["recid"], batch_job["collection"])
        upload_marcxml_file(marcxml)

    # ---------------------#
    # ADD MASTER METADATA #
    # ---------------------#

    if getval(batch_job, "add_master_metadata"):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(
            input_file=getval(batch_job, "input"),
            pbcoreIdentifier=batch_job["recid"],
            aspect_override=getval(batch_job, "aspect"),
        )
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    # ------------------#
    # ADD MARC SNIPPET #
    # ------------------#

    if getval(batch_job, "marc_snippet"):
        ## "marc_snippet" holds the path of a file containing MARCXML
        with open(getval(batch_job, "marc_snippet")) as marc_snippet:
            marcxml = marc_snippet.read()
        upload_marcxml_file(marcxml)

    # --------------#
    # DELETE INPUT #
    # --------------#

    if getval(batch_job, "delete_input"):
        _task_write_message("Deleting input file")
        ## only if successful: return_code is still 1 when no job failed
        ## (the test used to be inverted and deleted the input on failure)
        if return_code:
            # only if input matches pattern
            if getval(batch_job, "delete_input_pattern", "") in getval(batch_job, "input"):
                try:
                    os.remove(getval(batch_job, "input"))
                except OSError:
                    ## Best effort: the input may already be gone
                    pass

    # --------------#
    # NOTIFICATION #
    # --------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, "notify_user"):
            _notify_error_user(
                getval(batch_job, "notify_user"),
                getval(batch_job, "submission_filename", batch_job["input"]),
                getval(batch_job, "recid"),
                getval(batch_job, "submission_title", ""),
            )
            _task_write_message("Notify user because of an error")
        notify_admin = getval(batch_job, "notify_admin")
        if notify_admin:
            _task_write_message("Notify admin because of an error")
            ## A string value is passed on as second argument, any other
            ## truthy value falls back to the call without it.
            ## (str, type(u"")) covers both str and unicode on Python 2.
            if isinstance(notify_admin, (str, type(u""))):
                _notify_error_admin(batch_job, notify_admin)
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, "notify_user"):
            _task_write_message("Notify user because of success")
            _notify_success_user(
                getval(batch_job, "notify_user"),
                getval(batch_job, "submission_filename", batch_job["input"]),
                getval(batch_job, "recid"),
                getval(batch_job, "submission_title", ""),
            )
    return 1
def Ask_For_Record_Details_Confirmation(parameters, \
                                        curdir, \
                                        form, \
                                        user_info=None):
    """
       Display the details of a record on which some operation is to be carried
       out and prompt for the user's confirmation that it is the correct record.
       Upon the clicking of the confirmation button, augment step by one.

       Given the "recid" (001) of a record, retrieve the basic metadata
       (title, report-number(s) and author(s)) and display them in the
       user's browser along with a prompt asking them to confirm that
       it is indeed the record that they expected to see.

       The function depends upon the presence of the "sysno" global and the
       presence of the "step" field in the "form" parameter.
       When the user clicks on the "confirm" button, step will be augmented by
       1 and the form will be submitted.
       @param parameters: not used by this function.
       @param curdir: not used by this function.
       @param form: mapping holding the submission form fields; its "step"
        entry must be convertible to an integer.
       @param user_info: not used by this function.
       @return: None (the function always terminates by raising).
       @Exceptions raise: InvenioWebSubmitFunctionError if the step or the
        record ID is missing/invalid, or if the record does not exist;
        InvenioWebSubmitFunctionStop in order to display the details of the
        record and the confirmation message.
    """
    global sysno

    def _escaped_lines(text):
        ## HTML-escape every line of "text" and terminate it with "<br />".
        return "".join(["%s<br />\n" % cgi.escape(line.strip()) \
                        for line in text.split("\n")])

    ## Make sure that we know the current step.
    ## A missing "step" raises KeyError and a non-numeric one ValueError;
    ## both must be reported as a submission error, just like a None value.
    try:
        current_step = int(form['step'])
    except (KeyError, TypeError, ValueError):
        ## Can't determine step.
        msg = "Unable to determine submission step. Cannot continue."
        raise InvenioWebSubmitFunctionError(msg)
    else:
        newstep = current_step + 1

    ## Make sure that the sysno is valid (int() raises ValueError for
    ## non-numeric strings and TypeError for None):
    try:
        working_recid = int(sysno)
    except (TypeError, ValueError):
        ## Unable to find the details of this record - cannot query the database
        msg = "Unable to retrieve details of record - record id was invalid."
        raise InvenioWebSubmitFunctionError(msg)

    if not record_exists(working_recid):
        ## Record doesn't exist.
        msg = "Unable to retrieve details of record [%s] - record does not " \
              "exist." % working_recid
        raise InvenioWebSubmitFunctionError(msg)

    ## Retrieve the details to be displayed:
    ##
    ## Author(s): first author (100__a) followed by the others (700__a).
    rec_authors = ""
    rec_first_author = print_record(working_recid, 'tm', "100__a")
    rec_other_authors = print_record(working_recid, 'tm', "700__a")
    if rec_first_author != "":
        rec_authors += _escaped_lines(rec_first_author)
    if rec_other_authors != "":
        rec_authors += _escaped_lines(rec_other_authors)

    ## Title (always rendered, even when the field is empty):
    rec_title = _escaped_lines(print_record(working_recid, 'tm', "245__a"))

    ## Report numbers: primary (037__a) followed by the others (088__a).
    rec_reportnums = ""
    rec_reportnum = print_record(working_recid, 'tm', "037__a")
    rec_other_reportnums = print_record(working_recid, 'tm', "088__a")
    if rec_reportnum != "":
        rec_reportnums += _escaped_lines(rec_reportnum)
    if rec_other_reportnums != "":
        rec_reportnums += _escaped_lines(rec_other_reportnums)

    ## Stop the submission flow here and show the confirmation page:
    raise InvenioWebSubmitFunctionStop(CFG_DOCUMENT_DETAILS_MESSAGE % \
                                  { 'report-numbers' : rec_reportnums, \
                                    'title'          : rec_title, \
                                    'author'         : rec_authors, \
                                    'newstep'        : newstep, \
                                    'admin-email'    : CFG_SITE_ADMIN_EMAIL, \
                                  }   )
Example #44
0
    def get(self):
        """Search for records and return one page of the results as JSON.

        The endpoint is currently switched off: ``abort(405)`` is called
        first, so the rest of the body only runs if ``abort`` returns.

        Otherwise the output format is chosen from the ``Accept`` header
        through ``self.mimetypes``. The ``query``, ``sort_field``,
        ``sort_order``, ``page`` and ``per_page`` URL arguments drive the
        search. Only records that exist and that the current user may view
        are formatted and paginated.

        :returns: a ``(json_body, 200, headers)`` tuple; ``headers`` holds
            the pagination link header when at least one record is found.
        :raises RecordUnsuppotedMediaTypeError: if the ``Accept`` header
            maps to no known output format.
        :raises RecordError: if ``page`` or ``per_page`` is negative, or if
            ``page`` is beyond the last page.
        """
        # Temporarily disable search until fully tested.
        abort(405)

        from invenio.legacy.search_engine import perform_request_search, \
            record_exists, check_user_can_view_record

        accept = request.headers.get('Accept', 'application/json')
        output_format = self.mimetypes.get(accept)
        if output_format is None:
            raise RecordUnsuppotedMediaTypeError(
                message="Output format {} is not supported.".format(accept))

        # get URL parameters
        query = request.args.get('query', '')
        sort_field = request.args.get('sort_field', 'title')
        sort_order = request.args.get('sort_order', 'a')
        page = int(request.args.get('page', 1))
        per_page = int(request.args.get('per_page', 5))

        if page < 0:
            raise RecordError(message="Invalid page {}".format(page),
                              status=400)
        if per_page < 0:
            raise RecordError(message="Invalid per_page {}".format(per_page),
                              status=400)

        # Keep only the hits that exist and that the user is allowed to see.
        viewable_recids = []
        for recid in perform_request_search(p=query, sf=sort_field,
                                            so=sort_order, of='id'):
            if record_exists(recid) <= 0:
                continue
            (auth_code, auth_mesg) = check_user_can_view_record(
                current_user, recid)
            if auth_code == 0:
                viewable_recids.append(recid)

        formatted_records = [
            format_record(recID=recid, of=output_format)
            for recid in viewable_recids
        ]

        headers = {}
        if not formatted_records:
            # Nothing to paginate: empty result list, no link header.
            return (json.dumps([]), 200, headers)

        pager = pagination.RestfulPagination(
            page=page,
            per_page=per_page,
            total_count=len(formatted_records)
        )
        if page > pager.pages:
            raise RecordError(message="Invalid page {}".format(page),
                              status=400)

        page_of_records = pager.slice(formatted_records)
        link = pager.link_header(endpoint=request.endpoint,
                                 args=request.args)
        headers[link[0]] = link[1]
        return (json.dumps(page_of_records), 200, headers)
Example #45
0
def perform_request_delete(comID=-1,
                           recID=-1,
                           uid=-1,
                           reviews="",
                           ln=CFG_SITE_LANG):
    """
    Dispatch an admin delete request for comments or reviews.

    The arguments are washed to integers and then:
      - if none of comID, recID and uid is positive, the delete form is
        returned (with an 'Invalid comment ID.' warning when comID is not
        the default -1);
      - if comID is positive and recID is not, the comment is looked up
        and, when found, listed through perform_request_comments;
        'reviews' is deduced from its star score;
      - if recID is positive, the comments (or reviews, according to
        'reviews') of that record are listed when the record exists;
      - in every other case the delete form is returned.

    @param comID: comment ID
    @param recID: record ID
    @param uid: user ID (only washed and tested here)
    @param reviews: whether to list reviews rather than comments; only
                    used together with recID
    @param ln: language
    @return: either the output of tmpl_admin_delete_form(ln, warnings), or
             a tuple (output of perform_request_comments, None, warnings)
    """
    _ = gettext_set_language(ln)

    from invenio.legacy.search_engine import record_exists

    warnings = []

    ln = wash_language(ln)
    comID = wash_url_argument(comID, 'int')
    recID = wash_url_argument(recID, 'int')
    uid = wash_url_argument(uid, 'int')
    # parameter reviews is deduced from comID when needed

    if comID is not None and recID is not None and uid is not None:
        if comID <= 0 and recID <= 0 and uid <= 0:
            if comID != -1:
                # The warning is raised and caught on purpose, so that
                # register_exception() is called while it is being handled.
                try:
                    raise InvenioWebCommentWarning(_('Invalid comment ID.'))
                except InvenioWebCommentWarning as exc:
                    register_exception(stream='warning')
                    warnings.append((exc.message, ''))
                #warnings.append(("WRN_WEBCOMMENT_ADMIN_INVALID_COMID",))
            return webcomment_templates.tmpl_admin_delete_form(ln, warnings)

        if comID > 0 and not recID > 0:
            comment = query_get_comment(comID)

            if comment:
                # Figure out if this is a review or a comment
                # (index 5 of the comment row holds the star score)
                c_star_score = 5
                if comment[c_star_score] > 0:
                    reviews = 1
                else:
                    reviews = 0
                return (perform_request_comments(ln=ln,
                                                 comID=comID,
                                                 recID=recID,
                                                 reviews=reviews), None,
                        warnings)
            else:
                try:
                    raise InvenioWebCommentWarning(
                        _('Comment ID %(x_name)s does not exist.',
                          x_name=comID))
                except InvenioWebCommentWarning as exc:
                    register_exception(stream='warning')
                    warnings.append((exc.message, ''))
                #warnings.append(('WRN_WEBCOMMENT_ADMIN_COMID_INEXISTANT', comID))
                return webcomment_templates.tmpl_admin_delete_form(
                    ln, warnings)

        elif recID > 0:
            if record_exists(recID):
                comID = ''
                reviews = wash_url_argument(reviews, 'int')
                return (perform_request_comments(ln=ln,
                                                 comID=comID,
                                                 recID=recID,
                                                 reviews=reviews), None,
                        warnings)
            else:
                # Report the record ID that was looked up (this used to
                # interpolate comID into the message by mistake).
                try:
                    raise InvenioWebCommentWarning(
                        _('Record ID %(x_rec)s does not exist.', x_rec=recID))
                except InvenioWebCommentWarning as exc:
                    register_exception(stream='warning')
                    warnings.append((exc.message, ''))
                #warnings.append(('WRN_WEBCOMMENT_ADMIN_RECID_INEXISTANT', recID))
                return webcomment_templates.tmpl_admin_delete_form(
                    ln, warnings)
        else:
            return webcomment_templates.tmpl_admin_delete_form(ln, warnings)

    else:
        return webcomment_templates.tmpl_admin_delete_form(ln, warnings)
Example #46
0
def main():
    """Core loop of the icon-files migration kit.

    Checks the running process user, opens a timestamped log file under
    ``CFG_LOGDIR`` (exiting with status 1 when it cannot be created) and
    then runs the migration itself.  The log file is closed on every exit
    path, including the early return and the ``sys.exit`` calls made while
    migrating.
    """
    check_running_process_user()
    logfilename = '%s/fulltext_files_migration_kit-%s.log' % (CFG_LOGDIR, datetime.today().strftime('%Y%m%d%H%M%S'))
    try:
        logfile = open(logfilename, 'w')
    except IOError as e:
        print(wrap_text_in_a_box('NOTE: it\'s impossible to create the log:\n\n  %s\n\nbecause of:\n\n  %s\n\nPlease run this migration kit as the same user who runs Invenio (e.g. Apache)' % (logfilename, e), style='conclusion', break_long=False))
        sys.exit(1)

    try:
        _migrate_icons(logfile, logfilename)
    finally:
        # The log used to be closed only on the error path; closing it here
        # guarantees buffered "ERROR: ..." lines reach the disk in all cases.
        logfile.close()


def _migrate_icons(logfile, logfilename):
    """Back up the tables, fix every bibdoc pair and schedule FIX-MARC.

    :param logfile: open, writable file object receiving per-document errors
    :param logfilename: path of ``logfile``, only used in user messages
    """
    bibdoc_bibdoc = retrieve_bibdoc_bibdoc()

    print(wrap_text_in_a_box("""This script migrate the filesystem structure used to store icons files to the new stricter structure.
This script must not be run during normal Invenio operations.
It is safe to run this script. No file will be deleted.
Anyway it is recommended to run a backup of the filesystem structure just in case.
A backup of the database tables involved will be automatically performed.""", style='important'))
    if not bibdoc_bibdoc:
        print(wrap_text_in_a_box("No need for migration", style='conclusion'))
        return
    print("%s icons will be migrated/fixed." % len(bibdoc_bibdoc))
    wait_for_user()
    print("Backing up database tables")
    try:
        if not backup_tables():
            print(wrap_text_in_a_box("""It appears that is not the first time that you run this script.
Backup tables have been already created by a previous run.
In order for the script to go further they need to be removed.""", style='important'))

            wait_for_user()
            print("Backing up database tables (after dropping previous backup)", end=' ')
            backup_tables(drop=True)
            print("-> OK")
        else:
            print("-> OK")
    except Exception as e:
        print(wrap_text_in_a_box("Unexpected error while backing up tables. Please, do your checks: %s" % e, style='conclusion'))
        sys.exit(1)

    # Records whose MARCXML must be re-synchronised once the fix is over.
    to_fix_marc = intbitset()
    print("Created a complete log file into %s" % logfilename)
    try:
        try:
            for id_bibdoc1, id_bibdoc2 in bibdoc_bibdoc:
                try:
                    record_does_exist = True
                    recids = get_recid_from_docid(id_bibdoc1)
                    if not recids:
                        print("Skipping %s" % id_bibdoc1)
                        continue
                    for recid in recids:
                        if record_exists(recid[0]) > 0:
                            to_fix_marc.add(recid[0])
                        else:
                            record_does_exist = False
                    if not fix_bibdoc_bibdoc(id_bibdoc1, id_bibdoc2, logfile):
                        # A failed fix is only reported when every attached
                        # record still exists.
                        if record_does_exist:
                            # RuntimeError replaces the Python-2-only
                            # StandardError; it is caught just below either way.
                            raise RuntimeError("Error when correcting document ID %s" % id_bibdoc1)
                except Exception as err:
                    # Per-document failures are logged and the loop goes on.
                    print("ERROR: %s" % err, file=logfile)
            print(wrap_text_in_a_box("DONE", style='conclusion'))
        except:
            # Deliberately broad: ordinary per-document errors are handled
            # above, so this only sees what escapes that handler (for
            # instance a KeyboardInterrupt).
            logfile.close()
            register_exception()
            print(wrap_text_in_a_box(
                title = "INTERRUPTED BECAUSE OF ERROR!",
                body = """Please see the log file %s for what was the status prior to the error. Contact %s in case of problems, attaching the log.""" % (logfilename, CFG_SITE_SUPPORT_EMAIL),
            style = 'conclusion'))
            sys.exit(1)
    finally:
        # Runs on success, on interruption and on sys.exit alike.
        print("Scheduling FIX-MARC to synchronize MARCXML for updated records.")
        cli_fix_marc(options={}, explicit_recid_set=to_fix_marc)
Example #47
0
    def decorated(recid, *args, **kwargs):
        """Set up the record request context, then call the wrapped view.

        Looks up the primary collection of the record, checks that the
        current user may view it (guests are redirected to the login page,
        other users get an unauthorized abort), redirects or aborts for
        deleted records, and registers the template context processors
        before handing over to ``f``.
        """
        from invenio.modules.access.mailcookie import mail_cookie_create_authorize_action
        from invenio.modules.access.local_config import VIEWRESTRCOLL
        from invenio.legacy.search_engine import (
            check_user_can_view_record,
            get_merged_recid,
            guess_primary_collection_of_a_record,
            record_exists,
        )

        # Coerce recid to an integer.
        recid = int(recid)
        collection = Collection.query.filter(
            Collection.name == guess_primary_collection_of_a_record(recid)
        ).one()
        g.collection = collection

        denied, denial_msg = check_user_can_view_record(current_user, recid)

        # The verbose (debug) parameter is reserved to superadmins.
        if not current_user.is_super_admin:
            if "verbose" in kwargs:
                kwargs["verbose"] = 0

        if denied:
            if current_user.is_guest:
                auth_cookie = mail_cookie_create_authorize_action(
                    VIEWRESTRCOLL, {"collection": g.collection.name}
                )
                login_args = dict(action=auth_cookie, ln=g.ln, referer=request.url)
                flash(_("Authorization failure"), "error")
                return redirect(url_for("webaccount.login", **login_args))
            flash(denial_msg, "error")
            abort(apache.HTTP_UNAUTHORIZED)

        # A deleted record that was merged into another one is redirected
        # to its successor; a plainly deleted record is gone.
        status = record_exists(recid)
        successor = get_merged_recid(recid)
        if status == -1:
            if successor:
                return redirect(url_for("record.metadata", recid=successor))
            abort(apache.HTTP_GONE)

        g.bibrec = Bibrec.query.get(recid)
        record = get_record(recid)
        if record is None:
            return render_template("404.html")

        title = record.get(cfg.get("RECORDS_BREADCRUMB_TITLE_KEY"), "")
        tabs = []

        if cfg.get("CFG_WEBLINKBACK_TRACKBACK_ENABLED"):

            @register_template_context_processor
            def trackback_context():
                from invenio.legacy.weblinkback.templates import get_trackback_auto_discovery_tag

                return dict(headerLinkbackTrackbackLink=get_trackback_auto_discovery_tag(recid))

        def _format_record(recid, of="hd", user_info=current_user, *args, **kwargs):
            from invenio.modules.formatter import format_record

            return format_record(recid, of, user_info=user_info, *args, **kwargs)

        @register_template_context_processor
        def record_context():
            from invenio.modules.comments.api import get_mini_reviews

            return {
                "recid": recid,
                "record": record,
                "tabs": tabs,
                "title": title,
                "get_mini_reviews": get_mini_reviews,
                "collection": collection,
                "format_record": _format_record,
            }

        sender = "%s.%s" % (blueprint.name, f.__name__)
        pre_template_render.send(sender, recid=recid)
        return f(recid, *args, **kwargs)
Example #48
0
        def getfile(req, form):
            """Stream one file of record ``self.recid`` or render its file list.

            ``filename`` is neither a parameter nor assigned here; it is
            resolved from an outer scope.

            Flow, in order:

            1. Wash the URL arguments and reset ``verbose`` for users who are
               not super admins.
            2. Refuse access or show a warning page when the site access level
               forbids it, the record does not exist, is empty, or the user may
               not view it (guests are redirected to the login page).
            3. Derive ``docname``, ``docformat`` and ``version`` from
               ``filename`` and the URL arguments.
            4. Unless ``version`` is ``'all'``, look for the matching document
               and stream it, accumulating warnings in ``warn`` on failure.
            5. Otherwise (or after a failure) build the full "Access to
               Fulltext" page made of the warnings and the file list.

            :param req: the request object; its ``status``, ``headers_in`` and
                ``remote_ip`` are used below
            :param form: the raw request arguments, washed with
                ``bibdocfile_templates.files_default_urlargd``
            :return: whatever the streaming, redirect, warning or page helpers
                return for the branch taken
            """
            args = wash_urlargd(form, bibdocfile_templates.files_default_urlargd)
            ln = args['ln']

            _ = gettext_set_language(ln)

            uid = getUid(req)
            user_info = collect_user_info(req)

            verbose = args['verbose']
            if verbose >= 1 and not isUserSuperAdmin(user_info):
                # Only SuperUser can see all the details!
                verbose = 0

            if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
                return page_not_authorized(req, "/%s/%s" % (CFG_SITE_RECORD, self.recid),
                                           navmenuid='submit')

            if record_exists(self.recid) < 1:
                msg = "<p>%s</p>" % _("Requested record does not seem to exist.")
                return warning_page(msg, req, ln)

            if record_empty(get_record(self.recid).legacy_create_recstruct()):
                msg = "<p>%s</p>" % _("Requested record does not seem to have been integrated.")
                return warning_page(msg, req, ln)

            (auth_code, auth_message) = check_user_can_view_record(user_info, self.recid)
            if auth_code and user_info['email'] == 'guest':
                # Unauthorized guests are sent to the login page with a cookie
                # describing the collection they tried to reach.
                cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
                target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                            make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                                    CFG_SITE_SECURE_URL + user_info['uri']}, {})
                return redirect_to_url(req, target, norobot=True)
            elif auth_code:
                return page_not_authorized(req, "../", \
                                            text = auth_message)

            # When true, downloads are not registered further below.
            readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1

            # From now on: either the user provided a specific file
            # name (and a possible version), or we return a list of
            # all the available files. In no case are the docids
            # visible.
            try:
                bibarchive = BibRecDocs(self.recid)
            except InvenioBibDocFileError:
                register_exception(req=req, alert_admin=True)
                msg = "<p>%s</p><p>%s</p>" % (
                    _("The system has encountered an error in retrieving the list of files for this document."),
                    _("The error has been logged and will be taken in consideration as soon as possible."))
                return warning_page(msg, req, ln)

            if bibarchive.deleted_p():
                req.status = apache.HTTP_GONE
                return warning_page(_("Requested record does not seem to exist."), req, ln)

            docname = ''
            docformat = ''
            version = ''
            warn = ''

            if filename:
                # We know the complete file name, guess which docid it
                # refers to
                ## TODO: Change the extension system according to ext.py from setlink
                ##       and have a uniform extension mechanism...
                docname = file_strip_ext(filename)
                docformat = filename[len(docname):]
                if docformat and docformat[0] != '.':
                    docformat = '.' + docformat
                if args['subformat']:
                    docformat += ';%s' % args['subformat']
            else:
                docname = args['docname']

            if not docformat:
                docformat = args['format']
                if args['subformat']:
                    docformat += ';%s' % args['subformat']

            if not version:
                version = args['version']

            ## Download as attachment
            is_download = False
            if args['download']:
                is_download = True

            # version could be either empty, or all or an integer
            try:
                int(version)
            except ValueError:
                if version != 'all':
                    version = ''

            # Forwarded to the file list template at the end of the function.
            display_hidden = isUserSuperAdmin(user_info)

            if version != 'all':
                # search this filename in the complete list of files
                for doc in bibarchive.list_bibdocs():
                    if docname == bibarchive.get_docname(doc.id):
                        try:
                            try:
                                docfile = doc.get_file(docformat, version)
                            except InvenioBibDocFileError as msg:
                                req.status = apache.HTTP_NOT_FOUND
                                if not CFG_INSPIRE_SITE and req.headers_in.get('referer'):
                                    ## There must be a broken link somewhere.
                                    ## Maybe it's good to alert the admin
                                    register_exception(req=req, alert_admin=True)
                                warn += write_warning(_("The format %(x_form)s does not exist for the given version: %(x_vers)s",
                                            x_form=cgi.escape(docformat), x_vers=cgi.escape(str(msg))))
                                break
                            (auth_code, auth_message) = docfile.is_restricted(user_info)
                            # Owners of the record bypass the file restriction.
                            if auth_code != 0 and not is_user_owner_of_record(user_info, self.recid):
                                if CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(get_subformat_from_format(docformat)):
                                    return stream_restricted_icon(req)
                                if user_info['email'] == 'guest':
                                    cookie = mail_cookie_create_authorize_action('viewrestrdoc', {'status' : docfile.get_status()})
                                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                                    make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                        CFG_SITE_SECURE_URL + user_info['uri']}, {})
                                    # NOTE(review): unlike the redirect above, the
                                    # result is not returned here; presumably
                                    # redirect_to_url interrupts the request by
                                    # raising -- confirm, otherwise the restricted
                                    # file is served below.
                                    redirect_to_url(req, target)
                                else:
                                    req.status = apache.HTTP_UNAUTHORIZED
                                    warn += write_warning(_("This file is restricted: ") + str(auth_message))
                                    break

                            if not docfile.hidden_p():
                                if not readonly:
                                    ip = str(req.remote_ip)
                                    doc.register_download(ip, docfile.get_version(), docformat, uid, self.recid)
                                try:
                                    return docfile.stream(req, download=is_download)
                                except InvenioBibDocFileError as msg:
                                    register_exception(req=req, alert_admin=True)
                                    req.status = apache.HTTP_INTERNAL_SERVER_ERROR
                                    warn += write_warning(_("An error has happened in trying to stream the request file."))
                            else:
                                req.status = apache.HTTP_UNAUTHORIZED
                                warn += write_warning(_("The requested file is hidden and can not be accessed."))

                        except InvenioBibDocFileError as msg:
                            register_exception(req=req, alert_admin=True)

            # Reaching this point means nothing was streamed above; if a
            # specific file was requested and no other warning explains why,
            # report it as missing.
            if docname and docformat and not warn:
                req.status = apache.HTTP_NOT_FOUND
                warn += write_warning(_("Requested file does not seem to exist."))
#            filelist = bibarchive.display("", version, ln=ln, verbose=verbose, display_hidden=display_hidden)
            filelist = bibdocfile_templates.tmpl_display_bibrecdocs(bibarchive, "", version, ln=ln, verbose=verbose, display_hidden=display_hidden)

            t = warn + bibdocfile_templates.tmpl_filelist(
                ln=ln,
                filelist=filelist)

            cc = guess_primary_collection_of_a_record(self.recid)
            cc_id = Collection.query.filter_by(name=cc).value('id')
            # NOTE(review): with the original call commented out,
            # unordered_tabs is None, so iterating it on the next line looks
            # like it must fail -- confirm and restore a real tab mapping.
            unordered_tabs = None  # get_detailed_page_tabs(cc_id, self.recid, ln)
            ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in iteritems(unordered_tabs)]
            # NOTE(review): a comparison function passed positionally to
            # sort() and the cmp builtin are Python 2 only.
            ordered_tabs_id.sort(lambda x, y: cmp(x[1], y[1]))
            link_ln = ''
            if ln != CFG_SITE_LANG:
                link_ln = '?ln=%s' % ln
            # One (label, url, is_current, enabled) tuple per visible tab.
            tabs = [(unordered_tabs[tab_id]['label'],
                     '%s/%s/%s/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, tab_id, link_ln),
                     tab_id == 'files',
                     unordered_tabs[tab_id]['enabled'])
                    for (tab_id, dummy_order) in ordered_tabs_id
                    if unordered_tabs[tab_id]['visible'] is True]

            # NOTE(review): tabs_counts is an empty dict, so the three key
            # lookups below look like they will raise KeyError -- confirm.
            tabs_counts = {}  # get_detailed_page_tabs_counts(self.recid)
            top = webstyle_templates.detailed_record_container_top(self.recid,
                                                                   tabs,
                                                                   args['ln'],
                                                                   citationnum=tabs_counts['Citations'],
                                                                   referencenum=tabs_counts['References'],
                                                                   discussionnum=tabs_counts['Discussions'])
            bottom = webstyle_templates.detailed_record_container_bottom(self.recid,
                                                                         tabs,
                                                                         args['ln'])
            title, description, keywords = websearch_templates.tmpl_record_page_header_content(req, self.recid, args['ln'])
            # Full page: header, search page start, record container around
            # the warnings and file list, search page end, footer.
            return pageheaderonly(title=title,
                        navtrail=create_navtrail_links(cc=cc, aas=0, ln=ln) + \
                                        ''' &gt; <a class="navtrail" href="%s/%s/%s">%s</a>
                                        &gt; %s''' % \
                        (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, title, _("Access to Fulltext")),

                        description=description,
                        keywords=keywords,
                        uid=uid,
                        language=ln,
                        req=req,
                        navmenuid='search',
                        navtrail_append_title_p=0) + \
                        websearch_templates.tmpl_search_pagestart(ln) + \
                        top + t + bottom + \
                        websearch_templates.tmpl_search_pageend(ln) + \
                        pagefooteronly(language=ln, req=req)
Example #49
0
    def decorated(recid, *args, **kwargs):
        """Set up the record request context, then call the wrapped view.

        Loads the record (rendering the 404 template when it is missing),
        resolves its primary collection, aborts when the current user may
        not view it, redirects or aborts for deleted records and finally
        registers the template context before delegating to ``f``.
        """
        from invenio.legacy.search_engine import (
            check_user_can_view_record,
            get_merged_recid,
            guess_primary_collection_of_a_record,
            record_exists,
        )

        # Coerce recid to an integer.
        recid = int(recid)
        g.bibrec = Bibrec.query.get(recid)

        record = get_record(recid)
        if record is None:
            return render_template('404.html')

        collection = Collection.query.filter(
            Collection.name == guess_primary_collection_of_a_record(recid)
        ).one()
        g.collection = collection

        denied, denial_msg = check_user_can_view_record(current_user, recid)

        # The verbose (debug) parameter is reserved to superadmins.
        if not current_user.is_super_admin:
            if 'verbose' in kwargs:
                kwargs['verbose'] = 0

        if denied:
            flash(denial_msg, 'error')
            abort(apache.HTTP_UNAUTHORIZED)

        # A deleted record that was merged into another one is redirected
        # to its successor; a plainly deleted record is gone.
        status = record_exists(recid)
        successor = get_merged_recid(recid)
        if status == -1:
            if successor:
                return redirect(url_for('record.metadata', recid=successor))
            abort(apache.HTTP_GONE)

        title = record.get(cfg.get('RECORDS_BREADCRUMB_TITLE_KEY'), '')
        tabs = []

        def _format_record(recid, of='hd', user_info=current_user, *args,
                           **kwargs):
            from invenio.modules.formatter import format_record
            return format_record(recid, of, user_info=user_info, *args,
                                 **kwargs)

        @register_template_context_processor
        def record_context():
            from invenio.modules.comments.api import get_mini_reviews
            return {
                'recid': recid,
                'record': record,
                'tabs': tabs,
                'title': title,
                'get_mini_reviews': get_mini_reviews,
                'collection': collection,
                'format_record': _format_record,
            }

        sender = "%s.%s" % (blueprint.name, f.__name__)
        pre_template_render.send(sender, recid=recid)
        return f(recid, *args, **kwargs)
Example #50
0
def check_user_can_view_record(user_info, recid):
    """Tell whether the user is authorized to view the given record.

    Access is granted to owners and viewers of the record, for public
    records outside every restricted collection, and to users authorized
    for the restricted collections of the record: all of them, or any one
    of them when ``CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY`` is ``ANY``.

    :param user_info: the user_info dictionary that describes the user.
    :type user_info: user_info dictionary
    :param recid: the record identifier (a string is converted to int).
    :type recid: positive integer
    :return: (0, '') when authorization is granted, (>0, 'message') when
        authorization is not granted
    """
    from invenio_records.api import get_record
    from invenio.modules.access.engine import acc_authorize_action
    from invenio.modules.access.local_config import VIEWRESTRCOLL
    from invenio.modules.collections.cache import is_record_in_any_collection
    from invenio.legacy.search_engine import record_exists

    granted = (0, "")
    any_is_enough = (
        cfg["CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY"].strip().upper() == "ANY"
    )

    if isinstance(recid, str):
        recid = int(recid)

    # Owners and viewers of the record are always let in.
    if (is_user_owner_of_record(user_info, recid)
            or is_user_viewer_of_record(user_info, recid)):
        return granted

    restricted = get_restricted_collections_for_recid(
        recid, recreate_cache_if_needed=False)
    if restricted:
        # The user must be authorized for all/any of the restricted
        # collections, depending on the policy.
        code, message = 0, ""
        for restricted_name in restricted:
            code, message = acc_authorize_action(
                user_info, VIEWRESTRCOLL, collection=restricted_name)
            if any_is_enough:
                if code == 0:
                    # One authorized collection is sufficient.
                    return granted
            elif code:
                # Every collection is required and this one is refused.
                return (code, message)
        # No early decision: the outcome of the last collection stands.
        return code, message

    if is_record_public(get_record(recid)):
        # Public record that belongs to no restricted collection.
        return granted

    if is_record_in_any_collection(recid, recreate_cache_if_needed=False):
        # The record is not in any restricted collection.
        return granted

    if not (record_exists(recid) > 0):
        # The record either does not exist or has been deleted; callers
        # handle these situations themselves.
        return granted

    # The record exists but webcoll has not run yet: authorize SUPERADMIN.
    code, message = acc_authorize_action(
        user_info, VIEWRESTRCOLL, collection=None)
    if code == 0:
        return granted
    return (
        1,
        "The record you are trying to access has just been "
        "submitted to the system and needs to be assigned to the "
        "proper collections. It is currently restricted for security "
        "reasons until the assignment will be fully completed. Please "
        "come back later to properly access this record.",
    )
Example #51
0
    def decorated(recid, *args, **kwargs):
        """Set up the record request context, then call the wrapped view.

        Aborts with "not found" for unknown records, shows a waiting page
        for fresh ones, enforces view permissions (guests are redirected to
        the login page), redirects or aborts for deleted records, builds
        the detailed-page tabs and registers the template context before
        delegating to ``f``.
        """
        from invenio.modules.access.mailcookie import \
            mail_cookie_create_authorize_action
        from invenio.modules.access.local_config import VIEWRESTRCOLL
        from invenio.legacy.search_engine import (
            check_user_can_view_record,
            get_merged_recid,
            guess_primary_collection_of_a_record,
            record_exists,
        )
        from invenio.legacy.websearch.adminlib import (
            get_detailed_page_tabs,
            get_detailed_page_tabs_counts,
        )
        from invenio.b2share.modules.main.utils import check_fresh_record

        # Coerce recid to an integer.
        recid = int(recid)

        if record_exists(recid) == 0:
            # Unknown record: abort so it doesn't get incorrectly cached.
            abort(apache.HTTP_NOT_FOUND)
        if check_fresh_record(current_user, recid):
            return render_template('record_waitforit.html', recid=recid)

        collection = Collection.query.filter(
            Collection.name == guess_primary_collection_of_a_record(recid)
        ).one()
        g.collection = collection

        denied, denial_msg = check_user_can_view_record(current_user, recid)

        # The verbose (debug) parameter is reserved to superadmins.
        if not current_user.is_super_admin:
            if 'verbose' in kwargs:
                kwargs['verbose'] = 0

        if denied:
            if current_user.is_guest:
                auth_cookie = mail_cookie_create_authorize_action(
                    VIEWRESTRCOLL, {'collection': g.collection.name})
                login_args = dict(action=auth_cookie, ln=g.ln,
                                  referer=request.url)
                flash(_("Authorization failure"), 'error')
                return redirect(url_for('webaccount.login', **login_args))
            flash(denial_msg, 'error')
            abort(apache.HTTP_UNAUTHORIZED)

        from invenio.modules.records.api import get_record
        # A deleted record that was merged into another one is redirected
        # to its successor; a plainly deleted record is gone.
        status = record_exists(recid)
        successor = get_merged_recid(recid)
        if status == -1:
            if successor:
                return redirect(url_for('record.metadata', recid=successor))
            abort(apache.HTTP_GONE)

        g.bibrec = Bibrec.query.get(recid)
        record = get_record(recid)
        if record is None:
            return render_template('404.html')

        title = record.get(cfg.get('RECORDS_BREADCRUMB_TITLE_KEY'), '')

        g.record_tab_keys = []
        tabs = []
        tab_counts = get_detailed_page_tabs_counts(recid)
        tab_settings = get_detailed_page_tabs(collection.id, recid, g.ln)
        for tab_name, settings in iteritems(tab_settings):
            # The unnamed tab is the metadata one.
            if tab_name == '':
                tab_name = 'metadata'
            # Choose the prefix (and possibly the name) used in the tab key.
            if tab_name == 'comments' or tab_name == 'reviews':
                prefix = 'comments'
            elif tab_name == 'linkbacks':
                prefix, tab_name = 'weblinkback', 'index'
            else:
                prefix = 'record'

            tab_key = prefix + '.' + tab_name
            tab = {
                'key': tab_key,
                'count': tab_counts.get(tab_name.capitalize(), -1),
            }
            # The configured settings are applied last.
            tab.update(settings)
            tabs.append(tab)
            if settings['visible']:
                g.record_tab_keys.append(tab_key)

        if cfg.get('CFG_WEBLINKBACK_TRACKBACK_ENABLED'):
            @register_template_context_processor
            def trackback_context():
                from invenio.legacy.weblinkback.templates import get_trackback_auto_discovery_tag
                return {'headerLinkbackTrackbackLink': get_trackback_auto_discovery_tag(recid)}

        def _format_record(recid, of='hd', user_info=current_user, *args, **kwargs):
            from invenio.modules.formatter import format_record
            return format_record(recid, of, user_info=user_info, *args, **kwargs)

        @register_template_context_processor
        def record_context():
            from invenio.modules.comments.api import get_mini_reviews
            from invenio.legacy.bibdocfile.api import BibRecDocs
            latest_files = BibRecDocs(recid, human_readable=True).list_latest_files(list_hidden=False)
            # Icons are never listed; restricted files are counted but hidden.
            all_files = [doc_file for doc_file in latest_files
                         if not doc_file.is_icon()]
            files = [doc_file for doc_file in all_files
                     if doc_file.is_restricted(current_user)[0] == 0]
            has_private_files = len(files) < len(all_files)
            return {
                'recid': recid,
                'record': record,
                'tabs': tabs,
                'title': title,
                'get_mini_reviews': get_mini_reviews,
                'collection': collection,
                'format_record': _format_record,
                'has_private_files': has_private_files,
                'files': files,
            }

        sender = "%s.%s" % (blueprint.name, f.__name__)
        pre_template_render.send(sender, recid=recid)
        return f(recid, *args, **kwargs)
Example #52
0
def format_record(recID, of, ln=None, verbose=0, search_pattern=None,
                  xml_record=None, user_info=None, on_the_fly=False):
    """
    Format a record in given output format.

    Return a formatted version of the record in the specified
    language, search pattern, and with the specified output format.
    The function will define which format template must be applied.

    The record to be formatted can be specified with its ID (with
    'recID' parameter) or given as XML representation (with
    'xml_record' parameter). If 'xml_record' is specified 'recID' is
    ignored (but should still be given for reference; a dummy recid 0
    or -1 could be used).

    'user_info' allows to grant access to some functionalities on a
    page depending on the user's privileges. The 'user_info' object
    makes sense only in the case of on-the-fly formatting. 'user_info'
    is the same object as the one returned by
    'webuser.collect_user_info(req)'

    If live formatting raises, the exception is re-raised when the
    application runs in debug mode; otherwise it is registered (with an
    alert to the admin) and a failsafe rendering is returned instead:
    the detailed websearch template for the 'hd' format, the brief
    websearch template for every other format.

    :param recID: the ID of record to format.
    :type recID: int
    :param of: an output format code (or short identifier for the output
               format)
    :type of: string
    :param ln: the language to use to format the record; defaults to
               ``cfg['CFG_SITE_LANG']`` when empty
    :type ln: string
    :param verbose: the level of verbosity from 0 to 9.
                    - 0: silent
                    - 5: errors
                    - 7: errors and warnings, stop if error in format elements
                    - 9: errors and warnings, stop if error (debug mode)
    :type verbose: int
    :param search_pattern: list of strings representing the user request in web
                           interface
    :type search_pattern: list(string)
    :param xml_record: an xml string representation of the record to format
    :type xml_record: string or None
    :param user_info: the information of the user who will view the formatted
                      page (if applicable)
    :param on_the_fly: if False, try to return an already preformatted version
                       of the record in the database
    :type on_the_fly: boolean
    :return: formatted record
    :rtype: string
    """
    ln = ln or cfg['CFG_SITE_LANG']
    from invenio.legacy.search_engine import record_exists
    if search_pattern is None:
        search_pattern = []

    out = ""

    if verbose == 9:
        out += """\n<span class="quicknote">
        Formatting record %i with output format %s.
        </span>""" % (recID, of)
    ############### FIXME: REMOVE WHEN MIGRATION IS DONE ###############
    if cfg['CFG_BIBFORMAT_USE_OLD_BIBFORMAT'] and cfg['CFG_PATH_PHP']:
        from . import engine as bibformat_engine
        return bibformat_engine.call_old_bibformat(recID, of=of,
                                                   on_the_fly=on_the_fly)
    ############################# END ##################################
    if not on_the_fly and \
       (ln == cfg['CFG_SITE_LANG'] or
        of.lower() == 'xm' or
        cfg['CFG_BIBFORMAT_USE_OLD_BIBFORMAT'] or
        (of.lower() in cfg['CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS'])) \
       and record_exists(recID) != -1:
        # Try to fetch preformatted record. Only possible for records
        # formatted in CFG_SITE_LANG language (other are never
        # stored), or of='xm' which does not depend on language.
        # Exceptions are made for output formats defined in
        # CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS, which are
        # always served from the same cache for any language.  Also,
        # do not fetch from DB when record has been deleted: we want
        # to return an "empty" record in that case
        from . import api
        res = api.get_preformatted_record(recID, of)
        if res is not None:
            # record 'recID' is formatted in 'of', so return it
            if verbose == 9:
                last_updated = api.get_preformatted_record_date(recID, of)
                out += """\n<br/><span class="quicknote">
                Found preformatted output for record %i (cache updated on %s).
                </span><br/>""" % (recID, last_updated)
            if of.lower() == 'xm':
                res = filter_hidden_fields(res, user_info)
            # try to replace language links in pre-cached res, if applicable:
            if ln != cfg['CFG_SITE_LANG'] and of.lower() in \
                    cfg['CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS']:
                # The following statements try to quickly replace any
                # language arguments in URL links.  Not an exact
                # science, but should work most of the time for most
                # of the formats, with not too many false positives.
                # We don't have time to parse output much here.
                res = res.replace('?ln=' + cfg['CFG_SITE_LANG'], '?ln=' + ln)
                res = res.replace('&ln=' + cfg['CFG_SITE_LANG'], '&ln=' + ln)
                res = res.replace('&amp;ln=' + cfg['CFG_SITE_LANG'],
                                  '&amp;ln=' + ln)
            out += res
            return out
        else:
            if verbose == 9:
                out += """\n<br/><span class="quicknote">""" \
                       """No preformatted output found for record %s.""" \
                       """</span>""" % recID

    # Live formatting of records in all other cases
    if verbose == 9:
        out += """\n<br/><span class="quicknote">
        Formatting record %i on-the-fly.
        </span>""" % recID

    try:
        from . import engine as bibformat_engine
        out += bibformat_engine.format_record(recID=recID,
                                              of=of,
                                              ln=ln,
                                              verbose=verbose,
                                              search_pattern=search_pattern,
                                              xml_record=xml_record,
                                              user_info=user_info)
        if of.lower() == 'xm':
            out = filter_hidden_fields(out, user_info)
        return out
    except Exception as e:
        # In debug mode surface the original traceback instead of hiding
        # it behind the failsafe rendering.
        if current_app.debug:
            six.reraise(*sys.exc_info())
        from invenio.ext.logging import register_exception
        register_exception(prefix="An error occured while formatting record "
                                  "%i in %s" % (recID, of),
                           alert_admin=True)
        #Failsafe execution mode
        import invenio.legacy.template
        websearch_templates = invenio.legacy.template.load('websearch')
        if verbose == 9:
            out += """\n<br/><span class="quicknote">
            An error occured while formatting record %i. (%s)
            </span>""" % (recID, str(e))
        if of.lower() == 'hd':
            if verbose == 9:
                out += """\n<br/><span class="quicknote">Formatting record""" \
                       """ %i with """ \
                       """websearch_templates.tmpl_print_record_detailed.""" \
                       """</span><br/>""" % recID
            # The detailed fallback must be returned for every verbosity
            # level; only the debugging note above depends on verbose == 9.
            # (Previously this return was nested under the verbose check, so
            # 'hd' requests silently fell back to the brief template.)
            return out + websearch_templates.tmpl_print_record_detailed(
                ln=ln,
                recID=recID)
        if verbose == 9:
            out += """\n<br/><span class="quicknote">Formatting record %i """ \
                   """with websearch_templates.tmpl_print_record_brief.""" \
                   """</span><br/>""" % recID
        return out + websearch_templates.tmpl_print_record_brief(ln=ln,
                                                                 recID=recID)
    def test_record_creation(self):
        """Check that record-creating deposition workflows produce a record.

        For every deposition type whose workflow contains the
        ``create_record_from_marc`` step: run the workflow to completion
        with dummy text-area values, check that a record and a scheduler
        task were created, run bibupload for that task and verify that the
        dummy values show up in the exported MARC.
        """
        import os
        from wtforms import TextAreaField
        from datetime import datetime

        from invenio.legacy.search_engine import record_exists
        from invenio.cache import cache
        from invenio.config import CFG_PREFIX
        from invenio.modules.workflows.models import Workflow
        from invenio.modules.workflows.config import CFG_WORKFLOW_STATUS
        from invenio.modules.scheduler.models import SchTASK

        from invenio.webdeposit_utils import (get_form, create_workflow,
                                              set_form_status,
                                              CFG_DRAFT_STATUS)
        from invenio_deposit.loader import deposition_metadata
        from invenio.webdeposit_workflow_utils import create_record_from_marc
        from invenio.modules.record.api import get_record

        def _is_marc_backed_textarea(candidate):
            # Text-area fields that are associated with a marc field.
            return isinstance(candidate, TextAreaField) and (
                candidate.has_recjson_key() or candidate.has_cook_function())

        user_id = self.login_user()
        for deposition_type in deposition_metadata.keys():

            deposition = create_workflow(deposition_type, user_id)
            assert deposition is not None

            # Only depositions whose workflow includes the record-creation
            # step are of interest; move on to the next one otherwise.
            create_rec = create_record_from_marc()
            step_matches = [create_rec.func_code == step.func_code
                            for step in deposition.workflow]
            if not any(step_matches):
                continue

            uuid = deposition.get_uuid()

            cache.delete_many("1:current_deposition_type", "1:current_uuid")
            cache.add("1:current_deposition_type", deposition_type)
            cache.add("1:current_uuid", uuid)

            # First run of the workflow
            deposition.run()

            # Build the form's json, keyed by field name, with dummy data
            form = get_form(user_id, uuid=uuid)
            webdeposit_json = dict(
                (field.name, "test " + field.name)
                for field in form
                if _is_marc_backed_textarea(field))

            first_step_draft = dict(
                form_type=form.__class__.__name__,
                form_values=webdeposit_json,
                step=0,  # dummy step
                status=CFG_DRAFT_STATUS['finished'],
                timestamp=str(datetime.now()))

            # Register the draft for the first step
            Workflow.set_extra_data(user_id=user_id, uuid=uuid,
                                    key='drafts',
                                    value={0: first_step_draft})

            # Keep resuming the workflow until it reports completion
            current_status = CFG_WORKFLOW_STATUS.RUNNING
            while current_status != CFG_WORKFLOW_STATUS.COMPLETED:
                deposition.run()
                set_form_status(user_id, uuid, CFG_DRAFT_STATUS['finished'])
                current_status = deposition.get_status()

            # The finished workflow must have produced an existing record
            recid = deposition.get_data('recid')
            assert recid is not None
            assert record_exists(recid) == 1

            # ... and a scheduler task
            task_id = deposition.get_data('task_id')
            assert task_id is not None

            bibtask = SchTASK.query.filter(SchTASK.id == task_id).first()
            assert bibtask is not None

            # Run bibupload, bibindex, webcoll manually
            cmd = "%s/bin/bibupload %s" % (CFG_PREFIX, task_id)
            assert not os.system(cmd)
            marc = get_record(recid).legacy_export_as_marc()
            for field in form:
                if _is_marc_backed_textarea(field):
                    assert "test " + field.name in marc
Example #54
0
def perform_request_delete(comID=-1, recID=-1, uid=-1, reviews="", ln=CFG_SITE_LANG):
    """
    Handle the admin request to look up comments/reviews for deletion.

    All identifiers are washed to integers first. Depending on which of
    them is positive, the function either shows the matching comments or
    falls back to the admin delete form, possibly with warnings.

    :param comID: comment ID; when positive (and recID is not) the comment
                  is looked up and 'reviews' is deduced from its star score
    :param recID: record ID; when positive the comments of that record are
                  requested
    :param uid: user ID (only washed and checked for being non-positive)
    :param reviews: washed to int and forwarded when a record ID is given;
                    otherwise deduced from the comment
    :param ln: language
    :return: the output of ``webcomment_templates.tmpl_admin_delete_form``
             when nothing valid was requested or the lookup failed;
             otherwise the tuple
             ``(perform_request_comments(...), None, warnings)``
    """
    _ = gettext_set_language(ln)

    from invenio.legacy.search_engine import record_exists

    warnings = []

    ln = wash_language(ln)
    comID = wash_url_argument(comID, 'int')
    recID = wash_url_argument(recID, 'int')
    uid = wash_url_argument(uid, 'int')
    # parameter reviews is deduced from comID when needed

    if comID is not None and recID is not None and uid is not None:
        if comID <= 0 and recID <= 0 and uid <= 0:
            # -1 is the "not supplied" default; any other non-positive
            # comment ID was explicitly given and is invalid.
            if comID != -1:
                # Raise-and-catch so that register_exception() can pick up
                # the warning as the current exception.
                try:
                    raise InvenioWebCommentWarning(_('Invalid comment ID.'))
                except InvenioWebCommentWarning as exc:
                    register_exception(stream='warning')
                    warnings.append((exc.message, ''))
                #warnings.append(("WRN_WEBCOMMENT_ADMIN_INVALID_COMID",))
            return webcomment_templates.tmpl_admin_delete_form(ln, warnings)

        if comID > 0 and not recID > 0:
            comment = query_get_comment(comID)

            if comment:
                # Figure out if this is a review or a comment
                c_star_score = 5
                if comment[c_star_score] > 0:
                    reviews = 1
                else:
                    reviews = 0
                return (perform_request_comments(ln=ln, comID=comID, recID=recID, reviews=reviews), None, warnings)
            else:
                try:
                    raise InvenioWebCommentWarning(_('Comment ID %(x_name)s does not exist.', x_name=comID))
                except InvenioWebCommentWarning as exc:
                    register_exception(stream='warning')
                    warnings.append((exc.message, ''))
                #warnings.append(('WRN_WEBCOMMENT_ADMIN_COMID_INEXISTANT', comID))
                return webcomment_templates.tmpl_admin_delete_form(ln, warnings)

        elif recID > 0:
            if record_exists(recID):
                comID = ''
                reviews = wash_url_argument(reviews, 'int')
                return (perform_request_comments(ln=ln, comID=comID, recID=recID, reviews=reviews), None, warnings)
            else:
                # Report the record ID that was looked up (the message
                # previously interpolated the comment ID by mistake).
                try:
                    raise InvenioWebCommentWarning(_('Record ID %(x_rec)s does not exist.', x_rec=recID))
                except InvenioWebCommentWarning as exc:
                    register_exception(stream='warning')
                    warnings.append((exc.message, ''))
                #warnings.append(('WRN_WEBCOMMENT_ADMIN_RECID_INEXISTANT', recID))
                return webcomment_templates.tmpl_admin_delete_form(ln, warnings)
        else:
            return webcomment_templates.tmpl_admin_delete_form(ln, warnings)

    else:
        return webcomment_templates.tmpl_admin_delete_form(ln, warnings)
Example #55
0
def print_record(recid, prefix='marcxml', verb='ListRecords', set_spec=None, set_last_updated=None):
    """Return the OAI representation of record 'recid' in format 'prefix'.

    A record counts as available when it exists and, if 'set_spec' is
    given, belongs to that set or to one of its subsets ("<set_spec>:...").

    - if the record is not available and CFG_OAI_DELETED_POLICY is
      neither 'persistent' nor 'transient', return an empty string.

    - if the record has no OAI identifier, return an empty string.

    - if the record is not available but the policy is 'persistent' or
      'transient', the header carries the status 'deleted' and no
      metadata is attached.

    - if 'verb' is 'ListIdentifiers', only the header is returned;
      otherwise a full record element (header, metadata, provenance,
      rights).

    When 'set_last_updated' is given, the datestamp is the later of the
    record modification date and 'set_last_updated'.
    """
    available = record_exists(recid) == 1
    if available:
        record_sets = get_field(recid, CFG_OAI_SET_FIELD)
        if set_spec and set_spec not in record_sets:
            subset_prefix = "%s:" % set_spec
            subsets = [name for name in record_sets
                       if name.startswith(subset_prefix)]
            if not subsets:
                ## the record is neither in the requested set nor in
                ## any of its subsets
                available = False

    status = None if available else 'deleted'

    if not available and CFG_OAI_DELETED_POLICY not in ('persistent', 'transient'):
        return ""

    idents = get_field(recid, CFG_OAI_ID_FIELD)
    if not idents:
        return ""
    ## FIXME: Move the OAI ID sanity checks (exactly one ID per record)
    ## into a bibtask. Only the first identifier is considered here.
    ident = idents[0]

    header_body = EscapedXMLString('')
    header_body += X.identifier()(ident)
    if set_last_updated:
        header_body += X.datestamp()(max(get_modification_date(recid), set_last_updated))
    else:
        header_body += X.datestamp()(get_modification_date(recid))
    for record_set in get_field(recid, CFG_OAI_SET_FIELD):
        # Skip empty values and the repository-wide global set
        if record_set and record_set != CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC:
            header_body += X.setSpec()(record_set)

    header = X.header(status=status)(header_body)

    if verb == 'ListIdentifiers':
        return header

    if not available:
        return X.record()(header, '', '', '')

    metadata_body = format_record(recid, CFG_OAI_METADATA_FORMATS[prefix][0])
    metadata = X.metadata(body=metadata_body)

    provenance_body = get_record_provenance(recid)
    provenance = X.about(body=provenance_body) if provenance_body else ''

    rights_body = get_record_rights(recid)
    rights = X.about(body=rights_body) if rights_body else ''

    return X.record()(header, metadata, provenance, rights)
Example #56
0
        def getfile(req, form):
            """Stream a file attached to record ``self.recid`` to the client.

            ``self`` and ``filename`` are not parameters: they are taken
            from the enclosing scope.

            The request goes through these checks, each of which can end
            it early: site/user authorization, record existence, record
            emptiness, record-level view restriction, BibRecDocs loading
            and deletion status. If a specific version (anything but
            'all') is requested, the matching document is looked up and,
            when the file is neither restricted for the user nor hidden,
            it is streamed.

            If nothing was streamed, the status is set to HTTP_NOT_FOUND
            and the function returns ``None``; the file-listing page code
            that follows that return is currently unreachable.

            :param req: the request object
            :param form: the submitted form / URL arguments, washed with
                         ``bibdocfile_templates.files_default_urlargd``
            """
            args = wash_urlargd(form,
                                bibdocfile_templates.files_default_urlargd)
            ln = args['ln']

            _ = gettext_set_language(ln)

            uid = getUid(req)
            user_info = collect_user_info(req)

            verbose = args['verbose']
            if verbose >= 1 and not isUserSuperAdmin(user_info):
                # Only SuperUser can see all the details!
                verbose = 0

            if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
                return page_not_authorized(req,
                                           "/%s/%s" %
                                           (CFG_SITE_RECORD, self.recid),
                                           navmenuid='submit')

            if record_exists(self.recid) < 1:
                msg = "<p>%s</p>" % _(
                    "Requested record does not seem to exist.")
                return warning_page(msg, req, ln)

            if record_empty(self.recid):
                msg = "<p>%s</p>" % _(
                    "Requested record does not seem to have been integrated.")
                return warning_page(msg, req, ln)

            # Record-level restriction. A non-zero auth_code means access is
            # denied, unless the record belongs to a released journal issue.
            (auth_code,
             auth_message) = check_user_can_view_record(user_info, self.recid)
            if auth_code and user_info['email'] == 'guest':
                if webjournal_utils.is_recid_in_released_issue(self.recid):
                    # We can serve the file
                    pass
                else:
                    # Send guests to the login page, carrying an
                    # authorization cookie and the referer to come back to.
                    cookie = mail_cookie_create_authorize_action(
                        VIEWRESTRCOLL, {
                            'collection':
                            guess_primary_collection_of_a_record(self.recid)
                        })
                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                             make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                                     CFG_SITE_SECURE_URL + user_info['uri']}, {})
                    return redirect_to_url(req, target, norobot=True)
            elif auth_code:
                if webjournal_utils.is_recid_in_released_issue(self.recid):
                    # We can serve the file
                    pass
                else:
                    return page_not_authorized(req, "../", \
                                               text = auth_message)

            # Downloads are only registered when the site is not at access
            # control level 1 (see the use of 'readonly' below).
            readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1

            # From now on: either the user provided a specific file
            # name (and a possible version), or we return a list of
            # all the available files. In no case are the docids
            # visible.
            try:
                bibarchive = BibRecDocs(self.recid)
            except InvenioBibDocFileError:
                register_exception(req=req, alert_admin=True)
                msg = "<p>%s</p><p>%s</p>" % (
                    _("The system has encountered an error in retrieving the list of files for this document."
                      ),
                    _("The error has been logged and will be taken in consideration as soon as possible."
                      ))
                return warning_page(msg, req, ln)

            if bibarchive.deleted_p():
                req.status = apache.HTTP_GONE
                return warning_page(
                    _("Requested record does not seem to exist."), req, ln)

            docname = ''
            docformat = ''
            version = ''
            warn = ''

            if filename:
                # We know the complete file name, guess which docid it
                # refers to
                ## TODO: Change the extension system according to ext.py from setlink
                ##       and have a uniform extension mechanism...
                docname = file_strip_ext(filename)
                docformat = filename[len(docname):]
                if docformat and docformat[0] != '.':
                    docformat = '.' + docformat
                if args['subformat']:
                    docformat += ';%s' % args['subformat']
            else:
                docname = args['docname']

            # Fall back to the URL arguments for anything the file name did
            # not provide.
            if not docformat:
                docformat = args['format']
                if args['subformat']:
                    docformat += ';%s' % args['subformat']

            if not version:
                version = args['version']

            ## Download as attachment
            is_download = False
            if args['download']:
                is_download = True

            # version could be either empty, or all or an integer
            try:
                int(version)
            except ValueError:
                if version != 'all':
                    version = ''

            display_hidden = isUserSuperAdmin(user_info)

            if version != 'all':
                # search this filename in the complete list of files
                for doc in bibarchive.list_bibdocs():
                    if docname == bibarchive.get_docname(doc.id):
                        try:
                            try:
                                docfile = doc.get_file(docformat, version)
                            except InvenioBibDocFileError as msg:
                                req.status = apache.HTTP_NOT_FOUND
                                if not CFG_INSPIRE_SITE and req.headers_in.get(
                                        'referer'):
                                    ## There must be a broken link somewhere.
                                    ## Maybe it's good to alert the admin
                                    register_exception(req=req,
                                                       alert_admin=True)
                                warn += write_warning(
                                    _("The format %(x_form)s does not exist for the given version: %(x_vers)s",
                                      x_form=cgi.escape(docformat),
                                      x_vers=cgi.escape(str(msg))))
                                break
                            # File-level restriction: record owners bypass it.
                            (auth_code,
                             auth_message) = docfile.is_restricted(user_info)
                            if auth_code != 0 and not is_user_owner_of_record(
                                    user_info, self.recid):
                                # Restricted icons get a placeholder icon
                                # instead of an error.
                                if CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(
                                        get_subformat_from_format(docformat)):
                                    return stream_restricted_icon(req)
                                if user_info['email'] == 'guest':
                                    cookie = mail_cookie_create_authorize_action(
                                        'viewrestrdoc',
                                        {'status': docfile.get_status()})
                                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                                    make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                        CFG_SITE_SECURE_URL + user_info['uri']}, {})
                                    # NOTE(review): result is not returned;
                                    # presumably redirect_to_url() does not
                                    # return normally - confirm.
                                    redirect_to_url(req, target)
                                else:
                                    req.status = apache.HTTP_UNAUTHORIZED
                                    warn += write_warning(
                                        _("This file is restricted: ") +
                                        str(auth_message))
                                    break

                            if not docfile.hidden_p():
                                if not readonly:
                                    ip = str(req.remote_ip)
                                    doc.register_download(
                                        ip, docfile.get_version(), docformat,
                                        uid, self.recid)
                                try:
                                    return docfile.stream(req,
                                                          download=is_download)
                                except InvenioBibDocFileError as msg:
                                    register_exception(req=req,
                                                       alert_admin=True)
                                    req.status = apache.HTTP_INTERNAL_SERVER_ERROR
                                    warn += write_warning(
                                        _("An error has happened in trying to stream the request file."
                                          ))
                            else:
                                req.status = apache.HTTP_UNAUTHORIZED
                                warn += write_warning(
                                    _("The requested file is hidden and can not be accessed."
                                      ))

                        except InvenioBibDocFileError as msg:
                            register_exception(req=req, alert_admin=True)

            # Prevent leaking of restricted file names
            # NOTE: this unconditional return overrides any status set above
            # with HTTP_NOT_FOUND and makes everything below (the file
            # listing page) unreachable.
            req.status = apache.HTTP_NOT_FOUND
            return

            if docname and docformat and not warn:
                req.status = apache.HTTP_NOT_FOUND
                warn += write_warning(
                    _("Requested file does not seem to exist."))


#            filelist = bibarchive.display("", version, ln=ln, verbose=verbose, display_hidden=display_hidden)
            filelist = bibdocfile_templates.tmpl_display_bibrecdocs(
                bibarchive,
                "",
                version,
                ln=ln,
                verbose=verbose,
                display_hidden=display_hidden)

            t = warn + bibdocfile_templates.tmpl_filelist(ln=ln,
                                                          filelist=filelist)

            # Build the detailed-record tab bar, ordered by each tab's
            # 'order' value, keeping only the visible tabs.
            cc = guess_primary_collection_of_a_record(self.recid)
            unordered_tabs = get_detailed_page_tabs(get_colID(cc), self.recid,
                                                    ln)
            ordered_tabs_id = [(tab_id, values['order'])
                               for (tab_id,
                                    values) in iteritems(unordered_tabs)]
            ordered_tabs_id.sort(lambda x, y: cmp(x[1], y[1]))
            link_ln = ''
            if ln != CFG_SITE_LANG:
                link_ln = '?ln=%s' % ln
            tabs = [
                (unordered_tabs[tab_id]['label'], '%s/%s/%s/%s%s' %
                 (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, tab_id, link_ln),
                 tab_id == 'files', unordered_tabs[tab_id]['enabled'])
                for (tab_id, dummy_order) in ordered_tabs_id
                if unordered_tabs[tab_id]['visible'] is True
            ]

            tabs_counts = get_detailed_page_tabs_counts(self.recid)
            top = webstyle_templates.detailed_record_container_top(
                self.recid,
                tabs,
                args['ln'],
                citationnum=tabs_counts['Citations'],
                referencenum=tabs_counts['References'],
                discussionnum=tabs_counts['Discussions'])
            bottom = webstyle_templates.detailed_record_container_bottom(
                self.recid, tabs, args['ln'])
            title, description, keywords = websearch_templates.tmpl_record_page_header_content(
                req, self.recid, args['ln'])
            # Assemble the full page: header, search page start, record
            # container with the file list, search page end, footer.
            return pageheaderonly(title=title,
                        navtrail=create_navtrail_links(cc=cc, aas=0, ln=ln) + \
                                        ''' &gt; <a class="navtrail" href="%s/%s/%s">%s</a>
                                        &gt; %s''' % \
                        (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, title, _("Access to Fulltext")),

                        description=description,
                        keywords=keywords,
                        uid=uid,
                        language=ln,
                        req=req,
                        navmenuid='search',
                        navtrail_append_title_p=0) + \
                        websearch_templates.tmpl_search_pagestart(ln) + \
                        top + t + bottom + \
                        websearch_templates.tmpl_search_pageend(ln) + \
                        pagefooteronly(language=ln, req=req)