Ejemplo n.º 1
0
def perform_upload_check(xml_record, mode):
    """ Performs an upload simulation with the given record and mode.

    While the check runs, bibupload_module.write_message is replaced by a
    collector so that the messages emitted during the simulation can be
    handed back to the caller. The original writer is restored on every
    exit path (early return, normal return or exception).

    @param xml_record: record data handed to perform_basic_upload_checks()
        and xml_marc_to_records()
    @param mode: upload mode option; its first two characters are dropped
        to obtain the bibupload mode name (presumably a command-line style
        flag -- confirm against callers)
    @return: string describing errors (collected messages joined by
        newlines; empty when nothing was collected)
    @rtype: string
    """
    error_cache = []

    def my_writer(msg, stream=sys.stdout, verbose=1):
        # `stream` is accepted but never used; only messages written with
        # verbose == 1 that do not contain 'DONE' are recorded.
        if verbose == 1 and 'DONE' not in msg:
            error_cache.append(msg.strip())

    orig_writer = bibupload_module.write_message
    bibupload_module.write_message = my_writer
    # Everything after the patch lives inside try/finally: previously the
    # early return below (and any exception raised by the basic checks or
    # the MARCXML parsing) left the replacement writer installed.
    try:
        error_cache.extend(perform_basic_upload_checks(xml_record))
        if error_cache:
            # There has been some critical error
            return '\n'.join(error_cache)

        recs = xml_marc_to_records(xml_record)
        upload_mode = mode[2:]
        # Adapt input data for bibupload function
        if upload_mode == "r insert-or-replace":
            upload_mode = "replace_or_insert"
        for record in recs:
            if record:
                record_strip_empty_volatile_subfields(record)
                record_strip_empty_fields(record)
                # pretend=True: run the upload as a simulation only
                bibupload(record, opt_mode=upload_mode, pretend=True)
    finally:
        bibupload_module.write_message = orig_writer

    return '\n'.join(error_cache)
Ejemplo n.º 2
0
def perform_upload_check(xml_record, mode):
    """ Performs an upload simulation with the given record and mode.

    bibupload_module.write_message is temporarily swapped for a local
    collector so the messages produced during the simulation can be
    returned. The original writer is always put back, whichever way the
    function exits.

    @param xml_record: record data handed to perform_basic_upload_checks()
        and xml_marc_to_records()
    @param mode: upload mode option; the first two characters are stripped
        to get the bibupload mode name (presumably a command-line style
        flag -- confirm against callers)
    @return: string describing errors (collected messages joined by
        newlines; empty when nothing was collected)
    @rtype: string
    """
    error_cache = []

    def my_writer(msg, stream=sys.stdout, verbose=1):
        # `stream` is ignored; keep only verbose == 1 messages that do not
        # contain 'DONE'.
        if verbose == 1 and 'DONE' not in msg:
            error_cache.append(msg.strip())

    orig_writer = bibupload_module.write_message
    bibupload_module.write_message = my_writer
    # The try block starts right after the patch is installed. Before, the
    # early return on basic-check errors, or an exception in the basic
    # checks / MARCXML parsing, skipped the restore in `finally`.
    try:
        error_cache.extend(perform_basic_upload_checks(xml_record))
        if error_cache:
            # There has been some critical error
            return '\n'.join(error_cache)

        recs = xml_marc_to_records(xml_record)
        upload_mode = mode[2:]
        # Adapt input data for bibupload function
        if upload_mode == "r insert-or-replace":
            upload_mode = "replace_or_insert"
        for record in recs:
            if record:
                record_strip_empty_volatile_subfields(record)
                record_strip_empty_fields(record)
                # pretend=True: run the upload as a simulation only
                bibupload(record, opt_mode=upload_mode, pretend=True)
    finally:
        bibupload_module.write_message = orig_writer

    return '\n'.join(error_cache)
Ejemplo n.º 3
0
def generate_keywords(req, recid, argd):
    """Extract keywords from the fulltexts.

    Do the extraction on the record with a recid equal to the parameter.
    It first checks whether the keywords are not already
    stored in the temp file (maybe from the previous run). If they are not,
    and the user is authorized, it either shows the "generate" form,
    reports that an extraction is already in progress, or schedules a new
    extraction.

    :param req: req object; page fragments are written to it via
        ``req.write``.
    :param recid: record id.
    :param argd: arguments passed from web; ``argd['ln']`` and
        ``argd['generate']`` are read here and the whole mapping is
        forwarded to the keyword-generation form template.
    :return: the result of ``record_get_keywords`` when the temp file could
        be loaded; otherwise the tuple ``(0, {}, None)``.
    """
    ln = argd['ln']
    _ = gettext_set_language(ln)
    keywords = {}
    warning_box = '<div class="warningbox">%s</div>'

    # check the files were not already generated
    abs_path = get_tmp_file(recid)
    if os.path.exists(abs_path):
        try:
            # Try to load the data from the tmp file
            recs = xml_marc_to_records(open_marc_file(abs_path))
            return record_get_keywords(recs[0])
        except Exception as err:
            # Best effort: an unreadable temp file must not break the page,
            # so fall through to the normal flow. Unlike a bare ``except``
            # this lets SystemExit/KeyboardInterrupt propagate and leaves a
            # trace of the failure in the log.
            log.info('Could not load keywords from %s: %r' % (abs_path, err))

    # check it is allowed (for this user) to generate pages
    (exit_stat, msg) = acce.acc_authorize_action(req, 'runbibclassify')
    if exit_stat != 0:
        log.info('Access denied: ' + msg)
        msg = _("The site settings do not allow automatic keyword extraction")
        req.write(template.tmpl_page_msg(msg=msg))
        return 0, keywords, None

    # register generation
    bibdocfiles = BibRecDocs(recid).list_latest_files()
    if not bibdocfiles:
        # NOTE(review): the backslash continuation makes the indentation of
        # the second line part of the gettext msgid; it is kept exactly as
        # it was so that existing translations keep matching.
        req.write(
            template.tmpl_page_msg(msg=warning_box % _(
                "Unfortunately, we don't have a PDF fulltext for this record in the storage, \
                    keywords cannot be generated using an automated process."))
        )
        return 0, keywords, None

    # User arrived at a page, but no keywords are available
    inprogress, msg = _doc_already_submitted(recid)
    # Looked up before branching so a missing key fails exactly as before.
    generate_requested = argd['generate'] == 'yes'

    if inprogress:
        # Same warning whether or not the "generate" button was clicked.
        req.write(template.tmpl_page_msg(msg=warning_box % _(msg)))
    elif not generate_requested:
        # Display a form and give them possibility to generate keywords
        req.write(template.tmpl_page_generate_keywords(req=req, **argd))
    else:
        # after user clicked on "generate" button
        schedule_extraction(recid,
                            taxonomy=bconfig.CFG_EXTRACTION_TAXONOMY)
        # NOTE(review): the two adjacent literals join without a space
        # ("automatedkeyword"). Left untouched because the text is a
        # gettext msgid; fix it together with the translation catalogs.
        req.write(
            template.tmpl_page_msg(msg=warning_box % _(
                'We have registered your request, the automated'
                'keyword extraction will run after some time. Please return back in a while.'
            )))

    return 0, keywords, None
Ejemplo n.º 4
0
def generate_keywords(req, recid, argd):
    """Extract keywords from the fulltexts.

    Do the extraction on the record with a recid equal to the parameter.
    It first checks whether the keywords are not already
    stored in the temp file (maybe from the previous run). Otherwise, for
    an authorized user, it shows the "generate" form, reports an extraction
    already in progress, or schedules a new extraction.

    :param req: req object; page fragments are written to it via
        ``req.write``.
    :param recid: record id.
    :param argd: arguments passed from web; ``argd["ln"]`` and
        ``argd["generate"]`` are read here and the whole mapping is
        forwarded to the keyword-generation form template.
    :return: the result of ``record_get_keywords`` when the temp file could
        be loaded; otherwise the tuple ``(0, {}, None)``.
    """
    ln = argd["ln"]
    _ = gettext_set_language(ln)
    keywords = {}
    warning_box = '<div class="warningbox">%s</div>'

    # check the files were not already generated
    abs_path = get_tmp_file(recid)
    if os.path.exists(abs_path):
        try:
            # Try to load the data from the tmp file
            recs = xml_marc_to_records(open_marc_file(abs_path))
            return record_get_keywords(recs[0])
        except Exception as err:
            # Best effort: a broken temp file must not break the page, so
            # continue with the normal flow. Narrower than a bare
            # ``except`` (SystemExit/KeyboardInterrupt now propagate) and
            # the failure is logged instead of silently discarded.
            log.info("Could not load keywords from %s: %r" % (abs_path, err))

    # check it is allowed (for this user) to generate pages
    (exit_stat, msg) = acce.acc_authorize_action(req, "runbibclassify")
    if exit_stat != 0:
        log.info("Access denied: " + msg)
        msg = _("The site settings do not allow automatic keyword extraction")
        req.write(template.tmpl_page_msg(msg=msg))
        return 0, keywords, None

    # register generation
    bibdocfiles = BibRecDocs(recid).list_latest_files()
    if not bibdocfiles:
        # NOTE(review): the backslash continuation makes the indentation of
        # the second line part of the gettext msgid; it is reproduced
        # exactly so that existing translations keep matching.
        req.write(
            template.tmpl_page_msg(
                msg=warning_box
                % _(
                    "Unfortunately, we don't have a PDF fulltext for this record in the storage, \
                    keywords cannot be generated using an automated process."
                )
            )
        )
        return 0, keywords, None

    # User arrived at a page, but no keywords are available
    inprogress, msg = _doc_already_submitted(recid)
    # Looked up before branching so a missing key fails exactly as before.
    generate_requested = argd["generate"] == "yes"

    if inprogress:
        # Same warning whether or not the "generate" button was clicked.
        req.write(template.tmpl_page_msg(msg=warning_box % _(msg)))
    elif not generate_requested:
        # Display a form and give them possibility to generate keywords
        req.write(template.tmpl_page_generate_keywords(req=req, **argd))
    else:
        # after user clicked on "generate" button
        schedule_extraction(recid, taxonomy=bconfig.CFG_EXTRACTION_TAXONOMY)
        # NOTE(review): the two adjacent literals join without a space
        # ("automatedkeyword"). Left untouched because the text is a
        # gettext msgid; fix it together with the translation catalogs.
        req.write(
            template.tmpl_page_msg(
                msg=warning_box
                % _(
                    "We have registered your request, the automated"
                    "keyword extraction will run after some time. Please return back in a while."
                )
            )
        )

    return 0, keywords, None