Exemple #1
0
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    The batch file is decoded and sanitised, the video master is located
    (or added), every job of the batch is run (encoding or frame
    extraction), the MARC of the record is fixed and, depending on the
    batch description, extra MARCXML is uploaded, the input file is removed
    and notification emails are sent.

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: always 1; job failures are reported through the notification
             emails, invalid descriptions through exceptions
    @rtype: int
    """
    from invenio.legacy.bibdocfile.cli import cli_fix_marc

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = 'bibencode_' + str(batch_job['recid']) + '_' + str(
            uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR,
                                    xml_filename)
        ## The context manager closes the file even if the write fails
        with open(xml_filename, 'w') as xml_file:
            xml_file.write(marcxml)
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    # if record_exists(batch_job['recid']) < 1:
    #     raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                ## A criterion missing from the batch description defaults
                ## to the file's own value and therefore always matches
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description',
                                       description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat',
                                     subformat)
                if (comment == m_comment and description == m_description
                        and subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect ratio from the record being processed
                    try:
                        ## Assumes pbcore metadata mapping
                        batch_job['aspect'] = get_fieldvalues(
                            batch_job['recid'],
                            CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        ## No aspect stored in the record: leave it unset
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found" %
                                batch_job['recid'])
            task_update_progress("Video master for record %d not found" %
                                 batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(
        batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job,
                                            'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory of the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first and then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc" %
                                bibdoc_video_docname)
            master_format = compose_format(
                bibdoc_video_extension,
                getval(batch_job, 'bibdoc_master_subformat', 'master'))
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                batch_job['input'],
                version=1,
                description=getval(batch_job, 'bibdoc_master_description'),
                comment=getval(batch_job, 'bibdoc_master_comment'),
                docformat=master_format)

    #-----------#
    # JOBS LOOP #
    #-----------#

    ## Stays 1 only while every executed job reports success
    return_code = 1
    global _BATCH_STEP

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------" %
                            (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(
                job['bibdoc_docname']).safe_substitute(
                    {'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            ## (the step counter is intentionally left untouched, as before)
            if not getval(batch_job, 'assure_quality') and getval(
                    job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None
            ## We need an extension defined for the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname',
                                                bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(bibdoc_video_directory,
                                                 bibdoc_video_extension)
            _task_write_message(
                "Transcoding %s to %s;%s" %
                (bibdoc_slave_video_docname, bibdoc_video_extension,
                 bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            encoding_result = encode_video(
                input_file=batch_job['input'],
                output_file=bibdoc_video_fullpath,
                acodec=getval(job, 'audiocodec'),
                vcodec=getval(job, 'videocodec'),
                ## Audio bitrate comes from 'audiobitrate', video bitrate
                ## from 'videobitrate' (these used to be swapped)
                abitrate=getval(job, 'audiobitrate'),
                vbitrate=getval(job, 'videobitrate'),
                resolution=getval(job, 'resolution'),
                passes=getval(job, 'passes', 1),
                special=getval(job, 'special'),
                specialfirst=getval(job, 'specialfirst'),
                specialsecond=getval(job, 'specialsecond'),
                metadata=getval(job, 'metadata'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),  # Aspect for every job
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                message_fnc=_task_write_message)
            return_code &= encoding_result
            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(
                    bibdoc_video_fullpath,
                    compose_file(bibdoc_video_directory,
                                 bibdoc_video_extension,
                                 bibdoc_video_subformat, 1,
                                 bibdoc_slave_video_docname))
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                             bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(
                        getval(job, 'bibdoc_description'), bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            ## try/finally guarantees the temporary directory is removed
            ## even when extraction or the BibDoc handling raises
            try:
                #Move this to the batch description
                bibdoc_frame_docname = getval(job, 'bibdoc_docname',
                                              bibdoc_video_docname)
                tmpfname = (
                    tmpdir + "/" + bibdoc_frame_docname + '.' +
                    getval(profile, 'extension',
                           getval(job, 'extension', 'jpg')))
                extraction_result = extract_frames(
                    input_file=batch_job['input'],
                    output_file=tmpfname,
                    size=getval(job, 'size'),
                    positions=getval(job, 'positions'),
                    numberof=getval(job, 'numberof'),
                    width=getval(job, 'width'),
                    height=getval(job, 'height'),
                    aspect=getval(batch_job, 'aspect'),
                    profile=getval(job, 'profile'),
                    update_fnc=_task_update_overall_status,
                )
                return_code &= extraction_result

                ## only on success:
                if extraction_result:
                    ## for every filename in the directory, create a bibdoc
                    ## that contains all sizes of the frame
                    files = os.listdir(tmpdir)
                    for filename in files:
                        ## The docname was altered by BibEncode extract through substitution
                        ## Retrieve it from the filename again
                        bibdoc_frame_docname, bibdoc_frame_extension = os.path.splitext(
                            filename)
                        _task_write_message("Creating new bibdoc for %s" %
                                            bibdoc_frame_docname)
                        ## If the bibdoc exists, receive it
                        if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                            bibdoc_frame = recdoc.get_bibdoc(
                                bibdoc_frame_docname)
                        ## Create a new bibdoc if it does not exist
                        else:
                            bibdoc_frame = recdoc.add_bibdoc(
                                docname=bibdoc_frame_docname)

                        ## The filename including path from tmpdir
                        fname = os.path.join(tmpdir, filename)

                        bibdoc_frame_format = compose_format(
                            bibdoc_frame_extension, bibdoc_frame_subformat)
                        ## Same as with the master, if the format already exists,
                        ## override it, because something went wrong before
                        if bibdoc_frame.format_already_exists_p(
                                bibdoc_frame_format):
                            bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                        _task_write_message("Adding %s jpg;%s to BibDoc" %
                                            (bibdoc_frame_docname,
                                             getval(job, 'bibdoc_subformat')))
                        bibdoc_frame.add_file_new_format(
                            fname,
                            version=1,
                            description=getval(job, 'bibdoc_description'),
                            comment=getval(job, 'bibdoc_comment'),
                            docformat=bibdoc_frame_format)
            finally:
                ## Remove the temporary folders
                _task_write_message("Removing temporary directory")
                shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving it into the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file=getval(batch_job, 'input'),
                                 pbcoreIdentifier=batch_job['recid'],
                                 aspect_override=getval(batch_job, 'aspect'))
        from invenio_formatter.engines.xslt import format
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        with open(getval(batch_job, 'marc_snippet')) as marc_snippet:
            marcxml = marc_snippet.read()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        # only if successful: return_code is still 1 when no job failed
        if return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern',
                      '') in getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    ## Best effort: a missing/locked input is not fatal
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename', batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            notify_admin = getval(batch_job, 'notify_admin')
            ## Pass the value on only when it is a string; any other truthy
            ## value falls back to the helper's own default
            if isinstance(notify_admin, str):
                _notify_error_admin(batch_job, notify_admin)
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename', batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
    return 1
Exemple #2
0
def Move_Files_to_Storage(parameters, curdir, form, user_info=None):
    """
    The function moves files received from the standard submission's
    form through file input element(s). The documents are assigned a
    'doctype' (or category) corresponding to the file input element
    (eg. a file uploaded through 'DEMOPIC_FILE' will go to
    'DEMOPIC_FILE' doctype/category).

    Websubmit engine builds the following file organization in the
    directory curdir/files:

                  curdir/files
                        |
      _____________________________________________________________________
            |                                   |                          |
      ./file input 1 element's name      ./file input 2 element's name    ....
         (for eg. 'DEMOART_MAILFILE')       (for eg. 'DEMOART_APPENDIX')
         |                                     |
      test1.pdf                             test2.pdf


    There is only one instance of all possible extension(pdf, gz...) in each part
    otherwise we may encounter problems when renaming files.

    + parameters['rename']: if given, all the files in curdir/files
      are renamed.  parameters['rename'] is of the form:
      <PA>elemfilename[re]</PA>* where re is a regexp to select (using
      re.sub) what part of the elem file has to be selected.
      e.g: <PA>file:TEST_FILE_RN</PA>

    + parameters['documenttype']: if given, other formats are created.
      It has 2 possible values: - if "picture" icon in gif format is created
                                - if "fulltext" ps, gz .... formats are created

    + parameters['paths_and_suffixes']: directories to look into and
      corresponding suffix to add to every file inside. It must have
      the same structure as a Python dictionary of the following form
      {'FrenchAbstract':'french', 'EnglishAbstract':''}

      The keys are the file input element name from the form <=>
      directories in curdir/files The values associated are the
      suffixes which will be added to all the files in
      e.g. curdir/files/FrenchAbstract

    + parameters['iconsize']: comma-separated list of icon sizes, needed
      only if "picture" is selected in parameters['documenttype']

    + parameters['paths_and_restrictions']: the restrictions to apply
      to each uploaded file. The parameter must have the same
      structure as a Python dictionary of the following form:
      {'DEMOART_APPENDIX':'restricted'}
      Files not specified in this parameter are not restricted.
      The specified restrictions can include a variable that can be
      replaced at runtime, for eg:
      {'DEMOART_APPENDIX':'restricted to <PA>file:SuE</PA>'}

    + parameters['paths_and_doctypes']: if a doctype is specified,
      the file will be saved under the 'doctype/collection' instead
      of under the default doctype/collection given by the name
      of the upload element that was used on the websubmit interface.
      to configure the doctype in websubmit, enter the value as in a
      dictionary, for eg:
      {'PATHS_SWORD_UPL' : 'PUSHED_TO_ARXIV'} -> from
      Demo_Export_Via_Sword [DEMOSWR] Document Types

    The function returns an empty string and raises
    InvenioWebSubmitFunctionWarning when a file cannot be renamed or an
    icon filename cannot be stored.
    """

    global sysno
    paths_and_suffixes = parameters['paths_and_suffixes']
    paths_and_restrictions = parameters['paths_and_restrictions']
    rename = parameters['rename']
    documenttype = parameters['documenttype']
    iconsizes = parameters['iconsize'].split(',')
    paths_and_doctypes = parameters['paths_and_doctypes']

    ## Create an instance of BibRecDocs for the current recid(sysno)
    bibrecdocs = BibRecDocs(sysno)

    paths_and_suffixes = get_dictionary_from_string(paths_and_suffixes)

    paths_and_restrictions = get_dictionary_from_string(paths_and_restrictions)

    paths_and_doctypes = get_dictionary_from_string(paths_and_doctypes)

    ## Pattern of the <PA>...</PA> placeholders expanded at runtime
    pa_tag_pattern = '<PA>(?P<content>[^<]*)</PA>'

    ## Go through all the directories specified in the keys
    ## of parameters['paths_and_suffixes']
    for path in paths_and_suffixes:
        path_dir = "%s/files/%s" % (curdir, path)
        ## Check if there is a directory for the current path
        if not os.path.exists(path_dir):
            continue
        suffix = paths_and_suffixes[path]
        ## Retrieve the restriction to apply to files in this
        ## directory
        restriction = paths_and_restrictions.get(path, '')
        restriction = re.sub(pa_tag_pattern, get_pa_tag_content, restriction)

        ## Go through all the files in curdir/files/path
        for current_file in os.listdir(path_dir):
            ## retrieve filename and extension
            dummy, filename, extension = decompose_file(current_file)
            if extension and extension[0] != ".":
                extension = '.' + extension
            if len(suffix) != 0:
                extension = "_%s%s" % (suffix, extension)
            ## Build the new file name if rename parameter has been given
            if rename:
                filename = re.sub(pa_tag_pattern, get_pa_tag_content,
                                  parameters['rename'])

            fullpath = "%s/files/%s/%s%s" % (curdir, path, filename,
                                             extension)

            if rename or len(suffix) != 0:
                ## Rename the file
                original_path = "%s/files/%s/%s" % (curdir, path,
                                                    current_file)
                try:
                    # Write the log rename_cmd; the context manager closes
                    # the log even when the rename below fails
                    with open("%s/rename_cmd" % curdir, "a+") as fd:
                        fd.write(original_path + " to " + fullpath + "\n\n")
                    ## Rename
                    os.rename(original_path, fullpath)
                    ## Save the new name in a text file in curdir so that
                    ## the new filename can be used by templates to create the recmysl
                    with open("%s/%s_RENAMED" % (curdir, path), "w") as fd:
                        fd.write("%s%s" % (filename, extension))
                except OSError as err:
                    msg = "Cannot rename the file.[%s]"
                    msg %= str(err)
                    raise InvenioWebSubmitFunctionWarning(msg)

            ## Check if there is any existing similar file
            if bibrecdocs.check_file_exists(fullpath, extension):
                continue

            bibdoc = bibrecdocs.add_new_file(
                fullpath,
                doctype=paths_and_doctypes.get(path, path),
                never_fail=True)
            bibdoc.set_status(restriction)

            ## Fulltext
            if documenttype == "fulltext":
                for additionalformat in createRelatedFormats(fullpath):
                    try:
                        bibrecdocs.add_new_format(additionalformat)
                    except InvenioBibDocFileError:
                        ## Best effort: skip formats that cannot be attached
                        pass
            ## Icon
            elif documenttype == "picture":
                has_added_default_icon_subformat_p = False
                for iconsize in iconsizes:
                    try:
                        iconpath, iconname = create_icon({
                            'input-file': fullpath,
                            'icon-scale': iconsize,
                            'icon-name': None,
                            'icon-file-format': None,
                            'multipage-icon': False,
                            'multipage-icon-delay': 100,
                            'verbosity': 0,
                        })
                    except Exception:
                        ## Any failure is reported to the admin and this
                        ## icon size is skipped
                        register_exception(
                            prefix=
                            'Impossible to create icon for %s (record %s)'
                            % (fullpath, sysno),
                            alert_admin=True)
                        continue
                    iconpath = os.path.join(iconpath, iconname)
                    docname = decompose_file(fullpath)[1]
                    try:
                        mybibdoc = bibrecdocs.get_bibdoc(docname)
                    except InvenioBibDocFileError:
                        mybibdoc = None
                    if iconpath is not None and mybibdoc is not None:
                        try:
                            icon_suffix = iconsize.replace(
                                '>', '').replace('<', '').replace(
                                    '^', '').replace('!', '')
                            ## Remember which kind of icon this iteration
                            ## adds BEFORE flipping the flag, so that the
                            ## marker file below gets the matching name
                            is_default_icon = (
                                not has_added_default_icon_subformat_p)
                            if is_default_icon:
                                mybibdoc.add_icon(iconpath)
                                has_added_default_icon_subformat_p = True
                            else:
                                mybibdoc.add_icon(
                                    iconpath,
                                    subformat=
                                    CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT
                                    + "-" + icon_suffix)
                            ## Save the new icon filename in a text file in curdir so that
                            ## it can be used by templates to create the recmysl
                            if is_default_icon:
                                icon_marker = "%s/%s_ICON" % (curdir, path)
                            else:
                                icon_marker = "%s/%s_ICON_%s" % (
                                    curdir, path,
                                    iconsize + '_' + icon_suffix)
                            try:
                                with open(icon_marker, "w") as fd:
                                    fd.write(os.path.basename(iconpath))
                            except OSError as err:
                                msg = "Cannot store icon filename.[%s]"
                                msg %= str(err)
                                raise InvenioWebSubmitFunctionWarning(
                                    msg)
                        except InvenioBibDocFileError:
                            # Most probably icon already existed.
                            pass
                    elif mybibdoc is not None:
                        ## NOTE(review): iconpath comes from os.path.join
                        ## above and is never None, so this branch looks
                        ## unreachable; kept to preserve behaviour
                        mybibdoc.delete_icon()

    # Update the MARC
    bibdocfile_bin = os.path.join(CFG_BINDIR, 'bibdocfile --yes-i-know')
    run_shell_command(bibdocfile_bin + " --fix-marc --recid=%s",
                      (str(sysno), ))

    # Delete the HB BibFormat cache in the DB, so that the fulltext
    # links do not point to possible dead files
    run_sql(
        "DELETE LOW_PRIORITY from bibfmt WHERE format='HB' AND id_bibrec=%s",
        (sysno, ))

    return ""
def format_element(bfo,
                   max_photos='',
                   one_icon_per_bibdoc='yes',
                   twitter_card_type='photo',
                   use_webjournal_featured_image='no'):
    """Return image meta tags of the record for Open Graph and Twitter Cards.

    Will look for any icon stored with the record, and will fallback to any
    image file attached to the record. Returns an empty string when Open
    Graph support is disabled or when no image is found.

    Some optional structured properties are not considered, for optimizing
    both generation of the page and page size.

    The Twitter Card tags come first in the output, followed by the
    ``og:image`` tags, one tag per line.

    @param bfo: the BibFormat object of the record being formatted
    @param max_photos: the maximum number of bibdocs to scan for pictures
        (string of digits); any non-numeric value means "all bibdocs"
    @param one_icon_per_bibdoc: shall we keep just one icon per bibdoc in the
        output (no repetition of the same preview in multiple sizes)?
    @param twitter_card_type: the type of Twitter card: 'photo' (single photo)
        or 'gallery'. Falls back to 'photo' if there are not enough pictures
        for a 'gallery'.
    @param use_webjournal_featured_image: if 'yes', use the "featured image"
        (as defined in bfe_webjournal_articles_overview) as image for the
        Twitter Card
    @return: the meta tags joined with newlines
    @rtype: string
    """
    if not CFG_WEBSEARCH_ENABLE_OPENGRAPH:
        return ""
    bibdocs = BibRecDocs(bfo.recID).list_bibdocs()
    # Each entry is a (url, secure_url, size) triple.
    images = []

    if max_photos.isdigit():
        max_photos = int(max_photos)
    else:
        max_photos = len(bibdocs)

    image_superformats = (".jpg", ".gif", ".jpeg", ".png")
    for doc in bibdocs[:max_photos]:
        found_icons = []
        found_image_url = ''
        found_image_size = 0
        for docfile in doc.list_latest_files(list_hidden=False):
            if docfile.is_icon():
                found_icons.append((docfile.get_size(), docfile.get_url()))
            elif get_superformat_from_format(
                    docfile.get_format()).lower() in image_superformats:
                found_image_url = docfile.get_url()
                found_image_size = docfile.get_size()
        found_icons.sort()

        # We might have found several icons for the same file: keep
        # middle-size one.  Floor division keeps the index an integer
        # whatever the division semantics in effect.
        if found_icons and one_icon_per_bibdoc.lower() == 'yes':
            found_icons = [found_icons[len(found_icons) // 2]]
        for icon_size, icon_url in found_icons:
            images.append(
                (icon_url, icon_url.replace(CFG_SITE_URL,
                                            CFG_SITE_SECURE_URL), icon_size))
        # Link to main file too (?)
        if found_image_url:
            images.append((found_image_url,
                           found_image_url.replace(CFG_SITE_URL,
                                                   CFG_SITE_SECURE_URL),
                           found_image_size))

    if CFG_CERN_SITE:
        # Add some more pictures from metadata.  We do not want to check the
        # real image size, so we make one up (see Twitter Card limit below).
        dummy_size = 500 * 1024
        for image_url in bfo.fields("8567_u"):
            if image_url.split('.')[-1] not in ('jpg', 'png', 'jpeg', 'gif'):
                continue
            if 'A5' not in image_url:
                continue
            # The replacement keeps the trailing slash so that host and path
            # stay separated in the rewritten URL.
            images.append(
                (image_url,
                 image_url.replace("http://mediaarchive.cern.ch/",
                                   "https://mediastream.cern.ch/"),
                 dummy_size))

    # Open Graph tags are built before the optional WebJournal featured image
    # is inserted below: that image only feeds the Twitter Card.
    tags = []
    for image_url, image_secure_url, dummy_image_size in images:
        tag = '<meta property="og:image" content="%s" />' % image_url
        if image_url != image_secure_url:
            tag += ('\n<meta property="og:image:secure_url" content="%s" />'
                    % image_secure_url)
        tags.append(tag)

    # Twitter Card

    if use_webjournal_featured_image.lower() == 'yes':
        # First look for the prefered image, if available. Note that
        # it might be a remote one.
        try:
            from invenio.modules.formatter.format_elements import bfe_webjournal_articles_overview
            image_url = bfe_webjournal_articles_overview._get_feature_image(
                bfo)
            image_secure_url = image_url.replace('http:', 'https:')
            image_size = 500 * 1024  # TODO: check for real image size
            if image_url.strip():
                images.insert(0, (image_url, image_secure_url, image_size))
        except Exception:
            # Best effort only: the featured image is optional, so a missing
            # module or a record without featured image must not break the
            # rest of the output.
            pass

    # Filter out images that would not be compatible: max 1MB according to
    # Twitter Card APIs, max 4 photos.
    twitter_compatible_images = [
        image_url for image_url, dummy_secure_url, image_size in images
        if image_size < 1024 * 1024
    ][:4]

    if len(twitter_compatible_images) == 4 and twitter_card_type == 'gallery':
        # enumerate() numbers the tags 0..3 even when the same URL appears
        # more than once in the list.
        twitter_card_tags = [
            '<meta name="twitter:image%i" content="%s" />' %
            (position, image_url)
            for position, image_url in enumerate(twitter_compatible_images)
        ]
    elif twitter_compatible_images:
        twitter_card_tags = [
            '<meta name="twitter:image" content="%s" />' %
            twitter_compatible_images[0]
        ]
    else:
        twitter_card_tags = []

    return "\n".join(twitter_card_tags + tags)
Exemple #4
0
 def get_files():
     """Yield the URL of every file of every bibdoc attached to `recid`.

     `recid` is not a parameter: it is read from the enclosing scope.
     """
     from invenio.legacy.bibdocfile.api import BibRecDocs
     record_documents = BibRecDocs(recid)
     for document in record_documents.list_bibdocs():
         for attached_file in document.list_all_files():
             yield attached_file.get_url()
    def test_BibDocs(self):
        """bibdocfile - BibDocs functions"""
        # Walks one bibdoc attached to record 2 through its whole life cycle:
        # creation, renaming, status, new versions, revert, descriptions and
        # comments, modification dates, formats, flags, icons, deletion and
        # expunging.  The steps are order-dependent: each one relies on the
        # state left behind by the previous ones.
        from invenio.legacy.bibdocfile.api import BibRecDocs
        #add file
        my_bibrecdoc = BibRecDocs(2)
        timestamp1 = datetime(
            *(time.strptime("2011-10-09 08:07:06", "%Y-%m-%d %H:%M:%S")[:6]))
        my_bibrecdoc.add_new_file(pkg_resources.resource_filename(
            'invenio_demosite.testsuite.regression', 'data/test.jpg'),
                                  'Main',
                                  'img_test',
                                  False,
                                  'test add new file',
                                  'test',
                                  '.jpg',
                                  modification_date=timestamp1)
        my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test")
        value = my_bibrecdoc.list_bibdocs()
        self.assertEqual(len(value), 2)
        #get total file (bibdoc)
        self.assertEqual(my_new_bibdoc.get_total_size(), 91750)
        #get recid
        self.assertEqual(my_new_bibdoc.bibrec_links[0]["recid"], 2)
        #change name
        my_new_bibdoc.change_name(2, 'new_name')
        #get docname
        my_bibrecdoc = BibRecDocs(2)
        self.assertEqual(my_bibrecdoc.get_docname(my_new_bibdoc.id),
                         'new_name')
        #get type
        self.assertEqual(my_new_bibdoc.get_type(), 'Main')
        #get id
        self.assertTrue(my_new_bibdoc.get_id() > 80)
        #set status
        my_new_bibdoc.set_status('new status')
        #get status
        self.assertEqual(my_new_bibdoc.get_status(), 'new status')
        #get base directory
        self.assertTrue(my_new_bibdoc.get_base_dir().startswith(
            cfg['CFG_BIBDOCFILE_FILEDIR']))
        #get file number
        self.assertEqual(my_new_bibdoc.get_file_number(), 1)
        #add file new version
        timestamp2 = datetime(
            *(time.strptime("2010-09-08 07:06:05", "%Y-%m-%d %H:%M:%S")[:6]))
        my_new_bibdoc.add_file_new_version(pkg_resources.resource_filename(
            'invenio_demosite.testsuite.regression', 'data/test.jpg'),
                                           description='the new version',
                                           comment=None,
                                           docformat=None,
                                           flags=["PERFORM_HIDE_PREVIOUS"],
                                           modification_date=timestamp2)
        self.assertEqual(my_new_bibdoc.list_versions(), [1, 2])
        #revert
        timestamp3 = datetime.now()
        time.sleep(
            2
        )  # so we can see a difference between now() and the time of the revert
        my_new_bibdoc.revert(1)
        self.assertEqual(my_new_bibdoc.list_versions(), [1, 2, 3])
        self.assertEqual(my_new_bibdoc.get_description('.jpg', version=3),
                         'test add new file')
        #get total size latest version
        self.assertEqual(my_new_bibdoc.get_total_size_latest_version(), 91750)
        #get latest version
        self.assertEqual(my_new_bibdoc.get_latest_version(), 3)
        #list latest files
        self.assertEqual(len(my_new_bibdoc.list_latest_files()), 1)
        self.assertEqual(my_new_bibdoc.list_latest_files()[0].get_version(), 3)
        #list version files
        self.assertEqual(
            len(my_new_bibdoc.list_version_files(1, list_hidden=True)), 1)
        #display # No Display facility inside of an object !
        #        value = my_new_bibdoc.display(version='', ln='en', display_hidden=True)
        #        self.assert_('>test add new file<' in value)
        #format already exist
        self.assertEqual(my_new_bibdoc.format_already_exists_p('.jpg'), True)
        #get file
        self.assertEqual(
            my_new_bibdoc.get_file('.jpg', version='1').get_version(), 1)
        #set description
        my_new_bibdoc.set_description('new description', '.jpg', version=1)
        #get description
        self.assertEqual(my_new_bibdoc.get_description('.jpg', version=1),
                         'new description')
        #set comment (really exercise the comment accessors, not the
        #description ones a second time)
        my_new_bibdoc.set_comment('new comment', '.jpg', version=1)
        #get comment
        self.assertEqual(my_new_bibdoc.get_comment('.jpg', version=1),
                         'new comment')
        #get history
        self.assertTrue(len(my_new_bibdoc.get_history()) > 0)
        #check modification date
        self.assertEqual(
            my_new_bibdoc.get_file('.jpg', version=1).md, timestamp1)
        self.assertEqual(
            my_new_bibdoc.get_file('.jpg', version=2).md, timestamp2)
        self.assertTrue(
            my_new_bibdoc.get_file('.jpg', version=3).md > timestamp3)
        #delete file
        my_new_bibdoc.delete_file('.jpg', 2)
        #list all files
        self.assertEqual(len(my_new_bibdoc.list_all_files()), 2)
        #delete file
        my_new_bibdoc.delete_file('.jpg', 3)
        #add new format
        timestamp4 = datetime(
            *(time.strptime("2012-11-10 09:08:07", "%Y-%m-%d %H:%M:%S")[:6]))
        my_new_bibdoc.add_file_new_format(pkg_resources.resource_filename(
            'invenio_demosite.testsuite.regression', 'data/test.gif'),
                                          version=None,
                                          description=None,
                                          comment=None,
                                          docformat=None,
                                          modification_date=timestamp4)
        self.assertEqual(len(my_new_bibdoc.list_all_files()), 2)
        #check modification time
        self.assertEqual(
            my_new_bibdoc.get_file('.jpg', version=1).md, timestamp1)
        self.assertEqual(
            my_new_bibdoc.get_file('.gif', version=1).md, timestamp4)
        #change the format name
        my_new_bibdoc.change_docformat('.gif', '.gif;icon-640')
        self.assertEqual(my_new_bibdoc.format_already_exists_p('.gif'), False)
        self.assertEqual(
            my_new_bibdoc.format_already_exists_p('.gif;icon-640'), True)
        #delete file
        my_new_bibdoc.delete_file('.jpg', 1)
        #delete file
        my_new_bibdoc.delete_file('.gif;icon-640', 1)
        #empty bibdoc
        self.assertEqual(my_new_bibdoc.empty_p(), True)
        #hidden?
        self.assertEqual(my_new_bibdoc.hidden_p('.jpg', version=1), False)
        #hide
        my_new_bibdoc.set_flag('HIDDEN', '.jpg', version=1)
        #hidden?
        self.assertEqual(my_new_bibdoc.hidden_p('.jpg', version=1), True)
        #add and get icon

        my_new_bibdoc.add_icon(pkg_resources.resource_filename(
            'invenio_demosite.testsuite.regression', 'data/icon-test.gif'),
                               modification_date=timestamp4)

        my_bibrecdoc = BibRecDocs(2)
        value = my_bibrecdoc.get_bibdoc("new_name")
        self.assertEqual(value.get_icon().docid,
                         my_new_bibdoc.get_icon().docid)
        self.assertEqual(value.get_icon().version,
                         my_new_bibdoc.get_icon().version)
        self.assertEqual(value.get_icon().format,
                         my_new_bibdoc.get_icon().format)

        #check modification time
        self.assertEqual(my_new_bibdoc.get_icon().md, timestamp4)
        #delete icon
        my_new_bibdoc.delete_icon()
        #get icon
        self.assertEqual(my_new_bibdoc.get_icon(), None)
        #delete
        my_new_bibdoc.delete()
        self.assertEqual(my_new_bibdoc.deleted_p(), True)
        #undelete
        my_new_bibdoc.undelete(previous_status='', recid=2)
        #expunging
        my_new_bibdoc.expunge()
        my_bibrecdoc.build_bibdoc_list()
        self.assertFalse('new_name' in my_bibrecdoc.get_bibdoc_names())
        self.assertTrue(my_bibrecdoc.get_bibdoc_names())
    def test_BibDocFiles(self):
        """bibdocfile - BibDocFile functions """
        # Attaches one JPEG to record 2, checks every accessor of the
        # resulting BibDocFile against the values used at creation time, and
        # finally deletes the bibdoc again.
        #add bibdoc
        from invenio.legacy.bibdocfile.api import BibRecDocs
        my_bibrecdoc = BibRecDocs(2)
        timestamp = datetime(
            *(time.strptime("2010-09-08 07:06:05", "%Y-%m-%d %H:%M:%S")[:6]))
        #FIXME InvenioBibDocFileError
        my_bibrecdoc.add_new_file(pkg_resources.resource_filename(
            'invenio_demosite.testsuite.regression', 'data/test.jpg'),
                                  'Main',
                                  'img_test',
                                  False,
                                  'test add new file',
                                  'test',
                                  '.jpg',
                                  modification_date=timestamp)

        my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test")
        my_new_bibdocfile = my_new_bibdoc.list_all_files()[0]
        #get url
        self.assertEqual(
            my_new_bibdocfile.get_url(), cfg['CFG_SITE_URL'] +
            '/%s/2/files/img_test.jpg' % cfg['CFG_SITE_RECORD'])
        #get type
        self.assertEqual(my_new_bibdocfile.get_type(), 'Main')
        #get path
        # We should not test for a particular path: that belongs to the
        # underlying implementation, not to the interface which should be
        # tested.
        #        self.assert_(my_new_bibdocfile.get_path().startswith(cfg['CFG_BIBDOCFILE_FILEDIR']))
        #        self.assert_(my_new_bibdocfile.get_path().endswith('/img_test.jpg;1'))
        #get bibdocid
        self.assertEqual(my_new_bibdocfile.get_bibdocid(),
                         my_new_bibdoc.get_id())
        #get name
        self.assertEqual(my_new_bibdocfile.get_name(), 'img_test')
        #get full name
        self.assertEqual(my_new_bibdocfile.get_full_name(), 'img_test.jpg')
        #get full path
        #self.assert_(my_new_bibdocfile.get_full_path().startswith(cfg['CFG_BIBDOCFILE_FILEDIR']))
        #self.assert_(my_new_bibdocfile.get_full_path().endswith('/img_test.jpg;1'))
        #get format
        self.assertEqual(my_new_bibdocfile.get_format(), '.jpg')
        #get version
        self.assertEqual(my_new_bibdocfile.get_version(), 1)
        #get description
        self.assertEqual(my_new_bibdocfile.get_description(),
                         my_new_bibdoc.get_description('.jpg', version=1))
        #get comment
        self.assertEqual(my_new_bibdocfile.get_comment(),
                         my_new_bibdoc.get_comment('.jpg', version=1))
        #get recid
        self.assertEqual(my_new_bibdocfile.get_recid(), 2)
        #get status
        self.assertEqual(my_new_bibdocfile.get_status(), '')
        #get size
        self.assertEqual(my_new_bibdocfile.get_size(), 91750)
        #get checksum
        self.assertEqual(my_new_bibdocfile.get_checksum(),
                         '28ec893f9da735ad65de544f71d4ad76')
        #check
        self.assertEqual(my_new_bibdocfile.check(), True)
        #display
        import invenio.legacy.template
        tmpl = invenio.legacy.template.load("bibdocfile")
        value = tmpl.tmpl_display_bibdocfile(my_new_bibdocfile, ln='en')
        # A TestCase assertion (unlike a bare assert) survives "python -O".
        self.assertTrue('files/img_test.jpg?version=1">' in value)
        #hidden?
        self.assertEqual(my_new_bibdocfile.hidden_p(), False)
        #check modification date
        self.assertEqual(my_new_bibdocfile.md, timestamp)
        #delete
        my_new_bibdoc.delete()
        self.assertEqual(my_new_bibdoc.deleted_p(), True)
Exemple #7
0
def format_element(bfo):
    """
    Return the video of the record, suitable for the Open Graph protocol.

    Icons attached to the record become ``og:image`` tags and attached
    .mp4/.webm/.ogv files become ``og:video`` tags.  On CERN sites, extra
    player tags are appended when the record metadata (field 8567_u) links to
    an .mp4 file.

    @param bfo: the BibFormat object of the record being formatted
    @return: the meta tags, or an empty string when Open Graph support is
        disabled
    @rtype: string
    """
    if not CFG_WEBSEARCH_ENABLE_OPENGRAPH:
        return ""
    bibdocs = BibRecDocs(bfo.recID).list_bibdocs()
    additional_tags = ""
    # Both lists hold (url, secure_url) pairs.
    videos = []
    images = []

    video_superformats = (".mp4", '.webm', '.ogv')
    for doc in bibdocs:
        found_icons = []
        found_video_url = ''
        for docfile in doc.list_latest_files():
            if docfile.is_icon():
                found_icons.append((docfile.get_size(), docfile.get_url()))
            elif get_superformat_from_format(
                    docfile.get_format()).lower() in video_superformats:
                found_video_url = docfile.get_url()
        # Icons are emitted from the smallest to the biggest one.
        found_icons.sort()

        for dummy_icon_size, icon_url in found_icons:
            images.append(
                (icon_url, icon_url.replace(CFG_SITE_URL,
                                            CFG_SITE_SECURE_URL)))
        if found_video_url:
            videos.append((found_video_url,
                           found_video_url.replace(CFG_SITE_URL,
                                                   CFG_SITE_SECURE_URL)))

    if CFG_CERN_SITE:
        media_urls = [
            url.replace('http://mediaarchive.cern.ch',
                        'https://mediastream.cern.ch')
            for url in bfo.fields('8567_u')
        ]
        mp4_urls = [url for url in media_urls if url.endswith('.mp4')]
        img_urls = [
            url for url in media_urls if url.endswith(('.jpg', '.png'))
        ]

        if mp4_urls:
            mp4_url = mp4_urls[0]
            # Player size depends on the aspect ratio found in field 300__b.
            width = "640"
            if "4/3" in bfo.field("300__b"):
                height = "480"
            else:
                height = "360"
            additional_tags += '''
                <meta property="og:video" content="%(CFG_CERN_PLAYER_URL)s?file=%(mp4_url_relative)s&streamer=%(CFG_STREAMER_URL)s&provider=rtmp&stretching=exactfit&image=%(image_url)s" />
                <meta property="og:video:height" content="%(height)s" />
                <meta property="og:video:width" content="%(width)s" />
                <meta property="og:video:type" content="application/x-shockwave-flash" />
                <meta property="og:video" content="%(mp4_url)s" />
                <meta property="og:video:type" content="video/mp4" />
                <meta property="og:image" content="%(image_url)s" />
                <meta name="twitter:player:height" content="%(height)s" />
                <meta name="twitter:player:width" content="%(width)s" />

                <link rel="image_src" href="%(image_url)s" />
                <link rel="video_src" href="%(CFG_CERN_PLAYER_URL)s?file=%(mp4_url_relative)s&streamer=%(CFG_STREAMER_URL)s&provider=rtmp&stretching=exactfit&image=%(image_url)s"/>
                ''' % {
                'CFG_CERN_PLAYER_URL': "https://cds.cern.ch/mediaplayer.swf",
                'CFG_STREAMER_URL': "rtmp://wowza.cern.ch:1935/vod",
                'width': width,
                'height': height,
                'image_url': img_urls[0] if img_urls else '',
                # mp4_url has already been rewritten to the mediastream host
                # when media_urls was built.
                'mp4_url': mp4_url,
                # Everything after the fourth "/" of the URL.
                'mp4_url_relative': '/' + '/'.join(mp4_url.split('/')[4:])
            }
            try:
                from invenio.media_utils import generate_embedding_url
                embed_url = generate_embedding_url(bfo.field('037__a'))
                additional_tags += '''<meta name="twitter:player" content="%s"/>''' % cgi.escape(
                    embed_url, quote=True).replace('http://', 'https://', 1)
            except Exception:
                # Best effort only: the embeddable player is optional, so a
                # missing module or a failing URL generation must not break
                # the rest of the output.
                pass

    tags = []
    for image_url, image_secure_url in images:
        tag = '<meta property="og:image" content="%s" />' % image_url
        if image_url != image_secure_url:
            tag += ('\n<meta property="og:image:secure_url" content="%s" />'
                    % image_secure_url)
        tags.append(tag)
    for video_url, video_secure_url in videos:
        tag = '<meta property="og:video" content="%s" />' % video_url
        if video_url != video_secure_url:
            tag += ('\n<meta property="og:video:secure_url" content="%s" />'
                    % video_secure_url)
        tags.append(tag)

    return "\n".join(tags) + additional_tags
Exemple #8
0
        def getfile(req, form):
            """Stream one file of record `self.recid`, or render its file list.

            `self` and `filename` are not parameters: both are read from the
            enclosing scope.

            When a docname/format can be resolved to an accessible file, that
            file is streamed back.  In every other case (no file requested,
            version 'all', file missing, restricted or hidden) the page
            listing the files of the record is built, preceded by any warning
            collected on the way.

            @param req: the request object
            @param form: the submitted form, washed against
                bibdocfile_templates.files_default_urlargd
            @return: the streamed file, a redirection, an error/warning page
                or the full HTML page listing the files
            """
            args = wash_urlargd(form,
                                bibdocfile_templates.files_default_urlargd)
            ln = args['ln']

            _ = gettext_set_language(ln)

            uid = getUid(req)
            user_info = collect_user_info(req)

            verbose = args['verbose']
            if verbose >= 1 and not isUserSuperAdmin(user_info):
                # Only SuperUser can see all the details!
                verbose = 0

            if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
                return page_not_authorized(req,
                                           "/%s/%s" %
                                           (CFG_SITE_RECORD, self.recid),
                                           navmenuid='submit')

            if record_exists(self.recid) < 1:
                msg = "<p>%s</p>" % _(
                    "Requested record does not seem to exist.")
                return warning_page(msg, req, ln)

            if record_empty(get_record(self.recid).legacy_create_recstruct()):
                msg = "<p>%s</p>" % _(
                    "Requested record does not seem to have been integrated.")
                return warning_page(msg, req, ln)

            # Record-level access control: guests are sent to the login page
            # with an authorization cookie, logged-in users get a refusal.
            (auth_code,
             auth_message) = check_user_can_view_record(user_info, self.recid)
            if auth_code and user_info['email'] == 'guest':
                cookie = mail_cookie_create_authorize_action(
                    VIEWRESTRCOLL, {
                        'collection':
                        guess_primary_collection_of_a_record(self.recid)
                    })
                target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                            make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                                    CFG_SITE_SECURE_URL + user_info['uri']}, {})
                return redirect_to_url(req, target, norobot=True)
            elif auth_code:
                return page_not_authorized(req, "../", \
                                            text = auth_message)

            # With site access control level 1, downloads are not registered
            # (see the `if not readonly` check before streaming).
            readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1

            # From now on: either the user provided a specific file
            # name (and a possible version), or we return a list of
            # all the available files. In no case are the docids
            # visible.
            try:
                bibarchive = BibRecDocs(self.recid)
            except InvenioBibDocFileError:
                register_exception(req=req, alert_admin=True)
                msg = "<p>%s</p><p>%s</p>" % (
                    _("The system has encountered an error in retrieving the list of files for this document."
                      ),
                    _("The error has been logged and will be taken in consideration as soon as possible."
                      ))
                return warning_page(msg, req, ln)

            if bibarchive.deleted_p():
                req.status = apache.HTTP_GONE
                return warning_page(
                    _("Requested record does not seem to exist."), req, ln)

            docname = ''
            docformat = ''
            version = ''
            warn = ''

            if filename:
                # We know the complete file name, guess which docid it
                # refers to
                ## TODO: Change the extension system according to ext.py from setlink
                ##       and have a uniform extension mechanism...
                docname = file_strip_ext(filename)
                docformat = filename[len(docname):]
                if docformat and docformat[0] != '.':
                    docformat = '.' + docformat
                if args['subformat']:
                    docformat += ';%s' % args['subformat']
            else:
                docname = args['docname']

            # Fall back on the URL arguments for whatever the file name did
            # not provide.
            if not docformat:
                docformat = args['format']
                if args['subformat']:
                    docformat += ';%s' % args['subformat']

            # `version` is still '' at this point, so the URL argument is
            # always used.
            if not version:
                version = args['version']

            ## Download as attachment
            is_download = False
            if args['download']:
                is_download = True

            # version could be either empty, or all or an integer
            try:
                int(version)
            except ValueError:
                if version != 'all':
                    version = ''

            display_hidden = isUserSuperAdmin(user_info)

            if version != 'all':
                # search this filename in the complete list of files
                for doc in bibarchive.list_bibdocs():
                    if docname == bibarchive.get_docname(doc.id):
                        try:
                            try:
                                docfile = doc.get_file(docformat, version)
                            except InvenioBibDocFileError as msg:
                                req.status = apache.HTTP_NOT_FOUND
                                if not CFG_INSPIRE_SITE and req.headers_in.get(
                                        'referer'):
                                    ## There must be a broken link somewhere.
                                    ## Maybe it's good to alert the admin
                                    register_exception(req=req,
                                                       alert_admin=True)
                                warn += write_warning(
                                    _("The format %(x_form)s does not exist for the given version: %(x_vers)s",
                                      x_form=cgi.escape(docformat),
                                      x_vers=cgi.escape(str(msg))))
                                break
                            # File-level access control; owners of the record
                            # are let through even when the file is restricted.
                            (auth_code,
                             auth_message) = docfile.is_restricted(user_info)
                            if auth_code != 0 and not is_user_owner_of_record(
                                    user_info, self.recid):
                                if CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(
                                        get_subformat_from_format(docformat)):
                                    return stream_restricted_icon(req)
                                if user_info['email'] == 'guest':
                                    cookie = mail_cookie_create_authorize_action(
                                        'viewrestrdoc',
                                        {'status': docfile.get_status()})
                                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                                    make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                        CFG_SITE_SECURE_URL + user_info['uri']}, {})
                                    # NOTE(review): unlike the record-level
                                    # redirect above, the result is not
                                    # returned here; presumably
                                    # redirect_to_url ends the request by
                                    # itself, otherwise execution falls
                                    # through to the streaming code below --
                                    # confirm.
                                    redirect_to_url(req, target)
                                else:
                                    req.status = apache.HTTP_UNAUTHORIZED
                                    warn += write_warning(
                                        _("This file is restricted: ") +
                                        str(auth_message))
                                    break

                            if not docfile.hidden_p():
                                if not readonly:
                                    ip = str(req.remote_ip)
                                    doc.register_download(
                                        ip, docfile.get_version(), docformat,
                                        uid, self.recid)
                                try:
                                    return docfile.stream(req,
                                                          download=is_download)
                                except InvenioBibDocFileError as msg:
                                    register_exception(req=req,
                                                       alert_admin=True)
                                    req.status = apache.HTTP_INTERNAL_SERVER_ERROR
                                    warn += write_warning(
                                        _("An error has happened in trying to stream the request file."
                                          ))
                            else:
                                req.status = apache.HTTP_UNAUTHORIZED
                                warn += write_warning(
                                    _("The requested file is hidden and can not be accessed."
                                      ))

                        except InvenioBibDocFileError as msg:
                            register_exception(req=req, alert_admin=True)

            # Reaching this point means nothing was streamed.  If a specific
            # file was requested and no more precise warning was collected
            # above, report it as not found.
            if docname and docformat and not warn:
                req.status = apache.HTTP_NOT_FOUND
                warn += write_warning(
                    _("Requested file does not seem to exist."))

            # Previous implementation:
            # filelist = bibarchive.display("", version, ln=ln, verbose=verbose, display_hidden=display_hidden)
            filelist = bibdocfile_templates.tmpl_display_bibrecdocs(
                bibarchive,
                "",
                version,
                ln=ln,
                verbose=verbose,
                display_hidden=display_hidden)

            t = warn + bibdocfile_templates.tmpl_filelist(ln=ln,
                                                          filelist=filelist)

            cc = guess_primary_collection_of_a_record(self.recid)
            # cc_id is only referenced by the commented-out call below.
            cc_id = Collection.query.filter_by(name=cc).value('id')
            # NOTE(review): unordered_tabs is None because the call that used
            # to fill it is commented out, yet it is passed to iteritems() and
            # indexed right below; that looks like it fails at runtime --
            # confirm and restore a real tab source.
            unordered_tabs = None  # get_detailed_page_tabs(cc_id, self.recid, ln)
            ordered_tabs_id = [(tab_id, values['order'])
                               for (tab_id,
                                    values) in iteritems(unordered_tabs)]
            # Sort the tabs by their 'order' value (cmp-style sort).
            ordered_tabs_id.sort(lambda x, y: cmp(x[1], y[1]))
            link_ln = ''
            if ln != CFG_SITE_LANG:
                link_ln = '?ln=%s' % ln
            # Each tab is (label, url, is-current-tab, enabled); only tabs
            # flagged as visible are kept.
            tabs = [
                (unordered_tabs[tab_id]['label'], '%s/%s/%s/%s%s' %
                 (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, tab_id, link_ln),
                 tab_id == 'files', unordered_tabs[tab_id]['enabled'])
                for (tab_id, dummy_order) in ordered_tabs_id
                if unordered_tabs[tab_id]['visible'] is True
            ]

            # NOTE(review): tabs_counts is an empty dict (the original call is
            # commented out), so the three lookups below raise KeyError --
            # confirm and restore a real source for the counts.
            tabs_counts = {}  # get_detailed_page_tabs_counts(self.recid)
            top = webstyle_templates.detailed_record_container_top(
                self.recid,
                tabs,
                args['ln'],
                citationnum=tabs_counts['Citations'],
                referencenum=tabs_counts['References'],
                discussionnum=tabs_counts['Discussions'])
            bottom = webstyle_templates.detailed_record_container_bottom(
                self.recid, tabs, args['ln'])
            title, description, keywords = websearch_templates.tmpl_record_page_header_content(
                req, self.recid, args['ln'])
            # Full page: header, search page start, record container around
            # the warnings and file list, search page end, footer.
            return pageheaderonly(title=title,
                        navtrail=create_navtrail_links(cc=cc, aas=0, ln=ln) + \
                                        ''' &gt; <a class="navtrail" href="%s/%s/%s">%s</a>
                                        &gt; %s''' % \
                        (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, title, _("Access to Fulltext")),

                        description=description,
                        keywords=keywords,
                        uid=uid,
                        language=ln,
                        req=req,
                        navmenuid='search',
                        navtrail_append_title_p=0) + \
                        websearch_templates.tmpl_search_pagestart(ln) + \
                        top + t + bottom + \
                        websearch_templates.tmpl_search_pageend(ln) + \
                        pagefooteronly(language=ln, req=req)
Exemple #9
0
def filter_fulltexts(recids, fulltext_type=None):
    """Keep only the records that own at least one visible fulltext file.

    @param recids: anything accepted by dict(); its keys are the candidate
        record ids
    @param fulltext_type: when truthy, only documents of this type are
        considered; otherwise every attached document counts
    @return: list of (recid, timestamp) tuples, where timestamp is the
        latest document modification date, but never older than the value
        returned by get_minimum_timestamp()
    """
    candidates = dict(recids)
    oldest_allowed = get_minimum_timestamp()

    if not fulltext_type:
        query = """SELECT id_bibrec, max(modification_date)
                   FROM bibrec_bibdoc
                   LEFT JOIN bibdoc ON bibrec_bibdoc.id_bibdoc=bibdoc.id
                   GROUP BY id_bibrec"""
        rows = run_sql(query)
    else:
        query = """SELECT id_bibrec, max(modification_date)
                   FROM bibrec_bibdoc
                   LEFT JOIN bibdoc ON bibrec_bibdoc.id_bibdoc=bibdoc.id
                   WHERE type=%s
                   GROUP BY id_bibrec"""
        rows = run_sql(query, (fulltext_type,))

    selected = []
    for recid, lastmod in rows:
        if recid not in candidates:
            continue
        # Records whose files are all hidden are not reported.
        if not BibRecDocs(recid).list_latest_files(list_hidden=False):
            continue
        selected.append((recid, max(lastmod, oldest_allowed)))
    return selected
Exemple #10
0
def openaire_create_icon(docid=None, recid=None, reformat=True):
    """
    Celery task generating the missing icons of documents.

    When ``recid`` is truthy, every document attached to that record is
    inspected; otherwise only the document ``docid`` is.  For each document
    without an icon matching ICON_SUBFORMAT, an icon is created from each of
    its latest non-icon files and attached to the document.  When
    ``reformat`` is true, a ``bibreformat`` task is submitted for the records
    linked to the document after each icon has been attached.

    Failures to create or to attach an icon are logged and registered, and
    processing goes on with the next file.
    """
    if not recid:
        # Celery task will fail if the BibDoc does not exist (on purpose ;-)
        bibdocs = [BibDoc(docid)]
    else:
        bibdocs = BibRecDocs(recid).list_bibdocs()

    for bibdoc in bibdocs:
        logger.debug("Checking document %s" % bibdoc)
        if bibdoc.get_icon(subformat_re=re.compile(ICON_SUBFORMAT)):
            continue
        logger.debug("Document has no icon")

        for docfile in bibdoc.list_latest_files():
            logger.debug("Checking file %s" % docfile)
            if docfile.is_icon():
                continue
            logger.debug("File not an icon")

            file_path = docfile.get_full_path()
            icon_path = None

            # Step 1: build the icon file.
            try:
                basename = os.path.basename(file_path)
                filename = os.path.splitext(basename)[0]
                logger.info("Creating icon from file %s" % file_path)
                icon_options = {
                    'input-file': file_path,
                    'icon-name': "icon-%s" % filename,
                    'multipage-icon': False,
                    'multipage-icon-delay': 0,
                    'icon-scale': ICON_SIZE,
                    'icon-file-format': ICON_FILEFORMAT,
                    'verbosity': 0,
                }
                (icon_dir, icon_name) = create_icon(icon_options)
                icon_path = os.path.join(icon_dir, icon_name)
            except InvenioWebSubmitIconCreatorError as e:
                message = 'Icon for file %s could not be created: %s' % (
                    file_path, str(e))
                logger.warning(message)
                register_exception(prefix=message, alert_admin=False)

            # Step 2: attach the icon (only if step 1 produced a file).
            try:
                if not (icon_path and os.path.exists(icon_path)):
                    continue
                logger.debug("Adding icon %s to document" % icon_path)
                bibdoc.add_icon(icon_path, subformat=ICON_SUBFORMAT)
                linked_recids = [
                    str(link['recid']) for link in bibdoc.bibrec_links
                ]
                recid_list = ",".join(linked_recids)
                if reformat:
                    task_low_level_submission('bibreformat', 'openaire',
                                              '-i', recid_list)
            except InvenioBibDocFileError as e:
                message = ('Icon %s for file %s could not be added to '
                           'document: %s' % (icon_path, docfile, str(e)))
                logger.warning(message)
                register_exception(prefix=message, alert_admin=False)
Exemple #11
0
    def get_words_from_fulltext(self, url_direct_or_indirect):
        """Return the words of the fulltext document(s) behind a URL.

        URL_DIRECT_OR_INDIRECT is handled as follows:

          - internal document URL (bibdocfile_url_p() is true): the text of
            the corresponding BibDoc is tokenized.  An URL that no longer
            resolves to a BibDoc (outdated 8564 tag) yields no words.

          - external URL: ignored when
            CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY is set.  Otherwise,
            if the URL matches a splash page pattern of
            CFG_BIBINDEX_SPLASH_PAGES, the page is fetched and the links
            matching the associated pattern are indexed instead of the page
            itself.  Each URL to index is downloaded, converted to text and
            tokenized.

        When the 'fulltext' index is not served by the 'native' indexer, the
        text is dispatched to the external engine (SOLR or Xapian) and no
        words are returned.

        Errors are registered with register_exception(), reported on stderr
        and result in an empty list (or, for a single failing external URL,
        in that URL being skipped).

        @param url_direct_or_indirect: URL of the fulltext, or of a splash
            page linking to the fulltext(s)
        @type url_direct_or_indirect: string
        @return: the words found
        @rtype: list
        """
        write_message("... reading fulltext files from %s started" %
                      url_direct_or_indirect,
                      verbose=2)
        try:
            if bibdocfile_url_p(url_direct_or_indirect):
                write_message("... %s is an internal document" %
                              url_direct_or_indirect,
                              verbose=2)
                try:
                    bibdoc = bibdocfile_url_to_bibdoc(url_direct_or_indirect)
                except InvenioBibDocFileError:
                    # Outdated 8564 tag
                    return []
                indexer = get_idx_indexer('fulltext')
                if indexer != 'native':
                    # A document might belong to multiple records
                    for rec_link in bibdoc.bibrec_links:
                        recid = rec_link["recid"]
                        # Adds fulltexts of all files once per record;
                        # fulltext_added is not defined in this method
                        # (presumably a module-level set -- to be confirmed).
                        if recid not in fulltext_added:
                            bibrecdocs = BibRecDocs(recid)
                            try:
                                text = bibrecdocs.get_text()
                            except InvenioBibDocFileError:
                                # Invalid PDF
                                continue
                            if indexer == 'SOLR' and CFG_SOLR_URL:
                                solr_add_fulltext(recid, text)
                            elif indexer == 'XAPIAN' and CFG_XAPIAN_ENABLED:
                                xapian_add(recid, 'fulltext', text)

                        fulltext_added.add(recid)
                    # we are relying on an external information retrieval
                    # system to provide full-text indexing, so dispatch text
                    # to it and return nothing here:
                    return []
                else:
                    text = ""
                    if hasattr(bibdoc, "get_text"):
                        text = bibdoc.get_text()
                    return self.tokenize_for_words_default(text)
            else:
                if CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY:
                    write_message(
                        "... %s is external URL but indexing only local files"
                        % url_direct_or_indirect,
                        verbose=2)
                    return []
                write_message("... %s is an external URL" %
                              url_direct_or_indirect,
                              verbose=2)
                urls_to_index = set()
                for splash_re, url_re in iteritems(CFG_BIBINDEX_SPLASH_PAGES):
                    if re.match(splash_re, url_direct_or_indirect):
                        write_message("... %s is a splash page (%s)" %
                                      (url_direct_or_indirect, splash_re),
                                      verbose=2)
                        html = urllib2.urlopen(url_direct_or_indirect).read()
                        urls = get_links_in_html_page(html)
                        write_message(
                            "... found these URLs in %s splash page: %s" %
                            (url_direct_or_indirect, ", ".join(urls)),
                            verbose=3)
                        for url in urls:
                            if re.match(url_re, url):
                                write_message(
                                    "... will index %s (matched by %s)" %
                                    (url, url_re),
                                    verbose=2)
                                urls_to_index.add(url)
                if not urls_to_index:
                    # Not a splash page (or no matching link): index the
                    # URL itself.
                    urls_to_index.add(url_direct_or_indirect)
                write_message("... will extract words from %s" %
                              ', '.join(urls_to_index),
                              verbose=2)
                words = set()
                for url in urls_to_index:
                    tmpdoc = download_url(url)
                    file_converter_logger = get_file_converter_logger()
                    old_logging_level = \
                        file_converter_logger.getEffectiveLevel()
                    if self.verbose > 3:
                        file_converter_logger.setLevel(logging.DEBUG)
                    try:
                        tmptext = convert_file(tmpdoc, output_format='.txt')
                        # Close the handle before removing the file instead
                        # of leaving it to the garbage collector.
                        with open(tmptext) as text_file:
                            text = text_file.read()
                        os.remove(tmptext)

                        indexer = get_idx_indexer('fulltext')
                        if indexer != 'native':
                            if indexer == 'SOLR' and CFG_SOLR_URL:
                                solr_add_fulltext(
                                    None,
                                    text)  # FIXME: use real record ID
                            if indexer == 'XAPIAN' and CFG_XAPIAN_ENABLED:
                                #xapian_add(None, 'fulltext', text) # FIXME: use real record ID
                                pass
                            # we are relying on an external information
                            # retrieval system to provide full-text indexing,
                            # so dispatch text to it and return nothing here:
                            tmpwords = []
                        else:
                            tmpwords = self.tokenize_for_words_default(text)
                        words.update(tmpwords)
                    except Exception as e:
                        # A failing URL must not prevent indexing the others.
                        message = 'ERROR: it\'s impossible to correctly extract words from %s referenced by %s: %s' % (
                            url, url_direct_or_indirect, e)
                        register_exception(prefix=message, alert_admin=True)
                        write_message(message, stream=sys.stderr)
                    finally:
                        os.remove(tmpdoc)
                        if self.verbose > 3:
                            file_converter_logger.setLevel(old_logging_level)
                return list(words)
        except Exception as e:
            message = 'ERROR: it\'s impossible to correctly extract words from %s: %s' % (
                url_direct_or_indirect, e)
            register_exception(prefix=message, alert_admin=True)
            write_message(message, stream=sys.stderr)
            return []
def Move_Revised_Files_to_Storage(parameters, curdir, form, user_info=None):
    """
    The function revises the files of a record with the newly uploaded
    files.

    This function can work only if you can define a mapping from the
    WebSubmit element name that uploads the file, to the doctype of
    the file. In most cases, the doctype is equivalent to the element
    name, or just map to 'Main' doctype. That is typically the case if
    you use the Move_Files_to_Storage.py function to upload the files
    at submission step. For eg. with the DEMOBOO submission of the
    Atlantis Demo site, a file is uploaded thanks to the DEMOBOO_FILE
    element/File input, which is mapped to doctype DEMOBOO_FILE.

    The function ignores files for which multiple files exist for a
    single doctype in the record, or when several files are uploaded
    with the same element name.  If the record to revise does not have
    a corresponding file, the file is inserted.

    This function is similar to Move_Uploaded_Files_to_Storage.py,
    except that Move_Uploaded_Files_to_Storage relies on files
    uploaded from the web interface created by
    Create_Upload_Files_Interface.py, while this function relies on
    the files uploaded by a regular WebSubmit page that you have built
    from WebSubmit admin:

    Regular WebSubmit interface       --(upload file)-->  Move_Revised_Files_to_Storage.py
    Create_Upload_Files_Interface.py  --(upload file)-->  Move_Uploaded_Files_to_Storage.py

    The main advantage of this function over the functions
    Create_Upload_Files_Interface.py/Move_Uploaded_Files_to_Storage is
    that it lets you customize the display of your submission in the
    way you want, which could be simpler for your users if you usually
    only upload a few and fixed number of files per record. The
    disadvantages are that this function is not capable of: deleting
    files, adding an alternative format to a file, adding a variable
    number of files; it does not allow setting permissions at the level
    of a file, does not support user comments, renaming, etc.

    Once the files have been processed, the MARC of the record is fixed
    with the ``bibdocfile`` command line tool and the cached 'HB' format
    of the record is deleted.

    @param parameters:(dictionary) - must contain:

      + elementNameToDoctype: maps an element/field name to a doctype.
                              Eg. the file uploaded from the
                              DEMOBOO_FILE element (input file tag)
                              should revise the file with document
                              type (doctype) "Main":
                                 DEMOBOO_FILE=Main|DEMOBOO_FILE_2=ADDITIONAL
                              ('=' separates element name and doctype
                               '|' separates each doctype/element name group)

                              In most cases, the element name == doctype:
                               DEMOBOO_FILE=DEMOBOO_FILE|DEMOBOO_FILE_2=DEMOBOO_FILE_2

      + createIconDoctypes: the list of doctypes for which an icon
                            should be created when revising the file.
                            Eg:
                                Figure|Graph
                              ('|' separated values)
                              Use '*' for all doctypes

      + iconsize: size of the icon to create (when applicable)

      + keepPreviousVersionDoctypes: the list of doctypes for which
                                     the function should keep previous
                                     versions visible when revising a
                                     file.
                                     Eg:
                                       Main|Additional
                                     ('|' separated values)
                                     Default is all

      + createRelatedFormats: if uploaded files get converted to
                              whatever format we can (1) or not (0)
    @param curdir: the current submission directory; uploaded files are
                   looked up in <curdir>/files/<element name>/
    @param form: not used by this function
    @param user_info: not used by this function
    """
    # pylint: disable=E0602
    # sysno is defined in the WebSubmit functions sandbox.

    global sysno
    bibrecdocs = BibRecDocs(int(sysno))

    # Wash function parameters
    (element_name_and_doctype, create_icon_doctypes, iconsize,
     keep_previous_version_doctypes, createRelatedFormats_p) = \
     wash_function_parameters(parameters, curdir)

    for element_name, doctype in element_name_and_doctype:
        _do_log(curdir, "Processing " + element_name)
        # Check if there is a corresponding file
        file_path = os.path.join(curdir, 'files', element_name,
                                 read_file(curdir, element_name))
        if not (file_path and os.path.exists(file_path)):
            _do_log(curdir, "  No corresponding file found (%s)" % file_path)
            continue

        # Now identify which file to revise
        files_in_record = bibrecdocs.list_bibdocs(doctype)
        nb_files_in_record = len(files_in_record)
        if nb_files_in_record == 1:
            # Ok, we can revise
            bibdoc_name = bibrecdocs.get_docname(files_in_record[0].id)
            revise(bibrecdocs, curdir, sysno, file_path, bibdoc_name,
                   doctype, iconsize, create_icon_doctypes,
                   keep_previous_version_doctypes, createRelatedFormats_p)
        elif nb_files_in_record == 0:
            # We must add the file
            add(bibrecdocs, curdir, sysno, file_path, doctype, iconsize,
                create_icon_doctypes, createRelatedFormats_p)
        else:
            # files_in_record holds BibDoc objects, which str.join() cannot
            # concatenate: log their document names instead.
            docnames = [str(bibrecdocs.get_docname(bibdoc.id))
                        for bibdoc in files_in_record]
            _do_log(curdir, "  %s ignored, because multiple files found for same doctype %s in record %s: %s" %
                    (element_name, doctype, sysno, ', '.join(docnames)))

    # Update the MARC
    bibdocfile_bin = os.path.join(CFG_BINDIR, 'bibdocfile --yes-i-know')
    # sysno has been validated by int(sysno) above; %s formatting also works
    # should sysno be an integer rather than a string.
    os.system("%s --fix-marc --recid=%s" % (bibdocfile_bin, sysno))

    # Delete the HB BibFormat cache in the DB, so that the fulltext
    # links do not point to possible dead files
    run_sql(
        "DELETE LOW_PRIORITY from bibfmt WHERE format='HB' AND id_bibrec=%s",
        (sysno, ))
Exemple #13
0
def tarballs_by_recids(recids,
                       sdir,
                       docname=None,
                       doctype=None,
                       docformat=None):
    """
    Locate the tarballs of the records designated by C{recids}.

    C{recids} is a comma-separated list of items, each item being either a
    single record id or a C{low-high} interval.  An interval is expanded with
    C{range(int(low), int(high))}, i.e. C{high} itself is NOT included.

    Without any of C{docname}, C{doctype}, C{docformat}, the arXiv
    identifiers of the records (037 fields whose first $9 is 'arXiv') are
    collected and handed to C{tarballs_by_arXiv_id}.  Otherwise the latest
    files attached to the records are filtered on the given criteria.

    @param: recids (string): the record id or ids
    @param: sdir (string): where the tarballs should live
    @param docname: select tarball for given recid(s) that match docname
    @param doctype: select tarball for given recid(s) that match doctype
    @param docformat: select tarball for given recid(s) that match docformat
    @return: with a filter, a list of (path, recid) tuples; without filter,
        whatever C{tarballs_by_arXiv_id} returns; [] when recids is empty
    """
    if not recids:
        return []

    # A specification without comma is a list of one item.
    if ',' in recids:
        items = recids.split(',')
    else:
        items = [recids]

    list_of_ids = []
    for item in items:
        if '-' in item:
            low, high = item.split('-')
            list_of_ids.extend(range(int(low), int(high)))
        else:
            list_of_ids.append(int(item))

    use_local_files = bool(doctype or docname or docformat)

    arXiv_ids = []
    local_files = []
    for recid in list_of_ids:
        rec = get_record(recid)
        if use_local_files:
            matching_files = [
                docfile
                for docfile in BibRecDocs(recid).list_latest_files()
                if (not doctype or docfile.get_type() == doctype)
                and (not docname or docfile.get_name() == docname)
                and (not docformat or docfile.get_format() == docformat)
            ]
            for docfile in matching_files:
                local_files.append((docfile.get_path(), recid))
            continue

        for field in record_get_field_instances(rec, tag='037'):
            sources = field_get_subfield_values(field, '9')
            if len(sources) > 0 and 'arXiv' == sources[0]:
                arXiv_ids.append(field_get_subfield_values(field, 'a')[0])

    if use_local_files:
        return local_files

    return tarballs_by_arXiv_id(arXiv_ids, sdir)
Exemple #14
0
# Ordered rules mapping keywords found in a (lowercased) circular docname to
# the key under which the document is registered; the first match wins.
_CIRCULAR_DOCNAME_RULES = (
    (('implementation', ), 'implementation-en'),
    (('application', ), 'implementation-fr'),
    (('archiving', ), 'archiving-en'),
    (('archivage', ), 'archiving-fr'),
    # 'annexes_en' contains 'annexe': it has to be tested before the French
    # rule, otherwise English annexes get registered as French ones.
    (('annexes_en', ), 'annex-en'),
    (('annexe', 'annexes_fr'), 'annex-fr'),
    (('annex', ), 'annex-en'),
    (('_en_', '_eng_', '_angl_'), 'en'),
    (('_fr_', ), 'fr'),
)


def _circular_document_key(docname):
    """Return the registration key matching docname, or None if none does."""
    ldocname = docname.lower()
    for keywords, key in _CIRCULAR_DOCNAME_RULES:
        if any(keyword in ldocname for keyword in keywords):
            return key
    return None


def _goto_circular(collection, report_prefix, number, document):
    """Return the URL of the PDF of one document of a circular.

    The circular is the last record returned by the search, in `collection`,
    of the report numbers "<report_prefix>-<number>-*" sorted on 925__a.
    """
    recids = perform_request_search(
        cc=collection,
        p='reportnumber:"%s-%s-*"' % (report_prefix, number),
        sf="925__a")
    recid = recids[-1]
    documents = {}
    bibrecdocs = BibRecDocs(recid)
    for docname in bibrecdocs.get_bibdoc_names():
        key = _circular_document_key(docname)
        if key is not None:
            _register_document(documents, docname, key)
    bibdoc = bibrecdocs.get_bibdoc(documents[document])
    try:
        return bibdoc.get_file('.pdf').get_url()
    except InvenioBibDocFileError:
        # Some files carry an upper-case extension.
        return bibdoc.get_file('.PDF').get_url()


def goto(type, document='', number=0, lang='en', modif=0):
    """Return the URL of a CERN HR document.

    @param type: kind of document wanted (the name shadows the builtin but
        belongs to the public interface):
          - 'SRR': Staff Rules and Regulations currently in force (according
            to the 925__a/925__b dates); `lang` and `modif` select the
            document of the record
          - 'OPER-CIRC': Operational Circular number `number`
          - 'ADMIN-CIRC': Administrative Circular number `number`
    @param document: for circulars, key of the wanted document, e.g. 'en',
        'fr', 'annex-en', 'implementation-fr', 'archiving-en'
    @param number: number of the circular
    @param lang: language passed to make_cern_ssr_docname() ('SRR' only)
    @param modif: modification passed to make_cern_ssr_docname() ('SRR' only)
    @return: URL of the PDF file, or None when `type` is not one of the
        values above
    @raise IndexError: when the search returns no record
    @raise KeyError: when the circular has no document registered under
        `document`
    """
    today = time.strftime('%Y-%m-%d')
    if type == 'SRR':
        ## We would like a CERN Staff Rules and Regulations
        recids = perform_request_search(
            cc='Staff Rules and Regulations',
            f="925__a:1996-01-01->%s 925__b:%s->9999-99-99" % (today, today))
        recid = recids[-1]
        reportnumber = get_fieldvalues(recid, '037__a')[0]
        edition = int(reportnumber[-2:])  # e.g. CERN-STAFF-RULES-ED08
        return BibRecDocs(recid).get_bibdoc(
            make_cern_ssr_docname(lang, edition,
                                  modif)).get_file('.pdf').get_url()
    if type == 'OPER-CIRC':
        return _goto_circular("Operational Circulars", "CERN-OPER-CIRC",
                              number, document)
    if type == 'ADMIN-CIRC':
        return _goto_circular("Administrative Circulars", "CERN-ADMIN-CIRC",
                              number, document)
    return None
Exemple #15
0
def _analyze_documents(
        records,
        taxonomy_name,
        collection,
        output_limit=bconfig.CFG_BIBCLASSIFY_DEFAULT_OUTPUT_NUMBER):
    """Extract the keywords of the PDF files attached to the records.

    For each record, the latest files are examined: keywords are extracted
    from the PDF ones with the taxonomy taxonomy_name, the other ones are
    skipped with a warning.  The module-level counter _INDEX is incremented
    and the task progress updated after each record.

    @var records: list of recids to process
    @var taxonomy_name: str, name of the taxonomy, e.g. HEP
    @var collection: str, collection name (only used in the warning emitted
        when there is no record)
    @keyword output_limit: int, max number of keywords to extract
    @return: str, marcxml output format of results (one <record> element
        per record having keywords); False when records is empty
    """
    global _INDEX

    if not records:
        # No records could be found.
        bibtask.write_message(
            "WARNING: No records were found in collection %s." % collection,
            stream=sys.stderr,
            verbose=2)
        return False

    # Process records:
    output = []
    for record in records:
        bibdocfiles = BibRecDocs(record).list_latest_files(
        )  # TODO: why this doesn't call list_all_files() ?
        keywords = {}
        akws = {}
        acro = {}

        for doc in bibdocfiles:
            # Get the keywords for all PDF documents contained in the record.
            if not text_extractor.is_pdf(doc.get_full_path()):
                # NOTE(review): the rest of this function uses accessor
                # methods on doc; confirm that the fullpath and doctype
                # attributes exist.
                bibtask.write_message(
                    'WARNING: BibClassify does not know how to process \
                    doc: %s (type: %s) -- ignoring it.' %
                    (doc.fullpath, doc.doctype),
                    stream=sys.stderr,
                    verbose=3)
                # Nothing was extracted from this file: merging results here
                # would fail on None when no PDF has been processed yet.
                continue

            bibtask.write_message(
                'INFO: Generating keywords for record %d.' % record,
                stream=sys.stderr,
                verbose=3)
            fulltext = doc.get_path()

            single_keywords, composite_keywords, author_keywords, acronyms = \
                engine.get_keywords_from_local_file(fulltext,
                                                    taxonomy_name,
                                                    with_author_keywords=True,
                                                    output_mode="raw",
                                                    output_limit=output_limit,
                                                    match_mode='partial')

            if single_keywords or composite_keywords:
                cleaned_single = engine.clean_before_output(single_keywords)
                cleaned_composite = engine.clean_before_output(
                    composite_keywords)
                # merge the groups into one
                keywords.update(cleaned_single)
                keywords.update(cleaned_composite)
            if acronyms:
                acro.update(acronyms)
            if author_keywords:
                akws.update(author_keywords)

        if keywords:
            output.append('<record>')
            output.append('<controlfield tag="001">%s</controlfield>' % record)
            output.append(
                engine._output_marc(keywords.items(), (),
                                    akws,
                                    acro,
                                    spires=bconfig.CFG_SPIRES_FORMAT))
            output.append('</record>')
        else:
            bibtask.write_message('WARNING: No keywords found for record %d.' %
                                  record,
                                  stream=sys.stderr,
                                  verbose=0)

        _INDEX += 1

        bibtask.task_update_progress('Done %d out of %d.' %
                                     (_INDEX, _RECIDS_NUMBER))
        bibtask.task_sleep_now_if_required(can_stop_too=False)

    return '\n'.join(output)
def format_element(bfo):
    """Create the video selector of a record.

    Format element function building the select and option elements with
    HTML5 data attributes that store all the necessary metadata to construct
    video sources with JavaScript.

    Every file of the record whose subformat is one of '360p', '480p',
    '720p', '1080p' is considered: files with superformat '.mp4', '.webm' or
    '.ogv' are video sources, other files whose comment is 'POSTER' are
    posters.  One option is created per size having at least one video
    source, from the smallest size to the largest.

    @param bfo: the object giving access to the record (bfo.recID is used)
    @return: the value returned by create_select_element() for the options
    """
    # (subformat, width, height), in display order
    sizes = (
        ('360p', 640, 360),
        ('480p', 854, 480),
        ('720p', 1280, 720),
        ('1080p', 1920, 1080),
    )
    video_superformats = ('.mp4', '.webm', '.ogv')

    videos = {}
    for subformat, width, height in sizes:
        videos[subformat] = {
            'width': width,
            'height': height,
            'poster': None,
            'mp4': None,
            'webm': None,
            'ogv': None,
        }

    ## Go through all the BibDocs and search for video related signatures
    for bibdoc in BibRecDocs(bfo.recID).list_bibdocs():
        for bibdocfile in bibdoc.list_all_files():
            size = bibdocfile.get_subformat()
            if size not in videos:
                continue
            superformat = bibdocfile.get_superformat()
            if superformat in video_superformats:
                ## Video signature: store the url under its codec
                videos[size][superformat[1:]] = bibdocfile.get_url()
            elif bibdocfile.get_comment() == 'POSTER':
                ## Poster signature.  The comparison is an equality on
                ## purpose: "in ('POSTER')" is a substring test, which
                ## accepts an empty comment and fails on a None one.
                videos[size]['poster'] = bibdocfile.get_url()

    ## Build video select options for every video size format that was found
    select_options = []
    for subformat, dummy_width, dummy_height in sizes:
        options = videos[subformat]
        ## If we have at least one url, the format is available
        if options['mp4'] or options['webm'] or options['ogv']:
            select_options.append(
                create_option_element(url_webm=options['webm'],
                                      url_ogv=options['ogv'],
                                      url_mp4=options['mp4'],
                                      url_poster=options['poster'],
                                      width=options['width'],
                                      height=options['height'],
                                      subformat=subformat))
    return create_select_element(select_options)
def create_photos_manager_interface(sysno,
                                    session_id,
                                    uid,
                                    doctype,
                                    indir,
                                    curdir,
                                    access,
                                    can_delete_photos=True,
                                    can_reorder_photos=True,
                                    can_upload_photos=True,
                                    editor_width=None,
                                    editor_height=None,
                                    initial_slider_value=100,
                                    max_slider_value=200,
                                    min_slider_value=80):
    """
    Creates and returns the HTML of the photos manager interface for
    submissions.

    The state of the interface is read from the PHOTO_MANAGER_* parameter
    files found in C{curdir}. When no PHOTO_MANAGER_ORDER has been stored
    yet and C{sysno} is set, the photos (bibdocs having an icon) already
    attached to the record are loaded instead.

    @param sysno: current record id
    @param session_id: user session_id (as retrieved by get_session_id(...) )
    @param uid: user id
    @param doctype: doctype of the submission
    @param indir: submission "indir"
    @param curdir: submission "curdir"
    @param access: submission "access"
    @param can_delete_photos: if users can delete photos
    @param can_reorder_photos: if users can reorder photos
    @param can_upload_photos: if users can upload photos
    @param editor_width: width (in pixels) of the editor
    @param editor_height: height (in pixels) of the editor
    @param initial_slider_value: initial value of the photo size slider
    @param max_slider_value: max value of the photo size slider
    @param min_slider_value: min value of the photo size slider
    @return: the HTML (markup, CSS and JavaScript) of the interface
    @rtype: string
    """
    PHOTO_MANAGER_ICONS = read_param_file(curdir,
                                          'PHOTO_MANAGER_ICONS',
                                          split_lines=True)
    # Each line is "<file id>/<icon url>"; lines without separator are ignored
    photo_manager_icons_dict = dict(
        value.split('/', 1) for value in PHOTO_MANAGER_ICONS if '/' in value)
    PHOTO_MANAGER_ORDER = read_param_file(curdir,
                                          'PHOTO_MANAGER_ORDER',
                                          split_lines=True)
    photo_manager_order_list = [
        value for value in PHOTO_MANAGER_ORDER if value.strip()
    ]
    PHOTO_MANAGER_DELETE = read_param_file(curdir,
                                           'PHOTO_MANAGER_DELETE',
                                           split_lines=True)
    photo_manager_delete_list = [
        value for value in PHOTO_MANAGER_DELETE if value.strip()
    ]
    PHOTO_MANAGER_NEW = read_param_file(curdir,
                                        'PHOTO_MANAGER_NEW',
                                        split_lines=True)
    # Each line is "<file id>/<file name>"
    photo_manager_new_dict = dict(
        value.split('/', 1) for value in PHOTO_MANAGER_NEW if '/' in value)
    # Raw (unescaped) descriptions of the photos loaded from the DB
    photo_manager_descriptions_dict = {}

    # Compile a regular expression that can match the "default" icon,
    # and not larger version.
    CFG_BIBDOCFILE_ICON_SUBFORMAT_RE_DEFAULT = re.compile(
        CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT + r'\Z')

    # Load the existing photos from the DB if we are displaying
    # this interface for the first time, and if a record exists
    if sysno and not PHOTO_MANAGER_ORDER:
        bibarchive = BibRecDocs(sysno)
        for doc in bibarchive.list_bibdocs():
            if doc.get_icon() is None:
                # Documents without icon are not handled as photos
                continue
            doc_id = str(doc.get_id())
            icon_url = doc.get_icon(
                subformat_re=CFG_BIBDOCFILE_ICON_SUBFORMAT_RE_DEFAULT
            ).get_url()  # Get "default" icon
            # Use the first non-empty comment as description. It is
            # stored raw: escaping happens once, at rendering time
            # (escaping here too displayed '&' as '&amp;' in the form).
            description = ""
            for bibdoc_file in doc.list_latest_files():
                comment = bibdoc_file.get_comment()
                if comment:
                    description = comment
                    break
            photo_manager_descriptions_dict[doc_id] = description
            photo_manager_icons_dict[doc_id] = icon_url
            photo_manager_order_list.append(doc_id)  # FIXME: respect order

    # Prepare the list of photos to display.
    # NOTE(review): doc_id and icon_url come from submitted form values
    # and are inserted in the markup unescaped -- confirm that they are
    # validated upstream.
    photos_img = []
    for doc_id in photo_manager_order_list:
        if doc_id not in photo_manager_icons_dict:
            continue
        icon_url = photo_manager_icons_dict[doc_id]
        if PHOTO_MANAGER_ORDER:
            # Get description from disk only if some changes have been done
            description = escape(
                read_param_file(curdir, 'PHOTO_MANAGER_DESCRIPTION_' + doc_id))
        else:
            description = escape(photo_manager_descriptions_dict[doc_id])
        photos_img.append('''
        <li id="%(doc_id)s" style="width:%(initial_slider_value)spx;">
            <div class="imgBlock">
                <div class="normalLineHeight" style="margin-left:auto;margin-right:auto;display:inline" >
                    <img id="delete_%(doc_id)s" class="hidden" src="/img/cross_red.gif" alt="Delete" style="position:absolute;top:0;" onclick="delete_photo('%(doc_id)s');"/>
                    <img src="%(icon_url)s" class="imgIcon"/>
                 </div>
                 <div class="normalLineHeight">
                     <textarea style="width:95%%" id="PHOTO_MANAGER_DESCRIPTION_%(doc_id)s" name="PHOTO_MANAGER_DESCRIPTION_%(doc_id)s">%(description)s</textarea>
                 </div>
            </div>
        </li>''' % \
                  {'initial_slider_value': initial_slider_value,
                   'doc_id': doc_id,
                   'icon_url': icon_url,
                   'description': description})

    # NOTE(review): the JavaScript below parses the upload response with
    # eval(); flagged for review, left unchanged here.
    return '''
    <link rel="stylesheet" href="%(CFG_SITE_URL)s/vendors/jquery-ui/themes/redmond/jquery-ui.min.css" type="text/css" charset="utf-8"/>
    <link rel="stylesheet" href="%(CFG_SITE_URL)s/vendors/jquery-ui/themes/redmond/theme.css" type="text/css" charset="utf-8"/>
    <style type="text/css">
            #sortable { list-style-type: none; margin: 0; padding: 0; }
            #sortable li { margin: auto 3px; padding: 1px; float: left; width: 180px; font-size:small; text-align: center; position: relative;}
            #sortable .imgIcon {max-height:95%%;max-width:95%%;margin: 2px;max-height:130px;}
            #sortable li div.imgBlock {vertical-align: middle; margin:
    auto;display:inline;display:inline-table;display:inline-block;vertical-align:middle;text-align : center; width:100%%;position:relative}
            #sortable li div.imgBlock .hidden {display:none;}
            %(delete_hover_class)s
            .fileUploadQueue{text-align:left; margin: 0 auto; width:300px;}
            .normalLineHeight {line-height:normal}
    </style>

    <div id="uploadedFiles" style="%(hide_photo_viewer)sborder-left:1px solid #555; border-top:1px solid #555;border-right:1px solid #eee;border-bottom:1px solid #eee;overflow:auto;%(editor_height_style)s%(editor_width_style)sbackground-color:#eee;margin:3px;text-align:left;position:relative"><ul id="sortable">%(photos_img)s</ul></div>
    <div id="grid_slider" style="%(hide_photo_viewer)swidth:300px;">
      <div class='ui-slider-handle'></div>
    </div>


    <script type="text/javascript" src="%(CFG_SITE_URL)s/vendors/uploadify/jquery.uploadify.min.js"></script>
    <script type="text/javascript" src="%(CFG_SITE_URL)s/vendors/swfobject/swfobject/swfobject.js"></script>
    <script type="text/javascript" src="%(CFG_SITE_URL)s/vendors/jquery-ui/jquery-ui.min.js"></script>
    <link rel="stylesheet" href="%(CFG_SITE_URL)s/vendors/uploadify/uploadify.css" type="text/css" />

    <script type="text/javascript">

    $(document).ready(function() {

        /* Uploading */
            if (%(can_upload_photos)s) {
            $('#uploadFile').uploadify({
                    'uploader': '%(CFG_SITE_URL)s/vendors/uploadify/uploadify.swf',
                    'script':    '/submit/uploadfile',
                    'cancelImg': '%(CFG_SITE_URL)s/vendors/uploadify/uploadify-cancel.png',
                    'multi' :    true,
                    'auto' :    true,
                    'simUploadLimit': 2,
                    'scriptData' : {'type': 'File', 'uid': %(uid)s, 'session_id': '%(session_id)s', 'indir': '%(indir)s', 'doctype': '%(doctype)s', 'access': '%(access)s'},
                    'displayDate': 'percentage',
                    'buttonText': 'Browse',
                    'fileDataName': 'NewFile' /* be compatible with CKEditor */,
                    'onSelectOnce': function(event, data) {

                     },
                    'onSelect': function(event, queueID, fileObj, response, data) {
                           $('#loading').css("visibility","visible");
                     },
                    'onAllComplete' : function(event, queueID, fileObj, response, data) {
                           $('#loading').css("visibility","hidden");
                     },
                    /*'onCheck': function(event, checkScript, fileQueue, folder, single) {

                           return false;
                     },*/
                    'onComplete': function(event, queueID, fileObj, response, data) {
                           $('#grid_slider').css("display","block");
                           $('#uploadedFiles').css("display","block");
                           var cur_width = $("#grid_slider").slider('option', 'value');
                           var response_obj = parse_invenio_response(response);
                           icon_url = '%(CFG_SITE_URL)s/img/file-icon-blank-96x128.gif'
                           if ("NewFile" in response_obj) {
                               filename = response_obj["NewFile"]["name"]
                               if ('iconName' in response_obj["NewFile"]){
                                   icon_name = response_obj["NewFile"]["iconName"]
                                   icon_url = '%(CFG_SITE_URL)s/submit/getuploadedfile?indir=%(indir)s&doctype=%(doctype)s&access=%(access)s&key=NewFile&icon=1&filename=' + icon_name
                               }
                           } else {
                               return true;
                           }
                           $('#sortable').append('<li id="'+ queueID +'" style="width:'+cur_width+'px;"><div class="imgBlock"><div class="normalLineHeight" style="margin-left:auto;margin-right:auto;display:inline" ><img id="delete_'+ queueID +'" class="hidden" src="/img/cross_red.gif" alt="Delete" style="position:absolute;top:0;" onclick="delete_photo(\\''+ queueID +'\\');"/><img src="'+ icon_url +'" class="imgIcon"/></div><div class="normalLineHeight"><textarea style="width:95%%" id="PHOTO_MANAGER_DESCRIPTION_'+ queueID +'" name="PHOTO_MANAGER_DESCRIPTION_'+ queueID +'"></textarea></div></div></li>');

                           update_order_field();
                           $('#photo_manager_icons').val($("#photo_manager_icons").val() + '\\n' + queueID + '/' + icon_url);
                           $('#photo_manager_new').val($("#photo_manager_new").val() + '\\n' + queueID + '/' + filename);
                           update_CSS();
                           return true;
                     }
            });
         }

        /* Resizing */
            $("#grid_slider").slider({
                    value: %(initial_slider_value)s,
                    max: %(max_slider_value)s,
                    min: %(min_slider_value)s,
                    slide: function(event, ui) {
                         update_CSS();
                    }
            });

            /* Update CSS to ensure that existing photos get nicely laid out*/
            update_CSS();

    });


    /* Ordering */
            $(function() {
                    if (%(can_reorder_photos)s) {
                        $("#sortable").sortable();
                        $("#sortable").bind('sortupdate', function(event, ui) {
                            update_order_field();
                        });
                    }
            });

            function delete_photo(docid){
                if (confirm("Are you sure you want to delete the photo? (The file will be deleted after you apply all the modifications)")) {
                    $("#" + docid).remove();
                    $("#photo_manager_delete").val($("#photo_manager_delete").val() + '\\n' + docid);
                    update_order_field();
                }
            }

    /* CSS-related */

            function update_CSS(){
                /* Update some style according to the slider size */
                var slider_value = $("#grid_slider").slider('option', 'value');
                $('#uploadedFiles li').css('width', slider_value+"px");
                /*$('#uploadedFiles div.floater').css('width', slider_value+"px");*/
                /* Update height attr accordingly so that image get centered.
                   First we need to get the tallest element of the list.
                 */
                var max_height = 0;
                $('#uploadedFiles li div').each(function() {
                    this_height = $(this).height();
                    if(this_height > max_height) {
                        max_height = this_height;
                    }
                });
                $('#uploadedFiles li').css('height',max_height+"px");
                $('#uploadedFiles li').css('line-height',max_height+"px");
            }

    /* Utils */
             function update_order_field(){
                 $("#photo_manager_order").val($("#sortable").sortable('toArray').join('\\n'));
             }

             function parse_invenio_response(response){
                 /* Return the javascript object included in the
                    the given Invenio message. Really dirty implementation, but ok
                    in this very simple scenario */
                 /*var object_string = response.substring(response.indexOf('<![CDATA[')+9, response.lastIndexOf(']]>'));*/ object_string = response;
                 var object = {};
                 eval('object=' + object_string);
                 return object;
              }

    </script>


    <div style="margin: 0 auto;">
    <img src="%(CFG_SITE_URL)s/img/loading.gif" style="visibility: hidden" id="loading"/>
    <input type="file" size="40" id="uploadFile" name="PHOTO_FILE" style="margin: 0 auto;%(upload_display)s"/>
    </div>

    <!--<a href="javascript:$('#uploadFile').fileUploadStart();">Upload Files</a> -->

    <textarea id="photo_manager_icons" style="display:none" name="PHOTO_MANAGER_ICONS">%(PHOTO_MANAGER_ICONS)s</textarea>
    <textarea id="photo_manager_order" style="display:none" name="PHOTO_MANAGER_ORDER">%(PHOTO_MANAGER_ORDER)s</textarea>
    <textarea id="photo_manager_new" style="display:none" name="PHOTO_MANAGER_NEW">%(PHOTO_MANAGER_NEW)s</textarea>
    <textarea id="photo_manager_delete" style="display:none" name="PHOTO_MANAGER_DELETE">%(PHOTO_MANAGER_DELETE)s</textarea>
    ''' % {
        'CFG_SITE_URL': CFG_SITE_URL,
        'uid': uid,
        'access': quote(access, safe=""),
        'doctype': quote(doctype, safe=""),
        'indir': quote(indir, safe=""),
        'session_id': quote(session_id, safe=""),
        'PHOTO_MANAGER_ICONS': '\n'.join([
            key + '/' + value
            for key, value in iteritems(photo_manager_icons_dict)
        ]),
        'PHOTO_MANAGER_ORDER': '\n'.join(photo_manager_order_list),
        'PHOTO_MANAGER_DELETE': '\n'.join(photo_manager_delete_list),
        'PHOTO_MANAGER_NEW': '\n'.join([
            key + '/' + value
            for key, value in iteritems(photo_manager_new_dict)
        ]),
        'initial_slider_value': initial_slider_value,
        'max_slider_value': max_slider_value,
        'min_slider_value': min_slider_value,
        'photos_img': '\n'.join(photos_img),
        # Hide the viewer as long as there is nothing to show in it
        'hide_photo_viewer':
        '' if photos_img or photo_manager_new_dict else 'display:none;',
        # The "delete" cross only shows up on hover, and only if allowed
        'delete_hover_class':
        "#sortable li div.imgBlock:hover .hidden {display:inline;}"
        if can_delete_photos else '',
        'can_reorder_photos': 'true' if can_reorder_photos else 'false',
        'can_upload_photos': 'true' if can_upload_photos else 'false',
        'upload_display': '' if can_upload_photos else 'display: none',
        'editor_width_style':
        'width:%spx;' % editor_width if editor_width else '',
        'editor_height_style':
        'height:%spx;' % editor_height if editor_height else ''
    }
def create_icons_for_record(recid,
                            icon_sizes,
                            icon_format_mappings=None,
                            docnames=None,
                            add_default_icon=False,
                            inherit_moreinfo=False):
    """Generate icons, if missing, for a record

       Only the latest files that have no subformat are used as source
       for the icons. Failures do not interrupt the processing: they
       are collected and returned to the caller.

       @param recid: the record id for which icons are being created
       @type recid: int
       @param icon_sizes: the list of icon sizes that need to be
       generated. Note that upscaled is not allowed
       @type icon_sizes: list
       @param icon_format_mappings: defines for each "master" format in
                                   which format the icons should be
                                   created. If the master format is
                                   not specified here, then its icons
                                   will be created in the same format,
                                   if possible (for eg. the icons of a
                                   TIFF file would be created as TIFF,
                                   while icons of a PDF or DOC file
                                   would be created in JPG) and unless
                                   a default mapping is not provided in
                                   C{CFG_ICON_CREATION_FORMAT_MAPPINGS}.
       @type icon_format_mappings: dict
       @param docnames: the list of docnames for which we want to create an icon.
                        If not provided, consider all docnames.
       @type docnames: list
       @param add_default_icon: if a default icon (i.e. without icon
                                size suffix, matching
                                CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT)
                                should be added or not.
       @type add_default_icon: bool
       @param inherit_moreinfo: if the added icons should also have
                                their description and comment set to
                                the same value as the "main" bibdoc
                                or not.
       @type inherit_moreinfo: bool
       @return: the number of sized icons that were added (the default
                icon is not counted) and the list of error messages of
                the icons that could not be added
       @rtype: tuple (int, list of string)
    """
    exceptions = []  # keep track of all exceptions
    done = 0
    brd = BibRecDocs(recid)
    bibdocs = brd.list_bibdocs()
    # Override default formats from CFG_ICON_CREATION_FORMAT_MAPPINGS
    # with values specified in icon_format_mappings
    if icon_format_mappings is None:
        icon_format_mappings = {}
    icon_format_mappings = dict(CFG_ICON_CREATION_FORMAT_MAPPINGS,
                                **icon_format_mappings)
    if '*' in icon_format_mappings and not icon_format_mappings['*']:
        # we must override the default in order to keep the
        # "superformat"
        del icon_format_mappings['*']
    for bibdoc in bibdocs:
        docname = brd.get_docname(bibdoc.id)
        if docnames and docname not in docnames:
            # Skip this docname
            continue
        comment, description = get_comment_and_description(
            bibdoc, inherit_moreinfo)
        default_icon_added_p = False
        bibfiles = bibdoc.list_latest_files()
        # Formats already attached to the bibdoc before we add anything
        bibdoc_formats = [bibfile.get_format() for bibfile in bibfiles]
        for bibfile in bibfiles:
            if bibfile.get_subformat():
                # this is a subformat, do nothing
                continue
            filepath = bibfile.get_full_path()
            # do not consider the dot in front of the format
            superformat = bibfile.get_format()[1:].lower()
            # Mapping lookup order: the format itself, then the '*'
            # wildcard, then the same format as the master file
            bibfile_icon_formats = icon_format_mappings.get(
                superformat, icon_format_mappings.get('*', [superformat]))
            if isinstance(bibfile_icon_formats, str):
                bibfile_icon_formats = [bibfile_icon_formats]
            bibfile_icon_formats = [
                bibfile_icon_format
                for bibfile_icon_format in bibfile_icon_formats
                if bibfile_icon_format in CFG_ALLOWED_FILE_EXTENSIONS
            ]

            if add_default_icon and not default_icon_added_p:
                # add default icon (at most once per bibdoc)
                try:
                    iconpath, iconname = _create_icon(
                        filepath,
                        CFG_DEFAULT_ICON_SIZE,
                        docname,
                        icon_format=CFG_DEFAULT_ICON_EXTENSION,
                        verbosity=9)
                    bibdoc.add_file_new_format(
                        os.path.join(iconpath, iconname),
                        docformat=".%s;%s" %
                        (CFG_DEFAULT_ICON_EXTENSION,
                         CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT),
                        comment=comment,
                        description=description)
                    default_icon_added_p = True
                    write_message(
                        "Added default icon to recid: %s, format: %s" %
                        (recid, CFG_DEFAULT_ICON_EXTENSION))
                except Exception as ex:
                    # Best effort: remember the failure and go on
                    exceptions.append(
                        "Could not add new icon to recid: %s, format: %s; exc: %s"
                        % (recid, CFG_DEFAULT_ICON_EXTENSION, ex))

            # check if the subformat that we want to create already exists
            for icon_size in icon_sizes:
                # Strip the geometry modifiers so that the size can be
                # used as subformat suffix
                washed_icon_size = icon_size.replace('>', '').replace(
                    '<', '').replace('^', '').replace('!', '')
                for icon_format in bibfile_icon_formats:
                    new_format = '.%s;%s-%s' % (
                        icon_format, CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT,
                        washed_icon_size)
                    if new_format in bibdoc_formats:
                        # the subformat already exists, do nothing
                        continue
                    # add icon
                    try:
                        iconpath, iconname = _create_icon(
                            filepath,
                            icon_size,
                            docname,
                            icon_format=icon_format,
                            verbosity=9)
                        bibdoc.add_file_new_format(
                            os.path.join(iconpath, iconname),
                            docformat=new_format,
                            comment=comment,
                            description=description)
                        write_message(
                            "Added icon to recid: %s, format: %s %s %s %s" %
                            (recid, new_format, iconpath, iconname, icon_size))
                        done += 1
                    except Exception as ex:
                        # Best effort: remember the failure and go on
                        exceptions.append(
                            "Could not add new format to recid: %s, format: %s; exc: %s"
                            % (recid, new_format, ex))
    # Hand the outcome back to the caller instead of dropping it
    return done, exceptions
def Move_Photos_to_Storage(parameters, curdir, form, user_info=None):
    """
    The function moves files received from the submission's form
    through the PHOTO_MANAGER element and its asynchronous uploads at
    CFG_SITE_URL/submit/uploadfile.

    Parameters:
        @iconsize - Separate multiple sizes with commas. The ImageMagick geometry inputs are supported.
              Use type 'geometry' as defined in ImageMagick.
              (eg. 320 or 320x240 or 100> or 5%)
              Example: "180>,700>" will create two icons, one with maximum dimension 180px, one 700px
        @iconformat - Allowed extensions (as defined in websubmit_icon_creator.py) are:
                "pdf", "gif", "jpg",
                "jpeg", "ps", "png", "bmp"
                "eps", "epsi", "epsf"
                Defaults to "gif" when empty.

    The PHOTO_MANAGER elements builds the following file organization
    in the directory curdir::

                                     curdir/
                                        |
         ______________________________________________________________________
        |                                   |                                  |
      files/                         PHOTO_MANAGER_ICONS                     icons/
        |                            PHOTO_MANAGER_ORDER                       |
     (user id)/                      PHOTO_MANAGER_DELETE                  (user id)/
        |                            PHOTO_MANAGER_NEW                         |
     NewFile/                        PHOTO_MANAGER_DESCRIPTION_X           NewFile/
        |                                                                      |
        _______________________                                      _____________________
       |            |          |                                    |          |          |
     photo1.jpg  myPhoto.gif   ...                             photo1.jpg  myPhoto.gif   ...


    where the files are:
      - PHOTO_MANAGER_ORDER: ordered list of file IDs. One per line.

      - PHOTO_MANAGER_ICONS: mappings from file IDs to URL of the icons.
                             One per line. Separator: /
                             (not needed by this function)

      - PHOTO_MANAGER_NEW: mapping from file ID to filename on disk. Only
                           applicable to files that have just been
                           uploaded (i.e. not bibdocfiles). One per
                           line. Separator: /

      - PHOTO_MANAGER_DELETE: list of files IDs that must be deleted. One
                               per line

      - PHOTO_MANAGER_DESCRIPTION_X, where X is file ID: contains photos
                                     descriptions (one per file)

    The record is the one identified by the global C{sysno}.

    @param parameters: the parameters of the function (see above)
    @param curdir: the current submission directory
    @param form: the submitted form (not used here)
    @param user_info: the user information; its 'uid' locates the
                      uploaded files inside C{curdir}
    @return: an empty string
    """
    global sysno

    # Local import: only needed here to run the bibdocfile CLI
    import subprocess

    icon_sizes = parameters.get('iconsize').split(',')
    icon_format = parameters.get('iconformat') or 'gif'

    PHOTO_MANAGER_ORDER = read_param_file(curdir,
                                          'PHOTO_MANAGER_ORDER',
                                          split_lines=True)
    photo_manager_order_list = [
        value for value in PHOTO_MANAGER_ORDER if value.strip()
    ]
    PHOTO_MANAGER_DELETE = read_param_file(curdir,
                                           'PHOTO_MANAGER_DELETE',
                                           split_lines=True)
    photo_manager_delete_list = [
        value for value in PHOTO_MANAGER_DELETE if value.strip()
    ]
    PHOTO_MANAGER_NEW = read_param_file(curdir,
                                        'PHOTO_MANAGER_NEW',
                                        split_lines=True)
    photo_manager_new_dict = dict(
        value.split('/', 1) for value in PHOTO_MANAGER_NEW if '/' in value)

    ## Create an instance of BibRecDocs for the current recid(sysno)
    bibrecdocs = BibRecDocs(sysno)
    for photo_id in photo_manager_order_list:
        photo_description = read_param_file(
            curdir, 'PHOTO_MANAGER_DESCRIPTION_' + photo_id)
        # We must take different actions depending if we deal with a
        # file that already exists, or if it is a new file
        if photo_id not in photo_manager_new_dict:
            # Existing file
            bibdocname = bibrecdocs.get_docname(int(photo_id))
            if photo_id in photo_manager_delete_list:
                # In principle we should not get here. but just in case...
                bibrecdocs.delete_bibdoc(bibdocname)
                _do_log(curdir, "Deleted  %s" % bibdocname)
            else:
                bibdoc = bibrecdocs.get_bibdoc(bibdocname)
                for file_format in [bibdocfile.get_format()
                                    for bibdocfile in bibdoc.list_latest_files()]:
                    bibdoc.set_comment(photo_description, file_format)
                    _do_log(curdir, "Added comment %s" % photo_description)
            continue

        # New file
        if photo_id in photo_manager_delete_list:
            # Uploaded then removed before submitting: nothing to store
            continue
        filename = photo_manager_new_dict[photo_id]
        upload_dir = os.path.join(curdir, 'files', str(user_info['uid']),
                                  'NewFile')
        filepath = os.path.join(upload_dir, filename)
        # The filename comes from a submitted form field: make sure that
        # it cannot point outside of the upload directory (e.g. via
        # '../' or an absolute path)
        if not os.path.abspath(filepath).startswith(
                os.path.abspath(upload_dir) + os.sep):
            _do_log(curdir,
                    "Skipping file outside of upload directory: %s" % filepath)
            continue
        icon_filename = os.path.splitext(filename)[0] + ".gif"
        # Icon expected under curdir/icons/; used when create_icon fails
        fileiconpath = os.path.join(curdir, 'icons', str(user_info['uid']),
                                    'NewFile', icon_filename)
        if not os.path.exists(filepath):
            continue

        # Add the file
        _do_log(curdir, "Adding file %s" % filepath)
        bibdoc = bibrecdocs.add_new_file(filepath,
                                         doctype="picture",
                                         never_fail=True)
        has_added_default_icon_subformat_p = False
        for icon_size in icon_sizes:
            # Create icon if needed
            try:
                (icon_path, icon_name) = create_icon({
                    'input-file': filepath,
                    'icon-name': icon_filename,
                    'icon-file-format': icon_format,
                    'multipage-icon': False,
                    'multipage-icon-delay': 100,
                    'icon-scale': icon_size,  # Resize only if width > 300
                    'verbosity': 0,
                })
                fileiconpath = os.path.join(icon_path, icon_name)
            except InvenioWebSubmitIconCreatorError as e:
                # Keep going with the last known icon path
                _do_log(
                    curdir, "Icon could not be created to %s: %s" %
                    (filepath, e))
            if not os.path.exists(fileiconpath):
                continue
            try:
                if not has_added_default_icon_subformat_p:
                    # The first icon becomes the default one
                    bibdoc.add_icon(fileiconpath)
                    has_added_default_icon_subformat_p = True
                    _do_log(curdir, "Added icon %s" % fileiconpath)
                else:
                    # Strip the geometry modifiers to build the
                    # subformat suffix
                    icon_suffix = icon_size.replace('>', '').replace(
                        '<', '').replace('^', '').replace('!', '')
                    bibdoc.add_icon(
                        fileiconpath,
                        subformat=CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT +
                        "-" + icon_suffix)
                    _do_log(curdir, "Added icon %s" % fileiconpath)
            except InvenioBibDocFileError:
                # Most probably icon already existed.
                pass

        if photo_description and bibdoc:
            for file_format in [bibdocfile.get_format()
                                for bibdocfile in bibdoc.list_latest_files()]:
                bibdoc.set_comment(photo_description, file_format)
                _do_log(curdir, "Added comment %s" % photo_description)

    # Now delete requested files
    for photo_id in photo_manager_delete_list:
        try:
            bibdocname = bibrecdocs.get_docname(int(photo_id))
            bibrecdocs.delete_bibdoc(bibdocname)
            _do_log(curdir, "Deleted  %s" % bibdocname)
        except Exception:
            # we tried to delete a photo that does not exist (maybe
            # already deleted), or a new file that was never stored
            pass

    # Update the MARC
    _do_log(curdir, "Asking bibdocfile to fix marc")
    # Argument list instead of a shell command line: no shell parsing of
    # the record id, and CFG_BINDIR may contain spaces
    try:
        subprocess.call([
            os.path.join(CFG_BINDIR, 'bibdocfile'), '--yes-i-know',
            '--fix-marc', '--recid=' + str(sysno)
        ])
    except OSError as e:
        # os.system() never raised when the binary could not be run:
        # stay best-effort, but leave a trace
        _do_log(curdir, "Could not run bibdocfile: %s" % e)

    # Delete the HB BibFormat cache in the DB, so that the fulltext
    # links do not point to possible dead files
    run_sql(
        "DELETE LOW_PRIORITY from bibfmt WHERE format='HB' AND id_bibrec=%s",
        (sysno, ))

    return ""
 def tearDown(self):
     """Delete the 'img_test' and 'new_name' bibdocs of record 2."""
     from invenio.legacy.bibdocfile.api import BibRecDocs
     record_docs = BibRecDocs(2)
     # Same deletion order as before: 'img_test' first, then 'new_name'
     for docname in ('img_test', 'new_name'):
         record_docs.delete_bibdoc(docname)
Exemple #21
0
def get_files(bfo,
              distinguish_main_and_additional_files=True,
              include_subformat_icons=False):
    """
    Returns the files available for the given record.
    Returned structure is a tuple (parsed_urls, old_versions, additionals):
     - parsed_urls: contains categorized URLS (see details below)
     - old_versions: set to True if we can have access to old versions
     - additionals: set to True if we have other documents than the 'main' document

     Parameter 'include_subformat_icons' decides if subformat
     considered as icons should be returned

    'parsed_urls' is a dictionary in the form::
        {'main_urls' : {'Main'      : [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.pdf', 'aFile', 'PDF'),
                                       ('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.gif', 'aFile', 'GIF')],
                        'Additional': [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/bFile.pdf', 'bFile', 'PDF')]},

         'others_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'),     # url(8564_u), description(8564_y)
                         ('http://externalurl.com/bFile.pdf', 'Fulltext')],

         'cern_urls' : [],                                                     # only present on CERN site
        }

    Some notes about returned structure:
        - the key for external URLs is spelled 'others_urls' (with an 's')
        - key 'cern_urls' is only available on CERN site; this function
          creates it as an empty list and never appends to it
        - keys in main_url dictionaries are the URL descriptions (8564_y),
          defaulting to the translated "Fulltext"
        - older versions are not part of the parsed urls
        - returns only main files when possible, that is when doctypes
          make a distinction between 'Main' files and other
          files. Otherwise returns all the files as main. This is only
          enabled if distinguish_main_and_additional_files is set to True
        - on CERN site, some legacy cern.ch links (setlink, cms,
          documents/doc/preprints.cern.ch) are either rewritten to an
          arXiv PDF link or left out of the result altogether
    """
    CFG_SITE_URL = current_app.config['CFG_SITE_URL']
    CFG_CERN_SITE = current_app.config['CFG_CERN_SITE']
    CFG_BIBFORMAT_HIDDEN_FILE_FORMATS = current_app.config[
        'CFG_BIBFORMAT_HIDDEN_FILE_FORMATS']
    _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS = set(
        normalize_format(fmt) for fmt in CFG_BIBFORMAT_HIDDEN_FILE_FORMATS)

    _ = gettext_set_language(bfo.lang)

    urls = bfo.fields("8564_")
    bibarchive = BibRecDocs(bfo.recID)

    # Set to True as soon as a bibdoc with more than one version is seen.
    old_versions = False
    # Set to True as soon as a non-'Main' file is left out of the result.
    additionals = False

    # Prepare object to return
    parsed_urls = {
        'main_urls': {},  # Urls hosted by Invenio (bibdocs)
        'others_urls': []  # External urls
    }
    if CFG_CERN_SITE:
        parsed_urls['cern_urls'] = []  # cern.ch urls

    # Doctypes can be of any type, but when there is one file marked as
    # 'Main', we consider that there is a distinction between "main"
    # and "additional" files. Otherwise they will all be considered
    # equally as main files
    has_main_docs = len(bibarchive.list_bibdocs(doctype='Main')) > 0
    distinct_main_and_additional_files = bool(
        has_main_docs and distinguish_main_and_additional_files)

    # Parse URLs
    for complete_url in urls:
        if 'u' not in complete_url:
            continue
        url = complete_url['u']
        # urlparse() yields (scheme, netloc, path, params, query, fragment);
        # only the host, the path and the query string are needed here.
        (dummy, host, path, dummy, query, dummy) = urlparse(url)
        subformat = complete_url.get('x', '')
        filename = urllib.unquote(basename(path))
        name = file_strip_ext(filename)
        url_format = filename[len(name):]
        if url_format.startswith('.'):
            url_format = url_format[1:]
        if compose_format(
                url_format, subformat
        ) in _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS:
            ## This format should be hidden.
            continue

        descr = _("Fulltext")
        if 'y' in complete_url:
            descr = complete_url['y']
            if descr == 'Fulltext':
                descr = _("Fulltext")

        if not url.startswith(CFG_SITE_URL):  # Not a bibdoc?
            if not descr:  # For not bibdoc let's have a description
                # Display the URL in full:
                descr = url
            if CFG_CERN_SITE and 'cern.ch' in host and \
                   ('/setlink?' in url or \
                    'cms' in host or \
                    'documents.cern.ch' in url or \
                    'doc.cern.ch' in url or \
                    'preprints.cern.ch' in url):
                # Only well-formed "key=value" pairs of the query string
                # are kept.
                url_params_dict = dict(
                    part.split('=') for part in query.split('&')
                    if len(part.split('=')) == 2)
                if 'categ' in url_params_dict and \
                       (url_params_dict['categ'].split('.', 1)[0] in cern_arxiv_categories) and \
                       'id' in url_params_dict:
                    # Old arXiv links, used to be handled by
                    # setlink. Provide direct links to arXiv.
                    # Only the PDF link is generated.
                    for file_format, label in [('pdf', "PDF")]:
                        arxiv_url = "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \
                                    {'format': file_format,
                                     'category': url_params_dict['categ'],
                                     'id': url_params_dict['id']}
                        parsed_urls['others_urls'].append(
                            (arxiv_url, "%s/%s %s" % \
                             (url_params_dict['categ'],
                              url_params_dict['id'],
                              label)))
                # NOTE: legacy cern.ch links that cannot be mapped to
                # arXiv are not added to any list (unchanged behaviour).
            else:
                parsed_urls['others_urls'].append(
                    (url, descr))  # external url
        else:  # It's a bibdoc!
            assigned = False
            for doc in bibarchive.list_bibdocs():
                if int(doc.get_latest_version()) > 1:
                    old_versions = True
                if not any(f.get_full_name().startswith(filename)
                           for f in doc.list_all_files()):
                    # None of this bibdoc's files matches the URL.
                    continue
                assigned = True
                if not include_subformat_icons and \
                       CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(subformat):
                    # This is an icon and we want to skip it
                    continue
                if doc.get_doctype(bfo.recID) != 'Main' and \
                       distinct_main_and_additional_files:
                    # In that case we record that there are
                    # additional files, but don't add them to
                    # returned structure.
                    additionals = True
                    continue
                if not descr:
                    descr = _('Fulltext')
                # Build the displayed format in a fresh variable: the
                # subformat suffix must not pile up on url_format when
                # several bibdocs match the same filename.
                display_format = url_format
                params_dict = parse_qs(query)
                if 'subformat' in params_dict:
                    display_format = '%s (%s)' % (
                        url_format, params_dict['subformat'][0])
                parsed_urls['main_urls'].setdefault(descr, []).append(
                    (url, name, display_format))
            if not assigned:  # Url is not a bibdoc :-S
                if not descr:
                    descr = filename
                parsed_urls['others_urls'].append(
                    (url, descr))  # Let's put it in a general other url
    return (parsed_urls, old_versions, additionals)
Exemple #22
0
def get_pdf_maxpage(recid):
    """Return ``maxpage()`` of the first latest file of record ``recid``.

    The files are listed with ``list_hidden=False`` and only the first
    entry of that list is passed to ``maxpage``.
    """
    from invenio.legacy.bibdocfile.api import BibRecDocs
    from .previewerext.pdftk import maxpage

    latest_files = BibRecDocs(recid).list_latest_files(list_hidden=False)
    first_file = latest_files[0]
    return maxpage(first_file)
Exemple #23
0
def Stamp_Replace_Single_File_Approval(parameters,
                                       curdir,
                                       form,
                                       user_info=None):
    """
    This function is intended to be called when a document has been
    approved and needs to be stamped.
    The function should be used when there is ONLY ONE file to be
    stamped after approval (for example, the "main file").
    The name of the file to be stamped should be known and should be stored
    in a file in the submission's working directory (without the extension).
    Generally, this will work out fine as the main file is named after the
    report number of the document, this will be stored in the report number
    file.

    @param parameters: (dictionary) - must contain:

         + latex_template: (string) - the name of the LaTeX template that
            should be used for the creation of the stamp.

         + latex_template_vars: (string) - a string-ified dictionary
            of variables to be replaced in the LaTeX template and the
            values (or names of files in curdir containing the values)
            with which to replace them. Use prefix 'FILE:' to specify
            that the stamped value must be read from a file in
            submission directory instead of being a fixed value to
            stamp.
            E.G.:
               { 'TITLE' : 'FILE:DEMOTHESIS_TITLE',
                 'DATE'  : 'FILE:DEMOTHESIS_DATE'
               }

         + file_to_be_stamped: (string) - this is the name of a file in the
            submission's working directory that contains the name of the
            bibdocfile that is to be stamped.

         + new_file_name: (string) - this is the name of a file in the
            submission's working directory that contains the name that is to
            be given to the file after it has been stamped. If empty, or if
            that file doesn't exist, the file will not be renamed after
            stamping.

         + switch_file: (string) - when this value is set, specifies
            the name of a file that will switch on/off the
            stamping. The stamp will be applied if the file exists in
            the submission directory and is not empty. If the file
            cannot be found or is empty, the stamp is not applied.
            Useful for eg. if you want to let your users control the
            stamping with a checkbox on your submission page.
            Leave this parameter empty to always stamp by default.

         + stamp: (string) - the type of stamp to be applied to the file.
            should be one of:
              + first (only the first page is stamped);
              + all (all pages are stamped);
              + coverpage (a separate cover-page is added to the file as a
                 first page);

         + layer: (string) - the position of the stamp. Should be one of:
              + background (invisible if original file has a white
                -not transparent- background layer)
              + foreground (on top of the stamped file.  If the stamp
                does not have a transparent background, will hide all
                of the document layers)
           The default value is 'background'.

    @param curdir: (string) - path of the submission's working directory.
    @param form: not used by this function.
    @param user_info: (dictionary or None) - when given, its 'req' entry is
        passed to register_exception() if a template-variable file cannot
        be read.
    @return: (string) - always the empty string.
    @raise InvenioWebSubmitFunctionError: if the record ID cannot be read
        from the "SN" file, or if a 'FILE:' template variable names an
        invalid or unreadable file.
    @raise InvenioWebSubmitFunctionWarning: if no file name to stamp could
        be recovered, the bibdoc or its PDF file cannot be found, stamping
        fails, or the stamped file cannot be added as a new version.
    """

    def _string_parameter(name):
        """Return parameters[name] when it is a str, "" otherwise."""
        value = parameters[name]
        if type(value) is str:
            return value
        return ""

    ############
    ## Definition of important variables:
    ############
    ## The file stamper needs to be called with a dictionary of options of
    ## the following format:
    ##  { 'latex-template'      : "", ## TEMPLATE_NAME
    ##    'latex-template-var'  : {}, ## TEMPLATE VARIABLES
    ##    'input-file'          : "", ## INPUT FILE
    ##    'output-file'         : "", ## OUTPUT FILE
    ##    'stamp'               : "", ## STAMP TYPE
    ##    'layer'               : "", ## LAYER TO STAMP
    ##    'verbosity'           : 0,  ## VERBOSITY (we don't care about it)
    ##  }
    file_stamper_options = {
        'latex-template': "",
        'latex-template-var': {},
        'input-file': "",
        'output-file': "",
        'stamp': "",
        'layer': "",
        'verbosity': 0,
    }

    ## Check if stamping is enabled
    switch_file = parameters.get('switch_file', '')
    if switch_file and \
           not _read_in_file(os.path.join(curdir, switch_file)):
        # A "switch file" was specified but it does not exist, or is
        # empty. Silently abort stamping.
        return ""

    ## Submission access number:
    access = _read_in_file("%s/access" % curdir)
    ## record ID for the current submission. It is found in the special file
    ## "SN" (sysno) in curdir:
    recid = _read_in_file("%s/SN" % curdir)
    try:
        recid = int(recid)
    except ValueError:
        ## No record ID. Cannot continue.
        err_msg = "Error in Stamp_Replace_Single_File_Approval: " \
                  "Cannot recover record ID from the submission's working " \
                  "directory. Stamping cannot be carried out. The " \
                  "submission ID is [%s]." % cgi.escape(access)
        register_exception(prefix=err_msg)
        raise InvenioWebSubmitFunctionError(err_msg)

    ############
    ## Resolution of function parameters:
    ############
    ## The name of the LaTeX template to be used for stamp creation:
    latex_template = _string_parameter('latex_template')
    ## A string containing the variables/values that should be substituted
    ## in the final (working) LaTeX template:
    latex_template_vars_string = _string_parameter('latex_template_vars')
    ## The type of stamp to be applied to the file(s):
    stamp = _string_parameter('stamp').lower()
    ## The layer to use for stamping (anything unknown means background):
    layer = parameters.get('layer', "background")
    if layer not in ('background', 'foreground'):
        layer = "background"
    ## Get the name of the file to be stamped from the file indicated in
    ## the file_to_be_stamped parameter:
    file_to_stamp_file = parameters.get('file_to_be_stamped', "")
    if file_to_stamp_file is None:
        file_to_stamp_file = ""
    ## Get the "basename" for the file to be stamped (it's mandatory that it
    ## be in curdir):
    file_to_stamp_file = os.path.basename(file_to_stamp_file).strip()
    name_file_to_stamp = _read_in_file("%s/%s" % (curdir, file_to_stamp_file))
    ## Strings are immutable: the cleaned value has to be assigned back,
    ## otherwise the newlines are never removed.
    name_file_to_stamp = \
        name_file_to_stamp.replace("\n", "").replace("\r", "")
    ##
    ## Get the name to be given to the file after it has been stamped (if there
    ## is one.) Once more, it will be found in a file in curdir:
    new_file_name_file = parameters.get('new_file_name', "")
    if new_file_name_file is None:
        new_file_name_file = ""
    ## Get the "basename" for the file containing the new file name. (It's
    ## mandatory that it be in curdir):
    new_file_name_file = os.path.basename(new_file_name_file).strip()
    new_file_name = _read_in_file("%s/%s" % (curdir, new_file_name_file))

    ############
    ## Begin:
    ############
    ##
    ## If no name for the file to stamp, warning.
    if name_file_to_stamp == "":
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "It was not possible to recover a valid name for the " \
                  "file to be stamped. Stamping could not, therefore, be " \
                  "carried out. The submission ID is [%s]." \
                  % access
        raise InvenioWebSubmitFunctionWarning(wrn_msg)
    ##
    ## The file to be stamped is a bibdoc. We will only stamp it (a) if it
    ## exists; and (b) if it is a PDF file. So, get the path (in the bibdocs
    ## tree) to the file to be stamped:
    ##
    ## First get the object representing the bibdocs belonging to this record:
    bibrecdocs = BibRecDocs(recid)
    try:
        bibdoc_file_to_stamp = bibrecdocs.get_bibdoc("%s" % name_file_to_stamp)
    except InvenioBibDocFileError:
        ## Couldn't get a bibdoc object for this filename. Probably the file
        ## that we wanted to stamp wasn't attached to this record.
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "It was not possible to recover a bibdoc object for the " \
                  "filename [%s] when trying to stamp the main file. " \
                  "Stamping could not be carried out. The submission ID is " \
                  "[%s] and the record ID is [%s]." \
                  % (name_file_to_stamp, access, recid)
        register_exception(prefix=wrn_msg)
        raise InvenioWebSubmitFunctionWarning(wrn_msg)
    ## Get the BibDocFile object for the PDF version of the bibdoc to be
    ## stamped:
    try:
        bibdocfile_file_to_stamp = bibdoc_file_to_stamp.get_file("pdf")
    except InvenioBibDocFileError:
        ## This bibdoc doesn't have a physical file with the extension ".pdf"
        ## (take note of the lower-case extension - the bibdocfile library
        ## is case-sensitive with respect to filenames.  Log that there was
        ## no "pdf" and check for a file with extension "PDF":
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "It wasn't possible to recover a PDF BibDocFile object " \
                  "for the file with the name [%s], using the extension " \
                  "[pdf] - note the lower case - the bibdocfile library " \
                  "relies upon the case of an extension. The submission ID " \
                  "is [%s] and the record ID is [%s]. Going to try " \
                  "looking for a file with a [PDF] extension before giving " \
                  "up . . . " \
                  % (name_file_to_stamp, access, recid)
        register_exception(prefix=wrn_msg)
        try:
            bibdocfile_file_to_stamp = bibdoc_file_to_stamp.get_file("PDF")
        except InvenioBibDocFileError:
            wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                      "It wasn't possible to recover a PDF " \
                      "BibDocFile object for the file with the name [%s], " \
                      "using the extension [PDF] - note the upper case. " \
                      "Had previously tried searching for [pdf] - now " \
                      "giving up. Stamping could not be carried out. " \
                      "The submission ID is [%s] and the record ID is [%s]." \
                      % (name_file_to_stamp, access, recid)
            register_exception(prefix=wrn_msg)
            raise InvenioWebSubmitFunctionWarning(wrn_msg)

    ############
    ## Go ahead and prepare the details for the LaTeX stamp template and its
    ## variables:
    ############
    ## Strip the LaTeX filename into the basename (All templates should be
    ## in the template repository):
    latex_template = os.path.basename(latex_template)

    ## Convert the string of latex template variables into a dictionary
    ## of search-term/replacement-term pairs:
    latex_template_vars = get_dictionary_from_string(
        latex_template_vars_string)
    ## Values beginning with "FILE:" name a file in curdir whose contents
    ## become the variable's value. Every other value is left exactly as it
    ## is. Iterate over a copy of the keys since values are replaced on
    ## the way.
    for varname in list(latex_template_vars.keys()):
        varvalue = latex_template_vars[varname].strip()
        if varvalue == "":
            continue
        ## We don't want to interfere with date() or include() directives:
        if varvalue.endswith(")") and \
               varvalue.startswith(("date(", "include(")):
            continue
        if not varvalue.upper().startswith("FILE:"):
            continue
        ## The value to be used is to be taken from a file. Clean the
        ## file name and if it's OK, extract that value from the file.
        seekvalue_fname = os.path.basename(varvalue[5:].strip()).strip()
        if seekvalue_fname == "":
            ## The filename was not valid.
            err_msg = "Error in Stamp_Replace_Single_File_Approval: " \
                      "The function was configured to read a LaTeX " \
                      "template variable from a file with the " \
                      "following instruction: [%s --> %s]. The " \
                      "filename, however, was not considered valid. " \
                      "Please report this to the administrator." \
                      % (varname, varvalue)
            raise InvenioWebSubmitFunctionError(err_msg)
        seekvalue_path = "%s/%s" % (curdir, seekvalue_fname)
        if not os.access(seekvalue_path, os.R_OK|os.F_OK):
            ## The file didn't actually exist in the current
            ## submission's working directory. Use an empty value:
            latex_template_vars[varname] = ""
            continue
        try:
            ## "with" guarantees the handle is closed, even on error.
            with open(seekvalue_path, "r") as fh_value:
                file_value = fh_value.read()
        except IOError:
            ## The file was unreadable.
            err_msg = "Error in Stamp_Replace_Single_File_" \
                      "Approval: The function attempted to " \
                      "read a LaTex template variable " \
                      "value from the following file in the " \
                      "current submission's working " \
                      "directory: [%s]. However, an " \
                      "unexpected error was encountered " \
                      "when doing so. Please inform the " \
                      "administrator." \
                      % seekvalue_fname
            ## user_info defaults to None; indexing it blindly would
            ## raise a TypeError that hides the real I/O problem.
            req = None
            if user_info:
                req = user_info.get('req')
            if req is not None:
                register_exception(req=req)
            else:
                register_exception()
            raise InvenioWebSubmitFunctionError(err_msg)
        ## Replace the variable value with that which has been read from
        ## the file (trailing whitespace removed):
        latex_template_vars[varname] = file_value.rstrip()

    ## Put the 'fixed' values into the file_stamper_options dictionary:
    file_stamper_options['latex-template'] = latex_template
    file_stamper_options['latex-template-var'] = latex_template_vars
    file_stamper_options['stamp'] = stamp
    file_stamper_options['layer'] = layer

    ## Put the input file and output file into the file_stamper_options
    ## dictionary:
    file_stamper_options['input-file'] = bibdocfile_file_to_stamp.fullpath
    file_stamper_options[
        'output-file'] = bibdocfile_file_to_stamp.get_full_name()
    ##
    ## Before attempting to stamp the file, log the dictionary of arguments
    ## that will be passed to websubmit_file_stamper:
    try:
        with open("%s/websubmit_file_stamper-calls-options.log" \
                  % curdir, "a+") as fh_log:
            fh_log.write("%s\n" % file_stamper_options)
    except IOError:
        ## Unable to log the file stamper options. Not fatal: carry on.
        exception_prefix = "Unable to write websubmit_file_stamper " \
                           "options to log file " \
                           "%s/websubmit_file_stamper-calls-options.log" \
                           % curdir
        register_exception(prefix=exception_prefix)

    try:
        ## Try to stamp the file:
        (stamped_file_path_only, stamped_file_name) = \
                websubmit_file_stamper.stamp_file(file_stamper_options)
    except InvenioWebSubmitFileStamperError:
        ## It wasn't possible to stamp this file.
        ## Register the exception along with an informational message:
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "There was a problem stamping the file with the name [%s] " \
                  "and the fullpath [%s]. The file has not been stamped. " \
                  "The submission ID is [%s] and the record ID is [%s]." \
                  % (name_file_to_stamp,
                     file_stamper_options['input-file'],
                     access,
                     recid)
        register_exception(prefix=wrn_msg)
        raise InvenioWebSubmitFunctionWarning(wrn_msg)

    ## Stamping was successful. The BibDocFile must now be revised with
    ## the latest (stamped) version of the file:
    file_comment = "Stamped by WebSubmit: %s" \
                   % time.strftime("%d/%m/%Y", time.localtime())
    try:
        bibrecdocs.add_new_version("%s/%s" % (stamped_file_path_only,
                                              stamped_file_name),
                                   name_file_to_stamp,
                                   comment=file_comment,
                                   flags=('STAMPED', ))
    except InvenioBibDocFileError:
        ## Unable to revise the file with the newly stamped version.
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "After having stamped the file with the name [%s] " \
                  "and the fullpath [%s], it wasn't possible to revise " \
                  "that file with the newly stamped version. Stamping " \
                  "was unsuccessful. The submission ID is [%s] and the " \
                  "record ID is [%s]." \
                  % (name_file_to_stamp,
                     file_stamper_options['input-file'],
                     access,
                     recid)
        register_exception(prefix=wrn_msg)
        raise InvenioWebSubmitFunctionWarning(wrn_msg)

    ## File revised. If the file should be renamed after stamping, do so.
    if new_file_name != "":
        try:
            bibrecdocs.change_name(newname=new_file_name,
                                   docid=bibdoc_file_to_stamp.id)
        except (IOError, InvenioBibDocFileError):
            ## Unable to change the name
            wrn_msg = "Warning in Stamp_Replace_Single_File_Approval" \
                      ": After having stamped and revised the file " \
                      "with the name [%s] and the fullpath [%s], it " \
                      "wasn't possible to rename it to [%s]. The " \
                      "submission ID is [%s] and the record ID is " \
                      "[%s]." \
                      % (name_file_to_stamp,
                         file_stamper_options['input-file'],
                         new_file_name,
                         access,
                         recid)
            ## The file has been stamped and revised at this point, so a
            ## failed rename is logged instead of being silently dropped,
            ## but it does not abort the function.
            register_exception(prefix=wrn_msg)
    ## Finished.
    return ""