def get_missing_formats(filelist, desired_conversion=None):
    """Given a list of files it will return a dictionary of the form:
    file1 : missing formats to generate from it...
    """
    from invenio.bibdocfile import normalize_format, decompose_file

    def normalize_desired_conversion():
        ret = {}
        for key, value in desired_conversion.iteritems():
            ret[normalize_format(key)] = [normalize_format(aformat) for aformat in value]
        return ret

    if desired_conversion is None:
        desired_conversion = CFG_WEBSUBMIT_DESIRED_CONVERSIONS

    available_formats = [decompose_file(filename, skip_version=True)[2] for filename in filelist]
    missing_formats = []
    desired_conversion = normalize_desired_conversion()
    ret = {}
    for filename in filelist:
        aformat = decompose_file(filename, skip_version=True)[2]
        if aformat in desired_conversion:
            for desired_format in desired_conversion[aformat]:
                if desired_format not in available_formats and desired_format not in missing_formats:
                    missing_formats.append(desired_format)
                    if filename not in ret:
                        ret[filename] = []
                    ret[filename].append(desired_format)
    return ret
def get_missing_formats(filelist, desired_conversion=None):
    """Given a list of files it will return a dictionary of the form:
    file1 : missing formats to generate from it...
    """
    from invenio.bibdocfile import normalize_format, decompose_file

    def normalize_desired_conversion():
        ret = {}
        for key, value in desired_conversion.iteritems():
            ret[normalize_format(key)] = [normalize_format(aformat) for aformat in value]
        return ret

    if desired_conversion is None:
        desired_conversion = CFG_WEBSUBMIT_DESIRED_CONVERSIONS

    available_formats = [decompose_file(filename, skip_version=True)[2] for filename in filelist]
    missing_formats = []
    desired_conversion = normalize_desired_conversion()
    ret = {}
    for filename in filelist:
        aformat = decompose_file(filename, skip_version=True)[2]
        if aformat in desired_conversion:
            for desired_format in desired_conversion[aformat]:
                if desired_format not in available_formats and desired_format not in missing_formats:
                    missing_formats.append(desired_format)
                    if filename not in ret:
                        ret[filename] = []
                    ret[filename].append(desired_format)
    return ret
        def translate_link(match_obj):
            """Replace FCKeditor link by 'local' record link. Also
            create the FFT for that link"""
            file_type = match_obj.group("type")
            file_name = match_obj.group("filename")
            uid = match_obj.group("uid")
            dummy, name, extension = decompose_file(file_name)
            new_url = build_url(sysno, name, file_type, extension)
            original_location = match_obj.group()[1:-1]
            icon_location = original_location
            # Prepare FFT that will fetch the file (+ the original
            # file in the case of images)
            if file_type == "image":
                # Does original file exists, or do we just have the
                # icon? We expect the original file at a well defined
                # location
                possible_original_path = os.path.join(
                    CFG_PREFIX, "var", "tmp", "attachfile", uid, file_type, "original", file_name
                )
                if os.path.exists(possible_original_path):
                    icon_location = original_location
                    original_location = possible_original_path
                    new_url = build_url(sysno, name, file_type, extension, is_icon=True)

            docname = build_docname(name, file_type, extension)
            if original_location not in processed_paths:
                # Must create an FFT only if we have not yet processed
                # the file. This can happen if same image exists on
                # the same page (either in two different FCKeditor
                # instances, or twice in the HTML)
                processed_paths.append(original_location)
                write_fft(original_location, docname, icon_location, doctype=file_type)
            return '"' + new_url + '"'
Ejemplo n.º 4
0
def Move_Files_Archive(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead."""
    MainDir = "%s/files/MainFiles" % curdir
    IncludeDir = "%s/files/AdditionalFiles" % curdir
    watcheddirs = {'Main': MainDir, 'Additional': IncludeDir}
    for type, dir in watcheddirs.iteritems():
        if os.path.exists(dir):
            formats = {}
            files = os.listdir(dir)
            files.sort()
            for file in files:
                dummy, filename, extension = decompose_file(file)
                if not formats.has_key(filename):
                    formats[filename] = []
                formats[filename].append(normalize_format(extension))
            # first delete all missing files
            bibarchive = BibRecDocs(sysno)
            existingBibdocs = bibarchive.list_bibdocs(type)
            for existingBibdoc in existingBibdocs:
                if not formats.has_key(existingBibdoc.get_docname()):
                    existingBibdoc.delete()
            # then create/update the new ones
            for key in formats.keys():
                # instanciate bibdoc object
                bibarchive.add_new_file('%s/%s%s' % (dir, key, formats[key]),
                                        doctype=type,
                                        never_fail=True)
    return ""
def any2djvu(input_file, output_file=None, resolution=400, ocr=True, input_format=5, **dummy):
    """
    Transform input_file into a .djvu file.
    @param input_file [string] the input file name
    @param output_file [string] the output_file file name, None for temporary generated
    @param resolution [int] the resolution of the output_file
    @param input_format [int] [1-9]:
        1 - DjVu Document (for verification or OCR)
        2 - PS/PS.GZ/PDF Document (default)
        3 - Photo/Picture/Icon
        4 - Scanned Document - B&W - <200 dpi
        5 - Scanned Document - B&W - 200-400 dpi
        6 - Scanned Document - B&W - >400 dpi
        7 - Scanned Document - Color/Mixed - <200 dpi
        8 - Scanned Document - Color/Mixed - 200-400 dpi
        9 - Scanned Document - Color/Mixed - >400 dpi
    @return [string] output_file input_file.
    raise InvenioWebSubmitFileConverterError in case of errors.
    Note: due to the bottleneck of using a centralized server, it is very
    slow and is not suitable for interactive usage (e.g. WebSubmit functions)
    """
    from invenio.bibdocfile import decompose_file
    input_file, output_file, working_dir = prepare_io(input_file, output_file, '.djvu')

    ocr = ocr and "1" or "0"

    ## Any2djvu expect to find the file in the current directory.
    execute_command(CFG_PATH_ANY2DJVU, '-a', '-c', '-r', resolution, '-o', ocr, '-f', input_format, os.path.basename(input_file), cwd=working_dir)

    ## Any2djvu doesn't let you choose the output_file file name.
    djvu_output = os.path.join(working_dir, decompose_file(input_file)[1] + '.djvu')
    shutil.move(djvu_output, output_file)
    clean_working_dir(working_dir)
    return output_file
Ejemplo n.º 6
0
def Move_Files_Archive(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead."""
    MainDir = "%s/files/MainFiles" % curdir
    IncludeDir = "%s/files/AdditionalFiles" % curdir
    watcheddirs = {'Main' : MainDir, 'Additional' : IncludeDir}
    for type, dir in watcheddirs.iteritems():
        if os.path.exists(dir):
            formats = {}
            files = os.listdir(dir)
            files.sort()
            for file in files:
                dummy, filename, extension = decompose_file(file)
                if not formats.has_key(filename):
                    formats[filename] = []
                formats[filename].append(normalize_format(extension))
            # first delete all missing files
            bibarchive = BibRecDocs(sysno)
            existingBibdocs = bibarchive.list_bibdocs(type)
            for existingBibdoc in existingBibdocs:
                if not formats.has_key(existingBibdoc.get_docname()):
                    existingBibdoc.delete()
            # then create/update the new ones
            for key in formats.keys():
                # instanciate bibdoc object
                bibarchive.add_new_file('%s/%s%s' % (dir, key, formats[key]), doctype=type, never_fail=True)
    return ""
def any2djvu(input_file, output_file=None, resolution=400, ocr=True, input_format=5, **dummy):
    """
    Transform input_file into a .djvu file.
    @param input_file [string] the input file name
    @param output_file [string] the output_file file name, None for temporary generated
    @param resolution [int] the resolution of the output_file
    @param input_format [int] [1-9]:
        1 - DjVu Document (for verification or OCR)
        2 - PS/PS.GZ/PDF Document (default)
        3 - Photo/Picture/Icon
        4 - Scanned Document - B&W - <200 dpi
        5 - Scanned Document - B&W - 200-400 dpi
        6 - Scanned Document - B&W - >400 dpi
        7 - Scanned Document - Color/Mixed - <200 dpi
        8 - Scanned Document - Color/Mixed - 200-400 dpi
        9 - Scanned Document - Color/Mixed - >400 dpi
    @return [string] output_file input_file.
    raise InvenioWebSubmitFileConverterError in case of errors.
    Note: due to the bottleneck of using a centralized server, it is very
    slow and is not suitable for interactive usage (e.g. WebSubmit functions)
    """
    from invenio.bibdocfile import decompose_file
    input_file, output_file, working_dir = prepare_io(input_file, output_file, '.djvu')

    ocr = ocr and "1" or "0"

    ## Any2djvu expect to find the file in the current directory.
    execute_command(CFG_PATH_ANY2DJVU, '-a', '-c', '-r', resolution, '-o', ocr, '-f', input_format, os.path.basename(input_file), cwd=working_dir)

    ## Any2djvu doesn't let you choose the output_file file name.
    djvu_output = os.path.join(working_dir, decompose_file(input_file)[1] + '.djvu')
    shutil.move(djvu_output, output_file)
    clean_working_dir(working_dir)
    return output_file
Ejemplo n.º 8
0
def can_read_remote(inputfile):
    """Checks if inputfile is among metadata-readable
    file types
    @param inputfile: (string) path to the image
    @type inputfile: string
    @rtype: boolean
    @return: true if extension casn be handled"""

    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    return ext.lower() in [".jpg", ".jpeg", "jpe", ".jfif", ".jfi", ".jif"]
Ejemplo n.º 9
0
def can_read_remote(inputfile):
    """Checks if inputfile is among metadata-readable
    file types
    @param inputfile: (string) path to the image
    @type inputfile: string
    @rtype: boolean
    @return: true if extension casn be handled"""

    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    return ext.lower() in ['.jpg', '.jpeg', 'jpe', '.jfif', '.jfi', '.jif']
Ejemplo n.º 10
0
def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead."""
    if os.path.exists("%s/files" % curdir):
        bibrecdocs = BibRecDocs(sysno)
        for current_file in os.listdir("%s/files" % curdir):
            fullpath = "%s/files/%s" % (curdir, current_file)
            dummy, filename, extension = decompose_file(current_file)
            if extension and extension[0] != ".":
                extension = '.' + extension
            if not bibrecdocs.check_file_exists(fullpath, extension):
                bibrecdocs.add_new_file(fullpath, "Main", never_fail=True)
    return ""
def get_best_format_to_extract_text_from(filelist, best_formats=CFG_WEBSUBMIT_BEST_FORMATS_TO_EXTRACT_TEXT_FROM):
    """
    Return among the filelist the best file whose format is best suited for
    extracting text.
    """
    from invenio.bibdocfile import decompose_file, normalize_format
    best_formats = [normalize_format(aformat) for aformat in best_formats if can_convert(aformat, '.txt')]
    for aformat in best_formats:
        for filename in filelist:
            if decompose_file(filename, skip_version=True)[2].endswith(aformat):
                return filename
    raise InvenioWebSubmitFileConverterError("It's not possible to extract valuable text from any of the proposed files.")
Ejemplo n.º 12
0
def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead."""
    if os.path.exists("%s/files" % curdir):
        bibrecdocs = BibRecDocs(sysno)
        for current_file in os.listdir("%s/files" % curdir):
            fullpath = "%s/files/%s" % (curdir,current_file)
            dummy, filename, extension = decompose_file(current_file)
            if extension and extension[0] != ".":
                extension = '.' + extension
            if not bibrecdocs.check_file_exists(fullpath, extension):
                bibrecdocs.add_new_file(fullpath, "Main", never_fail=True)
    return ""
Ejemplo n.º 13
0
def can_write_local(inputfile):
    """
    Checks if inputfile is among metadata-writable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """
    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    return ext.lower() in [".jpg", ".tiff", ".jpeg", "jpe", ".jfif", ".jfi", ".jif"]
def get_best_format_to_extract_text_from(filelist, best_formats=CFG_WEBSUBMIT_BEST_FORMATS_TO_EXTRACT_TEXT_FROM):
    """
    Return among the filelist the best file whose format is best suited for
    extracting text.
    """
    from invenio.bibdocfile import decompose_file, normalize_format
    best_formats = [normalize_format(aformat) for aformat in best_formats if can_convert(aformat, '.txt')]
    for aformat in best_formats:
        for filename in filelist:
            if decompose_file(filename, skip_version=True)[2].endswith(aformat):
                return filename
    raise InvenioWebSubmitFileConverterError("It's not possible to extract valuable text from any of the proposed files.")
Ejemplo n.º 15
0
def can_read_local(inputfile):
    """
    Checks if inputfile is among metadata-readable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """
    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    return ext.lower() in ['.jpg', '.tiff', '.jpeg', 'jpe',
                           '.jfif', '.jfi', '.jif']
def gunzip(input_file, output_file=None, **dummy):
    """
    Uncompress a file.
    """
    from invenio.bibdocfile import decompose_file
    input_ext = decompose_file(input_file, skip_version=True)[2]
    if input_ext.endswith('.gz'):
        input_ext = input_ext[:-len('.gz')]
    else:
        input_ext = None
    input_file, output_file, dummy = prepare_io(input_file, output_file, input_ext, need_working_dir=False)
    execute_command(CFG_PATH_GUNZIP, '-c', input_file, filename_out=output_file)
    return output_file
def can_write_local(inputfile):
    """
    Checks if inputfile is among metadata-writable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """
    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    return ext.lower() in ['.jpg', '.tiff', '.jpeg', 'jpe',
                           '.jfif', '.jfi', '.jif']
Ejemplo n.º 18
0
def can_read_local(inputfile):
    """
    Checks if inputfile is among metadata-readable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """

    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    return ext.lower() in ['.pdf']
def gunzip(input_file, output_file=None, **dummy):
    """
    Uncompress a file.
    """
    from invenio.bibdocfile import decompose_file
    input_ext = decompose_file(input_file, skip_version=True)[2]
    if input_ext.endswith('.gz'):
        input_ext = input_ext[:-len('.gz')]
    else:
        input_ext = None
    input_file, output_file, dummy = prepare_io(input_file, output_file, input_ext, need_working_dir=False)
    execute_command(CFG_PATH_GUNZIP, '-c', input_file, filename_out=output_file)
    return output_file
Ejemplo n.º 20
0
def createRelatedFormats(fullpath, overwrite=True):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.
    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats
    Return a list of the paths to the converted files
    """
    createdpaths = []
    basedir, filename, extension = decompose_file(fullpath)
    extension = extension.lower()
    if extension == ".pdf":
        if overwrite or \
               not os.path.exists("%s/%s.ps" % (basedir, filename)):
            # Create PostScript
            try:
                convert_file(fullpath, "%s/%s.ps" % (basedir, filename))
                createdpaths.append("%s/%s.ps" % (basedir, filename))
            except InvenioWebSubmitFileConverterError:
                pass
        if overwrite or \
               not os.path.exists("%s/%s.ps.gz" % (basedir, filename)):
            if os.path.exists("%s/%s.ps" % (basedir, filename)):
                os.system("%s %s/%s.ps" % (CFG_PATH_GZIP, basedir, filename))
                createdpaths.append("%s/%s.ps.gz" % (basedir, filename))
    if extension == ".ps":
        if overwrite or \
               not os.path.exists("%s/%s.pdf" % (basedir, filename)):
            # Create PDF
            try:
                convert_file(fullpath, "%s/%s.pdf" % (basedir, filename))
                createdpaths.append("%s/%s.pdf" % (basedir, filename))
            except InvenioWebSubmitFileConverterError:
                pass
    if extension == ".ps.gz":
        if overwrite or \
               not os.path.exists("%s/%s.ps" % (basedir, filename)):
            #gunzip file
            os.system("%s %s" % (CFG_PATH_GUNZIP, fullpath))
        if overwrite or \
               not os.path.exists("%s/%s.pdf" % (basedir, filename)):
            # Create PDF
            try:
                convert_file("%s/%s.ps" % (basedir, filename),
                             "%s/%s.pdf" % (basedir, filename))
                createdpaths.append("%s/%s.pdf" % (basedir, filename))
            except InvenioWebSubmitFileConverterError:
                pass
        #gzip file
        if not os.path.exists("%s/%s.ps.gz" % (basedir, filename)):
            os.system("%s %s/%s.ps" % (CFG_PATH_GZIP, basedir, filename))
    return createdpaths
def write_metadata(inputfile, outputfile, metadata_dictionary,
                   force=None, verbose=0):
    """
    Writes metadata to given file.

    Availability depends on input file format and installed plugins
    (return C{TypeError} if unsupported file format).

    @param inputfile: path to a file
    @type inputfile: string
    @param outputfile: path to the resulting file.
    @type outputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param metadata_dictionary: keys and values of metadata to update.
    @type metadata_dictionary: dict
    @param force: name of plugin to use, to skip plugin auto-discovery
    @type force: string
    @return: output of the plugin
    @rtype: string
    @raise TypeError: if file format is not supported.
    @raise RuntimeError: if required library to process file is missing.
    @raise InvenioWebSubmitFileMetadataRuntimeError: when metadata cannot be updated.
    """
    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    if verbose > 5:
        print ext.lower(), 'extension to write to'

    # Plugins
    metadata_extractor_plugins = PluginContainer(
        os.path.join(CFG_PYLIBDIR,
                     'invenio', 'websubmit_file_metadata_plugins', 'wsm_*.py'),
        plugin_builder=plugin_builder_function,
        api_version=__required_plugin_API_version__
        )

    # Loop through the plugins to find a good one to ext
    for plugin_name, plugin in metadata_extractor_plugins.iteritems():
        if plugin.has_key('can_write_local') and \
            plugin['can_write_local'](inputfile) and \
            (not force or plugin_name == force):
            if verbose > 5:
                print 'Using ' + plugin_name
            return plugin['write_metadata_local'](inputfile,
                                                  outputfile,
                                                  metadata_dictionary,
                                                  verbose)

    # Case of no plugin found, raise
    raise TypeError, 'Unsupported file type'
Ejemplo n.º 22
0
def write_metadata(inputfile, outputfile, metadata_dictionary,
                   force=None, verbose=0):
    """
    Writes metadata to given file.

    Availability depends on input file format and installed plugins
    (return C{TypeError} if unsupported file format).

    @param inputfile: path to a file
    @type inputfile: string
    @param outputfile: path to the resulting file.
    @type outputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param metadata_dictionary: keys and values of metadata to update.
    @type metadata_dictionary: dict
    @param force: name of plugin to use, to skip plugin auto-discovery
    @type force: string
    @return: output of the plugin
    @rtype: string
    @raise TypeError: if file format is not supported.
    @raise RuntimeError: if required library to process file is missing.
    @raise InvenioWebSubmitFileMetadataRuntimeError: when metadata cannot be updated.
    """
    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    if verbose > 5:
        print ext.lower(), 'extension to write to'

    # Plugins
    metadata_extractor_plugins = PluginContainer(
        os.path.join(CFG_PYLIBDIR,
                     'invenio', 'websubmit_file_metadata_plugins', 'wsm_*.py'),
        plugin_builder=plugin_builder_function,
        api_version=__required_plugin_API_version__
        )

    # Loop through the plugins to find a good one to ext
    for plugin_name, plugin in metadata_extractor_plugins.iteritems():
        if plugin.has_key('can_write_local') and \
            plugin['can_write_local'](inputfile) and \
            (not force or plugin_name == force):
            if verbose > 5:
                print 'Using ' + plugin_name
            return plugin['write_metadata_local'](inputfile,
                                                  outputfile,
                                                  metadata_dictionary,
                                                  verbose)

    # Case of no plugin found, raise
    raise TypeError, 'Unsupported file type'
def convert_file(input_file, output_file=None, output_format=None, **params):
    """
    Convert files from one format to another.
    @param input_file [string] the path to an existing file
    @param output_file [string] the path to the desired ouput. (if None a
        temporary file is generated)
    @param output_format [string] the desired format (if None it is taken from
        output_file)
    @param params other paramaters to pass to the particular converter
    @return [string] the final output_file
    """
    from invenio.bibdocfile import decompose_file, normalize_format
    if output_format is None:
        if output_file is None:
            raise ValueError("At least output_file or format should be specified.")
        else:
            output_ext = decompose_file(output_file, skip_version=True)[2]
    else:
        output_ext = normalize_format(output_format)
    input_ext = decompose_file(input_file, skip_version=True)[2]
    conversion_chain = can_convert(input_ext, output_ext)
    if conversion_chain:
        current_input = input_file
        current_output = None
        for i in xrange(len(conversion_chain)):
            if i == (len(conversion_chain) - 1):
                current_output = output_file
            converter = conversion_chain[i][0]
            final_params = dict(conversion_chain[i][1])
            final_params.update(params)
            try:
                return converter(current_input, current_output, **final_params)
            except InvenioWebSubmitFileConverterError, err:
                raise InvenioWebSubmitFileConverterError("Error when converting from %s to %s: %s" % (input_file, output_ext, err))
            except Exception, err:
                register_exception()
                raise InvenioWebSubmitFileConverterError("Unexpected error when converting from %s to %s (%s): %s" % (input_file, output_ext, type(err), err))
            current_input = current_output
def convert_file(input_file, output_file=None, output_format=None, **params):
    """
    Convert files from one format to another.
    @param input_file [string] the path to an existing file
    @param output_file [string] the path to the desired ouput. (if None a
        temporary file is generated)
    @param output_format [string] the desired format (if None it is taken from
        output_file)
    @param params other paramaters to pass to the particular converter
    @return [string] the final output_file
    """
    from invenio.bibdocfile import decompose_file, normalize_format
    if output_format is None:
        if output_file is None:
            raise ValueError("At least output_file or format should be specified.")
        else:
            output_ext = decompose_file(output_file, skip_version=True)[2]
    else:
        output_ext = normalize_format(output_format)
    input_ext = decompose_file(input_file, skip_version=True)[2]
    conversion_chain = can_convert(input_ext, output_ext)
    if conversion_chain:
        current_input = input_file
        current_output = None
        for i in xrange(len(conversion_chain)):
            if i == (len(conversion_chain) - 1):
                current_output = output_file
            converter = conversion_chain[i][0]
            final_params = dict(conversion_chain[i][1])
            final_params.update(params)
            try:
                return converter(current_input, current_output, **final_params)
            except InvenioWebSubmitFileConverterError, err:
                raise InvenioWebSubmitFileConverterError("Error when converting from %s to %s: %s" % (input_file, output_ext, err))
            except Exception, err:
                register_exception()
                raise InvenioWebSubmitFileConverterError("Unexpected error when converting from %s to %s (%s): %s" % (input_file, output_ext, type(err), err))
            current_input = current_output
Ejemplo n.º 25
0
def createRelatedFormats(fullpath, overwrite=True):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.
    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats
    Return a list of the paths to the converted files
    """
    createdpaths = []
    basedir, filename, extension = decompose_file(fullpath)
    extension = extension.lower()
    if extension == ".pdf":
        if overwrite or \
               not os.path.exists("%s/%s.ps" % (basedir, filename)):
            # Create PostScript
            try:
                convert_file(fullpath, "%s/%s.ps" % (basedir, filename))
                createdpaths.append("%s/%s.ps" % (basedir, filename))
            except InvenioWebSubmitFileConverterError:
                pass
        if overwrite or \
               not os.path.exists("%s/%s.ps.gz" % (basedir, filename)):
            if os.path.exists("%s/%s.ps" % (basedir, filename)):
                os.system("%s %s/%s.ps" % (CFG_PATH_GZIP, basedir, filename))
                createdpaths.append("%s/%s.ps.gz" % (basedir, filename))
    if extension == ".ps":
        if overwrite or \
               not os.path.exists("%s/%s.pdf" % (basedir, filename)):
            # Create PDF
            try:
                convert_file(fullpath, "%s/%s.pdf" % (basedir, filename))
                createdpaths.append("%s/%s.pdf" % (basedir, filename))
            except InvenioWebSubmitFileConverterError:
                pass
    if extension == ".ps.gz":
        if overwrite or \
               not os.path.exists("%s/%s.ps" % (basedir, filename)):
            #gunzip file
            os.system("%s %s" % (CFG_PATH_GUNZIP, fullpath))
        if overwrite or \
               not os.path.exists("%s/%s.pdf" % (basedir, filename)):
            # Create PDF
            try:
                convert_file("%s/%s.ps" % (basedir, filename), "%s/%s.pdf" % (basedir, filename))
                createdpaths.append("%s/%s.pdf" % (basedir, filename))
            except InvenioWebSubmitFileConverterError:
                pass
        #gzip file
        if not os.path.exists("%s/%s.ps.gz" % (basedir, filename)):
            os.system("%s %s/%s.ps" % (CFG_PATH_GZIP, basedir, filename))
    return createdpaths
def prepare_io(input_file, output_file=None, output_ext=None, need_working_dir=True):
    """Clean input_file and the output_file."""
    from invenio.bibdocfile import decompose_file, normalize_format
    output_ext = normalize_format(output_ext)
    debug('Preparing IO for input=%s, output=%s, output_ext=%s' % (input_file, output_file, output_ext))
    if output_ext is None:
        if output_file is None:
            output_ext = '.tmp'
        else:
            output_ext = decompose_file(output_file, skip_version=True)[2]
    if output_file is None:
        try:
            (fd, output_file) = tempfile.mkstemp(suffix=output_ext, dir=CFG_TMPDIR)
            os.close(fd)
        except IOError, err:
            raise InvenioWebSubmitFileConverterError("It's impossible to create a temporary file: %s" % err)
def prepare_io(input_file, output_file=None, output_ext=None, need_working_dir=True):
    """Clean input_file and the output_file."""
    from invenio.bibdocfile import decompose_file, normalize_format
    output_ext = normalize_format(output_ext)
    debug('Preparing IO for input=%s, output=%s, output_ext=%s' % (input_file, output_file, output_ext))
    if output_ext is None:
        if output_file is None:
            output_ext = '.tmp'
        else:
            output_ext = decompose_file(output_file, skip_version=True)[2]
    if output_file is None:
        try:
            (fd, output_file) = tempfile.mkstemp(suffix=output_ext, dir=CFG_TMPDIR)
            os.close(fd)
        except IOError, err:
            raise InvenioWebSubmitFileConverterError("It's impossible to create a temporary file: %s" % err)
Ejemplo n.º 28
0
def can_read_local(inputfile):
    """
    Checks if inputfile is among metadata-readable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """

    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    return ext.lower() in ['.html', '.doc', '.ps', '.xls', '.ppt',
                           '.ps', '.sxw', '.sdw', '.dvi', '.man', '.flac',
                           '.mp3', '.nsf', '.sid', '.ogg', '.wav', '.png',
                           '.deb', '.rpm', '.tar.gz', '.zip', '.elf',
                           '.s3m', '.xm', '.it', '.flv', '.real', '.avi',
                           '.mpeg', '.qt', '.asf']
Ejemplo n.º 29
0
def createRelatedFormats(fullpath, overwrite=True, debug=False):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.
    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats
    Return a list of the paths to the converted files
    """
    file_converter_logger = get_file_converter_logger()
    old_logging_level = file_converter_logger.getEffectiveLevel()
    if debug:
        file_converter_logger.setLevel(DEBUG)
    try:
        createdpaths = []
        basedir, filename, extension = decompose_file(fullpath)
        extension = extension.lower()
        if debug:
            print >>sys.stderr, "basedir: %s, filename: %s, extension: %s" % (basedir, filename, extension)

        filelist = glob.glob(os.path.join(basedir, "%s*" % filename))
        if debug:
            print >>sys.stderr, "filelist: %s" % filelist
        missing_formats = get_missing_formats(filelist)
        if debug:
            print >>sys.stderr, "missing_formats: %s" % missing_formats
        for path, formats in missing_formats.iteritems():
            if debug:
                print >>sys.stderr, "... path: %s, formats: %s" % (path, formats)
            for aformat in formats:
                if debug:
                    print >>sys.stderr, "...... aformat: %s" % aformat
                newpath = os.path.join(basedir, filename + aformat)
                if debug:
                    print >>sys.stderr, "...... newpath: %s" % newpath
                try:
                    convert_file(path, newpath)
                    createdpaths.append(newpath)
                except InvenioWebSubmitFileConverterError, msg:
                    if debug:
                        print >>sys.stderr, "...... Exception: %s" % msg
                    register_exception(alert_admin=True)
    finally:
        if debug:
            file_converter_logger.setLevel(old_logging_level)
    return createdpaths
Ejemplo n.º 30
0
def createRelatedFormats(fullpath, overwrite=True, debug=False):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.
    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats
    Return a list of the paths to the converted files
    """
    file_converter_logger = get_file_converter_logger()
    old_logging_level = file_converter_logger.getEffectiveLevel()
    if debug:
        file_converter_logger.setLevel(DEBUG)
    try:
        createdpaths = []
        basedir, filename, extension = decompose_file(fullpath)
        extension = extension.lower()
        if debug:
            print >> sys.stderr, "basedir: %s, filename: %s, extension: %s" % (basedir, filename, extension)

        filelist = glob.glob(os.path.join(basedir, '%s*' % filename))
        if debug:
            print >> sys.stderr, "filelist: %s" % filelist
        missing_formats = get_missing_formats(filelist)
        if debug:
            print >> sys.stderr, "missing_formats: %s" % missing_formats
        for path, formats in missing_formats.iteritems():
            if debug:
                print >> sys.stderr, "... path: %s, formats: %s" % (path, formats)
            for aformat in formats:
                if debug:
                    print >> sys.stderr, "...... aformat: %s" % aformat
                newpath = os.path.join(basedir, filename + aformat)
                if debug:
                    print >> sys.stderr, "...... newpath: %s" % newpath
                try:
                    convert_file(path, newpath)
                    createdpaths.append(newpath)
                except InvenioWebSubmitFileConverterError, msg:
                    if debug:
                        print >> sys.stderr, "...... Exception: %s" % msg
                    register_exception(alert_admin=True)
    finally:
        if debug:
            file_converter_logger.setLevel(old_logging_level)
    return createdpaths
        def translate_link(match_obj):
            """Replace CKEditor link by 'local' record link. Also
            create the FFT for that link"""
            file_type = match_obj.group('type')
            file_name = match_obj.group('filename')
            uid = match_obj.group('uid')
            dummy, name, extension = decompose_file(file_name)
            new_url = build_url(sysno, name, file_type, extension)
            original_location = match_obj.group()[1:-1]
            icon_location = original_location
            # Prepare FFT that will fetch the file (+ the original
            # file in the case of images)
            if file_type == 'image':
                # Does original file exists, or do we just have the
                # icon? We expect the original file at a well defined
                # location
                possible_original_path = os.path.join(CFG_PREFIX, 'var', 'tmp',
                                                      'attachfile', uid,
                                                      file_type, 'original',
                                                      file_name)
                if os.path.exists(possible_original_path):
                    icon_location = original_location
                    original_location = possible_original_path
                    new_url = build_url(sysno,
                                        name,
                                        file_type,
                                        extension,
                                        is_icon=True)

            docname = build_docname(name, file_type, extension)
            if original_location not in processed_paths:
                # Must create an FFT only if we have not yet processed
                # the file. This can happen if same image exists on
                # the same page (either in two different CKEditor
                # instances, or twice in the HTML)
                processed_paths.append(original_location)
                write_fft(curdir,
                          original_location,
                          docname,
                          icon_location,
                          doctype=file_type)
            return '"' + new_url + '"'
def _get_feature_image(record, ln=CFG_SITE_LANG):
    """
    Looks for an image that can be featured on the article overview page.
    """
    src = ""
    if ln == "fr":
        article = "".join(record.fields("590__b"))
        if not article:
            article = "".join(record.fields("520__b"))
    else:
        article = "".join(record.fields("520__b"))
        if not article:
            article = "".join(record.fields("590__b"))

    image = re.search(img_pattern, article)
    if image:
        src = image.group("image")
    if not src:
        # Look for an attached image
        icons = [icon for icon in record.fields("8564_q") if (decompose_file(icon)[2] in ["jpg", "jpeg", "png", "gif"])]
        if icons:
            src = icons[0]
    return src
def _get_feature_image(record, ln=CFG_SITE_LANG):
    """
    Looks for an image that can be featured on the article overview page.
    """
    src = ''
    if ln == "fr":
        article = ''.join(record.fields('590__b'))
        if not article:
            article = ''.join(record.fields('520__b'))
    else:
        article = ''.join(record.fields('520__b'))
        if not article:
            article = ''.join(record.fields('590__b'))

    image = re.search(img_pattern, article)
    if image:
        src = image.group("image")
    if not src:
        # Look for an attached image
        icons = [icon for icon in record.fields('8564_q') if \
                (decompose_file(icon)[2] in ['jpg', 'jpeg', 'png', 'gif'])]
        if icons:
            src = icons[0]
    return src
            (fd, output_file) = tempfile.mkstemp(suffix=output_ext, dir=CFG_TMPDIR)
            os.close(fd)
        except IOError, err:
            raise InvenioWebSubmitFileConverterError("It's impossible to create a temporary file: %s" % err)
    else:
        output_file = os.path.abspath(output_file)
        if os.path.exists(output_file):
            os.remove(output_file)

    if need_working_dir:
        try:
            working_dir = tempfile.mkdtemp(dir=CFG_TMPDIR, prefix='conversion')
        except IOError, err:
            raise InvenioWebSubmitFileConverterError("It's impossible to create a temporary directory: %s" % err)

        input_ext = decompose_file(input_file, skip_version=True)[2]
        new_input_file = os.path.join(working_dir, 'input' + input_ext)
        shutil.copy(input_file, new_input_file)
        input_file = new_input_file
    else:
        working_dir = None
        input_file = os.path.abspath(input_file)

    debug('IO prepared: input_file=%s, output_file=%s, working_dir=%s' % (input_file, output_file, working_dir))
    return (input_file, output_file, working_dir)


def clean_working_dir(working_dir):
    """
    Remove the working_dir.
    """
def process_CKEditor_upload(form, uid, user_files_path, user_files_absolute_path,
                            recid=None, allowed_types=default_allowed_types):
    """
    Process a file upload request.

    @param form: the form as in req object.
    @type form: dict
    @param uid: the user ID of the user uploading the file.
    @type uid: int
    @param user_files_path: the base URL where the file can be
        accessed from the web after upload.
        Note that you have to implement your own handler to stream the files from the directory
        C{user_files_absolute_path} if you set this value.
    @type user_files_path: string
    @param user_files_absolute_path: the base path on the server where
        the files should be saved.
        Eg:C{%(CFG_PREFIX)s/var/data/comments/%(recid)s/%(uid)s}
    @type user_files_absolute_path: string
    @param recid: the record ID for which we upload a file. Leave None if not relevant.
    @type recid: int
    @param allowed_types: types allowed for uploading. These
        are supported by CKEditor: ['File', 'Image', 'Flash', 'Media']
    @type allowed_types: list of strings
    @return: (msg, uploaded_file_path, uploaded_file_name, uploaded_file_url, callback_function)
    """
    msg = ''
    filename = ''
    formfile = None
    uploaded_file_path = ''
    user_files_path = ''

    for key, formfields in form.items():
        if key != 'upload':
            continue
        if hasattr(formfields, "filename") and formfields.filename:
            # We have found our file
            filename = formfields.filename
            formfile = formfields.file
            break

    can_upload_file_p = False
    if not form['type'] in allowed_types:
        # Is the type sent through the form ok?
        msg = 'You are not allowed to upload a file of this type'
    else:
        # Is user allowed to upload such file extension?
        basedir, name, extension = decompose_file(filename)
        extension = extension[1:] # strip leading dot
        if extension in allowed_extensions.get(form['type'], []):
            can_upload_file_p = True

    if not can_upload_file_p:
        msg = 'You are not allowed to upload a file of this type'
    elif filename and formfile:
        ## Before saving the file to disk, wash the filename (in particular
        ## washing away UNIX and Windows (e.g. DFS) paths):
        filename = os.path.basename(filename.split('\\')[-1])
        # Remove \ / | : ? *
	filename = re.sub ( '\\\\|\\/|\\||\\:|\\?|\\*|"|<|>|[\x00-\x1f\x7f-\x9f]/', '_', filename)
        filename = filename.strip()
        if filename != "":
            # Check that file does not already exist
            n = 1
            while os.path.exists(os.path.join(user_files_absolute_path, filename)):
                basedir, name, extension = decompose_file(filename)
                new_name = propose_next_docname(name)
                filename = new_name + extension

            # This may be dangerous if the file size is bigger than the available memory
            fp = open(os.path.join(user_files_absolute_path, filename), "w")
            fp.write(formfile.read())
            fp.close()

            uploaded_file_path = os.path.join(user_files_absolute_path, filename)
            uploaded_file_name = filename

    return (msg, uploaded_file_path, filename, user_files_path, form['CKEditorFuncNum'])
Ejemplo n.º 36
0
def write_metadata_local(inputfile, outputfile, metadata_dictionary, verbose):
    """
    Metadata write method, takes the .pdf as input and creates a new
    one with the new info.

    @param inputfile: path to the pdf
    @type inputfile: string
    @param outputfile: path to the resulting pdf
    @type outputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param metadata_dictionary: metadata information to update inputfile
    @type metadata_dictionary: dict
    """
    # Take the file name (0 base, 1 name, 2 ext)
    filename = decompose_file(inputfile)[1]

    # Print pdf metadata
    if verbose > 1:
        print 'Metadata information in the PDF file ' + filename + ': \n'
        try:
            os.system(CFG_PATH_PDFTK + ' ' + inputfile + ' dump_data')
        except Exception:
            print 'Problem with inputfile to PDFTK'

    # Info file for pdftk
    (fd, path_to_info) = tempfile.mkstemp(prefix="wsm_pdf_plugin_info_", \
                                             dir=CFG_TMPDIR)
    os.close(fd)
    file_in = open(path_to_info, 'w')
    if verbose > 5:
        print "Saving PDFTK info file to %s" % path_to_info

    # User interaction to form the info file
    # Main Case: Dictionary received through option -d
    if not metadata_dictionary == {}:
        for tag in metadata_dictionary:
            line = 'InfoKey: ' + tag + '\nInfoValue: ' + \
                   metadata_dictionary[tag] + '\n'
            if verbose > 0:
                print line
            file_in.writelines(line)
    else:
        data_modified = False
        user_input = 'user_input'
        print "Entering interactive mode. Choose what you want to do:"
        while (user_input):
            if not data_modified:
                try:
                    user_input = raw_input('[w]rite / [q]uit\n')
                except:
                    print "Aborting"
                    return
            else:
                try:
                    user_input = raw_input('[w]rite / [q]uit and apply / [a]bort \n')
                except:
                    print "Aborting"
                    return
            if user_input == 'q':
                if not data_modified:
                    return
                break
            elif user_input == 'w':
                try:
                    tag = raw_input('Tag to update:\n')
                    value = raw_input('With value:\n')
                except:
                    print "Aborting"
                    return
                # Write to info file
                line = 'InfoKey: ' + tag + '\nInfoValue: ' + value + '\n'
                data_modified = True
                file_in.writelines(line)
            elif user_input == 'a':
                return
            else:
                print "Invalid option: "
    file_in.close()

    (fd, pdf_temp_path) = tempfile.mkstemp(prefix="wsm_pdf_plugin_pdf_", \
                                              dir=CFG_TMPDIR)
    os.close(fd)

    # Now we call pdftk tool to update the info on a pdf
    #try:
    cmd_pdftk = '%s %s update_info %s output %s'
    (exit_status, output_std, output_err) = \
                  run_shell_command(cmd_pdftk,
                                    args=(CFG_PATH_PDFTK, inputfile,
                                          path_to_info, pdf_temp_path))
    if verbose > 5:
        print output_std, output_err

    if os.path.exists(pdf_temp_path):
        # Move to final destination if exist
        try:
            shutil.move(pdf_temp_path, outputfile)
        except Exception, err:
            raise InvenioWebSubmitFileMetadataRuntimeError("Could not move %s to %s" % \
                                                           (pdf_temp_path, outputfile))
Ejemplo n.º 37
0
def createRelatedFormats(fullpath, overwrite=True, debug=False, consider_version=False):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.
    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats
    @param consider_version: (bool) if True, consider the version info
                             in C{fullpath} to find missing format
                             for that specific version, if C{fullpath}
                             contains version info
    Return a list of the paths to the converted files
    """
    file_converter_logger = get_file_converter_logger()
    old_logging_level = file_converter_logger.getEffectiveLevel()
    if debug:
        file_converter_logger.setLevel(DEBUG)
    try:
        createdpaths = []
        if consider_version:
            try:
                basedir, filename, extension, version = decompose_file_with_version(fullpath)
            except:
                basedir, filename, extension = decompose_file(fullpath)
                version = 0
        else:
            basedir, filename, extension = decompose_file(fullpath)
            version = 0
        extension = extension.lower()
        if debug:
            print >> sys.stderr, "basedir: %s, filename: %s, extension: %s" % (basedir, filename, extension)

        if overwrite:
            missing_formats = get_missing_formats([fullpath])
        else:
            if version:
                filelist = glob.glob(os.path.join(basedir, '%s*;%s' % (filename, version)))
            else:
                filelist = glob.glob(os.path.join(basedir, '%s*' % filename))
            if debug:
                print >> sys.stderr, "filelist: %s" % filelist
            missing_formats = get_missing_formats(filelist)
        if debug:
            print >> sys.stderr, "missing_formats: %s" % missing_formats
        for path, formats in missing_formats.iteritems():
            if debug:
                print >> sys.stderr, "... path: %s, formats: %s" % (path, formats)
            for aformat in formats:
                if debug:
                    print >> sys.stderr, "...... aformat: %s" % aformat
                newpath = os.path.join(basedir, filename + aformat)
                if debug:
                    print >> sys.stderr, "...... newpath: %s" % newpath
                try:
                    if CFG_BIBDOCFILE_FILEDIR in basedir:
                        # We should create the new files in a temporary location, not
                        # directly inside the BibDoc directory.
                        newpath = convert_file(path, output_format=aformat)
                    else:
                        convert_file(path, newpath)
                    createdpaths.append(newpath)
                except InvenioWebSubmitFileConverterError, msg:
                    if debug:
                        print >> sys.stderr, "...... Exception: %s" % msg
                    register_exception(alert_admin=True)
    finally:
        if debug:
            file_converter_logger.setLevel(old_logging_level)
    return createdpaths
Ejemplo n.º 38
0
class WebInterfaceSubmitPages(WebInterfaceDirectory):

    _exports = ['summary', 'sub', 'direct', '', 'attachfile', 'uploadfile', \
                'getuploadedfile', 'upload_video', ('continue', 'continue_')]

    def uploadfile(self, req, form):
        """
        Similar to /submit, but only consider files. Nice for
        asynchronous Javascript uploads. Should be used to upload a
        single file.

        Also try to create an icon, and return URL to file(s) + icon(s)

        Authentication is performed based on session ID passed as
        parameter instead of cookie-based authentication, due to the
        use of this URL by the Flash plugin (to upload multiple files
        at once), which does not route cookies.

        FIXME: consider adding /deletefile and /modifyfile functions +
        parsing of additional parameters to rename files, add
        comments, restrictions, etc.
        """
        argd = wash_urlargd(
            form, {
                'doctype': (str, ''),
                'access': (str, ''),
                'indir': (str, ''),
                'session_id': (str, ''),
                'rename': (str, ''),
            })

        curdir = None
        if not form.has_key("indir") or \
               not form.has_key("doctype") or \
               not form.has_key("access"):
            raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
        else:
            curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'],
                                  argd['doctype'], argd['access'])

        user_info = collect_user_info(req)
        if form.has_key("session_id"):
            # Are we uploading using Flash, which does not transmit
            # cookie? The expect to receive session_id as a form
            # parameter.  First check that IP addresses do not
            # mismatch. A ValueError will be raises if there is
            # something wrong
            session = get_session(req=req, sid=argd['session_id'])
            try:
                session = get_session(req=req, sid=argd['session_id'])
            except ValueError, e:
                raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)

            # Retrieve user information. We cannot rely on the session here.
            res = run_sql("SELECT uid FROM session WHERE session_key=%s",
                          (argd['session_id'], ))
            if len(res):
                uid = res[0][0]
                user_info = collect_user_info(uid)
                try:
                    act_fd = file(os.path.join(curdir, 'act'))
                    action = act_fd.read()
                    act_fd.close()
                except:
                    action = ""

        # Is user authorized to perform this action?
        (auth_code, auth_message) = acc_authorize_action(
            uid,
            "submit",
            authorized_if_no_roles=not isGuestUser(uid),
            verbose=0,
            doctype=argd['doctype'],
            act=action)
        if acc_is_role("submit", doctype=argd['doctype'],
                       act=action) and auth_code != 0:
            # User cannot submit
            raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED)
        else:
            # Process the upload and get the response
            added_files = {}
            for key, formfields in form.items():
                filename = key.replace("[]", "")
                file_to_open = os.path.join(curdir, filename)
                if hasattr(formfields, "filename") and formfields.filename:
                    dir_to_open = os.path.abspath(
                        os.path.join(curdir, 'files', str(user_info['uid']),
                                     key))
                    try:
                        assert (
                            dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR))
                    except AssertionError:
                        register_exception(req=req,
                                           prefix='curdir="%s", key="%s"' %
                                           (curdir, key))
                        raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                    if not os.path.exists(dir_to_open):
                        try:
                            os.makedirs(dir_to_open)
                        except OSError, e:
                            if e.errno != errno.EEXIST:
                                # If the issue is only that directory
                                # already exists, then continue, else
                                # report
                                register_exception(req=req, alert_admin=True)
                                raise apache.SERVER_RETURN(
                                    apache.HTTP_FORBIDDEN)

                    filename = formfields.filename
                    ## Before saving the file to disc, wash the filename (in particular
                    ## washing away UNIX and Windows (e.g. DFS) paths):
                    filename = os.path.basename(filename.split('\\')[-1])
                    filename = filename.strip()
                    if filename != "":
                        # Check that file does not already exist
                        n = 1
                        while os.path.exists(
                                os.path.join(dir_to_open, filename)):
                            #dirname, basename, extension = decompose_file(new_destination_path)
                            basedir, name, extension = decompose_file(filename)
                            new_name = propose_next_docname(name)
                            filename = new_name + extension
                        # This may be dangerous if the file size is bigger than the available memory
                        fp = open(os.path.join(dir_to_open, filename), "w")
                        fp.write(formfields.file.read())
                        fp.close()
                        fp = open(os.path.join(curdir, "lastuploadedfile"),
                                  "w")
                        fp.write(filename)
                        fp.close()
                        fp = open(file_to_open, "w")
                        fp.write(filename)
                        fp.close()
                        try:
                            # Create icon
                            (icon_path, icon_name) = create_icon({
                                'input-file':
                                os.path.join(dir_to_open, filename),
                                'icon-name':
                                filename,  # extension stripped automatically
                                'icon-file-format':
                                'gif',
                                'multipage-icon':
                                False,
                                'multipage-icon-delay':
                                100,
                                'icon-scale':
                                "300>",  # Resize only if width > 300
                                'verbosity':
                                0,
                            })

                            icons_dir = os.path.join(
                                os.path.join(curdir, 'icons',
                                             str(user_info['uid']), key))
                            if not os.path.exists(icons_dir):
                                # Create uid/icons dir if needed
                                try:
                                    os.makedirs(icons_dir)
                                except OSError, e:
                                    if e.errno != errno.EEXIST:
                                        # If the issue is only that
                                        # directory already exists,
                                        # then continue, else report
                                        register_exception(req=req,
                                                           alert_admin=True)
                                        raise apache.SERVER_RETURN(
                                            apache.HTTP_FORBIDDEN)
                            os.rename(os.path.join(icon_path, icon_name),
                                      os.path.join(icons_dir, icon_name))
                            added_files[key] = {
                                'name': filename,
                                'iconName': icon_name
                            }
                        except InvenioWebSubmitIconCreatorError, e:
                            # We could not create the icon
                            added_files[key] = {'name': filename}
                            continue
                    else:
Ejemplo n.º 39
0
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 if the process was successfull, 0 if not
    @rtype; int
    """

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = 'bibencode_'+ str(batch_job['recid']) + '_' + str(uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
        xml_file = file(xml_filename, 'w')
        xml_file.write(marcxml)
        xml_file.close()
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errrors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    if record_exists(batch_job['recid']) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description', description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat', subformat)
                if (comment == m_comment and
                    description == m_description and
                    subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect of the from the record
                    try:
                        ## Assumes pbcore metadata mapping
                        batch_job['aspect'] = get_fieldvalues(124, CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                          % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job, 'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory auf the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first an then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc"
                          % bibdoc_video_docname)
            master_format = compose_format(
                                    bibdoc_video_extension,
                                    getval(batch_job, 'bibdoc_master_subformat', 'master')
                                    )
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                    batch_job['input'],
                    version=1,
                    description=getval(batch_job, 'bibdoc_master_description'),
                    comment=getval(batch_job, 'bibdoc_master_comment'),
                    docformat=master_format
                    )

    #-----------#
    # JOBS LOOP #
    #-----------#

    return_code = 1
    global _BATCH_STEP

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------"
                           % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(job['bibdoc_docname']).safe_substitute({'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, 'assure_quality') and getval(job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None
            ## We need an extension defined fot the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(
                                                 bibdoc_video_directory,
                                                 bibdoc_slave_video_docname,
                                                 bibdoc_video_extension
                                                 )
            _task_write_message("Transcoding %s to %s;%s" % (bibdoc_slave_video_docname,
                                bibdoc_video_extension,
                                bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            encoding_result = encode_video(
                 input_file=batch_job['input'],
                 output_file=bibdoc_video_fullpath,
                 acodec=getval(job, 'audiocodec'),
                 vcodec=getval(job, 'videocodec'),
                 abitrate=getval(job, 'videobitrate'),
                 vbitrate=getval(job, 'audiobitrate'),
                 resolution=getval(job, 'resolution'),
                 passes=getval(job, 'passes', 1),
                 special=getval(job, 'special'),
                 specialfirst=getval(job, 'specialfirst'),
                 specialsecond=getval(job, 'specialsecond'),
                 metadata=getval(job, 'metadata'),
                 width=getval(job, 'width'),
                 height=getval(job, 'height'),
                 aspect=getval(batch_job, 'aspect'), # Aspect for every job
                 profile=getval(job, 'profile'),
                 update_fnc=_task_update_overall_status,
                 message_fnc=_task_write_message
                 )
            return_code &= encoding_result
            ## only on success
            if  encoding_result:
                ## Rename it, adding the subformat
                os.rename(bibdoc_video_fullpath,
                          compose_file(bibdoc_video_directory,
                                       bibdoc_video_extension,
                                       bibdoc_video_subformat,
                                       1,
                                       bibdoc_slave_video_docname)
                          )
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                              bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(getval(job, 'bibdoc_description'),
                                                 bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            #Move this to the batch description
            bibdoc_frame_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
            tmpfname = (tmpdir + "/" + bibdoc_frame_docname + '.'
                        + getval(profile, 'extension',
                        getval(job, 'extension', 'jpg')))
            extraction_result = extract_frames(input_file=batch_job['input'],
                           output_file=tmpfname,
                           size=getval(job, 'size'),
                           positions=getval(job, 'positions'),
                           numberof=getval(job, 'numberof'),
                           width=getval(job, 'width'),
                           height=getval(job, 'height'),
                           aspect=getval(batch_job, 'aspect'),
                           profile=getval(job, 'profile'),
                           update_fnc=_task_update_overall_status,
                           )
            return_code &= extraction_result

            ## only on success:
            if extraction_result:
                ## for every filename in the directorys, create a bibdoc that contains
                ## all sizes of the frame from the two directories
                files = os.listdir(tmpdir)
                for filename in files:
                    ## The docname was altered by BibEncode extract through substitution
                    ## Retrieve it from the filename again
                    bibdoc_frame_docname, bibdoc_frame_extension = os.path.splitext(filename)
                    _task_write_message("Creating new bibdoc for %s" % bibdoc_frame_docname)
                    ## If the bibdoc exists, receive it
                    if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                        bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                    ## Create a new bibdoc if it does not exist
                    else:
                        bibdoc_frame = recdoc.add_bibdoc(docname=bibdoc_frame_docname)

                    ## The filename including path from tmpdir
                    fname = os.path.join(tmpdir, filename)

                    bibdoc_frame_format = compose_format(bibdoc_frame_extension, bibdoc_frame_subformat)
                    ## Same as with the master, if the format allready exists,
                    ## override it, because something went wrong before
                    if bibdoc_frame.format_already_exists_p(bibdoc_frame_format):
                        bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                    _task_write_message("Adding %s jpg;%s to BibDoc"
                                  % (bibdoc_frame_docname,
                                     getval(job, 'bibdoc_subformat')))
                    bibdoc_frame.add_file_new_format(
                                    fname,
                                    version=1,
                                    description=getval(job, 'bibdoc_description'),
                                    comment=getval(job, 'bibdoc_comment'),
                                    docformat=bibdoc_frame_format)
            ## Remove the temporary folders
            _task_write_message("Removing temporary directory")
            shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file = getval(batch_job, 'input'),
                                 pbcoreIdentifier = batch_job['recid'],
                                 aspect_override = getval(batch_job, 'aspect'))
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        marc_snippet = open(getval(batch_job, 'marc_snippet'))
        marcxml = marc_snippet.read()
        marc_snippet.close()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        # only if successfull
        if not return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern', '') in getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            if type(getval(batch_job, 'notify_admin') == type(str()) ):
                _notify_error_admin(batch_job,
                                    getval(batch_job, 'notify_admin'))

            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
    return 1
Ejemplo n.º 40
0
                                # If the issue is only that directory
                                # already exists, then continue, else
                                # report
                                register_exception(req=req, alert_admin=True)
                                raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                    filename = formfields.filename
                    ## Before saving the file to disc, wash the filename (in particular
                    ## washing away UNIX and Windows (e.g. DFS) paths):
                    filename = os.path.basename(filename.split('\\')[-1])
                    filename = filename.strip()
                    if filename != "":
                        # Check that file does not already exist
                        while os.path.exists(os.path.join(dir_to_open, filename)):
                            #dirname, basename, extension = decompose_file(new_destination_path)
                            basedir, name, extension = decompose_file(filename)
                            new_name = propose_next_docname(name)
                            filename = new_name + extension

                        #-------------#
                        # VIDEO STUFF #
                        #-------------#

                        ## Remove all previous uploads
                        filelist = os.listdir(os.path.split(formfields.file.name)[0])
                        for afile in filelist:
                            if argd['access'] in afile:
                                os.remove(os.path.join(os.path.split(formfields.file.name)[0], afile))

                        ## Check if the file is a readable video
                        ## We must exclude all image and audio formats that are readable by ffprobe
    def add(self, req, form):
        """
        Add a comment (review) to record with id recid where recid>0
        Also works for adding a remark to basket with id recid where recid<-99
        @param ln: languange
        @param recid: record id
        @param action:  'DISPLAY' to display add form
                        'SUBMIT' to submit comment once form is filled
                        'REPLY' to reply to an already existing comment
        @param msg: the body of the comment/review or remark
        @param score: star score of the review
        @param note: title of the review
        @param comid: comment id, needed for replying
        @param editor_type: the type of editor used for submitting the
                            comment: 'textarea', 'ckeditor'.
        @param subscribe: if set, subscribe user to receive email
                          notifications when new comment are added to
                          this discussion
        @return the full html page.
        """
        argd = wash_urlargd(form, {'action': (str, "DISPLAY"),
                                   'msg': (str, ""),
                                   'note': (str, ''),
                                   'score': (int, 0),
                                   'comid': (int, 0),
                                   'editor_type': (str, ""),
                                   'subscribe': (str, ""),
                                   'cookie': (str, "")
                                   })
        _ = gettext_set_language(argd['ln'])

        actions = ['DISPLAY', 'REPLY', 'SUBMIT']
        uid = getUid(req)

        # Is site ready to accept comments?
        if uid == -1 or (not CFG_WEBCOMMENT_ALLOW_COMMENTS and not CFG_WEBCOMMENT_ALLOW_REVIEWS):
            return page_not_authorized(req, "../comments/add",
                                       navmenuid='search')

        # Is user allowed to post comment?
        user_info = collect_user_info(req)
        (auth_code_1, auth_msg_1) = check_user_can_view_comments(user_info, self.recid)
        (auth_code_2, auth_msg_2) = check_user_can_send_comments(user_info, self.recid)
        if isGuestUser(uid):
            cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
            # Save user's value in cookie, so that these "POST"
            # parameters are not lost during login process
            msg_cookie = mail_cookie_create_common('comment_msg',
                                                    {'msg': argd['msg'],
                                                     'note': argd['note'],
                                                     'score': argd['score'],
                                                     'editor_type': argd['editor_type'],
                                                     'subscribe': argd['subscribe']},
                                                    onetime=True)
            target = '/youraccount/login' + \
                make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \
                CFG_SITE_SECURE_URL + user_info['uri'] + '&cookie=' + msg_cookie}, {})
            return redirect_to_url(req, target, norobot=True)
        elif (auth_code_1 or auth_code_2):
            return page_not_authorized(req, "../", \
                text = auth_msg_1 + auth_msg_2)

        if argd['comid']:
            # If replying to a comment, are we on a record that
            # matches the original comment user is replying to?
            if not check_comment_belongs_to_record(argd['comid'], self.recid):
                return page_not_authorized(req, "../", \
                                           text = _("Specified comment does not belong to this record"))


            # Is user trying to reply to a restricted comment? Make
            # sure user has access to it.  We will then inherit its
            # restriction for the new comment
            (auth_code, auth_msg) = check_user_can_view_comment(user_info, argd['comid'])
            if auth_code:
                return page_not_authorized(req, "../", \
                                           text = _("You do not have access to the specified comment"))

            # Is user trying to reply to a deleted comment? If so, we
            # let submitted comment go (to not lose possibly submitted
            # content, if comment is submitted while original is
            # deleted), but we "reset" comid to make sure that for
            # action 'REPLY' the original comment is not included in
            # the reply
            if is_comment_deleted(argd['comid']):
                argd['comid'] = 0

        user_info = collect_user_info(req)
        can_attach_files = False
        (auth_code, auth_msg) = check_user_can_attach_file_to_comments(user_info, self.recid)
        if not auth_code and (user_info['email'] != 'guest'):
            can_attach_files = True

        warning_msgs = [] # list of warning tuples (warning_text, warning_color)
        added_files = {}
        if can_attach_files:
            # User is allowed to attach files. Process the files
            file_too_big = False
            formfields = form.get('commentattachment[]', [])
            if not hasattr(formfields, "__getitem__"): # A single file was uploaded
                formfields = [formfields]
            for formfield in formfields[:CFG_WEBCOMMENT_MAX_ATTACHED_FILES]:
                if hasattr(formfield, "filename") and formfield.filename:
                    filename = formfield.filename
                    dir_to_open = os.path.join(CFG_TMPDIR, 'webcomment', str(uid))
                    try:
                        assert(dir_to_open.startswith(CFG_TMPDIR))
                    except AssertionError:
                        register_exception(req=req,
                                           prefix='User #%s tried to upload file to forbidden location: %s' \
                                           % (uid, dir_to_open))

                    if not os.path.exists(dir_to_open):
                        try:
                            os.makedirs(dir_to_open)
                        except:
                            register_exception(req=req, alert_admin=True)

                    ## Before saving the file to disc, wash the filename (in particular
                    ## washing away UNIX and Windows (e.g. DFS) paths):
                    filename = os.path.basename(filename.split('\\')[-1])
                    filename = filename.strip()
                    if filename != "":
                        # Check that file does not already exist
                        n = 1
                        while os.path.exists(os.path.join(dir_to_open, filename)):
                            basedir, name, extension = decompose_file(filename)
                            new_name = propose_next_docname(name)
                            filename = new_name + extension

                        fp = open(os.path.join(dir_to_open, filename), "w")
                        # FIXME: temporary, waiting for wsgi handler to be
                        # fixed. Once done, read chunk by chunk
##                         while formfield.file:
##                             fp.write(formfield.file.read(10240))
                        fp.write(formfield.file.read())
                        fp.close()
                        # Isn't this file too big?
                        file_size = os.path.getsize(os.path.join(dir_to_open, filename))
                        if CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE > 0 and \
                               file_size > CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE:
                            os.remove(os.path.join(dir_to_open, filename))
                            # One file is too big: record that,
                            # dismiss all uploaded files and re-ask to
                            # upload again
                            file_too_big = True
                            try:
                                raise InvenioWebCommentWarning(_('The size of file \\"%s\\" (%s) is larger than maximum allowed file size (%s). Select files again.') % (cgi.escape(filename), str(file_size/1024) + 'KB', str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE/1024) + 'KB'))
                            except InvenioWebCommentWarning, exc:
                                register_exception(stream='warning')
                                warning_msgs.append((exc.message, ''))
                            #warning_msgs.append(('WRN_WEBCOMMENT_MAX_FILE_SIZE_REACHED', cgi.escape(filename), str(file_size/1024) + 'KB', str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE/1024) + 'KB'))
                        else:
                            added_files[filename] = os.path.join(dir_to_open, filename)

            if file_too_big:
                # One file was too big. Removed all uploaded filed
                for filepath in added_files.items():
                    try:
                        os.remove(filepath)
                    except:
                        # File was already removed or does not exist?
                        pass
Ejemplo n.º 42
0
    def upload_video(self, req, form):
        """
        A clone of uploadfile but for (large) videos.
        Does not copy the uploaded file to the websubmit directory.
        Instead, the path to the file is stored inside the submission directory.
        """
        def gcd(a, b):
            """ the euclidean algorithm """
            while a:
                a, b = b % a, a
            return b

        from invenio.bibencode_extract import extract_frames
        from invenio.bibencode_config import CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR, CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME
        from invenio.bibencode_encode import determine_aspect
        from invenio.bibencode_utils import probe
        from invenio.bibencode_metadata import ffprobe_metadata
        from invenio.websubmit_config import CFG_WEBSUBMIT_TMP_VIDEO_PREFIX

        argd = wash_urlargd(
            form, {
                'doctype': (str, ''),
                'access': (str, ''),
                'indir': (str, ''),
                'session_id': (str, ''),
                'rename': (str, ''),
            })

        curdir = None
        if not form.has_key("indir") or \
               not form.has_key("doctype") or \
               not form.has_key("access"):
            raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
        else:
            curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'],
                                  argd['doctype'], argd['access'])

        user_info = collect_user_info(req)
        if form.has_key("session_id"):
            # Are we uploading using Flash, which does not transmit
            # cookie? The expect to receive session_id as a form
            # parameter.  First check that IP addresses do not
            # mismatch.

            uid = session.uid
            user_info = collect_user_info(uid)
            try:
                act_fd = file(os.path.join(curdir, 'act'))
                action = act_fd.read()
                act_fd.close()
            except:
                act = ""

        # Is user authorized to perform this action?
        (auth_code, auth_message) = acc_authorize_action(
            uid,
            "submit",
            authorized_if_no_roles=not isGuestUser(uid),
            verbose=0,
            doctype=argd['doctype'],
            act=action)
        if acc_is_role("submit", doctype=argd['doctype'],
                       act=action) and auth_code != 0:
            # User cannot submit
            raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED)
        else:
            # Process the upload and get the response
            json_response = {}
            for key, formfields in form.items():
                filename = key.replace("[]", "")
                if hasattr(formfields, "filename") and formfields.filename:
                    dir_to_open = os.path.abspath(
                        os.path.join(curdir, 'files', str(user_info['uid']),
                                     key))
                    try:
                        assert (
                            dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR))
                    except AssertionError:
                        register_exception(req=req,
                                           prefix='curdir="%s", key="%s"' %
                                           (curdir, key))
                        raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                    if not os.path.exists(dir_to_open):
                        try:
                            os.makedirs(dir_to_open)
                        except OSError, e:
                            if e.errno != errno.EEXIST:
                                # If the issue is only that directory
                                # already exists, then continue, else
                                # report
                                register_exception(req=req, alert_admin=True)
                                raise apache.SERVER_RETURN(
                                    apache.HTTP_FORBIDDEN)

                    filename = formfields.filename
                    ## Before saving the file to disc, wash the filename (in particular
                    ## washing away UNIX and Windows (e.g. DFS) paths):
                    filename = os.path.basename(filename.split('\\')[-1])
                    filename = filename.strip()
                    if filename != "":
                        # Check that file does not already exist
                        while os.path.exists(
                                os.path.join(dir_to_open, filename)):
                            #dirname, basename, extension = decompose_file(new_destination_path)
                            basedir, name, extension = decompose_file(filename)
                            new_name = propose_next_docname(name)
                            filename = new_name + extension

                        #-------------#
                        # VIDEO STUFF #
                        #-------------#

                        ## Remove all previous uploads
                        filelist = os.listdir(
                            os.path.split(formfields.file.name)[0])
                        for afile in filelist:
                            if argd['access'] in afile:
                                os.remove(
                                    os.path.join(
                                        os.path.split(formfields.file.name)[0],
                                        afile))

                        ## Check if the file is a readable video
                        ## We must exclude all image and audio formats that are readable by ffprobe
                        if (os.path.splitext(filename)[1] in [
                                'jpg', 'jpeg', 'gif', 'tiff', 'bmp', 'png',
                                'tga', 'jp2', 'j2k', 'jpf', 'jpm', 'mj2',
                                'biff', 'cgm', 'exif', 'img', 'mng', 'pic',
                                'pict', 'raw', 'wmf', 'jpe', 'jif', 'jfif',
                                'jfi', 'tif', 'webp', 'svg', 'ai', 'ps', 'psd',
                                'wav', 'mp3', 'pcm', 'aiff', 'au', 'flac',
                                'wma', 'm4a', 'wv', 'oga', 'm4a', 'm4b', 'm4p',
                                'm4r', 'aac', 'mp4', 'vox', 'amr', 'snd'
                        ] or not probe(formfields.file.name)):
                            formfields.file.close()
                            raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                        ## We have no "delete" attribute in Python 2.4
                        if sys.hexversion < 0x2050000:
                            ## We need to rename first and create a dummy file
                            ## Rename the temporary file for the garbage collector
                            new_tmp_fullpath = os.path.split(
                                formfields.file.name
                            )[0] + "/" + CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd[
                                'access'] + "_" + os.path.split(
                                    formfields.file.name)[1]
                            os.rename(formfields.file.name, new_tmp_fullpath)
                            dummy = open(formfields.file.name, "w")
                            dummy.close()
                            formfields.file.close()
                        else:
                            # Mark the NamedTemporatyFile as not to be deleted
                            formfields.file.delete = False
                            formfields.file.close()
                            ## Rename the temporary file for the garbage collector
                            new_tmp_fullpath = os.path.split(
                                formfields.file.name
                            )[0] + "/" + CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd[
                                'access'] + "_" + os.path.split(
                                    formfields.file.name)[1]
                            os.rename(formfields.file.name, new_tmp_fullpath)

                        # Write the path to the temp file to a file in STORAGEDIR
                        fp = open(os.path.join(dir_to_open, "filepath"), "w")
                        fp.write(new_tmp_fullpath)
                        fp.close()

                        fp = open(os.path.join(dir_to_open, "filename"), "w")
                        fp.write(filename)
                        fp.close()

                        ## We are going to extract some thumbnails for websubmit ##
                        sample_dir = os.path.join(
                            curdir, 'files', str(user_info['uid']),
                            CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR)
                        try:
                            ## Remove old thumbnails
                            shutil.rmtree(sample_dir)
                        except OSError:
                            register_exception(req=req, alert_admin=False)
                        try:
                            os.makedirs(
                                os.path.join(curdir, 'files',
                                             str(user_info['uid']),
                                             sample_dir))
                        except OSError:
                            register_exception(req=req, alert_admin=False)
                        try:
                            extract_frames(
                                input_file=new_tmp_fullpath,
                                output_file=os.path.join(
                                    sample_dir,
                                    CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME
                                ),
                                size="600x600",
                                numberof=5)
                            json_response['frames'] = []
                            for extracted_frame in os.listdir(sample_dir):
                                json_response['frames'].append(extracted_frame)
                        except:
                            ## If the frame extraction fails, something was bad with the video
                            os.remove(new_tmp_fullpath)
                            register_exception(req=req, alert_admin=False)
                            raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                        ## Try to detect the aspect. if this fails, the video is not readable
                        ## or a wrong file might have been uploaded
                        try:
                            (aspect, width,
                             height) = determine_aspect(new_tmp_fullpath)
                            if aspect:
                                aspx, aspy = aspect.split(':')
                            else:
                                the_gcd = gcd(width, height)
                                aspx = str(width / the_gcd)
                                aspy = str(height / the_gcd)
                            json_response['aspx'] = aspx
                            json_response['aspy'] = aspy
                        except TypeError:
                            ## If the aspect detection completely fails
                            os.remove(new_tmp_fullpath)
                            register_exception(req=req, alert_admin=False)
                            raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                        ## Try to extract some metadata from the video container
                        metadata = ffprobe_metadata(new_tmp_fullpath)
                        json_response['meta_title'] = metadata['format'].get(
                            'TAG:title')
                        json_response['meta_description'] = metadata[
                            'format'].get('TAG:description')
                        json_response['meta_year'] = metadata['format'].get(
                            'TAG:year')
                        json_response['meta_author'] = metadata['format'].get(
                            'TAG:author')
                    ## Empty file name
                    else:
                        raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
                    ## We found our file, we can break the loop
                    break

            # Send our response
            if CFG_JSON_AVAILABLE:

                dumped_response = json.dumps(json_response)

                # store the response in the websubmit directory
                # this is needed if the submission is not finished and continued later
                response_dir = os.path.join(curdir, 'files',
                                            str(user_info['uid']), "response")
                try:
                    os.makedirs(response_dir)
                except OSError:
                    # register_exception(req=req, alert_admin=False)
                    pass
                fp = open(os.path.join(response_dir, "response"), "w")
                fp.write(dumped_response)
                fp.close()

                return dumped_response
def read_metadata(inputfile, force=None, remote=False,
                  loginpw=None, verbose=0):
    """
    Returns metadata extracted from given file as dictionary.

    Availability depends on input file format and installed plugins
    (return C{TypeError} if unsupported file format).

    @param inputfile: path to a file
    @type inputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param force: name of plugin to use, to skip plugin auto-discovery
    @type force: string
    @param remote: if the file is accessed remotely or not
    @type remote: boolean
    @param loginpw: credentials to access secure servers (username:password)
    @type loginpw: string
    @return: dictionary of metadata tags as keys, and (interpreted)
             value as value
    @rtype: dict
    @raise TypeError: if file format is not supported.
    @raise RuntimeError: if required library to process file is missing.
    @raise InvenioWebSubmitFileMetadataRuntimeError: when metadata cannot be read.
    """
    metadata = None
    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    if verbose > 5:
        print ext.lower(), 'extension to extract from'

    # Load plugins
    metadata_extractor_plugins = PluginContainer(
        os.path.join(CFG_PYLIBDIR,
                     'invenio', 'websubmit_file_metadata_plugins', 'wsm_*.py'),
        plugin_builder=plugin_builder_function,
        api_version=__required_plugin_API_version__)

    # Loop through the plugins to find a good one for given file
    for plugin_name, plugin in metadata_extractor_plugins.iteritems():
        # Local file
        if plugin.has_key('can_read_local') and \
            plugin['can_read_local'](inputfile) and not remote and \
            (not force or plugin_name == force):
            if verbose > 5:
                print 'Using ' + plugin_name
            fetched_metadata = plugin['read_metadata_local'](inputfile,
                                                             verbose)
            if not metadata:
                metadata = fetched_metadata
            else:
                metadata.update(fetched_metadata)

        # Remote file
        elif remote and plugin.has_key('can_read_remote') and \
            plugin['can_read_remote'](inputfile) and \
            (not force or plugin_name == force):
            if verbose > 5:
                print 'Using ' + plugin_name
            fetched_metadata = plugin['read_metadata_remote'](inputfile,
                                                              loginpw,
                                                              verbose)
            if not metadata:
                metadata = fetched_metadata
            else:
                metadata.update(fetched_metadata)

    # Return in case we have something
    if metadata is not None:
        return metadata

    # Case of no plugin found, raise
    raise TypeError, 'Unsupported file type'
Ejemplo n.º 44
0
def createRelatedFormats(fullpath,
                         overwrite=True,
                         debug=False,
                         consider_version=False):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.
    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats
    @param consider_version: (bool) if True, consider the version info
                             in C{fullpath} to find missing format
                             for that specific version, if C{fullpath}
                             contains version info
    Return a list of the paths to the converted files
    """
    file_converter_logger = get_file_converter_logger()
    old_logging_level = file_converter_logger.getEffectiveLevel()
    if debug:
        file_converter_logger.setLevel(DEBUG)
    try:
        createdpaths = []
        if consider_version:
            try:
                basedir, filename, extension, version = decompose_file_with_version(
                    fullpath)
            except:
                basedir, filename, extension = decompose_file(fullpath)
                version = 0
        else:
            basedir, filename, extension = decompose_file(fullpath)
            version = 0
        extension = extension.lower()
        if debug:
            print >> sys.stderr, "basedir: %s, filename: %s, extension: %s" % (
                basedir, filename, extension)

        if overwrite:
            missing_formats = get_missing_formats([fullpath])
        else:
            if version:
                filelist = glob.glob(
                    os.path.join(basedir, '%s*;%s' % (filename, version)))
            else:
                filelist = glob.glob(os.path.join(basedir, '%s*' % filename))
            if debug:
                print >> sys.stderr, "filelist: %s" % filelist
            missing_formats = get_missing_formats(filelist)
        if debug:
            print >> sys.stderr, "missing_formats: %s" % missing_formats
        for path, formats in missing_formats.iteritems():
            if debug:
                print >> sys.stderr, "... path: %s, formats: %s" % (path,
                                                                    formats)
            for aformat in formats:
                if debug:
                    print >> sys.stderr, "...... aformat: %s" % aformat
                newpath = os.path.join(basedir, filename + aformat)
                if debug:
                    print >> sys.stderr, "...... newpath: %s" % newpath
                try:
                    if CFG_BIBDOCFILE_FILEDIR in basedir:
                        # We should create the new files in a temporary location, not
                        # directly inside the BibDoc directory.
                        newpath = convert_file(path, output_format=aformat)
                    else:
                        convert_file(path, newpath)
                    createdpaths.append(newpath)
                except InvenioWebSubmitFileConverterError, msg:
                    if debug:
                        print >> sys.stderr, "...... Exception: %s" % msg
                    register_exception(alert_admin=True)
    finally:
        if debug:
            file_converter_logger.setLevel(old_logging_level)
    return createdpaths
def Move_Files_to_Storage(parameters, curdir, form, user_info=None):
    """
    The function moves files received from the standard submission's
    form through file input element(s). The document are assigned a
    'doctype' (or category) corresponding to the file input element
    (eg. a file uploaded throught 'DEMOPIC_FILE' will go to
    'DEMOPIC_FILE' doctype/category).

    Websubmit engine builds the following file organization in the
    directory curdir/files:

                  curdir/files
                        |
      _____________________________________________________________________
            |                                   |                          |
      ./file input 1 element's name      ./file input 2 element's name    ....
         (for eg. 'DEMOART_MAILFILE')       (for eg. 'DEMOART_APPENDIX')
         |                                     |
      test1.pdf                             test2.pdf


    There is only one instance of all possible extension(pdf, gz...) in each part
    otherwise we may encounter problems when renaming files.

    + parameters['rename']: if given, all the files in curdir/files
      are renamed.  parameters['rename'] is of the form:
      <PA>elemfilename[re]</PA>* where re is an regexp to select(using
      re.sub) what part of the elem file has to be selected.
      e.g: <PA>file:TEST_FILE_RN</PA>

    + parameters['documenttype']: if given, other formats are created.
      It has 2 possible values: - if "picture" icon in gif format is created
                                - if "fulltext" ps, gz .... formats are created

    + parameters['paths_and_suffixes']: directories to look into and
      corresponding suffix to add to every file inside. It must have
      the same structure as a Python dictionnary of the following form
      {'FrenchAbstract':'french', 'EnglishAbstract':''}

      The keys are the file input element name from the form <=>
      directories in curdir/files The values associated are the
      suffixes which will be added to all the files in
      e.g. curdir/files/FrenchAbstract

    + parameters['iconsize'] need only if 'icon' is selected in
      parameters['documenttype']

    + parameters['paths_and_restrictions']: the restrictions to apply
      to each uploaded file. The parameter must have the same
      structure as a Python dictionnary of the following form:
      {'DEMOART_APPENDIX':'restricted'}
      Files not specified in this parameter are not restricted.
      The specified restrictions can include a variable that can be
      replaced at runtime, for eg:
      {'DEMOART_APPENDIX':'restricted to <PA>file:SuE</PA>'}

    + parameters['paths_and_doctypes']: if a doctype is specified,
      the file will be saved under the 'doctype/collection' instead
      of under the default doctype/collection given by the name
      of the upload element that was used on the websubmit interface.
      to configure the doctype in websubmit, enter the value as in a
      dictionnary, for eg:
      {'PATHS_SWORD_UPL' : 'PUSHED_TO_ARXIV'} -> from
      Demo_Export_Via_Sword [DEMOSWR] Document Types
    """

    global sysno
    paths_and_suffixes = parameters['paths_and_suffixes']
    paths_and_restrictions = parameters['paths_and_restrictions']
    rename = parameters['rename']
    documenttype = parameters['documenttype']
    iconsizes = parameters['iconsize'].split(',')
    paths_and_doctypes = parameters['paths_and_doctypes']

    ## Create an instance of BibRecDocs for the current recid(sysno)
    bibrecdocs = BibRecDocs(sysno)

    paths_and_suffixes = get_dictionary_from_string(paths_and_suffixes)

    paths_and_restrictions = get_dictionary_from_string(paths_and_restrictions)

    paths_and_doctypes = get_dictionary_from_string(paths_and_doctypes)

    ## Go through all the directories specified in the keys
    ## of parameters['paths_and_suffixes']
    for path in paths_and_suffixes.keys():
        ## Check if there is a directory for the current path
        if os.path.exists("%s/files/%s" % (curdir, path)):
            ## Retrieve the restriction to apply to files in this
            ## directory
            restriction = paths_and_restrictions.get(path, '')
            restriction = re.sub('<PA>(?P<content>[^<]*)</PA>',
                                 get_pa_tag_content, restriction)

            ## Go through all the files in curdir/files/path
            for current_file in os.listdir("%s/files/%s" % (curdir, path)):
                ## retrieve filename and extension
                dummy, filename, extension = decompose_file(current_file)
                if extension and extension[0] != ".":
                    extension = '.' + extension
                if len(paths_and_suffixes[path]) != 0:
                    extension = "_%s%s" % (paths_and_suffixes[path], extension)
                ## Build the new file name if rename parameter has been given
                if rename:
                    filename = re.sub('<PA>(?P<content>[^<]*)</PA>', \
                                      get_pa_tag_content, \
                                      parameters['rename'])

                if rename or len(paths_and_suffixes[path]) != 0:
                    ## Rename the file
                    try:
                        # Write the log rename_cmd
                        fd = open("%s/rename_cmd" % curdir, "a+")
                        fd.write("%s/files/%s/%s" % (curdir, path, current_file) + " to " +\
                                  "%s/files/%s/%s%s" % (curdir, path, filename, extension) + "\n\n")
                        ## Rename
                        os.rename("%s/files/%s/%s" % (curdir, path, current_file), \
                                  "%s/files/%s/%s%s" % (curdir, path, filename, extension))

                        fd.close()
                        ## Save the new name in a text file in curdir so that
                        ## the new filename can be used by templates to created the recmysl
                        fd = open("%s/%s_RENAMED" % (curdir, path), "w")
                        fd.write("%s%s" % (filename, extension))
                        fd.close()
                    except OSError, err:
                        msg = "Cannot rename the file.[%s]"
                        msg %= str(err)
                        raise InvenioWebSubmitFunctionWarning(msg)
                fullpath = "%s/files/%s/%s%s" % (curdir, path, filename,
                                                 extension)
                ## Check if there is any existing similar file
                if not bibrecdocs.check_file_exists(fullpath):
                    bibdoc = bibrecdocs.add_new_file(
                        fullpath,
                        doctype=paths_and_doctypes.get(path, path),
                        never_fail=True)
                    bibdoc.set_status(restriction)
                    ## Fulltext
                    if documenttype == "fulltext":
                        additionalformats = createRelatedFormats(fullpath)
                        if len(additionalformats) > 0:
                            for additionalformat in additionalformats:
                                try:
                                    bibrecdocs.add_new_format(additionalformat)
                                except InvenioWebSubmitFileError:
                                    pass
                    ## Icon
                    elif documenttype == "picture":
                        has_added_default_icon_subformat_p = False
                        for iconsize in iconsizes:
                            try:
                                iconpath, iconname = create_icon({
                                    'input-file':
                                    fullpath,
                                    'icon-scale':
                                    iconsize,
                                    'icon-name':
                                    None,
                                    'icon-file-format':
                                    None,
                                    'multipage-icon':
                                    False,
                                    'multipage-icon-delay':
                                    100,
                                    'verbosity':
                                    0,
                                })
                            except Exception, e:
                                register_exception(
                                    prefix=
                                    'Impossible to create icon for %s (record %s)'
                                    % (fullpath, sysno),
                                    alert_admin=True)
                                continue
                            iconpath = os.path.join(iconpath, iconname)
                            docname = decompose_file(fullpath)[1]
                            try:
                                mybibdoc = bibrecdocs.get_bibdoc(docname)
                            except InvenioWebSubmitFileError:
                                mybibdoc = None
                            if iconpath is not None and mybibdoc is not None:
                                try:
                                    icon_suffix = iconsize.replace(
                                        '>', '').replace('<', '').replace(
                                            '^', '').replace('!', '')
                                    if not has_added_default_icon_subformat_p:
                                        mybibdoc.add_icon(iconpath)
                                        has_added_default_icon_subformat_p = True
                                    else:
                                        mybibdoc.add_icon(
                                            iconpath,
                                            subformat=
                                            CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT
                                            + "-" + icon_suffix)
                                    ## Save the new icon filename in a text file in curdir so that
                                    ## it can be used by templates to created the recmysl
                                    try:
                                        if not has_added_default_icon_subformat_p:
                                            fd = open(
                                                "%s/%s_ICON" % (curdir, path),
                                                "w")
                                        else:
                                            fd = open(
                                                "%s/%s_ICON_%s" %
                                                (curdir, path,
                                                 iconsize + '_' + icon_suffix),
                                                "w")
                                        fd.write(os.path.basename(iconpath))
                                        fd.close()
                                    except OSError, err:
                                        msg = "Cannot store icon filename.[%s]"
                                        msg %= str(err)
                                        raise InvenioWebSubmitFunctionWarning(
                                            msg)
                                except InvenioWebSubmitFileError, e:
                                    # Most probably icon already existed.
                                    pass
                            elif mybibdoc is not None:
                                mybibdoc.delete_icon()
Ejemplo n.º 46
0
                                # report
                                register_exception(req=req, alert_admin=True)
                                raise apache.SERVER_RETURN(
                                    apache.HTTP_FORBIDDEN)

                    filename = formfields.filename
                    ## Before saving the file to disc, wash the filename (in particular
                    ## washing away UNIX and Windows (e.g. DFS) paths):
                    filename = os.path.basename(filename.split('\\')[-1])
                    filename = filename.strip()
                    if filename != "":
                        # Check that file does not already exist
                        while os.path.exists(
                                os.path.join(dir_to_open, filename)):
                            #dirname, basename, extension = decompose_file(new_destination_path)
                            basedir, name, extension = decompose_file(filename)
                            new_name = propose_next_docname(name)
                            filename = new_name + extension

                        #-------------#
                        # VIDEO STUFF #
                        #-------------#

                        ## Remove all previous uploads
                        filelist = os.listdir(
                            os.path.split(formfields.file.name)[0])
                        for afile in filelist:
                            if argd['access'] in afile:
                                os.remove(
                                    os.path.join(
                                        os.path.split(formfields.file.name)[0],
Ejemplo n.º 47
0
    def add(self, req, form):
        """
        Add a comment (review) to record with id recid where recid>0
        Also works for adding a remark to basket with id recid where recid<-99
        @param ln: languange
        @param recid: record id
        @param action:  'DISPLAY' to display add form
                        'SUBMIT' to submit comment once form is filled
                        'REPLY' to reply to an already existing comment
        @param msg: the body of the comment/review or remark
        @param score: star score of the review
        @param note: title of the review
        @param comid: comment id, needed for replying
        @param editor_type: the type of editor used for submitting the
                            comment: 'textarea', 'fckeditor'.
        @param subscribe: if set, subscribe user to receive email
                          notifications when new comment are added to
                          this discussion
        @return the full html page.
        """
        argd = wash_urlargd(
            form, {
                'action': (str, "DISPLAY"),
                'msg': (str, ""),
                'note': (str, ''),
                'score': (int, 0),
                'comid': (int, 0),
                'editor_type': (str, ""),
                'subscribe': (str, ""),
                'cookie': (str, "")
            })
        _ = gettext_set_language(argd['ln'])

        actions = ['DISPLAY', 'REPLY', 'SUBMIT']
        uid = getUid(req)

        # Is site ready to accept comments?
        if uid == -1 or (not CFG_WEBCOMMENT_ALLOW_COMMENTS
                         and not CFG_WEBCOMMENT_ALLOW_REVIEWS):
            return page_not_authorized(req,
                                       "../comments/add",
                                       navmenuid='search')

        # Is user allowed to post comment?
        user_info = collect_user_info(req)
        (auth_code_1,
         auth_msg_1) = check_user_can_view_comments(user_info, self.recid)
        (auth_code_2,
         auth_msg_2) = check_user_can_send_comments(user_info, self.recid)
        if isGuestUser(uid):
            cookie = mail_cookie_create_authorize_action(
                VIEWRESTRCOLL, {
                    'collection': guess_primary_collection_of_a_record(
                        self.recid)
                })
            # Save user's value in cookie, so that these "POST"
            # parameters are not lost during login process
            msg_cookie = mail_cookie_create_common(
                'comment_msg', {
                    'msg': argd['msg'],
                    'note': argd['note'],
                    'score': argd['score'],
                    'editor_type': argd['editor_type'],
                    'subscribe': argd['subscribe']
                },
                onetime=True)
            target = '/youraccount/login' + \
                make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \
                CFG_SITE_URL + user_info['uri'] + '&cookie=' + msg_cookie}, {})
            return redirect_to_url(req, target, norobot=True)
        elif (auth_code_1 or auth_code_2):
            return page_not_authorized(req, "../", \
                text = auth_msg_1 + auth_msg_2)

        if argd['comid']:
            # If replying to a comment, are we on a record that
            # matches the original comment user is replying to?
            if not check_comment_belongs_to_record(argd['comid'], self.recid):
                return page_not_authorized(req, "../", \
                                           text = _("Specified comment does not belong to this record"))

            # Is user trying to reply to a restricted comment? Make
            # sure user has access to it.  We will then inherit its
            # restriction for the new comment
            (auth_code,
             auth_msg) = check_user_can_view_comment(user_info, argd['comid'])
            if auth_code:
                return page_not_authorized(req, "../", \
                                           text = _("You do not have access to the specified comment"))

            # Is user trying to reply to a deleted comment? If so, we
            # let submitted comment go (to not lose possibly submitted
            # content, if comment is submitted while original is
            # deleted), but we "reset" comid to make sure that for
            # action 'REPLY' the original comment is not included in
            # the reply
            if is_comment_deleted(argd['comid']):
                argd['comid'] = 0

        user_info = collect_user_info(req)
        can_attach_files = False
        (auth_code, auth_msg) = check_user_can_attach_file_to_comments(
            user_info, self.recid)
        if not auth_code and (user_info['email'] != 'guest'):
            can_attach_files = True

        warning_msgs = []
        added_files = {}
        if can_attach_files:
            # User is allowed to attach files. Process the files
            file_too_big = False
            formfields = form.get('commentattachment[]', [])
            if not hasattr(formfields,
                           "__getitem__"):  # A single file was uploaded
                formfields = [formfields]
            for formfield in formfields[:CFG_WEBCOMMENT_MAX_ATTACHED_FILES]:
                if hasattr(formfield, "filename") and formfield.filename:
                    filename = formfield.filename
                    dir_to_open = os.path.join(CFG_TMPDIR, 'webcomment',
                                               str(uid))
                    try:
                        assert (dir_to_open.startswith(CFG_TMPDIR))
                    except AssertionError:
                        register_exception(req=req,
                                           prefix='User #%s tried to upload file to forbidden location: %s' \
                                           % (uid, dir_to_open))

                    if not os.path.exists(dir_to_open):
                        try:
                            os.makedirs(dir_to_open)
                        except:
                            register_exception(req=req, alert_admin=True)

                    ## Before saving the file to disc, wash the filename (in particular
                    ## washing away UNIX and Windows (e.g. DFS) paths):
                    filename = os.path.basename(filename.split('\\')[-1])
                    filename = filename.strip()
                    if filename != "":
                        # Check that file does not already exist
                        n = 1
                        while os.path.exists(
                                os.path.join(dir_to_open, filename)):
                            basedir, name, extension = decompose_file(filename)
                            new_name = propose_next_docname(name)
                            filename = new_name + extension

                        fp = open(os.path.join(dir_to_open, filename), "w")
                        # FIXME: temporary, waiting for wsgi handler to be
                        # fixed. Once done, read chunk by chunk
                        ##                         while formfield.file:
                        ##                             fp.write(formfield.file.read(10240))
                        fp.write(formfield.file.read())
                        fp.close()
                        # Isn't this file too big?
                        file_size = os.path.getsize(
                            os.path.join(dir_to_open, filename))
                        if CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE > 0 and \
                               file_size > CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE:
                            os.remove(os.path.join(dir_to_open, filename))
                            # One file is too big: record that,
                            # dismiss all uploaded files and re-ask to
                            # upload again
                            file_too_big = True
                            warning_msgs.append(
                                ('WRN_WEBCOMMENT_MAX_FILE_SIZE_REACHED',
                                 cgi.escape(filename),
                                 str(file_size / 1024) + 'KB',
                                 str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE / 1024)
                                 + 'KB'))
                        else:
                            added_files[filename] = os.path.join(
                                dir_to_open, filename)

            if file_too_big:
                # One file was too big. Removed all uploaded filed
                for filepath in added_files.items():
                    try:
                        os.remove(filepath)
                    except:
                        # File was already removed or does not exist?
                        pass

        client_ip_address = req.remote_ip
        check_warnings = []
        (ok, problem) = check_recID_is_in_range(self.recid, check_warnings,
                                                argd['ln'])
        if ok:
            title, description, keywords = websearch_templates.tmpl_record_page_header_content(
                req, self.recid, argd['ln'])
            navtrail = create_navtrail_links(
                cc=guess_primary_collection_of_a_record(self.recid))
            # Infoscience modification
            navtrail += '<li><a href="%s/record/%s?ln=%s">%s</a></li>' % (
                CFG_SITE_URL, self.recid, argd['ln'], title)
            navtrail += '<li class="last">%s</li>' % (
                self.discussion == 1 and _('Reviews') or _('Comments'))

            if argd['action'] not in actions:
                argd['action'] = 'DISPLAY'

            if not argd['msg']:
                # User had to login in-between, so retrieve msg
                # from cookie
                try:
                    (kind,
                     cookie_argd) = mail_cookie_check_common(argd['cookie'],
                                                             delete=True)

                    argd.update(cookie_argd)
                except InvenioWebAccessMailCookieDeletedError, e:
                    return redirect_to_url(req, CFG_SITE_URL + '/record/' + \
                                           str(self.recid) + (self.discussion==1 and \
                                                              '/reviews' or '/comments'))
                except InvenioWebAccessMailCookieError, e:
                    # Invalid or empty cookie: continue
                    pass
Ejemplo n.º 48
0
def write_metadata_local(inputfile, outputfile, metadata_dictionary, verbose):
    """
    Metadata write method, takes the .pdf as input and creates a new
    one with the new info.

    @param inputfile: path to the pdf
    @type inputfile: string
    @param outputfile: path to the resulting pdf
    @type outputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param metadata_dictionary: metadata information to update inputfile
    @type metadata_dictionary: dict
    """
    # Take the file name (0 base, 1 name, 2 ext)
    filename = decompose_file(inputfile)[1]

    # Print pdf metadata
    if verbose > 1:
        print 'Metadata information in the PDF file ' + filename + ': \n'
        try:
            os.system(CFG_PATH_PDFTK + ' ' + inputfile + ' dump_data')
        except Exception:
            print 'Problem with inputfile to PDFTK'

    # Info file for pdftk
    (fd, path_to_info) = tempfile.mkstemp(prefix="wsm_pdf_plugin_info_", \
                                             dir=CFG_TMPDIR)
    os.close(fd)
    file_in = open(path_to_info, 'w')
    if verbose > 5:
        print "Saving PDFTK info file to %s" % path_to_info

    # User interaction to form the info file
    # Main Case: Dictionary received through option -d
    if not metadata_dictionary == {}:
        for tag in metadata_dictionary:
            line = 'InfoKey: ' + tag + '\nInfoValue: ' + \
                   metadata_dictionary[tag] + '\n'
            if verbose > 0:
                print line
            file_in.writelines(line)
    else:
        data_modified = False
        user_input = 'user_input'
        print "Entering interactive mode. Choose what you want to do:"
        while (user_input):
            if not data_modified:
                try:
                    user_input = raw_input('[w]rite / [q]uit\n')
                except:
                    print "Aborting"
                    return
            else:
                try:
                    user_input = raw_input(
                        '[w]rite / [q]uit and apply / [a]bort \n')
                except:
                    print "Aborting"
                    return
            if user_input == 'q':
                if not data_modified:
                    return
                break
            elif user_input == 'w':
                try:
                    tag = raw_input('Tag to update:\n')
                    value = raw_input('With value:\n')
                except:
                    print "Aborting"
                    return
                # Write to info file
                line = 'InfoKey: ' + tag + '\nInfoValue: ' + value + '\n'
                data_modified = True
                file_in.writelines(line)
            elif user_input == 'a':
                return
            else:
                print "Invalid option: "
    file_in.close()

    (fd, pdf_temp_path) = tempfile.mkstemp(prefix="wsm_pdf_plugin_pdf_", \
                                              dir=CFG_TMPDIR)
    os.close(fd)

    # Now we call pdftk tool to update the info on a pdf
    #try:
    cmd_pdftk = '%s %s update_info %s output %s'
    (exit_status, output_std, output_err) = \
                  run_shell_command(cmd_pdftk,
                                    args=(CFG_PATH_PDFTK, inputfile,
                                          path_to_info, pdf_temp_path))
    if verbose > 5:
        print output_std, output_err

    if os.path.exists(pdf_temp_path):
        # Move to final destination if exist
        try:
            shutil.move(pdf_temp_path, outputfile)
        except Exception, err:
            raise InvenioWebSubmitFileMetadataRuntimeError("Could not move %s to %s" % \
                                                           (pdf_temp_path, outputfile))
def handle_file_post(req, allowed_mimetypes=None):
    """
    Handle the POST of a file.
    @return: the a tuple with th full path to the file saved on disk,
    and it's mimetype as provided by the request.
    @rtype: (string, string)
    """
    from invenio.bibdocfile import decompose_file, md5
    ## We retrieve the length
    clen = req.headers_in["Content-Length"]
    if clen is None:
        raise InvenioWebInterfaceWSGIContentLenghtError("Content-Length header is missing")
    try:
        clen = int(clen)
        assert (clen > 1)
    except (ValueError, AssertionError):
        raise InvenioWebInterfaceWSGIContentLenghtError("Content-Length header should contain a positive integer")
    ## Let's take the content type
    ctype = req.headers_in["Content-Type"]
    if allowed_mimetypes and ctype not in allowed_mimetypes:
        raise InvenioWebInterfaceWSGIContentTypeError("Content-Type not in allowed list of content types: %s" % allowed_mimetypes)
    ## Let's optionally accept a suggested filename
    suffix = prefix = ''
    g = RE_CDISPOSITION_FILENAME.search(req.headers_in.get("Content-Disposition", ""))
    if g:
        dummy, prefix, suffix = decompose_file(g.group("filename"))
    ## Let's optionally accept an MD5 hash (and use it later for comparison)
    cmd5 = req.headers_in["Content-MD5"]
    if cmd5:
        the_md5 = md5()

    ## Ok. We can initialize the file
    fd, path = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=CFG_TMPDIR)
    the_file = os.fdopen(fd, 'w')
    ## Let's read the file
    while True:
        chunk = req.read(max(10240, clen))
        if len(chunk) < clen:
            ## We expected to read at least clen (which is different than 0)
            ## but chunk was shorter! Gosh! Error! Panic!
            the_file.close()
            os.close(fd)
            os.remove(path)
            raise InvenioWebInterfaceWSGIContentLenghtError("File shorter than what specified in Content-Length")
        if cmd5:
            ## MD5 was in the header let's compute it
            the_md5.update(chunk)
        ## And let's definitively write the content to disk :-)
        the_file.write(chunk)
        clen -= len(chunk)
        if clen == 0:
            ## That's it. Everything was read.
            break
    if cmd5 and the_md5.hexdigest().lower() != cmd5.strip().lower():
        ## Let's check the MD5
        the_file.close()
        os.close(fd)
        os.remove(path)
        raise InvenioWebInterfaceWSGIContentMD5Error("MD5 checksum does not match")
    ## Let's clean everything up
    the_file.close()
    return (path, ctype)
            (fd, output_file) = tempfile.mkstemp(suffix=output_ext, dir=CFG_TMPDIR)
            os.close(fd)
        except IOError, err:
            raise InvenioWebSubmitFileConverterError("It's impossible to create a temporary file: %s" % err)
    else:
        output_file = os.path.abspath(output_file)
        if os.path.exists(output_file):
            os.remove(output_file)

    if need_working_dir:
        try:
            working_dir = tempfile.mkdtemp(dir=CFG_TMPDIR, prefix='conversion')
        except IOError, err:
            raise InvenioWebSubmitFileConverterError("It's impossible to create a temporary directory: %s" % err)

        input_ext = decompose_file(input_file, skip_version=True)[2]
        new_input_file = os.path.join(working_dir, 'input' + input_ext)
        shutil.copy(input_file, new_input_file)
        input_file = new_input_file
    else:
        working_dir = None
        input_file = os.path.abspath(input_file)

    debug('IO prepared: input_file=%s, output_file=%s, working_dir=%s' % (input_file, output_file, working_dir))
    return (input_file, output_file, working_dir)


def clean_working_dir(working_dir):
    """
    Remove the working_dir.
    """
Ejemplo n.º 51
0
def Move_Files_to_Storage(parameters, curdir, form, user_info=None):
    """
    The function moves files received from the standard submission's
    form through file input element(s). The document are assigned a
    'doctype' (or category) corresponding to the file input element
    (eg. a file uploaded throught 'DEMOPIC_FILE' will go to
    'DEMOPIC_FILE' doctype/category).

    Websubmit engine builds the following file organization in the
    directory curdir/files:

                  curdir/files
                        |
      _____________________________________________________________________
            |                                   |                          |
      ./file input 1 element's name      ./file input 2 element's name    ....
         (for eg. 'DEMOART_MAILFILE')       (for eg. 'DEMOART_APPENDIX')
         |                                     |
      test1.pdf                             test2.pdf


    There is only one instance of all possible extension(pdf, gz...) in each part
    otherwise we may encounter problems when renaming files.

    + parameters['rename']: if given, all the files in curdir/files
      are renamed.  parameters['rename'] is of the form:
      <PA>elemfilename[re]</PA>* where re is an regexp to select(using
      re.sub) what part of the elem file has to be selected.
      e.g: <PA>file:TEST_FILE_RN</PA>

    + parameters['documenttype']: if given, other formats are created.
      It has 2 possible values: - if "picture" icon in gif format is created
                                - if "fulltext" ps, gz .... formats are created

    + parameters['paths_and_suffixes']: directories to look into and
      corresponding suffix to add to every file inside. It must have
      the same structure as a Python dictionnary of the following form
      {'FrenchAbstract':'french', 'EnglishAbstract':''}

      The keys are the file input element name from the form <=>
      directories in curdir/files The values associated are the
      suffixes which will be added to all the files in
      e.g. curdir/files/FrenchAbstract

    + parameters['iconsize'] need only if 'icon' is selected in
      parameters['documenttype']

    + parameters['paths_and_restrictions']: the restrictions to apply
      to each uploaded file. The parameter must have the same
      structure as a Python dictionnary of the following form:
      {'DEMOART_APPENDIX':'restricted'}
      Files not specified in this parameter are not restricted.
      The specified restrictions can include a variable that can be
      replaced at runtime, for eg:
      {'DEMOART_APPENDIX':'restricted to <PA>file:SuE</PA>'}

    + parameters['paths_and_doctypes']: if a doctype is specified,
      the file will be saved under the 'doctype/collection' instead
      of under the default doctype/collection given by the name
      of the upload element that was used on the websubmit interface.
      to configure the doctype in websubmit, enter the value as in a
      dictionnary, for eg:
      {'PATHS_SWORD_UPL' : 'PUSHED_TO_ARXIV'} -> from
      Demo_Export_Via_Sword [DEMOSWR] Document Types
    """

    global sysno
    paths_and_suffixes = parameters['paths_and_suffixes']
    paths_and_restrictions = parameters['paths_and_restrictions']
    rename = parameters['rename']
    documenttype = parameters['documenttype']
    iconsizes = parameters['iconsize'].split(',')
    paths_and_doctypes = parameters['paths_and_doctypes']

    ## Create an instance of BibRecDocs for the current recid(sysno)
    bibrecdocs = BibRecDocs(sysno)

    paths_and_suffixes = get_dictionary_from_string(paths_and_suffixes)

    paths_and_restrictions = get_dictionary_from_string(paths_and_restrictions)

    paths_and_doctypes = get_dictionary_from_string(paths_and_doctypes)

    ## Go through all the directories specified in the keys
    ## of parameters['paths_and_suffixes']
    for path in paths_and_suffixes.keys():
        ## Check if there is a directory for the current path
        if os.path.exists("%s/files/%s" % (curdir, path)):
            ## Retrieve the restriction to apply to files in this
            ## directory
            restriction = paths_and_restrictions.get(path, '')
            restriction = re.sub('<PA>(?P<content>[^<]*)</PA>',
                                 get_pa_tag_content,
                                 restriction)

            ## Go through all the files in curdir/files/path
            for current_file in os.listdir("%s/files/%s" % (curdir, path)):
                ## retrieve filename and extension
                dummy, filename, extension = decompose_file(current_file)
                if extension and extension[0] != ".":
                    extension = '.' + extension
                if len(paths_and_suffixes[path]) != 0:
                    extension = "_%s%s" % (paths_and_suffixes[path], extension)
                ## Build the new file name if rename parameter has been given
                if rename:
                    filename = re.sub('<PA>(?P<content>[^<]*)</PA>', \
                                      get_pa_tag_content, \
                                      parameters['rename'])

                if rename or len(paths_and_suffixes[path]) != 0 :
                    ## Rename the file
                    try:
                        # Write the log rename_cmd
                        fd = open("%s/rename_cmd" % curdir, "a+")
                        fd.write("%s/files/%s/%s" % (curdir, path, current_file) + " to " +\
                                  "%s/files/%s/%s%s" % (curdir, path, filename, extension) + "\n\n")
                        ## Rename
                        os.rename("%s/files/%s/%s" % (curdir, path, current_file), \
                                  "%s/files/%s/%s%s" % (curdir, path, filename, extension))

                        fd.close()
                        ## Save the new name in a text file in curdir so that
                        ## the new filename can be used by templates to created the recmysl
                        fd = open("%s/%s_RENAMED" % (curdir, path), "w")
                        fd.write("%s%s" % (filename, extension))
                        fd.close()
                    except OSError, err:
                        msg = "Cannot rename the file.[%s]"
                        msg %= str(err)
                        raise InvenioWebSubmitFunctionWarning(msg)
                fullpath = "%s/files/%s/%s%s" % (curdir, path, filename, extension)
                ## Check if there is any existing similar file
                if not bibrecdocs.check_file_exists(fullpath):
                    bibdoc = bibrecdocs.add_new_file(fullpath, doctype=paths_and_doctypes.get(path, path), never_fail=True)
                    bibdoc.set_status(restriction)
                    ## Fulltext
                    if documenttype == "fulltext":
                        additionalformats = createRelatedFormats(fullpath)
                        if len(additionalformats) > 0:
                            for additionalformat in additionalformats:
                                try:
                                    bibrecdocs.add_new_format(additionalformat)
                                except InvenioWebSubmitFileError:
                                    pass
                    ## Icon
                    elif documenttype == "picture":
                        has_added_default_icon_subformat_p = False
                        for iconsize in iconsizes:
                            try:
                                iconpath, iconname = create_icon({
                                    'input-file' : fullpath,
                                    'icon-scale' : iconsize,
                                    'icon-name' : None,
                                    'icon-file-format' : None,
                                    'multipage-icon' : False,
                                    'multipage-icon-delay' : 100,
                                    'verbosity' : 0,
                                })
                            except Exception, e:
                                register_exception(prefix='Impossible to create icon for %s (record %s)' % (fullpath, sysno), alert_admin=True)
                                continue
                            iconpath = os.path.join(iconpath, iconname)
                            docname = decompose_file(fullpath)[1]
                            try:
                                mybibdoc = bibrecdocs.get_bibdoc(docname)
                            except InvenioWebSubmitFileError:
                                mybibdoc = None
                            if iconpath is not None and mybibdoc is not None:
                                try:
                                    icon_suffix = iconsize.replace('>', '').replace('<', '').replace('^', '').replace('!', '')
                                    if not has_added_default_icon_subformat_p:
                                        mybibdoc.add_icon(iconpath)
                                        has_added_default_icon_subformat_p = True
                                    else:
                                        mybibdoc.add_icon(iconpath, subformat=CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix)
                                    ## Save the new icon filename in a text file in curdir so that
                                    ## it can be used by templates to created the recmysl
                                    try:
                                        if not has_added_default_icon_subformat_p:
                                            fd = open("%s/%s_ICON" % (curdir, path), "w")
                                        else:
                                            fd = open("%s/%s_ICON_%s" % (curdir, path, iconsize + '_' + icon_suffix), "w")
                                        fd.write(os.path.basename(iconpath))
                                        fd.close()
                                    except OSError, err:
                                        msg = "Cannot store icon filename.[%s]"
                                        msg %= str(err)
                                        raise InvenioWebSubmitFunctionWarning(msg)
                                except InvenioWebSubmitFileError, e:
                                    # Most probably icon already existed.
                                    pass
                            elif mybibdoc is not None:
                                mybibdoc.delete_icon()
Ejemplo n.º 52
0
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 if the process was successfull, 0 if not
    @rtype; int
    """
    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = 'bibencode_' + str(batch_job['recid']) + '_' + str(
            uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR,
                                    xml_filename)
        xml_file = file(xml_filename, 'w')
        xml_file.write(marcxml)
        xml_file.close()
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errrors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    if record_exists(batch_job['recid']) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description',
                                       description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat',
                                     subformat)
                if (comment == m_comment and description == m_description
                        and subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect of the from the record
                    try:
                        ## Assumes pbcore metadata mapping
                        batch_job['aspect'] = get_fieldvalues(
                            124, CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found" %
                                batch_job['recid'])
            task_update_progress("Video master for record %d not found" %
                                 batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(
        batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job,
                                            'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory auf the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first an then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc" %
                                bibdoc_video_docname)
            master_format = compose_format(
                bibdoc_video_extension,
                getval(batch_job, 'bibdoc_master_subformat', 'master'))
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                batch_job['input'],
                version=1,
                description=getval(batch_job, 'bibdoc_master_description'),
                comment=getval(batch_job, 'bibdoc_master_comment'),
                docformat=master_format)

    #-----------#
    # JOBS LOOP #
    #-----------#

    return_code = 1
    global _BATCH_STEP

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------" %
                            (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(
                job['bibdoc_docname']).safe_substitute(
                    {'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, 'assure_quality') and getval(
                    job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None
            ## We need an extension defined fot the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname',
                                                bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(bibdoc_video_directory,
                                                 bibdoc_slave_video_docname,
                                                 bibdoc_video_extension)
            _task_write_message(
                "Transcoding %s to %s;%s" %
                (bibdoc_slave_video_docname, bibdoc_video_extension,
                 bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            encoding_result = encode_video(
                input_file=batch_job['input'],
                output_file=bibdoc_video_fullpath,
                acodec=getval(job, 'audiocodec'),
                vcodec=getval(job, 'videocodec'),
                abitrate=getval(job, 'videobitrate'),
                vbitrate=getval(job, 'audiobitrate'),
                resolution=getval(job, 'resolution'),
                passes=getval(job, 'passes', 1),
                special=getval(job, 'special'),
                specialfirst=getval(job, 'specialfirst'),
                specialsecond=getval(job, 'specialsecond'),
                metadata=getval(job, 'metadata'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),  # Aspect for every job
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                message_fnc=_task_write_message)
            return_code &= encoding_result
            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(
                    bibdoc_video_fullpath,
                    compose_file(bibdoc_video_directory,
                                 bibdoc_video_extension,
                                 bibdoc_video_subformat, 1,
                                 bibdoc_slave_video_docname))
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                             bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(
                        getval(job, 'bibdoc_description'), bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            #Move this to the batch description
            bibdoc_frame_docname = getval(job, 'bibdoc_docname',
                                          bibdoc_video_docname)
            tmpfname = (
                tmpdir + "/" + bibdoc_frame_docname + '.' +
                getval(profile, 'extension', getval(job, 'extension', 'jpg')))
            extraction_result = extract_frames(
                input_file=batch_job['input'],
                output_file=tmpfname,
                size=getval(job, 'size'),
                positions=getval(job, 'positions'),
                numberof=getval(job, 'numberof'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
            )
            return_code &= extraction_result

            ## only on success:
            if extraction_result:
                ## for every filename in the directorys, create a bibdoc that contains
                ## all sizes of the frame from the two directories
                files = os.listdir(tmpdir)
                for filename in files:
                    ## The docname was altered by BibEncode extract through substitution
                    ## Retrieve it from the filename again
                    bibdoc_frame_docname, bibdoc_frame_extension = os.path.splitext(
                        filename)
                    _task_write_message("Creating new bibdoc for %s" %
                                        bibdoc_frame_docname)
                    ## If the bibdoc exists, receive it
                    if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                        bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                    ## Create a new bibdoc if it does not exist
                    else:
                        bibdoc_frame = recdoc.add_bibdoc(
                            docname=bibdoc_frame_docname)

                    ## The filename including path from tmpdir
                    fname = os.path.join(tmpdir, filename)

                    bibdoc_frame_format = compose_format(
                        bibdoc_frame_extension, bibdoc_frame_subformat)
                    ## Same as with the master, if the format allready exists,
                    ## override it, because something went wrong before
                    if bibdoc_frame.format_already_exists_p(
                            bibdoc_frame_format):
                        bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                    _task_write_message("Adding %s jpg;%s to BibDoc" %
                                        (bibdoc_frame_docname,
                                         getval(job, 'bibdoc_subformat')))
                    bibdoc_frame.add_file_new_format(
                        fname,
                        version=1,
                        description=getval(job, 'bibdoc_description'),
                        comment=getval(job, 'bibdoc_comment'),
                        docformat=bibdoc_frame_format)
            ## Remove the temporary folders
            _task_write_message("Removing temporary directory")
            shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file=getval(batch_job, 'input'),
                                 pbcoreIdentifier=batch_job['recid'],
                                 aspect_override=getval(batch_job, 'aspect'))
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        marc_snippet = open(getval(batch_job, 'marc_snippet'))
        marcxml = marc_snippet.read()
        marc_snippet.close()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        # only if successfull
        if not return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern',
                      '') in getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename', batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            if type(getval(batch_job, 'notify_admin') == type(str())):
                _notify_error_admin(batch_job, getval(batch_job,
                                                      'notify_admin'))

            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename', batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
    return 1
Ejemplo n.º 53
0
    def add(self, req, form):
        """
        Add a comment (review) to record with id recid where recid>0
        Also works for adding a remark to basket with id recid where recid<-99
        @param ln: languange
        @param recid: record id
        @param action:  'DISPLAY' to display add form
                        'SUBMIT' to submit comment once form is filled
                        'REPLY' to reply to an already existing comment
        @param msg: the body of the comment/review or remark
        @param score: star score of the review
        @param note: title of the review
        @param comid: comment id, needed for replying
        @param editor_type: the type of editor used for submitting the
                            comment: 'textarea', 'fckeditor'.
        @param subscribe: if set, subscribe user to receive email
                          notifications when new comment are added to
                          this discussion
        @return the full html page.
        """
        argd = wash_urlargd(form, {'action': (str, "DISPLAY"),
                                   'msg': (str, ""),
                                   'note': (str, ''),
                                   'score': (int, 0),
                                   'comid': (int, -1),
                                   'editor_type': (str, ""),
                                   'subscribe': (str, ""),
                                   'cookie': (str, "")
                                   })
        _ = gettext_set_language(argd['ln'])

        actions = ['DISPLAY', 'REPLY', 'SUBMIT']
        uid = getUid(req)

        # Is site ready to accept comments?
        if uid == -1 or (not CFG_WEBCOMMENT_ALLOW_COMMENTS and not CFG_WEBCOMMENT_ALLOW_REVIEWS):
            return page_not_authorized(req, "../comments/add",
                                       navmenuid='search')

        # Is user allowed to post comment?
        user_info = collect_user_info(req)
        (auth_code_1, auth_msg_1) = check_user_can_view_comments(user_info, self.recid)
        (auth_code_2, auth_msg_2) = check_user_can_send_comments(user_info, self.recid)
        if isGuestUser(uid):
            cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
            # Save user's value in cookie, so that these "POST"
            # parameters are not lost during login process
            msg_cookie = mail_cookie_create_common('comment_msg',
                                                    {'msg': argd['msg'],
                                                     'note': argd['note'],
                                                     'score': argd['score'],
                                                     'editor_type': argd['editor_type'],
                                                     'subscribe': argd['subscribe']},
                                                    onetime=True)
            target = '/youraccount/login' + \
                make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \
                CFG_SITE_URL + user_info['uri'] + '&cookie=' + msg_cookie}, {})
            return redirect_to_url(req, target, norobot=True)
        elif (auth_code_1 or auth_code_2):
            return page_not_authorized(req, "../", \
                text = auth_msg_1 + auth_msg_2)

        user_info = collect_user_info(req)
        can_attach_files = False
        (auth_code, auth_msg) = check_user_can_attach_file_to_comments(user_info, self.recid)
        if not auth_code and (user_info['email'] != 'guest' or user_info['apache_user']):
            can_attach_files = True

        warning_msgs = []
        added_files = {}
        if can_attach_files:
            # User is allowed to attach files. Process the files
            file_too_big = False
            formfields = form.get('commentattachment[]', [])
            if not hasattr(formfields, "__getitem__"): # A single file was uploaded
                formfields = [formfields]
            for formfield in formfields[:CFG_WEBCOMMENT_MAX_ATTACHED_FILES]:
                if hasattr(formfield, "filename") and formfield.filename:
                    filename = formfield.filename
                    dir_to_open = os.path.join(CFG_TMPDIR, 'webcomment', str(uid))
                    try:
                        assert(dir_to_open.startswith(CFG_TMPDIR))
                    except AssertionError:
                        register_exception(req=req,
                                           prefix='User #%s tried to upload file to forbidden location: %s' \
                                           % (uid, dir_to_open))

                    if not os.path.exists(dir_to_open):
                        try:
                            os.makedirs(dir_to_open)
                        except:
                            register_exception(req=req, alert_admin=True)

                    ## Before saving the file to disc, wash the filename (in particular
                    ## washing away UNIX and Windows (e.g. DFS) paths):
                    filename = os.path.basename(filename.split('\\')[-1])
                    filename = filename.strip()
                    if filename != "":
                        # Check that file does not already exist
                        n = 1
                        while os.path.exists(os.path.join(dir_to_open, filename)):
                            basedir, name, extension = decompose_file(filename)
                            new_name = propose_next_docname(name)
                            filename = new_name + extension

                        fp = open(os.path.join(dir_to_open, filename), "w")
                        # FIXME: temporary, waiting for wsgi handler to be
                        # fixed. Once done, read chunk by chunk
##                         while formfield.file:
##                             fp.write(formfield.file.read(10240))
                        fp.write(formfield.file.read())
                        fp.close()
                        # Isn't this file too big?
                        file_size = os.path.getsize(os.path.join(dir_to_open, filename))
                        if CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE > 0 and \
                               file_size > CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE:
                            os.remove(os.path.join(dir_to_open, filename))
                            # One file is too big: record that,
                            # dismiss all uploaded files and re-ask to
                            # upload again
                            file_too_big = True
                            warning_msgs.append(('WRN_WEBCOMMENT_MAX_FILE_SIZE_REACHED', cgi.escape(filename), str(file_size/1024) + 'KB', str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE/1024) + 'KB'))
                        else:
                            added_files[filename] = os.path.join(dir_to_open, filename)

            if file_too_big:
                # One file was too big. Removed all uploaded filed
                for filepath in added_files.items():
                    try:
                        os.remove(filepath)
                    except:
                        # File was already removed or does not exist?
                        pass

        client_ip_address = req.remote_ip
        check_warnings = []
        (ok, problem) = check_recID_is_in_range(self.recid, check_warnings, argd['ln'])
        if ok:
            title, description, keywords = websearch_templates.tmpl_record_page_header_content(req,
                                                                                               self.recid,
                                                                                               argd['ln'])
            navtrail = create_navtrail_links(cc=guess_primary_collection_of_a_record(self.recid))
            if navtrail:
                navtrail += ' &gt; '
            navtrail += '<a class="navtrail" href="%s/record/%s?ln=%s">'% (CFG_SITE_URL, self.recid, argd['ln'])
            navtrail += title
            navtrail += '</a>'
            navtrail += '&gt; <a class="navtrail" href="%s/record/%s/%s/?ln=%s">%s</a>' % (CFG_SITE_URL,
                                                                                           self.recid,
                                                                                           self.discussion==1 and 'reviews' or 'comments',
                                                                                           argd['ln'],
                                                                                           self.discussion==1 and _('Reviews') or _('Comments'))

            if argd['action'] not in actions:
                argd['action'] = 'DISPLAY'

            if not argd['msg']:
                # User had to login in-between, so retrieve msg
                # from cookie
                try:
                    (kind, cookie_argd) = mail_cookie_check_common(argd['cookie'],
                                                                    delete=True)

                    argd.update(cookie_argd)
                except InvenioWebAccessMailCookieDeletedError, e:
                    return redirect_to_url(req, CFG_SITE_URL + '/record/' + \
                                           str(self.recid) + (self.discussion==1 and \
                                                              '/reviews' or '/comments'))
                except InvenioWebAccessMailCookieError, e:
                    # Invalid or empty cookie: continue
                    pass
Ejemplo n.º 54
0
def read_metadata(inputfile, force=None, remote=False,
                  loginpw=None, verbose=0):
    """
    Returns metadata extracted from given file as dictionary.

    Availability depends on input file format and installed plugins
    (return C{TypeError} if unsupported file format).

    @param inputfile: path to a file
    @type inputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param force: name of plugin to use, to skip plugin auto-discovery
    @type force: string
    @param remote: if the file is accessed remotely or not
    @type remote: boolean
    @param loginpw: credentials to access secure servers (username:password)
    @type loginpw: string
    @return: dictionary of metadata tags as keys, and (interpreted)
             value as value
    @rtype: dict
    @raise TypeError: if file format is not supported.
    @raise RuntimeError: if required library to process file is missing.
    @raise InvenioWebSubmitFileMetadataRuntimeError: when metadata cannot be read.
    """
    metadata = None
    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    if verbose > 5:
        print ext.lower(), 'extension to extract from'

    # Load plugins
    metadata_extractor_plugins = PluginContainer(
        os.path.join(CFG_PYLIBDIR,
                     'invenio', 'websubmit_file_metadata_plugins', 'wsm_*.py'),
        plugin_builder=plugin_builder_function,
        api_version=__required_plugin_API_version__)

    # Loop through the plugins to find a good one for given file
    for plugin_name, plugin in metadata_extractor_plugins.iteritems():
        # Local file
        if plugin.has_key('can_read_local') and \
            plugin['can_read_local'](inputfile) and not remote and \
            (not force or plugin_name == force):
            if verbose > 5:
                print 'Using ' + plugin_name
            fetched_metadata = plugin['read_metadata_local'](inputfile,
                                                             verbose)
            if not metadata:
                metadata = fetched_metadata
            else:
                metadata.update(fetched_metadata)

        # Remote file
        elif remote and plugin.has_key('can_read_remote') and \
            plugin['can_read_remote'](inputfile) and \
            (not force or plugin_name == force):
            if verbose > 5:
                print 'Using ' + plugin_name
            fetched_metadata = plugin['read_metadata_remote'](inputfile,
                                                              loginpw,
                                                              verbose)
            if not metadata:
                metadata = fetched_metadata
            else:
                metadata.update(fetched_metadata)

    # Return in case we have something
    if metadata is not None:
        return metadata

    # Case of no plugin found, raise
    raise TypeError, 'Unsupported file type'