def get_missing_formats(filelist, desired_conversion=None):
    """Given a list of files it will return a dictionary of the form:
    file1 : missing formats to generate from it...

    @param filelist: paths of the already-available files (one record's formats)
    @param desired_conversion: map of source extension -> list of target
        extensions; defaults to the site-wide CFG_WEBSUBMIT_DESIRED_CONVERSIONS
    @return: dict mapping a source file path to the list of formats that
        should be generated from it
    """
    from invenio.bibdocfile import normalize_format, decompose_file

    def normalize_desired_conversion():
        # Normalize every source/target extension (leading dot, lowercase)
        # so the membership tests below are reliable.
        ret = {}
        for key, value in desired_conversion.iteritems():
            ret[normalize_format(key)] = [normalize_format(aformat) for aformat in value]
        return ret

    if desired_conversion is None:
        desired_conversion = CFG_WEBSUBMIT_DESIRED_CONVERSIONS
    # Extensions already present on disk for this record.
    available_formats = [decompose_file(filename, skip_version=True)[2] for filename in filelist]
    missing_formats = []
    desired_conversion = normalize_desired_conversion()
    ret = {}
    for filename in filelist:
        aformat = decompose_file(filename, skip_version=True)[2]
        if aformat in desired_conversion:
            for desired_format in desired_conversion[aformat]:
                # NOTE: missing_formats is shared across ALL files, so each
                # missing target format is attributed to the FIRST file able
                # to produce it only -- this avoids converting the same
                # target twice from two different sources.
                if desired_format not in available_formats and desired_format not in missing_formats:
                    missing_formats.append(desired_format)
                    if filename not in ret:
                        ret[filename] = []
                    ret[filename].append(desired_format)
    return ret
def get_missing_formats(filelist, desired_conversion=None):
    """Given a list of files it will return a dictionary of the form:
    file1 : missing formats to generate from it...

    @param filelist: paths of the already-available files (one record's formats)
    @param desired_conversion: map of source extension -> list of target
        extensions; defaults to the site-wide CFG_WEBSUBMIT_DESIRED_CONVERSIONS
    @return: dict mapping a source file path to the list of formats that
        should be generated from it
    """
    from invenio.bibdocfile import normalize_format, decompose_file

    def normalize_desired_conversion():
        # Normalize every source/target extension (leading dot, lowercase)
        # so the membership tests below are reliable.
        ret = {}
        for key, value in desired_conversion.iteritems():
            ret[normalize_format(key)] = [normalize_format(aformat) for aformat in value]
        return ret

    if desired_conversion is None:
        desired_conversion = CFG_WEBSUBMIT_DESIRED_CONVERSIONS
    # Extensions already present on disk for this record.
    available_formats = [decompose_file(filename, skip_version=True)[2] for filename in filelist]
    missing_formats = []
    desired_conversion = normalize_desired_conversion()
    ret = {}
    for filename in filelist:
        aformat = decompose_file(filename, skip_version=True)[2]
        if aformat in desired_conversion:
            for desired_format in desired_conversion[aformat]:
                # NOTE: missing_formats is shared across ALL files, so each
                # missing target format is attributed to the FIRST file able
                # to produce it only -- this avoids converting the same
                # target twice from two different sources.
                if desired_format not in available_formats and desired_format not in missing_formats:
                    missing_formats.append(desired_format)
                    if filename not in ret:
                        ret[filename] = []
                    ret[filename].append(desired_format)
    return ret
def translate_link(match_obj):
    """Replace FCKeditor link by 'local' record link.
    Also create the FFT for that link"""
    attach_type = match_obj.group("type")
    attach_filename = match_obj.group("filename")
    owner_uid = match_obj.group("uid")
    dummy, base_name, ext = decompose_file(attach_filename)
    new_url = build_url(sysno, base_name, attach_type, ext)
    # Strip the surrounding quote characters from the matched URL.
    original_location = match_obj.group()[1:-1]
    icon_location = original_location
    # Prepare FFT that will fetch the file (+ the original
    # file in the case of images)
    if attach_type == "image":
        # The editor may only have handed us the icon; if an original was
        # uploaded it sits at a well-defined temporary location.
        possible_original_path = os.path.join(CFG_PREFIX, "var", "tmp",
                                              "attachfile", owner_uid,
                                              attach_type, "original",
                                              attach_filename)
        if os.path.exists(possible_original_path):
            icon_location = original_location
            original_location = possible_original_path
            new_url = build_url(sysno, base_name, attach_type, ext,
                                is_icon=True)
    docname = build_docname(base_name, attach_type, ext)
    if original_location not in processed_paths:
        # Create the FFT only once per file: the same image may appear
        # several times on the page (two FCKeditor instances, or twice in
        # the HTML).
        processed_paths.append(original_location)
        write_fft(original_location, docname, icon_location,
                  doctype=attach_type)
    return '"' + new_url + '"'
def Move_Files_Archive(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Attach every file found in curdir/files/{MainFiles,AdditionalFiles}
    to record sysno as a 'Main'/'Additional' document, after deleting the
    bibdocs whose file no longer exists on disk.
    """
    MainDir = "%s/files/MainFiles" % curdir
    IncludeDir = "%s/files/AdditionalFiles" % curdir
    watcheddirs = {'Main': MainDir, 'Additional': IncludeDir}
    for doctype, dir in watcheddirs.iteritems():
        if not os.path.exists(dir):
            continue
        # Map docname -> list of normalized extensions found on disk.
        formats = {}
        files = os.listdir(dir)
        files.sort()
        for file in files:
            dummy, filename, extension = decompose_file(file)
            if filename not in formats:
                formats[filename] = []
            formats[filename].append(normalize_format(extension))
        # first delete all missing files
        bibarchive = BibRecDocs(sysno)
        existingBibdocs = bibarchive.list_bibdocs(doctype)
        for existingBibdoc in existingBibdocs:
            if existingBibdoc.get_docname() not in formats:
                existingBibdoc.delete()
        # then create/update the new ones
        for key in formats.keys():
            for aformat in formats[key]:
                # BUGFIX: the original interpolated the whole list of
                # formats into the path ("name['.pdf']"), producing a
                # non-existent filename; add each format individually.
                bibarchive.add_new_file('%s/%s%s' % (dir, key, aformat),
                                        doctype=doctype, never_fail=True)
    return ""
def any2djvu(input_file, output_file=None, resolution=400, ocr=True, input_format=5, **dummy):
    """
    Transform input_file into a .djvu file.
    @param input_file [string] the input file name
    @param output_file [string] the output_file file name, None for temporary generated
    @param resolution [int] the resolution of the output_file
    @param input_format [int] [1-9]:
        1 - DjVu Document (for verification or OCR)
        2 - PS/PS.GZ/PDF Document (default)
        3 - Photo/Picture/Icon
        4 - Scanned Document - B&W - <200 dpi
        5 - Scanned Document - B&W - 200-400 dpi
        6 - Scanned Document - B&W - >400 dpi
        7 - Scanned Document - Color/Mixed - <200 dpi
        8 - Scanned Document - Color/Mixed - 200-400 dpi
        9 - Scanned Document - Color/Mixed - >400 dpi
    @return [string] output_file input_file.
    raise InvenioWebSubmitFileConverterError in case of errors.
    Note: due to the bottleneck of using a centralized server, it is very
    slow and is not suitable for interactive usage (e.g. WebSubmit functions)
    """
    from invenio.bibdocfile import decompose_file
    input_file, output_file, working_dir = prepare_io(input_file, output_file, '.djvu')
    # The command line flag is "1"/"0", not a Python boolean.
    if ocr:
        ocr_flag = "1"
    else:
        ocr_flag = "0"
    ## Any2djvu expect to find the file in the current directory.
    execute_command(CFG_PATH_ANY2DJVU, '-a', '-c', '-r', resolution,
                    '-o', ocr_flag, '-f', input_format,
                    os.path.basename(input_file), cwd=working_dir)
    ## Any2djvu doesn't let you choose the output_file file name: it always
    ## produces <basename>.djvu, so move that to the requested location.
    produced = os.path.join(working_dir,
                            decompose_file(input_file)[1] + '.djvu')
    shutil.move(produced, output_file)
    clean_working_dir(working_dir)
    return output_file
def Move_Files_Archive(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Attach every file found in curdir/files/{MainFiles,AdditionalFiles}
    to record sysno as a 'Main'/'Additional' document, after deleting the
    bibdocs whose file no longer exists on disk.
    """
    MainDir = "%s/files/MainFiles" % curdir
    IncludeDir = "%s/files/AdditionalFiles" % curdir
    watcheddirs = {'Main': MainDir, 'Additional': IncludeDir}
    for doctype, dir in watcheddirs.iteritems():
        if not os.path.exists(dir):
            continue
        # Map docname -> list of normalized extensions found on disk.
        formats = {}
        files = os.listdir(dir)
        files.sort()
        for file in files:
            dummy, filename, extension = decompose_file(file)
            if filename not in formats:
                formats[filename] = []
            formats[filename].append(normalize_format(extension))
        # first delete all missing files
        bibarchive = BibRecDocs(sysno)
        existingBibdocs = bibarchive.list_bibdocs(doctype)
        for existingBibdoc in existingBibdocs:
            if existingBibdoc.get_docname() not in formats:
                existingBibdoc.delete()
        # then create/update the new ones
        for key in formats.keys():
            for aformat in formats[key]:
                # BUGFIX: the original interpolated the whole list of
                # formats into the path ("name['.pdf']"), producing a
                # non-existent filename; add each format individually.
                bibarchive.add_new_file('%s/%s%s' % (dir, key, aformat),
                                        doctype=doctype, never_fail=True)
    return ""
def any2djvu(input_file, output_file=None, resolution=400, ocr=True, input_format=5, **dummy):
    """
    Transform input_file into a .djvu file.
    @param input_file [string] the input file name
    @param output_file [string] the output_file file name, None for temporary generated
    @param resolution [int] the resolution of the output_file
    @param input_format [int] [1-9]:
        1 - DjVu Document (for verification or OCR)
        2 - PS/PS.GZ/PDF Document (default)
        3 - Photo/Picture/Icon
        4 - Scanned Document - B&W - <200 dpi
        5 - Scanned Document - B&W - 200-400 dpi
        6 - Scanned Document - B&W - >400 dpi
        7 - Scanned Document - Color/Mixed - <200 dpi
        8 - Scanned Document - Color/Mixed - 200-400 dpi
        9 - Scanned Document - Color/Mixed - >400 dpi
    @return [string] output_file input_file.
    raise InvenioWebSubmitFileConverterError in case of errors.
    Note: due to the bottleneck of using a centralized server, it is very
    slow and is not suitable for interactive usage (e.g. WebSubmit functions)
    """
    from invenio.bibdocfile import decompose_file
    input_file, output_file, working_dir = prepare_io(input_file, output_file, '.djvu')
    # The command line flag is "1"/"0", not a Python boolean.
    if ocr:
        ocr_flag = "1"
    else:
        ocr_flag = "0"
    ## Any2djvu expect to find the file in the current directory.
    execute_command(CFG_PATH_ANY2DJVU, '-a', '-c', '-r', resolution,
                    '-o', ocr_flag, '-f', input_format,
                    os.path.basename(input_file), cwd=working_dir)
    ## Any2djvu doesn't let you choose the output_file file name: it always
    ## produces <basename>.djvu, so move that to the requested location.
    produced = os.path.join(working_dir,
                            decompose_file(input_file)[1] + '.djvu')
    shutil.move(produced, output_file)
    clean_working_dir(working_dir)
    return output_file
def can_read_remote(inputfile):
    """Checks if inputfile is among metadata-readable file types

    @param inputfile: (string) path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if extension can be handled"""
    # Check file type (0 base, 1 name, 2 ext).  decompose_file returns the
    # extension WITH its leading dot (cf. the '.pdf' check elsewhere in
    # this module).
    ext = decompose_file(inputfile)[2]
    # BUGFIX: "jpe" was missing its leading dot and could never match.
    return ext.lower() in [".jpg", ".jpeg", ".jpe", ".jfif", ".jfi", ".jif"]
def can_read_remote(inputfile):
    """Checks if inputfile is among metadata-readable file types

    @param inputfile: (string) path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if extension can be handled"""
    # Check file type (0 base, 1 name, 2 ext).  decompose_file returns the
    # extension WITH its leading dot (cf. the '.pdf' check elsewhere in
    # this module).
    ext = decompose_file(inputfile)[2]
    # BUGFIX: 'jpe' was missing its leading dot and could never match.
    return ext.lower() in ['.jpg', '.jpeg', '.jpe', '.jfif', '.jfi', '.jif']
def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead."""
    files_dir = "%s/files" % curdir
    if not os.path.exists(files_dir):
        return ""
    bibrecdocs = BibRecDocs(sysno)
    for current_file in os.listdir(files_dir):
        fullpath = "%s/%s" % (files_dir, current_file)
        dummy, filename, extension = decompose_file(current_file)
        # Make sure the extension carries its leading dot.
        if extension and not extension.startswith("."):
            extension = '.' + extension
        # Attach only files not already present on the record.
        if not bibrecdocs.check_file_exists(fullpath, extension):
            bibrecdocs.add_new_file(fullpath, "Main", never_fail=True)
    return ""
def get_best_format_to_extract_text_from(filelist, best_formats=CFG_WEBSUBMIT_BEST_FORMATS_TO_EXTRACT_TEXT_FROM):
    """
    Return among the filelist the best file whose format is best suited for
    extracting text.
    """
    from invenio.bibdocfile import decompose_file, normalize_format
    # Keep only the preferred formats that can actually be converted to
    # plain text, preserving the order of preference.
    candidates = [normalize_format(aformat) for aformat in best_formats
                  if can_convert(aformat, '.txt')]
    for candidate in candidates:
        for filename in filelist:
            if decompose_file(filename, skip_version=True)[2].endswith(candidate):
                return filename
    raise InvenioWebSubmitFileConverterError("It's not possible to extract valuable text from any of the proposed files.")
def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead."""
    files_dir = "%s/files" % curdir
    if not os.path.exists(files_dir):
        return ""
    bibrecdocs = BibRecDocs(sysno)
    for current_file in os.listdir(files_dir):
        fullpath = "%s/%s" % (files_dir, current_file)
        dummy, filename, extension = decompose_file(current_file)
        # Make sure the extension carries its leading dot.
        if extension and not extension.startswith("."):
            extension = '.' + extension
        # Attach only files not already present on the record.
        if not bibrecdocs.check_file_exists(fullpath, extension):
            bibrecdocs.add_new_file(fullpath, "Main", never_fail=True)
    return ""
def can_write_local(inputfile):
    """
    Checks if inputfile is among metadata-writable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """
    # Check file type (0 base, 1 name, 2 ext).  decompose_file returns the
    # extension WITH its leading dot.
    ext = decompose_file(inputfile)[2]
    # BUGFIX: "jpe" was missing its leading dot and could never match.
    return ext.lower() in [".jpg", ".tiff", ".jpeg", ".jpe", ".jfif", ".jfi", ".jif"]
def get_best_format_to_extract_text_from(filelist, best_formats=CFG_WEBSUBMIT_BEST_FORMATS_TO_EXTRACT_TEXT_FROM):
    """
    Return among the filelist the best file whose format is best suited for
    extracting text.
    """
    from invenio.bibdocfile import decompose_file, normalize_format
    # Keep only the preferred formats that can actually be converted to
    # plain text, preserving the order of preference.
    candidates = [normalize_format(aformat) for aformat in best_formats
                  if can_convert(aformat, '.txt')]
    for candidate in candidates:
        for filename in filelist:
            if decompose_file(filename, skip_version=True)[2].endswith(candidate):
                return filename
    raise InvenioWebSubmitFileConverterError("It's not possible to extract valuable text from any of the proposed files.")
def can_read_local(inputfile):
    """
    Checks if inputfile is among metadata-readable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """
    # Check file type (0 base, 1 name, 2 ext).  decompose_file returns the
    # extension WITH its leading dot.
    ext = decompose_file(inputfile)[2]
    # BUGFIX: 'jpe' was missing its leading dot and could never match.
    return ext.lower() in ['.jpg', '.tiff', '.jpeg', '.jpe', '.jfif', '.jfi', '.jif']
def gunzip(input_file, output_file=None, **dummy):
    """
    Uncompress a file.
    """
    from invenio.bibdocfile import decompose_file
    # Derive the uncompressed extension from the input name, e.g.
    # 'foo.ps.gz' -> '.ps'; names not ending in .gz get no hint and
    # prepare_io will pick a default.
    full_ext = decompose_file(input_file, skip_version=True)[2]
    if full_ext.endswith('.gz'):
        uncompressed_ext = full_ext[:-len('.gz')]
    else:
        uncompressed_ext = None
    input_file, output_file, dummy = prepare_io(input_file, output_file,
                                                uncompressed_ext,
                                                need_working_dir=False)
    execute_command(CFG_PATH_GUNZIP, '-c', input_file,
                    filename_out=output_file)
    return output_file
def can_write_local(inputfile):
    """
    Checks if inputfile is among metadata-writable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """
    # Check file type (0 base, 1 name, 2 ext).  decompose_file returns the
    # extension WITH its leading dot.
    ext = decompose_file(inputfile)[2]
    # BUGFIX: 'jpe' was missing its leading dot and could never match.
    return ext.lower() in ['.jpg', '.tiff', '.jpeg', '.jpe', '.jfif', '.jfi', '.jif']
def can_read_local(inputfile):
    """
    Checks if inputfile is among metadata-readable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """
    # Only PDF is supported here; decompose_file returns the extension
    # with its leading dot (0 base, 1 name, 2 ext).
    extension = decompose_file(inputfile)[2]
    return extension.lower() == '.pdf'
def gunzip(input_file, output_file=None, **dummy):
    """
    Uncompress a file.
    """
    from invenio.bibdocfile import decompose_file
    # Derive the uncompressed extension from the input name, e.g.
    # 'foo.ps.gz' -> '.ps'; names not ending in .gz get no hint and
    # prepare_io will pick a default.
    full_ext = decompose_file(input_file, skip_version=True)[2]
    if full_ext.endswith('.gz'):
        uncompressed_ext = full_ext[:-len('.gz')]
    else:
        uncompressed_ext = None
    input_file, output_file, dummy = prepare_io(input_file, output_file,
                                                uncompressed_ext,
                                                need_working_dir=False)
    execute_command(CFG_PATH_GUNZIP, '-c', input_file,
                    filename_out=output_file)
    return output_file
def createRelatedFormats(fullpath, overwrite=True):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.

    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats
    Return a list of the paths to the converted files
    """
    createdpaths = []
    basedir, filename, extension = decompose_file(fullpath)
    extension = extension.lower()
    if extension == ".pdf":
        if overwrite or \
               not os.path.exists("%s/%s.ps" % (basedir, filename)):
            # Create PostScript
            try:
                convert_file(fullpath, "%s/%s.ps" % (basedir, filename))
                createdpaths.append("%s/%s.ps" % (basedir, filename))
            except InvenioWebSubmitFileConverterError:
                # Best-effort: a failed conversion simply skips this format.
                pass
        if overwrite or \
               not os.path.exists("%s/%s.ps.gz" % (basedir, filename)):
            if os.path.exists("%s/%s.ps" % (basedir, filename)):
                # NOTE(review): the gzip exit status is not checked and the
                # .gz path is reported as created without verifying it
                # exists -- confirm this is acceptable.
                os.system("%s %s/%s.ps" % (CFG_PATH_GZIP, basedir, filename))
                createdpaths.append("%s/%s.ps.gz" % (basedir, filename))
    if extension == ".ps":
        if overwrite or \
               not os.path.exists("%s/%s.pdf" % (basedir, filename)):
            # Create PDF
            try:
                convert_file(fullpath, "%s/%s.pdf" % (basedir, filename))
                createdpaths.append("%s/%s.pdf" % (basedir, filename))
            except InvenioWebSubmitFileConverterError:
                pass
    if extension == ".ps.gz":
        # NOTE(review): this branch assumes decompose_file() keeps the
        # compound ".ps.gz" extension together -- confirm against
        # bibdocfile.decompose_file.
        if overwrite or \
               not os.path.exists("%s/%s.ps" % (basedir, filename)):
            #gunzip file
            os.system("%s %s" % (CFG_PATH_GUNZIP, fullpath))
        if overwrite or \
               not os.path.exists("%s/%s.pdf" % (basedir, filename)):
            # Create PDF
            try:
                convert_file("%s/%s.ps" % (basedir, filename),
                             "%s/%s.pdf" % (basedir, filename))
                createdpaths.append("%s/%s.pdf" % (basedir, filename))
            except InvenioWebSubmitFileConverterError:
                pass
        #gzip file
        # Re-compress the .ps so the original .ps.gz format is restored.
        if not os.path.exists("%s/%s.ps.gz" % (basedir, filename)):
            os.system("%s %s/%s.ps" % (CFG_PATH_GZIP, basedir, filename))
    return createdpaths
def write_metadata(inputfile, outputfile, metadata_dictionary, force=None, verbose=0): """ Writes metadata to given file. Availability depends on input file format and installed plugins (return C{TypeError} if unsupported file format). @param inputfile: path to a file @type inputfile: string @param outputfile: path to the resulting file. @type outputfile: string @param verbose: verbosity @type verbose: int @param metadata_dictionary: keys and values of metadata to update. @type metadata_dictionary: dict @param force: name of plugin to use, to skip plugin auto-discovery @type force: string @return: output of the plugin @rtype: string @raise TypeError: if file format is not supported. @raise RuntimeError: if required library to process file is missing. @raise InvenioWebSubmitFileMetadataRuntimeError: when metadata cannot be updated. """ # Check file type (0 base, 1 name, 2 ext) ext = decompose_file(inputfile)[2] if verbose > 5: print ext.lower(), 'extension to write to' # Plugins metadata_extractor_plugins = PluginContainer( os.path.join(CFG_PYLIBDIR, 'invenio', 'websubmit_file_metadata_plugins', 'wsm_*.py'), plugin_builder=plugin_builder_function, api_version=__required_plugin_API_version__ ) # Loop through the plugins to find a good one to ext for plugin_name, plugin in metadata_extractor_plugins.iteritems(): if plugin.has_key('can_write_local') and \ plugin['can_write_local'](inputfile) and \ (not force or plugin_name == force): if verbose > 5: print 'Using ' + plugin_name return plugin['write_metadata_local'](inputfile, outputfile, metadata_dictionary, verbose) # Case of no plugin found, raise raise TypeError, 'Unsupported file type'
def write_metadata(inputfile, outputfile, metadata_dictionary, force=None, verbose=0): """ Writes metadata to given file. Availability depends on input file format and installed plugins (return C{TypeError} if unsupported file format). @param inputfile: path to a file @type inputfile: string @param outputfile: path to the resulting file. @type outputfile: string @param verbose: verbosity @type verbose: int @param metadata_dictionary: keys and values of metadata to update. @type metadata_dictionary: dict @param force: name of plugin to use, to skip plugin auto-discovery @type force: string @return: output of the plugin @rtype: string @raise TypeError: if file format is not supported. @raise RuntimeError: if required library to process file is missing. @raise InvenioWebSubmitFileMetadataRuntimeError: when metadata cannot be updated. """ # Check file type (0 base, 1 name, 2 ext) ext = decompose_file(inputfile)[2] if verbose > 5: print ext.lower(), 'extension to write to' # Plugins metadata_extractor_plugins = PluginContainer( os.path.join(CFG_PYLIBDIR, 'invenio', 'websubmit_file_metadata_plugins', 'wsm_*.py'), plugin_builder=plugin_builder_function, api_version=__required_plugin_API_version__ ) # Loop through the plugins to find a good one to ext for plugin_name, plugin in metadata_extractor_plugins.iteritems(): if plugin.has_key('can_write_local') and \ plugin['can_write_local'](inputfile) and \ (not force or plugin_name == force): if verbose > 5: print 'Using ' + plugin_name return plugin['write_metadata_local'](inputfile, outputfile, metadata_dictionary, verbose) # Case of no plugin found, raise raise TypeError, 'Unsupported file type'
def convert_file(input_file, output_file=None, output_format=None, **params): """ Convert files from one format to another. @param input_file [string] the path to an existing file @param output_file [string] the path to the desired ouput. (if None a temporary file is generated) @param output_format [string] the desired format (if None it is taken from output_file) @param params other paramaters to pass to the particular converter @return [string] the final output_file """ from invenio.bibdocfile import decompose_file, normalize_format if output_format is None: if output_file is None: raise ValueError("At least output_file or format should be specified.") else: output_ext = decompose_file(output_file, skip_version=True)[2] else: output_ext = normalize_format(output_format) input_ext = decompose_file(input_file, skip_version=True)[2] conversion_chain = can_convert(input_ext, output_ext) if conversion_chain: current_input = input_file current_output = None for i in xrange(len(conversion_chain)): if i == (len(conversion_chain) - 1): current_output = output_file converter = conversion_chain[i][0] final_params = dict(conversion_chain[i][1]) final_params.update(params) try: return converter(current_input, current_output, **final_params) except InvenioWebSubmitFileConverterError, err: raise InvenioWebSubmitFileConverterError("Error when converting from %s to %s: %s" % (input_file, output_ext, err)) except Exception, err: register_exception() raise InvenioWebSubmitFileConverterError("Unexpected error when converting from %s to %s (%s): %s" % (input_file, output_ext, type(err), err)) current_input = current_output
def convert_file(input_file, output_file=None, output_format=None, **params): """ Convert files from one format to another. @param input_file [string] the path to an existing file @param output_file [string] the path to the desired ouput. (if None a temporary file is generated) @param output_format [string] the desired format (if None it is taken from output_file) @param params other paramaters to pass to the particular converter @return [string] the final output_file """ from invenio.bibdocfile import decompose_file, normalize_format if output_format is None: if output_file is None: raise ValueError("At least output_file or format should be specified.") else: output_ext = decompose_file(output_file, skip_version=True)[2] else: output_ext = normalize_format(output_format) input_ext = decompose_file(input_file, skip_version=True)[2] conversion_chain = can_convert(input_ext, output_ext) if conversion_chain: current_input = input_file current_output = None for i in xrange(len(conversion_chain)): if i == (len(conversion_chain) - 1): current_output = output_file converter = conversion_chain[i][0] final_params = dict(conversion_chain[i][1]) final_params.update(params) try: return converter(current_input, current_output, **final_params) except InvenioWebSubmitFileConverterError, err: raise InvenioWebSubmitFileConverterError("Error when converting from %s to %s: %s" % (input_file, output_ext, err)) except Exception, err: register_exception() raise InvenioWebSubmitFileConverterError("Unexpected error when converting from %s to %s (%s): %s" % (input_file, output_ext, type(err), err)) current_input = current_output
def createRelatedFormats(fullpath, overwrite=True):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.

    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats
    Return a list of the paths to the converted files
    """
    createdpaths = []
    basedir, filename, extension = decompose_file(fullpath)
    extension = extension.lower()
    if extension == ".pdf":
        if overwrite or \
               not os.path.exists("%s/%s.ps" % (basedir, filename)):
            # Create PostScript
            try:
                convert_file(fullpath, "%s/%s.ps" % (basedir, filename))
                createdpaths.append("%s/%s.ps" % (basedir, filename))
            except InvenioWebSubmitFileConverterError:
                # Best-effort: a failed conversion simply skips this format.
                pass
        if overwrite or \
               not os.path.exists("%s/%s.ps.gz" % (basedir, filename)):
            if os.path.exists("%s/%s.ps" % (basedir, filename)):
                # NOTE(review): the gzip exit status is not checked and the
                # .gz path is reported as created without verifying it
                # exists -- confirm this is acceptable.
                os.system("%s %s/%s.ps" % (CFG_PATH_GZIP, basedir, filename))
                createdpaths.append("%s/%s.ps.gz" % (basedir, filename))
    if extension == ".ps":
        if overwrite or \
               not os.path.exists("%s/%s.pdf" % (basedir, filename)):
            # Create PDF
            try:
                convert_file(fullpath, "%s/%s.pdf" % (basedir, filename))
                createdpaths.append("%s/%s.pdf" % (basedir, filename))
            except InvenioWebSubmitFileConverterError:
                pass
    if extension == ".ps.gz":
        # NOTE(review): this branch assumes decompose_file() keeps the
        # compound ".ps.gz" extension together -- confirm against
        # bibdocfile.decompose_file.
        if overwrite or \
               not os.path.exists("%s/%s.ps" % (basedir, filename)):
            #gunzip file
            os.system("%s %s" % (CFG_PATH_GUNZIP, fullpath))
        if overwrite or \
               not os.path.exists("%s/%s.pdf" % (basedir, filename)):
            # Create PDF
            try:
                convert_file("%s/%s.ps" % (basedir, filename),
                             "%s/%s.pdf" % (basedir, filename))
                createdpaths.append("%s/%s.pdf" % (basedir, filename))
            except InvenioWebSubmitFileConverterError:
                pass
        #gzip file
        # Re-compress the .ps so the original .ps.gz format is restored.
        if not os.path.exists("%s/%s.ps.gz" % (basedir, filename)):
            os.system("%s %s/%s.ps" % (CFG_PATH_GZIP, basedir, filename))
    return createdpaths
def prepare_io(input_file, output_file=None, output_ext=None, need_working_dir=True):
    """Clean input_file and the output_file.

    Resolves the output extension and reserves a temporary output file
    when none is given.  (NOTE(review): the function appears truncated in
    this chunk -- the working-directory handling continues elsewhere.)
    """
    from invenio.bibdocfile import decompose_file, normalize_format
    # NOTE(review): assumes normalize_format(None) returns None, so the
    # 'is None' checks below still work -- confirm in bibdocfile.
    output_ext = normalize_format(output_ext)
    debug('Preparing IO for input=%s, output=%s, output_ext=%s' % (input_file, output_file, output_ext))
    if output_ext is None:
        if output_file is None:
            # No hint at all: fall back to a generic temporary extension.
            output_ext = '.tmp'
        else:
            # Take the extension from the requested output file name.
            output_ext = decompose_file(output_file, skip_version=True)[2]
    if output_file is None:
        try:
            # Reserve a uniquely-named output file; only the path is
            # needed, so the descriptor is closed immediately.
            (fd, output_file) = tempfile.mkstemp(suffix=output_ext, dir=CFG_TMPDIR)
            os.close(fd)
        except IOError, err:
            raise InvenioWebSubmitFileConverterError("It's impossible to create a temporary file: %s" % err)
def prepare_io(input_file, output_file=None, output_ext=None, need_working_dir=True):
    """Clean input_file and the output_file.

    Resolves the output extension and reserves a temporary output file
    when none is given.  (NOTE(review): the function appears truncated in
    this chunk -- the working-directory handling continues elsewhere.)
    """
    from invenio.bibdocfile import decompose_file, normalize_format
    # NOTE(review): assumes normalize_format(None) returns None, so the
    # 'is None' checks below still work -- confirm in bibdocfile.
    output_ext = normalize_format(output_ext)
    debug('Preparing IO for input=%s, output=%s, output_ext=%s' % (input_file, output_file, output_ext))
    if output_ext is None:
        if output_file is None:
            # No hint at all: fall back to a generic temporary extension.
            output_ext = '.tmp'
        else:
            # Take the extension from the requested output file name.
            output_ext = decompose_file(output_file, skip_version=True)[2]
    if output_file is None:
        try:
            # Reserve a uniquely-named output file; only the path is
            # needed, so the descriptor is closed immediately.
            (fd, output_file) = tempfile.mkstemp(suffix=output_ext, dir=CFG_TMPDIR)
            os.close(fd)
        except IOError, err:
            raise InvenioWebSubmitFileConverterError("It's impossible to create a temporary file: %s" % err)
def can_read_local(inputfile):
    """
    Checks if inputfile is among metadata-readable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """
    # Check file type (0 base, 1 name, 2 ext).  decompose_file returns the
    # extension WITH its leading dot.
    ext = decompose_file(inputfile)[2]
    # FIX: removed the duplicate '.ps' entry from the original list.
    return ext.lower() in ['.html', '.doc', '.ps', '.xls', '.ppt',
                           '.sxw', '.sdw', '.dvi', '.man', '.flac',
                           '.mp3', '.nsf', '.sid', '.ogg', '.wav',
                           '.png', '.deb', '.rpm', '.tar.gz', '.zip',
                           '.elf', '.s3m', '.xm', '.it', '.flv',
                           '.real', '.avi', '.mpeg', '.qt', '.asf']
def createRelatedFormats(fullpath, overwrite=True, debug=False):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.

    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats
    @param debug: (bool) when True, temporarily raise the converter logger
        to DEBUG and trace each step to stderr
    Return a list of the paths to the converted files
    """
    # NOTE(review): the 'overwrite' parameter is accepted but never used in
    # this implementation -- confirm whether get_missing_formats() should
    # honour it.
    file_converter_logger = get_file_converter_logger()
    old_logging_level = file_converter_logger.getEffectiveLevel()
    if debug:
        file_converter_logger.setLevel(DEBUG)
    try:
        createdpaths = []
        basedir, filename, extension = decompose_file(fullpath)
        extension = extension.lower()
        if debug:
            print >>sys.stderr, "basedir: %s, filename: %s, extension: %s" % (basedir, filename, extension)
        # Every sibling sharing the same base name counts as an already
        # available format.
        filelist = glob.glob(os.path.join(basedir, "%s*" % filename))
        if debug:
            print >>sys.stderr, "filelist: %s" % filelist
        missing_formats = get_missing_formats(filelist)
        if debug:
            print >>sys.stderr, "missing_formats: %s" % missing_formats
        for path, formats in missing_formats.iteritems():
            if debug:
                print >>sys.stderr, "... path: %s, formats: %s" % (path, formats)
            for aformat in formats:
                if debug:
                    print >>sys.stderr, "...... aformat: %s" % aformat
                newpath = os.path.join(basedir, filename + aformat)
                if debug:
                    print >>sys.stderr, "...... newpath: %s" % newpath
                try:
                    convert_file(path, newpath)
                    createdpaths.append(newpath)
                except InvenioWebSubmitFileConverterError, msg:
                    if debug:
                        print >>sys.stderr, "...... Exception: %s" % msg
                    # Keep going with the remaining formats; admins are
                    # notified of the failure.
                    register_exception(alert_admin=True)
    finally:
        # Always restore the previous logging level.
        if debug:
            file_converter_logger.setLevel(old_logging_level)
    return createdpaths
def createRelatedFormats(fullpath, overwrite=True, debug=False):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.

    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats
    @param debug: (bool) when True, temporarily raise the converter logger
        to DEBUG and trace each step to stderr
    Return a list of the paths to the converted files
    """
    # NOTE(review): the 'overwrite' parameter is accepted but never used in
    # this implementation -- confirm whether get_missing_formats() should
    # honour it.
    file_converter_logger = get_file_converter_logger()
    old_logging_level = file_converter_logger.getEffectiveLevel()
    if debug:
        file_converter_logger.setLevel(DEBUG)
    try:
        createdpaths = []
        basedir, filename, extension = decompose_file(fullpath)
        extension = extension.lower()
        if debug:
            print >> sys.stderr, "basedir: %s, filename: %s, extension: %s" % (basedir, filename, extension)
        # Every sibling sharing the same base name counts as an already
        # available format.
        filelist = glob.glob(os.path.join(basedir, '%s*' % filename))
        if debug:
            print >> sys.stderr, "filelist: %s" % filelist
        missing_formats = get_missing_formats(filelist)
        if debug:
            print >> sys.stderr, "missing_formats: %s" % missing_formats
        for path, formats in missing_formats.iteritems():
            if debug:
                print >> sys.stderr, "... path: %s, formats: %s" % (path, formats)
            for aformat in formats:
                if debug:
                    print >> sys.stderr, "...... aformat: %s" % aformat
                newpath = os.path.join(basedir, filename + aformat)
                if debug:
                    print >> sys.stderr, "...... newpath: %s" % newpath
                try:
                    convert_file(path, newpath)
                    createdpaths.append(newpath)
                except InvenioWebSubmitFileConverterError, msg:
                    if debug:
                        print >> sys.stderr, "...... Exception: %s" % msg
                    # Keep going with the remaining formats; admins are
                    # notified of the failure.
                    register_exception(alert_admin=True)
    finally:
        # Always restore the previous logging level.
        if debug:
            file_converter_logger.setLevel(old_logging_level)
    return createdpaths
def translate_link(match_obj):
    """Replace CKEditor link by 'local' record link.
    Also create the FFT for that link"""
    attach_type = match_obj.group('type')
    attach_filename = match_obj.group('filename')
    owner_uid = match_obj.group('uid')
    dummy, base_name, ext = decompose_file(attach_filename)
    new_url = build_url(sysno, base_name, attach_type, ext)
    # Strip the surrounding quote characters from the matched URL.
    original_location = match_obj.group()[1:-1]
    icon_location = original_location
    # Prepare FFT that will fetch the file (+ the original
    # file in the case of images)
    if attach_type == 'image':
        # The editor may only have handed us the icon; if an original was
        # uploaded it sits at a well-defined temporary location.
        possible_original_path = os.path.join(CFG_PREFIX, 'var', 'tmp',
                                              'attachfile', owner_uid,
                                              attach_type, 'original',
                                              attach_filename)
        if os.path.exists(possible_original_path):
            icon_location = original_location
            original_location = possible_original_path
            new_url = build_url(sysno, base_name, attach_type, ext,
                                is_icon=True)
    docname = build_docname(base_name, attach_type, ext)
    if original_location not in processed_paths:
        # Create the FFT only once per file: the same image may appear
        # several times on the page (two CKEditor instances, or twice in
        # the HTML).
        processed_paths.append(original_location)
        write_fft(curdir, original_location, docname, icon_location,
                  doctype=attach_type)
    return '"' + new_url + '"'
def _get_feature_image(record, ln=CFG_SITE_LANG):
    """
    Looks for an image that can be featured on the article overview page.

    @param record: the article record to inspect
    @param ln: interface language; French articles keep their text in
        590__b with 520__b as fallback, other languages the reverse
    @return: URL of the image, or the empty string when none is found
    """
    src = ""
    if ln == "fr":
        article = "".join(record.fields("590__b"))
        if not article:
            article = "".join(record.fields("520__b"))
    else:
        article = "".join(record.fields("520__b"))
        if not article:
            article = "".join(record.fields("590__b"))
    # First try an inline image inside the article body.
    image = re.search(img_pattern, article)
    if image:
        src = image.group("image")
    if not src:
        # Look for an attached image.
        # BUGFIX: decompose_file() returns the extension WITH its leading
        # dot, so the dotless candidates could never match; also compare
        # case-insensitively.
        icons = [icon for icon in record.fields("8564_q")
                 if decompose_file(icon)[2].lower() in [".jpg", ".jpeg", ".png", ".gif"]]
        if icons:
            src = icons[0]
    return src
def _get_feature_image(record, ln=CFG_SITE_LANG):
    """Look up an image that can be featured on the article overview
    page; return its source URL, or '' when nothing suitable exists."""
    # The primary abstract field depends on the requested language.
    if ln == "fr":
        primary, secondary = '590__b', '520__b'
    else:
        primary, secondary = '520__b', '590__b'
    article = ''.join(record.fields(primary))
    if not article:
        article = ''.join(record.fields(secondary))
    src = ''
    image = re.search(img_pattern, article)
    if image is not None:
        src = image.group("image")
    if not src:
        # Fall back to an attached image, identified by its file extension.
        attached_images = [icon for icon in record.fields('8564_q')
                           if decompose_file(icon)[2] in ['jpg', 'jpeg',
                                                          'png', 'gif']]
        if attached_images:
            src = attached_images[0]
    return src
(fd, output_file) = tempfile.mkstemp(suffix=output_ext, dir=CFG_TMPDIR) os.close(fd) except IOError, err: raise InvenioWebSubmitFileConverterError("It's impossible to create a temporary file: %s" % err) else: output_file = os.path.abspath(output_file) if os.path.exists(output_file): os.remove(output_file) if need_working_dir: try: working_dir = tempfile.mkdtemp(dir=CFG_TMPDIR, prefix='conversion') except IOError, err: raise InvenioWebSubmitFileConverterError("It's impossible to create a temporary directory: %s" % err) input_ext = decompose_file(input_file, skip_version=True)[2] new_input_file = os.path.join(working_dir, 'input' + input_ext) shutil.copy(input_file, new_input_file) input_file = new_input_file else: working_dir = None input_file = os.path.abspath(input_file) debug('IO prepared: input_file=%s, output_file=%s, working_dir=%s' % (input_file, output_file, working_dir)) return (input_file, output_file, working_dir) def clean_working_dir(working_dir): """ Remove the working_dir. """
def process_CKEditor_upload(form, uid, user_files_path, user_files_absolute_path,
                            recid=None, allowed_types=default_allowed_types):
    """
    Process a file upload request.

    @param form: the form as in req object.
    @type form: dict
    @param uid: the user ID of the user uploading the file.
    @type uid: int
    @param user_files_path: the base URL where the file can be
        accessed from the web after upload.
        Note that you have to implement your own handler to stream the files
        from the directory C{user_files_absolute_path} if you set this value.
    @type user_files_path: string
    @param user_files_absolute_path: the base path on the server where
        the files should be saved.
        Eg:C{%(CFG_PREFIX)s/var/data/comments/%(recid)s/%(uid)s}
    @type user_files_absolute_path: string
    @param recid: the record ID for which we upload a file. Leave None if not relevant.
    @type recid: int
    @param allowed_types: types allowed for uploading. These
        are supported by CKEditor: ['File', 'Image', 'Flash', 'Media']
    @type allowed_types: list of strings
    @return: (msg, uploaded_file_path, uploaded_file_name, uploaded_file_url, callback_function)
    """
    msg = ''
    filename = ''
    formfile = None
    uploaded_file_path = ''
    # NOTE(review): this clobbers the caller-supplied user_files_path, so the
    # returned URL component of the tuple is always '' -- confirm intended.
    user_files_path = ''
    # Locate the uploaded file among the form fields: CKEditor sends it
    # under the 'upload' key.
    for key, formfields in form.items():
        if key != 'upload':
            continue
        if hasattr(formfields, "filename") and formfields.filename:
            # We have found our file
            filename = formfields.filename
            formfile = formfields.file
            break
    can_upload_file_p = False
    if not form['type'] in allowed_types:
        # Is the type sent through the form ok?
        msg = 'You are not allowed to upload a file of this type'
    else:
        # Is user allowed to upload such file extension?
        basedir, name, extension = decompose_file(filename)
        extension = extension[1:]  # strip leading dot
        if extension in allowed_extensions.get(form['type'], []):
            can_upload_file_p = True
    if not can_upload_file_p:
        msg = 'You are not allowed to upload a file of this type'
    elif filename and formfile:
        ## Before saving the file to disk, wash the filename (in particular
        ## washing away UNIX and Windows (e.g. DFS) paths):
        filename = os.path.basename(filename.split('\\')[-1])
        # Remove \ / | : ? *
        # NOTE(review): the trailing '/' after the character class makes the
        # last alternative match a control character FOLLOWED BY '/'; it looks
        # like a leftover -- verify against the intended washing rules.
        filename = re.sub ( '\\\\|\\/|\\||\\:|\\?|\\*|"|<|>|[\x00-\x1f\x7f-\x9f]/', '_', filename)
        filename = filename.strip()
        if filename != "":
            # Check that file does not already exist
            n = 1  # NOTE(review): unused -- probably a leftover counter.
            while os.path.exists(os.path.join(user_files_absolute_path, filename)):
                basedir, name, extension = decompose_file(filename)
                new_name = propose_next_docname(name)
                filename = new_name + extension
            # This may be dangerous if the file size is bigger than the available memory
            fp = open(os.path.join(user_files_absolute_path, filename), "w")
            fp.write(formfile.read())
            fp.close()
            uploaded_file_path = os.path.join(user_files_absolute_path, filename)
            uploaded_file_name = filename
    return (msg, uploaded_file_path, filename, user_files_path, form['CKEditorFuncNum'])
def write_metadata_local(inputfile, outputfile, metadata_dictionary, verbose):
    """
    Metadata write method, takes the .pdf as input and creates a new
    one with the new info.

    @param inputfile: path to the pdf
    @type inputfile: string
    @param outputfile: path to the resulting pdf
    @type outputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param metadata_dictionary: metadata information to update inputfile
    @type metadata_dictionary: dict
    """
    # Take the file name (0 base, 1 name, 2 ext)
    filename = decompose_file(inputfile)[1]

    # Print pdf metadata
    if verbose > 1:
        print 'Metadata information in the PDF file ' + filename + ': \n'
        try:
            # NOTE(review): inputfile is interpolated unquoted into a shell
            # command; paths containing spaces or shell metacharacters will
            # break (or worse) -- consider run_shell_command() as used below.
            os.system(CFG_PATH_PDFTK + ' ' + inputfile + ' dump_data')
        except Exception:
            print 'Problem with inputfile to PDFTK'

    # Info file for pdftk: pdftk reads the metadata to apply from a
    # temporary "InfoKey:/InfoValue:" text file.
    (fd, path_to_info) = tempfile.mkstemp(prefix="wsm_pdf_plugin_info_", \
                                          dir=CFG_TMPDIR)
    os.close(fd)
    file_in = open(path_to_info, 'w')
    if verbose > 5:
        print "Saving PDFTK info file to %s" % path_to_info

    # User interaction to form the info file
    # Main Case: Dictionary received through option -d
    if not metadata_dictionary == {}:
        for tag in metadata_dictionary:
            line = 'InfoKey: ' + tag + '\nInfoValue: ' + \
                   metadata_dictionary[tag] + '\n'
            if verbose > 0:
                print line
            file_in.writelines(line)
    else:
        # No dictionary given: ask the user interactively for the
        # InfoKey/InfoValue pairs to write.
        data_modified = False
        user_input = 'user_input'  # any truthy value, just to enter the loop
        print "Entering interactive mode. Choose what you want to do:"
        while (user_input):
            if not data_modified:
                try:
                    user_input = raw_input('[w]rite / [q]uit\n')
                except:
                    print "Aborting"
                    return
            else:
                try:
                    user_input = raw_input('[w]rite / [q]uit and apply / [a]bort \n')
                except:
                    print "Aborting"
                    return
            if user_input == 'q':
                # Quit: leave without touching the PDF unless something was
                # already modified (then fall through and apply the changes).
                if not data_modified:
                    return
                break
            elif user_input == 'w':
                try:
                    tag = raw_input('Tag to update:\n')
                    value = raw_input('With value:\n')
                except:
                    print "Aborting"
                    return
                # Write to info file
                line = 'InfoKey: ' + tag + '\nInfoValue: ' + value + '\n'
                data_modified = True
                file_in.writelines(line)
            elif user_input == 'a':
                return
            else:
                print "Invalid option: "
    # NOTE(review): the early `return`s above leak file_in and the temp
    # info file -- confirm whether cleanup is handled by a caller.
    file_in.close()

    (fd, pdf_temp_path) = tempfile.mkstemp(prefix="wsm_pdf_plugin_pdf_", \
                                           dir=CFG_TMPDIR)
    os.close(fd)

    # Now we call pdftk tool to update the info on a pdf
    # (the %s placeholders are filled in by run_shell_command from args)
    cmd_pdftk = '%s %s update_info %s output %s'
    (exit_status, output_std, output_err) = \
        run_shell_command(cmd_pdftk, args=(CFG_PATH_PDFTK, inputfile,
                                           path_to_info, pdf_temp_path))
    if verbose > 5:
        print output_std, output_err

    if os.path.exists(pdf_temp_path):
        # Move to final destination if exist
        try:
            shutil.move(pdf_temp_path, outputfile)
        except Exception, err:
            raise InvenioWebSubmitFileMetadataRuntimeError("Could not move %s to %s" % \
                                                           (pdf_temp_path, outputfile))
def createRelatedFormats(fullpath, overwrite=True, debug=False, consider_version=False):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.

    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats
    @param debug: (bool) if True, temporarily raise the converter logger
        to DEBUG level and print diagnostics to stderr
    @param consider_version: (bool) if True, consider the version info
        in C{fullpath} to find missing format for that specific version,
        if C{fullpath} contains version info

    Return a list of the paths to the converted files
    """
    file_converter_logger = get_file_converter_logger()
    old_logging_level = file_converter_logger.getEffectiveLevel()
    if debug:
        file_converter_logger.setLevel(DEBUG)
    try:
        createdpaths = []
        if consider_version:
            # NOTE(review): bare except -- any failure of
            # decompose_file_with_version silently falls back to the
            # version-less decomposition.
            try:
                basedir, filename, extension, version = decompose_file_with_version(fullpath)
            except:
                basedir, filename, extension = decompose_file(fullpath)
                version = 0
        else:
            basedir, filename, extension = decompose_file(fullpath)
            version = 0
        extension = extension.lower()
        if debug:
            print >> sys.stderr, "basedir: %s, filename: %s, extension: %s" % (basedir, filename, extension)

        if overwrite:
            # Regenerate everything derivable from this single file.
            missing_formats = get_missing_formats([fullpath])
        else:
            # Only generate formats not already present next to the file.
            # NOTE(review): the '%s*' glob also matches sibling files whose
            # name merely starts with this filename -- confirm acceptable.
            if version:
                filelist = glob.glob(os.path.join(basedir, '%s*;%s' % (filename, version)))
            else:
                filelist = glob.glob(os.path.join(basedir, '%s*' % filename))
            if debug:
                print >> sys.stderr, "filelist: %s" % filelist
            missing_formats = get_missing_formats(filelist)
        if debug:
            print >> sys.stderr, "missing_formats: %s" % missing_formats

        for path, formats in missing_formats.iteritems():
            if debug:
                print >> sys.stderr, "... path: %s, formats: %s" % (path, formats)
            for aformat in formats:
                if debug:
                    print >> sys.stderr, "...... aformat: %s" % aformat
                newpath = os.path.join(basedir, filename + aformat)
                if debug:
                    print >> sys.stderr, "...... newpath: %s" % newpath
                try:
                    if CFG_BIBDOCFILE_FILEDIR in basedir:
                        # We should create the new files in a temporary location, not
                        # directly inside the BibDoc directory.
                        newpath = convert_file(path, output_format=aformat)
                    else:
                        convert_file(path, newpath)
                    createdpaths.append(newpath)
                except InvenioWebSubmitFileConverterError, msg:
                    # A failed conversion is reported to the admin but does
                    # not abort the remaining conversions.
                    if debug:
                        print >> sys.stderr, "...... Exception: %s" % msg
                    register_exception(alert_admin=True)
    finally:
        # Always restore the logger level changed above.
        if debug:
            file_converter_logger.setLevel(old_logging_level)
    return createdpaths
class WebInterfaceSubmitPages(WebInterfaceDirectory): _exports = ['summary', 'sub', 'direct', '', 'attachfile', 'uploadfile', \ 'getuploadedfile', 'upload_video', ('continue', 'continue_')] def uploadfile(self, req, form): """ Similar to /submit, but only consider files. Nice for asynchronous Javascript uploads. Should be used to upload a single file. Also try to create an icon, and return URL to file(s) + icon(s) Authentication is performed based on session ID passed as parameter instead of cookie-based authentication, due to the use of this URL by the Flash plugin (to upload multiple files at once), which does not route cookies. FIXME: consider adding /deletefile and /modifyfile functions + parsing of additional parameters to rename files, add comments, restrictions, etc. """ argd = wash_urlargd( form, { 'doctype': (str, ''), 'access': (str, ''), 'indir': (str, ''), 'session_id': (str, ''), 'rename': (str, ''), }) curdir = None if not form.has_key("indir") or \ not form.has_key("doctype") or \ not form.has_key("access"): raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST) else: curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'], argd['doctype'], argd['access']) user_info = collect_user_info(req) if form.has_key("session_id"): # Are we uploading using Flash, which does not transmit # cookie? The expect to receive session_id as a form # parameter. First check that IP addresses do not # mismatch. A ValueError will be raises if there is # something wrong session = get_session(req=req, sid=argd['session_id']) try: session = get_session(req=req, sid=argd['session_id']) except ValueError, e: raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST) # Retrieve user information. We cannot rely on the session here. 
res = run_sql("SELECT uid FROM session WHERE session_key=%s", (argd['session_id'], )) if len(res): uid = res[0][0] user_info = collect_user_info(uid) try: act_fd = file(os.path.join(curdir, 'act')) action = act_fd.read() act_fd.close() except: action = "" # Is user authorized to perform this action? (auth_code, auth_message) = acc_authorize_action( uid, "submit", authorized_if_no_roles=not isGuestUser(uid), verbose=0, doctype=argd['doctype'], act=action) if acc_is_role("submit", doctype=argd['doctype'], act=action) and auth_code != 0: # User cannot submit raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED) else: # Process the upload and get the response added_files = {} for key, formfields in form.items(): filename = key.replace("[]", "") file_to_open = os.path.join(curdir, filename) if hasattr(formfields, "filename") and formfields.filename: dir_to_open = os.path.abspath( os.path.join(curdir, 'files', str(user_info['uid']), key)) try: assert ( dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR)) except AssertionError: register_exception(req=req, prefix='curdir="%s", key="%s"' % (curdir, key)) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) if not os.path.exists(dir_to_open): try: os.makedirs(dir_to_open) except OSError, e: if e.errno != errno.EEXIST: # If the issue is only that directory # already exists, then continue, else # report register_exception(req=req, alert_admin=True) raise apache.SERVER_RETURN( apache.HTTP_FORBIDDEN) filename = formfields.filename ## Before saving the file to disc, wash the filename (in particular ## washing away UNIX and Windows (e.g. 
DFS) paths): filename = os.path.basename(filename.split('\\')[-1]) filename = filename.strip() if filename != "": # Check that file does not already exist n = 1 while os.path.exists( os.path.join(dir_to_open, filename)): #dirname, basename, extension = decompose_file(new_destination_path) basedir, name, extension = decompose_file(filename) new_name = propose_next_docname(name) filename = new_name + extension # This may be dangerous if the file size is bigger than the available memory fp = open(os.path.join(dir_to_open, filename), "w") fp.write(formfields.file.read()) fp.close() fp = open(os.path.join(curdir, "lastuploadedfile"), "w") fp.write(filename) fp.close() fp = open(file_to_open, "w") fp.write(filename) fp.close() try: # Create icon (icon_path, icon_name) = create_icon({ 'input-file': os.path.join(dir_to_open, filename), 'icon-name': filename, # extension stripped automatically 'icon-file-format': 'gif', 'multipage-icon': False, 'multipage-icon-delay': 100, 'icon-scale': "300>", # Resize only if width > 300 'verbosity': 0, }) icons_dir = os.path.join( os.path.join(curdir, 'icons', str(user_info['uid']), key)) if not os.path.exists(icons_dir): # Create uid/icons dir if needed try: os.makedirs(icons_dir) except OSError, e: if e.errno != errno.EEXIST: # If the issue is only that # directory already exists, # then continue, else report register_exception(req=req, alert_admin=True) raise apache.SERVER_RETURN( apache.HTTP_FORBIDDEN) os.rename(os.path.join(icon_path, icon_name), os.path.join(icons_dir, icon_name)) added_files[key] = { 'name': filename, 'iconName': icon_name } except InvenioWebSubmitIconCreatorError, e: # We could not create the icon added_files[key] = {'name': filename} continue else:
def process_batch_job(batch_job_file):
    """Processes a batch job description dictionary

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 if the process was successful, 0 if not
    @rtype: int
    """

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = 'bibencode_'+ str(batch_job['recid']) + '_' + str(uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
        xml_file = file(xml_filename, 'w')
        xml_file.write(marcxml)
        xml_file.close()
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errrors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    if record_exists(batch_job['recid']) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description', description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat', subformat)
                if (comment == m_comment and
                        description == m_description and
                        subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect of the from the record
                    try:
                        ## Assumes pbcore metadata mapping
                        # BUGFIX: was get_fieldvalues(124, ...) -- a record id
                        # hardcoded (probably left over from testing); use the
                        # record this batch job is actually about.
                        batch_job['aspect'] = get_fieldvalues(
                            batch_job['recid'],
                            CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                                % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job, 'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)

    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory auf the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first an then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc"
                                % bibdoc_video_docname)
            master_format = compose_format(
                bibdoc_video_extension,
                getval(batch_job, 'bibdoc_master_subformat', 'master')
            )
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                batch_job['input'],
                version=1,
                description=getval(batch_job, 'bibdoc_master_description'),
                comment=getval(batch_job, 'bibdoc_master_comment'),
                docformat=master_format
            )

    #-----------#
    # JOBS LOOP #
    #-----------#

    return_code = 1
    global _BATCH_STEP

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------"
                            % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(job['bibdoc_docname']).safe_substitute(
                {'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, 'assure_quality') and getval(job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None

            ## We need an extension defined fot the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")

            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)

            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(
                bibdoc_video_directory,
                bibdoc_slave_video_docname,
                bibdoc_video_extension
            )
            _task_write_message("Transcoding %s to %s;%s"
                                % (bibdoc_slave_video_docname,
                                   bibdoc_video_extension,
                                   bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            encoding_result = encode_video(
                input_file=batch_job['input'],
                output_file=bibdoc_video_fullpath,
                acodec=getval(job, 'audiocodec'),
                vcodec=getval(job, 'videocodec'),
                # BUGFIX: the audio and video bitrates were swapped --
                # abitrate read 'videobitrate' and vbitrate 'audiobitrate'.
                abitrate=getval(job, 'audiobitrate'),
                vbitrate=getval(job, 'videobitrate'),
                resolution=getval(job, 'resolution'),
                passes=getval(job, 'passes', 1),
                special=getval(job, 'special'),
                specialfirst=getval(job, 'specialfirst'),
                specialsecond=getval(job, 'specialsecond'),
                metadata=getval(job, 'metadata'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),  # Aspect for every job
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                message_fnc=_task_write_message
            )
            return_code &= encoding_result

            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(bibdoc_video_fullpath,
                          compose_file(bibdoc_video_directory,
                                       bibdoc_video_extension,
                                       bibdoc_video_subformat,
                                       1,
                                       bibdoc_slave_video_docname)
                          )
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                             bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(getval(job, 'bibdoc_description'),
                                                 bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            #Move this to the batch description
            bibdoc_frame_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
            tmpfname = (tmpdir + "/" + bibdoc_frame_docname + '.'
                        + getval(profile, 'extension', getval(job, 'extension', 'jpg')))
            extraction_result = extract_frames(
                input_file=batch_job['input'],
                output_file=tmpfname,
                size=getval(job, 'size'),
                positions=getval(job, 'positions'),
                numberof=getval(job, 'numberof'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
            )
            return_code &= extraction_result

            ## only on success:
            if extraction_result:
                ## for every filename in the directorys, create a bibdoc that contains
                ## all sizes of the frame from the two directories
                files = os.listdir(tmpdir)
                for filename in files:
                    ## The docname was altered by BibEncode extract through substitution
                    ## Retrieve it from the filename again
                    bibdoc_frame_docname, bibdoc_frame_extension = os.path.splitext(filename)
                    _task_write_message("Creating new bibdoc for %s" % bibdoc_frame_docname)
                    ## If the bibdoc exists, receive it
                    if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                        bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                    ## Create a new bibdoc if it does not exist
                    else:
                        bibdoc_frame = recdoc.add_bibdoc(docname=bibdoc_frame_docname)

                    ## The filename including path from tmpdir
                    fname = os.path.join(tmpdir, filename)
                    bibdoc_frame_format = compose_format(bibdoc_frame_extension,
                                                         bibdoc_frame_subformat)
                    ## Same as with the master, if the format allready exists,
                    ## override it, because something went wrong before
                    if bibdoc_frame.format_already_exists_p(bibdoc_frame_format):
                        bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                    _task_write_message("Adding %s jpg;%s to BibDoc"
                                        % (bibdoc_frame_docname,
                                           getval(job, 'bibdoc_subformat')))
                    bibdoc_frame.add_file_new_format(
                        fname,
                        version=1,
                        description=getval(job, 'bibdoc_description'),
                        comment=getval(job, 'bibdoc_comment'),
                        docformat=bibdoc_frame_format)
                ## Remove the temporary folders
                _task_write_message("Removing temporary directory")
                shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file = getval(batch_job, 'input'),
                                 pbcoreIdentifier = batch_job['recid'],
                                 aspect_override = getval(batch_job, 'aspect'))
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        marc_snippet = open(getval(batch_job, 'marc_snippet'))
        marcxml = marc_snippet.read()
        marc_snippet.close()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        # BUGFIX: the input must only be removed when all jobs succeeded
        # (return_code is 1 on success, as the notification block below
        # shows); the original tested `if not return_code:` and therefore
        # deleted the source exactly when a job had FAILED.
        if return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern', '') in getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            # BUGFIX: the original wrote `type(getval(...) == type(str()))`,
            # taking type() OF the comparison result -- always truthy. The
            # intent is: a string value is an explicit admin address, any
            # other truthy value falls back to the default admin.
            if isinstance(getval(batch_job, 'notify_admin'), basestring):
                _notify_error_admin(batch_job,
                                    getval(batch_job, 'notify_admin'))
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(getval(batch_job, 'notify_user'),
                                 getval(batch_job, 'submission_filename', batch_job['input']),
                                 getval(batch_job, 'recid'),
                                 getval(batch_job, 'submission_title', ""))
    return 1
# If the issue is only that directory # already exists, then continue, else # report register_exception(req=req, alert_admin=True) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) filename = formfields.filename ## Before saving the file to disc, wash the filename (in particular ## washing away UNIX and Windows (e.g. DFS) paths): filename = os.path.basename(filename.split('\\')[-1]) filename = filename.strip() if filename != "": # Check that file does not already exist while os.path.exists(os.path.join(dir_to_open, filename)): #dirname, basename, extension = decompose_file(new_destination_path) basedir, name, extension = decompose_file(filename) new_name = propose_next_docname(name) filename = new_name + extension #-------------# # VIDEO STUFF # #-------------# ## Remove all previous uploads filelist = os.listdir(os.path.split(formfields.file.name)[0]) for afile in filelist: if argd['access'] in afile: os.remove(os.path.join(os.path.split(formfields.file.name)[0], afile)) ## Check if the file is a readable video ## We must exclude all image and audio formats that are readable by ffprobe
def add(self, req, form): """ Add a comment (review) to record with id recid where recid>0 Also works for adding a remark to basket with id recid where recid<-99 @param ln: languange @param recid: record id @param action: 'DISPLAY' to display add form 'SUBMIT' to submit comment once form is filled 'REPLY' to reply to an already existing comment @param msg: the body of the comment/review or remark @param score: star score of the review @param note: title of the review @param comid: comment id, needed for replying @param editor_type: the type of editor used for submitting the comment: 'textarea', 'ckeditor'. @param subscribe: if set, subscribe user to receive email notifications when new comment are added to this discussion @return the full html page. """ argd = wash_urlargd(form, {'action': (str, "DISPLAY"), 'msg': (str, ""), 'note': (str, ''), 'score': (int, 0), 'comid': (int, 0), 'editor_type': (str, ""), 'subscribe': (str, ""), 'cookie': (str, "") }) _ = gettext_set_language(argd['ln']) actions = ['DISPLAY', 'REPLY', 'SUBMIT'] uid = getUid(req) # Is site ready to accept comments? if uid == -1 or (not CFG_WEBCOMMENT_ALLOW_COMMENTS and not CFG_WEBCOMMENT_ALLOW_REVIEWS): return page_not_authorized(req, "../comments/add", navmenuid='search') # Is user allowed to post comment? 
user_info = collect_user_info(req) (auth_code_1, auth_msg_1) = check_user_can_view_comments(user_info, self.recid) (auth_code_2, auth_msg_2) = check_user_can_send_comments(user_info, self.recid) if isGuestUser(uid): cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) # Save user's value in cookie, so that these "POST" # parameters are not lost during login process msg_cookie = mail_cookie_create_common('comment_msg', {'msg': argd['msg'], 'note': argd['note'], 'score': argd['score'], 'editor_type': argd['editor_type'], 'subscribe': argd['subscribe']}, onetime=True) target = '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \ CFG_SITE_SECURE_URL + user_info['uri'] + '&cookie=' + msg_cookie}, {}) return redirect_to_url(req, target, norobot=True) elif (auth_code_1 or auth_code_2): return page_not_authorized(req, "../", \ text = auth_msg_1 + auth_msg_2) if argd['comid']: # If replying to a comment, are we on a record that # matches the original comment user is replying to? if not check_comment_belongs_to_record(argd['comid'], self.recid): return page_not_authorized(req, "../", \ text = _("Specified comment does not belong to this record")) # Is user trying to reply to a restricted comment? Make # sure user has access to it. We will then inherit its # restriction for the new comment (auth_code, auth_msg) = check_user_can_view_comment(user_info, argd['comid']) if auth_code: return page_not_authorized(req, "../", \ text = _("You do not have access to the specified comment")) # Is user trying to reply to a deleted comment? 
If so, we # let submitted comment go (to not lose possibly submitted # content, if comment is submitted while original is # deleted), but we "reset" comid to make sure that for # action 'REPLY' the original comment is not included in # the reply if is_comment_deleted(argd['comid']): argd['comid'] = 0 user_info = collect_user_info(req) can_attach_files = False (auth_code, auth_msg) = check_user_can_attach_file_to_comments(user_info, self.recid) if not auth_code and (user_info['email'] != 'guest'): can_attach_files = True warning_msgs = [] # list of warning tuples (warning_text, warning_color) added_files = {} if can_attach_files: # User is allowed to attach files. Process the files file_too_big = False formfields = form.get('commentattachment[]', []) if not hasattr(formfields, "__getitem__"): # A single file was uploaded formfields = [formfields] for formfield in formfields[:CFG_WEBCOMMENT_MAX_ATTACHED_FILES]: if hasattr(formfield, "filename") and formfield.filename: filename = formfield.filename dir_to_open = os.path.join(CFG_TMPDIR, 'webcomment', str(uid)) try: assert(dir_to_open.startswith(CFG_TMPDIR)) except AssertionError: register_exception(req=req, prefix='User #%s tried to upload file to forbidden location: %s' \ % (uid, dir_to_open)) if not os.path.exists(dir_to_open): try: os.makedirs(dir_to_open) except: register_exception(req=req, alert_admin=True) ## Before saving the file to disc, wash the filename (in particular ## washing away UNIX and Windows (e.g. DFS) paths): filename = os.path.basename(filename.split('\\')[-1]) filename = filename.strip() if filename != "": # Check that file does not already exist n = 1 while os.path.exists(os.path.join(dir_to_open, filename)): basedir, name, extension = decompose_file(filename) new_name = propose_next_docname(name) filename = new_name + extension fp = open(os.path.join(dir_to_open, filename), "w") # FIXME: temporary, waiting for wsgi handler to be # fixed. 
Once done, read chunk by chunk ## while formfield.file: ## fp.write(formfield.file.read(10240)) fp.write(formfield.file.read()) fp.close() # Isn't this file too big? file_size = os.path.getsize(os.path.join(dir_to_open, filename)) if CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE > 0 and \ file_size > CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE: os.remove(os.path.join(dir_to_open, filename)) # One file is too big: record that, # dismiss all uploaded files and re-ask to # upload again file_too_big = True try: raise InvenioWebCommentWarning(_('The size of file \\"%s\\" (%s) is larger than maximum allowed file size (%s). Select files again.') % (cgi.escape(filename), str(file_size/1024) + 'KB', str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE/1024) + 'KB')) except InvenioWebCommentWarning, exc: register_exception(stream='warning') warning_msgs.append((exc.message, '')) #warning_msgs.append(('WRN_WEBCOMMENT_MAX_FILE_SIZE_REACHED', cgi.escape(filename), str(file_size/1024) + 'KB', str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE/1024) + 'KB')) else: added_files[filename] = os.path.join(dir_to_open, filename) if file_too_big: # One file was too big. Removed all uploaded filed for filepath in added_files.items(): try: os.remove(filepath) except: # File was already removed or does not exist? pass
def upload_video(self, req, form): """ A clone of uploadfile but for (large) videos. Does not copy the uploaded file to the websubmit directory. Instead, the path to the file is stored inside the submission directory. """ def gcd(a, b): """ the euclidean algorithm """ while a: a, b = b % a, a return b from invenio.bibencode_extract import extract_frames from invenio.bibencode_config import CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR, CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME from invenio.bibencode_encode import determine_aspect from invenio.bibencode_utils import probe from invenio.bibencode_metadata import ffprobe_metadata from invenio.websubmit_config import CFG_WEBSUBMIT_TMP_VIDEO_PREFIX argd = wash_urlargd( form, { 'doctype': (str, ''), 'access': (str, ''), 'indir': (str, ''), 'session_id': (str, ''), 'rename': (str, ''), }) curdir = None if not form.has_key("indir") or \ not form.has_key("doctype") or \ not form.has_key("access"): raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST) else: curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'], argd['doctype'], argd['access']) user_info = collect_user_info(req) if form.has_key("session_id"): # Are we uploading using Flash, which does not transmit # cookie? The expect to receive session_id as a form # parameter. First check that IP addresses do not # mismatch. uid = session.uid user_info = collect_user_info(uid) try: act_fd = file(os.path.join(curdir, 'act')) action = act_fd.read() act_fd.close() except: act = "" # Is user authorized to perform this action? 
(auth_code, auth_message) = acc_authorize_action( uid, "submit", authorized_if_no_roles=not isGuestUser(uid), verbose=0, doctype=argd['doctype'], act=action) if acc_is_role("submit", doctype=argd['doctype'], act=action) and auth_code != 0: # User cannot submit raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED) else: # Process the upload and get the response json_response = {} for key, formfields in form.items(): filename = key.replace("[]", "") if hasattr(formfields, "filename") and formfields.filename: dir_to_open = os.path.abspath( os.path.join(curdir, 'files', str(user_info['uid']), key)) try: assert ( dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR)) except AssertionError: register_exception(req=req, prefix='curdir="%s", key="%s"' % (curdir, key)) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) if not os.path.exists(dir_to_open): try: os.makedirs(dir_to_open) except OSError, e: if e.errno != errno.EEXIST: # If the issue is only that directory # already exists, then continue, else # report register_exception(req=req, alert_admin=True) raise apache.SERVER_RETURN( apache.HTTP_FORBIDDEN) filename = formfields.filename ## Before saving the file to disc, wash the filename (in particular ## washing away UNIX and Windows (e.g. 
DFS) paths): filename = os.path.basename(filename.split('\\')[-1]) filename = filename.strip() if filename != "": # Check that file does not already exist while os.path.exists( os.path.join(dir_to_open, filename)): #dirname, basename, extension = decompose_file(new_destination_path) basedir, name, extension = decompose_file(filename) new_name = propose_next_docname(name) filename = new_name + extension #-------------# # VIDEO STUFF # #-------------# ## Remove all previous uploads filelist = os.listdir( os.path.split(formfields.file.name)[0]) for afile in filelist: if argd['access'] in afile: os.remove( os.path.join( os.path.split(formfields.file.name)[0], afile)) ## Check if the file is a readable video ## We must exclude all image and audio formats that are readable by ffprobe if (os.path.splitext(filename)[1] in [ 'jpg', 'jpeg', 'gif', 'tiff', 'bmp', 'png', 'tga', 'jp2', 'j2k', 'jpf', 'jpm', 'mj2', 'biff', 'cgm', 'exif', 'img', 'mng', 'pic', 'pict', 'raw', 'wmf', 'jpe', 'jif', 'jfif', 'jfi', 'tif', 'webp', 'svg', 'ai', 'ps', 'psd', 'wav', 'mp3', 'pcm', 'aiff', 'au', 'flac', 'wma', 'm4a', 'wv', 'oga', 'm4a', 'm4b', 'm4p', 'm4r', 'aac', 'mp4', 'vox', 'amr', 'snd' ] or not probe(formfields.file.name)): formfields.file.close() raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) ## We have no "delete" attribute in Python 2.4 if sys.hexversion < 0x2050000: ## We need to rename first and create a dummy file ## Rename the temporary file for the garbage collector new_tmp_fullpath = os.path.split( formfields.file.name )[0] + "/" + CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd[ 'access'] + "_" + os.path.split( formfields.file.name)[1] os.rename(formfields.file.name, new_tmp_fullpath) dummy = open(formfields.file.name, "w") dummy.close() formfields.file.close() else: # Mark the NamedTemporatyFile as not to be deleted formfields.file.delete = False formfields.file.close() ## Rename the temporary file for the garbage collector new_tmp_fullpath = os.path.split( formfields.file.name )[0] 
+ "/" + CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd[ 'access'] + "_" + os.path.split( formfields.file.name)[1] os.rename(formfields.file.name, new_tmp_fullpath) # Write the path to the temp file to a file in STORAGEDIR fp = open(os.path.join(dir_to_open, "filepath"), "w") fp.write(new_tmp_fullpath) fp.close() fp = open(os.path.join(dir_to_open, "filename"), "w") fp.write(filename) fp.close() ## We are going to extract some thumbnails for websubmit ## sample_dir = os.path.join( curdir, 'files', str(user_info['uid']), CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR) try: ## Remove old thumbnails shutil.rmtree(sample_dir) except OSError: register_exception(req=req, alert_admin=False) try: os.makedirs( os.path.join(curdir, 'files', str(user_info['uid']), sample_dir)) except OSError: register_exception(req=req, alert_admin=False) try: extract_frames( input_file=new_tmp_fullpath, output_file=os.path.join( sample_dir, CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME ), size="600x600", numberof=5) json_response['frames'] = [] for extracted_frame in os.listdir(sample_dir): json_response['frames'].append(extracted_frame) except: ## If the frame extraction fails, something was bad with the video os.remove(new_tmp_fullpath) register_exception(req=req, alert_admin=False) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) ## Try to detect the aspect. 
if this fails, the video is not readable ## or a wrong file might have been uploaded try: (aspect, width, height) = determine_aspect(new_tmp_fullpath) if aspect: aspx, aspy = aspect.split(':') else: the_gcd = gcd(width, height) aspx = str(width / the_gcd) aspy = str(height / the_gcd) json_response['aspx'] = aspx json_response['aspy'] = aspy except TypeError: ## If the aspect detection completely fails os.remove(new_tmp_fullpath) register_exception(req=req, alert_admin=False) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) ## Try to extract some metadata from the video container metadata = ffprobe_metadata(new_tmp_fullpath) json_response['meta_title'] = metadata['format'].get( 'TAG:title') json_response['meta_description'] = metadata[ 'format'].get('TAG:description') json_response['meta_year'] = metadata['format'].get( 'TAG:year') json_response['meta_author'] = metadata['format'].get( 'TAG:author') ## Empty file name else: raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST) ## We found our file, we can break the loop break # Send our response if CFG_JSON_AVAILABLE: dumped_response = json.dumps(json_response) # store the response in the websubmit directory # this is needed if the submission is not finished and continued later response_dir = os.path.join(curdir, 'files', str(user_info['uid']), "response") try: os.makedirs(response_dir) except OSError: # register_exception(req=req, alert_admin=False) pass fp = open(os.path.join(response_dir, "response"), "w") fp.write(dumped_response) fp.close() return dumped_response
def read_metadata(inputfile, force=None, remote=False, loginpw=None, verbose=0): """ Returns metadata extracted from given file as dictionary. Availability depends on input file format and installed plugins (return C{TypeError} if unsupported file format). @param inputfile: path to a file @type inputfile: string @param verbose: verbosity @type verbose: int @param force: name of plugin to use, to skip plugin auto-discovery @type force: string @param remote: if the file is accessed remotely or not @type remote: boolean @param loginpw: credentials to access secure servers (username:password) @type loginpw: string @return: dictionary of metadata tags as keys, and (interpreted) value as value @rtype: dict @raise TypeError: if file format is not supported. @raise RuntimeError: if required library to process file is missing. @raise InvenioWebSubmitFileMetadataRuntimeError: when metadata cannot be read. """ metadata = None # Check file type (0 base, 1 name, 2 ext) ext = decompose_file(inputfile)[2] if verbose > 5: print ext.lower(), 'extension to extract from' # Load plugins metadata_extractor_plugins = PluginContainer( os.path.join(CFG_PYLIBDIR, 'invenio', 'websubmit_file_metadata_plugins', 'wsm_*.py'), plugin_builder=plugin_builder_function, api_version=__required_plugin_API_version__) # Loop through the plugins to find a good one for given file for plugin_name, plugin in metadata_extractor_plugins.iteritems(): # Local file if plugin.has_key('can_read_local') and \ plugin['can_read_local'](inputfile) and not remote and \ (not force or plugin_name == force): if verbose > 5: print 'Using ' + plugin_name fetched_metadata = plugin['read_metadata_local'](inputfile, verbose) if not metadata: metadata = fetched_metadata else: metadata.update(fetched_metadata) # Remote file elif remote and plugin.has_key('can_read_remote') and \ plugin['can_read_remote'](inputfile) and \ (not force or plugin_name == force): if verbose > 5: print 'Using ' + plugin_name fetched_metadata = 
plugin['read_metadata_remote'](inputfile, loginpw, verbose) if not metadata: metadata = fetched_metadata else: metadata.update(fetched_metadata) # Return in case we have something if metadata is not None: return metadata # Case of no plugin found, raise raise TypeError, 'Unsupported file type'
def createRelatedFormats(fullpath, overwrite=True, debug=False,
                         consider_version=False):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.

    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats; when
        False, only the formats missing next to C{fullpath} on disk are
        generated
    @param debug: (bool) emit detailed tracing to stderr and raise the
        converter's log level for the duration of this call
    @param consider_version: (bool) if True, consider the version info
        in C{fullpath} to find missing format for that specific version,
        if C{fullpath} contains version info

    Return a list of the paths to the converted files
    """
    file_converter_logger = get_file_converter_logger()
    # Remember the converter's log level so it can be restored on exit.
    old_logging_level = file_converter_logger.getEffectiveLevel()
    if debug:
        file_converter_logger.setLevel(DEBUG)
    try:
        createdpaths = []
        if consider_version:
            try:
                basedir, filename, extension, version = decompose_file_with_version(fullpath)
            except:
                # fullpath carried no version info: fall back to a
                # version-less decomposition.
                basedir, filename, extension = decompose_file(fullpath)
                version = 0
        else:
            basedir, filename, extension = decompose_file(fullpath)
            version = 0
        extension = extension.lower()
        if debug:
            print >> sys.stderr, "basedir: %s, filename: %s, extension: %s" % (basedir, filename, extension)

        if overwrite:
            # Regenerate every format derivable from this very file,
            # regardless of what already exists on disk.
            missing_formats = get_missing_formats([fullpath])
        else:
            # Only generate the formats not already present next to
            # fullpath (optionally restricted to the same ';version').
            if version:
                filelist = glob.glob(os.path.join(basedir, '%s*;%s' % (filename, version)))
            else:
                filelist = glob.glob(os.path.join(basedir, '%s*' % filename))
            if debug:
                print >> sys.stderr, "filelist: %s" % filelist
            missing_formats = get_missing_formats(filelist)
        if debug:
            print >> sys.stderr, "missing_formats: %s" % missing_formats
        # missing_formats maps each source path to the formats to create
        # from it.
        for path, formats in missing_formats.iteritems():
            if debug:
                print >> sys.stderr, "... path: %s, formats: %s" % (path, formats)
            for aformat in formats:
                if debug:
                    print >> sys.stderr, "...... aformat: %s" % aformat
                newpath = os.path.join(basedir, filename + aformat)
                if debug:
                    print >> sys.stderr, "...... newpath: %s" % newpath
                try:
                    if CFG_BIBDOCFILE_FILEDIR in basedir:
                        # We should create the new files in a temporary location, not
                        # directly inside the BibDoc directory.
                        newpath = convert_file(path, output_format=aformat)
                    else:
                        convert_file(path, newpath)
                    createdpaths.append(newpath)
                except InvenioWebSubmitFileConverterError, msg:
                    # A failed conversion is reported to the admin but does
                    # not abort the remaining conversions.
                    if debug:
                        print >> sys.stderr, "...... Exception: %s" % msg
                    register_exception(alert_admin=True)
    finally:
        # Restore the converter's log level even when a conversion failed.
        if debug:
            file_converter_logger.setLevel(old_logging_level)
    return createdpaths
def Move_Files_to_Storage(parameters, curdir, form, user_info=None):
    """
    The function moves files received from the standard submission's
    form through file input element(s).  The documents are assigned a
    'doctype' (or category) corresponding to the file input element
    (eg. a file uploaded throught 'DEMOPIC_FILE' will go to
    'DEMOPIC_FILE' doctype/category).

    Websubmit engine builds the following file organization in the
    directory curdir/files:

                  curdir/files
                        |
      ______________________________________________________________
            |                          |                        |
      ./file input 1 element's name   ./file input 2 element's name  ....
      (for eg. 'DEMOART_MAILFILE')    (for eg. 'DEMOART_APPENDIX')
            |                          |
       test1.pdf                   test2.pdf

    There is only one instance of all possible extension(pdf, gz...) in
    each part otherwise we may encounter problems when renaming files.

    + parameters['rename']: if given, all the files in curdir/files
      are renamed.  parameters['rename'] is of the form:
      <PA>elemfilename[re]</PA>* where re is an regexp to select (using
      re.sub) what part of the elem file has to be selected.
      e.g: <PA>file:TEST_FILE_RN</PA>

    + parameters['documenttype']: if given, other formats are created.
      It has 2 possible values:
      - if "picture" icon in gif format is created
      - if "fulltext" ps, gz .... formats are created

    + parameters['paths_and_suffixes']: directories to look into and
      corresponding suffix to add to every file inside.  It must have
      the same structure as a Python dictionnary of the following form
      {'FrenchAbstract':'french', 'EnglishAbstract':''}
      The keys are the file input element name from the form <=>
      directories in curdir/files.  The values associated are the
      suffixes which will be added to all the files in
      e.g. curdir/files/FrenchAbstract

    + parameters['iconsize']: needed only if 'icon' is selected in
      parameters['documenttype']

    + parameters['paths_and_restrictions']: the restrictions to apply
      to each uploaded file.  The parameter must have the same
      structure as a Python dictionnary of the following form:
      {'DEMOART_APPENDIX':'restricted'}
      Files not specified in this parameter are not restricted.
      The specified restrictions can include a variable that can be
      replaced at runtime, for eg:
      {'DEMOART_APPENDIX':'restricted to <PA>file:SuE</PA>'}

    + parameters['paths_and_doctypes']: if a doctype is specified,
      the file will be saved under the 'doctype/collection' instead
      of under the default doctype/collection given by the name of
      the upload element that was used on the websubmit interface.
      To configure the doctype in websubmit, enter the value as in a
      dictionnary, for eg: {'PATHS_SWORD_UPL' : 'PUSHED_TO_ARXIV'} ->
      from Demo_Export_Via_Sword [DEMOSWR] Document Types
    """
    # sysno is the record id of the current submission, set by the
    # websubmit engine at module level.
    global sysno
    paths_and_suffixes = parameters['paths_and_suffixes']
    paths_and_restrictions = parameters['paths_and_restrictions']
    rename = parameters['rename']
    documenttype = parameters['documenttype']
    iconsizes = parameters['iconsize'].split(',')
    paths_and_doctypes = parameters['paths_and_doctypes']

    ## Create an instance of BibRecDocs for the current recid(sysno)
    bibrecdocs = BibRecDocs(sysno)

    # The three dict-like parameters arrive as strings; parse them.
    paths_and_suffixes = get_dictionary_from_string(paths_and_suffixes)
    paths_and_restrictions = get_dictionary_from_string(paths_and_restrictions)
    paths_and_doctypes = get_dictionary_from_string(paths_and_doctypes)

    ## Go through all the directories specified in the keys
    ## of parameters['paths_and_suffixes']
    for path in paths_and_suffixes.keys():
        ## Check if there is a directory for the current path
        if os.path.exists("%s/files/%s" % (curdir, path)):
            ## Retrieve the restriction to apply to files in this
            ## directory
            restriction = paths_and_restrictions.get(path, '')
            # Substitute runtime <PA>...</PA> variables in the restriction.
            restriction = re.sub('<PA>(?P<content>[^<]*)</PA>',
                                 get_pa_tag_content,
                                 restriction)

            ## Go through all the files in curdir/files/path
            for current_file in os.listdir("%s/files/%s" % (curdir, path)):
                ## retrieve filename and extension
                dummy, filename, extension = decompose_file(current_file)
                if extension and extension[0] != ".":
                    extension = '.' + extension
                if len(paths_and_suffixes[path]) != 0:
                    # Configured suffix is inserted before the extension,
                    # separated by an underscore.
                    extension = "_%s%s" % (paths_and_suffixes[path], extension)

                ## Build the new file name if rename parameter has been given
                if rename:
                    filename = re.sub('<PA>(?P<content>[^<]*)</PA>', \
                                      get_pa_tag_content, \
                                      parameters['rename'])

                if rename or len(paths_and_suffixes[path]) != 0:
                    ## Rename the file
                    try:
                        # Write the log rename_cmd
                        # NOTE(review): if os.rename() below raises, this
                        # fd is never closed (the close happens after the
                        # rename) — confirm whether that leak matters here.
                        fd = open("%s/rename_cmd" % curdir, "a+")
                        fd.write("%s/files/%s/%s" % (curdir, path, current_file) + " to " +\
                                 "%s/files/%s/%s%s" % (curdir, path, filename, extension) + "\n\n")
                        ## Rename
                        os.rename("%s/files/%s/%s" % (curdir, path, current_file), \
                                  "%s/files/%s/%s%s" % (curdir, path, filename, extension))
                        fd.close()
                        ## Save the new name in a text file in curdir so that
                        ## the new filename can be used by templates to created the recmysl
                        fd = open("%s/%s_RENAMED" % (curdir, path), "w")
                        fd.write("%s%s" % (filename, extension))
                        fd.close()
                    except OSError, err:
                        msg = "Cannot rename the file.[%s]"
                        msg %= str(err)
                        raise InvenioWebSubmitFunctionWarning(msg)
                fullpath = "%s/files/%s/%s%s" % (curdir, path, filename, extension)
                ## Check if there is any existing similar file
                if not bibrecdocs.check_file_exists(fullpath):
                    bibdoc = bibrecdocs.add_new_file(fullpath,
                                                     doctype=paths_and_doctypes.get(path, path),
                                                     never_fail=True)
                    bibdoc.set_status(restriction)
                    ## Fulltext
                    if documenttype == "fulltext":
                        # Derive the additional formats (ps, gz, ...) and
                        # attach each of them to the record.
                        additionalformats = createRelatedFormats(fullpath)
                        if len(additionalformats) > 0:
                            for additionalformat in additionalformats:
                                try:
                                    bibrecdocs.add_new_format(additionalformat)
                                except InvenioWebSubmitFileError:
                                    # Format already attached: ignore.
                                    pass
                    ## Icon
                    elif documenttype == "picture":
                        has_added_default_icon_subformat_p = False
                        for iconsize in iconsizes:
                            try:
                                iconpath, iconname = create_icon({
                                    'input-file': fullpath,
                                    'icon-scale': iconsize,
                                    'icon-name': None,
                                    'icon-file-format': None,
                                    'multipage-icon': False,
                                    'multipage-icon-delay': 100,
                                    'verbosity': 0,
                                })
                            except Exception, e:
                                register_exception(
                                    prefix='Impossible to create icon for %s (record %s)' % (fullpath, sysno),
                                    alert_admin=True)
                                continue
                            iconpath = os.path.join(iconpath, iconname)
                            docname = decompose_file(fullpath)[1]
                            try:
                                mybibdoc = bibrecdocs.get_bibdoc(docname)
                            except InvenioWebSubmitFileError:
                                mybibdoc = None
                            if iconpath is not None and mybibdoc is not None:
                                try:
                                    # ImageMagick geometry modifiers are not
                                    # valid in a subformat name: strip them.
                                    icon_suffix = iconsize.replace('>', '').replace('<', '').replace('^', '').replace('!', '')
                                    if not has_added_default_icon_subformat_p:
                                        # First icon becomes the default one.
                                        mybibdoc.add_icon(iconpath)
                                        has_added_default_icon_subformat_p = True
                                    else:
                                        mybibdoc.add_icon(iconpath,
                                                          subformat=CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix)
                                    ## Save the new icon filename in a text file in curdir so that
                                    ## it can be used by templates to created the recmysl
                                    # NOTE(review): has_added_default_icon_subformat_p was
                                    # just set True for the default icon above, so the
                                    # "%s_ICON" branch below appears unreachable for the
                                    # first icon — confirm the intended naming/order.
                                    try:
                                        if not has_added_default_icon_subformat_p:
                                            fd = open("%s/%s_ICON" % (curdir, path), "w")
                                        else:
                                            fd = open("%s/%s_ICON_%s" % (curdir, path, iconsize + '_' + icon_suffix), "w")
                                        fd.write(os.path.basename(iconpath))
                                        fd.close()
                                    except OSError, err:
                                        msg = "Cannot store icon filename.[%s]"
                                        msg %= str(err)
                                        raise InvenioWebSubmitFunctionWarning(msg)
                                except InvenioWebSubmitFileError, e:
                                    # Most probably icon already existed.
                                    pass
                            elif mybibdoc is not None:
                                # Icon creation produced nothing usable:
                                # drop any stale icon on the document.
                                mybibdoc.delete_icon()
# report register_exception(req=req, alert_admin=True) raise apache.SERVER_RETURN( apache.HTTP_FORBIDDEN) filename = formfields.filename ## Before saving the file to disc, wash the filename (in particular ## washing away UNIX and Windows (e.g. DFS) paths): filename = os.path.basename(filename.split('\\')[-1]) filename = filename.strip() if filename != "": # Check that file does not already exist while os.path.exists( os.path.join(dir_to_open, filename)): #dirname, basename, extension = decompose_file(new_destination_path) basedir, name, extension = decompose_file(filename) new_name = propose_next_docname(name) filename = new_name + extension #-------------# # VIDEO STUFF # #-------------# ## Remove all previous uploads filelist = os.listdir( os.path.split(formfields.file.name)[0]) for afile in filelist: if argd['access'] in afile: os.remove( os.path.join( os.path.split(formfields.file.name)[0],
def add(self, req, form): """ Add a comment (review) to record with id recid where recid>0 Also works for adding a remark to basket with id recid where recid<-99 @param ln: languange @param recid: record id @param action: 'DISPLAY' to display add form 'SUBMIT' to submit comment once form is filled 'REPLY' to reply to an already existing comment @param msg: the body of the comment/review or remark @param score: star score of the review @param note: title of the review @param comid: comment id, needed for replying @param editor_type: the type of editor used for submitting the comment: 'textarea', 'fckeditor'. @param subscribe: if set, subscribe user to receive email notifications when new comment are added to this discussion @return the full html page. """ argd = wash_urlargd( form, { 'action': (str, "DISPLAY"), 'msg': (str, ""), 'note': (str, ''), 'score': (int, 0), 'comid': (int, 0), 'editor_type': (str, ""), 'subscribe': (str, ""), 'cookie': (str, "") }) _ = gettext_set_language(argd['ln']) actions = ['DISPLAY', 'REPLY', 'SUBMIT'] uid = getUid(req) # Is site ready to accept comments? if uid == -1 or (not CFG_WEBCOMMENT_ALLOW_COMMENTS and not CFG_WEBCOMMENT_ALLOW_REVIEWS): return page_not_authorized(req, "../comments/add", navmenuid='search') # Is user allowed to post comment? 
user_info = collect_user_info(req) (auth_code_1, auth_msg_1) = check_user_can_view_comments(user_info, self.recid) (auth_code_2, auth_msg_2) = check_user_can_send_comments(user_info, self.recid) if isGuestUser(uid): cookie = mail_cookie_create_authorize_action( VIEWRESTRCOLL, { 'collection': guess_primary_collection_of_a_record( self.recid) }) # Save user's value in cookie, so that these "POST" # parameters are not lost during login process msg_cookie = mail_cookie_create_common( 'comment_msg', { 'msg': argd['msg'], 'note': argd['note'], 'score': argd['score'], 'editor_type': argd['editor_type'], 'subscribe': argd['subscribe'] }, onetime=True) target = '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \ CFG_SITE_URL + user_info['uri'] + '&cookie=' + msg_cookie}, {}) return redirect_to_url(req, target, norobot=True) elif (auth_code_1 or auth_code_2): return page_not_authorized(req, "../", \ text = auth_msg_1 + auth_msg_2) if argd['comid']: # If replying to a comment, are we on a record that # matches the original comment user is replying to? if not check_comment_belongs_to_record(argd['comid'], self.recid): return page_not_authorized(req, "../", \ text = _("Specified comment does not belong to this record")) # Is user trying to reply to a restricted comment? Make # sure user has access to it. We will then inherit its # restriction for the new comment (auth_code, auth_msg) = check_user_can_view_comment(user_info, argd['comid']) if auth_code: return page_not_authorized(req, "../", \ text = _("You do not have access to the specified comment")) # Is user trying to reply to a deleted comment? 
If so, we # let submitted comment go (to not lose possibly submitted # content, if comment is submitted while original is # deleted), but we "reset" comid to make sure that for # action 'REPLY' the original comment is not included in # the reply if is_comment_deleted(argd['comid']): argd['comid'] = 0 user_info = collect_user_info(req) can_attach_files = False (auth_code, auth_msg) = check_user_can_attach_file_to_comments( user_info, self.recid) if not auth_code and (user_info['email'] != 'guest'): can_attach_files = True warning_msgs = [] added_files = {} if can_attach_files: # User is allowed to attach files. Process the files file_too_big = False formfields = form.get('commentattachment[]', []) if not hasattr(formfields, "__getitem__"): # A single file was uploaded formfields = [formfields] for formfield in formfields[:CFG_WEBCOMMENT_MAX_ATTACHED_FILES]: if hasattr(formfield, "filename") and formfield.filename: filename = formfield.filename dir_to_open = os.path.join(CFG_TMPDIR, 'webcomment', str(uid)) try: assert (dir_to_open.startswith(CFG_TMPDIR)) except AssertionError: register_exception(req=req, prefix='User #%s tried to upload file to forbidden location: %s' \ % (uid, dir_to_open)) if not os.path.exists(dir_to_open): try: os.makedirs(dir_to_open) except: register_exception(req=req, alert_admin=True) ## Before saving the file to disc, wash the filename (in particular ## washing away UNIX and Windows (e.g. DFS) paths): filename = os.path.basename(filename.split('\\')[-1]) filename = filename.strip() if filename != "": # Check that file does not already exist n = 1 while os.path.exists( os.path.join(dir_to_open, filename)): basedir, name, extension = decompose_file(filename) new_name = propose_next_docname(name) filename = new_name + extension fp = open(os.path.join(dir_to_open, filename), "w") # FIXME: temporary, waiting for wsgi handler to be # fixed. 
Once done, read chunk by chunk ## while formfield.file: ## fp.write(formfield.file.read(10240)) fp.write(formfield.file.read()) fp.close() # Isn't this file too big? file_size = os.path.getsize( os.path.join(dir_to_open, filename)) if CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE > 0 and \ file_size > CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE: os.remove(os.path.join(dir_to_open, filename)) # One file is too big: record that, # dismiss all uploaded files and re-ask to # upload again file_too_big = True warning_msgs.append( ('WRN_WEBCOMMENT_MAX_FILE_SIZE_REACHED', cgi.escape(filename), str(file_size / 1024) + 'KB', str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE / 1024) + 'KB')) else: added_files[filename] = os.path.join( dir_to_open, filename) if file_too_big: # One file was too big. Removed all uploaded filed for filepath in added_files.items(): try: os.remove(filepath) except: # File was already removed or does not exist? pass client_ip_address = req.remote_ip check_warnings = [] (ok, problem) = check_recID_is_in_range(self.recid, check_warnings, argd['ln']) if ok: title, description, keywords = websearch_templates.tmpl_record_page_header_content( req, self.recid, argd['ln']) navtrail = create_navtrail_links( cc=guess_primary_collection_of_a_record(self.recid)) # Infoscience modification navtrail += '<li><a href="%s/record/%s?ln=%s">%s</a></li>' % ( CFG_SITE_URL, self.recid, argd['ln'], title) navtrail += '<li class="last">%s</li>' % ( self.discussion == 1 and _('Reviews') or _('Comments')) if argd['action'] not in actions: argd['action'] = 'DISPLAY' if not argd['msg']: # User had to login in-between, so retrieve msg # from cookie try: (kind, cookie_argd) = mail_cookie_check_common(argd['cookie'], delete=True) argd.update(cookie_argd) except InvenioWebAccessMailCookieDeletedError, e: return redirect_to_url(req, CFG_SITE_URL + '/record/' + \ str(self.recid) + (self.discussion==1 and \ '/reviews' or '/comments')) except InvenioWebAccessMailCookieError, e: # Invalid or empty cookie: continue 
pass
def write_metadata_local(inputfile, outputfile, metadata_dictionary, verbose):
    """
    Metadata write method, takes the .pdf as input and creates a new
    one with the new info.

    When C{metadata_dictionary} is empty, enters an interactive mode on
    stdin/stdout to collect the InfoKey/InfoValue pairs; otherwise the
    dictionary entries are written directly to a pdftk "info" file and
    applied with the pdftk tool.

    @param inputfile: path to the pdf
    @type inputfile: string
    @param outputfile: path to the resulting pdf
    @type outputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param metadata_dictionary: metadata information to update inputfile
    @type metadata_dictionary: dict
    """
    # Take the file name (decompose_file -> (0 base, 1 name, 2 ext))
    filename = decompose_file(inputfile)[1]

    # Print pdf metadata
    if verbose > 1:
        print 'Metadata information in the PDF file ' + filename + ': \n'
        try:
            # NOTE(review): inputfile is interpolated unquoted into a shell
            # command; a path containing spaces or shell metacharacters
            # would break (or worse).  Consider run_shell_command with
            # args=..., as done further below.
            os.system(CFG_PATH_PDFTK + ' ' + inputfile + ' dump_data')
        except Exception:
            print 'Problem with inputfile to PDFTK'

    # Info file for pdftk: holds the InfoKey/InfoValue lines to apply.
    (fd, path_to_info) = tempfile.mkstemp(prefix="wsm_pdf_plugin_info_", \
                                          dir=CFG_TMPDIR)
    os.close(fd)
    file_in = open(path_to_info, 'w')
    if verbose > 5:
        print "Saving PDFTK info file to %s" % path_to_info

    # User interaction to form the info file
    # Main Case: Dictionary received through option -d
    if not metadata_dictionary == {}:
        for tag in metadata_dictionary:
            line = 'InfoKey: ' + tag + '\nInfoValue: ' + \
                   metadata_dictionary[tag] + '\n'
            if verbose > 0:
                print line
            file_in.writelines(line)
    else:
        # Interactive mode: loop until the user quits or aborts.
        # NOTE(review): the early "return"s below leave file_in open and
        # the temporary info file on disk — confirm cleanup is handled
        # (or acceptable) for the interactive abort paths.
        data_modified = False
        user_input = 'user_input'
        print "Entering interactive mode. Choose what you want to do:"
        while (user_input):
            if not data_modified:
                try:
                    user_input = raw_input('[w]rite / [q]uit\n')
                except:
                    print "Aborting"
                    return
            else:
                try:
                    user_input = raw_input('[w]rite / [q]uit and apply / [a]bort \n')
                except:
                    print "Aborting"
                    return
            if user_input == 'q':
                # Quit: apply pending changes only if any were entered.
                if not data_modified:
                    return
                break
            elif user_input == 'w':
                try:
                    tag = raw_input('Tag to update:\n')
                    value = raw_input('With value:\n')
                except:
                    print "Aborting"
                    return
                # Write to info file
                line = 'InfoKey: ' + tag + '\nInfoValue: ' + value + '\n'
                data_modified = True
                file_in.writelines(line)
            elif user_input == 'a':
                return
            else:
                print "Invalid option: "
    file_in.close()

    # Temporary output pdf; moved over outputfile on success.
    (fd, pdf_temp_path) = tempfile.mkstemp(prefix="wsm_pdf_plugin_pdf_", \
                                           dir=CFG_TMPDIR)
    os.close(fd)

    # Now we call pdftk tool to update the info on a pdf
    #try:
    cmd_pdftk = '%s %s update_info %s output %s'
    (exit_status, output_std, output_err) = \
                  run_shell_command(cmd_pdftk,
                                    args=(CFG_PATH_PDFTK, inputfile,
                                          path_to_info, pdf_temp_path))
    if verbose > 5:
        print output_std, output_err

    if os.path.exists(pdf_temp_path):
        # Move to final destination if exist
        # NOTE(review): when pdf_temp_path does NOT exist (pdftk failed),
        # this visible portion neither raises nor reports — confirm the
        # failure is surfaced elsewhere.
        try:
            shutil.move(pdf_temp_path, outputfile)
        except Exception, err:
            raise InvenioWebSubmitFileMetadataRuntimeError("Could not move %s to %s" % \
                                                           (pdf_temp_path, outputfile))
def handle_file_post(req, allowed_mimetypes=None):
    """
    Handle the POST of a file.

    Reads the request body into a temporary file under C{CFG_TMPDIR},
    honouring an optional Content-Disposition suggested filename and an
    optional Content-MD5 checksum.

    @param req: the request object (must expose C{headers_in} and
        C{read}).
    @param allowed_mimetypes: optional whitelist of acceptable
        Content-Type values.
    @return: a tuple with the full path to the file saved on disk, and
        its mimetype as provided by the request.
    @rtype: (string, string)
    @raise InvenioWebInterfaceWSGIContentLenghtError: missing/invalid
        Content-Length, or a body shorter than announced.
    @raise InvenioWebInterfaceWSGIContentTypeError: Content-Type not in
        C{allowed_mimetypes}.
    @raise InvenioWebInterfaceWSGIContentMD5Error: checksum mismatch.
    """
    from invenio.bibdocfile import decompose_file, md5
    ## We retrieve the length
    clen = req.headers_in["Content-Length"]
    if clen is None:
        raise InvenioWebInterfaceWSGIContentLenghtError("Content-Length header is missing")
    try:
        clen = int(clen)
        # FIX: this was "clen > 1", which wrongly rejected valid 1-byte
        # bodies even though the error message promises any positive
        # integer.
        assert clen > 0
    except (ValueError, AssertionError):
        raise InvenioWebInterfaceWSGIContentLenghtError("Content-Length header should contain a positive integer")
    ## Let's take the content type
    ctype = req.headers_in["Content-Type"]
    if allowed_mimetypes and ctype not in allowed_mimetypes:
        raise InvenioWebInterfaceWSGIContentTypeError("Content-Type not in allowed list of content types: %s" % allowed_mimetypes)
    ## Let's optionally accept a suggested filename
    suffix = prefix = ''
    g = RE_CDISPOSITION_FILENAME.search(req.headers_in.get("Content-Disposition", ""))
    if g:
        dummy, prefix, suffix = decompose_file(g.group("filename"))
    ## Let's optionally accept an MD5 hash (and use it later for comparison)
    cmd5 = req.headers_in["Content-MD5"]
    if cmd5:
        the_md5 = md5()

    ## Ok. We can initialize the file
    fd, path = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=CFG_TMPDIR)
    # os.fdopen() takes ownership of fd: closing the_file also closes fd.
    # FIX: the error paths below used to additionally call os.close(fd)
    # AFTER the_file.close(), which raised OSError (EBADF) on the
    # already-closed descriptor and masked the intended Invenio
    # exceptions.
    the_file = os.fdopen(fd, 'w')
    ## Let's read the file
    while True:
        # max(10240, clen) asks for at least the full remaining body, so
        # a short read means the client sent less than Content-Length.
        chunk = req.read(max(10240, clen))
        if len(chunk) < clen:
            ## We expected to read at least clen (which is different than 0)
            ## but chunk was shorter! Gosh! Error! Panic!
            the_file.close()
            os.remove(path)
            raise InvenioWebInterfaceWSGIContentLenghtError("File shorter than what specified in Content-Length")
        if cmd5:
            ## MD5 was in the header let's compute it
            the_md5.update(chunk)
        ## And let's definitively write the content to disk :-)
        the_file.write(chunk)
        clen -= len(chunk)
        if clen == 0:
            ## That's it. Everything was read.
            break
    if cmd5 and the_md5.hexdigest().lower() != cmd5.strip().lower():
        ## Let's check the MD5
        the_file.close()
        os.remove(path)
        raise InvenioWebInterfaceWSGIContentMD5Error("MD5 checksum does not match")
    ## Let's clean everything up
    the_file.close()
    return (path, ctype)
(fd, output_file) = tempfile.mkstemp(suffix=output_ext, dir=CFG_TMPDIR) os.close(fd) except IOError, err: raise InvenioWebSubmitFileConverterError("It's impossible to create a temporary file: %s" % err) else: output_file = os.path.abspath(output_file) if os.path.exists(output_file): os.remove(output_file) if need_working_dir: try: working_dir = tempfile.mkdtemp(dir=CFG_TMPDIR, prefix='conversion') except IOError, err: raise InvenioWebSubmitFileConverterError("It's impossible to create a temporary directory: %s" % err) input_ext = decompose_file(input_file, skip_version=True)[2] new_input_file = os.path.join(working_dir, 'input' + input_ext) shutil.copy(input_file, new_input_file) input_file = new_input_file else: working_dir = None input_file = os.path.abspath(input_file) debug('IO prepared: input_file=%s, output_file=%s, working_dir=%s' % (input_file, output_file, working_dir)) return (input_file, output_file, working_dir) def clean_working_dir(working_dir): """ Remove the working_dir. """
def Move_Files_to_Storage(parameters, curdir, form, user_info=None):
    """
    The function moves files received from the standard submission's
    form through file input element(s). The documents are assigned a
    'doctype' (or category) corresponding to the file input element
    (eg. a file uploaded through 'DEMOPIC_FILE' will go to
    'DEMOPIC_FILE' doctype/category).

    Websubmit engine builds the following file organization in the
    directory curdir/files:

                  curdir/files
                        |
      _____________________________________________________________________
            |                                       |                      |
      ./file input 1 element's name   ./file input 2 element's name    ....
     (for eg. 'DEMOART_MAILFILE')    (for eg. 'DEMOART_APPENDIX')
            |                                       |
         test1.pdf                              test2.pdf

    There is only one instance of all possible extensions (pdf, gz...) in
    each part, otherwise we may encounter problems when renaming files.

    + parameters['rename']: if given, all the files in curdir/files
      are renamed.  parameters['rename'] is of the form:
      <PA>elemfilename[re]</PA>* where re is a regexp to select (using
      re.sub) what part of the elem file has to be selected.
      e.g: <PA>file:TEST_FILE_RN</PA>

    + parameters['documenttype']: if given, other formats are created.
      It has 2 possible values:
      - if "picture", an icon in gif format is created
      - if "fulltext", ps, gz... formats are created

    + parameters['paths_and_suffixes']: directories to look into and
      corresponding suffix to add to every file inside. It must have the
      same structure as a Python dictionary of the following form:
      {'FrenchAbstract':'french', 'EnglishAbstract':''}
      The keys are the file input element names from the form <=>
      directories in curdir/files.  The associated values are the
      suffixes which will be added to all the files in
      e.g. curdir/files/FrenchAbstract.

    + parameters['iconsize']: needed only if 'picture' is selected in
      parameters['documenttype'].

    + parameters['paths_and_restrictions']: the restrictions to apply to
      each uploaded file.  The parameter must have the same structure as
      a Python dictionary of the following form:
      {'DEMOART_APPENDIX':'restricted'}
      Files not specified in this parameter are not restricted.
      The specified restrictions can include a variable that can be
      replaced at runtime, for eg:
      {'DEMOART_APPENDIX':'restricted to <PA>file:SuE</PA>'}

    + parameters['paths_and_doctypes']: if a doctype is specified, the
      file will be saved under the 'doctype/collection' instead of under
      the default doctype/collection given by the name of the upload
      element that was used on the websubmit interface.  To configure
      the doctype in websubmit, enter the value as in a dictionary,
      for eg: {'PATHS_SWORD_UPL' : 'PUSHED_TO_ARXIV'} -> from
      Demo_Export_Via_Sword [DEMOSWR] Document Types
    """
    ## sysno is the current record id, set by the websubmit engine at
    ## module level before the function is invoked.
    global sysno
    paths_and_suffixes = parameters['paths_and_suffixes']
    paths_and_restrictions = parameters['paths_and_restrictions']
    rename = parameters['rename']
    documenttype = parameters['documenttype']
    iconsizes = parameters['iconsize'].split(',')
    paths_and_doctypes = parameters['paths_and_doctypes']
    ## Create an instance of BibRecDocs for the current recid(sysno)
    bibrecdocs = BibRecDocs(sysno)
    ## The three dict-like parameters arrive as strings from the
    ## websubmit configuration and must be parsed into real dicts.
    paths_and_suffixes = get_dictionary_from_string(paths_and_suffixes)
    paths_and_restrictions = get_dictionary_from_string(paths_and_restrictions)
    paths_and_doctypes = get_dictionary_from_string(paths_and_doctypes)
    ## Go through all the directories specified in the keys
    ## of parameters['paths_and_suffixes']
    for path in paths_and_suffixes.keys():
        ## Check if there is a directory for the current path
        if os.path.exists("%s/files/%s" % (curdir, path)):
            ## Retrieve the restriction to apply to files in this
            ## directory
            restriction = paths_and_restrictions.get(path, '')
            ## Expand <PA>...</PA> placeholders at runtime.
            restriction = re.sub('<PA>(?P<content>[^<]*)</PA>', get_pa_tag_content, restriction)
            ## Go through all the files in curdir/files/path
            for current_file in os.listdir("%s/files/%s" % (curdir, path)):
                ## retrieve filename and extension
                dummy, filename, extension = \
                       decompose_file(current_file)
                if extension and extension[0] != ".":
                    extension = '.' + extension
                if len(paths_and_suffixes[path]) != 0:
                    ## Insert the configured suffix before the extension.
                    extension = "_%s%s" % (paths_and_suffixes[path], extension)
                ## Build the new file name if rename parameter has been given
                if rename:
                    filename = re.sub('<PA>(?P<content>[^<]*)</PA>', \
                                      get_pa_tag_content, \
                                      parameters['rename'])
                if rename or len(paths_and_suffixes[path]) != 0 :
                    ## Rename the file
                    try:
                        # Write the log rename_cmd
                        fd = open("%s/rename_cmd" % curdir, "a+")
                        fd.write("%s/files/%s/%s" % (curdir, path, current_file) + " to " +\
                                 "%s/files/%s/%s%s" % (curdir, path, filename, extension) + "\n\n")
                        ## Rename
                        os.rename("%s/files/%s/%s" % (curdir, path, current_file), \
                                  "%s/files/%s/%s%s" % (curdir, path, filename, extension))
                        fd.close()
                        ## Save the new name in a text file in curdir so that
                        ## the new filename can be used by templates to created the recmysl
                        fd = open("%s/%s_RENAMED" % (curdir, path), "w")
                        fd.write("%s%s" % (filename, extension))
                        fd.close()
                    except OSError, err:
                        msg = "Cannot rename the file.[%s]"
                        msg %= str(err)
                        raise InvenioWebSubmitFunctionWarning(msg)
                fullpath = "%s/files/%s/%s%s" % (curdir, path, filename, extension)
                ## Check if there is any existing similar file
                if not bibrecdocs.check_file_exists(fullpath):
                    bibdoc = bibrecdocs.add_new_file(fullpath, doctype=paths_and_doctypes.get(path, path), never_fail=True)
                    bibdoc.set_status(restriction)
                    ## Fulltext
                    if documenttype == "fulltext":
                        additionalformats = createRelatedFormats(fullpath)
                        if len(additionalformats) > 0:
                            for additionalformat in additionalformats:
                                try:
                                    bibrecdocs.add_new_format(additionalformat)
                                except InvenioWebSubmitFileError:
                                    ## Format already attached: best-effort, skip.
                                    pass
                    ## Icon
                    elif documenttype == "picture":
                        ## Only the first successfully-created icon becomes
                        ## the default subformat; later sizes get a
                        ## size-derived subformat suffix.
                        has_added_default_icon_subformat_p = False
                        for iconsize in iconsizes:
                            try:
                                iconpath, iconname = create_icon({
                                    'input-file' : fullpath,
                                    'icon-scale' : iconsize,
                                    'icon-name' : None,
                                    'icon-file-format' : None,
                                    'multipage-icon' : False,
                                    'multipage-icon-delay' : 100,
                                    'verbosity' : 0,
                                })
                            except Exception, e:
                                register_exception(prefix='Impossible to create icon for %s (record %s)' % (fullpath, sysno), alert_admin=True)
                                continue
                            iconpath = os.path.join(iconpath, iconname)
                            docname = decompose_file(fullpath)[1]
                            try:
                                mybibdoc = bibrecdocs.get_bibdoc(docname)
                            except InvenioWebSubmitFileError:
                                mybibdoc = None
                            if iconpath is not None and mybibdoc is not None:
                                try:
                                    ## Strip ImageMagick geometry modifiers so the
                                    ## size can be used inside a subformat name.
                                    icon_suffix = iconsize.replace('>', '').replace('<', '').replace('^', '').replace('!', '')
                                    if not has_added_default_icon_subformat_p:
                                        mybibdoc.add_icon(iconpath)
                                        has_added_default_icon_subformat_p = True
                                    else:
                                        mybibdoc.add_icon(iconpath, subformat=CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix)
                                    ## Save the new icon filename in a text file in curdir so that
                                    ## it can be used by templates to created the recmysl
                                    ## NOTE(review): has_added_default_icon_subformat_p was
                                    ## just set True above for the default icon, so this
                                    ## check always takes the "_ICON_%s" branch even for
                                    ## the default icon — looks unintended; confirm.
                                    try:
                                        if not has_added_default_icon_subformat_p:
                                            fd = open("%s/%s_ICON" % (curdir, path), "w")
                                        else:
                                            fd = open("%s/%s_ICON_%s" % (curdir, path, iconsize + '_' + icon_suffix), "w")
                                        fd.write(os.path.basename(iconpath))
                                        fd.close()
                                    except OSError, err:
                                        msg = "Cannot store icon filename.[%s]"
                                        msg %= str(err)
                                        raise InvenioWebSubmitFunctionWarning(msg)
                                except InvenioWebSubmitFileError, e:
                                    # Most probably icon already existed.
                                    pass
                            elif mybibdoc is not None:
                                ## Icon creation failed for this size: drop any
                                ## previously attached icon.
                                mybibdoc.delete_icon()
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    Reads and sanitises the JSON batch description, optionally locates the
    video master, runs every encode/extract job it describes, fixes the
    record's MARC, and finally handles input deletion and notifications.

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 if the process was successfull, 0 if not
    @rtype: int
    @raise Exception: if the record does not exist or a job defines no
        container/extension.
    """
    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = 'bibencode_' + str(batch_job['recid']) + '_' + str(
            uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR,
                                    xml_filename)
        xml_file = file(xml_filename, 'w')
        xml_file.write(marcxml)
        xml_file.close()
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errrors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    if record_exists(batch_job['recid']) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        ## A bibdocfile is considered the master when its comment,
        ## description and subformat all match the (possibly defaulted)
        ## values given in the batch description.
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description',
                                       description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat',
                                     subformat)
                if (comment == m_comment and
                        description == m_description and
                        subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect of the video from the record
                    try:
                        ## Assumes pbcore metadata mapping
                        ## FIX: look up the aspect ratio on the record being
                        ## processed (was hard-coded to record id 124).
                        batch_job['aspect'] = get_fieldvalues(
                            batch_job['recid'],
                            CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                                % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(
        batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job,
                                            'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)

    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory of the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first an then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc"
                                % bibdoc_video_docname)
            master_format = compose_format(
                bibdoc_video_extension,
                getval(batch_job, 'bibdoc_master_subformat', 'master'))
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                batch_job['input'],
                version=1,
                description=getval(batch_job, 'bibdoc_master_description'),
                comment=getval(batch_job, 'bibdoc_master_comment'),
                docformat=master_format)

    #-----------#
    # JOBS LOOP #
    #-----------#

    return_code = 1
    global _BATCH_STEP

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------"
                            % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(
                job['bibdoc_docname']).safe_substitute(
                    {'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, 'assure_quality') and getval(
                    job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None

            ## We need an extension defined for the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")

            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname',
                                                bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(bibdoc_video_directory,
                                                 bibdoc_slave_video_docname,
                                                 bibdoc_video_extension)
            _task_write_message("Transcoding %s to %s;%s"
                                % (bibdoc_slave_video_docname,
                                   bibdoc_video_extension,
                                   bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            encoding_result = encode_video(
                input_file=batch_job['input'],
                output_file=bibdoc_video_fullpath,
                acodec=getval(job, 'audiocodec'),
                vcodec=getval(job, 'videocodec'),
                ## FIX: audio bitrate must come from 'audiobitrate' and
                ## video bitrate from 'videobitrate' — the two job keys
                ## were swapped before.
                abitrate=getval(job, 'audiobitrate'),
                vbitrate=getval(job, 'videobitrate'),
                resolution=getval(job, 'resolution'),
                passes=getval(job, 'passes', 1),
                special=getval(job, 'special'),
                specialfirst=getval(job, 'specialfirst'),
                specialsecond=getval(job, 'specialsecond'),
                metadata=getval(job, 'metadata'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),  # Aspect for every job
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                message_fnc=_task_write_message)
            return_code &= encoding_result

            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(bibdoc_video_fullpath,
                          compose_file(bibdoc_video_directory,
                                       bibdoc_video_extension,
                                       bibdoc_video_subformat,
                                       1,
                                       bibdoc_slave_video_docname))
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                             bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(
                        getval(job, 'bibdoc_description'),
                        bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            #Move this to the batch description
            bibdoc_frame_docname = getval(job, 'bibdoc_docname',
                                          bibdoc_video_docname)
            tmpfname = (tmpdir + "/" + bibdoc_frame_docname + '.'
                        + getval(profile, 'extension',
                                 getval(job, 'extension', 'jpg')))
            extraction_result = extract_frames(
                input_file=batch_job['input'],
                output_file=tmpfname,
                size=getval(job, 'size'),
                positions=getval(job, 'positions'),
                numberof=getval(job, 'numberof'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                )
            return_code &= extraction_result

            ## only on success:
            if extraction_result:
                ## for every filename in the directorys, create a bibdoc that contains
                ## all sizes of the frame from the two directories
                files = os.listdir(tmpdir)
                for filename in files:
                    ## The docname was altered by BibEncode extract through substitution
                    ## Retrieve it from the filename again
                    bibdoc_frame_docname, bibdoc_frame_extension = \
                        os.path.splitext(filename)
                    _task_write_message("Creating new bibdoc for %s"
                                        % bibdoc_frame_docname)
                    ## If the bibdoc exists, receive it
                    if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                        bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                    ## Create a new bibdoc if it does not exist
                    else:
                        bibdoc_frame = recdoc.add_bibdoc(
                            docname=bibdoc_frame_docname)

                    ## The filename including path from tmpdir
                    fname = os.path.join(tmpdir, filename)
                    bibdoc_frame_format = compose_format(
                        bibdoc_frame_extension, bibdoc_frame_subformat)
                    ## Same as with the master, if the format allready exists,
                    ## override it, because something went wrong before
                    if bibdoc_frame.format_already_exists_p(
                            bibdoc_frame_format):
                        bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                    _task_write_message("Adding %s jpg;%s to BibDoc"
                                        % (bibdoc_frame_docname,
                                           getval(job, 'bibdoc_subformat')))
                    bibdoc_frame.add_file_new_format(
                        fname,
                        version=1,
                        description=getval(job, 'bibdoc_description'),
                        comment=getval(job, 'bibdoc_comment'),
                        docformat=bibdoc_frame_format)
                ## Remove the temporary folders
                _task_write_message("Removing temporary directory")
                shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file=getval(batch_job, 'input'),
                                 pbcoreIdentifier=batch_job['recid'],
                                 aspect_override=getval(batch_job, 'aspect'))
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        marc_snippet = open(getval(batch_job, 'marc_snippet'))
        marcxml = marc_snippet.read()
        marc_snippet.close()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        # only if successfull
        ## FIX: was `if not return_code:`, which deleted the input
        ## exactly when processing FAILED, contradicting the intent
        ## stated above.
        if return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern', '') in getval(
                    batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename',
                       batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            ## FIX: was `if type(getval(...) == type(str())):` — the
            ## misplaced parenthesis computed type(bool) which is always
            ## truthy, so the no-address branch was unreachable.
            ## basestring covers both str and unicode (JSON decoding
            ## yields unicode under Python 2).
            if isinstance(getval(batch_job, 'notify_admin'), basestring):
                _notify_error_admin(batch_job,
                                    getval(batch_job, 'notify_admin'))
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename',
                       batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
    return 1
def add(self, req, form): """ Add a comment (review) to record with id recid where recid>0 Also works for adding a remark to basket with id recid where recid<-99 @param ln: languange @param recid: record id @param action: 'DISPLAY' to display add form 'SUBMIT' to submit comment once form is filled 'REPLY' to reply to an already existing comment @param msg: the body of the comment/review or remark @param score: star score of the review @param note: title of the review @param comid: comment id, needed for replying @param editor_type: the type of editor used for submitting the comment: 'textarea', 'fckeditor'. @param subscribe: if set, subscribe user to receive email notifications when new comment are added to this discussion @return the full html page. """ argd = wash_urlargd(form, {'action': (str, "DISPLAY"), 'msg': (str, ""), 'note': (str, ''), 'score': (int, 0), 'comid': (int, -1), 'editor_type': (str, ""), 'subscribe': (str, ""), 'cookie': (str, "") }) _ = gettext_set_language(argd['ln']) actions = ['DISPLAY', 'REPLY', 'SUBMIT'] uid = getUid(req) # Is site ready to accept comments? if uid == -1 or (not CFG_WEBCOMMENT_ALLOW_COMMENTS and not CFG_WEBCOMMENT_ALLOW_REVIEWS): return page_not_authorized(req, "../comments/add", navmenuid='search') # Is user allowed to post comment? 
user_info = collect_user_info(req) (auth_code_1, auth_msg_1) = check_user_can_view_comments(user_info, self.recid) (auth_code_2, auth_msg_2) = check_user_can_send_comments(user_info, self.recid) if isGuestUser(uid): cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) # Save user's value in cookie, so that these "POST" # parameters are not lost during login process msg_cookie = mail_cookie_create_common('comment_msg', {'msg': argd['msg'], 'note': argd['note'], 'score': argd['score'], 'editor_type': argd['editor_type'], 'subscribe': argd['subscribe']}, onetime=True) target = '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \ CFG_SITE_URL + user_info['uri'] + '&cookie=' + msg_cookie}, {}) return redirect_to_url(req, target, norobot=True) elif (auth_code_1 or auth_code_2): return page_not_authorized(req, "../", \ text = auth_msg_1 + auth_msg_2) user_info = collect_user_info(req) can_attach_files = False (auth_code, auth_msg) = check_user_can_attach_file_to_comments(user_info, self.recid) if not auth_code and (user_info['email'] != 'guest' or user_info['apache_user']): can_attach_files = True warning_msgs = [] added_files = {} if can_attach_files: # User is allowed to attach files. 
Process the files file_too_big = False formfields = form.get('commentattachment[]', []) if not hasattr(formfields, "__getitem__"): # A single file was uploaded formfields = [formfields] for formfield in formfields[:CFG_WEBCOMMENT_MAX_ATTACHED_FILES]: if hasattr(formfield, "filename") and formfield.filename: filename = formfield.filename dir_to_open = os.path.join(CFG_TMPDIR, 'webcomment', str(uid)) try: assert(dir_to_open.startswith(CFG_TMPDIR)) except AssertionError: register_exception(req=req, prefix='User #%s tried to upload file to forbidden location: %s' \ % (uid, dir_to_open)) if not os.path.exists(dir_to_open): try: os.makedirs(dir_to_open) except: register_exception(req=req, alert_admin=True) ## Before saving the file to disc, wash the filename (in particular ## washing away UNIX and Windows (e.g. DFS) paths): filename = os.path.basename(filename.split('\\')[-1]) filename = filename.strip() if filename != "": # Check that file does not already exist n = 1 while os.path.exists(os.path.join(dir_to_open, filename)): basedir, name, extension = decompose_file(filename) new_name = propose_next_docname(name) filename = new_name + extension fp = open(os.path.join(dir_to_open, filename), "w") # FIXME: temporary, waiting for wsgi handler to be # fixed. Once done, read chunk by chunk ## while formfield.file: ## fp.write(formfield.file.read(10240)) fp.write(formfield.file.read()) fp.close() # Isn't this file too big? 
file_size = os.path.getsize(os.path.join(dir_to_open, filename)) if CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE > 0 and \ file_size > CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE: os.remove(os.path.join(dir_to_open, filename)) # One file is too big: record that, # dismiss all uploaded files and re-ask to # upload again file_too_big = True warning_msgs.append(('WRN_WEBCOMMENT_MAX_FILE_SIZE_REACHED', cgi.escape(filename), str(file_size/1024) + 'KB', str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE/1024) + 'KB')) else: added_files[filename] = os.path.join(dir_to_open, filename) if file_too_big: # One file was too big. Removed all uploaded filed for filepath in added_files.items(): try: os.remove(filepath) except: # File was already removed or does not exist? pass client_ip_address = req.remote_ip check_warnings = [] (ok, problem) = check_recID_is_in_range(self.recid, check_warnings, argd['ln']) if ok: title, description, keywords = websearch_templates.tmpl_record_page_header_content(req, self.recid, argd['ln']) navtrail = create_navtrail_links(cc=guess_primary_collection_of_a_record(self.recid)) if navtrail: navtrail += ' > ' navtrail += '<a class="navtrail" href="%s/record/%s?ln=%s">'% (CFG_SITE_URL, self.recid, argd['ln']) navtrail += title navtrail += '</a>' navtrail += '> <a class="navtrail" href="%s/record/%s/%s/?ln=%s">%s</a>' % (CFG_SITE_URL, self.recid, self.discussion==1 and 'reviews' or 'comments', argd['ln'], self.discussion==1 and _('Reviews') or _('Comments')) if argd['action'] not in actions: argd['action'] = 'DISPLAY' if not argd['msg']: # User had to login in-between, so retrieve msg # from cookie try: (kind, cookie_argd) = mail_cookie_check_common(argd['cookie'], delete=True) argd.update(cookie_argd) except InvenioWebAccessMailCookieDeletedError, e: return redirect_to_url(req, CFG_SITE_URL + '/record/' + \ str(self.recid) + (self.discussion==1 and \ '/reviews' or '/comments')) except InvenioWebAccessMailCookieError, e: # Invalid or empty cookie: continue pass
def read_metadata(inputfile, force=None, remote=False, loginpw=None, verbose=0): """ Returns metadata extracted from given file as dictionary. Availability depends on input file format and installed plugins (return C{TypeError} if unsupported file format). @param inputfile: path to a file @type inputfile: string @param verbose: verbosity @type verbose: int @param force: name of plugin to use, to skip plugin auto-discovery @type force: string @param remote: if the file is accessed remotely or not @type remote: boolean @param loginpw: credentials to access secure servers (username:password) @type loginpw: string @return: dictionary of metadata tags as keys, and (interpreted) value as value @rtype: dict @raise TypeError: if file format is not supported. @raise RuntimeError: if required library to process file is missing. @raise InvenioWebSubmitFileMetadataRuntimeError: when metadata cannot be read. """ metadata = None # Check file type (0 base, 1 name, 2 ext) ext = decompose_file(inputfile)[2] if verbose > 5: print ext.lower(), 'extension to extract from' # Load plugins metadata_extractor_plugins = PluginContainer( os.path.join(CFG_PYLIBDIR, 'invenio', 'websubmit_file_metadata_plugins', 'wsm_*.py'), plugin_builder=plugin_builder_function, api_version=__required_plugin_API_version__) # Loop through the plugins to find a good one for given file for plugin_name, plugin in metadata_extractor_plugins.iteritems(): # Local file if plugin.has_key('can_read_local') and \ plugin['can_read_local'](inputfile) and not remote and \ (not force or plugin_name == force): if verbose > 5: print 'Using ' + plugin_name fetched_metadata = plugin['read_metadata_local'](inputfile, verbose) if not metadata: metadata = fetched_metadata else: metadata.update(fetched_metadata) # Remote file elif remote and plugin.has_key('can_read_remote') and \ plugin['can_read_remote'](inputfile) and \ (not force or plugin_name == force): if verbose > 5: print 'Using ' + plugin_name fetched_metadata = 
plugin['read_metadata_remote'](inputfile, loginpw, verbose) if not metadata: metadata = fetched_metadata else: metadata.update(fetched_metadata) # Return in case we have something if metadata is not None: return metadata # Case of no plugin found, raise raise TypeError, 'Unsupported file type'