def Move_Files_Archive(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Synchronize the record's archive with the files found on disk under
    <curdir>/files/MainFiles ("Main" doctype) and
    <curdir>/files/AdditionalFiles ("Additional" doctype): bibdocs whose
    docname no longer exists on disk are deleted, then every file present
    on disk is (re-)attached to the current record (global ``sysno``).

    @param parameters: submission parameters (unused here)
    @param curdir: current submission working directory
    @param form: submitted form data (unused here)
    @param user_info: user information dict (unused here)
    @return: empty string (WebSubmit function convention)
    """
    main_dir = "%s/files/MainFiles" % curdir
    include_dir = "%s/files/AdditionalFiles" % curdir
    watcheddirs = {'Main': main_dir, 'Additional': include_dir}
    for doctype, dirpath in iteritems(watcheddirs):
        if not os.path.exists(dirpath):
            continue
        # Map docname -> list of normalized formats found on disk.
        formats = {}
        for entry in sorted(os.listdir(dirpath)):
            dummy, docname, extension = decompose_file(entry)
            formats.setdefault(docname, []).append(normalize_format(extension))
        # First delete all bibdocs whose files went missing on disk.
        bibarchive = BibRecDocs(sysno)
        for existing_bibdoc in bibarchive.list_bibdocs(doctype):
            if bibarchive.get_docname(existing_bibdoc.id) not in formats:
                existing_bibdoc.delete()
        # Then create/update the new ones.  The original code interpolated
        # the whole format *list* into the path
        # ('%s/%s%s' % (dir, key, formats[key])), producing names like
        # "dir/name['.pdf']" that can never exist on disk; add each
        # format individually instead.
        for docname, docformats in formats.items():
            for docformat in docformats:
                bibarchive.add_new_file('%s/%s%s' % (dirpath, docname, docformat),
                                        doctype=doctype, never_fail=True)
    return ""
def can_read_remote(inputfile):
    """Checks if inputfile is among metadata-readable file types

    @param inputfile: (string) path to the image
    @type inputfile: string
    @rtype: boolean
    @return: true if extension can be handled
    """
    # Check file type (0 base, 1 name, 2 ext).  decompose_file() returns
    # the extension with its leading dot (cf. the sibling helpers that
    # compare against '.pdf', '.jpg', ...), so the original entry 'jpe'
    # could never match -- fixed to '.jpe'.
    ext = decompose_file(inputfile)[2]
    return ext.lower() in ['.jpg', '.jpeg', '.jpe', '.jfif', '.jfi', '.jif']
def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Attach every file found under <curdir>/files to the current record
    (global ``sysno``) as a "Main" document, skipping files that are
    already attached.  Returns the empty string, as WebSubmit functions
    conventionally do.
    """
    files_dir = "%s/files" % curdir
    if not os.path.exists(files_dir):
        return ""
    bibrecdocs = BibRecDocs(sysno)
    for entry in os.listdir(files_dir):
        full_path = "%s/%s" % (files_dir, entry)
        extension = decompose_file(entry)[2]
        # Normalize the extension to the dotted '.ext' form expected by
        # check_file_exists().
        if extension and not extension.startswith("."):
            extension = "." + extension
        if not bibrecdocs.check_file_exists(full_path, extension):
            bibrecdocs.add_new_file(full_path, "Main", never_fail=True)
    return ""
def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Attach every file found under <curdir>/files to the current record
    (global ``sysno``) as a "Main" document, skipping files already
    attached.

    @param parameters: submission parameters (unused here)
    @param curdir: current submission working directory
    @param form: submitted form data (unused here)
    @param user_info: user information dict (unused here)
    @return: empty string (WebSubmit function convention)
    """
    if os.path.exists("%s/files" % curdir):
        bibrecdocs = BibRecDocs(sysno)
        for current_file in os.listdir("%s/files" % curdir):
            fullpath = "%s/files/%s" % (curdir, current_file)
            dummy, filename, extension = decompose_file(current_file)
            # decompose_file() may hand back the extension without its
            # leading dot -- normalize so check_file_exists() gets '.ext'.
            if extension and extension[0] != ".":
                extension = '.' + extension
            # Only attach the file if an identical one is not already there.
            if not bibrecdocs.check_file_exists(fullpath, extension):
                bibrecdocs.add_new_file(fullpath, "Main", never_fail=True)
    return ""
def can_read_local(inputfile):
    """
    Checks if inputfile is among metadata-readable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """
    # decompose_file() -> (basedir, name, extension); only PDF is
    # supported by this plugin.
    extension = decompose_file(inputfile)[2].lower()
    return extension == '.pdf'
def can_write_local(inputfile):
    """
    Checks if inputfile is among metadata-writable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """
    # Check file type (0 base, 1 name, 2 ext).  The extension is returned
    # with its leading dot, so the original entry 'jpe' never matched --
    # fixed to '.jpe'.
    ext = decompose_file(inputfile)[2]
    return ext.lower() in ['.jpg', '.tiff', '.jpeg', '.jpe', '.jfif',
                           '.jfi', '.jif']
def can_write_local(inputfile):
    """
    Checks if inputfile is among metadata-writable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """
    # Check file type (0 base, 1 name, 2 ext).  The extension is returned
    # with its leading dot, so the original entry 'jpe' never matched --
    # fixed to '.jpe'.
    ext = decompose_file(inputfile)[2]
    return ext.lower() in [
        '.jpg', '.tiff', '.jpeg', '.jpe', '.jfif', '.jfi', '.jif'
    ]
def get_record_documents(recid, filename):
    """Yield LegacyBibDoc files from Documents.

    Looks up every Document UUID registered under *filename* in the
    record's ``_documents`` list, skips documents the current user is not
    authorized to read (logging the attempt), and yields a small adapter
    object exposing legacy BibDoc accessors (``name``, ``superformat``,
    ``url``, ``get_full_path()``, ``get_recid()``).

    @param recid: identifier of the record whose documents are wanted
    @param filename: file name to match against the ``_documents`` keys
    """
    from invenio.modules.records.api import get_record
    from invenio.modules.documents.api import Document
    from invenio.legacy.bibdocfile.api import decompose_file
    record = get_record(recid)
    # All document UUIDs registered in the record under this filename.
    duuids = [
        uuid for (k, uuid) in record.get('_documents', []) if k == filename
    ]
    for duuid in duuids:
        document = Document.get_document(duuid)
        if not document.is_authorized(current_user):
            # NOTE(review): the literal hard-codes "(unknown)" although a
            # `filename` format argument is supplied and unused -- possibly
            # meant to be "/{recid}/files/{filename}"; left unchanged.
            current_app.logger.info(
                "Unauthorized access to /{recid}/files/(unknown) "
                "({document}) by {current_user}".format(
                    recid=recid, filename=filename, document=document,
                    current_user=current_user))
            continue
        # Linked external documents keep their original http(s) URI;
        # everything else is served through the record's file view.
        if document.get(
                'linked', False) and (document.get('uri').startswith('http://')
                                      or document.get('uri').startswith('https://')):
            url = document.get('uri')
        else:
            url = url_for('record.file', recid=recid, filename=filename)
        (dummy, name, superformat) = decompose_file(filename)

        class LegacyBibDoc(object):
            # Minimal stand-in for a legacy BibDoc: stores the keyword
            # arguments as attributes and closes over `document`/`recid`.
            def __init__(self, **kwargs):
                for key, value in kwargs.items():
                    setattr(self, key, value)

            def get_full_path(self):
                return document.get('uri')

            def get_recid(self):
                return recid

        yield LegacyBibDoc(name=name, superformat=superformat, url=url)
def can_read_local(inputfile):
    """
    Checks if inputfile is among metadata-readable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """
    # Check file type (0 base, 1 name, 2 ext).
    ext = decompose_file(inputfile)[2]
    # The original list contained '.ps' twice; duplicate removed.
    return ext.lower() in [
        '.html', '.doc', '.ps', '.xls', '.ppt', '.sxw', '.sdw', '.dvi',
        '.man', '.flac', '.mp3', '.nsf', '.sid', '.ogg', '.wav', '.png',
        '.deb', '.rpm', '.tar.gz', '.zip', '.elf', '.s3m', '.xm', '.it',
        '.flv', '.real', '.avi', '.mpeg', '.qt', '.asf'
    ]
def can_read_local(inputfile):
    """
    Checks if inputfile is among metadata-readable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """
    # Check file type (0 base, 1 name, 2 ext).
    ext = decompose_file(inputfile)[2]
    # The original list contained '.ps' twice; duplicate removed.
    return ext.lower() in ['.html', '.doc', '.ps', '.xls', '.ppt',
                           '.sxw', '.sdw', '.dvi', '.man', '.flac',
                           '.mp3', '.nsf', '.sid', '.ogg', '.wav',
                           '.png', '.deb', '.rpm', '.tar.gz', '.zip',
                           '.elf', '.s3m', '.xm', '.it', '.flv',
                           '.real', '.avi', '.mpeg', '.qt', '.asf']
def write_metadata(inputfile, outputfile, metadata_dictionary, force=None, verbose=0):
    """Write metadata to given file.

    Availability depends on input file format and installed plugins
    (return C{TypeError} if unsupported file format).

    @param inputfile: path to a file
    @type inputfile: string
    @param outputfile: path to the resulting file.
    @type outputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param metadata_dictionary: keys and values of metadata to update.
    @type metadata_dictionary: dict
    @param force: name of plugin to use, to skip plugin auto-discovery
    @type force: string
    @return: output of the plugin
    @rtype: string
    @raise TypeError: if file format is not supported.
    @raise RuntimeError: if required library to process file is missing.
    @raise InvenioWebSubmitFileMetadataRuntimeError: when metadata cannot
           be updated.
    """
    # decompose_file() -> (basedir, name, extension).
    extension = decompose_file(inputfile)[2]
    if verbose > 5:
        print(extension.lower(), 'extension to write to')
    # Hand the job to the first registered plugin that claims write
    # support for this file; when `force` is given, only the plugin with
    # that name may be chosen.
    for candidate_name, candidate in iteritems(metadata_extractor_plugins):
        supports_file = ('can_write_local' in candidate
                         and candidate['can_write_local'](inputfile))
        if supports_file and (not force or candidate_name == force):
            if verbose > 5:
                print('Using ' + candidate_name)
            return candidate['write_metadata_local'](inputfile, outputfile,
                                                     metadata_dictionary,
                                                     verbose)
    # No plugin could handle the file.
    raise TypeError('Unsupported file type')
def createRelatedFormats(fullpath, overwrite=True, debug=False):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.

    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats
        NOTE(review): this flag is never read in the visible body --
        confirm whether get_missing_formats() was meant to honour it.
    @param debug: (bool) when True, temporarily raise the file-converter
        logger to DEBUG and trace every step to stderr
    Return a list of the paths to the converted files
    """
    file_converter_logger = get_file_converter_logger()
    old_logging_level = file_converter_logger.getEffectiveLevel()
    if debug:
        file_converter_logger.setLevel(DEBUG)
    try:
        createdpaths = []
        basedir, filename, extension = decompose_file(fullpath)
        # `extension` is only used for the debug trace below.
        extension = extension.lower()
        if debug:
            print("basedir: %s, filename: %s, extension: %s"
                  % (basedir, filename, extension), file=sys.stderr)
        # Every sibling file sharing the same base name is treated as an
        # already-existing format of the same document.
        filelist = glob.glob(os.path.join(basedir, '%s*' % filename))
        if debug:
            print("filelist: %s" % filelist, file=sys.stderr)
        missing_formats = get_missing_formats(filelist)
        if debug:
            print("missing_formats: %s" % missing_formats, file=sys.stderr)
        for path, formats in iteritems(missing_formats):
            if debug:
                print("... path: %s, formats: %s" % (path, formats),
                      file=sys.stderr)
            for aformat in formats:
                if debug:
                    print("...... aformat: %s" % aformat, file=sys.stderr)
                newpath = os.path.join(basedir, filename + aformat)
                if debug:
                    print("...... newpath: %s" % newpath, file=sys.stderr)
                try:
                    convert_file(path, newpath)
                    createdpaths.append(newpath)
                except InvenioWebSubmitFileConverterError as msg:
                    # A failed conversion is reported to the admin but does
                    # not abort the remaining conversions.
                    if debug:
                        print("...... Exception: %s" % msg, file=sys.stderr)
                    register_exception(alert_admin=True)
    finally:
        # Always restore the logger level changed for debug mode.
        if debug:
            file_converter_logger.setLevel(old_logging_level)
    return createdpaths
def write_metadata(inputfile, outputfile, metadata_dictionary, force=None, verbose=0):
    """
    Writes metadata to given file.

    Availability depends on input file format and installed plugins
    (return C{TypeError} if unsupported file format).

    @param inputfile: path to a file
    @type inputfile: string
    @param outputfile: path to the resulting file.
    @type outputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param metadata_dictionary: keys and values of metadata to update.
    @type metadata_dictionary: dict
    @param force: name of plugin to use, to skip plugin auto-discovery
    @type force: string
    @return: output of the plugin
    @rtype: string
    @raise TypeError: if file format is not supported.
    @raise RuntimeError: if required library to process file is missing.
    @raise InvenioWebSubmitFileMetadataRuntimeError: when metadata cannot
           be updated.
    """
    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    if verbose > 5:
        print(ext.lower(), 'extension to write to')
    # Loop through the plugins to find one that can write to this extension
    # (restricted to the plugin named by `force` when given).
    for plugin_name, plugin in iteritems(metadata_extractor_plugins):
        if 'can_write_local' in plugin and \
           plugin['can_write_local'](inputfile) and \
           (not force or plugin_name == force):
            if verbose > 5:
                print('Using ' + plugin_name)
            return plugin['write_metadata_local'](inputfile, outputfile,
                                                  metadata_dictionary,
                                                  verbose)
    # No plugin found.  Fixed: `raise TypeError, '...'` is Python 2-only
    # syntax and a SyntaxError under Python 3 (this code already uses the
    # Python 3 print() function).
    raise TypeError('Unsupported file type')
def get_record_documents(recid, filename):
    """Yield LegacyBibDoc files from Documents.

    Looks up every Document UUID registered under *filename* in the
    record's ``_documents`` list, skips documents the current user is not
    authorized to read (logging the attempt), and yields a small adapter
    object exposing legacy BibDoc accessors (``name``, ``superformat``,
    ``url``, ``get_full_path()``, ``get_recid()``).

    @param recid: identifier of the record whose documents are wanted
    @param filename: file name to match against the ``_documents`` keys
    """
    from invenio_records.api import get_record
    from invenio_documents.api import Document
    from invenio.legacy.bibdocfile.api import decompose_file
    record = get_record(recid)
    # All document UUIDs registered in the record under this filename.
    duuids = [uuid for (k, uuid) in record.get('_documents', [])
              if k == filename]
    for duuid in duuids:
        document = Document.get_document(duuid)
        if not document.is_authorized(current_user):
            # NOTE(review): the literal hard-codes "(unknown)" although a
            # `filename` format argument is supplied and unused -- possibly
            # meant to be "/{recid}/files/{filename}"; left unchanged.
            current_app.logger.info(
                "Unauthorized access to /{recid}/files/(unknown) "
                "({document}) by {current_user}".format(
                    recid=recid, filename=filename, document=document,
                    current_user=current_user))
            continue
        # Linked external documents keep their original http(s) URI;
        # everything else is served through the record's file view.
        if document.get('linked', False) and (
                document.get('uri').startswith('http://')
                or document.get('uri').startswith('https://')):
            url = document.get('uri')
        else:
            url = url_for('record.file', recid=recid, filename=filename)
        (dummy, name, superformat) = decompose_file(filename)

        class LegacyBibDoc(object):
            # Minimal stand-in for a legacy BibDoc: stores the keyword
            # arguments as attributes and closes over `document`/`recid`.
            def __init__(self, **kwargs):
                for key, value in kwargs.items():
                    setattr(self, key, value)

            def get_full_path(self):
                return document.get('uri')

            def get_recid(self):
                return recid

        yield LegacyBibDoc(name=name, superformat=superformat, url=url)
def translate_link(match_obj):
    """Replace CKEditor link by 'local' record link. Also create the FFT
    for that link.

    @param match_obj: regex match exposing 'type', 'filename' and 'uid'
        named groups; the whole match is the quoted original URL
    @return: the new URL, wrapped in double quotes

    NOTE(review): relies on names from an enclosing scope -- ``sysno``,
    ``curdir`` and ``processed_paths`` -- confirm against the surrounding
    function.
    """
    file_type = match_obj.group('type')
    file_name = match_obj.group('filename')
    uid = match_obj.group('uid')
    dummy, name, extension = decompose_file(file_name)
    new_url = build_url(sysno, name, file_type, extension)
    # Strip the surrounding quotes from the matched URL.
    original_location = match_obj.group()[1:-1]
    icon_location = original_location
    # Prepare FFT that will fetch the file (+ the original
    # file in the case of images)
    if file_type == 'image':
        # Does original file exists, or do we just have the
        # icon? We expect the original file at a well defined
        # location
        possible_original_path = os.path.join(CFG_PREFIX, 'var', 'tmp',
                                              'attachfile', uid,
                                              file_type, 'original',
                                              file_name)
        if os.path.exists(possible_original_path):
            icon_location = original_location
            original_location = possible_original_path
            new_url = build_url(sysno, name, file_type, extension,
                                is_icon=True)
    docname = build_docname(name, file_type, extension)
    if original_location not in processed_paths:
        # Must create an FFT only if we have not yet processed
        # the file. This can happen if same image exists on
        # the same page (either in two different CKEditor
        # instances, or twice in the HTML)
        processed_paths.append(original_location)
        write_fft(curdir, original_location, docname, icon_location,
                  doctype=file_type)
    return '"' + new_url + '"'
def _get_feature_image(record, ln=CFG_SITE_LANG):
    """
    Looks for an image that can be featured on the article overview page.

    First searches the article body (French field 590__b first when
    ``ln`` is 'fr', otherwise 520__b first) for an inline image via
    ``img_pattern``; failing that, falls back to the first attached icon
    (field 8564_q) with an image extension.

    @param record: record object exposing a ``fields(tag)`` accessor
    @param ln: interface language code
    @return: image source URL, or '' when none is found
    """
    src = ''
    if ln == "fr":
        article = ''.join(record.fields('590__b'))
        if not article:
            article = ''.join(record.fields('520__b'))
    else:
        article = ''.join(record.fields('520__b'))
        if not article:
            article = ''.join(record.fields('590__b'))
    image = re.search(img_pattern, article)
    if image:
        src = image.group("image")
    if not src:
        # Look for an attached image.  decompose_file() returns the
        # extension with its leading dot (cf. the sibling helpers), so the
        # original comparison against dot-less names could never match --
        # fixed to dotted extensions.
        icons = [icon for icon in record.fields('8564_q')
                 if decompose_file(icon)[2] in ['.jpg', '.jpeg',
                                                '.png', '.gif']]
        if icons:
            src = icons[0]
    return src
def process_CKEditor_upload(form, uid, user_files_path,
                            user_files_absolute_path, recid=None,
                            allowed_types=default_allowed_types):
    """
    Process a file upload request.

    @param form: the form as in req object.
    @type form: dict
    @param uid: the user ID of the user uploading the file.
    @type uid: int
    @param user_files_path: the base URL where the file can be accessed
        from the web after upload.  Note that you have to implement your
        own handler to stream the files from the directory
        C{user_files_absolute_path} if you set this value.
    @type user_files_path: string
    @param user_files_absolute_path: the base path on the server where
        the files should be saved.
        Eg:C{%(CFG_DATADIR)s/comments/%(recid)s/%(uid)s}
    @type user_files_absolute_path: string
    @param recid: the record ID for which we upload a file. Leave None if
        not relevant.
    @type recid: int
    @param allowed_types: types allowed for uploading. These are
        supported by CKEditor: ['File', 'Image', 'Flash', 'Media']
    @type allowed_types: list of strings
    @return: (msg, uploaded_file_path, uploaded_file_name,
        uploaded_file_url, callback_function)
    """
    msg = ''
    filename = ''
    formfile = None
    uploaded_file_path = ''
    # NOTE(review): this resets the `user_files_path` parameter to '' --
    # the returned URL base is therefore always empty; confirm whether
    # this assignment is intentional.
    user_files_path = ''
    # Locate the uploaded file among the submitted form fields.
    for key, formfields in form.items():
        if key != 'upload':
            continue
        if hasattr(formfields, "filename") and formfields.filename:
            # We have found our file
            filename = formfields.filename
            formfile = formfields.file
            break
    can_upload_file_p = False
    if not form['type'] in allowed_types:
        # Is the type sent through the form ok?
        msg = 'You are not allowed to upload a file of this type'
    else:
        # Is user allowed to upload such file extension?
        basedir, name, extension = decompose_file(filename)
        extension = extension[1:]  # strip leading dot
        if extension in allowed_extensions.get(form['type'], []):
            can_upload_file_p = True
    if not can_upload_file_p:
        msg = 'You are not allowed to upload a file of this type'
    elif filename and formfile:
        ## Before saving the file to disk, wash the filename (in particular
        ## washing away UNIX and Windows (e.g. DFS) paths):
        filename = os.path.basename(filename.split('\\')[-1])
        # Remove \ / | : ? *
        # NOTE(review): the trailing '/' in the pattern makes the control
        # character class match only when followed by a slash -- the last
        # alternative was probably meant to end at ']'; left unchanged.
        filename = re.sub(
            '\\\\|\\/|\\||\\:|\\?|\\*|"|<|>|[\x00-\x1f\x7f-\x9f]/',
            '_', filename)
        filename = filename.strip()
        if filename != "":
            # Check that file does not already exist; keep proposing the
            # next docname until a free one is found.
            n = 1
            while os.path.exists(
                    os.path.join(user_files_absolute_path, filename)):
                basedir, name, extension = decompose_file(filename)
                new_name = propose_next_docname(name)
                filename = new_name + extension
            # This may be dangerous if the file size is bigger than the
            # available memory.
            # NOTE(review): opened in text mode ("w"); binary uploads
            # (images, flash) presumably need "wb" -- confirm.
            fp = open(os.path.join(user_files_absolute_path, filename), "w")
            fp.write(formfile.read())
            fp.close()
            uploaded_file_path = os.path.join(user_files_absolute_path,
                                              filename)
            uploaded_file_name = filename
    return (msg, uploaded_file_path, filename, user_files_path,
            form['CKEditorFuncNum'])
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 if the process was successful, 0 if not
    @rtype: int
    """
    from invenio.legacy.bibdocfile.cli import cli_fix_marc

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload """
        # NOTE(review): uses the Python 2 `file()` builtin -- must become
        # `open()` under Python 3.
        xml_filename = 'bibencode_'+ str(batch_job['recid']) + '_' + str(uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR,
                                    xml_filename)
        xml_file = file(xml_filename, 'w')
        xml_file.write(marcxml)
        xml_file.close()
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#
    _task_write_message("----------- Handling Master -----------")
    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)
    ## Sanitise batch description and raise errors
    batch_job = sanitise_batch_job(batch_job)
    ## Check if the record exists
    # if record_exists(batch_job['recid']) < 1:
    #     raise Exception("Record not found")
    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#
    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        # The master is recognized by matching comment/description/subformat
        # against the values given in the batch description; each m_* value
        # defaults to the bibdocfile's own value, so an omitted key matches.
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                m_comment = getval(batch_job, 'bibdoc_master_comment',
                                   comment)
                m_description = getval(batch_job,
                                       'bibdoc_master_description',
                                       description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat',
                                     subformat)
                if (comment == m_comment and description == m_description
                        and subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect of the video from the record
                    try:
                        ## Assumes pbcore metadata mapping
                        batch_job['aspect'] = get_fieldvalues(124, CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                                % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job,
                                            'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job,
                                        'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory of the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first and then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc"
                                % bibdoc_video_docname)
            master_format = compose_format(
                bibdoc_video_extension,
                getval(batch_job, 'bibdoc_master_subformat', 'master')
                )
            ## If a file of the same format is there, something is wrong,
            ## remove it!  It might be caused by a previous corrupted
            ## submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                batch_job['input'],
                version=1,
                description=getval(batch_job, 'bibdoc_master_description'),
                comment=getval(batch_job, 'bibdoc_master_comment'),
                docformat=master_format
                )

    #-----------#
    # JOBS LOOP #
    #-----------#
    return_code = 1
    global _BATCH_STEP
    for job in batch_job['jobs']:
        _task_write_message("----------- Job %s of %s -----------"
                            % (_BATCH_STEP, _BATCH_STEPS))
        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(job['bibdoc_docname']).safe_substitute({'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#
        if job['mode'] == 'encode':
            ## Skip the job if assure_quality is not set and marked as
            ## fallback
            if not getval(batch_job, 'assure_quality') and getval(job, 'fallback'):
                continue
            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None
            ## We need an extension defined for the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname',
                                                bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(
                bibdoc_video_directory,
                bibdoc_video_extension
                )
            _task_write_message("Transcoding %s to %s;%s"
                                % (bibdoc_slave_video_docname,
                                   bibdoc_video_extension,
                                   bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            # NOTE(review): `abitrate` is fed from 'videobitrate' and
            # `vbitrate` from 'audiobitrate' -- these look swapped;
            # confirm against encode_video()'s signature.
            encoding_result = encode_video(
                input_file=batch_job['input'],
                output_file=bibdoc_video_fullpath,
                acodec=getval(job, 'audiocodec'),
                vcodec=getval(job, 'videocodec'),
                abitrate=getval(job, 'videobitrate'),
                vbitrate=getval(job, 'audiobitrate'),
                resolution=getval(job, 'resolution'),
                passes=getval(job, 'passes', 1),
                special=getval(job, 'special'),
                specialfirst=getval(job, 'specialfirst'),
                specialsecond=getval(job, 'specialsecond'),
                metadata=getval(job, 'metadata'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),  # Aspect for every job
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                message_fnc=_task_write_message
                )
            return_code &= encoding_result
            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(bibdoc_video_fullpath,
                          compose_file(bibdoc_video_directory,
                                       bibdoc_video_extension,
                                       bibdoc_video_subformat,
                                       1,
                                       bibdoc_slave_video_docname)
                          )
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                             bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(getval(job, 'bibdoc_description'),
                                                 bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#
        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.
        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            #Move this to the batch description
            bibdoc_frame_docname = getval(job, 'bibdoc_docname',
                                          bibdoc_video_docname)
            tmpfname = (tmpdir + "/" + bibdoc_frame_docname + '.'
                        + getval(profile, 'extension',
                                 getval(job, 'extension', 'jpg')))
            extraction_result = extract_frames(input_file=batch_job['input'],
                                               output_file=tmpfname,
                                               size=getval(job, 'size'),
                                               positions=getval(job, 'positions'),
                                               numberof=getval(job, 'numberof'),
                                               width=getval(job, 'width'),
                                               height=getval(job, 'height'),
                                               aspect=getval(batch_job, 'aspect'),
                                               profile=getval(job, 'profile'),
                                               update_fnc=_task_update_overall_status,
                                               )
            return_code &= extraction_result
            ## only on success:
            if extraction_result:
                ## for every filename in the directorys, create a bibdoc
                ## that contains all sizes of the frame from the two
                ## directories
                files = os.listdir(tmpdir)
                for filename in files:
                    ## The docname was altered by BibEncode extract through
                    ## substitution.  Retrieve it from the filename again
                    bibdoc_frame_docname, bibdoc_frame_extension = os.path.splitext(filename)
                    _task_write_message("Creating new bibdoc for %s"
                                        % bibdoc_frame_docname)
                    ## If the bibdoc exists, receive it
                    if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                        bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                    ## Create a new bibdoc if it does not exist
                    else:
                        bibdoc_frame = recdoc.add_bibdoc(docname=bibdoc_frame_docname)

                    ## The filename including path from tmpdir
                    fname = os.path.join(tmpdir, filename)
                    bibdoc_frame_format = compose_format(bibdoc_frame_extension, bibdoc_frame_subformat)
                    ## Same as with the master, if the format already
                    ## exists, override it, because something went wrong
                    ## before
                    if bibdoc_frame.format_already_exists_p(bibdoc_frame_format):
                        bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                    _task_write_message("Adding %s jpg;%s to BibDoc"
                                        % (bibdoc_frame_docname,
                                           getval(job, 'bibdoc_subformat')))
                    bibdoc_frame.add_file_new_format(
                        fname,
                        version=1,
                        description=getval(job, 'bibdoc_description'),
                        comment=getval(job, 'bibdoc_comment'),
                        docformat=bibdoc_frame_format)
            ## Remove the temporary folders
            _task_write_message("Removing temporary directory")
            shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#
    _task_write_message("----------- Handling MARCXML -----------")
    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()
    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)
    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#
    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file = getval(batch_job, 'input'),
                                 pbcoreIdentifier = batch_job['recid'],
                                 aspect_override = getval(batch_job, 'aspect'))
        # NOTE(review): `format` imported here shadows the builtin within
        # this scope.
        from invenio_formatter.engines.xslt import format
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#
    if getval(batch_job, 'marc_snippet'):
        marc_snippet = open(getval(batch_job, 'marc_snippet'))
        marcxml = marc_snippet.read()
        marc_snippet.close()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#
    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        # only if successfull
        # NOTE(review): the comment says "only if successful" but the
        # guard is `if not return_code` (i.e. a job failed) -- confirm
        # the intended polarity; left unchanged.
        if not return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern', '') in getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#
    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename',
                                      batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            # NOTE(review): `type(x == type(str()))` always yields `bool`,
            # which is truthy, so the first branch is always taken; the
            # parentheses were probably meant as
            # `type(x) == type(str())`; left unchanged.
            if type(getval(batch_job, 'notify_admin') == type(str()) ):
                _notify_error_admin(batch_job,
                                    getval(batch_job, 'notify_admin'))
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(getval(batch_job, 'notify_user'),
                                 getval(batch_job, 'submission_filename',
                                        batch_job['input']),
                                 getval(batch_job, 'recid'),
                                 getval(batch_job, 'submission_title', ""))
    return 1
def process_batch_job(batch_job_file):
    """Process a batch job description dictionary.

    Reads and sanitises the JSON job description, optionally takes the
    video master from the record itself, runs every encoding/extraction
    job, fixes the BibDoc/MARC information and sends out notifications.

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 if the process was successful, 0 if not
    @rtype: int
    """
    from invenio.legacy.bibdocfile.cli import cli_fix_marc

    def upload_marcxml_file(marcxml):
        """Create a temporary marcxml file and send it to bibupload."""
        xml_filename = 'bibencode_' + str(batch_job['recid']) + '_' + str(uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
        # use open() -- the py2-only file() builtin no longer exists
        xml_file = open(xml_filename, 'w')
        xml_file.write(marcxml)
        xml_file.close()
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    # if record_exists(batch_job['recid']) < 1:
    #     raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        ## The master is identified by matching comment/description/subformat
        ## against the (optionally overridden) values in the batch description
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description', description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat', subformat)
                if (comment == m_comment and
                        description == m_description and
                        subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect of the video from the record
                    try:
                        ## Assumes pbcore metadata mapping
                        batch_job['aspect'] = get_fieldvalues(124, CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                                % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job, 'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory of the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first an then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc"
                                % bibdoc_video_docname)
            master_format = compose_format(
                bibdoc_video_extension,
                getval(batch_job, 'bibdoc_master_subformat', 'master')
                )
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                batch_job['input'],
                version=1,
                description=getval(batch_job, 'bibdoc_master_description'),
                comment=getval(batch_job, 'bibdoc_master_comment'),
                docformat=master_format
                )

    #-----------#
    # JOBS LOOP #
    #-----------#

    return_code = 1
    global _BATCH_STEP

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------"
                            % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(job['bibdoc_docname']).safe_substitute(
                {'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, 'assure_quality') and getval(job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None

            ## We need an extension defined for the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")

            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)

            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(
                bibdoc_video_directory,
                bibdoc_video_extension
                )
            _task_write_message("Transcoding %s to %s;%s" % (bibdoc_slave_video_docname,
                                bibdoc_video_extension,
                                bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            ## BUGFIX: the audio and video bitrate job keys were previously
            ## swapped (abitrate got 'videobitrate' and vice versa)
            encoding_result = encode_video(
                input_file=batch_job['input'],
                output_file=bibdoc_video_fullpath,
                acodec=getval(job, 'audiocodec'),
                vcodec=getval(job, 'videocodec'),
                abitrate=getval(job, 'audiobitrate'),
                vbitrate=getval(job, 'videobitrate'),
                resolution=getval(job, 'resolution'),
                passes=getval(job, 'passes', 1),
                special=getval(job, 'special'),
                specialfirst=getval(job, 'specialfirst'),
                specialsecond=getval(job, 'specialsecond'),
                metadata=getval(job, 'metadata'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),  # Aspect for every job
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                message_fnc=_task_write_message
                )
            return_code &= encoding_result

            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(bibdoc_video_fullpath,
                          compose_file(bibdoc_video_directory,
                                       bibdoc_video_extension,
                                       bibdoc_video_subformat,
                                       1,
                                       bibdoc_slave_video_docname)
                          )
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                             bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(getval(job, 'bibdoc_description'),
                                                 bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            #Move this to the batch description
            bibdoc_frame_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
            tmpfname = (tmpdir + "/" + bibdoc_frame_docname + '.'
                        + getval(profile, 'extension', getval(job, 'extension', 'jpg')))
            extraction_result = extract_frames(input_file=batch_job['input'],
                                               output_file=tmpfname,
                                               size=getval(job, 'size'),
                                               positions=getval(job, 'positions'),
                                               numberof=getval(job, 'numberof'),
                                               width=getval(job, 'width'),
                                               height=getval(job, 'height'),
                                               aspect=getval(batch_job, 'aspect'),
                                               profile=getval(job, 'profile'),
                                               update_fnc=_task_update_overall_status,
                                               )
            return_code &= extraction_result

            ## only on success:
            if extraction_result:
                ## for every filename in the directorys, create a bibdoc that contains
                ## all sizes of the frame from the two directories
                files = os.listdir(tmpdir)
                for filename in files:
                    ## The docname was altered by BibEncode extract through substitution
                    ## Retrieve it from the filename again
                    bibdoc_frame_docname, bibdoc_frame_extension = os.path.splitext(filename)
                    _task_write_message("Creating new bibdoc for %s" % bibdoc_frame_docname)
                    ## If the bibdoc exists, receive it
                    if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                        bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                    ## Create a new bibdoc if it does not exist
                    else:
                        bibdoc_frame = recdoc.add_bibdoc(docname=bibdoc_frame_docname)

                    ## The filename including path from tmpdir
                    fname = os.path.join(tmpdir, filename)
                    bibdoc_frame_format = compose_format(bibdoc_frame_extension,
                                                         bibdoc_frame_subformat)
                    ## Same as with the master, if the format allready exists,
                    ## override it, because something went wrong before
                    if bibdoc_frame.format_already_exists_p(bibdoc_frame_format):
                        bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                    _task_write_message("Adding %s jpg;%s to BibDoc"
                                        % (bibdoc_frame_docname,
                                           getval(job, 'bibdoc_subformat')))
                    bibdoc_frame.add_file_new_format(
                        fname,
                        version=1,
                        description=getval(job, 'bibdoc_description'),
                        comment=getval(job, 'bibdoc_comment'),
                        docformat=bibdoc_frame_format)
                ## Remove the temporary folders
                _task_write_message("Removing temporary directory")
                shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file=getval(batch_job, 'input'),
                                 pbcoreIdentifier=batch_job['recid'],
                                 aspect_override=getval(batch_job, 'aspect'))
        from invenio.modules.formatter.engines.xslt import format
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        marc_snippet = open(getval(batch_job, 'marc_snippet'))
        marcxml = marc_snippet.read()
        marc_snippet.close()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        # only if successfull
        # NOTE(review): return_code is 1 on success, so 'not return_code' is
        # true on FAILURE -- this contradicts the comment above; kept as-is
        # because deleting input is destructive. TODO confirm intent.
        if not return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern', '') in getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            ## BUGFIX: the old check 'type(getval(...) == type(str()))' was
            ## always truthy (it took the type of a bool), so the fallback
            ## _notify_error_admin(batch_job) branch was unreachable
            if isinstance(getval(batch_job, 'notify_admin'), str):
                _notify_error_admin(batch_job,
                                    getval(batch_job, 'notify_admin'))
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(getval(batch_job, 'notify_user'),
                                 getval(batch_job, 'submission_filename', batch_job['input']),
                                 getval(batch_job, 'recid'),
                                 getval(batch_job, 'submission_title', ""))
    return 1
def read_metadata(inputfile, force=None, remote=False, loginpw=None, verbose=0):
    """Return metadata extracted from given file as dictionary.

    Availability depends on input file format and installed plugins
    (return C{TypeError} if unsupported file format).

    @param inputfile: path to a file
    @type inputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param force: name of plugin to use, to skip plugin auto-discovery
    @type force: string
    @param remote: if the file is accessed remotely or not
    @type remote: boolean
    @param loginpw: credentials to access secure servers (username:password)
    @type loginpw: string
    @return: dictionary of metadata tags as keys, and (interpreted) value
             as value
    @rtype: dict
    @raise TypeError: if file format is not supported.
    @raise RuntimeError: if required library to process file is missing.
    @raise InvenioWebSubmitFileMetadataRuntimeError: when metadata cannot be
           read.
    """
    collected = None
    # Check file type (0 base, 1 name, 2 ext)
    extension = decompose_file(inputfile)[2]
    if verbose > 5:
        print(extension.lower(), 'extension to extract from')

    # Ask every registered plugin whether it can handle the file; merge
    # the results of all applicable plugins into a single dictionary.
    for plugin_name, plugin in iteritems(metadata_extractor_plugins):
        selected = not force or plugin_name == force
        fetched = None
        # Local file
        if 'can_read_local' in plugin and \
                plugin['can_read_local'](inputfile) and not remote and \
                selected:
            if verbose > 5:
                print('Using ' + plugin_name)
            fetched = plugin['read_metadata_local'](inputfile, verbose)
        # Remote file
        elif remote and 'can_read_remote' in plugin and \
                plugin['can_read_remote'](inputfile) and selected:
            if verbose > 5:
                print('Using ' + plugin_name)
            fetched = plugin['read_metadata_remote'](inputfile, loginpw,
                                                     verbose)
        if fetched is None:
            continue
        if collected:
            collected.update(fetched)
        else:
            collected = fetched

    # Return in case we have something; otherwise no plugin matched.
    if collected is None:
        raise TypeError('Unsupported file type')
    return collected
def upload_video(self, req, form):
    """
    A clone of uploadfile but for (large) videos.
    Does not copy the uploaded file to the websubmit directory.
    Instead, the path to the file is stored inside the submission directory.

    Validates the request, renames the uploaded temporary file so the
    garbage collector keeps it, extracts sample thumbnails, detects the
    aspect ratio and returns a JSON response with frames/aspect/metadata.
    """

    def gcd(a, b):
        """ the euclidean algorithm """
        while a:
            a, b = b % a, a
        return b

    from invenio.modules.encoder.extract import extract_frames
    from invenio.modules.encoder.config import CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR, CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME
    from invenio.modules.encoder.encode import determine_aspect
    from invenio.modules.encoder.utils import probe
    from invenio.modules.encoder.metadata import ffprobe_metadata
    from invenio.legacy.websubmit.config import CFG_WEBSUBMIT_TMP_VIDEO_PREFIX

    argd = wash_urlargd(
        form, {
            'doctype': (str, ''),
            'access': (str, ''),
            'indir': (str, ''),
            'session_id': (str, ''),
            'rename': (str, ''),
        })

    curdir = None
    if "indir" not in form or \
            "doctype" not in form or \
            "access" not in form:
        raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
    else:
        # Submission working directory for this doctype/access combination
        curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'],
                              argd['doctype'], argd['access'])

    user_info = collect_user_info(req)
    if "session_id" in form:
        # Are we uploading using Flash, which does not transmit
        # cookie? The expect to receive session_id as a form
        # parameter. First check that IP addresses do not
        # mismatch.
        uid = session.uid
        user_info = collect_user_info(uid)
    # NOTE(review): 'uid' is only bound inside the branch above but is used
    # unconditionally below -- NameError if 'session_id' is absent. Confirm.

    try:
        act_fd = file(os.path.join(curdir, 'act'))
        action = act_fd.read()
        act_fd.close()
    except:
        # NOTE(review): this assigns 'act', not 'action' -- if the read
        # fails, 'action' is unbound below. Likely a typo; confirm intent.
        act = ""
    # Is user authorized to perform this action?
    (auth_code, auth_message) = acc_authorize_action(
        uid, "submit",
        authorized_if_no_roles=not isGuestUser(uid),
        verbose=0,
        doctype=argd['doctype'],
        act=action)
    if acc_is_role("submit", doctype=argd['doctype'], act=action) and auth_code != 0:
        # User cannot submit
        raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED)
    else:
        # Process the upload and get the response
        json_response = {}
        for key, formfields in form.items():
            filename = key.replace("[]", "")
            if hasattr(formfields, "filename") and formfields.filename:
                # Per-user, per-form-field target directory under curdir/files
                dir_to_open = os.path.abspath(
                    os.path.join(curdir, 'files', str(user_info['uid']),
                                 key))
                try:
                    # Guard against path escapes out of the storage dir
                    assert (
                        dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR))
                except AssertionError:
                    register_exception(req=req,
                                       prefix='curdir="%s", key="%s"' %
                                       (curdir, key))
                    raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
                if not os.path.exists(dir_to_open):
                    try:
                        os.makedirs(dir_to_open)
                    except OSError as e:
                        if e.errno != errno.EEXIST:
                            # If the issue is only that directory
                            # already exists, then continue, else
                            # report
                            register_exception(req=req, alert_admin=True)
                            raise apache.SERVER_RETURN(
                                apache.HTTP_FORBIDDEN)
                filename = formfields.filename
                ## Before saving the file to disc, wash the filename (in particular
                ## washing away UNIX and Windows (e.g. DFS) paths):
                filename = os.path.basename(filename.split('\\')[-1])
                filename = filename.strip()
                if filename != "":
                    # Check that file does not already exist
                    while os.path.exists(
                            os.path.join(dir_to_open, filename)):
                        #dirname, basename, extension = decompose_file(new_destination_path)
                        basedir, name, extension = decompose_file(filename)
                        new_name = propose_next_docname(name)
                        filename = new_name + extension
                    #-------------#
                    # VIDEO STUFF #
                    #-------------#
                    ## Remove all previous uploads
                    filelist = os.listdir(
                        os.path.split(formfields.file.name)[0])
                    for afile in filelist:
                        if argd['access'] in afile:
                            os.remove(
                                os.path.join(
                                    os.path.split(formfields.file.name)[0],
                                    afile))
                    ## Check if the file is a readable video
                    ## We must exclude all image and audio formats that are readable by ffprobe
                    ## NOTE(review): os.path.splitext() returns the extension
                    ## WITH the leading dot ('.jpg'), so membership in this
                    ## dot-less list never matches -- only probe() filters.
                    if (os.path.splitext(filename)[1] in [
                            'jpg', 'jpeg', 'gif', 'tiff', 'bmp', 'png', 'tga',
                            'jp2', 'j2k', 'jpf', 'jpm', 'mj2', 'biff', 'cgm',
                            'exif', 'img', 'mng', 'pic', 'pict', 'raw', 'wmf',
                            'jpe', 'jif', 'jfif', 'jfi', 'tif', 'webp', 'svg',
                            'ai', 'ps', 'psd', 'wav', 'mp3', 'pcm', 'aiff',
                            'au', 'flac', 'wma', 'm4a', 'wv', 'oga', 'm4a',
                            'm4b', 'm4p', 'm4r', 'aac', 'mp4', 'vox', 'amr',
                            'snd'
                    ] or not probe(formfields.file.name)):
                        formfields.file.close()
                        raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
                    ## We have no "delete" attribute in Python 2.4
                    if sys.hexversion < 0x2050000:
                        ## We need to rename first and create a dummy file
                        ## Rename the temporary file for the garbage collector
                        new_tmp_fullpath = os.path.split(
                            formfields.file.name
                        )[0] + "/" + CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd[
                            'access'] + "_" + os.path.split(
                                formfields.file.name)[1]
                        os.rename(formfields.file.name, new_tmp_fullpath)
                        # Leave an empty placeholder so the close() below
                        # does not fail on the now-missing temp file
                        dummy = open(formfields.file.name, "w")
                        dummy.close()
                        formfields.file.close()
                    else:
                        # Mark the NamedTemporatyFile as not to be deleted
                        formfields.file.delete = False
                        formfields.file.close()
                        ## Rename the temporary file for the garbage collector
                        new_tmp_fullpath = os.path.split(
                            formfields.file.name
                        )[0] + "/" + CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd[
                            'access'] + "_" + os.path.split(
                                formfields.file.name)[1]
                        os.rename(formfields.file.name, new_tmp_fullpath)
                    # Write the path to the temp file to a file in STORAGEDIR
                    fp = open(os.path.join(dir_to_open, "filepath"), "w")
                    fp.write(new_tmp_fullpath)
                    fp.close()
                    fp = open(os.path.join(dir_to_open, "filename"), "w")
                    fp.write(filename)
                    fp.close()
                    ## We are going to extract some thumbnails for websubmit ##
                    sample_dir = os.path.join(
                        curdir, 'files', str(user_info['uid']),
                        CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR)
                    try:
                        ## Remove old thumbnails
                        shutil.rmtree(sample_dir)
                    except OSError:
                        register_exception(req=req, alert_admin=False)
                    try:
                        # NOTE(review): sample_dir is already absolute, so
                        # os.path.join() here just returns sample_dir.
                        os.makedirs(
                            os.path.join(curdir, 'files',
                                         str(user_info['uid']), sample_dir))
                    except OSError:
                        register_exception(req=req, alert_admin=False)
                    try:
                        extract_frames(
                            input_file=new_tmp_fullpath,
                            output_file=os.path.join(
                                sample_dir,
                                CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME
                            ),
                            size="600x600",
                            numberof=5)
                        json_response['frames'] = []
                        for extracted_frame in os.listdir(sample_dir):
                            json_response['frames'].append(extracted_frame)
                    except:
                        ## If the frame extraction fails, something was bad with the video
                        os.remove(new_tmp_fullpath)
                        register_exception(req=req, alert_admin=False)
                        raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
                    ## Try to detect the aspect. if this fails, the video is not readable
                    ## or a wrong file might have been uploaded
                    try:
                        (aspect, width, height) = determine_aspect(new_tmp_fullpath)
                        if aspect:
                            aspx, aspy = aspect.split(':')
                        else:
                            # Fall back to reducing width:height by their GCD
                            the_gcd = gcd(width, height)
                            aspx = str(width / the_gcd)
                            aspy = str(height / the_gcd)
                        json_response['aspx'] = aspx
                        json_response['aspy'] = aspy
                    except TypeError:
                        ## If the aspect detection completely fails
                        os.remove(new_tmp_fullpath)
                        register_exception(req=req, alert_admin=False)
                        raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
                    ## Try to extract some metadata from the video container
                    metadata = ffprobe_metadata(new_tmp_fullpath)
                    json_response['meta_title'] = metadata['format'].get(
                        'TAG:title')
                    json_response['meta_description'] = metadata[
                        'format'].get('TAG:description')
                    json_response['meta_year'] = metadata['format'].get(
                        'TAG:year')
                    json_response['meta_author'] = metadata['format'].get(
                        'TAG:author')
                ## Empty file name
                else:
                    raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
                ## We found our file, we can break the loop
                break
    # Send our response
    if CFG_JSON_AVAILABLE:
        dumped_response = json.dumps(json_response)
        # store the response in the websubmit directory
        # this is needed if the submission is not finished and continued later
        response_dir = os.path.join(curdir, 'files', str(user_info['uid']),
                                    "response")
        try:
            os.makedirs(response_dir)
        except OSError:
            # register_exception(req=req, alert_admin=False)
            pass
        fp = open(os.path.join(response_dir, "response"), "w")
        fp.write(dumped_response)
        fp.close()
        return dumped_response
def upload_video(self, req, form):
    """
    A clone of uploadfile but for (large) videos.
    Does not copy the uploaded file to the websubmit directory.
    Instead, the path to the file is stored inside the submission directory.

    Validates the request, renames the uploaded temporary file so the
    garbage collector keeps it, extracts sample thumbnails, detects the
    aspect ratio and returns a JSON response with frames/aspect/metadata.
    """

    def gcd(a, b):
        """ the euclidean algorithm """
        while a:
            a, b = b % a, a
        return b

    from invenio.modules.encoder.extract import extract_frames
    from invenio.modules.encoder.config import CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR, CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME
    from invenio.modules.encoder.encode import determine_aspect
    from invenio.modules.encoder.utils import probe
    from invenio.modules.encoder.metadata import ffprobe_metadata
    from invenio.legacy.websubmit.config import CFG_WEBSUBMIT_TMP_VIDEO_PREFIX

    argd = wash_urlargd(form, {
        'doctype': (str, ''),
        'access': (str, ''),
        'indir': (str, ''),
        'session_id': (str, ''),
        'rename': (str, ''),
        })

    curdir = None
    if "indir" not in form or \
            "doctype" not in form or \
            "access" not in form:
        raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
    else:
        # Submission working directory for this doctype/access combination
        curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR,
                              argd['indir'],
                              argd['doctype'],
                              argd['access'])

    user_info = collect_user_info(req)
    if "session_id" in form:
        # Are we uploading using Flash, which does not transmit
        # cookie? The expect to receive session_id as a form
        # parameter. First check that IP addresses do not
        # mismatch.
        uid = session.uid
        user_info = collect_user_info(uid)
    # NOTE(review): 'uid' is only bound inside the branch above but is used
    # unconditionally below -- NameError if 'session_id' is absent. Confirm.

    try:
        act_fd = file(os.path.join(curdir, 'act'))
        action = act_fd.read()
        act_fd.close()
    except:
        # NOTE(review): this assigns 'act', not 'action' -- if the read
        # fails, 'action' is unbound below. Likely a typo; confirm intent.
        act = ""
    # Is user authorized to perform this action?
    (auth_code, auth_message) = acc_authorize_action(uid, "submit",
                                                     authorized_if_no_roles=not isGuestUser(uid),
                                                     verbose=0,
                                                     doctype=argd['doctype'],
                                                     act=action)
    if acc_is_role("submit", doctype=argd['doctype'], act=action) and auth_code != 0:
        # User cannot submit
        raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED)
    else:
        # Process the upload and get the response
        json_response = {}
        for key, formfields in form.items():
            filename = key.replace("[]", "")
            if hasattr(formfields, "filename") and formfields.filename:
                # Per-user, per-form-field target directory under curdir/files
                dir_to_open = os.path.abspath(os.path.join(curdir,
                                                           'files',
                                                           str(user_info['uid']),
                                                           key))
                try:
                    # Guard against path escapes out of the storage dir
                    assert(dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR))
                except AssertionError:
                    register_exception(req=req,
                                       prefix='curdir="%s", key="%s"' % (curdir, key))
                    raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
                if not os.path.exists(dir_to_open):
                    try:
                        os.makedirs(dir_to_open)
                    except OSError as e:
                        if e.errno != errno.EEXIST:
                            # If the issue is only that directory
                            # already exists, then continue, else
                            # report
                            register_exception(req=req, alert_admin=True)
                            raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
                filename = formfields.filename
                ## Before saving the file to disc, wash the filename (in particular
                ## washing away UNIX and Windows (e.g. DFS) paths):
                filename = os.path.basename(filename.split('\\')[-1])
                filename = filename.strip()
                if filename != "":
                    # Check that file does not already exist
                    while os.path.exists(os.path.join(dir_to_open, filename)):
                        #dirname, basename, extension = decompose_file(new_destination_path)
                        basedir, name, extension = decompose_file(filename)
                        new_name = propose_next_docname(name)
                        filename = new_name + extension
                    #-------------#
                    # VIDEO STUFF #
                    #-------------#
                    ## Remove all previous uploads
                    filelist = os.listdir(os.path.split(formfields.file.name)[0])
                    for afile in filelist:
                        if argd['access'] in afile:
                            os.remove(os.path.join(os.path.split(formfields.file.name)[0], afile))
                    ## Check if the file is a readable video
                    ## We must exclude all image and audio formats that are readable by ffprobe
                    ## NOTE(review): os.path.splitext() returns the extension
                    ## WITH the leading dot ('.jpg'), so membership in this
                    ## dot-less list never matches -- only probe() filters.
                    if (os.path.splitext(filename)[1] in ['jpg', 'jpeg', 'gif', 'tiff', 'bmp', 'png', 'tga',
                                                          'jp2', 'j2k', 'jpf', 'jpm', 'mj2', 'biff', 'cgm',
                                                          'exif', 'img', 'mng', 'pic', 'pict', 'raw', 'wmf',
                                                          'jpe', 'jif', 'jfif', 'jfi', 'tif', 'webp', 'svg',
                                                          'ai', 'ps', 'psd', 'wav', 'mp3', 'pcm', 'aiff',
                                                          'au', 'flac', 'wma', 'm4a', 'wv', 'oga', 'm4a',
                                                          'm4b', 'm4p', 'm4r', 'aac', 'mp4', 'vox', 'amr',
                                                          'snd'] or not probe(formfields.file.name)):
                        formfields.file.close()
                        raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
                    ## We have no "delete" attribute in Python 2.4
                    if sys.hexversion < 0x2050000:
                        ## We need to rename first and create a dummy file
                        ## Rename the temporary file for the garbage collector
                        new_tmp_fullpath = os.path.split(formfields.file.name)[0] + "/" + CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd['access'] + "_" + os.path.split(formfields.file.name)[1]
                        os.rename(formfields.file.name, new_tmp_fullpath)
                        # Leave an empty placeholder so the close() below
                        # does not fail on the now-missing temp file
                        dummy = open(formfields.file.name, "w")
                        dummy.close()
                        formfields.file.close()
                    else:
                        # Mark the NamedTemporatyFile as not to be deleted
                        formfields.file.delete = False
                        formfields.file.close()
                        ## Rename the temporary file for the garbage collector
                        new_tmp_fullpath = os.path.split(formfields.file.name)[0] + "/" + CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd['access'] + "_" + os.path.split(formfields.file.name)[1]
                        os.rename(formfields.file.name, new_tmp_fullpath)
                    # Write the path to the temp file to a file in STORAGEDIR
                    fp = open(os.path.join(dir_to_open, "filepath"), "w")
                    fp.write(new_tmp_fullpath)
                    fp.close()
                    fp = open(os.path.join(dir_to_open, "filename"), "w")
                    fp.write(filename)
                    fp.close()
                    ## We are going to extract some thumbnails for websubmit ##
                    sample_dir = os.path.join(curdir,
                                              'files',
                                              str(user_info['uid']),
                                              CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR)
                    try:
                        ## Remove old thumbnails
                        shutil.rmtree(sample_dir)
                    except OSError:
                        register_exception(req=req, alert_admin=False)
                    try:
                        # NOTE(review): sample_dir is already absolute, so
                        # os.path.join() here just returns sample_dir.
                        os.makedirs(os.path.join(curdir,
                                                 'files',
                                                 str(user_info['uid']),
                                                 sample_dir))
                    except OSError:
                        register_exception(req=req, alert_admin=False)
                    try:
                        extract_frames(input_file=new_tmp_fullpath,
                                       output_file=os.path.join(sample_dir, CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME),
                                       size="600x600",
                                       numberof=5)
                        json_response['frames'] = []
                        for extracted_frame in os.listdir(sample_dir):
                            json_response['frames'].append(extracted_frame)
                    except:
                        ## If the frame extraction fails, something was bad with the video
                        os.remove(new_tmp_fullpath)
                        register_exception(req=req, alert_admin=False)
                        raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
                    ## Try to detect the aspect. if this fails, the video is not readable
                    ## or a wrong file might have been uploaded
                    try:
                        (aspect, width, height) = determine_aspect(new_tmp_fullpath)
                        if aspect:
                            aspx, aspy = aspect.split(':')
                        else:
                            # Fall back to reducing width:height by their GCD
                            the_gcd = gcd(width, height)
                            aspx = str(width / the_gcd)
                            aspy = str(height / the_gcd)
                        json_response['aspx'] = aspx
                        json_response['aspy'] = aspy
                    except TypeError:
                        ## If the aspect detection completely fails
                        os.remove(new_tmp_fullpath)
                        register_exception(req=req, alert_admin=False)
                        raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
                    ## Try to extract some metadata from the video container
                    metadata = ffprobe_metadata(new_tmp_fullpath)
                    json_response['meta_title'] = metadata['format'].get('TAG:title')
                    json_response['meta_description'] = metadata['format'].get('TAG:description')
                    json_response['meta_year'] = metadata['format'].get('TAG:year')
                    json_response['meta_author'] = metadata['format'].get('TAG:author')
                ## Empty file name
                else:
                    raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
                ## We found our file, we can break the loop
                break;
    # Send our response
    if CFG_JSON_AVAILABLE:
        dumped_response = json.dumps(json_response)
        # store the response in the websubmit directory
        # this is needed if the submission is not finished and continued later
        response_dir = os.path.join(curdir, 'files', str(user_info['uid']), "response")
        try:
            os.makedirs(response_dir)
        except OSError:
            # register_exception(req=req, alert_admin=False)
            pass
        fp = open(os.path.join(response_dir, "response"), "w")
        fp.write(dumped_response)
        fp.close()
        return dumped_response
def Move_Files_to_Storage(parameters, curdir, form, user_info=None):
    """
    The function moves files received from the standard submission's
    form through file input element(s). The document are assigned a
    'doctype' (or category) corresponding to the file input element
    (eg. a file uploaded throught 'DEMOPIC_FILE' will go to
    'DEMOPIC_FILE' doctype/category).

    Websubmit engine builds the following file organization in the
    directory curdir/files:

                  curdir/files
                        |
      _____________________________________________________________________
            |                                   |                          |
      ./file input 1 element's name      ./file input 2 element's name    ....
     (for eg. 'DEMOART_MAILFILE')       (for eg. 'DEMOART_APPENDIX')
         |                                     |
      test1.pdf                             test2.pdf


    There is only one instance of all possible extension(pdf, gz...) in each part
    otherwise we may encounter problems when renaming files.

    + parameters['rename']: if given, all the files in curdir/files
      are renamed.  parameters['rename'] is of the form:
      <PA>elemfilename[re]</PA>* where re is an regexp to select(using
      re.sub) what part of the elem file has to be selected.
      e.g: <PA>file:TEST_FILE_RN</PA>

    + parameters['documenttype']: if given, other formats are created.
      It has 2 possible values: - if "picture" icon in gif format is created
      - if "fulltext" ps, gz .... formats are created

    + parameters['paths_and_suffixes']: directories to look into and
      corresponding suffix to add to every file inside. It must have
      the same structure as a Python dictionnary of the following form
      {'FrenchAbstract':'french', 'EnglishAbstract':''}

      The keys are the file input element name from the form <=>
      directories in curdir/files The values associated are the
      suffixes which will be added to all the files in e.g. curdir/files/FrenchAbstract

    + parameters['iconsize'] need only if 'icon' is selected in
      parameters['documenttype']

    + parameters['paths_and_restrictions']: the restrictions to apply
      to each uploaded file. The parameter must have the same
      structure as a Python dictionnary of the following form:
      {'DEMOART_APPENDIX':'restricted'}
      Files not specified in this parameter are not restricted.
      The specified restrictions can include a variable that can be
      replaced at runtime, for eg:
      {'DEMOART_APPENDIX':'restricted to <PA>file:SuE</PA>'}

    + parameters['paths_and_doctypes']: if a doctype is specified,
      the file will be saved under the 'doctype/collection' instead
      of under the default doctype/collection given by the name of
      the upload element that was used on the websubmit interface.
      to configure the doctype in websubmit, enter the value as in a
      dictionnary, for eg: {'PATHS_SWORD_UPL' : 'PUSHED_TO_ARXIV'} ->
      from Demo_Export_Via_Sword [DEMOSWR] Document Types
    """
    global sysno
    paths_and_suffixes = parameters['paths_and_suffixes']
    paths_and_restrictions = parameters['paths_and_restrictions']
    rename = parameters['rename']
    documenttype = parameters['documenttype']
    iconsizes = parameters['iconsize'].split(',')
    paths_and_doctypes = parameters['paths_and_doctypes']

    ## Create an instance of BibRecDocs for the current recid(sysno)
    bibrecdocs = BibRecDocs(sysno)

    # The three "dictionary" parameters arrive as strings; parse them
    paths_and_suffixes = get_dictionary_from_string(paths_and_suffixes)
    paths_and_restrictions = get_dictionary_from_string(paths_and_restrictions)
    paths_and_doctypes = get_dictionary_from_string(paths_and_doctypes)

    ## Go through all the directories specified in the keys
    ## of parameters['paths_and_suffixes']
    for path in paths_and_suffixes.keys():
        ## Check if there is a directory for the current path
        if os.path.exists("%s/files/%s" % (curdir, path)):
            ## Retrieve the restriction to apply to files in this
            ## directory
            restriction = paths_and_restrictions.get(path, '')
            restriction = re.sub('<PA>(?P<content>[^<]*)</PA>',
                                 get_pa_tag_content,
                                 restriction)
            ## Go through all the files in curdir/files/path
            for current_file in os.listdir("%s/files/%s" % (curdir, path)):
                ## retrieve filename and extension
                dummy, filename, extension = decompose_file(current_file)
                if extension and extension[0] != ".":
                    extension = '.' + extension
                if len(paths_and_suffixes[path]) != 0:
                    extension = "_%s%s" % (paths_and_suffixes[path], extension)
                ## Build the new file name if rename parameter has been given
                if rename:
                    filename = re.sub('<PA>(?P<content>[^<]*)</PA>', \
                                      get_pa_tag_content, \
                                      parameters['rename'])

                if rename or len(paths_and_suffixes[path]) != 0 :
                    ## Rename the file
                    try:
                        # Write the log rename_cmd
                        fd = open("%s/rename_cmd" % curdir, "a+")
                        fd.write("%s/files/%s/%s" % (curdir, path, current_file) + " to " +\
                                 "%s/files/%s/%s%s" % (curdir, path, filename, extension) + "\n\n")
                        ## Rename
                        os.rename("%s/files/%s/%s" % (curdir, path, current_file), \
                                  "%s/files/%s/%s%s" % (curdir, path, filename, extension))
                        fd.close()
                        ## Save the new name in a text file in curdir so that
                        ## the new filename can be used by templates to created the recmysl
                        fd = open("%s/%s_RENAMED" % (curdir, path), "w")
                        fd.write("%s%s" % (filename, extension))
                        fd.close()
                    except OSError as err:
                        msg = "Cannot rename the file.[%s]"
                        msg %= str(err)
                        raise InvenioWebSubmitFunctionWarning(msg)
                fullpath = "%s/files/%s/%s%s" % (curdir, path, filename, extension)
                ## Check if there is any existing similar file
                if not bibrecdocs.check_file_exists(fullpath, extension):
                    bibdoc = bibrecdocs.add_new_file(fullpath,
                                                     doctype=paths_and_doctypes.get(path, path),
                                                     never_fail=True)
                    bibdoc.set_status(restriction)
                    ## Fulltext
                    if documenttype == "fulltext":
                        # Derive additional formats (ps, gz, ...) from the file
                        additionalformats = createRelatedFormats(fullpath)
                        if len(additionalformats) > 0:
                            for additionalformat in additionalformats:
                                try:
                                    bibrecdocs.add_new_format(additionalformat)
                                except InvenioBibDocFileError:
                                    pass
                    ## Icon
                    elif documenttype == "picture":
                        # The first successfully created icon becomes the
                        # default icon subformat; further sizes get a
                        # "-<size>" suffixed subformat.
                        has_added_default_icon_subformat_p = False
                        for iconsize in iconsizes:
                            try:
                                iconpath, iconname = create_icon({
                                    'input-file' : fullpath,
                                    'icon-scale' : iconsize,
                                    'icon-name' : None,
                                    'icon-file-format' : None,
                                    'multipage-icon' : False,
                                    'multipage-icon-delay' : 100,
                                    'verbosity' : 0,
                                })
                            except Exception as e:
                                register_exception(prefix='Impossible to create icon for %s (record %s)' % (fullpath, sysno),
                                                   alert_admin=True)
                                continue
                            iconpath = os.path.join(iconpath, iconname)
                            docname = decompose_file(fullpath)[1]
                            try:
                                mybibdoc = bibrecdocs.get_bibdoc(docname)
                            except InvenioBibDocFileError:
                                mybibdoc = None
                            if iconpath is not None and mybibdoc is not None:
                                try:
                                    # Strip ImageMagick geometry modifiers
                                    # from the size to build a safe suffix
                                    icon_suffix = iconsize.replace('>', '').replace('<', '').replace('^', '').replace('!', '')
                                    if not has_added_default_icon_subformat_p:
                                        mybibdoc.add_icon(iconpath)
                                        has_added_default_icon_subformat_p = True
                                    else:
                                        mybibdoc.add_icon(iconpath, subformat=CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix)
                                    ## Save the new icon filename in a text file in curdir so that
                                    ## it can be used by templates to created the recmysl
                                    try:
                                        if not has_added_default_icon_subformat_p:
                                            fd = open("%s/%s_ICON" % (curdir, path), "w")
                                        else:
                                            fd = open("%s/%s_ICON_%s" % (curdir, path, iconsize + '_' + icon_suffix), "w")
                                        fd.write(os.path.basename(iconpath))
                                        fd.close()
                                    except OSError as err:
                                        msg = "Cannot store icon filename.[%s]"
                                        msg %= str(err)
                                        raise InvenioWebSubmitFunctionWarning(msg)
                                except InvenioBibDocFileError as e:
                                    # Most probably icon already existed.
                                    pass
                            elif mybibdoc is not None:
                                mybibdoc.delete_icon()

    # Update the MARC
    # NOTE(review): the command name and its flag are joined as if they were
    # path components; works on POSIX but is fragile -- confirm.
    bibdocfile_bin = os.path.join(CFG_BINDIR, 'bibdocfile --yes-i-know')
    run_shell_command(bibdocfile_bin + " --fix-marc --recid=%s", (str(sysno),))

    # Delete the HB BibFormat cache in the DB, so that the fulltext
    # links do not point to possible dead files
    run_sql("DELETE LOW_PRIORITY from bibfmt WHERE format='HB' AND id_bibrec=%s", (sysno,))

    return ""
def handle_file_post(req, allowed_mimetypes=None):
    """
    Handle the POST of a file.

    @param req: the request object whose headers and body are read.
    @param allowed_mimetypes: optional list of acceptable Content-Type
        values; when given, any other type is rejected.
    @return: a tuple with the full path to the file saved on disk,
        and its mimetype as provided by the request.
    @rtype: (string, string)
    @raise InvenioWebInterfaceWSGIContentLenghtError: if Content-Length
        is missing, not a positive integer, or the body is shorter than
        announced.
    @raise InvenioWebInterfaceWSGIContentTypeError: if the Content-Type
        is not among allowed_mimetypes.
    @raise InvenioWebInterfaceWSGIContentMD5Error: if a Content-MD5
        header was sent and does not match the received body.
    """
    from invenio.legacy.bibdocfile.api import decompose_file, md5
    ## We retrieve the length
    clen = req.headers_in["Content-Length"]
    if clen is None:
        raise InvenioWebInterfaceWSGIContentLenghtError("Content-Length header is missing")
    try:
        clen = int(clen)
        # The error message promises "a positive integer": accept any
        # strictly positive length (the original `clen > 1` rejected
        # legitimate 1-byte bodies).
        assert clen > 0
    except (ValueError, AssertionError):
        raise InvenioWebInterfaceWSGIContentLenghtError("Content-Length header should contain a positive integer")
    ## Let's take the content type
    ctype = req.headers_in["Content-Type"]
    if allowed_mimetypes and ctype not in allowed_mimetypes:
        raise InvenioWebInterfaceWSGIContentTypeError("Content-Type not in allowed list of content types: %s" % allowed_mimetypes)
    ## Let's optionally accept a suggested filename
    suffix = prefix = ''
    g = RE_CDISPOSITION_FILENAME.search(req.headers_in.get("Content-Disposition", ""))
    if g:
        dummy, prefix, suffix = decompose_file(g.group("filename"))
    ## Let's optionally accept an MD5 hash (and use it later for comparison)
    cmd5 = req.headers_in.get("Content-MD5")
    if cmd5:
        the_md5 = md5()
    ## Ok. We can initialize the file.  Open in binary mode: the body is
    ## raw bytes.  Note: the fdopen'ed file object takes ownership of fd,
    ## so closing `the_file` is sufficient; the original additionally
    ## called os.close(fd) on the error paths, which raises OSError on
    ## the already-closed descriptor.
    fd, path = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=CFG_TMPDIR)
    the_file = os.fdopen(fd, 'wb')
    ## Let's read the file
    while True:
        chunk = req.read(min(10240, clen))
        if len(chunk) < min(10240, clen):
            ## We expected to read at least clen (which is different than 0)
            ## but chunk was shorter! Gosh! Error! Panic!
            the_file.close()
            os.remove(path)
            raise InvenioWebInterfaceWSGIContentLenghtError("File shorter than what specified in Content-Length")
        if cmd5:
            ## MD5 was in the header let's compute it
            the_md5.update(chunk)
        ## And let's definitively write the content to disk :-)
        the_file.write(chunk)
        clen -= len(chunk)
        if clen == 0:
            ## That's it. Everything was read.
            break
    if cmd5 and the_md5.hexdigest().lower() != cmd5.strip().lower():
        ## Let's check the MD5
        the_file.close()
        os.remove(path)
        raise InvenioWebInterfaceWSGIContentMD5Error("MD5 checksum does not match")
    ## Let's clean everything up
    the_file.close()
    return (path, ctype)
def createRelatedFormats(fullpath, overwrite=True, debug=False, consider_version=False):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.

    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats
    @param debug: (bool) if True, dump tracing to stderr and raise the
        file-converter logger to DEBUG level for the duration of the call
    @param consider_version: (bool) if True, consider the version info in
        C{fullpath} to find missing format for that specific version, if
        C{fullpath} contains version info
    @return: a list of the paths to the converted files
    """
    file_converter_logger = get_file_converter_logger()
    old_logging_level = file_converter_logger.getEffectiveLevel()
    if debug:
        file_converter_logger.setLevel(DEBUG)
    try:
        createdpaths = []
        if consider_version:
            try:
                basedir, filename, extension, version = decompose_file_with_version(fullpath)
            except Exception:
                # fullpath carries no (valid) version info: fall back to a
                # plain decomposition.  (Narrowed from a bare "except:" so
                # KeyboardInterrupt/SystemExit are no longer swallowed.)
                basedir, filename, extension = decompose_file(fullpath)
                version = 0
        else:
            basedir, filename, extension = decompose_file(fullpath)
            version = 0
        extension = extension.lower()
        if debug:
            print("basedir: %s, filename: %s, extension: %s" % (basedir, filename, extension), file=sys.stderr)
        if overwrite:
            missing_formats = get_missing_formats([fullpath])
        else:
            # Collect the existing variants of the file (optionally only
            # those of the requested version) and compute what is missing.
            if version:
                filelist = glob.glob(os.path.join(basedir, '%s*;%s' % (filename, version)))
            else:
                filelist = glob.glob(os.path.join(basedir, '%s*' % filename))
            if debug:
                print("filelist: %s" % filelist, file=sys.stderr)
            missing_formats = get_missing_formats(filelist)
        if debug:
            print("missing_formats: %s" % missing_formats, file=sys.stderr)
        for path, formats in iteritems(missing_formats):
            if debug:
                print("... path: %s, formats: %s" % (path, formats), file=sys.stderr)
            for aformat in formats:
                if debug:
                    print("...... aformat: %s" % aformat, file=sys.stderr)
                newpath = os.path.join(basedir, filename + aformat)
                if debug:
                    print("...... newpath: %s" % newpath, file=sys.stderr)
                try:
                    if CFG_BIBDOCFILE_FILEDIR in basedir:
                        # We should create the new files in a temporary location, not
                        # directly inside the BibDoc directory.
                        newpath = convert_file(path, output_format=aformat)
                    else:
                        convert_file(path, newpath)
                    createdpaths.append(newpath)
                except InvenioWebSubmitFileConverterError as msg:
                    if debug:
                        print("...... Exception: %s" % msg, file=sys.stderr)
                    # A failed conversion is reported to the admin but does
                    # not abort the remaining conversions.
                    register_exception(alert_admin=True)
    finally:
        # Restore the caller's logger level even if conversion blew up.
        if debug:
            file_converter_logger.setLevel(old_logging_level)
    return createdpaths
def add(self, req, form):
    """
    Add a comment (review) to record with id recid where recid>0
    Also works for adding a remark to basket with id recid where recid<-99
    @param ln: languange
    @param recid: record id
    @param action:  'DISPLAY' to display add form
                    'SUBMIT' to submit comment once form is filled
                    'REPLY' to reply to an already existing comment
    @param msg: the body of the comment/review or remark
    @param score: star score of the review
    @param note: title of the review
    @param comid: comment id, needed for replying
    @param editor_type: the type of editor used for submitting the
                        comment: 'textarea', 'ckeditor'.
    @param subscribe: if set, subscribe user to receive email
                      notifications when new comment are added to
                      this discussion
    @return the full html page.
    """
    argd = wash_urlargd(form, {
        'action': (str, "DISPLAY"),
        'msg': (str, ""),
        'note': (str, ''),
        'score': (int, 0),
        'comid': (int, 0),
        'editor_type': (str, ""),
        'subscribe': (str, ""),
        'cookie': (str, "")
    })
    _ = gettext_set_language(argd['ln'])
    actions = ['DISPLAY', 'REPLY', 'SUBMIT']
    uid = getUid(req)
    # Is site ready to accept comments?
    if uid == -1 or (not CFG_WEBCOMMENT_ALLOW_COMMENTS and not CFG_WEBCOMMENT_ALLOW_REVIEWS):
        return page_not_authorized(req, "../comments/add", navmenuid='search')
    # Is user allowed to post comment?
    user_info = collect_user_info(req)
    (auth_code_1, auth_msg_1) = check_user_can_view_comments(user_info, self.recid)
    (auth_code_2, auth_msg_2) = check_user_can_send_comments(user_info, self.recid)
    if isGuestUser(uid):
        cookie = mail_cookie_create_authorize_action(
            VIEWRESTRCOLL,
            {'collection': guess_primary_collection_of_a_record(self.recid)})
        # Save user's value in cookie, so that these "POST"
        # parameters are not lost during login process
        msg_cookie = mail_cookie_create_common(
            'comment_msg',
            {'msg': argd['msg'],
             'note': argd['note'],
             'score': argd['score'],
             'editor_type': argd['editor_type'],
             'subscribe': argd['subscribe']},
            onetime=True)
        target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
            make_canonical_urlargd({'action': cookie,
                                    'ln': argd['ln'],
                                    'referer': CFG_SITE_SECURE_URL + user_info['uri'] + '&cookie=' + msg_cookie}, {})
        return redirect_to_url(req, target, norobot=True)
    elif (auth_code_1 or auth_code_2):
        return page_not_authorized(req, "../",
                                   text=auth_msg_1 + auth_msg_2)
    if argd['comid']:
        # If replying to a comment, are we on a record that
        # matches the original comment user is replying to?
        if not check_comment_belongs_to_record(argd['comid'], self.recid):
            return page_not_authorized(req, "../",
                                       text=_("Specified comment does not belong to this record"))
        # Is user trying to reply to a restricted comment? Make
        # sure user has access to it.  We will then inherit its
        # restriction for the new comment
        (auth_code, auth_msg) = check_user_can_view_comment(user_info, argd['comid'])
        if auth_code:
            return page_not_authorized(req, "../",
                                       text=_("You do not have access to the specified comment"))
        # Is user trying to reply to a deleted comment? If so, we
        # let submitted comment go (to not lose possibly submitted
        # content, if comment is submitted while original is
        # deleted), but we "reset" comid to make sure that for
        # action 'REPLY' the original comment is not included in
        # the reply
        if is_comment_deleted(argd['comid']):
            argd['comid'] = 0
    user_info = collect_user_info(req)
    can_attach_files = False
    (auth_code, auth_msg) = check_user_can_attach_file_to_comments(user_info, self.recid)
    if not auth_code and (user_info['email'] != 'guest'):
        can_attach_files = True
    warning_msgs = []  # list of warning tuples (warning_text, warning_color)
    added_files = {}
    if can_attach_files:
        # User is allowed to attach files. Process the files
        file_too_big = False
        formfields = form.get('commentattachment[]', [])
        if not hasattr(formfields, "__getitem__"):
            # A single file was uploaded
            formfields = [formfields]
        for formfield in formfields[:CFG_WEBCOMMENT_MAX_ATTACHED_FILES]:
            if hasattr(formfield, "filename") and formfield.filename:
                filename = formfield.filename
                dir_to_open = os.path.join(CFG_TMPSHAREDDIR, 'webcomment', str(uid))
                try:
                    assert (dir_to_open.startswith(CFG_TMPSHAREDDIR))
                except AssertionError:
                    register_exception(req=req,
                                       prefix='User #%s tried to upload file to forbidden location: %s'
                                       % (uid, dir_to_open))
                if not os.path.exists(dir_to_open):
                    try:
                        os.makedirs(dir_to_open)
                    except:
                        register_exception(req=req, alert_admin=True)
                ## Before saving the file to disc, wash the filename
                ## (in particular washing away UNIX and Windows
                ## (e.g. DFS) paths):
                filename = os.path.basename(filename.split('\\')[-1])
                filename = filename.strip()
                if filename != "":
                    # Check that file does not already exist
                    while os.path.exists(os.path.join(dir_to_open, filename)):
                        basedir, name, extension = decompose_file(filename)
                        new_name = propose_next_docname(name)
                        filename = new_name + extension
                    fp = open(os.path.join(dir_to_open, filename), "w")
                    # FIXME: temporary, waiting for wsgi handler to be
                    # fixed. Once done, read chunk by chunk
                    # while formfield.file:
                    #     fp.write(formfield.file.read(10240))
                    fp.write(formfield.file.read())
                    fp.close()
                    # Isn't this file too big?
                    file_size = os.path.getsize(os.path.join(dir_to_open, filename))
                    if CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE > 0 and \
                       file_size > CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE:
                        os.remove(os.path.join(dir_to_open, filename))
                        # One file is too big: record that,
                        # dismiss all uploaded files and re-ask to
                        # upload again
                        file_too_big = True
                        try:
                            raise InvenioWebCommentWarning(_('The size of file \\"%(x_file)s\\" (%(x_size)s) is larger than maximum allowed file size (%(x_max)s). Select files again.', x_file=cgi.escape(filename), x_size=str(file_size / 1024) + 'KB', x_max=str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE / 1024) + 'KB'))
                        except InvenioWebCommentWarning as exc:
                            register_exception(stream='warning')
                            warning_msgs.append((exc.message, ''))
                        #warning_msgs.append(('WRN_WEBCOMMENT_MAX_FILE_SIZE_REACHED', cgi.escape(filename), str(file_size/1024) + 'KB', str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE/1024) + 'KB'))
                    else:
                        added_files[filename] = os.path.join(dir_to_open, filename)
        if file_too_big:
            # One file was too big. Remove all uploaded files.
            # BUGFIX: iterate over the stored paths; the original looped
            # over added_files.items(), handing (name, path) tuples to
            # os.remove(), which always raised TypeError that the bare
            # except swallowed -- so the files were never deleted.
            for filepath in added_files.values():
                try:
                    os.remove(filepath)
                except:
                    # File was already removed or does not exist?
                    pass
    client_ip_address = req.remote_ip
    check_warnings = []
    (ok, problem) = check_recID_is_in_range(self.recid, check_warnings, argd['ln'])
    if ok:
        title, description, keywords = websearch_templates.tmpl_record_page_header_content(req, self.recid, argd['ln'])
        navtrail = create_navtrail_links(cc=guess_primary_collection_of_a_record(self.recid))
        if navtrail:
            navtrail += ' > '
        navtrail += '<a class="navtrail" href="%s/%s/%s?ln=%s">' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, argd['ln'])
        navtrail += cgi.escape(title)
        navtrail += '</a>'
        navtrail += '> <a class="navtrail" href="%s/%s/%s/%s/?ln=%s">%s</a>' % (
            CFG_SITE_URL, CFG_SITE_RECORD, self.recid,
            self.discussion == 1 and 'reviews' or 'comments',
            argd['ln'],
            self.discussion == 1 and _('Reviews') or _('Comments'))
        if argd['action'] not in actions:
            argd['action'] = 'DISPLAY'
        if not argd['msg']:
            # User had to login in-between, so retrieve msg
            # from cookie
            try:
                (kind, cookie_argd) = mail_cookie_check_common(argd['cookie'], delete=True)
                argd.update(cookie_argd)
            except InvenioWebAccessMailCookieDeletedError:
                return redirect_to_url(req, CFG_SITE_SECURE_URL + '/' + CFG_SITE_RECORD + '/' +
                                       str(self.recid) + (self.discussion == 1 and
                                                          '/reviews' or '/comments'))
            except InvenioWebAccessMailCookieError:
                # Invalid or empty cookie: continue
                pass
        subscribe = False
        if argd['subscribe'] and \
           get_user_subscription_to_discussion(self.recid, uid) == 0:
            # User is not already subscribed, and asked to subscribe
            subscribe = True
        body = perform_request_add_comment_or_remark(
            recID=self.recid,
            ln=argd['ln'],
            uid=uid,
            action=argd['action'],
            msg=argd['msg'],
            note=argd['note'],
            score=argd['score'],
            reviews=self.discussion,
            comID=argd['comid'],
            client_ip_address=client_ip_address,
            editor_type=argd['editor_type'],
            can_attach_files=can_attach_files,
            subscribe=subscribe,
            req=req,
            attached_files=added_files,
            warnings=warning_msgs)
        if self.discussion:
            title = _("Add Review")
        else:
            title = _("Add Comment")
        jqueryheader = '''
        <script src="%(CFG_SITE_URL)s/vendors/jquery-multifile/jquery.MultiFile.pack.js" type="text/javascript"></script>
        ''' % {'CFG_SITE_URL': CFG_SITE_URL}
        return page(title=title,
                    body=body,
                    navtrail=navtrail,
                    uid=uid,
                    language=CFG_SITE_LANG,
                    verbose=1,
                    req=req,
                    navmenuid='search',
                    metaheaderadd=jqueryheader)
    # id not in range
    else:
        return page(title=_("Record Not Found"),
                    body=problem,
                    uid=uid,
                    verbose=1,
                    req=req,
                    navmenuid='search')
def process_CKEditor_upload(form, uid, user_files_path, user_files_absolute_path,
                            recid=None, allowed_types=default_allowed_types):
    """
    Process a file upload request.

    @param form: the form as in req object.
    @type form: dict
    @param uid: the user ID of the user uploading the file.
    @type uid: int
    @param user_files_path: the base URL where the file can be
        accessed from the web after upload.
        Note that you have to implement your own handler to stream the
        files from the directory C{user_files_absolute_path} if you set
        this value.
    @type user_files_path: string
    @param user_files_absolute_path: the base path on the server where
        the files should be saved.
        Eg:C{%(CFG_DATADIR)s/comments/%(recid)s/%(uid)s}
    @type user_files_absolute_path: string
    @param recid: the record ID for which we upload a file. Leave None if not relevant.
    @type recid: int
    @param allowed_types: types allowed for uploading. These
        are supported by CKEditor: ['File', 'Image', 'Flash', 'Media']
    @type allowed_types: list of strings
    @return: (msg, uploaded_file_path, uploaded_file_name,
        uploaded_file_url, callback_function)
    """
    msg = ''
    filename = ''
    formfile = None
    uploaded_file_path = ''
    # NOTE: the original reset user_files_path to '' here, so the URL base
    # documented above was always returned empty.  The parameter value is
    # now passed through unchanged.
    for key, formfields in form.items():
        if key != 'upload':
            continue
        if hasattr(formfields, "filename") and formfields.filename:
            # We have found our file
            filename = formfields.filename
            formfile = formfields.file
            break
    can_upload_file_p = False
    if not form['type'] in allowed_types:
        # Is the type sent through the form ok?
        msg = 'You are not allowed to upload a file of this type'
    else:
        # Is user allowed to upload such file extension?
        basedir, name, extension = decompose_file(filename)
        extension = extension[1:]  # strip leading dot
        if extension in allowed_extensions.get(form['type'], []):
            can_upload_file_p = True
    if not can_upload_file_p:
        msg = 'You are not allowed to upload a file of this type'
    elif filename and formfile:
        ## Before saving the file to disk, wash the filename (in particular
        ## washing away UNIX and Windows (e.g. DFS) paths):
        filename = os.path.basename(filename.split('\\')[-1])
        # Remove \ / | : ? * " < > and control characters.
        # BUGFIX: the original pattern ended with '[\x00-\x1f\x7f-\x9f]/',
        # which only matched a control character FOLLOWED by a slash, so
        # bare control characters slipped through.
        filename = re.sub(r'\\|/|\||:|\?|\*|"|<|>|[\x00-\x1f\x7f-\x9f]',
                          '_', filename)
        filename = filename.strip()
        if filename != "":
            # Check that file does not already exist; if it does, derive a
            # fresh docname until a free one is found.
            while os.path.exists(os.path.join(user_files_absolute_path, filename)):
                basedir, name, extension = decompose_file(filename)
                new_name = propose_next_docname(name)
                filename = new_name + extension
            # This may be dangerous if the file size is bigger than the
            # available memory.  Write in binary mode: uploads are raw bytes.
            fp = open(os.path.join(user_files_absolute_path, filename), "wb")
            fp.write(formfile.read())
            fp.close()
            uploaded_file_path = os.path.join(user_files_absolute_path, filename)
    return (msg, uploaded_file_path, filename, user_files_path, form['CKEditorFuncNum'])
def createRelatedFormats(fullpath, overwrite=True, debug=False, consider_version=False):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.

    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats
    @param consider_version: (bool) if True, consider the version info
    in C{fullpath} to find missing format for that specific version,
    if C{fullpath} contains version info

    Return a list of the paths to the converted files
    """
    file_converter_logger = get_file_converter_logger()
    # Remember the caller's logger level so it can be restored on exit.
    old_logging_level = file_converter_logger.getEffectiveLevel()
    if debug:
        file_converter_logger.setLevel(DEBUG)
    try:
        createdpaths = []
        if consider_version:
            try:
                basedir, filename, extension, version = decompose_file_with_version(
                    fullpath)
            # NOTE(review): bare except also traps KeyboardInterrupt/SystemExit;
            # intended only as "no version info in fullpath" fallback.
            except:
                basedir, filename, extension = decompose_file(fullpath)
                version = 0
        else:
            basedir, filename, extension = decompose_file(fullpath)
            version = 0
        extension = extension.lower()
        if debug:
            print("basedir: %s, filename: %s, extension: %s" %
                  (basedir, filename, extension),
                  file=sys.stderr)
        if overwrite:
            missing_formats = get_missing_formats([fullpath])
        else:
            # Only compute formats that are genuinely missing: gather the
            # already-present variants (optionally version-specific).
            if version:
                filelist = glob.glob(
                    os.path.join(basedir, '%s*;%s' % (filename, version)))
            else:
                filelist = glob.glob(os.path.join(basedir, '%s*' % filename))
            if debug:
                print("filelist: %s" % filelist, file=sys.stderr)
            missing_formats = get_missing_formats(filelist)
        if debug:
            print("missing_formats: %s" % missing_formats, file=sys.stderr)
        for path, formats in iteritems(missing_formats):
            if debug:
                print("... path: %s, formats: %s" % (path, formats),
                      file=sys.stderr)
            for aformat in formats:
                if debug:
                    print("...... aformat: %s" % aformat, file=sys.stderr)
                newpath = os.path.join(basedir, filename + aformat)
                if debug:
                    print("...... newpath: %s" % newpath, file=sys.stderr)
                try:
                    if CFG_BIBDOCFILE_FILEDIR in basedir:
                        # We should create the new files in a temporary location, not
                        # directly inside the BibDoc directory.
                        newpath = convert_file(path, output_format=aformat)
                    else:
                        convert_file(path, newpath)
                    createdpaths.append(newpath)
                except InvenioWebSubmitFileConverterError as msg:
                    if debug:
                        print("...... Exception: %s" % msg, file=sys.stderr)
                    # A failed conversion is reported to the admin but does
                    # not abort the remaining conversions.
                    register_exception(alert_admin=True)
    finally:
        # Restore the logger level even if conversion raised.
        if debug:
            file_converter_logger.setLevel(old_logging_level)
    return createdpaths
def Move_Files_to_Storage(parameters, curdir, form, user_info=None):
    """
    The function moves files received from the standard submission's
    form through file input element(s). The document are assigned a
    'doctype' (or category) corresponding to the file input element
    (eg. a file uploaded throught 'DEMOPIC_FILE' will go to
    'DEMOPIC_FILE' doctype/category).

    Websubmit engine builds the following file organization in the
    directory curdir/files:

                  curdir/files
                        |
      _____________________________________________________________________
            |                                   |                          |
      ./file input 1 element's name      ./file input 2 element's name  ....
      (for eg. 'DEMOART_MAILFILE')       (for eg. 'DEMOART_APPENDIX')
            |                                   |
         test1.pdf                           test2.pdf

    There is only one instance of all possible extension(pdf, gz...) in
    each part otherwise we may encounter problems when renaming files.

    + parameters['rename']: if given, all the files in curdir/files are
      renamed.  parameters['rename'] is of the form:
      <PA>elemfilename[re]</PA>* where re is an regexp to select(using
      re.sub) what part of the elem file has to be selected.
      e.g: <PA>file:TEST_FILE_RN</PA>

    + parameters['documenttype']: if given, other formats are created.
      It has 2 possible values:
      - if "picture" icon in gif format is created
      - if "fulltext" ps, gz .... formats are created

    + parameters['paths_and_suffixes']: directories to look into and
      corresponding suffix to add to every file inside. It must have the
      same structure as a Python dictionnary of the following form
      {'FrenchAbstract':'french', 'EnglishAbstract':''}
      The keys are the file input element name from the form <=>
      directories in curdir/files The values associated are the suffixes
      which will be added to all the files in
      e.g. curdir/files/FrenchAbstract

    + parameters['iconsize'] need only if 'icon' is selected in
      parameters['documenttype']

    + parameters['paths_and_restrictions']: the restrictions to apply to
      each uploaded file. The parameter must have the same structure as
      a Python dictionnary of the following form:
      {'DEMOART_APPENDIX':'restricted'}
      Files not specified in this parameter are not restricted.
      The specified restrictions can include a variable that can be
      replaced at runtime, for eg:
      {'DEMOART_APPENDIX':'restricted to <PA>file:SuE</PA>'}

    + parameters['paths_and_doctypes']: if a doctype is specified, the
      file will be saved under the 'doctype/collection' instead of under
      the default doctype/collection given by the name of the upload
      element that was used on the websubmit interface.  to configure
      the doctype in websubmit, enter the value as in a dictionnary, for
      eg: {'PATHS_SWORD_UPL' : 'PUSHED_TO_ARXIV'} -> from
      Demo_Export_Via_Sword [DEMOSWR] Document Types
    """
    global sysno
    paths_and_suffixes = parameters['paths_and_suffixes']
    paths_and_restrictions = parameters['paths_and_restrictions']
    rename = parameters['rename']
    documenttype = parameters['documenttype']
    iconsizes = parameters['iconsize'].split(',')
    paths_and_doctypes = parameters['paths_and_doctypes']
    ## Create an instance of BibRecDocs for the current recid(sysno)
    bibrecdocs = BibRecDocs(sysno)
    # The three mapping parameters arrive as strings and are parsed into
    # real dictionaries here.
    paths_and_suffixes = get_dictionary_from_string(paths_and_suffixes)
    paths_and_restrictions = get_dictionary_from_string(paths_and_restrictions)
    paths_and_doctypes = get_dictionary_from_string(paths_and_doctypes)
    ## Go through all the directories specified in the keys
    ## of parameters['paths_and_suffixes']
    for path in paths_and_suffixes.keys():
        ## Check if there is a directory for the current path
        if os.path.exists("%s/files/%s" % (curdir, path)):
            ## Retrieve the restriction to apply to files in this
            ## directory
            restriction = paths_and_restrictions.get(path, '')
            # Expand any <PA>...</PA> placeholder in the restriction at
            # runtime.
            restriction = re.sub('<PA>(?P<content>[^<]*)</PA>',
                                 get_pa_tag_content,
                                 restriction)
            ## Go through all the files in curdir/files/path
            for current_file in os.listdir("%s/files/%s" % (curdir, path)):
                ## retrieve filename and extension
                dummy, filename, extension = decompose_file(current_file)
                if extension and extension[0] != ".":
                    extension = '.' + extension
                # Append the configured per-directory suffix (if any) in
                # front of the extension, e.g. "foo.pdf" -> "foo_french.pdf".
                if len(paths_and_suffixes[path]) != 0:
                    extension = "_%s%s" % (paths_and_suffixes[path], extension)
                ## Build the new file name if rename parameter has been given
                if rename:
                    filename = re.sub('<PA>(?P<content>[^<]*)</PA>', \
                                      get_pa_tag_content, \
                                      parameters['rename'])
                if rename or len(paths_and_suffixes[path]) != 0:
                    ## Rename the file
                    try:
                        # Write the log rename_cmd
                        fd = open("%s/rename_cmd" % curdir, "a+")
                        fd.write("%s/files/%s/%s" % (curdir, path, current_file) + " to " +\
                                 "%s/files/%s/%s%s" % (curdir, path, filename, extension) + "\n\n")
                        ## Rename
                        os.rename("%s/files/%s/%s" % (curdir, path, current_file), \
                                  "%s/files/%s/%s%s" % (curdir, path, filename, extension))
                        fd.close()
                        ## Save the new name in a text file in curdir so that
                        ## the new filename can be used by templates to created the recmysl
                        fd = open("%s/%s_RENAMED" % (curdir, path), "w")
                        fd.write("%s%s" % (filename, extension))
                        fd.close()
                    except OSError as err:
                        msg = "Cannot rename the file.[%s]"
                        msg %= str(err)
                        raise InvenioWebSubmitFunctionWarning(msg)
                fullpath = "%s/files/%s/%s%s" % (curdir, path, filename, extension)
                ## Check if there is any existing similar file
                if not bibrecdocs.check_file_exists(fullpath, extension):
                    bibdoc = bibrecdocs.add_new_file(
                        fullpath,
                        doctype=paths_and_doctypes.get(path, path),
                        never_fail=True)
                    bibdoc.set_status(restriction)
                    ## Fulltext
                    if documenttype == "fulltext":
                        # Derive the additional formats (ps, gz, ...) and
                        # attach each of them; an already-existing format
                        # is silently skipped.
                        additionalformats = createRelatedFormats(fullpath)
                        if len(additionalformats) > 0:
                            for additionalformat in additionalformats:
                                try:
                                    bibrecdocs.add_new_format(additionalformat)
                                except InvenioBibDocFileError:
                                    pass
                    ## Icon
                    elif documenttype == "picture":
                        # Only the first successfully created icon becomes
                        # the default subformat; further sizes get a
                        # size-derived subformat name.
                        has_added_default_icon_subformat_p = False
                        for iconsize in iconsizes:
                            try:
                                iconpath, iconname = create_icon({
                                    'input-file': fullpath,
                                    'icon-scale': iconsize,
                                    'icon-name': None,
                                    'icon-file-format': None,
                                    'multipage-icon': False,
                                    'multipage-icon-delay': 100,
                                    'verbosity': 0,
                                })
                            except Exception as e:
                                register_exception(
                                    prefix=
                                    'Impossible to create icon for %s (record %s)'
                                    % (fullpath, sysno),
                                    alert_admin=True)
                                continue
                            iconpath = os.path.join(iconpath, iconname)
                            docname = decompose_file(fullpath)[1]
                            try:
                                mybibdoc = bibrecdocs.get_bibdoc(docname)
                            except InvenioBibDocFileError:
                                mybibdoc = None
                            if iconpath is not None and mybibdoc is not None:
                                try:
                                    # Strip ImageMagick geometry modifiers
                                    # (> < ^ !) so the size is usable in a
                                    # subformat name and filename.
                                    icon_suffix = iconsize.replace(
                                        '>', '').replace('<', '').replace(
                                            '^', '').replace('!', '')
                                    if not has_added_default_icon_subformat_p:
                                        mybibdoc.add_icon(iconpath)
                                        has_added_default_icon_subformat_p = True
                                    else:
                                        mybibdoc.add_icon(
                                            iconpath,
                                            subformat=
                                            CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT
                                            + "-" + icon_suffix)
                                    ## Save the new icon filename in a text file in curdir so that
                                    ## it can be used by templates to created the recmysl
                                    try:
                                        if not has_added_default_icon_subformat_p:
                                            fd = open(
                                                "%s/%s_ICON" % (curdir, path),
                                                "w")
                                        else:
                                            fd = open(
                                                "%s/%s_ICON_%s" %
                                                (curdir, path, iconsize + '_' +
                                                 icon_suffix), "w")
                                        fd.write(os.path.basename(iconpath))
                                        fd.close()
                                    except OSError as err:
                                        msg = "Cannot store icon filename.[%s]"
                                        msg %= str(err)
                                        raise InvenioWebSubmitFunctionWarning(
                                            msg)
                                except InvenioBibDocFileError as e:
                                    # Most probably icon already existed.
                                    pass
                            elif mybibdoc is not None:
                                mybibdoc.delete_icon()
    # Update the MARC
    # NOTE(review): the flag is joined into the binary path here; verify
    # run_shell_command tokenizes this as "bibdocfile --yes-i-know".
    bibdocfile_bin = os.path.join(CFG_BINDIR, 'bibdocfile --yes-i-know')
    run_shell_command(bibdocfile_bin + " --fix-marc --recid=%s",
                      (str(sysno), ))
    # Delete the HB BibFormat cache in the DB, so that the fulltext
    # links do not point to possible dead files
    run_sql(
        "DELETE LOW_PRIORITY from bibfmt WHERE format='HB' AND id_bibrec=%s",
        (sysno, ))
    return ""
def uploadfile(self, req, form):
    """
    Similar to /submit, but only consider files. Nice for
    asynchronous Javascript uploads. Should be used to upload a
    single file. Also try to create an icon, and return URL to
    file(s) + icon(s)
    Authentication is performed based on session ID passed as
    parameter instead of cookie-based authentication, due to the
    use of this URL by the Flash plugin (to upload multiple files
    at once), which does not route cookies.

    FIXME: consider adding /deletefile and /modifyfile functions +
    parsing of additional parameters to rename files, add
    comments, restrictions, etc.
    """
    argd = wash_urlargd(form, {
        'doctype': (str, ''),
        'access': (str, ''),
        'indir': (str, ''),
        'session_id': (str, ''),
        'rename': (str, ''),
    })
    curdir = None
    if "indir" not in form or \
       "doctype" not in form or \
       "access" not in form:
        raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
    else:
        curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR,
                              argd['indir'],
                              argd['doctype'],
                              argd['access'])
    user_info = collect_user_info(req)
    if "session_id" in form:
        # Are we uploading using Flash, which does not transmit
        # cookie? The expect to receive session_id as a form
        # parameter.  First check that IP addresses do not
        # mismatch.
        uid = session.uid
        user_info = collect_user_info(uid)
        try:
            # BUGFIX: use open() instead of the Python-2-only file()
            # builtin, which does not exist on Python 3 and made this
            # read always fail (leaving action always empty).
            act_fd = open(os.path.join(curdir, 'act'))
            action = act_fd.read()
            act_fd.close()
        except:
            action = ""
        # Is user authorized to perform this action?
        (auth_code, auth_message) = acc_authorize_action(
            uid, "submit",
            authorized_if_no_roles=not isGuestUser(uid),
            verbose=0,
            doctype=argd['doctype'],
            act=action)
        if acc_is_role("submit", doctype=argd['doctype'], act=action) and auth_code != 0:
            # User cannot submit
            raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED)
        else:
            # Process the upload and get the response
            added_files = {}
            for key, formfields in form.items():
                filename = key.replace("[]", "")
                file_to_open = os.path.join(curdir, filename)
                if hasattr(formfields, "filename") and formfields.filename:
                    dir_to_open = os.path.abspath(os.path.join(curdir,
                                                               'files',
                                                               str(user_info['uid']),
                                                               key))
                    try:
                        assert(dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR))
                    except AssertionError:
                        register_exception(req=req,
                                           prefix='curdir="%s", key="%s"' % (curdir, key))
                        raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
                    if not os.path.exists(dir_to_open):
                        try:
                            os.makedirs(dir_to_open)
                        except OSError as e:
                            if e.errno != errno.EEXIST:
                                # If the issue is only that directory
                                # already exists, then continue, else
                                # report
                                register_exception(req=req, alert_admin=True)
                                raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
                    filename = formfields.filename
                    ## Before saving the file to disc, wash the filename (in particular
                    ## washing away UNIX and Windows (e.g. DFS) paths):
                    filename = os.path.basename(filename.split('\\')[-1])
                    filename = filename.strip()
                    if filename != "":
                        # Check that file does not already exist
                        while os.path.exists(os.path.join(dir_to_open, filename)):
                            #dirname, basename, extension = decompose_file(new_destination_path)
                            basedir, name, extension = decompose_file(filename)
                            new_name = propose_next_docname(name)
                            filename = new_name + extension
                        # This may be dangerous if the file size is bigger than the available memory
                        fp = open(os.path.join(dir_to_open, filename), "w")
                        fp.write(formfields.file.read())
                        fp.close()
                        fp = open(os.path.join(curdir, "lastuploadedfile"), "w")
                        fp.write(filename)
                        fp.close()
                        fp = open(file_to_open, "w")
                        fp.write(filename)
                        fp.close()
                        try:
                            # Create icon
                            (icon_path, icon_name) = create_icon(
                                {'input-file': os.path.join(dir_to_open, filename),
                                 'icon-name': filename,  # extension stripped automatically
                                 'icon-file-format': 'gif',
                                 'multipage-icon': False,
                                 'multipage-icon-delay': 100,
                                 'icon-scale': "300>",  # Resize only if width > 300
                                 'verbosity': 0,
                                 })
                            icons_dir = os.path.join(os.path.join(curdir,
                                                                  'icons',
                                                                  str(user_info['uid']),
                                                                  key))
                            if not os.path.exists(icons_dir):
                                # Create uid/icons dir if needed
                                try:
                                    os.makedirs(icons_dir)
                                except OSError as e:
                                    if e.errno != errno.EEXIST:
                                        # If the issue is only that
                                        # directory already exists,
                                        # then continue, else report
                                        register_exception(req=req, alert_admin=True)
                                        raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
                            os.rename(os.path.join(icon_path, icon_name),
                                      os.path.join(icons_dir, icon_name))
                            added_files[key] = {'name': filename,
                                                'iconName': icon_name}
                        except InvenioWebSubmitIconCreatorError as e:
                            # We could not create the icon
                            added_files[key] = {'name': filename}
                            continue
                    else:
                        raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
            # Send our response
            if CFG_JSON_AVAILABLE:
                return json.dumps(added_files)
def write_metadata_local(inputfile, outputfile, metadata_dictionary, verbose):
    """
    Metadata write method, takes the .pdf as input and creates a new
    one with the new info.

    @param inputfile: path to the pdf
    @type inputfile: string
    @param outputfile: path to the resulting pdf
    @type outputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param metadata_dictionary: metadata information to update inputfile
    @type metadata_dictionary: dict
    @raise InvenioWebSubmitFileMetadataRuntimeError: if pdftk produced no
        updated PDF, or the result could not be moved to outputfile.
    """
    # Take the file name (0 base, 1 name, 2 ext)
    filename = decompose_file(inputfile)[1]

    # Print pdf metadata
    if verbose > 1:
        print('Metadata information in the PDF file ' + filename + ': \n')
        try:
            # NOTE(review): inputfile is concatenated unquoted into a shell
            # command; a path containing spaces or shell metacharacters will
            # break (or be unsafe) here -- confirm callers sanitize it.
            os.system(CFG_PATH_PDFTK + ' ' + inputfile + ' dump_data')
        except Exception:
            print('Problem with inputfile to PDFTK')

    # Info file for pdftk: temporary "InfoKey:/InfoValue:" text file
    # consumed by pdftk's update_info operation below.
    (fd, path_to_info) = tempfile.mkstemp(prefix="wsm_pdf_plugin_info_", \
                                              dir=CFG_TMPDIR)
    os.close(fd)
    file_in = open(path_to_info, 'w')
    if verbose > 5:
        print("Saving PDFTK info file to %s" % path_to_info)

    # User interaction to form the info file
    # Main Case: Dictionary received through option -d
    if not metadata_dictionary == {}:
        for tag in metadata_dictionary:
            line = 'InfoKey: ' + tag + '\nInfoValue: ' + \
                   metadata_dictionary[tag] + '\n'
            if verbose > 0:
                print(line)
            file_in.writelines(line)
    else:
        # Interactive mode: prompt for tag/value pairs until the user
        # quits or aborts.  NOTE(review): raw_input is Python 2 only,
        # while the rest of this file uses print() -- confirm the
        # intended interpreter.
        data_modified = False
        user_input = 'user_input'
        print("Entering interactive mode. Choose what you want to do:")
        while (user_input):
            if not data_modified:
                try:
                    user_input = raw_input('[w]rite / [q]uit\n')
                except:
                    # EOF / keyboard interrupt on the prompt: abort
                    print("Aborting")
                    return
            else:
                try:
                    user_input = raw_input('[w]rite / [q]uit and apply / [a]bort \n')
                except:
                    print("Aborting")
                    return
            if user_input == 'q':
                if not data_modified:
                    # Nothing was modified: skip running pdftk entirely
                    return
                break
            elif user_input == 'w':
                try:
                    tag = raw_input('Tag to update:\n')
                    value = raw_input('With value:\n')
                except:
                    print("Aborting")
                    return
                # Write to info file
                line = 'InfoKey: ' + tag + '\nInfoValue: ' + value + '\n'
                data_modified = True
                file_in.writelines(line)
            elif user_input == 'a':
                return
            else:
                print("Invalid option: ")
    file_in.close()

    # Temporary output path for pdftk to write the updated PDF into
    (fd, pdf_temp_path) = tempfile.mkstemp(prefix="wsm_pdf_plugin_pdf_", \
                                               dir=CFG_TMPDIR)
    os.close(fd)

    # Now we call pdftk tool to update the info on a pdf
    #try:
    cmd_pdftk = '%s %s update_info %s output %s'
    (exit_status, output_std, output_err) = \
                  run_shell_command(cmd_pdftk,
                                    args=(CFG_PATH_PDFTK, inputfile,
                                          path_to_info, pdf_temp_path))
    if verbose > 5:
        print(output_std, output_err)

    if os.path.exists(pdf_temp_path):
        # Move to final destination if exist
        try:
            shutil.move(pdf_temp_path, outputfile)
        except Exception as err:
            raise InvenioWebSubmitFileMetadataRuntimeError("Could not move %s to %s" % \
                                                           (pdf_temp_path, outputfile))
    else:
        # Something bad happened
        raise InvenioWebSubmitFileMetadataRuntimeError("Could not update metadata " + output_err)
def uploadfile(self, req, form):
    """
    Similar to /submit, but only consider files. Nice for
    asynchronous Javascript uploads. Should be used to upload a
    single file.

    Also try to create an icon, and return URL to file(s) + icon(s)

    Authentication is performed based on session ID passed as
    parameter instead of cookie-based authentication, due to the
    use of this URL by the Flash plugin (to upload multiple files
    at once), which does not route cookies.

    FIXME: consider adding /deletefile and /modifyfile functions +
    parsing of additional parameters to rename files, add
    comments, restrictions, etc.

    @param req: request object (also used for the JSON response)
    @param form: submitted form fields; file fields are detected via
        their .filename attribute
    @return: JSON mapping of form key -> {'name': ..., 'iconName': ...}
        when CFG_JSON_AVAILABLE, else None (implicit)
    """
    argd = wash_urlargd(form, {
        'doctype': (str, ''),
        'access': (str, ''),
        'indir': (str, ''),
        'session_id': (str, ''),
        'rename': (str, ''),
    })

    curdir = None
    if "indir" not in form or \
            "doctype" not in form or \
            "access" not in form:
        raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
    else:
        curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR,
                              argd['indir'],
                              argd['doctype'],
                              argd['access'])

    user_info = collect_user_info(req)
    if "session_id" in form:
        # Are we uploading using Flash, which does not transmit
        # cookie? The expect to receive session_id as a form
        # parameter.  First check that IP addresses do not
        # mismatch.
        uid = session.uid
        user_info = collect_user_info(uid)

    # NOTE(review): uid is only assigned inside the "session_id" branch
    # above; if 'session_id' is absent, the acc_authorize_action() call
    # below raises NameError -- confirm whether uid = getUid(req) was
    # intended here.
    # NOTE(review): file() is the Python 2 builtin (removed in Python 3).
    try:
        act_fd = file(os.path.join(curdir, 'act'))
        action = act_fd.read()
        act_fd.close()
    except:
        action = ""

    try:
        recid_fd = file(os.path.join(curdir, 'SN'))
        recid = recid_fd.read()
        recid_fd.close()
    except:
        recid = ''
    user_is_owner = False
    if recid:
        user_is_owner = is_user_owner_of_record(user_info, recid)

    try:
        categ_fd = file(os.path.join(curdir, 'combo%s' % argd['doctype']))
        categ = categ_fd.read()
        categ_fd.close()
    except IOError:
        # No category file for this doctype: match any category
        categ = '*'

    # Is user authorized to perform this action?
    (auth_code, auth_message) = acc_authorize_action(
        uid, "submit",
        authorized_if_no_roles=not isGuestUser(uid),
        verbose=0,
        doctype=argd['doctype'],
        act=action,
        categ=categ)
    if acc_is_role("submit", doctype=argd['doctype'], act=action) and auth_code != 0 and not user_is_owner:
        # User cannot submit
        raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED)
    else:
        # Process the upload and get the response
        added_files = {}
        for key, formfields in form.items():
            filename = key.replace("[]", "")
            file_to_open = os.path.join(curdir, filename)
            if hasattr(formfields, "filename") and formfields.filename:
                dir_to_open = os.path.abspath(
                    os.path.join(curdir, 'files',
                                 str(user_info['uid']), key))
                try:
                    # Guard against path traversal escaping the
                    # storage directory
                    assert (
                        dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR))
                except AssertionError:
                    register_exception(req=req,
                                       prefix='curdir="%s", key="%s"' % (curdir, key))
                    raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                if not os.path.exists(dir_to_open):
                    try:
                        os.makedirs(dir_to_open)
                    except OSError as e:
                        if e.errno != errno.EEXIST:
                            # If the issue is only that directory
                            # already exists, then continue, else
                            # report
                            register_exception(req=req, alert_admin=True)
                            raise apache.SERVER_RETURN(
                                apache.HTTP_FORBIDDEN)

                filename = formfields.filename
                ## Before saving the file to disc, wash the filename (in particular
                ## washing away UNIX and Windows (e.g. DFS) paths):
                filename = os.path.basename(filename.split('\\')[-1])
                filename = filename.strip()
                if filename != "":
                    # Check that file does not already exist
                    n = 1
                    while os.path.exists(
                            os.path.join(dir_to_open, filename)):
                        #dirname, basename, extension = decompose_file(new_destination_path)
                        basedir, name, extension = decompose_file(filename)
                        new_name = propose_next_docname(name)
                        filename = new_name + extension
                    # This may be dangerous if the file size is bigger than the available memory
                    fp = open(os.path.join(dir_to_open, filename), "w")
                    fp.write(formfields.file.read())
                    fp.close()
                    # Remember the last uploaded filename for the
                    # submission machinery
                    fp = open(os.path.join(curdir, "lastuploadedfile"), "w")
                    fp.write(filename)
                    fp.close()
                    fp = open(file_to_open, "w")
                    fp.write(filename)
                    fp.close()
                    try:
                        # Create icon
                        (icon_path, icon_name) = create_icon({
                            'input-file': os.path.join(dir_to_open, filename),
                            'icon-name': filename,  # extension stripped automatically
                            'icon-file-format': 'gif',
                            'multipage-icon': False,
                            'multipage-icon-delay': 100,
                            'icon-scale': "300>",  # Resize only if width > 300
                            'verbosity': 0,
                        })
                        icons_dir = os.path.join(
                            os.path.join(curdir, 'icons',
                                         str(user_info['uid']), key))
                        if not os.path.exists(icons_dir):
                            # Create uid/icons dir if needed
                            try:
                                os.makedirs(icons_dir)
                            except OSError as e:
                                if e.errno != errno.EEXIST:
                                    # If the issue is only that
                                    # directory already exists,
                                    # then continue, else report
                                    register_exception(req=req, alert_admin=True)
                                    raise apache.SERVER_RETURN(
                                        apache.HTTP_FORBIDDEN)
                        os.rename(os.path.join(icon_path, icon_name),
                                  os.path.join(icons_dir, icon_name))
                        added_files[key] = {
                            'name': filename,
                            'iconName': icon_name
                        }
                    except InvenioWebSubmitIconCreatorError as e:
                        # We could not create the icon
                        added_files[key] = {'name': filename}
                        continue
                else:
                    # Washed filename is empty: reject the upload
                    raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)

        # Send our response
        if CFG_JSON_AVAILABLE:
            return json.dumps(added_files)
def write_metadata_local(inputfile, outputfile, metadata_dictionary, verbose):
    """
    Metadata write method, takes the .pdf as input and creates a new
    one with the new info.

    @param inputfile: path to the pdf
    @type inputfile: string
    @param outputfile: path to the resulting pdf
    @type outputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param metadata_dictionary: metadata information to update inputfile
    @type metadata_dictionary: dict
    @raise InvenioWebSubmitFileMetadataRuntimeError: if pdftk produced no
        updated PDF, or the result could not be moved to outputfile.
    """
    # Take the file name (0 base, 1 name, 2 ext)
    filename = decompose_file(inputfile)[1]

    # Print pdf metadata
    if verbose > 1:
        print('Metadata information in the PDF file ' + filename + ': \n')
        try:
            # NOTE(review): inputfile is interpolated unquoted into the
            # shell command; paths with spaces/metacharacters will break
            # or be unsafe -- confirm callers sanitize it.
            os.system(CFG_PATH_PDFTK + ' ' + inputfile + ' dump_data')
        except Exception:
            print('Problem with inputfile to PDFTK')

    # Info file for pdftk: temporary "InfoKey:/InfoValue:" file that
    # pdftk's update_info operation reads below.
    (fd, path_to_info) = tempfile.mkstemp(prefix="wsm_pdf_plugin_info_", \
                                              dir=CFG_TMPDIR)
    os.close(fd)
    file_in = open(path_to_info, 'w')
    if verbose > 5:
        print("Saving PDFTK info file to %s" % path_to_info)

    # User interaction to form the info file
    # Main Case: Dictionary received through option -d
    if not metadata_dictionary == {}:
        for tag in metadata_dictionary:
            line = 'InfoKey: ' + tag + '\nInfoValue: ' + \
                   metadata_dictionary[tag] + '\n'
            if verbose > 0:
                print(line)
            file_in.writelines(line)
    else:
        # Interactive mode: collect tag/value pairs from the console.
        # NOTE(review): raw_input is Python 2 only -- confirm the
        # intended interpreter for this module.
        data_modified = False
        user_input = 'user_input'
        print("Entering interactive mode. Choose what you want to do:")
        while (user_input):
            if not data_modified:
                try:
                    user_input = raw_input('[w]rite / [q]uit\n')
                except:
                    # EOF / interrupt on the prompt: abort
                    print("Aborting")
                    return
            else:
                try:
                    user_input = raw_input(
                        '[w]rite / [q]uit and apply / [a]bort \n')
                except:
                    print("Aborting")
                    return
            if user_input == 'q':
                if not data_modified:
                    # Nothing changed: no pdftk run needed
                    return
                break
            elif user_input == 'w':
                try:
                    tag = raw_input('Tag to update:\n')
                    value = raw_input('With value:\n')
                except:
                    print("Aborting")
                    return
                # Write to info file
                line = 'InfoKey: ' + tag + '\nInfoValue: ' + value + '\n'
                data_modified = True
                file_in.writelines(line)
            elif user_input == 'a':
                return
            else:
                print("Invalid option: ")
    file_in.close()

    # Temporary output path for the updated PDF
    (fd, pdf_temp_path) = tempfile.mkstemp(prefix="wsm_pdf_plugin_pdf_", \
                                               dir=CFG_TMPDIR)
    os.close(fd)

    # Now we call pdftk tool to update the info on a pdf
    #try:
    cmd_pdftk = '%s %s update_info %s output %s'
    (exit_status, output_std, output_err) = \
                  run_shell_command(cmd_pdftk,
                                    args=(CFG_PATH_PDFTK, inputfile,
                                          path_to_info, pdf_temp_path))
    if verbose > 5:
        print(output_std, output_err)

    if os.path.exists(pdf_temp_path):
        # Move to final destination if exist
        try:
            shutil.move(pdf_temp_path, outputfile)
        except Exception as err:
            raise InvenioWebSubmitFileMetadataRuntimeError("Could not move %s to %s" % \
                                                           (pdf_temp_path, outputfile))
    else:
        # Something bad happened
        raise InvenioWebSubmitFileMetadataRuntimeError(
            "Could not update metadata " + output_err)
def add(self, req, form):
    """
    Add a comment (review) to record with id recid where recid>0
    Also works for adding a remark to basket with id recid where recid<-99

    @param ln: languange
    @param recid: record id
    @param action: 'DISPLAY' to display add form
                   'SUBMIT' to submit comment once form is filled
                   'REPLY' to reply to an already existing comment
    @param msg: the body of the comment/review or remark
    @param score: star score of the review
    @param note: title of the review
    @param comid: comment id, needed for replying
    @param editor_type: the type of editor used for submitting the
                        comment: 'textarea', 'ckeditor'.
    @param subscribe: if set, subscribe user to receive email
                      notifications when new comment are added to
                      this discussion
    @return the full html page.
    """
    argd = wash_urlargd(
        form,
        {
            "action": (str, "DISPLAY"),
            "msg": (str, ""),
            "note": (str, ""),
            "score": (int, 0),
            "comid": (int, 0),
            "editor_type": (str, ""),
            "subscribe": (str, ""),
            "cookie": (str, ""),
        },
    )
    _ = gettext_set_language(argd["ln"])

    actions = ["DISPLAY", "REPLY", "SUBMIT"]

    uid = getUid(req)

    # Is site ready to accept comments?
    if uid == -1 or (not CFG_WEBCOMMENT_ALLOW_COMMENTS and not CFG_WEBCOMMENT_ALLOW_REVIEWS):
        return page_not_authorized(req, "../comments/add", navmenuid="search")

    # Is user allowed to post comment?
    user_info = collect_user_info(req)
    (auth_code_1, auth_msg_1) = check_user_can_view_comments(user_info, self.recid)
    (auth_code_2, auth_msg_2) = check_user_can_send_comments(user_info, self.recid)
    if isGuestUser(uid):
        cookie = mail_cookie_create_authorize_action(
            VIEWRESTRCOLL, {"collection": guess_primary_collection_of_a_record(self.recid)}
        )
        # Save user's value in cookie, so that these "POST"
        # parameters are not lost during login process
        msg_cookie = mail_cookie_create_common(
            "comment_msg",
            {
                "msg": argd["msg"],
                "note": argd["note"],
                "score": argd["score"],
                "editor_type": argd["editor_type"],
                "subscribe": argd["subscribe"],
            },
            onetime=True,
        )
        target = (
            CFG_SITE_SECURE_URL
            + "/youraccount/login"
            + make_canonical_urlargd(
                {
                    "action": cookie,
                    "ln": argd["ln"],
                    "referer": CFG_SITE_SECURE_URL + user_info["uri"] + "&cookie=" + msg_cookie,
                },
                {},
            )
        )
        return redirect_to_url(req, target, norobot=True)
    elif auth_code_1 or auth_code_2:
        return page_not_authorized(req, "../", text=auth_msg_1 + auth_msg_2)

    if argd["comid"]:
        # If replying to a comment, are we on a record that
        # matches the original comment user is replying to?
        if not check_comment_belongs_to_record(argd["comid"], self.recid):
            return page_not_authorized(req, "../", text=_("Specified comment does not belong to this record"))

        # Is user trying to reply to a restricted comment? Make
        # sure user has access to it. We will then inherit its
        # restriction for the new comment
        (auth_code, auth_msg) = check_user_can_view_comment(user_info, argd["comid"])
        if auth_code:
            return page_not_authorized(req, "../", text=_("You do not have access to the specified comment"))

        # Is user trying to reply to a deleted comment? If so, we
        # let submitted comment go (to not lose possibly submitted
        # content, if comment is submitted while original is
        # deleted), but we "reset" comid to make sure that for
        # action 'REPLY' the original comment is not included in
        # the reply
        if is_comment_deleted(argd["comid"]):
            argd["comid"] = 0

    user_info = collect_user_info(req)
    can_attach_files = False
    (auth_code, auth_msg) = check_user_can_attach_file_to_comments(user_info, self.recid)
    if not auth_code and (user_info["email"] != "guest"):
        can_attach_files = True

    warning_msgs = []  # list of warning tuples (warning_text, warning_color)
    added_files = {}  # washed filename -> absolute path on disk
    if can_attach_files:
        # User is allowed to attach files. Process the files
        file_too_big = False
        formfields = form.get("commentattachment[]", [])
        if not hasattr(formfields, "__getitem__"):  # A single file was uploaded
            formfields = [formfields]
        for formfield in formfields[:CFG_WEBCOMMENT_MAX_ATTACHED_FILES]:
            if hasattr(formfield, "filename") and formfield.filename:
                filename = formfield.filename
                dir_to_open = os.path.join(CFG_TMPSHAREDDIR, "webcomment", str(uid))
                try:
                    # Guard against escaping the shared tmp area
                    assert dir_to_open.startswith(CFG_TMPSHAREDDIR)
                except AssertionError:
                    register_exception(
                        req=req,
                        prefix="User #%s tried to upload file to forbidden location: %s" % (uid, dir_to_open),
                    )

                if not os.path.exists(dir_to_open):
                    try:
                        os.makedirs(dir_to_open)
                    except:
                        register_exception(req=req, alert_admin=True)

                ## Before saving the file to disc, wash the filename
                ## (in particular washing away UNIX and Windows
                ## (e.g. DFS) paths):
                filename = os.path.basename(filename.split("\\")[-1])
                filename = filename.strip()
                if filename != "":
                    # Check that file does not already exist
                    while os.path.exists(os.path.join(dir_to_open, filename)):
                        basedir, name, extension = decompose_file(filename)
                        new_name = propose_next_docname(name)
                        filename = new_name + extension
                    fp = open(os.path.join(dir_to_open, filename), "w")
                    # FIXME: temporary, waiting for wsgi handler to be
                    # fixed. Once done, read chunk by chunk
                    # while formfield.file:
                    #     fp.write(formfield.file.read(10240))
                    fp.write(formfield.file.read())
                    fp.close()
                    # Isn't this file too big?
                    file_size = os.path.getsize(os.path.join(dir_to_open, filename))
                    if CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE > 0 and file_size > CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE:
                        os.remove(os.path.join(dir_to_open, filename))
                        # One file is too big: record that,
                        # dismiss all uploaded files and re-ask to
                        # upload again
                        file_too_big = True
                        try:
                            raise InvenioWebCommentWarning(
                                _(
                                    'The size of file \\"%(x_file)s\\" (%(x_size)s) is larger than maximum allowed file size (%(x_max)s). Select files again.',
                                    x_file=cgi.escape(filename),
                                    x_size=str(file_size / 1024) + "KB",
                                    x_max=str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE / 1024) + "KB",
                                )
                            )
                        except InvenioWebCommentWarning as exc:
                            register_exception(stream="warning")
                            warning_msgs.append((exc.message, ""))
                    else:
                        added_files[filename] = os.path.join(dir_to_open, filename)

        if file_too_big:
            # One file was too big: remove all uploaded files.
            # BUGFIX: iterate the stored paths (values), not the
            # (name, path) items -- os.remove() on a tuple raised a
            # TypeError that the bare except below silently swallowed,
            # so the temporary files were never actually deleted.
            for filepath in added_files.values():
                try:
                    os.remove(filepath)
                except:
                    # File was already removed or does not exist?
                    pass

    client_ip_address = req.remote_ip
    check_warnings = []
    (ok, problem) = check_recID_is_in_range(self.recid, check_warnings, argd["ln"])
    if ok:
        title, description, keywords = websearch_templates.tmpl_record_page_header_content(
            req, self.recid, argd["ln"]
        )
        navtrail = create_navtrail_links(cc=guess_primary_collection_of_a_record(self.recid))
        if navtrail:
            navtrail += " > "
        navtrail += '<a class="navtrail" href="%s/%s/%s?ln=%s">' % (
            CFG_SITE_URL,
            CFG_SITE_RECORD,
            self.recid,
            argd["ln"],
        )
        navtrail += cgi.escape(title)
        navtrail += "</a>"
        navtrail += '> <a class="navtrail" href="%s/%s/%s/%s/?ln=%s">%s</a>' % (
            CFG_SITE_URL,
            CFG_SITE_RECORD,
            self.recid,
            self.discussion == 1 and "reviews" or "comments",
            argd["ln"],
            self.discussion == 1 and _("Reviews") or _("Comments"),
        )
        if argd["action"] not in actions:
            argd["action"] = "DISPLAY"

        if not argd["msg"]:
            # User had to login in-between, so retrieve msg
            # from cookie
            try:
                (kind, cookie_argd) = mail_cookie_check_common(argd["cookie"], delete=True)
                argd.update(cookie_argd)
            except InvenioWebAccessMailCookieDeletedError:
                return redirect_to_url(
                    req,
                    CFG_SITE_SECURE_URL
                    + "/"
                    + CFG_SITE_RECORD
                    + "/"
                    + str(self.recid)
                    + (self.discussion == 1 and "/reviews" or "/comments"),
                )
            except InvenioWebAccessMailCookieError:
                # Invalid or empty cookie: continue
                pass

        subscribe = False
        if argd["subscribe"] and get_user_subscription_to_discussion(self.recid, uid) == 0:
            # User is not already subscribed, and asked to subscribe
            subscribe = True

        body = perform_request_add_comment_or_remark(
            recID=self.recid,
            ln=argd["ln"],
            uid=uid,
            action=argd["action"],
            msg=argd["msg"],
            note=argd["note"],
            score=argd["score"],
            reviews=self.discussion,
            comID=argd["comid"],
            client_ip_address=client_ip_address,
            editor_type=argd["editor_type"],
            can_attach_files=can_attach_files,
            subscribe=subscribe,
            req=req,
            attached_files=added_files,
            warnings=warning_msgs,
        )
        if self.discussion:
            title = _("Add Review")
        else:
            title = _("Add Comment")

        jqueryheader = """
<script src="%(CFG_SITE_URL)s/vendors/jquery-multifile/jquery.MultiFile.pack.js" type="text/javascript"></script>
""" % {
            "CFG_SITE_URL": CFG_SITE_URL
        }

        return page(
            title=title,
            body=body,
            navtrail=navtrail,
            uid=uid,
            language=CFG_SITE_LANG,
            verbose=1,
            req=req,
            navmenuid="search",
            metaheaderadd=jqueryheader,
        )
    # id not in range
    else:
        return page(title=_("Record Not Found"), body=problem, uid=uid, verbose=1, req=req, navmenuid="search")
def read_metadata(inputfile, force=None, remote=False, loginpw=None, verbose=0):
    """
    Returns metadata extracted from given file as dictionary.

    Availability depends on input file format and installed plugins
    (return C{TypeError} if unsupported file format).

    @param inputfile: path to a file
    @type inputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param force: name of plugin to use, to skip plugin auto-discovery
    @type force: string
    @param remote: if the file is accessed remotely or not
    @type remote: boolean
    @param loginpw: credentials to access secure servers (username:password)
    @type loginpw: string
    @return: dictionary of metadata tags as keys, and (interpreted)
             value as value
    @rtype: dict
    @raise TypeError: if file format is not supported.
    @raise RuntimeError: if required library to process file is missing.
    @raise InvenioWebSubmitFileMetadataRuntimeError: when metadata cannot
           be read.
    """
    metadata = None

    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    if verbose > 5:
        print(ext.lower(), 'extension to extract from')

    # Loop through the plugins to find a good one for given file.
    # Several plugins may match; later matches update (and may
    # override) tags fetched by earlier ones.
    for plugin_name, plugin in iteritems(metadata_extractor_plugins):
        # Local file
        if 'can_read_local' in plugin and \
               plugin['can_read_local'](inputfile) and not remote and \
               (not force or plugin_name == force):
            if verbose > 5:
                print('Using ' + plugin_name)
            fetched_metadata = plugin['read_metadata_local'](inputfile,
                                                             verbose)
            if not metadata:
                metadata = fetched_metadata
            else:
                metadata.update(fetched_metadata)

        # Remote file
        elif remote and 'can_read_remote' in plugin and \
                 plugin['can_read_remote'](inputfile) and \
                 (not force or plugin_name == force):
            if verbose > 5:
                print('Using ' + plugin_name)
            fetched_metadata = plugin['read_metadata_remote'](inputfile,
                                                              loginpw,
                                                              verbose)
            if not metadata:
                metadata = fetched_metadata
            else:
                metadata.update(fetched_metadata)

    # Return in case we have something
    if metadata is not None:
        return metadata

    # Case of no plugin found, raise.
    # BUGFIX: replaced the Python 2-only "raise TypeError, '...'"
    # statement (a SyntaxError on Python 3, and inconsistent with the
    # print()/"except ... as" syntax used elsewhere in this file) with
    # the call form, valid on both Python 2 and 3.
    raise TypeError('Unsupported file type')