def get_extract_profiles():
    """ Returns a dictionary representation of the frame extraction profiles

    Local profile entries override the shipped defaults; the local file is
    seeded from the default file on first use.
    """
    # Seed the local profile file from the shipped defaults if missing.
    if not os.path.exists(CFG_BIBENCODE_PROFILES_EXTRACT_LOCAL):
        shutil.copy(CFG_BIBENCODE_PROFILES_EXTRACT,
                    CFG_BIBENCODE_PROFILES_EXTRACT_LOCAL)
    profiles = json_decode_file(CFG_BIBENCODE_PROFILES_EXTRACT)
    # Local entries take precedence over the defaults.
    profiles.update(json_decode_file(CFG_BIBENCODE_PROFILES_EXTRACT_LOCAL))
    return profiles
def get_encoding_profiles():
    """ Returns a dictionary representation of the encoding profiles

    Local profile entries override the shipped defaults; the local file is
    seeded from the default file on first use.
    """
    # Seed the local profile file from the shipped defaults if missing.
    if not os.path.exists(CFG_BIBENCODE_PROFILES_ENCODING_LOCAL):
        shutil.copy(CFG_BIBENCODE_PROFILES_ENCODING,
                    CFG_BIBENCODE_PROFILES_ENCODING_LOCAL)
    profiles = json_decode_file(CFG_BIBENCODE_PROFILES_ENCODING)
    # Local entries take precedence over the defaults.
    profiles.update(json_decode_file(CFG_BIBENCODE_PROFILES_ENCODING_LOCAL))
    return profiles
def watch_directory(new_job_dir=CFG_BIBENCODE_DAEMON_DIR_NEWJOBS,
                    old_job_dir=CFG_BIBENCODE_DAEMON_DIR_OLDJOBS):
    """ Checks a folder for job files, parses and executes them
    @param new_job_dir: path to the directory with new jobs
    @type new_job_dir: string
    @param old_job_dir: path to the directory where the old jobs are moved
    @type old_job_dir: string
    @return: 1 (BibTask convention for a successful run)
    @rtype: int
    """
    global _NUMBER, _TASKID
    write_message('Checking directory %s for new jobs' % new_job_dir)
    task_update_progress('Checking for new jobs')
    _TASKID = task_get_task_param('task_id')
    ## BUGFIX: the loop variable was named 'file', shadowing the builtin
    for job_filename in os.listdir(new_job_dir):
        job_fullpath = os.path.join(new_job_dir, job_filename)
        if has_signature(job_fullpath):
            write_message('New Job found: %s' % job_filename)
            job = json_decode_file(job_fullpath)
            if not getval(job, 'isbatch'):
                args = job_to_args(job)
                if not launch_task(args):
                    write_message('Error submitting task')
            else:
                ## We need the job description for the batch engine
                ## So we need to use the new path inside the oldjobs dir
                process_batch(os.path.join(old_job_dir, job_filename))
            ## Move the file to the done dir
            shutil.move(job_fullpath,
                        os.path.join(old_job_dir, job_filename))
            ## Update number for next job
            _NUMBER += 1
    return 1
def create_update_jobs_by_recids(recids, batch_template_file,
                                 job_directory=CFG_BIBENCODE_DAEMON_DIR_NEWJOBS):
    """ Creates the job description files to update all given recids
    @param recids: Iterable set of recids
    @type recids: iterable
    @param batch_template_file: fullpath to the template for the update
    @type batch_tempalte_file: string
    @param job_directory: fullpath to the directory storing the job files
    @type job_directory: string
    """
    template = json_decode_file(batch_template_file)
    for recid in recids:
        progress_msg = "Creating Update Job for %d" % recid
        task_update_progress(progress_msg)
        write_message(progress_msg)
        ## Shallow copy so each job gets its own dict with its own recid
        update_job = dict(template)
        update_job["recid"] = recid
        job_filename = "update_%d_%s.job" % (recid, generate_timestamp())
        create_job_from_dictionary(update_job, job_filename, job_directory)
    return 1
def create_update_jobs_by_recids(
        recids, batch_template_file,
        job_directory=CFG_BIBENCODE_DAEMON_DIR_NEWJOBS):
    """ Creates the job description files to update all given recids
    @param recids: Iterable set of recids
    @type recids: iterable
    @param batch_template_file: fullpath to the template for the update
    @type batch_tempalte_file: string
    @param job_directory: fullpath to the directory storing the job files
    @type job_directory: string
    """
    template = json_decode_file(batch_template_file)
    for recid in recids:
        progress_msg = "Creating Update Job for %d" % recid
        task_update_progress(progress_msg)
        write_message(progress_msg)
        ## Shallow copy so each job gets its own dict with its own recid
        update_job = dict(template)
        update_job['recid'] = recid
        job_filename = "update_%d_%s.job" % (recid, generate_timestamp())
        create_job_from_dictionary(update_job, job_filename, job_directory)
    return 1
def Video_Processing(parameters, curdir, form, user_info=None):
    """ Perform all the required processing of the video.

    Parameters are:
    * "batch_template": to specify the absolute path to a
      configuration describe which manipulation should the uploaded file
      receive. If empty, will use by default
      etc/bibencode/batch_template_submission.json
    * "aspect": to specify in which form element the aspect will be available
    * "title": to specify in which form element the title will be available
    """

    ## Read the batch template for submissions
    if parameters.get('batch_template'):
        try:
            batch_template = json_decode_file(parameters.get('batch_template'))
        except:
            register_exception(prefix="The given batch template was not readable")
            raise
    else:
        batch_template = json_decode_file(CFG_BIBENCODE_TEMPLATE_BATCH_SUBMISSION)

    ## Handle the filepath
    file_storing_path = os.path.join(curdir, "files", str(user_info['uid']),
                                     "NewFile", 'filepath')
    try:
        fp = open(file_storing_path)
        fullpath = fp.read()
        fp.close()
        batch_template['input'] = fullpath
    except:
        register_exception(prefix="The file containing the path to the video was not readable")
        raise

    ## Handle the filename
    file_storing_name = os.path.join(curdir, "files", str(user_info['uid']),
                                     "NewFile", 'filename')
    try:
        fp = open(file_storing_name)
        filename = fp.read()
        fp.close()
        batch_template['bibdoc_master_docname'] = os.path.splitext(os.path.split(filename)[1])[0]
        batch_template['bibdoc_master_extension'] = os.path.splitext(filename)[1]
        batch_template['submission_filename'] = filename
    except:
        register_exception(prefix="The file containing the original filename of the video was not readable")
        raise

    ## Handle the aspect ratio
    if parameters.get('aspect'):
        try:
            file_storing_aspect = os.path.join(curdir, parameters.get('aspect'))
            fp = open(file_storing_aspect)
            aspect = fp.read()
            fp.close()
            batch_template['aspect'] = aspect
        except:
            register_exception(prefix="The file containing the aspect ratio of the video was not readable")
            raise
    else:
        batch_template['aspect'] = None

    ## Handle the title
    if parameters.get('title'):
        try:
            file_storing_title = os.path.join(curdir, parameters['title'])
            fp = open(file_storing_title)
            title = fp.read()
            fp.close()
            ## BUGFIX: the title was read but never stored in the template
            batch_template['submission_title'] = title
        except:
            register_exception(prefix="The file containing the title of the video was not readable")
            raise
    else:
        batch_template['submission_title'] = None

    ## Set the rest
    batch_template['notify_admin'] = CFG_SITE_ADMIN_EMAIL
    batch_template['notify_user'] = user_info['email']
    # NOTE(review): 'sysno' is not defined in this block — presumably an
    # Invenio websubmit global injected at runtime; confirm against the
    # websubmit function framework.
    batch_template['recid'] = sysno
    timestamp = generate_timestamp()
    job_filename = "submission_%d_%s.job" % (sysno, timestamp)
    create_job_from_dictionary(batch_template, job_filename)
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary
    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 if the process was successfull, 0 if not
    @rtype: int
    """

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = 'bibencode_' + str(batch_job['recid']) + '_' + str(
            uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR,
                                    xml_filename)
        ## BUGFIX: use open() instead of the Python-2-only 'file' builtin
        xml_file = open(xml_filename, 'w')
        xml_file.write(marcxml)
        xml_file.close()
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errrors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    if record_exists(batch_job['recid']) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                m_comment = getval(batch_job, 'bibdoc_master_comment',
                                   comment)
                m_description = getval(batch_job, 'bibdoc_master_description',
                                       description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat',
                                     subformat)
                if (comment == m_comment and description == m_description
                        and subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect of the from the record
                    try:
                        ## Assumes pbcore metadata mapping
                        ## BUGFIX: read the aspect from this job's record,
                        ## not from the hard-coded test recid 124
                        batch_job['aspect'] = get_fieldvalues(
                            batch_job['recid'],
                            CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                                % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(
        batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job,
                                            'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)

    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory auf the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first an then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc"
                                % bibdoc_video_docname)
            master_format = compose_format(
                bibdoc_video_extension,
                getval(batch_job, 'bibdoc_master_subformat', 'master'))
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                batch_job['input'],
                version=1,
                description=getval(batch_job, 'bibdoc_master_description'),
                comment=getval(batch_job, 'bibdoc_master_comment'),
                docformat=master_format)

    #-----------#
    # JOBS LOOP #
    #-----------#

    return_code = 1
    global _BATCH_STEP

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------"
                            % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(
                job['bibdoc_docname']).safe_substitute(
                    {'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, 'assure_quality') and getval(
                    job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None

            ## We need an extension defined fot the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")

            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname',
                                                bibdoc_video_docname)

            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(bibdoc_video_directory,
                                                 bibdoc_slave_video_docname,
                                                 bibdoc_video_extension)
            _task_write_message(
                "Transcoding %s to %s;%s"
                % (bibdoc_slave_video_docname, bibdoc_video_extension,
                   bibdoc_video_subformat))

            ## We encode now directly into the bibdocs directory
            encoding_result = encode_video(
                input_file=batch_job['input'],
                output_file=bibdoc_video_fullpath,
                acodec=getval(job, 'audiocodec'),
                vcodec=getval(job, 'videocodec'),
                ## BUGFIX: the audio and video bitrates were swapped
                abitrate=getval(job, 'audiobitrate'),
                vbitrate=getval(job, 'videobitrate'),
                resolution=getval(job, 'resolution'),
                passes=getval(job, 'passes', 1),
                special=getval(job, 'special'),
                specialfirst=getval(job, 'specialfirst'),
                specialsecond=getval(job, 'specialsecond'),
                metadata=getval(job, 'metadata'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),  # Aspect for every job
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                message_fnc=_task_write_message)
            return_code &= encoding_result

            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(
                    bibdoc_video_fullpath,
                    compose_file(bibdoc_video_directory,
                                 bibdoc_video_extension,
                                 bibdoc_video_subformat, 1,
                                 bibdoc_slave_video_docname))
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                             bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(
                        getval(job, 'bibdoc_description'),
                        bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':

            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')

            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)

            #Move this to the batch description
            bibdoc_frame_docname = getval(job, 'bibdoc_docname',
                                          bibdoc_video_docname)
            tmpfname = (
                tmpdir + "/" + bibdoc_frame_docname + '.'
                + getval(profile, 'extension',
                         getval(job, 'extension', 'jpg')))

            extraction_result = extract_frames(
                input_file=batch_job['input'],
                output_file=tmpfname,
                size=getval(job, 'size'),
                positions=getval(job, 'positions'),
                numberof=getval(job, 'numberof'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
            )
            return_code &= extraction_result

            ## only on success:
            if extraction_result:
                ## for every filename in the directorys, create a bibdoc that contains
                ## all sizes of the frame from the two directories
                files = os.listdir(tmpdir)
                for filename in files:
                    ## The docname was altered by BibEncode extract through substitution
                    ## Retrieve it from the filename again
                    bibdoc_frame_docname, bibdoc_frame_extension = \
                        os.path.splitext(filename)
                    _task_write_message("Creating new bibdoc for %s"
                                        % bibdoc_frame_docname)

                    ## If the bibdoc exists, receive it
                    if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                        bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                    ## Create a new bibdoc if it does not exist
                    else:
                        bibdoc_frame = recdoc.add_bibdoc(
                            docname=bibdoc_frame_docname)

                    ## The filename including path from tmpdir
                    fname = os.path.join(tmpdir, filename)
                    bibdoc_frame_format = compose_format(
                        bibdoc_frame_extension, bibdoc_frame_subformat)

                    ## Same as with the master, if the format allready exists,
                    ## override it, because something went wrong before
                    if bibdoc_frame.format_already_exists_p(
                            bibdoc_frame_format):
                        bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                    _task_write_message("Adding %s jpg;%s to BibDoc"
                                        % (bibdoc_frame_docname,
                                           getval(job, 'bibdoc_subformat')))
                    bibdoc_frame.add_file_new_format(
                        fname,
                        version=1,
                        description=getval(job, 'bibdoc_description'),
                        comment=getval(job, 'bibdoc_comment'),
                        docformat=bibdoc_frame_format)
            ## Remove the temporary folders
            _task_write_message("Removing temporary directory")
            shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file=getval(batch_job, 'input'),
                                 pbcoreIdentifier=batch_job['recid'],
                                 aspect_override=getval(batch_job, 'aspect'))
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        marc_snippet = open(getval(batch_job, 'marc_snippet'))
        marcxml = marc_snippet.read()
        marc_snippet.close()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        # only if successfull
        ## BUGFIX: the check was inverted ('if not return_code'), which would
        ## have deleted the input only when a job had FAILED (success is
        ## return_code == 1, as the notification block below shows)
        if return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern',
                      '') in getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename',
                       batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            ## BUGFIX: was 'type(getval(...) == type(str()))' which applies
            ## type() to the comparison result and is therefore always truthy
            if isinstance(getval(batch_job, 'notify_admin'), str):
                _notify_error_admin(batch_job,
                                    getval(batch_job, 'notify_admin'))
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename',
                       batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
    return 1
def task_submit_check_options():
    """ Checks the tasks arguments for validity
    @return: True if all options are valid, False otherwise
    @rtype: bool
    """

    #----------------#
    # General Checks #
    #----------------#

    ## FFMPEG CONFIGURATION ##
    ## The status of ffmpeg should be checked before a task is submitted
    ## There is a minimum configuration that ffmpeg must be compiled with
    ## See bibencode_utils and bibencode_config
    config = check_ffmpeg_configuration()
    if config:
        ## Prints missing configuration
        string = ''
        for item in config:
            string += ('\t' + item + '\n')
        write_message(
            "FFmpeg options are missing. Please recompile and add:\n"
            + string)
        return False

    ## MODE ##
    ## Check if the mode is valid
    if _topt('mode') is None:
        write_message('You have to specify a mode using \'-m MODE\'')
        return False
    if _topt('mode') not in CFG_BIBENCODE_VALID_MODES:
        write_message('%s is not a valid mode. Use one of %s'
                      % (_topt('mode'), CFG_BIBENCODE_VALID_MODES))
        return False

    ## INPUT ##
    ## Check if the input file is given and if it exists
    ## You should always use an absolute path to the file
    if _topt('mode') in ('encode', 'extract', 'meta', 'batch'):
        if _topt('input') is None:
            write_message('You must specify an input file using \'-i FILE\'')
            return False
        else:
            if not os.path.exists(_topt('input')):
                print("The file %s does not exist" % _topt('input'))
                return False

    ## OUTPUT ##
    ## Check if the output file is given and if it exists
    ## You should always use an absolute path to the file
    if _topt('mode') in ('encode', 'extract', 'meta'):
        if _topt('output') is None:
            write_message('No output file is given. Please specify with'
                          ' \'-o NAME\'')
            return False

    #---------------#
    # Encoding Mode #
    #---------------#
    if _topt('mode') == 'encode':

        ## PROFILE ## Check for a valid profile if this is given
        if _topt('profile_name') is not None:
            if _topt('profile_name') not in get_encoding_profiles():
                write_message('%s not found in %s'
                              % (_topt('profile_name'),
                                 CFG_BIBENCODE_PROFILES_ENCODING))
                return False
            ## If the profile exists
            else:
                pass

        ## AUDIOCODEC ##
        ## Checks if the audiocodec is one of the predefined
        if _topt('acodec') is not None:
            if _topt('acodec') not in CFG_BIBENCODE_FFMPEG_VALID_ACODECS:
                write_message(
                    '%s is not a valid audiocodec.\nAvailable codecs: %s'
                    % (_topt('acodec'), CFG_BIBENCODE_FFMPEG_VALID_ACODECS))
                return False

        ## VIDEOCODEC ## Checks if the videocodec is one of the predefined
        if _topt('vcodec') is not None:
            if _topt('vcodec') not in CFG_BIBENCODE_FFMPEG_VALID_VCODECS:
                write_message(
                    '%s is not a valid videocodec.\nAvailable codecs: %s'
                    % (_topt('vcodec'), CFG_BIBENCODE_FFMPEG_VALID_VCODECS))
                return False

        ## SIZE ##
        ## Checks if the size is either WxH or an FFMPEG preset
        if _topt('size') is not None:
            if not CFG_BIBENCODE_FFMPEG_RE_VALID_SIZE.match(_topt('size')):
                if _topt('size') not in CFG_BIBENCODE_FFMPEG_VALID_SIZES:
                    write_message(
                        '%s is not a valid frame size.\nEither use the'
                        ' \'WxH\' notation or one of these values:\n%s'
                        % (_topt('size'), CFG_BIBENCODE_FFMPEG_VALID_SIZES))
                    return False
        ## Check if both a size and vertical or horizontal resolution
        ## BUGFIX: the message referred to a non-existing 'resolution'
        ## option; the option actually checked here is 'size'
        if (_topt('width') or _topt('height')) and _topt('size'):
            write_message('Options \'width\' and \'height\' can not be '
                          'combined with \'size\'')
            return False

        ## PASSES ##
        ## If a number of passes is given, it should be either 1 or 2.
        ## You could do an infinite number of passes with ffmpeg,
        ## But it will almost never make a difference above 2 passes.
        ## So, we currently only support 2 passes.
        if _topt('passes') is not None:
            if _topt('passes') not in (1, 2):
                write_message('The number of passes must be either 1 or 2')
                return False
        else:
            task_set_option('passes', 1)

        ## BITRATE ##
        ## Check if the given bitrate is either 1000 sth. or 1000k sth.
        if _topt('abitrate') is not None:
            pass
        if _topt('vbitrate') is not None:
            pass

    #-----------------#
    # Extraction Mode #
    #-----------------#
    elif _topt('mode') == 'extract':

        ## PROFILE ##
        ## If a profile is given, check its validity
        if _topt('profile_name') is not None:
            if _topt('profile_name') not in get_extract_profiles():
                write_message('%s not found in %s'
                              % (_topt('profile_name'),
                                 CFG_BIBENCODE_PROFILES_EXTRACT))
                return False
            ## If the profile exists
            else:
                pass

        ## You cannot give both a number and specific positions
        ## !!! Think about allowing both -> First extract by number,
        ## !!! then additionally the specific positions
        if (((_topt('numberof') is not None) and
             (_topt('positions') is not None)) or
                ((_topt('numberof') is None) and
                 (_topt('positions') is None))):
            write_message('Please specify either a number of frames to '
                          'take or specific positions')
            return False

        ## SIZE ##
        ## Checks if the size is either WxH or an FFMPEG specific value
        ## BUGFIX: message had a missing space before 'WxH' and the typo
        ## 'valus'; now matches the encode-mode message
        if _topt('size') is not None:
            if not CFG_BIBENCODE_FFMPEG_RE_VALID_SIZE.match(_topt('size')):
                if _topt('size') not in CFG_BIBENCODE_FFMPEG_VALID_SIZES:
                    write_message(
                        '%s is not a valid frame size.\nEither use the'
                        ' \'WxH\' notation or one of these values:\n%s'
                        % (_topt('size'), CFG_BIBENCODE_FFMPEG_VALID_SIZES))
                    return False

    #---------------#
    # Metadata Mode #
    #---------------#
    elif _topt('mode') == 'meta':

        ## You have to give exactly one meta suboption
        if not _xor(_topt('meta_input'), _topt('meta_dump')):
            write_message("You can either dump or write metadata")
            return False

        ## METADATA INPUT ##
        if _topt('meta_input') is not None:
            ## Check if this is either a filename (that should exist)
            ## or if this a jsonic metadata notation
            if os.path.exists(_topt('meta_input')):
                pass
            else:
                try:
                    metadict = json.loads(_topt('meta_input'))
                    task_set_option('meta_input', metadict)
                except ValueError:
                    write_message(
                        'The value %s of the \'--meta\' parameter is '
                        'neither a valid filename nor a jsonic dict'
                        % _topt('meta_input'))
                    return False

    #------------#
    # Batch Mode #
    #------------#
    elif _topt('mode') == 'batch':
        if _topt('collection') and _topt('search'):
            write_message('You can either use \'search\' or \'collection\'')
            return False
        elif _topt('collection'):
            template = json_decode_file(_topt('input'))
            print('\n')
            print("#---------------------------------------------#")
            print("# YOU ARE ABOUT TO UPDATE A WHOLE COLLECTION  #")
            print("#---------------------------------------------#")
            print('\n')
            print('The selected template file contains:')
            pprint(template)
            print('\n')
        elif _topt('search'):
            template = json_decode_file(_topt('input'))
            message = ("# YOU ARE ABOUT TO UPDATE RECORDS MATCHING '%s'  #"
                       % _topt('search'))
            print('\n')
            print("#" + "-" * (len(message) - 2) + "#")
            print(message)
            print("#" + "-" * (len(message) - 2) + "#")
            print('\n')
            print('The selected template file contains:')
            pprint(template)
            print('\n')

    #-------------#
    # Daemon Mode #
    #-------------#
    elif _topt('mode') == 'daemon':
        task_set_task_param('task_specific_name', 'daemon')
        ## You can either give none or both folders, but not only one
        if _xor(_topt('new_job_folder'), _topt('old_job_folder')):
            write_message('When specifying folders for the daemon mode, you '
                          'have to specify both the folder for the new jobs '
                          'and the old ones')
            return False

    ## If every check went fine
    return True
def Video_Processing(parameters, curdir, form, user_info=None):
    """ Perform all the required processing of the video.

    Reads the uploaded file's path, name, aspect ratio and title from the
    websubmit working directory, fills a batch template and writes out a
    job description file for the BibEncode daemon.
    """
    ## Read the batch template for submissions
    if parameters.get("batch_template"):
        try:
            batch_template = json_decode_file(parameters.get("batch_template"))
        except:
            register_exception(prefix="The given batch template was not readable")
            raise
    else:
        batch_template = json_decode_file(CFG_BIBENCODE_TEMPLATE_BATCH_SUBMISSION)

    ## Handle the filepath
    file_storing_path = os.path.join(curdir, "files", str(user_info["uid"]),
                                     "NewFile", "filepath")
    try:
        fp = open(file_storing_path)
        fullpath = fp.read()
        fp.close()
        batch_template["input"] = fullpath
    except:
        register_exception(prefix="The file containing the path to the video was not readable")
        raise

    ## Handle the filename
    file_storing_name = os.path.join(curdir, "files", str(user_info["uid"]),
                                     "NewFile", "filename")
    try:
        fp = open(file_storing_name)
        filename = fp.read()
        fp.close()
        batch_template["bibdoc_master_docname"] = os.path.splitext(os.path.split(filename)[1])[0]
        batch_template["bibdoc_master_extension"] = os.path.splitext(filename)[1]
        batch_template["submission_filename"] = filename
    except:
        register_exception(prefix="The file containing the original filename of the video was not readable")
        raise

    ## Handle the aspect ratio
    if parameters.get("aspect"):
        try:
            file_storing_aspect = os.path.join(curdir, parameters.get("aspect"))
            fp = open(file_storing_aspect)
            aspect = fp.read()
            fp.close()
            batch_template["aspect"] = aspect
        except:
            register_exception(prefix="The file containing the aspect ratio of the video was not readable")
            raise
    else:
        batch_template["aspect"] = None

    ## Handle the title
    if parameters.get("title"):
        try:
            file_storing_title = os.path.join(curdir, parameters["title"])
            fp = open(file_storing_title)
            title = fp.read()
            fp.close()
            ## BUGFIX: the title was read but never stored in the template
            batch_template["submission_title"] = title
        except:
            register_exception(prefix="The file containing the title of the video was not readable")
            raise
    else:
        batch_template["submission_title"] = None

    ## Set the rest
    batch_template["notify_admin"] = CFG_SITE_ADMIN_EMAIL
    batch_template["notify_user"] = user_info["email"]
    # NOTE(review): 'sysno' is not defined in this block — presumably an
    # Invenio websubmit global injected at runtime; confirm against the
    # websubmit function framework.
    batch_template["recid"] = sysno
    timestamp = generate_timestamp()
    job_filename = "submission_%d_%s.job" % (sysno, timestamp)
    create_job_from_dictionary(batch_template, job_filename)
def task_submit_check_options():
    """ Checks the tasks arguments for validity
    @return: True if all options are valid, False otherwise
    @rtype: bool
    """

    #----------------#
    # General Checks #
    #----------------#

    ## FFMPEG CONFIGURATION ##
    ## The status of ffmpeg should be checked before a task is submitted
    ## There is a minimum configuration that ffmpeg must be compiled with
    ## See bibencode_utils and bibencode_config
    config = check_ffmpeg_configuration()
    if config:
        ## Prints missing configuration
        string = ''
        for item in config:
            string += ('\t' + item + '\n')
        write_message(
            "FFmpeg options are missing. Please recompile and add:\n"
            + string
        )
        return False

    ## MODE ##
    ## Check if the mode is valid
    if _topt('mode') is None:
        write_message('You have to specify a mode using \'-m MODE\'')
        return False
    if _topt('mode') not in CFG_BIBENCODE_VALID_MODES:
        write_message('%s is not a valid mode. Use one of %s'
                      % (_topt('mode'), CFG_BIBENCODE_VALID_MODES))
        return False

    ## INPUT ##
    ## Check if the input file is given and if it exists
    ## You should always use an absolute path to the file
    if _topt('mode') in ('encode', 'extract', 'meta', 'batch'):
        if _topt('input') is None:
            write_message('You must specify an input file using \'-i FILE\'')
            return False
        else:
            if not os.path.exists(_topt('input')):
                print("The file %s does not exist" % _topt('input'))
                return False

    ## OUTPUT ##
    ## Check if the output file is given and if it exists
    ## You should always use an absolute path to the file
    if _topt('mode') in ('encode', 'extract', 'meta'):
        if _topt('output') is None:
            write_message('No output file is given. Please specify with'
                          ' \'-o NAME\''
                          )
            return False

    #---------------#
    # Encoding Mode #
    #---------------#
    if _topt('mode') == 'encode':

        ## PROFILE ## Check for a valid profile if this is given
        if _topt('profile_name') is not None:
            if _topt('profile_name') not in get_encoding_profiles():
                write_message('%s not found in %s'
                              % (_topt('profile_name'),
                                 CFG_BIBENCODE_PROFILES_ENCODING)
                              )
                return False
            ## If the profile exists
            else:
                pass

        ## AUDIOCODEC ##
        ## Checks if the audiocodec is one of the predefined
        if _topt('acodec') is not None:
            if _topt('acodec') not in CFG_BIBENCODE_FFMPEG_VALID_ACODECS:
                write_message(
                    '%s is not a valid audiocodec.\nAvailable codecs: %s'
                    % (_topt('acodec'), CFG_BIBENCODE_FFMPEG_VALID_ACODECS)
                )
                return False

        ## VIDEOCODEC ## Checks if the videocodec is one of the predefined
        if _topt('vcodec') is not None:
            if _topt('vcodec') not in CFG_BIBENCODE_FFMPEG_VALID_VCODECS:
                write_message(
                    '%s is not a valid videocodec.\nAvailable codecs: %s'
                    % (_topt('vcodec'), CFG_BIBENCODE_FFMPEG_VALID_VCODECS)
                )
                return False

        ## SIZE ##
        ## Checks if the size is either WxH or an FFMPEG preset
        if _topt('size') is not None:
            if not CFG_BIBENCODE_FFMPEG_RE_VALID_SIZE.match(_topt('size')):
                if _topt('size') not in CFG_BIBENCODE_FFMPEG_VALID_SIZES:
                    write_message(
                        '%s is not a valid frame size.\nEither use the'
                        ' \'WxH\' notation or one of these values:\n%s'
                        % (_topt('size'), CFG_BIBENCODE_FFMPEG_VALID_SIZES)
                    )
                    return False
        ## Check if both a size and vertical or horizontal resolution
        ## BUGFIX: the message referred to a non-existing 'resolution'
        ## option; the option actually checked here is 'size'
        if (_topt('width') or _topt('height')) and _topt('size'):
            write_message('Options \'width\' and \'height\' can not be '
                          'combined with \'size\'')
            return False

        ## PASSES ##
        ## If a number of passes is given, it should be either 1 or 2.
        ## You could do an infinite number of passes with ffmpeg,
        ## But it will almost never make a difference above 2 passes.
        ## So, we currently only support 2 passes.
        if _topt('passes') is not None:
            if _topt('passes') not in (1, 2):
                write_message('The number of passes must be either 1 or 2')
                return False
        else:
            task_set_option('passes', 1)

        ## BITRATE ##
        ## Check if the given bitrate is either 1000 sth. or 1000k sth.
        if _topt('abitrate') is not None:
            pass
        if _topt('vbitrate') is not None:
            pass

    #-----------------#
    # Extraction Mode #
    #-----------------#
    elif _topt('mode') == 'extract':

        ## PROFILE ##
        ## If a profile is given, check its validity
        if _topt('profile_name') is not None:
            if _topt('profile_name') not in get_extract_profiles():
                write_message('%s not found in %s'
                              % (_topt('profile_name'),
                                 CFG_BIBENCODE_PROFILES_EXTRACT)
                              )
                return False
            ## If the profile exists
            else:
                pass

        ## You cannot give both a number and specific positions
        ## !!! Think about allowing both -> First extract by number,
        ## !!! then additionally the specific positions
        if (
            ((_topt('numberof') is not None) and
             (_topt('positions') is not None))
            or
            ((_topt('numberof') is None) and
             (_topt('positions') is None))
        ):
            write_message('Please specify either a number of frames to '
                          'take or specific positions')
            return False

        ## SIZE ##
        ## Checks if the size is either WxH or an FFMPEG specific value
        ## BUGFIX: message had a missing space before 'WxH' and the typo
        ## 'valus'; now matches the encode-mode message
        if _topt('size') is not None:
            if not CFG_BIBENCODE_FFMPEG_RE_VALID_SIZE.match(_topt('size')):
                if _topt('size') not in CFG_BIBENCODE_FFMPEG_VALID_SIZES:
                    write_message(
                        '%s is not a valid frame size.\nEither use the'
                        ' \'WxH\' notation or one of these values:\n%s'
                        % (_topt('size'), CFG_BIBENCODE_FFMPEG_VALID_SIZES)
                    )
                    return False

    #---------------#
    # Metadata Mode #
    #---------------#
    elif _topt('mode') == 'meta':

        ## You have to give exactly one meta suboption
        if not _xor(_topt('meta_input'), _topt('meta_dump')):
            write_message("You can either dump or write metadata")
            return False

        ## METADATA INPUT ##
        if _topt('meta_input') is not None:
            ## Check if this is either a filename (that should exist)
            ## or if this a jsonic metadata notation
            if os.path.exists(_topt('meta_input')):
                pass
            else:
                try:
                    metadict = json.loads(_topt('meta_input'))
                    task_set_option('meta_input', metadict)
                except ValueError:
                    write_message('The value %s of the \'--meta\' parameter is '
                                  'neither a valid filename nor a jsonic dict'
                                  % _topt('meta_input'))
                    return False

    #------------#
    # Batch Mode #
    #------------#
    elif _topt('mode') == 'batch':
        if _topt('collection') and _topt('search'):
            write_message('You can either use \'search\' or \'collection\'')
            return False
        elif _topt('collection'):
            template = json_decode_file(_topt('input'))
            print('\n')
            print("#---------------------------------------------#")
            print("# YOU ARE ABOUT TO UPDATE A WHOLE COLLECTION  #")
            print("#---------------------------------------------#")
            print('\n')
            print('The selected template file contains:')
            pprint(template)
            print('\n')
        elif _topt('search'):
            template = json_decode_file(_topt('input'))
            message = ("# YOU ARE ABOUT TO UPDATE RECORDS MATCHING '%s'  #"
                       % _topt('search'))
            print('\n')
            print("#" + "-"*(len(message)-2) + "#")
            print(message)
            print("#" + "-"*(len(message)-2) + "#")
            print('\n')
            print('The selected template file contains:')
            pprint(template)
            print('\n')

    #-------------#
    # Daemon Mode #
    #-------------#
    elif _topt('mode') == 'daemon':
        task_set_task_param('task_specific_name', 'daemon')
        ## You can either give none or both folders, but not only one
        if _xor(_topt('new_job_folder'), _topt('old_job_folder')):
            write_message('When specifying folders for the daemon mode, you '
                          'have to specify both the folder for the new jobs '
                          'and the old ones')
            return False

    ## If every check went fine
    return True
def pbcore_metadata(input_file, pbcoreIdentifier=None, pbcoreTitle=None,
                    pbcoreDescription=None, instantiationIdentifier=None,
                    instantiationPhysical=None, instantiationLocation=None,
                    instantiationGenerations=None, instantiationExtension=None,
                    instantiationPart=None, instantiationAnnotation=None,
                    instantiationRights=None, instantiationRelation=None,
                    xmlns="pbcore", aspect_override=None):
    """ Transformes parsed metadata to a pbcore representation.
    To supplement all the pbcore field, we need both ffprobe and mediainfo.
    If only ffprobe is installed, it will not fail but supplement only
    partially.

    @param input_file: full path to the file to extract the metadata from
    @type input_file: string
    @param xmlns: XML namespace prefix to apply to every element
    @type xmlns: string
    @param aspect_override: aspect ratio to force upon the mediainfo parser
    @return: pbcore xml metadata representation
    @rtype: string
    """

    def _follow_path(path, locals_u, meta_dict, probe_dict, stream_number=None):
        """ Trys to follow a given path and returns the value it represents.
        The path is a string that must be like this:

        local->variable_name
        ffprobe->format->param
        ffprobe->video->param
        ffprobe->audio->param
        ffprobe->stream->param
        mediainfo->general->param
        mediainfo->audio->param
        mediainfo->video->param
        mediainfo->track->param

        @param path: Path to the value
        @type path: string
        @param locals_u: Local variables
        @type locals_u: dict
        @param meta_dict: Mediainfo metadata
        @type meta_dict: dict
        @param probe_dict: FFprobe metadata
        @type probe_dict: dict
        @param stream_number: To follow a path to a specific stream
        @type stream_number: int
        @return: value of the element the path points to
        @rtype: string
        """
        path_segments = path.split("->")
        ## ffprobe
        if path_segments[0] == 'ffprobe':
            ## format
            if path_segments[1] == 'format':
                return getval(probe_dict['format'], path_segments[2], 0)
            ## first stream of the given codec type
            elif path_segments[1] in ('video', 'audio'):
                for stream in probe_dict['streams']:
                    if getval(stream, 'codec_type') == path_segments[1]:
                        return getval(stream, path_segments[2], 0)
            ## stream by number
            elif path_segments[1] == 'stream':
                return getval(probe_dict['streams'][stream_number],
                              path_segments[2], 0)
        ## mediainfo
        elif path_segments[0] == 'mediainfo':
            ## general, video, audio
            if path_segments[1] in ('general', 'video', 'audio'):
                for track in meta_dict:
                    if getval(track, 'kind_of_stream').lower() == path_segments[1]:
                        return getval(track, path_segments[2], 0)
            ## track by number
            elif path_segments[1] == 'track':
                ## We rely on format being the first track in mediainfo
                ## And the order of streams in ffprobe and tracks in
                ## mediainfo being the same
                return getval(meta_dict[stream_number + 1], path_segments[2], 0)
        ## local variable
        elif path_segments[0] == 'local':
            return getval(locals_u, path_segments[1], 0)
        ## direct input
        else:
            return path_segments[0]

    def _map_values(mapping, locals_u, meta_dict, probe_dict, stream_number=None):
        """ substitute a mapping dictionary an returns the substituted value.
        The dictionary must contain of a 'tag' a 'mapping' and a 'call'

        @param mapping: mapping dictionary to substitute
        @type mapping: dict
        @param locals_u: Local variables
        @type locals_u: dict
        @param meta_dict: Mediainfo metadata
        @type meta_dict: dict
        @param probe_dict: FFprobe metadata
        @type probe_dict: dict
        @param stream_number: To follow a path to a specific stream
        @type stream_number: int
        @return: substituted mapping
        @rtype: string
        """
        items = []
        for value in mapping:
            mapping = value['mapping']
            tag = value['tag']
            call = getval(value, 'call')
            micro_mappings = mapping.split(';;')
            values = []
            foundall = True
            for micro_mapping in micro_mappings:
                value = _follow_path(micro_mapping, locals_u, meta_dict,
                                     probe_dict, stream_number)
                if value:
                    ## Optionally post-process the value through a module
                    ## level function named by 'call'
                    if call:
                        value = globals()[call](value)
                    values.append(value.strip())
                else:
                    foundall = False
            ## Only emit the tag if every micro mapping resolved.
            ## Narrowed from a bare 'except': the '%' substitution can
            ## only raise TypeError/ValueError here.
            try:
                if values and foundall:
                    items.append(tag % "".join(values))
            except (TypeError, ValueError):
                pass
        return items

    ## Get the metadata from ffprobe and mediainfo
    meta_dict = mediainfo_metadata(input_file, aspect_override)
    probe_dict = ffprobe_metadata(input_file)
    # parse the mappings
    pbcore_mappings = json_decode_file(CFG_BIBENCODE_PBCORE_MAPPINGS)
    ## INSTANTIATION ##
    # According to the PBcore standard, this strict order MUST be followed
    instantiation_mapping = pbcore_mappings['instantiation_mapping']
    ## ESSENCE TRACK ##
    # According to the PBcore standard, this strict order MUST be followed
    essencetrack_mapping = pbcore_mappings['essencetrack_mapping']
    ## The XML header for the PBcore document
    header = (
        """<?xml version="1.0" encoding="UTF-8"?><pbcoreDescriptionDocument """
        """xmlns%(xmlns)s="http://www.pbcore.org/PBCore/PBCoreNamespace.html" """
        """xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" """
        """xsi:schemaLocation="http://www.pbcore.org/PBCore/PBCoreNamespace.html">"""
    )
    if pbcoreIdentifier:
        pbcoreIdentifier = """<pbcoreIdentifier>%s</pbcoreIdentifier>""" % pbcoreIdentifier
    else:
        pbcoreIdentifier = ""
    if pbcoreTitle:
        pbcoreTitle = """<pbcoreTitle>%s</pbcoreTitle>""" % pbcoreTitle
    else:
        pbcoreTitle = ""
    tail = """</pbcoreDescriptionDocument>"""
    ## ESSENCE TRACKS ##
    essencetracks = []
    for stream_no in range(len(probe_dict['streams'])):
        essencetracks.append(_map_values(essencetrack_mapping, locals(),
                                         meta_dict, probe_dict, stream_no))
    ## NOTE: 'joinedtracks' looks unused, but locals() is handed to
    ## _map_values below, so the mapping file can pull it in through a
    ## 'local->joinedtracks' path - do not remove or rename
    joinedtracks = []
    for track in essencetracks:
        track = ("<instantiationEssenceTrack>" + "".join(track)
                 + "</instantiationEssenceTrack>")
        joinedtracks.append(track)
    joinedtracks = "".join(joinedtracks)
    ## INSTANTIATION ##
    instantiation_items = _map_values(instantiation_mapping, locals(),
                                      meta_dict, probe_dict)
    joinedinstantiation = ("<pbcoreInstantiation>"
                           + "".join(instantiation_items)
                           + "</pbcoreInstantiation>")
    joined = "%s%s%s%s%s" % (header, pbcoreIdentifier, pbcoreTitle,
                             joinedinstantiation, tail)
    if xmlns:
        joined = joined % {"xmlns": ":%s" % xmlns}
        ## Raw strings: '\w' and '\g' are invalid escape sequences in
        ## plain string literals on modern Python
        joined = re.sub(r"<(\w[^>]+)>", r"<%s:\g<1>>" % xmlns, joined)
        joined = re.sub(r"<\/([^>]+)>", r"</%s:\g<1>>" % xmlns, joined)
    else:
        joined = joined % {"xmlns": ""}
    return joined
def Video_Processing(parameters, curdir, form, user_info=None):
    """ Perform all the required processing of the video.

    Parameters are:
    * "batch_template": absolute path to a configuration describing which
      manipulation the uploaded file should receive; when empty, the
      default submission batch template is used
    * "aspect": name of the form element holding the aspect ratio
    * "title": name of the form element holding the title
    """
    ## Read the batch template for submissions
    if parameters.get('batch_template'):
        try:
            batch_template = json_decode_file(parameters.get('batch_template'))
        except:
            register_exception(prefix="The given batch template was not readable")
            raise
    else:
        batch_template = json_decode_file(CFG_BIBENCODE_TEMPLATE_BATCH_SUBMISSION)

    ## Handle the filepath
    file_storing_path = os.path.join(curdir, "files", str(user_info['uid']),
                                     "NewFile", 'filepath')
    try:
        fp = open(file_storing_path)
        fullpath = fp.read()
        fp.close()
        batch_template['input'] = fullpath
    except:
        register_exception(prefix="The file containing the path to the video was not readable")
        raise

    ## Handle the filename
    file_storing_name = os.path.join(curdir, "files", str(user_info['uid']),
                                     "NewFile", 'filename')
    try:
        fp = open(file_storing_name)
        filename = fp.read()
        fp.close()
        batch_template['bibdoc_master_docname'] = os.path.splitext(os.path.split(filename)[1])[0]
        batch_template['bibdoc_master_extension'] = os.path.splitext(filename)[1]
        batch_template['submission_filename'] = filename
    except:
        register_exception(prefix="The file containing the original filename of the video was not readable")
        raise

    ## Handle the aspect ratio
    if parameters.get('aspect'):
        try:
            file_storing_aspect = os.path.join(curdir, parameters.get('aspect'))
            fp = open(file_storing_aspect)
            aspect = fp.read()
            fp.close()
            batch_template['aspect'] = aspect
        except:
            register_exception(prefix="The file containing the ascpect ratio of the video was not readable")
            raise
    else:
        batch_template['aspect'] = None

    ## Handle the title
    if parameters.get('title'):
        try:
            file_storing_title = os.path.join(curdir, parameters['title'])
            fp = open(file_storing_title)
            title = fp.read()
            fp.close()
            ## BUGFIX: the title that was just read was never stored,
            ## leaving 'submission_title' unset in this branch
            batch_template['submission_title'] = title
        except:
            register_exception(prefix="The file containing the title of the video was not readable")
            raise
    else:
        batch_template['submission_title'] = None

    ## Set the rest
    batch_template['notify_admin'] = CFG_SITE_ADMIN_EMAIL
    batch_template['notify_user'] = user_info['email']
    ## NOTE(review): `sysno` is not defined in this function - it appears
    ## to be injected by the WebSubmit execution context; confirm
    batch_template['recid'] = sysno

    timestamp = generate_timestamp()
    job_filename = "submission_%d_%s.job" % (sysno, timestamp)
    create_job_from_dictionary(batch_template, job_filename)
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 if the process was successfull, 0 if not
    @rtype: int
    """

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = ('bibencode_' + str(batch_job['recid']) + '_'
                        + str(uuid.uuid4()) + '.xml')
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR,
                                    xml_filename)
        ## open() instead of the py2-only file() builtin
        xml_file = open(xml_filename, 'w')
        xml_file.write(marcxml)
        xml_file.close()
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errrors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    if record_exists(batch_job['recid']) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description',
                                       description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat',
                                     subformat)
                if (comment == m_comment and
                        description == m_description and
                        subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect of the from the record
                    try:
                        ## Assumes pbcore metadata mapping
                        ## BUGFIX: use the record of this batch job instead
                        ## of a hardcoded record id (124)
                        batch_job['aspect'] = get_fieldvalues(
                            batch_job['recid'],
                            CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                                % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(
        batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job,
                                            'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory auf the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first an then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc"
                                % bibdoc_video_docname)
            master_format = compose_format(
                bibdoc_video_extension,
                getval(batch_job, 'bibdoc_master_subformat', 'master'))
            ## If a file of the same format is there, something is wrong,
            ## remove it! it might be caused by a previous corrupted
            ## submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                batch_job['input'],
                version=1,
                description=getval(batch_job, 'bibdoc_master_description'),
                comment=getval(batch_job, 'bibdoc_master_comment'),
                docformat=master_format)

    #-----------#
    # JOBS LOOP #
    #-----------#

    return_code = 1
    global _BATCH_STEP

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------"
                            % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(
                job['bibdoc_docname']).safe_substitute(
                    {'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as
            ## fallback
            if not getval(batch_job, 'assure_quality') and getval(job,
                                                                  'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None

            ## We need an extension defined fot the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")

            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname',
                                                bibdoc_video_docname)

            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(bibdoc_video_directory,
                                                 bibdoc_slave_video_docname,
                                                 bibdoc_video_extension)
            _task_write_message("Transcoding %s to %s;%s"
                                % (bibdoc_slave_video_docname,
                                   bibdoc_video_extension,
                                   bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            ## BUGFIX: the audio/video bitrate values were swapped
            ## (abitrate was fed 'videobitrate' and vice versa)
            encoding_result = encode_video(
                input_file=batch_job['input'],
                output_file=bibdoc_video_fullpath,
                acodec=getval(job, 'audiocodec'),
                vcodec=getval(job, 'videocodec'),
                abitrate=getval(job, 'audiobitrate'),
                vbitrate=getval(job, 'videobitrate'),
                resolution=getval(job, 'resolution'),
                passes=getval(job, 'passes', 1),
                special=getval(job, 'special'),
                specialfirst=getval(job, 'specialfirst'),
                specialsecond=getval(job, 'specialsecond'),
                metadata=getval(job, 'metadata'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),  # Aspect for every job
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                message_fnc=_task_write_message)
            return_code &= encoding_result

            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(bibdoc_video_fullpath,
                          compose_file(bibdoc_video_directory,
                                       bibdoc_video_extension,
                                       bibdoc_video_subformat,
                                       1,
                                       bibdoc_slave_video_docname))
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                             bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(
                        getval(job, 'bibdoc_description'),
                        bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            #Move this to the batch description
            bibdoc_frame_docname = getval(job, 'bibdoc_docname',
                                          bibdoc_video_docname)
            tmpfname = (tmpdir + "/" + bibdoc_frame_docname + '.'
                        + getval(profile, 'extension',
                                 getval(job, 'extension', 'jpg')))
            extraction_result = extract_frames(
                input_file=batch_job['input'],
                output_file=tmpfname,
                size=getval(job, 'size'),
                positions=getval(job, 'positions'),
                numberof=getval(job, 'numberof'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                )
            return_code &= extraction_result

            ## only on success:
            if extraction_result:
                ## for every filename in the directorys, create a bibdoc
                ## that contains all sizes of the frame from the two
                ## directories
                files = os.listdir(tmpdir)
                for filename in files:
                    ## The docname was altered by BibEncode extract through
                    ## substitution. Retrieve it from the filename again
                    bibdoc_frame_docname, bibdoc_frame_extension = \
                        os.path.splitext(filename)
                    _task_write_message("Creating new bibdoc for %s"
                                        % bibdoc_frame_docname)
                    ## If the bibdoc exists, receive it
                    if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                        bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                    ## Create a new bibdoc if it does not exist
                    else:
                        bibdoc_frame = recdoc.add_bibdoc(
                            docname=bibdoc_frame_docname)

                    ## The filename including path from tmpdir
                    fname = os.path.join(tmpdir, filename)
                    bibdoc_frame_format = compose_format(
                        bibdoc_frame_extension, bibdoc_frame_subformat)
                    ## Same as with the master, if the format allready
                    ## exists, override it, because something went wrong
                    ## before
                    if bibdoc_frame.format_already_exists_p(
                            bibdoc_frame_format):
                        bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                    _task_write_message("Adding %s jpg;%s to BibDoc"
                                        % (bibdoc_frame_docname,
                                           getval(job, 'bibdoc_subformat')))
                    bibdoc_frame.add_file_new_format(
                        fname,
                        version=1,
                        description=getval(job, 'bibdoc_description'),
                        comment=getval(job, 'bibdoc_comment'),
                        docformat=bibdoc_frame_format)
                ## Remove the temporary folders
                _task_write_message("Removing temporary directory")
                shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file=getval(batch_job, 'input'),
                                 pbcoreIdentifier=batch_job['recid'],
                                 aspect_override=getval(batch_job, 'aspect'))
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        marc_snippet = open(getval(batch_job, 'marc_snippet'))
        marcxml = marc_snippet.read()
        marc_snippet.close()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        ## BUGFIX: the check was inverted ('if not return_code') which
        ## would delete the source video exactly when a job FAILED;
        ## return_code is truthy on success (see notification below)
        # only if successfull
        if return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern', '') in getval(
                    batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename',
                                      batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            ## BUGFIX: was 'type(getval(...) == type(str()))' which takes
            ## the type of a boolean and is therefore always truthy
            if isinstance(getval(batch_job, 'notify_admin'), str):
                _notify_error_admin(batch_job,
                                    getval(batch_job, 'notify_admin'))
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(getval(batch_job, 'notify_user'),
                                 getval(batch_job, 'submission_filename',
                                        batch_job['input']),
                                 getval(batch_job, 'recid'),
                                 getval(batch_job, 'submission_title', ""))
    return 1
def pbcore_metadata(input_file, pbcoreIdentifier=None, pbcoreTitle=None,
                    pbcoreDescription=None, instantiationIdentifier=None,
                    instantiationPhysical=None, instantiationLocation=None,
                    instantiationGenerations=None, instantiationExtension=None,
                    instantiationPart=None, instantiationAnnotation=None,
                    instantiationRights=None, instantiationRelation=None,
                    xmlns="pbcore", aspect_override=None):
    """ Transformes parsed metadata to a pbcore representation.
    To supplement all the pbcore field, we need both ffprobe and mediainfo.
    If only ffprobe is installed, it will not fail but supplement only
    partially.

    @param input_file: full path to the file to extract the metadata from
    @type input_file: string
    @param xmlns: XML namespace prefix to apply to every element
    @type xmlns: string
    @param aspect_override: aspect ratio to force upon the mediainfo parser
    @return: pbcore xml metadata representation
    @rtype: string
    """

    def _follow_path(path, locals_u, meta_dict, probe_dict, stream_number=None):
        """ Trys to follow a given path and returns the value it represents.
        The path is a string that must be like this:

        local->variable_name
        ffprobe->format->param
        ffprobe->video->param
        ffprobe->audio->param
        ffprobe->stream->param
        mediainfo->general->param
        mediainfo->audio->param
        mediainfo->video->param
        mediainfo->track->param

        @param path: Path to the value
        @type path: string
        @param locals_u: Local variables
        @type locals_u: dict
        @param meta_dict: Mediainfo metadata
        @type meta_dict: dict
        @param probe_dict: FFprobe metadata
        @type probe_dict: dict
        @param stream_number: To follow a path to a specific stream
        @type stream_number: int
        @return: value of the element the path points to
        @rtype: string
        """
        path_segments = path.split("->")
        ## ffprobe
        if path_segments[0] == 'ffprobe':
            ## format
            if path_segments[1] == 'format':
                return getval(probe_dict['format'], path_segments[2], 0)
            ## first stream of the given codec type
            elif path_segments[1] in ('video', 'audio'):
                for stream in probe_dict['streams']:
                    if getval(stream, 'codec_type') == path_segments[1]:
                        return getval(stream, path_segments[2], 0)
            ## stream by number
            elif path_segments[1] == 'stream':
                return getval(probe_dict['streams'][stream_number],
                              path_segments[2], 0)
        ## mediainfo
        elif path_segments[0] == 'mediainfo':
            ## general, video, audio
            if path_segments[1] in ('general', 'video', 'audio'):
                for track in meta_dict:
                    if getval(track, 'kind_of_stream').lower() == path_segments[1]:
                        return getval(track, path_segments[2], 0)
            ## track by number
            elif path_segments[1] == 'track':
                ## We rely on format being the first track in mediainfo
                ## And the order of streams in ffprobe and tracks in
                ## mediainfo being the same
                return getval(meta_dict[stream_number + 1], path_segments[2], 0)
        ## local variable
        elif path_segments[0] == 'local':
            return getval(locals_u, path_segments[1], 0)
        ## direct input
        else:
            return path_segments[0]

    def _map_values(mapping, locals_u, meta_dict, probe_dict, stream_number=None):
        """ substitute a mapping dictionary an returns the substituted value.
        The dictionary must contain of a 'tag' a 'mapping' and a 'call'

        @param mapping: mapping dictionary to substitute
        @type mapping: dict
        @param locals_u: Local variables
        @type locals_u: dict
        @param meta_dict: Mediainfo metadata
        @type meta_dict: dict
        @param probe_dict: FFprobe metadata
        @type probe_dict: dict
        @param stream_number: To follow a path to a specific stream
        @type stream_number: int
        @return: substituted mapping
        @rtype: string
        """
        items = []
        for value in mapping:
            mapping = value['mapping']
            tag = value['tag']
            call = getval(value, 'call')
            micro_mappings = mapping.split(';;')
            values = []
            foundall = True
            for micro_mapping in micro_mappings:
                value = _follow_path(micro_mapping, locals_u, meta_dict,
                                     probe_dict, stream_number)
                if value:
                    ## Optionally post-process the value through a module
                    ## level function named by 'call'
                    if call:
                        value = globals()[call](value)
                    values.append(value.strip())
                else:
                    foundall = False
            ## Only emit the tag if every micro mapping resolved.
            ## Narrowed from a bare 'except': the '%' substitution can
            ## only raise TypeError/ValueError here.
            try:
                if values and foundall:
                    items.append(tag % "".join(values))
            except (TypeError, ValueError):
                pass
        return items

    ## Get the metadata from ffprobe and mediainfo
    meta_dict = mediainfo_metadata(input_file, aspect_override)
    probe_dict = ffprobe_metadata(input_file)
    # parse the mappings
    pbcore_mappings = json_decode_file(CFG_BIBENCODE_PBCORE_MAPPINGS)
    ## INSTANTIATION ##
    # According to the PBcore standard, this strict order MUST be followed
    instantiation_mapping = pbcore_mappings['instantiation_mapping']
    ## ESSENCE TRACK ##
    # According to the PBcore standard, this strict order MUST be followed
    essencetrack_mapping = pbcore_mappings['essencetrack_mapping']
    ## The XML header for the PBcore document
    header = (
        """<?xml version="1.0" encoding="UTF-8"?><pbcoreDescriptionDocument """
        """xmlns%(xmlns)s="http://www.pbcore.org/PBCore/PBCoreNamespace.html" """
        """xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" """
        """xsi:schemaLocation="http://www.pbcore.org/PBCore/PBCoreNamespace.html">"""
    )
    if pbcoreIdentifier:
        pbcoreIdentifier = """<pbcoreIdentifier>%s</pbcoreIdentifier>""" % pbcoreIdentifier
    else:
        pbcoreIdentifier = ""
    if pbcoreTitle:
        pbcoreTitle = """<pbcoreTitle>%s</pbcoreTitle>""" % pbcoreTitle
    else:
        pbcoreTitle = ""
    tail = """</pbcoreDescriptionDocument>"""
    ## ESSENCE TRACKS ##
    essencetracks = []
    for stream_no in range(len(probe_dict['streams'])):
        essencetracks.append(_map_values(essencetrack_mapping, locals(),
                                         meta_dict, probe_dict, stream_no))
    ## NOTE: 'joinedtracks' looks unused, but locals() is handed to
    ## _map_values below, so the mapping file can pull it in through a
    ## 'local->joinedtracks' path - do not remove or rename
    joinedtracks = []
    for track in essencetracks:
        track = ("<instantiationEssenceTrack>" + "".join(track)
                 + "</instantiationEssenceTrack>")
        joinedtracks.append(track)
    joinedtracks = "".join(joinedtracks)
    ## INSTANTIATION ##
    instantiation_items = _map_values(instantiation_mapping, locals(),
                                      meta_dict, probe_dict)
    joinedinstantiation = ("<pbcoreInstantiation>"
                           + "".join(instantiation_items)
                           + "</pbcoreInstantiation>")
    joined = "%s%s%s%s%s" % (header, pbcoreIdentifier, pbcoreTitle,
                             joinedinstantiation, tail)
    if xmlns:
        joined = joined % {"xmlns": ":%s" % xmlns}
        ## Raw strings: '\w' and '\g' are invalid escape sequences in
        ## plain string literals on modern Python
        joined = re.sub(r"<(\w[^>]+)>", r"<%s:\g<1>>" % xmlns, joined)
        joined = re.sub(r"<\/([^>]+)>", r"</%s:\g<1>>" % xmlns, joined)
    else:
        joined = joined % {"xmlns": ""}
    return joined
def Video_Processing(parameters, curdir, form, user_info=None):
    """ Perform all the required processing of the video.

    Parameters are:
    * "batch_template": to specify the absolute path to a
      configuration describe which manipulation should the uploaded file
      receive. If empty, will use by default
      etc/bibencode/batch_template_submission.json
    * "aspect": to specify in which form element the aspect will be available
    * "title": to specify in which form element the title will be available
    """
    ## Read the batch template for submissions
    if parameters.get('batch_template'):
        try:
            batch_template = json_decode_file(parameters.get('batch_template'))
        except:
            register_exception(
                prefix="The given batch template was not readable")
            raise
    else:
        batch_template = json_decode_file(
            CFG_BIBENCODE_TEMPLATE_BATCH_SUBMISSION)

    ## Handle the filepath
    file_storing_path = os.path.join(curdir, "files", str(user_info['uid']),
                                     "NewFile", 'filepath')
    try:
        fp = open(file_storing_path)
        fullpath = fp.read()
        fp.close()
        batch_template['input'] = fullpath
    except:
        register_exception(
            prefix="The file containing the path to the video was not readable"
        )
        raise

    ## Handle the filename
    file_storing_name = os.path.join(curdir, "files", str(user_info['uid']),
                                     "NewFile", 'filename')
    try:
        fp = open(file_storing_name)
        filename = fp.read()
        fp.close()
        batch_template['bibdoc_master_docname'] = os.path.splitext(
            os.path.split(filename)[1])[0]
        batch_template['bibdoc_master_extension'] = os.path.splitext(
            filename)[1]
        batch_template['submission_filename'] = filename
    except:
        register_exception(
            prefix=
            "The file containing the original filename of the video was not readable"
        )
        raise

    ## Handle the aspect ratio
    if parameters.get('aspect'):
        try:
            file_storing_aspect = os.path.join(curdir,
                                               parameters.get('aspect'))
            fp = open(file_storing_aspect)
            aspect = fp.read()
            fp.close()
            batch_template['aspect'] = aspect
        except:
            register_exception(
                prefix=
                "The file containing the ascpect ratio of the video was not readable"
            )
            raise
    else:
        batch_template['aspect'] = None

    ## Handle the title
    if parameters.get('title'):
        try:
            file_storing_title = os.path.join(curdir, parameters['title'])
            fp = open(file_storing_title)
            title = fp.read()
            fp.close()
            ## BUGFIX: the title that was just read was never stored,
            ## leaving 'submission_title' unset in this branch
            batch_template['submission_title'] = title
        except:
            register_exception(
                prefix=
                "The file containing the title of the video was not readable")
            raise
    else:
        batch_template['submission_title'] = None

    ## Set the rest
    batch_template['notify_admin'] = CFG_SITE_ADMIN_EMAIL
    batch_template['notify_user'] = user_info['email']
    ## NOTE(review): `sysno` is not defined in this function - it appears
    ## to be injected by the WebSubmit execution context; confirm
    batch_template['recid'] = sysno
    timestamp = generate_timestamp()
    job_filename = "submission_%d_%s.job" % (sysno, timestamp)
    create_job_from_dictionary(batch_template, job_filename)