def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    Decodes and sanitises the batch description, locates or attaches the
    video master, runs every 'encode' and 'extract' job, fixes the
    BibDoc/MARC of the record, uploads the optional MARCXML and finally
    sends the notifications requested in the batch description.

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: always 1; failed jobs are only reported through the task log
             and the notification e-mails
    @rtype: int
    @raise Exception: if the record does not exist or an encoding job has
                      no container/extension defined
    """
    global _BATCH_STEPS
    global _BATCH_STEP

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = ('bibencode_' + str(batch_job['recid']) + '_'
                        + str(uuid.uuid4()) + '.xml')
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR,
                                    xml_filename)
        ## The context manager closes the file even if the write fails
        with open(xml_filename, 'w') as xml_file:
            xml_file.write(marcxml)
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    if record_exists(batch_job['recid']) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        for bibdoc in recdoc.list_bibdocs():
            for bibdocfile in bibdoc.list_all_files():
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                ## Criteria missing from the batch description fall back to
                ## the file's own value and therefore always match
                m_comment = getval(batch_job, 'bibdoc_master_comment',
                                   comment)
                m_description = getval(batch_job,
                                       'bibdoc_master_description',
                                       description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat',
                                     subformat)
                if (comment == m_comment and
                        description == m_description and
                        subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect ratio from the record that is being
                    ## processed (was a hard-coded record id before)
                    try:
                        ## Assumes pbcore metadata mapping
                        batch_job['aspect'] = get_fieldvalues(
                            batch_job['recid'],
                            CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        ## No aspect stored in the record, go on without it
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                                % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(
        batch_job['input'])[1:]
    if (not bibdoc_video_extension
            or getval(batch_job, 'bibdoc_master_extension')):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory of the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if (not getval(batch_job, 'update_from_master')
            and getval(batch_job, 'add_master')):
        ## Generate the right name for the master
        ## The master should be hidden first and then renamed
        ## when it is really available
        ## !!! FIX !!!
        _task_write_message("Adding %s master to the BibDoc"
                            % bibdoc_video_docname)
        master_format = compose_format(
            bibdoc_video_extension,
            getval(batch_job, 'bibdoc_master_subformat', 'master'))
        ## If a file of the same format is there, something is wrong, remove it!
        ## it might be caused by a previous corrupted submission etc.
        if bibdoc_video.format_already_exists_p(master_format):
            bibdoc_video.delete_file(master_format, 1)
        bibdoc_video.add_file_new_format(
            batch_job['input'],
            version=1,
            description=getval(batch_job, 'bibdoc_master_description'),
            comment=getval(batch_job, 'bibdoc_master_comment'),
            docformat=master_format)

    #-----------#
    # JOBS LOOP #
    #-----------#

    ## Stays 1 only as long as every executed job reports success
    return_code = 1

    for job in batch_job['jobs']:
        _task_write_message("----------- Job %s of %s -----------"
                            % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(
                job['bibdoc_docname']).safe_substitute(
                    {'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            ## (the step counter is deliberately not advanced, as before)
            if (not getval(batch_job, 'assure_quality')
                    and getval(job, 'fallback')):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None
            ## We need an extension defined for the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname',
                                                bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(bibdoc_video_directory,
                                                 bibdoc_slave_video_docname,
                                                 bibdoc_video_extension)
            _task_write_message("Transcoding %s to %s;%s"
                                % (bibdoc_slave_video_docname,
                                   bibdoc_video_extension,
                                   bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            encoding_result = encode_video(
                input_file=batch_job['input'],
                output_file=bibdoc_video_fullpath,
                acodec=getval(job, 'audiocodec'),
                vcodec=getval(job, 'videocodec'),
                ## Audio/video bitrates were swapped here before
                abitrate=getval(job, 'audiobitrate'),
                vbitrate=getval(job, 'videobitrate'),
                resolution=getval(job, 'resolution'),
                passes=getval(job, 'passes', 1),
                special=getval(job, 'special'),
                specialfirst=getval(job, 'specialfirst'),
                specialsecond=getval(job, 'specialsecond'),
                metadata=getval(job, 'metadata'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),  # Aspect for every job
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                message_fnc=_task_write_message)
            return_code &= encoding_result
            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(bibdoc_video_fullpath,
                          compose_file(bibdoc_video_directory,
                                       bibdoc_video_extension,
                                       bibdoc_video_subformat,
                                       1,
                                       bibdoc_slave_video_docname))
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                             bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(
                        getval(job, 'bibdoc_description'),
                        bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            ## The temporary directory is removed even if something below
            ## raises, so failed jobs do not leave extracted frames behind
            try:
                #Move this to the batch description
                bibdoc_frame_docname = getval(job, 'bibdoc_docname',
                                              bibdoc_video_docname)
                tmpfname = (tmpdir + "/" + bibdoc_frame_docname + '.'
                            + getval(profile, 'extension',
                                     getval(job, 'extension', 'jpg')))
                extraction_result = extract_frames(
                    input_file=batch_job['input'],
                    output_file=tmpfname,
                    size=getval(job, 'size'),
                    positions=getval(job, 'positions'),
                    numberof=getval(job, 'numberof'),
                    width=getval(job, 'width'),
                    height=getval(job, 'height'),
                    aspect=getval(batch_job, 'aspect'),
                    profile=getval(job, 'profile'),
                    update_fnc=_task_update_overall_status,
                )
                return_code &= extraction_result

                ## only on success:
                if extraction_result:
                    ## for every file in the directory, attach it to the
                    ## bibdoc that is named like the file
                    for filename in os.listdir(tmpdir):
                        ## The docname was altered by BibEncode extract through substitution
                        ## Retrieve it from the filename again
                        bibdoc_frame_docname, bibdoc_frame_extension = \
                            os.path.splitext(filename)
                        _task_write_message("Creating new bibdoc for %s"
                                            % bibdoc_frame_docname)
                        ## If the bibdoc exists, receive it
                        if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                            bibdoc_frame = recdoc.get_bibdoc(
                                bibdoc_frame_docname)
                        ## Create a new bibdoc if it does not exist
                        else:
                            bibdoc_frame = recdoc.add_bibdoc(
                                docname=bibdoc_frame_docname)

                        ## The filename including path from tmpdir
                        fname = os.path.join(tmpdir, filename)

                        bibdoc_frame_format = compose_format(
                            bibdoc_frame_extension, bibdoc_frame_subformat)
                        ## Same as with the master, if the format already exists,
                        ## override it, because something went wrong before
                        if bibdoc_frame.format_already_exists_p(
                                bibdoc_frame_format):
                            bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                        _task_write_message("Adding %s jpg;%s to BibDoc"
                                            % (bibdoc_frame_docname,
                                               getval(job, 'bibdoc_subformat')))
                        bibdoc_frame.add_file_new_format(
                            fname,
                            version=1,
                            description=getval(job, 'bibdoc_description'),
                            comment=getval(job, 'bibdoc_comment'),
                            docformat=bibdoc_frame_format)
            finally:
                ## Remove the temporary folders
                _task_write_message("Removing temporary directory")
                shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file=getval(batch_job, 'input'),
                                 pbcoreIdentifier=batch_job['recid'],
                                 aspect_override=getval(batch_job, 'aspect'))
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        with open(getval(batch_job, 'marc_snippet')) as marc_snippet:
            marcxml = marc_snippet.read()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    ## Only if all jobs were successful (return_code is still 1); the check
    ## was inverted before and removed the input of failed batches instead
    if getval(batch_job, 'delete_input') and return_code:
        input_path = getval(batch_job, 'input')
        ## only if input matches pattern
        if getval(batch_job, 'delete_input_pattern', '') in input_path:
            _task_write_message("Deleting input file")
            try:
                os.remove(input_path)
            except OSError:
                ## Deleting is best effort, but leave a trace in the log
                _task_write_message("Could not delete input file")

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename', batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        notify_admin = getval(batch_job, 'notify_admin')
        if notify_admin:
            _task_write_message("Notify admin because of an error")
            ## Only a string is passed on as address, any other truthy value
            ## (e.g. True) notifies the default admin.
            ## NOTE(review): under Python 2 a unicode value takes the default
            ## branch - confirm that json_decode_file returns byte strings
            if isinstance(notify_admin, str):
                _notify_error_admin(batch_job, notify_admin)
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename', batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
    return 1
def extract_frames(input_file, output_file=None, size=None, positions=None,
                   numberof=None, extension='jpg',
                   width=None, height=None, aspect=None, profile=None,
                   update_fnc=task_update_progress,
                   message_fnc=write_message):
    """ Extracts frames from a given video using ffmpeg based on the given
    parameters. Starts one subprocess per frame and reports the progress
    through the given messaging functions.

    @param input_file: Full path to the input video.
    @type input_file: string
    @param output_file: Full path to the output file. If more than one frame
                    is extracted and the name contains no %(number)
                    placeholder, a zero padded sequential number is appended
                    to the file's name automatically.
                    If this parameter is not given, the output filename will
                    be generated from the input file and 'extension'.
                    The output can be substituted with information.
                    Valid substrings for substitution are:
                    %(input)s for the input filename
                    %(timecode)s for the timecode
                    %(size)s for the frame size
                    %(number)d for sequential numbers

                    Everything else that could be a python substitution
                    substring should be escaped accordingly.
                    !!! Warning !!! FFmpeg will also try to substitute if
                    there are any '%' left. This will likely screw up the
                    extraction.
    @type output_file: string
    @param size: The size of the frames. Format is WxH
    @type size: string
    @param positions: Positions within the video where the frames should be
                    shot, given as seconds or HH:MM:SS.ss timecodes. The
                    given sequence is not modified.
    @type positions: list
    @param numberof: In case you don't want to give positions but just a
                    fixed number of frames to extract.
    @type numberof: int
    @param extension: If no output filename is given, construct the name with
                    this extension
    @type extension: string
    @param width: The width of the extracted frame.
    @type width: int
    @param height: The height of the extracted frame
    @type height: int
    @param aspect: A float representing the aspect ratio of the video.
                    4:3 equals 1.33 and 16:9 equals 1.77. See also 'width'
    @type aspect: float or "4:3" like string
    @param profile: A profile to use. The priority is on the parameters
                    directly given to the function.
    @type profile: string
    @param update_fnc: A function called to display or log the extraction
                    status. This function must accept a string.
    @type update_fnc: function
    @param message_fnc: A function to log important messages or errors.
                    This function must accept a string.
    @type message_fnc: function
    @return: 1 if the extraction was successful, 0 if not
    @rtype: int
    @raise ValueError: if both or none of 'positions' and 'numberof' are
                    given, or if a position is neither seconds nor a timecode
    """

    #---------#
    # PROFILE #
    #---------#

    ## Takes parameters from the profile if they are not directly given
    if profile:
        profile = get_extract_profile(profile)
        size = chose(size, 'size', profile)
        positions = chose(positions, 'positions', profile)
        numberof = chose(numberof, 'numberof', profile)
        extension = chose(extension, 'extension', profile)
        width = chose(width, 'width', profile)
        height = chose(height, 'height', profile)

    #---------------#
    # Check and fix #
    #---------------#

    ## If neither positions nor a number of shots are given
    if not positions and not numberof:
        raise ValueError("Either argument \'positions\' xor argument \'numberof\' must be given")
    ## If both are given
    if positions and numberof:
        raise ValueError("Arguments \'positions\' and \'numberof\' exclude each other")

    ## If just a number of shots to take is given by 'numberof'
    if numberof:
        ## Parse the duration from the input
        info = ffprobe_metadata(input_file)
        if info is None:
            message_fnc("An error occured while receiving the video log")
            return 0
        ## A missing or non numeric duration is reported instead of raising
        try:
            duration = float(info['format']['duration'])
        except (KeyError, TypeError, ValueError):
            message_fnc("Could not extract by \'numberof\' because video duration is unknown.")
            return 0
        ## Spread the shots evenly over the video, starting at second 0
        positions = [shot * (duration / numberof)
                     for shot in range(numberof)]

    ## If specific positions are given
    else:
        ## Check if correct timecodes or seconds are given. A new list is
        ## built so that the list of the caller (or of the profile) is not
        ## modified.
        seconds = []
        for pos in positions:
            if is_timecode(pos):
                ## if a timecode is given, convert it to seconds
                seconds.append(timecode_to_seconds(pos))
            elif is_seconds(pos):
                seconds.append(pos)
            else:
                raise ValueError("The given position \'%s\' is neither a value in seconds nor a timecode!" % str(pos))
        positions = seconds

    ## If no output filename is given, use input filename and append extension
    if output_file is None:
        ipath = os.path.splitext(input_file)[0]
        if not extension.startswith("."):
            extension = "." + extension
        output_file = ipath + extension

    ## If no explicit size for the frames is given
    if not size:
        size = determine_resolution_preserving_aspect(input_file, width,
                                                      height, aspect)

    #------------#
    # Extraction #
    #------------#

    total = len(positions)
    number_substituted = '%(number)' in output_file
    input_name = os.path.splitext(os.path.split(input_file)[1])[0]

    for counter, position in enumerate(positions, 1):

        #---------------------------#
        # Generate output file name #
        #---------------------------#

        ## Substitute the placeholders of the output filename
        output_filename = output_file % {
            'input': input_name,
            'timecode': seconds_to_timecode(position),
            'size': size,
            'number': counter
            }

        ## In case that more than one shot is taken and no number is
        ## substituted, append a zero padded counter. The substituted name is
        ## used as base; the other placeholders were lost here before.
        if not number_substituted and total > 1:
            path, ext = os.path.splitext(output_filename)
            output_filename = path + str(counter).zfill(len(str(total))) + ext

        #-------------#
        # Run process #
        #-------------#

        ## Build the command for ffmpeg
        ## NOTE(review): the command is split on whitespace, so paths that
        ## contain spaces end up as several arguments - confirm with callers
        command = (CFG_BIBENCODE_FFMPEG_EXTRACT_COMMAND % (
            position, input_file, size, output_filename)).split()
        ## Start subprocess and wait until it finishes. communicate() drains
        ## stderr completely; we only keep the last lines of the output in
        ## case of an error
        process = subprocess.Popen(command, stderr=subprocess.PIPE)
        stderr = process.communicate()[1].splitlines()[-5:]
        ## If something went wrong, print the last lines of the log
        if process.returncode != 0:
            msg = ("Error while extracting frame %d of %d"
                   % (counter, total))
            message_fnc(msg)
            update_fnc(msg)
            ## Print the end of the log
            message_fnc("Last lines of the FFmpeg log:")
            for line in stderr:
                message_fnc(line)
            return 0
        update_fnc("Frame %d of %d extracted" % (counter, total))

    ## Everything should be fine if this position is reached
    message_fnc("Extraction of frames was successful")
    return 1
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    Decodes and sanitises the batch description, locates or attaches the
    video master, runs every 'encode' and 'extract' job, fixes the
    BibDoc/MARC of the record, uploads the optional MARCXML and finally
    sends the notifications requested in the batch description.

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: always 1; failed jobs are only reported through the task log
             and the notification e-mails
    @rtype: int
    @raise Exception: if the record does not exist or an encoding job has
                      no container/extension defined
    """
    global _BATCH_STEPS
    global _BATCH_STEP

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = ('bibencode_' + str(batch_job['recid']) + '_'
                        + str(uuid.uuid4()) + '.xml')
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR,
                                    xml_filename)
        ## The context manager closes the file even if the write fails
        with open(xml_filename, 'w') as xml_file:
            xml_file.write(marcxml)
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    if record_exists(batch_job['recid']) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        for bibdoc in recdoc.list_bibdocs():
            for bibdocfile in bibdoc.list_all_files():
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                ## Criteria missing from the batch description fall back to
                ## the file's own value and therefore always match
                m_comment = getval(batch_job, 'bibdoc_master_comment',
                                   comment)
                m_description = getval(batch_job,
                                       'bibdoc_master_description',
                                       description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat',
                                     subformat)
                if (comment == m_comment and
                        description == m_description and
                        subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect ratio from the record that is being
                    ## processed (was a hard-coded record id before)
                    try:
                        ## Assumes pbcore metadata mapping
                        batch_job['aspect'] = get_fieldvalues(
                            batch_job['recid'],
                            CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        ## No aspect stored in the record, go on without it
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                                % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(
        batch_job['input'])[1:]
    if (not bibdoc_video_extension
            or getval(batch_job, 'bibdoc_master_extension')):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory of the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if (not getval(batch_job, 'update_from_master')
            and getval(batch_job, 'add_master')):
        ## Generate the right name for the master
        ## The master should be hidden first and then renamed
        ## when it is really available
        ## !!! FIX !!!
        _task_write_message("Adding %s master to the BibDoc"
                            % bibdoc_video_docname)
        master_format = compose_format(
            bibdoc_video_extension,
            getval(batch_job, 'bibdoc_master_subformat', 'master'))
        ## If a file of the same format is there, something is wrong, remove it!
        ## it might be caused by a previous corrupted submission etc.
        if bibdoc_video.format_already_exists_p(master_format):
            bibdoc_video.delete_file(master_format, 1)
        bibdoc_video.add_file_new_format(
            batch_job['input'],
            version=1,
            description=getval(batch_job, 'bibdoc_master_description'),
            comment=getval(batch_job, 'bibdoc_master_comment'),
            docformat=master_format)

    #-----------#
    # JOBS LOOP #
    #-----------#

    ## Stays 1 only as long as every executed job reports success
    return_code = 1

    for job in batch_job['jobs']:
        _task_write_message("----------- Job %s of %s -----------"
                            % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(
                job['bibdoc_docname']).safe_substitute(
                    {'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            ## (the step counter is deliberately not advanced, as before)
            if (not getval(batch_job, 'assure_quality')
                    and getval(job, 'fallback')):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None
            ## We need an extension defined for the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname',
                                                bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(bibdoc_video_directory,
                                                 bibdoc_slave_video_docname,
                                                 bibdoc_video_extension)
            _task_write_message("Transcoding %s to %s;%s"
                                % (bibdoc_slave_video_docname,
                                   bibdoc_video_extension,
                                   bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            encoding_result = encode_video(
                input_file=batch_job['input'],
                output_file=bibdoc_video_fullpath,
                acodec=getval(job, 'audiocodec'),
                vcodec=getval(job, 'videocodec'),
                ## Audio/video bitrates were swapped here before
                abitrate=getval(job, 'audiobitrate'),
                vbitrate=getval(job, 'videobitrate'),
                resolution=getval(job, 'resolution'),
                passes=getval(job, 'passes', 1),
                special=getval(job, 'special'),
                specialfirst=getval(job, 'specialfirst'),
                specialsecond=getval(job, 'specialsecond'),
                metadata=getval(job, 'metadata'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),  # Aspect for every job
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                message_fnc=_task_write_message)
            return_code &= encoding_result
            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(bibdoc_video_fullpath,
                          compose_file(bibdoc_video_directory,
                                       bibdoc_video_extension,
                                       bibdoc_video_subformat,
                                       1,
                                       bibdoc_slave_video_docname))
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                             bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(
                        getval(job, 'bibdoc_description'),
                        bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            ## The temporary directory is removed even if something below
            ## raises, so failed jobs do not leave extracted frames behind
            try:
                #Move this to the batch description
                bibdoc_frame_docname = getval(job, 'bibdoc_docname',
                                              bibdoc_video_docname)
                tmpfname = (tmpdir + "/" + bibdoc_frame_docname + '.'
                            + getval(profile, 'extension',
                                     getval(job, 'extension', 'jpg')))
                extraction_result = extract_frames(
                    input_file=batch_job['input'],
                    output_file=tmpfname,
                    size=getval(job, 'size'),
                    positions=getval(job, 'positions'),
                    numberof=getval(job, 'numberof'),
                    width=getval(job, 'width'),
                    height=getval(job, 'height'),
                    aspect=getval(batch_job, 'aspect'),
                    profile=getval(job, 'profile'),
                    update_fnc=_task_update_overall_status,
                )
                return_code &= extraction_result

                ## only on success:
                if extraction_result:
                    ## for every file in the directory, attach it to the
                    ## bibdoc that is named like the file
                    for filename in os.listdir(tmpdir):
                        ## The docname was altered by BibEncode extract through substitution
                        ## Retrieve it from the filename again
                        bibdoc_frame_docname, bibdoc_frame_extension = \
                            os.path.splitext(filename)
                        _task_write_message("Creating new bibdoc for %s"
                                            % bibdoc_frame_docname)
                        ## If the bibdoc exists, receive it
                        if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                            bibdoc_frame = recdoc.get_bibdoc(
                                bibdoc_frame_docname)
                        ## Create a new bibdoc if it does not exist
                        else:
                            bibdoc_frame = recdoc.add_bibdoc(
                                docname=bibdoc_frame_docname)

                        ## The filename including path from tmpdir
                        fname = os.path.join(tmpdir, filename)

                        bibdoc_frame_format = compose_format(
                            bibdoc_frame_extension, bibdoc_frame_subformat)
                        ## Same as with the master, if the format already exists,
                        ## override it, because something went wrong before
                        if bibdoc_frame.format_already_exists_p(
                                bibdoc_frame_format):
                            bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                        _task_write_message("Adding %s jpg;%s to BibDoc"
                                            % (bibdoc_frame_docname,
                                               getval(job, 'bibdoc_subformat')))
                        bibdoc_frame.add_file_new_format(
                            fname,
                            version=1,
                            description=getval(job, 'bibdoc_description'),
                            comment=getval(job, 'bibdoc_comment'),
                            docformat=bibdoc_frame_format)
            finally:
                ## Remove the temporary folders
                _task_write_message("Removing temporary directory")
                shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file=getval(batch_job, 'input'),
                                 pbcoreIdentifier=batch_job['recid'],
                                 aspect_override=getval(batch_job, 'aspect'))
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        with open(getval(batch_job, 'marc_snippet')) as marc_snippet:
            marcxml = marc_snippet.read()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    ## Only if all jobs were successful (return_code is still 1); the check
    ## was inverted before and removed the input of failed batches instead
    if getval(batch_job, 'delete_input') and return_code:
        input_path = getval(batch_job, 'input')
        ## only if input matches pattern
        if getval(batch_job, 'delete_input_pattern', '') in input_path:
            _task_write_message("Deleting input file")
            try:
                os.remove(input_path)
            except OSError:
                ## Deleting is best effort, but leave a trace in the log
                _task_write_message("Could not delete input file")

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename', batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        notify_admin = getval(batch_job, 'notify_admin')
        if notify_admin:
            _task_write_message("Notify admin because of an error")
            ## Only a string is passed on as address, any other truthy value
            ## (e.g. True) notifies the default admin.
            ## NOTE(review): under Python 2 a unicode value takes the default
            ## branch - confirm that json_decode_file returns byte strings
            if isinstance(notify_admin, str):
                _notify_error_admin(batch_job, notify_admin)
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename', batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
    return 1
def extract_frames(input_file, output_file=None, size=None, positions=None,
                   numberof=None, extension='jpg',
                   width=None, height=None, aspect=None, profile=None,
                   update_fnc=task_update_progress,
                   message_fnc=write_message):
    """ Extracts frames from a given video using ffmpeg based on the given
    parameters. Starts one ffmpeg subprocess per frame and reports the
    progress through the given messaging functions.

    @param input_file: Full path to the input video.
    @type input_file: string
    @param output_file: Full path to the output file. If more than one frame
        is extracted and the name contains no '%(number)' placeholder, a
        zero-padded sequential number is appended to the file's name
        (in front of the extension) automatically.
        If this parameter is not given, the output filename is generated
        from the input filename and 'extension'.
        The output can be substituted with information. Valid substrings for
        substitution are:
            %(input)s for the input filename (without directory and extension)
            %(timecode)s for the timecode
            %(size)s for the frame size
            %(number)d for sequential numbers (starting at 1)
        Everything else that could be a python substitution substring should
        be escaped accordingly.
        !!! Warning !!! FFmpeg will also try to substitute if there are any
        '%' left. This will likely screw up the extraction.
    @type output_file: string
    @param size: The size of the frames. Format is WxH. If not given, it is
        determined from the input using 'width', 'height' and 'aspect'.
    @type size: string
    @param positions: A list of positions within the video where the frames
        should be shot. Every entry must be a value in seconds or a timecode
        (e.g. HH:MM:SS.ss). The given list is not modified.
    @type positions: list
    @param numberof: In case you don't want to give positions but just a fixed
        number of frames to extract. The frames are spread evenly over the
        duration of the video, starting at position 0.
    @type numberof: int
    @param extension: If no output filename is given, construct the name with
        this extension
    @type extension: string
    @param width: The width of the extracted frame.
    @type width: int
    @param height: The height of the extracted frame
    @type height: int
    @param aspect: A float representing the aspect ratio of the video.
        4:3 equals 1.33 and 16:9 equals 1.77. See also 'width'
    @type aspect: float or "4:3" like string
    @param profile: A profile to use. The priority is on the parameters
        directly given to the function.
    @type profile: string
    @param update_fnc: A function called to display or log the extraction
        status. This function must accept a string.
    @type update_fnc: function
    @param message_fnc: A function to log important messages or errors.
        This function must accept a string.
    @type message_fnc: function
    @return: 1 if the extraction was successful, 0 if not
    @rtype: int
    @raise ValueError: if both or none of 'positions' and 'numberof' are
        given, or if a position is neither seconds nor a timecode
    """

    #---------#
    # PROFILE #
    #---------#

    ## Takes parameters from the profile if they are not directly given
    if profile:
        profile = get_extract_profile(profile)
        size = chose(size, 'size', profile)
        positions = chose(positions, 'positions', profile)
        numberof = chose(numberof, 'numberof', profile)
        extension = chose(extension, 'extension', profile)
        width = chose(width, 'width', profile)
        height = chose(height, 'height', profile)

    #---------------#
    # Check and fix #
    #---------------#

    ## If neither positions nor a number of shots are given
    if not positions and not numberof:
        raise ValueError("Either argument 'positions' xor argument "
                         "'numberof' must be given")
    ## If both are given
    if positions and numberof:
        raise ValueError("Arguments 'positions' and 'numberof' exclude "
                         "each other")

    if numberof:
        ## Just a number of shots is given: spread them over the duration
        info = ffprobe_metadata(input_file)
        if info is None:
            message_fnc("An error occured while receiving the video log")
            return 0
        ## The duration has to be validated before it is used; a missing or
        ## unparsable value is reported instead of raising from float()
        try:
            duration = float(info['format']['duration'])
        except (KeyError, TypeError, ValueError):
            duration = None
        if duration is None:
            message_fnc("Could not extract by 'numberof' because video "
                        "duration is unknown.")
            return 0
        interval = duration / numberof
        positions = [shot * interval for shot in range(numberof)]
    else:
        ## Specific positions are given: validate them and convert timecodes
        ## to seconds. A new list is built so that the caller's (or the
        ## profile's) list is left untouched.
        checked_positions = []
        for pos in positions:
            if not (is_seconds(pos) or is_timecode(pos)):
                raise ValueError("The given position '%s' is neither a value "
                                 "in seconds nor a timecode!" % str(pos))
            if is_timecode(pos):
                pos = timecode_to_seconds(pos)
            checked_positions.append(pos)
        positions = checked_positions

    ## If no output filename is given, use input filename and append extension
    if output_file is None:
        ipath = os.path.splitext(input_file)[0]
        if not extension.startswith("."):
            extension = "." + extension
        output_file = ipath + extension

    ## If no explicit size for the frames is given
    if not size:
        size = determine_resolution_preserving_aspect(input_file, width,
                                                      height, aspect)

    #------------#
    # Extraction #
    #------------#

    total = len(positions)
    ## Width of the zero padded sequential number, e.g. 2 for 10..99 frames
    padding = len(str(total))
    input_name = os.path.splitext(os.path.split(input_file)[1])[0]
    number_substituted = '%(number)' in output_file

    for index, position in enumerate(positions):
        counter = index + 1

        #---------------------------#
        # Generate output file name #
        #---------------------------#

        ## Substitute the placeholders; a template without any placeholder
        ## is returned unchanged by the '%' operator
        output_filename = output_file % {
            'input': input_name,
            'timecode': seconds_to_timecode(position),
            'size': size,
            'number': counter
        }

        ## More than one shot without a '%(number)' placeholder: append a
        ## sequential number so the frames do not overwrite each other.
        ## The number is appended to the substituted name so that the other
        ## placeholders are kept and no stray '%' is handed to FFmpeg.
        if not number_substituted and total > 1:
            path, ext = os.path.splitext(output_filename)
            output_filename = path + str(counter).zfill(padding) + ext

        #-------------#
        # Run process #
        #-------------#

        ## Build the command for ffmpeg
        ## NOTE: the command is split on whitespace, so paths that contain
        ## whitespace end up as several arguments
        command = (CFG_BIBENCODE_FFMPEG_EXTRACT_COMMAND % (
                   position, input_file, size, output_filename
                   )).split()
        ## Start the subprocess; communicate() drains stderr and blocks
        ## until the process has terminated
        process = subprocess.Popen(command, stderr=subprocess.PIPE)
        ## We want to keep the last lines of output in case of an error
        stderr = process.communicate()[1].splitlines()[-5:]

        ## If something went wrong, print the last lines of the log
        if process.returncode != 0:
            msg = ("Error while extracting frame %d of %d"
                   % (counter, total))
            message_fnc(msg)
            update_fnc(msg)
            ## Print the end of the log
            message_fnc("Last lines of the FFmpeg log:")
            for line in stderr:
                message_fnc(line)
            return 0

        update_fnc("Frame %d of %d extracted" % (counter, total))

    ## Everything should be fine if this position is reached
    message_fnc("Extraction of frames was successful")
    return 1