예제 #1
0
def get_encoding_profiles():
    """ Return the encoding profiles as a dictionary.

    Seeds the user-local profile file from the shipped default on first
    use, then merges both files, local entries overriding the defaults.
    """
    ## Bootstrap the local profile file if it does not exist yet
    if not os.path.exists(CFG_BIBENCODE_PROFILES_ENCODING_LOCAL):
        shutil.copy(CFG_BIBENCODE_PROFILES_ENCODING,
                    CFG_BIBENCODE_PROFILES_ENCODING_LOCAL)
    profiles = json_decode_file(CFG_BIBENCODE_PROFILES_ENCODING)
    ## Local definitions take precedence over the shipped defaults
    profiles.update(json_decode_file(CFG_BIBENCODE_PROFILES_ENCODING_LOCAL))
    return profiles
예제 #2
0
def get_extract_profiles():
    """ Return the frame extraction profiles as a dictionary.

    Seeds the user-local profile file from the shipped default on first
    use, then merges both files, local entries overriding the defaults.
    """
    ## Bootstrap the local profile file if it does not exist yet
    if not os.path.exists(CFG_BIBENCODE_PROFILES_EXTRACT_LOCAL):
        shutil.copy(CFG_BIBENCODE_PROFILES_EXTRACT,
                    CFG_BIBENCODE_PROFILES_EXTRACT_LOCAL)
    profiles = json_decode_file(CFG_BIBENCODE_PROFILES_EXTRACT)
    ## Local definitions take precedence over the shipped defaults
    profiles.update(json_decode_file(CFG_BIBENCODE_PROFILES_EXTRACT_LOCAL))
    return profiles
예제 #3
0
def get_extract_profiles():
    """ Return a dictionary of the frame extraction profiles.

    The local profile file is created from the default one when missing;
    values from the local file override the defaults on merge.
    """
    local_path = CFG_BIBENCODE_PROFILES_EXTRACT_LOCAL
    ## First run: copy the shipped defaults to the local location
    if not os.path.exists(local_path):
        shutil.copy(CFG_BIBENCODE_PROFILES_EXTRACT, local_path)
    merged = json_decode_file(CFG_BIBENCODE_PROFILES_EXTRACT)
    ## Overlay local customisations on top of the defaults
    merged.update(json_decode_file(local_path))
    return merged
예제 #4
0
def get_encoding_profiles():
    """ Return a dictionary of the encoding profiles.

    The local profile file is created from the default one when missing;
    values from the local file override the defaults on merge.
    """
    local_path = CFG_BIBENCODE_PROFILES_ENCODING_LOCAL
    ## First run: copy the shipped defaults to the local location
    if not os.path.exists(local_path):
        shutil.copy(CFG_BIBENCODE_PROFILES_ENCODING, local_path)
    merged = json_decode_file(CFG_BIBENCODE_PROFILES_ENCODING)
    ## Overlay local customisations on top of the defaults
    merged.update(json_decode_file(local_path))
    return merged
예제 #5
0
파일: daemon.py 프로젝트: mhellmic/b2share
def watch_directory(new_job_dir=CFG_BIBENCODE_DAEMON_DIR_NEWJOBS,
                    old_job_dir=CFG_BIBENCODE_DAEMON_DIR_OLDJOBS):
    """ Checks a folder for job files, parses and executes them
    @param new_job_dir: path to the directory with new jobs
    @type new_job_dir: string
    @param old_job_dir: path to the directory where the old jobs are moved
    @type old_job_dir: string
    @return: always 1
    @rtype: int
    """
    global _NUMBER, _TASKID
    write_message('Checking directory %s for new jobs' % new_job_dir)
    task_update_progress('Checking for new jobs')
    _TASKID = task_get_task_param('task_id')
    ## NOTE: renamed loop variable from 'file' to avoid shadowing the builtin
    for job_filename in os.listdir(new_job_dir):
        job_fullpath = os.path.join(new_job_dir, job_filename)
        ## Only act on files that carry the BibEncode job signature
        if has_signature(job_fullpath):
            write_message('New Job found: %s' % job_filename)
            job = json_decode_file(job_fullpath)
            if not getval(job, 'isbatch'):
                args = job_to_args(job)
                if not launch_task(args):
                    write_message('Error submitting task')
            else:
                ## We need the job description for the batch engine
                ## So we need to use the new path inside the oldjobs dir
                process_batch(os.path.join(old_job_dir, job_filename))
            ## Move the file to the done dir
            shutil.move(job_fullpath, os.path.join(old_job_dir, job_filename))
            ## Update number for next job
            _NUMBER += 1
    return 1
예제 #6
0
def watch_directory(new_job_dir=CFG_BIBENCODE_DAEMON_DIR_NEWJOBS,
                    old_job_dir=CFG_BIBENCODE_DAEMON_DIR_OLDJOBS):
    """ Checks a folder for job files, parses and executes them
    @param new_job_dir: path to the directory with new jobs
    @type new_job_dir: string
    @param old_job_dir: path to the directory where the old jobs are moved
    @type old_job_dir: string
    @return: always 1
    @rtype: int
    """
    global _NUMBER, _TASKID
    write_message('Checking directory %s for new jobs' % new_job_dir)
    task_update_progress('Checking for new jobs')
    _TASKID = task_get_task_param('task_id')
    ## NOTE: renamed loop variable from 'file' to avoid shadowing the builtin
    for job_filename in os.listdir(new_job_dir):
        job_fullpath = os.path.join(new_job_dir, job_filename)
        ## Only act on files that carry the BibEncode job signature
        if has_signature(job_fullpath):
            write_message('New Job found: %s' % job_filename)
            job = json_decode_file(job_fullpath)
            if not getval(job, 'isbatch'):
                args = job_to_args(job)
                if not launch_task(args):
                    write_message('Error submitting task')
            else:
                ## We need the job description for the batch engine
                ## So we need to use the new path inside the oldjobs dir
                process_batch(os.path.join(old_job_dir, job_filename))
            ## Move the file to the done dir
            shutil.move(job_fullpath, os.path.join(old_job_dir, job_filename))
            ## Update number for next job
            _NUMBER += 1
    return 1
예제 #7
0
def create_update_jobs_by_recids(recids, batch_template_file, job_directory=CFG_BIBENCODE_DAEMON_DIR_NEWJOBS):
    """ Writes an update job description file for every given record id.
    @param recids: iterable of record ids to update
    @type recids: iterable
    @param batch_template_file: fullpath to the template for the update
    @type batch_template_file: string
    @param job_directory: fullpath to the directory storing the job files
    @type job_directory: string
    @return: always 1
    @rtype: int
    """
    template = json_decode_file(batch_template_file)
    for recid in recids:
        message = "Creating Update Job for %d" % recid
        task_update_progress(message)
        write_message(message)
        ## Copy the template so each record gets its own description
        job = dict(template)
        job["recid"] = recid
        ## Timestamp keeps job filenames unique per record
        job_filename = "update_%d_%s.job" % (recid, generate_timestamp())
        create_job_from_dictionary(job, job_filename, job_directory)
    return 1
예제 #8
0
def create_update_jobs_by_recids(recids,
                                 batch_template_file,
                                 job_directory=CFG_BIBENCODE_DAEMON_DIR_NEWJOBS
                                 ):
    """ Writes an update job description file for every given record id.
    @param recids: iterable of record ids to update
    @type recids: iterable
    @param batch_template_file: fullpath to the template for the update
    @type batch_template_file: string
    @param job_directory: fullpath to the directory storing the job files
    @type job_directory: string
    @return: always 1
    @rtype: int
    """
    template = json_decode_file(batch_template_file)
    for recid in recids:
        message = "Creating Update Job for %d" % recid
        task_update_progress(message)
        write_message(message)
        ## Copy the template so each record gets its own description
        job = dict(template)
        job['recid'] = recid
        ## Timestamp keeps job filenames unique per record
        job_filename = "update_%d_%s.job" % (recid, generate_timestamp())
        create_job_from_dictionary(job, job_filename, job_directory)
    return 1
예제 #9
0
def pbcore_metadata(input_file, pbcoreIdentifier=None, pbcoreTitle=None,
                    pbcoreDescription=None, instantiationIdentifier=None,
                    instantiationPhysical=None, instantiationLocation=None,
                    instantiationGenerations=None,instantiationExtension=None,
                    instantiationPart=None, instantiationAnnotation=None,
                    instantiationRights=None, instantiationRelation=None,
                    xmlns="pbcore", aspect_override=None
                    ):
    """ Transforms parsed metadata to a PBCore XML representation.
    To supplement all the pbcore fields, we need both ffprobe and mediainfo.
    If only ffprobe is installed, it will not fail but supplement only partially.

    NOTE(review): many of the keyword arguments above appear unused in the
    body; presumably the mapping file reaches them through ``locals()`` via
    'local->...' paths (see _follow_path) — verify against the mapping file.

    @param input_file: full path to the file to extract the metadata from
    @type input_file: string
    @param pbcoreIdentifier: value for the <pbcoreIdentifier> element, if any
    @param pbcoreTitle: value for the <pbcoreTitle> element, if any
    @param xmlns: XML namespace prefix to apply to every tag; empty disables
    @param aspect_override: aspect ratio passed through to mediainfo parsing
    @return: pbcore xml metadata representation
    @rtype: string
    """

    def _follow_path(path, locals_u, meta_dict, probe_dict, stream_number=None):
        """
        Tries to follow a given path and returns the value it represents.
        The path is a string that must be like this:
            local->variable_name
            ffprobe->format->param
            ffprobe->video->param
            ffprobe->audio->param
            ffprobe->stream->param
            mediainfo->general->param
            mediainfo->audio->param
            mediainfo->video->param
            mediainfo->track->param

        @param path: Path to the value
        @type: string
        @param locals_u: Local variables
        @type locals_u: dict
        @param meta_dict: Mediainfo metadata
        @type meta_dict: dict
        @param probe_dict: FFprobe metadata
        @type probe_dict: dict
        @param stream_number: To follow a path to a specific stream
        @type stream_number: int
        @return: value of the element the path points to
        @rtype: string
        """
        path_segments = path.split("->")
        ## ffprobe
        if path_segments[0] == 'ffprobe':
            ## format
            if path_segments[1] == 'format':
                return getval(probe_dict['format'], path_segments[2], 0)
            ## 1st video
            elif path_segments[1] in ('video', 'audio'):
                ## Returns the value from the FIRST stream of that codec type
                for stream in probe_dict['streams']:
                    if getval(stream, 'codec_type') == path_segments[1]:
                        return getval(stream, path_segments[2], 0)
            ## stream by number
            elif path_segments[1] == 'stream':
                return getval(probe_dict['streams'][stream_number],
                              path_segments[2], 0)
        ## mediainfo
        elif path_segments[0] == 'mediainfo':
            ## general, video, audio
            if path_segments[1] in ('general', 'video', 'audio'):
                for track in meta_dict:
                    if getval(track, 'kind_of_stream').lower() == path_segments[1]:
                        return getval(track, path_segments[2], 0)
            ## stream by number
            elif path_segments[1] == 'track':
                ## We rely on format being the first track in mediainfo
                ## And the order of streams in ffprobe and tracks in mediainfo being the same
                return getval(meta_dict[stream_number+1], path_segments[2], 0)
        ## local variable
        elif path_segments[0] == 'local':
            return getval(locals_u, path_segments[1], 0)
        ## direct input
        else:
            ## Anything without a known prefix is returned verbatim
            return path_segments[0]

    def _map_values(mapping, locals_u, meta_dict, probe_dict, stream_number=None):
        """ Substitutes a mapping dictionary and returns the substituted values.
        Each mapping entry must contain a 'tag', a 'mapping' and optionally a 'call'.

        @param mapping: mapping dictionary to substitute
        @type: dict
        @param locals_u: Local variables
        @type locals_u: dict
        @param meta_dict: Mediainfo metadata
        @type meta_dict: dict
        @param probe_dict: FFprobe metadata
        @type probe_dict: dict
        @param stream_number: To follow a path to a specific stream
        @type stream_number: int
        @return: substituted mapping
        @rtype: string
        """
        items = []
        for value in mapping:
            mapping = value['mapping']
            tag = value['tag']
            ## 'call' names a module-level function applied to the raw value
            call = getval(value, 'call')
            ## Several sub-paths may be joined into one tag, separated by ';;'
            micro_mappings = mapping.split(';;')
            values = []
            foundall = True
            for micro_mapping in micro_mappings:
                value = _follow_path(micro_mapping, locals_u, meta_dict, probe_dict, stream_number)
                if value:
                    if call:
                        ## Dispatch by name through the module globals
                        value = globals()[call](value)
                    values.append(value.strip())
                else:
                    ## Effectively sets foundall to False: the tag is only
                    ## emitted when every micro mapping resolved to a value
                    foundall &= False
            try:
                if values and foundall:
                    items.append(tag % "".join(values))
            except:
                ## NOTE(review): bare except silently drops tags whose
                ## substitution fails (e.g. bad format string) — consider
                ## narrowing to TypeError/ValueError and logging
                pass
        return items

    ## Get the metadata from ffprobe and mediainfo
    meta_dict = mediainfo_metadata(input_file, aspect_override)
    probe_dict = ffprobe_metadata(input_file)

    # parse the mappings
    pbcore_mappings = json_decode_file(CFG_BIBENCODE_PBCORE_MAPPINGS)

    ## INSTANTIATION ##
    # According to the PBcore standard, this strict order MUST be followed
    instantiation_mapping = pbcore_mappings['instantiation_mapping']

    ## ESSENCE TRACK ##
    # According to the PBcore standard, this strict order MUST be followed
    essencetrack_mapping = pbcore_mappings['essencetrack_mapping']

    ## The XML header for the PBcore document
    ## %(xmlns)s is filled in at the end, depending on the xmlns argument
    header = (
    """<?xml version="1.0" encoding="UTF-8"?><pbcoreDescriptionDocument """
    """xmlns%(xmlns)s="http://www.pbcore.org/PBCore/PBCoreNamespace.html" """
    """xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" """
    """xsi:schemaLocation="http://www.pbcore.org/PBCore/PBCoreNamespace.html">"""
    )
    if pbcoreIdentifier:
        pbcoreIdentifier ="""<pbcoreIdentifier>%s</pbcoreIdentifier>""" % pbcoreIdentifier
    else:
        pbcoreIdentifier = ""
    if pbcoreTitle:
        pbcoreTitle = """<pbcoreTitle>%s</pbcoreTitle>""" % pbcoreTitle
    else:
        pbcoreTitle = ""
    tail = """</pbcoreDescriptionDocument>"""

    ## ESSENCE TRACKS ##
    ## One essence track per ffprobe stream; locals() is passed so the
    ## mapping file can reference local variables via 'local->...' paths
    essencetracks = []
    for stream_no in range(len(probe_dict['streams'])):
        essencetracks.append(_map_values(essencetrack_mapping, locals(),
                                         meta_dict, probe_dict, stream_no))
    joinedtracks = []
    for track in essencetracks:
        track = "<instantiationEssenceTrack>" + "".join(track) + "</instantiationEssenceTrack>"
        joinedtracks.append(track)
    ## NOTE(review): joinedtracks is not referenced again below — presumably
    ## the instantiation mapping pulls it in via 'local->joinedtracks';
    ## verify against CFG_BIBENCODE_PBCORE_MAPPINGS
    joinedtracks = "".join(joinedtracks)

    ## INSTANTIATION ##
    instantiation_items = _map_values(instantiation_mapping, locals(),
                                      meta_dict, probe_dict)
    joinedinstantiation = "<pbcoreInstantiation>" + "".join(instantiation_items) + "</pbcoreInstantiation>"

    joined = "%s%s%s%s%s" % (header, pbcoreIdentifier, pbcoreTitle,
                           joinedinstantiation, tail)

    if xmlns:
        ## Prefix every opening and closing tag with the namespace
        ## NOTE(review): regex patterns are non-raw strings; \w and \g are
        ## not recognised escapes so they pass through, but raw strings
        ## (r"...") would be safer
        joined = joined % {"xmlns" : ":%s" % xmlns}
        joined = re.sub("<(\w[^>]+)>", "<%s:\g<1>>" % xmlns, joined)
        joined = re.sub("<\/([^>]+)>", "</%s:\g<1>>" % xmlns, joined)
    else:
        joined = joined % {"xmlns" : ""}

    return joined
예제 #10
0
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 if the process was successful, 0 if not
    @rtype: int
    """
    from invenio.legacy.bibdocfile.cli import cli_fix_marc

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = 'bibencode_'+ str(batch_job['recid']) + '_' + str(uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
        ## FIX: use open() with a context manager instead of the Python-2-only
        ## file() builtin, so the handle is closed even if write() fails
        with open(xml_filename, 'w') as xml_file:
            xml_file.write(marcxml)
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    # if record_exists(batch_job['recid']) < 1:
    #     raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        ## Scan every file of every bibdoc for one matching the master's
        ## comment, description and subformat
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description', description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat', subformat)
                if (comment == m_comment and
                    description == m_description and
                    subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect ratio from the record
                    try:
                        ## Assumes pbcore metadata mapping
                        ## FIX: use the record being processed instead of the
                        ## hard-coded record id 124 (leftover test value)
                        batch_job['aspect'] = get_fieldvalues(batch_job['recid'], CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                          % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job, 'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory of the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first and then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc"
                          % bibdoc_video_docname)
            master_format = compose_format(
                                    bibdoc_video_extension,
                                    getval(batch_job, 'bibdoc_master_subformat', 'master')
                                    )
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                    batch_job['input'],
                    version=1,
                    description=getval(batch_job, 'bibdoc_master_description'),
                    comment=getval(batch_job, 'bibdoc_master_comment'),
                    docformat=master_format
                    )

    #-----------#
    # JOBS LOOP #
    #-----------#

    return_code = 1
    global _BATCH_STEP

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------"
                           % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(job['bibdoc_docname']).safe_substitute({'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, 'assure_quality') and getval(job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None
            ## We need an extension defined for the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(
                                                 bibdoc_video_directory,
                                                 bibdoc_video_extension
                                                 )
            _task_write_message("Transcoding %s to %s;%s" % (bibdoc_slave_video_docname,
                                bibdoc_video_extension,
                                bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            encoding_result = encode_video(
                 input_file=batch_job['input'],
                 output_file=bibdoc_video_fullpath,
                 acodec=getval(job, 'audiocodec'),
                 vcodec=getval(job, 'videocodec'),
                 abitrate=getval(job, 'videobitrate'),
                 vbitrate=getval(job, 'audiobitrate'),
                 resolution=getval(job, 'resolution'),
                 passes=getval(job, 'passes', 1),
                 special=getval(job, 'special'),
                 specialfirst=getval(job, 'specialfirst'),
                 specialsecond=getval(job, 'specialsecond'),
                 metadata=getval(job, 'metadata'),
                 width=getval(job, 'width'),
                 height=getval(job, 'height'),
                 aspect=getval(batch_job, 'aspect'), # Aspect for every job
                 profile=getval(job, 'profile'),
                 update_fnc=_task_update_overall_status,
                 message_fnc=_task_write_message
                 )
            return_code &= encoding_result
            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(bibdoc_video_fullpath,
                          compose_file(bibdoc_video_directory,
                                       bibdoc_video_extension,
                                       bibdoc_video_subformat,
                                       1,
                                       bibdoc_slave_video_docname)
                          )
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                              bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(getval(job, 'bibdoc_description'),
                                                 bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            #Move this to the batch description
            bibdoc_frame_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
            tmpfname = (tmpdir + "/" + bibdoc_frame_docname + '.'
                        + getval(profile, 'extension',
                        getval(job, 'extension', 'jpg')))
            extraction_result = extract_frames(input_file=batch_job['input'],
                           output_file=tmpfname,
                           size=getval(job, 'size'),
                           positions=getval(job, 'positions'),
                           numberof=getval(job, 'numberof'),
                           width=getval(job, 'width'),
                           height=getval(job, 'height'),
                           aspect=getval(batch_job, 'aspect'),
                           profile=getval(job, 'profile'),
                           update_fnc=_task_update_overall_status,
                           )
            return_code &= extraction_result

            ## only on success:
            if extraction_result:
                ## for every filename in the directorys, create a bibdoc that contains
                ## all sizes of the frame from the two directories
                files = os.listdir(tmpdir)
                for filename in files:
                    ## The docname was altered by BibEncode extract through substitution
                    ## Retrieve it from the filename again
                    bibdoc_frame_docname, bibdoc_frame_extension = os.path.splitext(filename)
                    _task_write_message("Creating new bibdoc for %s" % bibdoc_frame_docname)
                    ## If the bibdoc exists, receive it
                    if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                        bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                    ## Create a new bibdoc if it does not exist
                    else:
                        bibdoc_frame = recdoc.add_bibdoc(docname=bibdoc_frame_docname)

                    ## The filename including path from tmpdir
                    fname = os.path.join(tmpdir, filename)

                    bibdoc_frame_format = compose_format(bibdoc_frame_extension, bibdoc_frame_subformat)
                    ## Same as with the master, if the format already exists,
                    ## override it, because something went wrong before
                    if bibdoc_frame.format_already_exists_p(bibdoc_frame_format):
                        bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                    _task_write_message("Adding %s jpg;%s to BibDoc"
                                  % (bibdoc_frame_docname,
                                     getval(job, 'bibdoc_subformat')))
                    bibdoc_frame.add_file_new_format(
                                    fname,
                                    version=1,
                                    description=getval(job, 'bibdoc_description'),
                                    comment=getval(job, 'bibdoc_comment'),
                                    docformat=bibdoc_frame_format)
            ## Remove the temporary folders
            _task_write_message("Removing temporary directory")
            shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file = getval(batch_job, 'input'),
                                 pbcoreIdentifier = batch_job['recid'],
                                 aspect_override = getval(batch_job, 'aspect'))
        from invenio_formatter.engines.xslt import format
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        ## Read a ready-made MARCXML snippet from disk and upload it
        with open(getval(batch_job, 'marc_snippet')) as marc_snippet:
            marcxml = marc_snippet.read()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        # only if successful
        if not return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern', '') in getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            ## FIX: the original read type(getval(...) == type(str())), which
            ## compares first and takes type() of the resulting bool — always
            ## truthy, so the address-less branch was unreachable. Use
            ## isinstance to pass the value as an address only when it is one.
            if isinstance(getval(batch_job, 'notify_admin'), str):
                _notify_error_admin(batch_job,
                                    getval(batch_job, 'notify_admin'))
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
    return 1
예제 #11
0
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    Reads and sanitises the batch description, optionally locates the master
    video of the record, runs every encoding/extraction job the description
    lists, attaches the produced files to the record's BibDocs, fixes the
    MARC and finally sends user/admin notifications.

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 if the process was successful, 0 if not
    @rtype: int
    """
    from invenio.legacy.bibdocfile.cli import cli_fix_marc

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = 'bibencode_'+ str(batch_job['recid']) + '_' + str(uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
        ## open() instead of the Python-2-only file() builtin; the context
        ## manager guarantees the handle is closed before bibupload reads it
        with open(xml_filename, 'w') as xml_file:
            xml_file.write(marcxml)
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    # if record_exists(batch_job['recid']) < 1:
    #     raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description', description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat', subformat)
                if (comment == m_comment and
                    description == m_description and
                    subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect of the video from the record
                    try:
                        ## Assumes pbcore metadata mapping
                        ## BUGFIX: the original queried the hard-coded record
                        ## id 124 (leftover test value) instead of the record
                        ## this batch job is about
                        batch_job['aspect'] = get_fieldvalues(
                            batch_job['recid'],
                            CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                          % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job, 'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory of the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first an then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc"
                          % bibdoc_video_docname)
            master_format = compose_format(
                                    bibdoc_video_extension,
                                    getval(batch_job, 'bibdoc_master_subformat', 'master')
                                    )
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                    batch_job['input'],
                    version=1,
                    description=getval(batch_job, 'bibdoc_master_description'),
                    comment=getval(batch_job, 'bibdoc_master_comment'),
                    docformat=master_format
                    )

    #-----------#
    # JOBS LOOP #
    #-----------#

    ## return_code stays 1 only while every job succeeds (bitwise AND below)
    return_code = 1
    global _BATCH_STEP

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------"
                           % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(job['bibdoc_docname']).safe_substitute({'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, 'assure_quality') and getval(job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None
            ## We need an extension defined for the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(
                                                 bibdoc_video_directory,
                                                 bibdoc_video_extension
                                                 )
            _task_write_message("Transcoding %s to %s;%s" % (bibdoc_slave_video_docname,
                                bibdoc_video_extension,
                                bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            ## BUGFIX: audio/video bitrates were swapped in the original call
            ## (abitrate got 'videobitrate' and vbitrate got 'audiobitrate')
            encoding_result = encode_video(
                 input_file=batch_job['input'],
                 output_file=bibdoc_video_fullpath,
                 acodec=getval(job, 'audiocodec'),
                 vcodec=getval(job, 'videocodec'),
                 abitrate=getval(job, 'audiobitrate'),
                 vbitrate=getval(job, 'videobitrate'),
                 resolution=getval(job, 'resolution'),
                 passes=getval(job, 'passes', 1),
                 special=getval(job, 'special'),
                 specialfirst=getval(job, 'specialfirst'),
                 specialsecond=getval(job, 'specialsecond'),
                 metadata=getval(job, 'metadata'),
                 width=getval(job, 'width'),
                 height=getval(job, 'height'),
                 aspect=getval(batch_job, 'aspect'), # Aspect for every job
                 profile=getval(job, 'profile'),
                 update_fnc=_task_update_overall_status,
                 message_fnc=_task_write_message
                 )
            return_code &= encoding_result
            ## only on success
            if  encoding_result:
                ## Rename it, adding the subformat
                os.rename(bibdoc_video_fullpath,
                          compose_file(bibdoc_video_directory,
                                       bibdoc_video_extension,
                                       bibdoc_video_subformat,
                                       1,
                                       bibdoc_slave_video_docname)
                          )
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                              bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(getval(job, 'bibdoc_description'),
                                                 bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            #Move this to the batch description
            bibdoc_frame_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
            tmpfname = (tmpdir + "/" + bibdoc_frame_docname + '.'
                        + getval(profile, 'extension',
                        getval(job, 'extension', 'jpg')))
            extraction_result = extract_frames(input_file=batch_job['input'],
                           output_file=tmpfname,
                           size=getval(job, 'size'),
                           positions=getval(job, 'positions'),
                           numberof=getval(job, 'numberof'),
                           width=getval(job, 'width'),
                           height=getval(job, 'height'),
                           aspect=getval(batch_job, 'aspect'),
                           profile=getval(job, 'profile'),
                           update_fnc=_task_update_overall_status,
                           )
            return_code &= extraction_result

            ## only on success:
            if extraction_result:
                ## for every filename in the directorys, create a bibdoc that contains
                ## all sizes of the frame from the two directories
                files = os.listdir(tmpdir)
                for filename in files:
                    ## The docname was altered by BibEncode extract through substitution
                    ## Retrieve it from the filename again
                    bibdoc_frame_docname, bibdoc_frame_extension = os.path.splitext(filename)
                    _task_write_message("Creating new bibdoc for %s" % bibdoc_frame_docname)
                    ## If the bibdoc exists, receive it
                    if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                        bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                    ## Create a new bibdoc if it does not exist
                    else:
                        bibdoc_frame = recdoc.add_bibdoc(docname=bibdoc_frame_docname)

                    ## The filename including path from tmpdir
                    fname = os.path.join(tmpdir, filename)

                    bibdoc_frame_format = compose_format(bibdoc_frame_extension, bibdoc_frame_subformat)
                    ## Same as with the master, if the format allready exists,
                    ## override it, because something went wrong before
                    if bibdoc_frame.format_already_exists_p(bibdoc_frame_format):
                        bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                    _task_write_message("Adding %s jpg;%s to BibDoc"
                                  % (bibdoc_frame_docname,
                                     getval(job, 'bibdoc_subformat')))
                    bibdoc_frame.add_file_new_format(
                                    fname,
                                    version=1,
                                    description=getval(job, 'bibdoc_description'),
                                    comment=getval(job, 'bibdoc_comment'),
                                    docformat=bibdoc_frame_format)
            ## Remove the temporary folders
            _task_write_message("Removing temporary directory")
            shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file = getval(batch_job, 'input'),
                                 pbcoreIdentifier = batch_job['recid'],
                                 aspect_override = getval(batch_job, 'aspect'))
        from invenio.modules.formatter.engines.xslt import format
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        with open(getval(batch_job, 'marc_snippet')) as marc_snippet:
            marcxml = marc_snippet.read()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        ## BUGFIX: the original tested 'if not return_code', which deleted the
        ## input on *failure*, contradicting the stated intent ("only if
        ## successful") and losing the source file needed for a retry.
        # only if successful
        if return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern', '') in getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            ## BUGFIX: the original read
            ##   type(getval(...) == type(str()))
            ## which takes type() of a boolean and is therefore always truthy,
            ## so the address-less else branch was unreachable.
            if isinstance(getval(batch_job, 'notify_admin'), str):
                _notify_error_admin(batch_job,
                                    getval(batch_job, 'notify_admin'))
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
    ## NOTE(review): always returns 1 even when jobs failed; callers that need
    ## the real outcome would have to inspect the notifications.  Kept as-is
    ## to preserve the existing interface.
    return 1
예제 #12
0
def task_submit_check_options():
    """ Checks the tasks arguments for validity

    Validates the ffmpeg build, the selected mode and the mode-specific
    options before the task is submitted.

    @return: True if all options are valid, False otherwise
    @rtype: bool
    """

    #----------------#
    # General Checks #
    #----------------#

    ## FFMPEG CONFIGURATION ##
    ## The status of ffmpeg should be checked before a task is submitted
    ## There is a minimum configuration that ffmpeg must be compiled with
    ## See bibencode_utils and bibencode_config
    config = check_ffmpeg_configuration()
    if config:
        ## Prints missing configuration
        string = ''
        for item in config:
            string += ('\t' + item + '\n')
        write_message(
            "FFmpeg options are missing. Please recompile and add:\n" + string)
        return False

    ## MODE ##
    ## Check if the mode is valid
    if _topt('mode') is None:
        write_message('You have to specify a mode using \'-m MODE\'')
        return False
    if _topt('mode') not in CFG_BIBENCODE_VALID_MODES:
        write_message('%s is not a valid mode. Use one of %s' %
                      (_topt('mode'), CFG_BIBENCODE_VALID_MODES))
        return False

    ## INPUT ##
    ## Check if the input file is given and if it exists
    ## You should always use an absolute path to the file
    if _topt('mode') in ('encode', 'extract', 'meta', 'batch'):
        if _topt('input') is None:
            write_message('You must specify an input file using \'-i FILE\'')
            return False
        else:
            if not os.path.exists(_topt('input')):
                print(("The file %s does not exist" % _topt('input')))
                return False

    ## OUTPUT ##
    ## Check if the output file is given and if it exists
    ## You should always use an absolute path to the file
    if _topt('mode') in ('encode', 'extract', 'meta'):
        if _topt('output') is None:
            write_message('No output file is given. Please specify with'
                          ' \'-o NAME\'')
            return False

    #---------------#
    # Encoding Mode #
    #---------------#
    if _topt('mode') == 'encode':

        ## PROFILE ## Check for a valid profile if this is given
        if _topt('profile_name') is not None:
            if _topt('profile_name') not in get_encoding_profiles():
                write_message(
                    '%s not found in %s' %
                    (_topt('profile_name'), CFG_BIBENCODE_PROFILES_ENCODING))
                return False
            ## If the profile exists
            else:
                pass

        ## AUDIOCODEC ##
        ## Checks if the audiocodec is one of the predefined
        if _topt('acodec') is not None:
            if _topt('acodec') not in CFG_BIBENCODE_FFMPEG_VALID_ACODECS:
                write_message(
                    '%s is not a valid audiocodec.\nAvailable codecs: %s' %
                    (_topt('acodec'), CFG_BIBENCODE_FFMPEG_VALID_ACODECS))
                return False

        ## VIDEOCODEC ## Checks if the videocodec is one of the predefined
        if _topt('vcodec') is not None:
            if _topt('vcodec') not in CFG_BIBENCODE_FFMPEG_VALID_VCODECS:
                write_message(
                    '%s is not a valid videocodec.\nAvailable codecs: %s' %
                    (_topt('vcodec'), CFG_BIBENCODE_FFMPEG_VALID_VCODECS))
                return False

        ## SIZE ##
        ## Checks if the size is either WxH or an FFMPEG preset
        if _topt('size') is not None:
            if not CFG_BIBENCODE_FFMPEG_RE_VALID_SIZE.match(_topt('size')):
                if _topt('size') not in CFG_BIBENCODE_FFMPEG_VALID_SIZES:
                    write_message(
                        '%s is not a valid frame size.\nEither use the'
                        ' \'WxH\' notation or one of these values:\n%s' %
                        (_topt('size'), CFG_BIBENCODE_FFMPEG_VALID_SIZES))
                    return False
        ## Check if both a size and vertical or horizontal resolution
        ## BUGFIX: the message named option 'resolution' although the
        ## conflicting option actually checked here is 'size'
        if (_topt('width') or _topt('height')) and _topt('size'):
            write_message('Options \'width\' and \'height\' can not be '
                          'combined with \'size\'')
            return False

        ## PASSES ##
        ## If a number of passes is given, it should be either 1 or 2.
        ## You could do an infinite number of passes with ffmpeg,
        ## But it will almost never make a difference above 2 passes.
        ## So, we currently only support 2 passes.
        if _topt('passes') is not None:
            if _topt('passes') not in (1, 2):
                write_message('The number of passes must be either 1 or 2')
                return False
        else:
            task_set_option('passes', 1)

        ## BITRATE ##
        ## Check if the given bitrate is either 1000 sth. or 1000k sth.
        ## NOTE(review): these checks are not implemented yet
        if _topt('abitrate') is not None:
            pass
        if _topt('vbitrate') is not None:
            pass

    #-----------------#
    # Extraction Mode #
    #-----------------#
    elif _topt('mode') == 'extract':

        ## PROFILE ##
        ## If a profile is given, check its validity
        if _topt('profile_name') is not None:
            if _topt('profile_name') not in get_extract_profiles():
                write_message(
                    '%s not found in %s' %
                    (_topt('profile_name'), CFG_BIBENCODE_PROFILES_EXTRACT))
                return False
            ## If the profile exists
            else:
                pass

        ## You cannot give both a number and specific positions
        ## !!! Think about allowing both -> First extract by number,
        ## !!! then additionally the specific positions
        if (((_topt('numberof') is not None) and
             (_topt('positions') is not None)) or
            ((_topt('numberof') is None) and (_topt('positions') is None))):
            write_message('Please specify either a number of frames to '
                          'take or specific positions')
            return False

        ## SIZE ##
        ## Checks if the size is either WxH or an FFMPEG specific value
        ## BUGFIX: message said "valus" and was missing the space before
        ## 'WxH' (now consistent with the encoding-mode message above)
        if _topt('size') is not None:
            if not CFG_BIBENCODE_FFMPEG_RE_VALID_SIZE.match(_topt('size')):
                if _topt('size') not in CFG_BIBENCODE_FFMPEG_VALID_SIZES:
                    write_message(
                        '%s is not a valid frame size.\nEither use the'
                        ' \'WxH\' notation or one of these values:\n%s' %
                        (_topt('size'), CFG_BIBENCODE_FFMPEG_VALID_SIZES))
                    return False

    #---------------#
    # Metadata Mode #
    #---------------#
    elif _topt('mode') == 'meta':

        ## You have to give exactly one meta suboption
        if not _xor(_topt('meta_input'), _topt('meta_dump')):
            write_message("You can either dump or write metadata")
            return False

        ## METADATA INPUT ##
        if _topt('meta_input') is not None:
            ## Check if this is either a filename (that should exist)
            ## or if this a jsonic metadata notation
            if os.path.exists(_topt('meta_input')):
                pass
            else:
                try:
                    metadict = json.loads(_topt('meta_input'))
                    task_set_option('meta_input', metadict)
                except ValueError:
                    write_message(
                        'The value %s of the \'--meta\' parameter is '
                        'neither a valid filename nor a jsonic dict' %
                        _topt('meta_input'))
                    return False

    #------------#
    # Batch Mode #
    #------------#
    elif _topt('mode') == 'batch':
        if _topt('collection') and _topt('search'):
            write_message('You can either use \'search\' or \'collection\'')
            return False
        elif _topt('collection'):
            template = json_decode_file(_topt('input'))
            print('\n')
            print("#---------------------------------------------#")
            print("# YOU ARE ABOUT TO UPDATE A WHOLE COLLECTION  #")
            print("#---------------------------------------------#")
            print('\n')
            print('The selected template file contains:')
            pprint(template)
            print('\n')
        elif _topt('search'):
            template = json_decode_file(_topt('input'))
            message = "# YOU ARE ABOUT TO UPDATE RECORDS MATCHING '%s'  #" % _topt(
                'search')
            print('\n')
            print(("#" + "-" * (len(message) - 2) + "#"))
            print(message)
            print(("#" + "-" * (len(message) - 2) + "#"))
            print('\n')
            print('The selected template file contains:')
            pprint(template)
            print('\n')

    #-------------#
    # Daemon Mode #
    #-------------#
    elif _topt('mode') == 'daemon':
        task_set_task_param('task_specific_name', 'daemon')
        ## You can either give none or both folders, but not only one
        if _xor(_topt('new_job_folder'), _topt('old_job_folder')):
            write_message('When specifying folders for the daemon mode, you '
                          'have to specify both the folder for the new jobs '
                          'and the old ones')
            return False

    ## If every check went fine
    return True
예제 #13
0
def Video_Processing(parameters, curdir, form, user_info=None):
    """
    Perform all the required processing of the video.

    Builds a BibEncode batch-job description from the submission directory
    and schedules it via create_job_from_dictionary.

    Parameters are:
    * "batch_template": to specify the absolute path to a
        configuration describing which manipulation the uploaded file
        should receive. If empty, will use by default
        etc/bibencode/batch_template_submission.json
    * "aspect": to specify in which form element the aspect will be available
    * "title": to specify in which form element the title will be available
    """

    ## Read the batch template for submissions
    if parameters.get("batch_template"):
        try:
            batch_template = json_decode_file(parameters.get("batch_template"))
        except Exception:
            register_exception(prefix="The given batch template was not readable")
            raise
    else:
        batch_template = json_decode_file(CFG_BIBENCODE_TEMPLATE_BATCH_SUBMISSION)

    ## Handle the filepath
    file_storing_path = os.path.join(curdir, "files", str(user_info["uid"]),
                                     "NewFile", "filepath")
    try:
        with open(file_storing_path) as fp:
            fullpath = fp.read()
        batch_template["input"] = fullpath
    except Exception:
        register_exception(prefix="The file containing the path to the video was not readable")
        raise

    ## Handle the filename
    file_storing_name = os.path.join(curdir, "files", str(user_info["uid"]),
                                     "NewFile", "filename")
    try:
        with open(file_storing_name) as fp:
            filename = fp.read()
        batch_template["bibdoc_master_docname"] = os.path.splitext(os.path.split(filename)[1])[0]
        batch_template["bibdoc_master_extension"] = os.path.splitext(filename)[1]
        batch_template["submission_filename"] = filename
    except Exception:
        register_exception(prefix="The file containing the original filename of the video was not readable")
        raise

    ## Handle the aspect ratio
    if parameters.get("aspect"):
        try:
            file_storing_aspect = os.path.join(curdir, parameters.get("aspect"))
            with open(file_storing_aspect) as fp:
                aspect = fp.read()
            batch_template["aspect"] = aspect
        except Exception:
            register_exception(prefix="The file containing the aspect ratio of the video was not readable")
            raise
    else:
        batch_template["aspect"] = None

    ## Handle the title
    if parameters.get("title"):
        try:
            file_storing_title = os.path.join(curdir, parameters["title"])
            with open(file_storing_title) as fp:
                title = fp.read()
            ## BUGFIX: the original read the title but never stored it, so
            ## "submission_title" was only ever set in the else branch below
            batch_template["submission_title"] = title
        except Exception:
            register_exception(prefix="The file containing the title of the video was not readable")
            raise
    else:
        batch_template["submission_title"] = None

    ## Set the rest
    batch_template["notify_admin"] = CFG_SITE_ADMIN_EMAIL
    batch_template["notify_user"] = user_info["email"]
    ## NOTE(review): 'sysno' is not defined in this function -- presumably a
    ## global injected by the WebSubmit function framework; confirm at caller
    batch_template["recid"] = sysno

    timestamp = generate_timestamp()
    job_filename = "submission_%d_%s.job" % (sysno, timestamp)
    create_job_from_dictionary(batch_template, job_filename)
예제 #14
0
파일: tasks.py 프로젝트: mhellmic/b2share
def task_submit_check_options():
    """ Checks the tasks arguments for validity.

    Walks through all task options relevant for the selected mode
    ('encode', 'extract', 'meta', 'batch' or 'daemon') and reports the
    first problem found via write_message().

    @return: False as soon as one option is found invalid, True if all
        checks pass
    @rtype: bool
    """

    #----------------#
    # General Checks #
    #----------------#

    ## FFMPEG CONFIGURATION ##
    ## The status of ffmpeg should be checked before a task is submitted
    ## There is a minimum configuration that ffmpeg must be compiled with
    ## See bibencode_utils and bibencode_config
    config = check_ffmpeg_configuration()
    if config:
        ## Prints missing configuration
        string = ''
        for item in config:
            string += ('\t' + item + '\n')
        write_message(
            "FFmpeg options are missing. Please recompile and add:\n" + string
        )
        return False

    ## MODE ##
    ## Check if the mode is a valid one
    if _topt('mode') is None:
        write_message('You have to specify a mode using \'-m MODE\'')
        return False
    if _topt('mode') not in CFG_BIBENCODE_VALID_MODES:
        write_message('%s is not a valid mode. Use one of %s'
                      % (_topt('mode'), CFG_BIBENCODE_VALID_MODES))
        return False

    ## INPUT ##
    ## Check if the input file is given and if it exists
    ## You should always use an absolute path to the file
    if _topt('mode') in ('encode', 'extract', 'meta', 'batch'):
        if _topt('input') is None:
            write_message('You must specify an input file using \'-i FILE\'')
            return False
        else:
            if not os.path.exists(_topt('input')):
                print(("The file %s does not exist" % _topt('input')))
                return False

    ## OUTPUT ##
    ## Check if the output file is given and if it exists
    ## You should always use an absolute path to the file
    if _topt('mode') in ('encode', 'extract', 'meta'):
        if _topt('output') is None:
            write_message('No output file is given. Please specify with'
                          ' \'-o NAME\''
                          )
            return False

    #---------------#
    # Encoding Mode #
    #---------------#
    if _topt('mode') == 'encode':

        ## PROFILE ## Check for a valid profile if this is given
        if _topt('profile_name') is not None:
            if _topt('profile_name') not in get_encoding_profiles():
                write_message('%s not found in %s' %
                              (_topt('profile_name'),
                               CFG_BIBENCODE_PROFILES_ENCODING)
                              )
                return False
            ## If the profile exists
            else:
                pass

        ## AUDIOCODEC ##
        ## Checks if the audiocodec is one of the predefined
        if _topt('acodec') is not None:
            if _topt('acodec') not in CFG_BIBENCODE_FFMPEG_VALID_ACODECS:
                write_message(
                    '%s is not a valid audiocodec.\nAvailable codecs: %s'
                    % (_topt('acodec'), CFG_BIBENCODE_FFMPEG_VALID_ACODECS)
                )
                return False

        ## VIDEOCODEC ## Checks if the videocodec is one of the predefined
        if _topt('vcodec') is not None:
            if _topt('vcodec') not in CFG_BIBENCODE_FFMPEG_VALID_VCODECS:
                write_message(
                    '%s is not a valid videocodec.\nAvailable codecs: %s'
                    % (_topt('vcodec'), CFG_BIBENCODE_FFMPEG_VALID_VCODECS)
                )
                return False

        ## SIZE ##
        ## Checks if the size is either WxH or an FFMPEG preset
        if _topt('size') is not None:
            if not CFG_BIBENCODE_FFMPEG_RE_VALID_SIZE.match(_topt('size')):
                if _topt('size') not in CFG_BIBENCODE_FFMPEG_VALID_SIZES:
                    write_message(
                        '%s is not a valid frame size.\nEither use the'
                        ' \'WxH\' notation or one of these values:\n%s'
                        % (_topt('size'), CFG_BIBENCODE_FFMPEG_VALID_SIZES)
                    )
                    return False
        ## Check if both a size and vertical or horizontal resolution
        ## were given -- they are mutually exclusive
        if (_topt('width') or _topt('height')) and _topt('size'):
            write_message('Options \'width\' and \'height\' can not be '
                          'combined with \'resolution\'')
            return False

        ## PASSES ##
        ## If a number of passes is given, it should be either 1 or 2.
        ## You could do an infinite number of passes with ffmpeg,
        ## But it will almost never make a difference above 2 passes.
        ## So, we currently only support 2 passes.
        if _topt('passes') is not None:
            if _topt('passes') not in (1, 2):
                write_message('The number of passes must be either 1 or 2')
                return False
        else:
            ## Default to a single encoding pass when nothing was given
            task_set_option('passes', 1)

        ## BITRATE ##
        ## Check if the given bitrate is either 1000 sth. or 1000k sth.
        ## NOTE(review): these checks are placeholders -- the bitrate
        ## values are currently accepted without any validation.
        if _topt('abitrate') is not None:
            pass
        if _topt('vbitrate') is not None:
            pass

    #-----------------#
    # Extraction Mode #
    #-----------------#
    elif _topt('mode') == 'extract':

        ## PROFILE ##
        ## If a profile is given, check its validity
        if _topt('profile_name') is not None:
            if _topt('profile_name') not in get_extract_profiles():
                write_message('%s not found in %s' %
                              (_topt('profile_name'),
                               CFG_BIBENCODE_PROFILES_EXTRACT)
                              )
                return False
            ## If the profile exists
            else:
                pass

        ## You cannot give both a number and specific positions
        ## !!! Think about allowing both -> First extract by number,
        ## !!! then additionally the specific positions
        if (
            ((_topt('numberof') is not None) and
            (_topt('positions') is not None))
            or
            ((_topt('numberof') is None) and
            (_topt('positions') is None))
            ):
            write_message('Please specify either a number of frames to '
                          'take or specific positions')
            return False

        ## SIZE ##
        ## Checks if the size is either WxH or an FFMPEG specific value
        if _topt('size') is not None:
            if not CFG_BIBENCODE_FFMPEG_RE_VALID_SIZE.match(_topt('size')):
                if _topt('size') not in CFG_BIBENCODE_FFMPEG_VALID_SIZES:
                    write_message(
                        '%s is not a valid frame size.\nEither use the'
                        '\'WxH\' notation or one of these valus:\n%s'
                        % (_topt('size'), CFG_BIBENCODE_FFMPEG_VALID_SIZES)
                    )
                    return False

    #---------------#
    # Metadata Mode #
    #---------------#
    elif _topt('mode') == 'meta':

        ## You have to give exactly one meta suboption
        ## (either write metadata or dump it, not both, not neither)
        if not _xor(_topt('meta_input'),
                   _topt('meta_dump')):
            write_message("You can either dump or write metadata")
            return False

        ## METADATA INPUT ##
        if _topt('meta_input') is not None:
            ## Check if this is either a filename (that should exist)
            ## or if this a jsonic metadata notation
            if os.path.exists(_topt('meta_input')):
                pass
            else:
                try:
                    ## Parse the inline JSON and store the resulting dict
                    ## back into the option for later use
                    metadict = json.loads(_topt('meta_input'))
                    task_set_option('meta_input', metadict)
                except ValueError:
                    write_message('The value %s of the \'--meta\' parameter is '
                                  'neither a valid filename nor a jsonic dict'
                                  % _topt('meta_input'))
                    return False

    #------------#
    # Batch Mode #
    #------------#
    elif _topt('mode') == 'batch':
        ## 'collection' and 'search' are mutually exclusive record selectors
        if _topt('collection') and _topt('search'):
            write_message('You can either use \'search\' or \'collection\'')
            return False
        elif _topt('collection'):
            template = json_decode_file(_topt('input'))
            print('\n')
            print("#---------------------------------------------#")
            print("# YOU ARE ABOUT TO UPDATE A WHOLE COLLECTION  #")
            print("#---------------------------------------------#")
            print('\n')
            print('The selected template file contains:')
            pprint(template)
            print('\n')
        elif _topt('search'):
            template = json_decode_file(_topt('input'))
            message = "# YOU ARE ABOUT TO UPDATE RECORDS MATCHING '%s'  #" % _topt('search')
            print('\n')
            print(("#" + "-"*(len(message)-2) + "#"))
            print(message)
            print(("#" + "-"*(len(message)-2) + "#"))
            print('\n')
            print('The selected template file contains:')
            pprint(template)
            print('\n')


    #-------------#
    # Daemon Mode #
    #-------------#
    elif _topt('mode') == 'daemon':
        task_set_task_param('task_specific_name', 'daemon')
        ## You can either give none or both folders, but not only one
        if _xor(_topt('new_job_folder'), _topt('old_job_folder')):
            write_message('When specifying folders for the daemon mode, you '
                          'have to specify both the folder for the new jobs '
                          'and the old ones')
            return False


    ## If every check went fine
    return True
예제 #15
0
def pbcore_metadata(input_file, pbcoreIdentifier=None, pbcoreTitle=None,
                    pbcoreDescription=None, instantiationIdentifier=None,
                    instantiationPhysical=None, instantiationLocation=None,
                    instantiationGenerations=None,instantiationExtension=None,
                    instantiationPart=None, instantiationAnnotation=None,
                    instantiationRights=None, instantiationRelation=None,
                    xmlns="pbcore", aspect_override=None
                    ):
    """ Transforms parsed metadata to a PBCore representation.
    To supplement all the PBCore fields, we need both ffprobe and mediainfo.
    If only ffprobe is installed, it will not fail but supplement only partially.
    @param input_file: full path to the file to extract the metadata from
    @type input_file: string
    @param pbcoreIdentifier: optional value for the <pbcoreIdentifier> element
    @param pbcoreTitle: optional value for the <pbcoreTitle> element
    @param xmlns: XML namespace prefix to apply to every element, or None/""
        to emit elements without a prefix
    @param aspect_override: aspect ratio passed through to mediainfo parsing
    @return: pbcore xml metadata representation
    @rtype: string
    """

    def _follow_path(path, locals_u, meta_dict, probe_dict, stream_number=None):
        """
        Tries to follow a given path and returns the value it represents.
        The path is a string that must be like this:
            local->variable_name
            ffprobe->format->param
            ffprobe->video->param
            ffprobe->audio->param
            ffprobe->stream->param
            mediainfo->general->param
            mediainfo->audio->param
            mediainfo->video->param
            mediainfo->track->param

        @param path: Path to the value
        @type: string
        @param locals_u: Local variables
        @type locals_u: dict
        @param meta_dict: Mediainfo metadata
        @type meta_dict: dict
        @param probe_dict: FFprobe metadata
        @type probe_dict: dict
        @param stream_number: To follow a path to a specific stream
        @type stream_number: int
        @return: value of the element the path points to
        @rtype: string
        """
        path_segments = path.split("->")
        ## ffprobe
        if path_segments[0] == 'ffprobe':
            ## format
            if path_segments[1] == 'format':
                return getval(probe_dict['format'], path_segments[2], 0)
            ## first stream of the requested codec type (video/audio)
            elif path_segments[1] in ('video', 'audio'):
                for stream in probe_dict['streams']:
                    if getval(stream, 'codec_type') == path_segments[1]:
                        return getval(stream, path_segments[2], 0)
            ## stream by number
            elif path_segments[1] == 'stream':
                return getval(probe_dict['streams'][stream_number],
                              path_segments[2], 0)
        ## mediainfo
        elif path_segments[0] == 'mediainfo':
            ## general, video, audio
            if path_segments[1] in ('general', 'video', 'audio'):
                for track in meta_dict:
                    if getval(track, 'kind_of_stream').lower() == path_segments[1]:
                        return getval(track, path_segments[2], 0)
            ## stream by number
            elif path_segments[1] == 'track':
                ## We rely on format being the first track in mediainfo
                ## And the order of streams in ffprobe and tracks in mediainfo being the same
                return getval(meta_dict[stream_number+1], path_segments[2], 0)
        ## local variable
        elif path_segments[0] == 'local':
            return getval(locals_u, path_segments[1], 0)
        ## direct input
        else:
            return path_segments[0]

    def _map_values(mapping, locals_u, meta_dict, probe_dict, stream_number=None):
        """ Substitutes a mapping dictionary and returns the substituted values.
        Every entry of the mapping must contain a 'tag', a 'mapping' and
        optionally a 'call' (name of a global function to post-process the
        raw value).

        @param mapping: mapping dictionary to substitute
        @type: dict
        @param locals_u: Local variables
        @type locals_u: dict
        @param meta_dict: Mediainfo metadata
        @type meta_dict: dict
        @param probe_dict: FFprobe metadata
        @type probe_dict: dict
        @param stream_number: To follow a path to a specific stream
        @type stream_number: int
        @return: substituted mappings
        @rtype: list of strings
        """
        items = []
        for value in mapping:
            mapping = value['mapping']
            tag = value['tag']
            call = getval(value, 'call')
            micro_mappings = mapping.split(';;')
            values = []
            foundall = True
            for micro_mapping in micro_mappings:
                value = _follow_path(micro_mapping, locals_u, meta_dict, probe_dict, stream_number)
                if value:
                    if call:
                        value = globals()[call](value)
                    values.append(value.strip())
                else:
                    foundall &= False
            try:
                if values and foundall:
                    items.append(tag % "".join(values))
            except Exception:
                ## Best-effort: a tag whose value cannot be formatted is
                ## simply skipped rather than failing the whole document
                pass
        return items

    ## Get the metadata from ffprobe and mediainfo
    meta_dict = mediainfo_metadata(input_file, aspect_override)
    probe_dict = ffprobe_metadata(input_file)

    # parse the mappings
    pbcore_mappings = json_decode_file(CFG_BIBENCODE_PBCORE_MAPPINGS)

    ## INSTANTIATION ##
    # According to the PBcore standard, this strict order MUST be followed
    instantiation_mapping = pbcore_mappings['instantiation_mapping']

    ## ESSENCE TRACK ##
    # According to the PBcore standard, this strict order MUST be followed
    essencetrack_mapping = pbcore_mappings['essencetrack_mapping']

    ## The XML header for the PBcore document
    header = (
    """<?xml version="1.0" encoding="UTF-8"?><pbcoreDescriptionDocument """
    """xmlns%(xmlns)s="http://www.pbcore.org/PBCore/PBCoreNamespace.html" """
    """xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" """
    """xsi:schemaLocation="http://www.pbcore.org/PBCore/PBCoreNamespace.html">"""
    )
    if pbcoreIdentifier:
        pbcoreIdentifier ="""<pbcoreIdentifier>%s</pbcoreIdentifier>""" % pbcoreIdentifier
    else:
        pbcoreIdentifier = ""
    if pbcoreTitle:
        pbcoreTitle = """<pbcoreTitle>%s</pbcoreTitle>""" % pbcoreTitle
    else:
        pbcoreTitle = ""
    tail = """</pbcoreDescriptionDocument>"""

    ## ESSENCE TRACKS ##
    ## One <instantiationEssenceTrack> element per ffprobe stream
    essencetracks = []
    for stream_no in range(len(probe_dict['streams'])):
        essencetracks.append(_map_values(essencetrack_mapping, locals(),
                                         meta_dict, probe_dict, stream_no))
    joinedtracks = []
    for track in essencetracks:
        track = "<instantiationEssenceTrack>" + "".join(track) + "</instantiationEssenceTrack>"
        joinedtracks.append(track)
    joinedtracks = "".join(joinedtracks)

    ## INSTANTIATION ##
    instantiation_items = _map_values(instantiation_mapping, locals(),
                                      meta_dict, probe_dict)
    ## BUGFIX: the essence tracks were previously computed but never emitted.
    ## Per the PBCore schema, instantiationEssenceTrack elements are children
    ## of pbcoreInstantiation, following the other instantiation elements.
    joinedinstantiation = ("<pbcoreInstantiation>"
                           + "".join(instantiation_items)
                           + joinedtracks
                           + "</pbcoreInstantiation>")

    joined = "%s%s%s%s%s" % (header, pbcoreIdentifier, pbcoreTitle,
                           joinedinstantiation, tail)

    if xmlns:
        ## Apply the namespace prefix to the declaration and every tag
        joined = joined % {"xmlns" : ":%s" % xmlns}
        joined = re.sub(r"<(\w[^>]+)>", r"<%s:\g<1>>" % xmlns, joined)
        joined = re.sub(r"<\/([^>]+)>", r"</%s:\g<1>>" % xmlns, joined)
    else:
        joined = joined % {"xmlns" : ""}

    return joined
예제 #16
0
def Video_Processing(parameters, curdir, form, user_info=None):
    """
    Perform all the required processing of the video.

    Parameters are:
    * "batch_template": to specify the absolute path to a
        configuration describe which manipulation should the uploaded file
        receive. If empty, will use by default
        etc/bibencode/batch_template_submission.json
    * "aspect": to specify in which form element the aspect will be available
    * "title": to specify in which form element the title will be available

    @param parameters: WebSubmit function parameters, see above
    @type parameters: dict
    @param curdir: current submission working directory holding form values
    @type curdir: string
    @param form: the submitted form (unused here)
    @param user_info: information about the submitting user ('uid', 'email')
    @type user_info: dict
    """

    ## Read the batch template for submissions
    if parameters.get('batch_template'):
        try:
            batch_template = json_decode_file(parameters.get('batch_template'))
        except:
            ## Log the problem with context, then let the submission fail
            register_exception(prefix="The given batch template was not readable")
            raise
    else:
        batch_template = json_decode_file(CFG_BIBENCODE_TEMPLATE_BATCH_SUBMISSION)

    ## Handle the filepath
    file_storing_path = os.path.join(curdir, "files", str(user_info['uid']), "NewFile", 'filepath')
    try:
        with open(file_storing_path) as fp:
            fullpath = fp.read()
        batch_template['input'] = fullpath
    except:
        register_exception(prefix="The file containing the path to the video was not readable")
        raise

    ## Handle the filename
    file_storing_name = os.path.join(curdir, "files", str(user_info['uid']), "NewFile", 'filename')
    try:
        with open(file_storing_name) as fp:
            filename = fp.read()
        batch_template['bibdoc_master_docname'] = os.path.splitext(os.path.split(filename)[1])[0]
        batch_template['bibdoc_master_extension'] = os.path.splitext(filename)[1]
        batch_template['submission_filename'] = filename
    except:
        register_exception(prefix="The file containing the original filename of the video was not readable")
        raise

    ## Handle the aspect ratio
    if parameters.get('aspect'):
        try:
            file_storing_aspect = os.path.join(curdir, parameters.get('aspect'))
            with open(file_storing_aspect) as fp:
                aspect = fp.read()
            batch_template['aspect'] = aspect
        except:
            register_exception(prefix="The file containing the aspect ratio of the video was not readable")
            raise
    else:
        batch_template['aspect'] = None

    ## Handle the title
    if parameters.get('title'):
        try:
            file_storing_title = os.path.join(curdir, parameters['title'])
            with open(file_storing_title) as fp:
                title = fp.read()
            ## BUGFIX: the title was read but never stored in the template,
            ## so a submitted title was silently dropped
            batch_template['submission_title'] = title
        except:
            register_exception(prefix="The file containing the title of the video was not readable")
            raise
    else:
        batch_template['submission_title'] = None

    ## Set the rest
    batch_template['notify_admin'] = CFG_SITE_ADMIN_EMAIL
    batch_template['notify_user'] = user_info['email']
    ## NOTE(review): 'sysno' is not an argument -- it is presumably injected
    ## into the function's globals by the WebSubmit engine; confirm at caller
    batch_template['recid'] = sysno

    timestamp = generate_timestamp()
    job_filename = "submission_%d_%s.job" % (sysno, timestamp)
    create_job_from_dictionary(batch_template, job_filename)