Ejemplo n.º 1
0
def mediaconch_check(inputPath, ingestType, ingestLogBoilerplate):
    '''
    Check the input file against a MediaConch policy and log the result.

    Only ingests of type 'film scan' are checked; for any other
    ingestType this function does nothing and returns None.

    inputPath: path of the file handed to pymmFunctions.check_policy.
    ingestType: ingest type string; also passed to check_policy.
    ingestLogBoilerplate: dict of keyword arguments forwarded to
        pymmFunctions.ingest_log. Its 'filename' entry is used to build
        the log message.

    TODO: move the policy logic to pymmFunctions and keep only the
    logging here. Also, there are no policies set up yet.
    '''
    if ingestType != 'film scan':
        return

    # The file name has to come from the boilerplate dict: there is no
    # `filename` variable in this function's scope, and referring to one
    # raised a NameError on every film scan ingest.
    filename = ingestLogBoilerplate['filename']

    policyStatus = pymmFunctions.check_policy(ingestType, inputPath)
    if policyStatus:
        message = filename + " passed the MediaConch policy check."
        status = "ok"
    else:
        message = filename + " did not pass the MediaConch policy check."
        status = "not ok, but not critical?"

    pymmFunctions.ingest_log(message, status, **ingestLogBoilerplate)
Ejemplo n.º 2
0
def input_file_metadata(ingestLogBoilerplate, processingVars):
    '''
    Generate metadata for the input file and log each step to the ingest log.

    Three things happen in order:
    1. the file's MD5 hash (makeMetadata.hash_file) is logged;
    2. a mediainfo report is requested, and a log line is written if the
       call returns something truthy;
    3. a frameMD5 report is requested, and a log line is written unless
       the call returns False.

    ingestLogBoilerplate: dict of keyword arguments forwarded to
        pymmFunctions.ingest_log.
    processingVars: dict that must contain 'inputPath' and
        'packageMetadataObjects'.
    '''
    sourcePath = processingVars['inputPath']

    # 1. MD5 hash of the input file
    hashMessage = ("The input file MD5 hash is: " +
                   makeMetadata.hash_file(sourcePath))
    pymmFunctions.ingest_log(hashMessage, 'OK', **ingestLogBoilerplate)

    metadataDir = processingVars['packageMetadataObjects']

    # 2. mediainfo report
    if makeMetadata.get_mediainfo_report(sourcePath, metadataDir):
        mediainfoMessage = "mediainfo XML report for input file written to metadata directory for package."
        pymmFunctions.ingest_log(
            mediainfoMessage, 'OK', **ingestLogBoilerplate)

    # 3. frameMD5 report
    frameMD5 = makeMetadata.make_frame_md5(sourcePath, metadataDir)
    # Compared against False explicitly (not by truthiness), so any
    # other return value still counts as success.
    if frameMD5 != False:
        frameMessage = "frameMD5 report for input file written to metadata directory for package"
        pymmFunctions.ingest_log(
            frameMessage, "OK", **ingestLogBoilerplate)
Ejemplo n.º 3
0
def check_av_status(inputPath, interactiveMode, ingestLogBoilerplate):
    '''
    Check whether or not a file is recognized as an a/v file and log it.

    If pymmFunctions.is_av accepts the file, an 'OK' entry is logged and
    nothing else happens. Otherwise a warning is printed and logged, and
    in interactive mode the operator is asked whether to quit; entering
    'q' exits via sys.exit(). In non-interactive mode the ingest
    continues after the warning.

    inputPath: path of the file to test.
    interactiveMode: truthy when an operator is at the terminal.
    ingestLogBoilerplate: dict of keyword arguments forwarded to
        pymmFunctions.ingest_log. Its 'filename' entry is used in the
        messages.
    '''
    filename = ingestLogBoilerplate['filename']

    # Branch on the a/v result first. Previously the success message hung
    # off `if interactiveMode`, so non-interactive runs logged "is an AV
    # file" even for files that had just been flagged, and interactive
    # runs on real a/v files crashed on an unbound `_is_av`.
    if pymmFunctions.is_av(inputPath):
        pymmFunctions.ingest_log(
            # message
            filename + " is an AV file, way to go.",
            # status
            'OK',
            # ingest boilerplate
            **ingestLogBoilerplate)
        return

    message = "WARNING: " + filename + " is not recognized as an a/v file."
    print(message)
    # Logged once; the continue path used to repeat this identical entry.
    pymmFunctions.ingest_log(
        # message
        message,
        # status
        'warning',
        # ingest boilerplate
        **ingestLogBoilerplate)

    if interactiveMode:
        stayOrGo = input(
            "If you want to quit press 'q' and hit enter, otherwise press any other key:"
        )
        if stayOrGo == 'q':
            # CLEANUP AND LOG THIS @fixme
            sys.exit()
Ejemplo n.º 4
0
def main():
    '''
    Run a single ingest from start to finish.

    Steps, in order: read the command line arguments, sniff whether the
    input is a file or a directory, create the package directories,
    validate (or interactively ask for) the operator and input path,
    start the ingest log, process the input file (or each file in the
    input directory), move the SIP to AIP staging, and run cleanup.

    Exits via sys.exit() when the input type cannot be determined, when
    the input directory contains subdirectories, or when a required
    argument is missing in non-interactive mode.
    '''
    #########################
    #### SET INGEST ARGS ####
    args = set_args()
    inputPath = args.inputPath
    operator = args.operator
    report_to_db = args.database_reporting
    ingestType = args.ingestType
    makeProres = args.makeProres
    concatChoice = args.concat
    cleanupStrategy = args.cleanup_originals
    interactiveMode = args.interactiveMode
    # read aip staging dir from config
    aip_staging = config['paths']['aip_staging']
    # make a uuid for the ingest
    ingestUUID = str(uuid.uuid4())
    # make a temp ID based on input path for the ingested object
    # this will get replaced by the ingest UUID during final package move ...?
    # NOTE(review): this uses args.inputPath; in interactive mode inputPath
    # is replaced by the operator's answer further down, after tempID has
    # already been derived -- confirm that is intended.
    tempID = pymmFunctions.get_temp_id(inputPath)
    #### END SET INGEST ARGS ####
    #############################

    #############################
    #### TEST / SET ENV VARS ####
    # sniff whether the input is a file or directory
    # NOTE(review): like tempID, this runs on the pre-prompt inputPath.
    inputType = sniff_input(inputPath, ingestUUID, concatChoice)
    if not inputType:
        sys.exit(1)
    if inputType == 'dir':
        source_list = pymmFunctions.list_files(inputPath)
        # count subdirectories; any at all aborts the ingest below
        subs = 0
        for _object in source_list:
            if os.path.isdir(_object):
                subs += 1
                print("\nYou have subdirectory(ies) in your input:"
                      "\n({})\n".format(_object))
        if subs > 0:
            print("This is not currently supported. Exiting!")
            sys.exit()

    # create directory paths for ingest...
    packageOutputDir,packageObjectDir,packageMetadataDir,\
    packageMetadataObjects,packageLogDir = prep_package(tempID)

    # check that required vars are declared & init other vars
    requiredVars = ['inputPath', 'operator']
    if interactiveMode == False:
        # Quit if there are required variables missing
        missingVars = 0
        for flag in requiredVars:
            if getattr(args, flag) == None:
                print('''
					CONFIGURATION PROBLEM:
					YOU FORGOT TO SET ''' + flag + '''. It is required.
					Try again, but set ''' + flag + ''' with the flag --''' + flag)
                missingVars += 1
        if missingVars > 0:
            sys.exit()
    else:
        # ask operator/input file
        operator = input("Please enter your name: ")
        inputPath = input(
            "Please drag the file you want to ingest into this window___"
        ).rstrip()
        inputPath = pymmFunctions.sanitize_dragged_linux_paths(inputPath)

    # Set up a canonical name that will be passed to each log entry.
    # For files it's the basename, for dirs it's the dir name.
    # NOTE(review): filename and input_name are only bound when inputPath
    # is truthy and inputType is 'file' or 'dir'; otherwise building
    # processingVars below raises NameError.
    if inputPath:
        canonicalName = os.path.basename(inputPath)
        if inputType == 'file':
            filename = input_name = canonicalName
        elif inputType == 'dir':
            # per-file names are filled in inside the directory loop below
            filename = ''
            input_name = canonicalName

    # set up a dict for processing variables to pass around
    processingVars = {
        'operator': operator,
        'inputPath': inputPath,
        'tempID': tempID,
        'ingestType': ingestType,
        'ingestUUID': ingestUUID,
        'filename': filename,
        'input_name': input_name,
        'makeProres': makeProres,
        'packageOutputDir': packageOutputDir,
        'packageObjectDir': packageObjectDir,
        'packageMetadataDir': packageMetadataDir,
        'packageMetadataObjects': packageMetadataObjects,
        'packageLogDir': packageLogDir,
        'aip_staging': aip_staging
    }
    #### END TEST / SET ENV VARS ####
    #################################

    ###########################
    #### LOGGING / CLEANUP ####
    # set up a log file for this ingest
    ingestLogPath = os.path.join(
        packageLogDir,
        tempID + '_' + pymmFunctions.timestamp('now') + '_ingestfile-log.txt')
    # mode 'x' only creates the (empty) file and raises FileExistsError if
    # it is already there; nothing is written through this handle
    with open(ingestLogPath, 'x') as ingestLog:
        print('Laying a log at ' + ingestLogPath)
    # keyword arguments shared by every pymmFunctions.ingest_log call
    ingestLogBoilerplate = {
        'ingestLogPath': ingestLogPath,
        'tempID': tempID,
        'input_name': input_name,
        'filename': filename,
        'operator': operator
    }
    pymmFunctions.ingest_log(
        # message
        'start',
        # status
        'start',
        # ingest boilerplate
        **ingestLogBoilerplate)

    # tell the system log that we are starting
    pymmFunctions.pymm_log(input_name, tempID, operator, '', 'STARTING')

    # if interactive ask about cleanup
    # NOTE(review): the return value of reset_cleanup_choice() is discarded
    # and cleanupStrategy is not reassigned here -- confirm how the
    # operator's choice reaches do_cleanup.
    if interactiveMode:
        reset_cleanup_choice()

    # insert database record for this ingest (log 'ingestion start')
    # --> http://id.loc.gov/vocabulary/preservation/eventType/ins.html
    # @fixme
    # @logme # @dbme

    #### END LOGGING / CLEANUP ####
    ###############################

    ###############
    ## DO STUFF! ##
    ###############
    if inputType == 'file':
        # check that input file is actually a/v
        check_av_status(inputPath, interactiveMode,
                        ingestLogBoilerplate)  # @dbme
        mediaconch_check(inputPath, ingestType, ingestLogBoilerplate)  # @dbme
        move_input_file(processingVars)  # @logme # @dbme
        input_file_metadata(ingestLogBoilerplate,
                            processingVars)  # @logme # @dbme
        make_derivs(processingVars)  # @logme # @dbme
    elif inputType == 'dir':
        # same pipeline as the single-file case, once per listed file
        for _file in source_list:
            # set processing variables per file
            ingestLogBoilerplate['filename'] = os.path.basename(_file)  # @dbme
            processingVars['filename'] = os.path.basename(_file)  # @dbme
            processingVars['inputPath'] = _file  # @dbme
            # check that input file is actually a/v
            check_av_status(_file, interactiveMode,
                            ingestLogBoilerplate)  # @dbme
            mediaconch_check(_file, ingestType, ingestLogBoilerplate)  # @dbme
            move_input_file(processingVars)  # @dbme
            input_file_metadata(ingestLogBoilerplate, processingVars)  # @dbme
            make_derivs(processingVars)  # @dbme
        # reset the processing variables to the original state
        # NOTE(review): ingestLogBoilerplate['filename'] is not reset and
        # keeps the last file's name after the loop.
        processingVars['filename'] = ''
        processingVars['inputPath'] = inputPath

    # MOVE SIP TO AIP STAGING
    # a) make a hashdeep manifest @fixme
    # b) move it
    move_sip(processingVars)  # @dbme
    # hard-coded until the manifest audit below is implemented
    packageVerified = False
    # c) audit the hashdeep manifest @fixme
    # packageVerified = result of audit @fixme

    # FINISH LOGGING
    do_cleanup(cleanupStrategy, packageVerified, inputPath, packageOutputDir,
               'done')  # @dbme