Exemple #1
0
def main():
    """Copy a file or directory to a destination via the copy helpers.

    Options come from ``set_args()``; ``inputPath`` and ``destination`` are
    required. When ``loglevel`` is ``'all'`` an rsync log path is built
    under ``logDir``; in every other case (or if building the path fails)
    an empty string is passed to the copy helper as the log path.

    Raises:
        SystemExit: with status 1 when a required argument is missing or
            the input is not recognised as a file or a directory.
    """
    config = pymmFunctions.read_config()
    args = set_args()
    inputPath = args.inputPath
    destination = args.destination
    loglevel = args.loglevel
    logDir = args.logDir

    # Quit if there are required variables missing
    missingArgs = [
        _arg for _arg in ('inputPath', 'destination')
        if getattr(args, _arg) is None
    ]
    for _arg in missingArgs:
        print("CONFIGURATION PROBLEM:\n"
              "You forgot to set {0}. It is required.\n"
              "Try again, but set {0} with the flag --{0}\n".format(_arg))
    if missingArgs:
        # non-zero status so calling scripts can detect the failure
        sys.exit(1)

    # set up rsync log
    rsyncLogpath = ''
    if loglevel == 'all':
        # TODO: pymmLogpath is not used yet -- at what point will we
        # actually want to pymm-log a copy? Final AIP transfer?
        pymmLogpath = os.path.join(config['logging']['pymm_log_dir'],
                                   'pymm_log.txt')
        try:
            rsyncLogpath = os.path.join(
                logDir, 'rsync_log_' + pymmFunctions.get_base(inputPath) +
                '_' + pymmFunctions.timestamp('now') + '.txt')
        except Exception:
            # best effort: fall back to no rsync log, but let
            # KeyboardInterrupt / SystemExit propagate
            print("there was a problem getting the rsync log path ....")
            rsyncLogpath = ''

    # sniff what the input is
    dir_or_file = pymmFunctions.dir_or_file(inputPath)
    if not dir_or_file:
        print("oy you've got big problems. " + inputPath +
              " is not a directory or a file. what is it? is it a ghost?")
        sys.exit(1)
    # copy the input according to its type
    elif dir_or_file == 'dir':
        # add trailing slash for rsync destination directory
        # (endswith is safe on an empty destination, unlike [-1])
        if not destination.endswith('/'):
            destination = destination + '/'
        copy_dir(inputPath, rsyncLogpath, destination)
    elif dir_or_file == 'file':
        copy_file(inputPath, rsyncLogpath, destination)
    else:
        print("o_O what is going on here? you up to something?")
        sys.exit(1)
Exemple #2
0
def add_pbcore_md5_location(CurrentIngest):
    '''
    Add an MD5 identifier and a repository location to the PBCore
    instantiation of every component object whose category is 'file',
    then write the PBCore XML back to its file.

    Does nothing when the input object has no PBCore file ('').

    Have to call this after creation of the object manifest for the SIP
    and parsing of the manifest by report_SIP_fixity().
    '''
    if CurrentIngest.InputObject.pbcoreFile == '':
        return

    pbcorePath = CurrentIngest.InputObject.pbcoreFile
    document = pbcore.PBCoreDocument(pbcorePath)

    # only component *files* get instantiation data
    fileComponents = (
        component
        for component in CurrentIngest.InputObject.ComponentObjects
        if component.objectCategory == 'file'
    )
    for component in fileComponents:
        md5Value = component.md5hash
        # the md5 is recorded as an identifier on the file's
        # pbcoreInstantiation
        md5Attributes = {
            "source": "BAMPFA {}".format(pymmFunctions.timestamp('iso8601')),
            "annotation": "messageDigest",
            "version": "MD5"
        }
        makePbcore.add_element_to_instantiation(
            document, component.basename, 'instantiationIdentifier',
            md5Attributes, md5Value)
        # 'BAMPFA Digital Repository' is recorded as instantiationLocation
        makePbcore.add_element_to_instantiation(
            document, component.basename, 'instantiationLocation',
            {}, "BAMPFA Digital Repository")

    makePbcore.xml_to_file(document, pbcorePath)
Exemple #3
0
 def create_ingestLog(self):
     """Create a new, empty ingest log file and remember its path.

     The file name is built from ``self.tempID`` and the current
     timestamp and placed in ``self.packageLogDir``; the resulting path
     is stored on ``self.ingestLogPath``. The file is opened in
     exclusive-creation mode ('x'), so an already existing file raises
     ``FileExistsError``.
     """
     logName = '{}_{}_ingest-log.txt'.format(
         self.tempID, pymmFunctions.timestamp('now'))
     self.ingestLogPath = os.path.join(self.packageLogDir, logName)
     # opening with 'x' is only used to create the (empty) file
     with open(self.ingestLogPath, 'x'):
         print('Laying a log at ' + self.ingestLogPath)
Exemple #4
0
def insert_event(CurrentIngest, eventType, outcome, status):
    """Report an event on the current target object to the database.

    The event is only inserted when
    ``CurrentIngest.ProcessArguments.databaseReporting == True``.

    Returns:
        The value returned by ``EventInsert.report_to_db()``, or ``None``
        when database reporting is not enabled.
    """
    # the THING the event is being performed on
    target = CurrentIngest.currentTargetObject
    identifierValue = target.objectIdentifierValue
    # name of the computer
    computer = CurrentIngest.ProcessArguments.computer
    # name of the program, script, or function doing the event
    caller = CurrentIngest.caller
    user = CurrentIngest.ProcessArguments.user
    databaseID = target.databaseID

    if CurrentIngest.ProcessArguments.databaseReporting != True:
        return None

    # insert the event, stamped with the current time
    return dbReporters.EventInsert(
        eventType,
        databaseID,
        identifierValue,
        pymmFunctions.timestamp('iso8601'),
        status,
        outcome,
        caller,
        computer,
        user,
        eventID=None).report_to_db()
Exemple #5
0
def ingest_log(CurrentIngest, event, outcome, status):
    """Append one entry to the ingest log at ``CurrentIngest.ingestLogPath``.

    For ``event == "ingestion start"`` a multi-line header block is
    written (object names, IDs, working directory, operator and system
    info). Any other event is written as a single pipe-separated line
    containing the timestamp, status, event type, outcome, operator and
    the current target object. When the input object has a filename
    (neither ``""`` nor ``None``) it is included in the entry as well.

    Args:
        CurrentIngest: ingest state object; the attributes read here are
            ``InputObject``, ``ProcessArguments``, ``tempID``,
            ``ingestLogPath``, ``currentTargetObject`` and, for the start
            event, ``systemInfo`` and ``ingestUUID``.
        event: event type; ``"ingestion start"`` selects the header form.
        outcome: event outcome text (not used for the start event).
        status: status text (not used for the start event).
    """
    stamp = pymmFunctions.timestamp("iso8601")

    inputObject = CurrentIngest.InputObject
    canonicalName = inputObject.canonicalName
    inputPath = inputObject.inputPath
    tempID = CurrentIngest.tempID
    user = CurrentIngest.ProcessArguments.user
    filename = inputObject.filename
    ingestLogPath = CurrentIngest.ingestLogPath
    inputTypeDetail = inputObject.inputTypeDetail

    # Prefer the target's path; targets without an inputPath attribute are
    # identified by objectIdentifierValue instead. Only AttributeError is
    # handled so that unrelated failures and interrupts are not hidden.
    target = CurrentIngest.currentTargetObject
    try:
        _object = target.inputPath
    except AttributeError:
        _object = target.objectIdentifierValue

    hasFilename = filename not in ("", None)

    if event == "ingestion start":
        stuffToLog = [
            ("#" * 50) + "\n\n" + stamp + "\n\n",
            "Event Type: ingestion start\n",
            "Object Canonical Name: {}\n".format(canonicalName),
        ]
        if hasFilename:
            stuffToLog.append("Object Filename: {}\n".format(filename))
        stuffToLog.extend([
            "Object Input Filepath: {}\n".format(inputPath),
            "Object Temp ID: {}\n".format(tempID),
            "Object Type: {}\n".format(inputTypeDetail),
            "Ingest UUID: {}\n".format(CurrentIngest.ingestUUID),
            "Ingest Working Directory: {}\n".format(
                CurrentIngest.ProcessArguments.outdir_ingestsip),
            "Operator: {}\n".format(user),
            "\n### SYSTEM INFO: ### \n{}\n".format(CurrentIngest.systemInfo),
            ("#" * 50),
        ])
    else:
        stuffToLog = [
            "{} | ".format(stamp),
            "Status: {} | ".format(status),
            "Event Type: {} | ".format(event),
            "Event Outcome: {} | ".format(outcome),
        ]
        if hasFilename:
            stuffToLog.append("Object Filename: {} | ".format(filename))
            stuffToLog.append("Object Filepath: {} | ".format(inputPath))
        stuffToLog.extend([
            "Operator: {} | ".format(user),
            "Current Target Object: {} | ".format(_object),
        ])

    # every entry is followed by a blank line
    with open(ingestLogPath, "a+") as ingestLog:
        ingestLog.write("".join(stuffToLog) + "\n\n")
Exemple #6
0
def pymm_log(CurrentIngest, event, outcome, status):
    """Append one entry to the system-wide ``pymm_log.txt``.

    The log directory is read from the ``logging`` / ``pymm_log_dir``
    config entry. The entry layout depends on ``status``:

    * ``'STARTING'``: a multi-line block describing the ingest.
    * ``'ENDING'`` / ``'ABORTING'``: a one-line "Ingestion end" record
      followed by a row of ``#`` characters.
    * anything else: a one-line record with status, object name, event
      type and outcome.
    """
    check_pymm_log_exists()
    logDirectory = pymmFunctions.read_config()['logging']['pymm_log_dir']
    logFile = os.path.join(logDirectory, 'pymm_log.txt')
    stamp = pymmFunctions.timestamp('iso8601')
    systemInfo = CurrentIngest.systemInfo

    inputObject = CurrentIngest.InputObject
    objectRootPath = inputObject.inputPath
    canonicalName = inputObject.canonicalName
    inputTypeDetail = inputObject.inputTypeDetail

    user = CurrentIngest.ProcessArguments.user
    ingestUUID = CurrentIngest.ingestUUID
    # read as in the original; the temp ID is not written to this log
    tempID = CurrentIngest.tempID
    workingDir = CurrentIngest.ProcessArguments.outdir_ingestsip

    # I think basename gets updated depending on what is getting
    # logged... ? @fixme
    objectName = os.path.basename(objectRootPath)

    if status == 'STARTING':
        pieces = (
            ('&' * 50) + '\n\n',
            stamp,
            "\nEvent type: Ingestion start\n",
            "Object canonical name: {}\n".format(canonicalName),
            "Object filepath: {}\n".format(objectRootPath),
            "Object type: {}\n".format(inputTypeDetail),
            "Ingest UUID: {}\n".format(ingestUUID),
            "Operator: {}\n".format(user),
            "Ingest working directory: {}\n".format(workingDir),
            "\n### SYSTEM INFO: ### \n{}".format(systemInfo),
            '\n',
        )
    elif status == "ENDING" or status == "ABORTING":
        pieces = (
            stamp,
            " | Status: {} |".format(status),
            " | Event type: Ingestion end | ",
            "Outcome: {}".format(outcome),
            '\n\n' + ('#' * 50) + "\n\n",
        )
    else:
        pieces = (
            stamp,
            " | Status: {}".format(status),
            " | Object name: {}".format(objectName),
            " | Event type: {}".format(event),
            " | Event outcome: {}".format(outcome),
            '\n',
        )

    with open(logFile, 'a') as log:
        log.writelines(pieces)
Exemple #7
0
def main():
    """Move or copy an input to a destination, or move and verify a SIP.

    Options come from ``set_args()``; ``inputPath`` and ``destination``
    are required.

    * With ``movingSIP`` set, the work is delegated to
      ``move_n_verify_sip()``.
    * Otherwise the input is sniffed as a file or directory and
      transferred with ``rsync_object()``, or with ``mv_object()`` when
      input and destination report the same filesystem id.

    Returns:
        ``(stagedSIPpath, safe)`` when ``movingSIP`` is set;
        ``(False, False)`` when the input is neither a file nor a
        directory; otherwise ``None``.

    Raises:
        SystemExit: with status 1 when a required argument is missing.
    """
    config = pymmFunctions.read_config()
    args = set_args()
    inputPath = args.inputPath
    movingSIP = args.movingSIP
    destination = args.destination
    loglevel = args.loglevel
    logDir = args.logDir
    useMV = args.useMV

    # Quit if there are required variables missing. This is checked
    # before anything else so the filesystem probe below is never run
    # against a missing path.
    missingArgs = [
        _arg for _arg in ('inputPath', 'destination')
        if getattr(args, _arg) is None
    ]
    for _arg in missingArgs:
        print("CONFIGURATION PROBLEM:\n"
              "You forgot to set {0}. It is required.\n"
              "Try again, but set {0} with the flag --{0}\n".format(_arg))
    if missingArgs:
        # non-zero status so calling scripts can detect the failure
        sys.exit(1)

    try:
        # see if the input/destination are on the same filesystem
        # if so, we will use mv rather than rsync for efficiency
        inputFS = pymmFunctions.get_filesystem_id(inputPath)
        destFS = pymmFunctions.get_filesystem_id(destination)
        print(inputFS, destFS)
        sameFilesystem = inputFS == destFS
    except Exception:
        # best effort: if the probe fails, fall back to rsync
        sameFilesystem = False

    if movingSIP:
        stagedSIPpath, safe = move_n_verify_sip(inputPath, destination)
        print(stagedSIPpath)
        return stagedSIPpath, safe

    # set up rsync log
    if loglevel == 'all':
        # NOTE(review): pymmLogpath is computed but not used in this
        # function -- confirm whether it is still needed
        pymmLogpath = os.path.join(config['logging']['pymm_log_dir'],
                                   'pymm_log.txt')
        try:
            rsyncLogPath = os.path.join(
                logDir, 'rsync_log_{}_{}.txt'.format(
                    pymmFunctions.get_base(inputPath),
                    pymmFunctions.timestamp('now')))
        except Exception:
            print("there was a problem getting the rsync log path ....")
            rsyncLogPath = ''
    else:
        rsyncLogPath = '.'

    # sniff what the input is
    dir_or_file = pymmFunctions.dir_or_file(inputPath)
    if not dir_or_file:
        print(
            "oy you've got big problems. {} is not a directory or a file."
            " what is it? is it a ghost?".format(inputPath))
        return False, False
    # copy the input according to its type
    elif dir_or_file == 'dir':
        # add trailing slash for rsync destination directory
        # (endswith is safe on an empty destination, unlike [-1])
        if not destination.endswith('/'):
            destination = destination + '/'
        # NOTE(review): directories are moved whenever the filesystems
        # match, regardless of useMV -- confirm this is intended
        if sameFilesystem:
            mv_object(inputPath, destination)
        else:
            rsync_object(inputPath, rsyncLogPath, destination)
    elif dir_or_file == 'file':
        # files are only moved when on the same filesystem and useMV
        # has not been explicitly switched off
        if not sameFilesystem or useMV is False:
            rsync_object(inputPath, rsyncLogPath, destination)
        else:
            mv_object(inputPath, destination)
    else:
        print("o_O what is going on here? you up to something?")
Exemple #8
0
def manifest_path(inputPath, _uuid, _type):
    """Return the path of a timestamped manifest file inside ``inputPath``.

    The file name has the form ``<_type>_manifest_<_uuid>_<timestamp>.txt``,
    where the timestamp comes from
    ``pymmFunctions.timestamp('8601-filename')``.
    """
    stamp = pymmFunctions.timestamp('8601-filename')
    manifestName = '{kind}_manifest_{uuid}_{stamp}.txt'.format(
        kind=_type, uuid=_uuid, stamp=stamp)
    return os.path.join(inputPath, manifestName)
Exemple #9
0
def main():
    """Run one ingest: validate input, build the package, process, stage.

    Steps, in order:

    1. Read the CLI arguments from ``set_args()``. In interactive mode
       the operator name and input path are prompted for instead;
       otherwise ``inputPath`` and ``operator`` must have been given.
    2. Create an ingest UUID and a temp ID, sniff the input type and
       refuse directory inputs that contain subdirectories.
    3. Create the package directories (``prep_package``) and the ingest
       log, and record the start in the ingest and system logs.
    4. For the input file, or for each file of an input directory: check
       a/v status, run the mediaconch check, move the file, collect
       metadata and make derivatives.
    5. Move the SIP and run cleanup.

    The operator/path are resolved *before* the temp ID is derived and
    the input is sniffed, so interactive answers are the values that are
    actually used for the whole ingest.

    Raises:
        SystemExit: with status 1 when required input is missing, the
            input type cannot be determined, or a directory input
            contains subdirectories.
    """
    #########################
    #### SET INGEST ARGS ####
    args = set_args()
    inputPath = args.inputPath
    operator = args.operator
    ingestType = args.ingestType
    makeProres = args.makeProres
    concatChoice = args.concat
    cleanupStrategy = args.cleanup_originals
    interactiveMode = args.interactiveMode

    # check that required vars are declared (or ask for them)
    if interactiveMode:
        # ask operator/input file
        operator = input("Please enter your name: ")
        inputPath = input(
            "Please drag the file you want to ingest into this window___"
        ).rstrip()
        inputPath = pymmFunctions.sanitize_dragged_linux_paths(inputPath)
    else:
        # Quit if there are required variables missing
        missingVars = [
            flag for flag in ('inputPath', 'operator')
            if getattr(args, flag) is None
        ]
        for flag in missingVars:
            print("\n\t\t\t\t\tCONFIGURATION PROBLEM:"
                  "\n\t\t\t\t\tYOU FORGOT TO SET " + flag +
                  ". It is required."
                  "\n\t\t\t\t\tTry again, but set " + flag +
                  " with the flag --" + flag)
        if missingVars:
            sys.exit(1)

    if not inputPath:
        # nothing below can work without an input path
        print("CONFIGURATION PROBLEM:\nNo input path was provided.")
        sys.exit(1)

    # read aip staging dir from config
    aip_staging = config['paths']['aip_staging']
    # make a uuid for the ingest
    ingestUUID = str(uuid.uuid4())
    # make a temp ID based on input path for the ingested object
    # this will get replaced by the ingest UUID during final package move ...?
    tempID = pymmFunctions.get_temp_id(inputPath)
    #### END SET INGEST ARGS ####
    #############################

    #############################
    #### TEST / SET ENV VARS ####
    # sniff whether the input is a file or directory
    inputType = sniff_input(inputPath, ingestUUID, concatChoice)
    if not inputType:
        sys.exit(1)
    if inputType == 'dir':
        source_list = pymmFunctions.list_files(inputPath)
        subdirectories = [
            _object for _object in source_list if os.path.isdir(_object)
        ]
        for _object in subdirectories:
            print("\nYou have subdirectory(ies) in your input:"
                  "\n({})\n".format(_object))
        if subdirectories:
            print("This is not currently supported. Exiting!")
            sys.exit(1)

    # create directory paths for ingest...
    (packageOutputDir, packageObjectDir, packageMetadataDir,
     packageMetadataObjects, packageLogDir) = prep_package(tempID)

    # Set up a canonical name that will be passed to each log entry.
    # For files it's the basename, for dirs it's the dir name.
    # filename is only set for single-file input; defaults are assigned
    # up front so both names are always bound.
    canonicalName = os.path.basename(inputPath)
    input_name = canonicalName
    filename = canonicalName if inputType == 'file' else ''

    # set up a dict for processing variables to pass around
    processingVars = {
        'operator': operator,
        'inputPath': inputPath,
        'tempID': tempID,
        'ingestType': ingestType,
        'ingestUUID': ingestUUID,
        'filename': filename,
        'input_name': input_name,
        'makeProres': makeProres,
        'packageOutputDir': packageOutputDir,
        'packageObjectDir': packageObjectDir,
        'packageMetadataDir': packageMetadataDir,
        'packageMetadataObjects': packageMetadataObjects,
        'packageLogDir': packageLogDir,
        'aip_staging': aip_staging
    }
    #### END TEST / SET ENV VARS ####
    #################################

    ###########################
    #### LOGGING / CLEANUP ####
    # set up a log file for this ingest
    ingestLogPath = os.path.join(
        packageLogDir,
        tempID + '_' + pymmFunctions.timestamp('now') + '_ingestfile-log.txt')
    # 'x' creates the (empty) file and fails if it already exists
    with open(ingestLogPath, 'x'):
        print('Laying a log at ' + ingestLogPath)
    ingestLogBoilerplate = {
        'ingestLogPath': ingestLogPath,
        'tempID': tempID,
        'input_name': input_name,
        'filename': filename,
        'operator': operator
    }
    pymmFunctions.ingest_log(
        # message
        'start',
        # status
        'start',
        # ingest boilerplate
        **ingestLogBoilerplate)

    # tell the system log that we are starting
    pymmFunctions.pymm_log(input_name, tempID, operator, '', 'STARTING')

    # if interactive ask about cleanup
    if interactiveMode:
        reset_cleanup_choice()

    # insert database record for this ingest (log 'ingestion start')
    # --> http://id.loc.gov/vocabulary/preservation/eventType/ins.html
    # @fixme
    # @logme # @dbme

    #### END LOGGING / CLEANUP ####
    ###############################

    ###############
    ## DO STUFF! ##
    ###############
    if inputType == 'file':
        # check that input file is actually a/v
        check_av_status(inputPath, interactiveMode,
                        ingestLogBoilerplate)  # @dbme
        mediaconch_check(inputPath, ingestType, ingestLogBoilerplate)  # @dbme
        move_input_file(processingVars)  # @logme # @dbme
        input_file_metadata(ingestLogBoilerplate,
                            processingVars)  # @logme # @dbme
        make_derivs(processingVars)  # @logme # @dbme
    elif inputType == 'dir':
        for _file in source_list:
            # set processing variables per file
            ingestLogBoilerplate['filename'] = os.path.basename(_file)  # @dbme
            processingVars['filename'] = os.path.basename(_file)  # @dbme
            processingVars['inputPath'] = _file  # @dbme
            # check that input file is actually a/v
            check_av_status(_file, interactiveMode,
                            ingestLogBoilerplate)  # @dbme
            mediaconch_check(_file, ingestType, ingestLogBoilerplate)  # @dbme
            move_input_file(processingVars)  # @dbme
            input_file_metadata(ingestLogBoilerplate, processingVars)  # @dbme
            make_derivs(processingVars)  # @dbme
        # reset the processing variables to the original state
        processingVars['filename'] = ''
        processingVars['inputPath'] = inputPath

    # MOVE SIP TO AIP STAGING
    # a) make a hashdeep manifest @fixme
    # b) move it
    move_sip(processingVars)  # @dbme
    # the manifest audit is not implemented yet, so the package is never
    # reported as verified to cleanup
    packageVerified = False
    # c) audit the hashdeep manifest @fixme
    # packageVerified = result of audit @fixme

    # FINISH LOGGING
    do_cleanup(cleanupStrategy, packageVerified, inputPath, packageOutputDir,
               'done')  # @dbme