def main():
    '''
    Copy the input file or directory to the destination.

    Reads the command line arguments from set_args(), checks that the
    required ones (inputPath, destination) were given, works out the rsync
    log path, and hands the input to copy_dir() or copy_file() depending on
    what pymmFunctions.dir_or_file() reports.

    Exits with status 1 when a required argument is missing or when the
    input is neither a file nor a directory.
    '''
    config = pymmFunctions.read_config()
    args = set_args()
    requiredArgs = ['inputPath', 'destination']
    inputPath = args.inputPath
    destination = args.destination
    loglevel = args.loglevel
    logDir = args.logDir

    # Quit if there are required variables missing.
    # An empty string is treated the same as an unset argument, and every
    # missing argument is reported before exiting.
    missingArgs = [_arg for _arg in requiredArgs if not getattr(args, _arg)]
    for _arg in missingArgs:
        print(
            "CONFIGURATION PROBLEM:\n"
            "You forgot to set {0}. It is required.\n"
            "Try again, but set {0} with the flag --{0}\n".format(_arg))
    if missingArgs:
        # non-zero status so calling scripts can tell this was a failure
        sys.exit(1)

    # set up rsync log
    rsyncLogpath = ''
    if loglevel == 'all':
        # NOTE: pymmLogpath is not consumed below yet.
        # AT WHAT POINT WILL WE ACTUALLY WANT TO PYMMLOG A COPY?
        # FINAL AIP XFER?
        pymmLogpath = os.path.join(
            config['logging']['pymm_log_dir'], 'pymm_log.txt')
        try:
            rsyncLogpath = os.path.join(
                logDir,
                'rsync_log_'
                + pymmFunctions.get_base(inputPath)
                + '_'
                + pymmFunctions.timestamp('now')
                + '.txt')
        except Exception:
            # best effort: carry on without an rsync log
            # (e.g. logDir was not set)
            print("there was a problem getting the rsync log path ....")
            rsyncLogpath = ''

    # sniff what the input is
    dir_or_file = pymmFunctions.dir_or_file(inputPath)
    if dir_or_file is False:
        print(
            "oy you've got big problems. " + inputPath
            + " is not a directory or a file. what is it? is it a ghost?")
        sys.exit(1)
    # copy the input according to its type
    elif dir_or_file == 'dir':
        # add trailing slash for rsync destination directory
        if not destination.endswith('/'):
            destination = destination + '/'
        copy_dir(inputPath, rsyncLogpath, destination)
    elif dir_or_file == 'file':
        copy_file(inputPath, rsyncLogpath, destination)
    else:
        print("o_O what is going on here? you up to something?")
        sys.exit(1)
def add_pbcore_md5_location(CurrentIngest):
    '''
    Add each component file's MD5 and a repository location to the
    PBCore document of the ingest.

    Has to be called after the object manifest for the SIP has been
    created and the manifest has been parsed by report_SIP_fixity().

    Does nothing when the input object has no PBCore file (empty string).
    Otherwise the PBCore XML is written back to the same file at the end.
    '''
    inputObject = CurrentIngest.InputObject
    if inputObject.pbcoreFile == '':
        return

    pbcorePath = inputObject.pbcoreFile
    pbcoreDoc = pbcore.PBCoreDocument(pbcorePath)

    for component in inputObject.ComponentObjects:
        # only component *files* get instantiation data
        if component.objectCategory != 'file':
            continue

        # the md5 goes in as an identifier on the
        # pbcoreInstantiation that matches the file's basename
        md5 = component.md5hash
        md5Attributes = {
            "source": "BAMPFA {}".format(pymmFunctions.timestamp('iso8601')),
            "annotation": "messageDigest",
            "version": "MD5"
        }
        makePbcore.add_element_to_instantiation(
            pbcoreDoc,
            component.basename,
            'instantiationIdentifier',
            md5Attributes,
            md5)

        # 'BAMPFA Digital Repository' goes in as instantiationLocation
        # (no attributes on this element)
        makePbcore.add_element_to_instantiation(
            pbcoreDoc,
            component.basename,
            'instantiationLocation',
            {},
            "BAMPFA Digital Repository")

    makePbcore.xml_to_file(pbcoreDoc, pbcorePath)
def create_ingestLog(self):
    '''
    Create the (empty) log file for this ingest and remember its path
    in self.ingestLogPath.

    The file is named <tempID>_<timestamp>_ingest-log.txt and lives in
    self.packageLogDir. It is opened in exclusive-creation mode, so a
    FileExistsError is raised if a file with that name is already there.
    '''
    logName = '{}_{}_ingest-log.txt'.format(
        self.tempID,
        pymmFunctions.timestamp('now'))
    self.ingestLogPath = os.path.join(self.packageLogDir, logName)

    # nothing is written here; opening with 'x' just creates the file
    with open(self.ingestLogPath, 'x'):
        print('Laying a log at ' + self.ingestLogPath)
def insert_event(CurrentIngest, eventType, outcome, status):
    '''
    Report an event on the current target object to the database.

    Returns whatever dbReporters.EventInsert.report_to_db() returns
    (used as the event ID), or None when database reporting is not
    switched on in the process arguments.
    '''
    # the THING the event is being performed on
    target = CurrentIngest.currentTargetObject
    processArgs = CurrentIngest.ProcessArguments

    targetIdentifier = target.objectIdentifierValue
    # name of the computer
    machine = processArgs.computer
    # name of the program, script, or function doing the event
    callingProcess = CurrentIngest.caller
    operator = processArgs.user
    targetDatabaseID = target.databaseID

    # only a value equal to True turns reporting on
    if not processArgs.databaseReporting == True:
        return None

    # insert the event
    reporter = dbReporters.EventInsert(
        eventType,
        targetDatabaseID,
        targetIdentifier,
        pymmFunctions.timestamp('iso8601'),
        status,
        outcome,
        callingProcess,
        machine,
        operator,
        eventID=None)
    return reporter.report_to_db()
def ingest_log(CurrentIngest, event, outcome, status):
    '''
    Append an entry for `event` to the log file of the current ingest
    (CurrentIngest.ingestLogPath).

    The "ingestion start" event gets a multi-line header block describing
    the object, the ingest and the system. Every other event gets a single
    pipe-delimited line with the status, event type, outcome, operator and
    current target object. When the input object has a filename, the
    filename (and, for regular events, the input filepath) is included too.

    Each entry is followed by a blank line.
    '''
    stamp = pymmFunctions.timestamp("iso8601")
    inputObject = CurrentIngest.InputObject
    canonicalName = inputObject.canonicalName
    inputPath = inputObject.inputPath
    filename = inputObject.filename
    inputTypeDetail = inputObject.inputTypeDetail
    tempID = CurrentIngest.tempID
    user = CurrentIngest.ProcessArguments.user
    ingestLogPath = CurrentIngest.ingestLogPath

    # Describe the target by its path when it has one, otherwise by its
    # identifier. Only a missing attribute triggers the fallback; any
    # other error is allowed to surface.
    target = CurrentIngest.currentTargetObject
    try:
        _object = target.inputPath
    except AttributeError:
        _object = target.objectIdentifierValue

    hasFilename = filename not in ("", None)

    if event == "ingestion start":
        banner = "#" * 50
        systemInfo = CurrentIngest.systemInfo
        workingDir = CurrentIngest.ProcessArguments.outdir_ingestsip
        ingestUUID = CurrentIngest.ingestUUID

        stuffToLog = [
            banner + "\n\n" + stamp + "\n\n",
            "Event Type: ingestion start\n",
            "Object Canonical Name: {}\n".format(canonicalName),
        ]
        if hasFilename:
            stuffToLog.append("Object Filename: {}\n".format(filename))
        stuffToLog.extend([
            "Object Input Filepath: {}\n".format(inputPath),
            "Object Temp ID: {}\n".format(tempID),
            "Object Type: {}\n".format(inputTypeDetail),
            "Ingest UUID: {}\n".format(ingestUUID),
            "Ingest Working Directory: {}\n".format(workingDir),
            "Operator: {}\n".format(user),
            "\n### SYSTEM INFO: ### \n{}\n".format(systemInfo),
            banner,
        ])
    else:
        stuffToLog = [
            "{} | ".format(stamp),
            "Status: {} | ".format(status),
            "Event Type: {} | ".format(event),
            "Event Outcome: {} | ".format(outcome),
        ]
        if hasFilename:
            stuffToLog.append("Object Filename: {} | ".format(filename))
            stuffToLog.append("Object Filepath: {} | ".format(inputPath))
        stuffToLog.extend([
            "Operator: {} | ".format(user),
            "Current Target Object: {} | ".format(_object),
        ])

    # append-only: nothing is read back from the log here
    with open(ingestLogPath, "a") as ingestLog:
        ingestLog.writelines(stuffToLog)
        ingestLog.write("\n\n")
def pymm_log(CurrentIngest, event, outcome, status):
    '''
    Append an entry to the system-wide pymm log (pymm_log.txt in the
    configured pymm_log_dir).

    Three layouts, chosen by `status`:
    - 'STARTING': a multi-line block describing the object and the ingest,
      introduced by a row of ampersands.
    - 'ENDING' / 'ABORTING': a one-line "Ingestion end" entry with the
      outcome, closed off by a row of hash signs.
    - anything else: a one-line entry with status, object name, event
      type and event outcome.
    '''
    check_pymm_log_exists()
    logDir = pymmFunctions.read_config()['logging']['pymm_log_dir']
    logFile = os.path.join(logDir, 'pymm_log.txt')
    when = pymmFunctions.timestamp('iso8601')

    systemInfo = CurrentIngest.systemInfo
    inputObject = CurrentIngest.InputObject
    rootPath = inputObject.inputPath
    canonicalName = inputObject.canonicalName
    typeDetail = inputObject.inputTypeDetail
    processArgs = CurrentIngest.ProcessArguments
    operator = processArgs.user
    ingestUUID = CurrentIngest.ingestUUID
    workingDir = processArgs.outdir_ingestsip

    # I think basename gets updated depending on
    # what is getting logged... ? @fixme
    objectName = os.path.basename(rootPath)

    if status == 'STARTING':
        pieces = [
            ('&' * 50) + '\n\n',
            when,
            "\nEvent type: Ingestion start\n",
            "Object canonical name: {}\n".format(canonicalName),
            "Object filepath: {}\n".format(rootPath),
            "Object type: {}\n".format(typeDetail),
            "Ingest UUID: {}\n".format(ingestUUID),
            "Operator: {}\n".format(operator),
            "Ingest working directory: {}\n".format(workingDir),
            "\n### SYSTEM INFO: ### \n{}".format(systemInfo),
            '\n',
        ]
    elif status in ("ENDING", "ABORTING"):
        pieces = [
            when,
            " | Status: {} |".format(status),
            " | Event type: Ingestion end | ",
            "Outcome: {}".format(outcome),
            '\n\n' + ('#' * 50) + "\n\n",
        ]
    else:
        pieces = [
            when,
            " | Status: {}".format(status),
            " | Object name: {}".format(objectName),
            " | Event type: {}".format(event),
            " | Event outcome: {}".format(outcome),
            '\n',
        ]

    with open(logFile, 'a') as log:
        log.write(''.join(pieces))
def main():
    '''
    Move or copy the input object to the destination.

    When the movingSIP argument is set, the input is handed to
    move_n_verify_sip(). Otherwise the input is transferred with
    rsync_object(), or with mv_object() when input and destination are on
    the same filesystem (for single files, only if useMV has not been
    switched off).

    Returns:
        - (stagedSIPpath, safe) from move_n_verify_sip() when movingSIP
          is set
        - (False, False) when the input is neither a file nor a directory
        - None in every other case

    Exits with status 1 when a required argument is missing.
    '''
    config = pymmFunctions.read_config()
    args = set_args()
    requiredArgs = ['inputPath', 'destination']
    inputPath = args.inputPath
    movingSIP = args.movingSIP
    destination = args.destination
    loglevel = args.loglevel
    logDir = args.logDir
    useMV = args.useMV

    # Quit if there are required variables missing.
    # This runs before anything touches the paths; an empty string counts
    # as missing, and every missing argument is reported before exiting.
    missingArgs = [_arg for _arg in requiredArgs if not getattr(args, _arg)]
    for _arg in missingArgs:
        print(
            "CONFIGURATION PROBLEM:\n"
            "You forgot to set {0}. It is required.\n"
            "Try again, but set {0} with the flag --{0}\n".format(_arg))
    if missingArgs:
        sys.exit(1)

    # see if the input/destination are on the same filesystem
    # if so, we will use mv rather than rsync for efficiency
    try:
        inputFS = pymmFunctions.get_filesystem_id(inputPath)
        destFS = pymmFunctions.get_filesystem_id(destination)
        sameFilesystem = bool(inputFS == destFS)
    except Exception:
        # best effort: if the filesystems can't be identified, use rsync
        sameFilesystem = False

    if not movingSIP:
        # set up rsync log
        if loglevel == 'all':
            # NOTE: pymmLogpath is not consumed below
            pymmLogpath = os.path.join(
                config['logging']['pymm_log_dir'], 'pymm_log.txt')
            try:
                rsyncLogPath = os.path.join(
                    logDir,
                    'rsync_log_{}_{}.txt'.format(
                        pymmFunctions.get_base(inputPath),
                        pymmFunctions.timestamp('now')))
            except Exception:
                # best effort: carry on without an rsync log
                print("there was a problem getting the rsync log path ....")
                rsyncLogPath = ''
        else:
            # NOTE(review): '.' rather than '' here; confirm this is what
            # rsync_object() expects for "no log"
            rsyncLogPath = '.'

        # sniff what the input is
        dir_or_file = pymmFunctions.dir_or_file(inputPath)
        if dir_or_file is False:
            print(
                "oy you've got big problems. {} is not a directory or a file."
                " what is it? is it a ghost?".format(inputPath))
            return False, False
        # copy the input according to its type
        elif dir_or_file == 'dir':
            # add trailing slash for rsync destination directory
            if not destination.endswith('/'):
                destination = destination + '/'
            if sameFilesystem:
                mv_object(inputPath, destination)
            else:
                rsync_object(inputPath, rsyncLogPath, destination)
        elif dir_or_file == 'file':
            # mv only on the same filesystem AND when useMV has not been
            # explicitly set to False
            if sameFilesystem and not useMV == False:
                mv_object(inputPath, destination)
            else:
                rsync_object(inputPath, rsyncLogPath, destination)
        else:
            print("o_O what is going on here? you up to something?")
    else:
        stagedSIPpath, safe = move_n_verify_sip(inputPath, destination)
        print(stagedSIPpath)
        return stagedSIPpath, safe
def manifest_path(inputPath, _uuid, _type):
    '''
    Return the path for a manifest file inside `inputPath`.

    The file name has the form <_type>_manifest_<_uuid>_<timestamp>.txt,
    where the timestamp comes from pymmFunctions.timestamp('8601-filename').
    Nothing is created on disk.
    '''
    manifestName = '{kind}_manifest_{uuid}_{stamp}.txt'.format(
        kind=_type,
        uuid=_uuid,
        stamp=pymmFunctions.timestamp('8601-filename'))
    return os.path.join(inputPath, manifestName)
def main():
    '''
    Run an ingest of a single file or of a flat directory of files.

    Steps, in order:
    1. read the arguments and, in interactive mode, ask the operator for
       their name and the input path
    2. sniff the input type, refuse directories that contain
       subdirectories, and create the package directories
    3. start the ingest log and the system log
    4. for each input file: check it is a/v, run the mediaconch check,
       move it, gather metadata and make derivatives
    5. move the SIP to AIP staging and clean up

    Exits with status 1 when a required argument is missing, when the
    input type can't be determined, or when the input directory contains
    subdirectories.
    '''
    #########################
    #### SET INGEST ARGS ####
    args = set_args()
    inputPath = args.inputPath
    operator = args.operator
    report_to_db = args.database_reporting
    ingestType = args.ingestType
    makeProres = args.makeProres
    concatChoice = args.concat
    cleanupStrategy = args.cleanup_originals
    interactiveMode = args.interactiveMode

    # check that required vars are declared BEFORE anything is derived
    # from them: the temp ID, the input type and the package directories
    # all depend on the final value of inputPath
    requiredVars = ['inputPath', 'operator']
    if interactiveMode == False:
        # Quit if there are required variables missing
        missingVars = [
            flag for flag in requiredVars if getattr(args, flag) is None
        ]
        for flag in missingVars:
            print(
                "\nCONFIGURATION PROBLEM:\n"
                "YOU FORGOT TO SET {0}. It is required.\n"
                "Try again, but set {0} with the flag --{0}".format(flag))
        if missingVars:
            sys.exit(1)
    else:
        # ask operator/input file
        operator = input("Please enter your name: ")
        inputPath = input(
            "Please drag the file you want to ingest into this window___"
        ).rstrip()
        inputPath = pymmFunctions.sanitize_dragged_linux_paths(inputPath)

    # read aip staging dir from config
    aip_staging = config['paths']['aip_staging']
    # make a uuid for the ingest
    ingestUUID = str(uuid.uuid4())
    # make a temp ID based on input path for the ingested object
    # this will get replaced by the ingest UUID during final package move ...?
    tempID = pymmFunctions.get_temp_id(inputPath)
    #### END SET INGEST ARGS ####
    #############################

    #############################
    #### TEST / SET ENV VARS ####
    # sniff whether the input is a file or directory
    inputType = sniff_input(inputPath, ingestUUID, concatChoice)
    if not inputType:
        sys.exit(1)
    if inputType == 'dir':
        source_list = pymmFunctions.list_files(inputPath)
        subs = 0
        for _object in source_list:
            if os.path.isdir(_object):
                subs += 1
                print(
                    "\nYou have subdirectory(ies) in your input:"
                    "\n({})\n".format(_object))
        if subs > 0:
            print("This is not currently supported. Exiting!")
            sys.exit(1)

    # create directory paths for ingest...
    packageOutputDir, packageObjectDir, packageMetadataDir,\
        packageMetadataObjects, packageLogDir = prep_package(tempID)

    # Set up a canonical name that will be passed to each log entry.
    # For files it's the basename, for dirs it's the dir name.
    canonicalName = os.path.basename(inputPath)
    if inputType == 'file':
        filename = input_name = canonicalName
    elif inputType == 'dir':
        filename = ''
        input_name = canonicalName

    # set up a dict for processing variables to pass around
    processingVars = {
        'operator': operator,
        'inputPath': inputPath,
        'tempID': tempID,
        'ingestType': ingestType,
        'ingestUUID': ingestUUID,
        'filename': filename,
        'input_name': input_name,
        'makeProres': makeProres,
        'packageOutputDir': packageOutputDir,
        'packageObjectDir': packageObjectDir,
        'packageMetadataDir': packageMetadataDir,
        'packageMetadataObjects': packageMetadataObjects,
        'packageLogDir': packageLogDir,
        'aip_staging': aip_staging
    }
    #### END TEST / SET ENV VARS ####
    #################################

    ###########################
    #### LOGGING / CLEANUP ####
    # set up a log file for this ingest
    ingestLogPath = os.path.join(
        packageLogDir,
        tempID + '_' + pymmFunctions.timestamp('now') + '_ingestfile-log.txt')
    # 'x' mode: create the file, fail if it is already there
    with open(ingestLogPath, 'x'):
        print('Laying a log at ' + ingestLogPath)
    ingestLogBoilerplate = {
        'ingestLogPath': ingestLogPath,
        'tempID': tempID,
        'input_name': input_name,
        'filename': filename,
        'operator': operator
    }
    pymmFunctions.ingest_log(
        # message
        'start',
        # status
        'start',
        # ingest boilerplate
        **ingestLogBoilerplate)

    # tell the system log that we are starting
    pymmFunctions.pymm_log(input_name, tempID, operator, '', 'STARTING')

    # if interactive ask about cleanup
    # NOTE(review): the return value is discarded; confirm that
    # reset_cleanup_choice() is not meant to update cleanupStrategy
    if interactiveMode:
        reset_cleanup_choice()

    # insert database record for this ingest (log 'ingestion start')
    # --> http://id.loc.gov/vocabulary/preservation/eventType/ins.html
    # @fixme
    # @logme
    # @dbme
    #### END LOGGING / CLEANUP ####
    ###############################

    ###############
    ## DO STUFF! ##
    ###############
    if inputType == 'file':
        # check that input file is actually a/v
        check_av_status(inputPath, interactiveMode, ingestLogBoilerplate)
        # @dbme
        mediaconch_check(inputPath, ingestType, ingestLogBoilerplate)
        # @dbme
        move_input_file(processingVars)
        # @logme
        # @dbme
        input_file_metadata(ingestLogBoilerplate, processingVars)
        # @logme
        # @dbme
        make_derivs(processingVars)
        # @logme
        # @dbme
    elif inputType == 'dir':
        for _file in source_list:
            # set processing variables per file
            ingestLogBoilerplate['filename'] = os.path.basename(_file)
            # @dbme
            processingVars['filename'] = os.path.basename(_file)
            # @dbme
            processingVars['inputPath'] = _file
            # @dbme
            # check that input file is actually a/v
            check_av_status(_file, interactiveMode, ingestLogBoilerplate)
            # @dbme
            mediaconch_check(_file, ingestType, ingestLogBoilerplate)
            # @dbme
            move_input_file(processingVars)
            # @dbme
            input_file_metadata(ingestLogBoilerplate, processingVars)
            # @dbme
            make_derivs(processingVars)
            # @dbme
        # reset the processing variables to the original state
        processingVars['filename'] = ''
        processingVars['inputPath'] = inputPath

    # MOVE SIP TO AIP STAGING
    # a) make a hashdeep manifest @fixme
    # b) move it
    move_sip(processingVars)
    # @dbme
    packageVerified = False
    # c) audit the hashdeep manifest @fixme
    # packageVerified = result of audit @fixme

    # FINISH LOGGING
    do_cleanup(
        cleanupStrategy, packageVerified, inputPath, packageOutputDir, 'done')
    # @dbme