def gather_metadata(source):
    '''
    Loops through all subfolders that contain pbcore_csv and then harvests
    the metadata and stores it in a single file for the purposes of batch
    import into the DB TEXTWORKS technical database.

    Returns the path of the collated CSV, which is written to the desktop
    logs directory with a timestamped filename.
    '''
    metadata = []
    # sorted() gives a deterministic traversal order across platforms.
    for root, _, filenames in sorted(os.walk(source)):
        for filename in filenames:
            if filename.endswith('pbcore.csv'):
                with open(os.path.join(root, filename), 'r') as csv_file:
                    csv_rows = csv_file.readlines()
                # Bug fix: skip files without a header plus one data row -
                # the original indexed csv_rows[1] unconditionally and
                # raised IndexError on short/empty files.
                if len(csv_rows) < 2:
                    continue
                if not metadata:
                    # Take the header row from the first CSV only.
                    metadata.append(csv_rows[0])
                metadata.append(csv_rows[1])
    collated_pbcore = os.path.join(
        ififuncs.make_desktop_logs_dir(),
        time.strftime("%Y-%m-%dT%H_%M_%S_pbcore.csv")
    )
    with open(collated_pbcore, 'w') as fo:
        # Rows retain their original trailing newlines from readlines().
        for row in metadata:
            fo.write(row)
    return collated_pbcore
def log_report(log_names): ''' Analyzes all the moveit.py logs on the desktop and print a report. ''' desktop_logs_dir = ififuncs.make_desktop_logs_dir() for i in log_names: if os.path.isfile(i): print "%-*s : %s" % (50, os.path.basename(i)[:-24], analyze_log(i)) else: print i, 'can\'t find log file, trying again...' log_names.remove(i) for logs in os.listdir(desktop_logs_dir): # look at log filename minus the seconds and '.log' if os.path.basename(i)[:-7] in logs: # make sure that the alternate log filename is more recent if int( os.path.basename(logs)[-12:-4].replace('_', '') ) > int( os.path.basename(i)[-12:-4].replace('_', '')): print 'trying to analyze %s' % logs print "%-*s : %s" % ( 50, os.path.basename(logs)[:-24], analyze_log( os.path.join(desktop_logs_dir, logs)) ) log_names.append(os.path.join(desktop_logs_dir, logs))
def main(): ''' Launches the other functions wihch attempt to run multiple copyit.py instances if manifests and matching sidecar directories are found inside of the input directory. ''' args = parse_args() all_files = find_manifest(args) processed_dirs = [] log_names = [] print '\n\n**** All of these folders will be copied to %s\n' % args.o for i in all_files: print i for i in all_files: absolute_path = os.path.join(args.o, os.path.basename(i)) if os.path.isdir(absolute_path): print('%s already exists, skipping') % (absolute_path) else: desktop_logs_dir = make_desktop_logs_dir() if args.l: log_name = copyit.main( ['-l', os.path.join(args.input, i), args.o] ) log_names.append(log_name) else: log_name = copyit.main( [os.path.join(args.input, i), args.o] ) log_names.append(log_name) processed_dirs.append(os.path.basename(os.path.join(args.input, i))) print '********\nWARNING - Please check the ifiscripts_logs directory on your Desktop to verify if ALL of your transfers were successful' analyze_reports(log_names, desktop_logs_dir)
def log_report(log_names):
    '''
    Analyzes all the moveit.py logs on the desktop and prints a report.

    log_names may be mutated: stale entries are removed and any more
    recent replacement logs found on the desktop are appended.
    '''
    desktop_logs_dir = ififuncs.make_desktop_logs_dir()
    # Bug fix: iterate over a snapshot. The original looped over log_names
    # itself while calling remove()/append() on it, which makes the
    # iterator skip entries.
    for i in list(log_names):
        if os.path.isfile(i):
            print("%-*s : %s" % (50, os.path.basename(i)[:-24], analyze_log(i)))
        else:
            # Bug fix: the 2to3-style print((i, '...')) printed a tuple
            # repr; passing two arguments restores the Python 2 output.
            print(i, 'can\'t find log file, trying again...')
            log_names.remove(i)
            for logs in os.listdir(desktop_logs_dir):
                # look at log filename minus the seconds and '.log'
                if os.path.basename(i)[:-7] in logs:
                    # make sure that the alternate log filename is more recent
                    if int(os.path.basename(logs)[-12:-4].replace(
                            '_', '')) > int(
                                os.path.basename(i)[-12:-4].replace('_', '')):
                        print('trying to analyze %s' % logs)
                        print("%-*s : %s" % (
                            50, os.path.basename(logs)[:-24],
                            analyze_log(os.path.join(desktop_logs_dir, logs))))
                        log_names.append(os.path.join(desktop_logs_dir, logs))
def setup(full_path, user): ''' Sets up filepaths for the rest of the script. This also checks if a mediaconch xml already exists. ''' desktop_logs_dir = ififuncs.make_desktop_logs_dir() log_name_source_ = os.path.basename(full_path) + time.strftime("_%Y_%m_%dT%H_%M_%S") log_name_source = "%s/%s_mediaconch_validation.log" % (desktop_logs_dir, log_name_source_) filename = os.path.basename(full_path) object_dir = os.path.dirname(full_path) parent_dir = os.path.dirname(object_dir) sip_root = os.path.dirname(parent_dir) metadata_dir = os.path.join(parent_dir, 'metadata') manifest = os.path.join( sip_root, os.path.basename(parent_dir) + '_manifest.md5' ) if not os.path.isfile(manifest): print 'manifest does not exist %s' % manifest return 'skipping' if os.path.isdir(metadata_dir): mediaconch_xmlfile_basename = '%s_mediaconch_validation.xml' % filename mediaconch_xmlfile = os.path.join( metadata_dir, mediaconch_xmlfile_basename ) if os.path.isfile(mediaconch_xmlfile): print 'mediaconch xml already exists' return 'skipping' else: print 'no metadata directory found. Exiting.' return log_name_source, user, mediaconch_xmlfile, manifest, full_path, parent_dir
def setup(full_path, user):
    '''
    Sets up filepaths for the rest of the script.
    This also checks if a mediaconch xml already exists.

    Returns 'skipping' when the package should not be processed, otherwise
    a tuple of (log_name_source, user, mediaconch_xmlfile, manifest,
    full_path, parent_dir).
    '''
    desktop_logs_dir = ififuncs.make_desktop_logs_dir()
    log_name_source_ = os.path.basename(full_path) + time.strftime(
        "_%Y_%m_%dT%H_%M_%S")
    log_name_source = "%s/%s_mediaconch_validation.log" % (desktop_logs_dir, log_name_source_)
    filename = os.path.basename(full_path)
    # full_path is expected to live in <sip_root>/<parent>/objects/<file>,
    # so three dirname() hops reach the SIP root.
    object_dir = os.path.dirname(full_path)
    parent_dir = os.path.dirname(object_dir)
    sip_root = os.path.dirname(parent_dir)
    metadata_dir = os.path.join(parent_dir, 'metadata')
    manifest = os.path.join(sip_root, os.path.basename(parent_dir) + '_manifest.md5')
    if not os.path.isfile(manifest):
        print('manifest does not exist %s' % manifest)
        return 'skipping'
    if os.path.isdir(metadata_dir):
        mediaconch_xmlfile_basename = '%s_mediaconch_validation.xml' % filename
        mediaconch_xmlfile = os.path.join(metadata_dir, mediaconch_xmlfile_basename)
        if os.path.isfile(mediaconch_xmlfile):
            print('mediaconch xml already exists')
            return 'skipping'
    else:
        print('no metadata directory found. Exiting.')
        # Bug fix: the original fell through to the final return and raised
        # NameError because mediaconch_xmlfile was never assigned.
        return 'skipping'
    return log_name_source, user, mediaconch_xmlfile, manifest, full_path, parent_dir
def main():
    '''
    Launches the other functions which attempt to run multiple copyit.py
    instances if manifests and matching sidecar directories are found
    inside of the input directory.
    '''
    args = parse_args()
    all_files = find_manifest(args)
    processed_dirs = []
    log_names = []
    # Bug fix: hoisted out of the copy loop. The original only assigned
    # this in the else branch, so analyze_reports() raised NameError when
    # every package already existed in the destination and was skipped.
    desktop_logs_dir = make_desktop_logs_dir()
    print('\n\n - **** All of these folders will be copied to %s\n' % args.o)
    # First pass: preview what will be copied vs skipped.
    for i in all_files:
        absolute_path = os.path.join(args.o, os.path.basename(i))
        if os.path.isdir(absolute_path):
            print(' - %s already exists, skipping' % absolute_path)
        else:
            print(' - %s will be copied' % i)
    # Brief pause so the user can read the preview before copying starts.
    time.sleep(2)
    # Second pass: actually run copyit on each new package.
    for i in all_files:
        absolute_path = os.path.join(args.o, os.path.basename(i))
        if os.path.isdir(absolute_path):
            print(' - %s already exists, skipping' % absolute_path)
        else:
            copyit_cmd = [os.path.join(args.input, i), args.o]
            # Only one of -l/-y is forwarded, mirroring the CLI options.
            if args.l:
                copyit_cmd.append('-l')
            elif args.y:
                copyit_cmd.append('-y')
            log_name = copyit.main(copyit_cmd)
            log_names.append(log_name)
            processed_dirs.append(os.path.basename(os.path.join(args.input, i)))
    print(' - ********\nWARNING - Please check the ifiscripts_logs directory on your Desktop to verify if ALL of your transfers were successful')
    analyze_reports(log_names, desktop_logs_dir)
def main(): ''' Launches functions ''' log_names = [] args = parse_args() desktop_logs_dir = ififuncs.make_desktop_logs_dir() for i in args.i: log_names.append(copyit.main([i, args.o])) print '********\nWARNING - Please check the ifiscripts_logs directory on your Desktop to verify if ALL of your transfers were successful' masscopy.analyze_reports(log_names, desktop_logs_dir)
def main():
    '''
    Launches the commands that generate the CSV error report.

    Walks the input for .m2t files, runs an ffprobe error/log analysis on
    each file not already present in the report, and appends one CSV row
    per detected error (or a 'no errors' row) to a timestamped CSV in the
    desktop logs directory.
    '''
    args = parse_args()
    source = args.input
    csv_filename = os.path.join(
        ififuncs.make_desktop_logs_dir(),
        time.strftime("%Y-%m-%dT%H_%M_%S_videoerrors.csv"))
    print('Report stored as %s' % csv_filename)
    if not os.path.isfile(csv_filename):
        ififuncs.create_csv(
            csv_filename,
            ['filename', 'start_time', 'timestamp', 'error', 'notes'])
    for root, _, filenames in os.walk(source):
        for filename in filenames:
            if not filename.endswith('.m2t'):
                continue
            # Skip files already recorded in the report.
            with open(csv_filename, 'r') as fo:
                if filename in fo.read():
                    continue
            # Bug fix: start_time was unbound in the except handler when
            # the getffprobe() call itself raised CalledProcessError.
            start_time = ''
            try:
                start_time = bitc.getffprobe(
                    'start_time', 'stream=start_time',
                    os.path.join(root, filename)).rsplit()[0]
                json_output = subprocess.check_output([
                    'ffprobe', '-sexagesimal',
                    os.path.join(root, filename),
                    '-show_error', '-show_log', '16',
                    '-show_frames', '-of', 'json'
                ])
                errors = False
                ffprobe_dict = json.loads(json_output)
                # Each top-level value is iterated for entries carrying a
                # 'logs' key, which indicates a decode error for that frame.
                for values in ffprobe_dict:
                    for more in ffprobe_dict[values]:
                        if 'logs' in more:
                            errors = True
                            print(more['pkt_pts_time'], more['logs'])
                            ififuncs.append_csv(
                                csv_filename,
                                [filename, start_time,
                                 more['pkt_pts_time'], more['logs'], ''])
                if not errors:
                    ififuncs.append_csv(csv_filename, [
                        filename, start_time, 'no errors', 'no errors', ''
                    ])
            except subprocess.CalledProcessError:
                ififuncs.append_csv(csv_filename, [
                    filename, start_time,
                    'script error - process file manually', '', ''
                ])
    print('Report stored as %s' % csv_filename)
def main():
    '''
    Entry point: configures append-mode logging to a desktop logfile and
    kicks off the manifest check for the supplied input.
    '''
    parser = make_parser()
    args = parser.parse_args()
    desktop_logs_dir = make_desktop_logs_dir()
    stamped_basename = os.path.basename(args.input) + time.strftime("_%Y_%m_%dT%H_%M_%S")
    log = "%s/%s_fixity_validation.log" % (desktop_logs_dir, stamped_basename)
    root = logging.getLogger()
    logging.basicConfig(filename=log, filemode='a', level=logging.INFO)
    startup_message = 'Started at %s using the following workstation: %s' % (
        time.strftime("%Y-%m-%dT%H:%M:%S "), getpass.getuser())
    logging.info(startup_message)
    #root.setLevel(logging.DEBUG)
    check_manifest(args.input)
def main(args_): ''' Batch process packages by running accession.py and makepbcore.py ''' args = parse_args(args_) oe_list = [] if args.csv: for line_item in ififuncs.extract_metadata(args.csv)[0]: oe_number = line_item['Object Entry'].lower() # this transforms OE-#### to oe#### transformed_oe = oe_number[:2] + oe_number[3:] oe_list.append(transformed_oe) if args.reference: reference_number = get_filmographic_number(args.reference) else: reference_number = ififuncs.get_reference_number() user = ififuncs.get_user() accession_number = get_number(args) accession_digits = int(accession_number[3:]) to_accession = initial_check(args, accession_digits, oe_list, reference_number) register = accession.make_register() if args.csv: desktop_logs_dir = ififuncs.make_desktop_logs_dir() new_csv = os.path.join(desktop_logs_dir, os.path.basename(args.csv)) filmographic_dict, headers = ififuncs.extract_metadata(args.csv) for oe_package in to_accession: for filmographic_record in filmographic_dict: if os.path.basename(oe_package).upper( )[:2] + '-' + os.path.basename( oe_package)[2:] == filmographic_record['Object Entry']: filmographic_record['Reference Number'] = to_accession[ oe_package][1] with open(new_csv, 'w') as csvfile: fieldnames = headers writer = csv.DictWriter(csvfile, fieldnames=fieldnames) writer.writeheader() for i in filmographic_dict: writer.writerow(i) if args.dryrun: sys.exit() proceed = ififuncs.ask_yes_no('Do you want to proceed?') if proceed == 'Y': for package in sorted(to_accession.keys()): accession.main([ package, '-user', user, '-p', '-f', '-number', to_accession[package][0], '-reference', to_accession[package][1], '-register', register ]) collated_pbcore = gather_metadata(args.input) print '\nA helper accessions register has been generated in order to help with registration - located here: %s' % register print '\nA modified filmographic CSV has been generated with added reference numbers - located here: %s' % new_csv print '\nA collated CSV consisting of 
each PBCore report has been generated for batch database import - located here: %s' % collated_pbcore
def main():
    '''
    Entry point: sets up an append-mode logfile on the desktop, records a
    startup message, then runs the manifest check on the input directory.
    '''
    parser = make_parser()
    args = parser.parse_args()
    desktop_logs_dir = make_desktop_logs_dir()
    stamped = os.path.basename(args.input) + time.strftime("_%Y_%m_%dT%H_%M_%S")
    log = "%s/%s_fixity_validation.log" % (desktop_logs_dir, stamped)
    root = logging.getLogger()
    logging.basicConfig(filename=log, filemode='a', level=logging.INFO)
    logging.info(
        'Started at %s using the following workstation: %s' % (
            time.strftime("%Y-%m-%dT%H:%M:%S "), getpass.getuser()))
    #root.setLevel(logging.DEBUG)
    check_manifest(args.input)
def make_register():
    '''
    This sends a placeholder accessions register to the desktop logs
    directory. This should get rid of some of the more painful, repetitive
    identifier matching.
    '''
    csv_name = 'register_' + time.strftime("%Y-%m-%dT%H_%M_%S.csv")
    register = os.path.join(ififuncs.make_desktop_logs_dir(), csv_name)
    headings = (
        'entry number',
        'accession number',
        'date acquired',
        'date accessioned',
        'acquired from',
        'acquisition method',
        'simple name; basic description; identification; historical information',
        'notes'
    )
    ififuncs.create_csv(register, headings)
    return register
def make_oe_register():
    '''
    This sends a placeholder oe register to the desktop logs directory.
    This should get rid of some of the more painful, repetitive identifier
    matching.
    '''
    csv_name = 'oe_helper_register_' + time.strftime("%Y-%m-%dT%H_%M_%S.csv")
    oe_register = os.path.join(ififuncs.make_desktop_logs_dir(), csv_name)
    headings = (
        'OE No.',
        'Date Received',
        'Quantity',
        'Format',
        'Description',
        'Contact Name',
        'Type of Acquisition',
        'Accession No.',
        'Additional Information',
        'Habitat',
        'Vinegar No'
    )
    ififuncs.create_csv(oe_register, headings)
    return oe_register
def main():
    '''
    Entry point: creates a desktop logfile, records startup events, then
    validates the manifest and logs the results.
    '''
    parser = make_parser()
    args = parser.parse_args()
    desktop_logs_dir = make_desktop_logs_dir()
    stamped = os.path.basename(args.input) + time.strftime("_%Y_%m_%dT%H_%M_%S")
    log_name_source = "%s/%s_fixity_validation.log" % (desktop_logs_dir, stamped)
    startup_events = (
        'EVENT = validate.py started',
        'eventDetail=validate.py %s' % ififuncs.get_script_version('validate.py'),
        'Command line arguments: %s' % args,
    )
    for event in startup_events:
        ififuncs.generate_log(log_name_source, event)
    manifest = check_manifest(args.input, log_name_source)
    log_results(manifest, log_name_source, args)
def main(args_):
    '''
    Launches functions that will generate a helper accessions register
    '''
    args = parse_args(args_)
    # Each of these is a list of row-dicts keyed by CSV column headings.
    sorted_csv_dict = ififuncs.extract_metadata(args.sorted_csv)[0]
    pbcore_csv_dict = ififuncs.extract_metadata(args.pbcore_csv)[0]
    filmo_csv_dict = ififuncs.extract_metadata(args.filmo_csv)[0]
    # NOTE: the loop variable 'accession' shadows any module-level name
    # 'accession' for the duration of this function.
    for accession in sorted_csv_dict:
        number = accession['accession number']
        # Enrich each register row with fields from the matching PBCore
        # technical record.
        for technical_record in pbcore_csv_dict:
            if technical_record['Accession Number'] == number:
                accession['acquisition method'] = technical_record[
                    'Type Of Deposit']
                accession['acquired from'] = technical_record['Donor']
                accession['date acquired'] = technical_record[
                    'Date Of Donation']
                # Then link to the filmographic record via reference number
                # to build the human-readable description field.
                for filmographic_record in filmo_csv_dict:
                    if filmographic_record[
                            'Filmographic URN'] == technical_record[
                                'Reference Number']:
                        # Fall back to series title + episode number when
                        # there is no standalone title.
                        if filmographic_record['Title/Name'] == '':
                            title = filmographic_record[
                                'Series Title'] + '; ' + filmographic_record[
                                    'Episode No']
                        else:
                            title = filmographic_record['Title/Name']
                        simple = '%s (%s) | %s' % (
                            title, filmographic_record['Year'],
                            technical_record['dig_object_descrip'])
                        if accession['acquisition method'] == 'Reproduction':
                            simple += ' | Reproduction of %s' % technical_record[
                                'TTape Origin']
                        accession[
                            'simple name; basic description; identification; historical information'] = simple
    # Write the enriched rows to a timestamped CSV on the desktop.
    desktop_logs_dir = ififuncs.make_desktop_logs_dir()
    new_csv_filename = time.strftime(
        "%Y-%m-%dT%H_%M_%S_") + 'helper_register.csv'
    new_csv = os.path.join(desktop_logs_dir, new_csv_filename)
    with open(new_csv, 'w', encoding='utf-8') as csvfile:
        # Reuse the column order of the input register.
        fieldnames = ififuncs.extract_metadata(args.sorted_csv)[1]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for i in sorted_csv_dict:
            writer.writerow(i)
    print('\nYour helper CSV file is located here: %s\n' % new_csv)
    return new_csv
def main(args_):
    '''
    Launches all other functions when called from the command line.
    '''
    args = make_parser(args_)
    desktop_logs_dir = make_desktop_logs_dir()
    stamped = os.path.basename(args.input) + time.strftime("_%Y_%m_%dT%H_%M_%S")
    log_name_source = "%s/%s_fixity_validation.log" % (desktop_logs_dir, stamped)
    startup_events = (
        'EVENT = validate.py started',
        'eventDetail=validate.py %s' % ififuncs.get_script_version('validate.py'),
        'Command line arguments: %s' % args,
    )
    for event in startup_events:
        ififuncs.generate_log(log_name_source, event)
    manifest, error_counter = check_manifest(args, log_name_source)
    # The log is only updated when explicitly requested.
    if args.update_log:
        log_results(manifest, log_name_source, args)
    return error_counter
def main():
    '''
    Entry point: builds a desktop logfile path, writes the startup events,
    then checks the manifest and logs the outcome.
    '''
    parser = make_parser()
    args = parser.parse_args()
    desktop_logs_dir = make_desktop_logs_dir()
    timestamp = time.strftime("_%Y_%m_%dT%H_%M_%S")
    log_name_source = "%s/%s_fixity_validation.log" % (
        desktop_logs_dir, os.path.basename(args.input) + timestamp)
    ififuncs.generate_log(log_name_source, 'EVENT = validate.py started')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=validate.py %s' % ififuncs.get_script_version('validate.py'))
    ififuncs.generate_log(
        log_name_source, 'Command line arguments: %s' % args)
    manifest = check_manifest(args.input, log_name_source)
    log_results(manifest, log_name_source, args)
def setup(args_):
    '''
    Sets a bunch of filename variables and parses command line.
    some examples:
    if manifest_sidecar = /home/kieranjol/fakeeeeee/fakeeeeee_manifest.md5
    then manifes_root = /home/kieranjol/fakeeeeee_manifest.md5
    '''
    parser = argparse.ArgumentParser(
        description='Copy directory with checksum comparison'
        'and manifest generation.Written by Kieran O\'Leary.')
    parser.add_argument('source', help='Input directory')
    parser.add_argument('destination', help='Destination directory')
    parser.add_argument(
        '-l', '-lto',
        action='store_true',
        help='use gcp instead of rsync on osx for SPEED on LTO')
    parser.add_argument('-move', action='store_true',
                        help='Move files instead of copying - much faster!')
    parser.add_argument(
        '-justcopy',
        action='store_true',
        help='Do not generate destination manifest and verify integrity :(')
    parser.add_argument(
        '-y',
        action='store_true',
        help=
        'Answers YES to the question: Not enough free space, would you like to continue?'
    )
    rootpos = ''
    dircheck = None
    args = parser.parse_args(args_)
    if os.path.isdir(args.source):
        dircheck = check_for_sip(args.source)
    # When a SIP structure is detected, copy into a subfolder of the
    # destination named after the source package.
    if dircheck != None:
        if os.path.isdir(dircheck):
            source = check_for_sip(args.source)
            destination = os.path.join(args.destination,
                                       os.path.basename(args.source))
            os.makedirs(destination)
    else:
        source = os.path.abspath(args.source)
        destination = args.destination
    normpath = os.path.normpath(source)
    #is there any benefit to this over os.path.basename
    dirname = os.path.split(os.path.basename(source))[1]
    # An empty dirname means source was a filesystem root (e.g. 'E:/').
    if dirname == '':
        rootpos = 'y'
        '''
        dirname = raw_input(
            'What do you want your destination folder to be called?\n'
        )
        '''
    relative_path = normpath.split(os.sep)[-1]
    # or hardcode
    destination_final_path = os.path.join(destination, dirname)
    if rootpos == 'y':
        manifest_destination = os.path.dirname(
            destination) + '/%s_manifest.md5' % os.path.basename(destination)
    else:
        manifest_destination = destination + '/%s_manifest.md5' % dirname
    if os.path.isfile(manifest_destination):
        print('Destination manifest already exists')
    if rootpos == 'y':
        manifest_filename = '%s_manifest.md5' % os.path.basename(destination)
    else:
        manifest_filename = '%s_manifest.md5' % dirname
    desktop_manifest_dir = make_desktop_manifest_dir()
    # manifest = desktop manifest, looks like this can get rewritten later.
    manifest = os.path.join(desktop_manifest_dir, manifest_filename)
    manifest_sidecar = os.path.join(os.path.dirname(source),
                                    relative_path + '_manifest.md5')
    manifest_root = source + '/%s_manifest.md5' % os.path.basename(source)
    log_name_filename = dirname + time.strftime("_%Y_%m_%dT%H_%M_%S")
    desktop_logs_dir = make_desktop_logs_dir()
    log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_filename)
    generate_log(log_name_source, 'copyit.py started.')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=copyit.py %s' % ififuncs.get_script_version('copyit.py'))
    generate_log(log_name_source, 'Source: %s' % source)
    generate_log(log_name_source, 'Destination: %s' % destination)
    # Refuse (or ask) when the destination lacks room for the payload;
    # -y pre-answers the prompt for unattended runs.
    print('Checking total size of input folder')
    total_input_size = ififuncs.get_folder_size(os.path.abspath(args.source))
    print('Checking if enough space in destination folder')
    free_space = ififuncs.get_free_space(args.destination)
    if total_input_size > free_space:
        print('You do not have enough free space!')
        if args.y:
            go_forth_blindly = 'Y'
        else:
            go_forth_blindly = ififuncs.ask_yes_no(
                'Would you like to continue anyway? Press Y or N')
        if go_forth_blindly == 'Y':
            generate_log(
                log_name_source,
                'You do not have enough free space!, but the user has decided to continue anyhow'
            )
        else:
            generate_log(log_name_source,
                         'You do not have enough free space! - Exiting')
            sys.exit()
    return args, rootpos, manifest_sidecar, log_name_source, destination_final_path, manifest_root, manifest_destination, manifest, destination, dirname, desktop_manifest_dir
# NOTE(review): this appears to be the interior of a move.py-style main()
# whose 'def' line is not present in this chunk, and it is truncated after
# the final generate_log call (presumably sys.exit() follows) - confirm
# against the original file before relying on it.
source = args.source
source_parent_dir = os.path.dirname(source)
normpath = os.path.normpath(source)
dirname = os.path.split(os.path.basename(source))[1]
relative_path = normpath.split(os.sep)[-1]
# -s writes the manifest as a sidecar next to the source; otherwise it
# goes to the desktop manifests directory.
if args.s:
    manifest = source_parent_dir + '/%s_manifest.md5' % relative_path
else:
    manifest_ = '/%s_manifest.md5' % relative_path
    desktop_manifest_dir = make_desktop_manifest_dir()
    manifest = "%s/%s" % (desktop_manifest_dir, manifest_)
log_name_source_ = os.path.basename(args.source) + time.strftime("_%Y_%m_%dT%H_%M_%S")
desktop_logs_dir = make_desktop_logs_dir()
log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_source_)
generate_log(log_name_source, 'move.py started.')
generate_log(log_name_source, 'Source: %s' % source)
# Only directories are supported; files and missing paths are rejected.
if os.path.isfile(source):
    print '\nFile checksum is not currently supported, only directories.\n'
    generate_log(log_name_source, 'Error: Attempted to generate manifest for file. Only Directories/Folders are currently supported')
    generate_log(log_name_source, 'move.py exit')
    sys.exit()
elif not os.path.isdir(source):
    print ' %s is either not a directory or it does not exist' % source
    generate_log(log_name_source, ' %s is either not a directory or it does not exist' % source)
    generate_log(log_name_source, 'move.py exit')
def main(args_):
    '''
    Overly long main function that makes a sidecar manifest.
    This needs to get broken up into smaller functions.
    '''
    parser = argparse.ArgumentParser(description='Generate manifest with'
                                     ' checksums for a directory'
                                     ' Written by Kieran O\'Leary.')
    parser.add_argument(
        'source', help='Input directory'
    )
    parser.add_argument(
        '-s', '-sidecar',
        action='store_true',
        help='Generates Sidecar'
    )
    parser.add_argument(
        '-f', '-felix',
        action='store_true',
        help='Felix Meehan workflow - places manifest inside of source directory'
    )
    parser.add_argument(
        '-sha512',
        action='store_true',
        help='Generates sha512 checksums instead of md5'
    )
    args = parser.parse_args(args_)
    source = args.source
    source_parent_dir = os.path.dirname(source)
    normpath = os.path.normpath(source)
    relative_path = normpath.split(os.sep)[-1]
    log_name_source_ = os.path.basename(
        args.source
    ) + time.strftime("_%Y_%m_%dT%H_%M_%S")
    # Choose manifest and log locations per workflow:
    # -s = sidecar beside source, -f = inside source, default = desktop.
    if args.s:
        if args.sha512:
            manifest = source_parent_dir + '/%s_manifest-sha512.txt' % relative_path
        else:
            manifest = source_parent_dir + '/%s_manifest.md5' % relative_path
        log_name_source = source_parent_dir + '/%s.log' % log_name_source_
    elif args.f:
        if args.sha512:
            manifest = source_parent_dir + '/%s_manifest-sha512.txt' % relative_path
        else:
            manifest = source + '/%s_manifest.md5' % relative_path
        log_name_source = source_parent_dir + '/%s.log' % log_name_source_
    else:
        if args.sha512:
            # NOTE(review): redundant duplicated assignment target kept
            # verbatim from the original ('manifest_ = manifest_ = ...').
            manifest_ = manifest_ = '/%s_manifest-sha512.txt' % relative_path
        else:
            manifest_ = '/%s_manifest.md5' % relative_path
        desktop_manifest_dir = make_desktop_manifest_dir()
        manifest = "%s/%s" % (desktop_manifest_dir, manifest_)
        desktop_logs_dir = make_desktop_logs_dir()
        log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_source_)
    # module is only a label for the log, not an import.
    if args.sha512:
        module = 'hashlib.sha512'
    else:
        module = 'hashlib.md5'
    generate_log(log_name_source, 'manifest.py started.')
    if sys.platform == "win32":
        generate_log(
            log_name_source,
            'EVENT = Generating manifest: status=started, eventType=message digest calculation, module=%s, agent=Windows' % module
        )
    if sys.platform == "darwin":
        generate_log(
            log_name_source,
            'EVENT = Generating manifest: status=started, eventType=message digest calculation, module=%s, agent=OSX' % module
        )
    elif sys.platform == "linux2":
        generate_log(
            log_name_source,
            'EVENT = Generating manifest: status=started, eventType=message digest calculation, module=%s, agent=Linux' % module
        )
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=manifest.py %s' % ififuncs.get_script_version('manifest.py'))
    generate_log(log_name_source, 'Source: %s' % source)
    # Only directories are supported; files and missing paths exit early.
    if os.path.isfile(source):
        print('\nFile checksum is not currently supported, only directories.\n')
        generate_log(log_name_source, 'Error: Attempted to generate manifest for file. Only Directories/Folders are currently supported')
        generate_log(log_name_source, 'manifest.py exit')
        sys.exit()
    elif not os.path.isdir(source):
        print((' %s is either not a directory or it does not exist' % source))
        generate_log(log_name_source, ' %s is either not a directory or it does not exist' % source)
        generate_log(log_name_source, 'manifest.py exit')
        sys.exit()
    remove_bad_files(source, log_name_source)
    source_count = 0
    for _, _, filenames in os.walk(source):
        # There has to be a better way to count the files..
        for _ in filenames:
            source_count += 1
    #works in windows at least
    # Guard against stale manifests: file count must match before reuse.
    if os.path.isfile(manifest):
        count_in_manifest = manifest_file_count(manifest)
        if source_count != count_in_manifest:
            print('This manifest may be outdated as the number of files in your directory does not match the number of files in the manifest')
            generate_log(log_name_source, 'EVENT = Existing source manifest check - Failure - The number of files in the source directory is not equal to the number of files in the source manifest ')
            sys.exit()
    if not os.path.isfile(manifest):
        try:
            print('Generating source manifest')
            generate_log(log_name_source, 'EVENT = Generating source manifest')
            if args.f:
                if args.sha512:
                    ififuncs.sha512_manifest(source, manifest, source)
                else:
                    hashlib_manifest(source, manifest, source)
                # Felix workflow keeps the log inside the source directory.
                shutil.move(log_name_source, source)
            else:
                if args.sha512:
                    ififuncs.sha512_manifest(source, manifest, source_parent_dir)
                else:
                    hashlib_manifest(source, manifest, source_parent_dir)
        except OSError:
            print('You do not have access to this directory. Perhaps it is read only, or the wrong file system\n')
            sys.exit()
    else:
        generate_log(log_name_source, 'EVENT = Existing source manifest check - Source manifest already exists. Script will exit. ')
    print(('Manifest created in %s' % manifest))
    generate_log(log_name_source, 'Manifest created in %s' % manifest)
    return log_name_source
def main(args_):
    '''
    Batch process packages by running accession.py and makepbcore.py
    '''
    args = parse_args(args_)
    oe_list = []
    if args.csv:
        for line_item in ififuncs.extract_metadata(args.csv)[0]:
            oe_number = line_item['Object Entry'].lower()
            # this transforms OE-#### to oe####
            transformed_oe = oe_number[:2] + oe_number[3:]
            oe_list.append(transformed_oe)
    if args.reference:
        reference_number = get_filmographic_number(args.reference)
    else:
        reference_number = ififuncs.get_reference_number()
    # Interactive prompts; answers only apply to donations, not
    # Reproductions (which use a fixed acquisition type below).
    donor = ififuncs.ask_question(
        'Who is the source of acquisition, as appears on the donor agreement? This will not affect Reproductions.'
    )
    depositor_reference = ififuncs.ask_question(
        'What is the donor/depositor number? This will not affect Reproductions.'
    )
    acquisition_type = ififuncs.get_acquisition_type('')
    user = ififuncs.get_user()
    accession_number = get_number(args)
    accession_digits = int(accession_number[3:])
    to_accession = initial_check(args, accession_digits, oe_list, reference_number)
    register = accession.make_register()
    # NOTE(review): new_csv is only assigned inside this branch, but it is
    # referenced unconditionally in accession_cmd and the final print -
    # running without -csv would raise NameError. Confirm -csv is in fact
    # mandatory for this workflow.
    if args.csv:
        desktop_logs_dir = ififuncs.make_desktop_logs_dir()
        if args.dryrun:
            new_csv_filename = time.strftime(
                "%Y-%m-%dT%H_%M_%S_DRYRUN_SHEET_PLEASE_DO_NOT_INGEST_JUST_IGNORE_COMPLETELY"
            ) + os.path.basename(args.csv)
        else:
            new_csv_filename = time.strftime(
                "%Y-%m-%dT%H_%M_%S_") + os.path.basename(args.csv)
        new_csv = os.path.join(desktop_logs_dir, new_csv_filename)
        filmographic_dict, headers = ififuncs.extract_metadata(args.csv)
        # Copy each allocated reference number into the matching
        # filmographic record (package name oe#### <-> column OE-####).
        for oe_package in to_accession:
            for filmographic_record in filmographic_dict:
                if os.path.basename(oe_package).upper(
                )[:2] + '-' + os.path.basename(
                        oe_package)[2:] == filmographic_record['Object Entry']:
                    filmographic_record['Reference Number'] = to_accession[
                        oe_package][1]
        with open(new_csv, 'w') as csvfile:
            fieldnames = headers
            # Removes Object Entry from headings as it's not needed in database.
            del fieldnames[1]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for i in filmographic_dict:
                i.pop('Object Entry', None)
                # Only include records that have reference numbers
                if not i['Reference Number'] == '':
                    writer.writerow(i)
    if args.dryrun:
        sys.exit()
    proceed = ififuncs.ask_yes_no('Do you want to proceed?')
    if proceed == 'Y':
        for package in sorted(to_accession.keys(), key=natural_keys):
            accession_cmd = [
                package, '-user', user, '-pbcore', '-f', '-number',
                to_accession[package][0], '-reference',
                to_accession[package][1], '-register', register, '-csv',
                new_csv
            ]
            # A 3-element value marks a Reproduction: acquisition type 13
            # plus a parent package instead of donor details.
            if len(to_accession[package]) == 3:
                accession_cmd.extend(['-acquisition_type', '13'])
                accession_cmd.extend(['-parent', order.main(package)])
            else:
                accession_cmd.extend(['-donor', donor])
                accession_cmd.extend(
                    ['-depositor_reference', depositor_reference])
                accession_cmd.extend(
                    ['-acquisition_type', acquisition_type[2]])
            print accession_cmd
            accession.main(accession_cmd)
    collated_pbcore = gather_metadata(args.input)
    sorted_filepath = ififuncs.sort_csv(register, 'accession number')
    print '\nA helper accessions register has been generated in order to help with registration - located here: %s' % sorted_filepath
    print '\nA modified filmographic CSV has been generated with added reference numbers - located here: %s' % new_csv
    print '\nA collated CSV consisting of each PBCore report has been generated for batch database import - located here: %s' % collated_pbcore
def setup(args_):
    '''
    Sets a bunch of filename variables and parses command line.
    some examples:
    if manifest_sidecar = /home/kieranjol/fakeeeeee/fakeeeeee_manifest.md5
    then manifes_root = /home/kieranjol/fakeeeeee_manifest.md5
    '''
    parser = argparse.ArgumentParser(
        description='Copy directory with checksum comparison'
        'and manifest generation.Written by Kieran O\'Leary.')
    parser.add_argument(
        'source', help='Input directory'
    )
    parser.add_argument(
        'destination', help='Destination directory'
    )
    parser.add_argument(
        '-l', '-lto',
        action='store_true',
        help='use gcp instead of rsync on osx for SPEED on LTO'
    )
    parser.add_argument(
        '-move',
        action='store_true',
        help='Move files instead of copying - much faster!'
    )
    rootpos = ''
    dircheck = None
    args = parser.parse_args(args_)
    if os.path.isdir(args.source):
        dircheck = check_for_sip(args.source)
    # When a SIP structure is detected, copy into a subfolder of the
    # destination named after the source package.
    if dircheck != None:
        if os.path.isdir(dircheck):
            source = check_for_sip(args.source)
            destination = os.path.join(args.destination,
                                       os.path.basename(args.source))
            os.makedirs(destination)
    else:
        source = args.source
        destination = args.destination
    normpath = os.path.normpath(source)
    #is there any benefit to this over os.path.basename
    dirname = os.path.split(os.path.basename(source))[1]
    # An empty dirname means source was a filesystem root (e.g. 'E:/'),
    # so the user is asked to name the destination folder.
    if dirname == '':
        rootpos = 'y'
        dirname = raw_input(
            'What do you want your destination folder to be called?\n'
        )
    relative_path = normpath.split(os.sep)[-1]
    # or hardcode
    destination_final_path = os.path.join(destination, dirname)
    manifest_destination = destination + '/%s_manifest.md5' % dirname
    if os.path.isfile(manifest_destination):
        print 'Destination manifest already exists'
    manifest_filename = '%s_manifest.md5' % dirname
    desktop_manifest_dir = make_desktop_manifest_dir()
    # manifest = desktop manifest, looks like this can get rewritten later.
    manifest = os.path.join(
        desktop_manifest_dir, manifest_filename
    )
    manifest_sidecar = os.path.join(
        os.path.dirname(source), relative_path + '_manifest.md5'
    )
    manifest_root = source + '/%s_manifest.md5' % os.path.basename(source)
    log_name_filename = dirname + time.strftime("_%Y_%m_%dT%H_%M_%S")
    desktop_logs_dir = make_desktop_logs_dir()
    log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_filename)
    generate_log(log_name_source, 'copyit.py started.')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=copyit.py %s' % ififuncs.get_script_version('copyit.py'))
    generate_log(log_name_source, 'Source: %s' % source)
    generate_log(log_name_source, 'Destination: %s' % destination)
    return args, rootpos, manifest_sidecar, log_name_source, destination_final_path, manifest_root, manifest_destination, manifest, destination, dirname, desktop_manifest_dir
def main(args_): ''' Batch process packages by running accession.py and makepbcore.py ''' args = parse_args(args_) oe_list = [] if args.oe_csv: if not args.filmographic: print(' - batchaccession.py - ERROR\n - No -filmographic argument supplied. This is mandatory when using the -oe_csv option. \n - Exiting..') sys.exit() oe_csv_extraction = ififuncs.extract_metadata(args.oe_csv) initial_oe_list = oe_csv_extraction[0] oe_dicts = process_oe_csv(oe_csv_extraction, args.input) # temp hack while we're performing both workflows helper_csv = args.oe_csv elif args.filmographic: initial_oe_list = ififuncs.extract_metadata(args.filmographic)[0] # temp hack while we're performing both workflows helper_csv = args.filmographic if args.oe_csv or args.filmographic: for line_item in ififuncs.extract_metadata(helper_csv)[0]: try: oe_number = line_item['Object Entry'].lower() except KeyError: oe_number = line_item['OE No.'].lower() # this transforms OE-#### to oe#### transformed_oe = oe_number[:2] + oe_number[3:] oe_list.append(transformed_oe) if not args.oe_csv: # No need to ask for the reference number if the OE csv option is supplied. # The assumption here is that the OE csv contains the reference numbers though. if args.reference: reference_number = get_filmographic_number(args.reference) else: reference_number = ififuncs.get_reference_number() donor = ififuncs.ask_question('Who is the source of acquisition, as appears on the donor agreement? This will not affect Reproductions.') depositor_reference = ififuncs.ask_question('What is the donor/depositor number? 
This will not affect Reproductions.') acquisition_type = ififuncs.get_acquisition_type('') user = ififuncs.get_user() accession_number = get_number(args) accession_digits = int(accession_number[3:]) if not args.oe_csv: to_accession = initial_check(args, accession_digits, oe_list, reference_number) else: to_accession = {} for oe_record in oe_dicts: if os.path.isdir(oe_record['source_path']): to_accession[oe_record['source_path']] = ['aaa' + str(accession_digits).zfill(4), oe_record['reference number'], oe_record['parent'], oe_record['donation_date']] accession_digits += 1 for success in sorted(to_accession.keys()): print('%s will be accessioned as %s' % (success, to_accession[success])) register = accession.make_register() if args.filmographic: desktop_logs_dir = ififuncs.make_desktop_logs_dir() if args.dryrun: new_csv_filename = time.strftime("%Y-%m-%dT%H_%M_%S_DRYRUN_SHEET_PLEASE_DO_NOT_INGEST_JUST_IGNORE_COMPLETELY") + os.path.basename(args.filmographic) else: new_csv_filename = time.strftime("%Y-%m-%dT%H_%M_%S_") + os.path.basename(args.filmographic) new_csv = os.path.join(desktop_logs_dir, new_csv_filename) if not args.oe_csv: filmographic_dict, headers = ififuncs.extract_metadata(args.filmographic) for oe_package in to_accession: for filmographic_record in filmographic_dict: if os.path.basename(oe_package).upper()[:2] + '-' + os.path.basename(oe_package)[2:] == filmographic_record['Object Entry']: filmographic_record['Reference Number'] = to_accession[oe_package][1] get_filmographic_titles(to_accession, filmographic_dict) with open(new_csv, 'w') as csvfile: fieldnames = headers # Removes Object Entry from headings as it's not needed in database. 
del fieldnames[1] writer = csv.DictWriter(csvfile, fieldnames=fieldnames) writer.writeheader() for i in filmographic_dict: i.pop('Object Entry', None) # Only include records that have reference numbers if not i['Reference Number'] == '': writer.writerow(i) if args.dryrun: sys.exit() proceed = ififuncs.ask_yes_no( 'Do you want to proceed?' ) if args.oe_csv: new_csv = args.filmographic if proceed == 'Y': for package in sorted(to_accession.keys(), key=natural_keys): accession_cmd = [ package, '-user', user, '-f', '-number', to_accession[package][0], '-reference', to_accession[package][1], '-register', register, '-csv', new_csv ] for oe_record in oe_dicts: if oe_record['source_path'] == package: if not oe_record['format'].lower() == 'dcdm': accession_cmd.append('-pbcore') if len(to_accession[package]) == 4: if not to_accession[package][2] == 'n/a': accession_cmd.extend(['-acquisition_type', '13']) if args.oe_csv: accession_cmd.extend(['-parent', to_accession[package][2]]) else: accession_cmd.extend(['-parent', order.main(package)]) else: accession_cmd.extend(['-donor', donor]) accession_cmd.extend(['-depositor_reference', depositor_reference]) accession_cmd.extend(['-acquisition_type', acquisition_type[2]]) print to_accession[package][3] accession_cmd.extend(['-donation_date', to_accession[package][3]]) print accession_cmd accession.main(accession_cmd) collated_pbcore = gather_metadata(args.input) sorted_filepath = ififuncs.sort_csv(register, 'accession number') print '\nA helper accessions register has been generated in order to help with registration - located here: %s' % sorted_filepath print '\nA modified filmographic CSV has been generated with added reference numbers - located here: %s' % new_csv print '\nA collated CSV consisting of each PBCore report has been generated for batch database import - located here: %s' % collated_pbcore
def setup(args_):
    '''
    Sets a bunch of filename variables and parses command line.
    some examples:
    if manifest_sidecar = /home/kieranjol/fakeeeeee/fakeeeeee_manifest.md5
    then manifest_root = /home/kieranjol/fakeeeeee_manifest.md5
    Returns a tuple: (args, rootpos, manifest_sidecar, log_name_source,
    destination_final_path, manifest_root, manifest_destination, manifest,
    destination, dirname, desktop_manifest_dir)
    '''
    parser = argparse.ArgumentParser(
        description='Copy directory with checksum comparison'
        'and manifest generation.Written by Kieran O\'Leary.')
    parser.add_argument('source', help='Input directory')
    parser.add_argument('destination', help='Destination directory')
    parser.add_argument(
        '-l', '-lto', action='store_true',
        help='use gcp instead of rsync on osx for SPEED on LTO')
    rootpos = ''
    dircheck = None
    args = parser.parse_args(args_)
    # If the source looks like a SIP package, copy its inner folder and
    # recreate the package folder name inside the destination.
    if os.path.isdir(args.source):
        dircheck = check_for_sip(args.source)
    if dircheck != None:
        if os.path.isdir(dircheck):
            source = check_for_sip(args.source)
            destination = os.path.join(args.destination, os.path.basename(args.source))
            os.makedirs(destination)
    else:
        source = args.source
        destination = args.destination
    # NOTE(review): if dircheck is not None but os.path.isdir(dircheck) is
    # False, 'source' is never assigned and the normpath call below raises
    # UnboundLocalError - confirm whether check_for_sip() can return a
    # non-directory value.
    normpath = os.path.normpath(source)
    #is there any benefit to this over os.path.basename
    dirname = os.path.split(os.path.basename(source))[1]
    if dirname == '':
        # Source is a root path (no basename), so ask the user for a name.
        rootpos = 'y'
        dirname = raw_input(
            'What do you want your destination folder to be called?\n')
    relative_path = normpath.split(os.sep)[-1]  # or hardcode
    destination_final_path = os.path.join(destination, dirname)
    manifest_destination = destination + '/%s_manifest.md5' % dirname
    if os.path.isfile(manifest_destination):
        print 'Destination manifest already exists'
    manifest_filename = '%s_manifest.md5' % dirname
    desktop_manifest_dir = make_desktop_manifest_dir()
    # manifest = desktop manifest, looks like this can get rewritten later.
    manifest = os.path.join(desktop_manifest_dir, manifest_filename)
    manifest_sidecar = os.path.join(os.path.dirname(source), relative_path + '_manifest.md5')
    manifest_root = source + '/%s_manifest.md5' % os.path.basename(source)
    log_name_filename = dirname + time.strftime("_%Y_%m_%dT%H_%M_%S")
    desktop_logs_dir = make_desktop_logs_dir()
    log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_filename)
    generate_log(log_name_source, 'copyit.py started.')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=copyit.py %s' % ififuncs.get_script_version('copyit.py'))
    generate_log(log_name_source, 'Source: %s' % source)
    generate_log(log_name_source, 'Destination: %s' % destination)
    return args, rootpos, manifest_sidecar, log_name_source, destination_final_path, manifest_root, manifest_destination, manifest, destination, dirname, desktop_manifest_dir
# Fragment of a DPX workflow script: the PREMIS variables used below
# (premis, doc, premisxml, final_sip_manifest_uuid, framemd5_uuid,
# representation_uuid) are defined earlier, outside this view.
# Record whole-package and frame-level checksum events in the PREMIS metadata.
make_event(premis, 'message digest calculation', 'Checksum manifest for whole package created', [['UUID','9430725d-7523-4071-9063-e8a6ac4f84c4' ]],final_sip_manifest_uuid,[representation_uuid], 'source', 'now')
make_event(premis, 'message digest calculation', 'Frame level checksums of images', [['UUID','ee83e19e-cdb1-4d83-91fb-7faf7eff738e' ]], framemd5_uuid, [representation_uuid], 'source', 'now' )
write_premis(doc, premisxml)
# Command-line interface for the DPX/TIFF transcode workflow.
parser = argparse.ArgumentParser(description='DPX2TIFF specific workflow for IFI'
                                 ' Written by Kieran O\'Leary.')
parser.add_argument(
    'input', nargs='+', help='full path of input directory'
)
parser.add_argument(
    '-o', help='full path of output directory', required=True)
args = parser.parse_args()
print args
desktop_logs_dir = make_desktop_logs_dir()
# Per-run CSV report, timestamped so repeated runs do not collide.
csv_report_filename = os.path.join(desktop_logs_dir, 'dpx_transcode_report' + time.strftime("_%Y_%m_%dT%H_%M_%S") + '.csv')
#permission for correct directories sought from user
permission = ''
all_files = args.input
# NOTE(review): due to operator precedence this parses as
# (not permission == 'y') or (permission == 'Y'), which is always True
# when permission is '' - the intent was presumably
# "permission not in ('y', 'Y')". Harmless here because permission is
# always '' at this point, but worth confirming/fixing upstream.
if not permission == 'y' or permission == 'Y':
    print '\n\n**** All TIFF sequences within these directories will be converted to DPX.\n'
    for i in all_files:
        print i
    # Keep prompting until the user gives an explicit yes/no answer.
    permission = raw_input('\n**** These are the directories that wil be turned into DPX. \n**** If this looks ok, please press Y, otherwise, type N\n' )
    while permission not in ('Y','y','N','n'):
        permission = raw_input('\n**** These are the directories that wil be turned into DPX. \n**** If this looks ok, please press Y, otherwise, type N\n')
    if permission == 'n' or permission == 'N':
        print 'Exiting at your command- Cheerio for now'
        sys.exit()