def parse_manifest(manifest, log_name_source):
    '''
    Analyses the manifest to see if any files are missing.

    The working directory is changed to the folder that holds the manifest
    so that the paths recorded in it can be tested with os.path.isfile().
    Every missing path is printed and written to log_name_source.
    If the file count that ififuncs.count_stuff() reports for the source
    directory differs from the number of lines in the manifest, the files
    that are on disk but not in the manifest are listed and the user is
    asked whether to carry on. Answering N exits the script.

    Returns a dictionary mapping each path that exists on disk to its
    checksum, followed by a list of the paths that are missing.
    '''
    manifest_dir = os.path.dirname(manifest)
    source_dir = os.path.join(
        manifest_dir,
        os.path.basename(manifest).replace('_manifest.md5', ''))
    source_count, file_list = ififuncs.count_stuff(source_dir)
    missing_files_list = []
    manifest_dict = {}
    proceed = 'Y'
    os.chdir(manifest_dir)
    # The path begins at a fixed column that depends on the manifest type.
    # Presumably this is the checksum width plus a two character separator
    # - confirm against the manifest writer.
    if 'manifest-sha512.txt' in manifest:
        path_offset = 130
    else:
        path_offset = 34
    with open(manifest, 'r') as manifest_object:
        manifest_list = manifest_object.readlines()
    for entries in manifest_list:
        checksum = entries.split(' ')[0]
        path = entries[path_offset:].replace('\r', '').replace('\n', '')
        path = path.replace('\\', '/')
        if os.path.isfile(path):
            manifest_dict[path] = checksum
        else:
            message = '%s is missing' % path
            ififuncs.generate_log(log_name_source, message)
            print(message)
            missing_files_list.append(path)
    if source_count != len(manifest_list):
        print(' - There is mismatch between your file count and the manifest file count')
        print(' - checking which files are different')
        for i in file_list:
            # manifest_dict is keyed by every manifest path found on disk,
            # so it doubles as a constant-time membership test.
            if i not in manifest_dict:
                print('%s is present in your source directory but not in the source manifest' % (i,))
        proceed = ififuncs.ask_yes_no('Do you want to proceed regardless?')
    if proceed == 'N':
        print('Exiting')
        sys.exit()
    if missing_files_list:
        print('The number of missing files: %s' % len(missing_files_list))
        ififuncs.generate_log(
            log_name_source,
            'The number of missing files is: %s' % len(missing_files_list))
    else:
        print('All files present')
        ififuncs.generate_log(log_name_source, 'All files present')
    return manifest_dict, missing_files_list
def main(args_):
    '''
    Batch process packages by running accession.py and makepbcore.py

    When a filmographic CSV is supplied with args.csv, a copy with the
    reference numbers filled in is written to the desktop logs directory.
    With args.dryrun the script exits before any package is accessioned.
    '''
    args = parse_args(args_)
    oe_list = []
    if args.csv:
        for line_item in ififuncs.extract_metadata(args.csv)[0]:
            oe_number = line_item['Object Entry'].lower()
            # this transforms OE-#### to oe####
            transformed_oe = oe_number[:2] + oe_number[3:]
            oe_list.append(transformed_oe)
    if args.reference:
        reference_number = get_filmographic_number(args.reference)
    else:
        reference_number = ififuncs.get_reference_number()
    user = ififuncs.get_user()
    accession_number = get_number(args)
    accession_digits = int(accession_number[3:])
    to_accession = initial_check(
        args, accession_digits, oe_list, reference_number)
    register = accession.make_register()
    # Stays None when no filmographic CSV was supplied, so that the summary
    # at the end does not refer to a file that was never written.
    new_csv = None
    if args.csv:
        desktop_logs_dir = ififuncs.make_desktop_logs_dir()
        new_csv = os.path.join(desktop_logs_dir, os.path.basename(args.csv))
        filmographic_dict, headers = ififuncs.extract_metadata(args.csv)
        for oe_package in to_accession:
            package_name = os.path.basename(oe_package)
            # this transforms oe#### back to OE-####
            object_entry = package_name.upper()[:2] + '-' + package_name[2:]
            for filmographic_record in filmographic_dict:
                if object_entry == filmographic_record['Object Entry']:
                    filmographic_record['Reference Number'] = to_accession[oe_package][1]
        with open(new_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=headers)
            writer.writeheader()
            for i in filmographic_dict:
                writer.writerow(i)
    if args.dryrun:
        sys.exit()
    proceed = ififuncs.ask_yes_no('Do you want to proceed?')
    if proceed == 'Y':
        for package in sorted(to_accession.keys()):
            accession.main([
                package, '-user', user,
                '-p', '-f',
                '-number', to_accession[package][0],
                '-reference', to_accession[package][1],
                '-register', register
            ])
    # NOTE(review): the summary below is placed at function level; the
    # flattened source does not show whether it belonged inside the
    # "proceed" branch - confirm.
    collated_pbcore = gather_metadata(args.input)
    print('\nA helper accessions register has been generated in order to help with registration - located here: %s' % register)
    if new_csv is not None:
        print('\nA modified filmographic CSV has been generated with added reference numbers - located here: %s' % new_csv)
    print('\nA collated CSV consisting of each PBCore report has been generated for batch database import - located here: %s' % collated_pbcore)
def main(args_):
    '''
    Batch process packages by running accession.py and makepbcore.py

    The user is asked for confirmation once; every package returned by
    initial_check() is then passed to accession.main() in sorted order.
    '''
    args = parse_args(args_)
    user = ififuncs.get_user()
    # The digits follow a three character prefix in the accession number.
    accession_digits = int(get_number(args)[3:])
    to_accession = initial_check(args, accession_digits)
    if ififuncs.ask_yes_no('Do you want to proceed?') != 'Y':
        return
    for package in sorted(to_accession):
        accession_cmd = [package, '-user', user, '-p', '-f']
        accession_cmd += ['-number', to_accession[package]]
        accession.main(accession_cmd)
def create_content_title_text(sip_path):
    '''
    DCPs are often delivered with inconsistent foldernames.
    The folder that holds the CPL can be renamed with the value recorded
    in <ContentTitleText>.
    For example:
    Original name: CHARBON-SMPTE-24
    New name: CHARBON-SMPTE-24-INTEROP-SUBS_TST_S_XX-EN_FR_XX_2K_CHA-20120613_CHA_OV
    The rename only happens when the user answers Y. The content title is
    returned either way.
    '''
    objects_dir = os.path.join(sip_path, 'objects')
    cpl = ififuncs.find_cpl(objects_dir)
    content_title_text = ififuncs.get_contenttitletext(cpl)
    dcp_dirname = os.path.dirname(cpl)
    dci_foldername = os.path.join(objects_dir, content_title_text)
    question = 'Do you want to rename %s with %s ?' % (dcp_dirname, dci_foldername)
    if ififuncs.ask_yes_no(question) != 'Y':
        return content_title_text
    # The rename is done relative to the parent of the DCP folder, which
    # becomes the working directory.
    parent_dir, current_name = os.path.split(dcp_dirname)
    os.chdir(parent_dir)
    os.rename(current_name, content_title_text)
    return content_title_text
def create_content_title_text(sip_path):
    '''
    Offers to rename a DCP folder after its <ContentTitleText> value, as
    DCPs are often delivered with inconsistent foldernames.
    For example:
    Original name: CHARBON-SMPTE-24
    New name: CHARBON-SMPTE-24-INTEROP-SUBS_TST_S_XX-EN_FR_XX_2K_CHA-20120613_CHA_OV
    Nothing is renamed unless the user agrees. Returns the content title.
    '''
    objects_dir = os.path.join(sip_path, 'objects')
    cpl = ififuncs.find_cpl(objects_dir)
    # The folder that contains the CPL is the one offered for renaming.
    dcp_dirname = os.path.dirname(cpl)
    content_title_text = ififuncs.get_contenttitletext(cpl)
    dci_foldername = os.path.join(objects_dir, content_title_text)
    answer = ififuncs.ask_yes_no(
        'Do you want to rename %s with %s ?' % (dcp_dirname, dci_foldername))
    user_agreed = answer == 'Y'
    if user_agreed:
        # Work from the parent folder so that both names are relative.
        os.chdir(os.path.dirname(dcp_dirname))
        old_name = os.path.basename(dcp_dirname)
        os.rename(old_name, content_title_text)
    return content_title_text
def create_content_title_text(sip_path, args):
    '''
    DCPs are often delivered with inconsistent foldernames.
    This asks the user whether the DCP folder should take the value
    recorded in <ContentTitleText>.
    For example:
    Original name: CHARBON-SMPTE-24
    New name: CHARBON-SMPTE-24-INTEROP-SUBS_TST_S_XX-EN_FR_XX_2K_CHA-20120613_CHA_OV
    No rename is carried out here. The script exits if the user answers N,
    otherwise the content title is returned to the caller.
    '''
    # The CPL is looked up in the first input path.
    cpl = ififuncs.find_cpl(args.i[0])
    current_name = os.path.basename(os.path.dirname(cpl))
    content_title = ififuncs.get_contenttitletext(cpl)
    dci_foldername = os.path.join(sip_path, 'objects', content_title)
    question = 'Do you want to rename %s with %s ?' % (current_name, dci_foldername)
    if ififuncs.ask_yes_no(question) == 'N':
        print('Exiting')
        sys.exit()
    return content_title
def main(args):
    '''
    Analyzes a directory containing Object Entry packages and returns
    their parent or lack thereof.

    args is the path of a single package. The return value is the parent
    reported by ififuncs.find_parent(), or the name of the package itself
    when it holds a single object and the user adds it to the accession
    list. None is returned in every other case.
    '''
    source = args
    source_name = os.path.basename(source)
    # NOTE(review): oe_uuid_dict is only bound for names that start with
    # 'oe' or 'aaa'. Any other name reaches find_parent() without it.
    if source_name.startswith(('oe', 'aaa')):
        oe_uuid_dict = ififuncs.group_ids(os.path.dirname(source))
    for root, _, filenames in os.walk(source):
        for filename in filenames:
            if not filename.endswith('_sip_log.log'):
                continue
            log_path = os.path.join(root, filename)
            uuid_search = ififuncs.find_parent(log_path, oe_uuid_dict)
            if 'not a child' in uuid_search:
                uuid_dir = os.path.dirname(root)
                # Checks if a single AV file is in the objects dir.
                if file_count(os.path.join(uuid_dir, 'objects')) != 1:
                    # master
                    return None
                package_name = os.path.basename(os.path.dirname(uuid_dir))
                print('%s has no parent but this could be because it is a single file' % package_name)
                proceed = ififuncs.ask_yes_no(
                    'add %s to accession list?' % package_name)
                if proceed == 'Y':
                    print(package_name)
                    return package_name
            elif 'has a parent' in uuid_search:
                # The parent is the last word of the search result.
                parent = uuid_search.split()[-1]
                print(parent)
                return parent
def setup(args_):
    '''
    Sets a bunch of filename variables and parses command line.

    some examples, for a source of /home/kieranjol/fakeeeeee:
    manifest_sidecar = /home/kieranjol/fakeeeeee_manifest.md5
    manifest_root = /home/kieranjol/fakeeeeee/fakeeeeee_manifest.md5

    A log is started in the desktop logs directory, and the size of the
    input is compared with the free space of the destination. The script
    exits when there is not enough space, unless -y was supplied or the
    user chooses to continue.

    Returns a tuple: args, rootpos, manifest_sidecar, log_name_source,
    destination_final_path, manifest_root, manifest_destination, manifest,
    destination, dirname, desktop_manifest_dir
    '''
    parser = argparse.ArgumentParser(
        description='Copy directory with checksum comparison'
        ' and manifest generation. Written by Kieran O\'Leary.')
    parser.add_argument('source', help='Input directory')
    parser.add_argument('destination', help='Destination directory')
    parser.add_argument(
        '-l', '-lto', action='store_true',
        help='use gcp instead of rsync on osx for SPEED on LTO')
    parser.add_argument(
        '-move', action='store_true',
        help='Move files instead of copying - much faster!')
    parser.add_argument(
        '-justcopy', action='store_true',
        help='Do not generate destination manifest and verify integrity :(')
    parser.add_argument(
        '-y', action='store_true',
        help='Answers YES to the question: Not enough free space, would you like to continue?')
    rootpos = ''
    dircheck = None
    args = parser.parse_args(args_)
    if os.path.isdir(args.source):
        dircheck = check_for_sip(args.source)
    if dircheck is not None:
        # NOTE(review): source and destination stay unbound if dircheck is
        # not a directory - confirm that check_for_sip() rules this out.
        if os.path.isdir(dircheck):
            source = check_for_sip(args.source)
            destination = os.path.join(
                args.destination, os.path.basename(args.source))
            os.makedirs(destination)
    else:
        source = os.path.abspath(args.source)
        destination = args.destination
    normpath = os.path.normpath(source)
    # is there any benefit to this over os.path.basename
    dirname = os.path.split(os.path.basename(source))[1]
    if dirname == '':
        # An empty name is flagged so that the manifest names below are
        # based on the destination instead.
        rootpos = 'y'
    relative_path = normpath.split(os.sep)[-1]
    # or hardcode
    destination_final_path = os.path.join(destination, dirname)
    if rootpos == 'y':
        manifest_destination = os.path.dirname(
            destination) + '/%s_manifest.md5' % os.path.basename(destination)
    else:
        manifest_destination = destination + '/%s_manifest.md5' % dirname
    if os.path.isfile(manifest_destination):
        print('Destination manifest already exists')
    if rootpos == 'y':
        manifest_filename = '%s_manifest.md5' % os.path.basename(destination)
    else:
        manifest_filename = '%s_manifest.md5' % dirname
    desktop_manifest_dir = make_desktop_manifest_dir()
    # manifest = desktop manifest, looks like this can get rewritten later.
    manifest = os.path.join(desktop_manifest_dir, manifest_filename)
    manifest_sidecar = os.path.join(
        os.path.dirname(source), relative_path + '_manifest.md5')
    manifest_root = source + '/%s_manifest.md5' % os.path.basename(source)
    log_name_filename = dirname + time.strftime("_%Y_%m_%dT%H_%M_%S")
    desktop_logs_dir = make_desktop_logs_dir()
    log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_filename)
    generate_log(log_name_source, 'copyit.py started.')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=copyit.py %s' % ififuncs.get_script_version('copyit.py'))
    generate_log(log_name_source, 'Source: %s' % source)
    generate_log(log_name_source, 'Destination: %s' % destination)
    print('Checking total size of input folder')
    total_input_size = ififuncs.get_folder_size(os.path.abspath(args.source))
    print('Checking if enough space in destination folder')
    free_space = ififuncs.get_free_space(args.destination)
    if total_input_size > free_space:
        print('You do not have enough free space!')
        if args.y:
            go_forth_blindly = 'Y'
        else:
            go_forth_blindly = ififuncs.ask_yes_no(
                'Would you like to continue anyway? Press Y or N')
        if go_forth_blindly == 'Y':
            generate_log(
                log_name_source,
                'You do not have enough free space!, but the user has decided to continue anyhow')
        else:
            generate_log(
                log_name_source,
                'You do not have enough free space! - Exiting')
            sys.exit()
    return args, rootpos, manifest_sidecar, log_name_source, destination_final_path, manifest_root, manifest_destination, manifest, destination, dirname, desktop_manifest_dir
def main(args_):
    '''
    Launches the various functions that will accession a package

    The Object Entry folder is renamed with the accession number, the
    rename is recorded in the register and in the package log, a sha512
    manifest and a DFXML report are generated and, on request, the
    filmographic CSV is inserted and makepbcore is launched.
    Nothing is changed if the input is not a valid package or if the user
    declines the rename.
    '''
    args = parse_args(args_)
    source = args.input
    uuid_directory = ififuncs.check_for_sip([source])
    if uuid_directory is None:
        print('not a valid package. The input should include a package that has been through Object Entry')
        return
    oe_path = os.path.dirname(uuid_directory)
    oe_number = os.path.basename(oe_path)
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    if args.number:
        digits = args.number[3:]
        if args.number[:3] == 'aaa' and len(digits) == 4 and digits.isdigit():
            accession_number = args.number
        else:
            # An invalid -number falls back to asking the user.
            print('First three characters must be \'aaa\' and last four characters must be four digits')
            accession_number = ififuncs.get_accession_number()
    else:
        accession_number = ififuncs.get_accession_number()
    # The reference number is needed both for the pbcore report and for
    # naming the filmographic CSV, so it is resolved before the package is
    # renamed whenever either option is used.
    if args.pbcore or args.csv:
        if args.reference:
            Reference_Number = args.reference.upper()
        else:
            Reference_Number = ififuncs.get_reference_number()
    if args.acquisition_type:
        acquisition_type = ififuncs.get_acquisition_type(
            args.acquisition_type)
        print(acquisition_type)
    accession_path = os.path.join(os.path.dirname(oe_path), accession_number)
    uuid = os.path.basename(uuid_directory)
    new_uuid_path = os.path.join(accession_path, uuid)
    logs_dir = os.path.join(new_uuid_path, 'logs')
    sipcreator_log = os.path.join(logs_dir, uuid) + '_sip_log.log'
    if args.force:
        proceed = 'Y'
    else:
        proceed = ififuncs.ask_yes_no(
            'Do you want to rename %s with %s' % (oe_number, accession_number))
    if proceed != 'Y':
        return
    os.rename(oe_path, accession_path)
    if args.register:
        register = args.register
    else:
        register = make_register()
    # The register records the Object Entry number in its OE-#### form.
    ififuncs.append_csv(
        register,
        (oe_number.upper()[:2] + '-' + oe_number[2:],
         accession_number, '', '', '', '', '', ''))
    ififuncs.generate_log(sipcreator_log, 'EVENT = accession.py started')
    ififuncs.generate_log(
        sipcreator_log,
        'eventDetail=accession.py %s' % ififuncs.get_script_version('accession.py'))
    ififuncs.generate_log(
        sipcreator_log, 'Command line arguments: %s' % args)
    ififuncs.generate_log(sipcreator_log, 'EVENT = agentName=%s' % user)
    ififuncs.generate_log(
        sipcreator_log,
        'EVENT = eventType=Identifier assignment,'
        ' eventIdentifierType=accession number, value=%s' % accession_number)
    ififuncs.generate_log(
        sipcreator_log,
        'EVENT = eventType=accession,'
        ' eventIdentifierType=accession number, value=%s' % accession_number)
    sip_manifest = os.path.join(accession_path, uuid) + '_manifest.md5'
    sha512_log = manifest.main([new_uuid_path, '-sha512', '-s'])
    sha512_manifest = os.path.join(
        os.path.dirname(new_uuid_path), uuid + '_manifest-sha512.txt')
    ififuncs.merge_logs_append(sha512_log, sipcreator_log, sip_manifest)
    os.remove(sha512_log)
    print('Generating Digital Forensics XML')
    dfxml = make_dfxml(args, new_uuid_path, uuid)
    ififuncs.generate_log(
        sipcreator_log,
        'EVENT = Metadata extraction - eventDetail=File system metadata extraction using Digital Forensics XML, eventOutcome=%s, agentName=makedfxml' % (dfxml))
    if args.csv:
        metadata_dir = os.path.join(new_uuid_path, 'metadata')
        package_filmographic = os.path.join(
            metadata_dir, Reference_Number + '_filmographic.csv')
        insert_filmographic(args.csv, Reference_Number, package_filmographic)
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = Metadata extraction - eventDetail=Filmographic descriptive metadata added to metadata folder, eventOutcome=%s, agentName=accession.py' % (package_filmographic))
        ififuncs.manifest_update(sip_manifest, package_filmographic)
        ififuncs.sha512_update(sha512_manifest, package_filmographic)
        print('Filmographic descriptive metadata added to metadata folder')
    ififuncs.generate_log(sipcreator_log, 'EVENT = accession.py finished')
    ififuncs.checksum_replace(sip_manifest, sipcreator_log, 'md5')
    ififuncs.checksum_replace(sha512_manifest, sipcreator_log, 'sha512')
    ififuncs.manifest_update(sip_manifest, dfxml)
    ififuncs.sha512_update(sha512_manifest, dfxml)
    if args.pbcore:
        makepbcore_cmd = [
            accession_path, '-p', '-user', user,
            '-reference', Reference_Number
        ]
        if args.parent:
            makepbcore_cmd.extend(['-parent', args.parent])
        if args.acquisition_type:
            makepbcore_cmd.extend(
                ['-acquisition_type', args.acquisition_type])
        if args.donor:
            makepbcore_cmd.extend(['-donor', args.donor])
        # Each option is only forwarded when it was itself supplied.
        if args.depositor_reference:
            makepbcore_cmd.extend(
                ['-depositor_reference', args.depositor_reference])
        if args.donation_date:
            makepbcore_cmd.extend(['-donation_date', args.donation_date])
        makepbcore.main(makepbcore_cmd)
def main(args_):
    '''
    Batch process packages by running accession.py and makepbcore.py

    Two workflows are supported:
    -oe_csv, which must be accompanied by -filmographic. The packages and
    their reference numbers come from the Object Entry CSV.
    -filmographic on its own. The reference numbers are assigned by
    initial_check() and a modified copy of the filmographic CSV is written
    to the desktop logs directory.
    With -dryrun the script exits before any package is accessioned.

    NOTE(review): the block nesting was reconstructed from a flattened
    source - confirm against the original file.
    '''
    args = parse_args(args_)
    oe_list = []
    if args.oe_csv:
        if not args.filmographic:
            print(' - batchaccession.py - ERROR\n - No -filmographic argument supplied. This is mandatory when using the -oe_csv option. \n - Exiting..')
            sys.exit()
        oe_csv_extraction = ififuncs.extract_metadata(args.oe_csv)
        initial_oe_list = oe_csv_extraction[0]
        oe_dicts = process_oe_csv(oe_csv_extraction, args.input)
        # temp hack while we're performing both workflows
        helper_csv = args.oe_csv
    elif args.filmographic:
        initial_oe_list = ififuncs.extract_metadata(args.filmographic)[0]
        # temp hack while we're performing both workflows
        helper_csv = args.filmographic
    if args.oe_csv or args.filmographic:
        for line_item in ififuncs.extract_metadata(helper_csv)[0]:
            try:
                oe_number = line_item['Object Entry'].lower()
            except KeyError:
                oe_number = line_item['OE No.'].lower()
            # this transforms OE-#### to oe####
            transformed_oe = oe_number[:2] + oe_number[3:]
            oe_list.append(transformed_oe)
    if not args.oe_csv:
        # No need to ask for the reference number if the OE csv option is supplied.
        # The assumption here is that the OE csv contains the reference numbers though.
        if args.reference:
            reference_number = get_filmographic_number(args.reference)
        else:
            reference_number = ififuncs.get_reference_number()
    donor = ififuncs.ask_question('Who is the source of acquisition, as appears on the donor agreement? This will not affect Reproductions.')
    depositor_reference = ififuncs.ask_question('What is the donor/depositor number? This will not affect Reproductions.')
    acquisition_type = ififuncs.get_acquisition_type('')
    user = ififuncs.get_user()
    accession_number = get_number(args)
    accession_digits = int(accession_number[3:])
    if not args.oe_csv:
        to_accession = initial_check(
            args, accession_digits, oe_list, reference_number)
    else:
        to_accession = {}
        for oe_record in oe_dicts:
            if os.path.isdir(oe_record['source_path']):
                to_accession[oe_record['source_path']] = [
                    'aaa' + str(accession_digits).zfill(4),
                    oe_record['reference number'],
                    oe_record['parent'],
                    oe_record['donation_date']
                ]
                accession_digits += 1
        for success in sorted(to_accession.keys()):
            print('%s will be accessioned as %s' % (success, to_accession[success]))
    register = accession.make_register()
    if args.filmographic:
        desktop_logs_dir = ififuncs.make_desktop_logs_dir()
        if args.dryrun:
            new_csv_filename = time.strftime("%Y-%m-%dT%H_%M_%S_DRYRUN_SHEET_PLEASE_DO_NOT_INGEST_JUST_IGNORE_COMPLETELY") + os.path.basename(args.filmographic)
        else:
            new_csv_filename = time.strftime("%Y-%m-%dT%H_%M_%S_") + os.path.basename(args.filmographic)
        new_csv = os.path.join(desktop_logs_dir, new_csv_filename)
        if not args.oe_csv:
            filmographic_dict, headers = ififuncs.extract_metadata(args.filmographic)
            for oe_package in to_accession:
                for filmographic_record in filmographic_dict:
                    if os.path.basename(oe_package).upper()[:2] + '-' + os.path.basename(oe_package)[2:] == filmographic_record['Object Entry']:
                        filmographic_record['Reference Number'] = to_accession[oe_package][1]
            get_filmographic_titles(to_accession, filmographic_dict)
            with open(new_csv, 'w') as csvfile:
                fieldnames = headers
                # Removes Object Entry from headings as it's not needed in database.
                del fieldnames[1]
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                for i in filmographic_dict:
                    i.pop('Object Entry', None)
                    # Only include records that have reference numbers
                    if not i['Reference Number'] == '':
                        writer.writerow(i)
    if args.dryrun:
        sys.exit()
    proceed = ififuncs.ask_yes_no(
        'Do you want to proceed?'
    )
    if args.oe_csv:
        # No modified copy is written in the -oe_csv workflow, so the
        # filmographic CSV is handed to accession.py as supplied.
        new_csv = args.filmographic
    if proceed == 'Y':
        for package in sorted(to_accession.keys(), key=natural_keys):
            accession_cmd = [
                package, '-user', user,
                '-f',
                '-number', to_accession[package][0],
                '-reference', to_accession[package][1],
                '-register', register,
                '-csv', new_csv
            ]
            # NOTE(review): oe_dicts is only bound in the -oe_csv workflow,
            # so this loop cannot run for -filmographic on its own.
            for oe_record in oe_dicts:
                if oe_record['source_path'] == package:
                    if not oe_record['format'].lower() == 'dcdm':
                        accession_cmd.append('-pbcore')
            if len(to_accession[package]) == 4:
                if not to_accession[package][2] == 'n/a':
                    accession_cmd.extend(['-acquisition_type', '13'])
                    if args.oe_csv:
                        accession_cmd.extend(['-parent', to_accession[package][2]])
                    else:
                        accession_cmd.extend(['-parent', order.main(package)])
                else:
                    accession_cmd.extend(['-donor', donor])
                    accession_cmd.extend(['-depositor_reference', depositor_reference])
                    accession_cmd.extend(['-acquisition_type', acquisition_type[2]])
                    print(to_accession[package][3])
                    accession_cmd.extend(['-donation_date', to_accession[package][3]])
            print(accession_cmd)
            accession.main(accession_cmd)
    collated_pbcore = gather_metadata(args.input)
    sorted_filepath = ififuncs.sort_csv(register, 'accession number')
    print('\nA helper accessions register has been generated in order to help with registration - located here: %s' % sorted_filepath)
    print('\nA modified filmographic CSV has been generated with added reference numbers - located here: %s' % new_csv)
    print('\nA collated CSV consisting of each PBCore report has been generated for batch database import - located here: %s' % collated_pbcore)
def main(args_):
    '''
    Batch process packages by running accession.py and makepbcore.py

    When a filmographic CSV is supplied with args.csv, a copy that only
    holds the records with a reference number, and that has no Object
    Entry column, is written to the desktop logs directory and passed on
    to accession.py.
    With args.dryrun the script exits before any package is accessioned.
    '''
    args = parse_args(args_)
    oe_list = []
    if args.csv:
        for line_item in ififuncs.extract_metadata(args.csv)[0]:
            oe_number = line_item['Object Entry'].lower()
            # this transforms OE-#### to oe####
            transformed_oe = oe_number[:2] + oe_number[3:]
            oe_list.append(transformed_oe)
    if args.reference:
        reference_number = get_filmographic_number(args.reference)
    else:
        reference_number = ififuncs.get_reference_number()
    donor = ififuncs.ask_question(
        'Who is the source of acquisition, as appears on the donor agreement? This will not affect Reproductions.')
    depositor_reference = ififuncs.ask_question(
        'What is the donor/depositor number? This will not affect Reproductions.')
    acquisition_type = ififuncs.get_acquisition_type('')
    user = ififuncs.get_user()
    accession_number = get_number(args)
    accession_digits = int(accession_number[3:])
    to_accession = initial_check(
        args, accession_digits, oe_list, reference_number)
    register = accession.make_register()
    # Stays None when no filmographic CSV was supplied, so that neither the
    # accession command nor the summary refers to a file that was never
    # written.
    new_csv = None
    if args.csv:
        desktop_logs_dir = ififuncs.make_desktop_logs_dir()
        if args.dryrun:
            new_csv_filename = time.strftime(
                "%Y-%m-%dT%H_%M_%S_DRYRUN_SHEET_PLEASE_DO_NOT_INGEST_JUST_IGNORE_COMPLETELY"
            ) + os.path.basename(args.csv)
        else:
            new_csv_filename = time.strftime(
                "%Y-%m-%dT%H_%M_%S_") + os.path.basename(args.csv)
        new_csv = os.path.join(desktop_logs_dir, new_csv_filename)
        filmographic_dict, headers = ififuncs.extract_metadata(args.csv)
        for oe_package in to_accession:
            package_name = os.path.basename(oe_package)
            # this transforms oe#### back to OE-####
            object_entry = package_name.upper()[:2] + '-' + package_name[2:]
            for filmographic_record in filmographic_dict:
                if object_entry == filmographic_record['Object Entry']:
                    filmographic_record['Reference Number'] = to_accession[oe_package][1]
        with open(new_csv, 'w') as csvfile:
            fieldnames = headers
            # Removes Object Entry from headings as it's not needed in database.
            del fieldnames[1]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for i in filmographic_dict:
                i.pop('Object Entry', None)
                # Only include records that have reference numbers
                if not i['Reference Number'] == '':
                    writer.writerow(i)
    if args.dryrun:
        sys.exit()
    proceed = ififuncs.ask_yes_no('Do you want to proceed?')
    if proceed == 'Y':
        for package in sorted(to_accession.keys(), key=natural_keys):
            accession_cmd = [
                package, '-user', user,
                '-pbcore', '-f',
                '-number', to_accession[package][0],
                '-reference', to_accession[package][1],
                '-register', register
            ]
            if new_csv is not None:
                accession_cmd.extend(['-csv', new_csv])
            # Entries with three values are accessioned with acquisition
            # type 13 and a parent, all others with the donor details.
            if len(to_accession[package]) == 3:
                accession_cmd.extend(['-acquisition_type', '13'])
                accession_cmd.extend(['-parent', order.main(package)])
            else:
                accession_cmd.extend(['-donor', donor])
                accession_cmd.extend(
                    ['-depositor_reference', depositor_reference])
                accession_cmd.extend(
                    ['-acquisition_type', acquisition_type[2]])
            print(accession_cmd)
            accession.main(accession_cmd)
    # NOTE(review): the summary below is placed at function level; the
    # flattened source does not show whether it belonged inside the
    # "proceed" branch - confirm.
    collated_pbcore = gather_metadata(args.input)
    sorted_filepath = ififuncs.sort_csv(register, 'accession number')
    print('\nA helper accessions register has been generated in order to help with registration - located here: %s' % sorted_filepath)
    if new_csv is not None:
        print('\nA modified filmographic CSV has been generated with added reference numbers - located here: %s' % new_csv)
    print('\nA collated CSV consisting of each PBCore report has been generated for batch database import - located here: %s' % collated_pbcore)
def main(args_):
    '''
    Launches the various functions that will accession a package

    The Object Entry folder is renamed with the accession number, the
    rename is recorded in a new register and in the package log, a sha512
    manifest and a DFXML report are generated and, with args.pbcore,
    makepbcore is launched.
    Nothing is changed if the input is not a valid package or if the user
    declines the rename.
    '''
    args = parse_args(args_)
    source = args.input
    uuid_directory = ififuncs.check_for_sip([source])
    if uuid_directory is None:
        print('not a valid package. The input should include a package that has been through Object Entry')
        return
    oe_path = os.path.dirname(uuid_directory)
    oe_number = os.path.basename(oe_path)
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    if args.number:
        digits = args.number[3:]
        if args.number[:3] == 'aaa' and len(digits) == 4 and digits.isdigit():
            accession_number = args.number
        else:
            # An invalid -number falls back to asking the user.
            print('First three characters must be \'aaa\' and last four characters must be four digits')
            accession_number = ififuncs.get_accession_number()
    else:
        accession_number = ififuncs.get_accession_number()
    accession_path = os.path.join(os.path.dirname(oe_path), accession_number)
    uuid = os.path.basename(uuid_directory)
    new_uuid_path = os.path.join(accession_path, uuid)
    logs_dir = os.path.join(new_uuid_path, 'logs')
    sipcreator_log = os.path.join(logs_dir, uuid) + '_sip_log.log'
    if args.force:
        proceed = 'Y'
    else:
        proceed = ififuncs.ask_yes_no(
            'Do you want to rename %s with %s' % (oe_number, accession_number))
    if proceed != 'Y':
        return
    os.rename(oe_path, accession_path)
    register = make_register()
    # The register records the Object Entry number in its OE-#### form.
    ififuncs.append_csv(
        register,
        (oe_number.upper()[:2] + '-' + oe_number[2:6],
         accession_number, '', '', '', '', ''))
    ififuncs.generate_log(sipcreator_log, 'EVENT = accession.py started')
    ififuncs.generate_log(
        sipcreator_log,
        'eventDetail=accession.py %s' % ififuncs.get_script_version('accession.py'))
    ififuncs.generate_log(
        sipcreator_log, 'Command line arguments: %s' % args)
    ififuncs.generate_log(sipcreator_log, 'EVENT = agentName=%s' % user)
    ififuncs.generate_log(
        sipcreator_log,
        'EVENT = eventType=Identifier assignment,'
        ' eventIdentifierType=accession number, value=%s' % accession_number)
    ififuncs.generate_log(
        sipcreator_log,
        'EVENT = eventType=accession,'
        ' eventIdentifierType=accession number, value=%s' % accession_number)
    sip_manifest = os.path.join(accession_path, uuid) + '_manifest.md5'
    sha512_log = manifest.main([new_uuid_path, '-sha512', '-s'])
    sha512_manifest = os.path.join(
        os.path.dirname(new_uuid_path), uuid + '_manifest-sha512.txt')
    ififuncs.merge_logs_append(sha512_log, sipcreator_log, sip_manifest)
    os.remove(sha512_log)
    dfxml = make_dfxml(args, new_uuid_path, uuid)
    ififuncs.generate_log(
        sipcreator_log,
        'EVENT = Metadata extraction - eventDetail=File system metadata extraction using Digital Forensics XML, eventOutcome=%s, agentName=makedfxml' % (dfxml))
    ififuncs.generate_log(sipcreator_log, 'EVENT = accession.py finished')
    ififuncs.checksum_replace(sip_manifest, sipcreator_log, 'md5')
    ififuncs.checksum_replace(sha512_manifest, sipcreator_log, 'sha512')
    ififuncs.manifest_update(sip_manifest, dfxml)
    ififuncs.sha512_update(sha512_manifest, dfxml)
    if args.pbcore:
        makepbcore.main([accession_path, '-p', '-user', user])
def main(args_):
    '''
    Launch all the functions for creating an IFI SIP.

    Every subfolder of the input that holds at least one file matching
    -object_extension_pattern is given the next Object Entry number.
    After confirmation, or straight away with -y, sipcreator is run once
    per subfolder and the outcome of each copyit job is printed.
    With -dryrun the script exits after listing what would be processed.
    '''
    args = parse_args(args_)
    source_folder = args.i
    print(args)
    oe_dict = {}
    user = ififuncs.determine_user(args)
    if args.oe:
        object_entry = args.oe
    else:
        object_entry = ififuncs.get_object_entry()
    oe_digits = int(object_entry.replace('oe', ''))
    for folder in sorted(os.listdir(source_folder)):
        full_path = os.path.join(source_folder, folder)
        if not os.path.isdir(full_path):
            continue
        try:
            folder_contents = os.listdir(full_path)
        except PermissionError:
            # Folders that cannot be read are skipped without a message.
            continue
        object_entry_complete = 'oe' + str(oe_digits)
        inputs = []
        supplements = []
        for files in folder_contents:
            extension = os.path.splitext(files)[1][1:].lower()
            if extension in args.object_extension_pattern:
                inputs.append(os.path.join(full_path, files))
            if extension in args.supplement_extension_pattern:
                supplements.append(os.path.join(full_path, files))
        if inputs:
            print(' - Object Entry: %s\n - Inputs: %s\n - Supplements: %s\n' % (object_entry_complete, inputs, supplements))
            oe_dict[object_entry_complete] = [inputs, supplements]
            # A number is only used up by folders that will be processed.
            oe_digits += 1
        else:
            print('Skipping %s as there are no files in this folder that match the -object_extension_pattern' % full_path)
    if args.dryrun:
        print('Exiting as you selected -dryrun')
        sys.exit()
    logs = []
    if args.y:
        proceed = 'Y'
    else:
        proceed = ififuncs.ask_yes_no(
            'Do you want to proceed?'
        )
    if proceed == 'Y':
        # The keys are sorted on their digits, as a plain string sort would
        # place oe1000 before oe999.
        for sips in sorted(oe_dict, key=lambda oe: int(oe[2:])):
            print(oe_dict[sips])
            sipcreator_cmd = ['-i']
            sipcreator_cmd.extend(oe_dict[sips][0])
            sipcreator_cmd.append('-supplement')
            sipcreator_cmd.extend(oe_dict[sips][1])
            sipcreator_cmd += ['-user', user, '-oe', sips, '-o', args.o]
            if args.rename_uuid:
                sipcreator_cmd.append('-rename_uuid')
            if args.zip:
                sipcreator_cmd.append('-zip')
            if args.l:
                sipcreator_cmd.append('-l')
            print(sipcreator_cmd)
            sipcreator_log, _ = sipcreator.main(sipcreator_cmd)
            logs.append(sipcreator_log)
        for i in logs:
            if os.path.isfile(i):
                print("%-*s   : copyit job was a %s" % (50, os.path.basename(i), analyze_log(i)))