Ejemplo n.º 1
0
def main(args_):
    '''
    Batch process packages by running accession.py and makepbcore.py
    '''
    args = parse_args(args_)
    oe_list = []
    if args.oe_csv:
        if not args.filmographic:
            print(' - batchaccession.py - ERROR\n - No -filmographic argument supplied. This is mandatory when using the -oe_csv option. \n - Exiting..')
            sys.exit()
        oe_csv_extraction = ififuncs.extract_metadata(args.oe_csv)
        initial_oe_list = oe_csv_extraction[0]
        oe_dicts = process_oe_csv(oe_csv_extraction, args.input)
        # temp hack while we're performing both workflows
        helper_csv = args.oe_csv
    elif args.filmographic:
        initial_oe_list = ififuncs.extract_metadata(args.filmographic)[0]
        # temp hack while we're performing both workflows
        helper_csv = args.filmographic
    if args.oe_csv or args.filmographic:
        for line_item in ififuncs.extract_metadata(helper_csv)[0]:
            try:
                oe_number = line_item['Object Entry'].lower()
            except KeyError:
                oe_number = line_item['OE No.'].lower()
            # this transforms OE-#### to oe####
            transformed_oe = oe_number[:2] + oe_number[3:]
            oe_list.append(transformed_oe)
    if not args.oe_csv:
        # No need to ask for the reference number if the OE csv option is supplied.
        # The assumption here is that the OE csv contains the reference numbers though.
        if args.reference:
            reference_number = get_filmographic_number(args.reference)
        else:
            reference_number = ififuncs.get_reference_number()
    donor = ififuncs.ask_question('Who is the source of acquisition, as appears on the donor agreement? This will not affect Reproductions.')
    depositor_reference = ififuncs.ask_question('What is the donor/depositor number? This will not affect Reproductions.')
    acquisition_type = ififuncs.get_acquisition_type('')
    user = ififuncs.get_user()
    accession_number = get_number(args)
    accession_digits = int(accession_number[3:])
    if not args.oe_csv:
        to_accession = initial_check(args, accession_digits, oe_list, reference_number)
    else:
        to_accession = {}
        for oe_record in oe_dicts:
            if os.path.isdir(oe_record['source_path']):
                to_accession[oe_record['source_path']] = ['aaa' + str(accession_digits).zfill(4), oe_record['reference number'], oe_record['parent'], oe_record['donation_date']]
                accession_digits += 1
    for success in sorted(to_accession.keys()):
        print('%s will be accessioned as %s' %  (success, to_accession[success]))
    register = accession.make_register()
    if args.filmographic:
        desktop_logs_dir = ififuncs.make_desktop_logs_dir()
        if args.dryrun:
            new_csv_filename = time.strftime("%Y-%m-%dT%H_%M_%S_DRYRUN_SHEET_PLEASE_DO_NOT_INGEST_JUST_IGNORE_COMPLETELY") + os.path.basename(args.filmographic)
        else:
            new_csv_filename = time.strftime("%Y-%m-%dT%H_%M_%S_") + os.path.basename(args.filmographic)
        new_csv = os.path.join(desktop_logs_dir, new_csv_filename)
        if not args.oe_csv:
            filmographic_dict, headers = ififuncs.extract_metadata(args.filmographic)
            for oe_package in to_accession:
                for filmographic_record in filmographic_dict:
                    if os.path.basename(oe_package).upper()[:2] + '-' + os.path.basename(oe_package)[2:] == filmographic_record['Object Entry']:
                        filmographic_record['Reference Number'] = to_accession[oe_package][1]
            get_filmographic_titles(to_accession, filmographic_dict)
            with open(new_csv, 'w') as csvfile:
                fieldnames = headers
                # Removes Object Entry from headings as it's not needed in database.
                del fieldnames[1]
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                for i in filmographic_dict:
                    i.pop('Object Entry', None)
                    # Only include records that have reference numbers
                    if not i['Reference Number'] == '':
                        writer.writerow(i)
    if args.dryrun:
        sys.exit()
    proceed = ififuncs.ask_yes_no(
        'Do you want to proceed?'
    )
    if args.oe_csv:
        new_csv = args.filmographic
    if proceed == 'Y':
        for package in sorted(to_accession.keys(), key=natural_keys):
            accession_cmd = [
                package, '-user', user,
                '-f',
                '-number', to_accession[package][0],
                '-reference', to_accession[package][1],
                '-register', register,
                '-csv', new_csv
            ]
            for oe_record in oe_dicts:
                if oe_record['source_path'] == package:
                    if not oe_record['format'].lower() == 'dcdm':
                        accession_cmd.append('-pbcore')
            if len(to_accession[package]) == 4:
                if not to_accession[package][2] == 'n/a':
                    accession_cmd.extend(['-acquisition_type', '13'])
                    if args.oe_csv:
                        accession_cmd.extend(['-parent', to_accession[package][2]])
                    else:
                        accession_cmd.extend(['-parent', order.main(package)])
                else:
                    accession_cmd.extend(['-donor', donor])
                    accession_cmd.extend(['-depositor_reference', depositor_reference])
                    accession_cmd.extend(['-acquisition_type', acquisition_type[2]])
                    print to_accession[package][3]
                    accession_cmd.extend(['-donation_date', to_accession[package][3]])
            print accession_cmd
            accession.main(accession_cmd)
    collated_pbcore = gather_metadata(args.input)
    sorted_filepath = ififuncs.sort_csv(register, 'accession number')
    print '\nA helper accessions register has been generated in order to help with registration - located here: %s' % sorted_filepath
    print '\nA modified filmographic CSV has been generated with added reference numbers - located here: %s' % new_csv
    print '\nA collated CSV consisting of each PBCore report has been generated for batch database import - located here: %s' % collated_pbcore
Ejemplo n.º 2
0
def main(args_):
    '''
    Launch all the functions for creating an IFI SIP.
    '''
    args = parse_args(args_)
    start = datetime.datetime.now()
    inputs = args.i
    if args.d:
        try:
            import clairmeta
            clairmeta_version = clairmeta.__version__
        except ImportError:
            print(
                'Exiting as Clairmeta is not installed. If there is a case for not using clairmeta, please let me know and i can make a workaround'
            )
            sys.exit()
    print(args)
    user = ififuncs.determine_user(args)
    object_entry = get_object_entry(args)
    sip_path = make_folder_path(os.path.join(args.o), args, object_entry)
    uuid, uuid_event = determine_uuid(args, sip_path)
    new_log_textfile = os.path.join(sip_path,
                                    'logs' + '/' + uuid + '_sip_log.log')
    if args.d:
        content_title = create_content_title_text(sip_path, args)
    ififuncs.generate_log(new_log_textfile, 'EVENT = sipcreator.py started')
    ififuncs.generate_log(
        new_log_textfile, 'eventDetail=sipcreator.py %s' %
        ififuncs.get_script_version('sipcreator.py'))
    ififuncs.generate_log(new_log_textfile,
                          'Command line arguments: %s' % args)
    ififuncs.generate_log(new_log_textfile, 'EVENT = agentName=%s' % user)
    ififuncs.generate_log(new_log_textfile, uuid_event)
    if not args.sc:
        ififuncs.generate_log(
            new_log_textfile, 'EVENT = eventType=Identifier assignement,'
            ' eventIdentifierType=object entry, value=%s' % object_entry)
    metadata_dir = os.path.join(sip_path, 'metadata')
    supplemental_dir = os.path.join(metadata_dir, 'supplemental')
    logs_dir = os.path.join(sip_path, 'logs')
    if args.accession:
        accession_number = ififuncs.get_accession_number()
        reference_number = ififuncs.get_reference_number()
        parent = ififuncs.ask_question(
            'What is the parent record? eg MV 1234. Enter n/a if this is a born digital acquisition with no parent.'
        )
        donor = ififuncs.ask_question(
            'Who is the source of acquisition, as appears on the donor agreement? This will not affect Reproductions.'
        )
        depositor_reference = ififuncs.ask_question(
            'What is the donor/depositor number? This will not affect Reproductions.'
        )
        acquisition_type = ififuncs.get_acquisition_type('')
        donation_date = ififuncs.ask_question(
            'When was the donation date in DD/MM/YYYY format? Eg. 31/12/1999 - Unfortunately this is NOT using ISO 8601.'
        )
    if args.zip:
        inputxml, inputtracexml, dfxml = ififuncs.generate_mediainfo_xmls(
            inputs[0], args.o, uuid, new_log_textfile)
        if args.manifest:
            shutil.copy(
                args.manifest,
                args.manifest.replace('_manifest.md5', '_manifest-md5.txt'))
            source_manifest = args.manifest.replace('_manifest.md5',
                                                    '_manifest-md5.txt')
        else:
            source_manifest = os.path.join(
                args.o,
                os.path.basename(args.i[0]) + '_manifest-md5.txt')
            ififuncs.generate_log(
                new_log_textfile,
                'EVENT = message digest calculation, status=started, eventType=messageDigestCalculation, agentName=hashlib, eventDetail=MD5 checksum of source files within ZIP'
            )
            ififuncs.hashlib_manifest(args.i[0], source_manifest,
                                      os.path.dirname(args.i[0]))
            ififuncs.generate_log(
                new_log_textfile,
                'EVENT = message digest calculation, status=finished, eventType=messageDigestCalculation, agentName=hashlib, eventDetail=MD5 checksum of source files within ZIP'
            )
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = packing, status=started, eventType=packing, agentName=makezip.py, eventDetail=Source object to be packed=%s'
            % inputs[0])
        makezip_judgement, zip_file = makezip.main([
            '-i', inputs[0], '-o',
            os.path.join(sip_path, 'objects'), '-basename', uuid + '.zip'
        ])
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = packing, status=finished, eventType=packing, agentName=makezip.py, eventDetail=Source object packed into=%s'
            % zip_file)
        if makezip_judgement is None:
            judgement = 'lossless'
        else:
            judgement = makezip_judgement
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = losslessness verification, status=finished, eventType=messageDigestCalculation, agentName=makezip.py, eventDetail=embedded crc32 checksum validation, eventOutcome=%s'
            % judgement)
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = losslessness verification, status=finished, eventType=messageDigestCalculation, agentName=makezip.py, eventDetail=embedded crc32 checksum validation, eventOutcome=%s'
            % judgement)
    else:
        log_names = move_files(inputs, sip_path, args, user)
    ififuncs.get_technical_metadata(sip_path, new_log_textfile)
    ififuncs.hashlib_manifest(metadata_dir,
                              metadata_dir + '/metadata_manifest.md5',
                              metadata_dir)
    if args.sc:
        normalise_objects_manifest(sip_path)
    new_manifest_textfile = consolidate_manifests(sip_path, 'objects',
                                                  new_log_textfile)

    if args.zip:
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = Message Digest Calculation, status=started, eventType=message digest calculation, eventDetail=%s module=hashlib'
            % zip_file)
        ififuncs.manifest_update(new_manifest_textfile, zip_file)
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = Message Digest Calculation, status=finished, eventType=message digest calculation, eventDetail=%s module=hashlib'
            % zip_file)
    consolidate_manifests(sip_path, 'metadata', new_log_textfile)
    ififuncs.hashlib_append(logs_dir, new_manifest_textfile,
                            os.path.dirname(os.path.dirname(logs_dir)))
    if args.supplement:
        os.makedirs(supplemental_dir)
        supplement_cmd = [
            '-i', args.supplement, '-user', user, '-new_folder',
            supplemental_dir,
            os.path.dirname(sip_path), '-copy'
        ]
        package_update.main(supplement_cmd)
    if args.zip:
        os.makedirs(supplemental_dir)
        supplement_cmd = [
            '-i', [inputxml, inputtracexml, dfxml, source_manifest], '-user',
            user, '-new_folder', supplemental_dir,
            os.path.dirname(sip_path), '-copy'
        ]
        package_update.main(supplement_cmd)
    if args.sc:
        print('Generating Digital Forensics XML')
        dfxml = accession.make_dfxml(args, sip_path, uuid)
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = Metadata extraction - eventDetail=File system metadata extraction using Digital Forensics XML, eventOutcome=%s, agentName=makedfxml'
            % (dfxml))
        ififuncs.manifest_update(new_manifest_textfile, dfxml)
        sha512_log = manifest.main([sip_path, '-sha512', '-s'])
        sha512_manifest = os.path.join(os.path.dirname(sip_path),
                                       uuid + '_manifest-sha512.txt')
        ififuncs.merge_logs_append(sha512_log, new_log_textfile,
                                   new_manifest_textfile)
        ififuncs.checksum_replace(sha512_manifest, new_log_textfile, 'sha512')
        os.remove(sha512_log)
    ififuncs.sort_manifest(new_manifest_textfile)
    if not args.quiet:
        if 'log_names' in locals():
            log_report(log_names)
    finish = datetime.datetime.now()
    print('\n- %s ran this script at %s and it finished at %s' %
          (user, start, finish))
    if args.d:
        process_dcp(sip_path, content_title, args, new_manifest_textfile,
                    new_log_textfile, metadata_dir, clairmeta_version)
    if args.accession:
        register = accession.make_register()
        filmographic_dict = ififuncs.extract_metadata(args.csv)[0]
        for filmographic_record in filmographic_dict:
            if filmographic_record['Reference Number'].lower(
            ) == reference_number.lower():
                if filmographic_record['Title'] == '':
                    title = filmographic_record[
                        'TitleSeries'] + '; ' + filmographic_record['EpisodeNo']
                else:
                    title = filmographic_record['Title']
        oe_register = make_oe_register()
        ififuncs.append_csv(
            oe_register,
            (object_entry.upper()[:2] + '-' + object_entry[2:], donation_date,
             '1', '', title, donor, acquisition_type[1], accession_number,
             'Representation of %s|Reproduction of %s' %
             (reference_number, parent), ''))
        accession_cmd = [
            os.path.dirname(sip_path), '-user', user, '-f', '-number',
            accession_number, '-reference', reference_number, '-register',
            register, '-csv', args.csv, '-pbcore'
        ]
        if not parent.lower() == 'n/a':
            accession_cmd.extend(['-parent', parent])
        accession_cmd.extend(['-donor', donor])
        accession_cmd.extend(['-depositor_reference', depositor_reference])
        accession_cmd.extend(['-acquisition_type', acquisition_type[2]])
        accession_cmd.extend(['-donation_date', donation_date])
        print(accession_cmd)
        accession.main(accession_cmd)
    return new_log_textfile, new_manifest_textfile
Ejemplo n.º 3
0
def main(args_):
    '''
    Batch process packages by running accession.py and makepbcore.py
    '''
    args = parse_args(args_)
    oe_list = []
    if args.csv:
        for line_item in ififuncs.extract_metadata(args.csv)[0]:
            oe_number = line_item['Object Entry'].lower()
            # this transforms OE-#### to oe####
            transformed_oe = oe_number[:2] + oe_number[3:]
            oe_list.append(transformed_oe)
    if args.reference:
        reference_number = get_filmographic_number(args.reference)
    else:
        reference_number = ififuncs.get_reference_number()
    donor = ififuncs.ask_question(
        'Who is the source of acquisition, as appears on the donor agreement? This will not affect Reproductions.'
    )
    depositor_reference = ififuncs.ask_question(
        'What is the donor/depositor number? This will not affect Reproductions.'
    )
    acquisition_type = ififuncs.get_acquisition_type('')
    user = ififuncs.get_user()
    accession_number = get_number(args)
    accession_digits = int(accession_number[3:])
    to_accession = initial_check(args, accession_digits, oe_list,
                                 reference_number)
    register = accession.make_register()
    if args.csv:
        desktop_logs_dir = ififuncs.make_desktop_logs_dir()
        if args.dryrun:
            new_csv_filename = time.strftime(
                "%Y-%m-%dT%H_%M_%S_DRYRUN_SHEET_PLEASE_DO_NOT_INGEST_JUST_IGNORE_COMPLETELY"
            ) + os.path.basename(args.csv)
        else:
            new_csv_filename = time.strftime(
                "%Y-%m-%dT%H_%M_%S_") + os.path.basename(args.csv)
        new_csv = os.path.join(desktop_logs_dir, new_csv_filename)
        filmographic_dict, headers = ififuncs.extract_metadata(args.csv)
        for oe_package in to_accession:
            for filmographic_record in filmographic_dict:
                if os.path.basename(oe_package).upper(
                )[:2] + '-' + os.path.basename(
                        oe_package)[2:] == filmographic_record['Object Entry']:
                    filmographic_record['Reference Number'] = to_accession[
                        oe_package][1]
        with open(new_csv, 'w') as csvfile:
            fieldnames = headers
            # Removes Object Entry from headings as it's not needed in database.
            del fieldnames[1]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for i in filmographic_dict:
                i.pop('Object Entry', None)
                # Only include records that have reference numbers
                if not i['Reference Number'] == '':
                    writer.writerow(i)
    if args.dryrun:
        sys.exit()
    proceed = ififuncs.ask_yes_no('Do you want to proceed?')
    if proceed == 'Y':
        for package in sorted(to_accession.keys(), key=natural_keys):
            accession_cmd = [
                package, '-user', user, '-pbcore', '-f', '-number',
                to_accession[package][0], '-reference',
                to_accession[package][1], '-register', register, '-csv',
                new_csv
            ]
            if len(to_accession[package]) == 3:
                accession_cmd.extend(['-acquisition_type', '13'])
                accession_cmd.extend(['-parent', order.main(package)])
            else:
                accession_cmd.extend(['-donor', donor])
                accession_cmd.extend(
                    ['-depositor_reference', depositor_reference])
                accession_cmd.extend(
                    ['-acquisition_type', acquisition_type[2]])
            print accession_cmd
            accession.main(accession_cmd)
    collated_pbcore = gather_metadata(args.input)
    sorted_filepath = ififuncs.sort_csv(register, 'accession number')
    print '\nA helper accessions register has been generated in order to help with registration - located here: %s' % sorted_filepath
    print '\nA modified filmographic CSV has been generated with added reference numbers - located here: %s' % new_csv
    print '\nA collated CSV consisting of each PBCore report has been generated for batch database import - located here: %s' % collated_pbcore