コード例 #1
0
def parse_manifest(manifest, log_name_source):
    '''
    Analyses the manifest to see if any files are missing.

    Every non-blank manifest line is split into a checksum (the text before
    the first space) and a path (everything after a fixed column offset).
    The working directory is changed to the folder that holds the manifest
    before the paths are tested with os.path.isfile.

    manifest: path to the manifest file.
    log_name_source: log file handed to ififuncs.generate_log.

    Returns a tuple (manifest_dict, missing_files_list): a dictionary that
    maps each path found on disk to its checksum, and a list of the manifest
    paths that were not found.
    Exits via sys.exit() if the file counts differ and the user answers 'N'.
    '''
    source_dir = os.path.join(
        os.path.dirname(manifest),
        os.path.basename(manifest).replace('_manifest.md5', ''))
    source_count, file_list = ififuncs.count_stuff(source_dir)
    missing_files_list = []
    manifest_dict = {}
    proceed = 'Y'
    # The path starts after the checksum plus a two character separator:
    # column 130 for sha512 manifests, column 34 otherwise.
    if 'manifest-sha512.txt' in manifest:
        path_offset = 130
    else:
        path_offset = 34
    # Manifest paths are tested relative to the manifest's own folder.
    os.chdir(os.path.dirname(manifest))
    with open(manifest, 'r') as manifest_object:
        # Blank lines are not entries; counting them would trigger a bogus
        # count mismatch and a bogus "missing file" with an empty name.
        manifest_list = [
            line for line in manifest_object.readlines() if line.strip()
        ]
    for entry in manifest_list:
        checksum = entry.split(' ')[0]
        path = entry[path_offset:].replace('\r', '').replace('\n', '')
        path = path.replace('\\', '/')
        if os.path.isfile(path):
            manifest_dict[path] = checksum
        else:
            message = '%s is missing' % path
            ififuncs.generate_log(log_name_source, message)
            print(message)
            missing_files_list.append(path)
    manifest_file_count = len(manifest_list)
    if source_count != manifest_file_count:
        print(
            ' - There is a mismatch between your file count and the manifest file count'
        )
        print(' - checking which files are different')
        for source_file in file_list:
            # manifest_dict holds exactly the manifest paths that exist on
            # disk, so a dictionary lookup replaces the old list scan.
            if source_file not in manifest_dict:
                print(
                    '%s is present in your source directory but not in the source manifest'
                    % source_file)
        proceed = ififuncs.ask_yes_no('Do you want to proceed regardless?')
    if proceed == 'N':
        print('Exiting')
        sys.exit()
    if missing_files_list:
        print('The number of missing files: %s' % len(missing_files_list))
        ififuncs.generate_log(
            log_name_source,
            'The number of missing files is: %s' % len(missing_files_list))
    else:
        print('All files present')
        ififuncs.generate_log(log_name_source, 'All files present')
    return manifest_dict, missing_files_list
コード例 #2
0
ファイル: batchaccession.py プロジェクト: 4lm/IFIscripts
def main(args_):
    '''
    Batch process packages by running accession.main on each of them.

    args_: command line arguments, handed to parse_args.

    When a filmographic CSV is supplied (args.csv), a copy carrying the
    assigned reference numbers is written into the desktop logs directory.
    With args.dryrun the function exits right after that step.
    '''
    args = parse_args(args_)
    oe_list = []
    # Stays None unless a filmographic CSV is supplied; the closing summary
    # must not mention a file that was never written.
    new_csv = None
    if args.csv:
        for line_item in ififuncs.extract_metadata(args.csv)[0]:
            oe_number = line_item['Object Entry'].lower()
            # this transforms OE-#### to oe####
            transformed_oe = oe_number[:2] + oe_number[3:]
            oe_list.append(transformed_oe)
    if args.reference:
        reference_number = get_filmographic_number(args.reference)
    else:
        reference_number = ififuncs.get_reference_number()
    user = ififuncs.get_user()
    accession_number = get_number(args)
    # The numeric part follows a three character prefix.
    accession_digits = int(accession_number[3:])
    to_accession = initial_check(args, accession_digits, oe_list,
                                 reference_number)
    register = accession.make_register()
    if args.csv:
        desktop_logs_dir = ififuncs.make_desktop_logs_dir()
        new_csv = os.path.join(desktop_logs_dir, os.path.basename(args.csv))
        filmographic_dict, headers = ififuncs.extract_metadata(args.csv)
        for oe_package in to_accession:
            package_name = os.path.basename(oe_package)
            # Folder names look like oe#### while the CSV uses OE-####.
            object_entry = package_name.upper()[:2] + '-' + package_name[2:]
            for filmographic_record in filmographic_dict:
                if object_entry == filmographic_record['Object Entry']:
                    filmographic_record['Reference Number'] = to_accession[
                        oe_package][1]
        with open(new_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=headers)
            writer.writeheader()
            for record in filmographic_dict:
                writer.writerow(record)
    if args.dryrun:
        sys.exit()
    proceed = ififuncs.ask_yes_no('Do you want to proceed?')
    if proceed == 'Y':
        for package in sorted(to_accession.keys()):
            accession.main([
                package, '-user', user, '-p', '-f', '-number',
                to_accession[package][0], '-reference',
                to_accession[package][1], '-register', register
            ])
    collated_pbcore = gather_metadata(args.input)
    print('\nA helper accessions register has been generated in order to help with registration - located here: %s' % register)
    if new_csv is not None:
        print('\nA modified filmographic CSV has been generated with added reference numbers - located here: %s' % new_csv)
    print('\nA collated CSV consisting of each PBCore report has been generated for batch database import - located here: %s' % collated_pbcore)
コード例 #3
0
ファイル: batchaccession.py プロジェクト: lsde/IFIscripts
def main(args_):
    '''
    Run accession.main once for every package selected by initial_check.

    args_: command line arguments, handed to parse_args.
    Nothing is accessioned unless the user answers 'Y' to the prompt.
    '''
    args = parse_args(args_)
    user = ififuncs.get_user()
    first_number = get_number(args)
    # Drop the three character prefix and keep the numeric part.
    first_digits = int(first_number[3:])
    to_accession = initial_check(args, first_digits)
    answer = ififuncs.ask_yes_no('Do you want to proceed?')
    if answer != 'Y':
        return
    ordered_packages = sorted(to_accession.keys())
    for package in ordered_packages:
        accession_cmd = [package, '-user', user, '-p', '-f', '-number']
        accession_cmd.append(to_accession[package])
        accession.main(accession_cmd)
コード例 #4
0
def create_content_title_text(sip_path):
    '''
    Offer to rename a DCP folder after the title returned by
    ififuncs.get_contenttitletext for its CPL.

    The CPL is looked up under <sip_path>/objects. If the user answers 'Y',
    the folder that holds the CPL is renamed in place (the working directory
    is changed to its parent first).
    For example:
    Original name: CHARBON-SMPTE-24
    New name: CHARBON-SMPTE-24-INTEROP-SUBS_TST_S_XX-EN_FR_XX_2K_CHA-20120613_CHA_OV
    Returns the content title whether or not the rename took place.
    '''
    objects_dir = os.path.join(sip_path, 'objects')
    cpl = ififuncs.find_cpl(objects_dir)
    dcp_dirname = os.path.dirname(cpl)
    content_title_text = ififuncs.get_contenttitletext(cpl)
    dci_foldername = os.path.join(objects_dir, content_title_text)
    question = 'Do you want to rename %s with %s ?' % (dcp_dirname,
                                                       dci_foldername)
    answer = ififuncs.ask_yes_no(question)
    if answer == 'Y':
        # Split into the parent folder and the folder's current name.
        parent_dir, current_name = os.path.split(dcp_dirname)
        os.chdir(parent_dir)
        os.rename(current_name, content_title_text)
    return content_title_text
コード例 #5
0
ファイル: sipcreator.py プロジェクト: ecodonohoe/IFIscripts
def create_content_title_text(sip_path):
    '''
    Ask whether the folder holding the CPL should take the name returned by
    ififuncs.get_contenttitletext, and rename it if the user agrees.

    For example:
    Original name: CHARBON-SMPTE-24
    New name: CHARBON-SMPTE-24-INTEROP-SUBS_TST_S_XX-EN_FR_XX_2K_CHA-20120613_CHA_OV
    The content title is returned in every case.
    '''
    objects_dir = os.path.join(sip_path, 'objects')
    cpl = ififuncs.find_cpl(objects_dir)
    dcp_dirname = os.path.dirname(cpl)
    content_title_text = ififuncs.get_contenttitletext(cpl)
    dci_foldername = os.path.join(objects_dir, content_title_text)
    prompt = 'Do you want to rename %s with %s ?' % (dcp_dirname,
                                                     dci_foldername)
    # Guard clause: anything other than 'Y' leaves the folder untouched.
    if ififuncs.ask_yes_no(prompt) != 'Y':
        return content_title_text
    # The rename uses names relative to the folder's parent directory.
    os.chdir(os.path.dirname(dcp_dirname))
    old_name = os.path.basename(dcp_dirname)
    os.rename(old_name, content_title_text)
    return content_title_text
コード例 #6
0
def create_content_title_text(sip_path, args):
    '''
    Look up the content title of the CPL found in args.i[0] and ask the user
    to confirm it as the new DCP folder name.

    No rename happens in this function: it only asks the question, exits the
    program if the answer is 'N', and otherwise returns the content title.
    Example of the names involved:
    Original name: CHARBON-SMPTE-24
    New name: CHARBON-SMPTE-24-INTEROP-SUBS_TST_S_XX-EN_FR_XX_2K_CHA-20120613_CHA_OV
    '''
    cpl = ififuncs.find_cpl(args.i[0])
    objects_dir = os.path.join(sip_path, 'objects')
    current_name = os.path.basename(os.path.dirname(cpl))
    content_title = ififuncs.get_contenttitletext(cpl)
    dci_foldername = os.path.join(objects_dir, content_title)
    question = 'Do you want to rename %s with %s ?' % (current_name,
                                                       dci_foldername)
    if ififuncs.ask_yes_no(question) == 'N':
        print('Exiting')
        sys.exit()
    return content_title
コード例 #7
0
ファイル: order.py プロジェクト: muthusalami/IFIscripts-1
def main(args):
    '''
    Walk a package folder and report the parent found for it, if any.

    args: path to the package; only folders whose name starts with 'oe' or
    'aaa' are examined.
    Returns the last word of the ififuncs.find_parent result when it reports
    a parent, the package folder name when a package with a single file in
    its objects folder is accepted by the user, and None otherwise.
    '''
    source = args
    folder_name = os.path.basename(source)
    if not (folder_name[:2] == 'oe' or folder_name[:3] == 'aaa'):
        return None
    oe_uuid_dict = ififuncs.group_ids(os.path.dirname(source))
    for root, _, filenames in os.walk(source):
        for filename in filenames:
            if not filename.endswith('_sip_log.log'):
                continue
            log_path = os.path.join(root, filename)
            uuid_search = ififuncs.find_parent(log_path, oe_uuid_dict)
            if 'not a child' in uuid_search:
                # Checks if a single AV file is in the objects dir.
                uuid_dir = os.path.dirname(root)
                if file_count(os.path.join(uuid_dir, 'objects')) != 1:
                    # master
                    return None
                package_name = os.path.basename(os.path.dirname(uuid_dir))
                print(
                    '%s has no parent but this could be because it is a single file'
                    % package_name)
                proceed = ififuncs.ask_yes_no(
                    'add %s to accession list?' % package_name)
                if proceed == 'Y':
                    print(package_name)
                    return package_name
                # Declined: keep looking through the remaining files.
            elif 'has a parent' in uuid_search:
                parent = uuid_search.split()[-1]
                print(parent)
                return parent
    return None
コード例 #8
0
ファイル: copyit.py プロジェクト: sdklly/IFIscripts
def setup(args_):
    '''
    Sets a bunch of filename variables and parses command line.
    some examples:
    if manifest_sidecar = /home/kieranjol/fakeeeeee/fakeeeeee_manifest.md5
    then manifest_root = /home/kieranjol/fakeeeeee_manifest.md5

    args_: command line arguments, handed to argparse.

    Side effects visible here: creates the destination folder when
    check_for_sip returns a directory, starts the log file, and exits via
    sys.exit() when free space is insufficient and the user declines.

    Returns the tuple (args, rootpos, manifest_sidecar, log_name_source,
    destination_final_path, manifest_root, manifest_destination, manifest,
    destination, dirname, desktop_manifest_dir).
    '''
    parser = argparse.ArgumentParser(
        description='Copy directory with checksum comparison '
        'and manifest generation. Written by Kieran O\'Leary.')
    parser.add_argument('source', help='Input directory')
    parser.add_argument('destination', help='Destination directory')
    parser.add_argument(
        '-l',
        '-lto',
        action='store_true',
        help='use gcp instead of rsync on osx for SPEED on LTO')
    parser.add_argument('-move',
                        action='store_true',
                        help='Move files instead of copying - much faster!')
    parser.add_argument(
        '-justcopy',
        action='store_true',
        help='Do not generate destination manifest and verify integrity :(')
    parser.add_argument(
        '-y',
        action='store_true',
        help=
        'Answers YES to the question: Not enough free space, would you like to continue?'
    )
    args = parser.parse_args(args_)
    dircheck = None
    if os.path.isdir(args.source):
        dircheck = check_for_sip(args.source)
    if dircheck is not None and os.path.isdir(dircheck):
        # check_for_sip returned a directory: copy that directory into a
        # new folder named after the input folder.
        source = dircheck
        destination = os.path.join(args.destination,
                                   os.path.basename(args.source))
        os.makedirs(destination)
    else:
        # Also covers a check_for_sip result that is not a directory, which
        # previously left source and destination unassigned.
        source = os.path.abspath(args.source)
        destination = args.destination
    normpath = os.path.normpath(source)
    dirname = os.path.basename(source)
    # An empty basename means no folder name can be derived from source, so
    # the destination's own name is used for the manifest instead.
    rootpos = 'y' if dirname == '' else ''
    relative_path = normpath.split(os.sep)[-1]
    # or hardcode
    destination_final_path = os.path.join(destination, dirname)
    if rootpos == 'y':
        manifest_filename = '%s_manifest.md5' % os.path.basename(destination)
        manifest_destination = os.path.dirname(
            destination) + '/' + manifest_filename
    else:
        manifest_filename = '%s_manifest.md5' % dirname
        manifest_destination = destination + '/' + manifest_filename
    if os.path.isfile(manifest_destination):
        print('Destination manifest already exists')
    desktop_manifest_dir = make_desktop_manifest_dir()
    # manifest = desktop manifest, looks like this can get rewritten later.
    manifest = os.path.join(desktop_manifest_dir, manifest_filename)
    manifest_sidecar = os.path.join(os.path.dirname(source),
                                    relative_path + '_manifest.md5')
    manifest_root = source + '/%s_manifest.md5' % os.path.basename(source)
    log_name_filename = dirname + time.strftime("_%Y_%m_%dT%H_%M_%S")
    desktop_logs_dir = make_desktop_logs_dir()
    log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_filename)
    generate_log(log_name_source, 'copyit.py started.')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=copyit.py %s' % ififuncs.get_script_version('copyit.py'))
    generate_log(log_name_source, 'Source: %s' % source)
    generate_log(log_name_source, 'Destination: %s' % destination)
    print('Checking total size of input folder')
    total_input_size = ififuncs.get_folder_size(os.path.abspath(args.source))
    print('Checking if enough space in destination folder')
    free_space = ififuncs.get_free_space(args.destination)
    if total_input_size > free_space:
        print('You do not have enough free space!')
        if args.y:
            # -y answers the question on the user's behalf.
            go_forth_blindly = 'Y'
        else:
            go_forth_blindly = ififuncs.ask_yes_no(
                'Would you like to continue anyway? Press Y or N')
        if go_forth_blindly == 'Y':
            generate_log(
                log_name_source,
                'You do not have enough free space!, but the user has decided to continue anyhow'
            )
        else:
            generate_log(log_name_source,
                         'You do not have enough free space! - Exiting')
            sys.exit()
    return args, rootpos, manifest_sidecar, log_name_source, destination_final_path, manifest_root, manifest_destination, manifest, destination, dirname, desktop_manifest_dir
コード例 #9
0
ファイル: accession.py プロジェクト: mcampos-quinn/IFIscripts
def main(args_):
    '''
    Launches the various functions that will accession a package.

    args_: command line arguments, handed to parse_args.

    In order: resolves the package via ififuncs.check_for_sip, settles the
    user, accession number and (when needed) reference number, renames the
    Object Entry folder to the accession number if confirmed, appends a row
    to the register, writes events to the package log, generates the sha512
    manifest and DFXML, optionally inserts the filmographic CSV, updates the
    manifests and optionally runs makepbcore.main.
    Prints a message and does nothing else when the input is not a package.
    '''
    args = parse_args(args_)
    source = args.input
    uuid_directory = ififuncs.check_for_sip([source])
    if uuid_directory is None:
        print('not a valid package. The input should include a package that has been through Object Entry')
        return
    oe_path = os.path.dirname(uuid_directory)
    oe_number = os.path.basename(oe_path)
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    number = args.number
    if (number and number[:3] == 'aaa' and len(number[3:]) == 4
            and number[3:].isdigit()):
        accession_number = number
    else:
        if number:
            # A number was supplied but it is malformed: say why before
            # falling back to asking for one.
            print('First three characters must be \'aaa\' and last four characters must be four digits')
        accession_number = ififuncs.get_accession_number()
    # The reference number is needed both for the PBCore step and for naming
    # the filmographic CSV, so it is resolved when either was requested.
    # Previously -csv without -pbcore hit an unassigned variable.
    reference_number = None
    if args.pbcore or args.csv:
        if args.reference:
            reference_number = args.reference.upper()
        else:
            reference_number = ififuncs.get_reference_number()
    if args.acquisition_type:
        acquisition_type = ififuncs.get_acquisition_type(
            args.acquisition_type)
        print(acquisition_type)
    accession_path = os.path.join(os.path.dirname(oe_path), accession_number)
    uuid = os.path.basename(uuid_directory)
    new_uuid_path = os.path.join(accession_path, uuid)
    logs_dir = os.path.join(new_uuid_path, 'logs')
    sipcreator_log = os.path.join(logs_dir, uuid) + '_sip_log.log'
    if args.force:
        proceed = 'Y'
    else:
        proceed = ififuncs.ask_yes_no('Do you want to rename %s with %s' %
                                      (oe_number, accession_number))
    if proceed == 'Y':
        os.rename(oe_path, accession_path)
    # NOTE(review): when the rename is declined the remaining steps still
    # address the new accession path - confirm whether that is intended.
    if args.register:
        register = args.register
    else:
        register = make_register()
    ififuncs.append_csv(register,
                        (oe_number.upper()[:2] + '-' + oe_number[2:],
                         accession_number, '', '', '', '', '', ''))
    ififuncs.generate_log(sipcreator_log, 'EVENT = accession.py started')
    ififuncs.generate_log(
        sipcreator_log, 'eventDetail=accession.py %s' %
        ififuncs.get_script_version('accession.py'))
    ififuncs.generate_log(sipcreator_log,
                          'Command line arguments: %s' % args)
    ififuncs.generate_log(sipcreator_log, 'EVENT = agentName=%s' % user)
    ififuncs.generate_log(
        sipcreator_log, 'EVENT = eventType=Identifier assignment,'
        ' eventIdentifierType=accession number, value=%s' %
        accession_number)
    ififuncs.generate_log(
        sipcreator_log, 'EVENT = eventType=accession,'
        ' eventIdentifierType=accession number, value=%s' %
        accession_number)
    sip_manifest = os.path.join(accession_path, uuid) + '_manifest.md5'
    sha512_log = manifest.main([new_uuid_path, '-sha512', '-s'])
    sha512_manifest = os.path.join(os.path.dirname(new_uuid_path),
                                   uuid + '_manifest-sha512.txt')
    ififuncs.merge_logs_append(sha512_log, sipcreator_log, sip_manifest)
    os.remove(sha512_log)
    print('Generating Digital Forensics XML')
    dfxml = make_dfxml(args, new_uuid_path, uuid)
    ififuncs.generate_log(
        sipcreator_log,
        'EVENT = Metadata extraction - eventDetail=File system metadata extraction using Digital Forensics XML, eventOutcome=%s, agentName=makedfxml'
        % (dfxml))
    if args.csv:
        metadata_dir = os.path.join(new_uuid_path, 'metadata')
        package_filmographic = os.path.join(
            metadata_dir, reference_number + '_filmographic.csv')
        insert_filmographic(args.csv, reference_number, package_filmographic)
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = Metadata extraction - eventDetail=Filmographic descriptive metadata added to metadata folder, eventOutcome=%s, agentName=accession.py'
            % (package_filmographic))
        ififuncs.manifest_update(sip_manifest, package_filmographic)
        ififuncs.sha512_update(sha512_manifest, package_filmographic)
        print('Filmographic descriptive metadata added to metadata folder')
    ififuncs.generate_log(sipcreator_log, 'EVENT = accession.py finished')
    ififuncs.checksum_replace(sip_manifest, sipcreator_log, 'md5')
    ififuncs.checksum_replace(sha512_manifest, sipcreator_log, 'sha512')
    ififuncs.manifest_update(sip_manifest, dfxml)
    ififuncs.sha512_update(sha512_manifest, dfxml)
    if args.pbcore:
        makepbcore_cmd = [
            accession_path, '-p', '-user', user, '-reference',
            reference_number
        ]
        if args.parent:
            makepbcore_cmd.extend(['-parent', args.parent])
        if args.acquisition_type:
            makepbcore_cmd.extend(
                ['-acquisition_type', args.acquisition_type])
        if args.donor:
            makepbcore_cmd.extend(['-donor', args.donor])
        # Gate on the depositor reference itself; this used to test
        # args.donor a second time.
        if args.depositor_reference:
            makepbcore_cmd.extend(
                ['-depositor_reference', args.depositor_reference])
        if args.donation_date:
            makepbcore_cmd.extend(['-donation_date', args.donation_date])
        makepbcore.main(makepbcore_cmd)
コード例 #10
0
def main(args_):
    '''
    Batch process packages by running accession.py and makepbcore.py

    args_: command line arguments, handed to parse_args.

    Two workflows share this function: one driven by an Object Entry CSV
    (args.oe_csv, which also requires args.filmographic) and one driven by
    the filmographic CSV alone. With args.dryrun the function exits before
    anything is accessioned.
    '''
    args = parse_args(args_)
    oe_list = []
    if args.oe_csv:
        if not args.filmographic:
            print(' - batchaccession.py - ERROR\n - No -filmographic argument supplied. This is mandatory when using the -oe_csv option. \n - Exiting..')
            sys.exit()
        oe_csv_extraction = ififuncs.extract_metadata(args.oe_csv)
        # NOTE(review): initial_oe_list is assigned here and in the branch
        # below but is not read again inside this function.
        initial_oe_list = oe_csv_extraction[0]
        oe_dicts = process_oe_csv(oe_csv_extraction, args.input)
        # temp hack while we're performing both workflows
        helper_csv = args.oe_csv
    elif args.filmographic:
        initial_oe_list = ififuncs.extract_metadata(args.filmographic)[0]
        # temp hack while we're performing both workflows
        helper_csv = args.filmographic
    if args.oe_csv or args.filmographic:
        for line_item in ififuncs.extract_metadata(helper_csv)[0]:
            # The identifier column is named 'Object Entry' in one CSV layout
            # and 'OE No.' in the other.
            try:
                oe_number = line_item['Object Entry'].lower()
            except KeyError:
                oe_number = line_item['OE No.'].lower()
            # this transforms OE-#### to oe####
            transformed_oe = oe_number[:2] + oe_number[3:]
            oe_list.append(transformed_oe)
    if not args.oe_csv:
        # No need to ask for the reference number if the OE csv option is supplied.
        # The assumption here is that the OE csv contains the reference numbers though.
        if args.reference:
            reference_number = get_filmographic_number(args.reference)
        else:
            reference_number = ififuncs.get_reference_number()
    donor = ififuncs.ask_question('Who is the source of acquisition, as appears on the donor agreement? This will not affect Reproductions.')
    depositor_reference = ififuncs.ask_question('What is the donor/depositor number? This will not affect Reproductions.')
    acquisition_type = ififuncs.get_acquisition_type('')
    user = ififuncs.get_user()
    accession_number = get_number(args)
    # The numeric part follows a three character prefix.
    accession_digits = int(accession_number[3:])
    if not args.oe_csv:
        to_accession = initial_check(args, accession_digits, oe_list, reference_number)
    else:
        # Build the work list straight from the OE CSV records: each existing
        # source folder gets the next accession number plus its reference
        # number, parent and donation date.
        to_accession = {}
        for oe_record in oe_dicts:
            if os.path.isdir(oe_record['source_path']):
                to_accession[oe_record['source_path']] = ['aaa' + str(accession_digits).zfill(4), oe_record['reference number'], oe_record['parent'], oe_record['donation_date']]
                accession_digits += 1
    for success in sorted(to_accession.keys()):
        print('%s will be accessioned as %s' %  (success, to_accession[success]))
    register = accession.make_register()
    if args.filmographic:
        desktop_logs_dir = ififuncs.make_desktop_logs_dir()
        if args.dryrun:
            new_csv_filename = time.strftime("%Y-%m-%dT%H_%M_%S_DRYRUN_SHEET_PLEASE_DO_NOT_INGEST_JUST_IGNORE_COMPLETELY") + os.path.basename(args.filmographic)
        else:
            new_csv_filename = time.strftime("%Y-%m-%dT%H_%M_%S_") + os.path.basename(args.filmographic)
        new_csv = os.path.join(desktop_logs_dir, new_csv_filename)
        if not args.oe_csv:
            filmographic_dict, headers = ififuncs.extract_metadata(args.filmographic)
            for oe_package in to_accession:
                for filmographic_record in filmographic_dict:
                    # Folder names look like oe#### while the CSV uses OE-####.
                    if os.path.basename(oe_package).upper()[:2] + '-' + os.path.basename(oe_package)[2:] == filmographic_record['Object Entry']:
                        filmographic_record['Reference Number'] = to_accession[oe_package][1]
            get_filmographic_titles(to_accession, filmographic_dict)
            with open(new_csv, 'w') as csvfile:
                fieldnames = headers
                # Removes Object Entry from headings as it's not needed in database.
                # NOTE(review): this deletes by position, so it presumably
                # relies on 'Object Entry' being the second column - confirm.
                del fieldnames[1]
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                for i in filmographic_dict:
                    i.pop('Object Entry', None)
                    # Only include records that have reference numbers
                    if not i['Reference Number'] == '':
                        writer.writerow(i)
    if args.dryrun:
        sys.exit()
    proceed = ififuncs.ask_yes_no(
        'Do you want to proceed?'
    )
    if args.oe_csv:
        # In the OE CSV workflow the supplied filmographic CSV is passed on
        # as is, replacing the desktop path computed above.
        new_csv = args.filmographic
    if proceed == 'Y':
        for package in sorted(to_accession.keys(), key=natural_keys):
            # NOTE(review): new_csv is only assigned when -filmographic was
            # supplied; without it this list looks like it would raise
            # NameError - confirm.
            accession_cmd = [
                package, '-user', user,
                '-f',
                '-number', to_accession[package][0],
                '-reference', to_accession[package][1],
                '-register', register,
                '-csv', new_csv
            ]
            # NOTE(review): oe_dicts is only assigned in the -oe_csv branch at
            # the top; without -oe_csv this loop looks like it would raise
            # NameError - confirm.
            for oe_record in oe_dicts:
                if oe_record['source_path'] == package:
                    # '-pbcore' is added for every format except 'dcdm'.
                    if not oe_record['format'].lower() == 'dcdm':
                        accession_cmd.append('-pbcore')
            # Four-item entries are the ones built from the OE CSV above.
            if len(to_accession[package]) == 4:
                if not to_accession[package][2] == 'n/a':
                    # A parent other than 'n/a' selects acquisition type '13'
                    # and passes the parent along.
                    accession_cmd.extend(['-acquisition_type', '13'])
                    if args.oe_csv:
                        accession_cmd.extend(['-parent', to_accession[package][2]])
                    else:
                        accession_cmd.extend(['-parent', order.main(package)])
                else:
                    accession_cmd.extend(['-donor', donor])
                    accession_cmd.extend(['-depositor_reference', depositor_reference])
                    accession_cmd.extend(['-acquisition_type', acquisition_type[2]])
                    print to_accession[package][3]
                    accession_cmd.extend(['-donation_date', to_accession[package][3]])
            print accession_cmd
            accession.main(accession_cmd)
    collated_pbcore = gather_metadata(args.input)
    sorted_filepath = ififuncs.sort_csv(register, 'accession number')
    print '\nA helper accessions register has been generated in order to help with registration - located here: %s' % sorted_filepath
    print '\nA modified filmographic CSV has been generated with added reference numbers - located here: %s' % new_csv
    print '\nA collated CSV consisting of each PBCore report has been generated for batch database import - located here: %s' % collated_pbcore
コード例 #11
0
ファイル: batchaccession.py プロジェクト: ablwr/IFIscripts
def main(args_):
    '''
    Batch process packages by running accession.main on each of them.

    args_: command line arguments, handed to parse_args.

    When a filmographic CSV is supplied (args.csv), a timestamped copy that
    carries the assigned reference numbers, without the 'Object Entry'
    column and without rows lacking a reference number, is written into the
    desktop logs directory and passed on to accession.main via '-csv'.
    With args.dryrun the function exits before anything is accessioned.
    '''
    args = parse_args(args_)
    oe_list = []
    # Stays None unless a filmographic CSV is supplied; it is only passed on
    # and reported when it actually exists.
    new_csv = None
    if args.csv:
        for line_item in ififuncs.extract_metadata(args.csv)[0]:
            oe_number = line_item['Object Entry'].lower()
            # this transforms OE-#### to oe####
            transformed_oe = oe_number[:2] + oe_number[3:]
            oe_list.append(transformed_oe)
    if args.reference:
        reference_number = get_filmographic_number(args.reference)
    else:
        reference_number = ififuncs.get_reference_number()
    donor = ififuncs.ask_question(
        'Who is the source of acquisition, as appears on the donor agreement? This will not affect Reproductions.'
    )
    depositor_reference = ififuncs.ask_question(
        'What is the donor/depositor number? This will not affect Reproductions.'
    )
    acquisition_type = ififuncs.get_acquisition_type('')
    user = ififuncs.get_user()
    accession_number = get_number(args)
    # The numeric part follows a three character prefix.
    accession_digits = int(accession_number[3:])
    to_accession = initial_check(args, accession_digits, oe_list,
                                 reference_number)
    register = accession.make_register()
    if args.csv:
        desktop_logs_dir = ififuncs.make_desktop_logs_dir()
        if args.dryrun:
            timestamp = time.strftime(
                "%Y-%m-%dT%H_%M_%S_DRYRUN_SHEET_PLEASE_DO_NOT_INGEST_JUST_IGNORE_COMPLETELY"
            )
        else:
            timestamp = time.strftime("%Y-%m-%dT%H_%M_%S_")
        new_csv = os.path.join(desktop_logs_dir,
                               timestamp + os.path.basename(args.csv))
        filmographic_dict, headers = ififuncs.extract_metadata(args.csv)
        for oe_package in to_accession:
            package_name = os.path.basename(oe_package)
            # Folder names look like oe#### while the CSV uses OE-####.
            object_entry = package_name.upper()[:2] + '-' + package_name[2:]
            for filmographic_record in filmographic_dict:
                if object_entry == filmographic_record['Object Entry']:
                    filmographic_record['Reference Number'] = to_accession[
                        oe_package][1]
        with open(new_csv, 'w') as csvfile:
            # Removes Object Entry from headings as it's not needed in
            # database. Dropped by name so the column's position in the
            # sheet does not matter.
            fieldnames = [
                header for header in headers if header != 'Object Entry'
            ]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for record in filmographic_dict:
                record.pop('Object Entry', None)
                # Only include records that have reference numbers
                if record['Reference Number'] != '':
                    writer.writerow(record)
    if args.dryrun:
        sys.exit()
    proceed = ififuncs.ask_yes_no('Do you want to proceed?')
    if proceed == 'Y':
        for package in sorted(to_accession.keys(), key=natural_keys):
            accession_cmd = [
                package, '-user', user, '-pbcore', '-f', '-number',
                to_accession[package][0], '-reference',
                to_accession[package][1], '-register', register
            ]
            if new_csv is not None:
                accession_cmd.extend(['-csv', new_csv])
            # Three-item entries get acquisition type '13' and a parent
            # looked up through order.main; all others get the donor details.
            if len(to_accession[package]) == 3:
                accession_cmd.extend(['-acquisition_type', '13'])
                accession_cmd.extend(['-parent', order.main(package)])
            else:
                accession_cmd.extend(['-donor', donor])
                accession_cmd.extend(
                    ['-depositor_reference', depositor_reference])
                accession_cmd.extend(
                    ['-acquisition_type', acquisition_type[2]])
            print(accession_cmd)
            accession.main(accession_cmd)
    collated_pbcore = gather_metadata(args.input)
    sorted_filepath = ififuncs.sort_csv(register, 'accession number')
    print('\nA helper accessions register has been generated in order to help with registration - located here: %s' % sorted_filepath)
    if new_csv is not None:
        print('\nA modified filmographic CSV has been generated with added reference numbers - located here: %s' % new_csv)
    print('\nA collated CSV consisting of each PBCore report has been generated for batch database import - located here: %s' % collated_pbcore)
コード例 #12
0
def main(args_):
    '''
    Launches the various functions that will accession a package.

    args_ is the list of command line style arguments given to parse_args.
    The input has to resolve, through ififuncs.check_for_sip, to a UUID
    directory that sits inside an Object Entry folder. If it does not, a
    message is printed and nothing else happens.

    The Object Entry folder is renamed to the accession number, a line is
    appended to the register, the events are written to the package log, a
    sha512 manifest and a DFXML report are generated and both manifests are
    updated. A PBCore report is made as well when args.pbcore is set.

    If the rename is declined, the function stops before anything is written.
    Nothing is returned.
    '''
    args = parse_args(args_)
    # Called source rather than input so that the builtin is not shadowed.
    source = args.input
    uuid_directory = ififuncs.check_for_sip([source])
    if uuid_directory is None:
        print('not a valid package. The input should include a package that has been through Object Entry')
        return
    oe_path = os.path.dirname(uuid_directory)
    oe_number = os.path.basename(oe_path)
    user = args.user if args.user else ififuncs.get_user()
    # An accession number is only taken from the command line when it is
    # 'aaa' followed by exactly four digits, otherwise the user is asked.
    requested_number = args.number
    if not requested_number:
        accession_number = ififuncs.get_accession_number()
    elif (requested_number[:3] == 'aaa'
          and len(requested_number[3:]) == 4
          and requested_number[3:].isdigit()):
        accession_number = requested_number
    else:
        # Say why the value was rejected before asking for a replacement.
        print('First three characters must be \'aaa\' and last four characters must be four digits')
        accession_number = ififuncs.get_accession_number()
    accession_path = os.path.join(
        os.path.dirname(oe_path), accession_number
    )
    uuid = os.path.basename(uuid_directory)
    new_uuid_path = os.path.join(accession_path, uuid)
    logs_dir = os.path.join(new_uuid_path, 'logs')
    sipcreator_log = os.path.join(logs_dir, uuid) + '_sip_log.log'
    if args.force:
        proceed = 'Y'
    else:
        proceed = ififuncs.ask_yes_no(
            'Do you want to rename %s with %s' % (oe_number, accession_number)
        )
    if proceed != 'Y':
        # Every later step works on the renamed path. Carrying on without the
        # rename would add a register entry for an accession that never took
        # place and point the log and manifest steps at paths that were never
        # created.
        print('Exiting as the package was not renamed')
        return
    os.rename(oe_path, accession_path)
    register = make_register()
    # First column is the Object Entry number rewritten as 'OE-####'.
    ififuncs.append_csv(register, (oe_number.upper()[:2] + '-' + oe_number[2:6], accession_number, '','','','', ''))
    ififuncs.generate_log(
        sipcreator_log,
        'EVENT = accession.py started'
    )
    ififuncs.generate_log(
        sipcreator_log,
        'eventDetail=accession.py %s' % ififuncs.get_script_version('accession.py')
    )
    ififuncs.generate_log(
        sipcreator_log,
        'Command line arguments: %s' % args
    )
    ififuncs.generate_log(
        sipcreator_log,
        'EVENT = agentName=%s' % user
    )
    ififuncs.generate_log(
        sipcreator_log,
        'EVENT = eventType=Identifier assignment,'
        ' eventIdentifierType=accession number, value=%s'
        % accession_number
    )
    ififuncs.generate_log(
        sipcreator_log,
        'EVENT = eventType=accession,'
        ' eventIdentifierType=accession number, value=%s'
        % accession_number
    )
    sip_manifest = os.path.join(
        accession_path, uuid
        ) + '_manifest.md5'
    sha512_log = manifest.main([new_uuid_path, '-sha512', '-s'])
    sha512_manifest = os.path.join(
        os.path.dirname(new_uuid_path), uuid + '_manifest-sha512.txt'
    )
    # The log of the sha512 run is merged into the package log and removed.
    ififuncs.merge_logs_append(sha512_log, sipcreator_log, sip_manifest)
    os.remove(sha512_log)
    dfxml = make_dfxml(args, new_uuid_path, uuid)
    ififuncs.generate_log(
        sipcreator_log,
        'EVENT = Metadata extraction - eventDetail=File system metadata extraction using Digital Forensics XML, eventOutcome=%s, agentName=makedfxml' % (dfxml)
    )
    ififuncs.generate_log(
        sipcreator_log,
        'EVENT = accession.py finished'
    )
    # The log is complete at this point, so both manifests are brought up to
    # date for the log and for the DFXML report.
    ififuncs.checksum_replace(sip_manifest, sipcreator_log, 'md5')
    ififuncs.checksum_replace(sha512_manifest, sipcreator_log, 'sha512')
    ififuncs.manifest_update(sip_manifest, dfxml)
    ififuncs.sha512_update(sha512_manifest, dfxml)
    if args.pbcore:
        makepbcore.main([accession_path, '-p', '-user', user])
コード例 #13
0
def main(args_):
    '''
    Launch all the functions for creating an IFI SIP.

    Each subfolder of the source folder (args.i) that holds at least one file
    matching args.object_extension_pattern is given its own Object Entry
    number, counting up from the starting number, and is handed to
    sipcreator.main as a separate job.

    args_ is the argument list that is passed to parse_args.
    '''
    args = parse_args(args_)
    source_folder = args.i
    print(args)
    # Maps each Object Entry number to [inputs, supplements] for one subfolder.
    oe_dict = {}
    user = ififuncs.determine_user(args)
    # The starting Object Entry number comes from -oe or from a user prompt.
    if args.oe:
        object_entry = args.oe
    else:
        object_entry = ififuncs.get_object_entry()
    # NOTE(review): int() discards zero padding, so a value such as 'oe0012'
    # would be rebuilt below as 'oe12' - confirm that this is intended.
    oe_digits = int(object_entry.replace('oe', ''))
    for folder in sorted(os.listdir(source_folder)):
        full_path = os.path.join(source_folder, folder)
        # Loose files in the source folder are ignored, only subfolders count.
        if os.path.isdir(full_path):
            try:
                folder_contents = os.listdir(full_path)
            except PermissionError:
                # Subfolders that cannot be read are skipped without a message.
                continue
            object_entry_complete = 'oe' + str(oe_digits)
            inputs = []
            supplements = []
            for files in folder_contents:
                # The extension is compared in lower case and without its dot.
                # NOTE(review): whether `in` is list membership or a substring
                # test depends on the type parse_args gives these patterns.
                if os.path.splitext(files)[1][1:].lower() in args.object_extension_pattern:
                    inputs.append(os.path.join(full_path, files))
                if os.path.splitext(files)[1][1:].lower() in args.supplement_extension_pattern:
                    supplements.append(os.path.join(full_path, files))
            if inputs:
                print(' - Object Entry: %s\n - Inputs: %s\n - Supplements: %s\n' % (object_entry_complete, inputs, supplements))
                oe_dict[object_entry_complete] = [inputs, supplements]
                # The number only advances when a subfolder is used, so
                # skipped subfolders leave no gaps in the sequence.
                oe_digits += 1
            else:
                print('Skipping %s as there are no files in this folder that match the -object_extension_pattern' % full_path)
    # A dry run stops once the planned jobs have been printed.
    if args.dryrun:
        print('Exiting as you selected -dryrun')
        sys.exit()
    # Collects the first value returned by each sipcreator.main call.
    logs = []
    if args.y:
        proceed = 'Y'
    else:
        proceed = ififuncs.ask_yes_no(
            'Do you want to proceed?'
        )
    if proceed == 'Y':
        # NOTE(review): the keys are sorted as strings, so 'oe10' comes before
        # 'oe9' - confirm that the processing order does not matter.
        for sips in sorted(oe_dict):
            print(oe_dict[sips])
            # Build the sipcreator argument list: inputs, then supplements,
            # then the options that are passed through from this script.
            sipcreator_cmd = ['-i',]
            for sipcreator_inputs in oe_dict[sips][0]:
                sipcreator_cmd.append(sipcreator_inputs)
            # '-supplement' is added even when the supplements list is empty.
            sipcreator_cmd += ['-supplement']
            for sipcreator_supplements in oe_dict[sips][1]:
                sipcreator_cmd.append(sipcreator_supplements)
            sipcreator_cmd += ['-user', user, '-oe', sips, '-o', args.o]
            if args.rename_uuid:
                sipcreator_cmd.append('-rename_uuid')
            if args.zip:
                sipcreator_cmd.append('-zip')
            if args.l:
                sipcreator_cmd.append('-l')
            print(sipcreator_cmd)
            # Only the first of the two returned values is kept. It is treated
            # as the path of a log file below.
            sipcreator_log, _ = sipcreator.main(sipcreator_cmd)
            logs.append(sipcreator_log)
            # NOTE(review): this summary sits inside the per-SIP loop, so every
            # log collected so far is reported again after each job - confirm
            # that this is intended rather than a single report at the end.
            for i in logs:
                if os.path.isfile(i):
                    print(("%-*s   : copyit job was a %s" % (50, os.path.basename(i), analyze_log(i))))