Beispiel #1
0
def gather_metadata(source):
    '''
    Collates every pbcore.csv report found under source into a single CSV.

    Walks source recursively. For each file whose name ends with
    'pbcore.csv', the first line is treated as the header and the second
    line as the record. The header is written once (taken from the first
    usable report) followed by one record per report, for the purposes of
    batch import into the DB TEXTWORKS technical database.

    Reports with fewer than two lines are skipped, as they hold no record.

    Returns the path of the collated CSV, which is created in the desktop
    logs directory with a timestamped name.
    '''
    collated_rows = []
    for root, _, filenames in sorted(os.walk(source)):
        for filename in filenames:
            if not filename.endswith('pbcore.csv'):
                continue
            with open(os.path.join(root, filename), 'r') as csv_file:
                csv_rows = csv_file.readlines()
            if len(csv_rows) < 2:
                # Empty or header-only report: nothing to harvest, and
                # indexing the record row would raise IndexError.
                continue
            if not collated_rows:
                # Only the first report contributes the header row.
                collated_rows.append(csv_rows[0])
            collated_rows.append(csv_rows[1])
    collated_pbcore = os.path.join(
        ififuncs.make_desktop_logs_dir(),
        time.strftime("%Y-%m-%dT%H_%M_%S_pbcore.csv")
    )
    with open(collated_pbcore, 'w') as fo:
        for row in collated_rows:
            # A two-line report without a final newline would otherwise be
            # glued onto the next record.
            fo.write(row if row.endswith('\n') else row + '\n')
    return collated_pbcore
Beispiel #2
0
def log_report(log_names):
    '''
    Analyzes all the moveit.py logs on the desktop and prints a report.

    log_names is a list of log file paths. Each path that exists is passed
    to analyze_log and the result is printed. When a path is missing, it is
    removed from log_names and the desktop logs directory is searched for a
    log with the same name prefix but a later timestamp; every such log is
    analyzed, printed and appended to log_names.

    log_names is modified in place, so the caller sees the substitutions.
    '''
    desktop_logs_dir = ififuncs.make_desktop_logs_dir()
    # Iterate over a snapshot: removing from the list being iterated would
    # silently skip the entry that follows each missing log.
    for log_path in list(log_names):
        if os.path.isfile(log_path):
            print("%-*s   : %s" % (
                50, os.path.basename(log_path)[:-24], analyze_log(log_path)))
            continue
        print("%s can't find log file, trying again..." % log_path)
        log_names.remove(log_path)
        missing_name = os.path.basename(log_path)
        for candidate in os.listdir(desktop_logs_dir):
            # look at log filename minus the seconds and '.log'
            if missing_name[:-7] not in candidate:
                continue
            # make sure that the alternate log filename is more recent
            candidate_stamp = int(candidate[-12:-4].replace('_', ''))
            missing_stamp = int(missing_name[-12:-4].replace('_', ''))
            if candidate_stamp > missing_stamp:
                candidate_path = os.path.join(desktop_logs_dir, candidate)
                print('trying to analyze %s' % candidate)
                print("%-*s   : %s" % (
                    50, candidate[:-24], analyze_log(candidate_path)))
                log_names.append(candidate_path)
Beispiel #3
0
def main():
    '''
    Launches the other functions which attempt to run multiple copyit.py
    instances if manifests and matching sidecar directories are found
    inside of the input directory.

    Every entry returned by find_manifest is listed first. Each entry whose
    basename does not already exist as a directory inside args.o is then
    copied with copyit.main (with '-l' prepended when args.l is set), and
    analyze_reports is run on the logs gathered so far after each copy.
    '''
    args = parse_args()
    all_files = find_manifest(args)
    log_names = []
    # Single-argument print() calls behave the same under Python 2 and 3.
    print('\n\n**** All of these folders will be copied to %s\n' % args.o)
    for i in all_files:
        print(i)
    for i in all_files:
        absolute_path = os.path.join(args.o, os.path.basename(i))
        if os.path.isdir(absolute_path):
            print('%s already exists, skipping' % absolute_path)
            continue
        desktop_logs_dir = make_desktop_logs_dir()
        copyit_cmd = [os.path.join(args.input, i), args.o]
        if args.l:
            # Keep the flag in front of the positional arguments.
            copyit_cmd.insert(0, '-l')
        log_names.append(copyit.main(copyit_cmd))
        print('********\nWARNING - Please check the ifiscripts_logs directory on your Desktop to verify if ALL of your transfers were successful')
        analyze_reports(log_names, desktop_logs_dir)
Beispiel #4
0
def log_report(log_names):
    '''
    Analyzes all the moveit.py logs on the desktop and prints a report.

    log_names is a list of log file paths. Existing logs are analyzed with
    analyze_log and reported. A missing log is removed from log_names and
    replaced by any log in the desktop logs directory that shares its name
    prefix and carries a later timestamp; each replacement is analyzed,
    reported and appended to log_names.

    log_names is modified in place.
    '''
    desktop_logs_dir = ififuncs.make_desktop_logs_dir()
    # Work from a copy so that removing/appending entries below cannot make
    # the loop skip the log that follows a missing one.
    for wanted in list(log_names):
        if os.path.isfile(wanted):
            print("%-*s   : %s" %
                  (50, os.path.basename(wanted)[:-24], analyze_log(wanted)))
            continue
        # Print a readable message rather than the repr of a tuple.
        print("%s can't find log file, trying again..." % wanted)
        log_names.remove(wanted)
        wanted_name = os.path.basename(wanted)
        wanted_stamp = wanted_name[-12:-4].replace('_', '')
        for found in os.listdir(desktop_logs_dir):
            # look at log filename minus the seconds and '.log'
            if wanted_name[:-7] not in found:
                continue
            # make sure that the alternate log filename is more recent
            if int(found[-12:-4].replace('_', '')) > int(wanted_stamp):
                found_path = os.path.join(desktop_logs_dir, found)
                print('trying to analyze %s' % found)
                print("%-*s   : %s" %
                      (50, found[:-24], analyze_log(found_path)))
                log_names.append(found_path)
def setup(full_path, user):
    '''
    Sets up filepaths for the rest of the script.
    This also checks if a mediaconch xml already exists.

    The paths are derived from full_path: its grandparent directory is
    expected to hold a 'metadata' folder, and the directory above that a
    '<grandparent name>_manifest.md5' manifest.

    Returns the string 'skipping' when the manifest is missing, when the
    metadata directory is missing, or when the mediaconch xml already
    exists. Otherwise returns the tuple
    (log_name_source, user, mediaconch_xmlfile, manifest, full_path,
    parent_dir).
    '''
    desktop_logs_dir = ififuncs.make_desktop_logs_dir()
    log_name_source_ = os.path.basename(full_path) + time.strftime("_%Y_%m_%dT%H_%M_%S")
    log_name_source = "%s/%s_mediaconch_validation.log" % (desktop_logs_dir, log_name_source_)
    filename = os.path.basename(full_path)
    parent_dir = os.path.dirname(os.path.dirname(full_path))
    sip_root = os.path.dirname(parent_dir)
    metadata_dir = os.path.join(parent_dir, 'metadata')
    manifest = os.path.join(
        sip_root, os.path.basename(parent_dir) + '_manifest.md5'
    )
    if not os.path.isfile(manifest):
        print('manifest does not exist %s' % manifest)
        return 'skipping'
    if not os.path.isdir(metadata_dir):
        print('no metadata directory found. Exiting.')
        # Without a metadata directory there is no xml path to return;
        # falling through would raise UnboundLocalError.
        return 'skipping'
    mediaconch_xmlfile = os.path.join(
        metadata_dir, '%s_mediaconch_validation.xml' % filename
    )
    if os.path.isfile(mediaconch_xmlfile):
        print('mediaconch xml already exists')
        return 'skipping'
    return log_name_source, user, mediaconch_xmlfile, manifest, full_path, parent_dir
Beispiel #6
0
def setup(full_path, user):
    '''
    Sets up filepaths for the rest of the script.
    This also checks if a mediaconch xml already exists.

    All paths are derived from full_path: the directory two levels up
    (parent_dir) is expected to contain a 'metadata' folder, and the
    directory above it a '<parent_dir name>_manifest.md5' file.

    Returns 'skipping' if the manifest does not exist, if there is no
    metadata directory, or if the mediaconch xml is already present.
    Otherwise returns
    (log_name_source, user, mediaconch_xmlfile, manifest, full_path,
    parent_dir).
    '''
    desktop_logs_dir = ififuncs.make_desktop_logs_dir()
    log_name_source_ = os.path.basename(full_path) + time.strftime(
        "_%Y_%m_%dT%H_%M_%S")
    log_name_source = "%s/%s_mediaconch_validation.log" % (desktop_logs_dir,
                                                           log_name_source_)
    filename = os.path.basename(full_path)
    object_dir = os.path.dirname(full_path)
    parent_dir = os.path.dirname(object_dir)
    sip_root = os.path.dirname(parent_dir)
    metadata_dir = os.path.join(parent_dir, 'metadata')
    manifest = os.path.join(sip_root,
                            os.path.basename(parent_dir) + '_manifest.md5')
    if not os.path.isfile(manifest):
        print('manifest does not exist %s' % manifest)
        return 'skipping'
    if not os.path.isdir(metadata_dir):
        print('no metadata directory found. Exiting.')
        # The xml path cannot be built without the metadata directory, so
        # bail out instead of hitting UnboundLocalError at the return.
        return 'skipping'
    mediaconch_xmlfile = os.path.join(
        metadata_dir, '%s_mediaconch_validation.xml' % filename)
    if os.path.isfile(mediaconch_xmlfile):
        print('mediaconch xml already exists')
        return 'skipping'
    return log_name_source, user, mediaconch_xmlfile, manifest, full_path, parent_dir
Beispiel #7
0
def main():
    '''
    Launches the other functions which attempt to run multiple copyit.py
    instances if manifests and matching sidecar directories are found
    inside of the input directory.

    First prints, for every entry returned by find_manifest, whether it
    will be copied or skipped because a directory of the same name already
    exists in args.o. After a two second pause, each entry that is not
    skipped is copied with copyit.main, and analyze_reports is run on the
    logs gathered so far after each copy.
    '''
    args = parse_args()
    all_files = find_manifest(args)
    log_names = []
    print('\n\n - **** All of these folders will be copied to %s\n' % args.o)
    for i in all_files:
        absolute_path = os.path.join(args.o, os.path.basename(i))
        if os.path.isdir(absolute_path):
            print(' - %s already exists, skipping' % absolute_path)
        else:
            print(' - %s will be copied' % i)
    # presumably gives the user a moment to read the plan - TODO confirm
    time.sleep(2)
    for i in all_files:
        absolute_path = os.path.join(args.o, os.path.basename(i))
        if os.path.isdir(absolute_path):
            print(' - %s already exists, skipping' % absolute_path)
            continue
        desktop_logs_dir = make_desktop_logs_dir()
        copyit_cmd = [os.path.join(args.input, i), args.o]
        # The two flags are independent, so forward each one that was
        # given rather than dropping '-y' whenever '-l' is present.
        if args.l:
            copyit_cmd.append('-l')
        if args.y:
            copyit_cmd.append('-y')
        log_names.append(copyit.main(copyit_cmd))
        print(' - ********\nWARNING - Please check the ifiscripts_logs directory on your Desktop to verify if ALL of your transfers were successful')
        analyze_reports(log_names, desktop_logs_dir)
Beispiel #8
0
def main():
    '''
    Launches functions

    Runs copyit.main for every input in args.i with args.o as the
    destination, then hands the collected log names to
    masscopy.analyze_reports.
    '''
    args = parse_args()
    desktop_logs_dir = ififuncs.make_desktop_logs_dir()
    log_names = [copyit.main([i, args.o]) for i in args.i]
    # Single-argument print() works identically on Python 2 and 3.
    print('********\nWARNING - Please check the ifiscripts_logs directory on your Desktop to verify if ALL of your transfers were successful')
    masscopy.analyze_reports(log_names, desktop_logs_dir)
Beispiel #9
0
def main():
    '''
    Launches the commands that generate the CSV error report

    Walks args.input for files ending in '.m2t'. Each file whose name is
    not already present in the report is probed with ffprobe; every frame
    entry carrying a 'logs' field is appended to the report as an error
    row. Files with no such entries get a 'no errors' row, and files for
    which ffprobe fails get a 'script error' row.

    The report is a timestamped *_videoerrors.csv in the desktop logs
    directory.
    '''
    args = parse_args()
    source = args.input
    csv_filename = os.path.join(
        ififuncs.make_desktop_logs_dir(),
        time.strftime("%Y-%m-%dT%H_%M_%S_videoerrors.csv"))
    print('Report stored as %s' % csv_filename)
    if not os.path.isfile(csv_filename):
        ififuncs.create_csv(
            csv_filename,
            ['filename', 'start_time', 'timestamp', 'error', 'notes'])
    for root, _, filenames in os.walk(source):
        for filename in filenames:
            if not filename.endswith('.m2t'):
                continue
            # Read and close the report before appending to it below.
            with open(csv_filename, 'r') as fo:
                already_reported = filename in fo.read()
            if already_reported:
                continue
            video_path = os.path.join(root, filename)
            # Reset per file so a failed probe never reports the previous
            # file's start time (or hits an unbound name on the first file).
            start_time = ''
            try:
                start_time = bitc.getffprobe(
                    'start_time', 'stream=start_time',
                    video_path).rsplit()[0]
                json_output = subprocess.check_output([
                    'ffprobe', '-sexagesimal',
                    video_path, '-show_error',
                    '-show_log', '16', '-show_frames', '-of',
                    'json'
                ])
            except subprocess.CalledProcessError:
                ififuncs.append_csv(csv_filename, [
                    filename, start_time,
                    'script error - process file manually', '', ''
                ])
                continue
            errors = False
            ffprobe_dict = json.loads(json_output)
            for section in ffprobe_dict:
                for entry in ffprobe_dict[section]:
                    # Sections that are dicts yield their string keys here;
                    # only dict entries can carry a 'logs' field.
                    if not isinstance(entry, dict) or 'logs' not in entry:
                        continue
                    errors = True
                    print(entry['pkt_pts_time'], entry['logs'])
                    ififuncs.append_csv(csv_filename, [
                        filename, start_time,
                        entry['pkt_pts_time'],
                        entry['logs'], ''
                    ])
            if not errors:
                ififuncs.append_csv(csv_filename, [
                    filename, start_time, 'no errors',
                    'no errors', ''
                ])
    print('Report stored as %s' % csv_filename)
Beispiel #10
0
def main():
    '''
    Parses the command line, opens a timestamped fixity validation log in
    the desktop logs directory, records the start time and current user,
    then passes args.input to check_manifest.
    '''
    args = make_parser().parse_args()
    logs_dir = make_desktop_logs_dir()
    stamped_name = os.path.basename(args.input) + time.strftime(
        "_%Y_%m_%dT%H_%M_%S")
    log = "%s/%s_fixity_validation.log" % (logs_dir, stamped_name)
    # Only referenced by the disabled setLevel call below.
    root = logging.getLogger()
    logging.basicConfig(filename=log, filemode='a', level=logging.INFO)
    started_at = time.strftime("%Y-%m-%dT%H:%M:%S ")
    workstation = getpass.getuser()
    logging.info(
        'Started at %s using the following workstation: %s' % (
            started_at, workstation))

    #root.setLevel(logging.DEBUG)
    check_manifest(args.input)
Beispiel #11
0
def main(args_):
    '''
    Batch process packages by running accession.py and makepbcore.py

    When args.csv is given, its 'Object Entry' values are passed to
    initial_check, and a copy of the CSV with the 'Reference Number'
    column filled in is written to the desktop logs directory.
    With args.dryrun the script exits after that step. Otherwise the user
    is asked to confirm, each package is accessioned with accession.main,
    and the pbcore reports are collated with gather_metadata.
    '''
    args = parse_args(args_)
    oe_list = []
    if args.csv:
        for line_item in ififuncs.extract_metadata(args.csv)[0]:
            oe_number = line_item['Object Entry'].lower()
            # this transforms OE-#### to oe####
            oe_list.append(oe_number[:2] + oe_number[3:])
    if args.reference:
        reference_number = get_filmographic_number(args.reference)
    else:
        reference_number = ififuncs.get_reference_number()
    user = ififuncs.get_user()
    accession_number = get_number(args)
    accession_digits = int(accession_number[3:])
    to_accession = initial_check(args, accession_digits, oe_list,
                                 reference_number)
    register = accession.make_register()
    # Stays None when no CSV was supplied, so the summary below can tell
    # whether a modified CSV exists instead of raising NameError.
    new_csv = None
    if args.csv:
        desktop_logs_dir = ififuncs.make_desktop_logs_dir()
        new_csv = os.path.join(desktop_logs_dir, os.path.basename(args.csv))
        filmographic_dict, headers = ififuncs.extract_metadata(args.csv)
        for oe_package in to_accession:
            package_name = os.path.basename(oe_package)
            # oe#### back to OE-#### so it can be matched against the CSV
            object_entry = package_name.upper()[:2] + '-' + package_name[2:]
            for filmographic_record in filmographic_dict:
                if object_entry == filmographic_record['Object Entry']:
                    filmographic_record['Reference Number'] = to_accession[
                        oe_package][1]
        with open(new_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=headers)
            writer.writeheader()
            for i in filmographic_dict:
                writer.writerow(i)
    if args.dryrun:
        sys.exit()
    proceed = ififuncs.ask_yes_no('Do you want to proceed?')
    if proceed == 'Y':
        for package in sorted(to_accession.keys()):
            accession.main([
                package, '-user', user, '-p', '-f', '-number',
                to_accession[package][0], '-reference',
                to_accession[package][1], '-register', register
            ])
    collated_pbcore = gather_metadata(args.input)
    # Single-argument print() works identically on Python 2 and 3.
    print('\nA helper accessions register has been generated in order to help with registration - located here: %s' % register)
    if new_csv is not None:
        print('\nA modified filmographic CSV has been generated with added reference numbers - located here: %s' % new_csv)
    print('\nA collated CSV consisting of each PBCore report has been generated for batch database import - located here: %s' % collated_pbcore)
Beispiel #12
0
def main():
    '''
    Sets up a timestamped fixity validation log in the desktop logs
    directory, writes a start entry naming the current user, and then
    calls check_manifest on args.input.
    '''
    cli = make_parser()
    args = cli.parse_args()
    log_folder = make_desktop_logs_dir()
    input_name = os.path.basename(args.input)
    log = "%s/%s_fixity_validation.log" % (
        log_folder, input_name + time.strftime("_%Y_%m_%dT%H_%M_%S"))
    # Kept for the disabled setLevel call further down.
    root = logging.getLogger()
    logging.basicConfig(filename=log, filemode='a', level=logging.INFO)
    start_message = 'Started at %s using the following workstation: %s' % (
        time.strftime("%Y-%m-%dT%H:%M:%S "), getpass.getuser())
    logging.info(start_message)

    #root.setLevel(logging.DEBUG)
    check_manifest(args.input)
Beispiel #13
0
def make_register():
    '''
    Creates a placeholder accessions register CSV in the desktop logs
    directory, containing only the column headings, and returns its path.
    This should get rid of some of the more painful, repetitive identifier
    matching.
    '''
    headings = (
        'entry number',
        'accession number',
        'date acquired',
        'date accessioned',
        'acquired from',
        'acquisition method',
        'simple name; basic description; identification; historical information',
        'notes',
    )
    logs_dir = ififuncs.make_desktop_logs_dir()
    register_name = 'register_' + time.strftime("%Y-%m-%dT%H_%M_%S.csv")
    register = os.path.join(logs_dir, register_name)
    ififuncs.create_csv(register, headings)
    return register
Beispiel #14
0
def make_oe_register():
    '''
    Creates a placeholder oe register CSV in the desktop logs directory,
    containing only the column headings, and returns its path.
    This should get rid of some of the more painful, repetitive identifier
    matching.
    '''
    headings = (
        'OE No.',
        'Date Received',
        'Quantity',
        'Format',
        'Description',
        'Contact Name',
        'Type of Acquisition',
        'Accession No.',
        'Additional Information',
        'Habitat',
        'Vinegar No',
    )
    logs_dir = ififuncs.make_desktop_logs_dir()
    register_name = 'oe_helper_register_' + time.strftime(
        "%Y-%m-%dT%H_%M_%S.csv")
    oe_register = os.path.join(logs_dir, register_name)
    ififuncs.create_csv(oe_register, headings)
    return oe_register
Beispiel #15
0
def main():
    '''
    Parses the command line, starts a timestamped fixity validation log in
    the desktop logs directory, then runs check_manifest on args.input and
    passes the result to log_results.
    '''
    args = make_parser().parse_args()
    logs_dir = make_desktop_logs_dir()
    stamped_name = os.path.basename(args.input) + time.strftime(
        "_%Y_%m_%dT%H_%M_%S")
    log_name_source = "%s/%s_fixity_validation.log" % (logs_dir, stamped_name)

    def write_log(message):
        # Every entry goes to the same log file.
        ififuncs.generate_log(log_name_source, message)

    write_log('EVENT = validate.py started')
    write_log(
        'eventDetail=validate.py %s' %
        ififuncs.get_script_version('validate.py'))
    write_log('Command line arguments: %s' % args)
    manifest = check_manifest(args.input, log_name_source)
    log_results(manifest, log_name_source, args)
Beispiel #16
0
def main(args_):
    '''
    Launches functions that will generate a helper accessions register

    Each row of args.sorted_csv is matched on accession number against the
    rows of args.pbcore_csv, from which the acquisition method, source and
    date are copied. The matching technical record is in turn matched on
    reference number against args.filmo_csv to build the
    'simple name; ...' description.

    The enriched rows are written to a timestamped *_helper_register.csv
    in the desktop logs directory, and the path of that file is returned.
    '''
    args = parse_args(args_)
    # Parse the sorted CSV once; index 0 holds the rows, index 1 the headers.
    sorted_csv_metadata = ififuncs.extract_metadata(args.sorted_csv)
    sorted_csv_dict = sorted_csv_metadata[0]
    fieldnames = sorted_csv_metadata[1]
    pbcore_csv_dict = ififuncs.extract_metadata(args.pbcore_csv)[0]
    filmo_csv_dict = ififuncs.extract_metadata(args.filmo_csv)[0]
    for register_row in sorted_csv_dict:
        number = register_row['accession number']
        for technical_record in pbcore_csv_dict:
            if technical_record['Accession Number'] != number:
                continue
            register_row['acquisition method'] = technical_record[
                'Type Of Deposit']
            register_row['acquired from'] = technical_record['Donor']
            register_row['date acquired'] = technical_record[
                'Date Of Donation']
            for filmographic_record in filmo_csv_dict:
                if filmographic_record[
                        'Filmographic URN'] != technical_record[
                            'Reference Number']:
                    continue
                if filmographic_record['Title/Name'] == '':
                    # Fall back to series and episode when there is no title.
                    title = filmographic_record[
                        'Series Title'] + '; ' + filmographic_record[
                            'Episode No']
                else:
                    title = filmographic_record['Title/Name']
                simple = '%s (%s) | %s' % (
                    title, filmographic_record['Year'],
                    technical_record['dig_object_descrip'])
                if register_row['acquisition method'] == 'Reproduction':
                    simple += ' | Reproduction of %s' % technical_record[
                        'TTape Origin']
                register_row[
                    'simple name; basic description; identification; historical information'] = simple
    desktop_logs_dir = ififuncs.make_desktop_logs_dir()
    new_csv_filename = time.strftime(
        "%Y-%m-%dT%H_%M_%S_") + 'helper_register.csv'
    new_csv = os.path.join(desktop_logs_dir, new_csv_filename)
    # newline='' lets the csv module control line endings; without it an
    # extra blank line appears after every row on Windows.
    with open(new_csv, 'w', encoding='utf-8', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for row in sorted_csv_dict:
            writer.writerow(row)
    print('\nYour helper CSV file is located here: %s\n' % new_csv)
    return new_csv
Beispiel #17
0
def main(args_):
    '''
    Entry point when called from the command line or from another script.

    Parses args_, starts a timestamped fixity validation log in the
    desktop logs directory and runs check_manifest. log_results is only
    called when args.update_log is set.

    Returns the error counter produced by check_manifest.
    '''
    args = make_parser(args_)
    logs_dir = make_desktop_logs_dir()
    stamped_name = os.path.basename(args.input) + time.strftime(
        "_%Y_%m_%dT%H_%M_%S")
    log_name_source = "%s/%s_fixity_validation.log" % (logs_dir, stamped_name)

    def write_log(message):
        # Every entry goes to the same log file.
        ififuncs.generate_log(log_name_source, message)

    write_log('EVENT = validate.py started')
    write_log(
        'eventDetail=validate.py %s' %
        ififuncs.get_script_version('validate.py'))
    write_log('Command line arguments: %s' % args)
    manifest, error_counter = check_manifest(args, log_name_source)
    if args.update_log:
        log_results(manifest, log_name_source, args)
    return error_counter
Beispiel #18
0
def main():
    '''
    Starts a timestamped fixity validation log in the desktop logs
    directory, records the script version and command line arguments,
    then runs check_manifest on args.input and hands the result to
    log_results.
    '''
    args = make_parser().parse_args()
    logs_dir = make_desktop_logs_dir()
    stamped_name = os.path.basename(args.input) + time.strftime("_%Y_%m_%dT%H_%M_%S")
    log_name_source = "%s/%s_fixity_validation.log" % (logs_dir, stamped_name)

    def write_log(message):
        # Every entry goes to the same log file.
        ififuncs.generate_log(log_name_source, message)

    write_log('EVENT = validate.py started')
    write_log('eventDetail=validate.py %s' % ififuncs.get_script_version('validate.py'))
    write_log('Command line arguments: %s' % args)
    manifest = check_manifest(args.input, log_name_source)
    log_results(manifest, log_name_source, args)
Beispiel #19
0
def setup(args_):
    '''
    Sets a bunch of filename variables and parses command line.
    some examples, for a source of /home/kieranjol/fakeeeeee:
    manifest_root = /home/kieranjol/fakeeeeee/fakeeeeee_manifest.md5
    manifest_sidecar = /home/kieranjol/fakeeeeee_manifest.md5

    When check_for_sip returns a directory for the source, that directory
    becomes the source and a folder named after the original source is
    created inside the destination. Otherwise the absolute source path and
    the destination are used as given.

    Also starts the copyit log and compares the size of the input with
    the free space at the destination; if there is not enough space the
    script exits unless -y was given or the user chooses to continue.

    Returns the tuple
    (args, rootpos, manifest_sidecar, log_name_source,
    destination_final_path, manifest_root, manifest_destination, manifest,
    destination, dirname, desktop_manifest_dir).
    '''
    parser = argparse.ArgumentParser(
        description='Copy directory with checksum comparison'
        'and manifest generation.Written by Kieran O\'Leary.')
    parser.add_argument('source', help='Input directory')
    parser.add_argument('destination', help='Destination directory')
    parser.add_argument(
        '-l',
        '-lto',
        action='store_true',
        help='use gcp instead of rsync on osx for SPEED on LTO')
    parser.add_argument('-move',
                        action='store_true',
                        help='Move files instead of copying - much faster!')
    parser.add_argument(
        '-justcopy',
        action='store_true',
        help='Do not generate destination manifest and verify integrity :(')
    parser.add_argument(
        '-y',
        action='store_true',
        help=
        'Answers YES to the question: Not enough free space, would you like to continue?'
    )
    rootpos = ''
    dircheck = None
    args = parser.parse_args(args_)
    if os.path.isdir(args.source):
        dircheck = check_for_sip(args.source)
    if dircheck is not None and os.path.isdir(dircheck):
        source = dircheck
        destination = os.path.join(args.destination,
                                   os.path.basename(args.source))
        os.makedirs(destination)
    else:
        # Also taken when check_for_sip returned something that is not a
        # directory, which previously left source/destination unbound.
        source = os.path.abspath(args.source)
        destination = args.destination
    normpath = os.path.normpath(source)
    dirname = os.path.basename(source)
    if dirname == '':
        # An empty basename means the source is a filesystem root.
        rootpos = 'y'
    relative_path = normpath.split(os.sep)[-1]
    # or hardcode
    destination_final_path = os.path.join(destination, dirname)
    if rootpos == 'y':
        manifest_destination = os.path.dirname(
            destination) + '/%s_manifest.md5' % os.path.basename(destination)
        manifest_filename = '%s_manifest.md5' % os.path.basename(destination)
    else:
        manifest_destination = destination + '/%s_manifest.md5' % dirname
        manifest_filename = '%s_manifest.md5' % dirname
    if os.path.isfile(manifest_destination):
        print('Destination manifest already exists')
    desktop_manifest_dir = make_desktop_manifest_dir()
    # manifest = desktop manifest, looks like this can get rewritten later.
    manifest = os.path.join(desktop_manifest_dir, manifest_filename)
    manifest_sidecar = os.path.join(os.path.dirname(source),
                                    relative_path + '_manifest.md5')
    manifest_root = source + '/%s_manifest.md5' % os.path.basename(source)
    log_name_filename = dirname + time.strftime("_%Y_%m_%dT%H_%M_%S")
    desktop_logs_dir = make_desktop_logs_dir()
    log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_filename)
    generate_log(log_name_source, 'copyit.py started.')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=copyit.py %s' % ififuncs.get_script_version('copyit.py'))
    generate_log(log_name_source, 'Source: %s' % source)
    generate_log(log_name_source, 'Destination: %s' % destination)
    print('Checking total size of input folder')
    total_input_size = ififuncs.get_folder_size(os.path.abspath(args.source))
    print('Checking if enough space in destination folder')
    free_space = ififuncs.get_free_space(args.destination)
    if total_input_size > free_space:
        print('You do not have enough free space!')
        if args.y:
            go_forth_blindly = 'Y'
        else:
            go_forth_blindly = ififuncs.ask_yes_no(
                'Would you like to continue anyway? Press Y or N')
        if go_forth_blindly == 'Y':
            generate_log(
                log_name_source,
                'You do not have enough free space!, but the user has decided to continue anyhow'
            )
        else:
            generate_log(log_name_source,
                         'You do not have enough free space! - Exiting')
            sys.exit()
    return args, rootpos, manifest_sidecar, log_name_source, destination_final_path, manifest_root, manifest_destination, manifest, destination, dirname, desktop_manifest_dir
Beispiel #20
0
# Different views of the source path used by the rest of the script.
source = args.source
source_parent_dir = os.path.dirname(source)
normpath = os.path.normpath(source)
dirname = os.path.split(os.path.basename(source))[1]
relative_path = normpath.split(os.sep)[-1]

# With -s the manifest sits next to the source directory; otherwise it is
# placed in the desktop manifest directory.
if args.s:
    manifest = source_parent_dir + '/%s_manifest.md5' % relative_path
else:
    manifest_ = '/%s_manifest.md5' % relative_path
    desktop_manifest_dir = make_desktop_manifest_dir()
    manifest = "%s/%s" % (desktop_manifest_dir, manifest_)

# The log lives in the desktop logs directory, named after the source plus
# a timestamp.
log_name_source_ = os.path.basename(args.source) + time.strftime("_%Y_%m_%dT%H_%M_%S")
desktop_logs_dir = make_desktop_logs_dir()
log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_source_)


generate_log(log_name_source, 'move.py started.')
generate_log(log_name_source, 'Source: %s' % source)

# Only directories are supported as input.
# Single-argument print() works identically on Python 2 and 3.
if os.path.isfile(source):
    print('\nFile checksum is not currently supported, only directories.\n')
    generate_log(log_name_source, 'Error: Attempted to generate manifest for file. Only Directories/Folders are currently supported')
    generate_log(log_name_source, 'move.py exit')
    sys.exit()
elif not os.path.isdir(source):
    # NOTE(review): unlike the branch above, nothing here stops the script
    # after logging 'move.py exit' - confirm whether an exit is intended.
    print(' %s is either not a directory or it does not exist' % source)
    generate_log(log_name_source, ' %s is either not a directory or it does not exist' % source)
    generate_log(log_name_source, 'move.py exit')
Beispiel #21
0
def main(args_):
    '''
    Overly long main function that makes a sidecar manifest.
    This needs to get broken up into smaller functions.

    The manifest location depends on the options:
    -s places it beside the source directory, -f places it inside the
    source directory, and by default it goes to the desktop manifest
    directory. With -sha512 the manifest is named *_manifest-sha512.txt,
    otherwise *_manifest.md5.

    If a manifest already exists and its file count differs from the
    number of files in source, the script exits. If no manifest exists,
    one is generated. The script also exits when source is a file or does
    not exist.

    Returns the path of the log file.
    '''
    parser = argparse.ArgumentParser(description='Generate manifest with'
                                     ' checksums for a directory'
                                     ' Written by Kieran O\'Leary.')
    parser.add_argument(
        'source',
        help='Input directory'
    )
    parser.add_argument(
        '-s', '-sidecar',
        action='store_true',
        help='Generates Sidecar'
    )
    parser.add_argument(
        '-f', '-felix',
        action='store_true',
        help='Felix Meehan workflow - places manifest inside of source directory'
    )
    parser.add_argument(
        '-sha512',
        action='store_true',
        help='Generates sha512 checksums instead of md5'
    )
    args = parser.parse_args(args_)
    source = args.source
    source_parent_dir = os.path.dirname(source)
    normpath = os.path.normpath(source)
    relative_path = normpath.split(os.sep)[-1]
    log_name_source_ = os.path.basename(
        args.source
    ) + time.strftime("_%Y_%m_%dT%H_%M_%S")
    if args.sha512:
        manifest_ = '/%s_manifest-sha512.txt' % relative_path
        module = 'hashlib.sha512'
    else:
        manifest_ = '/%s_manifest.md5' % relative_path
        module = 'hashlib.md5'
    if args.s:
        manifest = source_parent_dir + manifest_
        log_name_source = source_parent_dir + '/%s.log' % log_name_source_
    elif args.f:
        # -f always keeps the manifest inside the source directory, for
        # sha512 as well as md5, as its help text states.
        manifest = source + manifest_
        log_name_source = source_parent_dir + '/%s.log' % log_name_source_
    else:
        desktop_manifest_dir = make_desktop_manifest_dir()
        manifest = "%s/%s" % (desktop_manifest_dir, manifest_)
        desktop_logs_dir = make_desktop_logs_dir()
        log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_source_)
    generate_log(log_name_source, 'manifest.py started.')
    if sys.platform == "win32":
        agent = 'Windows'
    elif sys.platform == "darwin":
        agent = 'OSX'
    elif sys.platform.startswith("linux"):
        # Python 2 reports 'linux2', Python 3.3+ reports 'linux'.
        agent = 'Linux'
    else:
        agent = None
    if agent is not None:
        generate_log(
            log_name_source,
            'EVENT = Generating manifest: status=started, eventType=message digest calculation, module=%s, agent=%s' % (module, agent)
        )
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=manifest.py %s' % ififuncs.get_script_version('manifest.py'))
    generate_log(log_name_source, 'Source: %s' % source)
    if os.path.isfile(source):
        print('\nFile checksum is not currently supported, only directories.\n')
        generate_log(log_name_source, 'Error: Attempted to generate manifest for file. Only Directories/Folders are currently supported')
        generate_log(log_name_source, 'manifest.py exit')
        sys.exit()
    elif not os.path.isdir(source):
        print(' %s is either not a directory or it does not exist' % source)
        generate_log(log_name_source, ' %s is either not a directory or it does not exist' % source)
        generate_log(log_name_source, 'manifest.py exit')
        sys.exit()
    remove_bad_files(source, log_name_source)
    # Total number of files anywhere under source.
    source_count = sum(len(filenames) for _, _, filenames in os.walk(source))
    if os.path.isfile(manifest):
        count_in_manifest = manifest_file_count(manifest)
        if source_count != count_in_manifest:
            print('This manifest may be outdated as the number of files in your directory does not match the number of files in the manifest')
            generate_log(log_name_source, 'EVENT = Existing source manifest check - Failure - The number of files in the source directory is not equal to the number of files in the source manifest ')
            sys.exit()
    if not os.path.isfile(manifest):
        try:
            print('Generating source manifest')
            generate_log(log_name_source, 'EVENT = Generating source manifest')
            if args.f:
                if args.sha512:
                    ififuncs.sha512_manifest(source, manifest, source)
                else:
                    hashlib_manifest(source, manifest, source)
                shutil.move(log_name_source, source)
            else:
                if args.sha512:
                    ififuncs.sha512_manifest(source, manifest, source_parent_dir)
                else:
                    hashlib_manifest(source, manifest, source_parent_dir)
        except OSError:
            print('You do not have access to this directory. Perhaps it is read only, or the wrong file system\n')
            sys.exit()
    else:
        generate_log(log_name_source, 'EVENT = Existing source manifest check - Source manifest already exists. Script will exit. ')
    print('Manifest created in %s' % manifest)
    generate_log(log_name_source, 'Manifest created in %s' % manifest)
    return log_name_source
Beispiel #22
0
def main(args_):
    '''
    Batch process packages by running accession.py and makepbcore.py

    args_ is the list of command line arguments that is handed to parse_args().
    The user is prompted for donor details, and when a filmographic CSV is
    supplied via args.csv a copy with reference numbers filled in is written
    to the desktop logs directory. After confirmation, accession.main() is
    called once per package in to_accession.
    Nothing is returned. With args.dryrun the script exits after the
    modified CSV has been written.
    '''
    args = parse_args(args_)
    oe_list = []
    # new_csv only exists when a filmographic CSV was supplied. It is
    # initialised here so the later steps can test for it instead of raising
    # a NameError when -csv is missing.
    new_csv = None
    filmographic_dict = []
    headers = []
    if args.csv:
        # Parse the CSV once and reuse the records further down.
        filmographic_dict, headers = ififuncs.extract_metadata(args.csv)
        for line_item in filmographic_dict:
            oe_number = line_item['Object Entry'].lower()
            # this transforms OE-#### to oe####
            transformed_oe = oe_number[:2] + oe_number[3:]
            oe_list.append(transformed_oe)
    if args.reference:
        reference_number = get_filmographic_number(args.reference)
    else:
        reference_number = ififuncs.get_reference_number()
    donor = ififuncs.ask_question(
        'Who is the source of acquisition, as appears on the donor agreement? This will not affect Reproductions.'
    )
    depositor_reference = ififuncs.ask_question(
        'What is the donor/depositor number? This will not affect Reproductions.'
    )
    acquisition_type = ififuncs.get_acquisition_type('')
    user = ififuncs.get_user()
    accession_number = get_number(args)
    # The first three characters are skipped; the rest must be digits.
    accession_digits = int(accession_number[3:])
    to_accession = initial_check(args, accession_digits, oe_list,
                                 reference_number)
    register = accession.make_register()
    if args.csv:
        desktop_logs_dir = ififuncs.make_desktop_logs_dir()
        if args.dryrun:
            new_csv_filename = time.strftime(
                "%Y-%m-%dT%H_%M_%S_DRYRUN_SHEET_PLEASE_DO_NOT_INGEST_JUST_IGNORE_COMPLETELY"
            ) + os.path.basename(args.csv)
        else:
            new_csv_filename = time.strftime(
                "%Y-%m-%dT%H_%M_%S_") + os.path.basename(args.csv)
        new_csv = os.path.join(desktop_logs_dir, new_csv_filename)
        for oe_package in to_accession:
            # Rebuild the 'OE-####' form of the package folder name so it can
            # be compared with the 'Object Entry' column.
            package_name = os.path.basename(oe_package)
            object_entry = package_name.upper()[:2] + '-' + package_name[2:]
            for filmographic_record in filmographic_dict:
                if object_entry == filmographic_record['Object Entry']:
                    filmographic_record['Reference Number'] = to_accession[
                        oe_package][1]
        with open(new_csv, 'w') as csvfile:
            # Removes Object Entry from headings as it's not needed in database.
            # Done by name rather than by position so a different column
            # order cannot drop the wrong heading.
            fieldnames = [
                heading for heading in headers if heading != 'Object Entry'
            ]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for i in filmographic_dict:
                i.pop('Object Entry', None)
                # Only include records that have reference numbers
                if not i['Reference Number'] == '':
                    writer.writerow(i)
    if args.dryrun:
        sys.exit()
    proceed = ififuncs.ask_yes_no('Do you want to proceed?')
    if proceed == 'Y':
        for package in sorted(to_accession.keys(), key=natural_keys):
            accession_cmd = [
                package, '-user', user, '-pbcore', '-f', '-number',
                to_accession[package][0], '-reference',
                to_accession[package][1], '-register', register
            ]
            if new_csv:
                accession_cmd.extend(['-csv', new_csv])
            if len(to_accession[package]) == 3:
                # Three-item entries get a fixed acquisition type and a
                # parent obtained from order.main().
                accession_cmd.extend(['-acquisition_type', '13'])
                accession_cmd.extend(['-parent', order.main(package)])
            else:
                accession_cmd.extend(['-donor', donor])
                accession_cmd.extend(
                    ['-depositor_reference', depositor_reference])
                accession_cmd.extend(
                    ['-acquisition_type', acquisition_type[2]])
            print(accession_cmd)
            accession.main(accession_cmd)
    collated_pbcore = gather_metadata(args.input)
    sorted_filepath = ififuncs.sort_csv(register, 'accession number')
    print('\nA helper accessions register has been generated in order to help with registration - located here: %s' % sorted_filepath)
    if new_csv:
        print('\nA modified filmographic CSV has been generated with added reference numbers - located here: %s' % new_csv)
    print('\nA collated CSV consisting of each PBCore report has been generated for batch database import - located here: %s' % collated_pbcore)
Beispiel #23
0
def setup(args_):
    '''
    Sets a bunch of filename variables and parses command line.
    some examples, for source = /home/kieranjol/fakeeeeee :
    manifest_sidecar = /home/kieranjol/fakeeeeee_manifest.md5
    manifest_root = /home/kieranjol/fakeeeeee/fakeeeeee_manifest.md5

    args_ is the list of command line arguments given to argparse.
    Returns a tuple of: args, rootpos, manifest_sidecar, log_name_source,
    destination_final_path, manifest_root, manifest_destination, manifest,
    destination, dirname, desktop_manifest_dir.
    '''
    parser = argparse.ArgumentParser(
        description='Copy directory with checksum comparison '
                    'and manifest generation. Written by Kieran O\'Leary.')
    parser.add_argument(
        'source', help='Input directory'
    )
    parser.add_argument(
        'destination',
        help='Destination directory'
    )
    parser.add_argument(
        '-l', '-lto',
        action='store_true',
        help='use gcp instead of rsync on osx for SPEED on LTO'
    )
    parser.add_argument(
        '-move',
        action='store_true',
        help='Move files instead of copying - much faster!'
    )
    rootpos = ''
    dircheck = None
    args = parser.parse_args(args_)
    if os.path.isdir(args.source):
        dircheck = check_for_sip(args.source)
    if dircheck is not None and os.path.isdir(dircheck):
        # check_for_sip() pointed at a directory: copy that instead, into a
        # new folder named after the original source.
        source = dircheck
        destination = os.path.join(args.destination, os.path.basename(args.source))
        # NOTE(review): os.makedirs raises OSError if this folder already
        # exists - left as is in case that is relied upon as a safeguard.
        os.makedirs(destination)
    else:
        # Also covers a check_for_sip() result that is not a directory, which
        # previously left source and destination undefined.
        source = args.source
        destination = args.destination
    normpath = os.path.normpath(source)
    dirname = os.path.basename(source)
    if dirname == '':
        # An empty basename (e.g. a trailing separator or a drive root) means
        # the folder name has to come from the user.
        rootpos = 'y'
        dirname = raw_input(
            'What do you want your destination folder to be called?\n'
        )
    relative_path = normpath.split(os.sep)[-1]
    # or hardcode
    destination_final_path = os.path.join(destination, dirname)
    manifest_destination = destination + '/%s_manifest.md5' % dirname
    if os.path.isfile(manifest_destination):
        print('Destination manifest already exists')
    manifest_filename = '%s_manifest.md5' % dirname
    desktop_manifest_dir = make_desktop_manifest_dir()
    # manifest = desktop manifest, looks like this can get rewritten later.
    manifest = os.path.join(
        desktop_manifest_dir, manifest_filename
    )
    # Sits beside the source folder.
    manifest_sidecar = os.path.join(
        os.path.dirname(source), relative_path + '_manifest.md5'
    )
    # Sits inside the source folder.
    manifest_root = source + '/%s_manifest.md5' % os.path.basename(source)
    log_name_filename = dirname + time.strftime("_%Y_%m_%dT%H_%M_%S")
    desktop_logs_dir = make_desktop_logs_dir()
    log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_filename)
    generate_log(log_name_source, 'copyit.py started.')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=copyit.py %s' % ififuncs.get_script_version('copyit.py'))
    generate_log(log_name_source, 'Source: %s' % source)
    generate_log(log_name_source, 'Destination: %s'  % destination)
    return args, rootpos, manifest_sidecar, log_name_source, destination_final_path, manifest_root, manifest_destination, manifest, destination, dirname, desktop_manifest_dir
Beispiel #24
0
def main(args_):
    '''
    Batch process packages by running accession.py and makepbcore.py

    args_ is the list of command line arguments that is handed to parse_args().
    Two workflows are handled:
    * -oe_csv (which requires -filmographic): the packages to accession are
      built from the records returned by process_oe_csv().
    * -filmographic on its own: the packages come from initial_check() and a
      copy of the filmographic CSV with reference numbers filled in is
      written to the desktop logs directory.
    After confirmation, accession.main() is called once per package.
    Nothing is returned. With args.dryrun the script exits before anything
    is accessioned.
    '''
    args = parse_args(args_)
    oe_list = []
    # Defaults for values that only one of the workflows sets. Without them
    # the filmographic-only workflow raised a NameError when oe_dicts was
    # iterated, and new_csv was undefined if no CSV option was supplied.
    oe_dicts = []
    new_csv = None
    if args.oe_csv:
        if not args.filmographic:
            print(' - batchaccession.py - ERROR\n - No -filmographic argument supplied. This is mandatory when using the -oe_csv option. \n - Exiting..')
            sys.exit()
        oe_csv_extraction = ififuncs.extract_metadata(args.oe_csv)
        oe_dicts = process_oe_csv(oe_csv_extraction, args.input)
        # temp hack while we're performing both workflows
        helper_csv = args.oe_csv
    elif args.filmographic:
        # temp hack while we're performing both workflows
        helper_csv = args.filmographic
    if args.oe_csv or args.filmographic:
        for line_item in ififuncs.extract_metadata(helper_csv)[0]:
            # The column is called 'Object Entry' or 'OE No.' depending on
            # which CSV is being read.
            try:
                oe_number = line_item['Object Entry'].lower()
            except KeyError:
                oe_number = line_item['OE No.'].lower()
            # this transforms OE-#### to oe####
            transformed_oe = oe_number[:2] + oe_number[3:]
            oe_list.append(transformed_oe)
    if not args.oe_csv:
        # No need to ask for the reference number if the OE csv option is supplied.
        # The assumption here is that the OE csv contains the reference numbers though.
        if args.reference:
            reference_number = get_filmographic_number(args.reference)
        else:
            reference_number = ififuncs.get_reference_number()
    donor = ififuncs.ask_question('Who is the source of acquisition, as appears on the donor agreement? This will not affect Reproductions.')
    depositor_reference = ififuncs.ask_question('What is the donor/depositor number? This will not affect Reproductions.')
    acquisition_type = ififuncs.get_acquisition_type('')
    user = ififuncs.get_user()
    accession_number = get_number(args)
    # The first three characters are skipped; the rest must be digits.
    accession_digits = int(accession_number[3:])
    if not args.oe_csv:
        to_accession = initial_check(args, accession_digits, oe_list, reference_number)
    else:
        to_accession = {}
        for oe_record in oe_dicts:
            if os.path.isdir(oe_record['source_path']):
                # Value layout: accession number, reference number, parent,
                # donation date.
                to_accession[oe_record['source_path']] = ['aaa' + str(accession_digits).zfill(4), oe_record['reference number'], oe_record['parent'], oe_record['donation_date']]
                accession_digits += 1
    for success in sorted(to_accession.keys()):
        print('%s will be accessioned as %s' %  (success, to_accession[success]))
    register = accession.make_register()
    if args.filmographic:
        desktop_logs_dir = ififuncs.make_desktop_logs_dir()
        if args.dryrun:
            new_csv_filename = time.strftime("%Y-%m-%dT%H_%M_%S_DRYRUN_SHEET_PLEASE_DO_NOT_INGEST_JUST_IGNORE_COMPLETELY") + os.path.basename(args.filmographic)
        else:
            new_csv_filename = time.strftime("%Y-%m-%dT%H_%M_%S_") + os.path.basename(args.filmographic)
        new_csv = os.path.join(desktop_logs_dir, new_csv_filename)
        if not args.oe_csv:
            filmographic_dict, headers = ififuncs.extract_metadata(args.filmographic)
            for oe_package in to_accession:
                # Rebuild the 'OE-####' form of the package folder name so it
                # can be compared with the 'Object Entry' column.
                package_name = os.path.basename(oe_package)
                object_entry = package_name.upper()[:2] + '-' + package_name[2:]
                for filmographic_record in filmographic_dict:
                    if object_entry == filmographic_record['Object Entry']:
                        filmographic_record['Reference Number'] = to_accession[oe_package][1]
            get_filmographic_titles(to_accession, filmographic_dict)
            with open(new_csv, 'w') as csvfile:
                # Removes Object Entry from headings as it's not needed in database.
                # Done by name rather than by position so a different column
                # order cannot drop the wrong heading.
                fieldnames = [
                    heading for heading in headers if heading != 'Object Entry'
                ]
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                for i in filmographic_dict:
                    i.pop('Object Entry', None)
                    # Only include records that have reference numbers
                    if not i['Reference Number'] == '':
                        writer.writerow(i)
    if args.dryrun:
        sys.exit()
    proceed = ififuncs.ask_yes_no(
        'Do you want to proceed?'
    )
    if args.oe_csv:
        # In the OE CSV workflow the supplied filmographic CSV is passed on
        # unchanged.
        new_csv = args.filmographic
    if proceed == 'Y':
        for package in sorted(to_accession.keys(), key=natural_keys):
            accession_cmd = [
                package, '-user', user,
                '-f',
                '-number', to_accession[package][0],
                '-reference', to_accession[package][1],
                '-register', register
            ]
            if new_csv:
                accession_cmd.extend(['-csv', new_csv])
            # NOTE(review): -pbcore is only ever added from the OE CSV
            # records, so the filmographic-only workflow never requests
            # it - confirm that this is intended.
            for oe_record in oe_dicts:
                if oe_record['source_path'] == package:
                    if not oe_record['format'].lower() == 'dcdm':
                        accession_cmd.append('-pbcore')
            if len(to_accession[package]) == 4:
                if not to_accession[package][2] == 'n/a':
                    # A parent is present: fixed acquisition type plus the
                    # parent value.
                    accession_cmd.extend(['-acquisition_type', '13'])
                    if args.oe_csv:
                        accession_cmd.extend(['-parent', to_accession[package][2]])
                    else:
                        accession_cmd.extend(['-parent', order.main(package)])
                else:
                    accession_cmd.extend(['-donor', donor])
                    accession_cmd.extend(['-depositor_reference', depositor_reference])
                    accession_cmd.extend(['-acquisition_type', acquisition_type[2]])
                    print(to_accession[package][3])
                    accession_cmd.extend(['-donation_date', to_accession[package][3]])
            print(accession_cmd)
            accession.main(accession_cmd)
    collated_pbcore = gather_metadata(args.input)
    sorted_filepath = ififuncs.sort_csv(register, 'accession number')
    print('\nA helper accessions register has been generated in order to help with registration - located here: %s' % sorted_filepath)
    if new_csv:
        print('\nA modified filmographic CSV has been generated with added reference numbers - located here: %s' % new_csv)
    print('\nA collated CSV consisting of each PBCore report has been generated for batch database import - located here: %s' % collated_pbcore)
Beispiel #25
0
def setup(args_):
    '''
    Sets a bunch of filename variables and parses command line.
    some examples, for source = /home/kieranjol/fakeeeeee :
    manifest_sidecar = /home/kieranjol/fakeeeeee_manifest.md5
    manifest_root = /home/kieranjol/fakeeeeee/fakeeeeee_manifest.md5

    args_ is the list of command line arguments given to argparse.
    Returns a tuple of: args, rootpos, manifest_sidecar, log_name_source,
    destination_final_path, manifest_root, manifest_destination, manifest,
    destination, dirname, desktop_manifest_dir.
    '''
    parser = argparse.ArgumentParser(
        description='Copy directory with checksum comparison '
        'and manifest generation. Written by Kieran O\'Leary.')
    parser.add_argument('source', help='Input directory')
    parser.add_argument('destination', help='Destination directory')
    parser.add_argument(
        '-l',
        '-lto',
        action='store_true',
        help='use gcp instead of rsync on osx for SPEED on LTO')
    rootpos = ''
    dircheck = None
    args = parser.parse_args(args_)
    if os.path.isdir(args.source):
        dircheck = check_for_sip(args.source)
    if dircheck is not None and os.path.isdir(dircheck):
        # check_for_sip() pointed at a directory: copy that instead, into a
        # new folder named after the original source.
        source = dircheck
        destination = os.path.join(args.destination,
                                   os.path.basename(args.source))
        # NOTE(review): os.makedirs raises OSError if this folder already
        # exists - left as is in case that is relied upon as a safeguard.
        os.makedirs(destination)
    else:
        # Also covers a check_for_sip() result that is not a directory, which
        # previously left source and destination undefined.
        source = args.source
        destination = args.destination
    normpath = os.path.normpath(source)
    dirname = os.path.basename(source)
    if dirname == '':
        # An empty basename (e.g. a trailing separator or a drive root) means
        # the folder name has to come from the user.
        rootpos = 'y'
        dirname = raw_input(
            'What do you want your destination folder to be called?\n')
    relative_path = normpath.split(os.sep)[-1]
    # or hardcode
    destination_final_path = os.path.join(destination, dirname)
    manifest_destination = destination + '/%s_manifest.md5' % dirname
    if os.path.isfile(manifest_destination):
        print('Destination manifest already exists')
    manifest_filename = '%s_manifest.md5' % dirname
    desktop_manifest_dir = make_desktop_manifest_dir()
    # manifest = desktop manifest, looks like this can get rewritten later.
    manifest = os.path.join(desktop_manifest_dir, manifest_filename)
    # Sits beside the source folder.
    manifest_sidecar = os.path.join(os.path.dirname(source),
                                    relative_path + '_manifest.md5')
    # Sits inside the source folder.
    manifest_root = source + '/%s_manifest.md5' % os.path.basename(source)
    log_name_filename = dirname + time.strftime("_%Y_%m_%dT%H_%M_%S")
    desktop_logs_dir = make_desktop_logs_dir()
    log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_filename)
    generate_log(log_name_source, 'copyit.py started.')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=copyit.py %s' % ififuncs.get_script_version('copyit.py'))
    generate_log(log_name_source, 'Source: %s' % source)
    generate_log(log_name_source, 'Destination: %s' % destination)
    return args, rootpos, manifest_sidecar, log_name_source, destination_final_path, manifest_root, manifest_destination, manifest, destination, dirname, desktop_manifest_dir
Beispiel #26
0
    make_event(premis, 'message digest calculation', 'Checksum manifest for whole package created', [['UUID','9430725d-7523-4071-9063-e8a6ac4f84c4' ]],final_sip_manifest_uuid,[representation_uuid], 'source', 'now')
    make_event(premis, 'message digest calculation', 'Frame level checksums of images', [['UUID','ee83e19e-cdb1-4d83-91fb-7faf7eff738e' ]], framemd5_uuid, [representation_uuid], 'source', 'now' )
    write_premis(doc, premisxml)

# Command line interface: one or more input directories and a mandatory
# output directory.
parser = argparse.ArgumentParser(description='DPX2TIFF specific workflow for IFI'
                                ' Written by Kieran O\'Leary.')
parser.add_argument(
                    'input', nargs='+',
                    help='full path of input directory'
                    )
parser.add_argument(
                    '-o',
                    help='full path of output directory', required=True)
args = parser.parse_args()
print(args)
desktop_logs_dir = make_desktop_logs_dir()
# Timestamped CSV report path in the desktop logs directory.
csv_report_filename = os.path.join(desktop_logs_dir, 'dpx_transcode_report' + time.strftime("_%Y_%m_%dT%H_%M_%S") + '.csv')

#permission for correct directories sought from user
permission = ''
all_files = args.input
# The previous test, "not permission == 'y' or permission == 'Y'", parsed as
# "(not permission == 'y') or (permission == 'Y')" and so did not mean
# "neither y nor Y". The membership test states the intent directly.
if permission not in ('y', 'Y'):
    # NOTE(review): the messages below talk about TIFF being turned into DPX
    # while the parser description says DPX2TIFF - confirm which is right.
    print('\n\n**** All TIFF sequences within these directories will be converted to DPX.\n')
    for i in all_files:
        print(i)
    permission_prompt = '\n**** These are the directories that wil be turned into DPX. \n**** If this looks ok, please press Y, otherwise, type N\n'
    permission = raw_input(permission_prompt)
    # Keep asking until a recognised answer is given.
    while permission not in ('Y', 'y', 'N', 'n'):
        permission = raw_input(permission_prompt)
    if permission in ('n', 'N'):
        print('Exiting at your command- Cheerio for now')
        sys.exit()