Example #1
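Fetches accession records from the ArchivesSpace API for three repositories (Avery, RBML, RBMLBOOKS), flattens selected fields, writes the results to a Google Sheet, and reports accessions created within the past week.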
import datetime
import json
import os
import re
from operator import itemgetter

import dateutil.parser

# Assumed project-local imports: asf (ArchivesSpace API helpers), dataSheet
# (Google Sheets wrapper), and log_it, along with the constants MY_NAME,
# SCRIPT_NAME, DEBUG, LOOKUP_CSV, and target_server, are defined elsewhere
# in the project.


def main():
    now1 = datetime.datetime.now()
    start_time = str(now1)
    end_time = ''  # set later
    # day_offset = now1.weekday() + 1 # Calculate the Sunday of current week
    day_offset = 7  # use past seven days, regardless of current day

    print('Script ' + MY_NAME + ' began at ' + start_time + '.')

    if not DEBUG:
        the_sheet_id = '1JA5bRSnYV80sx4m5SOFQ6QJ4u21SXvQeNdNbuRVCdds'
    else:
        the_sheet_id = '1e_TAK8eUsaHltBu9J5bNO1twThqt7_nE5olmz2pdCUw'  # test doc
        day_offset = 14  # use past 2 weeks for testing

    # Set date stamp day_offset days back to determine recently created accessions.
    begin_of_week = (now1 - datetime.timedelta(day_offset)).date()

    the_sheet_rbml = dataSheet(the_sheet_id, 'rbml!A:Z')
    the_sheet_avery = dataSheet(the_sheet_id, 'avery!A:Z')
    the_sheet_rbmlbooks = dataSheet(the_sheet_id, 'rbmlbooks!A:Z')

    # Location to save output
    if DEBUG:
        out_folder = "/cul/cul0/ldpd/archivesspace/test/accessions"
    else:
        out_folder = "/cul/cul0/ldpd/archivesspace/accessions"

    rbml_acc_file = os.path.join(out_folder, 'report_rbml_accessions.json')
    avery_acc_file = os.path.join(out_folder, 'report_avery_accessions.json')
    rbmlbooks_acc_file = os.path.join(out_folder,
                                      'report_rbmlbooks_accessions.json')

    print(' ')

    print('Starting accession report in ' +
          'https://docs.google.com/spreadsheets/d/' + str(the_sheet_id) +
          '/edit?usp=sharing')

    if not DEBUG:
        # Save the accessions as json files. In DEBUG mode, just use the files already saved.
        print('Saving Avery accession data to ' + avery_acc_file + '....')

        # Only fetch file if not in Debug mode
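        # ArchivesSpace repository IDs: 3 = Avery, 2 = RBML, 6 = RBMLBOOKS
        # (these match the repo_id values in the_info below).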
        with open(avery_acc_file, "w+") as f:
            try:
                x = asf.getAccessions(3)
                f.write(x)
            except Exception as err:
                raise ValueError(
                    "There was an error in getting Avery accession data!") from err

            y = json.loads(x)
            if y and 'error' in y[0]:
                print(y[0]['error'])

        print('Saving RBML accession data to ' + rbml_acc_file + '....')

        with open(rbml_acc_file, "w+") as f:
            try:
                x = asf.getAccessions(2)
                f.write(x)
            except Exception as err:
                raise ValueError(
                    "There was an error in getting RBML accession data!") from err

            y = json.loads(x)
            if y and 'error' in y[0]:
                print(y[0]['error'])

        print('Saving RBMLBOOKS accession data to ' + rbmlbooks_acc_file +
              '....')

        with open(rbmlbooks_acc_file, "w+") as f:
            try:
                x = asf.getAccessions(6)
                f.write(x)
            except Exception as err:
                raise ValueError(
                    "There was an error in getting RBMLBOOKS accession data!") from err

            y = json.loads(x)
            if y and 'error' in y[0]:
                print(y[0]['error'])

    print(' ')

    # the_files = [
    #         [avery_acc_file, the_sheet_avery],
    #         [rbml_acc_file, the_sheet_rbml]
    #              ]

    the_recents = {}

    the_info = [{
        'repo_name': 'Avery',
        'repo_id': 3,
        'acc_file': avery_acc_file,
        'the_sheet': the_sheet_avery
    }, {
        'repo_name': 'RBML',
        'repo_id': 2,
        'acc_file': rbml_acc_file,
        'the_sheet': the_sheet_rbml
    }, {
        'repo_name': 'RBMLBOOKS',
        'repo_id': 6,
        'acc_file': rbmlbooks_acc_file,
        'the_sheet': the_sheet_rbmlbooks
    }]
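    # Each entry pairs a repository with its cached JSON file and its
    # destination tab in the sheet.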

    # The top-level elements to save from the JSON (each can be further processed below)
    the_keys = {
        "title": "title",
        "uri": "uri",
        "repository": "repository",
        "accession_date": "accession_date",
        "id_0": "id_0",
        "id_1": "id_1",
        "id_2": "id_2",
        "id_3": "id_3",
        "extents": "extents",
        "related_resources": "related_resources",
        "collection_management": "collection_management",
        "user_defined": "user_defined",
        "create_time": "create_time",
        "system_mtime": "system_mtime",
        "last_modified_by": "last_modified_by"
    }
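    # Several of these (repository, extents, related_resources,
    # collection_management, user_defined) are nested objects that are
    # flattened into simple columns below.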

    ext_dict = {
        "ext-number": "number",
        "ext-portion": "portion",
        "ext-type": "extent_type"
    }
    for repo_info in the_info:

        the_file = repo_info['acc_file']
        the_target = repo_info['the_sheet']
        repo_name = repo_info['repo_name']

        with open(the_file) as f:
            the_data = json.load(f)

        all_rows = []

        for an_accession in the_data:
            # acc_info: working dict for this accession, refined step by step below.
            acc_info = {}
            for key, value in the_keys.items():
                try:
                    acc_info.update({key: an_accession[value]})
                except (IndexError, KeyError):
                    acc_info.update({key: ""})

            # Refine elements by extracting subelements, etc.

            # Handle collection_management
            cm = acc_info["collection_management"]
            cm_dict = {
                "processing_priority": "processing_priority",
                "processing_status": "processing_status"
            }
            for key, value in cm_dict.items():
                try:
                    acc_info[key] = cm[value]
                except (IndexError, KeyError, TypeError):
                    acc_info[key] = ''

            acc_info.pop("collection_management")

            # Parse resource id and get bibid
            res = acc_info["related_resources"]
            if len(res) > 0:
                res_url = res[0]["ref"]
                repo = res_url.split('/')[2]
                asid = res_url.split('/')[4]
                bibid = asf.lookupBibID(repo, asid, LOOKUP_CSV)
            else:
                bibid = ''
                asid = ''
            acc_info["resource_bibid"] = bibid
            acc_info["resource_asid"] = asid
            acc_info.pop("related_resources")

            # Parse BibID out of user_defined / integer_1
            try:
                usdef = acc_info["user_defined"]
                acc_info['integer_1'] = usdef['integer_1']
            except (KeyError, TypeError):
                acc_info['integer_1'] = ''
            acc_info.pop("user_defined")

            # Google Sheets would treat a leading "+" in id_3 as the start of
            # a formula; prefix an apostrophe so it displays as literal text.
            acc_info["id_3"] = re.sub(r"^\+", "'+", acc_info["id_3"])

            # Handle repository
            repository = acc_info["repository"]
            if len(repository) > 0:
                repo_url = repository["ref"]
                repo = repo_url.split('/')[2]
            else:
                repo = ''
            acc_info["repo"] = repo
            acc_info.pop("repository")

            # Handle date
            acc_date = acc_info["accession_date"]
            yyyy, mm, dd = (int(part) for part in acc_date.split('-'))
            the_date = datetime.date(yyyy, mm, dd)
            # Due to a legacy import issue, some records with unknown dates
            # have malformed dates like 0002-01-23. Acknowledge their
            # unknownness.
            if the_date.year < 1700:
                acc_info["accession_date"] = "0000-00-00"
                acc_info["year"] = ""
                acc_info["fiscal-year"] = ""
            else:
                acc_info["year"] = the_date.year
                # Fiscal year
                if the_date.month > 6:
                    acc_info["fiscal-year"] = the_date.year + 1
                else:
                    acc_info["fiscal-year"] = the_date.year

            # Handle extents
            ext = acc_info["extents"]
            for key, value in ext_dict.items():
                try:
                    acc_info[key] = ext[0][value]
                except (IndexError, KeyError):
                    acc_info[key] = ''

            acc_info.pop("extents")

            # Clean up titles
            acc_info['title'] = str(acc_info['title']).strip()

            # Uncomment to list records in log.
            # print("processing: " + str(acc_info["uri"]).strip() + ' / ' + str(acc_info["title"]).strip() )

            all_rows.append(acc_info)

        processed_msg = 'Processed ' + \
            str(len(all_rows)) + ' records in ' + repo_name + '.'
        print(processed_msg)

        log_it(SCRIPT_NAME, processed_msg)

        # the_heads = list(all_rows[0].keys())

        # Explicitly order the columns; the dict keys are not in display
        # order after the reshaping above.
        the_heads = [
            'title', 'uri', 'accession_date', 'id_0', 'id_1', 'id_2', 'id_3',
            'integer_1', 'resource_bibid', 'resource_asid', 'repo', 'year',
            'fiscal-year', 'ext-number', 'ext-portion', 'ext-type',
            'processing_priority', 'processing_status', 'create_time',
            'system_mtime', 'last_modified_by'
        ]

        the_output = []

        # Build row in order specified by the_heads
        for a_row in all_rows:
            # r = list(a_row.values())
            r = [a_row[h] for h in the_heads]
            the_output.append(r)
            # print(a_row)

        # Sort by accession_date (index 2 in each row), newest first.
        the_output = sorted(the_output, key=itemgetter(2), reverse=True)

        # Get list of recents
        the_recents[repo_name] = []

        for i in the_output:
            # i[18] is the create_time column
            i_date = dateutil.parser.isoparse(i[18]).date()
            if i_date > begin_of_week:
                the_recents[repo_name].append(i)

        # If there are recents, list them
        if the_recents[repo_name]:
            print(' ')
            recent_msg = str(len(the_recents[repo_name])) + \
                ' accessions recently added in ' + repo_name + ': '
            print(recent_msg)
            log_it(SCRIPT_NAME, recent_msg)
            print('-----------')
            for r in the_recents[repo_name]:
                print(r[0])
                print(r[1])
                print('Created ' + str(dateutil.parser.isoparse(r[18]).date()))
                print('Last edited by ' + r[20])
                print('-----------')
        else:
            print(' ')
            recent_msg = 'No recently created accessions in ' + repo_name
            print(recent_msg)
            log_it(SCRIPT_NAME, recent_msg)

            # print(the_recents[repo_name])

        the_output.insert(0, the_heads)

        print(' ')

        the_target.clear()

        print('Writing ' + repo_name + ' data to sheet ...')
        the_target.appendData(the_output)

        print(' ')

    # Generate a log message and append it to the log tab, if one exists.
    the_tabs = the_target.initTabs

    now2 = datetime.datetime.now()
    end_time = str(now2)
    my_duration = str(now2 - now1)

    if DEBUG:
        log_prefix = '[TEST] '
    else:
        log_prefix = ''
    the_log = (log_prefix + 'Data imported from ' + target_server + ' by ' +
               MY_NAME + '. Start: ' + start_time + '. Finished: ' + end_time +
               ' (duration: ' + my_duration + ').')

    if 'log' in the_tabs:
        log_range = 'log!A:A'
        # today = datetime.datetime.today().strftime('%c')
        dataSheet(the_sheet_id, log_range).appendData([[the_log]])
    else:
        print('*** Warning: There is no log tab in this sheet. ***')

    print(' ')

    print(the_log)
    log_it(SCRIPT_NAME, the_log)

    print(' ')

    exit_msg = 'Script done. Updated data is available at ' + \
        'https://docs.google.com/spreadsheets/d/' + \
        str(the_sheet_id) + '/edit?usp=sharing'
    print(exit_msg)
    log_it(SCRIPT_NAME, exit_msg)
Example #2
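A variant of the fetch-and-save step from Example #1, from a version of the script that switches on a mode flag ('Prod' vs. 'Test') instead of the DEBUG boolean.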
print(' ')

print('Starting accession report in ' +
      'https://docs.google.com/spreadsheets/d/' + str(the_sheet_id) +
      '/edit?usp=sharing')

if mode == 'Prod':
    # Save the accessions as json files. In Test mode, just use the files already saved.

    print('Saving Avery accession data to ' + avery_acc_file + '....')

    # Only fetch file if not in Test mode
    with open(avery_acc_file, "w+") as f:
        try:
            x = asf.getAccessions(3)
            f.write(x)
        except Exception as err:
            raise ValueError(
                "There was an error in getting Avery accession data!") from err

        y = json.loads(x)
        if y and 'error' in y[0]:
            print(y[0]['error'])

    print('Saving RBML accession data to ' + rbml_acc_file + '....')

    with open(rbml_acc_file, "w+") as f:
        try:
            x = asf.getAccessions(2)
            f.write(x)