Example #1
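Fetches accession records from the ArchivesSpace API for three repositories (Avery, RBML, RBMLBOOKS), flattens selected fields, writes the results to a Google Sheet, and reports accessions created within the past week.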
import datetime
import json
import os
import re
from operator import itemgetter

import dateutil.parser

# Assumed project-local imports: asf (ArchivesSpace API helpers), dataSheet
# (Google Sheets wrapper), and log_it, along with the constants MY_NAME,
# SCRIPT_NAME, DEBUG, LOOKUP_CSV, and target_server, are defined elsewhere
# in the project.


def main():
    now1 = datetime.datetime.now()
    start_time = str(now1)
    end_time = ''  # set later
    # day_offset = now1.weekday() + 1 # Calculate the Sunday of current week
    day_offset = 7  # use past seven days, regardless of current day

    print('Script ' + MY_NAME + ' began at ' + start_time + '.')

    if not DEBUG:
        the_sheet_id = '1JA5bRSnYV80sx4m5SOFQ6QJ4u21SXvQeNdNbuRVCdds'
    else:
        the_sheet_id = '1e_TAK8eUsaHltBu9J5bNO1twThqt7_nE5olmz2pdCUw'  # test doc
        day_offset = 14  # use past 2 weeks for testing

    # Set date stamp day_offset days back to determine recently created accessions.
    begin_of_week = (now1 - datetime.timedelta(day_offset)).date()

    the_sheet_rbml = dataSheet(the_sheet_id, 'rbml!A:Z')
    the_sheet_avery = dataSheet(the_sheet_id, 'avery!A:Z')
    the_sheet_rbmlbooks = dataSheet(the_sheet_id, 'rbmlbooks!A:Z')

    # Location to save output
    if DEBUG:
        out_folder = "/cul/cul0/ldpd/archivesspace/test/accessions"
    else:
        out_folder = "/cul/cul0/ldpd/archivesspace/accessions"

    rbml_acc_file = os.path.join(out_folder, 'report_rbml_accessions.json')
    avery_acc_file = os.path.join(out_folder, 'report_avery_accessions.json')
    rbmlbooks_acc_file = os.path.join(out_folder,
                                      'report_rbmlbooks_accessions.json')

    print(' ')

    print('Starting accession report in ' +
          'https://docs.google.com/spreadsheets/d/' + str(the_sheet_id) +
          '/edit?usp=sharing')

    if not DEBUG:
        # Save the accessions as json files. In DEBUG mode, just use the files already saved.
        print('Saving Avery accession data to ' + avery_acc_file + '....')

        # Only fetch file if not in Debug mode
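        # ArchivesSpace repository IDs: 3 = Avery, 2 = RBML, 6 = RBMLBOOKS
        # (these match the repo_id values in the_info below).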
        with open(avery_acc_file, "w+") as f:
            try:
                x = asf.getAccessions(3)
                f.write(x)
            except Exception as err:
                raise ValueError(
                    "There was an error in getting Avery accession data!") from err

            y = json.loads(x)
            if y and 'error' in y[0]:
                print(y[0]['error'])

        print('Saving RBML accession data to ' + rbml_acc_file + '....')

        with open(rbml_acc_file, "w+") as f:
            try:
                x = asf.getAccessions(2)
                f.write(x)
            except Exception as err:
                raise ValueError(
                    "There was an error in getting RBML accession data!") from err

            y = json.loads(x)
            if y and 'error' in y[0]:
                print(y[0]['error'])

        print('Saving RBMLBOOKS accession data to ' + rbmlbooks_acc_file +
              '....')

        with open(rbmlbooks_acc_file, "w+") as f:
            try:
                x = asf.getAccessions(6)
                f.write(x)
            except Exception as err:
                raise ValueError(
                    "There was an error in getting RBMLBOOKS accession data!") from err

            y = json.loads(x)
            if y and 'error' in y[0]:
                print(y[0]['error'])

    print(' ')

    # the_files = [
    #         [avery_acc_file, the_sheet_avery],
    #         [rbml_acc_file, the_sheet_rbml]
    #              ]

    the_recents = {}

    the_info = [{
        'repo_name': 'Avery',
        'repo_id': 3,
        'acc_file': avery_acc_file,
        'the_sheet': the_sheet_avery
    }, {
        'repo_name': 'RBML',
        'repo_id': 2,
        'acc_file': rbml_acc_file,
        'the_sheet': the_sheet_rbml
    }, {
        'repo_name': 'RBMLBOOKS',
        'repo_id': 6,
        'acc_file': rbmlbooks_acc_file,
        'the_sheet': the_sheet_rbmlbooks
    }]
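    # Each entry pairs a repository with its cached JSON file and its
    # destination tab in the sheet.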

    # The top-level elements to save from the JSON (each can be further processed below)
    the_keys = {
        "title": "title",
        "uri": "uri",
        "repository": "repository",
        "accession_date": "accession_date",
        "id_0": "id_0",
        "id_1": "id_1",
        "id_2": "id_2",
        "id_3": "id_3",
        "extents": "extents",
        "related_resources": "related_resources",
        "collection_management": "collection_management",
        "user_defined": "user_defined",
        "create_time": "create_time",
        "system_mtime": "system_mtime",
        "last_modified_by": "last_modified_by"
    }
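    # Several of these (repository, extents, related_resources,
    # collection_management, user_defined) are nested objects that are
    # flattened into simple columns below.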

    ext_dict = {
        "ext-number": "number",
        "ext-portion": "portion",
        "ext-type": "extent_type"
    }
    for repo_info in the_info:

        the_file = repo_info['acc_file']
        the_target = repo_info['the_sheet']
        repo_name = repo_info['repo_name']

        with open(the_file) as f:
            the_data = json.load(f)

        all_rows = []

        for an_accession in the_data:
            # acc_info: working dict for this accession, refined step by step below.
            acc_info = {}
            for key, value in the_keys.items():
                try:
                    acc_info.update({key: an_accession[value]})
                except (IndexError, KeyError):
                    acc_info.update({key: ""})

            # Refine elements by extracting subelements, etc.

            # Handle collection_management
            cm = acc_info["collection_management"]
            cm_dict = {
                "processing_priority": "processing_priority",
                "processing_status": "processing_status"
            }
            for key, value in cm_dict.items():
                try:
                    acc_info[key] = cm[value]
                except (IndexError, KeyError, TypeError):
                    acc_info[key] = ''

            acc_info.pop("collection_management")

            # Parse resource id and get bibid
            res = acc_info["related_resources"]
            if len(res) > 0:
                res_url = res[0]["ref"]
                repo = res_url.split('/')[2]
                asid = res_url.split('/')[4]
                bibid = asf.lookupBibID(repo, asid, LOOKUP_CSV)
            else:
                bibid = ''
                asid = ''
            acc_info["resource_bibid"] = bibid
            acc_info["resource_asid"] = asid
            acc_info.pop("related_resources")

            # Parse BibID out of user_defined / integer_1
            try:
                usdef = acc_info["user_defined"]
                acc_info['integer_1'] = usdef['integer_1']
            except (KeyError, TypeError):
                acc_info['integer_1'] = ''
            acc_info.pop("user_defined")

            # Google Sheets would treat a leading "+" in id_3 as the start of
            # a formula; prefix an apostrophe so it displays as literal text.
            acc_info["id_3"] = re.sub(r"^\+", "'+", acc_info["id_3"])

            # Handle repository
            repository = acc_info["repository"]
            if len(repository) > 0:
                repo_url = repository["ref"]
                repo = repo_url.split('/')[2]
            else:
                repo = ''
            acc_info["repo"] = repo
            acc_info.pop("repository")

            # Handle date
            acc_date = acc_info["accession_date"]
            yyyy, mm, dd = (int(part) for part in acc_date.split('-'))
            the_date = datetime.date(yyyy, mm, dd)
            # Due to a legacy import issue, some records with unknown dates
            # have malformed dates like 0002-01-23. Acknowledge their
            # unknownness.
            if the_date.year < 1700:
                acc_info["accession_date"] = "0000-00-00"
                acc_info["year"] = ""
                acc_info["fiscal-year"] = ""
            else:
                acc_info["year"] = the_date.year
                # Fiscal year
                if the_date.month > 6:
                    acc_info["fiscal-year"] = the_date.year + 1
                else:
                    acc_info["fiscal-year"] = the_date.year

            # Handle extents
            ext = acc_info["extents"]
            for key, value in ext_dict.items():
                try:
                    acc_info[key] = ext[0][value]
                except (IndexError, KeyError):
                    acc_info[key] = ''

            acc_info.pop("extents")

            # Clean up titles
            acc_info['title'] = str(acc_info['title']).strip()

            # Uncomment to list records in log.
            # print("processing: " + str(acc_info["uri"]).strip() + ' / ' + str(acc_info["title"]).strip() )

            all_rows.append(acc_info)

        processed_msg = 'Processed ' + \
            str(len(all_rows)) + ' records in ' + repo_name + '.'
        print(processed_msg)

        log_it(SCRIPT_NAME, processed_msg)

        # the_heads = list(all_rows[0].keys())

        # Explicitly order the columns; the dict keys are not in display
        # order after the reshaping above.
        the_heads = [
            'title', 'uri', 'accession_date', 'id_0', 'id_1', 'id_2', 'id_3',
            'integer_1', 'resource_bibid', 'resource_asid', 'repo', 'year',
            'fiscal-year', 'ext-number', 'ext-portion', 'ext-type',
            'processing_priority', 'processing_status', 'create_time',
            'system_mtime', 'last_modified_by'
        ]

        the_output = []

        # Build row in order specified by the_heads
        for a_row in all_rows:
            # r = list(a_row.values())
            r = [a_row[h] for h in the_heads]
            the_output.append(r)
            # print(a_row)

        # Sort by accession_date (index 2 in each row), newest first.
        the_output = sorted(the_output, key=itemgetter(2), reverse=True)

        # Get list of recents
        the_recents[repo_name] = []

        for i in the_output:
            # i[18] is the create_time column
            i_date = dateutil.parser.isoparse(i[18]).date()
            if i_date > begin_of_week:
                the_recents[repo_name].append(i)

        # If there are recents, list them
        if the_recents[repo_name]:
            print(' ')
            recent_msg = str(len(the_recents[repo_name])) + \
                ' accessions recently added in ' + repo_name + ': '
            print(recent_msg)
            log_it(SCRIPT_NAME, recent_msg)
            print('-----------')
            for r in the_recents[repo_name]:
                print(r[0])
                print(r[1])
                print('Created ' + str(dateutil.parser.isoparse(r[18]).date()))
                print('Last edited by ' + r[20])
                print('-----------')
        else:
            print(' ')
            recent_msg = 'No recently created accessions in ' + repo_name
            print(recent_msg)
            log_it(SCRIPT_NAME, recent_msg)

            # print(the_recents[repo_name])

        the_output.insert(0, the_heads)

        print(' ')

        the_target.clear()

        print('Writing ' + repo_name + ' data to sheet ...')
        the_target.appendData(the_output)

        print(' ')

    # Generate a log message and append it to the log tab, if one exists.
    the_tabs = the_target.initTabs

    now2 = datetime.datetime.now()
    end_time = str(now2)
    my_duration = str(now2 - now1)

    if DEBUG:
        log_prefix = '[TEST] '
    else:
        log_prefix = ''
    the_log = (log_prefix + 'Data imported from ' + target_server + ' by ' +
               MY_NAME + '. Start: ' + start_time + '. Finished: ' + end_time +
               ' (duration: ' + my_duration + ').')

    if 'log' in the_tabs:
        log_range = 'log!A:A'
        # today = datetime.datetime.today().strftime('%c')
        dataSheet(the_sheet_id, log_range).appendData([[the_log]])
    else:
        print('*** Warning: There is no log tab in this sheet. ***')

    print(' ')

    print(the_log)
    log_it(SCRIPT_NAME, the_log)

    print(' ')

    exit_msg = 'Script done. Updated data is available at ' + \
        'https://docs.google.com/spreadsheets/d/' + \
        str(the_sheet_id) + '/edit?usp=sharing'
    print(exit_msg)
    log_it(SCRIPT_NAME, exit_msg)
Example #2
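A variant of the fetch-and-save step from Example #1, from a version of the script that switches on a mode flag ('Prod' vs. 'Test') instead of the DEBUG boolean.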
print(' ')

print('Starting accession report in ' +
      'https://docs.google.com/spreadsheets/d/' + str(the_sheet_id) +
      '/edit?usp=sharing')

if mode == 'Prod':
    # Save the accessions as json files. In Test mode, just use the files already saved.

    print('Saving Avery accession data to ' + avery_acc_file + '....')

    # Only fetch file if not in Test mode
    with open(avery_acc_file, "w+") as f:
        try:
            x = asf.getAccessions(3)
            f.write(x)
        except Exception as err:
            raise ValueError(
                "There was an error in getting Avery accession data!") from err

        y = json.loads(x)
        if y and 'error' in y[0]:
            print(y[0]['error'])

    print('Saving RBML accession data to ' + rbml_acc_file + '....')

    with open(rbml_acc_file, "w+") as f:
        try:
            x = asf.getAccessions(2)
            f.write(x)