def count_total():
    meta_ids = []
    total = 0
    with session_scope() as db_session:
        recs = retrieve_records(db_session, WCSourceMeta)
        for rec in recs:
            total += 1
            meta_ids.append(rec.wcsmid)
    return total, meta_ids
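# Usage sketch (illustrative; the progress bar wiring is an assumption):
# count_total() reports the size of the current batch and the datastore
# ids needed to iterate it, e.g.:
#
#   total, meta_ids = count_total()
#   progbar["maximum"] = total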
def set_oclc_holdings(dst_fh):
    """Sets institutional holdings in WorldCat for selected, matched records
    and logs any OCLC numbers whose holdings could not be set to a
    holdings-issues.csv file saved next to dst_fh."""
    oclc_numbers = []
    hold_not_set = []
    with session_scope() as db_session:
        recs = retrieve_related(db_session, WCSourceMeta, "wchits", selected=True)
        for r in recs:
            if r.wchits.match_oclcNo:
                oclc_numbers.append(str(r.wchits.match_oclcNo))

        # update holdings
        batch_rec = retrieve_record(db_session, WCSourceBatch)
        creds = get_credentials(batch_rec.api)
        token = get_token(creds)
        with MetadataSession(credentials=token) as session:
            responses = session.holdings_set_batch(oclc_numbers)
            holdings = holdings_responses(responses)
            if holdings:
                for oclcNo, holding in holdings.items():
                    recs = retrieve_records(db_session, WCHit, match_oclcNo=oclcNo)
                    for rec in recs:
                        # "set" and "exists" both mean the holding is in place
                        holding_set = holding[0] in ("set", "exists")
                        update_hit_record(
                            db_session,
                            WCHit,
                            rec.wchid,
                            holding_set=holding_set,
                            holding_status=holding[0],
                            holding_response=holding[1],
                        )
        db_session.commit()

        # verify all selected had holdings set
        recs = retrieve_related(db_session, WCSourceMeta, "wchits", selected=True)
        for r in recs:
            if not r.wchits.holding_set:
                hold_not_set.append(r.wchits.match_oclcNo)

    fh_csv = os.path.join(os.path.split(dst_fh)[0], "holdings-issues.csv")
    if hold_not_set:
        for oclcNo in hold_not_set:
            save2csv(fh_csv, [oclcNo])
        return False
    else:
        return True
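# Minimal sketch (an assumption, not part of the production flow): the
# `holdings` mapping consumed by set_oclc_holdings() is taken here to have
# the shape {oclcNo: (status, server_response)}. This hypothetical helper
# mirrors how a status string is translated into the boolean stored on
# WCHit records; the sample values in the docstring are made up.
def _interpret_holding_status(status):
    """Return True when an OCLC holdings status indicates the holding
    is in place.

    >>> _interpret_holding_status("set")
    True
    >>> _interpret_holding_status("failed")
    False
    """
    return status in ("set", "exists")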
def get_ftp_connections(system):
    with session_scope() as db_session:
        names = retrieve_records(db_session, FTPs, system=system)
        return [x.name for x in names]
def launch_process(
    source_fh,
    data_source,
    system,
    library,
    progbar1,
    progbar2,
    process_label,
    hits,
    nohits,
    skipped,
    meet_crit_counter,
    fail_user_crit_counter,
    fail_glob_crit_counter,
    action,
    encode_level,
    mat_type,
    cat_rules,
    cat_source,
    recap_range,
    id_type="ISBN",
    api=None,
):
    """
    work notes:
    1. iterate through the source files and extract bib/order metadata
    2. temporarily persist this data in local datastore
    3. iterate over the batch and find best hit for each
    4. persist in local store matched record as a pymarc object
    5. display results (with all data needed for Sierra import) to user
    6. allow user to decide what to write to final file

    args:
        source_fh: str, file path
        data_source: str, 'Sierra export' or 'IDs list'
        system: str, 'NYPL' or 'BPL'
        library: str, 'research' or 'branches'
        progbar1: tkinter widget, overall progressbar
        progbar2: tkinter widget, task progressbar
        process_label: tkinter StringVar, current task label
        hits: tkinter IntVar, hits counter
        nohits: tkinter IntVar, failed search counter
        skipped: tkinter IntVar, skipped records counter
        meet_crit_counter: tkinter IntVar, success match & eval counter
        fail_user_crit_counter: tkinter IntVar, failed user criteria counter
        fail_glob_crit_counter: tkinter IntVar, failed global criteria counter
        action: str, 'catalog' or 'upgrade'
        encode_level: str, 'any', ...
        mat_type: str, 'any', 'print', 'large print', 'dvd', 'bluray'
        cat_rules: str, 'any', 'RDA-only'
        cat_source: str, 'any', 'DLC'
        recap_range: list, lower and upper limits of ReCAP numbers
        id_type: str, 'ISBN', 'UPC', 'ISSN', 'LCCN', 'OCLC #'
        api: str, name of the API to be used for queries
    """
    if mat_type == "":
        mat_type = None
    if cat_source == "":
        cat_source = None

    module_logger.debug(
        "Launching W2S process. "
        "Params: source_fh:{}, data_source:{}, system:{}, "
        "library:{}, action:{}, encode_level:{}, mat_type:{}, "
        "cat_rules:{}, cat_source:{}, recap_range:{}, id_type:{}, "
        "api:{}".format(
            source_fh,
            data_source,
            system,
            library,
            action,
            encode_level,
            mat_type,
            cat_rules,
            cat_source,
            recap_range,
            id_type,
            api,
        )
    )

    processed_counter = 0
    found_counter = 0
    not_found_counter = 0
    skipped_counter = 0

    remove_previous_process_data()

    # validate correctness of sierra export
    process_label.set("reading:")
    if data_source == "Sierra export":
        sierra_export_reader(source_fh, system, progbar1, progbar2)
    elif data_source == "IDs list":
        id_list_reader(source_fh, progbar1, progbar2)

    # keep track of ReCAP call numbers
    if recap_range:
        recap_no = recap_range[0]
    else:
        recap_no = None

    with session_scope() as db_session:
        # create batch record
        batch_rec = insert_or_ignore(
            db_session,
            WCSourceBatch,
            file=source_fh,
            system=system,
            library=library,
            action=action,
            api=api,
            data_source=data_source,
            encode_level=encode_level,
            mat_type=mat_type,
            cat_rules=cat_rules,
            cat_source=cat_source,
            id_type=id_type,
        )
        db_session.flush()
        batch_id = batch_rec.wcsbid

        # parse depending on the data source
        if data_source == "IDs list":
            with open(source_fh, "r") as file:
                reader = csv.reader(file)
                # skip header
                next(reader)
                if id_type == "ISBN":
                    for row in reader:
                        meta = BibOrderMeta(
                            system=system,
                            dstLibrary=library,
                            t020=[parse_isbn(row[0])],
                        )
                        insert_or_ignore(
                            db_session, WCSourceMeta, wcsbid=batch_id, meta=meta
                        )
                        update_progbar(progbar1)
                        update_progbar(progbar2)
                elif id_type == "UPC":
                    raise OverloadError("Not implemented.")
                    # will be implemented later
                    # for row in reader:
                    #     meta = BibOrderMeta(
                    #         system=system,
                    #         dstLibrary=library,
                    #         t024=[parse_upc(row[0])])
                elif id_type == "OCLC #":
                    for row in reader:
                        meta = BibOrderMeta(
                            system=system,
                            dstLibrary=library,
                            t001=row[0],
                        )
                        insert_or_ignore(
                            db_session, WCSourceMeta, wcsbid=batch_id, meta=meta
                        )
                        update_progbar(progbar1)
                        update_progbar(progbar2)
                else:
                    raise OverloadError("Not implemented.")

        elif data_source == "Sierra export":
            data = sierra_export_data(source_fh, system, library)
            for meta, single_order in data:
                if single_order is None:
                    row = ["b{}a".format(meta.sierraId), meta.title]
                    skipped_counter += 1
                    skipped.set(skipped_counter)
                    save2csv(W2S_SKIPPED_ORD, row)
                    progbar1["maximum"] = progbar1["maximum"] - 3
                elif single_order is False:
                    row = ["b{}a".format(meta.sierraId), meta.title]
                    skipped_counter += 1
                    skipped.set(skipped_counter)
                    save2csv(W2S_MULTI_ORD, row)
                    progbar1["maximum"] = progbar1["maximum"] - 3
                else:
                    insert_or_ignore(
                        db_session, WCSourceMeta, wcsbid=batch_id, meta=meta
                    )
                    update_progbar(progbar1)
                    update_progbar(progbar2)

        creds = get_credentials(api)
        wskey = creds["key"]
        db_session.commit()

        # query Worldcat
        process_label.set("querying:")
        # reset progbar2
        progbar2["value"] = 0
        metas = retrieve_records(db_session, WCSourceMeta, wcsbid=batch_id)
        with SearchSession(credentials=wskey) as session:
            for m in metas:
                module_logger.debug(m.meta)
                hit = False
                if m.meta.t001:
                    query = construct_sru_query(
                        m.meta.t001,
                        keyword_type="OCLC #",
                        mat_type=mat_type,
                        cat_source=cat_source,
                    )
                    res = session.sru_query(query=query)
                    module_logger.debug("OCLC# request: {}".format(res.url))
                    hit = interpret_search_response(res, db_session, m.wcsmid)
                    if hit:
                        found_counter += 1

                if m.meta.t010 and not hit:
                    query = construct_sru_query(
                        m.meta.t010,
                        keyword_type="LCCN",
                        mat_type=mat_type,
                        cat_source=cat_source,
                    )
                    res = session.sru_query(query=query)
                    module_logger.debug("LCCN request: {}".format(res.url))
                    hit = interpret_search_response(res, db_session, m.wcsmid)
                    if hit:
                        found_counter += 1

                if m.meta.t020 and not hit:
                    # will iterate over all ISBNs if no hits
                    for isbn in m.meta.t020:
                        query = construct_sru_query(
                            isbn,
                            keyword_type="ISBN",
                            mat_type=mat_type,
                            cat_source=cat_source,
                        )
                        res = session.sru_query(query=query)
                        module_logger.debug("ISBN request: {}".format(res.url))
                        hit = interpret_search_response(res, db_session, m.wcsmid)
                        if hit:
                            found_counter += 1
                            break  # stop searching

                if m.meta.t024 and not hit:
                    for upc in m.meta.t024:
                        query = construct_sru_query(
                            upc,
                            keyword_type="UPC",
                            mat_type=mat_type,
                            cat_source=cat_source,
                        )
                        res = session.sru_query(query=query)
                        module_logger.debug("UPC request: {}".format(res.url))
                        hit = interpret_search_response(res, db_session, m.wcsmid)
                        if hit:
                            found_counter += 1
                            break  # stop searching

                if not hit:
                    not_found_counter += 1
                    module_logger.debug(
                        "Unable to find any matches in Worldcat for {}.".format(
                            m.meta
                        )
                    )
                    interpret_search_response(None, db_session, m.wcsmid)

                hits.set(found_counter)
                nohits.set(not_found_counter)
                update_progbar(progbar1)
                update_progbar(progbar2)
                processed_counter += 1

        db_session.commit()

        # check if meet criteria
        process_label.set("analyzing:")
        progbar2["value"] = 0
        rows = retrieve_records(db_session, WCHit, hit=True)
        for row in rows:
            results = row.query_results
            recs = results2record_list(results)
            for xml_record in recs:
                fulfills = False
                fail_types = []
                if meets_upgrade_criteria(xml_record):
                    if meets_user_criteria(
                        xml_record, encode_level, mat_type, cat_rules, cat_source
                    ):
                        fulfills = True
                        if action == "upgrade":
                            meet_crit_counter.set(meet_crit_counter.get() + 1)
                            oclcNo = get_oclcNo(xml_record)
                            update_hit_record(
                                db_session, WCHit, row.wchid, match_oclcNo=oclcNo
                            )
                            update_progbar(progbar1)
                            update_progbar(progbar2)
                            break
                        elif action == "catalog":
                            if meets_catalog_criteria(xml_record, library):
                                fulfills = True
                                meet_crit_counter.set(meet_crit_counter.get() + 1)
                                oclcNo = get_oclcNo(xml_record)
                                update_hit_record(
                                    db_session,
                                    WCHit,
                                    row.wchid,
                                    match_oclcNo=oclcNo,
                                )
                                update_progbar(progbar1)
                                update_progbar(progbar2)
                                break
                            else:
                                fail_types.append("global")
                    else:
                        fail_types.append("user")
                else:
                    fail_types.append("global")
            if not fulfills:
                if "user" in fail_types:
                    fail_user_crit_counter.set(fail_user_crit_counter.get() + 1)
                else:
                    fail_glob_crit_counter.set(fail_glob_crit_counter.get() + 1)
        db_session.commit()

        # download and prep
        process_label.set("downloading:")
        # reset progbar2
        progbar2["value"] = 0

        # obtain access token
        token = get_token(creds)
        if token.token_str is None:
            module_logger.error(
                "Worldcat token not obtained. Error: {}.".format(
                    token.server_response
                )
            )
        else:
            module_logger.debug("Worldcat token obtained.")

        # open Metadata API session
        with MetadataSession(credentials=token) as session:
            metas = retrieve_related(
                db_session, WCSourceMeta, "wchits", wcsbid=batch_id
            )
            for m in metas:
                if m.wchits.match_oclcNo:
                    xml_record = request_record(session, m.wchits.match_oclcNo)
                    if xml_record is not None:
                        update_hit_record(
                            db_session,
                            WCHit,
                            m.wchits.wchid,
                            match_marcxml=xml_record,
                        )
                update_progbar(progbar1)
                update_progbar(progbar2)

        db_session.commit()

        # prepare MARC files
        process_label.set("prepping:")
        progbar2["value"] = 0

        # check if Sierra bib # provided and use it for the overlay command line
        rows = retrieve_records(db_session, WCSourceMeta, wcsbid=batch_id)
        for row in rows:
            # initial workflow shared with the upgrade functionality
            xml_record = row.wchits.match_marcxml
            if xml_record is not None:
                marc_record = marcxml2array(xml_record)[0]
                remove_unsupported_subject_headings(system, marc_record)
                remove_unwanted_tags(marc_record)
                remove_ebook_isbns(marc_record)
                marc_record.remove_fields("901", "907", "945", "949", "947")
                initials = create_initials_field(system, library, "W2Sbot")
                marc_record.add_ordered_field(initials)

                if data_source == "Sierra export":
                    order_data = row.meta
                    if order_data.sierraId:
                        overlay_tag = create_target_id_field(
                            system, order_data.sierraId
                        )
                        marc_record.add_ordered_field(overlay_tag)

                if system == "NYPL":
                    marc_record.remove_fields("001", "910")
                    tag_001 = nypl_oclcNo_field(xml_record)
                    marc_record.add_ordered_field(tag_001)

                    # add Sierra bib code 3 and default location
                    if library == "branches":
                        defloc = NBIB_DEFAULT_LOCATIONS["branches"]
                    elif library == "research":
                        defloc = NBIB_DEFAULT_LOCATIONS["research"]
                    tag_949 = create_command_line_field(
                        "*b3=h;bn={};".format(defloc)
                    )
                    marc_record.add_ordered_field(tag_949)

                if action == "catalog":
                    # add call number & persist
                    if data_source == "Sierra export":
                        order_data = row.meta
                        local_fields = create_local_fields(
                            xml_record,
                            system,
                            library,
                            order_data=order_data,
                            recap_no=recap_no,
                        )
                    else:
                        # data source is a list of IDs
                        local_fields = create_local_fields(
                            xml_record, system, library, recap_no=recap_no
                        )

                    if local_fields:
                        for field in local_fields:
                            if field is not None:
                                marc_record.add_ordered_field(field)
                        if system == "NYPL" and library == "research":
                            recap_no += 1

                update_hit_record(
                    db_session, WCHit, row.wchits.wchid, prepped_marc=marc_record
                )
            update_progbar(progbar1)
            update_progbar(progbar2)

        # make sure W2S stays within assigned ReCAP range
        if system == "NYPL" and library == "research":
            if action == "catalog":
                if recap_no > recap_range[1]:
                    raise OverloadError(
                        "Used all available ReCAP call numbers assigned for W2S."
                    )

    # show completed
    progbar1["value"] = progbar1["maximum"]
    progbar2["value"] = progbar2["maximum"]
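# Usage sketch: launch_process() is driven by the W2S Tkinter GUI, which
# supplies the widgets and IntVar/StringVar counters, so the call below is
# shown commented out. The file path, ReCAP range, and API name are
# hypothetical values, not real configuration:
#
#   launch_process(
#       source_fh="C:/temp/sierra_export.txt",
#       data_source="Sierra export",
#       system="NYPL",
#       library="research",
#       progbar1=progbar1,
#       progbar2=progbar2,
#       process_label=process_label,
#       hits=hits,
#       nohits=nohits,
#       skipped=skipped,
#       meet_crit_counter=meet_crit_counter,
#       fail_user_crit_counter=fail_user_crit_counter,
#       fail_glob_crit_counter=fail_glob_crit_counter,
#       action="catalog",
#       encode_level="any",
#       mat_type="print",
#       cat_rules="RDA-only",
#       cat_source="DLC",
#       recap_range=[40000001, 40999999],
#       id_type="ISBN",
#       api="Worldcat-NYPL",
#   )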
def get_template_names(agent):
    # agent arg must be a 3-letter code
    with session_scope() as session:
        values = retrieve_records(session, NYPLOrderTemplate, agent=agent)
        return [x.tName for x in values]
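# Usage sketch (the argument values are hypothetical): the lookup helpers
# in this module return plain lists of names, e.g. for populating GUI
# dropdowns:
#
#   templates = get_template_names("bta")
#   ftp_names = get_ftp_connections("NYPL")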