def create_marc_file(system, dst_fh, no_holdings_msg=None):
    with session_scope() as db_session:
        recs = retrieve_related(db_session, WCSourceMeta, "wchits", selected=True)
        for r in recs:
            marc = r.wchits.prepped_marc
            if marc:
                # delete unsupported headings
                # remove_unsupported_subject_headings(marc)
                # remove_unwanted_tags(marc)
                # remove_ebook_isbns(marc)

                # add barcode if added by user
                if r.barcode is not None:
                    for field in marc.get_fields("949"):
                        if field.indicators == [" ", "1"]:
                            field.add_subfield("i", r.barcode)

                if no_holdings_msg:
                    msg = "OCLC holdings not updated"
                    if system == "NYPL":
                        field = marc["901"]
                    elif system == "BPL":
                        field = marc["947"]
                    if "h" not in field:
                        field.add_subfield("h", msg)
                    else:
                        field["h"] = msg

                try:
                    write_marc21(dst_fh, marc)
                except TypeError:
                    module_logger.error(
                        "Unable to create marc file for record: "
                        "wchid: {}, oclcNo: {}".format(
                            r.wchits.wchid, r.wchits.match_oclcNo
                        )
                    )
                    raise

    if ".mrc" in dst_fh:
        dst_fh = dst_fh.replace(".mrc", ".csv")
    else:
        dst_fh = "{}.csv".format(dst_fh)

    header = ["position", "result", "title", "ISBN"]
    save2csv(dst_fh, header)

    with session_scope() as db_session:
        recs = retrieve_related(db_session, WCSourceMeta, "wchits")
        for r in recs:
            if r.selected and r.wchits.prepped_marc:
                result = "pass"
            else:
                result = "reject"
            try:
                row = [r.wchits.wchid, result, r.meta.title, r.meta.t020[0]]
            except IndexError:
                row = [r.wchits.wchid, result, r.meta.title, None]
            save2csv(dst_fh, row)

def save_stats():
    module_logger.debug('Saving batch stats.')
    batch = shelve.open(BATCH_META)
    timestamp = batch['timestamp']
    system = batch['system']
    library = batch['library']
    agent = batch['agent']
    file_qty = len(batch['file_names'])
    batch.close()

    try:
        df = reports.shelf2dataframe(BATCH_STATS, system)
    except ValueError:
        df = None

    if df is not None:
        stats = reports.create_stats(system, df)
        with session_scope() as session:
            # find out if timestamp already added
            # if not add records
            # add batch record
            record = insert_or_ignore(
                session, PVR_Batch,
                timestamp=timestamp,
                system=system,
                library=library,
                agent=agent,
                file_qty=file_qty)
            session.flush()
            bid = record.bid
            for row in stats.iterrows():
                name = row[1]['vendor']
                record = insert_or_ignore(session, Vendor, name=name)
                session.flush()
                vid = record.vid
                if system == 'nypl':
                    record = insert_or_ignore(
                        session, PVR_File,
                        bid=bid, vid=vid,
                        new=row[1]['insert'],
                        dups=row[1]['attach'],
                        updated=row[1]['update'],
                        mixed=row[1]['mixed'],
                        other=row[1]['other'])
                else:
                    record = insert_or_ignore(
                        session, PVR_File,
                        bid=bid, vid=vid,
                        new=row[1]['insert'],
                        dups=row[1]['attach'],
                        updated=row[1]['update'])
    else:
        module_logger.warning(
            'Unable to create dataframe from the BATCH_STATS.')
        raise OverloadError(
            'Encountered problems while trying to save statistics.')

def cumulative_bpl_stats(start_date, end_date):
    """
    Produces dataframe with cumulative statistics of processed BPL records
    """
    stmn = text(
        """
        SELECT pvr_batch.system, sum(pvr_file.new), sum(pvr_file.dups),
               sum(pvr_file.updated), vendor.name
        FROM pvr_file
        JOIN pvr_batch ON pvr_file.bid = pvr_batch.bid
        JOIN vendor ON pvr_file.vid = vendor.vid
        WHERE pvr_batch.system = "bpl"
            AND pvr_batch.timestamp>=:start_date
            AND pvr_batch.timestamp<:end_date
        GROUP BY vendor.name
        """
    )
    stmn = stmn.bindparams(start_date=start_date, end_date=end_date)

    with session_scope() as session:
        results = session.execute(stmn)
        labels = ["system", "insert", "attach", "overlay", "vendor"]
        df = pd.DataFrame.from_records(results, columns=labels)
        df["total loaded"] = df["insert"] + df["attach"] + df["overlay"]
        df = df[["vendor", "insert", "attach", "overlay", "total loaded"]]

    return df

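# Illustrative usage sketch (not part of the original module): cumulative_bpl_stats
# expects date values that the timestamp comparison in the SQL above can interpret;
# the date strings and the CSV destination below are assumptions for demonstration.
def _example_bpl_stats_report():
    df = cumulative_bpl_stats("2020-01-01", "2020-02-01")
    # returned dataframe columns: vendor, insert, attach, overlay, total loaded
    df.to_csv("bpl-cumulative-stats.csv", index=False)
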
def store_connection(name, host, folder, user, password, system):
    if name == '':
        name = None
    if host == '':
        host = None
    if folder == '':
        folder = None
    if system == '':
        system = None
    if user == '':
        user = None
    else:
        user = base64.b64encode(user)
    if password == '':
        password = None
    else:
        password = base64.b64encode(password)

    try:
        with session_scope() as db_session:
            insert_or_ignore(
                db_session, FTPs,
                name=name,
                host=host,
                folder=folder,
                user=user,
                password=password,
                system=system)
    except IntegrityError as e:
        module_logger.error('Unable to store FTP details. Error: {}'.format(e))
        raise OverloadError('Error. The name of the new connection is\n'
                            'already used or some of the required elements\n'
                            'are missing')

def create_report(query_date=None):
    if query_date is None:
        query_date = date.today().strftime('%Y-%m-%d')

    stmn = """SELECT timestamp, bid, bibs.title, bibs.b_call,
                     copies.oid, copies.copies, code, description
              FROM tickets
              JOIN bibs ON tickets.bid = bibs.id
              JOIN copies ON tickets.id = copies.tid
              JOIN tick_conf_joiner ON tick_conf_joiner.tid = tickets.id
              JOIN conflicts ON tick_conf_joiner.cid = conflicts.id
              WHERE timestamp LIKE "{}%"
           """.format(query_date)

    msg = []
    msg.append('BPL QCbot report for day {}:'.format(query_date))
    msg.append('{}\t{}\t{}\t{}\t{}\t{}\t{}'.format(
        'bib id' + ' ' * 4,
        'order id' + ' ' * 2,
        'copies',
        'title' + ' ' * 20,
        'call #' + ' ' * 19,
        'error code',
        'error description'))

    with session_scope() as session:
        results = run_query(session, stmn)
        for record in results:
            # shorten title if needed
            try:
                title = record.title[:25]
            except IndexError:
                title = record.title
            except TypeError:
                title = ''

            # shorten call number if needed
            try:
                callNo = record.b_call[:25]
            except IndexError:
                callNo = record.b_call
            except TypeError:
                callNo = ''

            # copies
            clen = 6 - len('{}'.format(record.copies))
            copies = '{}{}'.format(record.copies, ' ' * clen)

            # title
            tlen = 25 - len(title)
            title = '{}{}'.format(title, ' ' * tlen)

            # call number
            cnlen = 25 - len(callNo)
            callNo = '{}{}'.format(callNo, ' ' * cnlen)

            # error code
            code = '{}{}'.format(record.code, ' ' * 3)

            # create a new line with data
            msg.append('b{}a\to{}a\t{}\t{}\t{}\t{}\t{}'.format(
                record.bid, record.oid, copies, title, callNo, code,
                record.description))

    return '\n'.join(msg)

def update_template(otid, record):
    try:
        with session_scope() as session:
            update_nypl_template(session, otid, **record)
    except IntegrityError as e:
        module_logger.error('IntegrityError on template update: {}'.format(e))
        raise OverloadError('Duplicate/missing template name\n'
                            'or missing primary matchpoint')

def save_template(record):
    try:
        with session_scope() as session:
            insert_or_ignore(session, NYPLOrderTemplate, **record)
    except IntegrityError as e:
        module_logger.error('IntegrityError on template save: {}'.format(e))
        raise OverloadError('Duplicate/missing template name\n'
                            'or missing primary matchpoint')

def save_cart_info(sheet_id, system_id):
    with session_scope() as session:
        rec = insert(
            session, Cart,
            system_id=system_id,
            shopping_cart_id=sheet_id)
        session.flush()
        return rec.rid

def count_total():
    meta_ids = []
    with session_scope() as db_session:
        recs = retrieve_records(db_session, WCSourceMeta)
        total = 0
        for rec in recs:
            total += 1
            meta_ids.append(rec.wcsmid)
    return total, meta_ids

def populate_branch_tab(creds, system_id, sheet_id):
    data = []
    with session_scope() as session:
        branch_records = retrieve_records_ordered_by_code(
            session, Branch, system_id=system_id)
        for record in branch_records:
            if record.code:
                data.append([record.code])
    append2sheet(creds, sheet_id, 'branch codes', data)

def delete_connection(name, system):
    with session_scope() as db_session:
        try:
            delete_record(db_session, FTPs, name=name, system=system)
        except Exception as exc:
            _, _, exc_traceback = sys.exc_info()
            tb = format_traceback(exc, exc_traceback)
            module_logger.error(
                'Unhandled error during deletion of FTP details. {}'.format(tb))
            raise OverloadError(exc)

def remove_previous_process_data():
    module_logger.debug("Deleting previous process data.")
    with session_scope() as db_session:
        # deletes WCSourceBatch data and all related tables
        delete_all_table_data(db_session, WCSourceBatch)
        module_logger.debug("Data from previous run has been deleted.")

    try:
        os.remove(W2S_MULTI_ORD)
        os.remove(W2S_SKIPPED_ORD)
    except WindowsError:
        pass

def cumulative_nypl_stats(start_date, end_date):
    stmn = text(
        """
        SELECT pvr_batch.system, pvr_batch.library,
               sum(pvr_file.new), sum(pvr_file.dups), sum(pvr_file.updated),
               sum(pvr_file.mixed), sum(pvr_file.other),
               vendor.name
        FROM pvr_file
        JOIN pvr_batch ON pvr_file.bid = pvr_batch.bid
        JOIN vendor ON pvr_file.vid = vendor.vid
        WHERE pvr_batch.system = "nypl"
            AND pvr_batch.timestamp>=:start_date
            AND pvr_batch.timestamp<:end_date
        GROUP BY pvr_batch.library, vendor.name
        """
    )
    stmn = stmn.bindparams(start_date=start_date, end_date=end_date)

    with session_scope() as session:
        results = session.execute(stmn)
        nypl_labels = [
            "system",
            "library",
            "insert",
            "attach",
            "overlay",
            "mixed",
            "other",
            "vendor",
        ]
        df = pd.DataFrame.from_records(results, columns=nypl_labels)

        bdf = df[df["library"] == "branches"]
        bdf = bdf[["vendor", "insert", "attach", "overlay", "mixed", "other"]]
        bdf["total loaded"] = bdf["insert"] + bdf["attach"] + bdf["overlay"]
        bdf.columns = [
            "vendor",
            "insert",
            "attach",
            "overlay",
            "mixed dups",
            "research dups",
            "total loaded",
        ]

        rdf = df[df["library"] == "research"]
        rdf = rdf[["vendor", "insert", "attach", "overlay", "mixed", "other"]]
        rdf["total loaded"] = rdf["insert"] + rdf["attach"] + rdf["overlay"]
        rdf.columns = [
            "vendor",
            "insert",
            "attach",
            "overlay",
            "mixed dups",
            "branches dups",
            "total loaded",
        ]

    return (bdf, rdf)

def get_connection_details(name, system):
    with session_scope() as db_session:
        record = retrieve_record(db_session, FTPs, name=name, system=system)
        if record.user:
            user = base64.b64decode(record.user)
        else:
            user = ''
        if record.password:
            password = base64.b64decode(record.password)
        else:
            password = ''
        return (record.host, user, password, record.folder)

def set_oclc_holdings(dst_fh):
    oclc_numbers = []
    hold_not_set = []

    with session_scope() as db_session:
        recs = retrieve_related(db_session, WCSourceMeta, "wchits", selected=True)
        for r in recs:
            if r.wchits.match_oclcNo:
                oclc_numbers.append(str(r.wchits.match_oclcNo))

        # update holdings
        batch_rec = retrieve_record(db_session, WCSourceBatch)
        creds = get_credentials(batch_rec.api)
        token = get_token(creds)
        with MetadataSession(credentials=token) as session:
            responses = session.holdings_set_batch(oclc_numbers)
            holdings = holdings_responses(responses)
            if holdings:
                for oclcNo, holding in holdings.items():
                    recs = retrieve_records(db_session, WCHit, match_oclcNo=oclcNo)
                    for rec in recs:
                        if holding[0] in ("set", "exists"):
                            holding_set = True
                        else:
                            holding_set = False
                        update_hit_record(
                            db_session,
                            WCHit,
                            rec.wchid,
                            holding_set=holding_set,
                            holding_status=holding[0],
                            holding_response=holding[1],
                        )
        db_session.commit()

        # verify all selected had holdings set
        recs = retrieve_related(db_session, WCSourceMeta, "wchits", selected=True)
        for r in recs:
            if not r.wchits.holding_set:
                hold_not_set.append(r.wchits.match_oclcNo)

    fh_csv = os.path.join(os.path.split(dst_fh)[0], "holdings-issues.csv")
    if hold_not_set:
        for oclcNo in hold_not_set:
            save2csv(fh_csv, [oclcNo])
        return False
    else:
        return True

def persist_choice(meta_ids, selected, barcode_var=None):
    with session_scope() as db_session:
        for mid in meta_ids:
            if barcode_var:
                if barcode_var.get():
                    barcode = barcode_var.get()
                else:
                    barcode = None
            else:
                barcode = None
            update_meta_record(
                db_session, WCSourceMeta, mid, selected=selected, barcode=barcode
            )

def set_new_branch(tabs, sheet_id):
    """
    Parses shopping cart with provided google sheet id and updates
    dst_branch_id column in hold table of the datastore based on
    staff selection
    args:
        tabs: list of str, list of names of google sheets in the spreadsheet
        sheet_id: str, google sheet id
    """
    creds = get_access_token()
    service = discovery.build('sheets', 'v4', credentials=creds)
    sheet = service.spreadsheets()

    for tab in tabs:
        results = sheet.values().get(
            spreadsheetId=sheet_id, range=tab).execute()
        values = results.get('values', [])
        for row in values[1:]:
            iid = None
            try:
                iid = int(row[6])
                # find row with actual data
                if iid:
                    try:
                        loc_code = row[7].strip()
                        dst_branch_id = BRANCH_CODES[loc_code][0]
                    except IndexError:
                        dst_branch_id = BRANCH_CODES[None][0]
                    # print(f'dst_id:{dst_branch_id}')
            except IndexError:
                # row with no data (example a section heading row)
                pass
            except ValueError:
                # log as error or warning
                pass

            if iid is not None:
                with session_scope() as session:
                    hold_rec = retrieve_record(
                        session, Hold,
                        item_id=iid,
                        outstanding=False,
                        issued=False)
                    if hold_rec:
                        update_record(
                            session, Hold, hold_rec.sid,
                            dst_branch_id=dst_branch_id,
                            issued=True)

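# Illustrative usage sketch (not part of the original module): set_new_branch takes
# the sheet tab names and the Google Sheet id of the shopping cart; the tab names
# and the spreadsheet id below are placeholders, not real values.
def _example_set_new_branch():
    tabs = ["fiction", "non-fiction"]  # hypothetical tab names
    sheet_id = "google-sheet-id-goes-here"  # hypothetical spreadsheet id
    set_new_branch(tabs, sheet_id)
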
def save2store(fh, system_id):
    data = sierra_export_reader(fh)
    with session_scope() as session:
        branch_idx = create_code_idx(session, Branch, system_id=system_id)
        mat_cat_idx = create_code_idx(session, MatCat, system_id=system_id)
        audn_idx = create_code_idx(session, Audience)
        lang_idx = create_code_idx(session, Language)
        itemtype_idx = create_code_idx(session, ItemType, system_id=system_id)

        for element in data:
            # parse source shelf code and store it
            shelfcode_id = get_shelfcode_id(
                session, element.location, system_id)

            overflow_item = dict(
                system_id=system_id,
                bib_id=prep_ids(element.bib_id),
                title=prep_title(element.title),
                author=prep_author(element.author),
                call_no=element.call_no.strip(),
                item_id=prep_ids(element.item_id),
                src_branch_id=determine_branch_id(
                    element.location, branch_idx),
                src_branch_shelf_id=shelfcode_id,
                pub_date=parse_pub_date(element.pub_info),
                bib_created_date=string2date(element.bib_created_date),
                item_created_date=string2date(element.item_created_date),
                mat_cat_id=get_mat_cat_id(
                    element.call_no, element.location, element.opac_msg,
                    system_id, mat_cat_idx),
                audn_id=get_audience_id(element.location, audn_idx),
                lang_id=get_language_id(element.call_no, lang_idx),
                item_type_id=get_itemtype_id(
                    element.item_type, itemtype_idx),
                last_out_date=string2date(element.last_out_date),
                total_checkouts=string2int(element.total_checkouts),
                total_renewals=string2int(element.total_renewals))

            insert(session, OverflowItem, **overflow_item)

def issue_holds(api_url, sierra_key, sierra_secret, account_id, cart_id=None):
    if cart_id is None:
        cart_id = get_latest_cart_record().sid

    with SierraSession(api_url, sierra_key, sierra_secret) as ils_session:
        with session_scope() as db_session:
            recs = retrieve_records(
                db_session, Hold,
                cart_id=cart_id,
                outstanding=False,
                issued=True)
            for rec in recs:
                if rec.dst_branch_id != 1:
                    dst_branch = retrieve_record(
                        db_session, Branch, sid=rec.dst_branch_id)
                    response = ils_session.hold_place_on_item(
                        account_id, rec.item_id, dst_branch.code)
                    print(
                        f'i{rec.item_id}a,{rec.dst_branch_id},{dst_branch.code},'
                        f'{response.status_code},{response.text}')
                else:
                    print(f'i{rec.item_id}a,{rec.dst_branch_id},None,,')

def get_bib(meta_id):
    data = []
    with session_scope() as db_session:
        r = retrieve_one_related(db_session, WCSourceMeta, "wchits", wcsmid=meta_id)
        sierra_data = dict(
            title=r.meta.title,
            sierraId=r.meta.sierraId,
            oid=r.meta.oid,
            locs=r.meta.locs,
            venNote=r.meta.venNote,
            note=r.meta.note,
            intNote=r.meta.intNote,
            choice=r.selected,
            barcode=r.barcode,
        )
        if r.wchits.prepped_marc:
            worldcat_data = str(r.wchits.prepped_marc).splitlines()
        else:
            worldcat_data = None
        data.append((r.wchits.wchid, sierra_data, worldcat_data))
        db_session.expunge_all()
    return data

def launch_process(
    source_fh,
    data_source,
    system,
    library,
    progbar1,
    progbar2,
    process_label,
    hits,
    nohits,
    skipped,
    meet_crit_counter,
    fail_user_crit_counter,
    fail_glob_crit_counter,
    action,
    encode_level,
    mat_type,
    cat_rules,
    cat_source,
    recap_range,
    id_type="ISBN",
    api=None,
):
    """
    work notes:
    1. iterate through the source files and extract bib/order metadata
    2. temporarily persist this data in local datastore
    3. iterate over the batch and find best hit for each
    4. persist in local store matched record as a pymarc object
    5. display results (with all data needed for Sierra import) to user
    6. allow user to decide what to write to final file

    args:
        source_fh: str, file path
        data_source: str, 'Sierra export' or 'IDs list'
        system: str, 'NYPL' or 'BPL'
        library: str, 'research' or 'branches'
        progbar1: tkinter widget, overall progressbar
        progbar2: tkinter widget, task progressbar
        process_label: tkinter StringVar, current task label
        hits: tkinter IntVar, hits counter
        nohits: tkinter IntVar, failed search counter
        skipped: tkinter IntVar, skipped record counter
        meet_crit_counter: tkinter IntVar, success match & eval counter
        fail_user_crit_counter: tkinter IntVar, failed user criteria counter
        fail_glob_crit_counter: tkinter IntVar, failed global criteria counter
        action: str, 'catalog' or 'upgrade'
        encode_level: str, 'any', ...
        mat_type: str, 'any', 'print', 'large print', 'dvd', 'bluray'
        cat_rules: str, 'any', 'RDA-only'
        cat_source: str, 'any', 'DLC'
        recap_range: list, upper and lower limits of ReCAP numbers
        id_type: str, 'ISBN', 'UPC', 'ISSN', 'LCCN', 'OCLC #'
        api: str, name of api to be used for queries
    """
    if mat_type == "":
        mat_type = None
    if cat_source == "":
        cat_source = None

    module_logger.debug(
        "Launching W2S process. "
        "Params: source_fh:{}, data_source:{}, system:{}, "
        "library:{}, action:{}, encode_level:{}, mat_type:{}, "
        "cat_rules:{}, cat_source:{}, recap_range:{}, id_type:{}, "
        "api:{}".format(
            source_fh,
            data_source,
            system,
            library,
            action,
            encode_level,
            mat_type,
            cat_rules,
            cat_source,
            recap_range,
            id_type,
            api,
        )
    )

    processed_counter = 0
    found_counter = 0
    not_found_counter = 0
    skipped_counter = 0

    remove_previous_process_data()

    # validate correctness of sierra export
    process_label.set("reading:")
    if data_source == "Sierra export":
        sierra_export_reader(source_fh, system, progbar1, progbar2)
    elif data_source == "IDs list":
        id_list_reader(source_fh, progbar1, progbar2)

    # keep track of recap call numbers
    if recap_range:
        recap_no = recap_range[0]
    else:
        recap_no = None

    with session_scope() as db_session:
        # create batch record
        batch_rec = insert_or_ignore(
            db_session,
            WCSourceBatch,
            file=source_fh,
            system=system,
            library=library,
            action=action,
            api=api,
            data_source=data_source,
            encode_level=encode_level,
            mat_type=mat_type,
            cat_rules=cat_rules,
            cat_source=cat_source,
            id_type=id_type,
        )
        db_session.flush()
        batch_id = batch_rec.wcsbid

        # parse depending on the data source
        if data_source == "IDs list":
            with open(source_fh, "r") as file:
                reader = csv.reader(file)
                # skip header
                reader.next()
                if id_type == "ISBN":
                    for row in reader:
                        meta = BibOrderMeta(
                            system=system,
                            dstLibrary=library,
                            t020=[parse_isbn(row[0])],
                        )
                        insert_or_ignore(
                            db_session, WCSourceMeta, wcsbid=batch_id, meta=meta
                        )
                        update_progbar(progbar1)
                        update_progbar(progbar2)
                elif id_type == "UPC":
                    raise OverloadError("Not implemented.")
                    # will be implemented later
                    # for row in reader:
                    #     meta = BibOrderMeta(
                    #         system=system,
                    #         dstLibrary=library,
                    #         t024=[parse_upc(row[0])])
                elif id_type == "OCLC #":
                    for row in reader:
                        meta = BibOrderMeta(
                            system=system, dstLibrary=library, t001=row[0]
                        )
                        insert_or_ignore(
                            db_session, WCSourceMeta, wcsbid=batch_id, meta=meta
                        )
                        update_progbar(progbar1)
                        update_progbar(progbar2)
                else:
                    raise OverloadError("Not implemented.")

        elif data_source == "Sierra export":
            data = sierra_export_data(source_fh, system, library)
            for meta, single_order in data:
                if single_order is None:
                    row = ["b{}a".format(meta.sierraId), meta.title]
                    skipped_counter += 1
                    skipped.set(skipped_counter)
                    save2csv(W2S_SKIPPED_ORD, row)
                    progbar1["maximum"] = progbar1["maximum"] - 3
                elif single_order is False:
                    row = ["b{}a".format(meta.sierraId), meta.title]
                    skipped_counter += 1
                    skipped.set(skipped_counter)
                    save2csv(W2S_MULTI_ORD, row)
                    progbar1["maximum"] = progbar1["maximum"] - 3
                else:
                    insert_or_ignore(
                        db_session, WCSourceMeta, wcsbid=batch_id, meta=meta
                    )
                    update_progbar(progbar1)
                    update_progbar(progbar2)

        creds = get_credentials(api)
        wskey = creds["key"]
        db_session.commit()

        # query Worldcat
        process_label.set("querying:")
        # reset progbar2
        progbar2["value"] = 0

        metas = retrieve_records(db_session, WCSourceMeta, wcsbid=batch_id)
        with SearchSession(credentials=wskey) as session:
            for m in metas:
                module_logger.debug(m.meta)
                hit = False
                if m.meta.t001:
                    query = construct_sru_query(
                        m.meta.t001,
                        keyword_type="OCLC #",
                        mat_type=mat_type,
                        cat_source=cat_source,
                    )
                    res = session.sru_query(query=query)
                    module_logger.debug("OCLC# request: {}".format(res.url))
                    hit = interpret_search_response(res, db_session, m.wcsmid)
                    if hit:
                        found_counter += 1

                if m.meta.t010 and not hit:
                    query = construct_sru_query(
                        m.meta.t010,
                        keyword_type="LCCN",
                        mat_type=mat_type,
                        cat_source=cat_source,
                    )
                    res = session.sru_query(query=query)
                    module_logger.debug("LCCN request: {}".format(res.url))
                    hit = interpret_search_response(res, db_session, m.wcsmid)
                    if hit:
                        found_counter += 1

                if m.meta.t020 and not hit:
                    # will iterate over all ISBNs if no hits
                    for isbn in m.meta.t020:
                        query = construct_sru_query(
                            isbn,
                            keyword_type="ISBN",
                            mat_type=mat_type,
                            cat_source=cat_source,
                        )
                        res = session.sru_query(query=query)
                        module_logger.debug("ISBN request: {}".format(res.url))
                        hit = interpret_search_response(res, db_session, m.wcsmid)
                        if hit:
                            found_counter += 1
                            break  # stop searching

                if m.meta.t024 and not hit:
                    for upc in m.meta.t024:
                        query = construct_sru_query(
                            upc,
                            keyword_type="UPC",
                            mat_type=mat_type,
                            cat_source=cat_source,
                        )
                        res = session.sru_query(query=query)
                        module_logger.debug("UPC request: {}".format(res.url))
                        hit = interpret_search_response(res, db_session, m.wcsmid)
                        if hit:
                            found_counter += 1
                            break  # stop searching

                if not hit:
                    not_found_counter += 1
                    module_logger.debug(
                        "Unable to find any matches in Worldcat for {}.".format(m.meta)
                    )
                    interpret_search_response(None, db_session, m.wcsmid)

                hits.set(found_counter)
                nohits.set(not_found_counter)

                update_progbar(progbar1)
                update_progbar(progbar2)
                processed_counter += 1

        db_session.commit()

        # check if meet criteria
        process_label.set("analyzing:")
        progbar2["value"] = 0

        rows = retrieve_records(db_session, WCHit, hit=True)
        for row in rows:
            results = row.query_results
            recs = results2record_list(results)
            for xml_record in recs:
                fulfills = False
                fail_types = []
                if meets_upgrade_criteria(xml_record):
                    if meets_user_criteria(
                        xml_record, encode_level, mat_type, cat_rules, cat_source
                    ):
                        fulfills = True
                        if action == "upgrade":
                            meet_crit_counter.set(meet_crit_counter.get() + 1)
                            oclcNo = get_oclcNo(xml_record)
                            update_hit_record(
                                db_session, WCHit, row.wchid, match_oclcNo=oclcNo
                            )
                            update_progbar(progbar1)
                            update_progbar(progbar2)
                            break
                        elif action == "catalog":
                            if meets_catalog_criteria(xml_record, library):
                                fulfills = True
                                meet_crit_counter.set(meet_crit_counter.get() + 1)
                                oclcNo = get_oclcNo(xml_record)
                                update_hit_record(
                                    db_session, WCHit, row.wchid,
                                    match_oclcNo=oclcNo
                                )
                                update_progbar(progbar1)
                                update_progbar(progbar2)
                                break
                            else:
                                fail_types.append("global")
                    else:
                        fail_types.append("user")
                else:
                    fail_types.append("global")

            if not fulfills:
                if "user" in fail_types:
                    fail_user_crit_counter.set(fail_user_crit_counter.get() + 1)
                else:
                    fail_glob_crit_counter.set(fail_glob_crit_counter.get() + 1)

        db_session.commit()

        # download and prep
        process_label.set("downloading:")
        # reset progbar2
        progbar2["value"] = 0

        # obtain access token
        token = get_token(creds)
        if token.token_str is None:
            module_logger.error(
                "Worldcat token not obtained. Error: {}.".format(
                    token.server_response
                )
            )
        else:
            module_logger.debug("Worldcat token obtained.")

        # open Metadata API session
        with MetadataSession(credentials=token) as session:
            metas = retrieve_related(
                db_session, WCSourceMeta, "wchits", wcsbid=batch_id
            )
            for m in metas:
                if m.wchits.match_oclcNo:
                    xml_record = request_record(session, m.wchits.match_oclcNo)
                    if xml_record is not None:
                        update_hit_record(
                            db_session, WCHit, m.wchits.wchid,
                            match_marcxml=xml_record
                        )
                update_progbar(progbar1)
                update_progbar(progbar2)

        db_session.commit()

        # prepare MARC files
        process_label.set("prepping:")
        progbar2["value"] = 0

        # check if Sierra bib # provided and use for overlay command line
        rows = retrieve_records(db_session, WCSourceMeta, wcsbid=batch_id)
        for row in rows:
            # initial workflow shared by upgrade functionality
            xml_record = row.wchits.match_marcxml
            if xml_record is not None:
                marc_record = marcxml2array(xml_record)[0]
                remove_unsupported_subject_headings(system, marc_record)
                remove_unwanted_tags(marc_record)
                remove_ebook_isbns(marc_record)
                marc_record.remove_fields("901", "907", "945", "949", "947")
                initials = create_initials_field(system, library, "W2Sbot")
                marc_record.add_ordered_field(initials)

                if data_source == "Sierra export":
                    order_data = row.meta
                    if order_data.sierraId:
                        overlay_tag = create_target_id_field(
                            system, order_data.sierraId
                        )
                        marc_record.add_ordered_field(overlay_tag)

                if system == "NYPL":
                    marc_record.remove_fields("001", "910")
                    tag_001 = nypl_oclcNo_field(xml_record)
                    marc_record.add_ordered_field(tag_001)

                    # add Sierra bib code 3 and default location
                    if library == "branches":
                        defloc = NBIB_DEFAULT_LOCATIONS["branches"]
                    elif library == "research":
                        defloc = NBIB_DEFAULT_LOCATIONS["research"]
                    tag_949 = create_command_line_field(
                        "*b3=h;bn={};".format(defloc)
                    )
                    marc_record.add_ordered_field(tag_949)

                if action == "catalog":
                    # add call number & persist
                    if data_source == "Sierra export":
                        order_data = row.meta
                        local_fields = create_local_fields(
                            xml_record,
                            system,
                            library,
                            order_data=order_data,
                            recap_no=recap_no,
                        )
                    else:
                        # data source a list of IDs
                        local_fields = create_local_fields(
                            xml_record, system, library, recap_no=recap_no
                        )

                    if local_fields:
                        for field in local_fields:
                            if field is not None:
                                marc_record.add_ordered_field(field)
                        if system == "NYPL" and library == "research":
                            recap_no += 1

                update_hit_record(
                    db_session, WCHit, row.wchits.wchid, prepped_marc=marc_record
                )

            update_progbar(progbar1)
            update_progbar(progbar2)

    # make sure W2S stays within assigned ReCAP range
    if system == "NYPL" and library == "research":
        if action == "catalog":
            if recap_no > recap_range[1]:
                raise OverloadError(
                    "Used all available ReCAP call numbers assigned for W2S."
                )

    # show completed
    progbar1["value"] = progbar1["maximum"]
    progbar2["value"] = progbar2["maximum"]

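# Minimal invocation sketch for launch_process (not part of the original module).
# Written against Python 2 Tkinter, as the surrounding code suggests; the widgets,
# file path, ReCAP range, and api name below are placeholders normally supplied by
# the W2S GUI.
def _example_launch_process():
    import Tkinter as tk
    import ttk

    root = tk.Tk()
    progbar1 = ttk.Progressbar(root, maximum=100)
    progbar2 = ttk.Progressbar(root, maximum=100)

    launch_process(
        source_fh="sierra-export.txt",        # hypothetical source file
        data_source="Sierra export",
        system="NYPL",
        library="branches",
        progbar1=progbar1,
        progbar2=progbar2,
        process_label=tk.StringVar(),
        hits=tk.IntVar(),
        nohits=tk.IntVar(),
        skipped=tk.IntVar(),
        meet_crit_counter=tk.IntVar(),
        fail_user_crit_counter=tk.IntVar(),
        fail_glob_crit_counter=tk.IntVar(),
        action="catalog",
        encode_level="any",
        mat_type="print",
        cat_rules="any",
        cat_source="any",
        recap_range=[41000000, 41999999],     # hypothetical ReCAP number range
        id_type="ISBN",
        api="worldcat-api-name",              # hypothetical api name
    )
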
def get_ftp_connections(system):
    with session_scope() as db_session:
        names = retrieve_records(db_session, FTPs, system=system)
        return [x.name for x in names]

def audience_label_idx():
    with session_scope() as session:
        audn_recs = retrieve_records(session, Audience)
        audn_idx = {a.label: a.rid for a in audn_recs}
    return audn_idx

def cumulative_vendor_stats(start_date, end_date):
    """
    Produces dataframe of vendor statistics during span of time
    """
    with session_scope() as session:
        query = session.query(
            PVR_Batch.system,
            PVR_Batch.library,
            func.sum(PVR_File.new),
            func.sum(PVR_File.dups),
            func.sum(PVR_File.updated),
            func.sum(PVR_File.mixed),
            func.sum(PVR_File.other),
            Vendor.name,
        )
        query = query.join(PVR_File, PVR_File.vid == Vendor.vid)

        nypl_br_results = (
            query.filter(
                PVR_Batch.timestamp >= start_date,
                PVR_Batch.timestamp < end_date,
                PVR_Batch.system == "nypl",
                PVR_Batch.library == "branches",
            )
            .group_by(Vendor.name)
            .all()
        )
        nypl_rl_results = (
            query.filter(
                PVR_Batch.timestamp >= start_date,
                PVR_Batch.timestamp < end_date,
                PVR_Batch.system == "nypl",
                PVR_Batch.library == "research",
            )
            .group_by(Vendor.name)
            .all()
        )
        bpl_results = (
            query.filter(
                PVR_Batch.timestamp >= start_date,
                PVR_Batch.timestamp < end_date,
                PVR_Batch.system == "bpl",
            )
            .group_by(Vendor.name)
            .all()
        )

        labels = [
            "system",
            "library",
            "insert",
            "attach",
            "overlay",
            "mixed",
            "other",
            "vendor",
        ]
        nbdf = pd.DataFrame.from_records(nypl_br_results, columns=labels)
        nrdf = pd.DataFrame.from_records(nypl_rl_results, columns=labels)
        bdf = pd.DataFrame.from_records(bpl_results, columns=labels)

        nbdf["total loaded"] = nbdf["insert"] + nbdf["attach"] + nbdf["overlay"]
        nbdf = nbdf[
            ["vendor", "insert", "attach", "overlay", "total loaded", "mixed", "other"]
        ]

        nrdf["total loaded"] = nrdf["insert"] + nrdf["attach"] + nrdf["overlay"]
        nrdf = nrdf[
            ["vendor", "insert", "attach", "overlay", "total loaded", "mixed", "other"]
        ]

        bdf["total loaded"] = bdf["insert"] + bdf["attach"] + bdf["overlay"]
        bdf = bdf[["vendor", "insert", "attach", "overlay", "total loaded"]]

    return nbdf, nrdf, bdf

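# Illustrative usage sketch (not part of the original module): cumulative_vendor_stats
# returns separate dataframes for NYPL branches, NYPL research, and BPL; the date
# strings and output file names below are assumptions for demonstration.
def _example_vendor_stats_export():
    nbdf, nrdf, bdf = cumulative_vendor_stats("2020-01-01", "2020-02-01")
    nbdf.to_csv("nypl-branches-vendor-stats.csv", index=False)
    nrdf.to_csv("nypl-research-vendor-stats.csv", index=False)
    bdf.to_csv("bpl-vendor-stats.csv", index=False)
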
def get_latest_cart_record():
    # retrieve last entered cart
    with session_scope() as session:
        rec = retrieve_last_record(session, Cart)
        session.expunge(rec)
        return rec

def analize(report_fh=None):
    # clean-up Bibs and Orders tables to prep
    # datastore for new set
    with session_scope() as session:
        delete_table_data(session, Orders)
        delete_table_data(session, Bibs)

    fetched = False
    if report_fh is None:
        s = shelve.open(SETTINGS, flag='r')
        host = s['ftp_host']
        user = s['ftp_user']
        passw = s['ftp_pass']
        ret = s['orders_retention']
        # fetch the latest Sierra report
        fetched = ftp_download(host, user, passw, 'bpl')
        s.close()
        if fetched:
            data_generator = report_data(DATA, ret)
        else:
            main_logger.warning('No new sierra report - skipping analysis')
            # perform ftp maintenance
            ftp_maintenance(host, user, passw, 'bpl')
    else:
        s = shelve.open(SETTINGS, flag='r')
        ret = s['orders_retention']
        data_generator = report_data(report_fh, ret)
        fetched = True

    if fetched:
        # since bibs and orders are somewhat vetted by the sierra_parser
        # it's OK to add them in bulk to datastore
        # if any exception encountered the whole batch will be rolled back!
        try:
            with session_scope() as session:
                for record in data_generator:
                    bib = record[0]
                    order = record[1]
                    insert_or_ignore(session, Bibs, **bib)
                    insert_or_ignore(session, Orders, **order)
        except Exception as e:
            main_logger.critical(
                'Unable to add data {} to datastore. '
                'Error: {}'.format(file, e))

        # update conflicts table and prepare queries
        # this time enter each conflict in its own session
        # so well formed queries can be used
        queries = dict()
        conflicts = conflict2dict()

        # update Conflicts table in datastore
        for conflict in conflicts:
            queries[conflict['id']] = conflict['query']
            conflict.pop('query', None)
            try:
                with session_scope() as session:
                    insert_or_update(session, Conflicts, **conflict)
            except Exception as e:
                main_logger.critical(
                    'unable to add data to datastore: {}, error: {}'.format(
                        conflict, e))

        # run conflict queries and save errors in the datastore
        for cid, query in queries.iteritems():
            try:
                with session_scope() as session:
                    results = run_query(session, query)
                    for row in results:
                        tic = dict(bid=row.bid, title=row.title)
                        ticket = insert_or_ignore(session, Tickets, **tic)
                        # flush session so ticket obj gets id needed for joiner
                        session.flush()
                        joiner = dict(tid=ticket.id, cid=cid)
                        insert_or_ignore(session, TickConfJoiner, **joiner)
                        cop = dict(
                            tid=ticket.id, oid=row.oid, copies=row.copies)
                        insert_or_ignore(session, Copies, **cop)
            except Exception as e:
                # think about better logging here
                main_logger.critical(
                    'Unable to add data to datastore, error: '
                    '{}, {}: {}'.format(e, row, cid))

        # email error report
        service = create_gmail_service()
        report = create_report()
        addresses = get_addresses()
        to = ','.join(addresses['to'])
        sender = addresses['from']
        subject = 'BPL QC Report for {}'.format(
            date.today().strftime('%Y-%m-%d'))
        msg = create_message(sender, to, subject, report)

        # send message
        send_message(service, 'me', msg)

def get_template_names(agent):
    # agent arg must be 3 letter code
    with session_scope() as session:
        values = retrieve_records(session, NYPLOrderTemplate, agent=agent)
        return [x.tName for x in values]

def delete_template(otid):
    with session_scope() as session:
        delete_record(session, NYPLOrderTemplate, otid=otid)

def get_batch_criteria_record():
    with session_scope() as db_session:
        rec = retrieve_record(db_session, WCSourceBatch)
        db_session.expunge_all()
        return rec

def get_total_number_of_branches(system_id):
    with session_scope() as session:
        branch_count = count_records(session, Branch, system_id=system_id)
    return branch_count