def test_nypl_branch_BT_SERIES_Spanish_prefix(self):
    """A Spanish juvenile call number in 091$a is split into $p/$a/$c subfields."""
    bib = Record()
    bib.leader = "00000nam a2200000u 4500"
    for field in (
        Field(tag="001", data="0001"),
        Field(tag="245", indicators=["0", "0"], subfields=["a", "Test title"]),
        Field(
            tag="091",
            indicators=[" ", " "],
            subfields=["a", "J SPA E COMPOUND NAME"],
        ),
    ):
        bib.add_ordered_field(field)

    patched = patches.bib_patches("nypl", "branches", "cat", "BT SERIES", bib)

    field_091 = patched.get_fields("091")[0]
    self.assertEqual([" ", " "], field_091.indicators)
    self.assertEqual(
        ["p", "J SPA", "a", "E", "c", "COMPOUND NAME"], field_091.subfields)
def test_nypl_branches_BT_SERIES_YA_graphic_novel_compound_name(self):
    """A graphic-novel call number in 091$a is split into $f/$a/$c subfields."""
    bib = Record()
    bib.leader = "00000nam a2200000u 4500"
    for field in (
        Field(tag="001", data="0001"),
        Field(tag="245", indicators=["0", "0"], subfields=["a", "Test title"]),
        Field(
            tag="091",
            indicators=[" ", " "],
            subfields=["a", "GRAPHIC GN FIC COMPOUND NAME"],
        ),
    ):
        bib.add_ordered_field(field)

    patched = patches.bib_patches("nypl", "branches", "cat", "BT SERIES", bib)

    field_091 = patched.get_fields("091")[0]
    self.assertEqual([" ", " "], field_091.indicators)
    self.assertEqual(
        ["f", "GRAPHIC", "a", "GN FIC", "c", "COMPOUND NAME"],
        field_091.subfields)
def test_nypl_branch_BT_SERIES_exception(self):
    """An unrecognized 091$a call number pattern makes the patch raise AssertionError."""
    bib = Record()
    bib.leader = "00000nam a2200000u 4500"
    for field in (
        Field(tag="001", data="0001"),
        Field(tag="245", indicators=["0", "0"], subfields=["a", "Test title"]),
        Field(tag="091", indicators=[" ", " "], subfields=["a", "J B EDISON C"]),
    ):
        bib.add_ordered_field(field)

    with self.assertRaises(AssertionError):
        patches.bib_patches("nypl", "branches", "cat", "BT SERIES", bib)
def test_bib_no_oclc_prefix(self):
    """The 001 control number of a non-OCLC (Amalivre) record is left untouched."""
    bib = Record()
    bib.leader = "00000nam a2200000u 4500"
    for field in (
        Field(tag="001", data="bl00000001"),
        Field(tag="245", indicators=["0", "0"], subfields=["a", "Test title"]),
        Field(
            tag="091",
            indicators=[" ", " "],
            subfields=["a", "GRAPHIC GN FIC COMPOUND NAME"],
        ),
    ):
        bib.add_ordered_field(field)

    patched = patches.bib_patches("nypl", "branches", "cat", "Amalivre", bib)

    self.assertEqual(patched.get_fields("001")[0].data, "bl00000001")
def create_record(row):
    """Take a row from the csv dict and return a pymarc.Record.

    On success returns a populated ``pymarc.Record``.  On missing data it
    returns a (German) error string instead of a Record — callers must
    check the return type.
    """
    rec = Record()
    rec.leader = "00000ntm#a22000005c#4500"
    # 005: timestamp of record creation.
    rec.add_ordered_field(
        pymarc.Field(tag="005", data=datetime.datetime.now().strftime("%Y%m%d%H%M%S.0")))
    # Generate content for field 245 (title) from the modern shelfmark.
    if not row['Signatur modern']:
        # No shelfmark at all -> bail out with an error string, not a Record.
        return "Keine Signatur vorhanden"
    else:
        if row["Bd."]:
            # Volume number present: append it after a slash.
            val245 = f"UBG Ms {row['Signatur modern'].strip()}/{row['Bd.'].strip()}"
        else:
            val245 = f"UBG Ms {row['Signatur modern'].strip()}"
        rec.add_ordered_field(
            Field(tag='245', indicators=['0', '0'], subfields=['a', val245]))
    # Extent statement in field 300 ($a extent, $c format/size).
    if "rolle" in row["Umfang"].lower():
        # Scrolls ("Rolle") keep the raw extent text; everything else is counted in leaves.
        sfa = row["Umfang"].strip()
    else:
        sfa = f'{row["Umfang"].strip()} Blätter'
    sfc = f'{row["Format"].strip()}, {row["Größe h : b "].strip().replace(":", "x")}'
    # Clean up artifacts left by empty source cells.
    if sfa.startswith(" "):
        sfa = ""
    if sfc.startswith(", "):
        sfc = sfc[2:]
    if sfc.endswith(", "):
        sfc = sfc[:-2]
    rec.add_ordered_field(
        Field(tag='300', indicators=[' ', ' '], subfields=["a", sfa, "c", sfc]))
    # 500 note: historical shelfmark, only when present.
    if row["Signatur alt"]:
        rec.add_field(
            Field(
                tag='500',
                indicators=[' ', ' '],
                subfields=[
                    'a',
                    f'Historische Signatur der Universitätsbibliothek Graz: {row["Signatur alt"].strip()}'
                ]))
    # Fixed 500 note marking the data snapshot.
    rec.add_ordered_field(
        Field(tag="500", indicators=[" ", " "], subfields=["a", "Stand 2018"]))
    # 340: physical medium (writing material).
    beschreibstoff = row["Beschreibstoff"].strip()
    rec.add_ordered_field(
        Field(tag="340", indicators=[" ", " "], subfields=["a", beschreibstoff]))
    # 264 $c: production date, bracketed as inferred.
    rec.add_ordered_field(
        pymarc.Field(tag="264", indicators=[" ", "1"], subfields=["c", f"[{get_date(row)}]"]))
    # 710: owning institution with GND identifier.
    rec.add_field(
        Field(tag="710", indicators=["2", " "], subfields=[
            "a", "Universitätsbibliothek Graz", "0", "(DE-588)18018-X", "4", "own"
        ]))
    date = get_date(row)
    if date == "Datum unbekannt":
        # No usable date -> report and return an error string.
        print("Kein Datum vorhanden: " + val245)
        return f"{val245}: Kein Datum vorhanden"
    else:
        year = date_008(date)
        if year is None:
            # Date present but no extractable year for the 008 fixed field.
            print("Keine Jahreszahl für 008 extrahierbar: " + val245)
            return f"{val245}: Keine Jahreszahl für 008 extrahierbar."
    # 008 fixed field: entry date + 's' + publication year + placeholders.
    date_on_file = datetime.datetime.now().strftime("%y%m%d")
    data008 = date_on_file + "s" + year + " " + "xx " + "||||" "|" + " " + "||||" + " 00|||| ||"
    rec.add_ordered_field(Field(tag="008", data=data008))
    # 700: former owners (natural persons), looked up in the vb_pers authority dict.
    vorbes_nat_pers = []
    if row["1. VB natürl. Personen"] != '':
        vorbes_nat_pers.append(row["1. VB natürl. Personen"].strip())
    if row["2. VB natürl. Personen"] != '':
        vorbes_nat_pers.append(row["2. VB natürl. Personen"].strip())
    if len(vorbes_nat_pers) > 0:
        for pers in vorbes_nat_pers:
            if pers not in vb_pers:
                # Person missing from the authority list: log and skip.
                print(f"Person nicht vorhanden: {pers}")
                continue
            else:
                persfield = Field(tag='700', indicators=['1', ' '],
                                  subfields=vb_pers[pers] + ['4', 'fmo'])
                if "," not in vb_pers[pers][1]:
                    # No comma in the name -> forename-only form, indicator 0.
                    persfield.indicators = ['0', ' ']
                rec.add_ordered_field(persfield)
    # 710: former owners (institutions); unknown ones are still added verbatim.
    vorbes_kor = []
    if row["1. Vorbesitz Institution"] != '':
        vorbes_kor.append(row["1. Vorbesitz Institution"].strip())
    if row["2. Vorbesitz Institution"] != '':
        vorbes_kor.append(row["2. Vorbesitz Institution"].strip())
    if len(vorbes_kor) > 0:
        for kor in vorbes_kor:
            if kor not in vb_kor:
                korfield = Field(tag='710', indicators=['2', ' '],
                                 subfields=['a', kor, '4', 'fmo'])
                rec.add_ordered_field(korfield)
                print(korfield)
            else:
                korfield = Field(tag='710', indicators=['2', ' '],
                                 subfields=vb_kor[kor] + ['4', 'fmo'])
                rec.add_ordered_field(korfield)
    # 995: local holdings field (library, location, shelfmarks).
    standort = "SSHS"
    signatur = "Ms " + row["Signatur modern"]
    rec.add_field(
        Field(tag="995", indicators=[" ", " "], subfields=[
            "b", "BHB", "c", standort, "h", signatur,
            "a", row["Signatur alt"], "9", "LOCAL"
        ]))
    return rec
def main(arglist):
    """Read a comics spreadsheet (first sheet) and write one MARC21 record
    per data row to ``records.mrc``.

    ``arglist`` is passed to argparse; the single positional argument is the
    spreadsheet path.  Column positions are resolved by header name from row 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='path to spreadsheet')
    # parser.add_argument('output', help='save directory')
    args = parser.parse_args(arglist)
    input = Path(args.input)
    # Read spreadsheet
    book_in = xlrd.open_workbook(str(input))
    sheet = book_in.sheet_by_index(0)  # get first sheet
    # Resolve every column index from the header row; raises ValueError if
    # a header is missing or renamed.
    col_headers = sheet.row_values(0)
    title_col = col_headers.index('Title')
    subj_person_col = col_headers.index('Subject_Person')
    subj_topical_col = col_headers.index('Subject_Topical')
    subj_place_col = col_headers.index('Subject_Place')
    subj_corp_col = col_headers.index('Subject_Jurisdictional')
    genre_col = col_headers.index('Genre')
    pages_col = col_headers.index('Pages')
    pub_date_col = col_headers.index('Date')  # previously Publication Date
    copy_date_col = col_headers.index('Copyright Date')
    pub_place_col = col_headers.index('Pub_Place')
    publisher_col = col_headers.index('Publisher')
    edition_col = col_headers.index('Edition')
    source_col = col_headers.index('Source')
    # source_acq_col = col_headers.index('Source of Acquisition')
    writer_col = col_headers.index('Writer')
    penciller_col = col_headers.index('Penciller')
    inker_col = col_headers.index('Inker')
    colorist_col = col_headers.index('Colorist')
    letterer_col = col_headers.index('Letterer')
    cover_artist_col = col_headers.index('Cover Artist')
    editor_col = col_headers.index('Editor')
    # hist_note_col = col_headers.index('Historical Note')
    notes_col = col_headers.index('Note')
    characters_col = col_headers.index('Characters')
    synopsis_col = col_headers.index('Story Arc')
    toc_col = col_headers.index('Table of Contents')
    in_series_col = col_headers.index('Is Part of Series')
    black_creators_col = col_headers.index('Black Creators (MARC 590)')
    black_chars_col = col_headers.index('Black Characters (MARC 590)')
    isbn_col = col_headers.index('ISBN')
    color_col = col_headers.index('Color?')
    series_note_col = col_headers.index('Series Note')
    copyright_holder_col = col_headers.index('Copyright holder')
    gcd_uri_col = col_headers.index('Grand Comics Database')
    outmarc = open('records.mrc', 'wb')
    # Boilerplate fields
    # NOTE(review): these Field objects are created once and the SAME
    # instances are appended to every record in the loop below.
    field_ldr = '00000nam a2200000Ii 4500'
    field_040 = Field(tag = '040', indicators = [' ',' '], subfields = [
        'a', 'VMC', 'b', 'eng', 'e', 'rda', 'c', 'VMC'])
    field_049 = Field(tag = '049', indicators = [' ',' '], subfields = [
        'a', 'VMCS'])
    field_336_text = Field(tag = '336', indicators = [' ',' '], subfields = [
        'a', 'text', 'b', 'txt', '2', 'rdacontent'])
    field_336_image = Field(tag = '336', indicators = [' ',' '], subfields = [
        'a', 'still image', 'b', 'sti', '2', 'rdacontent'])
    field_337 = Field(tag = '337', indicators = [' ',' '], subfields = [
        'a', 'unmediated', 'b', 'n', '2', 'rdamedia'])
    field_338 = Field(tag = '338', indicators = [' ',' '], subfields = [
        'a', 'volume', 'b', 'nc', '2', 'rdacarrier'])
    field_380 = Field(tag = '380', indicators = [' ',' '], subfields = [
        'a', 'Comic books and graphic novels.'])
    field_506 = Field(tag = '506', indicators = ['1',' '], subfields = [
        'a', 'Collection open to research. Researchers must register and agree to copyright and privacy laws before using this collection. Please contact Research Services staff before visiting the James Madison University Special Collections Library to use this collection.'])
    field_542 = Field(tag = '542', indicators = [' ',' '], subfields = [
        'a', 'Copyright not evaluated', 'u', 'http://rightsstatements.org/vocab/CNE/1.0/'])
    field_588 = Field(tag = '588', indicators = ['0',' '], subfields = [
        'a', 'Description based on indicia and Grand Comics Database.'])
    field_989 = Field(tag = '989', indicators = [' ',' '], subfields = [
        'a', 'PN6728'])
    # One MARC record per data row (row 0 is the header).
    for row in range(1, sheet.nrows):
        print('Record ' + str(row))
        title = sheet.cell(row, title_col).value
        print(title)
        # Multi-valued cells are semicolon-separated lists.
        subj_person = sheet.cell(row, subj_person_col).value
        if subj_person:
            subj_person = [x.strip() for x in subj_person.split(';')]
        subj_topical = sheet.cell(row, subj_topical_col).value
        if subj_topical:
            subj_topical = [x.strip() for x in subj_topical.split(';')]
        subj_place = sheet.cell(row, subj_place_col).value
        if subj_place:
            subj_place = [x.strip() for x in subj_place.split(';')]
        subj_corp = sheet.cell(row, subj_corp_col).value
        if subj_corp:
            subj_corp = [x.strip() for x in subj_corp.split(';')]
        genre = sheet.cell(row, genre_col).value
        genre = [x.strip() for x in genre.split(';')]
        pages = str(sheet.cell(row, pages_col).value)
        pub_date = str(sheet.cell(row, pub_date_col).value)
        pub_date_str = date_from_string(pub_date)
        pub_date_year = year_from_date(pub_date_str)
        copy_date = ''
        copy_date = str(sheet.cell(row, copy_date_col).value)
        copy_date_str = date_from_string(copy_date)
        copy_date_year = year_from_date(copy_date_str)
        pub_place = sheet.cell(row, pub_place_col).value
        publisher = sheet.cell(row, publisher_col).value
        edition = sheet.cell(row, edition_col).value
        source = sheet.cell(row, source_col).value
        # source_acq = sheet.cell(row, source_acq_col).value
        characters = sheet.cell(row, characters_col).value
        black_creators = sheet.cell(row, black_creators_col).value
        if black_creators:
            black_creators = [x.strip() for x in black_creators.split(';')]
        black_chars = sheet.cell(row, black_chars_col).value
        if black_chars:
            black_chars = [x.strip() for x in black_chars.split(';')]
        isbn = str(sheet.cell(row, isbn_col).value)
        color = sheet.cell(row, color_col).value
        series_note = sheet.cell(row, series_note_col).value
        gcd_uri = sheet.cell(row, gcd_uri_col).value
        country_code = country_code_from_pub_place(pub_place)
        copyright_holder = []
        if sheet.cell(row, copyright_holder_col).value:
            copyright_holder = sheet.cell(row, copyright_holder_col).value
            copyright_holder = [x.strip() for x in copyright_holder.split(';')]
        writer = []
        if sheet.cell(row, writer_col).value:
            writer = sheet.cell(row, writer_col).value
            writer = [x.strip() for x in writer.split(';')]
        penciller = []
        if sheet.cell(row, penciller_col).value:
            penciller = sheet.cell(row, penciller_col).value
            penciller = [x.strip() for x in penciller.split(';')]
        inker = []
        if sheet.cell(row, inker_col).value:
            inker = sheet.cell(row, inker_col).value
            inker = [x.strip() for x in inker.split(';')]
        colorist = []
        if sheet.cell(row, colorist_col).value:
            colorist = sheet.cell(row, colorist_col).value
            colorist = [x.strip() for x in colorist.split(';')]
        letterer = []
        if sheet.cell(row, letterer_col).value:
            letterer = sheet.cell(row, letterer_col).value
            letterer = [x.strip() for x in letterer.split(';')]
        cover_artist = []
        if sheet.cell(row, cover_artist_col).value:
            cover_artist = sheet.cell(row, cover_artist_col).value
            cover_artist = [x.strip() for x in cover_artist.split(';')]
        editor = []
        if sheet.cell(row, editor_col).value:
            editor = sheet.cell(row, editor_col).value
            editor = [x.strip() for x in editor.split(';')]
        # hist_note = []
        # if sheet.cell(row, hist_note_col).value:
        #     hist_note = sheet.cell(row, hist_note_col).value
        notes = []
        if sheet.cell(row, notes_col).value:
            notes = sheet.cell(row, notes_col).value
        synopsis = []
        if sheet.cell(row, synopsis_col).value:
            synopsis = sheet.cell(row, synopsis_col).value
        toc = []
        if sheet.cell(row, toc_col).value:
            toc = sheet.cell(row, toc_col).value
        in_series = sheet.cell(row, in_series_col).value
        # Build contributor -> role-list map.  A copyright holder takes
        # precedence over individual creator roles.
        # NOTE(review): nesting reconstructed from whitespace-mangled source —
        # the per-role blocks are assumed to sit inside the else branch
        # (i.e. only merged when no copyright holder exists); confirm
        # against the original file's indentation.
        contribs = {}
        if copyright_holder:
            for i in copyright_holder:
                contribs.update({i: ['copyright holder']})
        else:
            if writer:
                for i in writer:
                    contribs.update({i: ['writer']})
            if penciller:
                for i in penciller:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('penciller')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['penciller']})
            if inker:
                for i in inker:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('inker')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['inker']})
            if colorist:
                for i in colorist:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('colorist')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['colorist']})
            if letterer:
                for i in letterer:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('letterer')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['letterer']})
            if cover_artist:
                for i in cover_artist:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('cover artist')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['cover artist']})
            if editor:
                for i in editor:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('editor')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['editor']})
        record = Record()
        # Add boilerplate fields
        record.leader = field_ldr
        record.add_ordered_field(field_040)
        record.add_ordered_field(field_049)
        record.add_ordered_field(field_336_text)
        record.add_ordered_field(field_336_image)
        record.add_ordered_field(field_337)
        record.add_ordered_field(field_338)
        record.add_ordered_field(field_380)
        record.add_ordered_field(field_506)
        record.add_ordered_field(field_542)
        record.add_ordered_field(field_588)
        record.add_ordered_field(field_989)
        # Add other fields
        # 008: 't' when a copyright date exists (pub + copyright years),
        # otherwise 's' (single date).
        today = datetime.today().strftime('%y%m%d')
        if copy_date:
            data_008 = today + 't' + pub_date_year + copy_date_year + country_code + 'a 6 000 1 eng d'
        else:
            data_008 = today + 's' + pub_date_year + ' ' + country_code + 'a 6 000 1 eng d'
        field_008 = Field(tag = '008', data = data_008)
        record.add_ordered_field(field_008)
        if isbn:
            field_020 = Field(tag = '020', indicators = [' ',' '], subfields = [
                'a', isbn])
            record.add_ordered_field(field_020)
        # 099: local call number derived from the title subfields,
        # dropping $b and folding $n into $a.
        subfields_099 = subfields_from_string(title)
        if 'b' in subfields_099:
            subfields_099.pop(3)
            subfields_099.pop(2)
        if 'n' in subfields_099:
            subfields_099[subfields_099.index('n')] = 'a'
        if subfields_099[1].endswith(',') or subfields_099[1].endswith(':'):
            subfields_099[1] = subfields_099[1][:-1]
        field_099 = Field(tag = '099', indicators = [' ','9'], subfields = subfields_099)
        record.add_ordered_field(field_099)
        # Main entry (110/100) for the first contributor; added entries
        # (710 corporate / 700 personal, by comma heuristic) for the rest.
        for i in contribs:
            if i == list(contribs.keys())[0] and 'copyright holder' in contribs[i]:
                # first contributor is copyright holder
                subfield_content = subfields_from_string_relator(i, contribs[i])
                field_110 = Field(tag = '110', indicators = ['2', ' '], subfields = subfield_content)
                record.add_ordered_field(field_110)
            elif i == list(contribs.keys())[0] and 'writer' in contribs[i]:
                # first contributor is a writer
                subfield_content = subfields_from_string_relator(i, contribs[i])
                field_100 = Field(tag = '100', indicators = ['1', ' '], subfields = subfield_content)
                record.add_ordered_field(field_100)
            else:
                subfield_content = subfields_from_string_relator(i, contribs[i])
                if ',' not in subfield_content[1]:
                    field_710 = Field(tag = '710', indicators = ['2',' '], subfields = subfield_content)
                    record.add_ordered_field(field_710)
                else:
                    field_700 = Field(tag = '700', indicators = ['1',' '], subfields = subfield_content)
                    record.add_ordered_field(field_700)
        # 245 indicators: ind1 = 1 when there is a main entry,
        # ind2 = nonfiling characters for a leading article.
        # NOTE(review): these indicators are ints, unlike the string
        # indicators used everywhere else — confirm pymarc accepts both.
        if contribs and ('writer' in contribs[list(contribs.keys())[0]] or 'copyright holder' in contribs[list(contribs.keys())[0]]):
            f245_ind1 = 1
        else:
            f245_ind1 = 0
        f245_ind2 = 0
        if str.startswith(title, 'The '):
            f245_ind2 = 4
        elif str.startswith(title, 'An '):
            f245_ind2 = 3
        elif str.startswith(title, 'A '):
            f245_ind2 = 2
        subfields_245 = subfields_from_string(title)
        # If writer exists, add $c for first writer
        if writer:
            subfields_245[-1] = subfields_245[-1] + ' /'
            subfields_245.append('c')
            subfields_245.append(name_direct_order(subfields_from_string(writer[0])[1]) + ', writer.')
        else:
            # If no writer, add 245 ending punctuation
            subfields_245[-1] = subfields_245[-1] + '.'
        field_245 = Field(tag = '245', indicators = [f245_ind1, f245_ind2], subfields = subfields_245)
        record.add_ordered_field(field_245)
        if edition:
            if not edition.endswith('.'):
                edition += '.'
            field_250 = Field(tag = '250', indicators = [' ', ' '], subfields = [
                'a', edition])
            record.add_ordered_field(field_250)
        # 264 _1: publication statement with ISBD punctuation.
        field_264_1 = Field(tag = '264', indicators = [' ','1'], subfields = [
            'a', pub_place + ' :', 'b', publisher + ',', 'c', pub_date_str + '.'])
        record.add_ordered_field(field_264_1)
        if copy_date:
            field_264_4 = Field(tag = '264', indicators = [' ','4'], subfields = [
                'c', '©' + copy_date_str])
            record.add_ordered_field(field_264_4)
        # 300: physical description.
        # NOTE(review): if color is neither 'yes' nor 'no', subfields_300 is
        # undefined and the Field() call below raises NameError (or reuses
        # the previous row's value) — confirm the column is constrained.
        if color == 'yes':
            subfields_300 = [
                'a', pages + ' pages :', 'b', 'chiefly color illustrations.']
        elif color == 'no':
            subfields_300 = [
                'a', pages + ' pages :', 'b', 'black and white illustrations.']
        field_300 = Field(tag = '300', indicators = [' ',' '], subfields = subfields_300)
        record.add_ordered_field(field_300)
        if title_to_series(title):
            subfields_490 = title_to_series(title)
            field_490 = Field(tag = '490', indicators = ['1',' '], subfields = subfields_490)
            record.add_ordered_field(field_490)
        if series_note:
            if not series_note.endswith('.'):
                series_note += '.'
            field_490_series_note = Field(tag = '490', indicators = ['1', ' '], subfields = ['a', series_note])
            record.add_ordered_field(field_490_series_note)
        # if hist_note:
        #     field_500_hist = Field(tag = '500',
        #                            indicators = [' ',' '],
        #                            subfields = [
        #                                'a', hist_note + '.'])
        #     record.add_ordered_field(field_500_hist)
        if notes:
            field_500_notes = Field(tag = '500', indicators = [' ',' '], subfields = [
                'a', notes + '.'])
            record.add_ordered_field(field_500_notes)
        if toc:
            if not toc.endswith('.') and not toc.endswith('?') and not toc.endswith('!'):
                toc += '.'
            field_505 = Field(tag = '505', indicators = ['0',' '], subfields = [
                'a', toc])
            record.add_ordered_field(field_505)
        if synopsis:
            field_520 = Field(tag = '520', indicators = [' ',' '], subfields = [
                'a', synopsis])
            record.add_ordered_field(field_520)
        # 590: local notes on Black creators / characters.
        if black_creators:
            for i in black_creators:
                if not i.endswith('.'):
                    i += '.'
                field_590_creators = Field(tag = '590', indicators = [' ',' '], subfields = [
                    'a', i])
                record.add_ordered_field(field_590_creators)
        if black_chars:
            for i in black_chars:
                if not i.endswith('.'):
                    i += '.'
                field_590_chars = Field(tag = '590', indicators = [' ',' '], subfields = [
                    'a', i])
                record.add_ordered_field(field_590_chars)
        if source:
            field_541_source = Field(tag = '541', indicators = [' ',' '], subfields = [
                'a', source + '.'])
            record.add_ordered_field(field_541_source)
        # if source_acq:
        #     field_541_source_acq = Field(tag = '541',
        #                                  indicators = [' ',' '],
        #                                  subfields = [
        #                                      'a', source_acq + '.'])
        #     record.add_ordered_field(field_541_source_acq)
        # 600: personal-name subjects with terminal punctuation normalization.
        if subj_person:
            for i in subj_person:
                i_subfields = subfields_from_string(i)
                # Set first indicator based on presence of comma in $a
                if 'a' in i_subfields:
                    if ',' in i_subfields[i_subfields.index('a') + 1]:
                        field_600_ind1 = '1'
                    else:
                        field_600_ind1 = '0'
                if '1' in i_subfields:
                    last_except_subf1 = i_subfields.index('1') - 1
                else:
                    last_except_subf1 = len(i_subfields) - 1
                if i_subfields[last_except_subf1].endswith(','):
                    i_subfields[last_except_subf1] = re.sub(r'^(.*),$', r'\g<1>.', i_subfields[last_except_subf1])
                if not i_subfields[last_except_subf1].endswith('.') and not i_subfields[last_except_subf1].endswith(')') and not i_subfields[last_except_subf1].endswith('?') and not i_subfields[last_except_subf1].endswith('-'):
                    i_subfields[last_except_subf1] += '.'
                field_600 = Field(tag = '600', indicators = [field_600_ind1,'0'], subfields = i_subfields)
                record.add_ordered_field(field_600)
        if subj_topical:
            for i in subj_topical:
                i_subfields = subfields_from_string(i)
                if not i_subfields[-1].endswith('.') and not i_subfields[-1].endswith(')'):
                    i_subfields[-1] += '.'
                field_650 = Field(tag = '650', indicators = [' ','0'], subfields = i_subfields)
                record.add_ordered_field(field_650)
        if subj_place:
            for i in subj_place:
                i_subfields = subfields_from_string(i)
                if not i_subfields[-1].endswith('.') and not i_subfields[-1].endswith(')'):
                    i_subfields[-1] += '.'
                field_651 = Field(tag = '651', indicators = [' ','0'], subfields = i_subfields)
                record.add_ordered_field(field_651)
        if subj_corp:
            for i in subj_corp:
                i_subfields = subfields_from_string(i)
                if not i_subfields[-1].endswith('.') and not i_subfields[-1].endswith(')'):
                    i_subfields[-1] += '.'
                field_610 = Field(tag = '610', indicators = ['1','0'], subfields = i_subfields)
                record.add_ordered_field(field_610)
        if genre:
            for i in genre:
                if not i.endswith('.') and not i.endswith(')'):
                    i += '.'
                field_655 = Field(tag = '655', indicators = [' ','7'], subfields = [
                    'a', i, '2', 'lcgft'])
                record.add_ordered_field(field_655)
        if characters:
            field_500_chars = Field(tag = '500', indicators = [' ', ' '], subfields = [
                'a', characters])
            record.add_ordered_field(field_500_chars)
        # 758: link to the Grand Comics Database work.
        if gcd_uri:
            title_758 = subfields_from_string(title)[1]
            if title_758.endswith(',') or title_758.endswith(':'):
                title_758 = title_758[:-1]
            field_758 = Field(tag = '758', indicators = [' ',' '], subfields = [
                '4', 'http://rdaregistry.info/Elements/m/P30135',
                'i', 'Has work manifested:', 'a', title_758, '1', gcd_uri])
            record.add_ordered_field(field_758)
        if in_series:
            subfields_773 = subfields_from_string(in_series)
            field_773 = Field(tag = '773', indicators = ['0','8'], subfields = subfields_773)
            record.add_ordered_field(field_773)
        # 852: holdings, reusing call-number pieces from the 099.
        subfields_852 = [
            'b', 'CARRIER', 'c', 'carrspec']
        if len(subfields_099) == 4:
            subfields_852.append('h')
            subfields_852.append(subfields_099[1])
            subfields_852.append('i')
            subfields_852.append(subfields_099[3])
        if len(subfields_099) == 2:
            subfields_852.append('h')
            subfields_852.append(subfields_099[1])
        if edition:
            if edition.endswith('.'):
                edition = edition[:-1]
            subfields_852.append('z')
            subfields_852.append(edition)
        field_852 = Field(tag = '852', indicators = ['8',' '], subfields = subfields_852)
        record.add_ordered_field(field_852)
        outmarc.write(record.as_marc())
        print()
    outmarc.close()
bib = open(OUT, 'w') #writer = MARCWriter(open('retrobi.mrc','wb')) # MAIN ----------------- with open(IN, 'rb') as f: for LINE in f: # INIT ----------------- #record = Record(force_utf8=True) record = Record() record.leader = ' nab a22 4a 4500' # overwrite internal(pymarc.record) LDR tag record.add_ordered_field(Field(tag='FMT', data='RS')) record.add_ordered_field(Field(tag='003', data='CZ PrUCL')) record.add_ordered_field(Field(tag='005', data='20201231')) record.add_ordered_field( Field(tag='040', indicators=['\\', '\\'], subfields=['a', 'ABB060', 'b', 'cze'])) #record.add_ordered_field(Field(tag='041', indicators=['0','\\'], subfields=['a', 'cze'])) record.add_ordered_field( Field(tag='336', indicators=['\\', '\\'], subfields=['a', 'text', 'b', 'txt', '2', 'rdacontent'])) record.add_ordered_field( Field(tag='337', indicators=['\\', '\\'], subfields=['a', u'bez média', 'b', 'n', '2', 'rdamedia']))
def stub_marc_bib():
    """Return a stub pymarc Record populated with a representative set of
    bibliographic fields for tests (two 001 control fields, vendor 037s,
    call numbers, three 856 links, etc.)."""
    # (tag, indicators, subfields-or-data) in the original insertion order;
    # control fields use indicators=None and carry raw data.
    field_specs = [
        ("001", None, "ocm0001"),
        ("245", ["0", "0"], ["a", "Test title"]),
        ("001", None, "ocn1111"),
        ("019", [" ", " "], ["a", "some-id-001"]),
        ("020", [" ", " "], ["a", "isbn001", "b", "isbn002"]),
        ("024", [" ", " "], ["a", "upc001"]),
        ("037", [" ", " "], ["a", "some-id-0001", "b", "test-distributor"]),
        ("037", [" ", " "], ["a", "some-id-0002", "b", "Overdrive, Inc."]),
        ("084", [" ", " "], ["a", "some-classification", "2", "test-thesaurus"]),
        ("091", [" ", " "], ["a", "some-callnumber"]),
        ("099", [" ", " "], ["a", "some-callnumber"]),
        ("263", [" ", " "], ["a", "some-date"]),
        ("856", [" ", "3"], ["u", "url1", "3", "public-note-1"]),
        ("856", [" ", "3"], ["u", "url2", "3", "public-note-2"]),
        ("856", [" ", "3"], ["u", "url3", "3", "public-note-3"]),
        ("838", [" ", " "], ["u", "EBSCOhost", "b", "EBSC", "n", "11111"]),
    ]

    marc_bib = Record()
    marc_bib.leader = "00000nam a2200000u 4500"
    for tag, indicators, payload in field_specs:
        if indicators is None:
            marc_bib.add_ordered_field(Field(tag=tag, data=payload))
        else:
            marc_bib.add_ordered_field(
                Field(tag=tag, indicators=indicators, subfields=payload))
    return marc_bib
def test_barcodes_duplicates_in_two_bpl_files(self):
    """A barcode appearing in both BPL files is reported with per-file counts.

    Writes three minimal bibs (one unique barcode, one barcode repeated
    across both files) and checks barcode_duplicates' mapping of
    barcode -> [(filename, count), ...].
    """

    def make_test_bib(control_no, barcode):
        # Build a minimal BPL bib: 001 control number, stub 245 title,
        # and a 960 item field carrying the barcode in $i.
        bib = Record()
        bib.leader = '00000nam a2200000u 4500'
        for tag in (
            Field(tag='001', data=control_no),
            Field(tag='245',
                  indicators=['0', '0'],
                  subfields=['a', 'Test title 1']),
            Field(tag='960',
                  indicators=[' ', ' '],
                  subfields=[
                      'i', barcode,
                      'l', '14afc',
                      'p', '14.95',
                      't', '100',
                      'v', 'BTURBN'
                  ]),
        ):
            bib.add_ordered_field(tag)
        return bib

    # file 1: a unique barcode plus the first occurrence of the duplicate
    bibs.write_marc21(self.fh1, make_test_bib('ocm00000003', '34444849044539'))
    bibs.write_marc21(self.fh1, make_test_bib('ocm00000001', '34444849044538'))
    # file 2: the second occurrence of the duplicate barcode
    bibs.write_marc21(self.fh2, make_test_bib('ocm00000001', '34444849044538'))

    self.assertEqual(
        default.barcode_duplicates([self.fh1, self.fh2], 'bpl'),
        {
            u'34444849044538': [('barcode1_dup_test.mrc', 2),
                                ('barcode2_dup_test.mrc', 1)]
        })
def make_bib(fh, oclc_code, library_code, blanketPO, selector_code, order):
    """creates bib & order record in MARC21 format with UTF-8 encoded
    charset

    Builds a brief bib (leader, 001/008/020/024/028/040/100/245/264/300/940)
    plus 960/961 order fields and appends the record to ``fh`` via save2marc.
    """
    record = Record()
    tags = []
    # MARC leader
    # Map the order's material code to the leader type-of-record byte.
    if order.mat_bib in ('h', 'v'):
        MARCmatType = 'g'
    elif order.mat_bib in ('i', 'u'):
        MARCmatType = 'i'
    elif order.mat_bib in ('j', 'y'):
        MARCmatType = 'j'
    elif order.mat_bib == 'a':
        MARCmatType = 'a'
    else:
        MARCmatType = 'a'
    if order.lang == 'eng':
        order_code3 = 'd'
    else:
        order_code3 = 'f'
    record.leader = f'00000n{MARCmatType}m a2200000u 4500'
    # 001 field
    tags.append(Field(tag='001', data=order.wlo))
    # 008 field
    # needs to take into account differences between different
    # non-print formats
    dateCreated = date.strftime(date.today(), '%y%m%d')
    tag008 = f'{dateCreated}s xx 000 u {order.lang} d'
    # Splice the publication year into positions 7-10 when known.
    if order.resource.pub_date is not None:
        tag008 = tag008[:7] + order.resource.pub_date + tag008[11:]
    tags.append(Field(tag='008', data=tag008))
    # 020 field
    if order.resource.isbn is not None:
        tags.append(Field(tag='020',
                          indicators=[' ', ' '],
                          subfields=['a', order.resource.isbn]))
    # 024 field
    if order.resource.upc is not None:
        tags.append(Field(tag='024',
                          indicators=['1', ' '],
                          subfields=['a', order.resource.upc]))
    # 028 field
    if order.resource.other_no is not None:
        tags.append(Field(tag='028',
                          indicators=['6', '0'],
                          subfields=['a', order.resource.other_no]))
    # 040 field
    tags.append(Field(
        tag='040',
        indicators=[' ', ' '],
        subfields=[
            'a', oclc_code,
            'b', 'eng',
            'c', oclc_code]))
    # # 100
    author_present = False
    if order.resource.author is not None:
        author_present = True
        subfields = ['a', order.resource.author]
        tags.append(Field(
            tag='100',
            indicators=['1', ' '],
            subfields=subfields))
    # 245 field
    # add format to title for non-print mat
    # NOTE(review): this mutates order.resource.title in place — the caller's
    # order object keeps the appended suffix.
    if MARCmatType == 'g':
        order.resource.title += ' (DVD)'
    elif MARCmatType == 'i':
        order.resource.title += ' (Audiobook)'
    elif MARCmatType == 'j':
        order.resource.title += ' (CD)'
    if author_present:
        t245_ind1 = '1'
    else:
        t245_ind1 = '0'
    subfields = ['a', order.resource.title]
    tags.append(Field(
        tag='245',
        indicators=[t245_ind1, '0'],
        subfields=subfields))
    # 264
    subfields = []
    if order.resource.pub_place is not None:
        subfieldA = ['a', order.resource.pub_place]
        subfields.extend(subfieldA)
    if order.resource.publisher is not None:
        subfieldB = ['b', order.resource.publisher]
        subfields.extend(subfieldB)
    if order.resource.pub_date is None:
        subfieldC = ['c', '[date not specified]']
    else:
        subfieldC = ['c', order.resource.pub_date]
    subfields.extend(subfieldC)
    tags.append(Field(
        tag='264',
        indicators=[' ', '1'],
        subfields=subfields))
    # 300 field
    if MARCmatType == 'g':
        container = 'videodisc ; 4 3/4 in.'
    elif MARCmatType == 'i':
        container = 'sound disc ; 4 3/4 in.'
    elif MARCmatType == 'j':
        container = 'sound disc ; 4 3/4 in.'
    else:
        container = 'pages ; cm.'
    tags.append(Field(
        tag='300',
        indicators=[' ', ' '],
        subfields=['a', container]))
    # 940 field
    tags.append(Field(
        tag='940',
        indicators=[' ', ' '],
        subfields=['a', 'brief wlo record']))
    # 960 field
    # NOTE(review): subfield_M, subfield_N and subfield_Z are only assigned
    # inside the 'BKL'/'NYP' branches; any other oclc_code value raises
    # NameError at the extends below — confirm callers only pass these two.
    subfields = []
    if oclc_code == 'BKL':
        # subfield_A = ['a', BPL_ORDERS['acqType']]  # set by load table
        subfield_C = ['c', selector_code]
        subfield_M = ['m', BPL_ORDERS['status']]
        subfield_N = ['n', BPL_ORDERS['tloc']]
        subfield_Z = ['z', BPL_ORDERS['currency']]
        subfields.extend(subfield_C)
    elif oclc_code == 'NYP':
        # subfield_A = ['a', NYPL_ORDERS['acqType']]  # set by load table
        subfield_D = ['d', library_code]
        subfield_E = ['e', order_code3]
        subfield_M = ['m', NYPL_ORDERS['status']]
        subfield_N = ['n', NYPL_ORDERS['tloc']]
        subfield_Y = ['y', NYPL_ORDERS['volumes']]
        subfield_Z = ['z', NYPL_ORDERS['currency']]
        subfields.extend(subfield_D)
        subfields.extend(subfield_E)
        subfields.extend(subfield_Y)
    subfield_F = ['f', order.audn]
    subfield_G = ['g', order.mat_ord]
    subfield_O = ['o', order.copies]
    subfield_Q = ['q', order.order_date]
    subfield_S = ['s', f'{order.resource.price_disc:.2f}']
    subfield_T = ['t', order.locs]
    subfield_U = ['u', order.funds]
    subfield_V = ['v', order.vendor]
    subfield_W = ['w', order.lang]
    subfields.extend(subfield_F)
    subfields.extend(subfield_G)
    subfields.extend(subfield_M)
    subfields.extend(subfield_N)
    subfields.extend(subfield_O)
    subfields.extend(subfield_Q)
    subfields.extend(subfield_S)
    subfields.extend(subfield_T)
    subfields.extend(subfield_U)
    subfields.extend(subfield_V)
    subfields.extend(subfield_W)
    subfields.extend(subfield_Z)
    tags.append(Field(tag='960',
                      indicators=[' ', ' '],
                      subfields=subfields))
    # 961 field
    subfields = []
    subfield_I = ['i', order.wlo]
    if order.poPerLine is not None:
        subfield_H = ['h', order.poPerLine]
        subfields.extend(subfield_H)
    if blanketPO is not None:
        subfield_M = ['m', blanketPO]
        subfields.extend(subfield_M)
    if order.note is not None:
        subfield_D = ['d', order.note]
        subfields.extend(subfield_D)
    subfields.extend(subfield_I)
    tags.append(Field(
        tag='961',
        indicators=[' ', ' '],
        subfields=subfields))
    # construct & send to file
    for tag in tags:
        record.add_ordered_field(tag)
    save2marc(fh, record)
def record_crosswalk(record):
    """Map a source bibliographic record to a new classification record.

    Walks every field of ``record`` and builds a new pymarc ``Record``
    with re-tagged fields (019 -> 680, 172 -> 084/153, 572 -> 153$j/753,
    001 -> prefixed 035, plus a static 040) and an adjusted leader.
    Skipped/unmapped fields are logged as CSV rows on stdout.

    Relies on the module-level names ``target`` (vocabulary being
    processed) and ``inputfile`` (used to flag temporary classifications
    in the leader) -- assumed to be defined at module scope; confirm.

    Returns:
        the newly built pymarc ``Record``.
    """
    # A new record object is created. As we walk through fields in the
    # original record, we add the mapped fields to this new one.
    newrecord = Record()

    # Values used outside the field loop.
    recordid = ''    # original 001, re-emitted later as a prefixed 035
    callnr = ''      # 172__$a, becomes 153__$a
    newclassif = ''  # concatenated first-572 subfields, becomes 153__$j

    # The first 572 field is mapped differently; this flag is cleared
    # after the first 572 has been processed.
    firstsubject = True

    # Walk through each field in the original record.
    for field in record.get_fields():
        # 001 is not mapped as-is; it is stored as a 035 later on.
        if field.tag == '001':
            recordid = field.value()
        # 008 is mapped as-is (if it exists).
        elif field.tag == '008':
            newrecord.add_field(field)
        # 019__$a is mapped to 680__$i.
        elif field.tag == '019':
            try:
                indexnote = field.get_subfields('a')[0]
                newrecord.add_ordered_field(
                    Field(tag='680',
                          indicators=[' ', ' '],
                          subfields=['i', indexnote]))
            except IndexError:
                # BUGFIX: field.value is a method and must be called --
                # previously the bound-method repr was logged instead of
                # the field's content.
                print(
                    f"{recordid},019__$a,{field.value()},Notice avec 019 mais sans 019__$a"
                )
        # 035 fields are mapped as-is (if they exist).
        elif field.tag == '035':
            newrecord.add_field(field)
        # 172__$2 is mapped to 084__$a according to which vocabulary is
        # being mapped.
        elif field.tag == '172':
            vocab = field.get_subfields('2')[0]
            if vocab in ["BCUR1", "BCUR2", "BCUR3"]:
                mappedvalue = "CLASBCUR"
            elif vocab in ["vddoc", "vddoc-la"]:
                mappedvalue = "vddoc"
            elif vocab == "laf":
                mappedvalue = "laf"
            else:
                # Unknown vocabulary: keep it unchanged and log it.
                mappedvalue = vocab
                print(
                    f"{recordid},172__$2,{vocab},Ne figure pas sur la liste des vocabulaires traités"
                )
            newrecord.add_ordered_field(
                Field(tag='084',
                      indicators=[' ', ' '],
                      subfields=['a', mappedvalue]))
            # 172__$a will be mapped to 153__$a later on.
            try:
                callnr = field.get_subfields('a')[0]
            except IndexError:
                print(f"{recordid},172__$a,,Notice sans champ 172__$a")
        # The first 572 is mapped to 153__$j (concatenating subfields).
        elif field.tag == '572':
            if firstsubject:
                # get_subfields() returns them in the order they are
                # stored in the record, so no reordering is required.
                newclassif = ' -- '.join(
                    field.get_subfields('a', 'c', 'd', 'e', 'h', 'l', 'm',
                                        's', 't', 'v', 'x', 'X', 'y', 'z'))
                firstsubject = False
            # Look for unexpected subfields.
            if len(field.get_subfields('9', '[')) > 0:
                # BUGFIX: call field.value() instead of interpolating the
                # bound method object.
                print(
                    f"{recordid},752,{field.value()},Sous-champ(s) 752 inattendu(s)"
                )
            # All 572s are mapped to 753s, keeping the original subfield
            # structure.  NOTE(review): this mutates the original field's
            # tag in place -- confirm the source record may be modified.
            subjectfield = field
            subjectfield.tag = '753'
            newrecord.add_ordered_field(subjectfield)
        # 680 fields are mapped as-is (if they exist).
        elif field.tag == '680':
            newrecord.add_ordered_field(field)
        # Log all unmapped fields, except 003, 005, 039, 040 and 072.
        elif field.tag not in ['003', '005', '039', '040', '072']:
            print(f"{recordid},{field.tag},{field},Champ non mappé")

    # Check for empty or missing call numbers.
    if len(callnr) < 1:
        print(f"{recordid},153__$a,,Indice vide ou manquant")

    # Put the 153 field together.  For BCUR* targets the target name is
    # also stored as a second $a.
    if len(newclassif) < 1:
        # No concatenated classification string: record without 572,
        # only store the call number.
        if target in ["BCURmu", "BCURpt", "BCURcg"]:
            newrecord.add_ordered_field(
                Field(tag='153',
                      indicators=[' ', ' '],
                      subfields=['a', callnr, 'a', target]))
        else:
            newrecord.add_ordered_field(
                Field(tag='153',
                      indicators=[' ', ' '],
                      subfields=['a', callnr]))
    else:
        # Same process, with the concatenated classification in a $j.
        if target in ["BCURmu", "BCURpt", "BCURcg"]:
            newrecord.add_ordered_field(
                Field(tag='153',
                      indicators=[' ', ' '],
                      subfields=['a', callnr, 'a', target, 'j', newclassif]))
        else:
            newrecord.add_ordered_field(
                Field(tag='153',
                      indicators=[' ', ' '],
                      subfields=['a', callnr, 'j', newclassif]))

    # Add the existing 001 field (record id) as an additional 035 with
    # the (vtls_reroVD) prefix.
    newrecord.add_ordered_field(
        Field(tag='035',
              indicators=[' ', ' '],
              subfields=['a', "(vtls_reroVD)" + recordid]))

    # 040__$a is set to static value "RNV vdbcul".
    newrecord.add_ordered_field(
        Field(tag='040',
              indicators=[' ', ' '],
              subfields=['a', "RNV vdbcul"]))

    # Edit and map the leader field.  Position 17 is set to 'o' for
    # temporary classifications (input file name includes "temp").
    leader = list(record.leader)
    leader[6] = 'w'
    if inputfile.find('temp') > -1:
        leader[17] = 'o'
    else:
        leader[17] = 'n'
    newrecord.leader = ''.join(leader)

    return newrecord
if res_type == '': rec_LDR[6] = 'a' # code for text else: rec_LDR[ 6] = res_type # if the "Resource Type" is not blank, use that code instead of 'a' rec_LDR[7] = 'm' # code for monographic record new_marc_rec.leader = ''.join( rec_LDR ) # join the list of LDR bytes into a string and assign to the 'leader' field of the MARC record #-------------------------------------------- # Create 001 and 040 MARC fields for record number and cataloging source rec_001 = Field(tag='001', data='000' + str(rec_cnt)) rec_040 = Field(tag='040', indicators=[' ', ' '], subfields=['a', 'NNU', 'b', 'eng', 'c', 'NNU']) new_marc_rec.add_ordered_field(rec_001) new_marc_rec.add_ordered_field(rec_040) #-------------------------------------------- # Create a 245 Title MARC field title = fields[0].strip() if not title == '': rec_245a = title.split(':')[0] rec_245b = title.split(':')[1] rec_245 = Field(tag='245', indicators=['0', '0'], subfields=['a', rec_245a + ':', 'b', rec_245b]) new_marc_rec.add_ordered_field(rec_245) #-------------------------------------------- # Create a 246 Alternate Title MARC field alt_title = fields[1].strip() if not alt_title == '':
def main(arglist):
    """Find Sierra bibs for a bepress collection that lack DOIs and emit
    update artifacts.

    Reads the bepress spreadsheet for DOIs keyed by URL, queries the
    Sierra REST API for matching bib records without a DOI in the 024
    field, then writes brief MARC records ('shortrecs.mrc'), a list of
    OCLC search keys ('searchkeys.txt'), and an update spreadsheet
    ('OCLC Changes.xls').

    Args:
        arglist: command-line arguments -- setname and the path to the
            bepress .xls spreadsheet.

    Side effects: reads 'local_settings.ini' and
    'query_setname_no_doi_bib_limiter.json' from the current directory,
    performs HTTP requests, and writes the three output files above.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('setname',
                        help='bepress collection setname (e.g., diss201019)')
    parser.add_argument(
        'input',
        help=
        'path to bepress spreadsheet (containing DOIs) in "Excel 97-2003 Workbook (.xls)" format'
    )
    args = parser.parse_args(arglist)

    # Read config file and parse setnames into lists by category
    config = configparser.ConfigParser(allow_no_value=True)
    config.read('local_settings.ini')
    etd_setnames = []
    for i in config.items('ETD'):
        etd_setnames.append(i[0])
    # Add additional categories here

    setname = args.setname
    input = Path(args.input)

    # jsonmerge setup: append the 'entries' lists when merging paged
    # API responses
    schema = {"properties": {"entries": {"mergeStrategy": "append"}}}
    merger = Merger(schema)

    # Timestamp output
    date_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(date_time)
    print()
    print('------------------------------------------------------------')
    print('------------------------------------------------------------')

    # Read Bepress spreadsheet
    # TODO check that setname matches spreadsheet?
    book_in = xlrd.open_workbook(str(input))
    sheet1 = book_in.sheet_by_index(0)  # get first sheet
    sheet1_col_headers = sheet1.row_values(0)
    try:
        doi_col_index = sheet1_col_headers.index('doi')
    except ValueError:
        # BUGFIX: previously execution continued past this message and
        # crashed later with a NameError on doi_col_index; bail out
        # cleanly instead.
        print('DOI field not found in bepress metadata')
        return
    url_col_index = sheet1_col_headers.index('calc_url')

    # Read URLs and DOIs from spreadsheet
    bepress_data = {}
    for row in range(1, sheet1.nrows):
        bepress_url = sheet1.cell(row, url_col_index).value
        bepress_doi = sheet1.cell(row, doi_col_index).value
        bepress_data[bepress_url] = bepress_doi
    print(bepress_data)

    # Read query criteria from file, inserting setname and starting bib number
    with open('query_setname_no_doi_bib_limiter.json', 'r') as file:
        data = file.read().replace('SETNAME',
                                   setname).replace('bxxxxxxx', 'b1000000')

    # Authenticate to get token, using Client Credentials Grant
    # https://techdocs.iii.com/sierraapi/Content/zReference/authClient.htm
    key_secret = config.get('Sierra API', 'key') + ':' + config.get(
        'Sierra API', 'secret')
    key_secret_encoded = base64.b64encode(
        key_secret.encode('UTF-8')).decode('UTF-8')
    headers = {
        'Authorization': 'Basic ' + key_secret_encoded,
        'Content-Type': 'application/x-www-form-urlencoded'
    }
    response = requests.request(
        'POST',
        'https://catalog.lib.jmu.edu/iii/sierra-api/v5/token',
        headers=headers)
    j = response.json()
    token = j['access_token']
    auth = 'Bearer ' + token
    headers = {'Accept': 'application/json', 'Authorization': auth}

    # Search Sierra for records with URL+setname and no DOI in 024 field
    limit = 2000
    response = requests.request(
        'POST',
        'https://catalog.lib.jmu.edu/iii/sierra-api/v5/bibs/query?offset=0&limit='
        + str(limit),
        headers=headers,
        data=data)
    j = response.json()
    records_returned = j['total']
    j_all = j
    if j['total'] == 0:
        print('No ' + setname + ' records in Sierra are missing DOIs')
    else:
        # If limit was reached, repeat until all record IDs are retrieved
        while j['total'] == limit:
            last_record_id = j['entries'][-1:][0]['link'].replace(
                'https://catalog.lib.jmu.edu/iii/sierra-api/v5/bibs/', '')
            next_record_id = str(int(last_record_id) + 1)

            # Re-read query criteria, starting from the next bib number
            with open('query_setname_no_doi_bib_limiter.json', 'r') as file:
                data = file.read().replace('SETNAME', setname).replace(
                    'bxxxxxxx', 'b' + next_record_id)
            response = requests.request(
                'POST',
                'https://catalog.lib.jmu.edu/iii/sierra-api/v5/bibs/query?offset=0&limit='
                + str(limit),
                headers=headers,
                data=data)
            j = response.json()
            records_returned += j['total']
            # BUGFIX: records_returned is an int; concatenating it to a
            # str raised TypeError -- convert explicitly.
            print('Found ' + str(records_returned) + ' ' + setname +
                  ' Sierra records that are missing DOIs')

            # Add new response to previous ones
            j_all = merger.merge(j_all, j)
            j_all['total'] = records_returned

    # Put bib IDs in list
    bib_id_list = []
    for i in j_all['entries']:
        bib_id = i['link'].replace(
            'https://catalog.lib.jmu.edu/iii/sierra-api/v5/bibs/', '')
        bib_id_list.append(bib_id)

    # Get bib varField info for all records, 500 bib IDs at a time
    fields = 'varFields'
    j_data_all = {}
    records_returned_data = 0
    chunk_size = 499
    for i in range(0, len(bib_id_list), chunk_size):
        bib_id_list_partial = bib_id_list[i:i + chunk_size]
        querystring = {
            'id': ','.join(bib_id_list_partial),
            'fields': fields,
            'limit': limit
        }
        response = requests.request(
            'GET',
            'https://catalog.lib.jmu.edu/iii/sierra-api/v5/bibs/',
            headers=headers,
            params=querystring)
        j_data = response.json()
        records_returned_data += j_data['total']
        j_data_all = merger.merge(j_data_all, j_data)
        j_data_all['total'] = records_returned_data

    # Parse varField data for OCLC number and URL
    sierra_data = {}
    for i in j_data_all['entries']:
        id = i['id']
        var_fields = i['varFields']
        sierra_url = ''
        for v in var_fields:
            if 'marcTag' in v:
                if '001' in v['marcTag']:
                    oclc_num = v['content']
                if '856' in v['marcTag']:
                    for s in v['subfields']:
                        if 'u' in s['tag']:
                            if 'commons.lib.jmu.edu' in s['content']:
                                # join multiple matching URLs with ';'
                                if sierra_url:
                                    sierra_url += ';'
                                sierra_url += s['content']

        # Turn bib id into bib number (Sierra check digit: mod-11 of
        # positionally weighted digits, 10 written as 'x')
        bib_reversed = id[::-1]
        total = 0
        # BUGFIX(hygiene): the original reused 'i' here, shadowing the
        # entry being processed in the outer loop.
        for pos, digit in enumerate(bib_reversed):
            prod = (pos + 2) * int(digit)
            total += prod
        checkdigit = total % 11
        if checkdigit == 10:
            checkdigit = 'x'
        bib_num = 'b' + id + str(checkdigit)
        # NOTE(review): oclc_num is unbound if a record has no 001 field,
        # and a multi-URL sierra_url will not match a bepress_data key --
        # confirm the data guarantees both.
        sierra_data[bib_num] = (oclc_num, sierra_url)
    print(sierra_data)

    # Create short MARC records with bib number and DOI fields, and
    # create spreadsheet with OCLC numbers and DOI fields
    outbook = xlwt.Workbook()
    outsheet = outbook.add_sheet('Sheet 1')
    col_headers = ['OCLC Number', 'Bib Number', '024', '856']
    for x, y in enumerate(col_headers, 0):
        outsheet.write(0, x, y)
    outbook.save('OCLC Changes.xls')

    # Use context managers so the output files are closed even on error.
    with open('shortrecs.mrc', 'wb') as outmarc, \
            open('searchkeys.txt', 'w') as outtext:
        for i, j in enumerate(sierra_data, 1):
            print(i)
            print(j)

            # Get DOI from spreadsheet data, keyed by the record's URL
            doi_url = bepress_data[sierra_data[j][1]]
            print(doi_url)
            doi = doi_url.replace('https://doi.org/', '')
            # BUGFIX: '\$' is an invalid escape sequence; '\\$' yields
            # the identical backslash-dollar text without the warning.
            spreadsheet_024 = '7\\$a' + doi + '$2doi'

            # Get OCLC number
            oclcnum = sierra_data[j][0]

            field_907 = Field(tag='907',
                              indicators=[' ', ' '],
                              subfields=['a', '.' + j])
            field_024 = Field(tag='024',
                              indicators=['7', ' '],
                              subfields=['a', doi, '2', 'doi'])
            field_856 = Field(tag='856',
                              indicators=['4', '0'],
                              subfields=[
                                  'z',
                                  'Full-text of dissertation on the Internet',
                                  'u', doi_url
                              ])
            field_506_1 = Field(
                tag='506',
                indicators=['0', ' '],
                subfields=[
                    'a',
                    'James Madison University Libraries is providing a metadata record and hyperlink to this full-text resource.',
                    'f', 'Unrestricted online access', '2', 'star'
                ])
            field_506_2 = Field(tag='506',
                                indicators=['0', ' '],
                                subfields=[
                                    'a', 'Open access content.', 'f',
                                    'Open access content', '2', 'star'
                                ])
            field_540 = Field(
                tag='540',
                indicators=[' ', ' '],
                subfields=[
                    'a',
                    'This work is licensed under a Creative Commons Attribution-NonCommercial-No Derivative Works 4.0 License.',
                    'u',
                    'https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode'
                ])

            record = Record()
            record.add_ordered_field(field_907)
            record.add_ordered_field(field_024)
            record.add_ordered_field(field_856)
            record.add_ordered_field(field_506_1)
            record.add_ordered_field(field_506_2)
            record.add_ordered_field(field_540)

            outmarc.write(record.as_marc())
            outtext.write(oclcnum + '\n')
            outsheet.write(i, 0, oclcnum)
            outsheet.write(i, 1, j)
            outsheet.write(i, 2, spreadsheet_024)
            outsheet.write(i, 3, doi_url)
            outbook.save('OCLC Changes.xls')
class TestBibsUtilities(unittest.TestCase):
    """
    Tests utilities in the bibs module responsible for reading and
    writing MARC records in various formats.
    """

    def setUp(self):
        """
        Create a MARC21 record with control, title, item (949) and
        order (960) fields to simulate operations on real bibs.
        """
        # Test MARC record
        self.marc_bib = Record()
        self.marc_bib.leader = "00000nam a2200000u 4500"
        tags = []
        tags.append(Field(tag="001", data="0001-test-control_field"))
        tags.append(
            Field(tag="245", indicators=["0", "0"],
                  subfields=["a", "Test title"]))
        # two 949 item fields (distinct barcodes) and one 960 order field
        # sharing the same subfield layout
        tags.append(
            Field(
                tag="949",
                indicators=[" ", "1"],
                subfields=[
                    "i",
                    "33333818132462",
                    "l",
                    "fea0f",
                    "p",
                    "9.99",
                    "t",
                    "102",
                    "v",
                    "AMALIVRE",
                ],
            ))
        tags.append(
            Field(
                tag="949",
                indicators=[" ", "1"],
                subfields=[
                    "i",
                    "33333818132464",
                    "l",
                    "fea0f",
                    "p",
                    "9.99",
                    "t",
                    "102",
                    "v",
                    "AMALIVRE",
                ],
            ))
        tags.append(
            Field(
                tag="960",
                indicators=[" ", " "],
                subfields=[
                    "i",
                    "33333818132466",
                    "l",
                    "fea0f",
                    "p",
                    "9.99",
                    "t",
                    "102",
                    "v",
                    "AMALIVRE",
                ],
            ))
        for tag in tags:
            self.marc_bib.add_ordered_field(tag)

        # temp file used by the write tests
        self.fh_out = "MARCtest.mrc"

    def tearDown(self):
        self.marc_bib = None
        # remove the temp MARC file if a test created it
        try:
            os.remove(self.fh_out)
        except OSError:
            pass

    def test_write_marc21(self):
        # serialized output must match the raw MARC21 transmission format
        # byte for byte (directory + field data + record terminator)
        bibs.write_marc21(self.fh_out, self.marc_bib)
        contents = open(self.fh_out).read()
        self.assertEqual(
            contents,
            "00266nam a2200085u 4500001002400000245001500024949004700039949004700086960004700133\x1e0001-test-control_field\x1e00\x1faTest title\x1e 1\x1fi33333818132462\x1flfea0f\x1fp9.99\x1ft102\x1fvAMALIVRE\x1e 1\x1fi33333818132464\x1flfea0f\x1fp9.99\x1ft102\x1fvAMALIVRE\x1e  \x1fi33333818132466\x1flfea0f\x1fp9.99\x1ft102\x1fvAMALIVRE\x1e\x1d",
        )

    def test_read_marc21_returns_pymarc_reader(self):
        # should return an instance of pymarc reader
        reader = bibs.read_marc21("test.mrc")
        self.assertIs(type(reader), MARCReader)

    # def test_count_bibs_when_not_marc_file(self):
    #     with self.assertRaises(OverloadError):
    #         bibs.count_bibs('test.json')

    def test_read_from_json_retuns_pymarc_reader(self):
        reader = JSONReader("test.json")
        self.assertIs(type(reader), JSONReader)

    def test_create_tag_910_for_nypl_branches(self):
        self.assertEqual(str(bibs.create_tag_910("NYPL", "branches")),
                         "=910 \\\\$aBL")

    def test_create_tag_910_for_nypl_research(self):
        self.assertEqual(str(bibs.create_tag_910("NYPL", "research")),
                         "=910 \\\\$aRL")

    def test_create_tag_910_for_bpl(self):
        # BPL bibs get no 910 tag
        self.assertIsNone(bibs.create_tag_910("BPL", None))

    def test_create_tag_910_invalid_nypl_branch(self):
        with self.assertRaises(ValueError):
            bibs.create_tag_910("NYPL", None)

    def test_create_target_id_field_exceptions(self):
        # Sierra id shorter than 8 digits must be rejected
        with self.assertRaises(ValueError):
            bibs.create_target_id_field("nypl", "012345")

    def test_create_target_id_field_returns_instance_of_pymarc_Field(self):
        self.assertIsInstance(bibs.create_target_id_field("nypl", "01234567"),
                              Field)

    def test_create_target_id_field_returns_correct_field_values(self):
        # bpl ids go into a 907, nypl ids into a 945; both carry the
        # '.b<id>a' form in $a
        self.assertEqual(
            bibs.create_target_id_field("bpl", "01234567").tag, "907")
        self.assertEqual(
            bibs.create_target_id_field("bpl", "01234567").indicators,
            [" ", " "])
        self.assertEqual(
            bibs.create_target_id_field("bpl", "01234567").subfields,
            ["a", ".b01234567a"],
        )
        self.assertEqual(
            bibs.create_target_id_field("nypl", "01234567").tag, "945")
        self.assertEqual(
            bibs.create_target_id_field("nypl", "01234567").indicators,
            [" ", " "])
        self.assertEqual(
            bibs.create_target_id_field("nypl", "01234567").subfields,
            ["a", ".b01234567a"],
        )

    def test_check_sierra_id_presence(self):
        self.assertFalse(bibs.check_sierra_id_presence("nypl", self.marc_bib))
        self.assertFalse(bibs.check_sierra_id_presence("bpl", self.marc_bib))
        # add 945
        self.marc_bib.add_field(
            Field(tag="945", indicators=[" ", " "],
                  subfields=["a", ".b01234567a"]))
        self.assertTrue(bibs.check_sierra_id_presence("nypl", self.marc_bib))
        self.marc_bib.add_field(
            Field(tag="907", indicators=[" ", " "],
                  subfields=["a", ".b01234567a"]))
        self.assertTrue(bibs.check_sierra_id_presence("bpl", self.marc_bib))

    def test_check_sierra_command_tag_presence_False(self):
        # only a 949 with blank indicators and $a command string counts
        self.assertFalse(bibs.sierra_command_tag(self.marc_bib))
        self.marc_bib.add_field(
            Field(tag="949", indicators=[" ", "1"], subfields=["a", "*b2=a;"]))
        self.marc_bib.add_field(
            Field(tag="949", indicators=[" ", " "], subfields=["b", "*b2=a;"]))
        self.assertFalse(bibs.sierra_command_tag(self.marc_bib))

    def test_check_sierra_command_tag_presence_True(self):
        self.marc_bib.add_field(
            Field(tag="949", indicators=[" ", " "], subfields=["a", "*b2=a;"]))
        self.assertTrue(bibs.sierra_command_tag(self.marc_bib))

    def test_check_sierra_command_tag_presence_exception(self):
        # empty $a on the command tag raises IndexError
        self.marc_bib.add_field(
            Field(tag="949", indicators=[" ", " "], subfields=["a", ""]))
        with self.assertRaises(IndexError):
            bibs.sierra_command_tag(self.marc_bib)

    def test_create_field_from_template(self):
        # None indicator should be rendered as a blank
        template = dict(
            option="skip",
            tag="949",
            ind1=None,
            ind2="1",
            subfields={
                "a": "foo",
                "b": "bar"
            },
        )
        field = bibs.create_field_from_template(template)
        self.assertIsInstance(field, Field)
        self.assertEqual(field.tag, "949")
        self.assertEqual(field.indicators, [" ", "1"])
        self.assertEqual(field["a"], "foo")
        self.assertEqual(field["b"], "bar")

    def test_set_nypl_sierra_bib_default_location_for_branches_new(self):
        # test when no command line present
        bib = bibs.set_nypl_sierra_bib_default_location(
            "branches", self.marc_bib)
        for field in bib.get_fields("949"):
            if field.indicators == [" ", " "]:
                command = field
        self.assertEqual(str(command), "=949 \\\\$a*bn=zzzzz;")

    def test_set_nypl_sierra_bib_default_location_for_branches_present(self):
        # test adding location skipped when "bn=" command already present
        self.marc_bib.add_field(
            Field(tag="949",
                  indicators=[" ", " "],
                  subfields=["a", "*recs=a;bn=xxx;"]))
        bib = bibs.set_nypl_sierra_bib_default_location(
            "branches", self.marc_bib)
        for field in bib.get_fields("949"):
            if field.indicators == [" ", " "]:
                command = field
        self.assertEqual(str(command), "=949 \\\\$a*recs=a;bn=xxx;")

    def test_set_nypl_sierra_bib_default_location_for_branches_other_command_present(
            self, ):
        # simulate command line with other parameters present
        self.marc_bib.add_field(
            Field(tag="949", indicators=[" ", " "],
                  subfields=["a", "*recs=a"]))
        bib = bibs.set_nypl_sierra_bib_default_location(
            "branches", self.marc_bib)
        for field in bib.get_fields("949"):
            if field.indicators == [" ", " "]:
                command = field
        self.assertEqual(str(command), "=949 \\\\$a*recs=a;bn=zzzzz;")

    def test_set_nypl_sierra_bib_default_location_for_research_new(self):
        # test when no command line present
        bib = bibs.set_nypl_sierra_bib_default_location(
            "research", self.marc_bib)
        for field in bib.get_fields("949"):
            if field.indicators == [" ", " "]:
                command = field
        self.assertEqual(str(command), "=949 \\\\$a*bn=xxx;")

    def test_bibmeta_object(self):
        # BibMeta extracts standard-number/control fields and call numbers
        meta = bibs.BibMeta(self.marc_bib, sierraId="12345678")
        self.assertIsInstance(meta, bibs.BibMeta)
        self.assertEqual(meta.t001, "0001-test-control_field")
        self.assertIsNone(meta.t005)
        self.assertEqual(meta.t020, [])
        self.assertEqual(meta.t022, [])
        self.assertEqual(meta.t024, [])
        self.assertEqual(meta.t028, [])
        self.assertEqual(meta.sierraId, "12345678")
        self.assertIsNone(meta.bCallNumber)
        self.assertEqual(meta.rCallNumber, [])

    def test_vendor_bibmeta_object(self):
        # sierraId should be picked up from the 945 field
        self.marc_bib.add_field(
            Field(tag="945", indicators=[" ", " "],
                  subfields=["a", ".b01234569a"]))
        meta = bibs.VendorBibMeta(self.marc_bib,
                                  vendor="Amalivre",
                                  dstLibrary="rl")
        self.assertIsInstance(meta, bibs.VendorBibMeta)
        self.assertEqual(meta.t001, "0001-test-control_field")
        self.assertIsNone(meta.t005)
        self.assertEqual(meta.t020, [])
        self.assertEqual(meta.t022, [])
        self.assertEqual(meta.t024, [])
        self.assertEqual(meta.t028, [])
        self.assertIsNone(meta.bCallNumber)
        self.assertEqual(meta.rCallNumber, [])
        self.assertEqual(meta.vendor, "Amalivre")
        self.assertEqual(meta.dstLibrary, "rl")
        self.assertEqual(meta.sierraId, "01234569")
        self.assertEqual(
            meta.barcodes,
            ["33333818132462", "33333818132464", "33333818132466"])

    def test_vendor_bibmeta_object_when_sierra_id_is_provided(self):
        # nypl scenario
        self.marc_bib.add_field(
            Field(tag="945", indicators=[" ", " "],
                  subfields=["a", ".b01234567a"]))
        meta = bibs.VendorBibMeta(self.marc_bib,
                                  vendor="BTODC",
                                  dstLibrary="branches")
        self.assertEqual(meta.sierraId, "01234567")

        # bpl scenario
        self.marc_bib.remove_fields("945")
        self.marc_bib.add_field(
            Field(tag="907", indicators=[" ", " "],
                  subfields=["a", ".b01234568a"]))
        meta = bibs.VendorBibMeta(self.marc_bib,
                                  vendor="BTCLS",
                                  dstLibrary="branches")
        self.assertEqual(meta.sierraId, "01234568")
class TestInhouseBibMeta(unittest.TestCase):
    """
    Inhouse meta analysis tests: verifies bibs.InhouseBibMeta derives
    the owning library ('branches', 'research', 'mixed', or None) from
    call-number fields (091/099/852) and order location codes.
    """

    def setUp(self):
        # Test MARC record
        # NYPL bib (049 $a NYPP)
        self.n_marc = Record()
        self.n_marc.leader = "00000nam a2200000u 4500"
        tags = []
        tags.append(Field(tag="001", data="o1234"))
        tags.append(Field(tag="003", data="OCoLC"))
        tags.append(
            Field(tag="049", indicators=[" ", " "], subfields=["a", "NYPP"]))
        tags.append(
            Field(tag="245", indicators=["0", "0"],
                  subfields=["a", "Test title"]))
        for tag in tags:
            self.n_marc.add_ordered_field(tag)

        # BPL bib (049 $a BKL)
        self.b_marc = Record()
        self.b_marc.leader = "00000nam a2200000u 4500"
        tags = []
        tags.append(Field(tag="001", data="o1234"))
        tags.append(Field(tag="003", data="OCoLC"))
        tags.append(
            Field(tag="049", indicators=[" ", " "], subfields=["a", "BKL"]))
        tags.append(
            Field(tag="245", indicators=["0", "0"],
                  subfields=["a", "Test title"]))
        for tag in tags:
            self.b_marc.add_ordered_field(tag)

    def tearDown(self):
        # reset test bibs
        self.n_marc = Record()
        self.b_marc = Record()

    def test_nypl_ownLibrary_no_locations(self):
        locs = []
        meta = bibs.InhouseBibMeta(self.n_marc, locations=locs)
        self.assertIsNone(meta.ownLibrary)

    def test_nypl_ownLibrary_xxx(self):
        locs = ["xxx"]
        meta = bibs.InhouseBibMeta(self.n_marc, locations=locs)
        self.assertEqual(meta.ownLibrary, "research")

    def test_nypl_ownLibrary_zzzzz(self):
        locs = ["zzzzz"]
        meta = bibs.InhouseBibMeta(self.n_marc, locations=locs)
        self.assertEqual(meta.ownLibrary, "branches")

    def test_nypl_ownLibrary_mixed_order(self):
        locs = ["zzzzz", "xxx"]
        meta = bibs.InhouseBibMeta(self.n_marc, locations=locs)
        self.assertEqual(meta.ownLibrary, "mixed")

    def test_nypl_ownLibrary_mixed_explicit_my(self):
        locs = ["mya0f", "myd"]
        meta = bibs.InhouseBibMeta(self.n_marc, locations=locs)
        self.assertEqual(meta.ownLibrary, "mixed")

    def test_nypl_ownLibrary_mixed_explicit_ma(self):
        locs = ["mya0n", "mal"]
        meta = bibs.InhouseBibMeta(self.n_marc, locations=locs)
        self.assertEqual(meta.ownLibrary, "mixed")

    def test_nypl_ownLibrary_branch_locs_only(self):
        # BUGFIX: dict.keys() returns a view in Python 3 which has no
        # .remove(); materialize a list before removing the research
        # location codes.
        locs = list(sierra_dicts.NYPL_BRANCHES.keys())
        locs.remove("ma")
        locs.remove("sc")
        meta = bibs.InhouseBibMeta(self.n_marc, locations=locs)
        self.assertEqual(meta.ownLibrary, "branches")

    def test_nypl_ownLibrary_research_my_locs(self):
        locs = ["myd", "myh", "mym", "myt"]
        meta = bibs.InhouseBibMeta(self.n_marc, locations=locs)
        self.assertEqual(meta.ownLibrary, "research")

    def test_nypl_ownLibrary_branches_my_locs(self):
        locs = ["mya0n", "mya0v", "myj0f"]
        meta = bibs.InhouseBibMeta(self.n_marc, locations=locs)
        self.assertEqual(meta.ownLibrary, "branches")

    def test_nypl_ownLibrary_resarch_ma_loc(self):
        locs = ["mal"]
        meta = bibs.InhouseBibMeta(self.n_marc, locations=locs)
        self.assertEqual(meta.ownLibrary, "research")

    def test_nypl_ownLibrary_research_sc_loc(self):
        locs = ["sc"]
        meta = bibs.InhouseBibMeta(self.n_marc, locations=locs)
        self.assertEqual(meta.ownLibrary, "research")

    def test_nypl_branch_full_no_order(self):
        # branch call number (091) only, no order locations
        tag = Field(tag="091",
                    indicators=[" ", " "],
                    subfields=["f", "FIC", "a", "ADAMS"])
        self.n_marc.add_ordered_field(tag)
        meta = bibs.InhouseBibMeta(self.n_marc)
        self.assertEqual(meta.ownLibrary, "branches")

    def test_nypl_research_full_no_order(self):
        # research call number (852) only, no order locations
        tag = Field(tag="852",
                    indicators=["8", " "],
                    subfields=["h", "ReCAP 0001"])
        self.n_marc.add_ordered_field(tag)
        meta = bibs.InhouseBibMeta(self.n_marc)
        self.assertEqual(meta.ownLibrary, "research")

    def test_nypl_mixed_full_no_order(self):
        # both branch (091) and research (852) call numbers
        tag = Field(tag="852",
                    indicators=["8", " "],
                    subfields=["h", "ReCAP 0001"])
        self.n_marc.add_ordered_field(tag)
        tag = Field(tag="091",
                    indicators=[" ", " "],
                    subfields=["f", "FIC", "a", "ADAMS"])
        self.n_marc.add_ordered_field(tag)
        meta = bibs.InhouseBibMeta(self.n_marc)
        self.assertEqual(meta.ownLibrary, "mixed")

    def test_nypl_mixed_branch_full_research_order(self):
        tag = Field(tag="091",
                    indicators=[" ", " "],
                    subfields=["f", "FIC", "a", "ADAMS"])
        self.n_marc.add_ordered_field(tag)
        locs = ["xxx"]
        meta = bibs.InhouseBibMeta(self.n_marc, locations=locs)
        self.assertEqual(meta.ownLibrary, "mixed")

    def test_nypl_mixed_research_full_branch_order(self):
        tag = Field(tag="852",
                    indicators=["8", " "],
                    subfields=["h", "ReCAP 0001"])
        self.n_marc.add_ordered_field(tag)
        locs = ["zzzzz"]
        meta = bibs.InhouseBibMeta(self.n_marc, locations=locs)
        self.assertEqual(meta.ownLibrary, "mixed")

    def test_nypl_mixed_research_full_branch_order_explicit(self):
        tag = Field(tag="852",
                    indicators=["8", " "],
                    subfields=["h", "ReCAP 0001"])
        self.n_marc.add_ordered_field(tag)
        self.assertFalse("091" in self.n_marc)
        locs = ["mya0n"]
        meta = bibs.InhouseBibMeta(self.n_marc, locations=locs)
        self.assertEqual(meta.ownLibrary, "mixed")

    def test_bpl_ownLibrary_order(self):
        # BPL bib with no call number fields
        meta = bibs.InhouseBibMeta(self.b_marc)
        self.assertIsNone(meta.ownLibrary)

    def test_bpl_ownLibrary_full(self):
        # BPL branch call number (099)
        tag = Field(tag="099",
                    indicators=[" ", " "],
                    subfields=["a", "FIC", "a", "ADAMS"])
        self.b_marc.add_ordered_field(tag)
        meta = bibs.InhouseBibMeta(self.b_marc)
        self.assertEqual(meta.ownLibrary, "branches")
def make_bib(row: namedtuple, sequence: int):
    """Create a brief pymarc bib record for one map described by *row*.

    Args:
        row: namedtuple of spreadsheet columns; the code reads author,
            title, alt_title, scale, pub_year, series, note, content,
            subjects, genre and call_number -- confirm the exact field
            set against the caller.
        sequence: integer used to build the unique 001 control number.

    Returns:
        the assembled pymarc Record.
    """
    bib = Record()

    # leader: cartographic material, brief/minimal-level record
    bib.leader = "00000cem a2200000Mi 4500"

    tags = []

    # 001 tag - local control number
    tags.append(Field(tag="001", data=f"bkops{sequence}"))

    # 003 tag - control number identifier
    tags.append(Field(tag="003", data="BookOps"))

    # 005 tag - date/time of latest transaction
    timestamp = create_timestamp()
    tags.append(Field(tag="005", data=timestamp))

    # 007 tag - physical description fixed field
    tags.append(Field(
        tag="007",
        data="aj canzn",
    ))

    # 008 tag
    # NOTE(review): internal spacing of the fixed field below may have
    # been altered by reformatting -- verify it is exactly 40 characters.
    dateCreated = date.strftime(date.today(), "%y%m%d")
    pub_year = encode_pub_year(row.pub_year)
    data = f"{dateCreated}s{pub_year} xx |||||| a | | und d"
    tags.append(Field(tag="008", data=data))

    # 034 tag - coded cartographic mathematical data ($a 'a' = linear scale)
    esc = encode_scale(row.scale)
    if esc is not None:
        tags.append(
            Field(tag="034",
                  indicators=["1", " "],
                  subfields=["a", "a", "b", esc]))

    # 110 tag - corporate author as cartographer
    tags.append(
        Field(
            tag="110",
            indicators=["1", " "],
            subfields=["a", f"{row.author},", "e", "cartographer."],
        ))

    # 245 tag - title
    tags.append(
        Field(tag="245",
              indicators=["1", "0"],
              subfields=["a", f"{row.title}."]))

    # 246 tag - variant title (optional)
    if row.alt_title:
        tags.append(
            Field(tag="246",
                  indicators=["3", " "],
                  subfields=["a", row.alt_title]))

    # 255 tag - cartographic scale statement
    nsc = norm_scale(row.scale)
    tags.append(Field(tag="255", indicators=[" ", " "],
                      subfields=["a", nsc]))

    # 264 tag - publication statement
    npub_date = norm_pub_date(row.pub_year)
    tags.append(
        Field(
            tag="264",
            indicators=[" ", "1"],
            subfields=[
                "a",
                "[Place of publication not identified] :",
                "b",
                f"{row.author},",
                "c",
                npub_date,
            ],
        ))

    # tag 300 - physical description
    tags.append(
        Field(
            tag="300",
            indicators=[" ", " "],
            subfields=["a", "1 folded map :", "b", "color"],
        ))

    # 336 tag - RDA content type
    tags.append(
        Field(
            tag="336",
            indicators=[" ", " "],
            subfields=[
                "a", "cartographic image", "b", "cri", "2", "rdacontent"
            ],
        ))

    # 337 tag - RDA media type
    # BUGFIX: the $2 source code for the 337 media-type vocabulary is
    # "rdamedia"; "rddcontent" was a typo (compare 336 "rdacontent" and
    # 338 "rdacarrier").
    tags.append(
        Field(
            tag="337",
            indicators=[" ", " "],
            subfields=["a", "unmediated", "b", "n", "2", "rdamedia"],
        ))

    # 338 tag - RDA carrier type
    tags.append(
        Field(
            tag="338",
            indicators=[" ", " "],
            subfields=["a", "sheet", "b", "nb", "2", "rdacarrier"],
        ))

    # 490 tag - series statement (optional)
    if row.series:
        tags.append(
            Field(tag="490",
                  indicators=["0", " "],
                  subfields=["a", row.series]))

    # 500 tag - general note (optional)
    if row.note:
        tags.append(
            Field(tag="500",
                  indicators=[" ", " "],
                  subfields=["a", f"{row.note}."]))

    # 505 tag - contents note (optional)
    if row.content:
        tags.append(
            Field(tag="505",
                  indicators=["0", " "],
                  subfields=["a", f"{row.content}."]))

    # 650 tags - subject headings (optional)
    if row.subjects:
        subject_fields = encode_subjects(row.subjects)
        tags.extend(subject_fields)

    # 655 tag - genre/form term (optional)
    if row.genre:
        tags.append(
            Field(
                tag="655",
                indicators=[" ", "7"],
                subfields=["a", f"{row.genre}.", "2", "lcgft"],
            ))

    # tag 852 - call number (optional)
    if row.call_number:
        tags.append(
            Field(tag="852",
                  indicators=["8", " "],
                  subfields=["h", row.call_number]))

    for t in tags:
        bib.add_ordered_field(t)

    return bib
def main(arglist):
    """Convert a comic-book inventory spreadsheet into a file of MARC records.

    Reads the first sheet of the workbook named by the ``input`` CLI
    argument (via xlrd), builds one pymarc record per data row, and writes
    them all to ``records.mrc`` in the current working directory.

    Args:
        arglist: command-line argument list handed to argparse
            (e.g. ``sys.argv[1:]``); must contain the spreadsheet path.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='path to spreadsheet')
    # parser.add_argument('output', help='save directory')
    # parser.add_argument('--production', help='production DOIs', action='store_true')
    args = parser.parse_args(arglist)
    input = Path(args.input)  # NOTE(review): shadows the builtin input()

    # Read spreadsheet
    book_in = xlrd.open_workbook(str(input))
    sheet = book_in.sheet_by_index(0)  # get first sheet
    col_headers = sheet.row_values(0)
    # print(col_headers)
    # print()

    # Locate each column of interest by its header label in row 0;
    # .index() raises ValueError if a header is missing, which fails fast
    # on a malformed spreadsheet.
    title_col = col_headers.index('Title')
    subj_col = col_headers.index('Subject')
    genre_col = col_headers.index('Genre')
    pages_col = col_headers.index('Pages')
    date_col = col_headers.index('Date')
    pub_place_col = col_headers.index('Pub_Place')
    publisher_col = col_headers.index('Publisher')
    source_col = col_headers.index('Source')
    writer_col = col_headers.index('Writer')
    penciller_col = col_headers.index('Penciller')
    inker_col = col_headers.index('Inker')
    colorist_col = col_headers.index('Colorist')
    letterer_col = col_headers.index('Letterer')
    cover_artist_col = col_headers.index('Cover Artist')
    editor_col = col_headers.index('Editor')
    hist_note_col = col_headers.index('Historical Note')
    note_col = col_headers.index('Note')
    characters_col = col_headers.index('Characters')
    story_arc_col = col_headers.index('Story Arc')
    toc_col = col_headers.index('Table of Contents')
    series_col = col_headers.index('Is Part of Series')

    outmarc = open('records.mrc', 'wb')

    # Boilerplate fields shared (as the same Field objects) by every record
    field_ldr = '00000nam 2200000Ii 4500'
    field_040 = Field(
        tag='040',
        indicators=[' ', ' '],
        subfields=['a', 'VMC', 'b', 'eng', 'e', 'rda', 'c', 'VMC'])
    field_049 = Field(tag='049', indicators=[' ', ' '], subfields=['a', 'VMCM'])
    field_336_text = Field(
        tag='336',
        indicators=[' ', ' '],
        subfields=['a', 'text', 'b', 'txt', '2', 'rdacontent'])
    field_336_image = Field(
        tag='336',
        indicators=[' ', ' '],
        subfields=['a', 'still image', 'b', 'sti', '2', 'rdacontent'])
    field_337 = Field(tag='337', indicators=[' ', ' '],
                      subfields=['a', 'unmediated', 'b', 'n', '2', 'rdamedia'])
    field_338 = Field(tag='338', indicators=[' ', ' '],
                      subfields=['a', 'volume', 'b', 'nc', '2', 'rdacarrier'])
    field_380 = Field(tag='380', indicators=[' ', ' '],
                      subfields=['a', 'Comic books and graphic novels.'])
    field_506 = Field(
        tag='506',
        indicators=['1', ' '],
        subfields=[
            'a',
            'Collection open to research. Researchers must register and agree to copyright and privacy laws before using this collection. Please contact Research Services staff before visiting the James Madison University Special Collections Library to use this collection.'
        ])
    field_542 = Field(tag='542', indicators=[' ', ' '],
                      subfields=[
                          'a', 'Copyright not evaluated', 'u',
                          'http://rightsstatements.org/vocab/CNE/1.0/'
                      ])
    field_588 = Field(
        tag='588',
        indicators=['0', ' '],
        subfields=[
            'a', 'Description based on indicia and Grand Comics Database.'
        ])
    field_989 = Field(tag='989', indicators=[' ', ' '],
                      subfields=['a', 'PN6728'])

    # One MARC record per data row (row 0 is the header row)
    for row in range(1, sheet.nrows):
        print('Record ' + str(row))
        title = sheet.cell(row, title_col).value
        print(title)
        # Parsed title pieces: a lowercased copy for the series fields
        # (490/830) and an original-case copy for the 099/245.
        lower_title = parse_title(lowercase_title(title))
        title = parse_title(sheet.cell(row, title_col).value)
        # A 3-element parse means title + part name + part number.
        has_part_title = False
        if len(title) == 3:
            has_part_title = True
        # Multi-valued cells are semicolon-delimited.
        subj = sheet.cell(row, subj_col).value
        subj = [x.strip() for x in subj.split(';')]
        genre = sheet.cell(row, genre_col).value
        genre = [x.strip() for x in genre.split(';')]
        pages = sheet.cell(row, pages_col).value
        # Year only; NOTE(review): this local shadows any module-level `date`.
        date = sheet.cell(row, date_col).value[0:4]
        pub_place = sheet.cell(row, pub_place_col).value
        publisher = sheet.cell(row, publisher_col).value
        source = sheet.cell(row, source_col).value
        # writer = sheet.cell(row, writer_col).value
        # Each creator role defaults to [] and becomes a stripped list only
        # when its cell is non-empty.
        writer = []
        if sheet.cell(row, writer_col).value:
            writer = sheet.cell(row, writer_col).value
            writer = [x.strip() for x in writer.split(';')]
        penciller = []
        if sheet.cell(row, penciller_col).value:
            penciller = sheet.cell(row, penciller_col).value
            penciller = [x.strip() for x in penciller.split(';')]
        inker = []
        if sheet.cell(row, inker_col).value:
            inker = sheet.cell(row, inker_col).value
            inker = [x.strip() for x in inker.split(';')]
        colorist = []
        if sheet.cell(row, colorist_col).value:
            colorist = sheet.cell(row, colorist_col).value
            # print(colorist)
            # print('COLORIST FROM SHEET=' + colorist + '=END')
            # print(bool(colorist))
            colorist = [x.strip() for x in colorist.split(';')]
        letterer = []
        if sheet.cell(row, letterer_col).value:
            letterer = sheet.cell(row, letterer_col).value
            letterer = [x.strip() for x in letterer.split(';')]
        cover_artist = []
        if sheet.cell(row, cover_artist_col).value:
            cover_artist = sheet.cell(row, cover_artist_col).value
            cover_artist = [x.strip() for x in cover_artist.split(';')]
        editor = []
        if sheet.cell(row, editor_col).value:
            editor = sheet.cell(row, editor_col).value
            editor = [x.strip() for x in editor.split(';')]
        # Free-text cells stay scalar strings (or [] when empty).
        hist_note = []
        if sheet.cell(row, hist_note_col).value:
            hist_note = sheet.cell(row, hist_note_col).value
        note = []
        if sheet.cell(row, note_col).value:
            note = sheet.cell(row, note_col).value
        characters = []
        if sheet.cell(row, characters_col).value:
            characters = sheet.cell(row, characters_col).value
            characters = [x.strip() for x in characters.split(';')]
        story_arc = []
        if sheet.cell(row, story_arc_col).value:
            story_arc = sheet.cell(row, story_arc_col).value
        toc = []
        if sheet.cell(row, toc_col).value:
            toc = sheet.cell(row, toc_col).value
        series = sheet.cell(row, series_col).value
        # print(cover_artist)
        # print(characters)
        # print(writer)
        # print(subfields_from_string(writer[0]))
        # print(name_direct_order(subfields_from_string(writer[0])[1]))
        # print(title)
        # print(parse_title(title))

        record = Record()

        # Add boilerplate fields
        record.leader = field_ldr
        record.add_ordered_field(field_040)
        record.add_ordered_field(field_049)
        record.add_ordered_field(field_336_text)
        record.add_ordered_field(field_336_image)
        record.add_ordered_field(field_337)
        record.add_ordered_field(field_338)
        record.add_ordered_field(field_380)
        record.add_ordered_field(field_506)
        record.add_ordered_field(field_542)
        record.add_ordered_field(field_588)
        record.add_ordered_field(field_989)

        # Add other fields
        today = datetime.today().strftime('%y%m%d')
        data_008 = today + 't' + date + date + 'xx a 6 000 1 eng d'
        field_008 = Field(tag='008', data=data_008)
        record.add_ordered_field(field_008)

        # 099: local call number built from the parsed title
        subfields_099 = []
        if has_part_title:
            subfields_099 = ['a', title[0] + ': ' + title[1], 'a', title[2]]
        else:
            subfields_099 = ['a', title[0], 'a', title[1]]
        field_099 = Field(tag='099',
                          indicators=[' ', '9'],
                          subfields=subfields_099)
        record.add_ordered_field(field_099)

        if writer:
            # Add 100 for first writer
            subfield_content = subfields_from_string_relator(
                writer[0], 'writer')
            field_100 = Field(tag='100',
                              indicators=['1', ' '],
                              subfields=subfield_content)
            record.add_ordered_field(field_100)
            # Multiple writers
            if len(writer) > 1:
                # Add 700s for all writers after the first
                for i in writer[1:]:
                    subfield_content = subfields_from_string_relator(
                        i, 'writer')
                    field_700 = Field(tag='700',
                                      indicators=['1', ' '],
                                      subfields=subfield_content)
                    record.add_ordered_field(field_700)

        # 245 indicator 1: 1 when a 100 main entry was added above
        if writer:
            f245_ind1 = 1
        else:
            f245_ind1 = 0
        # 245 indicator 2: non-filing character count for a leading article
        f245_ind2 = 0
        if str.startswith(title[0], 'The '):
            f245_ind2 = 4
        elif str.startswith(title[0], 'An '):
            f245_ind2 = 3
        elif str.startswith(title[0], 'A '):
            f245_ind2 = 2
        subfields_245 = []
        if has_part_title:
            subfields_245 = [
                'a', title[0] + '.', 'p', title[1] + ',', 'n', title[2]
            ]
        else:
            subfields_245 = ['a', title[0] + ',', 'n', title[1]]
        # If writer exists, add $c
        if writer:
            subfields_245[-1] = subfields_245[-1] + ' /'
            subfields_245.append('c')
            subfields_245.append(
                name_direct_order(subfields_from_string(writer[0])[1]) +
                ', writer.')
        else:
            # If no writer, add 245 ending punctuation
            subfields_245[-1] = subfields_245[-1] + '.'
        field_245 = Field(tag='245',
                          indicators=[f245_ind1, f245_ind2],
                          subfields=subfields_245)
        record.add_ordered_field(field_245)

        # 264: publication statement and copyright date
        field_264_1 = Field(tag='264',
                            indicators=[' ', '1'],
                            subfields=[
                                'a', pub_place + ' :', 'b', publisher + ',',
                                'c', date + '.'
                            ])
        record.add_ordered_field(field_264_1)
        field_264_4 = Field(tag='264',
                            indicators=[' ', '4'],
                            subfields=['c', '©' + date])
        record.add_ordered_field(field_264_4)
        field_300 = Field(tag='300',
                          indicators=[' ', ' '],
                          subfields=[
                              'a', pages + ' pages :', 'b',
                              'chiefly color illustrations.'
                          ])
        record.add_ordered_field(field_300)

        # 490: series statement (lowercased title form)
        subfields_490 = []
        if has_part_title:
            subfields_490 = [
                'a', lower_title[0] + '. ' + lower_title[1] + ' ;', 'v',
                lower_title[2]
            ]
        else:
            subfields_490 = ['a', lower_title[0] + ' ;', 'v', lower_title[1]]
        field_490 = Field(tag='490',
                          indicators=['1', ' '],
                          subfields=subfields_490)
        record.add_ordered_field(field_490)

        # 500 notes, 505 contents, 520 summary, 561 provenance
        if hist_note:
            field_500_hist = Field(tag='500',
                                   indicators=[' ', ' '],
                                   subfields=['a', hist_note + '.'])
            record.add_ordered_field(field_500_hist)
        if note:
            field_500_note = Field(tag='500',
                                   indicators=[' ', ' '],
                                   subfields=['a', note + '.'])
            record.add_ordered_field(field_500_note)
        if toc:
            # Ensure terminal punctuation on the contents note
            if not toc.endswith('.') and not toc.endswith(
                    '?') and not toc.endswith('!'):
                toc += '.'
            field_505 = Field(tag='505',
                              indicators=['0', ' '],
                              subfields=['a', toc])
            record.add_ordered_field(field_505)
        if story_arc:
            field_520 = Field(tag='520',
                              indicators=[' ', ' '],
                              subfields=[
                                  'a', '"' + story_arc +
                                  '" -- Grand Comics Database.'
                              ])
            record.add_ordered_field(field_520)
        field_561 = Field(tag='561',
                          indicators=[' ', ' '],
                          subfields=['a', source + '.'])
        record.add_ordered_field(field_561)

        # 650: one field per subject heading (ensure terminal punctuation)
        for i in subj:
            if not i.endswith('.') and not i.endswith(')'):
                i += '.'
            field_650 = Field(tag='650',
                              indicators=[' ', '0'],
                              subfields=['a', i])
            record.add_ordered_field(field_650)
        # 655: one field per genre term
        for i in genre:
            if not i.endswith('.') and not i.endswith(')'):
                i += '.'
            field_655 = Field(tag='655',
                              indicators=[' ', '7'],
                              subfields=['a', i, '2', 'lcgft'])
            record.add_ordered_field(field_655)

        if characters:
            # print(characters)
            # 500 note listing all characters, semicolon-separated
            subfield_content = 'Characters: '
            for i in characters[:-1]:
                subfield_content += i + '; '
            subfield_content += characters[-1] + '.'
            field_500 = Field(tag='500',
                              indicators=[' ', ' '],
                              subfields=['a', subfield_content])
            record.add_ordered_field(field_500)
            # Create 600 and 650 for "Fictitious character" entries
            # TODO check for existing 650 and don't add if a duplicate
            if any('Fictitious character' in c for c in characters):
                fic_chars = [
                    c for c in characters if 'Fictitious character' in c
                ]
                for i in fic_chars:
                    # Split "Name (Fictitious character...)" into $a and $c
                    fic_char_name = re.sub(
                        r'^(.*?) (\(Fictitious character.*\))$', r'\g<1>', i)
                    fic_char_c = re.sub(
                        r'^(.*?) (\(Fictitious character.*\))$', r'\g<2>', i)
                    field_600 = Field(
                        tag='600',
                        indicators=['0', '0'],
                        subfields=['a', fic_char_name, 'c', fic_char_c])
                    record.add_ordered_field(field_600)
                    field_650 = Field(tag='650',
                                      indicators=[' ', '0'],
                                      subfields=['a', i])
                    record.add_ordered_field(field_650)

        # 700 added entries for the remaining creator roles
        if penciller:
            for i in penciller:
                subfield_content = subfields_from_string_relator(
                    i, 'penciller')
                field_700 = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)
        if inker:
            for i in inker:
                subfield_content = subfields_from_string_relator(i, 'inker')
                field_700 = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)
        if colorist:
            for i in colorist:
                subfield_content = subfields_from_string_relator(i, 'colorist')
                field_700 = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)
        if letterer:
            for i in letterer:
                subfield_content = subfields_from_string_relator(i, 'letterer')
                field_700 = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)
        if cover_artist:
            for i in cover_artist:
                subfield_content = subfields_from_string_relator(
                    i, 'cover artist')
                field_700 = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)
        if editor:
            for i in editor:
                subfield_content = subfields_from_string_relator(i, 'editor')
                field_700 = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)
        # field_700 = Field(tag = '700',
        #                   indicators = ['7',' '],
        #                   subfields = [
        #                       'a', doi,
        #                       '2', 'doi'])

        # 773: host item entry for the series
        subfields_773 = subfields_from_string(series)
        field_773 = Field(tag='773',
                          indicators=['0', '8'],
                          subfields=subfields_773)
        record.add_ordered_field(field_773)

        # 830: series added entry (lowercased title form)
        subfields_830 = []
        if has_part_title:
            subfields_830 = [
                'a', lower_title[0] + '.', 'p', lower_title[1] + ' ;', 'v',
                lower_title[2] + '.'
            ]
        else:
            subfields_830 = [
                'a', lower_title[0] + ' ;', 'v', lower_title[1] + '.'
            ]
        field_830 = Field(tag='830',
                          indicators=[' ', '0'],
                          subfields=subfields_830)
        record.add_ordered_field(field_830)

        outmarc.write(record.as_marc())
        print()
    outmarc.close()
class TestRemoveUnsupportedSubjectHeadings(unittest.TestCase):
    """Tests removal from bib unwanted subject headings"""

    def setUp(self):
        # Minimal bib (001 + 245 only) that each test decorates with 6xx fields.
        self.bib = Record()
        self.bib.leader = "00000nam a2200000u 4500"
        tags = []
        tags.append(Field(tag="001", data="0001"))
        tags.append(
            Field(tag="245", indicators=["0", "0"], subfields=["a", "Test title"]))
        for tag in tags:
            self.bib.add_ordered_field(tag)

    def test_None(self):
        # A missing bib must be a no-op returning None.
        self.assertIsNone(
            patches.remove_unsupported_subject_headings("NYPL", None))

    def test_removal_of_local_subject_fields(self):
        # Local/uncontrolled subject tags (653, 654, 69x) are dropped;
        # the LC 650 survives.
        tags = []
        tags.append(
            Field(tag="650", indicators=["0", "0"], subfields=["a", "term"]))
        tags.append(
            Field(tag="653", indicators=[" ", " "], subfields=["a", "term"]))
        tags.append(
            Field(tag="654", indicators=[" ", " "], subfields=["a", "term"]))
        tags.append(
            Field(tag="690", indicators=[" ", " "], subfields=["a", "term"]))
        tags.append(
            Field(tag="691", indicators=[" ", " "], subfields=["a", "term"]))
        tags.append(
            Field(tag="696", indicators=[" ", " "], subfields=["a", "term"]))
        tags.append(
            Field(tag="697", indicators=[" ", " "], subfields=["a", "term"]))
        # fix: a 698 field was asserted below but never added, which made
        # that assertion vacuously true; add it so removal is exercised.
        tags.append(
            Field(tag="698", indicators=[" ", " "], subfields=["a", "term"]))
        tags.append(
            Field(tag="699", indicators=[" ", " "], subfields=["a", "term"]))
        for tag in tags:
            self.bib.add_ordered_field(tag)
        patches.remove_unsupported_subject_headings("NYPL", self.bib)
        self.assertTrue("650" in self.bib)
        self.assertFalse("653" in self.bib)
        self.assertFalse("654" in self.bib)
        self.assertFalse("690" in self.bib)
        self.assertFalse("691" in self.bib)
        self.assertFalse("696" in self.bib)
        self.assertFalse("697" in self.bib)
        self.assertFalse("698" in self.bib)
        self.assertFalse("699" in self.bib)

    def test_preserve_lc_subjects(self):
        # LC headings (2nd indicator 0) in 6xx fields must all be kept.
        tags = []
        tags.append(
            Field(tag="600",
                  indicators=["0", "0"],
                  subfields=["a", "Smith, John"]))
        tags.append(
            Field(tag="610", indicators=["2", "0"], subfields=["a", "Inc."]))
        tags.append(
            Field(tag="611", indicators=[" ", "0"], subfields=["a", "Event"]))
        tags.append(
            Field(tag="630", indicators=[" ", "0"], subfields=["a", "Title"]))
        tags.append(
            Field(tag="650", indicators=[" ", "0"], subfields=["a", "Subject"]))
        tags.append(
            Field(tag="655", indicators=[" ", "0"], subfields=["a", "Genre"]))
        for tag in tags:
            self.bib.add_ordered_field(tag)
        patches.remove_unsupported_subject_headings("BPL", self.bib)
        self.assertTrue("600" in self.bib)
        self.assertTrue("610" in self.bib)
        self.assertTrue("611" in self.bib)
        self.assertTrue("630" in self.bib)
        self.assertTrue("650" in self.bib)
        self.assertTrue("655" in self.bib)

    def test_preserve_specific_vocabularies(self):
        # Supported $2 vocabularies (fast, gsafd, lcgft, gmgpc) and
        # children's headings (2nd indicator 1) must all be kept for NYPL.
        tags = []
        tags.append(
            Field(tag="600",
                  indicators=[" ", "7"],
                  subfields=["a", "Genre", "2", "fast"]))
        tags.append(
            Field(tag="630",
                  indicators=[" ", "1"],
                  subfields=["a", "Children's subject"]))
        tags.append(
            Field(tag="650",
                  indicators=[" ", "7"],
                  subfields=["a", "Genre", "2", "gsafd"]))
        tags.append(
            Field(tag="651",
                  indicators=[" ", "7"],
                  subfields=["a", "Genre", "2", "lcgft"]))
        tags.append(
            Field(tag="655",
                  indicators=[" ", "7"],
                  subfields=["a", "Genre", "2", "gmgpc"]))
        for tag in tags:
            self.bib.add_ordered_field(tag)
        patches.remove_unsupported_subject_headings("NYPL", self.bib)
        self.assertTrue("600" in self.bib)
        self.assertTrue("630" in self.bib)
        self.assertTrue("650" in self.bib)
        self.assertTrue("651" in self.bib)
        self.assertTrue("655" in self.bib)

    def test_removal_of_unsupported_vocabularies_nypl(self):
        # Unsupported/missing $2 vocabularies and source-not-specified
        # headings (2nd indicator 4) must be removed for NYPL.
        tags = []
        tags.append(
            Field(tag="600",
                  indicators=["0", "7"],
                  subfields=["a", "Smith, John"]))
        tags.append(
            Field(tag="610",
                  indicators=["2", "7"],
                  subfields=["a", "Inc.", "2", "biasac"]))
        tags.append(
            Field(tag="650", indicators=[" ", "4"], subfields=["a", "Event"]))
        tags.append(
            Field(
                tag="630",
                indicators=[" ", "7"],
                subfields=["a", "Title", "2", "rbgenr"],
            ))
        tags.append(
            Field(tag="655",
                  indicators=[" ", "7"],
                  subfields=["a", "Genre", "2", "att"]))
        for tag in tags:
            self.bib.add_ordered_field(tag)
        patches.remove_unsupported_subject_headings("NYPL", self.bib)
        self.assertFalse("600" in self.bib)
        self.assertFalse("610" in self.bib)
        self.assertFalse("650" in self.bib)
        self.assertFalse("630" in self.bib)
        self.assertFalse("655" in self.bib)

    def test_removal_of_unsupported_vocabularies_bpl(self):
        # Same as the NYPL case, plus BPL also drops children's
        # headings (2nd indicator 1).
        tags = []
        tags.append(
            Field(tag="600",
                  indicators=["0", "7"],
                  subfields=["a", "Smith, John"]))
        tags.append(
            Field(tag="610",
                  indicators=["2", "7"],
                  subfields=["a", "Inc.", "2", "biasac"]))
        tags.append(
            Field(tag="650", indicators=[" ", "4"], subfields=["a", "Event"]))
        tags.append(
            Field(
                tag="630",
                indicators=[" ", "7"],
                subfields=["a", "Title", "2", "rbgenr"],
            ))
        tags.append(
            Field(tag="650",
                  indicators=[" ", "1"],
                  subfields=["a", "Children's"]))
        tags.append(
            Field(tag="655",
                  indicators=[" ", "7"],
                  subfields=["a", "Genre", "2", "att"]))
        for tag in tags:
            self.bib.add_ordered_field(tag)
        patches.remove_unsupported_subject_headings("BPL", self.bib)
        self.assertFalse("600" in self.bib)
        self.assertFalse("610" in self.bib)
        self.assertFalse("630" in self.bib)
        self.assertFalse("650" in self.bib)
        self.assertFalse("655" in self.bib)
def game_record(data, control_number, suppressed=True, status_code="-"):
    """
    Creates a board-game bib record object from data namedtuple.

    args:
        data: namedtuple with game metadata; fields read here are title,
            subtitle, title_part, title_other, author, isbn, upc, pub_place,
            publisher, pub_date, players, duration, content, desc, age,
            price, central_barcodes, crown_barcodes
        control_number: str, value for the 001 control field
        suppressed: bool, when True the 949 command includes "b3=n"
            (suppress from OPAC display)
        status_code: str, item status code placed in each 960 $s
    returns:
        record: pymarc.Record object
    raises:
        ValueError: when data.title is empty/missing
    """
    record = Record()
    record.leader = "00000crm a2200000M 4500"
    tags = []
    # 001 - control field
    tags.append(Field(tag="001", data=control_number))
    # 005 - date/time of latest transaction (now)
    tags.append(
        Field(tag="005", data=datetime.strftime(datetime.now(), "%y%m%d%H%M%S.%f"))
    )
    # 008 - publication year included only when known
    date_created = date.strftime(date.today(), "%y%m%d")
    if data.pub_date:
        t008 = f"{date_created}s{data.pub_date} xxu vneng d"
    else:
        t008 = f"{date_created}n xxu vneng d"
    tags.append(Field(tag="008", data=t008))
    # 020 - one field per ISBN
    for isbn in data.isbn:
        tags.append(Field(tag="020", indicators=[" ", " "], subfields=["a", isbn]))
    # 024 - one field per UPC
    for upc in data.upc:
        tags.append(Field(tag="024", indicators=["1", " "], subfields=["a", upc]))
    # 040
    tags.append(
        Field(
            tag="040",
            indicators=[" ", " "],
            subfields=["a", "BKL", "b", "eng", "e", "rda", "c", "BKL"],
        )
    )
    # 099
    tags.append(Field(tag="099", indicators=[" ", " "], subfields=["a", "BOARD GAME"]))
    # 245 (no final punctuation needed per new PCC ISBD policy)
    subfields = []
    if not data.title:
        raise ValueError("Missing title data")
    else:
        subfields.extend(["a", data.title])
    if data.subtitle:
        # Append ISBD " : " separator to the previous subfield value
        subfields[-1] = f"{subfields[-1]} : "
        subfields.extend(["b", data.subtitle])
    if data.title_part:
        subfields[-1] = f"{subfields[-1]}. "
        subfields.extend(["p", data.title_part])
        # add 246 tag for the part title, stripping a leading article
        ind2 = check_article(data.title_part)
        tags.append(
            Field(
                tag="246",
                indicators=["1", ind2],
                subfields=["a", data.title_part[int(ind2) :]],
            )
        )
    if data.author:
        subfields[-1] = f"{subfields[-1]} / "
        subfields.extend(["c", data.author])
    ind2 = check_article(data.title)
    tags.append(Field(tag="245", indicators=["0", ind2], subfields=subfields))
    # 246 - other title
    for title in data.title_other:
        tags.append(Field(tag="246", indicators=["1", "3"], subfields=["a", title]))
    # 264 publication tags; RDA bracketed placeholders for unknown values
    subfields = []
    if data.pub_place:
        subfields.extend(["a", f"{data.pub_place}:"])
    else:
        subfields.extend(["a", "[Place of publication not identified]:"])
    if data.publisher:
        subfields.extend(["b", f"{data.publisher},"])
    else:
        subfields.extend(["b", "[publisher not identified],"])
    if data.pub_date:
        subfields.extend(["c", data.pub_date])
    else:
        subfields.extend(["c", "[date of publication not identified]"])
    tags.append(Field(tag="264", indicators=[" ", "1"], subfields=subfields))
    # 300 tag
    tags.append(
        Field(tag="300", indicators=[" ", " "], subfields=["a", "1 board game"])
    )
    # RDA 3xx tags (content / media / carrier)
    tags.append(
        Field(
            tag="336",
            indicators=[" ", " "],
            subfields=["a", "three-dimensional form", "b", "tdf", "2", "rdacontent"],
        )
    )
    tags.append(
        Field(
            tag="337",
            indicators=[" ", " "],
            subfields=["a", "unmediated", "b", "n", "2", "rdamedia"],
        )
    )
    tags.append(
        Field(
            tag="338",
            indicators=[" ", " "],
            subfields=["a", "object", "b", "nr", "2", "rdacarrier"],
        )
    )
    # 500 notes
    tags.append(
        Field(
            tag="500",
            indicators=[" ", " "],
            subfields=["a", f"Number of players: {data.players}"],
        )
    )
    tags.append(
        Field(
            tag="500",
            indicators=[" ", " "],
            subfields=["a", f"Game duration: {data.duration}"],
        )
    )
    # content note 505
    if data.content:
        tags.append(
            Field(tag="505", indicators=["0", " "], subfields=["a", data.content])
        )
    # 520 summary
    if data.desc:
        tags.append(Field(tag="520", indicators=[" ", " "], subfields=["a", data.desc]))
    # 521 note
    tags.append(Field(tag="521", indicators=[" ", " "], subfields=["a", data.age]))
    # 655 genre
    tags.append(
        Field(
            tag="655",
            indicators=[" ", "7"],
            subfields=["a", "Board games.", "2", "lcgft"],
        )
    )
    # 856 fields (link to project)
    tags.append(
        Field(
            tag="856",
            indicators=["4", " "],
            subfields=[
                "u",
                "https://www.bklynlibrary.org/boardgamelibrary",
                "z",
                "Board Game Library website",
            ],
        )
    )
    # 960 item field - one per barcode; $l differs by location (02abg/30abg)
    for barcode in data.central_barcodes:
        subfields = [
            "i",
            barcode,
            "l",
            "02abg",
            "p",
            data.price,
            "q",
            "11",
            "t",
            "53",
            "r",
            "i",
            "s",
            status_code,
        ]
        tags.append(Field(tag="960", indicators=[" ", " "], subfields=subfields))
    for barcode in data.crown_barcodes:
        subfields = [
            "i",
            barcode,
            "l",
            "30abg",
            "p",
            data.price,
            "q",
            "11",
            "t",
            "53",
            "r",
            "i",
            "s",
            status_code,
        ]
        tags.append(Field(tag="960", indicators=[" ", " "], subfields=subfields))
    # 949 command line
    if suppressed:
        opac_display_command = "b3=n"
    else:
        opac_display_command = ""
    tags.append(
        Field(
            tag="949",
            indicators=[" ", " "],
            subfields=["a", f"*b2=o;{opac_display_command}"],
        )
    )
    for tag in tags:
        record.add_ordered_field(tag)
    return record
# Minimal pymarc demo: build a bib record from scratch and print it.
# fix: Field must be imported alongside Record — it is used below and the
# original import of Record only raised NameError at runtime.
from pymarc import Record, Field

my_new_record = Record()
my_new_fields = []
my_new_fields.append(Field('003', data='Nz'))
my_new_fields.append(
    Field(tag='100',
          indicators=['1', ' '],
          subfields=['a', 'Gattuso, Jay,', 'd', 'd1978-']))
my_new_fields.append(
    Field(tag='245',
          indicators=['1', '0'],
          subfields=[
              'a', 'Goats. Are they the best animals? :',
              'b', 'What about Cats!? /'
          ]))
my_new_fields.append(
    Field(tag='650',
          indicators=[' ', '0'],
          subfields=['a', 'Goats', 'b', 'Competitive Pet Keeping']))
my_new_fields.append(
    Field(tag='650',
          indicators=[' ', '0'],
          subfields=['a', 'Cats', 'b', 'Competitive Pet Keeping']))
for my_new_field in my_new_fields:
    my_new_record.add_ordered_field(my_new_field)
print(my_new_record)
def make_bib(data: namedtuple):
    """Build a pymarc bib record for a circulating laptop described by *data*.

    Reads data.comp_name (to derive locker and laptop numbers), data.type,
    and data.serial; returns a pymarc.Record with fields added in tag order.
    """
    locker = determine_locker_num(data.comp_name)
    lap = determine_lap_num(data.comp_name)
    created = date.strftime(date.today(), "%y%m%d")

    record = Record()
    # leader
    record.leader = "00000nrm a2200000Mi 4500"

    fields = [
        # 008 tag
        Field(tag="008", data=f"{created}s2019 xx 00 r|und d"),
        # 099 tag
        Field(tag="099", indicators=[" ", " "], subfields=["a", "LAPTOP"]),
        # 245 tag
        Field(tag="245", indicators=["0", "0"], subfields=["a", f"{locker}."]),
    ]

    # single sub A 246 tags
    variant_titles = [
        "Laptop circulation",
        "Laptops in the branches",
        "Wireless laptops",
        "Circulating laptops",
        "Laptop computers",
        f"32_PUBLAP{lap}",
    ]
    fields.extend(
        Field(tag="246", indicators=["3", " "], subfields=["a", vt])
        for vt in variant_titles)

    # complex 246 tags: one keyed by locker number, one by laptop number
    for part_number in (locker, f"32_PUBLAP{lap}"):
        fields.append(
            Field(
                tag="246",
                indicators=["3", " "],
                subfields=["a", f"{data.type}.", "n", part_number],
            ))

    # 300 tag
    fields.append(
        Field(tag="300",
              indicators=[" ", " "],
              subfields=["a", "1 laptop computer"]))
    # 500 tag
    fields.append(
        Field(
            tag="500",
            indicators=[" ", " "],
            subfields=["a", f"Serial number: {data.serial}"],
        ))
    # 960 tag
    item_note = construct_item_note(locker, lap, data)
    fields.append(
        Field(
            tag="960",
            indicators=[" ", " "],
            subfields=[
                "l", "32lap",
                "t", "49",
                "r", "7",
                "q", "7",
                "s", "g",
                "n", f"{item_note}",
            ],
        ))
    # command line tag
    fields.append(
        Field(tag="949", indicators=[" ", " "], subfields=["a", "*b2=7;bn=32;"]))

    for marc_field in fields:
        record.add_ordered_field(marc_field)
    return record