def test_process_record(self):
    """Run every sample record through the converter and compare the output fields."""
    # Position 6 of the MARC leader encodes the material type under test.
    leader_by_type = {
        "music": "XXXXXXcX",
        "text": "XXXXXXaX",
        "movie": "XXXXXXgX",
    }
    for record_type in self.records:
        for sample in self.records[record_type]:
            if record_type not in leader_by_type:
                raise ValueError("Testattava aineistotyyppi on tuntematon")
            source = Record()
            source.leader = leader_by_type[record_type]
            source.add_field(Field(tag='001', data='00000001'))
            if record_type == "movie":
                source.add_field(Field(tag='007', data='v'))
            original_fields = []
            for raw in sample['original']:
                original_fields.append(raw)
                source.add_field(self.str_to_marc(raw))
            converted = self.cc.process_record(source)
            # Control fields 001/007 are fixtures, not conversion output.
            new_fields = [str(f) for f in converted.get_fields()
                          if f.tag not in ['001', '007']]
            result_fields = [f for f in sample['converted']]
            self.assertEqual(result_fields, new_fields)
def platform2pymarc_obj(data=None):
    """Build a pymarc Record from Platform bib data.

    args:
        data: bib data parsed from json
    return:
        pymarc Record obj
    """
    record = Record(to_unicode=True, force_utf8=True)
    # Walk the variable fields; "_" is the leader, fields without
    # subfields are control fields, the rest are data fields.
    for entry in data.get("varFields"):
        if entry.get("fieldTag") == "_":
            record.leader = entry.get("content")
        elif entry.get("subfields") is None:
            record.add_field(
                Field(
                    tag=entry.get("marcTag"),
                    indicators=[entry.get("ind1"), entry.get("ind2")],
                    data=entry.get("content"),
                ))
        else:
            flat_subfields = []
            for sub in entry.get("subfields"):
                flat_subfields.append(sub.get("tag"))
                flat_subfields.append(sub.get("content"))
            record.add_field(
                Field(
                    tag=entry.get("marcTag"),
                    indicators=[entry.get("ind1"), entry.get("ind2")],
                    subfields=flat_subfields,
                ))
    return record
def test_nypl_branches_BT_SERIES_YA_graphic_novel_compound_name(self):
    """BT SERIES patch must split a GRAPHIC call number into $f/$a/$c."""
    bib = Record()
    bib.leader = "00000nam a2200000u 4500"
    for fld in (
        Field(tag="001", data="0001"),
        Field(tag="245", indicators=["0", "0"], subfields=["a", "Test title"]),
        Field(tag="091", indicators=[" ", " "],
              subfields=["a", "GRAPHIC GN FIC COMPOUND NAME"]),
    ):
        bib.add_ordered_field(fld)
    mod_bib = patches.bib_patches("nypl", "branches", "cat", "BT SERIES", bib)
    expected_subfields = ["f", "GRAPHIC", "a", "GN FIC", "c", "COMPOUND NAME"]
    self.assertEqual([" ", " "], mod_bib.get_fields("091")[0].indicators)
    self.assertEqual(expected_subfields, mod_bib.get_fields("091")[0].subfields)
def test_nypl_branch_BT_SERIES_Spanish_prefix(self):
    """BT SERIES patch must split a Spanish-prefix call number into $p/$a/$c."""
    bib = Record()
    bib.leader = "00000nam a2200000u 4500"
    for fld in (
        Field(tag="001", data="0001"),
        Field(tag="245", indicators=["0", "0"], subfields=["a", "Test title"]),
        Field(tag="091", indicators=[" ", " "],
              subfields=["a", "J SPA E COMPOUND NAME"]),
    ):
        bib.add_ordered_field(fld)
    mod_bib = patches.bib_patches("nypl", "branches", "cat", "BT SERIES", bib)
    expected_subfields = ["p", "J SPA", "a", "E", "c", "COMPOUND NAME"]
    self.assertEqual([" ", " "], mod_bib.get_fields("091")[0].indicators)
    self.assertEqual(expected_subfields, mod_bib.get_fields("091")[0].subfields)
def transpose_to_marc21(record):
    """Serialize a dict-based record into binary MARC21 via pymarc.

    Numeric keys become MARC fields; "_LEADER" becomes the leader.
    """
    marc_record = Record(force_utf8=True)
    marc_record.leader = record["_LEADER"]
    for tag in record:
        if not isint(tag):
            # Non-numeric keys (e.g. "_LEADER") are not MARC fields.
            continue
        value = record[tag]
        if int(tag) < 10:
            # Control fields carry raw data, either one string or several.
            if isinstance(value, list):
                for content in value:
                    marc_record.add_field(Field(tag=tag, data=content))
            elif isinstance(value, str):
                marc_record.add_field(Field(tag=tag, data=value))
        else:
            # Data fields: a list of {indicators: [subfield dicts]} mappings.
            for entry in value:
                for ind, subfield_dicts in entry.items():
                    flat_subfields = []
                    for subfield_dict in subfield_dicts:
                        for code, content in subfield_dict.items():
                            if isinstance(content, str):
                                flat_subfields.append(code)
                                flat_subfields.append(content)
                            elif isinstance(content, list):
                                # Repeated subfield code with several values.
                                for item in content:
                                    flat_subfields.append(code)
                                    flat_subfields.append(item)
                    marc_record.add_field(
                        Field(tag=str(tag),
                              indicators=list(ind),
                              subfields=flat_subfields))
    return marc_record.as_marc()
def decode_record(self, record):
    r"""
    >>> reader = Reader('http://opac.uthsc.edu', 2)
    >>> raw = "\nLEADER 00000cas 2200517 a 4500 \n001 1481253 \n003 OCoLC \n005 19951109120000.0 \n008 750727c19589999fr qrzp b 0 b0fre d \n010 sn 86012727 \n022 0003-3995 \n030 AGTQAH \n035 0062827|bMULS|aPITT NO. 0639600000|asa64872000|bFULS \n040 MUL|cMUL|dFUL|dOCL|dCOO|dNYG|dHUL|dSER|dAIP|dNST|dAGL|dDLC\n |dTUM \n041 0 engfre|bgeritaspa \n042 nsdp \n049 TUMS \n069 1 A32025000 \n210 0 Ann. genet. \n222 0 Annales de genetique \n229 00 Annales de genetique \n229 Ann Genet \n242 00 Annals on genetics \n245 00 Annales de genetique. \n260 Paris :|bExpansion scientifique,|c1958-2004. \n300 v. :|bill. ;|c28 cm. \n310 Quarterly \n321 Two no. a year \n362 0 1,1958-47,2004. \n510 1 Excerpta medica \n510 1 Index medicus|x0019-3879 \n510 2 Biological abstracts|x0006-3169 \n510 2 Chemical abstracts|x0009-2258 \n510 2 Life sciences collection \n510 0 Bulletin signaletique \n510 0 Current contents \n546 French and English, with summaries in German, Italian, and\n Spanish. \n550 Journal of the Societe francaise de genetique. \n650 2 Genetics|vPeriodicals. \n710 2 Societ\xe9 fran\xe7aise de genetique. \n785 00 |tEuropean journal of medical genetics. \n856 41 |uhttp://library.uthsc.edu/ems/eresource/3581|zFull text \n at ScienceDirect: 43(1) Jan 2000 - 47(4) Dec 2004 \n936 Unknown|ajuin 1977 \n"
    >>> record = reader.decode_record(raw)
    >>> print record.title
    Annales de genetique
    """
    # NOTE(review): Python 2 code — str.decode('latin1')/encode('utf8') and the
    # doctest's bare `print` are not valid Python 3; confirm target runtime.
    pseudo_marc = record.strip().split('\n')
    raw_fields = []
    # The first line must be the LEADER; anything else is not a record.
    if pseudo_marc[0][0:6] == 'LEADER':
        record = Record()
        record.leader = pseudo_marc[0][7:].strip()
    else:
        return None
    # First pass: collect raw field dicts, merging continuation lines
    # (lines whose tag column is blank) into the previous field.
    for field in pseudo_marc[1:]:
        tag = field[:3]
        data = unescape_entities(field[6:].decode('latin1')).encode('utf8')
        if tag.startswith(' '):
            # Additional field data needs to be prepended with an extra space
            # for certain fields ...
            data = " %s" % (data.strip(),)
            raw_fields[-1]['value'] = "%s%s" % (raw_fields[-1]['value'], data)
            raw_fields[-1]['raw'] = "%s%s" % (raw_fields[-1]['raw'], field.strip())
        else:
            # Non-control fields get an implicit leading "a" subfield marker.
            data = data if (tag < '010' and tag.isdigit()) else "a%s" % (data,)
            raw_fields.append({
                'tag': tag,
                'indicator1': field[3],
                'indicator2': field[4],
                'value': data.strip(),
                'raw': field.strip()
            })
    # Second pass: build pymarc Field objects, splitting "|"-delimited subfields.
    for raw in raw_fields:
        tag = raw['tag']
        data = raw['value'].strip()
        field = Field(tag=tag,
                      indicators=[raw['indicator1'], raw['indicator2']],
                      data=data)
        if not field.is_control_field():
            for sub in data.split('|'):
                try:
                    field.add_subfield(sub[0].strip(), sub[1:].strip())
                except Exception:
                    # Skip blank/empty subfields
                    continue
        record.add_field(field)
    record.parse_leader()
    # Disregard record if no title present
    if not record.get_fields('245'):
        return None
    else:
        return record
def record_sorted(record: Record) -> Record:
    """Return a copy of *record* with fields regrouped in ascending tag order.

    The leader is carried over unchanged; fields are re-added tag by tag
    from "000" through "999".
    """
    result = Record()
    result.leader = record.leader
    for tag_number in range(1000):
        # Canonical three-character, zero-padded MARC tag ("001", "045", ...).
        tag = f"{tag_number:03d}"
        for field in record.get_fields(tag):
            result.add_field(field)
    return result
def test_writing_unicode(self):
    """Round-trip a record holding a non-ASCII subfield value (Python 2)."""
    expected = unichr(0x1234)
    out = Record()
    out.add_field(Field(245, ['1', '0'], ['a', expected]))
    out.leader = '         a              '
    writer = MARCWriter(open('test/foo', 'w'))
    writer.write(out)
    writer.close()
    reader = MARCReader(open('test/foo'), to_unicode=True)
    restored = reader.next()
    self.assertEqual(restored['245']['a'], expected)
    os.remove('test/foo')
def test_writing_unicode(self):
    """Write a record with a non-ASCII title, read it back, compare (Python 2)."""
    rec = Record()
    rec.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
    rec.leader = '         a              '
    out = MARCWriter(open('test/foo', 'w'))
    out.write(rec)
    out.close()
    back = MARCReader(open('test/foo'), to_unicode=True)
    rec = back.next()
    self.assertEqual(rec['245']['a'], unichr(0x1234))
    os.remove('test/foo')
def test_writing_unicode(self):
    """Round-trip a record holding a non-ASCII subfield value."""
    expected = chr(0x1234)
    out = Record()
    out.add_field(Field(245, ["1", "0"], ["a", expected]))
    out.leader = "         a              "
    writer = MARCWriter(open("test/foo", "wb"))
    writer.write(out)
    writer.close()
    reader = MARCReader(open("test/foo", "rb"), to_unicode=True)
    restored = next(reader)
    self.assertEqual(restored["245"]["a"], expected)
    reader.close()
    os.remove("test/foo")
def write_record(self, references, field035="", field008=""):
    """Write one record in ISO 2709 form to the output file.

    Arguments:
        references {set(reference)} -- citations to store via to_marc_field()
        field035 -- content string for field 035 subfield $a
        field008 -- content string for field 008 (defaults to CONST_FIELD_008)
    """
    if field008 == "":
        field008 = self.CONST_FIELD_008
    record = Record(force_utf8=True)
    record.add_field(Field(tag='008', data=field008))
    record.add_field(
        Field(tag='035',
              indicators=[self.CONST_INDICATOR_1, self.CONST_INDICATOR_2],
              subfields=['a', field035]))
    for reference in references:
        record.add_field(reference.to_marc_field())
    # Force fixed leader positions 5, 7 and 18 to 'n', 'b', 'a'.
    leader = list(record.leader)
    leader[5], leader[7], leader[18] = 'n', 'b', 'a'
    record.leader = ''.join(leader)
    self.writer.write(record)
def test_nypl_branch_BT_SERIES_exception(self):
    """A call number the patch cannot parse must raise AssertionError."""
    bib = Record()
    bib.leader = "00000nam a2200000u 4500"
    for fld in (
        Field(tag="001", data="0001"),
        Field(tag="245", indicators=["0", "0"], subfields=["a", "Test title"]),
        Field(tag="091", indicators=[" ", " "], subfields=["a", "J B EDISON C"]),
    ):
        bib.add_ordered_field(fld)
    with self.assertRaises(AssertionError):
        patches.bib_patches("nypl", "branches", "cat", "BT SERIES", bib)
def __next__(self):
    """Convert the next marc-in-json object from the iterator into a Record."""
    source = next(self.iter)
    record = Record()
    record.leader = source['leader']
    for entry in source['fields']:
        tag, content = list(entry.items())[0]
        # Dict content with 'subfields' is a data field; plain strings are
        # control-field data.
        if 'subfields' in content and hasattr(content, 'update'):
            flat = []
            for pair in content['subfields']:
                for code, value in pair.items():
                    flat.extend((code, value))
            record.add_field(
                Field(tag=tag, subfields=flat,
                      indicators=[content['ind1'], content['ind2']]))
        else:
            record.add_field(Field(tag=tag, data=content))
    return record
def __next__(self):
    """Yield the next record, translating marc-in-json fields to pymarc."""
    obj = next(self.iter)
    result = Record()
    result.leader = obj['leader']
    for field_obj in obj['fields']:
        tag, body = list(field_obj.items())[0]
        if 'subfields' in body and hasattr(body, 'update'):
            # Flatten the list of {code: value} dicts into pymarc's flat list.
            codes_and_values = []
            for sub in body['subfields']:
                for code, value in sub.items():
                    codes_and_values.append(code)
                    codes_and_values.append(value)
            result.add_field(
                Field(tag=tag, subfields=codes_and_values,
                      indicators=[body['ind1'], body['ind2']]))
        else:
            result.add_field(Field(tag=tag, data=body))
    return result
def __next__(self):
    """Build a pymarc Record from the next marc-in-json object."""
    jobj = next(self.iter)
    rec = Record()
    rec.leader = jobj["leader"]
    for field in jobj["fields"]:
        tag, content = list(field.items())[0]
        is_data_field = "subfields" in content and hasattr(content, "update")
        if is_data_field:
            # Flatten [{code: value}, ...] into pymarc's flat subfield list.
            flat = []
            for pair in content["subfields"]:
                for code, value in pair.items():
                    flat.extend((code, value))
            fld = Field(tag=tag, subfields=flat,
                        indicators=[content["ind1"], content["ind2"]])
        else:
            fld = Field(tag=tag, data=content)
        rec.add_field(fld)
    return rec
def next(self):
    """ To support iteration. """
    buffered = ''
    line = self.file_handle.readline()
    if not line:
        raise StopIteration
    # Gather every consecutive line sharing the same 9-character record key.
    current_key = line[0:9]
    key = current_key
    while key == current_key:
        buffered += line
        position = self.file_handle.tell()
        line = self.file_handle.readline()
        key = line[0:9]
    # Rewind so the first line of the next record is re-read on the next call.
    self.file_handle.seek(position)
    record = Record()
    for recordln in buffered.splitlines():
        tag = recordln[10:13]
        ind1 = recordln[13:14]
        ind2 = recordln[14:15]
        rest = recordln[18:]
        if tag == 'LDR':
            # '^' is the export's blank placeholder.
            record.leader = rest.replace('^', ' ')
        elif tag < '010' and tag.isdigit():
            if tag == '008':
                rest = rest.replace('^', ' ')
            record.add_field(Field(tag=tag, data=rest))
        else:
            # Data fields: subfields are '$$'-delimited, code first.
            flat_subfields = []
            chunks = rest.split('$$')
            chunks.pop(0)
            for chunk in chunks:
                flat_subfields.extend([chunk[0], chunk[1:]])
            record.add_field(
                Field(tag=tag, indicators=[ind1, ind2],
                      subfields=flat_subfields))
    return record
def test_bib_no_oclc_prefix(self):
    """A non-OCLC 001 value must survive the Amalivre patch unchanged."""
    bib = Record()
    bib.leader = "00000nam a2200000u 4500"
    for fld in (
        Field(tag="001", data="bl00000001"),
        Field(tag="245", indicators=["0", "0"], subfields=["a", "Test title"]),
        Field(tag="091", indicators=[" ", " "],
              subfields=["a", "GRAPHIC GN FIC COMPOUND NAME"]),
    ):
        bib.add_ordered_field(fld)
    mod_bib = patches.bib_patches("nypl", "branches", "cat", "Amalivre", bib)
    self.assertEqual(mod_bib.get_fields("001")[0].data, "bl00000001")
def record_crosswalk(record):
    """Crosswalk one source MARC record into a new Record.

    Maps 001->035, 019$a->680$i, 172->084/153, 572->153$j+753, copies
    008/035/680, and rewrites leader positions 6 and 17.  Anomalies are
    logged as CSV lines on stdout.

    Relies on the module-level globals ``target`` (vocabulary being
    produced) and ``inputfile`` (a name containing "temp" marks the
    classification as temporary in leader position 17).
    """
    # New record receiving the mapped fields.
    newrecord = Record()
    recordid = ''
    callnr = ''
    newclassif = ''
    # Only the first 572 feeds the concatenated classification in 153 $j.
    firstsubject = True
    for field in record.get_fields():
        # 001 is not copied as-is; it becomes a prefixed 035 below.
        if field.tag == '001':
            recordid = field.value()
        # 008 is copied verbatim (if present).
        elif field.tag == '008':
            newrecord.add_field(field)
        # 019 $a -> 680 $i
        elif field.tag == '019':
            try:
                indexnote = field.get_subfields('a')[0]
                newrecord.add_ordered_field(
                    Field(tag='680', indicators=[' ', ' '],
                          subfields=['i', indexnote]))
            except IndexError:
                # BUG FIX: call field.value() — the original interpolated the
                # bound method, logging its repr instead of the field content.
                print(
                    f"{recordid},019__$a,{field.value()},Notice avec 019 mais sans 019__$a"
                )
        # 035 fields are copied verbatim.
        elif field.tag == '035':
            newrecord.add_field(field)
        # 172 $2 -> 084 $a (normalized vocabulary); 172 $a kept for the 153.
        elif field.tag == '172':
            vocab = field.get_subfields('2')[0]
            if vocab in ["BCUR1", "BCUR2", "BCUR3"]:
                mappedvalue = "CLASBCUR"
            elif vocab in ["vddoc", "vddoc-la"]:
                mappedvalue = "vddoc"
            elif vocab == "laf":
                mappedvalue = "laf"
            else:
                # Unknown vocabulary: pass it through and log it.
                mappedvalue = vocab
                print(
                    f"{recordid},172__$2,{vocab},Ne figure pas sur la liste des vocabulaires traités"
                )
            newrecord.add_ordered_field(
                Field(tag='084', indicators=[' ', ' '],
                      subfields=['a', mappedvalue]))
            try:
                callnr = field.get_subfields('a')[0]
            except IndexError:
                print(f"{recordid},172__$a,,Notice sans champ 172__$a")
        # The first 572 is concatenated into 153 $j; every 572 becomes a 753.
        elif field.tag == '572':
            if firstsubject == True:
                # get_subfields() preserves stored order; no reordering needed.
                newclassif = ' -- '.join(
                    field.get_subfields('a', 'c', 'd', 'e', 'h', 'l', 'm',
                                        's', 't', 'v', 'x', 'X', 'y', 'z'))
                firstsubject = False
            # Log unexpected subfields.
            if len(field.get_subfields('9', '[')) > 0:
                # BUG FIX: field.value() (was the bound method; see 019 above).
                print(
                    f"{recordid},752,{field.value()},Sous-champ(s) 752 inattendu(s)"
                )
            # Keep the original subfield structure (NOTE: this retags the
            # source record's field object in place).
            subjectfield = field
            subjectfield.tag = '753'
            newrecord.add_ordered_field(subjectfield)
        # 680 fields are copied (ordered).
        elif field.tag == '680':
            newrecord.add_ordered_field(field)
        # Log all unmapped fields, except 003, 005, 039, 040 and 072.
        elif field.tag not in ['003', '005', '039', '040', '072']:
            print(f"{recordid},{field.tag},{field},Champ non mappé")
    # Check for empty or missing call numbers.
    if len(callnr) < 1:
        print(f"{recordid},153__$a,,Indice vide ou manquant")
    # Assemble the 153: call number, optional $a target vocabulary, and the
    # concatenated classification (when a 572 was present) in $j.
    if len(newclassif) < 1:
        if target in ["BCURmu", "BCURpt", "BCURcg"]:
            newrecord.add_ordered_field(
                Field(tag='153', indicators=[' ', ' '],
                      subfields=['a', callnr, 'a', target]))
        else:
            newrecord.add_ordered_field(
                Field(tag='153', indicators=[' ', ' '],
                      subfields=['a', callnr]))
    else:
        if target in ["BCURmu", "BCURpt", "BCURcg"]:
            newrecord.add_ordered_field(
                Field(tag='153', indicators=[' ', ' '],
                      subfields=['a', callnr, 'a', target, 'j', newclassif]))
        else:
            newrecord.add_ordered_field(
                Field(tag='153', indicators=[' ', ' '],
                      subfields=['a', callnr, 'j', newclassif]))
    # Record id (old 001) as an additional 035 with the (vtls_reroVD) prefix.
    newrecord.add_ordered_field(
        Field(tag='035', indicators=[' ', ' '],
              subfields=['a', "(vtls_reroVD)" + recordid]))
    # 040 $a is the static value "RNV vdbcul".
    newrecord.add_ordered_field(
        Field(tag='040', indicators=[' ', ' '],
              subfields=['a', "RNV vdbcul"]))
    # Leader: position 6 = 'w'; position 17 = 'o' for temporary
    # classifications (input file name contains "temp"), else 'n'.
    leader = list(record.leader)
    leader[6] = 'w'
    if inputfile.find('temp') > -1:
        leader[17] = 'o'
    else:
        leader[17] = 'n'
    newrecord.leader = ''.join(leader)
    return newrecord
def stub_marc_bib():
    """Return a pymarc Record pre-populated with representative test fields."""
    marc_bib = Record()
    marc_bib.leader = "00000nam a2200000u 4500"
    stub_fields = [
        Field(tag="001", data="ocm0001"),
        Field(tag="245", indicators=["0", "0"], subfields=["a", "Test title"]),
        Field(tag="001", data="ocn1111"),
        Field(tag="019", indicators=[" ", " "], subfields=["a", "some-id-001"]),
        Field(tag="020", indicators=[" ", " "],
              subfields=["a", "isbn001", "b", "isbn002"]),
        Field(tag="024", indicators=[" ", " "], subfields=["a", "upc001"]),
        Field(tag="037", indicators=[" ", " "],
              subfields=["a", "some-id-0001", "b", "test-distributor"]),
        Field(tag="037", indicators=[" ", " "],
              subfields=["a", "some-id-0002", "b", "Overdrive, Inc."]),
        Field(tag="084", indicators=[" ", " "],
              subfields=["a", "some-classification", "2", "test-thesaurus"]),
        Field(tag="091", indicators=[" ", " "], subfields=["a", "some-callnumber"]),
        Field(tag="099", indicators=[" ", " "], subfields=["a", "some-callnumber"]),
        Field(tag="263", indicators=[" ", " "], subfields=["a", "some-date"]),
        Field(tag="856", indicators=[" ", "3"],
              subfields=["u", "url1", "3", "public-note-1"]),
        Field(tag="856", indicators=[" ", "3"],
              subfields=["u", "url2", "3", "public-note-2"]),
        Field(tag="856", indicators=[" ", "3"],
              subfields=["u", "url3", "3", "public-note-3"]),
        Field(tag="838", indicators=[" ", " "],
              subfields=["u", "EBSCOhost", "b", "EBSC", "n", "11111"]),
    ]
    for fld in stub_fields:
        marc_bib.add_ordered_field(fld)
    return marc_bib
def main(arglist):
    """Convert a comics-cataloging spreadsheet into a binary MARC file (records.mrc)."""
    # CLI: single positional argument — path to the xlrd-readable spreadsheet.
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='path to spreadsheet')
    # parser.add_argument('output', help='save directory')
    args = parser.parse_args(arglist)
    input = Path(args.input)  # NOTE: shadows the builtin `input`
    # Read spreadsheet
    book_in = xlrd.open_workbook(str(input))
    sheet = book_in.sheet_by_index(0)  # get first sheet
    col_headers = sheet.row_values(0)
    # Resolve every spreadsheet column index by its header text.
    # Raises ValueError if a header is missing.
    title_col = col_headers.index('Title')
    subj_person_col = col_headers.index('Subject_Person')
    subj_topical_col = col_headers.index('Subject_Topical')
    subj_place_col = col_headers.index('Subject_Place')
    subj_corp_col = col_headers.index('Subject_Jurisdictional')
    genre_col = col_headers.index('Genre')
    pages_col = col_headers.index('Pages')
    pub_date_col = col_headers.index('Date')  # previously Publication Date
    copy_date_col = col_headers.index('Copyright Date')
    pub_place_col = col_headers.index('Pub_Place')
    publisher_col = col_headers.index('Publisher')
    edition_col = col_headers.index('Edition')
    source_col = col_headers.index('Source')
    # source_acq_col = col_headers.index('Source of Acquisition')
    writer_col = col_headers.index('Writer')
    penciller_col = col_headers.index('Penciller')
    inker_col = col_headers.index('Inker')
    colorist_col = col_headers.index('Colorist')
    letterer_col = col_headers.index('Letterer')
    cover_artist_col = col_headers.index('Cover Artist')
    editor_col = col_headers.index('Editor')
    # hist_note_col = col_headers.index('Historical Note')
    notes_col = col_headers.index('Note')
    characters_col = col_headers.index('Characters')
    synopsis_col = col_headers.index('Story Arc')
    toc_col = col_headers.index('Table of Contents')
    in_series_col = col_headers.index('Is Part of Series')
    black_creators_col = col_headers.index('Black Creators (MARC 590)')
    black_chars_col = col_headers.index('Black Characters (MARC 590)')
    isbn_col = col_headers.index('ISBN')
    color_col = col_headers.index('Color?')
    series_note_col = col_headers.index('Series Note')
    copyright_holder_col = col_headers.index('Copyright holder')
    gcd_uri_col = col_headers.index('Grand Comics Database')
    outmarc = open('records.mrc', 'wb')
    # Boilerplate fields shared by every record.
    field_ldr = '00000nam a2200000Ii 4500'
    field_040 = Field(tag = '040',
        indicators = [' ',' '],
        subfields = [
            'a', 'VMC',
            'b', 'eng',
            'e', 'rda',
            'c', 'VMC'])
    field_049 = Field(tag = '049',
        indicators = [' ',' '],
        subfields = [
            'a', 'VMCS'])
    field_336_text = Field(tag = '336',
        indicators = [' ',' '],
        subfields = [
            'a', 'text',
            'b', 'txt',
            '2', 'rdacontent'])
    field_336_image = Field(tag = '336',
        indicators = [' ',' '],
        subfields = [
            'a', 'still image',
            'b', 'sti',
            '2', 'rdacontent'])
    field_337 = Field(tag = '337',
        indicators = [' ',' '],
        subfields = [
            'a', 'unmediated',
            'b', 'n',
            '2', 'rdamedia'])
    field_338 = Field(tag = '338',
        indicators = [' ',' '],
        subfields = [
            'a', 'volume',
            'b', 'nc',
            '2', 'rdacarrier'])
    field_380 = Field(tag = '380',
        indicators = [' ',' '],
        subfields = [
            'a', 'Comic books and graphic novels.'])
    field_506 = Field(tag = '506',
        indicators = ['1',' '],
        subfields = [
            'a', 'Collection open to research. Researchers must register and agree to copyright and privacy laws before using this collection. Please contact Research Services staff before visiting the James Madison University Special Collections Library to use this collection.'])
    field_542 = Field(tag = '542',
        indicators = [' ',' '],
        subfields = [
            'a', 'Copyright not evaluated',
            'u', 'http://rightsstatements.org/vocab/CNE/1.0/'])
    field_588 = Field(tag = '588',
        indicators = ['0',' '],
        subfields = [
            'a', 'Description based on indicia and Grand Comics Database.'])
    field_989 = Field(tag = '989',
        indicators = [' ',' '],
        subfields = [
            'a', 'PN6728'])
    # One MARC record per spreadsheet data row (row 0 is the header).
    for row in range(1, sheet.nrows):
        print('Record ' + str(row))
        title = sheet.cell(row, title_col).value
        print(title)
        # Multi-valued cells are ';'-separated; split them into stripped lists.
        subj_person = sheet.cell(row, subj_person_col).value
        if subj_person:
            subj_person = [x.strip() for x in subj_person.split(';')]
        subj_topical = sheet.cell(row, subj_topical_col).value
        if subj_topical:
            subj_topical = [x.strip() for x in subj_topical.split(';')]
        subj_place = sheet.cell(row, subj_place_col).value
        if subj_place:
            subj_place = [x.strip() for x in subj_place.split(';')]
        subj_corp = sheet.cell(row, subj_corp_col).value
        if subj_corp:
            subj_corp = [x.strip() for x in subj_corp.split(';')]
        genre = sheet.cell(row, genre_col).value
        genre = [x.strip() for x in genre.split(';')]
        pages = str(sheet.cell(row, pages_col).value)
        pub_date = str(sheet.cell(row, pub_date_col).value)
        pub_date_str = date_from_string(pub_date)
        pub_date_year = year_from_date(pub_date_str)
        copy_date = ''
        copy_date = str(sheet.cell(row, copy_date_col).value)
        copy_date_str = date_from_string(copy_date)
        copy_date_year = year_from_date(copy_date_str)
        pub_place = sheet.cell(row, pub_place_col).value
        publisher = sheet.cell(row, publisher_col).value
        edition = sheet.cell(row, edition_col).value
        source = sheet.cell(row, source_col).value
        # source_acq = sheet.cell(row, source_acq_col).value
        characters = sheet.cell(row, characters_col).value
        black_creators = sheet.cell(row, black_creators_col).value
        if black_creators:
            black_creators = [x.strip() for x in black_creators.split(';')]
        black_chars = sheet.cell(row, black_chars_col).value
        if black_chars:
            black_chars = [x.strip() for x in black_chars.split(';')]
        isbn = str(sheet.cell(row, isbn_col).value)
        color = sheet.cell(row, color_col).value
        series_note = sheet.cell(row, series_note_col).value
        gcd_uri = sheet.cell(row, gcd_uri_col).value
        country_code = country_code_from_pub_place(pub_place)
        # Contributor columns: empty list when blank, else ';'-split names.
        copyright_holder = []
        if sheet.cell(row, copyright_holder_col).value:
            copyright_holder = sheet.cell(row, copyright_holder_col).value
            copyright_holder = [x.strip() for x in copyright_holder.split(';')]
        writer = []
        if sheet.cell(row, writer_col).value:
            writer = sheet.cell(row, writer_col).value
            writer = [x.strip() for x in writer.split(';')]
        penciller = []
        if sheet.cell(row, penciller_col).value:
            penciller = sheet.cell(row, penciller_col).value
            penciller = [x.strip() for x in penciller.split(';')]
        inker = []
        if sheet.cell(row, inker_col).value:
            inker = sheet.cell(row, inker_col).value
            inker = [x.strip() for x in inker.split(';')]
        colorist = []
        if sheet.cell(row, colorist_col).value:
            colorist = sheet.cell(row, colorist_col).value
            colorist = [x.strip() for x in colorist.split(';')]
        letterer = []
        if sheet.cell(row, letterer_col).value:
            letterer = sheet.cell(row, letterer_col).value
            letterer = [x.strip() for x in letterer.split(';')]
        cover_artist = []
        if sheet.cell(row, cover_artist_col).value:
            cover_artist = sheet.cell(row, cover_artist_col).value
            cover_artist = [x.strip() for x in cover_artist.split(';')]
        editor = []
        if sheet.cell(row, editor_col).value:
            editor = sheet.cell(row, editor_col).value
            editor = [x.strip() for x in editor.split(';')]
        # hist_note = []
        # if sheet.cell(row, hist_note_col).value:
        #     hist_note = sheet.cell(row, hist_note_col).value
        notes = []
        if sheet.cell(row, notes_col).value:
            notes = sheet.cell(row, notes_col).value
        synopsis = []
        if sheet.cell(row, synopsis_col).value:
            synopsis = sheet.cell(row, synopsis_col).value
        toc = []
        if sheet.cell(row, toc_col).value:
            toc = sheet.cell(row, toc_col).value
        in_series = sheet.cell(row, in_series_col).value
        # Build {name: [roles]} map.  A copyright holder supersedes all other
        # contributor roles.
        # NOTE(review): the source's collapsed indentation is ambiguous here;
        # the role blocks are treated as exclusive with the copyright-holder
        # branch — confirm against the original file.
        contribs = {}
        if copyright_holder:
            for i in copyright_holder:
                contribs.update({i: ['copyright holder']})
        else:
            if writer:
                for i in writer:
                    contribs.update({i: ['writer']})
            if penciller:
                for i in penciller:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('penciller')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['penciller']})
            if inker:
                for i in inker:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('inker')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['inker']})
            if colorist:
                for i in colorist:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('colorist')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['colorist']})
            if letterer:
                for i in letterer:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('letterer')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['letterer']})
            if cover_artist:
                for i in cover_artist:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('cover artist')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['cover artist']})
            if editor:
                for i in editor:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('editor')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['editor']})
        record = Record()
        # Add boilerplate fields
        record.leader = field_ldr
        record.add_ordered_field(field_040)
        record.add_ordered_field(field_049)
        record.add_ordered_field(field_336_text)
        record.add_ordered_field(field_336_image)
        record.add_ordered_field(field_337)
        record.add_ordered_field(field_338)
        record.add_ordered_field(field_380)
        record.add_ordered_field(field_506)
        record.add_ordered_field(field_542)
        record.add_ordered_field(field_588)
        record.add_ordered_field(field_989)
        # Add other fields
        # 008: 't' when a copyright date exists (pub + copyright years),
        # otherwise 's' (single date).
        today = datetime.today().strftime('%y%m%d')
        if copy_date:
            data_008 = today + 't' + pub_date_year + copy_date_year + country_code + 'a 6 000 1 eng d'
        else:
            data_008 = today + 's' + pub_date_year + ' ' + country_code + 'a 6 000 1 eng d'
        field_008 = Field(tag = '008', data = data_008)
        record.add_ordered_field(field_008)
        if isbn:
            field_020 = Field(tag = '020',
                indicators = [' ',' '],
                subfields = [
                    'a', isbn])
            record.add_ordered_field(field_020)
        # 099 call number derived from the title; drop $b pair, map $n -> $a,
        # trim trailing ','/':' from the first value.
        subfields_099 = subfields_from_string(title)
        if 'b' in subfields_099:
            subfields_099.pop(3)
            subfields_099.pop(2)
        if 'n' in subfields_099:
            subfields_099[subfields_099.index('n')] = 'a'
        if subfields_099[1].endswith(',') or subfields_099[1].endswith(':'):
            subfields_099[1] = subfields_099[1][:-1]
        field_099 = Field(tag = '099',
            indicators = [' ','9'],
            subfields = subfields_099)
        record.add_ordered_field(field_099)
        # Main/added entries: first contributor becomes 110 (copyright holder)
        # or 100 (writer); all others become 710 (no comma, i.e. corporate-like
        # name) or 700.
        for i in contribs:
            if i == list(contribs.keys())[0] and 'copyright holder' in contribs[i]:
                # first contributor is copyright holder
                subfield_content = subfields_from_string_relator(i, contribs[i])
                field_110 = Field(tag = '110',
                    indicators = ['2', ' '],
                    subfields = subfield_content)
                record.add_ordered_field(field_110)
            elif i == list(contribs.keys())[0] and 'writer' in contribs[i]:
                # first contributor is a writer
                subfield_content = subfields_from_string_relator(i, contribs[i])
                field_100 = Field(tag = '100',
                    indicators = ['1', ' '],
                    subfields = subfield_content)
                record.add_ordered_field(field_100)
            else:
                subfield_content = subfields_from_string_relator(i, contribs[i])
                if ',' not in subfield_content[1]:
                    field_710 = Field(tag = '710',
                        indicators = ['2',' '],
                        subfields = subfield_content)
                    record.add_ordered_field(field_710)
                else:
                    field_700 = Field(tag = '700',
                        indicators = ['1',' '],
                        subfields = subfield_content)
                    record.add_ordered_field(field_700)
        # 245 indicator 1: title added entry when a 1XX main entry was made.
        if contribs and ('writer' in contribs[list(contribs.keys())[0]]
                         or 'copyright holder' in contribs[list(contribs.keys())[0]]):
            f245_ind1 = 1
        else:
            f245_ind1 = 0
        # 245 indicator 2: nonfiling characters for leading articles.
        f245_ind2 = 0
        if str.startswith(title, 'The '):
            f245_ind2 = 4
        elif str.startswith(title, 'An '):
            f245_ind2 = 3
        elif str.startswith(title, 'A '):
            f245_ind2 = 2
        subfields_245 = subfields_from_string(title)
        # If writer exists, add $c for first writer
        if writer:
            subfields_245[-1] = subfields_245[-1] + ' /'
            subfields_245.append('c')
            subfields_245.append(name_direct_order(subfields_from_string(writer[0])[1]) + ', writer.')
        else:
            # If no writer, add 245 ending punctuation
            subfields_245[-1] = subfields_245[-1] + '.'
        field_245 = Field(tag = '245',
            indicators = [f245_ind1, f245_ind2],
            subfields = subfields_245)
        record.add_ordered_field(field_245)
        if edition:
            if not edition.endswith('.'):
                edition += '.'
            field_250 = Field(tag = '250',
                indicators = [' ', ' '],
                subfields = [
                    'a', edition])
            record.add_ordered_field(field_250)
        # 264/1: publication statement with ISBD punctuation.
        field_264_1 = Field(tag = '264',
            indicators = [' ','1'],
            subfields = [
                'a', pub_place + ' :',
                'b', publisher + ',',
                'c', pub_date_str + '.'])
        record.add_ordered_field(field_264_1)
        if copy_date:
            field_264_4 = Field(tag = '264',
                indicators = [' ','4'],
                subfields = [
                    'c', '©' + copy_date_str])
            record.add_ordered_field(field_264_4)
        # NOTE(review): subfields_300 is unbound when color is neither
        # 'yes' nor 'no' — the next Field() call would raise NameError.
        if color == 'yes':
            subfields_300 = [
                'a', pages + ' pages :',
                'b', 'chiefly color illustrations.']
        elif color == 'no':
            subfields_300 = [
                'a', pages + ' pages :',
                'b', 'black and white illustrations.']
        field_300 = Field(tag = '300',
            indicators = [' ',' '],
            subfields = subfields_300)
        record.add_ordered_field(field_300)
        if title_to_series(title):
            subfields_490 = title_to_series(title)
            field_490 = Field(tag = '490',
                indicators = ['1',' '],
                subfields = subfields_490)
            record.add_ordered_field(field_490)
        if series_note:
            if not series_note.endswith('.'):
                series_note += '.'
            field_490_series_note = Field(tag = '490',
                indicators = ['1', ' '],
                subfields = ['a', series_note])
            record.add_ordered_field(field_490_series_note)
        # if hist_note:
        #     field_500_hist = Field(tag = '500',
        #         indicators = [' ',' '],
        #         subfields = [
        #             'a', hist_note + '.'])
        #     record.add_ordered_field(field_500_hist)
        if notes:
            field_500_notes = Field(tag = '500',
                indicators = [' ',' '],
                subfields = [
                    'a', notes + '.'])
            record.add_ordered_field(field_500_notes)
        if toc:
            if not toc.endswith('.') and not toc.endswith('?') and not toc.endswith('!'):
                toc += '.'
            field_505 = Field(tag = '505',
                indicators = ['0',' '],
                subfields = [
                    'a', toc])
            record.add_ordered_field(field_505)
        if synopsis:
            field_520 = Field(tag = '520',
                indicators = [' ',' '],
                subfields = [
                    'a', synopsis])
            record.add_ordered_field(field_520)
        # Local notes (590): one per listed creator/character.
        if black_creators:
            for i in black_creators:
                if not i.endswith('.'):
                    i += '.'
                field_590_creators = Field(tag = '590',
                    indicators = [' ',' '],
                    subfields = [
                        'a', i])
                record.add_ordered_field(field_590_creators)
        if black_chars:
            for i in black_chars:
                if not i.endswith('.'):
                    i += '.'
                field_590_chars = Field(tag = '590',
                    indicators = [' ',' '],
                    subfields = [
                        'a', i])
                record.add_ordered_field(field_590_chars)
        if source:
            field_541_source = Field(tag = '541',
                indicators = [' ',' '],
                subfields = [
                    'a', source + '.'])
            record.add_ordered_field(field_541_source)
        # if source_acq:
        #     field_541_source_acq = Field(tag = '541',
        #         indicators = [' ',' '],
        #         subfields = [
        #             'a', source_acq + '.'])
        #     record.add_ordered_field(field_541_source_acq)
        if subj_person:
            for i in subj_person:
                i_subfields = subfields_from_string(i)
                # Set first indicator based on presence of comma in $a
                # NOTE(review): field_600_ind1 is unbound when the heading has
                # no $a subfield — confirm input guarantees one.
                if 'a' in i_subfields:
                    if ',' in i_subfields[i_subfields.index('a') + 1]:
                        field_600_ind1 = '1'
                    else:
                        field_600_ind1 = '0'
                # Terminal punctuation goes on the last subfield before any $1.
                if '1' in i_subfields:
                    last_except_subf1 = i_subfields.index('1') - 1
                else:
                    last_except_subf1 = len(i_subfields) - 1
                if i_subfields[last_except_subf1].endswith(','):
                    i_subfields[last_except_subf1] = re.sub(r'^(.*),$', r'\g<1>.', i_subfields[last_except_subf1])
                if not i_subfields[last_except_subf1].endswith('.') and not i_subfields[last_except_subf1].endswith(')') and not i_subfields[last_except_subf1].endswith('?') and not i_subfields[last_except_subf1].endswith('-'):
                    i_subfields[last_except_subf1] += '.'
                field_600 = Field(tag = '600',
                    indicators = [field_600_ind1,'0'],
                    subfields = i_subfields)
                record.add_ordered_field(field_600)
        if subj_topical:
            for i in subj_topical:
                i_subfields = subfields_from_string(i)
                if not i_subfields[-1].endswith('.') and not i_subfields[-1].endswith(')'):
                    i_subfields[-1] += '.'
                field_650 = Field(tag = '650',
                    indicators = [' ','0'],
                    subfields = i_subfields)
                record.add_ordered_field(field_650)
        if subj_place:
            for i in subj_place:
                i_subfields = subfields_from_string(i)
                if not i_subfields[-1].endswith('.') and not i_subfields[-1].endswith(')'):
                    i_subfields[-1] += '.'
                field_651 = Field(tag = '651',
                    indicators = [' ','0'],
                    subfields = i_subfields)
                record.add_ordered_field(field_651)
        if subj_corp:
            for i in subj_corp:
                i_subfields = subfields_from_string(i)
                if not i_subfields[-1].endswith('.') and not i_subfields[-1].endswith(')'):
                    i_subfields[-1] += '.'
                field_610 = Field(tag = '610',
                    indicators = ['1','0'],
                    subfields = i_subfields)
                record.add_ordered_field(field_610)
        if genre:
            for i in genre:
                if not i.endswith('.') and not i.endswith(')'):
                    i += '.'
                field_655 = Field(tag = '655',
                    indicators = [' ','7'],
                    subfields = [
                        'a', i,
                        '2', 'lcgft'])
                record.add_ordered_field(field_655)
        if characters:
            field_500_chars = Field(tag = '500',
                indicators = [' ', ' '],
                subfields = [
                    'a', characters])
            record.add_ordered_field(field_500_chars)
        # 758: link to the Grand Comics Database work URI.
        if gcd_uri:
            title_758 = subfields_from_string(title)[1]
            if title_758.endswith(',') or title_758.endswith(':'):
                title_758 = title_758[:-1]
            field_758 = Field(tag = '758',
                indicators = [' ',' '],
                subfields = [
                    '4', 'http://rdaregistry.info/Elements/m/P30135',
                    'i', 'Has work manifested:',
                    'a', title_758,
                    '1', gcd_uri])
            record.add_ordered_field(field_758)
        if in_series:
            subfields_773 = subfields_from_string(in_series)
            field_773 = Field(tag = '773',
                indicators = ['0','8'],
                subfields = subfields_773)
            record.add_ordered_field(field_773)
        # 852 holdings: reuse the (already mutated) 099 subfields and edition.
        subfields_852 = [
            'b', 'CARRIER',
            'c', 'carrspec']
        if len(subfields_099) == 4:
            subfields_852.append('h')
            subfields_852.append(subfields_099[1])
            subfields_852.append('i')
            subfields_852.append(subfields_099[3])
        if len(subfields_099) == 2:
            subfields_852.append('h')
            subfields_852.append(subfields_099[1])
        if edition:
            if edition.endswith('.'):
                edition = edition[:-1]
            subfields_852.append('z')
            subfields_852.append(edition)
        field_852 = Field(tag = '852',
            indicators = ['8',' '],
            subfields = subfields_852)
        record.add_ordered_field(field_852)
        outmarc.write(record.as_marc())
        print()
    outmarc.close()
def game_record(data, control_number, suppressed=True, status_code="-"):
    """
    Creates a board game bib record from a data namedtuple.

    args:
        data: namedtuple with game metadata (title, subtitle, title_part,
              author, isbn, upc, pub data, players, duration, content,
              desc, age, price, and barcode lists)
        control_number: str, value stored in the 001 control field
        suppressed: bool, when True the 949 command line suppresses the
                    record from OPAC display
        status_code: str, item status code used in 960 subfield $s
    returns:
        record: pymarc.Record object
    raises:
        ValueError: when data.title is empty
    """
    record = Record()
    record.leader = "00000crm a2200000M 4500"
    tags = []

    # 001 - control field
    tags.append(Field(tag="001", data=control_number))

    # 005 - date/time of latest transaction.
    # MARC 21 mandates the pattern yyyymmddhhmmss.f (4-digit year plus
    # tenths of a second); the previous "%y%m%d%H%M%S.%f" emitted a
    # 2-digit year and 6-digit microseconds, producing an invalid 005.
    tags.append(Field(tag="005", data=datetime.now().strftime("%Y%m%d%H%M%S.0")))

    # 008 - fixed-length data elements; 's' + pub year when known,
    # 'n' (dates unknown) otherwise
    date_created = date.strftime(date.today(), "%y%m%d")
    if data.pub_date:
        t008 = f"{date_created}s{data.pub_date} xxu vneng d"
    else:
        t008 = f"{date_created}n xxu vneng d"
    tags.append(Field(tag="008", data=t008))

    # 020 - one field per ISBN
    for isbn in data.isbn:
        tags.append(Field(tag="020", indicators=[" ", " "], subfields=["a", isbn]))

    # 024 - one field per UPC
    for upc in data.upc:
        tags.append(Field(tag="024", indicators=["1", " "], subfields=["a", upc]))

    # 040 - cataloging source
    tags.append(
        Field(
            tag="040",
            indicators=[" ", " "],
            subfields=["a", "BKL", "b", "eng", "e", "rda", "c", "BKL"],
        )
    )

    # 099 - local call number
    tags.append(Field(tag="099", indicators=[" ", " "], subfields=["a", "BOARD GAME"]))

    # 245 (no final punctuation needed per new PCC ISBD policy)
    subfields = []
    if not data.title:
        raise ValueError("Missing title data")
    else:
        subfields.extend(["a", data.title])
    if data.subtitle:
        subfields[-1] = f"{subfields[-1]} : "
        subfields.extend(["b", data.subtitle])
    if data.title_part:
        subfields[-1] = f"{subfields[-1]}. "
        subfields.extend(["p", data.title_part])

        # add a 246 for the part title; ind2 is the count of leading
        # article characters to skip in filing
        ind2 = check_article(data.title_part)
        tags.append(
            Field(
                tag="246",
                indicators=["1", ind2],
                subfields=["a", data.title_part[int(ind2):]],
            )
        )
    if data.author:
        subfields[-1] = f"{subfields[-1]} / "
        subfields.extend(["c", data.author])
    ind2 = check_article(data.title)
    tags.append(Field(tag="245", indicators=["0", ind2], subfields=subfields))

    # 246 - other title(s)
    for title in data.title_other:
        tags.append(Field(tag="246", indicators=["1", "3"], subfields=["a", title]))

    # 264 - publication statement, with RDA placeholders for missing data
    subfields = []
    if data.pub_place:
        subfields.extend(["a", f"{data.pub_place}:"])
    else:
        subfields.extend(["a", "[Place of publication not identified]:"])
    if data.publisher:
        subfields.extend(["b", f"{data.publisher},"])
    else:
        subfields.extend(["b", "[publisher not identified],"])
    if data.pub_date:
        subfields.extend(["c", data.pub_date])
    else:
        subfields.extend(["c", "[date of publication not identified]"])
    tags.append(Field(tag="264", indicators=[" ", "1"], subfields=subfields))

    # 300 - physical description
    tags.append(
        Field(tag="300", indicators=[" ", " "], subfields=["a", "1 board game"])
    )

    # RDA 3xx tags
    tags.append(
        Field(
            tag="336",
            indicators=[" ", " "],
            subfields=["a", "three-dimensional form", "b", "tdf", "2", "rdacontent"],
        )
    )
    tags.append(
        Field(
            tag="337",
            indicators=[" ", " "],
            subfields=["a", "unmediated", "b", "n", "2", "rdamedia"],
        )
    )
    tags.append(
        Field(
            tag="338",
            indicators=[" ", " "],
            subfields=["a", "object", "b", "nr", "2", "rdacarrier"],
        )
    )

    # 500 notes
    tags.append(
        Field(
            tag="500",
            indicators=[" ", " "],
            subfields=["a", f"Number of players: {data.players}"],
        )
    )
    tags.append(
        Field(
            tag="500",
            indicators=[" ", " "],
            subfields=["a", f"Game duration: {data.duration}"],
        )
    )

    # 505 - content note
    if data.content:
        tags.append(
            Field(tag="505", indicators=["0", " "], subfields=["a", data.content])
        )

    # 520 - summary
    if data.desc:
        tags.append(Field(tag="520", indicators=[" ", " "], subfields=["a", data.desc]))

    # 521 - audience note
    tags.append(Field(tag="521", indicators=[" ", " "], subfields=["a", data.age]))

    # 655 - genre
    tags.append(
        Field(
            tag="655",
            indicators=[" ", "7"],
            subfields=["a", "Board games.", "2", "lcgft"],
        )
    )

    # 856 - link to project page
    tags.append(
        Field(
            tag="856",
            indicators=["4", " "],
            subfields=[
                "u",
                "https://www.bklynlibrary.org/boardgamelibrary",
                "z",
                "Board Game Library website",
            ],
        )
    )

    # 960 - one item field per barcode, per location
    for barcode in data.central_barcodes:
        subfields = [
            "i", barcode,
            "l", "02abg",
            "p", data.price,
            "q", "11",
            "t", "53",
            "r", "i",
            "s", status_code,
        ]
        tags.append(Field(tag="960", indicators=[" ", " "], subfields=subfields))
    for barcode in data.crown_barcodes:
        subfields = [
            "i", barcode,
            "l", "30abg",
            "p", data.price,
            "q", "11",
            "t", "53",
            "r", "i",
            "s", status_code,
        ]
        tags.append(Field(tag="960", indicators=[" ", " "], subfields=subfields))

    # 949 - command line; b3=n suppresses the bib from OPAC display
    if suppressed:
        opac_display_command = "b3=n"
    else:
        opac_display_command = ""
    tags.append(
        Field(
            tag="949",
            indicators=[" ", " "],
            subfields=["a", f"*b2=o;{opac_display_command}"],
        )
    )

    for tag in tags:
        record.add_ordered_field(tag)

    return record
#-------------------------------------------- # Modify the default LDR field in the new MARC record object rec_LDR = list( new_marc_rec.leader ) # split the LDR bytes into a list so you can modify based on index position rec_LDR[5] = 'n' # code for new record res_type = fields[12].strip() if res_type == '': rec_LDR[6] = 'a' # code for text else: rec_LDR[ 6] = res_type # if the "Resource Type" is not blank, use that code instead of 'a' rec_LDR[7] = 'm' # code for monographic record new_marc_rec.leader = ''.join( rec_LDR ) # join the list of LDR bytes into a string and assign to the 'leader' field of the MARC record #-------------------------------------------- # Create 001 and 040 MARC fields for record number and cataloging source rec_001 = Field(tag='001', data='000' + str(rec_cnt)) rec_040 = Field(tag='040', indicators=[' ', ' '], subfields=['a', 'NNU', 'b', 'eng', 'c', 'NNU']) new_marc_rec.add_ordered_field(rec_001) new_marc_rec.add_ordered_field(rec_040) #-------------------------------------------- # Create a 245 Title MARC field title = fields[0].strip() if not title == '': rec_245a = title.split(':')[0] rec_245b = title.split(':')[1]
#trash = open(TRASH, 'w') bib = open(OUT, 'w') #writer = MARCWriter(open('retrobi.mrc','wb')) # MAIN ----------------- with open(IN, 'rb') as f: for LINE in f: # INIT ----------------- #record = Record(force_utf8=True) record = Record() record.leader = ' nab a22 4a 4500' # overwrite internal(pymarc.record) LDR tag record.add_ordered_field(Field(tag='FMT', data='RS')) record.add_ordered_field(Field(tag='003', data='CZ PrUCL')) record.add_ordered_field(Field(tag='005', data='20201231')) record.add_ordered_field( Field(tag='040', indicators=['\\', '\\'], subfields=['a', 'ABB060', 'b', 'cze'])) #record.add_ordered_field(Field(tag='041', indicators=['0','\\'], subfields=['a', 'cze'])) record.add_ordered_field( Field(tag='336', indicators=['\\', '\\'], subfields=['a', 'text', 'b', 'txt', '2', 'rdacontent'])) record.add_ordered_field( Field(tag='337', indicators=['\\', '\\'],
def convert(cs, language, g):
    """Convert occupation/title concepts from graph *g* into MARCXML records.

    Args:
        cs: configuration section wrapper (supports .get/.getboolean/.parser/.name)
        language: language code key into the LANGUAGES mapping
        g: RDF graph of the vocabulary — presumably an rdflib Graph (TODO confirm)

    Writes MARCXML to the configured output file, optionally maintains a
    pickled modification-dates state file, and finally pretty-prints the
    XML output and (when no explicit output was specified) streams it to
    stdout and removes the temporary file.
    """
    vocId = cs.get("vocabulary_code")
    # variable for a bit complicated constants and casting/converting them to appropriate types
    helper_variables = {
        # vocabulary code, suffixed with the language for multilanguage vocabularies
        "vocCode" : (cs.get("vocabulary_code") + "/" + LANGUAGES[language] \
            if cs.getboolean("multilanguage", fallback=False) \
            else vocId),
        "groupingClasses" : [URIRef(x) for x in cs.get("groupingClasses", fallback=",".join(GROUPINGCLASSES)).split(",")],
        "groupingClassesDefault" : [URIRef(x) for x in cs.parser.get("DEFAULT", "groupingClasses", fallback=",".join(GROUPINGCLASSES)).split(",")],
        'modificationDates': cs.get("modificationDates", fallback=None),
        'keepModified' : cs.get("keepModifiedAfter", fallback=None),
        'keepGroupingClasses' : cs.getboolean("keepGroupingClasses", fallback=False),
        'defaultOutputFileName' : "yso2marc-" + cs.name.lower() + "-" + language + ".mrcx"
    }

    # keepModifiedLimit is False (i.e. keep everything) for the literal "all",
    # otherwise the parsed cutoff date
    if helper_variables['keepModified']:
        helper_variables['keepModifiedLimit'] = False \
            if cs.get("keepModifiedAfter", fallback=KEEPMODIFIEDAFTER).lower() == "all" \
            else datetime.date(datetime.strptime(cs.get("keepModifiedAfter"), "%Y-%m-%d"))

    # with multiple languages configured, derive a per-language output file name
    # by inserting "-<language>" before the extension (or appending it)
    if cs.get("output", fallback=None):
        parts = cs.get("languages").split(",")
        if len(parts) > 1:
            output = cs.get("output")
            if len(output.split(".")) > 1:
                helper_variables["outputFileName"] = ".".join(
                    output.split(".")[:-1]) + "-" + language + "." + output.split(".")[-1]
            else:
                helper_variables["outputFileName"] = output + "-" + language

    if not "outputFileName" in helper_variables:
        helper_variables["outputFileName"] = cs.get(
            "output", fallback=helper_variables["defaultOutputFileName"])

    # modified_dates is a dict keyed by record id; the value is a tuple of
    # (last modification date, MD5 digest of the record content)
    # NOTE(review): modified_dates stays unbound when modificationDates is
    # unset but keepModified is set — the loop below would then raise
    # NameError; confirm the config always sets both together.
    if helper_variables['modificationDates']:
        if os.path.isfile(helper_variables['modificationDates']):
            with open(helper_variables['modificationDates'], 'rb') as pickle_file:
                try:
                    modified_dates = pickle.load(pickle_file)
                except EOFError:
                    logging.error(
                        "The file %s for modification dates is empty " %
                        helper_variables['modificationDates'])
                    sys.exit(2)
        else:
            modified_dates = {}

    logging.info(
        "Processing vocabulary with vocabulary code '%s' in language '%s'" %
        (vocId, language))

    incrementor = 0
    writer_records_counter = 0

    # NOTE(review): ET_namespaces is unused in this function
    ET_namespaces = {"marcxml": "http://www.loc.gov/MARC21/slim"}

    handle = open(
        cs.get("output", fallback=helper_variables["defaultOutputFileName"]),
        "wb")
    writer = XMLWriter(handle)

    # collect all prefLabels so altLabels that appear as another concept's
    # prefLabel can be filtered out later
    pref_labels = set()
    for conc in g.subjects(RDF.type, SKOS.Concept):
        pref_label = g.preferredLabel(conc, lang=language)
        if pref_label:
            pref_labels.add(str(pref_label[0][1]))

    # only these mts concept groups are included; no MARC21 record is made
    # from the group name itself
    ids = {"occupations": ['m2332'], "titles": ['m121', 'm3764']}
    uris = {}
    for key in ids:
        uris[key] = set()
        for id in ids[key]:
            uris[key].add(MTS + id)
    # expand each matching top-level group with its member (sub)groups
    for group in g.subjects(RDF.type, ISOTHES.ConceptGroup):
        for key in uris:
            if any(str(group).endswith(uri) for uri in uris[key]):
                get_member_groups(g, group, uris[key])

    concs = []
    if helper_variables['keepModified']:
        # only process concepts modified on/after the configured cutoff
        concs = []
        for uri in modified_dates:
            if modified_dates[uri][0] >= helper_variables['keepModifiedLimit']:
                concs.append(URIRef(uri))
    else:
        for conc in g.subjects(RDF.type, SKOS.Concept):
            concs.append(conc)

    # ids of created concepts; used with the modification-dates state to
    # detect deleted concepts afterwards
    created_concepts = set()

    for concept in concs:
        # MARC21 records are only produced for occupations and titles
        if not (concept in uris['occupations'] or concept in uris['titles']):
            continue
        created_concepts.add(str(concept))
        incrementor += 1
        if incrementor % 1000 == 0:
            logging.info("Processing %sth concept" % (incrementor))

        # skip grouping concepts unless configured to keep them
        if not helper_variables['keepGroupingClasses']:
            if any(conceptType in helper_variables["groupingClasses"]
                   for conceptType in g.objects(concept, RDF.type)):
                continue

        rec = Record()
        rec.leader = cs.get("leaderNew", fallback=LEADERNEW)

        # 024 other standard identifiers — the concept URI is stored here
        rec.add_field(
            Field(tag='024',
                  indicators=['7', ' '],
                  subfields=['a', concept, '2', "uri"]))

        # 040 cataloging organization
        rec.add_field(
            Field(tag='040',
                  indicators=[' ', ' '],
                  subfields=[
                      'a', cs.get("creatorAgency", fallback=CREATOR_AGENCY),
                      'b', LANGUAGES[language],
                      'f', helper_variables["vocCode"]
                  ]))

        # exactly one prefLabel per language is expected; skip concepts
        # without one, warn and take the first when there are several
        valueProps = sorted(getValues(g, concept, SKOS.prefLabel,
                                      language=language),
                            key=lambda o: o.value)
        if len(valueProps) == 0:
            logging.warning(
                "Could not find preflabel for concept %s in language %s. Skipping the whole concept."
                % (concept, language))
            continue
        elif len(valueProps) != 1:
            logging.warning(
                "Multiple prefLabels detected for concept %s in language %s. Choosing the first."
                % (concept, language))

        # 1xx heading field: 174 for occupations, 168 for titles
        if concept in uris['occupations']:
            tag = "174"
            subfield_code = "a"
        elif concept in uris['titles']:
            tag = "168"
            subfield_code = "d"
        rec.add_field(
            Field(tag=tag,
                  indicators=[' ', ' '],
                  subfields=[
                      subfield_code,
                      decomposedÅÄÖtoUnicodeCharacters(
                          unicodedata.normalize(NORMALIZATION_FORM,
                                                str(valueProps[0].value)))
                  ]))

        # skos:altLabel -> 4xx see-references (474 occupations, 468 titles);
        # skip 45X values that appear as 15X headings in another concept,
        # except for altLabels themselves
        seen_values = set()
        for valueProp in sorted(getValues(g, concept, [SKOS.altLabel],
                                          language=language),
                                key=lambda o: str(o.value)):
            if valueProp.prop != SKOS.altLabel and str(
                    valueProp.value) in pref_labels:
                continue
            # de-duplicate hiddenLabel values
            if valueProp.prop == SKOS.hiddenLabel:
                if str(valueProp.value) in seen_values:
                    continue
                seen_values.add(str(valueProp.value))
            if concept in uris['occupations']:
                tag = "474"
                subfield_code = "a"
            elif concept in uris['titles']:
                tag = "468"
                subfield_code = "d"
            rec.add_field(
                Field(tag=tag,
                      indicators=[' ', ' '],
                      subfields=[
                          subfield_code,
                          decomposedÅÄÖtoUnicodeCharacters(
                              unicodedata.normalize(NORMALIZATION_FORM,
                                                    str(valueProp.value)))
                      ]))

        # 7xx linking fields from other-language prefLabels
        valueProps = getValues(g, concept, [
            SKOS.prefLabel, SKOS.exactMatch, SKOS.closeMatch, SKOS.broadMatch,
            SKOS.narrowMatch, SKOS.relatedMatch
        ])
        fields = list(
        )  # fields are collected here and sorted before being added
        for valueProp in valueProps:
            if valueProp.prop == SKOS.prefLabel:
                # filter out same-language labels — they already went to the
                # 1xx field. valueProp.value here, exceptionally, already
                # contains the desired literal (the others carry a node)
                if valueProp.value.language == language:
                    continue
            else:
                # only references within the same vocabulary are taken, so
                # all match properties are skipped here
                continue
            if concept in uris['occupations']:
                tag = "774"
                subfield_code = "a"
            elif concept in uris['titles']:
                tag = "768"
                subfield_code = "d"
            sub2 = "mts" + "/" + LANGUAGES[valueProp.value.language]
            fields.append(
                Field(tag=tag,
                      indicators=[' ', ' '],
                      subfields=[
                          subfield_code,
                          decomposedÅÄÖtoUnicodeCharacters(
                              unicodedata.normalize(NORMALIZATION_FORM,
                                                    str(valueProp.value))),
                          '4', 'EQ',
                          '2', sub2,
                          '0', concept
                      ]))

        # sort fields by (tag, value) and add them
        for sorted_field in sorted(fields,
                                   key=lambda o: (o.tag, o.value().lower())):
            rec.add_field(sorted_field)
        writer_records_counter += 1
        writer.write(rec)

        # track modification state: store today's date + MD5 of the record
        # whenever the record is new or its content hash changed
        # NOTE(review): the local name 'hash' shadows the builtin
        if helper_variables['modificationDates']:
            md5 = hashlib.md5()
            md5.update(str.encode(str(rec)))
            hash = md5.hexdigest()
            if str(concept) in modified_dates:
                if not hash == modified_dates[str(concept)][1]:
                    modified_dates[str(concept)] = (date.today(), hash)
            else:
                modified_dates[str(concept)] = (date.today(), hash)

    # produce deleted concepts when fetching modified concepts: a record
    # present in the modification-dates file but absent from the graph is
    # interpreted as deleted (mts has no deprecation dates)
    if helper_variables['keepModified']:
        concs = []
        for conc in g.subjects(RDF.type, SKOS.Concept):
            if conc in uris['occupations'] or conc in uris['titles']:
                concs.append(str(conc))
        for conc in modified_dates:
            if conc not in concs:
                # if there is no hash (2nd value of the tuple) the concept
                # was already deprecated earlier; only emit it once
                if modified_dates[conc][1]:
                    rec = Record()
                    rec.leader = cs.get("leaderDeleted0",
                                        fallback=LEADERDELETED0)
                    rec.add_field(
                        Field(tag='024',
                              indicators=['7', ' '],
                              subfields=['a', conc, '2', "uri"]))
                    modified_dates[conc] = (date.today(), "")
                    writer_records_counter += 1
                    writer.write(rec)

    if handle is not sys.stdout:
        writer.close()

    # persist the modification-dates state
    if helper_variables['modificationDates']:
        with open(helper_variables['modificationDates'], 'wb') as output:
            pickle.dump(modified_dates, output, pickle.HIGHEST_PROTOCOL)

    # re-parse the written MARCXML and rewrite it pretty-printed
    parser = ET.XMLParser(remove_blank_text=True, strip_cdata=False)
    file_path = helper_variables["outputFileName"]
    tree = ET.parse(file_path, parser)
    e = tree.getroot()
    handle = open(
        cs.get("output", fallback=helper_variables["defaultOutputFileName"]),
        "wb")
    handle.write(
        ET.tostring(e,
                    encoding='UTF-8',
                    pretty_print=True,
                    xml_declaration=True))
    if handle is not sys.stdout:
        handle.close()

    # log a little information about the conversion
    logging.info("Processed %s concepts. Wrote %s MARCXML records." %
                 (incrementor, writer_records_counter))

    # when no explicit output was specified, stream the result to stdout
    # and remove the temporary file
    if cs.get("outputSpecified", fallback=None) == None:
        outputChannel = sys.stdout.buffer
        with open(
                cs.get("output",
                       fallback=helper_variables['defaultOutputFileName']),
                "rb") as f:
            shutil.copyfileobj(f, outputChannel)
    if cs.get("outputSpecified", fallback=None) == None:
        os.remove(
            cs.get("output",
                   fallback=helper_variables['defaultOutputFileName']))
    logging.info("Conversion completed: %s" %
                 datetime.now().replace(microsecond=0).isoformat())
def book_to_mark21_file(book, owner, xml=False):
    """
    Build a MARC21 record for *book* and store it as a file owned by *owner*.

    args:
        book: Book model instance (title, authors, languages, series, ...)
        owner: user that will own the generated file
        xml: when True emit MARCXML, otherwise a binary .dat MARC file
    returns:
        pk of the created file object
    """
    # New record
    record = Record()
    # Number and value explanation: http://www.loc.gov/marc/bibliographic/bdleader.html
    # Adding Leader tags
    l = list(record.leader)
    l[5] = 'n'  # New
    l[6] = 'a'  # For manuscript file use 't'
    l[7] = 'm'  # Monograph
    l[9] = 'a'
    l[19] = '#'
    record.leader = "".join(l)

    # 007 - category of material: text
    record.add_field(record_control_field('007', 't'))

    # 008 - languages; one control field per language, defaulting to English
    languages = book.languages.all()
    if languages:
        for lang in languages:
            record.add_field(record_control_field('008', lang.code))
    else:
        record.add_field(record_control_field('008', 'eng'))

    # 020 - ISBN (exclude identifier rows that are not ISBNs)
    isbn = models.Identifier.objects.filter(book=book).exclude(
        identifier='pub_id').exclude(identifier='urn').exclude(identifier='doi')
    for identifier in isbn:
        if book.book_type:
            record.add_field(record_field('020', ['#', '#'], ['a', str(identifier.value) + ' ' + book.book_type]))
        else:
            record.add_field(record_field('020', ['#', '#'], ['a', str(identifier.value)]))

    # 030 - source of acquisition: the submission URL
    try:
        base_url = models.Setting.objects.get(group__name='general', name='base_url').value
    except Exception:
        base_url = 'localhost:8000'
    book_url = 'http://%s/editor/submission/%s/' % (base_url, book.id)
    record.add_field(record_field('030', ['#', '#'], ['b', book_url]))

    # 100 - main entry, personal names; also accumulate the full names for
    # the 245 $c statement of responsibility.
    # BUG FIX: the accumulator was previously assigned to a misspelled
    # variable ("auhtor_names"), so 245 $c was always empty.
    authors = book.author.all()
    author_names = ''
    for author in authors:
        author_names = author_names + author.full_name() + ' '
        name = author.last_name + ', ' + author.first_name
        if author.middle_name:
            name = name + ' ' + author.middle_name[:1] + '.'
        record.add_field(record_field('100', ['1', '#'], ['a', name]))

    # 245 - title statement; ind2=4 skips the leading article "The "
    title_words = (book.title).split(' ')
    first_word = title_words[0]
    if first_word.lower() == 'the':
        record.add_field(record_field('245', ['1', '4'], ['a', book.title, 'c', author_names]))
    else:
        record.add_field(record_field('245', ['1', '0'], ['a', book.title, 'c', author_names]))

    # 260 - publication data, built only from the settings that exist
    try:
        press_name = models.Setting.objects.get(group__name='general', name='press_name').value
    except Exception:
        press_name = None
    try:
        city = models.Setting.objects.get(group__name='general', name='city').value
    except Exception:
        city = None
    publication_info = []
    if book.publication_date:
        # Press' city
        if city:
            publication_info.append('a')
            publication_info.append(str(city))
        # Press' name
        if press_name:
            publication_info.append('b')
            publication_info.append(str(press_name))
        # Date of publication
        publication_info.append('c')
        publication_info.append(str(book.publication_date))
        record.add_field(record_field('260', ['#', '#'], publication_info))

    # 300 - physical details
    if book.pages:
        record.add_field(record_field('300', ['#', '#'], ['a', str(book.pages) + ' pages']))

    # RDA content/media/carrier types
    record.add_field(record_field('336', ['#', '#'], ['a', 'text', '2', 'rdacontent']))
    record.add_field(record_field('337', ['#', '#'], ['a', 'unmediated', '2', 'rdamedia']))
    record.add_field(record_field('338', ['#', '#'], ['a', 'volume', '2', 'rdacarrier']))

    # 546 - language note
    if languages:
        for lang in languages:
            record.add_field(record_field('546', ['#', '#'], ['a', lang.display]))
    else:
        record.add_field(record_field('546', ['#', '#'], ['a', 'In English']))

    # 700 - press editors
    press_editors = book.press_editors.all()
    for editor in press_editors:
        record.add_field(record_field('700', ['1', '#'], ['a', '%s, %s' % (editor.last_name, editor.first_name), 'e', 'Press editor']))

    # 830/700 - series and series editor
    if book.series:
        record.add_field(record_field('830', ['#', '0'], ['a', book.series.name]))
        if book.series.editor:
            record.add_field(record_field('700', ['1', '#'], ['a', '%s, %s' % (book.series.editor.last_name, book.series.editor.first_name), 'e', 'Series editor']))

    # Serialize the record and attach it to the book as a file
    title = book.title
    if not xml:
        filename = 'book_' + str(book.id) + '_' + re.sub('[^a-zA-Z0-9\n\.]', '', title.lower()) + '_marc21.dat'
        file = handle_marc21_file(record.as_marc(), filename, book, owner)
    else:
        filename = 'book_' + str(book.id) + '_' + re.sub('[^a-zA-Z0-9\n\.]', '', title.lower()) + '_marc21.xml'
        content = record_to_xml(record, quiet=False, namespace=False)
        file = handle_marc21_file(content, filename, book, owner)
    return file.pk
def make_bib(data: namedtuple):
    """
    Assemble a bib record for a circulating laptop.

    args:
        data: namedtuple carrying comp_name, type, and serial attributes
    returns:
        pymarc.Record with fields inserted in MARC tag order
    """
    locker_num = determine_locker_num(data.comp_name)
    lap_num = determine_lap_num(data.comp_name)
    created = date.strftime(date.today(), "%y%m%d")
    item_note = construct_item_note(locker_num, lap_num, data)

    fields = [
        # 008 - fixed-length data elements
        Field(tag="008", data=f"{created}s2019 xx 00 r|und d"),
        # 099 - local call number
        Field(tag="099", indicators=[" ", " "], subfields=["a", "LAPTOP"]),
        # 245 - the locker number serves as the title
        Field(tag="245", indicators=["0", "0"], subfields=["a", f"{locker_num}."]),
    ]

    # single-$a variant titles
    fields.extend(
        Field(tag="246", indicators=["3", " "], subfields=["a", alt])
        for alt in (
            "Laptop circulation",
            "Laptops in the branches",
            "Wireless laptops",
            "Circulating laptops",
            "Laptop computers",
            f"32_PUBLAP{lap_num}",
        )
    )

    # variant titles carrying a $n part designation
    for part in (locker_num, f"32_PUBLAP{lap_num}"):
        fields.append(
            Field(
                tag="246",
                indicators=["3", " "],
                subfields=["a", f"{data.type}.", "n", part],
            )
        )

    # 300 - physical description
    fields.append(
        Field(tag="300", indicators=[" ", " "], subfields=["a", "1 laptop computer"])
    )
    # 500 - serial number note
    fields.append(
        Field(
            tag="500",
            indicators=[" ", " "],
            subfields=["a", f"Serial number: {data.serial}"],
        )
    )
    # 960 - item record data
    fields.append(
        Field(
            tag="960",
            indicators=[" ", " "],
            subfields=[
                "l", "32lap",
                "t", "49",
                "r", "7",
                "q", "7",
                "s", "g",
                "n", f"{item_note}",
            ],
        )
    )
    # 949 - command line tag
    fields.append(
        Field(tag="949", indicators=[" ", " "], subfields=["a", f"*b2=7;bn=32;"])
    )

    bib = Record()
    bib.leader = "00000nrm a2200000Mi 4500"
    for fld in fields:
        bib.add_ordered_field(fld)
    return bib
def create_record(row):
    """Take a row from the csv dict and return a pymarc.Record.

    Returns a German error string instead of a Record when the row lacks a
    shelfmark, a usable date, or an extractable year — callers must check
    the return type.
    """
    rec = Record()
    rec.leader = "00000ntm#a22000005c#4500"
    # 005 - date/time of latest transaction (yyyymmddhhmmss.0)
    rec.add_ordered_field(
        pymarc.Field(tag="005",
                     data=datetime.datetime.now().strftime("%Y%m%d%H%M%S.0")))

    # generate content for 245 from the modern shelfmark (plus volume, if any)
    if not row['Signatur modern']:
        return "Keine Signatur vorhanden"
    else:
        if row["Bd."]:
            val245 = f"UBG Ms {row['Signatur modern'].strip()}/{row['Bd.'].strip()}"
        else:
            val245 = f"UBG Ms {row['Signatur modern'].strip()}"
    rec.add_ordered_field(
        Field(tag='245', indicators=['0', '0'], subfields=['a', val245]))

    # extent in 300: scrolls keep their raw extent; everything else is
    # counted in leaves ("Blätter"). $c combines format and size (h x b).
    if "rolle" in row["Umfang"].lower():
        sfa = row["Umfang"].strip()
    else:
        sfa = f'{row["Umfang"].strip()} Blätter'
    sfc = f'{row["Format"].strip()}, {row["Größe h : b "].strip().replace(":", "x")}'
    # clean up empty/partial values left by blank spreadsheet cells
    if sfa.startswith(" "):
        sfa = ""
    if sfc.startswith(", "):
        sfc = sfc[2:]
    if sfc.endswith(", "):
        sfc = sfc[:-2]
    rec.add_ordered_field(
        Field(tag='300', indicators=[' ', ' '], subfields=["a", sfa, "c", sfc]))

    # 500 - historical shelfmark note
    # NOTE(review): this uses add_field while neighbors use
    # add_ordered_field — confirm the inconsistency is intentional
    if row["Signatur alt"]:
        rec.add_field(
            Field(
                tag='500',
                indicators=[' ', ' '],
                subfields=[
                    'a',
                    f'Historische Signatur der Universitätsbibliothek Graz: {row["Signatur alt"].strip()}'
                ]))
    rec.add_ordered_field(
        Field(tag="500", indicators=[" ", " "], subfields=["a", "Stand 2018"]))

    # 340 - physical medium (writing material)
    beschreibstoff = row["Beschreibstoff"].strip()
    rec.add_ordered_field(
        Field(tag="340", indicators=[" ", " "], subfields=["a", beschreibstoff]))

    # 264 - production date (bracketed as inferred)
    rec.add_ordered_field(
        pymarc.Field(tag="264",
                     indicators=[" ", "1"],
                     subfields=["c", f"[{get_date(row)}]"]))

    # 710 - owning institution
    rec.add_field(
        Field(tag="710",
              indicators=["2", " "],
              subfields=[
                  "a", "Universitätsbibliothek Graz", "0", "(DE-588)18018-X",
                  "4", "own"
              ]))

    # bail out (with an error string) when no date/year can be determined
    # NOTE(review): the local name 'date' shadows any imported 'date'
    date = get_date(row)
    if date == "Datum unbekannt":
        print("Kein Datum vorhanden: " + val245)
        return f"{val245}: Kein Datum vorhanden"
    else:
        year = date_008(date)
        if year is None:
            print("Keine Jahreszahl für 008 extrahierbar: " + val245)
            return f"{val245}: Keine Jahreszahl für 008 extrahierbar."

    # 008 - fixed-length data elements built around the extracted year
    date_on_file = datetime.datetime.now().strftime("%y%m%d")
    data008 = date_on_file + "s" + year + " " + "xx " + "||||" "|" + " " + "||||" + " 00|||| ||"
    rec.add_ordered_field(Field(tag="008", data=data008))

    # 700 - former owners (persons), looked up in the vb_pers authority dict;
    # ind1 flips to '0' for single (non "surname, forename") names
    vorbes_nat_pers = []
    if row["1. VB natürl. Personen"] != '':
        vorbes_nat_pers.append(row["1. VB natürl. Personen"].strip())
    if row["2. VB natürl. Personen"] != '':
        vorbes_nat_pers.append(row["2. VB natürl. Personen"].strip())
    if len(vorbes_nat_pers) > 0:
        for pers in vorbes_nat_pers:
            if pers not in vb_pers:
                print(f"Person nicht vorhanden: {pers}")
                continue
            else:
                persfield = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=vb_pers[pers] + ['4', 'fmo'])
                if "," not in vb_pers[pers][1]:
                    persfield.indicators = ['0', ' ']
                rec.add_ordered_field(persfield)

    # 710 - former owners (institutions); unknown ones are added verbatim
    vorbes_kor = []
    if row["1. Vorbesitz Institution"] != '':
        vorbes_kor.append(row["1. Vorbesitz Institution"].strip())
    if row["2. Vorbesitz Institution"] != '':
        vorbes_kor.append(row["2. Vorbesitz Institution"].strip())
    if len(vorbes_kor) > 0:
        for kor in vorbes_kor:
            if kor not in vb_kor:
                korfield = Field(tag='710',
                                 indicators=['2', ' '],
                                 subfields=['a', kor, '4', 'fmo'])
                rec.add_ordered_field(korfield)
                # NOTE(review): looks like leftover debug output
                print(korfield)
            else:
                korfield = Field(tag='710',
                                 indicators=['2', ' '],
                                 subfields=vb_kor[kor] + ['4', 'fmo'])
                rec.add_ordered_field(korfield)

    # 995 - local holdings data
    standort = "SSHS"
    signatur = "Ms " + row["Signatur modern"]
    rec.add_field(
        Field(tag="995",
              indicators=[" ", " "],
              subfields=[
                  "b", "BHB", "c", standort, "h", signatur, "a",
                  row["Signatur alt"], "9", "LOCAL"
              ]))
    return rec
def make_bib(fh, oclc_code, library_code, blanketPO, selector_code, order):
    """creates bib & order record in MARC21 format with
    UTF-8 encoded charset

    args:
        fh: output file handle/path passed through to save2marc
        oclc_code: 'BKL' or 'NYP'; selects the 960 order-field layout
        library_code: NYPL library code for 960 $d
        blanketPO: blanket purchase order number for 961 $m (may be None)
        selector_code: BPL selector code for 960 $c
        order: order object carrying resource metadata and order details
    raises:
        ValueError: when oclc_code is neither 'BKL' nor 'NYP'
    """
    record = Record()
    tags = []

    # MARC leader — map the order's material code onto the leader/06
    # record-type byte
    if order.mat_bib in ('h', 'v'):
        MARCmatType = 'g'  # projected medium (DVD)
    elif order.mat_bib in ('i', 'u'):
        MARCmatType = 'i'  # nonmusical sound recording (audiobook)
    elif order.mat_bib in ('j', 'y'):
        MARCmatType = 'j'  # musical sound recording (CD)
    else:
        # 'a' and any unrecognized code default to language material;
        # the original separate 'a' and else branches were identical
        MARCmatType = 'a'

    if order.lang == 'eng':
        order_code3 = 'd'
    else:
        order_code3 = 'f'

    record.leader = f'00000n{MARCmatType}m a2200000u 4500'

    # 001 field
    tags.append(Field(tag='001', data=order.wlo))

    # 008 field
    # needs to take into account differences between different
    # non-print formats
    dateCreated = date.strftime(date.today(), '%y%m%d')
    tag008 = f'{dateCreated}s xx 000 u {order.lang} d'
    if order.resource.pub_date is not None:
        # splice the publication date into Date1 (bytes 07-10);
        # assumes pub_date is a 4-character year — TODO confirm
        tag008 = tag008[:7] + order.resource.pub_date + tag008[11:]
    tags.append(Field(tag='008', data=tag008))

    # 020 field
    if order.resource.isbn is not None:
        tags.append(Field(tag='020',
                          indicators=[' ', ' '],
                          subfields=['a', order.resource.isbn]))

    # 024 field
    if order.resource.upc is not None:
        tags.append(Field(tag='024',
                          indicators=['1', ' '],
                          subfields=['a', order.resource.upc]))

    # 028 field
    if order.resource.other_no is not None:
        tags.append(Field(tag='028',
                          indicators=['6', '0'],
                          subfields=['a', order.resource.other_no]))

    # 040 field
    tags.append(Field(
        tag='040',
        indicators=[' ', ' '],
        subfields=[
            'a', oclc_code,
            'b', 'eng',
            'c', oclc_code]))

    # 100 field
    author_present = False
    if order.resource.author is not None:
        author_present = True
        subfields = ['a', order.resource.author]
        tags.append(Field(
            tag='100',
            indicators=['1', ' '],
            subfields=subfields))

    # 245 field
    # add format to title for non-print mat
    # NOTE(review): this mutates order.resource.title in place — confirm no
    # later consumer relies on the unmodified title
    if MARCmatType == 'g':
        order.resource.title += ' (DVD)'
    elif MARCmatType == 'i':
        order.resource.title += ' (Audiobook)'
    elif MARCmatType == 'j':
        order.resource.title += ' (CD)'
    if author_present:
        t245_ind1 = '1'
    else:
        t245_ind1 = '0'
    subfields = ['a', order.resource.title]
    tags.append(Field(
        tag='245',
        indicators=[t245_ind1, '0'],
        subfields=subfields))

    # 264 field - publication data with a placeholder for a missing date
    subfields = []
    if order.resource.pub_place is not None:
        subfieldA = ['a', order.resource.pub_place]
        subfields.extend(subfieldA)
    if order.resource.publisher is not None:
        subfieldB = ['b', order.resource.publisher]
        subfields.extend(subfieldB)
    if order.resource.pub_date is None:
        subfieldC = ['c', '[date not specified]']
    else:
        subfieldC = ['c', order.resource.pub_date]
    subfields.extend(subfieldC)
    tags.append(Field(
        tag='264',
        indicators=[' ', '1'],
        subfields=subfields))

    # 300 field - physical description per material type
    if MARCmatType == 'g':
        container = 'videodisc ; 4 3/4 in.'
    elif MARCmatType == 'i':
        container = 'sound disc ; 4 3/4 in.'
    elif MARCmatType == 'j':
        container = 'sound disc ; 4 3/4 in.'
    else:
        container = 'pages ; cm.'
    tags.append(Field(
        tag='300',
        indicators=[' ', ' '],
        subfields=['a', container]))

    # 940 field
    tags.append(Field(
        tag='940',
        indicators=[' ', ' '],
        subfields=['a', 'brief wlo record']))

    # 960 field - order data; the layout depends on the target system
    subfields = []
    if oclc_code == 'BKL':
        # subfield_A = ['a', BPL_ORDERS['acqType']]  # set by load table
        subfield_C = ['c', selector_code]
        subfield_M = ['m', BPL_ORDERS['status']]
        subfield_N = ['n', BPL_ORDERS['tloc']]
        subfield_Z = ['z', BPL_ORDERS['currency']]
        subfields.extend(subfield_C)
    elif oclc_code == 'NYP':
        # subfield_A = ['a', NYPL_ORDERS['acqType']]  # set by load table
        subfield_D = ['d', library_code]
        subfield_E = ['e', order_code3]
        subfield_M = ['m', NYPL_ORDERS['status']]
        subfield_N = ['n', NYPL_ORDERS['tloc']]
        subfield_Y = ['y', NYPL_ORDERS['volumes']]
        subfield_Z = ['z', NYPL_ORDERS['currency']]
        subfields.extend(subfield_D)
        subfields.extend(subfield_E)
        subfields.extend(subfield_Y)
    else:
        # previously an unknown code crashed below with a NameError on
        # subfield_M; fail fast with a clear message instead
        raise ValueError(
            f"unsupported oclc_code: {oclc_code!r} (expected 'BKL' or 'NYP')")
    subfield_F = ['f', order.audn]
    subfield_G = ['g', order.mat_ord]
    subfield_O = ['o', order.copies]
    subfield_Q = ['q', order.order_date]
    subfield_S = ['s', f'{order.resource.price_disc:.2f}']
    subfield_T = ['t', order.locs]
    subfield_U = ['u', order.funds]
    subfield_V = ['v', order.vendor]
    subfield_W = ['w', order.lang]
    subfields.extend(subfield_F)
    subfields.extend(subfield_G)
    subfields.extend(subfield_M)
    subfields.extend(subfield_N)
    subfields.extend(subfield_O)
    subfields.extend(subfield_Q)
    subfields.extend(subfield_S)
    subfields.extend(subfield_T)
    subfields.extend(subfield_U)
    subfields.extend(subfield_V)
    subfields.extend(subfield_W)
    subfields.extend(subfield_Z)
    tags.append(Field(tag='960',
                      indicators=[' ', ' '],
                      subfields=subfields))

    # 961 field - order notes / PO numbers
    subfields = []
    subfield_I = ['i', order.wlo]
    if order.poPerLine is not None:
        subfield_H = ['h', order.poPerLine]
        subfields.extend(subfield_H)
    if blanketPO is not None:
        subfield_M = ['m', blanketPO]
        subfields.extend(subfield_M)
    if order.note is not None:
        subfield_D = ['d', order.note]
        subfields.extend(subfield_D)
    subfields.extend(subfield_I)
    tags.append(Field(
        tag='961',
        indicators=[' ', ' '],
        subfields=subfields))

    # construct & send to file
    for tag in tags:
        record.add_ordered_field(tag)
    save2marc(fh, record)
def make_bib(row: namedtuple, sequence: int):
    """Build a brief MARC bib record for one map described by a spreadsheet row.

    Args:
        row: namedtuple exposing the spreadsheet columns read here
            (pub_year, scale, author, title, alt_title, series, note,
            content, subjects, genre, call_number)
        sequence: integer appended to "bkops" to mint the 001 control number

    Returns:
        pymarc Record instance with fields added in tag order
    """
    bib = Record()

    # leader: 'e' = cartographic material, monograph-level brief record
    bib.leader = "00000cem a2200000Mi 4500"

    tags = []

    # 001 tag - local control number
    tags.append(Field(tag="001", data=f"bkops{sequence}"))

    # 003 tag - control number identifier
    tags.append(Field(tag="003", data="BookOps"))

    # 005 tag - date & time of latest transaction
    timestamp = create_timestamp()
    tags.append(Field(tag="005", data=timestamp))

    # 007 tag - physical description fixed field (map)
    tags.append(Field(
        tag="007",
        data="aj canzn",
    ))

    # 008 tag - fixed-length data elements
    dateCreated = date.strftime(date.today(), "%y%m%d")
    pub_year = encode_pub_year(row.pub_year)
    data = f"{dateCreated}s{pub_year} xx |||||| a | | und d"
    tags.append(Field(tag="008", data=data))

    # 034 tag - coded cartographic mathematical data (only when the scale
    # could be encoded)
    esc = encode_scale(row.scale)
    if esc is not None:
        tags.append(
            Field(tag="034",
                  indicators=["1", " "],
                  subfields=["a", "a", "b", esc]))

    # 110 tag - corporate main entry; author is treated as the cartographer
    tags.append(
        Field(
            tag="110",
            indicators=["1", " "],
            subfields=["a", f"{row.author},", "e", "cartographer."],
        ))

    # 245 tag - title statement
    tags.append(
        Field(tag="245",
              indicators=["1", "0"],
              subfields=["a", f"{row.title}."]))

    # 246 tag - variant title, when present
    if row.alt_title:
        tags.append(
            Field(tag="246",
                  indicators=["3", " "],
                  subfields=["a", row.alt_title]))

    # 255 tag - cartographic mathematical data (normalized scale statement)
    nsc = norm_scale(row.scale)
    tags.append(Field(tag="255", indicators=[" ", " "], subfields=["a", nsc]))

    # 264 tag - publication statement
    npub_date = norm_pub_date(row.pub_year)
    tags.append(
        Field(
            tag="264",
            indicators=[" ", "1"],
            subfields=[
                "a",
                "[Place of publication not identified] :",
                "b",
                f"{row.author},",
                "c",
                npub_date,
            ],
        ))

    # tag 300 - physical description
    tags.append(
        Field(
            tag="300",
            indicators=[" ", " "],
            subfields=["a", "1 folded map :", "b", "color"],
        ))

    # 336/337/338 - RDA content/media/carrier types
    tags.append(
        Field(
            tag="336",
            indicators=[" ", " "],
            subfields=[
                "a", "cartographic image", "b", "cri", "2", "rdacontent"
            ],
        ))
    tags.append(
        Field(
            tag="337",
            indicators=[" ", " "],
            # fix: source vocabulary for field 337 is "rdamedia"
            # (original had the typo "rddcontent")
            subfields=["a", "unmediated", "b", "n", "2", "rdamedia"],
        ))
    tags.append(
        Field(
            tag="338",
            indicators=[" ", " "],
            # fix: source vocabulary for field 338 is "rdacarrier"
            # (original had "rdacontent")
            subfields=["a", "sheet", "b", "nb", "2", "rdacarrier"],
        ))

    # 490 tag - series statement, when present
    if row.series:
        tags.append(
            Field(tag="490",
                  indicators=["0", " "],
                  subfields=["a", row.series]))

    # 500 tag - general note, when present
    if row.note:
        tags.append(
            Field(tag="500",
                  indicators=[" ", " "],
                  subfields=["a", f"{row.note}."]))

    # 505 tag - formatted contents note, when present
    if row.content:
        tags.append(
            Field(tag="505",
                  indicators=["0", " "],
                  subfields=["a", f"{row.content}."]))

    # 650 tags - subject headings encoded by the project helper
    if row.subjects:
        subject_fields = encode_subjects(row.subjects)
        tags.extend(subject_fields)

    # 655 tag - genre/form term, when present
    if row.genre:
        tags.append(
            Field(
                tag="655",
                indicators=[" ", "7"],
                subfields=["a", f"{row.genre}.", "2", "lcgft"],
            ))

    # tag 852 - local call number, when present
    if row.call_number:
        tags.append(
            Field(tag="852",
                  indicators=["8", " "],
                  subfields=["h", row.call_number]))

    for t in tags:
        bib.add_ordered_field(t)

    return bib
def epub_to_marc(fname, conf_file=None):
    """Read metadata out of an .epub file and build a MARC record from it.

    Args:
        fname: path to the .epub file
        conf_file: optional path to an INI configuration file; when omitted,
            the module-level DEFAULT_CONF string is used

    Returns:
        pymarc Record populated from the epub's OPF metadata block
    """
    ns = {
        'n': 'urn:oasis:names:tc:opendocument:xmlns:container',
        'pkg': 'http://www.idpf.org/2007/opf',
        'dc': 'http://purl.org/dc/elements/1.1/'
    }
    # prepare to read from the .epub file
    # (renamed from `zip` to avoid shadowing the builtin)
    epub_zip = zipfile.ZipFile(fname)

    # find the contents metafile: the OCF container.xml carries the OPF path
    # in a 'full-path' attribute
    txt = epub_zip.read('META-INF/container.xml')
    tree = etree.fromstring(txt)
    for el in tree:
        for elel in el:
            for item in elel.items():
                if item[0] == 'full-path':
                    cfname = item[1]

    # grab the metadata block from the contents metafile
    cf = epub_zip.read(cfname)
    tree = etree.fromstring(cf)
    p = tree.xpath('/pkg:package/pkg:metadata', namespaces=ns)[0]

    # Read from the config file
    conf = configparser.ConfigParser()
    if conf_file:
        conf.read(conf_file)
    else:
        conf.read_string(DEFAULT_CONF)

    leader_dict = {}
    tag_005_dict = {}
    tag_006_dict = {}
    tag_007_dict = {}
    tag_008_dict = {}
    tag_040_dict = {}
    tag_264_dict = {}
    # copy each recognized config section into its own dict
    sections = conf.sections()
    for section in sections:
        if section == 'leader':
            for option in conf.options(section):
                leader_dict[option] = conf.get(section, option)
        elif section == '006':
            for option in conf.options(section):
                tag_006_dict[option] = conf.get(section, option)
        elif section == '007':
            for option in conf.options(section):
                tag_007_dict[option] = conf.get(section, option)
        elif section == '008':
            for option in conf.options(section):
                tag_008_dict[option] = conf.get(section, option)
        elif section == '040':
            for option in conf.options(section):
                tag_040_dict[option] = conf.get(section, option)
        elif section == '264':
            for option in conf.options(section):
                tag_264_dict[option] = conf.get(section, option)

    record = Record(force_utf8=True)

    # set the leader
    record.leader = build_leader(leader_dict)
    # I *think* it's updating the 'Base Address of Data' position when
    # it is written to file, so I have kept characters 12-16 blank.

    # Field 005
    record.add_field(Field(tag='005', data=build_tag_005()))

    # Field 006
    record.add_field(Field(tag='006',
                           data=build_tag_006(tag_006_dict, tag_008_dict)))

    # Field 007
    record.add_field(Field(tag='007', data=build_tag_007(tag_007_dict)))

    # Field 008
    record.add_field(Field(tag='008', data=build_tag_008(tag_008_dict, p, ns)))

    # Field 020
    # fix: the original constructed `epub_field` but never added it to the
    # record, so the ISBN was silently dropped
    epub_isbn = None
    if p.xpath('dc:identifier[@id="ISBN"]/text()', namespaces=ns):
        epub_isbn = p.xpath(
            'dc:identifier[@id="ISBN"]/text()', namespaces=ns)[0].strip()
    elif p.xpath('dc:identifier[@pkg:scheme="ISBN"]/text()', namespaces=ns):
        epub_isbn = p.xpath(
            'dc:identifier[@pkg:scheme="ISBN"]/text()', namespaces=ns)[0].strip()
    if epub_isbn is not None:
        record.add_field(Field(
            tag='020',
            indicators=[' ', ' '],
            subfields=['a', epub_isbn, 'q', 'epub']
        ))

    # Field 040
    # First, check if the indicators are empty and if they are,
    # turn them into single spaces.
    for value in ('indicator_1', 'indicator_2'):
        if tag_040_dict[value] == '':
            tag_040_dict[value] = ' '
    record.add_field(Field(
        tag='040',
        indicators=[tag_040_dict['indicator_1'],
                    tag_040_dict['indicator_2']],
        subfields=['a', tag_040_dict['subfield_a'],
                   'b', tag_040_dict['subfield_b'],
                   'e', tag_040_dict['subfield_e'],
                   'c', tag_040_dict['subfield_c']]
    ))

    # Field 245
    # fix: initialize these up front so an epub missing a dc:title or
    # dc:creator cannot raise NameError/IndexError below
    title = None
    subtitle = None
    creator_statement = None
    if p.xpath('dc:title/text()', namespaces=ns):
        full_title = p.xpath('dc:title/text()', namespaces=ns)[0]
        if ":" in full_title:
            # split "Title: Subtitle" on the first colon
            title = full_title[:full_title.index(':')].strip()
            subtitle = full_title[full_title.index(':') + 1:].strip()
        else:
            title = full_title
            subtitle = None
    creators = p.xpath('dc:creator/text()', namespaces=ns)
    if creators:
        creator_statement = creators[0]
    if title and subtitle and creator_statement:
        # second indicator = number of non-filing characters
        offset = 0
        if ' ' in title:
            title_words = title.split(' ')
            if title_words[0].lower() in NON_FILING_WORDS:
                offset = len(title_words[0]) + 1
        # fix: pymarc indicators are strings, not ints
        record.add_field(
            Field('245', ['0', str(offset)],
                  ['a', title + " :",
                   'b', subtitle + " /",
                   'c', creator_statement]))
    elif title and creator_statement:
        offset = 0
        if ' ' in title:
            title_words = title.split(' ')
            if title_words[0].lower() in NON_FILING_WORDS:
                offset = len(title_words[0]) + 1
        record.add_field(
            Field('245', ['0', str(offset)],
                  ['a', title + " /",
                   'c', creator_statement]))

    # Field 264 - publication statement (needs both publisher and date)
    if p.xpath('dc:publisher/text()', namespaces=ns) \
            and p.xpath('dc:date/text()', namespaces=ns):
        record.add_field(Field(
            '264', [' ', '1'],
            ['a', tag_264_dict['subfield_a'] + ' :',
             'b', p.xpath('dc:publisher/text()', namespaces=ns)[0] + ", ",
             'c', p.xpath('dc:date/text()', namespaces=ns)[0]]))

    # Field 264 - copyright notice date, when a year is found in dc:rights
    if p.xpath('dc:rights/text()', namespaces=ns):
        copyright_statement = ""
        copyright_symbol = "©"
        rights_words_array = p.xpath('dc:rights/text()',
                                     namespaces=ns)[0].split()
        for word in rights_words_array:
            if word in copyright_year_range:
                copyright_statement = copyright_symbol + word
        if len(copyright_statement) > 4:
            record.add_field(Field('264', [' ', '4'],
                                   ['c', copyright_statement]))

    return record
def test_barcodes_duplicates_in_two_bpl_files(self): bib = Record() bib.leader = '00000nam a2200000u 4500' tags = [] tags.append(Field(tag='001', data='ocm00000003')) tags.append( Field(tag='245', indicators=['0', '0'], subfields=['a', 'Test title 1'])) tags.append( Field(tag='960', indicators=[' ', ' '], subfields=[ 'i', '34444849044539', 'l', '14afc', 'p', '14.95', 't', '100', 'v', 'BTURBN' ])) for tag in tags: bib.add_ordered_field(tag) bibs.write_marc21(self.fh1, bib) bib = Record() bib.leader = '00000nam a2200000u 4500' tags = [] tags.append(Field(tag='001', data='ocm00000001')) tags.append( Field(tag='245', indicators=['0', '0'], subfields=['a', 'Test title 1'])) tags.append( Field(tag='960', indicators=[' ', ' '], subfields=[ 'i', '34444849044538', 'l', '14afc', 'p', '14.95', 't', '100', 'v', 'BTURBN' ])) for tag in tags: bib.add_ordered_field(tag) bibs.write_marc21(self.fh1, bib) bib = Record() bib.leader = '00000nam a2200000u 4500' tags = [] tags.append(Field(tag='001', data='ocm00000001')) tags.append( Field(tag='245', indicators=['0', '0'], subfields=['a', 'Test title 1'])) tags.append( Field(tag='960', indicators=[' ', ' '], subfields=[ 'i', '34444849044538', 'l', '14afc', 'p', '14.95', 't', '100', 'v', 'BTURBN' ])) for tag in tags: bib.add_ordered_field(tag) bibs.write_marc21(self.fh2, bib) self.assertEqual( default.barcode_duplicates([self.fh1, self.fh2], 'bpl'), { u'34444849044538': [('barcode1_dup_test.mrc', 2), ('barcode2_dup_test.mrc', 1)] })
def output_iso(file_name: str) -> None: output_file_name = file_name[:-4] + ".iso" temp_file_name = "临时文件.iso" # 先刷新output_file_name fp1 = open(output_file_name, 'w', encoding='utf-8') fp1.close() # 用list-dict显示出来 dataFrame_temp = pd.read_csv(file_name, encoding='utf-8', dtype=str).to_dict(orient='records') dataFrame = [] # 先把表格中的全部信息录入dataFrame中.注意,如果是nan的部分,则删掉不计入;另,需要删除掉Unnamed列与continue列 for index, value in enumerate(dataFrame_temp): data_single = {} for k in value: v = str(value[k]) if v == 'nan' or len( v.strip()) == 0 or "Unnamed" in k or "continue" in k: pass else: data_single[k] = v.strip() dataFrame.append(data_single) for data in dataFrame: record = Record() # 先把isbn列筛掉,同时把head列改成000列 data2 = {} for key, value in data.items(): if key == "head": data2["000"] = value elif '0' <= key[0] <= '9': data2[key] = value # 然后对其列进行排序 keys = list(data2.keys()) keys.sort() # 按照排序后的顺序,逐一进行抓取,并添加入record数据 for key in keys: # 如果是"000",是题名 if key == "000": record.leader = data2[key] # 如果是"009"及以内的数据 elif key <= "009": record.add_field(Field(tag=key, data=data2[key])) # 如果是"009"以上的数据,需要把"▼"都换成"|",且把第一个"|"之前的数据作为指示符 elif key > "009": # 替换特殊字符 data2[key] = data2[key].replace("▼", "|") # 选中指示位 indicators = data2[key].split("|")[0] if len(indicators) == 0: indicators = [" ", " "] elif len(indicators) == 1: indicators = [indicators[0], " "] else: indicators = [indicators[0], indicators[1]] # 选中数据内容.按照"|"切割,每段"|"之前写两个数据内容 subfields = [] for words in data2[key].split("|")[1:]: subfields.append(words[0]) subfields.append(words[1:]) # 加入数据 record.add_field( Field(tag=key[:3], indicators=indicators, subfields=subfields)) # 数据生成完毕,写入临时文件 with open(temp_file_name, 'wb') as fh: writer = MARCWriter(fh) writer.write(record) # 从临时文件录入到生成文件中 fp1, fp2 = open(temp_file_name, 'r', encoding='utf-8'), open(output_file_name, 'a', encoding='utf-8') fp2.write(fp1.readline()) fp2.write('\n') fp1.close() fp2.close() # 删除临时文件 os.remove(temp_file_name)
def output_iso_from_data(file_name: str, isbn_total: list, data_total: dict) -> None: temp_file_name = "临时文件.iso" fp = open(file_name, 'w', encoding='utf-8') fp.close() records = [] for isbn in isbn_total: record = Record() if isbn in data_total: data = data_total[isbn] for key, value in data.items(): # 把一些utf8无法识别的符号替换掉. for character in NON_CHARACTERS_IN_UTF_8: key, value = str(key).replace(character, ""), str(value).replace( character, "") if key in ['continue']: continue elif key[:3] == '000': record.leader = value elif key[:3] <= '009': record.add_field(Field(tag=key[:3], data=value)) else: subfields = [] words = value[2:].replace("$", " ").replace("|", "$").strip() for word in words.split("$"): if len(word.strip()) == 0: continue else: subfields.append(word.strip()[0]) subfields.append(word.strip()[1:]) record.add_field( Field(tag=key[:3], indicators=[value[0], value[1]], subfields=subfields)) if str(record.leader) == str(Record().leader): # 新的数据 record.add_field(Field(tag='001', data=isbn)) record = record_sorted(record) records.append(record) # 数据生成完毕,写入临时文件 with open(temp_file_name, 'wb') as fh: writer = MARCWriter(fh) try: writer.write(record) # 测试读取是否有问题(如大英9780714827308) except UnicodeEncodeError: print("编号为:{}的数据格式有误,清空数据以利于输出.".format(isbn)) record = Record() record.add_field(Field(tag='001', data=isbn)) writer.write(record) # 从临时文件录入到生成文件中 fp1, fp2 = open(temp_file_name, 'r', encoding='utf-8'), open(file_name, 'a', encoding='utf-8') try: fp2.write(fp1.readline()) except UnicodeDecodeError: # 部分解码有误 如大英9780714827308 fp1.close() fp2.close() with open(temp_file_name, 'wb') as fh: writer = MARCWriter(fh) record = Record() record.add_field(Field(tag='001', data=isbn)) writer.write(record) fp1, fp2 = open(temp_file_name, 'r', encoding='utf-8'), open(file_name, 'a', encoding='utf-8') fp2.write(fp1.readline()) fp2.write('\n') fp1.close() fp2.close() # 删除临时文件 os.remove(temp_file_name)
def main(arglist):
    """Read a comic-book description spreadsheet and write MARC records.

    Expects the first sheet of the xls(x) at `arglist`'s `input` path to
    carry the named columns below; writes one MARC record per data row
    to 'records.mrc' in the working directory.

    Args:
        arglist: argv-style list passed to argparse
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='path to spreadsheet')
    # parser.add_argument('output', help='save directory')
    # parser.add_argument('--production', help='production DOIs', action='store_true')
    args = parser.parse_args(arglist)
    input = Path(args.input)  # NOTE: shadows the builtin `input`

    # Read spreadsheet
    book_in = xlrd.open_workbook(str(input))
    sheet = book_in.sheet_by_index(0)  # get first sheet
    col_headers = sheet.row_values(0)
    # print(col_headers)
    # print()

    # resolve each expected column by its header name
    title_col = col_headers.index('Title')
    subj_col = col_headers.index('Subject')
    genre_col = col_headers.index('Genre')
    pages_col = col_headers.index('Pages')
    date_col = col_headers.index('Date')
    pub_place_col = col_headers.index('Pub_Place')
    publisher_col = col_headers.index('Publisher')
    source_col = col_headers.index('Source')
    writer_col = col_headers.index('Writer')
    penciller_col = col_headers.index('Penciller')
    inker_col = col_headers.index('Inker')
    colorist_col = col_headers.index('Colorist')
    letterer_col = col_headers.index('Letterer')
    cover_artist_col = col_headers.index('Cover Artist')
    editor_col = col_headers.index('Editor')
    hist_note_col = col_headers.index('Historical Note')
    note_col = col_headers.index('Note')
    characters_col = col_headers.index('Characters')
    story_arc_col = col_headers.index('Story Arc')
    toc_col = col_headers.index('Table of Contents')
    series_col = col_headers.index('Is Part of Series')

    outmarc = open('records.mrc', 'wb')

    # Boilerplate fields shared by every record
    field_ldr = '00000nam 2200000Ii 4500'
    field_040 = Field(
        tag='040',
        indicators=[' ', ' '],
        subfields=['a', 'VMC', 'b', 'eng', 'e', 'rda', 'c', 'VMC'])
    field_049 = Field(tag='049', indicators=[' ', ' '], subfields=['a', 'VMCM'])
    field_336_text = Field(
        tag='336',
        indicators=[' ', ' '],
        subfields=['a', 'text', 'b', 'txt', '2', 'rdacontent'])
    field_336_image = Field(
        tag='336',
        indicators=[' ', ' '],
        subfields=['a', 'still image', 'b', 'sti', '2', 'rdacontent'])
    field_337 = Field(tag='337', indicators=[' ', ' '],
                      subfields=['a', 'unmediated', 'b', 'n', '2', 'rdamedia'])
    field_338 = Field(tag='338', indicators=[' ', ' '],
                      subfields=['a', 'volume', 'b', 'nc', '2', 'rdacarrier'])
    field_380 = Field(tag='380', indicators=[' ', ' '],
                      subfields=['a', 'Comic books and graphic novels.'])
    field_506 = Field(
        tag='506',
        indicators=['1', ' '],
        subfields=[
            'a',
            'Collection open to research. Researchers must register and agree to copyright and privacy laws before using this collection. Please contact Research Services staff before visiting the James Madison University Special Collections Library to use this collection.'
        ])
    field_542 = Field(tag='542', indicators=[' ', ' '],
                      subfields=[
                          'a', 'Copyright not evaluated', 'u',
                          'http://rightsstatements.org/vocab/CNE/1.0/'
                      ])
    field_588 = Field(
        tag='588',
        indicators=['0', ' '],
        subfields=[
            'a', 'Description based on indicia and Grand Comics Database.'
        ])
    field_989 = Field(tag='989', indicators=[' ', ' '],
                      subfields=['a', 'PN6728'])

    # one record per data row (row 0 is the header)
    for row in range(1, sheet.nrows):
        print('Record ' + str(row))
        title = sheet.cell(row, title_col).value
        print(title)
        lower_title = parse_title(lowercase_title(title))
        title = parse_title(sheet.cell(row, title_col).value)
        # a 3-part parse means title / part title / part number
        has_part_title = False
        if len(title) == 3:
            has_part_title = True

        # multi-valued cells are ';'-separated
        subj = sheet.cell(row, subj_col).value
        subj = [x.strip() for x in subj.split(';')]
        genre = sheet.cell(row, genre_col).value
        genre = [x.strip() for x in genre.split(';')]
        pages = sheet.cell(row, pages_col).value
        date = sheet.cell(row, date_col).value[0:4]  # year only
        pub_place = sheet.cell(row, pub_place_col).value
        publisher = sheet.cell(row, publisher_col).value
        source = sheet.cell(row, source_col).value

        # writer = sheet.cell(row, writer_col).value
        writer = []
        if sheet.cell(row, writer_col).value:
            writer = sheet.cell(row, writer_col).value
            writer = [x.strip() for x in writer.split(';')]
        penciller = []
        if sheet.cell(row, penciller_col).value:
            penciller = sheet.cell(row, penciller_col).value
            penciller = [x.strip() for x in penciller.split(';')]
        inker = []
        if sheet.cell(row, inker_col).value:
            inker = sheet.cell(row, inker_col).value
            inker = [x.strip() for x in inker.split(';')]
        colorist = []
        if sheet.cell(row, colorist_col).value:
            colorist = sheet.cell(row, colorist_col).value
            # print(colorist)
            # print('COLORIST FROM SHEET=' + colorist + '=END')
            # print(bool(colorist))
            colorist = [x.strip() for x in colorist.split(';')]
        letterer = []
        if sheet.cell(row, letterer_col).value:
            letterer = sheet.cell(row, letterer_col).value
            letterer = [x.strip() for x in letterer.split(';')]
        cover_artist = []
        if sheet.cell(row, cover_artist_col).value:
            cover_artist = sheet.cell(row, cover_artist_col).value
            cover_artist = [x.strip() for x in cover_artist.split(';')]
        editor = []
        if sheet.cell(row, editor_col).value:
            editor = sheet.cell(row, editor_col).value
            editor = [x.strip() for x in editor.split(';')]
        hist_note = []
        if sheet.cell(row, hist_note_col).value:
            hist_note = sheet.cell(row, hist_note_col).value
        note = []
        if sheet.cell(row, note_col).value:
            note = sheet.cell(row, note_col).value
        characters = []
        if sheet.cell(row, characters_col).value:
            characters = sheet.cell(row, characters_col).value
            characters = [x.strip() for x in characters.split(';')]
        story_arc = []
        if sheet.cell(row, story_arc_col).value:
            story_arc = sheet.cell(row, story_arc_col).value
        toc = []
        if sheet.cell(row, toc_col).value:
            toc = sheet.cell(row, toc_col).value
        series = sheet.cell(row, series_col).value
        # print(cover_artist)
        # print(characters)
        # print(writer)
        # print(subfields_from_string(writer[0]))
        # print(name_direct_order(subfields_from_string(writer[0])[1]))
        # print(title)
        # print(parse_title(title))

        record = Record()

        # Add boilerplate fields
        record.leader = field_ldr
        record.add_ordered_field(field_040)
        record.add_ordered_field(field_049)
        record.add_ordered_field(field_336_text)
        record.add_ordered_field(field_336_image)
        record.add_ordered_field(field_337)
        record.add_ordered_field(field_338)
        record.add_ordered_field(field_380)
        record.add_ordered_field(field_506)
        record.add_ordered_field(field_542)
        record.add_ordered_field(field_588)
        record.add_ordered_field(field_989)

        # Add other fields
        # 008 - fixed-length data elements
        today = datetime.today().strftime('%y%m%d')
        data_008 = today + 't' + date + date + 'xx a 6 000 1 eng d'
        field_008 = Field(tag='008', data=data_008)
        record.add_ordered_field(field_008)

        # 099 - local call number built from the parsed title parts
        subfields_099 = []
        if has_part_title:
            subfields_099 = ['a', title[0] + ': ' + title[1], 'a', title[2]]
        else:
            subfields_099 = ['a', title[0], 'a', title[1]]
        field_099 = Field(tag='099', indicators=[' ', '9'],
                          subfields=subfields_099)
        record.add_ordered_field(field_099)

        if writer:
            # Add 100 for first writer
            subfield_content = subfields_from_string_relator(
                writer[0], 'writer')
            field_100 = Field(tag='100', indicators=['1', ' '],
                              subfields=subfield_content)
            record.add_ordered_field(field_100)
            # Multiple writers
            if len(writer) > 1:
                # Add 700s for all writers after the first
                for i in writer[1:]:
                    subfield_content = subfields_from_string_relator(
                        i, 'writer')
                    field_700 = Field(tag='700', indicators=['1', ' '],
                                      subfields=subfield_content)
                    record.add_ordered_field(field_700)

        # 245 indicator 1: 1 when a main entry (100) exists
        if writer:
            f245_ind1 = 1
        else:
            f245_ind1 = 0
        # 245 indicator 2: non-filing characters for a leading article
        f245_ind2 = 0
        if str.startswith(title[0], 'The '):
            f245_ind2 = 4
        elif str.startswith(title[0], 'An '):
            f245_ind2 = 3
        elif str.startswith(title[0], 'A '):
            f245_ind2 = 2
        subfields_245 = []
        if has_part_title:
            subfields_245 = [
                'a', title[0] + '.', 'p', title[1] + ',', 'n', title[2]
            ]
        else:
            subfields_245 = ['a', title[0] + ',', 'n', title[1]]
        # If writer exists, add $c
        if writer:
            subfields_245[-1] = subfields_245[-1] + ' /'
            subfields_245.append('c')
            subfields_245.append(
                name_direct_order(subfields_from_string(writer[0])[1]) +
                ', writer.')
        else:
            # If no writer, add 245 ending punctuation
            subfields_245[-1] = subfields_245[-1] + '.'
        field_245 = Field(tag='245', indicators=[f245_ind1, f245_ind2],
                          subfields=subfields_245)
        record.add_ordered_field(field_245)

        # 264 - publication and copyright statements
        field_264_1 = Field(tag='264', indicators=[' ', '1'],
                            subfields=[
                                'a', pub_place + ' :', 'b', publisher + ',',
                                'c', date + '.'
                            ])
        record.add_ordered_field(field_264_1)
        field_264_4 = Field(tag='264', indicators=[' ', '4'],
                            subfields=['c', '©' + date])
        record.add_ordered_field(field_264_4)

        # 300 - physical description
        field_300 = Field(tag='300', indicators=[' ', ' '],
                          subfields=[
                              'a', pages + ' pages :', 'b',
                              'chiefly color illustrations.'
                          ])
        record.add_ordered_field(field_300)

        # 490 - series statement (lower-cased title form)
        subfields_490 = []
        if has_part_title:
            subfields_490 = [
                'a', lower_title[0] + '. ' + lower_title[1] + ' ;', 'v',
                lower_title[2]
            ]
        else:
            subfields_490 = ['a', lower_title[0] + ' ;', 'v', lower_title[1]]
        field_490 = Field(tag='490', indicators=['1', ' '],
                          subfields=subfields_490)
        record.add_ordered_field(field_490)

        # 500 - general notes
        if hist_note:
            field_500_hist = Field(tag='500', indicators=[' ', ' '],
                                   subfields=['a', hist_note + '.'])
            record.add_ordered_field(field_500_hist)
        if note:
            field_500_note = Field(tag='500', indicators=[' ', ' '],
                                   subfields=['a', note + '.'])
            record.add_ordered_field(field_500_note)

        # 505 - contents note, terminal punctuation normalized
        if toc:
            if not toc.endswith('.') and not toc.endswith(
                    '?') and not toc.endswith('!'):
                toc += '.'
            field_505 = Field(tag='505', indicators=['0', ' '],
                              subfields=['a', toc])
            record.add_ordered_field(field_505)

        # 520 - summary quoted from the Grand Comics Database
        if story_arc:
            field_520 = Field(tag='520', indicators=[' ', ' '],
                              subfields=[
                                  'a', '"' + story_arc +
                                  '" -- Grand Comics Database.'
                              ])
            record.add_ordered_field(field_520)

        # 561 - provenance
        field_561 = Field(tag='561', indicators=[' ', ' '],
                          subfields=['a', source + '.'])
        record.add_ordered_field(field_561)

        # 650 - subject headings
        for i in subj:
            if not i.endswith('.') and not i.endswith(')'):
                i += '.'
            field_650 = Field(tag='650', indicators=[' ', '0'],
                              subfields=['a', i])
            record.add_ordered_field(field_650)

        # 655 - genre/form terms
        for i in genre:
            if not i.endswith('.') and not i.endswith(')'):
                i += '.'
            field_655 = Field(tag='655', indicators=[' ', '7'],
                              subfields=['a', i, '2', 'lcgft'])
            record.add_ordered_field(field_655)

        if characters:
            # print(characters)
            # 500 note listing all characters
            subfield_content = 'Characters: '
            for i in characters[:-1]:
                subfield_content += i + '; '
            subfield_content += characters[-1] + '.'
            field_500 = Field(tag='500', indicators=[' ', ' '],
                              subfields=['a', subfield_content])
            record.add_ordered_field(field_500)
            # Create 600 and 650 for "Fictitious character" entries
            # TODO check for existing 650 and don't add if a duplicate
            if any('Fictitious character' in c for c in characters):
                fic_chars = [
                    c for c in characters if 'Fictitious character' in c
                ]
                for i in fic_chars:
                    # split "Name (Fictitious character...)" into name + qualifier
                    fic_char_name = re.sub(
                        r'^(.*?) (\(Fictitious character.*\))$', r'\g<1>', i)
                    fic_char_c = re.sub(
                        r'^(.*?) (\(Fictitious character.*\))$', r'\g<2>', i)
                    field_600 = Field(
                        tag='600',
                        indicators=['0', '0'],
                        subfields=['a', fic_char_name, 'c', fic_char_c])
                    record.add_ordered_field(field_600)
                    field_650 = Field(tag='650', indicators=[' ', '0'],
                                      subfields=['a', i])
                    record.add_ordered_field(field_650)

        # 700 - added entries for each remaining contributor role
        if penciller:
            for i in penciller:
                subfield_content = subfields_from_string_relator(
                    i, 'penciller')
                field_700 = Field(tag='700', indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)
        if inker:
            for i in inker:
                subfield_content = subfields_from_string_relator(i, 'inker')
                field_700 = Field(tag='700', indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)
        if colorist:
            for i in colorist:
                subfield_content = subfields_from_string_relator(i, 'colorist')
                field_700 = Field(tag='700', indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)
        if letterer:
            for i in letterer:
                subfield_content = subfields_from_string_relator(i, 'letterer')
                field_700 = Field(tag='700', indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)
        if cover_artist:
            for i in cover_artist:
                subfield_content = subfields_from_string_relator(
                    i, 'cover artist')
                field_700 = Field(tag='700', indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)
        if editor:
            for i in editor:
                subfield_content = subfields_from_string_relator(i, 'editor')
                field_700 = Field(tag='700', indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)
        # field_700 = Field(tag = '700',
        #                   indicators = ['7',' '],
        #                   subfields = [
        #                       'a', doi,
        #                       '2', 'doi'])

        # 773 - host item entry built from the series cell
        subfields_773 = subfields_from_string(series)
        field_773 = Field(tag='773', indicators=['0', '8'],
                          subfields=subfields_773)
        record.add_ordered_field(field_773)

        # 830 - series added entry (lower-cased title form)
        subfields_830 = []
        if has_part_title:
            subfields_830 = [
                'a', lower_title[0] + '.', 'p', lower_title[1] + ' ;', 'v',
                lower_title[2] + '.'
            ]
        else:
            subfields_830 = [
                'a', lower_title[0] + ' ;', 'v', lower_title[1] + '.'
            ]
        field_830 = Field(tag='830', indicators=[' ', '0'],
                          subfields=subfields_830)
        record.add_ordered_field(field_830)

        outmarc.write(record.as_marc())
        print()

    outmarc.close()