def read(inputfile):
    outputfile = inputfile + "_no338_" + \
        datetime.datetime.now().isoformat() + ".mrc"
    has338Count = 0
    no338Count = 0
    totalCount = 0
    supplements = 0
    with open(inputfile, 'rb') as f:
        reader = MARCReader(f)
        writer = MARCWriter(open(outputfile, 'wb'))
        while True:
            try:
                record = next(reader)
                totalCount += 1
                if not testFor336To338(record):
                    print(record)
                    no338Count += 1
                    try:
                        writer.write(record)
                    except Exception as e:
                        print("Error with writing.")
                else:
                    has338Count += 1
                    if isSupplement(record):
                        supplements += 1
            except UnicodeDecodeError:
                print("There was a Unicode error.")
            except StopIteration:
                print("End of file.")
                break
        writer.close()
    print("{0} / {1} ({2} %) records have no 338 field.".format(
        no338Count, totalCount, countPercentage(no338Count, totalCount)))
    print("The file contained {0} supplement records.".format(supplements))
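This snippet relies on helpers that are not shown (testFor336To338, isSupplement, countPercentage). A plausible sketch of the last one, offered purely as an assumption about what the format string above expects:

# Hypothetical helper, not from the original source: the report line
# simply interpolates its return value as a percentage.
def countPercentage(part, total):
    if total == 0:
        return 0
    return round(100.0 * part / total, 2)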
def get_blanks_from_iso(file_name: str, file_name_txt: str) -> None:
    blank_file_name = file_name[:-4] + "_空白isbn.txt"
    data_file_name = file_name[:-4] + "_纯净数据.iso"
    temp_file_name = file_name[:-4] + "_临时数据.iso"
    fp = open(data_file_name, 'w', encoding='utf-8')
    fp.close()
    records, datas, blanks = read_iso(file_name), [], []
    data_nums = get_isbn_from_txt(file_name_txt)
    for index, record in enumerate(records):
        if record_is_blank(record):
            # Blank record: save its number for the blank-ISBN file.
            # blanks.append(record.get_fields("001")[0].data)
            blanks.append(data_nums[index])
        else:
            # Valid record: write it to the clean-data .iso file.
            with open(temp_file_name, 'wb') as fh:
                writer = MARCWriter(fh)
                writer.write(record)
            # Copy from the temporary file into the output file.
            fp1, fp2 = open(temp_file_name, 'r', encoding='utf-8'), open(
                data_file_name, 'a', encoding='utf-8')
            fp2.write(fp1.readline())
            fp2.write('\n')
            fp1.close()
            fp2.close()
    fp = open(blank_file_name, 'w', encoding='utf-8')
    for blank_num in blanks:
        fp.write(blank_num + "\n")
    fp.close()
    os.remove(temp_file_name)
def test_unicode(self):
    record = Record()
    record.add_field(Field(245, ['1', '0'], ['a', chr(0x1234)]))
    writer = MARCWriter(open('test/foo', 'wb'))
    writer.write(record)
    writer.close()
    reader = MARCReader(open('test/foo', 'rb'))
    record = next(reader)
    self.assertEqual(record['245']['a'], chr(0x1234))
def merge_five_isos(directory_name: str) -> None:
    file_names = ["", "", "", "", ""]
    keywords = {
        "Berkeley.iso": 0,
        "Yale.iso": 1,
        "Michigan.iso": 2,
        "US.iso": 3,
        "British.iso": 4
    }
    for file_name in os.listdir(directory_name):
        for k, v in keywords.items():
            if k == file_name[-len(k):]:
                file_names[v] = file_name
    # Drop the slots of files that are missing.
    for i in range(5):
        if len(file_names[4 - i]) == 0:
            del file_names[4 - i]
    # Read all records from each file in turn.
    datas, records = [], []
    for file_name in file_names:
        datas.append(read_iso(directory_name + "\\" + file_name))
    data_num = len(datas[0])
    for i in range(data_num):
        # Search each file in turn.
        for j in range(len(datas)):
            # If a non-blank record is found, take it.
            if not record_is_blank(datas[j][i]):
                records.append(datas[j][i])
                break
            # If none is found, fall back to a blank record.
            elif j == len(datas) - 1:
                records.append(datas[-1][i])
    output_file_name = directory_name + "\\" + "五合一.iso"
    temp_file_name = "临时文件.iso"
    # Truncate output_file_name first.
    fp1 = open(output_file_name, 'w', encoding='utf-8')
    fp1.close()
    for index, record in enumerate(records):
        # Record assembled; write it to the temporary file.
        with open(temp_file_name, 'wb') as fh:
            writer = MARCWriter(fh)
            writer.write(record)
        # Copy from the temporary file into the output file.
        fp1, fp2 = open(temp_file_name, 'r', encoding='utf-8'), open(
            output_file_name, 'a', encoding='utf-8')
        fp2.write(fp1.readline())
        fp2.write('\n')
        fp1.close()
        fp2.close()
    # Delete the temporary file.
    os.remove(temp_file_name)
def main():
    total_count = 0
    valid_count = 0
    with open(options['INPUT'], 'rb') as fh:
        reader = MARCReader(fh, to_unicode=True, force_utf8=True)
        # 1) first mode: write a MARC output file
        if not options['--csv']:
            # note: 'out.mrc' or options['--output'] always evaluated to
            # 'out.mrc'; the fallback has to come second
            writer = MARCWriter(open(options['--output'] or 'out.mrc', 'wb'))
            for record in reader:
                # whether we'll include the _bib_ record in export file
                include_record = False
                # Koha stores item data in 952 fields, one per item
                for item in record.get_fields('952'):
                    valid = validate_item(item)
                    total_count += 1
                    if valid is True:
                        valid_count += 1
                        # if there's any valid item then the bib should be included
                        include_record = True
                if include_record is True:
                    writer.write(record)
            print('Total items: %i | Items included: %i' % (total_count, valid_count))
        elif options['--csv']:
            koha_record_ids = set()
            for record in reader:
                total_count += 1
                for item in record.get_fields('952'):
                    valid = validate_item(item)
                    if valid:
                        id = record.get_fields(MARC_ID_FIELD)[0].get_subfields(
                            MARC_ID_SUBFIELD)[0]
                        koha_record_ids.add(id)
                        # stop looking at items after we find the first valid one
                        break
            csvreader = csv.DictReader(open(options['--csv'], 'r'))
            gg_record_ids = set()
            for row in csvreader:
                gg_record_ids.add(row[GG_ID_COLUMN])
            print('Total Koha Bibs: %i' % total_count)
            print('Koha Bibs with circulating items: %i' % len(koha_record_ids))
            print('Total GreenGlass Bibs: %i' % len(gg_record_ids))
            print('Weeded Items (in GG & not in Koha): %i' % len(gg_record_ids - koha_record_ids))
            print('Added Items (in Koha & not in GG): %i' % len(koha_record_ids - gg_record_ids))
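The options mapping above is presumably a docopt-style arguments dict (the 'INPUT', '--csv', and '--output' keys follow its naming convention). A minimal sketch of how it might be produced; the usage text is invented for illustration, not the script's real interface:

# Hypothetical docopt wiring for main(); the usage string is a guess.
from docopt import docopt

usage = """Usage: export.py INPUT [--output=FILE] [--csv=FILE]"""

options = docopt(usage)
main()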
def test_writing_unicode(self):
    record = Record()
    record.add_field(Field(245, ['1', '0'], ['a', chr(0x1234)]))
    record.leader = '         a              '
    writer = MARCWriter(open('test/foo', 'wb'))
    writer.write(record)
    writer.close()
    reader = MARCReader(open('test/foo', 'rb'), to_unicode=True)
    record = next(reader)
    self.assertEqual(record['245']['a'], chr(0x1234))
    reader.close()
    os.remove('test/foo')
class file_writer:
    """Handles writing citation records to a file."""

    def __init__(self, name, encoding="utf-8", path=""):
        """Prepare the file for writing on initialisation.

        Arguments:
            name {str} -- name of the file

        Keyword Arguments:
            encoding {str} -- encoding of the file (default: {"utf-8"})
            path {str} -- path where the file will be stored (default: {""})
        """
        self.CONST_FIELD_008 = "|2018 ne || |||| || ||eng |"
        self.CONST_FIELD_LEADER = "nab a22001211a 4500"
        self.CONST_INDICATOR_1 = ' '
        self.CONST_INDICATOR_2 = ' '
        self.writer = MARCWriter(open(path + name, 'wb'))

    def write_record(self, references, field035="", field008=""):
        """Write one record to the file in ISO 2709 form.

        Arguments:
            field035 -- string containing the data for field 035
            field008 -- string containing the data for field 008
            references {set(reference)} -- set of citations to write into field 591
        """
        if field008 == "":
            field008 = self.CONST_FIELD_008
        record = Record(force_utf8=True)
        record.add_field(Field(tag='008', data=field008))
        record.add_field(
            Field(tag='035',
                  indicators=[self.CONST_INDICATOR_1, self.CONST_INDICATOR_2],
                  subfields=['a', field035]))
        for i in references:
            record.add_field(i.to_marc_field())
        record.leader = record.leader[:5] + 'n' + record.leader[6:]
        record.leader = record.leader[:7] + 'b' + record.leader[8:]
        record.leader = record.leader[:18] + 'a' + record.leader[19:]
        self.writer.write(record)

    def close(self):
        """Finish writing and close the file."""
        self.writer.close()
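A minimal usage sketch for file_writer, assuming a reference-like object that only needs a to_marc_field() method. The Ref class and all field values below are invented for illustration:

from pymarc import Field

class Ref:
    """Stand-in for the real `reference` class used above."""

    def __init__(self, text):
        self.text = text

    def to_marc_field(self):
        return Field(tag='591', indicators=[' ', ' '],
                     subfields=['a', self.text])

fw = file_writer('citations.iso')
fw.write_record({Ref('Cited in: Example Journal, 2018')},
                field035='(EXAMPLE)12345')
fw.close()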
def write_marc(outfile, ptype=None):
    """Write Student MARC records to outfile"""
    ecount = count = 0
    writer = MARCWriter(open(outfile, 'wb'))
    if ptype:
        # filter on patron type
        students = Student.objects.filter(ptype=ptype)
    else:
        students = Student.objects.all()
    for s in students:
        try:
            writer.write(s.as_marc())
            count += 1
        except (TypeError, UnicodeDecodeError, UnicodeEncodeError) as e:
            log.info("%s: %s" % (s.student_id, s.full_name()))
            log.info("%s" % (s.as_marc().as_marc(),))
            log.exception("Error: %s" % (e,))
            ecount += 1
def MakeMARCFile(self, recs, filename):
    filenameNoExt = re.sub(r'\.\w*$', '', filename)
    mrcFileName = filenameNoExt + '_OUT.mrc'
    print('\n<Compiling file to MARC>\n')
    writer = MARCWriter(open(mrcFileName, "wb"))
    for r in recs:
        try:
            # MARCWriter.write() expects a Record, not the bytes
            # returned by as_marc()
            writer.write(r)
        except UnicodeEncodeError:
            r.force_utf8 = True
            writer.write(r)
    writer.close()
    return recs
def test_edit_mixed_code(self):
    reader = MARCReader(
        open('test/mixed-code.dat', 'rb'),
        to_unicode=True,
        force_utf8=True,
        utf8_handling='ignore'
    )
    writer = MARCWriter(open('test/foo', 'wb'))
    for record in reader:
        field = Field(
            tag='941',
            indicators=[' ', ' '],
            subfields=['a', 'x']
        )
        record.add_field(field)
        writer.write(record)
    writer.close()
    reader = MARCReader(open('test/foo', 'rb'), to_unicode=True)
    for record in reader:
        self.assertEqual(type(record), Record)
    os.remove('test/foo')
def writer(self, data_file):
    """
    Yield a MARCWriter instance.

    Args:
        data_file (str): The file basename.
    """
    path = os.path.join(self.path, data_file)
    with open(path, 'ab') as fh:
        yield MARCWriter(fh)
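As defined, writer is a generator function, so it only behaves the way the docstring suggests when used as a context manager; presumably a @contextmanager decorator is applied in the original class. A sketch of that reading, with a stub enclosing class:

import os
from contextlib import contextmanager
from pymarc import MARCWriter

class MarcDump:
    """Stub for the enclosing class; only the `path` attribute is assumed."""

    def __init__(self, path):
        self.path = path

    @contextmanager
    def writer(self, data_file):
        path = os.path.join(self.path, data_file)
        with open(path, 'ab') as fh:
            yield MARCWriter(fh)

# usage: the file handle closes when the with-block exits
# with MarcDump('/tmp').writer('records.mrc') as w:
#     w.write(record)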
def campus_split():
    '''
    Finds the master format files created by fmt_split(), then writes the
    records in each format file to separate files for holding campuses
    based on coding in MARC 049 subfield a. Outputs one file per campus
    per format.
    '''
    campuses = ['MNGE', 'MNXN']
    for campus in campuses:
        files = [
            f for f in os.listdir()
            if re.match(r'.+(bks|ser|maps|vis|other)\.mrc', f)
        ]
        for file in files:
            with open(file, 'rb') as f:
                filename = str(file)
                fpref, fsuf = filename.split('.')
                writer = MARCWriter(open(fpref + '_' + campus + '.mrc', 'wb'))
                reader = MARCReader(f)
                for rec in reader:
                    fields049 = rec.get_fields("049")
                    for field in fields049:
                        suba049 = field.get_subfields("a")
                        for suba in suba049:
                            if campus in suba:
                                writer.write(rec)
                            else:
                                continue
                writer.close()
def output_iso_from_iso(output_file_name: str, records: list) -> None:
    output_file_name = output_file_name[:-4] + ".iso"
    temp_file_name = "临时文件.iso"
    # Truncate output_file_name first.
    fp1 = open(output_file_name, 'w', encoding='utf-8')
    fp1.close()
    for index, record in enumerate(records):
        # Record assembled; write it to the temporary file.
        with open(temp_file_name, 'wb') as fh:
            writer = MARCWriter(fh)
            writer.write(record)
        # Copy from the temporary file into the output file.
        fp1, fp2 = open(temp_file_name, 'r', encoding='utf-8'), open(
            output_file_name, 'a', encoding='utf-8')
        fp2.write(fp1.readline())
        fp2.write('\n')
        fp1.close()
        fp2.close()
    # Delete the temporary file.
    os.remove(temp_file_name)
def save2marc(outfile, bib):
    try:
        writer = MARCWriter(open(outfile, 'ab'))
        writer.write(bib)
    except WindowsError:
        # re-raise, preserving the original traceback
        raise
    finally:
        writer.close()
def write_marc21(outfile, bib):
    try:
        # MARCWriter needs a binary-mode handle
        writer = MARCWriter(open(outfile, "ab"))
        writer.write(bib)
    except WindowsError:
        raise
    finally:
        writer.close()
def test_writing_unicode(self):
    record = Record()
    record.add_field(Field(245, ['1', '0'], ['a', chr(0x1234)]))
    record.leader = '         a              '
    writer = MARCWriter(open('test/foo', 'wb'))
    writer.write(record)
    writer.close()
    reader = MARCReader(open('test/foo', 'rb'), to_unicode=True)
    record = next(reader)
    self.assertEqual(record['245']['a'], chr(0x1234))
    os.remove('test/foo')
def test_writing_unicode(self):
    record = Record()
    record.add_field(Field(245, ["1", "0"], ["a", chr(0x1234)]))
    record.leader = "         a              "
    writer = MARCWriter(open("test/foo", "wb"))
    writer.write(record)
    writer.close()
    reader = MARCReader(open("test/foo", "rb"), to_unicode=True)
    record = next(reader)
    self.assertEqual(record["245"]["a"], chr(0x1234))
    reader.close()
    os.remove("test/foo")
def save2marc(outfile: str, record: Record) -> None:
    """
    Appends a MARC record to outfile

    Args:
        outfile: file path
        record: MARC record as pymarc object
    """
    # open outside the try block so `writer` is always bound in `finally`
    writer = MARCWriter(open(outfile, "ab"))
    try:
        writer.write(record)
    finally:
        writer.close()
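A hedged usage sketch: since save2marc reopens outfile on every call, looping over a reader works but pays one open/close per record; a single long-lived MARCWriter is cheaper for large batches. The file names are placeholders:

# Hypothetical batch usage of save2marc.
from pymarc import MARCReader

with open('input.mrc', 'rb') as fh:
    for rec in MARCReader(fh):
        save2marc('output.mrc', rec)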
"20": "2 day Reserve Loan", "21": "Regular Loan", "50": "Audio/Video & Map Loan", "x": "Find me" } multiple930Needed = {} deletedItems = [] loanStatusReader = readCsvFile("LoanStatuses.csv") LoanStatusList = transferToLoansArray(loanStatusReader) deletedItemsreader = readTextFile("deleted_items_list_20190110.txt") deletedItems = transferDeletedItems(deletedItemsreader) SerialsRecordsWriter = MARCWriter(open("itemCreatedSerials.mrc", "wb")) BooksRecordsWriter = MARCWriter(open("itemCreatedBooks.mrc", "wb")) myErrorFile = createPrintFiles("myErrorFile.csv", "w") mydeletesFile = createPrintFiles("deletedItemsFile.csv", "w") myMultipleFile = createPrintFiles("multiple930Needed.csv", "w") myWriterFile = createPrintFiles("itemsCreated.csv", "w") file = open("bibssansitems_with852.mrc", "rb") reader = MARCReader(file, to_unicode=True, force_utf8=True, utf8_handling="strict") number = 1 for record in reader:
def writeMARCRecord(self, record):
    writer = MARCWriter(self.file)
    writer.write(record)
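MARCWriter.write simply serializes the record to the wrapped handle, so constructing a fresh writer per call is equivalent to keeping one around. A stub of a plausible enclosing class, assuming self.file is a binary-mode handle opened elsewhere:

from pymarc import MARCWriter

class RecordSink:
    """Invented stand-in for the class this method belongs to."""

    def __init__(self, path):
        self.file = open(path, 'wb')

    def writeMARCRecord(self, record):
        writer = MARCWriter(self.file)
        writer.write(record)

    def close(self):
        self.file.close()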
    scp_vals = ['CDL', 'UC open access']
    for fld in record.get_fields('856'):
        # 856 can have multiple $x
        for sfld in fld.get_subfields('x'):
            if sfld in scp_vals:
                # Should be only one $u per 856 field
                fld['u'] = 'dummyURL'
                # If fld was modified, break out of the sfld loop
                break

### Main code starts here ###
if len(sys.argv) != 4:
    raise ValueError(f'Usage: {sys.argv[0]} in_file out_file case# (3-5)')

reader = MARCReader(open(sys.argv[1], 'rb'), utf8_handling="ignore")
writer = MARCWriter(open(sys.argv[2], 'wb'))
case_no = sys.argv[3]
if case_no not in ['3', '4', '5']:
    raise ValueError(f'Invalid value {case_no}; must be 3, 4, or 5')

for record in reader:
    delete_590(record)
    delete_599(record)
    delete_793(record)
    if case_no == '3':
        modify_035(record)
        delete_856(record)
    elif case_no == '4':
        modify_035(record)
        modify_856(record)
    else:  # 5
if rda:
    continue  # skip record, go on to next record

# time to fix that pesky misspelling...
fix_245_misspelling(title_a_raw, word, 'a', title_245)
fix_245_misspelling(title_b_raw, word, 'b', title_245)

# get the bib record number from the 907 field prior to deletion
n = marc.get_fields('907')
for field in n:
    bib_rec_num_raw = field.get_subfields('a')
    bib_rec_num = subfield_to_string(bib_rec_num_raw)

# add 949 local field for overlay of bib record and creation of order
# record when record is uploaded into Millennium
marc.add_field(
    Field(tag='949',
          indicators=[' ', ' '],
          subfields=['a', '*recs-b;ov-%s;' % (bib_rec_num)]))

# delete 907, 998, 910, 945 fields
for f in marc.get_fields('907', '998', '910', '945'):
    if f['a'] != '':
        marc.remove_field(f)

# append record to a generic <word>.dat file
writer = MARCWriter(open(word + '.dat', 'ab'))
writer.write(marc)

# close the .dat file
writer.close()
""" Base script for DLF Forum 2014 Listening-Based Python workshop. Modified from files at https://github.com/LibraryCodeYearIG/MARC-record-edit . """ import os from pymarc import Field, MARCReader, MARCWriter, record_to_xml records = MARCReader(open('../../exampledump.mrc'), to_unicode=True, force_utf8=True, utf8_handling='ignore') index = 1 for marc in records: filename_dat = 'examplerecord_%s.dat' % index filename_xml = 'examplerecord_%s.xml' % index writer_dat = MARCWriter(file(filename_dat,'a')) writer_xml = open(filename_xml,'a') writer_dat.write(marc) writer_xml.write(record_to_xml(marc) + "\n") writer_dat.close() writer_xml.close() index += 1
isbns = ['025710491', '8105610391', '9781041740192', '9791037492192',
         '1049126950', '819251', '4018597182', '978103784952X',
         '1023894675102', '910384765']

for i in range(10, 20):
    filename = 'examplerecord_%s.dat' % i
    filename_xml = 'examplerecord_%s.xml' % i
    filename_out = 'examplerecord_%s.out' % i
    records = MARCReader(open(filename, 'rb'), to_unicode=True,
                         force_utf8=True, utf8_handling='ignore')
    writer_dat = MARCWriter(open(filename_out, 'ab'))
    writer_xml = open(filename_xml, 'a')
    for marc in records:
        isbn_list = marc.get_fields('020')
        try:
            isbn_field = isbn_list[0]
        except Exception as e:
            j = i - 10
            marc.add_ordered_field(
                Field(tag='020',
                      indicators=[' ', ' '],
                      subfields=['a', isbns[j]]))
barcodes = {}
for line in bc.readlines():
    id_, barcode = (v.strip() for v in line.strip().split('\t'))
    if id_ in barcodes:
        barcodes[id_].append(barcode)
    else:
        barcodes[id_] = [barcode]

m = 150  # max records to process (for DEBUG)
DEBUG = False
writer = None

# Load bibliographic MARC records to add barcodes
# Output written to file out.mrc
with open(fnbiblio, 'rb') as marcdata:
    writer = MARCWriter(open('out.mrc', 'wb'))
    records = MARCReader(marcdata, to_unicode=True)
    for i, record in enumerate(records):
        if record is None:
            print('None record')
            #continue
        if record['876']:
            #print('HOLDING found!')
            continue
        else:
            print('%s Biblio found' % i)
        enc = record.leader[9]
        if enc != ' ':
            print('%s non MARC8 record found!' % i)
        id_ = record['001'].value().strip()
def fchange_sort(MARCfile, fname):
    '''
    Sorts a MARC binary file based on 960 field change notes into an
    update file for the FTP server, plus separate files of OCN changes
    and KB URL changes for manual checking. Outputs one .mrc file per
    category.
    '''
    # open a path to put the files for the FTP server - both OCN and BIB updates
    sorted_files_path = "C:/Users/kjthomps/Documents/WCM/file_fetching/updates/sorted_for_FTP " + today
    if not os.path.isdir(sorted_files_path):
        os.mkdir(sorted_files_path)
    # make a place to put the files with OCN updates for manual checking
    ocn_updates_path = "C:/Users/kjthomps/Documents/WCM/file_fetching/updates/OCN_updates_" + today
    if not os.path.isdir(ocn_updates_path):
        os.mkdir(ocn_updates_path)
    # make a place to put the files with URL updates for manual checking
    url_updates_path = "C:/Users/kjthomps/Documents/WCM/file_fetching/updates/URL_updates_" + today
    if not os.path.isdir(url_updates_path):
        os.mkdir(url_updates_path)
    fname_str = str(fname)
    print(fname)
    fname_str = fname_str.replace(".", "")
    fname_str = fname_str.replace("mrc", ".mrc")
    print(fname_str)
    fpref, fsuf = fname_str.split('.')
    print(fpref)
    print(fsuf)
    print(MARCfile)
    with open(MARCfile, 'rb') as f:
        reader = MARCReader(f)
        # first, see if there are OCN or URL changes in the set; this will
        # determine whether creating a file is necessary
        OCN_change_ct = 0
        URL_change_ct = 0
        writer_new = False
        writer_URLs = False
        for rec in reader:
            if rec['960']:
                field_960 = str(rec['960']['a'])
                if 'OCLC control number change' in field_960:
                    OCN_change_ct += 1
                if 'KB URL change' in field_960:
                    URL_change_ct += 1
        print("OCN_change_ct ", OCN_change_ct)
        print("URL_change_ct ", URL_change_ct)
        # if there are OCN updates or KB URL changes, create files to put
        # those records in
        if OCN_change_ct > 0:
            writer_new_oclc_num_manual = MARCWriter(
                open(ocn_updates_path + "/" + fpref + '_new_oclc_num.mrc', 'wb'))
            writer_new = True
            print(writer_new)
        if URL_change_ct > 0:
            writer_update_URLs = MARCWriter(
                open(url_updates_path + "/" + fpref + '_update_URLs.mrc', 'wb'))
            writer_URLs = True
            print(writer_URLs)
        # create a file for all updates
        writer_update_bibs = MARCWriter(
            open(sorted_files_path + "/" + fpref + '_update_bibs.mrc', 'wb'))
    v = 0
    with open(MARCfile, 'rb') as f:
        reader = MARCReader(f)
        for rec in reader:
            v += 1
            print(v)
            if rec['960']:
                field_960 = str(rec['960']['a'])
                print(field_960)
                # writes record to correct file based on 960 matches.
                # These are ordered so that if a 960 field has more than one
                # reason for the update, the most critical to handle is
                # addressed first. These are, in order: OCN change (affects
                # matching), URL change, bib update.
                # Update: OCN changes can be processed alongside bib updates.
                # URLs will need to be handled manually due to multi-vols?
                if 'OCLC control number change' in field_960:
                    writer_update_bibs.write(rec)
                    writer_new_oclc_num_manual.write(rec)
                    if 'KB URL change' in field_960:
                        writer_update_URLs.write(rec)
                elif 'KB URL change' in field_960:
                    writer_update_URLs.write(rec)
                    writer_update_bibs.write(rec)
                elif 'Subsequent record output' in field_960:
                    writer_update_bibs.write(rec)
                elif 'Master record variable field' in field_960:
                    writer_update_bibs.write(rec)
                else:
                    writer_update_bibs.write(rec)
    # closes the update files
    writer_update_bibs.close()
    if writer_URLs == True:
        writer_update_URLs.close()
    if writer_new == True:
        writer_new_oclc_num_manual.close()
""" Base script for DLF Forum 2014 Listening-Based Python workshop. Modified from files at https://github.com/LibraryCodeYearIG/MARC-record-edit . """ import os from pymarc import Field, MARCReader, MARCWriter, record_to_xml records = MARCReader(open('examplerecord.mrc'), to_unicode=True, force_utf8=True, utf8_handling='ignore') writer_dat = MARCWriter(file('file.dat', 'a')) writer_xml = open('file.xml', 'a') writer_xml.write('<collection>') for marc in records: other_identifier_list = marc.get_fields('024') other_identifier_field = other_identifier_list[0] other_identifier = other_identifier_field.get_subfields('a') isbn_list = marc.get_fields('020') isbn_field = isbn_list[0] isbn = isbn_field.get_subfields('a')[0] if len(isbn) == 10: isbn = '978' + isbn marc.remove_field(isbn_field)
def output_iso(file_name: str) -> None:
    output_file_name = file_name[:-4] + ".iso"
    temp_file_name = "临时文件.iso"
    # Truncate output_file_name first.
    fp1 = open(output_file_name, 'w', encoding='utf-8')
    fp1.close()
    # Represent the spreadsheet as a list of dicts.
    dataFrame_temp = pd.read_csv(file_name, encoding='utf-8',
                                 dtype=str).to_dict(orient='records')
    dataFrame = []
    # Copy everything from the table into dataFrame. Skip cells that are
    # NaN or empty, and drop the "Unnamed" and "continue" columns.
    for index, value in enumerate(dataFrame_temp):
        data_single = {}
        for k in value:
            v = str(value[k])
            if (v == 'nan' or len(v.strip()) == 0
                    or "Unnamed" in k or "continue" in k):
                pass
            else:
                data_single[k] = v.strip()
        dataFrame.append(data_single)
    for data in dataFrame:
        record = Record()
        # Drop the isbn column and rename the "head" column to "000".
        data2 = {}
        for key, value in data.items():
            if key == "head":
                data2["000"] = value
            elif '0' <= key[0] <= '9':
                data2[key] = value
        # Sort the remaining columns.
        keys = list(data2.keys())
        keys.sort()
        # Walk the sorted keys and add each one to the record.
        for key in keys:
            # "000" holds the leader.
            if key == "000":
                record.leader = data2[key]
            # Tags up to "009" are control fields.
            elif key <= "009":
                record.add_field(Field(tag=key, data=data2[key]))
            # Above "009", replace every "▼" with "|" and treat the text
            # before the first "|" as the indicators.
            elif key > "009":
                # Replace the special character.
                data2[key] = data2[key].replace("▼", "|")
                # Pick out the indicators.
                indicators = data2[key].split("|")[0]
                if len(indicators) == 0:
                    indicators = [" ", " "]
                elif len(indicators) == 1:
                    indicators = [indicators[0], " "]
                else:
                    indicators = [indicators[0], indicators[1]]
                # Pick out the content: split on "|"; each segment starts
                # with a subfield code followed by its value.
                subfields = []
                for words in data2[key].split("|")[1:]:
                    subfields.append(words[0])
                    subfields.append(words[1:])
                # Add the field.
                record.add_field(
                    Field(tag=key[:3],
                          indicators=indicators,
                          subfields=subfields))
        # Record assembled; write it to the temporary file.
        with open(temp_file_name, 'wb') as fh:
            writer = MARCWriter(fh)
            writer.write(record)
        # Copy from the temporary file into the output file.
        fp1, fp2 = open(temp_file_name, 'r', encoding='utf-8'), open(
            output_file_name, 'a', encoding='utf-8')
        fp2.write(fp1.readline())
        fp2.write('\n')
        fp1.close()
        fp2.close()
    # Delete the temporary file.
    os.remove(temp_file_name)
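A worked example of the cell format this converter assumes, with an invented value: the characters before the first "|" are the indicators, and each following "|"-segment starts with a one-letter subfield code.

from pymarc import Field, Record

cell = "10|aExample title|bsubtitle"  # invented sample; "▼" already mapped to "|"
indicators = list(cell.split("|")[0]) or [" ", " "]
subfields = []
for seg in cell.split("|")[1:]:
    subfields.append(seg[0])   # subfield code
    subfields.append(seg[1:])  # subfield value

record = Record()
record.add_field(Field(tag="245", indicators=indicators, subfields=subfields))
# -> =245  10$aExample title$bsubtitle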
csvFileWriter = createPrintFiles("coverageAnalysis_Results.csv")
csvFileWriter.writerow([
    "System Number", "Oclc Number", "Title", "866 subfield A",
    "930 subfield S", "Analysis result"
])
onlyFileWriter = createPrintFiles("all_explained.csv")
onlyFileWriter.writerow([
    "System Number", "Oclc Number", "Title", "866 subfield A",
    "930 subfield S", "How was record manipulated"
])
deleted930FileWriter = createPrintFiles("deletedItems.csv")
deleted930FileWriter.writerow(
    ["System Number", "Oclc Number", "Title", "A 930 deleted due to status"])
changedRecordsWriter = MARCWriter(open("changedRecord.mrc", "wb"))
unchangedRecordsWriter = MARCWriter(open("unchangedRecord.mrc", "wb"))
deletedRecordsWriter = MARCWriter(open("deletedRecord.mrc", "wb"))
rowsDeleted = 0
rowsAdded = 0
currentDir = os.getcwd()
filesDir = currentDir + "\\files\\"

for filename in os.listdir(filesDir):
    print(filename)
    sysNumCountArray = []
    originalItemCount = 0
    finalItemCount = 0
#!/usr/bin/env python """ write all the records with 856 fields out to an ebooks-only MARC file """ from pymarc import MARCReader, MARCWriter """ the MARCReader params come from the penultimate comment here: github.com/edsu/pymarc/issues/7 basically, these work around mixed character encodings """ allRecords = MARCReader( open( 'ebooks.MRC' ), to_unicode=True, force_utf8=True, utf8_handling='ignore' ) onlyEbooks = MARCWriter( file( 'ebooks-edited.MRC', 'w' ) ) errCount = 0 for rec in allRecords: if rec[ '856' ] is not None: try: onlyEbooks.write( rec ) except UnicodeDecodeError: print rec[ '245' ] errCount += 1 print "\nNumber of Errors: ", errCount onlyEbooks.close()
#!/usr/bin/env python """ write out a MARC file of records with multiple 856 fields """ from pymarc import MARCReader, MARCWriter reader = MARCReader( open( 'ebooks.MRC' ), to_unicode=True, force_utf8=True, utf8_handling='ignore' ) writer = MARCWriter( file( 'Two856s.MRC', 'w' ) ) limit = 200000 i = 0 for rec in reader: if i < limit: i += 1 if rec[ '856' ] is not None: all856s = rec.get_fields( '856' ) if len( all856s ) > 1: writer.write( rec ) writer.close()
#!/usr/bin/env python """ given MARC file: - proxy certain vendor 856$u URLs - delete other weird URLs that we don't need - delete all 856$z subfields (make sense in our OPAC) outputs to new MARC file. File paths are hardcoded in the "books" and "processed" variables below. """ from pymarc import MARCReader, MARCWriter books = MARCReader(open('MARC/2013-12-13-full-catalog.MRC'), to_unicode=True, force_utf8=True, utf8_handling='ignore') processed = MARCWriter(file('TEST.MRC', 'w')) # limit output, for quicker testing limit = 300000 i = 0 # initialize stat counters num_total_books = 0 num_proxied_ebooks = 0 num_fields_removed = 0 # list of domains in 856 $u that correspond to an ebook subscription we have subscription_domains = [ 'hdl.handle.net', # ACLS Humanities 'galenet.galegroup.com', # GVRL 'find.galengroup.com', 'www.netlibrary.com', # NetLibrary / EBSCO 'www.netLibrary.com', # case sensitive
def fmt_split(MARCfile):
    '''
    Parses a MARC binary file based on LDR/06-07 values into separate
    files for books, serials, maps, visual materials, and other formats.
    Output is one .mrc file for each format.
    '''
    fname_str = str(MARCfile)
    fpref, fsuf = fname_str.split('.')
    today = str(date.today())
    with open(MARCfile, 'rb') as f:
        reader = MARCReader(f)
        # opens a file for each format
        writer_bks = MARCWriter(open(fpref + '_bks.mrc', 'wb'))
        writer_ser = MARCWriter(open(fpref + '_ser.mrc', 'wb'))
        writer_maps = MARCWriter(open(fpref + '_maps.mrc', 'wb'))
        writer_vis = MARCWriter(open(fpref + '_vis.mrc', 'wb'))
        writer_other = MARCWriter(open(fpref + '_other.mrc', 'wb'))
        for rec in reader:
            field_909 = pymarc.Field(tag='909',
                                     indicators=[' ', ' '],
                                     subfields=[
                                         'a', 'bcat', 'b', 'MNU',
                                         'c', today, 'd', 'marcive'
                                     ])
            rec.add_ordered_field(field_909)
            ldr = rec.leader
            # regexes for string matching to determine format
            bks_re = re.compile('^.{6}am.*')
            ser_re = re.compile('^.{6}a[si].*')
            maps_re = re.compile('^.{6}e.*')
            vis_re = re.compile('^.{6}k.*')
            # determines format based on regex match of LDR/06-07 values
            bks = bks_re.match(ldr)
            ser = ser_re.match(ldr)
            maps = maps_re.match(ldr)
            vis = vis_re.match(ldr)
            # writes record to correct file based on regex matches
            if bks:
                writer_bks.write(rec)
            elif ser:
                writer_ser.write(rec)
            elif maps:
                writer_maps.write(rec)
            elif vis:
                writer_vis.write(rec)
            else:
                writer_other.write(rec)
        # closes master format files
        writer_bks.close()
        writer_ser.close()
        writer_maps.close()
        writer_vis.close()
        writer_other.close()
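The leader regexes above only test fixed positions, so the same dispatch can be expressed more directly by slicing LDR/06-07. A behavior-equivalent sketch, not from the original:

# Equivalent format test via leader slicing (LDR/06 = type of record,
# LDR/07 = bibliographic level).
def fmt_of(leader):
    typ, blvl = leader[6], leader[7]
    if typ == 'a' and blvl == 'm':
        return 'bks'
    if typ == 'a' and blvl in ('s', 'i'):
        return 'ser'
    if typ == 'e':
        return 'maps'
    if typ == 'k':
        return 'vis'
    return 'other'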
# MARC field 700
if creators:
    for creator in creators:
        marc_record.add_field(
            Field(tag='700',
                  indicators=['1', ' '],
                  subfields=[
                      'a', f'{creator}',
                      't', '',
                  ]))

# MARC field 856
if identifiers:
    for identifier in identifiers:
        marc_record.add_field(
            Field(tag='856',
                  indicators=['4', '2'],
                  subfields=[
                      '3', 'Finding aid',
                      'u', f'{identifier}',
                  ]))

# write to MARC output file
writer = MARCWriter(open(save_file, 'ab'))
writer.write(marc_record)
writer.close()

# open up MARC record in default viewer (Notepad, most likely)
os.system(save_file)
def output_iso_from_data(file_name: str, isbn_total: list,
                         data_total: dict) -> None:
    temp_file_name = "临时文件.iso"
    fp = open(file_name, 'w', encoding='utf-8')
    fp.close()
    records = []
    for isbn in isbn_total:
        record = Record()
        if isbn in data_total:
            data = data_total[isbn]
            for key, value in data.items():
                # Replace characters that UTF-8 cannot represent.
                for character in NON_CHARACTERS_IN_UTF_8:
                    key, value = str(key).replace(character, ""), str(
                        value).replace(character, "")
                if key in ['continue']:
                    continue
                elif key[:3] == '000':
                    record.leader = value
                elif key[:3] <= '009':
                    record.add_field(Field(tag=key[:3], data=value))
                else:
                    subfields = []
                    words = value[2:].replace("$", " ").replace("|", "$").strip()
                    for word in words.split("$"):
                        if len(word.strip()) == 0:
                            continue
                        else:
                            subfields.append(word.strip()[0])
                            subfields.append(word.strip()[1:])
                    record.add_field(
                        Field(tag=key[:3],
                              indicators=[value[0], value[1]],
                              subfields=subfields))
        if str(record.leader) == str(Record().leader):
            # A new record: keep only the ISBN in field 001.
            record.add_field(Field(tag='001', data=isbn))
        record = record_sorted(record)
        records.append(record)
        # Record assembled; write it to the temporary file.
        with open(temp_file_name, 'wb') as fh:
            writer = MARCWriter(fh)
            try:
                # Probe whether the record can be serialised
                # (e.g. British Library 9780714827308 cannot).
                writer.write(record)
            except UnicodeEncodeError:
                print("Record {} has malformed data; clearing it so output "
                      "can continue.".format(isbn))
                record = Record()
                record.add_field(Field(tag='001', data=isbn))
                writer.write(record)
        # Copy from the temporary file into the output file.
        fp1, fp2 = open(temp_file_name, 'r', encoding='utf-8'), open(
            file_name, 'a', encoding='utf-8')
        try:
            fp2.write(fp1.readline())
        except UnicodeDecodeError:
            # Some records fail to decode (e.g. British Library 9780714827308),
            # so fall back to a bare record carrying only the ISBN.
            fp1.close()
            fp2.close()
            with open(temp_file_name, 'wb') as fh:
                writer = MARCWriter(fh)
                record = Record()
                record.add_field(Field(tag='001', data=isbn))
                writer.write(record)
            fp1, fp2 = open(temp_file_name, 'r', encoding='utf-8'), open(
                file_name, 'a', encoding='utf-8')
            fp2.write(fp1.readline())
        fp2.write('\n')
        fp1.close()
        fp2.close()
    # Delete the temporary file.
    os.remove(temp_file_name)