def read(inputfile):
    """Copy the records that fail ``testFor336To338`` into a new MARC file.

    The output file is named ``<inputfile>_no338_<ISO timestamp>.mrc``.
    Every record for which ``testFor336To338`` returns a falsy value is
    printed and written to that file.  Summary counts are printed at the end.

    Args:
        inputfile: path of the binary MARC file to scan.
    """
    outputfile = inputfile + "_no338_" + \
        datetime.datetime.now().isoformat() + ".mrc"
    no338Count = 0
    totalCount = 0
    supplements = 0
    with open(inputfile, 'rb') as f:
        reader = MARCReader(f)
        writer = MARCWriter(open(outputfile, 'wb'))
        try:
            while True:
                try:
                    record = next(reader)
                    totalCount += 1
                    if not testFor336To338(record):
                        print(record)
                        no338Count += 1
                        try:
                            writer.write(record)
                        except Exception:
                            # Best effort: one unwritable record must not
                            # abort the whole scan.
                            print("Error with writing.")
                    # NOTE(review): the source layout was ambiguous about
                    # whether this check applied to every record or only to
                    # records that have a 338 -- confirm against the caller's
                    # expectation.
                    if isSupplement(record):
                        supplements += 1
                except UnicodeDecodeError:
                    # Skip the undecodable record and keep reading.
                    print("There was a Unicode error.")
                except StopIteration:
                    print("End of file.")
                    break
        finally:
            # Close the output even if an unexpected exception escapes the
            # loop, so the file handle is never leaked.
            writer.close()
    print(
        "{0} / {1} ({2} %) records have no 338 field.".format(
            no338Count, totalCount,
            countPercentage(no338Count, totalCount)))
    print("The file contained {0} supplement records.".format(supplements))
def campus_split():
    '''
    Finds the master format files created by fmt_split(), then writes the
    records in each format file to separate files for holding campuses based
    on coding in MARC 049 subfield a.
    Outputs one file per campus per format, named <format file>_<campus>.mrc.
    '''
    campuses = ['MNGE', 'MNXN']
    # List the format files once.  The pattern is anchored at the end so that
    # only names ending in "<format>.mrc" are picked up.
    files = [
        f for f in os.listdir()
        if re.match(r'.+(bks|ser|maps|vis|other)\.mrc$', f)
    ]
    for campus in campuses:
        for fname in files:
            # splitext copes with additional dots in the name, where
            # split('.') would raise ValueError.
            fpref = os.path.splitext(fname)[0]
            with open(fname, 'rb') as f:
                writer = MARCWriter(open(fpref + '_' + campus + '.mrc', 'wb'))
                try:
                    for rec in MARCReader(f):
                        # Write each record at most once, even when several
                        # 049 $a subfields mention the campus.
                        if any(campus in suba
                               for field in rec.get_fields("049")
                               for suba in field.get_subfields("a")):
                            writer.write(rec)
                finally:
                    writer.close()
def save2marc(outfile, bib):
    """Append one MARC record to ``outfile``.

    Args:
        outfile: path of the MARC file, opened in binary append mode.
        bib: the record to write.

    Raises:
        Whatever ``open`` or the writer raises; the original exception is
        propagated unchanged so its message and errno are preserved.
    """
    # Open before the try block: if open() fails there is no writer to close,
    # and the real error is not masked by a NameError in the cleanup.
    writer = MARCWriter(open(outfile, 'ab'))
    try:
        writer.write(bib)
    finally:
        writer.close()
def write_marc21(outfile, bib):
    """Append one MARC record to ``outfile``.

    Args:
        outfile: path of the MARC file.
        bib: the record to write.

    Raises:
        Whatever ``open`` or the writer raises; the original exception is
        propagated unchanged.
    """
    # MARC transmission format is binary, so the file must be opened in "ab";
    # text mode rejects bytes on Python 3 and rewrites newlines on Windows.
    writer = MARCWriter(open(outfile, "ab"))
    try:
        writer.write(bib)
    finally:
        writer.close()
def test_unicode(self):
    """Write a record holding U+1234 to disk and read it back unchanged."""
    record = Record()
    record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
    # MARC data is binary: use binary mode for both writing and reading.
    writer = MARCWriter(open('test/foo', 'wb'))
    writer.write(record)
    writer.close()
    # The with-block closes the read handle; next() works with iterators on
    # both Python 2 and Python 3.
    with open('test/foo', 'rb') as handle:
        record = next(MARCReader(handle))
    self.assertEqual(record['245']['a'], unichr(0x1234))
def test_writing_unicode(self):
    """Round-trip a record holding U+1234 through a MARC file on disk."""
    record = Record()
    record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
    # NOTE(review): MARC 21 flags UTF-8 with 'a' at leader offset 9; confirm
    # the spacing of this literal puts the 'a' there.
    record.leader = ' a '
    # MARC data is binary: use binary mode for both writing and reading.
    writer = MARCWriter(open('test/foo', 'wb'))
    writer.write(record)
    writer.close()
    try:
        with open('test/foo', 'rb') as handle:
            record = next(MARCReader(handle, to_unicode=True))
        self.assertEqual(record['245']['a'], unichr(0x1234))
    finally:
        # Remove the scratch file even when the assertion fails.
        os.remove('test/foo')
def MakeMARCFile(self, recs, filename):
    """Write ``recs`` to ``<filename without extension>_OUT.mrc``.

    Each record is written as-is; if that fails, the record is switched to
    forced UTF-8 encoding and written again.

    Args:
        recs: iterable of records to write.
        filename: name the output file name is derived from.

    Returns:
        ``recs``, unchanged apart from ``force_utf8`` being set on records
        that needed the fallback.
    """
    # Raw string with an escaped dot: only a real ".ext" suffix is stripped.
    # The previous pattern treated "." as "any character" and could erase a
    # name that had no extension.
    filenameNoExt = re.sub(r'\.\w*$', '', filename)
    mrcFileName = filenameNoExt + '_OUT.mrc'
    print('\n<Compiling file to MARC>\n')
    writer = MARCWriter(open(mrcFileName, "wb"))
    try:
        for r in recs:
            try:
                # The writer takes the Record itself and serialises it;
                # handing it r.as_marc() (bytes) is rejected every time.
                writer.write(r)
            except Exception:
                # Fallback kept from the original: retry with forced UTF-8.
                r.force_utf8 = True
                writer.write(r)
    finally:
        writer.close()
    return recs
def test_writing_unicode(self):
    """Round-trip a record holding U+1234 through a MARC file on disk."""
    title_field = Field(245, ['1', '0'], ['a', unichr(0x1234)])
    outgoing = Record()
    outgoing.add_field(title_field)
    # NOTE(review): MARC 21 flags UTF-8 with 'a' at leader offset 9; confirm
    # the spacing of this literal puts the 'a' there.
    outgoing.leader = ' a '

    sink = MARCWriter(open('test/foo', 'wb'))
    sink.write(outgoing)
    sink.close()

    source = MARCReader(open('test/foo', 'rb'), to_unicode=True)
    incoming = next(source)
    self.assertEqual(incoming['245']['a'], unichr(0x1234))
    source.close()
    os.remove('test/foo')
def test_writing_unicode(self):
    """Check that a non-ASCII subfield survives a write/read cycle."""
    scratch_path = "test/foo"
    written = Record()
    written.add_field(Field(245, ["1", "0"], ["a", chr(0x1234)]))
    # NOTE(review): MARC 21 flags UTF-8 with 'a' at leader offset 9; confirm
    # the spacing of this literal puts the 'a' there.
    written.leader = " a "

    marc_out = MARCWriter(open(scratch_path, "wb"))
    marc_out.write(written)
    marc_out.close()

    marc_in = MARCReader(open(scratch_path, "rb"), to_unicode=True)
    read_back = next(marc_in)
    self.assertEqual(read_back["245"]["a"], chr(0x1234))
    marc_in.close()
    os.remove(scratch_path)
def save2marc(outfile: str, record: Record) -> None:
    """
    Appends MARC records to outfile

    Args:
        outfile:                file path
        record:                 MARC record as pymarc object

    Raises:
        Whatever ``open`` or the writer raises, unchanged.
    """
    # Open before the try block: if open() fails there is no writer to close,
    # and the real error is not replaced by an UnboundLocalError in cleanup.
    writer = MARCWriter(open(outfile, "ab"))
    try:
        writer.write(record)
    finally:
        writer.close()
class file_writer:
    """Handles writing of responses (citations) to a MARC file."""

    def __init__(self, name, encoding="utf-8", path=""):
        """Prepare the output file for writing.

        Arguments:
            name {str} -- file name

        Keyword Arguments:
            encoding {str} -- file encoding (default: {"utf-8"}); not used
                by this constructor
            path {str} -- location where the file is stored; it is prepended
                to ``name`` verbatim (default: {""})
        """
        self.CONST_FIELD_008 = "|2018 ne || |||| || ||eng |"
        self.CONST_FIELD_LEADER = "nab a22001211a 4500"
        self.CONST_INDICATOR_1 = ' '
        self.CONST_INDICATOR_2 = ' '
        target = path + name
        self.writer = MARCWriter(open(target, 'wb'))

    def write_record(self, references, field035="", field008=""):
        """Write one record to the file in ISO 2709 form.

        Arguments:
            references {set(reference)} -- responses to store; each one is
                converted with its ``to_marc_field()`` method (field 591
                according to the original documentation)
            field035 -- string holding the data for field 035
            field008 -- string holding the data for field 008; an empty
                string selects the class default
        """
        if field008 == "":
            field008 = self.CONST_FIELD_008

        record = Record(force_utf8=True)
        control_field = Field(tag='008', data=field008)
        record.add_field(control_field)
        blank_indicators = [self.CONST_INDICATOR_1, self.CONST_INDICATOR_2]
        record.add_field(
            Field(tag='035',
                  indicators=blank_indicators,
                  subfields=['a', field035]))
        for reference in references:
            record.add_field(reference.to_marc_field())

        # Patch single leader positions: 5 -> 'n', 7 -> 'b', 18 -> 'a'.
        for position, code in ((5, 'n'), (7, 'b'), (18, 'a')):
            record.leader = (record.leader[:position] + code +
                             record.leader[position + 1:])
        self.writer.write(record)

    def close(self):
        """Finish writing and close the file."""
        self.writer.close()
def test_edit_mixed_code(self):
    """Add a 941 field to every mixed-encoding record and re-read the result.

    Reads ``test/mixed-code.dat``, appends a field to each record, writes the
    records to ``test/foo`` and checks that everything read back is a Record.
    """
    # open() replaces the Python 2-only file() builtin; MARC data is binary,
    # so every handle uses a binary mode.
    with open('test/mixed-code.dat', 'rb') as source:
        reader = MARCReader(
            source,
            to_unicode=True,
            force_utf8=True,
            utf8_handling='ignore'
        )
        writer = MARCWriter(open('test/foo', 'wb'))
        try:
            for record in reader:
                field = Field(
                    tag='941',
                    indicators=['', ''],
                    subfields=['a', 'x']
                )
                record.add_field(field)
                writer.write(record)
        finally:
            writer.close()

    try:
        with open('test/foo', 'rb') as handle:
            for record in MARCReader(handle, to_unicode=True):
                self.assertEqual(type(record), Record)
    finally:
        # Remove the scratch file even when an assertion fails.
        os.remove('test/foo')
# MARC field 700 if creators: for creator in creators: marc_record.add_field( Field( tag = '700', indicators = ['1', ''], subfields = [ 'a', f'{creator}', 't', '', ])) # MARC field 856 if identifiers: for identifier in identifiers: marc_record.add_field( Field( tag = '856', indicators = ['4', '2'], subfields = [ '3', 'Finding aid', 'u', f'{identifier}', ])) # write to MARC output file writer = MARCWriter(open(save_file,'ab')) writer.write(marc_record) writer.close() # open up MARC record in default viewer (NOTEPAD most likely) os.system(save_file)
#If there are no $a and no $s elif len(subfieldA) == 0 and len(subfieldS) == 0: myRecord = [ SysNum, OclcNum, title, subfieldA, subfieldS, "Unchanged Record no coverage info" ] onlyFileWriter.writerow(myRecord) unchangedRecordsWriter.write(record) else: myRecord = [ SysNum, OclcNum, title, subfieldA, subfieldS, "Other unchanged record" ] onlyFileWriter.writerow(myRecord) unchangedRecordsWriter.write(record) else: deletedRecordsWriter.write(record) items = record.get_fields("930") finalItemCount = finalItemCount + len(items) print("Number of records for " + filename + ": " + str(len(sysNumCountArray))) print("Number of original items for " + filename + ": " + str(originalItemCount)) print("Final number of items for " + filename + ": " + str(finalItemCount)) print("Number of deleted rows: " + str(rowsDeleted)) print("Number of added rows: " + str(rowsAdded)) unchangedRecordsWriter.close() changedRecordsWriter.close()
for domain in weird_domains: if domain in u: rec.remove_field(field) num_fields_removed += 1 for domain in subscription_domains: if domain in u: print rec.title() prefix(field) num_proxied_ebooks += 1 if len(field.get_subfields('u')) == 0: # 856 is useless without $u, delete the field rec.remove_field(field) num_fields_removed += 1 for z in field.get_subfields('z'): deleteZ(field) processed.write(rec) i += 1 processed.close() # stats print "\n" print "Total Records Processed:", num_total_books print "Ebooks Proxied:", num_proxied_ebooks print "856s Deleted:", num_fields_removed print "\n"
"""
Base script for DLF Forum 2014 Listening-Based Python workshop.

Modified from files at https://github.com/LibraryCodeYearIG/MARC-record-edit .

Splits ../../exampledump.mrc into one binary MARC file
(examplerecord_<n>.dat) and one MARCXML file (examplerecord_<n>.xml) per
record, numbering records from 1.
"""
import os

from pymarc import Field, MARCReader, MARCWriter, record_to_xml

# MARC transmission data is binary, so the dump is opened with 'rb'; the
# with-block closes it when the loop finishes or fails.
with open('../../exampledump.mrc', 'rb') as dump:
    records = MARCReader(dump, to_unicode=True, force_utf8=True,
                         utf8_handling='ignore')
    for index, marc in enumerate(records, 1):
        filename_dat = 'examplerecord_%s.dat' % index
        filename_xml = 'examplerecord_%s.xml' % index

        # open() replaces the Python 2-only file() builtin; append mode is
        # kept so reruns add to existing files as before.
        writer_dat = MARCWriter(open(filename_dat, 'ab'))
        try:
            writer_dat.write(marc)
        finally:
            writer_dat.close()

        # record_to_xml() returns a byte string, so the XML file is written
        # in binary mode with a bytes newline (valid on Python 2 and 3).
        with open(filename_xml, 'ab') as writer_xml:
            writer_xml.write(record_to_xml(marc) + b"\n")
def fmt_split(MARCfile):
    '''
    Parses a MARC binary file based on LDR/06-07 values into separate files
    for books, serials, maps, visual materials, and other formats.  A 909
    field carrying today's date is added to every record before it is
    written.
    Output is one .mrc file for each format, named <input name>_<format>.mrc.
    '''
    import os  # local import: only needed for splitext below

    # splitext copes with extra dots in the path, where split('.') would
    # raise ValueError.
    fpref = os.path.splitext(str(MARCfile))[0]
    today = str(date.today())

    with open(MARCfile, 'rb') as f:
        reader = MARCReader(f)
        writers = {}
        try:
            # opens a file for each format
            for fmt in ('bks', 'ser', 'maps', 'vis', 'other'):
                writers[fmt] = MARCWriter(
                    open(fpref + '_' + fmt + '.mrc', 'wb'))

            for rec in reader:
                field_909 = pymarc.Field(tag='909',
                                         indicators=[' ', ' '],
                                         subfields=[
                                             'a', 'bcat',
                                             'b', 'MNU',
                                             'c', today,
                                             'd', 'marcive'
                                         ])
                rec.add_ordered_field(field_909)

                # LDR/06 is the record type, LDR/07 the bibliographic level.
                ldr = str(rec.leader)
                rec_type = ldr[6:7]
                type_and_level = ldr[6:8]

                # determines format from LDR/06-07; the old character class
                # [s|i] also accepted a literal '|' by accident
                if type_and_level == 'am':
                    fmt = 'bks'
                elif type_and_level in ('as', 'ai'):
                    fmt = 'ser'
                elif rec_type == 'e':
                    fmt = 'maps'
                elif rec_type == 'k':
                    fmt = 'vis'
                else:
                    fmt = 'other'
                writers[fmt].write(rec)
        finally:
            # closes master format files, including after an error
            for writer in writers.values():
                writer.close()
#!/usr/bin/env python
"""
write all the records with 856 fields out to an ebooks-only MARC file
"""
from pymarc import MARCReader, MARCWriter

# the MARCReader params come from the penultimate comment here:
# github.com/edsu/pymarc/issues/7
# basically, these work around mixed character encodings

errCount = 0

# MARC data is binary, so both files use binary modes; open() replaces the
# Python 2-only file() builtin.
with open('ebooks.MRC', 'rb') as source:
    allRecords = MARCReader(source, to_unicode=True, force_utf8=True,
                            utf8_handling='ignore')
    onlyEbooks = MARCWriter(open('ebooks-edited.MRC', 'wb'))
    try:
        for rec in allRecords:
            if rec['856'] is not None:
                try:
                    onlyEbooks.write(rec)
                except UnicodeDecodeError:
                    # Report the title of the record that could not be
                    # written and carry on with the next one.
                    print(rec['245'])
                    errCount += 1
    finally:
        # Close the output even if reading fails part-way through.
        onlyEbooks.close()

# Single-argument print() behaves the same on Python 2 and 3; the double
# space reproduces the output of the old print statement.
print("\nNumber of Errors:  %d" % errCount)
"2", subfield2, "h", subfieldh, "i", subfieldi, "5", subfield5, "8", subfield8, "f", subfieldf, "F", subfieldF, "w", subfieldw ]) record.add_field(myfield) myWriterFile.writerow([ SysNum, subfieldm, subfield1, subfield2, subfield5, "item added" ]) if subfieldm == "ISSUE": SerialsRecordsWriter.write(record) elif subfieldm == "BOOK": BooksRecordsWriter.write(record) else: mydeletesFile.writerow([SysNum, "a deleted item match"]) else: SystemNum = str(record["001"]) SysNum = cleanSysNumber(SystemNum) myErrorFile.writerow([SysNum, "no 852 or a 930"]) for keys in multiple930Needed: myMultipleFile.writerow([keys, multiple930Needed[keys]]) SerialsRecordsWriter.close() BooksRecordsWriter.close()
fix_245_misspelling(title_a_raw,word,'a',title_245) fix_245_misspelling(title_b_raw,word,'b',title_245) #get the bib record from the 907 field prior to deletion n = marc.get_fields('907') for field in n: bib_rec_num_raw = field.get_subfields('a') bib_rec_num = subfield_to_string(bib_rec_num_raw) #add 949 local field for overlay of bib record and creation of order record when record is uploaded into Millennium marc.add_field( Field( tag = '949', indicators = [' ',' '], subfields = [ 'a', '*recs-b;ov-%s;' %(bib_rec_num) ])) #delete 907, 998, 910, 945 fields for f in marc.get_fields('907', '998', '910', '945'): if f['a'] != '': marc.remove_field(f) #append record to a generic file.dat file writer = MARCWriter(file(word+'.dat','a')) writer.write(marc) #closes .dat file writer.close()
def fchange_sort(MARCfile, fname):
    '''
    Sorts the records of a MARC binary file into update files based on the
    text of 960 subfield a.

    Records whose 960 $a mentions an OCLC control number change are copied to
    a file for manual checking, records mentioning a KB URL change are copied
    to a URL file, and records with a 960 are written to the general
    bib-update file.  The manual-check files are only created when the input
    contains at least one such record.

    MARCfile is the path of the file to read; fname is the name the output
    file names are derived from.  Relies on the module-level name `today`.
    '''
    #open a path to put the files for the FTP server - both OCN and BIB updates
    sorted_files_path = "C:/Users/kjthomps/Documents/WCM/file_fetching/updates/sorted_for_FTP " + today
    if not os.path.isdir(sorted_files_path):
        os.mkdir(sorted_files_path)

    #make a place to put the files with OCN updates for manual checking
    ocn_updates_path = "C:/Users/kjthomps/Documents/WCM/file_fetching/updates/OCN_updates_" + today
    if not os.path.isdir(ocn_updates_path):
        os.mkdir(ocn_updates_path)

    #make a place to put the files with URL updates for manual checking
    url_updates_path = "C:/Users/kjthomps/Documents/WCM/file_fetching/updates/URL_updates_" + today
    if not os.path.isdir(url_updates_path):
        os.mkdir(url_updates_path)

    # Strip every dot from the name, then put one back in front of "mrc" so
    # the split below yields exactly a prefix and a suffix.
    fname_str = str(fname)
    print(fname)
    fname_str = fname_str.replace(".","")
    fname_str = fname_str.replace("mrc",".mrc")
    print(fname_str)
    fpref, fsuf = fname_str.split('.')
    print(fpref)
    print(fsuf)
    print(MARCfile)

    # first, see if there are OCN or URL changes in the set; this will
    # determine whether creating a file is necessary
    OCN_change_ct = 0
    URL_change_ct = 0
    with open(MARCfile,'rb') as f:
        for rec in MARCReader(f):
            if rec['960']:
                field_960 = str(rec['960']['a'])
                if 'OCLC control number change' in field_960:
                    OCN_change_ct += 1
                if 'KB URL change' in field_960:
                    URL_change_ct += 1
    print("OCN_change_ct " ,OCN_change_ct)
    # Fixed: this line used to print OCN_change_ct a second time.
    print("URL_change_ct ",URL_change_ct)

    writer_new = False
    writer_URLs = False
    open_writers = []  # every writer created below, for cleanup
    try:
        #if there are OCN updates or KB URL changes, create files to put those records in
        if OCN_change_ct > 0:
            writer_new_oclc_num_manual = MARCWriter(open(ocn_updates_path + "/" + fpref + '_new_oclc_num.mrc', 'wb'))
            open_writers.append(writer_new_oclc_num_manual)
            writer_new = True
            print(writer_new)
        if URL_change_ct > 0:
            writer_update_URLs = MARCWriter(open(url_updates_path + "/" + fpref + '_update_URLs.mrc', 'wb'))
            open_writers.append(writer_update_URLs)
            writer_URLs = True
            print(writer_URLs)

        #create a file for all updates
        writer_update_bibs = MARCWriter(open(sorted_files_path + "/" + fpref + '_update_bibs.mrc', 'wb'))
        open_writers.append(writer_update_bibs)

        with open(MARCfile, 'rb') as f:
            for v, rec in enumerate(MARCReader(f), 1):
                print(v)
                if rec['960']:
                    field_960 = str(rec['960']['a'])
                    print(field_960)
                    #these are ordered such that if a 960 field has more than one reason for the update, the most
                    #critical to handle is addressed first. These are, in order: OCN change (affects matching),
                    #URL change, bib update.
                    #Update: OCN changes can be processed alongside Bib updates. URLs will need to be handled
                    #manually due to multi-vols?
                    if 'OCLC control number change' in field_960:
                        writer_update_bibs.write(rec)
                        writer_new_oclc_num_manual.write(rec)
                        if 'KB URL change' in field_960:
                            writer_update_URLs.write(rec)
                    elif 'KB URL change' in field_960:
                        writer_update_URLs.write(rec)
                        writer_update_bibs.write(rec)
                    else:
                        # Covers 'Subsequent record output', 'Master record
                        # variable field' and any other 960 text: all of
                        # them go to the bib-update file only.
                        writer_update_bibs.write(rec)
                # NOTE(review): records without a 960 are not written
                # anywhere under this reading of the original layout --
                # confirm that the final "else" did not belong to the
                # 960 check instead.
    finally:
        #closes every output file that was opened, including after an error
        for writer in open_writers:
            writer.close()
filename_xml = 'examplerecord_%s.xml' % i filename_out = 'examplerecord_%s.out' % i records = MARCReader(open(filename), to_unicode=True, force_utf8=True, utf8_handling='ignore') writer_dat = MARCWriter(file(filename_out,'a')) writer_xml = open(filename_xml,'a') for marc in records: isbn_list = marc.get_fields('020') try: isbn_field = isbn_list[0] except Exception, e: j = i - 10 marc.add_ordered_field( Field( tag='020', indicators=[' ', ' '], subfields = ['a', isbns[j]] )) writer_dat.write(marc) writer_xml.write(record_to_xml(marc) + "\n") writer_dat.close() writer_xml.close()