def main(fd, store_type=None, store_id=None, graph_id=None, gzipped=False):
    """
    Converts MARC21 data stored in fd to a RDFlib graph.

    fd is handed to ``open`` (or ``gzip.open`` when ``gzipped`` is true), so
    it is a file path rather than an already opened file object.

    When ``store_type`` is given, the store plugin of that name is looked up
    through ``rdflib.plugin`` and instantiated with ``store_id``; both
    ``store_id`` and ``graph_id`` are then required.  Otherwise the
    ``'default'`` store is used.

    The graph is committed every 100 triples and once more after the last
    triple; the running triple index is printed every 10000 triples.

    Returns the populated graph.
    """
    from rdflib import plugin

    if store_type:
        msg = "Need a {} identifier for a disk-based store."
        # Kept as asserts so the exception type seen by callers is unchanged.
        assert store_id, msg.format('store')
        assert graph_id, msg.format('graph')
        store = plugin.get(store_type, Store)(store_id)
    else:
        store = 'default'

    graph = Graph(store=store, identifier=graph_id)

    # Do not rebind the name ``open`` here: assigning to it anywhere in the
    # function makes it a local, and the non-gzipped path would then fail
    # with UnboundLocalError.
    if gzipped:
        import gzip
        opener = gzip.open
    else:
        opener = open

    # MARC21 is a binary format, so always read bytes.
    handle = opener(fd, 'rb')
    records = None
    try:
        records = MARCReader(handle)
        for i, triple in enumerate(process_records(records)):
            graph.add(triple)
            if i % 100 == 0:
                graph.commit()
            if i % 10000 == 0:
                print(i)
        # Flush whatever was added since the last periodic commit.
        graph.commit()
    finally:
        if records is not None:
            records.close()
        else:
            # The reader was never created; release the raw handle ourselves.
            handle.close()

    return graph
def test_writing_unicode(self):
    """Write a record holding U+1234 in 245$a and check it reads back intact.

    The record is written to ``test/foo`` with MARCWriter and read again
    with MARCReader(to_unicode=True).  The reader is closed and the scratch
    file removed even when the write, read or assertion fails.
    """
    record = Record()
    record.add_field(Field(245, ["1", "0"], ["a", chr(0x1234)]))
    # NOTE(review): MARC leaders are normally 24 characters long; confirm
    # this literal has not lost whitespace padding.
    record.leader = " a "

    path = "test/foo"
    try:
        writer = MARCWriter(open(path, "wb"))
        try:
            writer.write(record)
        finally:
            writer.close()

        reader = MARCReader(open(path, "rb"), to_unicode=True)
        try:
            record = next(reader)
            self.assertEqual(record["245"]["a"], chr(0x1234))
        finally:
            reader.close()
    finally:
        # Never leave the scratch file behind, whatever the outcome.
        if os.path.exists(path):
            os.remove(path)
def test_writing_unicode(self):
    """Write a record holding U+1234 in 245$a and check it reads back intact.

    The record is written to 'test/foo' with MARCWriter and read again with
    MARCReader(to_unicode=True).  The reader is closed and the scratch file
    removed even when the write, read or assertion fails.
    """
    record = Record()
    record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
    # NOTE(review): MARC leaders are normally 24 characters long; confirm
    # this literal has not lost whitespace padding.
    record.leader = ' a '

    path = 'test/foo'
    try:
        writer = MARCWriter(open(path, 'wb'))
        try:
            writer.write(record)
        finally:
            writer.close()

        reader = MARCReader(open(path, 'rb'), to_unicode=True)
        try:
            record = next(reader)
            self.assertEqual(record['245']['a'], unichr(0x1234))
        finally:
            reader.close()
    finally:
        # Never leave the scratch file behind, whatever the outcome.
        if os.path.exists(path):
            os.remove(path)
def test_read_records(self):
    """Compare the records produced by ``self.yc.read_records()`` with a fixture.

    Records from ``self.output_path`` and from
    ``test/converted_records.mrc`` are read in lockstep, and the string form
    of every field must match, in the same order.  The two files must also
    end at the same point, so a truncated or empty output fails the test
    instead of passing silently.
    """
    self.yc.read_records()
    output_reader = MARCReader(open(self.output_path, 'rb'), to_unicode=True)
    try:
        test_reader = MARCReader(open("test/converted_records.mrc", 'rb'), to_unicode=True)
        try:
            while True:
                output_record = next(output_reader, None)
                test_record = next(test_reader, None)
                if output_record is None and test_record is None:
                    break

                # NOTE(review): None is treated as "no record at this
                # position"; confirm the reader never yields None for a
                # record that is expected to be skipped.
                self.assertIsNotNone(
                    output_record,
                    "output has fewer records than test/converted_records.mrc")
                self.assertIsNotNone(
                    test_record,
                    "output has more records than test/converted_records.mrc")

                output_fields = [str(field) for field in output_record.get_fields()]
                test_fields = [str(field) for field in test_record.get_fields()]
                self.assertCountEqual(output_fields, test_fields)
                self.assertListEqual(output_fields, test_fields)
        finally:
            test_reader.close()
    finally:
        output_reader.close()
""" Very basic program to dump binary MARC files as text """ import sys from pymarc import MARCReader, TextWriter if len(sys.argv) != 3: raise ValueError(f'Usage: {sys.argv[0]} marc_file text_file') reader = MARCReader(open(sys.argv[1], 'rb'), utf8_handling="ignore") writer = TextWriter(open(sys.argv[2], 'wt')) for record in reader: writer.write(record) writer.close() reader.close()