コード例 #1
0
ファイル: okrand.py プロジェクト: timClicks/okrand
def main(fd, store_type=None, store_id=None, graph_id=None, gzipped=False):
    """
    Converts MARC21 data stored in fd to a RDFlib graph.

    ``fd`` is handed to ``open`` (or to ``gzip.open`` when ``gzipped`` is
    true) and the resulting handle is wrapped in a ``MARCReader``.  Every
    triple yielded by ``process_records`` is added to the graph.

    When ``store_type`` is given, the store class is looked up through
    rdflib's plugin registry and instantiated with ``store_id``; both
    ``store_id`` and ``graph_id`` are then required (AssertionError
    otherwise).  Without ``store_type`` the graph uses the ``'default'``
    store.

    Returns the populated graph.
    """
    from rdflib import plugin

    if store_type:
        msg = "Need a {} identifier for a disk-based store."
        assert store_id, msg.format('store')
        assert graph_id, msg.format('graph')
        store = plugin.get(store_type, Store)(store_id)
    else:
        store = 'default'

    graph = Graph(store=store, identifier=graph_id)

    # Pick the opener under a separate name.  Assigning to ``open`` inside
    # the function would make it a local variable for the whole body and
    # break the non-gzipped path with UnboundLocalError.
    if gzipped:
        import gzip
        opener = gzip.open
    else:
        opener = open

    # MARC records are binary, so read bytes.  The ``with`` block closes the
    # handle even when parsing fails, and nothing is closed if the open
    # itself fails.
    with opener(fd, 'rb') as handle:
        records = MARCReader(handle)

        for i, triple in enumerate(process_records(records)):
            graph.add(triple)
            if i % 100 == 0:
                graph.commit()
            if i % 10000 == 0:
                # Progress indicator; the call form works on Python 2 and 3.
                print(i)

    # Flush the triples added since the last periodic commit.
    graph.commit()

    return graph
コード例 #2
0
ファイル: test_marc8.py プロジェクト: pejalptar/marc
    def test_writing_unicode(self):
        """Round-trip a record holding a non-ASCII subfield through a file.

        A record with U+1234 in field 245 subfield ``a`` is written to
        ``test/foo`` and read back with ``to_unicode=True``; the subfield
        value must come back unchanged.  The scratch file is removed even
        when the test fails.
        """
        record = Record()
        record.add_field(Field(245, ["1", "0"], ["a", chr(0x1234)]))
        # 'a' at leader position 9 (MARC 21 character coding scheme:
        # UCS/Unicode).
        record.leader = "         a              "
        try:
            # ``with`` guarantees the handle is released if write() raises.
            with open("test/foo", "wb") as out_fh:
                writer = MARCWriter(out_fh)
                writer.write(record)
                writer.close()

            with open("test/foo", "rb") as in_fh:
                reader = MARCReader(in_fh, to_unicode=True)
                record = next(reader)
                self.assertEqual(record["245"]["a"], chr(0x1234))
        finally:
            # Do not leave the scratch file behind on failure.  The
            # existence check avoids masking an earlier error raised before
            # the file was created.
            if os.path.exists("test/foo"):
                os.remove("test/foo")
コード例 #3
0
    def test_writing_unicode(self):
        """Round-trip a record holding a non-ASCII subfield through a file.

        A record with U+1234 in field 245 subfield ``a`` is written to
        ``test/foo`` and read back with ``to_unicode=True``; the subfield
        value must come back unchanged.  The scratch file is removed even
        when the test fails.
        """
        record = Record()
        record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
        # 'a' at leader position 9 (MARC 21 character coding scheme:
        # UCS/Unicode).
        record.leader = '         a              '
        try:
            # ``with`` guarantees the handle is released if write() raises.
            with open('test/foo', 'wb') as out_fh:
                writer = MARCWriter(out_fh)
                writer.write(record)
                writer.close()

            with open('test/foo', 'rb') as in_fh:
                reader = MARCReader(in_fh, to_unicode=True)
                record = next(reader)
                self.assertEqual(record['245']['a'], unichr(0x1234))
        finally:
            # Do not leave the scratch file behind on failure.  The
            # existence check avoids masking an earlier error raised before
            # the file was created.
            if os.path.exists('test/foo'):
                os.remove('test/foo')
コード例 #4
0
    def test_read_records(self):
        """Compare the records produced by ``self.yc.read_records()`` with
        the reference file ``test/converted_records.mrc``.

        Records are read pairwise from ``self.output_path`` and the
        reference file.  Each pair must have the same string-rendered
        fields in the same order, and both files must run out of records
        at the same time.
        """
        self.yc.read_records()
        # ``with`` closes both handles even when an assertion fails.
        with open(self.output_path, 'rb') as output_fh, \
                open("test/converted_records.mrc", 'rb') as expected_fh:
            output_reader = MARCReader(output_fh, to_unicode=True)
            test_reader = MARCReader(expected_fh, to_unicode=True)
            while True:
                output_record = next(output_reader, None)
                test_record = next(test_reader, None)
                if output_record is None and test_record is None:
                    break
                # A one-sided None must fail the test.  Skipping the pair
                # would let a short output file pass unnoticed, and calling
                # get_fields() on None would raise AttributeError.
                self.assertIsNotNone(
                    output_record,
                    "output yielded no record where the reference has one")
                self.assertIsNotNone(
                    test_record,
                    "output yielded a record where the reference has none")
                output_fields = [
                    str(field) for field in output_record.get_fields()]
                test_fields = [
                    str(field) for field in test_record.get_fields()]
                self.assertCountEqual(output_fields, test_fields)
                self.assertListEqual(output_fields, test_fields)
コード例 #5
0
""" Very basic program to dump binary MARC files as text """
import sys
from pymarc import MARCReader, TextWriter

# Expect exactly two arguments: the binary MARC input and the text output.
if len(sys.argv) != 3:
    raise ValueError(f'Usage: {sys.argv[0]} marc_file text_file')

# The input is opened first, so a missing MARC file does not create an empty
# output file.  ``with`` closes (and flushes) both handles even if reading
# or writing a record raises.
with open(sys.argv[1], 'rb') as marc_fh, open(sys.argv[2], 'wt') as text_fh:
    reader = MARCReader(marc_fh, utf8_handling="ignore")
    writer = TextWriter(text_fh)

    for record in reader:
        writer.write(record)

    writer.close()
    reader.close()