def test_reading_utf8_without_flag(self):
    '''Read a UTF-8 title from the "without leader flag" fixture.

    The same fixture is read three times: with the default reader, with
    ``to_unicode=True`` only, and with ``to_unicode=True`` plus
    ``force_utf8=True``. Each pass checks the type and value of 240$a.
    '''
    fixture = 'test/utf8_without_leader_flag.dat'

    # Default reader: the subfield comes back as an undecoded byte string.
    # The file is opened in binary mode and closed as soon as the record
    # has been read, so no handle outlives the test.
    with open(fixture, 'rb') as handle:
        record = MARCReader(handle).next()
    self.assertEqual(type(record), Record)
    utitle = record['240']['a']
    self.assertEqual(type(utitle), str)
    self.assertEqual(utitle,
                     'De la solitude a\xcc\x80 la communaute\xcc\x81.')

    # to_unicode without force_utf8: unless UTF-8 is forced the accented
    # characters get lost (warnings are silenced by hide_utf8_warnings).
    with open(fixture, 'rb') as handle:
        reader = MARCReader(handle, to_unicode=True,
                            hide_utf8_warnings=True)
        record = reader.next()
    self.assertEqual(type(record), Record)
    utitle = record['240']['a']
    self.assertEqual(type(utitle), unicode)
    # NOTE(review): this expected literal may have lost repeated spaces if
    # the file was whitespace-normalised -- confirm against the fixture.
    self.assertEqual(utitle, 'De la solitude a la communaute .')

    # Force reading as UTF-8: the combining accents are preserved.
    with open(fixture, 'rb') as handle:
        reader = MARCReader(handle, to_unicode=True, force_utf8=True,
                            hide_utf8_warnings=True)
        record = reader.next()
    self.assertEqual(type(record), Record)
    utitle = record['240']['a']
    self.assertEqual(type(utitle), unicode)
    self.assertEqual(utitle,
                     u'De la solitude a' + unichr(0x0300) +
                     ' la communaute' + unichr(0x0301) + '.')
def test_reading_utf8_with_flag(self):
    '''Read a UTF-8 title from the "with leader flag" fixture.

    Checks 240$a twice: as a raw byte string with the default reader and
    as a unicode string with ``to_unicode=True``.
    '''
    fixture = 'test/utf8_with_leader_flag.dat'

    # Default reader: value is returned as an undecoded byte string.
    # Binary mode plus ``with`` ensures the handle is closed promptly.
    with open(fixture, 'rb') as handle:
        record = MARCReader(handle).next()
    self.assertEqual(type(record), Record)
    utitle = record['240']['a']
    self.assertEqual(type(utitle), str)
    self.assertEqual(utitle,
                     'De la solitude a\xcc\x80 la communaute\xcc\x81.')

    # to_unicode=True: value is decoded, combining accents included.
    with open(fixture, 'rb') as handle:
        record = MARCReader(handle, to_unicode=True).next()
    self.assertEqual(type(record), Record)
    utitle = record['240']['a']
    self.assertEqual(type(utitle), unicode)
    self.assertEqual(utitle,
                     u'De la solitude a' + unichr(0x0300) +
                     ' la communaute' + unichr(0x0301) + '.')
def test_marc8_reader(self):
    '''Read the MARC-8 fixture without decoding and check 240$a bytes.'''
    # open() in binary mode replaces the bare file() call so the handle is
    # closed deterministically once the record has been read.
    with open('test/marc8.dat', 'rb') as handle:
        r = MARCReader(handle).next()
    self.assertEqual(type(r), Record)
    utitle = r['240']['a']
    self.assertEqual(type(utitle), str)
    self.assertEqual(utitle, 'De la solitude \xe1a la communaut\xe2e.')
def test_marc8_reader_to_unicode_bad_eacc_sequence(self):
    '''Reading the bad-EACC fixture with to_unicode must fail to decode.

    The test passes only if fetching the first record raises
    ``UnicodeDecodeError``.
    '''
    with open('test/bad_eacc_encoding.dat', 'rb') as handle:
        reader = MARCReader(handle, to_unicode=True,
                            hide_utf8_warnings=True)
        # assertRaises states the expectation directly; the previous
        # assertFalse/assertTrue calls on string literals only worked
        # because a non-empty string is truthy.
        self.assertRaises(UnicodeDecodeError, reader.next)
def test_marc8_reader_to_unicode_bad_escape(self):
    '''Read the bad-escape MARC-8 fixture with to_unicode and check 260$b.

    The expected publisher value still contains the raw escape character
    (``\\x1b``).
    '''
    # Binary mode plus ``with`` closes the handle once the record is read.
    with open('test/bad_marc8_escape.dat', 'rb') as handle:
        r = MARCReader(handle, to_unicode=True).next()
    self.assertEqual(type(r), Record)
    upublisher = r['260']['b']
    self.assertEqual(type(upublisher), unicode)
    self.assertEqual(upublisher, u'La Soci\xe9t\x1b,')
def test_marc8_reader_to_unicode(self):
    '''Read the MARC-8 fixture with to_unicode and check decoded 240$a.'''
    # Binary mode plus ``with`` closes the handle once the record is read.
    with open('test/marc8.dat', 'rb') as handle:
        r = MARCReader(handle, to_unicode=True).next()
    self.assertEqual(type(r), Record)
    utitle = r['240']['a']
    self.assertEqual(type(utitle), unicode)
    self.assertEqual(utitle, u'De la solitude \xe0 la communaut\xe9.')
def test_encode_decode(self):
    '''Round-trip test/one.dat: parsed record re-encodes to the same bytes.'''
    fixture = 'test/one.dat'

    # Get the raw data from the file. Binary mode matters here because the
    # comparison below is byte-for-byte.
    with open(fixture, 'rb') as handle:
        original = handle.read()

    # Create a record object from the same file.
    with open(fixture, 'rb') as handle:
        record = MARCReader(handle).next()

    # The record encoded as MARC must equal the original data.
    raw = record.as_marc()
    self.assertEqual(original, raw)
def test_unicode(self):
    '''Write a record holding U+1234 to test/foo and read it back.'''
    # Local import: the module-level imports are not visible from here and
    # the clean-up below needs ``os``.
    import os

    scratch = 'test/foo'
    record = Record()
    record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
    try:
        writer = MARCWriter(open(scratch, 'wb'))
        try:
            writer.write(record)
        finally:
            # Close the writer even if write() raises.
            writer.close()

        with open(scratch, 'rb') as handle:
            record = MARCReader(handle).next()
        self.assertEqual(record['245']['a'], unichr(0x1234))
    finally:
        # Do not leave the scratch file behind, whether or not the
        # assertion passed.
        if os.path.exists(scratch):
            os.remove(scratch)
def test_writing_unicode(self):
    '''Write a Unicode-flagged record to test/foo and read it as unicode.'''
    scratch = 'test/foo'
    record = Record()
    record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
    # A MARC 21 leader is 24 characters long and position 09 holds the
    # character coding scheme ('a' = UCS/Unicode). The previous literal
    # was only three characters, apparently with its padding collapsed,
    # so the leader is built explicitly here.
    record.leader = ' ' * 9 + 'a' + ' ' * 14
    try:
        writer = MARCWriter(open(scratch, 'wb'))
        try:
            writer.write(record)
        finally:
            # Close the writer even if write() raises.
            writer.close()

        with open(scratch, 'rb') as handle:
            record = MARCReader(handle, to_unicode=True).next()
        self.assertEqual(record['245']['a'], unichr(0x1234))
    finally:
        # Remove the scratch file even when the assertion fails.
        if os.path.exists(scratch):
            os.remove(scratch)
class MARCFetcher(Fetcher):
    '''Harvest a MARC FILE. Can be local or at a URL'''

    def __init__(self, url_harvest, extra_data, **kwargs):
        '''Grab file and copy to local temp file

        url_harvest -- location of the MARC file; opened with
                       urllib.urlopen.
        extra_data  -- passed through unchanged to Fetcher.__init__.
        kwargs      -- passed through unchanged to Fetcher.__init__.
        '''
        super(MARCFetcher, self).__init__(url_harvest, extra_data, **kwargs)
        self.url_marc_file = url_harvest
        self.marc_file = tempfile.TemporaryFile()
        source = urllib.urlopen(self.url_marc_file)
        try:
            self.marc_file.write(source.read())
        finally:
            # Release the connection / file handle as soon as the copy is
            # done, including when the read or write fails.
            source.close()
        # Rewind so the reader starts at the first record.
        self.marc_file.seek(0)
        self.marc_reader = MARCReader(self.marc_file,
                                      to_unicode=True,
                                      utf8_handling='replace')

    def next(self):
        '''Return MARC record by record to the controller

        Each record is converted with as_dict(); any exception raised by
        the underlying reader's next() propagates to the caller.
        '''
        return self.marc_reader.next().as_dict()
class MARCFetcher(Fetcher):
    '''Harvest a MARC FILE. Can be local or at a URL'''

    def __init__(self, url_harvest, extra_data):
        '''Grab file and copy to local temp file

        url_harvest -- location of the MARC file; opened with
                       urllib.urlopen.
        extra_data  -- passed through unchanged to Fetcher.__init__.
        '''
        super(MARCFetcher, self).__init__(url_harvest, extra_data)
        self.url_marc_file = url_harvest
        self.marc_file = tempfile.TemporaryFile()
        source = urllib.urlopen(self.url_marc_file)
        try:
            self.marc_file.write(source.read())
        finally:
            # Release the connection / file handle as soon as the copy is
            # done, including when the read or write fails.
            source.close()
        # Rewind so the reader starts at the first record.
        self.marc_file.seek(0)
        self.marc_reader = MARCReader(self.marc_file,
                                      to_unicode=True,
                                      utf8_handling='replace')

    def next(self):
        '''Return MARC record by record to the controller

        Each record is converted with as_dict(); any exception raised by
        the underlying reader's next() propagates to the caller.
        '''
        return self.marc_reader.next().as_dict()