コード例 #1
0
    def test_reading_utf8_without_flag(self):
        """Read the ``utf8_without_leader_flag.dat`` fixture three ways.

        * default reader: subfield ``240$a`` is a byte ``str`` holding the
          raw UTF-8 byte sequences;
        * ``to_unicode=True``: the value is ``unicode`` but, as the expected
          string shows, the accented characters are lost;
        * ``to_unicode=True, force_utf8=True``: the value is ``unicode`` with
          the combining accents U+0300 and U+0301 intact.

        Each fixture handle is opened in a ``with`` block so it is closed as
        soon as the record has been read instead of being leaked.
        """
        fixture = 'test/utf8_without_leader_flag.dat'

        with open(fixture) as handle:
            record = MARCReader(handle).next()
        self.assertEqual(type(record), Record)
        utitle = record['240']['a']
        self.assertEqual(type(utitle), str)
        self.assertEqual(utitle,
            'De la solitude a\xcc\x80 la communaute\xcc\x81.')

        with open(fixture) as handle:
            record = MARCReader(handle, to_unicode=True,
                                hide_utf8_warnings=True).next()
        self.assertEqual(type(record), Record)
        utitle = record['240']['a']
        self.assertEqual(type(utitle), unicode)
        # unless you force utf-8 characters will get lost and
        # warnings will appear in the terminal
        self.assertEqual(utitle, 'De la solitude a   la communaute .')

        # force reading as utf-8
        with open(fixture) as handle:
            record = MARCReader(handle, to_unicode=True, force_utf8=True,
                                hide_utf8_warnings=True).next()
        self.assertEqual(type(record), Record)
        utitle = record['240']['a']
        self.assertEqual(type(utitle), unicode)
        self.assertEqual(utitle, u'De la solitude a' + unichr(0x0300) +
            ' la communaute' + unichr(0x0301) + '.')
コード例 #2
0
ファイル: marc8.py プロジェクト: mjgiarlo/pymarc
    def test_reading_utf8_without_flag(self):
        """Exercise three reader configurations on one UTF-8 fixture.

        The fixture is ``test/utf8_without_leader_flag.dat``.  The default
        reader is expected to return ``240$a`` as a byte ``str``; with
        ``to_unicode=True`` alone the result is ``unicode`` with the accented
        characters lost; adding ``force_utf8=True`` yields ``unicode`` with
        the combining marks U+0300 / U+0301 preserved.

        Handles are managed with ``with`` so none of the three opens leaks.
        """
        path = 'test/utf8_without_leader_flag.dat'

        # 1) raw bytes, no decoding requested
        with open(path) as fh:
            record = MARCReader(fh).next()
        self.assertEqual(type(record), Record)
        utitle = record['240']['a']
        self.assertEqual(type(utitle), str)
        self.assertEqual(utitle,
            'De la solitude a\xcc\x80 la communaute\xcc\x81.')

        # 2) decode to unicode without forcing utf-8:
        # unless you force utf-8 characters will get lost and
        # warnings will appear in the terminal
        with open(path) as fh:
            record = MARCReader(fh, to_unicode=True,
                                hide_utf8_warnings=True).next()
        self.assertEqual(type(record), Record)
        utitle = record['240']['a']
        self.assertEqual(type(utitle), unicode)
        self.assertEqual(utitle, 'De la solitude a   la communaute .')

        # 3) force reading as utf-8
        with open(path) as fh:
            record = MARCReader(fh, to_unicode=True, force_utf8=True,
                                hide_utf8_warnings=True).next()
        self.assertEqual(type(record), Record)
        utitle = record['240']['a']
        self.assertEqual(type(utitle), unicode)
        self.assertEqual(utitle, u'De la solitude a' + unichr(0x0300) +
            ' la communaute' + unichr(0x0301) + '.')
コード例 #3
0
ファイル: marc8.py プロジェクト: bokelskere/pymarc
    def test_reading_utf8_with_flag(self):
        """Read ``test/utf8_with_leader_flag.dat`` with and without decoding.

        The default reader is expected to return subfield ``240$a`` as a
        byte ``str``; with ``to_unicode=True`` the value is ``unicode``
        containing the combining accents U+0300 and U+0301.

        Both fixture handles are opened in ``with`` blocks so they are closed
        once the record has been read rather than leaked.
        """
        fixture = 'test/utf8_with_leader_flag.dat'

        with open(fixture) as handle:
            record = MARCReader(handle).next()
        self.assertEqual(type(record), Record)
        utitle = record['240']['a']
        self.assertEqual(type(utitle), str)
        self.assertEqual(utitle, 'De la solitude a\xcc\x80 la communaute\xcc\x81.')

        with open(fixture) as handle:
            record = MARCReader(handle, to_unicode=True).next()
        self.assertEqual(type(record), Record)
        utitle = record['240']['a']
        self.assertEqual(type(utitle), unicode)
        self.assertEqual(utitle, u'De la solitude a' + unichr(0x0300) + ' la communaute' + unichr(0x0301) + '.')
コード例 #4
0
 def test_marc8_reader(self):
     """Read ``test/marc8.dat`` with the default reader settings.

     Without ``to_unicode`` subfield ``240$a`` is expected to be a byte
     ``str`` equal to the literal asserted below.  The fixture is opened
     with ``open`` inside a ``with`` block so the handle is closed rather
     than leaked.
     """
     with open('test/marc8.dat') as handle:
         record = MARCReader(handle).next()
     self.assertEqual(type(record), Record)
     utitle = record['240']['a']
     self.assertEqual(type(utitle), str)
     self.assertEqual(utitle, 'De la solitude \xe1a la communaut\xe2e.')
コード例 #5
0
ファイル: marc8.py プロジェクト: AllenYang0308/pymarc
 def test_marc8_reader_to_unicode_bad_eacc_sequence(self):
     """Reading ``test/bad_eacc_encoding.dat`` as unicode must raise.

     ``assertRaises`` states the expectation directly.  The previous
     ``assertTrue("...")`` in the ``except`` branch could never fail,
     because a non-empty string is always true, and ``assertFalse("...")``
     only failed as a side effect of that same truthiness.

     The fixture is opened in a ``with`` block so the handle is closed even
     when the expected exception is raised.
     """
     with open('test/bad_eacc_encoding.dat') as handle:
         reader = MARCReader(handle, to_unicode=True,
                             hide_utf8_warnings=True)
         self.assertRaises(UnicodeDecodeError, reader.next)
コード例 #6
0
 def test_marc8_reader_to_unicode_bad_escape(self):
     """Decode ``test/bad_marc8_escape.dat`` with ``to_unicode=True``.

     The record is expected to be readable, and subfield ``260$b`` is
     expected to be ``unicode`` equal to the literal asserted below, which
     still contains the ESC (0x1B) character.

     The fixture is opened in a ``with`` block so the handle is closed
     rather than leaked.
     """
     with open('test/bad_marc8_escape.dat') as handle:
         record = MARCReader(handle, to_unicode=True).next()
     self.assertEqual(type(record), Record)
     upublisher = record['260']['b']
     self.assertEqual(type(upublisher), unicode)
     self.assertEqual(upublisher, u'La Soci\xe9t\x1b,')
コード例 #7
0
 def test_marc8_reader_to_unicode(self):
     """Decode ``test/marc8.dat`` with ``to_unicode=True``.

     Subfield ``240$a`` is expected to be ``unicode`` equal to the literal
     asserted below.  The fixture is opened in a ``with`` block so the
     handle is closed rather than leaked.
     """
     with open('test/marc8.dat') as handle:
         record = MARCReader(handle, to_unicode=True).next()
     self.assertEqual(type(record), Record)
     utitle = record['240']['a']
     self.assertEqual(type(utitle), unicode)
     self.assertEqual(utitle, u'De la solitude \xe0 la communaut\xe9.')
コード例 #8
0
ファイル: marc8.py プロジェクト: mjgiarlo/pymarc
 def test_marc8_reader_to_unicode_bad_escape(self):
     """Check that a record with a bad MARC-8 escape still decodes.

     Reads the first record of ``test/bad_marc8_escape.dat`` with
     ``to_unicode=True`` and expects ``260$b`` to be a ``unicode`` value
     in which the ESC (0x1B) character is still present (see the asserted
     literal).

     ``with`` guarantees the fixture handle is closed after the read.
     """
     with open('test/bad_marc8_escape.dat') as fh:
         reader = MARCReader(fh, to_unicode=True)
         r = reader.next()
     self.assertEqual(type(r), Record)
     upublisher = r['260']['b']
     self.assertEqual(type(upublisher), unicode)
     self.assertEqual(upublisher, u'La Soci\xe9t\x1b,')
コード例 #9
0
ファイル: marc8.py プロジェクト: mjgiarlo/pymarc
 def test_marc8_reader_to_unicode(self):
     """Check the unicode value produced for ``test/marc8.dat``.

     Reads the first record with ``to_unicode=True`` and expects ``240$a``
     to be ``unicode`` equal to the asserted literal.

     ``with`` guarantees the fixture handle is closed after the read.
     """
     with open('test/marc8.dat') as fh:
         reader = MARCReader(fh, to_unicode=True)
         r = reader.next()
     self.assertEqual(type(r), Record)
     utitle = r['240']['a']
     self.assertEqual(type(utitle), unicode)
     self.assertEqual(utitle, u'De la solitude \xe0 la communaut\xe9.')
コード例 #10
0
ファイル: marc8.py プロジェクト: mjgiarlo/pymarc
 def test_marc8_reader(self):
     """Check the raw byte value produced for ``test/marc8.dat``.

     Reads the first record with default reader options and expects
     ``240$a`` to be a byte ``str`` equal to the asserted literal.

     ``with`` guarantees the fixture handle is closed after the read.
     """
     with open('test/marc8.dat') as fh:
         reader = MARCReader(fh)
         r = reader.next()
     self.assertEqual(type(r), Record)
     utitle = r['240']['a']
     self.assertEqual(type(utitle), str)
     self.assertEqual(utitle, 'De la solitude \xe1a la communaut\xe2e.')
コード例 #11
0
    def test_reading_utf8_with_flag(self):
        """Compare raw and decoded reads of ``utf8_with_leader_flag.dat``.

        First the record is read with default options and ``240$a`` is
        expected to be a byte ``str``; then it is read with
        ``to_unicode=True`` and the value is expected to be ``unicode`` with
        combining accents U+0300 and U+0301.

        ``with`` guarantees each fixture handle is closed after its read.
        """
        path = 'test/utf8_with_leader_flag.dat'

        # raw bytes
        with open(path) as fh:
            record = MARCReader(fh).next()
        self.assertEqual(type(record), Record)
        utitle = record['240']['a']
        self.assertEqual(type(utitle), str)
        self.assertEqual(utitle,
            'De la solitude a\xcc\x80 la communaute\xcc\x81.')

        # decoded to unicode
        with open(path) as fh:
            record = MARCReader(fh, to_unicode=True).next()
        self.assertEqual(type(record), Record)
        utitle = record['240']['a']
        self.assertEqual(type(utitle), unicode)
        self.assertEqual(utitle, u'De la solitude a' + unichr(0x0300) +
            ' la communaute' + unichr(0x0301) + '.')
コード例 #12
0
ファイル: marc8.py プロジェクト: pombredanne/pymarc
 def test_marc8_reader_to_unicode_bad_eacc_sequence(self):
     """Expect ``UnicodeDecodeError`` for ``test/bad_eacc_encoding.dat``.

     The earlier try/except form asserted on non-empty string literals:
     ``assertTrue("...")`` always passes and ``assertFalse("...")`` fails
     only because the string is truthy.  ``assertRaises`` expresses the
     intended check explicitly and also drops the unused local.

     ``with`` guarantees the fixture handle is closed even though reading
     raises.
     """
     with open('test/bad_eacc_encoding.dat') as fh:
         reader = MARCReader(fh,
                             to_unicode=True,
                             hide_utf8_warnings=True)
         self.assertRaises(UnicodeDecodeError, reader.next)
コード例 #13
0
ファイル: encode.py プロジェクト: CaptSolo/pymarc
 def test_encode_decode(self):
     """Round-trip ``test/one.dat`` through ``MARCReader`` and ``as_marc()``.

     The first record's ``as_marc()`` output must equal the complete
     contents of the file.  Both opens use ``with`` so neither handle is
     leaked.
     """
     # get raw data from file
     with open('test/one.dat') as handle:
         original = handle.read()
     # create a record object for the file
     with open('test/one.dat') as handle:
         record = MARCReader(handle).next()
     # make sure original data is the same as
     # the record encoded as MARC
     raw = record.as_marc()
     self.assertEqual(original, raw)
コード例 #14
0
ファイル: marc8.py プロジェクト: mattgrayson/pymarc
    def test_unicode(self):
        """Write a record containing U+1234 to ``test/foo`` and read it back.

        Field 245 subfield ``a`` is set to ``unichr(0x1234)``; after writing
        with ``MARCWriter`` and re-reading with a default ``MARCReader`` the
        subfield must compare equal to the same character.

        The read handle is closed via ``with`` and the scratch file is
        removed in a ``finally`` clause, so the test no longer leaves
        ``test/foo`` behind or leaks a file handle.
        """
        import os  # local import: only needed for scratch-file cleanup

        scratch = 'test/foo'
        record = Record()
        record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))

        writer = MARCWriter(open(scratch, 'w'))
        try:
            try:
                writer.write(record)
            finally:
                # close the writer even if write() fails
                writer.close()

            with open(scratch) as handle:
                record = MARCReader(handle).next()
            self.assertEqual(record['245']['a'], unichr(0x1234))
        finally:
            os.remove(scratch)
コード例 #15
0
ファイル: encode.py プロジェクト: pombredanne/pymarc
 def test_encode_decode(self):
     """Verify that re-encoding the parsed record reproduces the file.

     Reads the whole of ``test/one.dat``, parses its first record, and
     compares ``record.as_marc()`` with the original contents.  ``with``
     closes each handle after use instead of leaking it.
     """
     path = 'test/one.dat'
     # get raw data from file
     with open(path) as fh:
         original = fh.read()
     # create a record object for the file
     with open(path) as fh:
         reader = MARCReader(fh)
         record = reader.next()
     # make sure original data is the same as
     # the record encoded as MARC
     self.assertEqual(original, record.as_marc())
コード例 #16
0
    def test_writing_unicode(self):
        """Write a unicode record to ``test/foo`` and read it back decoded.

        Field 245 subfield ``a`` is set to ``unichr(0x1234)`` and the leader
        carries ``'a'`` at offset 9 (the MARC 21 character coding scheme
        position).  After writing, the file is re-read with
        ``to_unicode=True`` and the subfield must equal the same character.

        The read handle is closed before the scratch file is deleted, and
        deletion happens in ``finally`` so a failing assertion no longer
        leaves ``test/foo`` behind.
        """
        scratch = 'test/foo'
        record = Record()
        record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
        record.leader = '         a              '

        writer = MARCWriter(open(scratch, 'w'))
        try:
            try:
                writer.write(record)
            finally:
                # close the writer even if write() fails
                writer.close()

            with open(scratch) as handle:
                record = MARCReader(handle, to_unicode=True).next()
            self.assertEqual(record['245']['a'], unichr(0x1234))
        finally:
            os.remove(scratch)
コード例 #17
0
class MARCFetcher(Fetcher):
    '''Harvest a MARC FILE. Can be local or at a URL'''
    def __init__(self, url_harvest, extra_data, **kwargs):
        '''Grab file and copy to local temp file.

        url_harvest -- location passed to ``urllib.urlopen``; also stored as
                       ``self.url_marc_file``.
        extra_data  -- forwarded unchanged to ``Fetcher.__init__``.
        kwargs      -- forwarded unchanged to ``Fetcher.__init__``.

        After construction ``self.marc_file`` is a temporary file rewound to
        offset 0 and ``self.marc_reader`` is a ``MARCReader`` over it.
        '''
        super(MARCFetcher, self).__init__(url_harvest, extra_data, **kwargs)
        self.url_marc_file = url_harvest
        self.marc_file = tempfile.TemporaryFile()
        try:
            source = urllib.urlopen(self.url_marc_file)
            try:
                self.marc_file.write(source.read())
            finally:
                # always release the connection / file handle
                source.close()
        except Exception:
            # do not leak the temp file when the download fails
            self.marc_file.close()
            raise
        self.marc_file.seek(0)
        self.marc_reader = MARCReader(self.marc_file,
                                      to_unicode=True,
                                      utf8_handling='replace')

    def next(self):
        '''Return MARC record by record to the controller.

        Each call returns ``as_dict()`` of the next record from
        ``self.marc_reader``; whatever the reader raises when it runs out of
        records (presumably ``StopIteration``) propagates to the caller.
        '''
        return self.marc_reader.next().as_dict()
コード例 #18
0
class MARCFetcher(Fetcher):
    '''Harvest a MARC FILE. Can be local or at a URL'''
    def __init__(self, url_harvest, extra_data):
        '''Grab file and copy to local temp file.

        url_harvest -- location passed to ``urllib.urlopen``; also stored as
                       ``self.url_marc_file``.
        extra_data  -- forwarded unchanged to ``Fetcher.__init__``.

        After construction ``self.marc_file`` is a temporary file rewound to
        offset 0 and ``self.marc_reader`` is a ``MARCReader`` over it.
        '''
        super(MARCFetcher, self).__init__(url_harvest, extra_data)
        self.url_marc_file = url_harvest
        self.marc_file = tempfile.TemporaryFile()
        try:
            remote = urllib.urlopen(self.url_marc_file)
            try:
                self.marc_file.write(remote.read())
            finally:
                # always release the connection / file handle
                remote.close()
        except Exception:
            # do not leak the temp file when the download fails
            self.marc_file.close()
            raise
        self.marc_file.seek(0)
        self.marc_reader = MARCReader(self.marc_file,
                                      to_unicode=True,
                                      utf8_handling='replace')

    def next(self):
        '''Return MARC record by record to the controller.

        Each call returns ``as_dict()`` of the next record from
        ``self.marc_reader``; whatever the reader raises when it runs out of
        records (presumably ``StopIteration``) propagates to the caller.
        '''
        return self.marc_reader.next().as_dict()