Example #1
0
 def test_tricky_tsv(self):
     '''Do we successfully parse a tricky tab-separated file?

     Each parsed row is compared with the entry at the same index of
     ``self.tricky_expected``, and the number of parsed rows must equal
     the number of expected entries.
     '''
     with test_data('tricky.tsv') as tsv:
         # Materialise the rows while the fixture file is still open.
         rows = list(UnicodeDictReader(tsv, ['email', 'name']))

     # Guard against a vacuous pass: a reader that yields too few rows (or
     # none at all) would otherwise never reach a failing assertion.
     self.assertEqual(len(self.tricky_expected), len(rows))
     for i, row in enumerate(rows):
         expected = self.tricky_expected[i]
         self.assert_name_email(expected['name'], expected['email'], row)
Example #2
0
    def test_csv_all_quote(self):
        '''Test a CSV where everything is quoted'''
        csv = BytesIO('''"Example Member","*****@*****.**"
"Another Member","*****@*****.**"'''.encode('utf-8'))
        u = UnicodeDictReader(csv, ['name', 'email'])
        l = list(u)

        self.assertEqual(2, len(l))
        self.assert_name_email('Example Member', '*****@*****.**', l[0])
        self.assert_name_email('Another Member', '*****@*****.**', l[1])
Example #3
0
    def test_ascii(self):
        '''Ensure we read an ASCII encoded CSV

        The first row has a quoted name and the second an unquoted one;
        both are read with ``encoding='ascii'``.
        '''
        # The addresses in the fixture must be the ones asserted below.
        # Reserved example.com addresses are used, one per row, so that a
        # mix-up between the rows would also be detected.
        data = BytesIO(b'"Michael JasonSmith",michael@example.com\n'
                       b'Member,member@example.com')

        u = UnicodeDictReader(data, ['name', 'email'], encoding='ascii')

        rows = list(u)
        self.assertEqual(2, len(rows))
        self.assert_name_email('Michael JasonSmith', 'michael@example.com',
                               rows[0])
        self.assert_name_email('Member', 'member@example.com', rows[1])
Example #4
0
    def test_utf8(self):
        '''Ensure we read a UTF-8 encoded CSV

        The second row's name holds two-byte (accented) and four-byte
        (emoji) UTF-8 sequences, read with ``encoding='utf-8'``.
        '''
        # The addresses in the fixture must be the ones asserted below.
        # Reserved example.com addresses are used, one per row, so that a
        # mix-up between the rows would also be detected.
        data = BytesIO(b'"Michael JasonSmith",michael@example.com\n'
                       b'M\xc3\xa9mb\xc3\xa9r \xf0\x9f\x98\x84,'
                       b'member@example.com')

        u = UnicodeDictReader(data, ['name', 'email'], encoding='utf-8')

        rows = list(u)
        self.assertEqual(2, len(rows))
        self.assert_name_email('Michael JasonSmith', 'michael@example.com',
                               rows[0])
        self.assert_name_email('Mémbér \U0001f604', 'member@example.com',
                               rows[1])
Example #5
0
    def test_latin1(self):
        '''Ensure we read an ISO Latin-1 encoded CSV

        The second row's name holds the single-byte Latin-1 form of an
        accented letter, read with ``encoding='latin-1'``.
        '''
        # The addresses in the fixture must be the ones asserted below.
        # Reserved example.com addresses are used, one per row, so that a
        # mix-up between the rows would also be detected.
        data = BytesIO(b'"Michael JasonSmith",michael@example.com\n'
                       b'M\xe9mb\xe9r,member@example.com')

        u = UnicodeDictReader(data, ['name', 'email'], encoding='latin-1')

        rows = list(u)
        self.assertEqual(2, len(rows))
        self.assert_name_email('Michael JasonSmith', 'michael@example.com',
                               rows[0])
        self.assert_name_email('Mémbér', 'member@example.com', rows[1])
 def test_guess_encoding_ascii(self):
     '''Plain ASCII bytes should be guessed as ``ascii``'''
     sample = BytesIO(b'Member')
     guessed = UnicodeDictReader.guess_encoding(sample)
     self.assertEqual('ascii', guessed)
Example #7
0
 def test_image(self):
     '''Feeding a PNG image to the dialect guesser should give ``excel``'''
     with test_data('gs-logo-16x16.png') as image:
         dialect = UnicodeDictReader.guess_dialect(image)
     self.assertEqual('excel', dialect)
Example #8
0
 def test_sniff_quote_tsv(self):
     '''Sniff a tab-separated file in which everything is quoted'''
     with test_data('ascii-quote.tsv') as handle:
         dialect = UnicodeDictReader.guess_dialect(handle)
     # The guessed dialect must use a tab as its delimiter.
     self.assert_delimiter('\t', dialect)
 def test_sniff_quote_tsv(self):
     '''Sniff a tab-separated file in which everything is quoted'''
     with test_data('ascii-quote.tsv') as handle:
         dialect = UnicodeDictReader.guess_dialect(handle)
     # The guessed dialect must use a tab as its delimiter.
     self.assert_delimiter('\t', dialect)
 def test_sniff_some_csv(self):
     '''Sniff a CSV in which only some things are quoted'''
     with test_data('utf8-some.csv') as handle:
         dialect = UnicodeDictReader.guess_dialect(handle)
     # Comma-delimited, with the dialect's doublequote flag set.
     self.assert_delimiter(',', dialect)
     self.assertTrue(dialect.doublequote)
 def test_guess_encoding_image(self):
     '''Feeding a PNG image to the encoding guesser should give ``utf-8``'''
     with test_data('gs-logo-16x16.png') as image:
         guessed = UnicodeDictReader.guess_encoding(image)
     self.assertEqual('utf-8', guessed)
Example #12
0
 def test_guess_encoding_utf8(self):
     '''Guess UTF-8?

     The sample is the UTF-8 encoding of an emoji (U+1F604) followed by
     accented text.
     '''
     # Encode a text literal rather than spelling the bytes by hand: a
     # bytes literal may only contain ASCII characters, and an octal escape
     # takes at most three digits, so a four-digit form such as \0360 is
     # the byte 0x1E followed by "0" rather than the byte 0xF0.
     m = BytesIO('\U0001f604 Mémbér'.encode('utf-8'))
     r = UnicodeDictReader.guess_encoding(m)
     self.assertEqual('utf-8', r)
Example #13
0
 def test_guess_encoding_latin1(self):
     '''Guess the encoding of ISO Latin-1 encoded bytes'''
     sample = BytesIO(b'M\xe9mb\xe9r')
     guessed = UnicodeDictReader.guess_encoding(sample)
     # NOTE(review): the expected name is ISO-8859-2 (Latin-2), not
     # ISO-8859-1 -- presumably what the detector reports for so short a
     # sample; confirm this is intended.
     self.assertEqual('ISO-8859-2', guessed)
Example #14
0
 def test_guess_encoding_ascii(self):
     '''Plain ASCII bytes should be guessed as ``ascii``'''
     sample = BytesIO(b'Member')
     guessed = UnicodeDictReader.guess_encoding(sample)
     self.assertEqual('ascii', guessed)
 def test_guess_encoding_latin1(self):
     '''Guess the encoding of ISO Latin-1 encoded bytes'''
     sample = BytesIO(b'M\xe9mb\xe9r')
     guessed = UnicodeDictReader.guess_encoding(sample)
     # NOTE(review): the expected name is ISO-8859-2 (Latin-2), not
     # ISO-8859-1 -- presumably what the detector reports for so short a
     # sample; confirm this is intended.
     self.assertEqual('ISO-8859-2', guessed)
 def test_guess_encoding_utf8(self):
     '''Guess UTF-8?

     The sample is the UTF-8 encoding of an emoji (U+1F604) followed by
     accented text.
     '''
     # Encode a text literal rather than spelling the bytes by hand: a
     # bytes literal may only contain ASCII characters, and an octal escape
     # takes at most three digits, so a four-digit form such as \0360 is
     # the byte 0x1E followed by "0" rather than the byte 0xF0.
     m = BytesIO('\U0001f604 Mémbér'.encode('utf-8'))
     r = UnicodeDictReader.guess_encoding(m)
     self.assertEqual('utf-8', r)
Example #17
0
 def test_guess_encoding_image(self):
     '''Feeding a PNG image to the encoding guesser should give ``utf-8``'''
     with test_data('gs-logo-16x16.png') as image:
         guessed = UnicodeDictReader.guess_encoding(image)
     self.assertEqual('utf-8', guessed)
 def test_sniff_quote_csv(self):
     '''Sniff a CSV in which everything is quoted'''
     with test_data('ascii-quote.csv') as handle:
         dialect = UnicodeDictReader.guess_dialect(handle)
     # The guessed dialect must use a comma as its delimiter.
     self.assert_delimiter(',', dialect)
Example #19
0
 def test_sniff_quote_csv(self):
     '''Sniff a CSV in which everything is quoted'''
     with test_data('ascii-quote.csv') as handle:
         dialect = UnicodeDictReader.guess_dialect(handle)
     # The guessed dialect must use a comma as its delimiter.
     self.assert_delimiter(',', dialect)
 def test_sniff_some_tsv(self):
     '''Sniff a tab-separated file in which only some things are quoted'''
     with test_data('utf8-some.tsv') as handle:
         dialect = UnicodeDictReader.guess_dialect(handle)
     # Tab-delimited, with the dialect's doublequote flag set.
     self.assert_delimiter('\t', dialect)
     self.assertTrue(dialect.doublequote)
Example #21
0
 def test_sniff_some_csv(self):
     '''Sniff a CSV in which only some things are quoted'''
     with test_data('utf8-some.csv') as handle:
         dialect = UnicodeDictReader.guess_dialect(handle)
     # Comma-delimited, with the dialect's doublequote flag set.
     self.assert_delimiter(',', dialect)
     self.assertTrue(dialect.doublequote)
 def test_image(self):
     '''Feeding a PNG image to the dialect guesser should give ``excel``'''
     with test_data('gs-logo-16x16.png') as image:
         dialect = UnicodeDictReader.guess_dialect(image)
     self.assertEqual('excel', dialect)
Example #23
0
 def test_sniff_some_tsv(self):
     '''Sniff a tab-separated file in which only some things are quoted'''
     with test_data('utf8-some.tsv') as handle:
         dialect = UnicodeDictReader.guess_dialect(handle)
     # Tab-delimited, with the dialect's doublequote flag set.
     self.assert_delimiter('\t', dialect)
     self.assertTrue(dialect.doublequote)