def test_csviter_encoding(self):
        """Rows are decoded using the encoding declared on the response."""
        latin1_body = get_testdata('feeds', 'feed-sample4.csv')
        cp852_body = get_testdata('feeds', 'feed-sample5.csv')

        # Feed declared as latin1.
        latin1_response = TextResponse(url="http://example.com/", body=latin1_body, encoding='latin1')
        latin1_rows = list(csviter(latin1_response))
        expected_latin1 = [
            {u'id': u'1', u'name': u'latin1', u'value': u'test'},
            {u'id': u'2', u'name': u'something', u'value': u'\xf1\xe1\xe9\xf3'},
        ]
        self.assertEqual(latin1_rows, expected_latin1)

        # Feed declared as cp852.
        cp852_response = TextResponse(url="http://example.com/", body=cp852_body, encoding='cp852')
        cp852_rows = list(csviter(cp852_response))
        expected_cp852 = [
            {u'id': u'1', u'name': u'cp852', u'value': u'test'},
            {u'id': u'2', u'name': u'something', u'value': u'\u255a\u2569\u2569\u2569\u2550\u2550\u2557'},
        ]
        self.assertEqual(cp852_rows, expected_cp852)
    def test_csviter_wrong_quotechar(self):
        """Without a ``quotechar`` argument the single quotes of
        feed-sample6.csv are kept in the parsed keys and values.
        """
        feed = get_testdata('feeds', 'feed-sample6.csv')
        rows = list(csviter(TextResponse(url="http://example.com/", body=feed)))

        expected = [
            {u"'id'": u"1",   u"'name'": u"'alpha'",   u"'value'": u"'foobar'"},
            {u"'id'": u"2",   u"'name'": u"'unicode'", u"'value'": u"'\xfan\xedc\xf3d\xe9\u203d'"},
            {u"'id'": u"'3'", u"'name'": u"'multi'",   u"'value'": u"'foo"},
            {u"'id'": u"4",   u"'name'": u"'empty'",   u"'value'": u""},
        ]
        self.assertEqual(rows, expected)
    def test_csviter_delimiter_binary_response_assume_utf8_encoding(self):
        """A plain ``Response`` (no encoding given) with tab-separated data
        is parsed into the same unicode rows as the comma-separated feed.
        """
        comma_feed = get_testdata('feeds', 'feed-sample3.csv')
        tab_feed = comma_feed.replace(b',', b'\t')
        binary_response = Response(url="http://example.com/", body=tab_feed)
        rows = list(csviter(binary_response, delimiter='\t'))

        expected = [
            {u'id': u'1', u'name': u'alpha',   u'value': u'foobar'},
            {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
            {u'id': u'3', u'name': u'multi',   u'value': FOOBAR_NL},
            {u'id': u'4', u'name': u'empty',   u'value': u''},
        ]
        self.assertEqual(rows, expected)
    def test_csviter_exception(self):
        """After four rows have been consumed, the next ``next()`` call
        raises ``StopIteration``.
        """
        feed = get_testdata('feeds', 'feed-sample3.csv')
        row_iter = csviter(TextResponse(url="http://example.com/", body=feed))

        # Consume the four rows that are expected to be available.
        for _ in range(4):
            next(row_iter)

        self.assertRaises(StopIteration, next, row_iter)
    def parse_rows(self, response):
        """Yield the processed results for every CSV row in ``response``.

        Rows come from ``csviter``, called with this spider's ``delimiter``,
        ``headers`` and ``quotechar``.  Each row (a dict) is passed to
        ``parse_row``; that output is wrapped with ``iterate_spider_output``
        and handed to ``process_results``, whose items are yielded one by one.
        """
        rows = csviter(response, self.delimiter, self.headers, self.quotechar)
        for record in rows:
            parsed = self.parse_row(response, record)
            outputs = iterate_spider_output(parsed)
            for item in self.process_results(response, outputs):
                yield item
Exemple #6
0
    def parse_rows(self, response):
        """Walk the CSV rows of ``response`` and yield what the spider
        produces for them.

        ``csviter`` is called with ``self.delimiter``, ``self.headers`` and
        ``self.quotechar``.  For each row dict, the output of ``parse_row``
        goes through ``iterate_spider_output`` and then ``process_results``;
        every item of that final iterable is yielded.
        """
        row_source = csviter(
            response, self.delimiter, self.headers, self.quotechar)
        for csv_row in row_source:
            row_output = iterate_spider_output(
                self.parse_row(response, csv_row))
            processed = self.process_results(response, row_output)
            for entry in processed:
                yield entry
Exemple #7
0
    def test_csviter_exception(self):
        """The iterator yields four rows and then raises ``StopIteration``."""
        sample_body = get_testdata('feeds', 'feed-sample3.csv')
        sample_response = TextResponse(url="http://example.com/", body=sample_body)

        rows = csviter(sample_response)
        consumed = 0
        while consumed < 4:
            # Each of these calls must succeed; only the fifth may fail.
            next(rows)
            consumed += 1

        self.assertRaises(StopIteration, next, rows)
    def test_csviter_exception(self):
        """The iterator yields four rows and then raises ``StopIteration``.

        Uses the built-in ``next()`` rather than the iterator's ``.next()``
        method: the method only exists on Python 2 iterators, while the
        built-in works on both Python 2.6+ and Python 3.
        """
        body = get_testdata("feeds", "feed-sample3.csv")

        response = TextResponse(url="http://example.com/", body=body)
        # Named ``rows`` so the builtin ``iter`` is not shadowed.
        rows = csviter(response)
        next(rows)
        next(rows)
        next(rows)
        next(rows)

        self.assertRaises(StopIteration, next, rows)
    def test_csviter_quotechar(self):
        """Passing ``quotechar="'"`` strips the single quotes of
        feed-sample6.csv, with the default delimiter and with ``|``.
        """
        comma_body = get_testdata('feeds', 'feed-sample6.csv')
        pipe_body = get_testdata('feeds', 'feed-sample6.csv').replace(b',', b'|')

        # Default (comma) delimiter.
        comma_response = TextResponse(url="http://example.com/", body=comma_body)
        comma_rows = list(csviter(comma_response, quotechar="'"))
        self.assertEqual(comma_rows, [
            {u'id': u'1', u'name': u'alpha',   u'value': u'foobar'},
            {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
            {u'id': u'3', u'name': u'multi',   u'value': FOOBAR_NL},
            {u'id': u'4', u'name': u'empty',   u'value': u''},
        ])

        # Explicit pipe delimiter.
        pipe_response = TextResponse(url="http://example.com/", body=pipe_body)
        pipe_rows = list(csviter(pipe_response, delimiter="|", quotechar="'"))
        self.assertEqual(pipe_rows, [
            {u'id': u'1', u'name': u'alpha',   u'value': u'foobar'},
            {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
            {u'id': u'3', u'name': u'multi',   u'value': FOOBAR_NL},
            {u'id': u'4', u'name': u'empty',   u'value': u''},
        ])
    def test_csviter_headers(self):
        """Headers passed explicitly are used as keys when the body has no
        header line.
        """
        lines = get_testdata('feeds', 'feed-sample3.csv').splitlines()
        # First line holds the column names; the rest is the data body.
        raw_headers = lines[0].split(b',')
        data_body = b'\n'.join(lines[1:])

        response = TextResponse(url="http://example.com/", body=data_body)
        header_names = [raw.decode('utf-8') for raw in raw_headers]
        rows = list(csviter(response, headers=header_names))

        expected = [
            {u'id': u'1', u'name': u'alpha',   u'value': u'foobar'},
            {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
            {u'id': u'3', u'name': u'multi',   u'value': u'foo\nbar'},
            {u'id': u'4', u'name': u'empty',   u'value': u''},
        ]
        self.assertEqual(rows, expected)
    def test_csviter_falserow(self):
        """Appended rows with too few (``a,b``) or too many (``a,b,c,d``)
        fields do not show up in the parsed output.
        """
        valid_feed = get_testdata('feeds', 'feed-sample3.csv')
        malformed_rows = (b'a,b', b'a,b,c,d')
        feed = b'\n'.join((valid_feed,) + malformed_rows)

        rows = list(csviter(TextResponse(url="http://example.com/", body=feed)))

        expected = [
            {u'id': u'1', u'name': u'alpha',   u'value': u'foobar'},
            {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
            {u'id': u'3', u'name': u'multi',   u'value': FOOBAR_NL},
            {u'id': u'4', u'name': u'empty',   u'value': u''},
        ]
        self.assertEqual(rows, expected)
    def test_csviter_headers(self):
        """csviter uses the ``headers`` argument as row keys when the header
        line has been removed from the body.
        """
        feed_lines = get_testdata('feeds', 'feed-sample3.csv').splitlines()
        header_line, data_lines = feed_lines[0], feed_lines[1:]
        column_names = header_line.split(b',')
        headerless_body = b'\n'.join(data_lines)

        response = TextResponse(url="http://example.com/", body=headerless_body)
        decoded_names = [name.decode('utf-8') for name in column_names]
        parsed = list(csviter(response, headers=decoded_names))

        self.assertEqual(parsed, [
            {u'id': u'1', u'name': u'alpha',   u'value': u'foobar'},
            {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
            {u'id': u'3', u'name': u'multi',   u'value': u'foo\nbar'},
            {u'id': u'4', u'name': u'empty',   u'value': u''},
        ])
    def test_csviter_falserow(self):
        """Only the four well-formed rows are returned when rows with the
        wrong number of fields are appended to the feed.
        """
        pieces = (
            get_testdata('feeds', 'feed-sample3.csv'),
            b'a,b',        # too few fields
            b'a,b,c,d',    # too many fields
        )
        response = TextResponse(url="http://example.com/", body=b'\n'.join(pieces))
        parsed = list(csviter(response))

        self.assertEqual(parsed, [
            {u'id': u'1', u'name': u'alpha',   u'value': u'foobar'},
            {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
            {u'id': u'3', u'name': u'multi',   u'value': FOOBAR_NL},
            {u'id': u'4', u'name': u'empty',   u'value': u''},
        ])
    def test_csviter_quotechar(self):
        """With ``quotechar="'"`` the quoted feed parses to unquoted rows,
        both comma-separated and pipe-separated.
        """
        quoted_feed = get_testdata('feeds', 'feed-sample6.csv')
        quoted_pipe_feed = get_testdata('feeds', 'feed-sample6.csv').replace(b',', b'|')

        first_response = TextResponse(url="http://example.com/", body=quoted_feed)
        first_result = list(csviter(first_response, quotechar="'"))
        first_expected = [
            {u'id': u'1', u'name': u'alpha',   u'value': u'foobar'},
            {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
            {u'id': u'3', u'name': u'multi',   u'value': FOOBAR_NL},
            {u'id': u'4', u'name': u'empty',   u'value': u''},
        ]
        self.assertEqual(first_result, first_expected)

        second_response = TextResponse(url="http://example.com/", body=quoted_pipe_feed)
        second_result = list(csviter(second_response, delimiter="|", quotechar="'"))
        second_expected = [
            {u'id': u'1', u'name': u'alpha',   u'value': u'foobar'},
            {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
            {u'id': u'3', u'name': u'multi',   u'value': FOOBAR_NL},
            {u'id': u'4', u'name': u'empty',   u'value': u''},
        ]
        self.assertEqual(second_result, second_expected)
    def test_csviter_encoding(self):
        """Feeds in latin1 and cp852 are decoded according to the encoding
        given to ``TextResponse``.
        """
        sample_latin1 = get_testdata("feeds", "feed-sample4.csv")
        sample_cp852 = get_testdata("feeds", "feed-sample5.csv")

        rows_latin1 = list(
            csviter(TextResponse(url="http://example.com/", body=sample_latin1, encoding="latin1"))
        )
        self.assertEqual(
            rows_latin1,
            [
                {u"id": u"1", u"name": u"latin1", u"value": u"test"},
                {u"id": u"2", u"name": u"something", u"value": u"\xf1\xe1\xe9\xf3"},
            ],
        )

        rows_cp852 = list(
            csviter(TextResponse(url="http://example.com/", body=sample_cp852, encoding="cp852"))
        )
        self.assertEqual(
            rows_cp852,
            [
                {u"id": u"1", u"name": u"cp852", u"value": u"test"},
                {u"id": u"2", u"name": u"something", u"value": u"\u255a\u2569\u2569\u2569\u2550\u2550\u2557"},
            ],
        )
    def test_csviter_encoding(self):
        """The response's declared encoding drives how row values are decoded."""
        latin1_bytes = get_testdata('feeds', 'feed-sample4.csv')
        cp852_bytes = get_testdata('feeds', 'feed-sample5.csv')

        # latin1 sample
        latin1_response = TextResponse(url="http://example.com/", body=latin1_bytes, encoding='latin1')
        latin1_rows = [row for row in csviter(latin1_response)]
        latin1_expected = [
            {'id': '1', 'name': 'latin1', 'value': 'test'},
            {'id': '2', 'name': 'something', 'value': '\xf1\xe1\xe9\xf3'},
        ]
        self.assertEqual(latin1_rows, latin1_expected)

        # cp852 sample
        cp852_response = TextResponse(url="http://example.com/", body=cp852_bytes, encoding='cp852')
        cp852_rows = [row for row in csviter(cp852_response)]
        cp852_expected = [
            {'id': '1', 'name': 'cp852', 'value': 'test'},
            {'id': '2', 'name': 'something', 'value': '\u255a\u2569\u2569\u2569\u2550\u2550\u2557'},
        ]
        self.assertEqual(cp852_rows, cp852_expected)
    def test_csviter_delimiter_binary_response_assume_utf8_encoding(self):
        """A plain ``Response`` holding tab-separated data is parsed into
        unicode rows when ``delimiter="\\t"`` is given.

        The fixture is rewritten with bytes literals: the other tests in this
        file treat the ``get_testdata`` result as bytes, and ``bytes.replace``
        rejects ``str`` arguments on Python 3.  On Python 2 ``b","`` is the
        same object type as ``","``, so behaviour there is unchanged.
        """
        body = get_testdata("feeds", "feed-sample3.csv").replace(b",", b"\t")
        response = Response(url="http://example.com/", body=body)
        csv = csviter(response, delimiter="\t")

        self.assertEqual(
            list(csv),
            [
                {u"id": u"1", u"name": u"alpha", u"value": u"foobar"},
                {u"id": u"2", u"name": u"unicode", u"value": u"\xfan\xedc\xf3d\xe9\u203d"},
                {u"id": u"3", u"name": u"multi", u"value": u"foo\nbar"},
                {u"id": u"4", u"name": u"empty", u"value": u""},
            ],
        )
Exemple #18
0
    def parse_rows(self, response):
        """Yield processed results for each CSV row found in ``response``.

        Rows are produced by ``csviter`` using ``self.delimiter`` and
        ``self.headers``.  ``parse_row`` may return a single ``BaseItem`` or
        ``Request`` (wrapped into a one-element list here) or a list/tuple;
        any other return type raises ``TypeError``.  The resulting sequence is
        passed to ``process_results`` and its items are yielded.
        """
        for record in csviter(response, self.delimiter, self.headers):
            outcome = self.parse_row(response, record)
            if isinstance(outcome, (BaseItem, Request)):
                # A lone item/request is normalised to a list.
                results = [outcome]
            elif isinstance(outcome, (list, tuple)):
                results = outcome
            else:
                raise TypeError('You cannot return an "%s" object from a spider' % type(outcome).__name__)
            for item in self.process_results(response, results):
                yield item
    def test_csviter_defaults(self):
        """With default arguments csviter returns the four sample rows, and
        every key and value is a text (unicode) string.
        """
        feed = get_testdata('feeds', 'feed-sample3.csv')
        rows = list(csviter(TextResponse(url="http://example.com/", body=feed)))

        expected = [
            {u'id': u'1', u'name': u'alpha',   u'value': u'foobar'},
            {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
            {u'id': u'3', u'name': u'multi',   u'value': FOOBAR_NL},
            {u'id': u'4', u'name': u'empty',   u'value': u''},
        ]
        self.assertEqual(rows, expected)

        # Equality alone does not prove the string type, so check it explicitly.
        for row in rows:
            self.assertTrue(all(isinstance(key, six.text_type) for key in row))
            self.assertTrue(all(isinstance(val, six.text_type) for val in row.values()))
    def test_csviter_falserow(self):
        """Appended rows with too few (``a,b``) or too many (``a,b,c,d``)
        fields do not show up in the parsed output.

        The extra rows are joined with bytes literals: the other tests in this
        file treat the ``get_testdata`` result as bytes, and joining bytes
        with a ``str`` separator raises ``TypeError`` on Python 3.  On
        Python 2 the bytes literals are plain ``str``, so nothing changes.
        """
        body = get_testdata("feeds", "feed-sample3.csv")
        body = b"\n".join((body, b"a,b", b"a,b,c,d"))

        response = TextResponse(url="http://example.com/", body=body)
        csv = csviter(response)

        self.assertEqual(
            list(csv),
            [
                {u"id": u"1", u"name": u"alpha", u"value": u"foobar"},
                {u"id": u"2", u"name": u"unicode", u"value": u"\xfan\xedc\xf3d\xe9\u203d"},
                {u"id": u"3", u"name": u"multi", u"value": u"foo\nbar"},
                {u"id": u"4", u"name": u"empty", u"value": u""},
            ],
        )
    def test_csviter_headers(self):
        """Headers passed explicitly are used as keys when the body has no
        header line.

        The fixture is split and re-joined with bytes literals and the header
        names are decoded to text before being passed to ``csviter``.  This
        matches the other ``test_csviter_headers`` variants in this file; the
        ``str`` separators used previously fail on a bytes fixture under
        Python 3.
        """
        sample = get_testdata("feeds", "feed-sample3.csv").splitlines()
        headers, body = sample[0].split(b","), b"\n".join(sample[1:])

        response = TextResponse(url="http://example.com/", body=body)
        csv = csviter(response, headers=[h.decode("utf-8") for h in headers])

        self.assertEqual(
            list(csv),
            [
                {u"id": u"1", u"name": u"alpha", u"value": u"foobar"},
                {u"id": u"2", u"name": u"unicode", u"value": u"\xfan\xedc\xf3d\xe9\u203d"},
                {u"id": u"3", u"name": u"multi", u"value": u"foo\nbar"},
                {u"id": u"4", u"name": u"empty", u"value": u""},
            ],
        )
    def test_csviter_defaults(self):
        """Default-argument parsing of feed-sample3.csv yields four rows made
        entirely of text strings.
        """
        sample_body = get_testdata('feeds', 'feed-sample3.csv')
        sample_response = TextResponse(url="http://example.com/", body=sample_body)

        parsed = list(csviter(sample_response))
        self.assertEqual(parsed, [
            {u'id': u'1', u'name': u'alpha',   u'value': u'foobar'},
            {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
            {u'id': u'3', u'name': u'multi',   u'value': FOOBAR_NL},
            {u'id': u'4', u'name': u'empty',   u'value': u''},
        ])

        # Verify the concrete string type of every key and value.
        text_type = six.text_type
        for entry in parsed:
            keys_are_text = all(isinstance(k, text_type) for k in entry.keys())
            self.assertTrue(keys_are_text)
            values_are_text = all(isinstance(v, text_type) for v in entry.values())
            self.assertTrue(values_are_text)
Exemple #23
0
    def parse_rows(self, response):
        """Iterate the CSV rows of ``response`` and yield the spider's
        processed results.

        ``csviter`` is called with ``self.delimiter`` and ``self.headers``.
        The value returned by ``parse_row`` must be a ``BaseItem``, a
        ``Request``, a list or a tuple; a single item or request is wrapped in
        a list, and anything else raises ``TypeError``.  ``process_results``
        receives the response and that sequence, and its items are yielded.
        """
        single_types = (BaseItem, Request)
        sequence_types = (list, tuple)

        for csv_row in csviter(response, self.delimiter, self.headers):
            parsed = self.parse_row(response, csv_row)
            if isinstance(parsed, single_types):
                parsed = [parsed]
            elif not isinstance(parsed, sequence_types):
                message = ('You cannot return an "%s" object from a spider' %
                           type(parsed).__name__)
                raise TypeError(message)
            for entry in self.process_results(response, parsed):
                yield entry
    def test_csviter_defaults(self):
        """With default arguments csviter returns the four sample rows, and
        every key and value is a text (unicode) string.

        Uses ``assertTrue`` instead of the deprecated ``assert_`` alias, and
        derives the text type from a unicode literal so the check does not
        depend on the Python 2-only ``unicode`` builtin.
        """
        body = get_testdata("feeds", "feed-sample3.csv")
        response = TextResponse(url="http://example.com/", body=body)
        csv = csviter(response)

        result = list(csv)
        self.assertEqual(
            result,
            [
                {u"id": u"1", u"name": u"alpha", u"value": u"foobar"},
                {u"id": u"2", u"name": u"unicode", u"value": u"\xfan\xedc\xf3d\xe9\u203d"},
                {u"id": u"3", u"name": u"multi", u"value": u"foo\nbar"},
                {u"id": u"4", u"name": u"empty", u"value": u""},
            ],
        )

        # ``unicode`` on Python 2, ``str`` on Python 3.
        text_type = type(u"")

        # Equality alone does not prove the string type, so check it explicitly.
        for result_row in result:
            self.assertTrue(all(isinstance(k, text_type) for k in result_row.keys()))
            self.assertTrue(all(isinstance(v, text_type) for v in result_row.values()))
Exemple #25
0
    def test_csviter_defaults(self):
        """Default-argument parsing of feed-sample3.csv yields the four
        expected rows, with ``str`` keys and values throughout.
        """
        feed = get_testdata("feeds", "feed-sample3.csv")
        rows = list(csviter(TextResponse(url="http://example.com/", body=feed)))

        expected = [
            {"id": "1", "name": "alpha", "value": "foobar"},
            {"id": "2", "name": "unicode", "value": "\xfan\xedc\xf3d\xe9\u203d"},
            {"id": "3", "name": "multi", "value": FOOBAR_NL},
            {"id": "4", "name": "empty", "value": ""},
        ]
        self.assertEqual(rows, expected)

        # Equality alone does not prove the string type, so check it explicitly.
        for row in rows:
            self.assertTrue(all(isinstance(key, str) for key in row))
            self.assertTrue(all(isinstance(val, str) for val in row.values()))
Exemple #26
0
    def test_csviter_delimiter_binary_response_assume_utf8_encoding(self):
        """Tab-separated data in a plain ``Response`` (no encoding given)
        parses into the expected unicode rows.
        """
        tab_separated = get_testdata('feeds', 'feed-sample3.csv').replace(b',', b'\t')
        raw_response = Response(url="http://example.com/", body=tab_separated)
        parsed = list(csviter(raw_response, delimiter='\t'))

        expected = [
            {u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
            {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
            {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
            {u'id': u'4', u'name': u'empty', u'value': u''},
        ]
        self.assertEqual(parsed, expected)
Exemple #27
0
    def test_csviter_wrong_quotechar(self):
        """Parsing the single-quoted feed without ``quotechar`` leaves the
        quote characters inside the resulting keys and values.
        """
        quoted_feed = get_testdata('feeds', 'feed-sample6.csv')
        response = TextResponse(url="http://example.com/", body=quoted_feed)
        parsed = list(csviter(response))

        expected = [
            {u"'id'": u"1", u"'name'": u"'alpha'", u"'value'": u"'foobar'"},
            {u"'id'": u"2", u"'name'": u"'unicode'", u"'value'": u"'\xfan\xedc\xf3d\xe9\u203d'"},
            {u"'id'": u"'3'", u"'name'": u"'multi'", u"'value'": u"'foo"},
            {u"'id'": u"4", u"'name'": u"'empty'", u"'value'": u""},
        ]
        self.assertEqual(parsed, expected)
Exemple #28
0
    def test_csviter_delimiter(self):
        """A tab-separated ``TextResponse`` is parsed correctly when
        ``delimiter='\\t'`` is passed.
        """
        comma_feed = get_testdata('feeds', 'feed-sample3.csv')
        tab_feed = comma_feed.replace(b',', b'\t')
        rows = list(csviter(
            TextResponse(url="http://example.com/", body=tab_feed),
            delimiter='\t',
        ))

        expected = [
            {'id': '1', 'name': 'alpha', 'value': 'foobar'},
            {'id': '2', 'name': 'unicode', 'value': '\xfan\xedc\xf3d\xe9\u203d'},
            {'id': '3', 'name': 'multi', 'value': "foo\nbar"},
            {'id': '4', 'name': 'empty', 'value': ''},
        ]
        self.assertEqual(rows, expected)
Exemple #29
0
 def parse_rows(self, response):
     """Yield processed results for every CSV row in ``response``.

     Rows come from ``csviter`` (using this spider's ``delimiter``,
     ``headers`` and ``quotechar``); each is passed to ``parse_row``, the
     output wrapped by ``iterate_spider_output`` and then filtered through
     ``process_results``.
     """
     row_iter = csviter(response, self.delimiter, self.headers, self.quotechar)
     for record in row_iter:
         parsed = self.parse_row(response, record)
         for item in self.process_results(response, iterate_spider_output(parsed)):
             yield item
Exemple #30
0
    def test_csviter_quotechar(self):
        """With ``quotechar="'"`` the single-quoted feed parses to unquoted
        rows, for the default delimiter and for ``|``.
        """
        comma_feed = get_testdata("feeds", "feed-sample6.csv")
        pipe_feed = get_testdata("feeds", "feed-sample6.csv").replace(b",", b"|")

        # Default (comma) delimiter.
        comma_response = TextResponse(url="http://example.com/", body=comma_feed)
        comma_rows = list(csviter(comma_response, quotechar="'"))
        comma_expected = [
            {"id": "1", "name": "alpha", "value": "foobar"},
            {"id": "2", "name": "unicode", "value": "\xfan\xedc\xf3d\xe9\u203d"},
            {"id": "3", "name": "multi", "value": FOOBAR_NL},
            {"id": "4", "name": "empty", "value": ""},
        ]
        self.assertEqual(comma_rows, comma_expected)

        # Explicit pipe delimiter.
        pipe_response = TextResponse(url="http://example.com/", body=pipe_feed)
        pipe_rows = list(csviter(pipe_response, delimiter="|", quotechar="'"))
        pipe_expected = [
            {"id": "1", "name": "alpha", "value": "foobar"},
            {"id": "2", "name": "unicode", "value": "\xfan\xedc\xf3d\xe9\u203d"},
            {"id": "3", "name": "multi", "value": FOOBAR_NL},
            {"id": "4", "name": "empty", "value": ""},
        ]
        self.assertEqual(pipe_rows, pipe_expected)