Esempio n. 1
0
    def test_row_generator_from_file_with_text_columns(self):
        """Rows yielded by row_generator_from_file equal the source rows."""
        rows = make_data(2, 3, text_columns=(1,))
        source = make_file(rows)
        generated = row_generator_from_file(source.name)

        matched = 0
        for matched, (want, got) in enumerate(zip(rows, generated), start=1):
            self.assertListEqual(want, got)

        # zip() stops at the shorter input, so this catches a generator
        # that yields fewer rows than `rows` holds.
        self.assertEqual(matched, len(rows))
Esempio n. 2
0
    def test_row_generator_from_file_with_text_columns(self):
        """Compare each generated row against the matching row of the input."""
        table = make_data(2, 3, text_columns=(1,))
        handle = make_file(table)
        reader = row_generator_from_file(handle.name)

        compared = 0
        row_pairs = zip(table, reader)
        for expected_row, actual_row in row_pairs:
            self.assertListEqual(expected_row, actual_row)
            compared += 1

        # Guards against the generator running dry before the table does.
        self.assertEqual(compared, len(table))
Esempio n. 3
0
    def test_row_generator_from_quoted_file(self):
        """Generated rows equal the data with quotes removed from column 1."""
        quoted_rows = make_data(2, 3, text_columns=(1,), with_quotes=True)
        source = make_file(quoted_rows)
        # Expected rows: remove_quotes applied to column 1 of the same data.
        plain_rows = remove_quotes(quoted_rows, (1,))
        generated = row_generator_from_file(source.name)

        matched = 0
        pairs = zip(plain_rows, generated)
        for matched, (want, got) in enumerate(pairs, start=1):
            self.assertListEqual(want, got)

        self.assertEqual(matched, len(quoted_rows))
Esempio n. 4
0
    def test_row_generator_from_quoted_file(self):
        """Rows built with with_quotes=True are compared in unquoted form."""
        table = make_data(2, 3, text_columns=(1,), with_quotes=True)
        handle = make_file(table)
        expected_rows = remove_quotes(table, (1,))
        reader = row_generator_from_file(handle.name)

        compared = 0
        for expected_row, actual_row in zip(expected_rows, reader):
            self.assertListEqual(expected_row, actual_row)
            compared += 1

        # The count is checked against the original table, not the
        # unquoted copy, exactly as the comparison loop leaves it.
        self.assertEqual(compared, len(table))
Esempio n. 5
0
    def test_sentence_splitting(self):
        """A two-sentence cell becomes two token lists under decap=True."""
        rows = make_data(1, 3, text_columns=(1,))
        rows[0][1] = "This is a sentence. And it is from cell 1."
        source = make_file(rows, sep='\t')
        generated = row_generator_from_file(source.name, dialect='plain')
        extractor = Extractor(generated, decap=True)

        # After make_file has been called, reuse `rows` as the expectation:
        # one token list per sentence, each with a lowercase first token.
        sentences = [
            ['this', 'is', 'a', 'sentence', '.'],
            ['and', 'it', 'is', 'from', 'cell', '1', '.'],
        ]
        rows[0][1] = sentences

        last_index = -1
        for last_index, produced in enumerate(extractor):
            wanted = list(rows[last_index])
            self.assertEqual(wanted, produced)

        # Exactly one row must have been produced (final index 0).
        self.assertEqual(0, last_index)
Esempio n. 6
0
    def _decap_lower_helper(self, decap=False, lower=False):
        """Run Extractor with the given flags and check the rows it yields.

        The expected text cell hard-codes a lowercase first token ('this'),
        so callers presumably pass a flag combination that lowercases it --
        confirm against the calling tests.
        """
        rows = make_data(2, 3, text_columns=(1,))
        source = make_file(rows, sep='\t')
        generated = row_generator_from_file(source.name, dialect='plain')
        extractor = Extractor(generated, decap=decap, lower=lower)

        wanted_rows = remove_quotes(rows, (1,))
        for index in (0, 1):
            wanted_rows[index][1] = [['this', 'is', 'cell', '1', '.']]

        last_index = -1
        for last_index, produced in enumerate(extractor):
            self.assertEqual(list(wanted_rows[last_index]), produced)

        # Final index 1 means exactly two rows were produced.
        self.assertEqual(1, last_index)
Esempio n. 7
0
    def test_standard_process(self):
        """Extractor with default options keeps the original capitalisation."""
        rows = make_data(2, 3, text_columns=(1,), with_quotes=True)
        source = make_file(rows)
        generated = row_generator_from_file(source.name)
        extractor = Extractor(generated)

        # Expected output: unquoted rows whose text cell is a list holding
        # one token list.
        wanted_rows = remove_quotes(rows, (1,))
        for index in (0, 1):
            wanted_rows[index][1] = [['This', 'is', 'cell', '1', '.']]

        last_index = -1
        for last_index, produced in enumerate(extractor):
            self.assertEqual(list(wanted_rows[last_index]), produced)

        self.assertEqual(1, last_index)
Esempio n. 8
0
    def test_sentence_splitting(self):
        """Check that one cell with two sentences yields two token lists."""
        table = make_data(1, 3, text_columns=(1,))
        table[0][1] = "This is a sentence. And it is from cell 1."
        handle = make_file(table, sep='\t')
        reader = row_generator_from_file(handle.name, dialect='plain')
        extractor = Extractor(reader, decap=True)

        # Overwrite the raw text with the expected result once make_file
        # has been called with the original string.
        first_sentence = ['this', 'is', 'a', 'sentence', '.']
        second_sentence = ['and', 'it', 'is', 'from', 'cell', '1', '.']
        table[0][1] = [first_sentence, second_sentence]

        position = -1
        for actual_row in extractor:
            position += 1
            self.assertEqual(list(table[position]), actual_row)

        # position == 0 means a single row came out of the extractor.
        self.assertEqual(0, position)
Esempio n. 9
0
    def _decap_lower_helper(self, decap=False, lower=False):
        """Shared check for Extractor's ``decap`` and ``lower`` options.

        NOTE(review): the expectation below always starts with lowercase
        'this'; it looks like callers are meant to enable at least one of
        the two flags -- verify against the tests that use this helper.
        """
        table = make_data(2, 3, text_columns=(1,))
        handle = make_file(table, sep='\t')
        reader = row_generator_from_file(handle.name, dialect='plain')
        extractor = Extractor(reader, decap=decap, lower=lower)

        expected_rows = remove_quotes(table, (1,))
        expected_rows[0][1] = [['this', 'is', 'cell', '1', '.']]
        expected_rows[1][1] = [['this', 'is', 'cell', '1', '.']]

        position = -1
        for actual_row in extractor:
            position += 1
            self.assertEqual(list(expected_rows[position]), actual_row)

        # Two rows in, two rows out: the last index seen must be 1.
        self.assertEqual(1, position)
Esempio n. 10
0
    def test_standard_process(self):
        """Default Extractor output is compared with case-preserved tokens."""
        table = make_data(2, 3, text_columns=(1,), with_quotes=True)
        handle = make_file(table)
        reader = row_generator_from_file(handle.name)
        extractor = Extractor(reader)

        expected_rows = remove_quotes(table, (1,))
        expected_rows[0][1] = [['This', 'is', 'cell', '1', '.']]
        expected_rows[1][1] = [['This', 'is', 'cell', '1', '.']]

        position = -1
        for actual_row in extractor:
            position += 1
            self.assertEqual(list(expected_rows[position]), actual_row)

        # Last index 1 <=> exactly two rows were yielded.
        self.assertEqual(1, position)
Esempio n. 11
0
    def test_tile_process(self):
        """With has_title=True the first row is exposed as ``names``."""
        names = ("id", "text", "class")
        rows = make_data(2, 3, text_columns=(1,))
        rows.insert(0, names)
        source = make_file(rows, sep='\t')
        generated = row_generator_from_file(source.name, dialect='plain')
        extractor = Extractor(generated, has_title=True)

        # Rows 1 and 2 are the data rows; row 0 is the header tuple.
        for index in (1, 2):
            rows[index][1] = [['This', 'is', 'cell', '1', '.']]

        self.assertEqual(names, extractor.names)

        # Enumerate from 1 so the index lines up with `rows`, whose
        # element 0 is the header.
        last_index = -1
        for last_index, produced in enumerate(extractor, 1):
            wanted = list(rows[last_index])
            self.assertEqual(wanted, produced)

        self.assertEqual(2, last_index)
Esempio n. 12
0
    def test_plain_row_generator_with_escapechar(self):
        """A backslash-escaped tab stays in its cell; a bare tab splits it."""
        rows = make_data(2, 3)
        rows[0][1] = "cell\\\tA"
        rows[1][2] = "cell\tB"
        source = make_file(rows, sep='\t')
        generated = row_generator_from_file(source.name, dialect='plain')

        # Turn `rows` into the expectation: the escaped tab is kept inside
        # the cell, while the unescaped one produces an additional cell.
        rows[0][1] = "cell\tA"
        rows[1][2] = "cell"
        rows[1].append("B")

        matched = 0
        for matched, (want, got) in enumerate(zip(rows, generated), start=1):
            self.assertListEqual(want, got)

        self.assertEqual(matched, len(rows))
Esempio n. 13
0
    def test_tile_process(self):
        """Header row goes to ``Extractor.names``; data rows are yielded."""
        names = ("id", "text", "class")
        table = make_data(2, 3, text_columns=(1,))
        table.insert(0, names)
        handle = make_file(table, sep='\t')
        reader = row_generator_from_file(handle.name, dialect='plain')
        extractor = Extractor(reader, has_title=True)

        expected_cell_a = [['This', 'is', 'cell', '1', '.']]
        expected_cell_b = [['This', 'is', 'cell', '1', '.']]
        table[1][1] = expected_cell_a
        table[2][1] = expected_cell_b

        self.assertEqual(names, extractor.names)

        # Index starts at 1 because table[0] holds the header tuple.
        row_number = -1
        for row_number, actual_row in enumerate(extractor, 1):
            self.assertEqual(list(table[row_number]), actual_row)

        self.assertEqual(2, row_number)
Esempio n. 14
0
    def test_plain_row_generator_with_escapechar(self):
        """Check tab handling of the 'plain' dialect with and without escape."""
        table = make_data(2, 3)
        table[0][1] = "cell\\\tA"
        table[1][2] = "cell\tB"
        handle = make_file(table, sep='\t')
        reader = row_generator_from_file(handle.name, dialect='plain')

        # Expected rows after reading: backslash + tab collapses to a
        # literal tab within the cell; a bare tab acts as a separator and
        # adds a fourth cell to the second row.
        table[0][1] = "cell\tA"
        table[1][2] = "cell"
        table[1].append("B")

        compared = 0
        for expected_row, actual_row in zip(table, reader):
            self.assertListEqual(expected_row, actual_row)
            compared += 1

        self.assertEqual(compared, len(table))
Esempio n. 15
0
    def test_row_generator_with_escapechar(self):
        """Quoted cells may hold commas and doubled quote characters."""
        rows = make_data(3, 3, text_columns=(1,), with_quotes=True)
        rows[0][1] = '"Cell with comma, here A."'
        rows[1][1] = '"Cell with quote "" char."'
        rows[2][1] = '"Cell with both "","" chars."'
        source = make_file(rows)
        wanted_rows = remove_quotes(rows, (1,))
        generated = row_generator_from_file(source.name)

        # A doubled quote inside a quoted cell is expected to be read back
        # as a single quote character.
        wanted_rows[1][1] = 'Cell with quote " char.'
        wanted_rows[2][1] = 'Cell with both "," chars.'

        matched = 0
        pairs = zip(wanted_rows, generated)
        for matched, (want, got) in enumerate(pairs, start=1):
            self.assertListEqual(want, got)

        self.assertEqual(matched, len(rows))
Esempio n. 16
0
    def test_row_generator_with_escapechar(self):
        """Check reading of quoted cells with embedded commas and quotes."""
        table = make_data(3, 3, text_columns=(1,), with_quotes=True)
        table[0][1] = '"Cell with comma, here A."'
        table[1][1] = '"Cell with quote "" char."'
        table[2][1] = '"Cell with both "","" chars."'
        handle = make_file(table)
        expected_rows = remove_quotes(table, (1,))
        reader = row_generator_from_file(handle.name)

        # Rows 1 and 2 contain doubled quotes; the expectation holds the
        # single-quote form for them. Row 0 keeps what remove_quotes gave.
        expected_rows[1][1] = 'Cell with quote " char.'
        expected_rows[2][1] = 'Cell with both "," chars.'

        compared = 0
        for expected_row, actual_row in zip(expected_rows, reader):
            self.assertListEqual(expected_row, actual_row)
            compared += 1

        self.assertEqual(compared, len(table))