コード例 #1
0
ファイル: test_extract.py プロジェクト: shannonyu/classipy
    def test_row_generator_from_file_with_text_columns(self):
        """Rows read back from the written file must equal the source rows.

        ``make_data`` supplies the rows (presumably 2 rows x 3 columns with
        column 1 holding text -- confirm against the helper), ``make_file``
        writes them out and ``row_generator_from_file`` reads them back by
        file name.
        """
        data = make_data(2, 3, text_columns=(1, ))
        # Keep a reference to the file object for the whole test; only its
        # name is handed to the reader.
        tmp_file = make_file(data)
        # Materialize the rows first: zip() stops at the shorter input, so
        # zipping against the generator directly would hide surplus rows.
        received_rows = list(row_generator_from_file(tmp_file.name))

        for expected, received in zip(data, received_rows):
            self.assertListEqual(expected, received)

        # Catches both missing and surplus rows.
        self.assertEqual(len(data), len(received_rows))
コード例 #2
0
ファイル: test_extract.py プロジェクト: conerade67/classipy
    def test_row_generator_from_file_with_text_columns(self):
        """Check that the file reader reproduces the rows that were written.

        The rows come from ``make_data`` (presumably 2 rows x 3 columns, text
        in column 1 -- confirm against the helper), are written by
        ``make_file`` and read back through ``row_generator_from_file``.
        """
        data = make_data(2, 3, text_columns=(1,))
        # The file object stays referenced until the test ends; the reader
        # only receives its name.
        tmp_file = make_file(data)
        # Collect every produced row up front. Zipping the expected rows
        # against the live generator would silently ignore extra rows because
        # zip() ends as soon as the expected rows run out.
        received_rows = list(row_generator_from_file(tmp_file.name))

        for expected, received in zip(data, received_rows):
            self.assertListEqual(expected, received)

        # Fails for too few as well as too many rows.
        self.assertEqual(len(data), len(received_rows))
コード例 #3
0
ファイル: test_extract.py プロジェクト: shannonyu/classipy
    def test_row_generator_from_quoted_file(self):
        """Quoted text cells must be read back without their quotes.

        The data is generated with ``with_quotes=True`` and written to a
        file; the expected rows are the same data passed through
        ``remove_quotes`` for column 1.
        """
        data = make_data(2, 3, text_columns=(1, ), with_quotes=True)
        # Keep a reference to the file object for the whole test; only its
        # name is handed to the reader.
        tmp_file = make_file(data)
        unquoted = remove_quotes(data, (1, ))
        # Materialize the rows first: zip() stops at the shorter input, so
        # zipping against the generator directly would hide surplus rows.
        received_rows = list(row_generator_from_file(tmp_file.name))

        for expected, received in zip(unquoted, received_rows):
            self.assertListEqual(expected, received)

        # The reader must yield exactly one row per written row, and every
        # one of them must have been compared against an expected row.
        self.assertEqual(len(data), len(received_rows))
        self.assertEqual(len(unquoted), len(received_rows))
コード例 #4
0
ファイル: test_extract.py プロジェクト: conerade67/classipy
    def test_row_generator_from_quoted_file(self):
        """Check that quotes around text cells are stripped when reading.

        Rows are generated with ``with_quotes=True`` and written out; the
        reader's output is compared with ``remove_quotes`` applied to
        column 1 of the same rows.
        """
        data = make_data(2, 3, text_columns=(1,), with_quotes=True)
        # The file object stays referenced until the test ends; the reader
        # only receives its name.
        tmp_file = make_file(data)
        unquoted = remove_quotes(data, (1,))
        # Collect every produced row up front. Zipping the expected rows
        # against the live generator would silently ignore extra rows because
        # zip() ends as soon as the expected rows run out.
        received_rows = list(row_generator_from_file(tmp_file.name))

        for expected, received in zip(unquoted, received_rows):
            self.assertListEqual(expected, received)

        # Row count must match both the written data and the expected rows,
        # so neither missing nor surplus rows slip through.
        self.assertEqual(len(data), len(received_rows))
        self.assertEqual(len(unquoted), len(received_rows))
コード例 #5
0
ファイル: test_extract.py プロジェクト: shannonyu/classipy
    def test_sentence_splitting(self):
        """A two-sentence cell must come out of the Extractor as two token lists.

        The single data row gets a two-sentence string in column 1, is
        written tab-separated and read back with the 'plain' dialect. The
        Extractor runs with ``decap=True``; the expected tokens start each
        sentence in lower case.
        """
        rows = make_data(1, 3, text_columns=(1, ))
        rows[0][1] = "This is a sentence. And it is from cell 1."
        tmp = make_file(rows, sep='\t')
        source_rows = row_generator_from_file(tmp.name, dialect='plain')
        extractor = Extractor(source_rows, decap=True)
        # From here on ``rows`` holds the expected output: the raw text is
        # replaced by one token list per sentence.
        rows[0][1] = [
            ['this', 'is', 'a', 'sentence', '.'],
            ['and', 'it', 'is', 'from', 'cell', '1', '.'],
        ]
        last_index = -1

        for last_index, produced in enumerate(extractor):
            expected_row = list(rows[last_index])
            self.assertEqual(expected_row, produced)

        # The last index seen must be 0; -1 would mean nothing was produced.
        self.assertEqual(0, last_index)
コード例 #6
0
ファイル: test_extract.py プロジェクト: shannonyu/classipy
    def _decap_lower_helper(self, decap=False, lower=False):
        """Run the Extractor with the given flags and check its rows.

        The expected tokens start with lower-case 'this' whichever flag is
        passed, so callers are presumably expected to enable at least one of
        ``decap``/``lower`` -- NOTE(review): confirm against the callers.

        :param decap: forwarded to ``Extractor`` as ``decap``
        :param lower: forwarded to ``Extractor`` as ``lower``
        """
        rows = make_data(2, 3, text_columns=(1, ))
        tmp = make_file(rows, sep='\t')
        source_rows = row_generator_from_file(tmp.name, dialect='plain')
        extractor = Extractor(source_rows, decap=decap, lower=lower)
        expected_rows = remove_quotes(rows, (1, ))
        # Both rows are expected to tokenize to the same single sentence;
        # each row gets its own freshly built token list.
        for row_index in (0, 1):
            expected_rows[row_index][1] = [['this', 'is', 'cell', '1', '.']]
        last_index = -1

        for last_index, produced in enumerate(extractor):
            expected_row = list(expected_rows[last_index])
            self.assertEqual(expected_row, produced)

        # The last index seen must be 1, i.e. two rows were produced.
        self.assertEqual(1, last_index)
コード例 #7
0
ファイル: test_extract.py プロジェクト: shannonyu/classipy
    def test_standard_process(self):
        """Extractor with default options must tokenize quoted text cells.

        Rows are generated with quotes, written with ``make_file`` defaults
        and read back with the default dialect. The expected text cell is a
        single tokenized sentence with the original capitalization.
        """
        rows = make_data(2, 3, text_columns=(1, ), with_quotes=True)
        tmp = make_file(rows)
        source_rows = row_generator_from_file(tmp.name)
        extractor = Extractor(source_rows)
        expected_rows = remove_quotes(rows, (1, ))
        # Both rows are expected to tokenize to the same single sentence;
        # each row gets its own freshly built token list.
        for row_index in (0, 1):
            expected_rows[row_index][1] = [['This', 'is', 'cell', '1', '.']]
        last_index = -1

        for last_index, produced in enumerate(extractor):
            expected_row = list(expected_rows[last_index])
            self.assertEqual(expected_row, produced)

        # The last index seen must be 1, i.e. two rows were produced.
        self.assertEqual(1, last_index)
コード例 #8
0
ファイル: test_extract.py プロジェクト: conerade67/classipy
    def test_sentence_splitting(self):
        """Check that a cell holding two sentences is split into two token lists.

        One data row is created, its column 1 is overwritten with a
        two-sentence string, and the row is written tab-separated and read
        with the 'plain' dialect. The Extractor is built with ``decap=True``
        and the expected tokens begin each sentence in lower case.
        """
        table = make_data(1, 3, text_columns=(1,))
        table[0][1] = "This is a sentence. And it is from cell 1."
        handle = make_file(table, sep='\t')
        reader = row_generator_from_file(handle.name, dialect='plain')
        extractor = Extractor(reader, decap=True)
        # Turn ``table`` into the expectation: the raw string becomes one
        # token list per sentence.
        first_sentence = ['this', 'is', 'a', 'sentence', '.']
        second_sentence = ['and', 'it', 'is', 'from', 'cell', '1', '.']
        table[0][1] = [first_sentence, second_sentence]
        position = -1

        for position, actual in enumerate(extractor):
            wanted = list(table[position])
            self.assertEqual(wanted, actual)

        # -1 would mean the Extractor produced nothing; 0 is the only row.
        self.assertEqual(0, position)
コード例 #9
0
ファイル: test_extract.py プロジェクト: conerade67/classipy
    def _decap_lower_helper(self, decap=False, lower=False):
        """Shared check for the ``decap`` and ``lower`` Extractor options.

        The expectation is hard-wired to a sentence starting with lower-case
        'this', independent of the flag values -- NOTE(review): presumably
        every caller enables at least one flag; confirm.

        :param decap: passed through to ``Extractor(decap=...)``
        :param lower: passed through to ``Extractor(lower=...)``
        """
        table = make_data(2, 3, text_columns=(1,))
        handle = make_file(table, sep='\t')
        reader = row_generator_from_file(handle.name, dialect='plain')
        extractor = Extractor(reader, decap=decap, lower=lower)
        wanted_rows = remove_quotes(table, (1,))
        # Same expected sentence for both rows, built separately per row.
        for row_no in (0, 1):
            wanted_rows[row_no][1] = [['this', 'is', 'cell', '1', '.']]
        position = -1

        for position, actual in enumerate(extractor):
            wanted = list(wanted_rows[position])
            self.assertEqual(wanted, actual)

        # Index 1 as the last position means two rows were produced.
        self.assertEqual(1, position)
コード例 #10
0
ファイル: test_extract.py プロジェクト: conerade67/classipy
    def test_standard_process(self):
        """Check the Extractor's default behaviour on quoted text cells.

        Quoted rows are written with ``make_file`` defaults and read with the
        default dialect. Each expected text cell is one tokenized sentence
        that keeps the capital 'This'.
        """
        table = make_data(2, 3, text_columns=(1,), with_quotes=True)
        handle = make_file(table)
        reader = row_generator_from_file(handle.name)
        extractor = Extractor(reader)
        wanted_rows = remove_quotes(table, (1,))
        # Same expected sentence for both rows, built separately per row.
        for row_no in (0, 1):
            wanted_rows[row_no][1] = [['This', 'is', 'cell', '1', '.']]
        position = -1

        for position, actual in enumerate(extractor):
            wanted = list(wanted_rows[position])
            self.assertEqual(wanted, actual)

        # Index 1 as the last position means two rows were produced.
        self.assertEqual(1, position)
コード例 #11
0
ファイル: test_extract.py プロジェクト: shannonyu/classipy
    def test_tile_process(self):
        """Extractor with ``has_title=True`` must expose the header row as names.

        A header tuple is inserted as row 0 before writing the tab-separated
        file. The Extractor's ``names`` must equal that tuple, and the rows it
        yields are compared with the data rows from index 1 onwards.
        """
        header = ("id", "text", "class")
        rows = make_data(2, 3, text_columns=(1, ))
        rows.insert(0, header)
        tmp = make_file(rows, sep='\t')
        source_rows = row_generator_from_file(tmp.name, dialect='plain')
        extractor = Extractor(source_rows, has_title=True)
        # Rows 1 and 2 are the data rows (row 0 is the header); their text
        # cell is expected as one tokenized sentence.
        for row_index in (1, 2):
            rows[row_index][1] = [['This', 'is', 'cell', '1', '.']]
        self.assertEqual(header, extractor.names)
        last_index = -1

        # Start counting at 1 so the index lines up with ``rows`` past the
        # header.
        for last_index, produced in enumerate(extractor, start=1):
            expected_row = list(rows[last_index])
            self.assertEqual(expected_row, produced)

        # The last index seen must be 2, i.e. both data rows were produced.
        self.assertEqual(2, last_index)
コード例 #12
0
ファイル: test_extract.py プロジェクト: shannonyu/classipy
    def test_plain_row_generator_with_escapechar(self):
        """A backslash-escaped tab stays in its cell; a bare tab splits it.

        Row 0 gets a cell with a backslash followed by a tab, row 1 a cell
        with an unescaped tab. After writing tab-separated and reading with
        the 'plain' dialect, row 0 is expected to keep the tab inside the
        cell (without the backslash) and row 1 is expected to gain an extra
        column.
        """
        data = make_data(2, 3)
        data[0][1] = "cell\\\tA"
        data[1][2] = "cell\tB"
        # Keep a reference to the file object for the whole test; only its
        # name is handed to the reader.
        tmp_file = make_file(data, sep='\t')
        row_gen = row_generator_from_file(tmp_file.name, dialect='plain')
        # From here on ``data`` holds the expected rows rather than the
        # written ones.
        data[0][1] = "cell\tA"
        data[1][2] = "cell"
        data[1].append("B")
        # Materialize the rows first: zip() stops at the shorter input, so
        # zipping against the generator directly would hide surplus rows.
        received_rows = list(row_gen)

        for expected, received in zip(data, received_rows):
            self.assertListEqual(expected, received)

        # Catches both missing and surplus rows.
        self.assertEqual(len(data), len(received_rows))
コード例 #13
0
ファイル: test_extract.py プロジェクト: conerade67/classipy
    def test_tile_process(self):
        """Check header handling when the Extractor is told the file has a title.

        The header tuple is placed in front of the generated rows, everything
        is written tab-separated, and the Extractor is created with
        ``has_title=True``. Its ``names`` attribute must match the header and
        its rows must match the data rows that follow the header.
        """
        column_names = ("id", "text", "class")
        table = make_data(2, 3, text_columns=(1,))
        table.insert(0, column_names)
        handle = make_file(table, sep='\t')
        reader = row_generator_from_file(handle.name, dialect='plain')
        extractor = Extractor(reader, has_title=True)
        # Index 0 is the header, so the data rows to adjust are 1 and 2.
        for row_no in (1, 2):
            table[row_no][1] = [['This', 'is', 'cell', '1', '.']]
        self.assertEqual(column_names, extractor.names)
        position = -1

        # Enumerate from 1 to skip over the header row in ``table``.
        for position, actual in enumerate(extractor, start=1):
            wanted = list(table[position])
            self.assertEqual(wanted, actual)

        # Index 2 as the last position means both data rows were produced.
        self.assertEqual(2, position)
コード例 #14
0
ファイル: test_extract.py プロジェクト: conerade67/classipy
    def test_plain_row_generator_with_escapechar(self):
        """Check escaped versus bare tabs when reading the 'plain' dialect.

        One cell is written with a backslash before a tab, another with a
        bare tab. The expectation is that the escaped tab remains part of its
        cell (backslash removed) while the bare tab splits its cell, giving
        row 1 one extra column.
        """
        data = make_data(2, 3)
        data[0][1] = "cell\\\tA"
        data[1][2] = "cell\tB"
        # The file object stays referenced until the test ends; the reader
        # only receives its name.
        tmp_file = make_file(data, sep='\t')
        row_gen = row_generator_from_file(tmp_file.name, dialect='plain')
        # The file is already written; ``data`` is now rewritten into the
        # rows the reader is expected to produce.
        data[0][1] = "cell\tA"
        data[1][2] = "cell"
        data[1].append("B")
        # Collect every produced row up front. Zipping the expected rows
        # against the live generator would silently ignore extra rows because
        # zip() ends as soon as the expected rows run out.
        received_rows = list(row_gen)

        for expected, received in zip(data, received_rows):
            self.assertListEqual(expected, received)

        # Fails for too few as well as too many rows.
        self.assertEqual(len(data), len(received_rows))
コード例 #15
0
ファイル: test_extract.py プロジェクト: shannonyu/classipy
    def test_row_generator_with_escapechar(self):
        """Quoted cells with commas and doubled quotes must be read correctly.

        Three quoted text cells are written: one containing a comma, one
        containing a doubled quote and one containing both. The expected
        values keep the comma and collapse each doubled quote to a single
        quote character.
        """
        data = make_data(3, 3, text_columns=(1, ), with_quotes=True)
        data[0][1] = '"Cell with comma, here A."'
        data[1][1] = '"Cell with quote "" char."'
        data[2][1] = '"Cell with both "","" chars."'
        # Keep a reference to the file object for the whole test; only its
        # name is handed to the reader.
        tmp_file = make_file(data)
        unquoted = remove_quotes(data, (1, ))
        row_gen = row_generator_from_file(tmp_file.name)
        # Rows 1 and 2 get their expected text set explicitly: the doubled
        # quotes must come back as single quote characters.
        unquoted[1][1] = 'Cell with quote " char.'
        unquoted[2][1] = 'Cell with both "," chars.'
        # Materialize the rows first: zip() stops at the shorter input, so
        # zipping against the generator directly would hide surplus rows.
        received_rows = list(row_gen)

        for expected, received in zip(unquoted, received_rows):
            self.assertListEqual(expected, received)

        # The reader must yield exactly one row per written row, and every
        # one of them must have been compared against an expected row.
        self.assertEqual(len(data), len(received_rows))
        self.assertEqual(len(unquoted), len(received_rows))
コード例 #16
0
ファイル: test_extract.py プロジェクト: conerade67/classipy
    def test_row_generator_with_escapechar(self):
        """Check reading of quoted cells that contain commas and doubled quotes.

        The three text cells written are: quoted text with a comma, quoted
        text with a doubled quote, and quoted text with both. The reader is
        expected to keep the comma inside the cell and to return one quote
        character for each doubled quote.
        """
        data = make_data(3, 3, text_columns=(1,), with_quotes=True)
        data[0][1] = '"Cell with comma, here A."'
        data[1][1] = '"Cell with quote "" char."'
        data[2][1] = '"Cell with both "","" chars."'
        # The file object stays referenced until the test ends; the reader
        # only receives its name.
        tmp_file = make_file(data)
        unquoted = remove_quotes(data, (1,))
        row_gen = row_generator_from_file(tmp_file.name)
        # Expected text for rows 1 and 2 is spelled out by hand, with each
        # doubled quote reduced to a single quote character.
        unquoted[1][1] = 'Cell with quote " char.'
        unquoted[2][1] = 'Cell with both "," chars.'
        # Collect every produced row up front. Zipping the expected rows
        # against the live generator would silently ignore extra rows because
        # zip() ends as soon as the expected rows run out.
        received_rows = list(row_gen)

        for expected, received in zip(unquoted, received_rows):
            self.assertListEqual(expected, received)

        # Row count must match both the written data and the expected rows,
        # so neither missing nor surplus rows slip through.
        self.assertEqual(len(data), len(received_rows))
        self.assertEqual(len(unquoted), len(received_rows))