Beispiel #1
0
    def test__delimiter__not_found(self):
        """Check the result when the delimiter does not occur in the text.

        For every probed position (-1, 0, 1 and 2) ``get_line_piece`` is
        expected to hand back the complete, unmodified input string.
        """
        source = 'Vodka&Balalayka&Big bear'
        missing_delimiter = '~'

        # Same positions, same order as a hand-written list of asserts.
        for position in (-1, 0, 1, 2):
            assert get_line_piece(source, missing_delimiter, position) == source
Beispiel #2
0
    def test__no_delimiter(self):
        """Check the result when ``None`` is passed as the delimiter.

        For every probed position (-1, 0, 1 and 2) ``get_line_piece`` is
        expected to hand back the complete, unmodified input string.
        """
        source = 'Line about nine'

        # Same positions, same order as a hand-written list of asserts.
        for position in (-1, 0, 1, 2):
            assert get_line_piece(source, None, position) == source
Beispiel #3
0
    def process_line(self, line: str) -> Optional[str]:
        """Keep ``line`` only if its selected piece is long enough.

        The piece is obtained via ``get_line_piece`` using this object's
        ``delimiter`` and ``delimited_position``.

        :param line: the input line to check.
        :return: ``None`` when the piece's length is below
            ``self.threshold``; otherwise the original ``line``, unchanged
            (the whole line, not just the extracted piece).
        """
        piece = get_line_piece(
            line,
            self.delimiter,
            self.delimited_position,
        )

        return None if len(piece) < self.threshold else line
Beispiel #4
0
    def process_line(self, line: str) -> Optional[str]:
        """Keep ``line`` only if its selected piece is in the wanted language.

        The piece is obtained via ``get_line_piece`` using this object's
        ``delimiter`` and ``delimited_position`` and is passed to
        ``self.ft.predict`` with ``k=1``.
        NOTE(review): ``self.ft`` is presumably a fastText model (the
        ``__label__`` prefix suggests so) -- confirm.

        :param line: the input line to check.
        :return: ``None`` when the first value of the prediction's second
            element is below ``self.threshold`` or when the predicted label
            (with ``__label__`` removed) differs from ``self.language_code``;
            otherwise the original ``line``, unchanged.
        """
        candidate = get_line_piece(
            line,
            self.delimiter,
            self.delimited_position,
        )

        # TODO: `candidate = candidate.lower()` ?
        prediction = self.ft.predict(candidate, k=1)

        # prediction[1] is compared against the threshold before the label
        # in prediction[0] is looked at.
        top_score = prediction[1][0]
        if top_score < self.threshold:
            return None

        detected = prediction[0][0].replace('__label__', '')

        return None if detected != self.language_code else line
Beispiel #5
0
    def test__delimiter__found(self):
        """Check piece extraction when the delimiter occurs in the text.

        The text holds three '~'-separated pieces; the assertions pin which
        piece ``get_line_piece`` returns for a given position.
        """
        text = 'Column 1~Column #2~Description'
        delimiter = '~'

        # Position -1 is expected to yield the last piece.
        assert get_line_piece(text, delimiter, -1) == 'Description'
        # Position 0 is expected to yield the first piece.
        assert get_line_piece(text, delimiter, 0) == 'Column 1'