Ejemplo n.º 1
0
 def test_tokenize_one_alphabetical_symbol_input(self):
     """Tokenizing 'word' gives a leading 'Alpha' token 'word' at 0."""
     first_token = UniversalTokenizer().tokenize('word')[0]
     # Check kind, text and offset of the first token separately so a
     # failure points at the exact attribute that is wrong.
     self.assertEqual(first_token.type_of_token, 'Alpha')
     self.assertEqual(first_token.string_representation, 'word')
     self.assertEqual(first_token.position, 0)
Ejemplo n.º 2
0
 def test_tokenize_digit_input(self):
     """Tokenizing '1' gives a leading 'Digit' token '1' at position 0."""
     first_token = UniversalTokenizer().tokenize('1')[0]
     # Check kind, text and offset of the first token separately so a
     # failure points at the exact attribute that is wrong.
     self.assertEqual(first_token.type_of_token, 'Digit')
     self.assertEqual(first_token.string_representation, '1')
     self.assertEqual(first_token.position, 0)
Ejemplo n.º 3
0
 def test_tokenize_punctuation_input(self):
     """Tokenizing '!' gives a leading 'Punctuation' token '!' at 0."""
     first_token = UniversalTokenizer().tokenize('!')[0]
     # Check kind, text and offset of the first token separately so a
     # failure points at the exact attribute that is wrong.
     self.assertEqual(first_token.type_of_token, 'Punctuation')
     self.assertEqual(first_token.string_representation, '!')
     self.assertEqual(first_token.position, 0)
Ejemplo n.º 4
0
 def test_tokenize_space_input(self):
     """Tokenizing ' ' gives a leading 'Space' token ' ' at position 0."""
     first_token = UniversalTokenizer().tokenize(' ')[0]
     # Check kind, text and offset of the first token separately so a
     # failure points at the exact attribute that is wrong.
     self.assertEqual(first_token.type_of_token, 'Space')
     self.assertEqual(first_token.string_representation, ' ')
     self.assertEqual(first_token.position, 0)
Ejemplo n.º 5
0
    def shelve_db(self, text):
        """Tokenize ``text`` and store word/number positions in a shelf.

        A shelf named "database" is opened with ``flag='n'`` and, for every
        token whose type is 'Alpha' or 'Digit', the token's ``position`` is
        stored under a consecutive string key ("0", "1", ...).  The stored
        mapping is printed before the shelf is closed.

        :param text: text to be tokenized with ``UniversalTokenizer``
        :return: d_b - the shelf object; the ``with`` block has already
            closed it by the time it is returned
        """
        # NOTE(review): the shelf is closed when the ``with`` block exits,
        # so the returned object presumably cannot be read from any more --
        # confirm what callers expect to do with it.
        with shelve.open("database", flag='n', protocol=None,
                         writeback=False) as d_b:
            # Only alphabetical and digit tokens are indexed; every other
            # token type is skipped and does not consume a key.
            indexed_tokens = (
                token
                for token in UniversalTokenizer().tokenize(text)
                if token.type_of_token == 'Alpha'
                or token.type_of_token == 'Digit'
            )
            for key, token in enumerate(indexed_tokens):
                d_b[str(key)] = token.position
            # Debug output: show what ended up in the database.
            print(dict(d_b))
        return d_b
Ejemplo n.º 6
0
 def test_tokenize_empty_string_input(self):
     """Tokenizing an empty string is expected to give an empty list."""
     tokenizer = UniversalTokenizer()
     tokens = tokenizer.tokenize('')
     self.assertEqual(tokens, [])
Ejemplo n.º 7
0
 def test_tokenize_wrong_type_of_input_fail(self):
     """Passing a non-string (a list) is expected to raise TypeError."""
     not_a_string = []
     with self.assertRaises(TypeError):
         UniversalTokenizer().tokenize(not_a_string)
Ejemplo n.º 8
0
 def test_type_of_symbol_othertype(self):
     """``type_define('+')`` is expected to report 'Other type'."""
     symbol_type = UniversalTokenizer().type_define('+')
     self.assertEqual(symbol_type, 'Other type')
Ejemplo n.º 9
0
 def test_type_of_symbol_punctuation(self):
     """``type_define('!')`` is expected to report 'Punctuation'."""
     symbol_type = UniversalTokenizer().type_define('!')
     self.assertEqual(symbol_type, 'Punctuation')
Ejemplo n.º 10
0
 def test_type_of_symbol_space(self):
     """``type_define(' ')`` is expected to report 'Space'."""
     symbol_type = UniversalTokenizer().type_define(' ')
     self.assertEqual(symbol_type, 'Space')
Ejemplo n.º 11
0
 def test_type_of_symbol_alpha(self):
     """``type_define('a')`` is expected to report 'Alpha'."""
     symbol_type = UniversalTokenizer().type_define('a')
     self.assertEqual(symbol_type, 'Alpha')
Ejemplo n.º 12
0
 def test_type_of_symbol_digit(self):
     """``type_define('4')`` is expected to report 'Digit'."""
     symbol_type = UniversalTokenizer().type_define('4')
     self.assertEqual(symbol_type, 'Digit')