def test_non_letter_or_digit(self):
    """A token made only of punctuation is UNDEFINED and has no last colon."""
    token = Token(r'''}{[]\|\|?/.,><';:"~`''')
    expected_type = token_type["UNDEFINED"]
    self.assertEqual(expected_type, token.type_id)
    self.assertFalse(token.has_last_colon)
def test_year_token(self):
    """A four-digit year followed by a comma is typed as YEAR."""
    token = Token("2016,")
    expected_type = token_type["YEAR"]
    self.assertEqual(expected_type, token.type_id)
def test_date_token_9(self):
    """A slash-separated year/month/day with trailing comma is DATE_SHORT."""
    token = Token("2006/04/05,")
    expected_type = token_type["DATE_SHORT"]
    self.assertEqual(expected_type, token.type_id)
def test_date_token_2(self):
    """A dash-separated date with single-digit fields is DATE_SHORT."""
    token = Token("2006-5-4")
    expected_type = token_type["DATE_SHORT"]
    self.assertEqual(expected_type, token.type_id)
def test_date_token_8(self):
    """A dash-separated date with the year in last position is DATE_SHORT."""
    token = Token("04-05-2006")
    expected_type = token_type["DATE_SHORT"]
    self.assertEqual(expected_type, token.type_id)
def test_time_incorrect_token(self):
    """A time prefixed with '+' is UNDEFINED and has no last colon."""
    token = Token("+23:59")
    expected_type = token_type["UNDEFINED"]
    self.assertEqual(expected_type, token.type_id)
    self.assertFalse(token.has_last_colon)
def test_meridiem_token_2(self):
    """The text 'a.m.,' (with trailing comma) is expected to be UNDEFINED."""
    token = Token("a.m.,")
    expected_type = token_type["UNDEFINED"]
    self.assertEqual(expected_type, token.type_id)
def test_time_token_3(self):
    """A time followed by a colon is TIME, and LAST_COLON matches its text.

    The LAST_COLON pattern is applied to the token's text with ``re.match``
    and the matched span must equal the whole text.
    """
    tkn = Token("23:59:")
    res = re.match(attribute_reg_ex["LAST_COLON"], tkn.text)
    self.assertEqual(token_type["TIME"], tkn.type_id)
    # Dedicated assertions report the offending values on failure, unlike
    # assertTrue(<comparison>), which only reports "False is not true".
    self.assertIsNotNone(res)
    self.assertEqual(tkn.text, res.group())
def test_time_token_4(self):
    """A time with seconds and a trailing comma is TIME with no last colon."""
    token = Token("23:59:55,")
    expected_type = token_type["TIME"]
    self.assertEqual(expected_type, token.type_id)
    self.assertFalse(token.has_last_colon)
def test_undefined_token(self):
    """A handle-like word starting with '@' is typed as UNDEFINED."""
    token = Token("@ppzhuk")
    expected_type = token_type["UNDEFINED"]
    self.assertEqual(expected_type, token.type_id)
def test_email_token_2(self):
    """A bare address-shaped token is typed as EMAIL."""
    token = Token("*****@*****.**")
    expected_type = token_type["EMAIL"]
    self.assertEqual(expected_type, token.type_id)
def test_email_token(self):
    """An address-shaped token wrapped in angle brackets is typed as EMAIL."""
    token = Token("<*****@*****.**>")
    expected_type = token_type["EMAIL"]
    self.assertEqual(expected_type, token.type_id)
def test_angle_bracket(self):
    """The token '>:' is UNDEFINED, and LAST_COLON matches its whole text.

    The LAST_COLON pattern is applied to the token's text with ``re.match``
    and the matched span must equal the whole text.
    """
    tkn = Token(">:")
    res = re.match(attribute_reg_ex["LAST_COLON"], tkn.text)
    self.assertEqual(token_type["UNDEFINED"], tkn.type_id)
    # Dedicated assertions report the offending values on failure, unlike
    # assertTrue(<comparison>), which only reports "False is not true".
    self.assertIsNotNone(res)
    self.assertEqual(tkn.text, res.group())
def split_tokens(text):
    """Split *text* on whitespace and wrap every word in a ``Token``.

    Args:
        text: The string to tokenize.

    Returns:
        A list of ``Token`` objects, one per whitespace-separated word, in
        their original order. Empty for empty or whitespace-only input.
    """
    # str.split() with no separator collapses runs of whitespace and never
    # yields empty strings, so no explicit empty-word filter is needed.
    return [Token(word) for word in text.split()]