Example #1
0
    def test_double_date_2(self):
        # A header line that contains "19 May 2015" twice: after splitting,
        # date tagging and attribute assignment, every one of the 17 tokens
        # is checked against its expected type name.
        # NOTE(review): "[email protected]" looks like a scrubbed address;
        # confirm the fixture still produces the token count asserted below.
        header = (
            "*From:* Unknown 19 May 2015 Person (some 5 body)) [mailto: "
            "[email protected]] *Sent:* 19 May 2015 03:29 PM"
        )
        parsed = define_date_related_tokens(split_tokens(header))
        set_attributes(parsed)

        # Expected type name for each token, in positional order.
        expected_types = (
            "UNDEFINED",
            "DATE_RELATED",
            "DAY",
            "DATE_RELATED",
            "YEAR",
            "UNDEFINED",
            "UNDEFINED",
            "DAY",
            "UNDEFINED",
            "UNDEFINED",
            "EMAIL",
            "DATE_RELATED",
            "DAY",
            "DATE_RELATED",
            "YEAR",
            "TIME",
            "UNDEFINED",
        )
        self.assertEqual(len(parsed), 17)
        for position, type_name in enumerate(expected_types):
            self.assertEqual(parsed[position].type_name, type_name)

        # The token at index 9 must not be flagged as ending with a colon.
        self.assertFalse(parsed[9].has_last_colon)
Example #2
0
    def test_undefined_tokens_unification(self):
        # Runs the full pipeline including unite_undefined_tokens and expects
        # 4 tokens, the third one being UNDEFINED with text "GMT+03:00 xxx".
        line = "2014-12-29 18:59 GMT+03:00 xxx <*****@*****.**>:"
        united = define_date_related_tokens(split_tokens(line))
        set_attributes(united)
        united = unite_undefined_tokens(united)

        self.assertEqual(len(united), 4)

        third = united[2]
        self.assertEqual(third.type_name, "UNDEFINED")
        self.assertEqual(third.text, "GMT+03:00 xxx")

        # The trailing ":" of the line is expected to be recorded on the
        # final token.
        final = united[-1]
        self.assertTrue(final.has_last_colon)
Example #3
0
    def test_undefined_tokens_unification_2(self):
        # After unification the line is expected to hold 8 tokens, the first
        # being a single UNDEFINED token spanning everything up to "[mailto:".
        # NOTE(review): "[email protected]" looks like a scrubbed address;
        # confirm the fixture still produces the token count asserted below.
        header = (
            "*From:* Unknown Person (some body)) [mailto: [email protected]] "
            "*Sent:* 19 May 2015 03:29 PM"
        )
        united = define_date_related_tokens(split_tokens(header))
        set_attributes(united)
        united = unite_undefined_tokens(united)

        self.assertEqual(len(united), 8)

        leading = united[0]
        self.assertEqual(leading.type_name, "UNDEFINED")
        self.assertEqual(
            leading.text, "*From:* Unknown Person (some body)) [mailto:")
Example #4
0
    def test_date_short(self):
        # Without unification the line is expected to yield 5 tokens whose
        # types are checked one by one; the last token must carry the
        # trailing-colon flag.
        line = "2014-12-29 18:59 GMT+03:00 xxx <*****@*****.**>:"
        parsed = define_date_related_tokens(split_tokens(line))
        set_attributes(parsed)

        expected_types = (
            "DATE_SHORT",
            "TIME",
            "UNDEFINED",
            "UNDEFINED",
            "EMAIL",
        )
        self.assertEqual(len(parsed), 5)
        for position, type_name in enumerate(expected_types):
            self.assertEqual(parsed[position].type_name, type_name)

        self.assertTrue(parsed[-1].has_last_colon)
Example #5
0
    def test_undefined_tokens_unification_4(self):
        # Line mixing Latin and Cyrillic words: after unification 5 tokens
        # are expected, and the fourth one must have the text
        # "PM <Cyrillic first name> <Cyrillic last name>" from the fixture.
        line = "On 04/15/2016 05:08 PM Павел Жук wrote:"
        united = define_date_related_tokens(split_tokens(line))
        set_attributes(united)
        united = unite_undefined_tokens(united)

        expected_types = (
            "UNDEFINED",
            "DATE_SHORT",
            "TIME",
            "UNDEFINED",
            "UNDEFINED",
        )
        self.assertEqual(len(united), 5)
        for position, type_name in enumerate(expected_types):
            self.assertEqual(united[position].type_name, type_name)

        # Colon flag is checked before the merged text, as in the type
        # checks above the order of assertions is significant for which
        # failure gets reported first.
        self.assertTrue(united[4].has_last_colon)
        self.assertEqual(united[3].text, "PM Павел Жук")
Example #6
0
    def test_token_types_2(self):
        # Without unification the header is expected to yield 13 tokens;
        # the types of the first token and of tokens 5..12 are verified.
        # NOTE(review): "[email protected]" looks like a scrubbed address;
        # confirm the fixture still produces the token count asserted below.
        header = (
            "*From:* Unknown Person (some body)) [mailto: [email protected]] "
            "*Sent:* 19 May 2015 03:29 PM"
        )
        parsed = define_date_related_tokens(split_tokens(header))
        set_attributes(parsed)

        self.assertEqual(len(parsed), 13)

        # (token index, expected type name) pairs, checked in this order.
        expectations = (
            (0, "UNDEFINED"),
            (5, "UNDEFINED"),
            (6, "EMAIL"),
            (7, "DATE_RELATED"),
            (8, "DAY"),
            (9, "DATE_RELATED"),
            (10, "YEAR"),
            (11, "TIME"),
            (12, "UNDEFINED"),
        )
        for position, type_name in expectations:
            self.assertEqual(parsed[position].type_name, type_name)
Example #7
0
    def test_undefined_tokens_unification_3(self):
        # After unification 14 tokens are expected; the tokens at indexes
        # 0, 5, 7 and 13 must be UNDEFINED and carry the merged texts below.
        # NOTE(review): "[email protected]" looks like a scrubbed address;
        # confirm the fixture still produces the token count asserted below.
        header = (
            "*From:* Unknown 19 May 2015 Person (some body)) [mailto: "
            "[email protected]] xxx vvv eee *Sent:* 19 May 2015 03:29 PM zz"
        )
        united = define_date_related_tokens(split_tokens(header))
        set_attributes(united)
        united = unite_undefined_tokens(united)

        self.assertEqual(len(united), 14)

        # (token index, expected text) for every UNDEFINED token checked.
        undefined_texts = (
            (0, "*From:*"),
            (5, "Person (some body)) [mailto:"),
            (7, "xxx vvv eee"),
            (13, "PM zz"),
        )
        # All type checks run first, then all text checks.
        for position, _ in undefined_texts:
            self.assertEqual(united[position].type_name, "UNDEFINED")
        for position, content in undefined_texts:
            self.assertEqual(united[position].text, content)
Example #8
0
    def test_one_date_related_token(self):
        # Russian-language attribution line: 12 tokens are expected, with
        # exactly one DATE_RELATED token among the positions checked
        # (index 1, between the DAY and YEAR tokens).
        line = (
            "16 февр 2016 г в 18:58 Unknown Person (X Y) <*****@*****.**> "
            "написал(а):"
        )
        parsed = define_date_related_tokens(split_tokens(line))
        set_attributes(parsed)

        self.assertEqual(len(parsed), 12)

        # (token index, expected type name) pairs, checked in this order.
        expectations = (
            (0, "DAY"),
            (1, "DATE_RELATED"),
            (2, "YEAR"),
            (3, "UNDEFINED"),
            (4, "UNDEFINED"),
            (5, "TIME"),
            (6, "UNDEFINED"),
            (10, "EMAIL"),
            (11, "UNDEFINED"),
        )
        for position, type_name in expectations:
            self.assertEqual(parsed[position].type_name, type_name)

        self.assertTrue(parsed[-1].has_last_colon)
Example #9
0
    def test_token_types(self):
        # English "On ... wrote:" attribution line: 14 tokens are expected;
        # the types of tokens 0..7, 12 and 13 are verified, and the last
        # token must carry the trailing-colon flag.
        line = (
            "On Mon Jul 1 2016 at 2:08 PM Unknown Person "
            "(some body) <*****@*****.**> wrote:"
        )
        parsed = define_date_related_tokens(split_tokens(line))
        set_attributes(parsed)

        self.assertEqual(len(parsed), 14)

        # (token index, expected type name) pairs, checked in this order.
        expectations = (
            (0, "UNDEFINED"),
            (1, "DATE_RELATED"),
            (2, "DATE_RELATED"),
            (3, "DAY"),
            (4, "YEAR"),
            (5, "UNDEFINED"),
            (6, "TIME"),
            (7, "UNDEFINED"),
            (12, "EMAIL"),
            (13, "UNDEFINED"),
        )
        for position, type_name in expectations:
            self.assertEqual(parsed[position].type_name, type_name)

        self.assertTrue(parsed[13].has_last_colon)