def test_gloss(self):
    """A single gloss div yields one wrapper from the UMTF wrapper funcs.

    Only the number of wrappers is compared with the expected list; the
    start/end/term values listed below are not checked individually.
    """
    annotated = u'If you remember the <div class="notranslate gloss">correo electr\xf3nico</div> address, request a password reset email: <div mlid="0" tid="0" >1urlzzz</div>'  # noqa
    expected_wrappers = [
        {'start': 20, 'end': 38, 'term': u'correo electr\xf3nico',
         'type': 'gloss'},
    ]

    result = inline_to_annotation(
        annotated, umtf_wrapper_funcs, final_wrapper=get_wrappers)

    # The wrappers are the last element of the returned sequence.
    found_wrappers = result[-1]
    self.assertEqual(len(expected_wrappers), len(found_wrappers))
def test_real_example(self):
    """A URL div matched by a custom pattern becomes exactly one wrapper.

    The only wrapper function passed to ``inline_to_annotation`` is a
    lambda that applies ``default_wrapper_func`` with a locally compiled
    pattern. The test expects an empty markup dict and a single wrapper.
    """
    inline = u'''If you need spreadsheet support, check out Google Docs and Trello integration with Zapier: <div class="notranslate url">https://zapier.com/zapbook/google-docs/trello/</div>'''  # noqa
    # Written as a plain unicode literal with an escaped backslash instead
    # of the ``ur''`` prefix: the value is identical on Python 2, and the
    # ``ur`` prefix is a SyntaxError on Python 3.
    patt = re.compile(u'\\w*<div class=.*?>(?P<name>.*?)<.*?>',
                      re.UNICODE | re.DOTALL)

    text, markup, wrappers = inline_to_annotation(
        inline, [lambda x: default_wrapper_func(x, patt)])

    self.assertEqual(markup, {})
    self.assertEqual(len(wrappers), 1)
def test_to_annotation_text(self):
    """Tag-free text comes back unchanged with no markup and no wrappers."""
    plain_sentence = u"My car is blue."

    got_text, got_markup, got_wrappers = inline_to_annotation(plain_sentence)

    self.assertEqual(got_text, u"My car is blue.")
    self.assertEqual(got_markup, {})
    self.assertEqual(got_wrappers, [])
def test_many_gloss(self):
    """Five gloss divs in one sentence yield five wrappers.

    Only the number of wrappers is compared with the expected list; the
    individual start/end/term values are not checked.
    """
    annotated = u'If the <div class="notranslate gloss">Pin</div> is a buyable <div class="notranslate gloss">Pin</div>\u2014something you can buy right on <div class="notranslate gloss">Pinterest</div>\u2014you\'ll see a blue <div mlid="0" tid="0" >Buy it </div><div class="notranslate gloss">bot\xf3n</div> on the <div class="notranslate gloss">Pin</div>.'  # noqa
    expected_wrappers = [
        {'start': 7, 'end': 10, 'term': u'Pin', 'type': 'gloss'},
        {'start': 24, 'end': 27, 'term': u'Pin', 'type': 'gloss'},
        {'start': 59, 'end': 68, 'term': u'Pinterest', 'type': 'gloss'},
        {'start': 94, 'end': 99, 'term': u'bot\xf3n', 'type': 'gloss'},
        {'start': 107, 'end': 110, 'term': u'Pin', 'type': 'gloss'},
    ]

    result = inline_to_annotation(
        annotated, umtf_wrapper_funcs, final_wrapper=get_wrappers)

    # The wrappers are the last element of the returned sequence.
    found_wrappers = result[-1]
    self.assertEqual(len(expected_wrappers), len(found_wrappers))
def test_to_annotation_markup(self):
    """A ``<b>`` pair is removed from the text and reported as markup.

    The expected markup is keyed by 10 and 14, the positions bounding
    ``blue`` in the tag-free text; no wrappers are expected.
    """
    opening_tag = {'added_space': False, 'close_tid': 1,
                   'tag_type': 'open', 'text': u'<b>', 'tid': 0}
    closing_tag = {'added_space': False, 'open_tid': 0,
                   'tag_type': 'close', 'text': u'</b>', 'tid': 1}
    expected_markup = {10: [opening_tag], 14: [closing_tag]}

    got_text, got_markup, got_wrappers = inline_to_annotation(
        u'my car is <b>blue</b>.')

    self.assertEqual(got_text, u'my car is blue.')
    self.assertEqual(got_markup, expected_markup)
    self.assertEqual(got_wrappers, [])
def test_example_file(self):
    """Round-trip every line of the examples file through both converters.

    Each line of ``inline_examples/examples.txt`` (under the directory
    returned by ``get_data_dir()``) is converted with
    ``inline_to_annotation`` and back with ``annotation_to_inline``; the
    result must equal the original line. Every line runs in its own
    ``subTest`` so one failing example does not hide the others.
    """
    from unbabel_text_utils.umtf_utils.wrappers_func import (
        umtf_funcs_dict, umtf_wrapper_funcs)

    examples_path = get_data_dir() + "/inline_examples/examples.txt"
    # Read inside a context manager so the file handle is always closed,
    # including when reading or decoding raises.
    with codecs.open(examples_path, encoding="utf-8") as examples_file:
        raw_file = examples_file.read()

    for i, line in enumerate(raw_file.split("\n")):
        with self.subTest(i=i):
            text, markup, wrappers = inline_to_annotation(
                line, umtf_wrapper_funcs, final_wrapper=get_wrappers)
            new_inline = annotation_to_inline(
                text, markup, wrappers, wrappers_dict=umtf_funcs_dict)
            self.assertEqual(
                line, new_inline,
                "%i\nOrig: %s\nNext: %s\n" % (i, line, new_inline))
def test_to_annotation_markup_wrappers(self):
    """Spans become wrappers and ``<b>`` becomes markup in a single call.

    ``inline_to_annotation`` is called with the inline text only. Both
    ``<span>`` elements are expected back as ``generic`` wrappers, and the
    ``<b>`` pair as markup keyed by 10 and 14.
    """
    source = (u"<span class='gloss'>my</span> "
              u"<span class='url'>car</span> is <b>blue</b>.")

    opening_tag = {'added_space': False, 'close_tid': 1,
                   'tag_type': 'open', 'text': u'<b>', 'tid': 0}
    closing_tag = {'added_space': False, 'open_tid': 0,
                   'tag_type': 'close', 'text': u'</b>', 'tid': 1}
    expected_markup = {10: [opening_tag], 14: [closing_tag]}
    expected_wrappers = [
        {"start": 0, "end": 2, "type": "generic", "text": "my"},
        {"start": 3, "end": 6, "type": "generic", "text": "car"},
    ]

    got_text, got_markup, got_wrappers = inline_to_annotation(source)

    self.assertEqual(got_text, u'my car is blue.')
    self.assertEqual(
        got_markup, expected_markup,
        "\nGot:%s\nWant:%s\n" % (got_markup, expected_markup))
    self.assertEqual(
        got_wrappers, expected_wrappers,
        "\nGot:%s\nWant:%s\n" % (got_wrappers, expected_wrappers))