Example #1
0
    def test_HTMLbis(self):
        # The text is wrapped in a lone <em> element and contains a double
        # space; the expected outcome is one TEXT-only unit reading
        # 'J. David', with no format markers around it.
        markup = '<em>J.  David</em>'

        units = [unit for unit, _context, _offset
                 in get_units(HTMLParser(markup))]

        self.assertEqual(units, [((TEXT, u'J. David'),)])
Example #2
0
    def test_surrounding_format(self):
        # The whole sentence sits inside a single <em> element; the expected
        # unit holds only the TEXT item, without START_FORMAT/END_FORMAT.
        sentence = u'Surrounding format elements should be extracted !'
        markup = '<em>Surrounding format elements should be extracted !</em>'

        units = [unit for unit, _context, _offset
                 in get_units(HTMLParser(markup))]

        self.assertEqual(units, [((TEXT, sentence),)])
Example #3
0
    def test_HTML3(self):
        # Plain text without any markup: dashes and asterisks are expected
        # to come back untouched inside a single TEXT unit.
        markup = '-- toto is here -- *I am*'

        units = [unit for unit, _context, _offset
                 in get_units(HTMLParser(markup))]

        self.assertEqual(units, [((TEXT, u'-- toto is here -- *I am*'),)])
Example #4
0
    def test_ignore_tags(self):
        # An <em> element in the middle of the text: two units are expected,
        # the first keeping the format markers (id 1) around ' Baby.', the
        # second holding only the trailing sentence.
        markup = 'Hello <em> Baby.</em> How are you ?'
        first_unit = ((TEXT, u'Hello '), (START_FORMAT, 1),
                      (TEXT, u' Baby.'), (END_FORMAT, 1))
        second_unit = ((TEXT, u'How are you ?'),)

        units = [unit for unit, _context, _offset
                 in get_units(HTMLParser(markup))]

        self.assertEqual(units, [first_unit, second_unit])
Example #5
0
    def test_ignore_tags(self):
        # Inline <em> inside a longer text. Expected: a first unit with the
        # START_FORMAT/END_FORMAT pair (id 1) kept around ' Baby.', then a
        # second unit with the remaining sentence as plain TEXT.
        html = 'Hello <em> Baby.</em> How are you ?'
        wanted = [
            ((TEXT, u'Hello '), (START_FORMAT, 1), (TEXT, u' Baby.'),
             (END_FORMAT, 1)),
            ((TEXT, u'How are you ?'),),
        ]

        found = [item for item, _ctx, _pos in get_units(HTMLParser(html))]

        self.assertEqual(found, wanted)
Example #6
0
    def test_HTML3(self):
        # Markup-free input; the single expected unit carries the very same
        # text, punctuation included.
        html = '-- toto is here -- *I am*'
        wanted = [((TEXT, u'-- toto is here -- *I am*'),)]

        found = [item for item, _ctx, _pos in get_units(HTMLParser(html))]

        self.assertEqual(found, wanted)
Example #7
0
    def test_HTMLbis(self):
        # Input: text with a double space, enclosed in one <em> element.
        # Expected: a single unit made of one TEXT item, 'J. David'.
        html = '<em>J.  David</em>'
        wanted = [((TEXT, u'J. David'),)]

        found = [item for item, _ctx, _pos in get_units(HTMLParser(html))]

        self.assertEqual(found, wanted)
Example #8
0
    def test_surrounding_format(self):
        # The <em> element encloses the complete sentence; the expected unit
        # contains the sentence alone, as one TEXT item.
        html = '<em>Surrounding format elements should be extracted !</em>'
        wanted = [
            ((TEXT, u'Surrounding format elements should be extracted !'),),
        ]

        found = [item for item, _ctx, _pos in get_units(HTMLParser(html))]

        self.assertEqual(found, wanted)
Example #9
0
    def test_HTML4(self):
        # A link followed by two sentences. Expected: the first unit keeps
        # the format markers (id 1) around ' Debian ' plus ' Hello.', and
        # 'Toto' comes out as a separate TEXT-only unit.
        markup = ' <a href="http://www.debian.org/"> Debian </a> Hello.  Toto'
        link_unit = ((START_FORMAT, 1), (TEXT, u' Debian '), (END_FORMAT, 1),
                     (TEXT, u' Hello.'))
        tail_unit = ((TEXT, u'Toto'),)

        units = [unit for unit, _context, _offset
                 in get_units(HTMLParser(markup))]

        self.assertEqual(units, [link_unit, tail_unit])
Example #10
0
    def test_HTML(self):
        # The href value contains '; ' and '. ' and the link is followed by
        # a run of spaces. Expected: one unit only, with the format markers
        # (id 1) around 'hello ' and the trailing text given as ' GOGO'.
        markup = '<a href="; t. ffff">hello </a>      GOGO'

        units = [unit for unit, _context, _offset
                 in get_units(HTMLParser(markup))]

        only_unit = ((START_FORMAT, 1), (TEXT, u'hello '), (END_FORMAT, 1),
                     (TEXT, u' GOGO'))
        self.assertEqual(units, [only_unit])
Example #11
0
    def test_HTML4(self):
        # Link element, then ' Hello.' and 'Toto' separated by two spaces.
        # Two units are expected: the link with its format markers (id 1)
        # together with ' Hello.', and then 'Toto' alone.
        html = ' <a href="http://www.debian.org/"> Debian </a> Hello.  Toto'
        wanted = [
            ((START_FORMAT, 1), (TEXT, u' Debian '), (END_FORMAT, 1),
             (TEXT, u' Hello.')),
            ((TEXT, u'Toto'),),
        ]

        found = [item for item, _ctx, _pos in get_units(HTMLParser(html))]

        self.assertEqual(found, wanted)
Example #12
0
    def test_HTML(self):
        # Link whose href contains punctuation, followed by several spaces
        # and a word. A single unit is expected: format markers (id 1)
        # around 'hello ', then ' GOGO' as TEXT.
        html = '<a href="; t. ffff">hello </a>      GOGO'

        found = [item for item, _ctx, _pos in get_units(HTMLParser(html))]

        wanted = [
            ((START_FORMAT, 1), (TEXT, u'hello '), (END_FORMAT, 1),
             (TEXT, u' GOGO')),
        ]
        self.assertEqual(found, wanted)
Example #13
0
 def test_paragraph(self):
     """Check the unit extracted from a single styled paragraph."""
     body = ('<office:text>'
             '<text:p text:style-name="Standard">'
             'hello world'
             '</text:p>'
             '</office:text>')
     document = odt_template % body

     # Only the first field of every unit is compared
     sources = []
     for unit in get_units(XMLParser(document)):
         sources.append(unit[0])

     self.assertEqual(sources, [((TEXT, u'hello world'), )])
Example #14
0
 def test_paragraph(self):
     """A paragraph with a style name must yield its text as one unit."""
     paragraph = ('<office:text>'
                  '<text:p text:style-name="Standard">'
                  'hello world'
                  '</text:p>'
                  '</office:text>')
     parser = XMLParser(odt_template % paragraph)

     # Keep the first field of each unit, which is what gets compared
     found = []
     for unit in get_units(parser):
         found.append(unit[0])

     wanted = [((TEXT, u'hello world'),)]
     self.assertEqual(found, wanted)
Example #15
0
    def test_table(self):
        # A table of two rows and three cells per row, each cell holding a
        # one-letter paragraph: every letter is expected as its own
        # TEXT-only unit, in document order.
        table_markup = """
        <office:text>
          <table:table table:name="Tableau1" table:style-name="Tableau1">
            <table:table-column table:style-name="Tableau1.A"
              table:number-columns-repeated="3"/>
            <table:table-row>
              <table:table-cell table:style-name="Tableau1.A1"
                office:value-type="string">
                <text:p text:style-name="Table_20_Contents">A</text:p>
              </table:table-cell>
              <table:table-cell table:style-name="Tableau1.A1"
                office:value-type="string">
                <text:p text:style-name="Table_20_Contents">B</text:p>
              </table:table-cell>
              <table:table-cell table:style-name="Tableau1.C1"
                office:value-type="string">
                <text:p text:style-name="Table_20_Contents">C</text:p>
              </table:table-cell>
            </table:table-row>
            <table:table-row>
              <table:table-cell table:style-name="Tableau1.A2"
                office:value-type="string">
                <text:p text:style-name="Table_20_Contents">D</text:p>
              </table:table-cell>
              <table:table-cell table:style-name="Tableau1.A2"
                office:value-type="string">
                <text:p text:style-name="Table_20_Contents">E</text:p>
              </table:table-cell>
              <table:table-cell table:style-name="Tableau1.C2"
                office:value-type="string">
                <text:p text:style-name="Table_20_Contents">F</text:p>
              </table:table-cell>
            </table:table-row>
          </table:table>
        </office:text>
        """

        document = odt_template % table_markup

        # Only the first field of every unit is compared
        sources = []
        for unit in get_units(XMLParser(document)):
            sources.append(unit[0])

        cell_texts = (u'A', u'B', u'C', u'D', u'E', u'F')
        wanted = [((TEXT, cell_text),) for cell_text in cell_texts]

        self.assertEqual(sources, wanted)
Example #16
0
    def test_iter_segmentation(self):
        """A message enclosed in a format element and made of several
        sentences: each sentence must be extracted as a separate segment by
        the iterative algorithm."""

        markup = ('<span>This text contains many sentences. A sentence. '
                  'Another one. This text must be well segmented.  </span>')
        sentences = (u'This text contains many sentences.',
                     u'A sentence.',
                     u'Another one.',
                     u'This text must be well segmented.')

        units = [unit for unit, _context, _offset
                 in get_units(HTMLParser(markup))]

        self.assertEqual(units, [((TEXT, text),) for text in sentences])
Example #17
0
    def test_iter_segmentation(self):
        """The message is surrounded by a <span> element and holds four
        sentences; the iterative algorithm must return one segment per
        sentence."""

        html = ('<span>This text contains many sentences. A sentence. '
                'Another one. This text must be well segmented.  </span>')
        wanted = [
            ((TEXT, u'This text contains many sentences.'),),
            ((TEXT, u'A sentence.'),),
            ((TEXT, u'Another one.'),),
            ((TEXT, u'This text must be well segmented.'),),
        ]

        found = [item for item, _ctx, _pos in get_units(HTMLParser(html))]

        self.assertEqual(found, wanted)
Example #18
0
    def test_translation_paragraph(self):
        """Translate the content of a paragraph with a PO catalog and check
        the units extracted from the result."""
        catalog_source = ('msgctxt "paragraph"\n'
                          'msgid "hello world"\n'
                          'msgstr "hola mundo"\n')
        po = POFile(string=catalog_source)
        body = ('<office:text>'
                '<text:p text:style-name="Standard">'
                'hello world'
                '</text:p>'
                '</office:text>')

        document = odt_template % body
        translated = translate(XMLParser(document), po)

        # Only the first field of every unit is compared
        sources = []
        for unit in get_units(translated):
            sources.append(unit[0])

        self.assertEqual(sources, [((TEXT, u'hola mundo'), )])
Example #19
0
    def test_translation_paragraph(self):
        """The text of a paragraph must be replaced by the msgstr found in
        the PO catalog."""
        po_lines = ('msgctxt "paragraph"\n'
                    'msgid "hello world"\n'
                    'msgstr "hola mundo"\n')
        po = POFile(string=po_lines)
        paragraph = ('<office:text>'
                     '<text:p text:style-name="Standard">'
                     'hello world'
                     '</text:p>'
                     '</office:text>')

        events = XMLParser(odt_template % paragraph)
        events = translate(events, po)

        # Keep the first field of each unit, which is what gets compared
        found = []
        for unit in get_units(events):
            found.append(unit[0])

        wanted = [((TEXT, u'hola mundo'),)]
        self.assertEqual(found, wanted)
Example #20
0
 def get_units(self, srx_handler=None):
     """Yield the (source, context, line) units computed from self.events.

     The units come from the module-level get_units, called with
     self.events and srx_handler. A unit is skipped when its source has at
     most one item and subs_expr_solo matches the second field of that
     item; all other units are yielded unchanged.
     """
     for source, context, line in get_units(self.events, srx_handler):
         # The length test comes first so that the pattern is only tried
         # on sources that do not have several items
         if len(source) <= 1 and \
            subs_expr_solo.match(source[0][1]) is not None:
             continue
         yield source, context, line
Example #21
0
File: odf.py Project: kennym/itools
 def get_units(self, srx_handler=None):
     """Yield the units found in the XML parts of the document.

     The events of 'content.xml', 'meta.xml' and 'styles.xml' are read, in
     that order, with self.get_events and handed together with srx_handler
     to the module-level get_units; each unit it returns is yielded as is.
     """
     xml_parts = ['content.xml', 'meta.xml', 'styles.xml']
     for part_name in xml_parts:
         part_events = self.get_events(part_name)
         for unit in get_units(part_events, srx_handler):
             # FIXME the line number carried by the unit is meaningless here
             yield unit
Example #22
0
 def get_units(self, srx_handler=None):
     """Iterate over the units of 'content.xml', 'meta.xml' and
     'styles.xml', in that order.

     For each name, the events returned by self.get_events are passed with
     srx_handler to the module-level get_units, and its results are yielded
     without modification.
     """
     names = ['content.xml', 'meta.xml', 'styles.xml']
     for name in names:
         units = get_units(self.get_events(name), srx_handler)
         for unit in units:
             # FIXME the line number of the unit makes no sense here
             yield unit
Example #23
0
 def get_units(self, srx_handler=None):
     """Filter the units that the module-level get_units builds from
     self.events and srx_handler.

     Units whose source has more than one item are always yielded. A
     shorter source is yielded only when subs_expr_solo does not match the
     second field of its first item.
     """
     units = get_units(self.events, srx_handler)
     for source, context, line in units:
         if len(source) > 1:
             yield source, context, line
         elif subs_expr_solo.match(source[0][1]) is None:
             yield source, context, line