def setUp(self):
    """Create the XMLData fixtures shared by the tests.

    ``xml_data_space`` and ``xml_data_B`` use size 23.0; the three
    ``xml_data_y_*`` fixtures are spaces of size 25.0 that differ only in
    their third positional argument (10.0, 15.0 and 20.0).
    """
    style_23 = ("Font", 23.0, False, True)
    style_25 = ("Font", 25.0, False, True)

    self.xml_data_space = XMLData(" ", 10.0, 10.0, *style_23)
    self.xml_data_B = XMLData("B", 15.0, 70.0, *style_23)
    self.xml_data_y_1 = XMLData(" ", 10.0, 10.0, *style_25)
    self.xml_data_y_2 = XMLData(" ", 10.0, 15.0, *style_25)
    self.xml_data_y_3 = XMLData(" ", 10.0, 20.0, *style_25)
Esempio n. 2
0
 def test_false_on_y_gap(self):
     """Expect check_for_repeated_phrases to return 1 across a y gap.

     The two entries differ only in character and in their third
     positional argument (10.0 versus 100.0).
     """
     xml_data = [
         XMLData(" ", 10.0, 10.0, "Font", 24.0, False, True),
         XMLData("b", 10.0, 100.0, "Font", 24.0, False, True),
     ]
     expected_result = 1
     # NOTE(review): the TestCase instance is passed as the extractor's
     # ``self``; confirm the method does not rely on extractor state.
     actual_result = VincentExtractor.check_for_repeated_phrases(
         self, xml_data, 0)
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(expected_result, actual_result)
Esempio n. 3
0
 def test_xml_no_space(self):
     """Expect get_space_size to return 13.0 when no entry is a space."""
     xml_data = [
         XMLData("b", 270.0, 10.0, "Font", 23.0, False, True),
         XMLData("c", 10.0, 10.0, "Font", 23.0, False, True),
         XMLData("d", 10.0, 10.0, "Font", 23.0, False, True),
     ]
     expected_result = 13.0
     # NOTE(review): the TestCase instance is passed as the extractor's
     # ``self``; confirm the method does not rely on extractor state.
     actual_result = VincentExtractor.get_space_size(self, xml_data)
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(expected_result, actual_result)
 def test_false_on_space(self):
     """Expect get_space_size to return 0.0 for a leading space entry.

     The space sits at b_y 10.0 while the following "b" and "c" entries
     sit at b_y 100.0.
     """
     xml_data = [
         XMLData(" ", 10.0, 10.0, "Font", 23.0, False, True),
         XMLData("b", 10.0, 100.0, "Font", 23.0, False, True),
         XMLData("c", 10.0, 100.0, "Font", 23.0, False, True),
     ]
     expected_result = 0.0
     # NOTE(review): the TestCase instance is passed as the extractor's
     # ``self``; confirm the method does not rely on extractor state.
     actual_result = PerthExtractor.get_space_size(self, xml_data)
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(expected_result, actual_result)
 def test_xml_only_bold(self):
     """Expect get_space_size to return 13.0 when every font is "Bold"."""
     xml_data = [
         XMLData("b", 270.0, 10.0, "Bold", 23.0, False, True),
         XMLData("c", 10.0, 10.0, "Bold", 23.0, False, True),
         XMLData("d", 10.0, 10.0, "Bold", 23.0, False, True),
     ]
     expected_result = 13.0
     # NOTE(review): the TestCase instance is passed as the extractor's
     # ``self``; confirm the method does not rely on extractor state.
     actual_result = PerthExtractor.get_space_size(self, xml_data)
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(expected_result, actual_result)
 def test_true_on_end(self):
     """Expect check_for_repeated_phrases to return 2 for three equal spaces.

     The list holds three separately constructed but identical space
     entries and the check starts from index 0.
     """
     xml_data = [
         XMLData(" ", 10.0, 10.0, "Font", 23.0, False, True)
         for _ in range(3)
     ]
     expected_result = 2
     # NOTE(review): the TestCase instance is passed as the extractor's
     # ``self``; confirm the method does not rely on extractor state.
     actual_result = PerthExtractor.check_for_repeated_phrases(
         self, xml_data, 0)
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(expected_result, actual_result)
    def test_dont_enter_while(self):
        """create_sentences on two empty-character entries yields nothing.

        NOTE(review): with an empty expected list the membership check
        below only passes when ``create_sentences`` returns no items.
        """
        xml_data = [
            XMLData("", 10.0, 15.0, "Font", 25.0, False, True)
            for _ in range(2)
        ]
        expected_result = []
        extractor = PerthExtractor()

        actual_result = extractor.create_sentences(xml_data)

        only_expected = all(
            sentence in expected_result for sentence in actual_result)
        self.assertTrue(only_expected)
Esempio n. 8
0
    def test_false_on_second_space(self):
        """Expect get_space_size to return 0.0 when a second space follows.

        The first space sits at b_y 10.0; the second space and the "c"
        entry sit at b_y 100.0.
        """
        xml_data = [
            XMLData(" ", 10.0, 10.0, "Font", 23.0, False, True),
            XMLData(" ", 10.0, 100.0, "Font", 23.0, False, True),
            XMLData("c", 10.0, 100.0, "Font", 23.0, False, True),
        ]
        expected_result = 0.0
        # NOTE(review): the TestCase instance is passed as the extractor's
        # ``self``; confirm the method does not rely on extractor state.
        actual_result = VincentExtractor.get_space_size(self, xml_data)
        # assertEqual reports both values on failure, unlike
        # assertTrue(a == b).  The unused trailing StringIO assignment that
        # followed the assertion was dead code and has been dropped.
        self.assertEqual(expected_result, actual_result)
    def test_remove_footer(self):
        """create_sentences yields nothing once footer entries wrap the data.

        The list from ``add_three_gap`` gets one empty-character entry
        (b_y 49.0) appended and an identical one inserted at the front.

        NOTE(review): with an empty expected list the membership check
        below only passes when ``create_sentences`` returns no items.
        """
        footer_args = ("", 10.0, 49.0, "Font", 23.0, False, True)

        xml_data = self.add_three_gap()
        xml_data.append(XMLData(*footer_args))
        xml_data.insert(0, XMLData(*footer_args))

        expected_result = []
        extractor = PerthExtractor()
        actual_result = extractor.create_sentences(xml_data)

        only_expected = all(
            sentence in expected_result for sentence in actual_result)
        self.assertTrue(only_expected)
Esempio n. 10
0
 def test_two_bracket_cs(self):
     """Brackets enclosing two spaces come back as one sentence "(  ) ".

     The first four shared fixture entries are relabelled "(", " ", " "
     and ")", then two more spaces (b_x 170.0 and 180.0) are appended.
     """
     xml_data = self.xml_data_list
     for index, char in enumerate("(  )"):
         xml_data[index].character = char
     for b_x in (170.0, 180.0):
         xml_data.append(XMLData(" ", b_x, 60.0, "Font", 23.0, False, True))

     actual_result = self.v.create_sentences(xml_data)
     expected_result = ["(  ) "]
     self.assertEqual(expected_result, actual_result)
    def test_skip_on_two_gap(self):
        """create_sentences yields nothing for the two-gap data plus extras.

        An "a" entry is inserted at the front of the ``add_two_gap`` list
        and two "b" entries (b_x 16.0 and 770.0) are appended.

        NOTE(review): with an empty expected list the membership check
        below only passes when ``create_sentences`` returns no items.
        """
        tail = ("Font", 23.0, False, True)

        xml_data = self.add_two_gap()
        xml_data.insert(0, XMLData("a", 10.0, 100.0, *tail))
        for b_x in (16.0, 770.0):
            xml_data.append(XMLData("b", b_x, 100.0, *tail))

        expected_result = []
        extractor = PerthExtractor()
        actual_result = extractor.create_sentences(xml_data)

        only_expected = all(
            sentence in expected_result for sentence in actual_result)
        self.assertTrue(only_expected)
    def test_skip_all_gaps(self):
        """create_sentences yields nothing for five entries with gaps.

        NOTE(review): with an empty expected list the membership check
        below only passes when ``create_sentences`` returns no items.
        """
        tail = ("Font", 23.0, False, True)
        xml_data = [
            XMLData(".", 10.0, 100.0, *tail),
            XMLData("a", 270.0, 100.0, *tail),
            XMLData("b", 10.0, 100.0, *tail),
            XMLData("c", 26.0, 10.0, *tail),
            XMLData("d", 10.0, 10.0, *tail),
        ]

        expected_result = []
        extractor = PerthExtractor()
        actual_result = extractor.create_sentences(xml_data)

        only_expected = all(
            sentence in expected_result for sentence in actual_result)
        self.assertTrue(only_expected)
    def test_two_gap_add(self):
        """create_sentences yields nothing for "A", two spaces and two "d"s.

        The data is built inline rather than taken from ``add_two_gap``.

        NOTE(review): with an empty expected list the membership check
        below only passes when ``create_sentences`` returns no items.
        """
        tail = ("Font", 23.0, False, True)
        xml_data = [
            XMLData("A", 10.0, 100.0, *tail),
            XMLData(" ", 26.0, 100.0, *tail),
            XMLData(" ", 10.0, 100.0, *tail),
            XMLData("d", 26.0, 10.0, *tail),
            XMLData("d", 10.0, 10.0, *tail),
        ]

        expected_result = []
        extractor = PerthExtractor()
        actual_result = extractor.create_sentences(xml_data)

        only_expected = all(
            sentence in expected_result for sentence in actual_result)
        self.assertTrue(only_expected)
 def test_add_empty_next_sentence(self):
     """Expect test_add_sentence("", letter) to return ["b"]."""
     letter = XMLData("b", 10.0, 10.0, "Font", 23.0, True, False)
     next_sentence = ""
     p = PerthExtractor()
     expected_result = ["b"]
     actual_result = p.test_add_sentence(next_sentence, letter)
     # assertEqual reports both lists on failure, unlike assertTrue(a == b).
     self.assertEqual(expected_result, actual_result)
    def characters(self, content):
        """Record one parsed character and extend the current sentence.

        Nothing happens unless the current tag is "text" and a bounding box
        has been set. Otherwise the character is wrapped in an XMLData
        (using the first two values of the processed bounding box and the
        current font, size, bold and italics) and appended to
        ``self.xml_data`` when that XMLData reports itself valid.

        Characters containing a newline are only stored when more than one
        letter has already been collected and the previous stored letter
        was itself a newline; every other newline is skipped.

        The character is always added to ``self.next_sentence``; a full
        stop closes the sentence and moves it to ``self.sentences``.

        Args:
            content: The character that is being parsed.

        Returns:
            None
        """
        if self.tag != "text" or self.b_box == "":
            return

        self.current_char = content
        b_box_values = self.process_b_box(self.b_box)

        if "\n" in content:
            # Keep a newline only when it directly follows a stored newline.
            keep_letter = (len(self.xml_data) > 1
                           and self.xml_data[-1].character == "\n")
        else:
            keep_letter = True

        if keep_letter:
            next_letter = XMLData(content, b_box_values[0], b_box_values[1],
                                  self.font, self.size, self.bold,
                                  self.italics)
            # Skip characters whose data XMLData rejected as invalid.
            if next_letter.valid:
                self.xml_data.append(next_letter)

        self.next_sentence = self.next_sentence + content

        # A full stop finishes the sentence and stores it in sentences.
        if content == ".":
            self.sentences.append(self.next_sentence)
            self.next_sentence = ""
    def test_create_remove_footer(self):
        """create_sentences returns nothing other than "bcd.".

        The data is an "a" at b_y 10.0 followed by "b", "c", "d", ".", " "
        and three "z" entries, all at b_y 100.0.

        NOTE(review): the check is a subset test, so an empty result from
        ``create_sentences`` also passes.
        """
        xml_data = [XMLData("a", 10.0, 10.0, "Font", 23.0, False, True)]
        for char in "bcd. zzz":
            xml_data.append(
                XMLData(char, 10.0, 100.0, "Font", 23.0, False, True))

        extractor = PerthExtractor()
        expected_result = ["bcd."]
        actual_result = extractor.create_sentences(xml_data)

        only_expected = all(
            sentence in expected_result for sentence in actual_result)
        self.assertTrue(only_expected)
    def test_none_next_sentence(self):
        """Expect test_add_sentence(None, letter) to return an empty list.

        Standard output is redirected to an in-memory buffer while the
        fixtures are built and the call runs, so anything printed along
        the way does not reach the console.
        """
        expected_result = []

        previous_stdout = sys.stdout
        sys.stdout = io.StringIO()
        try:
            letter = XMLData("b", 10.0, 10.0, "Font", 23.0, True, False)
            next_sentence = None
            p = PerthExtractor()
            actual_result = p.test_add_sentence(next_sentence, letter)
        finally:
            # Restore the stream that was active before the test even when
            # the call raises; resetting to sys.__stdout__ would bypass any
            # capture a test runner had installed.
            sys.stdout = previous_stdout

        # assertEqual reports both values on failure, unlike
        # assertTrue(a == b).
        self.assertEqual(expected_result, actual_result)
Esempio n. 18
0
 def test_three_gap_add(self):
     """Three spaces between "a" and "b" are kept in the sentence "a   b".

     Entries 1-3 of the shared fixture list become spaces, entry 4 becomes
     "b" at b_y 60, entry 5 moves to b_y 70.0, entry 7 moves to b_x 150.0,
     and one more space is appended at b_x 160.0.
     """
     xml_data = self.xml_data_list
     for index in (1, 2, 3):
         xml_data[index].character = " "
     xml_data[4].character = "b"
     xml_data[4].b_y = 60
     xml_data[5].b_y = 70.0
     xml_data[7].b_x = 150.0
     xml_data.append(XMLData(" ", 160.0, 60.0, "Font", 23.0, False, True))

     extractor = VincentExtractor()
     expected_result = ["a   b"]
     actual_result = extractor.create_sentences(xml_data)
     self.assertEqual(expected_result, actual_result)
Esempio n. 19
0
 def test_b_y_none(self):
     """A non-numeric b_y ('y') is expected to be stored as None."""
     xml_bad = XMLData("a", 344.22, 'y', "Some Font", 24, True, True)
     # assertIsNone checks identity and shows the actual value on failure.
     self.assertIsNone(xml_bad.b_y)
Esempio n. 20
0
 def test_italics_none(self):
     """A string passed for italics is expected to be stored as None."""
     xml_bad = XMLData("a", 344.22, 544.22, "Some Font", 24, True,
                       "Italics")
     # assertIsNone checks identity and shows the actual value on failure.
     self.assertIsNone(xml_bad.italics)
Esempio n. 21
0
 def test_italics_invalid(self):
     """A string passed for italics must leave the XMLData not valid."""
     bad_italics = "Italics"
     entry = XMLData("a", 344.22, 544.22, "Some Font", 24, True, bad_italics)
     self.assertFalse(entry.valid)
Esempio n. 22
0
 def test_bold_none(self):
     """A string passed for bold is expected to be stored as None."""
     xml_bad = XMLData("a", 344.22, 544.22, "Some Font", 24, "Bold", True)
     # assertIsNone checks identity and shows the actual value on failure.
     self.assertIsNone(xml_bad.bold)
Esempio n. 23
0
 def test_bold_invalid(self):
     """A string passed for bold must leave the XMLData not valid."""
     bad_bold = "Bold"
     entry = XMLData("a", 344.22, 544.22, "Some Font", 24, bad_bold, True)
     self.assertFalse(entry.valid)
Esempio n. 24
0
 def test_size_none(self):
     """A string passed for size is expected to be stored as None."""
     xml_bad = XMLData("a", 344.22, 544.22, "Some Font", "Size", True, True)
     # assertIsNone checks identity and shows the actual value on failure.
     self.assertIsNone(xml_bad.size)
Esempio n. 25
0
 def test_size_invalid(self):
     """A string passed for size must leave the XMLData not valid."""
     bad_size = "Size"
     entry = XMLData("a", 344.22, 544.22, "Some Font", bad_size, True, True)
     self.assertFalse(entry.valid)
Esempio n. 26
0
 def test_font_none(self):
     """An integer passed for font is expected to be stored as None."""
     xml_bad = XMLData("a", 344.22, 544.22, 1, 24, True, True)
     # assertIsNone checks identity and shows the actual value on failure.
     self.assertIsNone(xml_bad.font)
Esempio n. 27
0
 def setUp(self):
     """Create the valid XMLData instance shared by the tests."""
     valid_args = ("a", 2.6, 5.2, "sans", 12.0, True, False)
     self.xmldata1 = XMLData(*valid_args)
Esempio n. 28
0
 def test_font_invalid(self):
     """An integer passed for font must leave the XMLData not valid."""
     bad_font = 1
     entry = XMLData("a", 344.22, 544.22, bad_font, 24, True, True)
     self.assertFalse(entry.valid)
Esempio n. 29
0
 def test_b_y_invalid(self):
     """A non-numeric b_y must leave the XMLData not valid."""
     bad_b_y = 'y'
     entry = XMLData("a", 344.22, bad_b_y, "Some Font", 24, True, True)
     self.assertFalse(entry.valid)
Esempio n. 30
0
class TestXMLData(TestCase):
    """Unit tests for XMLData attribute storage and input validation.

    The ``*_expected`` tests read back a valid instance built in ``setUp``.
    Each ``*_invalid`` / ``*_none`` pair passes one argument of the wrong
    type and expects ``valid`` to be falsy and the matching attribute to be
    None.
    """

    def setUp(self):
        """Create the valid XMLData instance used by the expected tests."""
        self.xmldata1 = XMLData("a", 2.6, 5.2, "sans", 12.0, True, False)

    def test_print_all_data(self):
        """print_all_data returns every field joined by ' || '."""
        xml_expected_output = (
            "Character: a || b_x: 2.6 || b_y: 5.2 || Font: sans || "
            "Size: 12.0 || Bold: True || Italics: False"
        )
        # assertEqual shows a diff of the two strings when they differ.
        self.assertEqual(xml_expected_output, self.xmldata1.print_all_data())

    def test_character_expected(self):
        """The first argument is stored as ``character``."""
        self.assertEqual("a", self.xmldata1.character)

    def test_x_b_box_expected(self):
        """The second argument is stored as ``b_x``."""
        self.assertEqual(2.6, self.xmldata1.b_x)

    def test_x_y_box_expected(self):
        """The third argument is stored as ``b_y``."""
        self.assertEqual(5.2, self.xmldata1.b_y)

    def test_font_expected(self):
        """The fourth argument is stored as ``font``."""
        self.assertEqual("sans", self.xmldata1.font)

    def test_size_expected(self):
        """The fifth argument is stored as ``size``."""
        self.assertEqual(12.0, self.xmldata1.size)

    def test_bold_expected(self):
        """The sixth argument (True) is stored as a truthy ``bold``."""
        self.assertTrue(self.xmldata1.bold)

    def test_italics_expected(self):
        """The seventh argument (False) is stored as a falsy ``italics``."""
        self.assertFalse(self.xmldata1.italics)

    def test_character_invalid(self):
        """An integer character must leave the XMLData not valid."""
        xml_bad = XMLData(3, 344.22, 554.22, "Some Font", 24, True, True)
        self.assertFalse(xml_bad.valid)

    def test_character_none(self):
        """An integer character is expected to be stored as None."""
        xml_bad = XMLData(3, 344.22, 554.22, "Some Font", 24, True, True)
        self.assertIsNone(xml_bad.character)

    def test_b_x_invalid(self):
        """A non-numeric b_x must leave the XMLData not valid."""
        xml_bad = XMLData("a", 's', 554.22, "Some Font", 24, True, True)
        self.assertFalse(xml_bad.valid)

    def test_b_x_none(self):
        """A non-numeric b_x is expected to be stored as None."""
        xml_bad = XMLData("a", 's', 554.22, "Some Font", 24, True, True)
        self.assertIsNone(xml_bad.b_x)

    def test_b_y_invalid(self):
        """A non-numeric b_y must leave the XMLData not valid."""
        xml_bad = XMLData("a", 344.22, 'y', "Some Font", 24, True, True)
        self.assertFalse(xml_bad.valid)

    def test_b_y_none(self):
        """A non-numeric b_y is expected to be stored as None."""
        xml_bad = XMLData("a", 344.22, 'y', "Some Font", 24, True, True)
        self.assertIsNone(xml_bad.b_y)

    def test_font_invalid(self):
        """An integer font must leave the XMLData not valid."""
        xml_bad = XMLData("a", 344.22, 544.22, 1, 24, True, True)
        self.assertFalse(xml_bad.valid)

    def test_font_none(self):
        """An integer font is expected to be stored as None."""
        xml_bad = XMLData("a", 344.22, 544.22, 1, 24, True, True)
        self.assertIsNone(xml_bad.font)

    def test_size_invalid(self):
        """A string size must leave the XMLData not valid."""
        xml_bad = XMLData("a", 344.22, 544.22, "Some Font", "Size", True, True)
        self.assertFalse(xml_bad.valid)

    def test_size_none(self):
        """A string size is expected to be stored as None."""
        xml_bad = XMLData("a", 344.22, 544.22, "Some Font", "Size", True, True)
        self.assertIsNone(xml_bad.size)

    def test_bold_invalid(self):
        """A string bold flag must leave the XMLData not valid."""
        xml_bad = XMLData("a", 344.22, 544.22, "Some Font", 24, "Bold", True)
        self.assertFalse(xml_bad.valid)

    def test_bold_none(self):
        """A string bold flag is expected to be stored as None."""
        xml_bad = XMLData("a", 344.22, 544.22, "Some Font", 24, "Bold", True)
        self.assertIsNone(xml_bad.bold)

    def test_italics_invalid(self):
        """A string italics flag must leave the XMLData not valid."""
        xml_bad = XMLData("a", 344.22, 544.22, "Some Font", 24, True,
                          "Italics")
        self.assertFalse(xml_bad.valid)

    def test_italics_none(self):
        """A string italics flag is expected to be stored as None."""
        xml_bad = XMLData("a", 344.22, 544.22, "Some Font", 24, True,
                          "Italics")
        self.assertIsNone(xml_bad.italics)