def setUp(self):
    """Create the XMLData fixtures shared by the tests in this case."""
    # Trailing arguments (font, size, bold, italics) shared by each group.
    size_23 = ("Font", 23.0, False, True)
    size_25 = ("Font", 25.0, False, True)

    self.xml_data_space = XMLData(" ", 10.0, 10.0, *size_23)
    self.xml_data_B = XMLData("B", 15.0, 70.0, *size_23)

    # Three spaces at the same b_x whose b_y values are 5.0 apart.
    self.xml_data_y_1 = XMLData(" ", 10.0, 10.0, *size_25)
    self.xml_data_y_2 = XMLData(" ", 10.0, 15.0, *size_25)
    self.xml_data_y_3 = XMLData(" ", 10.0, 20.0, *size_25)
def test_false_on_y_gap(self):
    """Expect check_for_repeated_phrases(xml_data, 0) to return 1.

    The input is a space at b_y=10.0 followed by "b" at b_y=100.0.
    """
    xml_data = [
        XMLData(" ", 10.0, 10.0, "Font", 24.0, False, True),
        XMLData("b", 10.0, 100.0, "Font", 24.0, False, True),
    ]
    expected_result = 1
    # The method is invoked through the class, with this test case passed
    # as its first argument.
    actual_result = VincentExtractor.check_for_repeated_phrases(self, xml_data, 0)
    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(expected_result, actual_result)
def test_xml_no_space(self):
    """Expect get_space_size to return 13.0 when no space character is present."""
    xml_data = [
        XMLData("b", 270.0, 10.0, "Font", 23.0, False, True),
        XMLData("c", 10.0, 10.0, "Font", 23.0, False, True),
        XMLData("d", 10.0, 10.0, "Font", 23.0, False, True),
    ]
    expected_result = 13.0
    # The method is invoked through the class, with this test case passed
    # as its first argument.
    actual_result = VincentExtractor.get_space_size(self, xml_data)
    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(expected_result, actual_result)
def test_false_on_space(self):
    """Expect get_space_size to return 0.0 for a space followed by "b" and "c"."""
    xml_data = [
        XMLData(" ", 10.0, 10.0, "Font", 23.0, False, True),
        XMLData("b", 10.0, 100.0, "Font", 23.0, False, True),
        XMLData("c", 10.0, 100.0, "Font", 23.0, False, True),
    ]
    expected_result = 0.0
    # The method is invoked through the class, with this test case passed
    # as its first argument.
    actual_result = PerthExtractor.get_space_size(self, xml_data)
    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(expected_result, actual_result)
def test_xml_only_bold(self):
    """Expect get_space_size to return 13.0 when every item uses the "Bold" font."""
    xml_data = [
        XMLData("b", 270.0, 10.0, "Bold", 23.0, False, True),
        XMLData("c", 10.0, 10.0, "Bold", 23.0, False, True),
        XMLData("d", 10.0, 10.0, "Bold", 23.0, False, True),
    ]
    expected_result = 13.0
    # The method is invoked through the class, with this test case passed
    # as its first argument.
    actual_result = PerthExtractor.get_space_size(self, xml_data)
    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(expected_result, actual_result)
def test_true_on_end(self):
    """Expect check_for_repeated_phrases(xml_data, 0) to return 2 for three identical spaces."""
    # Three distinct but identical XMLData objects (not one object repeated).
    xml_data = [
        XMLData(" ", 10.0, 10.0, "Font", 23.0, False, True)
        for _ in range(3)
    ]
    expected_result = 2
    # The method is invoked through the class, with this test case passed
    # as its first argument.
    actual_result = PerthExtractor.check_for_repeated_phrases(self, xml_data, 0)
    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(expected_result, actual_result)
def test_dont_enter_while(self):
    """Expect create_sentences to yield no sentences for two empty-string characters."""
    letters = [
        XMLData("", 10.0, 15.0, "Font", 25.0, False, True)
        for _ in range(2)
    ]
    expected_result = []
    extractor = PerthExtractor()
    sentences = extractor.create_sentences(letters)
    # With an empty expected list this holds only if nothing was produced.
    self.assertTrue(all(sentence in expected_result for sentence in sentences))
def test_false_on_second_space(self):
    """Expect get_space_size to return 0.0 for two spaces followed by "c"."""
    xml_data = [
        XMLData(" ", 10.0, 10.0, "Font", 23.0, False, True),
        XMLData(" ", 10.0, 100.0, "Font", 23.0, False, True),
        XMLData("c", 10.0, 100.0, "Font", 23.0, False, True),
    ]
    expected_result = 0.0
    # The method is invoked through the class, with this test case passed
    # as its first argument.
    actual_result = VincentExtractor.get_space_size(self, xml_data)
    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(expected_result, actual_result)
def test_remove_footer(self):
    """Expect create_sentences to yield no sentences for the padded add_three_gap() fixture.

    An empty-string character at b_y=49.0 is placed at both ends of the
    fixture list before it is processed.
    """
    letters = self.add_three_gap()
    letters.append(XMLData("", 10.0, 49.0, "Font", 23.0, False, True))
    letters.insert(0, XMLData("", 10.0, 49.0, "Font", 23.0, False, True))

    expected_result = []
    extractor = PerthExtractor()
    sentences = extractor.create_sentences(letters)
    # With an empty expected list this holds only if nothing was produced.
    self.assertTrue(all(sentence in expected_result for sentence in sentences))
def test_two_bracket_cs(self):
    """Expect create_sentences to return ["( ) "] for a bracketed pair of spaces."""
    letters = self.xml_data_list
    # Overwrite the first four fixture characters in place.
    for index, char in enumerate(["(", " ", " ", ")"]):
        letters[index].character = char
    # Two trailing spaces at b_y=60.0.
    for b_x in (170.0, 180.0):
        letters.append(XMLData(" ", b_x, 60.0, "Font", 23.0, False, True))

    expected_result = ["( ) "]
    actual_result = self.v.create_sentences(letters)
    self.assertEqual(expected_result, actual_result)
def test_skip_on_two_gap(self):
    """Expect create_sentences to yield no sentences for the extended add_two_gap() fixture.

    An "a" is inserted at the front and two "b" characters are appended.
    """
    letters = self.add_two_gap()
    letters.insert(0, XMLData("a", 10.0, 100.0, "Font", 23.0, False, True))
    for b_x in (16.0, 770.0):
        letters.append(XMLData("b", b_x, 100.0, "Font", 23.0, False, True))

    expected_result = []
    extractor = PerthExtractor()
    sentences = extractor.create_sentences(letters)
    # With an empty expected list this holds only if nothing was produced.
    self.assertTrue(all(sentence in expected_result for sentence in sentences))
def test_skip_all_gaps(self):
    """Expect create_sentences to yield no sentences for ".", "a", "b", "c", "d"."""
    # (character, b_x, b_y) for each item, in document order.
    glyphs = [
        (".", 10.0, 100.0),
        ("a", 270.0, 100.0),
        ("b", 10.0, 100.0),
        ("c", 26.0, 10.0),
        ("d", 10.0, 10.0),
    ]
    letters = [
        XMLData(char, b_x, b_y, "Font", 23.0, False, True)
        for char, b_x, b_y in glyphs
    ]

    expected_result = []
    extractor = PerthExtractor()
    sentences = extractor.create_sentences(letters)
    # With an empty expected list this holds only if nothing was produced.
    self.assertTrue(all(sentence in expected_result for sentence in sentences))
def test_two_gap_add(self):
    """Expect create_sentences to yield no sentences for "A", two spaces, then two "d"s."""
    # (character, b_x, b_y) for each item, in document order.
    glyphs = [
        ("A", 10.0, 100.0),
        (" ", 26.0, 100.0),
        (" ", 10.0, 100.0),
        ("d", 26.0, 10.0),
        ("d", 10.0, 10.0),
    ]
    letters = [
        XMLData(char, b_x, b_y, "Font", 23.0, False, True)
        for char, b_x, b_y in glyphs
    ]

    expected_result = []
    extractor = PerthExtractor()
    sentences = extractor.create_sentences(letters)
    # With an empty expected list this holds only if nothing was produced.
    self.assertTrue(all(sentence in expected_result for sentence in sentences))
def test_add_empty_next_sentence(self):
    """Expect test_add_sentence("", letter) to return ["b"] for the letter "b"."""
    letter = XMLData("b", 10.0, 10.0, "Font", 23.0, True, False)
    next_sentence = ""
    p = PerthExtractor()
    expected_result = ["b"]
    actual_result = p.test_add_sentence(next_sentence, letter)
    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(expected_result, actual_result)
def characters(self, content):
    """
    Record one piece of character data from the XML document.

    Nothing happens unless the current tag is "text" and a bounding box
    has been recorded. Otherwise the character is wrapped in an XMLData
    object (with the current font, size, bold and italics state) and
    appended to self.xml_data when that object reports itself valid.
    Content containing a newline is only stored when more than one item
    is already held and the last stored character is itself a newline.
    The content is also appended to self.next_sentence; a content of
    exactly "." finishes the sentence and moves it into self.sentences.

    Args:
        content: The character that is being parsed.

    Returns:
        None
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # whitespace had been flattened. In particular, confirm that the
    # sentence accumulation at the end belongs inside this tag guard.
    if (self.tag == "text" and self.b_box != ""):
        self.current_char = content
        # Only the first two values of the processed bounding box are used
        # (passed to XMLData as b_x and b_y).
        b_box_values = self.process_b_box((self.b_box))
        # The next line is a bare string statement left by the original
        # author; it has no effect at runtime.
        ' Commented this out, just for now - Pushed back 19 lines'
        if "\n" not in content:
            next_letter = XMLData(content, b_box_values[0],
                                  b_box_values[1], self.font,
                                  self.size, self.bold, self.italics)
            # Skip characters that XMLData flagged as invalid.
            if (next_letter.valid == True):
                self.xml_data.append(next_letter)
        else:
            # It is a \n
            if (len(self.xml_data) > 1):
                # If the previous letter was a \n keep it, else ignore it
                # (this check used to look for a "." instead).
                if (self.xml_data[-1]).character == "\n":
                    next_letter = XMLData(content, b_box_values[0],
                                          b_box_values[1], self.font,
                                          self.size, self.bold, self.italics)
                    if (next_letter.valid == True):
                        self.xml_data.append(next_letter)
        self.next_sentence = self.next_sentence + content
        # If we reach a full stop then finish the sentence and add it to sentences
        if content == ".":
            self.sentences.append(self.next_sentence)
            self.next_sentence = ""
def test_create_remove_footer(self):
    """Expect every sentence from create_sentences to be "bcd." for this input.

    The input is "a" at b_y=10.0 followed by "bcd. zzz" at b_y=100.0.
    """
    letters = [XMLData("a", 10.0, 10.0, "Font", 23.0, False, True)]
    for char in "bcd. zzz":
        letters.append(XMLData(char, 10.0, 100.0, "Font", 23.0, False, True))

    extractor = PerthExtractor()
    expected_result = ["bcd."]
    sentences = extractor.create_sentences(letters)
    # NOTE(review): this membership check also passes when no sentences are
    # returned at all; confirm whether an exact comparison was intended.
    self.assertTrue(all(sentence in expected_result for sentence in sentences))
def test_none_next_sentence(self):
    """Expect test_add_sentence(None, letter) to return an empty list.

    Standard output is captured and discarded while the objects are built
    and the call runs (presumably because the call prints a diagnostic;
    verify against the extractor).
    """
    from contextlib import redirect_stdout

    expected_result = []
    # redirect_stdout restores the previous sys.stdout even if the call
    # raises, and restores whatever stream was active before (not
    # sys.__stdout__), so a test runner's own capture is left intact.
    with redirect_stdout(io.StringIO()):
        letter = XMLData("b", 10.0, 10.0, "Font", 23.0, True, False)
        next_sentence = None
        p = PerthExtractor()
        actual_result = p.test_add_sentence(next_sentence, letter)
    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(expected_result, actual_result)
def test_three_gap_add(self):
    """Expect create_sentences to return ["a b"] for the modified shared fixture list."""
    letters = self.xml_data_list
    # Overwrite fixture entries in place: three spaces, then "b".
    for index in (1, 2, 3):
        letters[index].character = " "
    letters[4].character = "b"
    letters[4].b_y = 60
    letters[5].b_y = 70.0
    letters[7].b_x = 150.0
    letters.append(XMLData(" ", 160.0, 60.0, "Font", 23.0, False, True))

    extractor = VincentExtractor()
    expected_result = ["a b"]
    actual_result = extractor.create_sentences(letters)
    self.assertEqual(expected_result, actual_result)
def test_b_y_none(self):
    """Expect b_y to be None when a non-numeric string is supplied for it."""
    xml_bad = XMLData("a", 344.22, 'y', "Some Font", 24, True, True)
    # assertIsNone uses an identity check and reports the actual value.
    self.assertIsNone(xml_bad.b_y)
def test_italics_none(self):
    """Expect italics to be None when the string "Italics" is supplied for it."""
    xml_bad = XMLData("a", 344.22, 544.22, "Some Font", 24, True, "Italics")
    # assertIsNone uses an identity check and reports the actual value.
    self.assertIsNone(xml_bad.italics)
def test_italics_invalid(self):
    """Expect XMLData to be flagged invalid when italics is the string "Italics"."""
    bad_italics = XMLData(
        "a", 344.22, 544.22, "Some Font", 24, True, "Italics"
    )
    self.assertFalse(bad_italics.valid)
def test_bold_none(self):
    """Expect bold to be None when the string "Bold" is supplied for it."""
    xml_bad = XMLData("a", 344.22, 544.22, "Some Font", 24, "Bold", True)
    # assertIsNone uses an identity check and reports the actual value.
    self.assertIsNone(xml_bad.bold)
def test_bold_invalid(self):
    """Expect XMLData to be flagged invalid when bold is the string "Bold"."""
    bad_bold = XMLData(
        "a", 344.22, 544.22, "Some Font", 24, "Bold", True
    )
    self.assertFalse(bad_bold.valid)
def test_size_none(self):
    """Expect size to be None when the string "Size" is supplied for it."""
    xml_bad = XMLData("a", 344.22, 544.22, "Some Font", "Size", True, True)
    # assertIsNone uses an identity check and reports the actual value.
    self.assertIsNone(xml_bad.size)
def test_size_invalid(self):
    """Expect XMLData to be flagged invalid when size is the string "Size"."""
    bad_size = XMLData(
        "a", 344.22, 544.22, "Some Font", "Size", True, True
    )
    self.assertFalse(bad_size.valid)
def test_font_none(self):
    """Expect font to be None when an integer is supplied for it."""
    xml_bad = XMLData("a", 344.22, 544.22, 1, 24, True, True)
    # assertIsNone uses an identity check and reports the actual value.
    self.assertIsNone(xml_bad.font)
def setUp(self):
    """Create the valid XMLData instance shared by the tests."""
    # Positional order: character, b_x, b_y, font, size, bold, italics.
    valid_args = ("a", 2.6, 5.2, "sans", 12.0, True, False)
    self.xmldata1 = XMLData(*valid_args)
def test_font_invalid(self):
    """Expect XMLData to be flagged invalid when font is an integer."""
    bad_font = XMLData(
        "a", 344.22, 544.22, 1, 24, True, True
    )
    self.assertFalse(bad_font.valid)
def test_b_y_invalid(self):
    """Expect XMLData to be flagged invalid when b_y is a non-numeric string."""
    bad_b_y = XMLData(
        "a", 344.22, 'y', "Some Font", 24, True, True
    )
    self.assertFalse(bad_b_y.valid)
class TestXMLData(TestCase):
    """Tests for XMLData attribute storage and argument validation.

    For each constructor argument there is a pair of tests: one expecting
    the instance to be flagged invalid when the argument has the wrong
    type, and one expecting the matching attribute to be None.
    """

    def setUp(self):
        """Create the valid XMLData instance shared by the tests."""
        self.xmldata1 = XMLData("a", 2.6, 5.2, "sans", 12.0, True, False)

    def test_print_all_data(self):
        """Expect print_all_data() to return the formatted attribute summary."""
        xml_expected_output = """Character: a || b_x: 2.6 || b_y: 5.2 || Font: sans || Size: 12.0 || Bold: True || Italics: False"""
        # assertEqual shows a diff of the two strings on failure.
        self.assertEqual(xml_expected_output, self.xmldata1.print_all_data())

    def test_character_expected(self):
        """Expect the character attribute to hold the first argument."""
        self.assertEqual("a", self.xmldata1.character)

    def test_x_b_box_expected(self):
        """Expect b_x to hold the second argument."""
        self.assertEqual(2.6, self.xmldata1.b_x)

    def test_x_y_box_expected(self):
        """Expect b_y to hold the third argument."""
        self.assertEqual(5.2, self.xmldata1.b_y)

    def test_font_expected(self):
        """Expect font to hold the fourth argument."""
        self.assertEqual("sans", self.xmldata1.font)

    def test_size_expected(self):
        """Expect size to hold the fifth argument."""
        self.assertEqual(12.0, self.xmldata1.size)

    def test_bold_expected(self):
        """Expect bold to be truthy for the fixture."""
        self.assertTrue(self.xmldata1.bold)

    def test_italics_expected(self):
        """Expect italics to be falsy for the fixture."""
        self.assertFalse(self.xmldata1.italics)

    def test_character_invalid(self):
        """Expect an integer character to flag the instance invalid."""
        xml_bad = XMLData(3, 344.22, 554.22, "Some Font", 24, True, True)
        self.assertFalse(xml_bad.valid)

    def test_character_none(self):
        """Expect character to be None when an integer is supplied."""
        xml_bad = XMLData(3, 344.22, 554.22, "Some Font", 24, True, True)
        self.assertIsNone(xml_bad.character)

    def test_b_x_invalid(self):
        """Expect a string b_x to flag the instance invalid."""
        xml_bad = XMLData("a", 's', 554.22, "Some Font", 24, True, True)
        self.assertFalse(xml_bad.valid)

    def test_b_x_none(self):
        """Expect b_x to be None when a string is supplied."""
        xml_bad = XMLData("a", 's', 554.22, "Some Font", 24, True, True)
        self.assertIsNone(xml_bad.b_x)

    def test_b_y_invalid(self):
        """Expect a string b_y to flag the instance invalid."""
        xml_bad = XMLData("a", 344.22, 'y', "Some Font", 24, True, True)
        self.assertFalse(xml_bad.valid)

    def test_b_y_none(self):
        """Expect b_y to be None when a string is supplied."""
        xml_bad = XMLData("a", 344.22, 'y', "Some Font", 24, True, True)
        self.assertIsNone(xml_bad.b_y)

    def test_font_invalid(self):
        """Expect an integer font to flag the instance invalid."""
        xml_bad = XMLData("a", 344.22, 544.22, 1, 24, True, True)
        self.assertFalse(xml_bad.valid)

    def test_font_none(self):
        """Expect font to be None when an integer is supplied."""
        xml_bad = XMLData("a", 344.22, 544.22, 1, 24, True, True)
        self.assertIsNone(xml_bad.font)

    def test_size_invalid(self):
        """Expect a string size to flag the instance invalid."""
        xml_bad = XMLData("a", 344.22, 544.22, "Some Font", "Size", True, True)
        self.assertFalse(xml_bad.valid)

    def test_size_none(self):
        """Expect size to be None when a string is supplied."""
        xml_bad = XMLData("a", 344.22, 544.22, "Some Font", "Size", True, True)
        self.assertIsNone(xml_bad.size)

    def test_bold_invalid(self):
        """Expect a string bold flag to flag the instance invalid."""
        xml_bad = XMLData("a", 344.22, 544.22, "Some Font", 24, "Bold", True)
        self.assertFalse(xml_bad.valid)

    def test_bold_none(self):
        """Expect bold to be None when a string is supplied."""
        xml_bad = XMLData("a", 344.22, 544.22, "Some Font", 24, "Bold", True)
        self.assertIsNone(xml_bad.bold)

    def test_italics_invalid(self):
        """Expect a string italics flag to flag the instance invalid."""
        xml_bad = XMLData("a", 344.22, 544.22, "Some Font", 24, True, "Italics")
        self.assertFalse(xml_bad.valid)

    def test_italics_none(self):
        """Expect italics to be None when a string is supplied."""
        xml_bad = XMLData("a", 344.22, 544.22, "Some Font", 24, True, "Italics")
        self.assertIsNone(xml_bad.italics)