def test_escape_ampersand(self):
    """Check how escape_for_xml treats ampersands and '<' characters."""
    # (input, expected output) pairs passed straight to escape_for_xml.
    plain_cases = (
        ("A&A", "A&A"),
        ("asdasdsa 2.2<y<3.4 A", "asdasdsa 2.2<y<3.4 A"),
        ("range -4.0< @h<-2.5", "range -4.0< @h<-2.5"),
        # happens if not unescaped..
        ("A & A.B", "A &amp; A.B"),
        ("range 2.7<y<3.8, are presented, <p_T^2> with",
         "range 2.7<y<3.8, are presented, <p_T^2> with"),
    )
    for raw, expected in plain_cases:
        self.assertEqual(escape_for_xml(raw), expected)

    # Function-scope import: only the assertions below need the parser.
    from harvestingkit.html_utils import MathMLParser

    # Inputs that go through MathMLParser.unescape before being escaped;
    # a fresh parser instance is created for every case.
    untouched = "for 0.03<x<0.1 and fit to world data"
    unescaped_cases = (
        (untouched, untouched),
        ("A&A & B", "A&A & B"),
    )
    for raw, expected in unescaped_cases:
        unescaped = MathMLParser().unescape(raw)
        self.assertEqual(escape_for_xml(unescaped), expected)

    # Non-ASCII text, with the MathML element names passed as tags_to_keep.
    accented = "ont essayé à<ll' pliquer"
    escaped = escape_for_xml(
        accented, tags_to_keep=MathMLParser.mathml_elements)
    self.assertEqual(escaped, "ont essayé à<ll' pliquer")
def test_escape_ampersand(self):
    """Check escape_for_xml on ampersands and on '<' followed by numbers."""
    # (input, expected output) pairs, asserted in order.
    cases = (
        ("A&A", "A&A"),
        ("A&A & B", "A&A & B"),
        ("A & A.B", "A & A.B"),
        ("asdasdsa <1 A", "asdasdsa <1 A"),
        ("asdasdsa <=1 A", "asdasdsa <=1 A"),
        ("asdasdsa <.2 A", "asdasdsa <.2 A"),
        ("asdasdsa < 2 A", "asdasdsa < 2 A"),
    )
    for raw, expected in cases:
        self.assertEqual(escape_for_xml(raw), expected)
def test_escape_ampersand(self):
    """Exercise escape_for_xml on ampersands and '<' characters."""
    # Inputs handed directly to escape_for_xml, paired with expected output.
    direct_cases = (
        ("A&A", "A&A"),
        ("asdasdsa 2.2<y<3.4 A", "asdasdsa 2.2<y<3.4 A"),
        ("range -4.0< @h<-2.5", "range -4.0< @h<-2.5"),
        # happens if not unescaped..
        ("A & A.B", "A &amp; A.B"),
        ("range 2.7<y<3.8, are presented, <p_T^2> with",
         "range 2.7<y<3.8, are presented, <p_T^2> with"),
    )
    for source_text, wanted in direct_cases:
        self.assertEqual(escape_for_xml(source_text), wanted)

    # Imported inside the test; only the remaining assertions use it.
    from harvestingkit.html_utils import MathMLParser

    # These inputs are unescaped by a new MathMLParser before escaping.
    keep_existing = "for 0.03<x<0.1 and fit to world data"
    parser_cases = (
        (keep_existing, keep_existing),
        ("A&A & B", "A&A & B"),
    )
    for source_text, wanted in parser_cases:
        prepared = MathMLParser().unescape(source_text)
        self.assertEqual(escape_for_xml(prepared), wanted)

    # Accented input, escaped while keeping the parser's MathML elements.
    result = escape_for_xml(
        "ont essayé à<ll' pliquer",
        tags_to_keep=MathMLParser.mathml_elements)
    self.assertEqual(result, "ont essayé à<ll' pliquer")
def html_to_text(cls, html):
    """Return stripped HTML, keeping only MathML.

    A new ``cls`` instance is fed ``html``; the data it collected is
    unescaped and then passed to ``escape_for_xml`` with the instance's
    ``mathml_elements`` as the tags to keep.
    """
    parser = cls()
    parser.feed(html)
    collected = parser.get_data()
    # NOTE(review): if ``unescape`` is the one inherited from the standard
    # library HTMLParser, it no longer exists on Python 3.9+ -- confirm the
    # supported interpreter versions.
    plain_text = parser.unescape(collected)
    return escape_for_xml(plain_text, tags_to_keep=parser.mathml_elements)