Exemple #1
0
    def test_escape_ampersand(self):
        """Test that escape_for_xml escapes ``&`` and ``<`` consistently.

        The assertions below pin three behaviours:

        * every ``&`` becomes ``&amp;``, including one that already starts
          an entity (``&amp;`` turns into ``&amp;amp;``);
        * every ``<`` becomes ``&lt;`` while ``>`` is left untouched;
        * unescaping with ``MathMLParser().unescape`` first makes the
          round trip stable for text that was already escaped.
        """
        # A bare ampersand is escaped. The previous expectation ("A&A")
        # contradicted the "A&amp;A & B" round-trip assertion further down.
        self.assertEqual(escape_for_xml("A&A"), "A&amp;A")
        self.assertEqual(escape_for_xml("asdasdsa 2.2<y<3.4 A"),
                         "asdasdsa 2.2&lt;y&lt;3.4 A")
        self.assertEqual(escape_for_xml("range -4.0< @h<-2.5"),
                         "range -4.0&lt; @h&lt;-2.5")
        # Double escaping: this is what happens if the input is not
        # unescaped before being handed to escape_for_xml.
        self.assertEqual(escape_for_xml("A &amp; A.B"), "A &amp;amp; A.B")

        # Only "<" is rewritten; the closing ">" stays as it is.
        longtext = "range 2.7<y<3.8, are presented, <p_T^2> with"
        self.assertEqual(
            escape_for_xml(longtext),
            "range 2.7&lt;y&lt;3.8, are presented, &lt;p_T^2> with")

        from harvestingkit.html_utils import MathMLParser

        # Unescape first, then escape: already-escaped text survives as is.
        keep_existing = "for 0.03&lt;x&lt;0.1 and fit to world data"
        self.assertEqual(
            escape_for_xml(MathMLParser().unescape(keep_existing)),
            keep_existing)
        self.assertEqual(
            escape_for_xml(MathMLParser().unescape("A&amp;A & B")),
            "A&amp;A &amp; B")
        # With tags_to_keep given, a "<" that does not open one of the
        # listed elements is still escaped.
        self.assertEqual(
            escape_for_xml("ont essayé à<ll' pliquer",
                           tags_to_keep=MathMLParser.mathml_elements),
            "ont essayé à&lt;ll' pliquer")
def edit_record_title(value, objectid):
    """Replace the first title of the record behind ``objectid``.

    ``value`` is passed through ``MathMLParser.html_to_text`` and stored as
    the ``"title"`` of the first entry in the metadata's ``"titles"`` list.
    The change is then persisted with ``save_changes`` and the result of
    ``json_success_message('title')`` is returned.
    """
    model, sip, metadata = get_attributes(objectid)
    cleaned_title = MathMLParser.html_to_text(value)
    metadata["titles"][0] = {"title": cleaned_title}
    save_changes(sip, model)
    return json_success_message('title')
 def test_htmlentity_case(self):
     """Check the handling of an ampersand inside ``R&D;``.

     The input contains ``R&D;``; the converted text is expected to carry
     ``R&amp;D;`` in its place.
     """
     raw_title = u'Project at CERN, Proc. of the Workshop on Future Directions in Detector R&D;'
     escaped_title = u'Project at CERN, Proc. of the Workshop on Future Directions in Detector R&amp;D;'
     converted = MathMLParser.html_to_text(raw_title)
     self.assertEqual(converted, escaped_title)
def edit_record_title():
    """Update the first title of a workflow object from request values.

    Reads ``value`` (text, default ``''``) and ``objectid`` (int, default
    ``0``) from ``request.values``, stores the ``html_to_text`` version of
    the value as the first title of the object's data, saves the object and
    commits the database session. Returns
    ``json_success_message('title')``.
    """
    params = request.values
    value = params.get('value', '', type=text_type)
    objectid = params.get('objectid', 0, type=int)

    workflow_object = WorkflowObject.query.get(objectid)
    cleaned_title = MathMLParser.html_to_text(value)
    workflow_object.data["titles"][0] = {"title": cleaned_title}

    workflow_object.save()
    db.session.commit()
    return json_success_message('title')
 def test_html(self):
     """Check that HTML tags are dropped while their text is kept."""
     markup = (
         u'<p><roman>CH</roman><sub>3</sub><roman>NH</roman><sub>3</sub>'
         u'<roman>PbX</roman>(<roman>X</roman> = <roman>Br</roman>, '
         u'<roman>I</roman>, <roman>Cl</roman>) perovskites have recently'
         u'been used as light absorbers in hybrid organic-inorganic solid-state'
         u' solar cells, with efficiencies above 15%.</p>'
     )
     plain_text = (
         u'CH3NH3PbX(X = Br, I, Cl) perovskites have recently'
         u'been used as light absorbers in hybrid organic-inorganic solid-state '
         u'solar cells, with efficiencies above 15%.'
     )
     converted = MathMLParser.html_to_text(markup)
     self.assertEqual(converted, plain_text)
def edit_record_title(value, objectid):
    """Set the title field of the workflow object ``objectid``.

    The object's data is fetched, its ``TITLE_FIELD`` entry is replaced by
    ``MathMLParser.html_to_text(value)``, and the data is written back and
    saved. Returns the ``jsonify`` response carrying a success message.
    """
    workflow_object = BibWorkflowObject.query.get(objectid)
    payload = workflow_object.get_data()

    cleaned_title = MathMLParser.html_to_text(value)
    payload[TITLE_FIELD] = cleaned_title
    workflow_object.set_data(payload)
    workflow_object.save()

    response_body = {
        "category": "success",
        "message": "Edit on title was successful"
    }
    return jsonify(response_body)
def edit_record_title(value, objectid):
    """Store a cleaned-up title on the workflow object ``objectid``.

    ``value`` is run through ``MathMLParser.html_to_text`` before being
    stored. Where the title is written depends on the object's data:

    * exactly a ``dict``: the title goes into the metadata of the latest SIP
      of the object's deposition, the SIP package is regenerated from that
      metadata and the deposition is saved;
    * anything else: the title is set under ``TITLE_FIELD`` on the data,
      which is written back to the object before it is saved.

    Returns the ``jsonify`` response carrying a success message.
    """
    workflow_object = BibWorkflowObject.query.get(objectid)
    payload = workflow_object.get_data()

    # Exact type check on purpose: only a plain dict takes the SIP route.
    if type(payload) is not dict:
        payload[TITLE_FIELD] = MathMLParser.html_to_text(value)
        workflow_object.set_data(payload)
        workflow_object.save()
    else:
        deposition = Deposition(workflow_object)
        latest_sip = deposition.get_latest_sip()
        sip_metadata = latest_sip.metadata

        sip_metadata[TITLE][TITLE] = MathMLParser.html_to_text(value)
        marc_record = make_record(latest_sip.metadata)
        latest_sip.package = marc_record.legacy_export_as_marc()
        deposition.save()

    response_body = {
        "category": "success",
        "message": "Edit on title was successful."
    }
    return jsonify(response_body)
 def test_html(self):
     """Verify that markup is removed and only the text content remains."""
     html_source = (u'<p><roman>CH</roman><sub>3</sub><roman>NH</roman><sub>3</sub>'
                    u'<roman>PbX</roman>(<roman>X</roman> = <roman>Br</roman>, '
                    u'<roman>I</roman>, <roman>Cl</roman>) perovskites have recently'
                    u'been used as light absorbers in hybrid organic-inorganic solid-state'
                    u' solar cells, with efficiencies above 15%.</p>')
     stripped = (u'CH3NH3PbX(X = Br, I, Cl) perovskites have recently'
                 u'been used as light absorbers in hybrid organic-inorganic solid-state '
                 u'solar cells, with efficiencies above 15%.')
     result = MathMLParser.html_to_text(html_source)
     self.assertEqual(result, stripped)
def best_escape(text):
    """
    Try to guess if the text contains HTML or not.

    The text is parsed with BeautifulSoup (``lxml`` parser) and the first
    parsed node decides what happens:

    * if it is not a plain string, the text is treated as HTML: markup is
      stripped while MathML and HTML entities are preserved, using
      ``MathMLParser.html_to_text`` from Harvesting Kit;
    * if it is a plain string, the text is just escaped (``&``, ``<`` and
      ``>``; quotes are left alone) to make it HTML friendly.

    Text for which the parser yields no nodes at all, such as an empty
    string, is treated as plain text instead of raising ``IndexError``.
    """
    import bs4
    from harvestingkit.html_utils import MathMLParser
    # cgi.escape was removed in Python 3.8; html.escape(..., quote=False)
    # performs the same replacements. Fall back to cgi on Python 2.
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    nodes = bs4.BeautifulSoup(text, 'lxml').contents
    if not nodes or isinstance(nodes[0], bs4.element.NavigableString):
        return escape(text, quote=False)
    return MathMLParser.html_to_text(text)
 def test_mathml(self):
     """Check that text with an embedded ``<math>`` element is unchanged."""
     abstract = (
         u'In this paper we continue the study of Q -operators in'
         u' the six-vertex model and its higher spin generalizations.'
         u' In [1] we derived a new expression for the higher spin R'
         u' -matrix associated with the affine quantum algebra '
         u'<math xmlns="http://www.w3.org/1998/Math/MathML" altimg="si1.gif">'
         u'<msub><mrow><mi>U</mi></mrow><mrow><mi>q</mi></mrow></msub>'
         u'<mo stretchy="false">(</mo><mover accent="true"><mrow><mrow>'
         u'<mi mathvariant="italic">sl</mi></mrow><mo stretchy="false">'
         u'(</mo><mn>2</mn><mo stretchy="false">)</mo></mrow><mrow><mo>'
         u'^</mo></mrow></mover><mo stretchy="false">)</mo></math>'
         u' . Taking a special limit in this R -matrix we obtained new'
         u' formulas for the Q -operators acting in the tensor product'
         u' of representation spaces with arbitrary complex spin.'
     )
     converted = MathMLParser.html_to_text(abstract)
     self.assertEqual(converted, abstract)
 def test_mathml(self):
     """Verify that MathML markup passes through the conversion intact."""
     mathml_text = (u'In this paper we continue the study of Q -operators in'
                    u' the six-vertex model and its higher spin generalizations.'
                    u' In [1] we derived a new expression for the higher spin R'
                    u' -matrix associated with the affine quantum algebra '
                    u'<math xmlns="http://www.w3.org/1998/Math/MathML" altimg="si1.gif">'
                    u'<msub><mrow><mi>U</mi></mrow><mrow><mi>q</mi></mrow></msub>'
                    u'<mo stretchy="false">(</mo><mover accent="true"><mrow><mrow>'
                    u'<mi mathvariant="italic">sl</mi></mrow><mo stretchy="false">'
                    u'(</mo><mn>2</mn><mo stretchy="false">)</mo></mrow><mrow><mo>'
                    u'^</mo></mrow></mover><mo stretchy="false">)</mo></math>'
                    u' . Taking a special limit in this R -matrix we obtained new'
                    u' formulas for the Q -operators acting in the tensor product'
                    u' of representation spaces with arbitrary complex spin.')
     result = MathMLParser.html_to_text(mathml_text)
     self.assertEqual(result, mathml_text)
 def test_htmlentity(self):
     """Check that already-escaped entities come back unchanged."""
     escaped_text = "This &amp; that and &lt; is there."
     converted = MathMLParser.html_to_text(escaped_text)
     self.assertEqual(converted, escaped_text)
 def test_xml_encoding(self):
     """Check that bare ``&`` and ``<`` are escaped while ``>`` is kept."""
     raw_text = "This & that and 2<y<3 is > there."
     converted = MathMLParser.html_to_text(raw_text)
     self.assertEqual(converted,
                      "This &amp; that and 2&lt;y&lt;3 is > there.")
 def test_htmlentity(self):
     """Verify that ``&amp;`` and ``&lt;`` in the input are left as they are."""
     with_entities = "This &amp; that and &lt; is there."
     self.assertEqual(
         MathMLParser.html_to_text(with_entities),
         with_entities)
Exemple #15
0
 def test_mathml_mml(self):
     """Test that MathML with mml namespace prefix is handled."""
     abstract = u"""<p>The determination of the Higgs self-coupling is one of the key ingredients for understanding the mechanism behind the electroweak symmetry breaking. An indirect method for constraining the Higgs trilinear self-coupling via single Higgs production at next-to-leading order (NLO) has been proposed in order to avoid the drawbacks of studies with double Higgs production. In this paper we study the Higgs self-interaction through the vector boson fusion (VBF) process <inline-formula><mml:math display="inline"><mml:msup><mml:mi>e</mml:mi><mml:mo>-</mml:mo></mml:msup><mml:mi>p</mml:mi><mml:mo stretchy="false">\u2192</mml:mo><mml:msub><mml:mi>\u03bd</mml:mi><mml:mi>e</mml:mi></mml:msub><mml:mi>h</mml:mi><mml:mi>j</mml:mi></mml:math></inline-formula> at the future LHeC. At NLO level, we compute analytically the scattering amplitudes for relevant processes, in particular those induced by the Higgs self-interaction. A Monte\xa0Carlo simulation and a statistical analysis utilizing the analytic results are then carried out for Higgs production through VBF and decay to <inline-formula><mml:math display="inline"><mml:mi>b</mml:mi><mml:mover accent="true"><mml:mi>b</mml:mi><mml:mo stretchy="false">\xaf</mml:mo></mml:mover></mml:math></inline-formula>, which yield for the trilinear Higgs self-coupling rescaling parameter <inline-formula><mml:math display="inline"><mml:msub><mml:mi>\u03ba</mml:mi><mml:mi>\u03bb</mml:mi></mml:msub></mml:math></inline-formula> the limit [<inline-formula><mml:math display="inline"><mml:mrow><mml:mo>-</mml:mo><mml:mn>0.57</mml:mn></mml:mrow></mml:math></inline-formula>, 2.98] with <inline-formula><mml:math display="inline"><mml:mn>2</mml:mn><mml:mtext>\u2009</mml:mtext><mml:mtext>\u2009</mml:mtext><mml:msup><mml:mi>ab</mml:mi><mml:mrow><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> integrated luminosity. 
If we assume about 10% of the signal survives the event selection cuts, and include all the background, the constraint will be broadened to [<inline-formula><mml:math display="inline"><mml:mrow><mml:mo>-</mml:mo><mml:mn>2.11</mml:mn></mml:mrow></mml:math></inline-formula>, 4.63].</p>"""
     expected = u"""The determination of the Higgs self-coupling is one of the key ingredients for understanding the mechanism behind the electroweak symmetry breaking. An indirect method for constraining the Higgs trilinear self-coupling via single Higgs production at next-to-leading order (NLO) has been proposed in order to avoid the drawbacks of studies with double Higgs production. In this paper we study the Higgs self-interaction through the vector boson fusion (VBF) process <math display="inline"><msup><mi>e</mi><mo>-</mo></msup><mi>p</mi><mo stretchy="false">\u2192</mo><msub><mi>\u03bd</mi><mi>e</mi></msub><mi>h</mi><mi>j</mi></math> at the future LHeC. At NLO level, we compute analytically the scattering amplitudes for relevant processes, in particular those induced by the Higgs self-interaction. A Monte\xa0Carlo simulation and a statistical analysis utilizing the analytic results are then carried out for Higgs production through VBF and decay to <math display="inline"><mi>b</mi><mover accent="true"><mi>b</mi><mo stretchy="false">\xaf</mo></mover></math>, which yield for the trilinear Higgs self-coupling rescaling parameter <math display="inline"><msub><mi>\u03ba</mi><mi>\u03bb</mi></msub></math> the limit [<math display="inline"><mrow><mo>-</mo><mn>0.57</mn></mrow></math>, 2.98] with <math display="inline"><mn>2</mn><mtext>\u2009</mtext><mtext>\u2009</mtext><msup><mi>ab</mi><mrow><mo>-</mo><mn>1</mn></mrow></msup></math> integrated luminosity. If we assume about 10% of the signal survives the event selection cuts, and include all the background, the constraint will be broadened to [<math display="inline"><mrow><mo>-</mo><mn>2.11</mn></mrow></math>, 4.63]."""
     self.assertEqual(MathMLParser.html_to_text(abstract), expected)
 def test_xml_encoding(self):
     """Verify XML escaping of raw ``&`` and ``<``; ``>`` stays untouched."""
     unescaped = "This & that and 2<y<3 is > there."
     escaped = "This &amp; that and 2&lt;y&lt;3 is > there."
     result = MathMLParser.html_to_text(unescaped)
     self.assertEqual(result, escaped)
 def test_htmlentity_case(self):
     """Verify that the ``&`` of ``R&D;`` is expected back as ``&amp;``."""
     source_text = (
         u'Project at CERN, Proc. of the Workshop on Future Directions in Detector R&D;'
     )
     wanted_text = (
         u'Project at CERN, Proc. of the Workshop on Future Directions in Detector R&amp;D;'
     )
     result = MathMLParser.html_to_text(source_text)
     self.assertEqual(result, wanted_text)