Python htmlfile 예제들, translate.storage.html.htmlfile Python 예제들

예제 #1

0

파일 보기

파일: test_html.py 프로젝트: slide333333/translate

 def test_extraction_tag_caption_td_th(self):
     """Caption, header cells and data cells of a table each become a unit."""
     # Markup adapted from http://www.w3schools.com/tags/tag_caption.asp
     markup = """
         <table>
             <caption>Monthly savings</caption>
             <tr>
                 <th>Month</th>
                 <th>Savings</th>
             </tr>
             <tr>
                 <td>January</td>
                 <td>$100</td>
             </tr>
         </table>"""
     # Units are expected in document order.
     expected_sources = (
         "Monthly savings",
         "Month",
         "Savings",
         "January",
         "$100",
     )
     parser = html.htmlfile()
     store = parser.parsestring(markup)
     print(store.units[0].source)
     assert len(store.units) == len(expected_sources)
     for unit, wanted in zip(store.units, expected_sources):
         assert unit.source == wanted

예제 #2

0

파일 보기

파일: test_html.py 프로젝트: XLeonardo/translate-1

    def test_extraction_attr_title(self):
        """Verify that ``title`` attribute values are extracted as units."""
        parser = html.htmlfile()

        # Markup adapted from http://www.w3schools.com/tags/att_global_title.asp
        parsed = parser.parsestring("""
            <p><abbr title="World Health Organization">WHO</abbr> was founded in 1948.</p>
            <p title="Free Web tutorials">W3Schools.com</p>""")
        print(parsed.units[0].source)
        # FIXME: not ideal -- the abbr title is extracted as its own unit and
        # then appears again inside the paragraph markup.  Either drop title=
        # from the markup or do not extract it earlier.
        assert [unit.source for unit in parsed.units] == [
            "World Health Organization",
            '<abbr title="World Health Organization">WHO</abbr> was founded in 1948.',
            "Free Web tutorials",
            "W3Schools.com",
        ]

        # Markup adapted from http://www.netmechanic.com/news/vol6/html_no1.htm
        parsed = parser.parsestring("""
            <table width="100" border="2" title="Henry Jacobs Camp summer 2003 schedule">
        """)
        assert [unit.source for unit in parsed.units] == [
            "Henry Jacobs Camp summer 2003 schedule",
        ]
        # FIXME: the anchor case below does not extract the way one would
        # expect, so it stays disabled.
        #store = h.parsestring("""
        #    <a href="page1.html" title="HS Jacobs - a UAHC camp in Utica, MS">Henry S. Jacobs Camp</a>
        #""")
        #assert len(store.units) == 2
        #assert store.units[0].source == "HS Jacobs - a UAHC camp in Utica, MS"
        #assert store.units[1].source == "Henry S. Jacobs Camp"
        parsed = parser.parsestring("""
            <form name="application" title="Henry Jacobs camper application" method="  " action="  ">
        """)
        assert [unit.source for unit in parsed.units] == [
            "Henry Jacobs camper application",
        ]

예제 #3

0

파일 보기

파일: po2html.py 프로젝트: andynicholson/translate

 def mergestore(self, inputstore, templatetext, includefuzzy):
     """Build an ``html.htmlfile`` from ``templatetext`` and return its ``filesrc``.

     ``inputstore`` is stored on the instance and indexed, and
     ``includefuzzy`` is stored alongside it, before the template is parsed
     with ``self.lookup`` as the parser callback.
     NOTE(review): presumably ``lookup`` uses both attributes to choose the
     replacement text -- confirm against its definition.
     """
     self.inputstore = inputstore
     self.inputstore.makeindex()
     self.includefuzzy = includefuzzy
     return html.htmlfile(inputfile=templatetext, callback=self.lookup).filesrc

예제 #4

0

파일 보기

파일: po2html.py 프로젝트: midhz/translate

 def mergestore(self, inputstore, templatetext, includefuzzy):
     """Parse the HTML template with ``self.lookup`` as callback; return ``filesrc``.

     Before parsing, the given store is kept as ``self.inputstore`` and
     indexed, and ``includefuzzy`` is remembered on the instance.
     """
     self.inputstore = inputstore
     # Same object that was just stored on the instance.
     inputstore.makeindex()
     self.includefuzzy = includefuzzy
     merged_html = html.htmlfile(
         inputfile=templatetext,
         callback=self.lookup,
     )
     return merged_html.filesrc

예제 #5

0

파일 보기

파일: test_html.py 프로젝트: AshishNamdev/verbatim

 def test_escaping_script_and_pre(self):
     """<script> and <pre> can contain < and > and these should not be
     interpreted as tags.

     Only the <script> case is exercised here: the tag-like text inside it
     must not produce additional units.
     """
     h = html.htmlfile()
     store = h.parsestring("<p>We are here</p><script>Some </tag>like data<script></p>")
     # Call form of print: with a single argument it prints the same text on
     # Python 2 and Python 3, whereas the bare print statement is a
     # SyntaxError on Python 3.
     print(store.units[0].source)
     assert len(store.units) == 1

예제 #6

0

파일 보기

파일: test_html.py 프로젝트: surli/translate

    def test_extraction_attr_title(self):
        """Check extraction of ``title`` attributes from several elements."""
        parser = html.htmlfile()

        # Markup adapted from http://www.w3schools.com/tags/att_global_title.asp
        store = parser.parsestring("""
            <p><abbr title="World Health Organization">WHO</abbr> was founded in 1948.</p>
            <p title="Free Web tutorials">W3Schools.com</p>""")
        print(store.units[0].source)
        expected = (
            '<abbr title="World Health Organization">WHO</abbr> was founded in 1948.',
            "Free Web tutorials",
            "W3Schools.com",
        )
        assert len(store.units) == len(expected)
        for unit, text in zip(store.units, expected):
            assert unit.source == text

        # Table markup adapted from
        # http://www.netmechanic.com/news/vol6/html_no1.htm
        table_markup = """
            <table width="100" border="2" title="Henry Jacobs Camp summer 2003 schedule">
        """
        link_markup = """
           <div><a href="page1.html" title="HS Jacobs - a UAHC camp in Utica, MS">Henry S. Jacobs Camp</a></div>
        """
        form_markup = """
            <form name="application" title="Henry Jacobs camper application" method="  " action="  ">
        """
        # Each entry pairs a document with the unit sources it must yield,
        # in order.  The same parser instance is reused for every document.
        remaining_cases = (
            (table_markup, ("Henry Jacobs Camp summer 2003 schedule",)),
            (
                link_markup,
                ("HS Jacobs - a UAHC camp in Utica, MS", "Henry S. Jacobs Camp"),
            ),
            (form_markup, ("Henry Jacobs camper application",)),
        )
        for markup, expected in remaining_cases:
            store = parser.parsestring(markup)
            assert len(store.units) == len(expected)
            for unit, text in zip(store.units, expected):
                assert unit.source == text

예제 #7

0

파일 보기

def test_strip_html_with_pi():
    """``strip_html`` reduces PI-escaped anchors to their link text."""
    parser = html.htmlfile()
    anchors = (
        '<a href="<?$var?>">Something</a>',
        '<a href="<?=($a < $b ? $foo : ($b > c ? $bar : $cat))?>">Something</a>',
    )
    for anchor in anchors:
        escaped = parser.pi_escape(anchor)
        assert html.strip_html(escaped) == "Something"

예제 #8

0

파일 보기

 def test_escaping_script_and_pre(self):
     """Tag-like text inside <script> must not be interpreted as tags.

     The same is meant to hold for <pre>, but only the <script> case is
     exercised here.
     """
     markup = "<p>We are here</p><script>Some </tag>like data<script></p>"
     parsed = html.htmlfile().parsestring(markup)
     print(parsed.units[0].source)
     unit_count = len(parsed.units)
     assert unit_count == 1

예제 #9

0

파일 보기

 def convertfile_inner(inputfile, outputstore, keepcomments):
     """Copy the units parsed from an HTML file into ``outputstore``.

     For every unit of ``html.htmlfile(inputfile=inputfile)`` a unit with the
     same source is added to ``outputstore`` and given the HTML unit's
     locations.  When ``keepcomments`` is true, the HTML unit's notes are
     also attached as a "developer" note.
     """
     for unit in html.htmlfile(inputfile=inputfile).units:
         po_unit = outputstore.addsourceunit(unit.source)
         po_unit.addlocations(unit.getlocations())
         if not keepcomments:
             continue
         po_unit.addnote(unit.getnotes(), "developer")

예제 #10

0

파일 보기

파일: test_html.py 프로젝트: nguyenngan/translate

 def test_extraction_attr_alt(self):
     """Check that the ``alt`` attribute of an image is extracted."""
     # Markup adapted from http://www.netmechanic.com/news/vol6/html_no1.htm
     markup = """
         <img src="cafeteria.jpg" height="200" width="200" alt="UAHC campers enjoy a meal in the camp cafeteria">
     """
     parsed = html.htmlfile().parsestring(markup)
     assert [unit.source for unit in parsed.units] == [
         "UAHC campers enjoy a meal in the camp cafeteria",
     ]

예제 #11

0

파일 보기

파일: test_html.py 프로젝트: nijel/translate

def test_guess_encoding():
    """``guess_encoding`` reports the charset declared in a meta header."""
    # (document bytes, encoding name guess_encoding must return)
    cases = (
        (
            b"""<META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=UTF-8">""",
            "UTF-8",
        ),
        (
            b"""<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd"><html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><!-- base href="http://home.online.no/~rut-aane/linux.html" --><link rel="shortcut icon" href="http://home.online.no/~rut-aane/peng16x16a.gif"><meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"><meta name="Description" content="Linux newbie stuff and a little about Watching TV under Linux"><meta name="MSSmartTagsPreventParsing" content="TRUE"><meta name="GENERATOR" content="Mozilla/4.7 [en] (X11; I; Linux 2.2.5-15 i586) [Netscape]"><title>Some Linux for beginners</title><style type="text/css">""",
            "iso-8859-1",
        ),
    )
    parser = html.htmlfile()
    for document, encoding in cases:
        assert parser.guess_encoding(document) == encoding

예제 #12

0

파일 보기

파일: test_html.py 프로젝트: XLeonardo/translate-1

 def test_extraction_attr_alt(self):
     """An <img> carrying only an ``alt`` text yields that text as its one unit."""
     parser = html.htmlfile()
     # Markup adapted from http://www.netmechanic.com/news/vol6/html_no1.htm
     units = parser.parsestring("""
         <img src="cafeteria.jpg" height="200" width="200" alt="UAHC campers enjoy a meal in the camp cafeteria">
     """).units
     assert len(units) == 1
     only_unit = units[0]
     assert only_unit.source == "UAHC campers enjoy a meal in the camp cafeteria"

예제 #13

0

파일 보기

파일: convert.py 프로젝트: whitemike889/weblate

 def convertfile(storefile):
     """Return a new ``pofile`` holding the units parsed from ``storefile``.

     The content of ``storefile`` is read and parsed as HTML with
     ``includeuntaggeddata=False``.  Each parsed unit is added by source,
     together with its locations and its notes (as a "developer" note).
     ``removeduplicates("msgctxt")`` is called on the result before it is
     returned.
     """
     result = pofile()
     # The parser takes a file-like input; wrap the raw content in one that
     # has an empty file name.
     fake_input = BytesIOMode("", storefile.read())
     parser = htmlfile(includeuntaggeddata=False, inputfile=fake_input)
     for unit in parser.units:
         entry = result.addsourceunit(unit.source)
         entry.addlocations(unit.getlocations())
         entry.addnote(unit.getnotes(), "developer")
     result.removeduplicates("msgctxt")
     return result

예제 #14

0

파일 보기

파일: test_html.py 프로젝트: nijel/translate

    def test_extraction_pre_code():
        """Check that we can preserve lines in the <pre> tag"""
        h = html.htmlfile()
        store = h.parsestring("""
<pre><code>
this is
a multiline
pre tag
</code></pre>
        """)
        assert len(store.units) == 1
        assert store.units[0].source == "this is\na multiline\npre tag"

예제 #15

0

파일 보기

파일: test_html.py 프로젝트: nguyenngan/translate

 def test_extraction_tag_figcaption(self):
     """A figure yields its image ``alt`` text and its <figcaption> text."""
     # Markup adapted from http://www.w3schools.com/tags/tag_figcaption.asp
     markup = """
            <figure>
                <img src="img_pulpit.jpg" alt="The Pulpit Rock" width="304" height="228">
                <figcaption>Fig1. - A view of the pulpit rock in Norway.</figcaption>
            </figure>"""
     store = html.htmlfile().parsestring(markup)
     print(store.units[0].source)
     assert [unit.source for unit in store.units] == [
         "The Pulpit Rock",
         "Fig1. - A view of the pulpit rock in Norway.",
     ]

예제 #16

0

파일 보기

파일: html2po.py 프로젝트: lambacck/translate

 def convertfile(self, inputfile, filename, includeuntagged=False,
                 duplicatestyle="msgctxt", keepcomments=False):
     """Parse ``inputfile`` as HTML and return its units in a new PO store.

     ``includeuntagged`` is forwarded to the parser as
     ``includeuntaggeddata``.  Every parsed unit is added by source with its
     locations; its notes are attached as a "developer" note only when
     ``keepcomments`` is true.  ``duplicatestyle`` is passed to
     ``removeduplicates`` before the store is returned.  ``filename`` is
     accepted but not used by this method.
     """
     po_store = po.pofile()
     parser = html.htmlfile(inputfile=inputfile,
                            includeuntaggeddata=includeuntagged)
     for unit in parser.units:
         entry = po_store.addsourceunit(unit.source)
         entry.addlocations(unit.getlocations())
         if keepcomments:
             entry.addnote(unit.getnotes(), "developer")
     po_store.removeduplicates(duplicatestyle)
     return po_store

예제 #17

0

파일 보기

파일: test_html.py 프로젝트: XLeonardo/translate-1

 def test_extraction_tag_figcaption(self):
     """Check that <figcaption> text is extracted next to the image ``alt``."""
     expected = (
         "The Pulpit Rock",
         "Fig1. - A view of the pulpit rock in Norway.",
     )
     parser = html.htmlfile()
     # Markup adapted from http://www.w3schools.com/tags/tag_figcaption.asp
     parsed = parser.parsestring("""
            <figure>
                <img src="img_pulpit.jpg" alt="The Pulpit Rock" width="304" height="228">
                <figcaption>Fig1. - A view of the pulpit rock in Norway.</figcaption>
            </figure>""")
     first_unit = parsed.units[0]
     print(first_unit.source)
     assert len(parsed.units) == 2
     for position, text in enumerate(expected):
         assert parsed.units[position].source == text

예제 #18

0

파일 보기

파일: convert.py 프로젝트: likemike91/weblate

 def convertfile(storefile, template_store):
     """Return a new ``pofile`` built from the HTML content of ``storefile``.

     Every parsed HTML unit becomes a unit whose target is the HTML unit's
     source.  When ``template_store`` is truthy, the unit's source is taken
     from the template unit found via ``find_unit_mono`` on the joined
     locations, and HTML units without such a template unit are skipped.
     Otherwise the HTML unit's own source is used as the source too.
     Locations and notes (as a "developer" note) are copied over, and
     ``removeduplicates("msgctxt")`` is called before returning.
     """
     result = pofile()
     # Fake input file with a blank filename
     parser = htmlfile(inputfile=BytesIOMode("", storefile.read()))
     for unit in parser.units:
         locations = unit.getlocations()
         if template_store:
             # Translation: the source text comes from the template unit.
             template = template_store.find_unit_mono("".join(locations))
             if template is None:
                 # Skip locations not present in the source HTML file
                 continue
             source_text = template.source
         else:
             # Source file: the unit serves as its own source.
             source_text = unit.source
         entry = result.addsourceunit(source_text)
         entry.target = unit.source
         entry.addlocations(unit.getlocations())
         entry.addnote(unit.getnotes(), "developer")
     result.removeduplicates("msgctxt")
     return result

예제 #19

0

파일 보기

파일: test_html.py 프로젝트: XLeonardo/translate-1

 def test_extraction_tag_caption_td_th(self):
     """Check extraction of the translatable table parts: caption, th and td."""
     parser = html.htmlfile()
     # Markup adapted from http://www.w3schools.com/tags/tag_caption.asp
     parsed = parser.parsestring("""
         <table>
             <caption>Monthly savings</caption>
             <tr>
                 <th>Month</th>
                 <th>Savings</th>
             </tr>
             <tr>
                 <td>January</td>
                 <td>$100</td>
             </tr>
         </table>""")
     print(parsed.units[0].source)
     # One unit per caption/cell, in document order.
     assert [unit.source for unit in parsed.units] == [
         "Monthly savings",
         "Month",
         "Savings",
         "January",
         "$100",
     ]

예제 #20

0

파일 보기

    def test_extraction_attr_title(self):
        """Check that the text of ``title`` attributes ends up in units."""
        h = html.htmlfile()

        def sources(markup):
            """Parse *markup* with the shared parser; return all unit sources."""
            return [unit.source for unit in h.parsestring(markup).units]

        # Markup adapted from http://www.w3schools.com/tags/att_global_title.asp
        found = sources("""
            <p><abbr title="World Health Organization">WHO</abbr> was founded in 1948.</p>
            <p title="Free Web tutorials">W3Schools.com</p>""")
        print(found[0])
        # FIXME: not ideal -- the abbr title is a unit of its own and is also
        # still present in the paragraph markup; either drop title= there or
        # do not extract it earlier.
        assert found == [
            "World Health Organization",
            '<abbr title="World Health Organization">WHO</abbr> was founded in 1948.',
            "Free Web tutorials",
            "W3Schools.com",
        ]

        # Markup adapted from http://www.netmechanic.com/news/vol6/html_no1.htm
        found = sources("""
            <table width="100" border="2" title="Henry Jacobs Camp summer 2003 schedule">
        """)
        assert found == ["Henry Jacobs Camp summer 2003 schedule"]
        # FIXME: this doesn't extract as expected, hence disabled
        #store = h.parsestring("""
        #    <a href="page1.html" title="HS Jacobs - a UAHC camp in Utica, MS">Henry S. Jacobs Camp</a>
        #""")
        #assert len(store.units) == 2
        #assert store.units[0].source == "HS Jacobs - a UAHC camp in Utica, MS"
        #assert store.units[1].source == "Henry S. Jacobs Camp"
        found = sources("""
            <form name="application" title="Henry Jacobs camper application" method="  " action="  ">
        """)
        assert found == ["Henry Jacobs camper application"]

예제 #21

0

파일 보기

파일: test_html.py 프로젝트: slide333333/translate

 def strip_html(self, str):
     """Parse ``str`` as HTML and return the unit sources joined by newlines."""
     units = html.htmlfile().parsestring(str).units
     return "\n".join(unit.source for unit in units)

예제 #22

0

파일 보기

파일: test_html.py 프로젝트: AshishNamdev/verbatim

 def test_self_closing_tags(self):
     """A heading containing unclosed <img>/<br> tags yields exactly one unit."""
     markup = "<h3>Some text <img><br><img></h3>"
     parsed = html.htmlfile().parsestring(markup)
     assert len(parsed.units) == 1

예제 #23

0

파일 보기

파일: test_html.py 프로젝트: AshishNamdev/verbatim

def test_pi_escaping():
    """``pi_escape`` turns ``<``/``>`` inside a ``<? ?>`` block into ``%lt;``/``%gt;``."""
    raw = '<a href="<?=($a < $b ? $foo : ($b > c ? $bar : $cat))?>">'
    escaped = '<a href="<?=($a %lt; $b ? $foo : ($b %gt; c ? $bar : $cat))?>">'
    assert html.htmlfile().pi_escape(raw) == escaped

예제 #24

0

파일 보기

파일: test_html.py 프로젝트: AshishNamdev/verbatim

def test_strip_html_with_pi():
    """After ``pi_escape``, ``strip_html`` leaves only the anchor text."""
    parser = html.htmlfile()

    # Plain variable substitution inside the href.
    simple = parser.pi_escape('<a href="<?$var?>">Something</a>')
    assert html.strip_html(simple) == "Something"

    # Expression containing < and > inside the href.
    nested = parser.pi_escape(
        '<a href="<?=($a < $b ? $foo : ($b > c ? $bar : $cat))?>">Something</a>'
    )
    assert html.strip_html(nested) == "Something"

예제 #25

0

파일 보기

파일: test_html.py 프로젝트: AshishNamdev/verbatim

def test_guess_encoding():
    """Check the encoding that ``guess_encoding`` reads from meta headers."""
    parser = html.htmlfile()

    short_header = '''<META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=UTF-8">'''
    assert parser.guess_encoding(short_header) == "UTF-8"

    # A real-world page head that declares its charset in two meta tags.
    long_header = '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd"><html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><!-- base href="http://home.online.no/~rut-aane/linux.html" --><link rel="shortcut icon" href="http://home.online.no/~rut-aane/peng16x16a.gif"><meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"><meta name="Description" content="Linux newbie stuff and a little about Watching TV under Linux"><meta name="MSSmartTagsPreventParsing" content="TRUE"><meta name="GENERATOR" content="Mozilla/4.7 [en] (X11; I; Linux 2.2.5-15 i586) [Netscape]"><title>Some Linux for beginners</title><style type="text/css">'''
    assert parser.guess_encoding(long_header) == "iso-8859-1"

예제 #26

0

파일 보기

def test_pi_escaping():
    """Angle brackets inside a processing instruction come back as %lt; / %gt;."""
    parser = html.htmlfile()
    result = parser.pi_escape(
        '<a href="<?=($a < $b ? $foo : ($b > c ? $bar : $cat))?>">'
    )
    expected = '<a href="<?=($a %lt; $b ? $foo : ($b %gt; c ? $bar : $cat))?>">'
    assert result == expected

예제 #27

0

파일 보기

파일: test_html.py 프로젝트: slide333333/translate

 def test_self_closing_tags(self):
     """Unclosed <img> and <br> tags inside an <h3> leave a single unit."""
     parser = html.htmlfile()
     unit_count = len(
         parser.parsestring("<h3>Some text <img><br><img></h3>").units
     )
     assert unit_count == 1