Example #1
0
    def get_html_data(self, html_name):
        """Print the rows, header cells and data cells found in an HTML file.

        The file ``html_name`` is read as UTF-8 from the ``self.html_Path``
        directory.  For every ``<tr>...</tr>`` span the raw row markup is
        printed, followed by the content of each ``<th>`` cell and then of
        each ``<td>`` cell inside that row.  The patterns match the literal
        ``<tr>``, ``<th>`` and ``<td>`` openers, so tags that carry
        attributes are not picked up.

        Args:
            html_name: File name of the HTML document, relative to
                ``self.html_Path``.

        Returns:
            None.  Any error (missing file, undecodable bytes, ...) is
            printed instead of being raised.
        """
        # Compile the patterns once instead of re-resolving them per row.
        row_pattern = re.compile(r'<tr>(.*?)</tr>', re.S | re.M)
        header_pattern = re.compile(r'<th>(.*?)</th>', re.S | re.M)
        cell_pattern = re.compile(r'<td>(.*?)</td>', re.S | re.M)

        # Deliberately best-effort: callers get a printed message, never an
        # exception, so the whole body stays inside one broad handler.
        try:
            with codecs.open(os.path.join(self.html_Path, html_name), "r",
                             "utf-8") as f:
                # codecs.open decodes while reading, so ``content`` and every
                # regex match taken from it are already text.
                content = f.read()

            for row in row_pattern.findall(content):
                print(row)
                # Header cells of this row.  No re-decoding here: calling
                # unicode(text, 'utf-8') on already-decoded text raises
                # TypeError, which used to abort the extraction.
                for header in header_pattern.findall(row):
                    print(header)
                # Data cells of this row.
                for cell in cell_pattern.findall(row):
                    print(cell)
        except Exception as e:
            print(e)
Example #2
0
def test_stylenames():
    """A class-level ``html.Style`` is rendered as the tag's style attribute."""
    class my:
        class body(html.body):
            style = html.Style(font_size="12pt")

    tag = my.body()
    rendered = unicode(tag)
    expected = '<body style="font-size: 12pt"></body>'
    assert rendered == expected
Example #3
0
def test_tag_with_subclassed_attr_simple():
    """Defaults on a subclassed ``Attr`` appear on the instance and in the markup."""
    class my(ns.hello):
        class Attr(ns.hello.Attr):
            hello = "world"

    tag = my()
    greeting = tag.attr.hello
    assert greeting == 'world'
    markup = unicode(tag)
    assert markup == '<my hello="world"/>'
Example #4
0
def test_tag_with_subclassed_attr_simple():
    """An attribute default set on a nested ``Attr`` subclass is kept and serialized."""
    class my(ns.hello):
        class Attr(ns.hello.Attr):
            hello = "world"

    instance = my()
    # The default must be readable through ``attr`` ...
    default_value = instance.attr.hello
    assert default_value == 'world'
    # ... and must be written out with the tag, which is named after the class.
    serialized = unicode(instance)
    assert serialized == '<my hello="world"/>'
Example #5
0
def test_alternating_style():
    """List items take their style from a property keyed on their index parity."""
    white_then_grey = (
        html.Style(background="white"),
        html.Style(background="grey"),
    )

    class my(html):
        class li(html.li):
            @property
            def style(self):
                # Even positions get the first style, odd ones the second.
                position = self.parent.index(self)
                return white_then_grey[position % 2]

    first = my.li("hello")
    second = my.li("world")
    third = my.li("42")
    listing = my.ul(first, second, third)

    expected = ('<ul><li style="background: white">hello</li>'
                '<li style="background: grey">world</li>'
                '<li style="background: white">42</li>'
                '</ul>')
    rendered = unicode(listing)
    assert rendered == expected
Example #6
0
def test_alternating_style():
    """A ``style`` property on ``li`` alternates backgrounds by sibling index."""
    backgrounds = (
        html.Style(background="white"),
        html.Style(background="grey"),
    )

    class my(html):
        class li(html.li):
            @property
            def style(self):
                # The item's index inside its parent selects one of the two styles.
                slot = self.parent.index(self) % 2
                return backgrounds[slot]

    entries = (my.li("hello"), my.li("world"), my.li("42"))
    unordered = my.ul(*entries)
    markup = unicode(unordered)
    wanted = ('<ul><li style="background: white">hello</li>'
              '<li style="background: grey">world</li>'
              '<li style="background: white">42</li>'
              '</ul>')
    assert markup == wanted
Example #7
0
def test_html_name_stickyness():
    """A subclass of ``html.p`` still serializes under the ``p`` tag name."""
    class my(html.p):
        pass

    paragraph = my("hello")
    rendered = unicode(paragraph)
    assert rendered == '<p>hello</p>'
Example #8
0
def test_raw():
    """Content wrapped in ``py.xml.raw`` is written out without escaping."""
    literal = py.xml.raw("<p>literal</p>")
    wrapper = ns.some(literal)
    assert unicode(wrapper) == "<some><p>literal</p></some>"
Example #9
0
def test_class_None():
    """Passing ``class_=None`` leaves the rendered tag without a class attribute."""
    body = html.body(class_=None)
    assert unicode(body) == '<body></body>'
Example #10
0
def test_tag_with_text_and_attributes():
    """Keyword arguments become attributes, readable via ``attr`` and serialized."""
    tag = ns.some(name="hello", value="world")
    attrs = tag.attr
    assert attrs.name == 'hello'
    assert attrs.value == 'world'
    assert unicode(tag) == '<some name="hello" value="world"/>'
Example #11
0
def test_tag_with_text_and_attributes():
    """Attributes given as keywords are stored on ``attr`` and show up in the markup."""
    element = ns.some(name="hello", value="world")

    # Attribute access on the instance.
    name_attr = element.attr.name
    assert name_attr == 'hello'
    value_attr = element.attr.value
    assert value_attr == 'world'

    # Serialized form.
    expected = '<some name="hello" value="world"/>'
    serialized = unicode(element)
    assert serialized == expected
Example #12
0
def test_singleton():
    """``link`` is rendered self-closed while ``script`` keeps an explicit end tag."""
    link_head = html.head(html.link(href="foo"))
    link_markup = unicode(link_head)
    assert link_markup == '<head><link href="foo"/></head>'

    script_head = html.head(html.script(src="foo"))
    script_markup = unicode(script_head)
    assert script_markup == '<head><script src="foo"></script></head>'
Example #13
0
def test_tag_with_text_entity():
    """An ampersand in text content is escaped as ``&amp;`` on output."""
    greeting = ns.hello('world & rest')
    assert unicode(greeting) == "<hello>world &amp; rest</hello>"
Example #14
0
def test_tag_xmlname():
    """A class-level ``xmlname`` replaces the class name as the tag name."""
    class my(ns.hello):
        xmlname = 'world'

    instance = my()
    rendered = unicode(instance)
    assert rendered == '<world/>'
Example #15
0
def test_list_nested():
    """Children handed over inside a list are rendered like direct children."""
    children = [ns.world()]  # deliberately a list, not positional arguments
    parent = ns.hello(children)
    assert unicode(parent) == '<hello><world/></hello>'
Example #16
0
def test_tag_nested():
    """Serializing a tag sets ``parent`` on its child and nests the child markup."""
    outer = ns.hello(ns.world())
    unicode(outer)  # the first serialization triggers parentifying
    child = outer[0]
    assert child.parent is outer
    assert unicode(outer) == '<hello><world/></hello>'
Example #17
0
def test_tag_with_raw_attr():
    """An attribute value wrapped in ``raw`` is written out unescaped."""
    unescaped = raw('&')
    tag = html.object(data=unescaped)
    rendered = unicode(tag)
    assert rendered == '<object data="&"></object>'
Example #18
0
def test_singleton():
    """A ``link`` child collapses to ``<link .../>``; a ``script`` child does not."""
    # Case 1: link inside head is emitted self-closed.
    head_with_link = html.head(html.link(href="foo"))
    expected_link = '<head><link href="foo"/></head>'
    assert unicode(head_with_link) == expected_link

    # Case 2: script inside head keeps its closing tag.
    head_with_script = html.head(html.script(src="foo"))
    expected_script = '<head><script src="foo"></script></head>'
    assert unicode(head_with_script) == expected_script
Example #19
0
def test_class_None():
    """A ``None`` value for ``class_`` yields a bare ``<body>`` element."""
    tag = html.body(class_=None)
    rendered = unicode(tag)
    expected = '<body></body>'
    assert rendered == expected
Example #20
0
def test_tag_with_raw_attr():
    """``raw`` attribute values keep a literal ``&`` in the serialized tag."""
    element = html.object(data=raw('&'))
    expected = '<object data="&"></object>'
    assert unicode(element) == expected
Example #21
0
def test_list_nested():
    """A list passed as the single child argument is rendered as nested tags."""
    # The child is wrapped in a list on purpose.
    wrapped_child = [ns.world()]
    container = ns.hello(wrapped_child)
    serialized = unicode(container)
    assert serialized == '<hello><world/></hello>'
Example #22
0
def test_tag_with_text():
    """Plain text passed to a tag becomes its element content."""
    greeting = ns.hello("world")
    assert unicode(greeting) == "<hello>world</hello>"
Example #23
0
def test_tag_with_text_entity():
    """Text content containing ``&`` is serialized with ``&amp;``."""
    element = ns.hello('world & rest')
    serialized = unicode(element)
    expected = "<hello>world &amp; rest</hello>"
    assert serialized == expected
Example #24
0
def test_tag_with_text_and_attributes_entity():
    """An ``&`` in an attribute is stored verbatim but escaped when serialized."""
    tag = ns.some(name="hello & world")
    stored = tag.attr.name
    assert stored == "hello & world"
    assert unicode(tag) == '<some name="hello &amp; world"/>'
Example #25
0
def test_tag_with_text():
    """A tag built from a string renders that string between its tags."""
    element = ns.hello("world")
    serialized = unicode(element)
    expected = "<hello>world</hello>"
    assert serialized == expected
Example #26
0
def test_tag_nested():
    """After serialization the child's ``parent`` is the enclosing tag."""
    wrapper = ns.hello(ns.world())
    # Serialize once up front; this is what triggers parentifying.
    unicode(wrapper)
    first_child = wrapper[0]
    assert first_child.parent is wrapper

    serialized = unicode(wrapper)
    expected = '<hello><world/></hello>'
    assert serialized == expected
Example #27
0
def test_raw():
    """Markup passed through ``py.xml.raw`` appears verbatim inside the parent tag."""
    element = ns.some(py.xml.raw("<p>literal</p>"))
    expected = "<some><p>literal</p></some>"
    serialized = unicode(element)
    assert serialized == expected
Example #28
0
def test_tag_xmlname():
    """Setting ``xmlname`` on a tag subclass overrides the emitted tag name."""
    class my(ns.hello):
        xmlname = 'world'

    expected = '<world/>'
    assert unicode(my()) == expected
Example #29
0
def test_html_name_stickyness():
    """Subclassing ``html.p`` under another class name keeps the ``<p>`` tag name."""
    class my(html.p):
        pass

    expected = '<p>hello</p>'
    element = my("hello")
    assert unicode(element) == expected
Example #30
0
def test_tag_with_text_and_attributes_entity():
    """Attribute values keep their raw ``&`` in memory and gain ``&amp;`` on output."""
    element = ns.some(name="hello & world")

    # In memory the value is untouched.
    assert element.attr.name == "hello & world"

    # On output the ampersand is escaped.
    serialized = unicode(element)
    expected = '<some name="hello &amp; world"/>'
    assert serialized == expected
Example #31
0
def test_stylenames():
    """``font_size`` given to ``html.Style`` is serialized as ``font-size``."""
    class my:
        class body(html.body):
            style = html.Style(font_size="12pt")

    expected = '<body style="font-size: 12pt"></body>'
    styled_body = my.body()
    assert unicode(styled_body) == expected