def test2() : xpath = '//*[@id="baseInfoWrapDom"]/div[2]/div[1]' content = read_resource("jiang.html") doc = parser.fromstring(content) title_list = doc.xpath('//*[@class="biTitle"]/text()') content_list = doc.xpath('//*[@class="biContent"]/text()') data = {} for index in range(len(title_list)) : btitle = extract_content(title_list[index].encode("utf8")) bcontent = extract_content(content_list[index].encode("utf8")) data[btitle] = bcontent print btitle, bcontent print data json_str = json.dumps(data) print "--------------" print json_str print data.get("中文名")
def xpath_test(): content = read_resource("jiang.html") doc = parser.fromstring(content) root = doc.text_content() print root