Example #1
0
def test2() :
	xpath = '//*[@id="baseInfoWrapDom"]/div[2]/div[1]'
	content = read_resource("jiang.html")
	doc = parser.fromstring(content)

	title_list = doc.xpath('//*[@class="biTitle"]/text()')
	content_list = doc.xpath('//*[@class="biContent"]/text()')

	data = {}

	for index in range(len(title_list)) :
		btitle = extract_content(title_list[index].encode("utf8"))
		bcontent = extract_content(content_list[index].encode("utf8"))
		data[btitle] = bcontent
		print btitle, bcontent

	print data
	json_str = json.dumps(data)
	print "--------------"
	print json_str
	print data.get("中文名")
Example #2
0
def xpath_test():
	content = read_resource("jiang.html")
	doc = parser.fromstring(content)
	root = doc.text_content()
	print root