def parse_page(self, response):
    """Build a crawl item from a question page.

    Stores the page URL and collects the label, question and answer HTML
    fragments from ``response`` by XPath, using a ``JyeooCrawlLoader``.

    Args:
        response: Response object handed to the loader; its ``url`` is
            recorded as ``question_url``.

    Returns:
        Whatever the loader's ``load_item()`` produces.
    """
    # NOTE: a previously commented-out variant built a separate answer URL
    # (".../qiuda.php?questionid=" + detail id) and fetched it; that path is
    # disabled, and the answer is read from this same response instead.
    loader = JyeooCrawlLoader(response=response)
    loader.add_value('question_url', response.url)

    # (item field, XPath) pairs, registered in this order.
    field_xpaths = (
        ('label_html', '//div[@class="xiti-content"]/div[@class="ndwz"]'),
        ('question_html', '(//div[@class="timutext"])[1]'),
        ('ans_html', '(//div[@class="answer_inner"])[1]'),
    )
    for field_name, xpath in field_xpaths:
        loader.add_xpath(field_name, xpath)

    return loader.load_item()
def parse_page(self, response):
    """Build a crawl item from a question page.

    Stores the page URL, collects the label, type, question, answer and
    analysis HTML fragments by XPath, then passes the answer and analysis
    fields through ``rmload_start_end`` with a start/end marker pair each.

    Args:
        response: Response object handed to the loader; its ``url`` is
            recorded as ``question_url``.

    Returns:
        Whatever the loader's ``load_item()`` produces.
    """
    loader = JyeooCrawlLoader(response=response)
    loader.add_value("question_url", response.url)

    # (item field, XPath) pairs, registered in this order.
    field_xpaths = (
        ("label_html", '//div[@class="seotop"]'),
        ("type_html", '//div[@id="q_indexkuai22111"]/span'),
        ("question_html", '//div[@id="q_indexkuai221"]'),
        ("ans_html", '//div[@id="q_indexkuai321"]'),
        ("parse_html", '//div[@id="secinfoPanel"]'),
    )
    for field_name, xpath in field_xpaths:
        loader.add_xpath(field_name, xpath)

    # (item field, start marker, end marker) triples for rmload_start_end.
    # Presumably the helper strips the span between the two markers (share
    # widget, copyright box) from the loaded field -- verify against its
    # definition.
    marker_trims = (
        ("ans_html", '<span class="share_note">', "<!-- Baidu Button END -->"),
        ("parse_html", '<div class="seccopyright">', "</div>"),
    )
    for field_name, start_marker, end_marker in marker_trims:
        loader = rmload_start_end(loader, field_name, start_marker, end_marker)

    return loader.load_item()
def parse_page(self, response):
    """Build a crawl item from a question page.

    Stores the page URL and collects the question, answer, analysis and
    tips HTML fragments from ``response`` by XPath, using a
    ``JyeooCrawlLoader``.

    Args:
        response: Response object handed to the loader; its ``url`` is
            recorded as ``question_url``.

    Returns:
        Whatever the loader's ``load_item()`` produces.
    """
    loader = JyeooCrawlLoader(response=response)
    loader.add_value('question_url', response.url)

    # (item field, XPath) pairs, registered in this order. The answer,
    # analysis and tips blocks all sit under the "detail-item" container.
    field_xpaths = (
        ('question_html', '//div[@class="result-content"]'),
        ('ans_html', '//div[@class="detail-item"]/div[@class="answer"]'),
        ('parse_html', '//div[@class="detail-item"]/div[@class="analysis"]'),
        ('comments_html', '//div[@class="detail-item"]/div[@class="tips"]'),
    )
    for field_name, xpath in field_xpaths:
        loader.add_xpath(field_name, xpath)

    return loader.load_item()