Example #1
0
    def parse_page(self, response):
        """Populate a loader from one question page and return the loaded item.

        The page URL is stored under ``question_url``; the label, question
        and answer fragments are selected through XPath expressions.

        :param response: the response object for the page being parsed;
            its ``url`` attribute is recorded and the loader is bound to it.
        :return: the result of ``load_item()`` on the populated loader.
        """
        # (field name, XPath) pairs, registered in this exact order.
        xpath_by_field = (
            ('label_html', '//div[@class="xiti-content"]/div[@class="ndwz"]'),
            # The "(...)[1]" form keeps only the first matching node.
            ('question_html', '(//div[@class="timutext"])[1]'),
            ('ans_html', '(//div[@class="answer_inner"])[1]'),
        )

        loader = JyeooCrawlLoader(response=response)
        loader.add_value('question_url', response.url)
        for field_name, xpath in xpath_by_field:
            loader.add_xpath(field_name, xpath)

        return loader.load_item()
Example #2
0
    def parse_page(self, response):
        """Populate a loader from one question page and return the loaded item.

        The page URL is stored under ``question_url``; label, type, question,
        answer and analysis fragments are selected through XPath. The
        ``ans_html`` and ``parse_html`` fields are then passed through
        ``rmload_start_end`` with a start/end marker pair.

        :param response: the response object for the page being parsed;
            its ``url`` attribute is recorded and the loader is bound to it.
        :return: the result of ``load_item()`` on the processed loader.
        """
        loader = JyeooCrawlLoader(response=response)
        loader.add_value("question_url", response.url)

        # (field name, XPath) pairs, registered in this exact order.
        xpath_by_field = (
            ("label_html", '//div[@class="seotop"]'),
            ("type_html", '//div[@id="q_indexkuai22111"]/span'),
            ("question_html", '//div[@id="q_indexkuai221"]'),
            ("ans_html", '//div[@id="q_indexkuai321"]'),
            ("parse_html", '//div[@id="secinfoPanel"]'),
        )
        for field_name, xpath in xpath_by_field:
            loader.add_xpath(field_name, xpath)

        # NOTE(review): rmload_start_end is defined elsewhere; presumably it
        # removes the text between the start and end markers from the named
        # field -- confirm against the helper. Its return value replaces the
        # loader, so the calls are chained in order.
        marker_trims = (
            ("ans_html", '<span class="share_note">', "<!-- Baidu Button END -->"),
            ("parse_html", '<div class="seccopyright">', "</div>"),
        )
        for field_name, start_marker, end_marker in marker_trims:
            loader = rmload_start_end(loader, field_name, start_marker, end_marker)

        return loader.load_item()
Example #3
0
    def parse_page(self, response):
        """Populate a loader from one question page and return the loaded item.

        The page URL is stored under ``question_url``; the question, answer,
        analysis and tips fragments are selected through XPath expressions.

        :param response: the response object for the page being parsed;
            its ``url`` attribute is recorded and the loader is bound to it.
        :return: the result of ``load_item()`` on the populated loader.
        """
        # (field name, XPath) pairs, registered in this exact order.
        xpath_by_field = (
            ('question_html', '//div[@class="result-content"]'),
            ('ans_html', '//div[@class="detail-item"]/div[@class="answer"]'),
            ('parse_html', '//div[@class="detail-item"]/div[@class="analysis"]'),
            ('comments_html', '//div[@class="detail-item"]/div[@class="tips"]'),
        )

        loader = JyeooCrawlLoader(response=response)
        loader.add_value('question_url', response.url)
        for field_name, xpath in xpath_by_field:
            loader.add_xpath(field_name, xpath)

        return loader.load_item()