Exemple #1
0
    def parse_wenda_page(self, response):
        """Turn a question page into one question item carrying one answer.

        Reads the keyword/description meta tags, the ``div.dtl-top`` title,
        the ``atcle-ms`` paragraphs (question body) and the ``dtl-reply``
        paragraphs (answer body).

        Yields:
            A single ``WendaAskItem`` whose ``askList`` contains one
            ``WendaReplayItem``.
        """

        def paragraphs_as_html(xpath):
            # Stripped text of every node matched by ``xpath``, joined by <br>.
            return "<br>".join(
                node.xpath("string()").extract()[0].strip()
                for node in response.xpath(xpath))

        question = WendaAskItem()
        question["tagName"] = self.keyword
        question["keyword"] = response.xpath(
            "//meta[@name='keywords']/@content").extract()[0]

        # Use the capitalised meta name only when the lowercase tag is absent.
        if response.xpath("//meta[@name='description']"):
            description_nodes = response.xpath(
                "//meta[@name='description']/@content")
        else:
            description_nodes = response.xpath(
                "//meta[@name='Description']/@content")
        question["description"] = description_nodes.extract()[0]

        question["title"] = response.css("div.dtl-top h1::text").get()
        question["images"] = []
        question["content"] = paragraphs_as_html("//div[@class='atcle-ms']/p")
        question["addtime"] = response.css(
            "div.dtl-info span:nth-child(1)::text").get()
        question["source"] = response.request.url
        question["username"] = ""
        question["headPortrait"] = ""
        question["askList"] = []
        question["topicUrl"] = ""

        answer = WendaReplayItem()
        answer["title"] = question["title"]
        answer["username"] = response.css(
            "dl.dtl-ys dd b a:nth-child(1)::text").get()
        answer["images"] = []
        answer["content"] = paragraphs_as_html("//div[@class='dtl-reply']/p")
        answer["addtime"] = response.css(
            "div.dtl-list div.dtl-time span::text").get()

        question["askList"].append(answer)

        yield question
Exemple #2
0
    def parse_wenda_page(self, response):
        """Build a question item and its selected answers from a detail page.

        The question fields come from the ``div.ask_cont`` area; every
        ``div.selected div.sele_all`` node becomes one ``WendaReplayItem``.
        The finished item is printed and then yielded.

        Yields:
            One ``WendaAskItem`` with the answers collected in ``askList``.
        """

        def stripped(selector, query):
            # First text match for ``query`` with surrounding whitespace removed.
            return selector.css(query).get().strip()

        question = WendaAskItem()
        question["tagName"] = self.keyword
        question["keyword"] = response.xpath(
            "//meta[@name='Keywords']/@content").extract()[0]
        question["description"] = response.xpath(
            "//meta[@name='description']/@content").extract()[0]
        question["title"] = stripped(response, "div.ask_cont p.ask_tit::text")
        question["images"] = []
        question["content"] = stripped(
            response, "div.ask_cont div.ask_hid p.txt_ms::text")
        question["addtime"] = stripped(
            response, "div.ask_cont p.txt_nametime span:nth-child(2)::text")
        question["source"] = response.request.url
        question["username"] = stripped(
            response, "div.ask_cont p.txt_nametime span:nth-child(1)::text")
        question["headPortrait"] = ""
        question["askList"] = []
        question["topicUrl"] = ""

        for node in response.css("div.selected div.sele_all"):
            answer = WendaReplayItem()
            answer["title"] = question["title"]
            answer["username"] = stripped(
                node, "div.doc_txt p.doc_xinx span:nth-child(1)::text")
            answer["likes"] = 0
            # Avatar and time are stored as found, without stripping.
            answer["headPortrait"] = node.css(
                "div.doc_img a img::attr(src)").get()
            answer["images"] = []
            answer["content"] = stripped(node, "p.sele_txt::text")
            answer["addtime"] = node.css(
                "div.doc_t_strip div.zwAll p::text").get()

            question["askList"].append(answer)

        print("wenda: ", question)

        yield question
Exemple #3
0
    def parse_next_ask(self, response):
        """Collect the answers of a follow-up page and continue the pagination.

        The partially filled question item travels in
        ``response.meta["wendaAskItem"]``; every ``li.answer-item`` found on
        this page is appended to its ``askList``.

        Yields:
            A ``SplashRequest`` for the next page while this is not the last
            page and a "下一页" link is present; otherwise the completed
            ``WendaAskItem``.  Exactly one object is yielded, so the item is
            never dropped part-way through the pagination.
        """
        wendaAskItem = response.meta["wendaAskItem"]

        # xpath() on an empty SelectorList yields nothing, so no guard is needed.
        replys = response.xpath("//ul[@class='qa-answer-list']").xpath(
            "./li[@class='answer-item']")
        for reply in replys:
            wendaReply = WendaReplayItem()
            wendaReply["title"] = wendaAskItem["title"]
            wendaReply["username"] = reply.xpath(
                ".//ul[@class='qa-meta']/li[@class='username']//span/text()"
            ).extract()[0]
            wendaReply["images"] = []
            wendaReply["content"] = reply.xpath(
                ".//div[@class='answer-text']/text()").extract()[0].strip()
            wendaReply["addtime"] = reply.xpath(
                ".//ul[@class='qa-meta']/li[@class='timestamp']/abbr/@title"
            ).extract()[0]
            wendaReply["source"] = wendaAskItem["source"]
            wendaReply["headPortrait"] = reply.xpath(
                "./ul[@class='qa-meta']/li[@class='useravatar']/a/img/@src"
            ).extract()[0]
            wendaReply["likes"] = reply.xpath(
                "./a[@class='qa-answer-list-vote']/span[@class='n']/em/text()"
            ).extract()[0]

            wendaAskItem["askList"].append(wendaReply)

        current_text = response.xpath(
            "//div[@class='pagejump']/span[@class='current']/text()"
        ).extract_first()
        total_text = response.xpath(
            "//div[@class='pagejump']/span[@class='page-number']/text()"
        ).extract_first()

        # Match whole digits only so int() cannot choke on a trailing ".".
        current_match = re.search(r"\d+", current_text or "")
        total_match = re.search(r"\d+", total_text or "")
        on_last_page = (
            current_match is not None
            and total_match is not None
            and int(current_match.group()) == int(total_match.group())
        )

        next_page_url = None
        if not on_last_page:
            for page in response.xpath("//div[@class='pagejump']/a"):
                if page.xpath("./text()").extract_first() == "下一页":
                    next_page_url = page.xpath("./@href").extract_first()
                    break

        if next_page_url:
            # Keep walking the pager, handing the same item to the next page.
            next_url = response.urljoin(next_page_url)
            yield SplashRequest(next_url,
                                self.parse_next_ask,
                                args={'wait': 1},
                                meta={
                                    'origin_url': next_url,
                                    'wendaAskItem': wendaAskItem
                                })
        else:
            # Last page, or no way forward: emit what has been collected.
            yield wendaAskItem
Exemple #4
0
    def parse_wenda(self, response):
        """Parse a thread page into a question item plus a keyword item.

        The opening post is read from the ``theme_box post_list`` container
        and each ``post_list`` node under ``list_box theme_reply`` becomes a
        reply.  Replies without a ``user_name`` link are skipped.

        Yields:
            The ``WendaAskItem`` for the thread, followed by one
            ``KeywordItem`` built from the page's tag box.
        """

        item = WendaAskItem()

        item["tagName"] = self.keyword
        item["keyword"] = response.xpath(
            "//meta[@name='keywords']/@content").extract()[0]
        item["description"] = response.xpath(
            "//meta[@name='description']/@content").extract()[0]
        item["title"] = response.xpath(
            "//h1[@class='ts-title']/text()").extract()[0]
        item["images"] = []

        content_table = response.xpath(
            "//div[@class='theme_box post_list']//div[@class='topic_main list_box']//div[@class='fsz_main']")

        if content_table:
            item["content"] = response.xpath(
                "//div[@class='theme_box post_list']//div[@class='topic_main list_box']//div[@class='fsz_main']//table//td/text()").extract()[0].strip()
        else:
            item["content"] = ""

        addtime_span_tag = response.xpath(
            "//div[@class='theme_box post_list']//div[@class='auth_info_main']/em/span")
        if addtime_span_tag:
            item["addtime"] = addtime_span_tag.xpath("./@title").extract()[0]
        else:
            # Read the text node, not the element: extracting the <em> itself
            # returns its outer HTML and leaves a trailing "</em>" in the date.
            item["addtime"] = response.xpath(
                "//div[@class='auth_info_main']/em/text()").extract()[0].strip().split("发表于 ")[1]
        item["source"] = response.meta["origin_url"]
        item["username"] = response.xpath(
            "//div[@class='theme_box post_list']//div[@class='auth_info_bar']//a/text()").extract()[0]

        # Use the avatar URL when one exists, otherwise fall back to "".
        avatar_src = response.xpath(
            "//div[@class='theme_box post_list']/div[@class='auth_info']//div[@class='avatar']/img/@src").extract()
        item["headPortrait"] = avatar_src[0] if avatar_src else ""
        item["askList"] = []
        item["topicUrl"] = ""

        replys = response.xpath(
            "//div[@class='list_box theme_reply']//div[contains(@class, 'post_list')]")

        for reply in replys:
            replyItem = WendaReplayItem()
            replyItem["title"] = item["title"]
            username_tag = reply.xpath(
                ".//div[@class='user_name']/a")
            if not username_tag:
                # A post_list node without a user link is not stored as a reply.
                continue
            replyItem["username"] = reply.xpath(
                ".//div[@class='user_name']/a/text()").extract()[0]
            replyItem["images"] = []

            content_table = reply.xpath(".//div[@class='fsz_main']")
            if content_table:
                replyItem["content"] = reply.xpath(
                    ".//div[@class='fsz_main']/table//td/text()").extract()[0].strip()
            else:
                replyItem["content"] = ""
            replyItem['addtime'] = reply.xpath(
                ".//div[@class='auth_info_main']/em/text()").extract()[0].split("发表于 ")[1]
            replyItem["source"] = item["source"]
            avatar_tag = reply.xpath(
                ".//div[@class='avatar']/img")
            if avatar_tag:
                replyItem["headPortrait"] = reply.xpath(
                    ".//div[@class='avatar']/img/@src").extract()[0]
            else:
                replyItem["headPortrait"] = ""
            replyItem["likes"] = 0

            item["askList"].append(replyItem)

        yield item

        tag_box = response.xpath(
            "//div[@class='mod_s tag_box']/div[@class='mod_con']/a/text()").extract()

        keywordItem = KeywordItem()
        keywordItem["keywordList"] = []
        # NOTE(review): "title" ends up holding the last tag only, and neither
        # "title" nor "source" is set when the tag box is empty - confirm intent.
        for keyword in tag_box:
            keyword = keyword.strip()
            keywordItem["title"] = keyword
            keywordItem["keywordList"].append(keyword)
            keywordItem["source"] = 'https://www.icheruby.com/tags/'

        yield keywordItem
Exemple #5
0
    def parse_ask(self, response):
        """Parse the first page of a question and start paginating its answers.

        Builds a ``WendaAskItem`` from the page header, adds the
        ``best-content`` answer when present, then every ``li.answer-item``
        of the ``qa-answer-list``.

        Yields:
            A ``SplashRequest`` to ``parse_next_ask`` (carrying the item in
            ``meta``) when the pager has a "下一页" link; otherwise the
            finished ``WendaAskItem``.  Exactly one object is yielded.
        """
        wendaAskItem = WendaAskItem()
        wendaAskItem["tagName"] = self.keyword
        wendaAskItem["keyword"] = response.xpath(
            "//meta[@name='keywords']/@content").extract()[0]
        wendaAskItem["description"] = response.xpath(
            "//meta[@name='description']/@content").extract()[0]
        wendaAskItem["title"] = response.xpath(
            "//div[@class='qa-title']/h1/text()").extract()[0]
        wendaAskItem["images"] = []
        wendaAskItem["content"] = response.xpath(
            "string(//blockquote[@class='qa-text'])").extract()[0].strip()
        wendaAskItem["addtime"] = response.xpath(
            "//div[@class='qa-related']//span[@class='source']/abbr/text()"
        ).extract()[0]
        wendaAskItem["source"] = response.meta["origin_url"]

        # The asker's name is wrapped in a link on some pages and bare on others.
        if response.xpath(
                "//div[@class='qa-related']/div[@class='qa-contributor']/ul/li[1]/a"
        ):
            wendaAskItem["username"] = response.xpath(
                "//div[@class='qa-related']/div[@class='qa-contributor']/ul/li[1]/a/span/text()"
            ).extract()[0]
        else:
            wendaAskItem["username"] = response.xpath(
                "//div[@class='qa-related']/div[@class='qa-contributor']/ul/li[1]/span/text()"
            ).extract()[0]
        wendaAskItem["headPortrait"] = ""
        wendaAskItem["askList"] = []
        wendaAskItem["topicUrl"] = ""

        best_content = response.xpath("//div[@class='best-content']")

        if best_content:
            reply = WendaReplayItem()
            reply["title"] = wendaAskItem["title"]
            reply["username"] = best_content.xpath(
                ".//div[@class='qa-contributor']//span[@itemprop='accountName']/text()"
            ).extract()[0].strip()
            reply["images"] = []
            reply["content"] = best_content.xpath(
                "string(.//div[@class='answer-text'])").extract()[0].strip()
            reply["addtime"] = best_content.xpath(
                ".//li[@class='timestamp']/abbr/@title").extract()[0]
            reply["source"] = wendaAskItem["source"]
            reply["headPortrait"] = best_content.xpath(
                ".//p[@class='user-avatar']/a/img/@src").extract()[0]
            reply["likes"] = best_content.xpath(
                ".//div[@class='qa-vote']/a/em/text()").extract()[0]
            wendaAskItem["askList"].append(reply)

        # xpath() on an empty SelectorList yields nothing, so no guard is needed.
        replys = response.xpath("//ul[@class='qa-answer-list']").xpath(
            "./li[@class='answer-item']")
        for reply in replys:
            wendaReply = WendaReplayItem()
            wendaReply["title"] = wendaAskItem["title"]
            wendaReply["username"] = reply.xpath(
                ".//ul[@class='qa-meta']/li[@class='username']//span/text()"
            ).extract()[0]
            wendaReply["images"] = []
            wendaReply["content"] = reply.xpath(
                ".//div[@class='answer-text']/text()").extract()[0].strip()
            wendaReply["addtime"] = reply.xpath(
                ".//ul[@class='qa-meta']/li[@class='timestamp']/abbr/@title"
            ).extract()[0]
            wendaReply["source"] = wendaAskItem["source"]
            wendaReply["headPortrait"] = reply.xpath(
                "./ul[@class='qa-meta']/li[@class='useravatar']/a/img/@src"
            ).extract()[0]
            wendaReply["likes"] = reply.xpath(
                "./a[@class='qa-answer-list-vote']/span[@class='n']/em/text()"
            ).extract()[0]

            wendaAskItem["askList"].append(wendaReply)

        # Take the first "下一页" link only: anchors without a text node are
        # ignored, and a repeated link must not schedule a second request
        # that would append to the same item.
        next_page_url = None
        for page in response.xpath("//div[@class='pagejump']/a"):
            if page.xpath("./text()").extract_first() == "下一页":
                next_page_url = page.xpath("./@href").extract_first()
                break

        if next_page_url:
            next_url = response.urljoin(next_page_url)
            yield SplashRequest(next_url,
                                self.parse_next_ask,
                                args={'wait': 1},
                                meta={
                                    'origin_url': next_url,
                                    'wendaAskItem': wendaAskItem
                                })
        else:
            # No pager, or a pager without a next link: the item is complete.
            yield wendaAskItem
Exemple #6
0
    def parse_zhidao(self, response):
        """Parse a question page and all of its answers into one item.

        Answers are the ``div`` nodes under ``bd-wrap`` whose class contains
        ``answer``; nodes without the second ``reply-user-tohometip`` link are
        skipped.

        Yields:
            One ``WendaAskItem``, emitted once after all answers have been
            collected (also when the page has no answers).
        """
        ask = WendaAskItem()
        ask["tagName"] = self.keyword
        if response.xpath("//meta[@name='keywords']"):
            ask["keyword"] = response.xpath(
                "//meta[@name='keywords']/@content").extract()[0]
        else:
            ask["keyword"] = ""

        if response.xpath("//meta[@name='description']"):
            ask["description"] = response.xpath(
                "//meta[@name='description']/@content").extract()[0]
        else:
            ask["description"] = ""

        ask["title"] = response.xpath(
            "//article[@id='qb-content']//span[@class='ask-title']/text()"
        ).extract()[0]

        image_wrap = response.xpath(
            "//article[@id='qb-content']//div[@class='q-img-wp']")
        # The leading "." keeps the query inside the image wrapper; a bare
        # "//img" would collect every image in the whole document.
        ask["images"] = image_wrap.xpath(".//img/@src").extract()

        ask["content"] = ""
        ask["addtime"] = ""
        ask["source"] = response.meta["origin_url"]
        ask["username"] = ""
        ask["headPortrait"] = ""
        ask["askList"] = []
        ask["topicUrl"] = ""

        answers = response.xpath(
            "//div[@class='bd-wrap']/div[contains(@class, 'answer')]")

        for answer in answers:
            username = answer.xpath(
                ".//div[@class='wgt-replyer-all']//a[@class='reply-user-tohometip'][2]/span"
            )
            if not username:
                # Nodes without a replier name are not stored as answers.
                continue

            reply = WendaReplayItem()
            reply["title"] = ask["title"]
            reply["username"] = answer.xpath(
                ".//div[@class='wgt-replyer-all']//a[@class='reply-user-tohometip'][2]/span/text()"
            ).extract()[0]
            reply["images"] = []

            best_text = answer.xpath(
                ".//div[@class='line content']/div[contains(@class, 'best-text')]"
            )

            if best_text:
                # Best answers are joined paragraph by paragraph.
                reply["content"] = "<br>".join(
                    p.xpath("string()").extract()[0].strip()
                    for p in best_text.xpath(".//p"))
            else:
                reply["content"] = answer.xpath(
                    "string(.//div[@class='line content']/div[contains(@class, 'answer-text')])"
                ).extract()[0].strip()

            reply["addtime"] = answer.xpath(
                ".//span[@class='wgt-replyer-all-time']/text()").extract()[0]

            reply["source"] = ask["source"]

            # The avatar URL is embedded in the inline style attribute.
            avatar_style = answer.xpath(
                ".//div[@class='wgt-replyer-all-avatar']/@style"
            ).extract_first(default="")
            result = re.search(
                r"(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]",
                avatar_style)
            reply["headPortrait"] = result.group() if result else ""

            good = answer.xpath(
                ".//div[@class='wgt-eva']/span[contains(@class, 'evaluate-good-3')]"
            )
            if good:
                reply["likes"] = answer.xpath(
                    ".//div[@class='wgt-eva']/span[contains(@class, 'evaluate-good-3')]/@data-evaluate"
                ).extract()[0]
            else:
                reply["likes"] = 0

            ask["askList"].append(reply)

        # Emit once, after the loop, so the item is neither repeated per
        # answer nor lost when there are no answers.
        yield ask
Exemple #7
0
    def parse_question_page(self, response):
        """Attach a parsed question and its answers to a topic item.

        The topic item arrives in ``response.meta["huatiContent"]``.  A
        ``WendaAskItem`` is built from the question header and every
        ``div.List-item`` becomes one answer in its ``askList``.

        Yields:
            The topic item with ``content`` set to the question item and
            ``visits`` set to the first ``NumberBoard-itemValue`` title found
            under ``QuestionFollowStatus``.
        """
        huatiContent = response.meta["huatiContent"]
        wenda = WendaAskItem()
        wenda["tagName"] = self.keyword
        wenda["keyword"] = response.xpath(
            "//meta[@name='keywords']/@content").extract()[0]
        # NOTE(review): the description is stored empty; the page's meta
        # description is not used - confirm this is intended.
        wenda["description"] = ""
        wenda["title"] = response.xpath(
            "//div[@class='QuestionHeader']//h1[@class='QuestionHeader-title']/text()"
        ).extract()[0]

        if response.xpath(
                "//div[@class='QuestionHeader']//div[contains(@class, 'QuestionRichText')]//p"
        ):
            wenda["content"] = response.xpath(
                "string(//div[@class='QuestionHeader']//div[contains(@class, 'QuestionRichText')]//p)"
            ).extract()[0]
        else:
            wenda["content"] = ""

        wenda["images"] = []
        wenda["source"] = response.meta["origin_url"]
        wenda["username"] = ""
        wenda["headPortrait"] = ""
        wenda["askList"] = []
        wenda["addtime"] = ""
        wenda["topicUrl"] = response.meta["topic_url"]

        visits = response.xpath(
            "//div[@class='QuestionFollowStatus']//strong[@class='NumberBoard-itemValue']//@title"
        ).extract()[0]

        replys = response.xpath("//div[@class='List-item']")

        for reply in replys:
            replyItem = WendaReplayItem()
            replyItem["title"] = wenda["title"]

            # The author name is read from the span inside AuthorInfo-content
            # whether or not the author has a profile link.
            replyItem["username"] = reply.xpath(
                ".//div[@class='AuthorInfo-content']//span//text()"
            ).extract()[0]

            replyItem["images"] = []

            ptags = reply.xpath(
                ".//div[contains(@class, 'RichContent')]//span[contains(@class, 'RichText')]/p"
            )
            replyItem["content"] = '<br>'.join(
                p.xpath("string()").extract()[0] for p in ptags)

            # Keep what follows the marker; when the marker is missing the
            # whole tooltip is kept instead of failing on unpacking.
            replyItem["addtime"] = reply.xpath(
                ".//div[@class='ContentItem-time']//span/@data-tooltip"
            ).extract()[0].split("发布于 ")[-1]
            replyItem["source"] = response.meta["origin_url"]
            replyItem["headPortrait"] = reply.xpath(
                ".//span[@class='UserLink AuthorInfo-avatarWrapper']//img/@src"
            ).extract()[0]
            replyItem["likes"] = reply.xpath(
                ".//div[contains(@class, 'ContentItem-actions')]/span/button[contains(@class, 'VoteButton--up')]/@aria-label"
            ).extract()[0].split("赞同 ")[-1]
            wenda["askList"].append(replyItem)

        huatiContent["content"] = wenda
        huatiContent["visits"] = visits

        yield huatiContent
    def parse_wenda(self, response):
        """Build a question item with one reply per ``answer_item`` section.

        Only the title and meta tags describe the question itself; the other
        question fields are stored empty.  The matched reply sections are
        printed before they are processed.

        Yields:
            One ``WendaAskItem`` with the replies collected in ``askList``.
        """
        question = WendaAskItem()
        question["tagName"] = self.keyword
        question["keyword"] = response.xpath(
            "//meta[@name='keywords']/@content").extract()[0]
        question["description"] = response.xpath(
            "//meta[@name='description']/@content").extract()[0]
        question["title"] = response.xpath(
            "//span[@class='detail-tit']/text()").extract()[0]
        question["images"] = []
        question["content"] = ""
        question["addtime"] = ""
        question["source"] = response.meta["origin_url"]
        question["username"] = ""
        question["headPortrait"] = ""
        question["askList"] = []
        question["topicUrl"] = ""

        sections = response.xpath("//div[@class='replay-section answer_item']")
        print(sections)

        for section in sections:
            answer = WendaReplayItem()
            answer["title"] = question["title"]

            # The user name is a link on some sections and a plain span on others.
            if section.xpath(".//a[@class='user-name']"):
                name_query = ".//a[@class='user-name']/text()"
            else:
                name_query = ".//span[@class='user-name']/text()"
            answer["username"] = section.xpath(name_query).extract()[0]

            answer["images"] = []

            answer["content"] = section.xpath(
                "string(.//pre[contains(@class, 'answer_con')])"
            ).extract()[0].strip()

            # Keep the part of the user line that precedes " 回答".
            user_line = section.xpath(
                ".//div[@class='user-txt']/text()").extract()[0]
            answer["addtime"] = user_line.split(" 回答")[0]
            answer["source"] = response.meta["origin_url"]

            if section.xpath(".//a[@class='user-thumb']"):
                avatar_query = ".//a[@class='user-thumb']/img/@src"
            else:
                avatar_query = ".//div[@class='user-thumb-box']//img/@src"
            answer["headPortrait"] = section.xpath(avatar_query).extract()[0]

            if section.xpath(".//div[@class='ft-btn-box']/a"):
                answer["likes"] = section.xpath(
                    ".//div[@class='ft-btn-box']/a[1]/@data-num").extract()[0]
            else:
                answer["likes"] = 0

            question["askList"].append(answer)

        yield question
    def parse_qa(self, response):
        """Parse a Q&A page with one main answer and optional extra answers.

        The question text is the first ``audio-intro-main`` block and the
        main answer the second one; each ``ask-mod-item`` node adds another
        answer.  All answers reuse the question's ``addtime``.

        Yields:
            One ``WendaAskItem`` with every answer collected in ``askList``.
        """
        wendaAskItem = WendaAskItem()

        wendaAskItem["tagName"] = self.keyword

        if response.xpath("//meta[@name='keywords']"):
            wendaAskItem["keyword"] = response.xpath(
                "//meta[@name='keywords']/@content").extract()[0]
        else:
            wendaAskItem["keyword"] = ""

        wendaAskItem["description"] = response.xpath(
            "//meta[@name='description']/@content").extract()[0]

        wendaAskItem["title"] = response.xpath(
            "//h1[@class='audio-intro-h1']/text()").extract()[0]

        wendaAskItem["images"] = []
        wendaAskItem["content"] = response.xpath(
            "//div[@class='areat-m']/div[contains(@class, 'audio-intro-main')][1]/p/text()"
        ).extract()[0].strip()
        wendaAskItem["addtime"] = response.xpath(
            "//div[contains(@class, 'intro-ts')]/span[contains(@class, 'date')]/text()"
        ).extract()[0]
        wendaAskItem["source"] = response.meta["origin_url"]
        wendaAskItem["username"] = ""
        wendaAskItem["headPortrait"] = ""
        wendaAskItem["askList"] = []
        wendaAskItem["topicUrl"] = ""

        # Main answer: author block on the left, text in the second intro block.
        replyItem = WendaReplayItem()
        replyItem["title"] = wendaAskItem["title"]
        replyItem["username"] = response.xpath(
            "//div[contains(@class, 'audio-intro-l')]/a[@class='a']/span/text()"
        ).extract()[0]
        replyItem["images"] = []
        replyItem["content"] = response.xpath(
            "//div[@class='areat-m']/div[contains(@class, 'audio-intro-main')][2]/p/text()"
        ).extract()[0].strip()
        replyItem["addtime"] = wendaAskItem["addtime"]
        replyItem["source"] = response.meta["origin_url"]
        replyItem["headPortrait"] = response.xpath(
            "//div[contains(@class, 'audio-intro-l')]/a[@class='a']/img/@src"
        ).extract()[0]
        replyItem["likes"] = 0

        wendaAskItem["askList"].append(replyItem)

        otherReplys = response.xpath("//div[@class='ask-mod-item']")

        for otherReply in otherReplys:
            reply = WendaReplayItem()
            reply["title"] = wendaAskItem["title"]
            reply["username"] = otherReply.xpath(
                ".//div[@class='part-left']/a[@class='a']/@title").extract()[0]
            reply["images"] = []
            reply["content"] = otherReply.xpath(
                ".//div[contains(@class, 'audio-intro-main')]/p/text()"
            ).extract()[0].strip()
            reply["addtime"] = wendaAskItem["addtime"]
            reply["source"] = response.meta["origin_url"]
            # Query the current reply node: running this relative XPath on
            # the response gives every reply the first avatar in the document.
            reply["headPortrait"] = otherReply.xpath(
                ".//div[@class='part-left']/a[@class='a']/img/@src"
            ).extract()[0]
            reply["likes"] = 0
            wendaAskItem["askList"].append(reply)

        yield wendaAskItem
Exemple #10
0
    def parse_ask(self, response):
        """Parse a question page and every ``b_answerli`` answer on it.

        The asker name falls back to ``"******"`` when the ``ask_Author``
        node is missing.  Per answer, content and time default to ``""`` and
        likes to ``0`` when their nodes are absent.

        Yields:
            One ``WendaAskItem`` with the answers collected in ``askList``.
        """
        question = WendaAskItem()
        question["tagName"] = self.keyword
        question["keyword"] = response.xpath(
            "//meta[@name='keywords']/@content").extract()[0]
        question["description"] = response.xpath(
            "//meta[@name='description']/@content").extract()[0]
        question["title"] = response.xpath(
            "//h1[@id='d_askH1']/text()").extract()[0]
        question["images"] = []
        question["content"] = response.xpath(
            "string(//div[@class='b_askcont']/p[@class='crazy_new'])"
        ).extract()[0].strip()
        question["addtime"] = response.xpath(
            "//div[@class='b_askab1']//span[2]/text()").extract()[0]
        question["source"] = response.meta["origin_url"]

        author_node = response.xpath("//var[@class='ask_Author']")
        question["username"] = (
            response.xpath("//var[@class='ask_Author']/text()").extract()[0]
            if author_node else "******")
        question["headPortrait"] = ""
        question["askList"] = []
        question["topicUrl"] = ""

        answer_nodes = response.xpath(
            "//div[contains(@class, 'b_answerbox')]/div[@class='b_answerli']")

        for node in answer_nodes:
            answer = WendaReplayItem()
            answer["title"] = question["title"]

            # The answerer name sits in a link or, failing that, in a <var>.
            if node.xpath(
                    ".//div[contains(@class, 'b_answertop')]//span[@class='b_sp1']/a"):
                name_query = ".//div[contains(@class, 'b_answertop')]//span[@class='b_sp1']/a/text()"
            else:
                name_query = ".//div[contains(@class, 'b_answertop')]//span[@class='b_sp1']/var/text()"
            answer["username"] = node.xpath(name_query).extract()[0]
            answer["images"] = []

            body = ""
            if node.xpath(
                    ".//div[contains(@class, 'b_answercont')]//div[@class='crazy_new']/p"):
                body = node.xpath(
                    ".//div[contains(@class, 'b_answercont')]//div[@class='crazy_new']/p/text()"
                ).extract()[0].strip()
            answer["content"] = body

            posted = ""
            if node.xpath(
                    ".//div[contains(@class, 'b_answercont')]//span[@class='b_anscont_time']"):
                posted = node.xpath(
                    ".//div[contains(@class, 'b_answercont')]//span[@class='b_anscont_time']/text()"
                ).extract()[0].strip()
            answer["addtime"] = posted

            answer["source"] = question["source"]

            # The avatar wrapper is either a link or a <var> with the same class.
            if node.xpath(
                    "./div[contains(@class, 'b_answertop')]/a[@class='b_docface']"):
                avatar_query = "./div[contains(@class, 'b_answertop')]/a[@class='b_docface']/img/@src"
            else:
                avatar_query = "./div[contains(@class, 'b_answertop')]/var[@class='b_docface']/img/@src"
            answer["headPortrait"] = node.xpath(avatar_query).extract()[0]

            # The like count is the first number in the second b_sp2 span.
            info = node.xpath(
                "string(./div[contains(@class, 'b_answertop')]/div[@class='b_answertl']/span[@class='b_sp2'][2])"
            ).extract()[0]
            found = re.search(r"\d+\.?\d*", info) if info else None
            answer["likes"] = found.group() if found else 0

            question["askList"].append(answer)

        yield question