def parse_item(self, response):
        """Build one ``PostItemsList`` per post found on a thread page.

        Only URLs containing ``"threads"`` are parsed; per the original
        author's note, this check is what lets the spider work with just two
        crawl rules. Any other URL yields no items.

        Args:
            response: Page response exposing ``url`` and ``xpath()``.

        Returns:
            A list of populated ``PostItemsList`` items, or ``None`` when the
            URL is not a thread page. A field whose node is absent is stored
            as an empty string; list entries that have neither an author nor
            any message text are skipped.
        """
        if "threads" not in response.url:
            return None

        def first_or_blank(node, query):
            # ``extract()`` returns a list; indexing [0] on an empty result
            # would raise IndexError and discard every item of this page.
            matches = node.xpath(query).extract()
            return matches[0] if matches else u""

        url = response.url
        subject = first_or_blank(
            response, '//span[@class="threadtitle"]//text()')

        items = []
        for post in response.xpath('//ol[@id="posts"]/li'):
            author = first_or_blank(
                post, './/a[contains(@class, "username")]//text()')
            author_link = first_or_blank(
                post, './/a[contains(@class, "username")]/@href')

            # The date is spread over several text nodes: strip each piece
            # and join with single spaces.
            date_parts = post.xpath('.//span[@class="date"]//text()').extract()
            create_date = u" ".join(part.strip() for part in date_parts)

            # Message fragments are stripped and concatenated with no
            # separator.
            body_parts = post.xpath('.//div[@class="content"]//text()').extract()
            message = u"".join(part.strip() for part in body_parts)

            if not author and not message:
                # The <li> carries no post data at all; nothing to record.
                continue

            item = PostItemsList()
            item["author"] = author
            item["author_link"] = author_link
            item["create_date"] = create_date
            item["post"] = message
            item["tag"] = "epilepsy"
            item["topic"] = subject
            item["url"] = url

            logging.info("%s", item)
            items.append(item)

        return items
	def get_sub_data(self,response):
		"""Yield one ``PostItemsList`` built from the ``discussion_topic`` table.

		The first match of each XPath (author name, author link, posted date,
		post text, topic header) is cleaned and stored on the item.

		Args:
			response: Page response exposing ``url`` and ``xpath()``.

		Yields:
			A single populated ``PostItemsList``. Nothing is yielded (and a
			warning is logged) when the page has no discussion text; other
			missing fields are stored as empty strings.
		"""
		logging.info("get_sub_data")
		author_name_xpath = "//table[@class='discussion_topic']//p[@class='username']/a/text()"
		author_link_xpath = "//table[@class='discussion_topic']//p[@class='username']/a/@href"
		author_posted_xpath = "//table[@class='discussion_topic']//div/span[@class='graytext']/text()"
		author_all_text_xpath = "//table[@class='discussion_topic']//div[@class='discussion_text longtextfix485']/text()"
		topic_xpath = "//div[contains(@class,'discussion_topic_header_subject')]/text()"

		def first_or_blank(query):
			# ``extract()`` returns a list; guard the [0] so a missing node
			# does not raise IndexError.
			matches = response.xpath(query).extract()
			return matches[0] if matches else ""

		text_nodes = response.xpath(author_all_text_xpath).extract()
		if not text_nodes:
			logging.warning("get_sub_data: no discussion text found at %s", response.url)
			return

		# No str() coercion: the extracted value is already text, and str()
		# can fail on non-ASCII characters under Python 2.
		author_name = first_or_blank(author_name_xpath)
		author_name = author_name.replace("\t","").replace(',',' ')

		# The href is site-relative, so prefix the host; leave the field
		# empty when no link was found.
		author_href = first_or_blank(author_link_xpath)
		author_link = "http://www.dailystrength.org%s"%author_href if author_href else ""

		author_posted = first_or_blank(author_posted_xpath)
		author_posted = author_posted.replace(',','').replace('Posted on','')

		# NOTE(review): only the first text node of the post body is kept;
		# bodies split across several nodes may be truncated - confirm intent.
		author_all_text = text_nodes[0]
		# Order matters: tabs are removed before double spaces are collapsed.
		author_all_text = author_all_text.replace(',','')
		author_all_text = author_all_text.replace('\t','')
		author_all_text = author_all_text.replace('  ','')
		author_all_text = author_all_text.replace('\n','')

		topic = first_or_blank(topic_xpath)

		item = PostItemsList()

		item['author'] = author_name
		item['author_link'] = author_link
		item['condition']="chronic lymphocytic leukemia"
		item['create_date'] = author_posted
		item['post'] = author_all_text
		item['topic'] = topic
		item['url'] = response.url
		logging.info("%s", item)
		yield item
    def parse_item(self, response):
        """Build one ``PostItemsList`` per post found on a thread page.

        Only URLs containing ``"threads"`` are parsed; per the original
        author's note, this check is what lets the spider work with just two
        crawl rules. Any other URL yields no items.

        Args:
            response: Page response exposing ``url`` and ``xpath()``.

        Returns:
            A list of populated ``PostItemsList`` items, or ``None`` when the
            URL is not a thread page. A field whose node is absent is stored
            as an empty string (before ``self.cleanText`` / ``self.getDate``
            are applied); list entries that have neither an author nor any
            message text are skipped.
        """
        if "threads" not in response.url:
            return None

        def first_or_blank(node, query):
            # ``extract()`` returns a list; indexing [0] on an empty result
            # would raise IndexError and discard every item of this page.
            matches = node.xpath(query).extract()
            return matches[0] if matches else u""

        condition = "breast cancer"
        url = response.url
        # Same value for every post, so compute it once.
        domain = "".join(self.allowed_domains)
        subject = first_or_blank(
            response, '//span[@class="threadtitle"]//text()')

        items = []
        for post in response.xpath('//ol[@id="posts"]/li'):
            author = first_or_blank(
                post, './/a[contains(@class, "username")]//text()')
            author_link = first_or_blank(
                post, './/a[contains(@class, "username")]/@href')

            # The date is spread over several text nodes: strip each piece
            # and join with single spaces.
            date_parts = post.xpath('.//span[@class="date"]//text()').extract()
            create_date = u" ".join(part.strip() for part in date_parts)

            # Message fragments are stripped and concatenated with no
            # separator.
            body_parts = post.xpath('.//div[@class="content"]//text()').extract()
            message = u"".join(part.strip() for part in body_parts)

            if not author and not message:
                # The <li> carries no post data at all; nothing to record.
                continue

            # Further normalisation is delegated to helpers on the spider
            # (defined outside this block).
            message = self.cleanText(message)

            item = PostItemsList()
            item['author'] = author
            item['author_link'] = author_link
            item['condition'] = condition
            item['create_date'] = self.getDate(create_date)
            item['domain'] = domain
            item['post'] = message
            item['topic'] = subject
            item['url'] = url

            logging.info("%s", item)
            items.append(item)

        return items
    def parse_item(self, response):
        """Build one ``PostItemsList`` per post found on a thread page.

        Only URLs containing ``"threads"`` are parsed; per the original
        author's note, this check is what lets the spider work with just two
        crawl rules. Any other URL yields no items.

        Args:
            response: Page response exposing ``url`` and ``xpath()``.

        Returns:
            A list of populated ``PostItemsList`` items, or ``None`` when the
            URL is not a thread page. A field whose node is absent is stored
            as an empty string; list entries that have neither an author nor
            any message text are skipped.
        """
        if "threads" not in response.url:
            return None

        def first_or_blank(node, query):
            # ``extract()`` returns a list; indexing [0] on an empty result
            # would raise IndexError and discard every item of this page.
            matches = node.xpath(query).extract()
            return matches[0] if matches else u""

        url = response.url
        subject = first_or_blank(
            response, '//span[@class="threadtitle"]//text()')

        items = []
        for post in response.xpath('//ol[@id="posts"]/li'):
            author = first_or_blank(
                post, './/a[contains(@class, "username")]//text()')
            author_link = first_or_blank(
                post, './/a[contains(@class, "username")]/@href')

            # The date is spread over several text nodes: strip each piece
            # and join with single spaces.
            date_parts = post.xpath('.//span[@class="date"]//text()').extract()
            create_date = u" ".join(part.strip() for part in date_parts)

            # Message fragments are stripped and concatenated with no
            # separator.
            body_parts = post.xpath('.//div[@class="content"]//text()').extract()
            message = u"".join(part.strip() for part in body_parts)

            if not author and not message:
                # The <li> carries no post data at all; nothing to record.
                continue

            item = PostItemsList()
            item['author'] = author
            item['author_link'] = author_link
            item['create_date'] = create_date
            item['post'] = message
            item['tag'] = 'epilepsy'
            item['topic'] = subject
            item['url'] = url

            logging.info("%s", item)
            items.append(item)

        return items