Esempio n. 1
0
class ArticleScrapperPipeline(object):
    """Item pipeline that persists each scraped article's link and content.

    The pipeline owns a ``Pool`` (presumably a database connection pool --
    confirm against its definition) and a ``QueryPool`` built on top of it.
    Both are created when the spider opens and the pool's connection is
    closed when the spider closes.
    """

    def open_spider(self, spider):
        """Create the pool and the query helper bound to it.

        :param spider: the spider being opened (unused here).
        """
        self.pool = Pool()
        self.queries = QueryPool(self.pool)

    def process_item(self, item, spider):
        """Validate *item*, insert its content through the pool, and return it.

        :param item: mapping-style item; must provide ``'link'`` and
            ``'content'``.
        :param spider: the spider that produced the item; passed to the logger.
        :returns: the same *item*, so later pipeline stages can use it.
        :raises DropItem: if any populated field of the item has an empty
            (falsy) value.
        """
        # Iterating the item itself yields field *names*, which are never
        # empty, so the check must look at the field values instead.
        for value in item.values():
            if not value:
                raise DropItem("Missing data!")
        self.pool.do_querying(self.queries.insert_article_content, link=item['link'], content=item['content'])
        log.msg('%s content insert into db, complete' % item['link'], level=log.INFO, spider=spider)
        return item

    def close_spider(self, spider):
        """Close the pool's connection once the spider has finished.

        :param spider: the spider being closed (unused here).
        """
        self.pool.close_connection()
Esempio n. 2
0
 def open_spider(self, spider):
     """Set up the pool and the query helper that wraps it.

     :param spider: the spider being opened (unused here).
     """
     pool = Pool()
     self.pool = pool
     # The query helper is constructed around the very same pool instance.
     self.queries = QueryPool(pool)