class DBPipeline(object):
    """Item pipeline that stores items in a MongoDB collection.

    An item is inserted only when no stored document has the same
    ``company`` and ``title``; otherwise it is dropped.

    ``MongoConnection`` and ``DropItem`` are defined/imported outside this
    class (presumably a pymongo client and ``scrapy.exceptions.DropItem``
    -- confirm against the file's imports).
    """

    def __init__(self, mongo_uri, mongo_db, collection_name):
        """Store the connection settings; no connection is opened here.

        Args:
            mongo_uri: URI handed to ``MongoConnection`` in ``open_spider``.
            mongo_db: Name of the database to use.
            collection_name: Name of the collection items are written to.
        """
        self.mongo_uri = mongo_uri
        self.mongo_db = mongo_db
        self.collection_name = collection_name
        # Populated by open_spider(); declared here so close_spider() is
        # safe to call even when open_spider() never ran or failed early.
        self.client = None
        self.db = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from ``crawler.settings``.

        Reads ``MONGO_URI``, ``MONGO_DATABASE`` (falling back to
        ``'items'``) and ``MONGO_COLLECTION_NAME``.
        """
        settings = crawler.settings
        return cls(
            mongo_uri=settings.get('MONGO_URI'),
            mongo_db=settings.get('MONGO_DATABASE', 'items'),
            collection_name=settings.get('MONGO_COLLECTION_NAME'))

    def open_spider(self, spider):
        """Open the client and select the configured database."""
        self.client = MongoConnection(self.mongo_uri)
        self.db = self.client[self.mongo_db]

    def close_spider(self, spider):
        """Close the client if one was opened."""
        if self.client is not None:
            self.client.close()

    def process_item(self, item, spider):
        """Insert ``item`` unless a matching document already exists.

        Args:
            item: Mapping-like item; must provide ``company`` and ``title``.
            spider: The spider that produced the item (unused).

        Returns:
            The item that was inserted, so that later pipeline stages
            receive it.

        Raises:
            DropItem: If a document with the same company and title is
                already stored, or if the insert fails.
        """
        duplicate_query = {
            "company": item["company"],
            "title": item["title"]
        }
        if self.get_item(duplicate_query) is not None:
            raise DropItem(
                u'Existing item with same company and title found, skipping.')
        # A pipeline stage has to hand the item on; returning nothing here
        # would pass None to whatever runs after this pipeline.
        return self.insert_item(item)

    def get_item(self, item):
        """Return the first stored document matching the filter ``item``.

        The result is whatever the collection's ``find_one`` returns; the
        caller treats ``None`` as "no match".
        """
        return self.db[self.collection_name].find_one(item)

    def insert_item(self, item):
        """Insert a ``dict`` copy of ``item`` and return ``item``.

        Raises:
            DropItem: If the insert raises any exception; the original
                error text is included in the message.
        """
        try:
            self.db[self.collection_name].insert_one(dict(item))
        except Exception as e:
            raise DropItem(
                u'Inserting of item into database failed with error: %s'
                % str(e))
        return item