Example #1
0
    def process_item(self, item, spider):
        """Enrich ``item`` and insert it into the ``self.mongo_col`` collection.

        ``item["created_at"]`` is converted with ``DateUtil.convert``. Items
        whose converted value is not newer than ``self.recent`` are returned
        unchanged and are not inserted. Otherwise ``mblogid`` is set to
        ``DateUtil.calc_md5(title + user)``, ``created_at`` is replaced by the
        converted value, ``admin``/``price``/``tag`` are filled from
        ``self.extract`` and a ``dict`` copy of the item is inserted.

        Args:
            item: Mapping-like scraped item. Reads ``created_at``, ``title``,
                ``user`` and ``text``; writes ``mblogid``, ``created_at``,
                ``admin``, ``price`` and ``tag``.
            spider: The spider that produced the item (unused here).

        Returns:
            The item in every case. On failure the error is logged with its
            traceback and the (possibly partially updated) item is still
            returned, so later pipeline stages never receive ``None``.
        """
        logging.warning('开始插入表%s', self.mongo_col)
        try:
            dt = DateUtil.convert(item["created_at"])  # normalise the timestamp
            if dt <= self.recent:
                # Already stored or too old: skip the insert.
                return item

            # title + user is used as the uniqueness key for the record.
            item["mblogid"] = DateUtil.calc_md5(item['title'] + item['user'])
            item["created_at"] = dt

            admin, price, tag = self.extract(item['text'] + item['title'],
                                             self.tAdmin, self.tPrice,
                                             self.tTag)
            item["admin"] = admin
            item["price"] = price
            item["tag"] = tag

            # NOTE(review): assuming self.db is a PyMongo database,
            # Collection.insert() no longer exists in PyMongo 4; switch to
            # insert_one() when the driver is upgraded - confirm the version.
            self.db[self.mongo_col].insert(dict(item))
        except Exception:
            # 'mblogid' may not have been assigned yet when the failure
            # happened early, so look it up without raising a second error
            # that would mask the original one.
            logging.exception('编号为:%s的数据插入异常', item.get('mblogid'))
        return item
Example #2
0
    def process_item(self, item, spider):
        """Enrich ``item`` and insert it into the ``self.mongo_col`` collection.

        ``item["created_at"]`` is converted with ``DateUtil.convert``. Items
        whose converted value is not newer than ``self.recent`` are returned
        unchanged and are not inserted. Otherwise ``created_at`` is replaced
        by the converted value, ``admin``/``price``/``tag`` are filled from
        ``self.extract(item['text'], ...)`` and a ``dict`` copy of the item is
        inserted. Unlike a title-hash based variant, this method does not
        compute ``mblogid``; the item must already carry it.

        Args:
            item: Mapping-like scraped item. Reads ``created_at``,
                ``mblogid`` and ``text``; writes ``created_at``, ``admin``,
                ``price`` and ``tag``.
            spider: The spider that produced the item (unused here).

        Returns:
            The item in every case. On failure the error is logged with its
            traceback and the (possibly partially updated) item is still
            returned, so later pipeline stages never receive ``None``.
        """
        logging.warning('开始插入表%s', self.mongo_col)

        logging.warning('当前插入数据错误表信息......')

        try:
            dt = DateUtil.convert(item["created_at"])  # normalise the timestamp
            logging.debug("dt=====> %s", dt)
            if dt <= self.recent:
                # Already stored or too old: skip the insert.
                return item

            logging.debug("item=====> %s", item)
            # The item must already carry its 'mblogid'; this lookup raises
            # KeyError (handled below) when the field is missing, so such an
            # item is never inserted.
            logging.debug("mblogid====> %s", item["mblogid"])
            item["created_at"] = dt
            logging.debug("created_at====> %s", item["created_at"])

            admin, price, tag = self.extract(item['text'], self.tAdmin,
                                             self.tPrice, self.tTag)
            logging.debug("admin====> %s", admin)
            logging.debug("price====> %s", price)
            logging.debug("tag====> %s", tag)
            item["admin"] = admin
            item["price"] = price
            item["tag"] = tag
            logging.debug("item=======> %s", item)

            # NOTE(review): assuming self.db is a PyMongo database,
            # Collection.insert() no longer exists in PyMongo 4; switch to
            # insert_one() when the driver is upgraded - confirm the version.
            self.db[self.mongo_col].insert(dict(item))
        except Exception:
            # 'mblogid' may be absent (that can be the very reason for the
            # failure), so look it up without raising a second error that
            # would mask the original one.
            logging.exception('编号为:%s的数据插入异常', item.get('mblogid'))
        return item