def add_item():
    """Store the stats record carried by the current request's JSON body.

    The body is pretty-printed first, then the fields listed below are read
    from it (a missing key raises ``KeyError``) and handed to
    ``StatsCollectionModel.create``. The incoming ``job_id`` key is stored
    under the ``spider_job_id`` column name; every other key keeps its name.
    """
    pprint(request.json)

    payload = request.json

    # Read order matters only for which KeyError surfaces first.
    field_names = (
        'job_id',
        'project',
        'spider',
        'item_scraped_count',
        'item_dropped_count',
        'start_time',
        'finish_time',
        'duration',
        'finish_reason',
        'log_error_count',
    )
    record = {name: payload[name] for name in field_names}

    # The model column is called spider_job_id, the payload key is job_id.
    record['spider_job_id'] = record.pop('job_id')

    StatsCollectionModel.create(**record)
def get_dict_by_spider_job_ids(cls, spider_job_ids):
    """Return stats rows keyed by their ``spider_job_id``.

    :param spider_job_ids: job ids to look up via an ``IN`` filter
    :return: dict mapping ``spider_job_id`` to the row (itself a dict);
        if several rows share an id, the last one iterated wins
    """
    id_filter = StatsCollectionModel.spider_job_id.in_(spider_job_ids)
    records = StatsCollectionModel.select().where(id_filter).dicts()

    return {record['spider_job_id']: record for record in records}
def delete(cls, project=None, spider=None):
    """Delete stats rows, optionally restricted by project and/or spider.

    :param project: when truthy, only rows with this project are deleted
    :param spider: when truthy, only rows with this spider are deleted
    :return: whatever ``execute()`` returns for the delete query

    NOTE: with neither filter given, the delete query has no WHERE clause.
    """
    conditions = []
    if project:
        conditions.append(StatsCollectionModel.project == project)
    if spider:
        conditions.append(StatsCollectionModel.spider == spider)

    statement = StatsCollectionModel.delete()
    # Chain one .where() per condition; successive calls are AND-ed together.
    for condition in conditions:
        statement = statement.where(condition)

    return statement.execute()
def count(cls, project=None, spider=None):
    """Count stats rows, optionally restricted by project and/or spider.

    :param project: when truthy, only rows with this project are counted
    :param spider: when truthy, only rows with this spider are counted
    :return: result of ``count()`` on the filtered select query
    """
    filters = (
        (StatsCollectionModel.project, project),
        (StatsCollectionModel.spider, spider),
    )

    selection = StatsCollectionModel.select()
    for column, wanted in filters:
        if not wanted:
            continue  # falsy value means "do not filter on this column"
        selection = selection.where(column == wanted)

    return selection.count()
def list(cls, page=1, size=20,
         project=None, spider=None,
         order_prop=None, order_type=None
         ):
    """Return one page of stats rows as dicts, with a formatted duration.

    :param page: page number passed to ``paginate``
    :param size: page size passed to ``paginate``
    :param project: when truthy, keep only rows with this project
    :param spider: when truthy, keep only rows with this spider
    :param order_prop: ``'duration'`` or ``'log_error_count'``; any other
        value sorts by ``create_time`` descending
    :param order_type: ``'descending'`` sorts descending; any other value
        sorts ascending (only consulted for the two order_prop values above)
    :return: the paginated ``.dicts()`` query, after every row has been given
        a ``duration_str`` key built from its ``duration`` value
    """
    # Query conditions: a filter is applied only for a truthy argument.
    conditions = []
    if project:
        conditions.append(StatsCollectionModel.project == project)
    if spider:
        conditions.append(StatsCollectionModel.spider == spider)

    query = StatsCollectionModel.select()
    for condition in conditions:
        query = query.where(condition)

    # Sorting: newest first by creation time unless a known column is asked for.
    ordering = StatsCollectionModel.create_time.desc()
    sortable_columns = (
        ('duration', StatsCollectionModel.duration),
        ('log_error_count', StatsCollectionModel.log_error_count),
    )
    for prop_name, column in sortable_columns:
        if order_prop == prop_name:
            if order_type == 'descending':
                ordering = column.desc()
            else:
                ordering = column.asc()
            break
    query = query.order_by(ordering)

    # Pagination
    rows = query.paginate(page, size).dicts()

    # Attach a human-readable duration to every row.
    # NOTE(review): the query object itself is returned, so this presumably
    # relies on it caching the fetched rows - confirm.
    for row in rows:
        row['duration_str'] = TimeUtil.format_duration(row['duration'])

    return rows
def list(cls, page=1, size=20, project=None, spider=None):
    """Return one page of stats rows as dicts, newest first.

    :param page: page number passed to ``paginate``
    :param size: page size passed to ``paginate``
    :param project: when truthy, keep only rows with this project
    :param spider: when truthy, keep only rows with this spider
    :return: the paginated ``.dicts()`` query, after every row has been given
        a ``duration_str`` key formatted from ``finish_time - start_time``
    """
    query = StatsCollectionModel.select()

    if project:
        query = query.where(StatsCollectionModel.project == project)

    if spider:
        query = query.where(StatsCollectionModel.spider == spider)

    rows = (query.order_by(
        StatsCollectionModel.create_time.desc()).paginate(page, size).dicts())

    # Compute the elapsed time of each job.
    for row in rows:
        elapsed = row['finish_time'] - row['start_time']
        # timedelta.seconds is only the sub-day remainder (0..86399), so a job
        # that ran for a day or more would be reported far too short. Use the
        # full span instead; int() keeps passing whole seconds as before.
        row['duration_str'] = TimeUtil.format_duration(
            int(elapsed.total_seconds()))

    return rows