def record_runtime(db, spider, start, end):
    """Record how long a crawl of ``spider`` took and publish the new state.

    The duration between ``start`` and ``end`` is stored in the record kept
    at ``db['runtime'][spider]``, along with the largest and smallest
    durations seen so far for that spider.  Afterwards ``db`` is synced and
    handed to ``viewstates.publishStates``.

    Args:
        db: Mapping-like store that offers ``sync()``.  ``db['runtime']``
            must already exist; it is not created here.
        spider: Key under which the run is recorded (also stored as the
            record's ``'name'`` when the record is first created).
        start: Start of the run.  ``end - start`` must yield an object with
            ``total_seconds()``.
        end: End of the run.
    """
    duration = (end - start).total_seconds()
    # Human-readable "<whole minutes>mins <remaining whole seconds>seconds".
    timespent = '%rmins %rseconds' % (
        math.trunc(duration / 60), math.trunc(duration % 60))

    runtimes = db['runtime']
    if spider in runtimes:
        record = runtimes[spider]
        record['max_seconds'] = max(record['max_seconds'], duration)
        record['min_seconds'] = min(record['min_seconds'], duration)
    else:
        # First recorded run: it is both the longest and the shortest so far.
        record = {
            'name': spider,
            'max_seconds': duration,
            'min_seconds': duration,
        }

    record['timespent'] = timespent
    record['start'] = repr(start)
    record['end'] = repr(end)
    runtimes[spider] = record

    # Assign the whole mapping back to the top-level key.  If db is a shelve
    # opened without writeback (presumably, given sync() -- confirm), changes
    # made to the object returned by db['runtime'] are not persisted on their
    # own; for a plain dict or a writeback shelf this is a harmless no-op.
    db['runtime'] = runtimes
    db.sync()
    viewstates.publishStates(db)
def on_start_crawl(db,spider): print 'starting to crawl ', spider, ' on ', repr(datetime.now()) db['crawling'] = spider db.sync() viewstates.publishStates(db) return datetime.now()