def spider_closed(self, spider, reason):
    """
    When the spider closes, store the run's stats (start time, finish time,
    items scraped, pages crawled) in the database for this scraper.
    """
    start_time = self.stats.get_value('start_time')
    finish_time = self.stats.get_value('finish_time')
    items_scraped_count = self.stats.get_value('item_scraped_count', 0)
    pages_crawled_count = self.stats.get_value(
        'downloader/request_method_count/GET', 0)

    # add the scrapy stats to the DB
    stats = ScrapyStats(scrapername=spider.name,
                        start_time=start_time,
                        finish_time=finish_time,
                        items_scraped=items_scraped_count,
                        pages_crawled=pages_crawled_count,
                        servername=SERVER_NAME)
    add_stats(stats)

    # mark the spider as no longer running once it stops
    spider_ = get_spider(spider.name)
    spider_.is_running = 0
    db.session.commit()
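For spider_closed (and spider_opened, shown later) to fire, the class that holds them has to be wired into Scrapy's signal dispatcher, typically from a from_crawler classmethod on an extension enabled through the EXTENSIONS setting. A minimal sketch of that wiring, assuming the handlers live on an extension class; the class name and constructor here are illustrative, not taken from the project:

from scrapy import signals


class SpiderStatsExtension(object):
    # illustrative extension class, not the project's actual one

    def __init__(self, stats):
        # the Scrapy stats collector, read later in spider_closed
        self.stats = stats

    @classmethod
    def from_crawler(cls, crawler):
        ext = cls(crawler.stats)
        # connect the spider_opened/spider_closed handlers to Scrapy's signals
        crawler.signals.connect(ext.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(ext.spider_closed, signal=signals.spider_closed)
        return ext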
def start_crawler(spider_name):
    item = get_spider(spider_name)
    # the spider class is expected to live in a module of the same name,
    # i.e. spiders/<spidercls>/<spidercls>
    spider_location = '.'.join(['spiders', item.spidercls, item.spidercls])
    spider = load_object(spider_location)
    settings = get_spider_settings()
    crawler = create_crawler_object(spider(), settings)
    crawler.start()
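get_spider_settings and create_crawler_object are project helpers that are not shown here. One plausible way to achieve the same thing with Scrapy's public crawler API is sketched below; note that CrawlerProcess.crawl takes the spider class rather than an instance, so this is an approximation of the helpers, not their actual code:

from scrapy.crawler import CrawlerProcess
from scrapy.utils.misc import load_object
from scrapy.utils.project import get_project_settings


def start_crawler_sketch(spider_name):
    # hypothetical variant of start_crawler built on CrawlerProcess
    item = get_spider(spider_name)
    spider_cls = load_object('.'.join(['spiders', item.spidercls, item.spidercls]))
    process = CrawlerProcess(get_project_settings())
    process.crawl(spider_cls)
    process.start()  # blocks until the crawl finishes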
def update_spider_post():
    """
    Update the spider's name, description, and spider class in the
    database, then redirect back to the spider list.
    """
    spider_name = request.form['spider-name']
    spider_desc = request.form['spider-desc']
    spider_cls = request.form['spider-cls']

    # update the spider record in the DB
    spider = get_spider(spider_name)
    spider.name = spider_name
    spider.description = spider_desc
    spider.spidercls = spider_cls
    db.session.commit()

    return redirect(url_for('spiders_bp.list_spiders'))
def update_spider_form(spider_name):
    spider = get_spider(spider_name)
    return render_template('add-spider.html', update=True, spider=spider)
def spider_opened(self, spider):
    spider_ = get_spider(spider.name)
    spider_.is_running = 1
    db.session.commit()
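get_spider is used by all of these handlers and views but is defined elsewhere in the project. A minimal sketch of what it could look like with Flask-SQLAlchemy, assuming a Spider model whose column names are inferred from how the object is used above (the import path and model definition are assumptions, not the project's code):

from app import db  # hypothetical import path for the shared SQLAlchemy instance


class Spider(db.Model):
    # illustrative model; columns mirror the attributes used in the views above
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(128), unique=True)
    description = db.Column(db.Text)
    spidercls = db.Column(db.String(128))
    is_running = db.Column(db.Integer, default=0)


def get_spider(spider_name):
    # look up a spider record by its unique name
    return Spider.query.filter_by(name=spider_name).first()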