import sys
import time
from multiprocessing import Process

import redis
from django.http import JsonResponse

# Adjust these imports to the actual project layout; the module paths below
# are assumptions. SpiderBaseDir is the directory holding one package per spider.
from .models import Spider, Message
from .settings import SpiderBaseDir


def startspiderbyid(request):
    # Make the spider packages importable.
    if SpiderBaseDir not in sys.path:
        sys.path.append(SpiderBaseDir)
    # Read the GET parameters.
    spider_id = request.GET['spider_id']
    spider_argv = request.GET['run_argvs']
    # Look up the module name of the target spider.
    spider_obj = Spider.objects.filter(id=spider_id)[0]
    func_name = spider_obj.spider_runfunction
    # If no module name is configured ('no info'), refuse to start a new task;
    # otherwise run the spider in a new process.
    if func_name != 'no info':
        mymodule = __import__(func_name)
        if spider_argv != '':
            spider_argv = tuple(spider_argv.split(' '))
            p = Process(target=mymodule.main, args=spider_argv)
            markcontent = ('Spider started with arguments: <b>'
                           + request.GET['run_argvs'] + '</b>')
        else:
            p = Process(target=mymodule.main)
            markcontent = 'Spider started without custom arguments'
        p.start()
        # Record a notification message.
        me_obj = Message()
        me_obj.action_time = str(time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()))
        me_obj.content = markcontent
        me_obj.user = '******'
        me_obj.spider = spider_obj
        me_obj.save()
        # Mark the spider as running in the database.
        spider_obj.spider_runing = True
        spider_obj.save()
        print('Main process done (1): the view returns while the spider keeps '
              'running on its own; its status goes to redis and the log file, '
              'and the views below relay it to the AJAX front end and MySQL.')
        return JsonResponse({'data': 'Spider started, see the live monitor page!', 'doing': 1})
    else:
        # Mark the spider as not running in the database.
        spider_obj.spider_runing = False
        spider_obj.save()
        # Record a notification message.
        me_obj = Message()
        me_obj.action_time = str(time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()))
        me_obj.content = 'Failed to start spider: insufficient permissions'
        me_obj.user = '******'
        me_obj.spider = spider_obj
        me_obj.save()
        print('Main process done (2): the view returns without starting a spider.')
        return JsonResponse({'data': 'Insufficient permissions!', 'doing': 0})
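# --- myspider/__init__.py (hypothetical) ---
# A minimal sketch of a spider module that fits the contract startspiderbyid()
# assumes: a package named after Spider.spider_runfunction living under
# SpiderBaseDir, exposing main(), writing its log to <name>/<name>.log, and
# keeping the '<name>:runing' key in redis db 1 up to date. The names NAME,
# myspider and do_one_batch are illustrative, not part of the original project.
import logging
import os

import redis

NAME = 'myspider'  # must match Spider.spider_runfunction


def do_one_batch(*args):
    # Hypothetical unit of work; a real spider would fetch and parse pages.
    return False  # nothing left to do


def main(*args):
    # Log to the file that getspiderlogbyid() reads.
    logging.basicConfig(
        filename=os.path.join(os.path.dirname(__file__), NAME + '.log'),
        level=logging.INFO, format='%(asctime)s %(message)s')
    client = redis.Redis(host='127.0.0.1', port=6379, db=1)
    client.set(NAME + ':runing', 'True')
    try:
        # Exit cooperatively when stopspiderbyid() flips the flag to 'False'.
        while client.get(NAME + ':runing') == b'True':
            if not do_one_batch(*args):
                break
            logging.info('batch done')
    finally:
        # Signal completion so the front end stops polling.
        client.set(NAME + ':runing', 'False')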
def getspiderlogbyid(request):
    # Look up the spider's module name by id.
    spider_id = request.GET['spider_id']
    spider_obj = Spider.objects.filter(id=spider_id)[0]
    func_name = spider_obj.spider_runfunction
    # If the module name is still the default 'no info', return 'done' so the
    # front end stops its polling loop.
    tags = ''
    if func_name == 'no info':
        return JsonResponse({'data': 'done', 'content': tags})
    else:
        # Build the log file path and try to read it; if the file does not
        # exist yet, fall back to an empty string.
        logfilename = SpiderBaseDir + func_name + '/' + func_name + '.log'
        try:
            with open(logfilename, 'r') as f:
                content = f.read()
            print('Read log file successfully')
        except Exception as e:
            print('Failed to read log file:', e, logfilename)
            content = ''
        # Wrap each log line in an <li> tag for the front end.
        tags = ''
        for each in content.split('\n'):
            tags += '<li>' + each + '</li>'
        # Check the task's running flag in redis; when it is 'False' the
        # front end ends its polling loop.
        redisclient = redis.Redis(host='127.0.0.1', port=6379, db=1)
        info = redisclient.get(func_name + ':runing')
        # If redis says the spider has stopped, send the data one last time.
        if info == b'False':
            # Mark the spider as not running in the database.
            try:
                spider_obj.spider_runing = False
                spider_obj.save()
            except Exception as e:
                print('Failed to update spider status in MySQL:', e)
            print('Log transfer finished')
            # Record a notification message.
            me_obj = Message()
            me_obj.action_time = str(time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()))
            me_obj.content = 'Spider finished and exited'
            me_obj.user = '******'
            me_obj.spider = spider_obj
            me_obj.save()
            return JsonResponse({'data': 'done', 'content': tags})
        else:
            return JsonResponse({'data': 'continue', 'content': tags})
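# A minimal polling client for the view above, mirroring the AJAX loop: keep
# requesting the log until the view answers 'done'. The /getspiderlogbyid URL
# pattern and the follow_log helper are assumptions for illustration only.
import requests


def follow_log(base_url, spider_id, interval=2.0):
    while True:
        resp = requests.get(base_url + '/getspiderlogbyid',
                            params={'spider_id': spider_id})
        payload = resp.json()
        print(payload['content'])      # the <li>-wrapped log lines
        if payload['data'] == 'done':  # spider finished or not configured
            break
        time.sleep(interval)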
def dataopinfo(request):
    # Record a download notification for the given file and spider directory.
    file_name = request.GET['FN']
    dir_name = request.GET['DIR']
    spider_obj = Spider.objects.filter(spider_runfunction=dir_name)[0]
    me_obj = Message()
    me_obj.action_time = str(time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()))
    me_obj.content = 'Downloaded file: %s' % file_name
    me_obj.user = '******'
    me_obj.spider = spider_obj
    me_obj.save()
    return JsonResponse({'data': '1'})
def stopspiderbyid(request):
    # Flip the running flag in redis so the spider process can exit cleanly.
    spider_id = request.GET['spider_id']
    spider_obj = Spider.objects.filter(id=spider_id)[0]
    func_name = spider_obj.spider_runfunction
    redisclient = redis.Redis(host='127.0.0.1', port=6379, db=1)
    redisclient.getset(func_name + ':runing', 'False')
    # Mark the spider as not running in the database.
    spider_obj.spider_runing = False
    spider_obj.save()
    # Record a notification message.
    me_obj = Message()
    me_obj.action_time = str(time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()))
    me_obj.content = 'Spider stopped manually'
    me_obj.user = '******'
    me_obj.spider = spider_obj
    me_obj.save()
    print('Updated redis and MySQL status; spider will exit')
    return JsonResponse({'data': 'Stopped collecting successfully!'})
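# Design note: getset() atomically swaps in 'False' and returns the previous
# value; since the old value is discarded here, a plain set() would behave the
# same. The stop is cooperative: this view only flips the flag, and the spider
# process is expected to notice it (as in the module sketch after
# startspiderbyid above) and exit on its own.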