def record_monitor(self, log, status):
    """
    1. Insert a record into the download monitoring table.
    """
    db_name = configs['db_name']
    table_name = 'data_monitor'
    task_name = '数据收集 ' + self.category_name_zh + ' ' + self.category_abbr_en
    #### Write to the database monitoring table.
    #### The scheduling-task table only drives automatic scheduling; the
    #### authoritative record of what was collected is the file_info table.
    create_time = get_current_BJT()  ## record time, Beijing time
    update_time = get_current_BJT()  ## record update time, Beijing time
    ## Monitoring-table parameters
    exe_fullpath = configs['exe_fullpath']
    cmd = 'python3' + ' ' + exe_fullpath + ' ' + self.category_abbr_en + ' ' + self.starttime + ' ' + self.endtime
    pga = PostgresArchive()
    config_infos = {
        'task_name': task_name,
        'create_time': create_time,
        'update_time': update_time,
        'log': log,
        'status': status,
        'cmd': cmd,
        'data_class': self.data_class,
        'research_area': self.research_area,
        'website': self.website,
        'category_abbr_en': self.category_abbr_en
    }
    pga.insert_db_table(database_name=db_name, table_name=table_name, config_element=config_infos)
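#### For reference: PostgresArchive.insert_db_table is defined elsewhere and its
#### internals are not shown here. The sketch below is a hypothetical
#### psycopg2-based equivalent (function name and connection parameters are
#### assumptions); column names come from the config dict, values are bound as
#### parameters:
import psycopg2

def insert_db_table_sketch(database_name, table_name, config_element):
    # Build a parameterized INSERT from the config dict.
    columns = ', '.join(config_element.keys())
    placeholders = ', '.join(['%s'] * len(config_element))
    sql = 'INSERT INTO %s (%s) VALUES (%s)' % (table_name, columns, placeholders)
    with psycopg2.connect(dbname=database_name) as conn:
        with conn.cursor() as cur:
            cur.execute(sql, list(config_element.values()))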
def export_element_table(elment_tablename, starttime, endtime, output_fullpath):
    """
    starttime is best given as 2020-05-27 00:00:00 so it can be used directly in the DB query.
    endtime   is best given as 2020-05-27 23:59:59 so it can be used directly in the DB query.
    output_fullpath is the full output path including the file name, e.g. /home/YJY015/data/20200527.zip
    """
    log_starttime = get_current_BJT()
    loggings = Loggings(log_starttime, elment_tablename)  # instantiate the logger
    despath, name = os.path.split(output_fullpath)
    if not os.path.exists(despath):
        os.makedirs(despath)
    input_rootpath = configs['rootpath']
    db_name = configs['db_name']
    export_json_rootpath = configs['export_json_rootpath']
    table_name = elment_tablename
    json_file = elment_tablename + '.json'
    #### Expand the day bounds to 00:00:00 .. 23:59:59
    s_dt = datetime.datetime.strptime(starttime, "%Y-%m-%d")
    e_dt = datetime.datetime.strptime(endtime, "%Y-%m-%d").replace(hour=23, minute=59, second=59)
    search_starttime = "'%s'" % s_dt.strftime('%Y-%m-%d %H:%M:%S')
    search_endtime = "'%s'" % e_dt.strftime('%Y-%m-%d %H:%M:%S')
    sqlcmd = 'SELECT * FROM %s WHERE utc_time BETWEEN %s and %s' % (
        table_name, search_starttime, search_endtime)
    pga = PostgresArchive()
    try:
        searchinfos = pga.search_db_table_usercmd(db_name, sqlcmd)
    except Exception:
        raise Exception(traceback.format_exc())
    ## Absolute paths on disk; element records are table rows only, so the list
    ## stays empty and only the JSON dump goes into the zip.
    absolute_path_list = []
    if not searchinfos:
        infos = '数据库 %s,产品不存在,时间段%s %s' % (table_name, starttime, endtime)
        loggings.debug_log(infos)  # write to the log
        return
    infos = '数据库表%s 导出数据, %d条' % (table_name, len(searchinfos))
    loggings.debug_log(infos)  # write to the log
    ## Dump the database records to a JSON file
    export_json_path = export_json_rootpath + starttime + '/'
    if not os.path.exists(export_json_path):
        os.makedirs(export_json_path)
    json_fullpath = export_json_path + json_file
    with open(json_fullpath, 'w', encoding='utf-8') as f:
        f.write(json.dumps(searchinfos, indent=4, ensure_ascii=False))
    #### Compress
    try:
        interact_opt.zipCompressLst.zipCompress(absolute_path_list, json_fullpath, output_fullpath)
    except Exception:
        ## Re-raise, otherwise a failed compression could be reported as success
        ## and element_storage_status could be wrongly set to true.
        raise Exception(traceback.format_exc())
    infos = '%s,压缩并导出' % table_name
    loggings.debug_log(infos)  # write to the log
    ## The temporary JSON file could be removed after it is packed into the zip;
    ## guard with os.path.exists first, otherwise os.remove raises
    ## FileNotFoundError: [Errno 2] No such file or directory
    return
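#### The query above is built by string interpolation. If PostgresArchive wraps
#### psycopg2 (an assumption), the same BETWEEN query could bind the time window
#### as parameters instead; the table name must still come from trusted config,
#### since identifiers cannot be bound:
import psycopg2

def search_between_sketch(database_name, table_name, start_dt, end_dt):
    # Hypothetical parameterized variant of the BETWEEN query above.
    sql = 'SELECT * FROM {} WHERE utc_time BETWEEN %s AND %s'.format(table_name)
    with psycopg2.connect(dbname=database_name) as conn:
        with conn.cursor() as cur:
            cur.execute(sql, (start_dt, end_dt))
            return cur.fetchall()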
def export_data(starttime, endtime, output_fullpath):
    """
    1. starttime is best given as 2020-05-27 00:00:00 so it can be used directly in the DB query.
    2. endtime   is best given as 2020-05-27 23:59:59 so it can be used directly in the DB query.
    3. output_fullpath is the full output path including the file name, e.g. /home/YJY015/data/20200527.zip
    4. Exporting everything is slow, so an export-status table is needed; the front end
       can poll it. The status is valid for 3 days; if the export has not finished
       after 3 days, it is no longer processed.
    """
    log_starttime = get_current_BJT()
    loggings = Loggings(log_starttime, 'export_data')  # instantiate the logger
    despath, name = os.path.split(output_fullpath)
    if not os.path.exists(despath):
        os.makedirs(despath)
    input_rootpath = configs['rootpath']
    db_name = configs['db_name']
    export_json_rootpath = configs['export_json_rootpath']
    table_name = 'data_file_info'
    json_file = 'data_file_info.json'
    #### Expand the day bounds to 00:00:00 .. 23:59:59
    s_dt = datetime.datetime.strptime(starttime, "%Y-%m-%d")
    e_dt = datetime.datetime.strptime(endtime, "%Y-%m-%d").replace(hour=23, minute=59, second=59)
    search_starttime = "'%s'" % s_dt.strftime('%Y-%m-%d %H:%M:%S')
    search_endtime = "'%s'" % e_dt.strftime('%Y-%m-%d %H:%M:%S')
    #### Must be SELECT *, because every field is exported.
    sqlcmd = 'SELECT * FROM %s WHERE start_time BETWEEN %s and %s' % (
        table_name, search_starttime, search_endtime)
    pga = PostgresArchive()
    try:
        searchinfos = pga.search_db_table_usercmd(db_name, sqlcmd)
    except Exception:
        raise Exception(traceback.format_exc())
    absolute_path_list = []  ## absolute paths of the files on disk
    if searchinfos:
        for searchinfo in searchinfos:
            path = searchinfo['path']
            filename = searchinfo['filename']
            absolute_path_list.append(input_rootpath + '/' + path + '/' + filename)
    else:
        infos = '数据库 %s,产品不存在,时间段%s %s' % (table_name, starttime, endtime)
        loggings.debug_log(infos)  # write to the log
        ## Use return, not exit(0): exit would kill the whole process and skip the
        ## product export that runs after a failed data export.
        return
    infos = '数据库表%s 导出数据, %d条' % (table_name, len(searchinfos))
    loggings.debug_log(infos)  # write to the log
    ## Dump the database records to a JSON file
    export_json_path = export_json_rootpath + starttime + '/'
    if not os.path.exists(export_json_path):
        os.makedirs(export_json_path)
    json_fullpath = export_json_path + json_file
    with open(json_fullpath, 'w', encoding='utf-8') as f:
        f.write(json.dumps(searchinfos, indent=4, ensure_ascii=False))
    #### Compress
    try:
        interact_opt.zipCompressLst.zipCompress(absolute_path_list, json_fullpath, output_fullpath)
    except Exception:
        ## Re-raise so a failed compression is never reported as success.
        raise Exception(traceback.format_exc())
    infos = '收集数据data,压缩并导出'
    loggings.debug_log(infos)  # write to the log
    ## The temporary JSON file could be removed after it is packed into the zip;
    ## guard with os.path.exists first, otherwise os.remove raises
    ## FileNotFoundError: [Errno 2] No such file or directory
    return
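#### interact_opt.zipCompressLst.zipCompress is defined elsewhere; the sketch
#### below is a hypothetical equivalent using the standard zipfile module,
#### packing the collected data files plus the JSON sidecar into one archive:
import zipfile

def zip_compress_sketch(file_list, json_fullpath, output_fullpath):
    with zipfile.ZipFile(output_fullpath, 'w', zipfile.ZIP_DEFLATED) as zf:
        for path in file_list:
            if os.path.exists(path):  # skip records whose file is missing on disk
                zf.write(path)
        zf.write(json_fullpath, arcname=os.path.basename(json_fullpath))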
def export_partial_data(category_abbr_en, starttime, endtime, output_fullpath):
    """
    cd /home/YJY015/code;python3 /home/YJY015/code/export.py GFZ_Kp_tab '2020-06-16 00:00:00' '2020-06-18 23:59:59' /home/YJY015/test/20200616_20200618.zip
    category_abbr_en is supplied by the user and selects which category to query.
    starttime is best given as 2020-05-27 00:00:00 so it can be used directly in the DB query.
    endtime   is best given as 2020-05-27 23:59:59 so it can be used directly in the DB query.
    output_fullpath is the full output path including the file name, e.g. /home/YJY015/data/20200527.zip
    """
    log_starttime = get_current_BJT()
    loggings = Loggings(log_starttime, 'export_partial_data')  # instantiate the logger
    despath, name = os.path.split(output_fullpath)
    if not os.path.exists(despath):
        os.makedirs(despath)
    input_rootpath = configs['rootpath']
    db_name = configs['db_name']
    table_name = 'data_file_info'
    search_starttime = "'%s'" % starttime
    search_endtime = "'%s'" % endtime
    sqlcmd = ("SELECT data_file_info.path, data_file_info.filename FROM data_file_info "
              "WHERE category_abbr_en = '%s' and data_file_info.start_time BETWEEN %s and %s "
              "order by start_time") % (category_abbr_en, search_starttime, search_endtime)
    pga = PostgresArchive()
    try:
        searchinfos = pga.search_db_table_usercmd(db_name, sqlcmd)
    except Exception:
        raise Exception(traceback.format_exc())
    absolute_path_list = []  ## absolute paths of the files on disk
    if searchinfos:
        for searchinfo in searchinfos:
            path = searchinfo['path']
            filename = searchinfo['filename']
            absolute_path_list.append(input_rootpath + '/' + path + '/' + filename)
    else:
        infos = '数据库 %s,产品 %s不存在,时间段%s %s' % (table_name, category_abbr_en, starttime, endtime)
        loggings.debug_log(infos)  # write to the log
        return
    infos = '数据库表data_file_info导出数据, %d条' % len(searchinfos)
    loggings.debug_log(infos)  # write to the log
    #### Compress
    try:
        interact_opt.zipCompressLst.zipCompress_data(absolute_path_list, output_fullpath)
    except Exception:
        ## Re-raise so a failed compression is never reported as success.
        raise Exception(traceback.format_exc())
    infos = '收集数据%s,压缩并导出,时间段为%s,%s' % (category_abbr_en, starttime, endtime)
    loggings.debug_log(infos)  # write to the log
    return
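#### The rootpath + path + filename concatenation recurs in several export
#### functions; a small helper (hypothetical, not present in the original) would
#### centralize it and delegate separator handling to os.path.join:
def to_absolute_path(input_rootpath, searchinfo):
    # Join the configured root with the relative path and filename stored in a
    # data_file_info row.
    return os.path.join(input_rootpath, searchinfo['path'], searchinfo['filename'])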
def export_avi(category_abbr_en, starttime, endtime, frame_ps, output_fullpath):
    log_starttime = get_current_BJT()
    loggings = Loggings(log_starttime, 'export_avi')  # instantiate the logger
    despath, name = os.path.split(output_fullpath)
    if not os.path.exists(despath):
        os.makedirs(despath)
    input_rootpath = configs['rootpath']
    db_name = configs['db_name']
    table_name = 'data_file_info'
    search_starttime = "'%s'" % starttime
    search_endtime = "'%s'" % endtime
    category_abbr_en = "'%s'" % category_abbr_en
    sqlcmd = ('SELECT data_file_info.path,data_file_info.filename FROM data_file_info '
              'WHERE category_abbr_en = %s and start_time BETWEEN %s and %s '
              'order by start_time') % (category_abbr_en, search_starttime, search_endtime)
    pga = PostgresArchive()
    try:
        searchinfos = pga.search_db_table_usercmd(db_name, sqlcmd)
    except Exception:
        raise Exception(traceback.format_exc())
    file_list = []
    if searchinfos:
        for searchinfo in searchinfos:
            path = searchinfo['path']
            filename = searchinfo['filename']
            input_fullfilename = input_rootpath + '/' + path + '/' + filename
            ## Feeding a 0-byte jpg into the video makes OpenCV fail without
            ## aborting the run:
            ## OpenCV(4.2.0) /io/opencv/modules/imgproc/src/resize.cpp:4045: error: (-215:Assertion failed) !ssize.empty() in function 'resize'
            ## Skip files that are missing on disk or empty, as export_gif does.
            if not os.path.exists(input_fullfilename) or os.path.getsize(input_fullfilename) == 0:
                continue
            file_list.append(input_fullfilename)
    else:
        infos = '数据库 %s,产品 %s不存在,时间段%s %s' % (table_name, category_abbr_en, starttime, endtime)
        loggings.debug_log(infos)  # write to the log
        return
    #### Render the video
    try:
        interact_opt.make_video.img2video(file_list, output_fullpath, fps=frame_ps)
    except Exception:
        ## Re-raise so a failed render is never reported as success.
        raise Exception(traceback.format_exc())
    return
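#### interact_opt.make_video.img2video is defined elsewhere; the sketch below is
#### a hypothetical OpenCV implementation consistent with the resize error
#### quoted above (function name and codec choice are assumptions):
import cv2

def img2video_sketch(file_list, output_fullpath, fps=10):
    if not file_list:
        return
    first = cv2.imread(file_list[0])
    if first is None:
        return
    height, width = first.shape[:2]
    writer = cv2.VideoWriter(output_fullpath,
                             cv2.VideoWriter_fourcc(*'XVID'),
                             fps, (width, height))
    for path in file_list:
        frame = cv2.imread(path)
        if frame is None:
            continue  # unreadable image: skip instead of letting resize fail
        writer.write(cv2.resize(frame, (width, height)))
    writer.release()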
def export_gif(category_abbr_en, starttime, endtime, frame_ps, output_fullpath):
    log_starttime = get_current_BJT()
    loggings = Loggings(log_starttime, 'export_gif')  # instantiate the logger
    despath, name = os.path.split(output_fullpath)
    if not os.path.exists(despath):
        os.makedirs(despath)
    input_rootpath = configs['rootpath']
    db_name = configs['db_name']
    table_name = 'data_file_info'
    search_starttime = "'%s'" % starttime
    search_endtime = "'%s'" % endtime
    category_abbr_en = "'%s'" % category_abbr_en
    sqlcmd = ('SELECT data_file_info.path,data_file_info.filename FROM data_file_info '
              'WHERE data_file_info.category_abbr_en = %s and start_time BETWEEN %s and %s '
              'order by start_time') % (category_abbr_en, search_starttime, search_endtime)
    pga = PostgresArchive()
    try:
        searchinfos = pga.search_db_table_usercmd(db_name, sqlcmd)
    except Exception:
        infos = traceback.format_exc()
        loggings.debug_log(infos)  # write to the log
        raise Exception(infos)
    file_list = []
    if searchinfos:
        for searchinfo in searchinfos:
            path = searchinfo['path']
            filename = searchinfo['filename']
            input_fullfilename = input_rootpath + '/' + path + '/' + filename
            #### Drop images that are missing on disk or have zero size.
            if not os.path.exists(input_fullfilename) or os.path.getsize(input_fullfilename) == 0:
                continue
            file_list.append(input_fullfilename)
    else:
        infos = '数据库 %s,产品 %s不存在,时间段%s %s' % (table_name, category_abbr_en, starttime, endtime)
        loggings.debug_log(infos)  # write to the log
        return
    #### Render the GIF
    try:
        interact_opt.make_gif.img2gif(output_fullpath, file_list, fps=frame_ps)
    except Exception:
        ## Re-raise so a failed render is never reported as success.
        raise Exception(traceback.format_exc())
    return
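#### interact_opt.make_gif.img2gif is defined elsewhere; a minimal hypothetical
#### equivalent with imageio (function name and fps handling are assumptions):
import imageio

def img2gif_sketch(output_fullpath, file_list, fps=5):
    frames = [imageio.imread(path) for path in file_list]
    if frames:
        imageio.mimsave(output_fullpath, frames, fps=fps)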
    search_endtime = "'%s'" % datetime.datetime(
        e_year, e_month, e_day, e_hour, e_minute, e_second).strftime('%Y-%m-%d %H:%M:%S')
    #### Data export: initialize the monitoring status
    start_time = search_starttime
    end_time = search_endtime
    ret_code = check_db(start_time, end_time)
    if 0 == ret_code:
        print('之前有过此时间段的导出操作,不需要再进行导出操作,如果需要继续导出,可以继续导出')
        ## An export for this period already succeeded; re-exporting is optional.
    if 1 == ret_code:
        print('之前有过此时间段的导出操作,但是status状态为False,需要再进行一次导出操作')
        ## A previous export for this period failed; run it again.
    if 2 == ret_code:
        print('之前没有此时间段的导出操作,需要插入初始状态记录')
        ## No export recorded for this period yet; insert the initial status record.
        record_time = get_current_BJT()
        update_time = get_current_BJT()
        status = 'False'
        record_db(record_time, update_time, start_time, end_time, status, filesize)
    ## Zip the collected data and dump its database records to a JSON file
    export_data(argv1, argv2, argv3)
    print('export_data finish.')
    ## Zip the generated products and dump their database records to a JSON file
    export_product(argv1, argv2, argv3)
    print('export_product finish.')
    ## Dump the sent alert SMS messages to a JSON file
    export_alert(argv1, argv2, argv3)
    print('export_alert finish.')
    ## Dump the sent alert reports to a JSON file
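#### check_db / record_db / update_db are defined elsewhere. From the branches
#### above, the return-code convention appears to be the one documented in this
#### hypothetical stub (an inference, not a confirmed contract):
def check_db_contract(start_time, end_time):
    """
    0 -- a record for this period exists and succeeded
    1 -- a record exists but its status is 'False' (retry needed)
    2 -- no record exists for this period yet
    """
    raise NotImplementedError  # the real check_db lives in another module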
def download_job(searchinfo,
                 starttime,
                 endtime,
                 exe_path=os.path.dirname(os.path.abspath(__file__)),
                 exe_name='download.py'):
    db_name = configs['db_name']
    #### Parse the fields of the database record that was passed in
    category_abbr_en = searchinfo['category_abbr_en']
    category_name_zh = searchinfo['category_name_zh']
    data_class = searchinfo['data_class']
    research_area = searchinfo['research_area']
    website = searchinfo['website']
    num_collect_perday = int(searchinfo['num_collect_perday'])
    num_store_perday = int(searchinfo['num_store_perday'])
    scheduling_interval_min = int(searchinfo['scheduling_interval_min'])
    scheduling_delayed_min = int(searchinfo['scheduling_delayed_min'])
    task_name = '数据收集 ' + category_name_zh + ' ' + category_abbr_en
    #### Tasks start on UTC time, because the source websites publish data on UTC time.
    task_starttime = get_current_UTC()
    log_starttime = get_current_BJT()
    loggings = Loggings(log_starttime, category_abbr_en)  # instantiate the logger
    #### Build the command line; starttime/endtime are passed in by the caller here.
    exe_fullpath = os.path.join(exe_path, exe_name)
    #### Launch the task
    cmd = 'python3' + ' ' + exe_fullpath + ' ' + category_abbr_en + ' ' + starttime + ' ' + endtime
    print(cmd)
    #### Run through a pipe; on an exception there may be no return value.
    ret = subprocess.run(cmd,
                         shell=True,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         encoding="utf-8",
                         timeout=None)
    """
    1. ret.returncode == 0 alone does not prove the download finished; the status
       reported by the download task itself must be checked as well.
    """
    if ret.returncode == 0:
        status = 'True'
        log = '成功'
        ## ret.stdout carries everything the child printed to the screen
    else:
        status = 'False'
        ## log records only success/failure; the detailed reason goes to the task log
        log = '失败'
        loggings.debug_log('ret.returncode = %d' % ret.returncode)  # write to the log
        loggings.debug_log(ret.stderr)  # write to the log
    return
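#### The command above is assembled by string concatenation and run with
#### shell=True. A hypothetical alternative (not the project's current approach)
#### passes an argument list instead, so values containing spaces, e.g.
#### '2020-06-16 00:00:00', need no manual quoting:
def run_download_sketch(exe_fullpath, category_abbr_en, starttime, endtime):
    argv = ['python3', exe_fullpath, category_abbr_en, starttime, endtime]
    return subprocess.run(argv,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE,
                          encoding='utf-8')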
def alert_job(geomag, electron, flare, proton, searchinfo, delay_min):
    db_name = configs['db_name']
    table_name = 'alert_monitor'
    #### Parse the fields of the database record that was passed in
    category_abbr_en = searchinfo['category_abbr_en']
    category_name_zh = searchinfo['category_name_zh']
    task_name = '事件警报 ' + category_name_zh + ' ' + category_abbr_en
    scheduling_delayed_min = int(searchinfo['scheduling_delayed_min'])
    data_class = searchinfo['data_class']
    research_area = searchinfo['research_area']
    website = searchinfo['website']
    #### Tasks start on UTC time, because the source websites publish data on UTC time.
    task_starttime = get_current_UTC()
    log_starttime = get_current_BJT()
    #### Scheduling is delayed, so the queried window is shifted back by the same
    #### amount. Times like 2020-05-27 15:31 are stored as 2020-05-27 15:31:00, so
    #### the query covers the whole minute, seconds 00-59.
    task_dt = datetime.datetime.strptime(task_starttime, "%Y-%m-%d %H:%M:%S")
    offset = datetime.timedelta(minutes=-(scheduling_delayed_min + delay_min))
    starttime = (task_dt.replace(second=0) + offset).strftime('%Y-%m-%d %H:%M:%S')
    endtime = (task_dt.replace(second=59) + offset).strftime('%Y-%m-%d %H:%M:%S')
    #### Instantiate the Alert class
    alert = Alert(geomag, electron, flare, proton, category_abbr_en, starttime, endtime)
    #### Dispatch on the data category to start the matching alert task.
    #### The Kp index uses SWPC (SWPC_latest_DGD), not the German GFZ (GFZ_Kp_web).
    if category_abbr_en == 'SWPC_latest_DGD':
        alert_type = 'geomag_storm_docx'
        try:
            alert.alert_geomag_storm()
            monitor_log = '成功'
            status = 'True'
        except Exception as e:
            monitor_log = str(e)
            status = 'False'
    elif category_abbr_en == 'SWPC_GOES_IE5m':
        alert_type = 'electron_burst_docx'
        try:
            alert.alert_electron_burst()
            monitor_log = '成功'
            status = 'True'
        except Exception as e:
            monitor_log = str(e)
            status = 'False'
    elif category_abbr_en == 'SWPC_GOES_XR1m':
        alert_type = 'solar_flare_docx'
        try:
            alert.alert_solar_flare()
            monitor_log = '成功'
            status = 'True'
        except Exception as e:
            monitor_log = str(e)
            status = 'False'
    elif category_abbr_en == 'SWPC_GOES_IP5m':
        alert_type = 'solar_proton_docx'
        try:
            alert.alert_solar_proton()
            monitor_log = '成功'
            status = 'True'
        except Exception as e:
            monitor_log = str(e)
            status = 'False'
    else:
        ## No alert type is defined for this category; nothing to record.
        return
    create_time = get_current_BJT()  ## record time, Beijing time
    update_time = get_current_BJT()  ## record update time, Beijing time
    pga = PostgresArchive()
    config_infos = {
        'task_name': task_name,
        'create_time': create_time,
        'update_time': update_time,
        'log': monitor_log,
        'status': status,
        'alert_type': alert_type  ## the event type, not the data-source type
    }
    pga.insert_db_table(database_name=db_name,
                        table_name=table_name,
                        config_element=config_infos)
    return
def product_job(searchinfo, delay_min):
    db_name = configs['db_name']
    table_name = 'product_monitor'
    #### Parse the fields of the database record that was passed in
    category_abbr_en = searchinfo['category_abbr_en']
    category_name_zh = searchinfo['category_name_zh']
    task_name = '产品生产 ' + category_name_zh + ' ' + category_abbr_en
    scheduling_delayed_min = int(searchinfo['scheduling_delayed_min'])
    data_class = searchinfo['data_class']
    research_area = searchinfo['research_area']
    website = searchinfo['website']
    #### Tasks start on UTC time, because the source websites publish data on UTC time.
    task_starttime = get_current_UTC()
    log_starttime = get_current_BJT()
    #### Scheduling is delayed, so the queried window is shifted back by the same
    #### amount; the query covers the whole minute, seconds 00-59.
    task_dt = datetime.datetime.strptime(task_starttime, "%Y-%m-%d %H:%M:%S")
    offset = datetime.timedelta(minutes=-(scheduling_delayed_min + delay_min))
    starttime = (task_dt.replace(second=0) + offset).strftime('%Y-%m-%d %H:%M:%S')
    endtime = (task_dt.replace(second=59) + offset).strftime('%Y-%m-%d %H:%M:%S')
    #### Instantiate the Product class
    prd = Product(category_abbr_en, starttime, endtime)
    #### Map each data category to its production method. All JSOC_AIA channels
    #### share product_SDO_draw_grid.
    #### (CDA_DSCOVR_SW / product_DSCOVR_FC and CDA_DSCOVR_MAG / product_DSCOVR_mag
    ####  are currently disabled.)
    sdo_grid_categories = ('JSOC_AIA_0094', 'JSOC_AIA_0131', 'JSOC_AIA_0171',
                           'JSOC_AIA_0193', 'JSOC_AIA_0211', 'JSOC_AIA_0304',
                           'JSOC_AIA_0305', 'JSOC_AIA_1600', 'JSOC_AIA_1700')
    if category_abbr_en == 'Ngdc_DSCOVR_SW':
        product_func = prd.product_Ngdc_DSCOVR_SW
    elif category_abbr_en == 'Ngdc_DSCOVR_MAG':
        product_func = prd.product_NGDC_DSCOVR_m1s
    elif category_abbr_en == 'CDA_TIMED_SL2a':
        product_func = prd.product_Timed_L2A
    elif category_abbr_en == 'CDA_GPS_TEC':
        product_func = prd.product_IGS_TEC
    elif category_abbr_en in sdo_grid_categories:
        product_func = prd.product_SDO_draw_grid
    elif category_abbr_en == 'JSOC_HMI_12m':
        product_func = prd.product_SDO_draw_AR
    else:
        ## No production method is defined for this category; nothing to record.
        return
    try:
        product_func()
        log = '成功'
        status = 'True'
    except Exception as e:
        log = str(e)
        status = 'False'
    create_time = get_current_BJT()  ## record time, Beijing time
    update_time = get_current_BJT()  ## record update time, Beijing time
    pga = PostgresArchive()
    config_infos = {
        'task_name': task_name,
        'create_time': create_time,
        'update_time': update_time,
        'log': log,
        'status': status,
        'data_class': data_class,
        'research_area': research_area,
        'website': website,
        'category_abbr_en': category_abbr_en
    }
    pga.insert_db_table(database_name=db_name,
                        table_name=table_name,
                        config_element=config_infos)
    return
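#### The dispatch above (and the similar one in alert_job) could also be
#### table-driven. A hypothetical refactor using the same category codes and
#### Product methods shown above:
def build_product_dispatch(prd):
    dispatch = {
        'Ngdc_DSCOVR_SW': prd.product_Ngdc_DSCOVR_SW,
        'Ngdc_DSCOVR_MAG': prd.product_NGDC_DSCOVR_m1s,
        'CDA_TIMED_SL2a': prd.product_Timed_L2A,
        'CDA_GPS_TEC': prd.product_IGS_TEC,
        'JSOC_HMI_12m': prd.product_SDO_draw_AR,
    }
    # Every AIA channel shares the same grid renderer.
    for channel in ('0094', '0131', '0171', '0193',
                    '0211', '0304', '0305', '1600', '1700'):
        dispatch['JSOC_AIA_' + channel] = prd.product_SDO_draw_grid
    return dispatch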
def redo_download_job(expire_days=3):
    """
    1. Every day at 06:00, redo the failed tasks of the previous 3 days (there may
       be many); failures are found by querying status == 'False'.
    2. Dates are wrapped in single quotes, because SQL date literals must be quoted.
    """
    db_name = configs['db_name']
    table_name = 'data_monitor'
    current_date = "'%s'" % datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    expire_date = "'%s'" % (datetime.datetime.now() +
                            datetime.timedelta(days=-expire_days)).strftime("%Y-%m-%d %H:%M:%S")
    search_status = "'%s'" % 'False'
    #### Join the two tables to pick up each category's redo_flag.
    sqlcmd = ("SELECT data_monitor.*, data_category.redo_flag FROM data_monitor "
              "INNER JOIN data_category ON data_monitor.category_abbr_en = data_category.category_abbr_en "
              "WHERE data_monitor.status = %s and create_time BETWEEN %s and %s") % (
                  search_status, expire_date, current_date)
    pga = PostgresArchive()
    try:
        searchinfos = pga.search_db_table_usercmd(db_name, sqlcmd)
    except Exception:
        raise Exception(traceback.format_exc())
    #### Redo every failed task found
    if searchinfos:
        for searchinfo in searchinfos:
            #### Parse the fields of the database record
            task_name = searchinfo['task_name']
            create_time = searchinfo['create_time']
            log = searchinfo['log']
            status = searchinfo['status']
            cmd = searchinfo['cmd']
            data_class = searchinfo['data_class']
            research_area = searchinfo['research_area']
            website = searchinfo['website']
            category_abbr_en = searchinfo['category_abbr_en']
            redo_flag = searchinfo['redo_flag']
            #### Honor redo_flag from data_category: if it is 'False' the category is
            #### skipped, because real-time feeds would redownload current data, not
            #### the missed historical data.
            if 'False' == redo_flag:
                continue
            logStarttime = get_current_BJT()
            loggings = Loggings(logStarttime, category_abbr_en)  # instantiate the logger
            #### Run through a pipe; on an exception there may be no return value.
            ret = subprocess.run(cmd,
                                 shell=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 encoding="utf-8",
                                 timeout=None)
            """
            1. ret.returncode == 0 alone does not prove the download finished; the
               status reported by the download task itself must be checked as well.
            """
            #### (A redo marker still needs to be passed into the wget download.)
            if ret.returncode == 0:
                update_status = 'True'
                update_log = '成功'
                ## ret.stdout carries everything the child printed to the screen
            else:
                update_status = 'False'
                ## update_log records only success/failure; the detailed reason goes
                ## to the task log. Do not overwrite `log`: it is part of the WHERE
                ## condition below.
                update_log = '失败'
                loggings.debug_log('ret.returncode = %d' % ret.returncode)  # write to the log
                loggings.debug_log(ret.stderr)  # write to the log
                print("error:", ret.stderr)
            #### Update the monitoring table; the file_info table remains the
            #### authoritative record of what was actually collected.
            update_time = get_current_BJT()  ## update time, Beijing time
            pga = PostgresArchive()
            config_infos = {
                'task_name': task_name,
                'create_time': create_time,
                'update_time': update_time,
                'log': update_log,
                'status': update_status,
                'cmd': cmd,
                'data_class': data_class,
                'research_area': research_area,
                'website': website,
                'category_abbr_en': category_abbr_en
            }
            condition_infos = {
                'task_name': task_name,
                'create_time': create_time,
                'status': status,
                'log': log
            }
            ## Update the failed record in place
            pga.update_db_table(database_name=db_name,
                                table_name=table_name,
                                config_element=config_infos,
                                condition_element=condition_infos)
    return
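#### PostgresArchive.update_db_table is defined elsewhere; the sketch below is a
#### hypothetical psycopg2-based equivalent (function name and connection
#### parameters are assumptions): SET clause from config_element, WHERE clause
#### from condition_element, all values bound as parameters:
import psycopg2

def update_db_table_sketch(database_name, table_name, config_element, condition_element):
    set_clause = ', '.join('%s = %%s' % k for k in config_element)
    where_clause = ' AND '.join('%s = %%s' % k for k in condition_element)
    sql = 'UPDATE %s SET %s WHERE %s' % (table_name, set_clause, where_clause)
    params = list(config_element.values()) + list(condition_element.values())
    with psycopg2.connect(dbname=database_name) as conn:
        with conn.cursor() as cur:
            cur.execute(sql, params)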
def download_job(searchinfo,
                 exe_path=os.path.dirname(os.path.abspath(__file__)),
                 exe_name='download.py'):
    db_name = configs['db_name']
    #### Parse the fields of the database record that was passed in
    category_abbr_en = searchinfo['category_abbr_en']
    category_name_zh = searchinfo['category_name_zh']
    data_class = searchinfo['data_class']
    research_area = searchinfo['research_area']
    website = searchinfo['website']
    num_collect_perday = int(searchinfo['num_collect_perday'])
    num_store_perday = int(searchinfo['num_store_perday'])
    scheduling_interval_min = int(searchinfo['scheduling_interval_min'])
    scheduling_delayed_min = int(searchinfo['scheduling_delayed_min'])
    task_name = '数据收集 ' + category_name_zh + ' ' + category_abbr_en
    #### Tasks start on UTC time, because the source websites publish data on UTC time.
    task_starttime = get_current_UTC()
    log_starttime = get_current_BJT()
    loggings = Loggings(log_starttime, category_abbr_en)  # instantiate the logger
    #### Build the command line. Scheduling is delayed by scheduling_delayed_min, so
    #### the download window is shifted back by the same amount, which keeps the
    #### downloaded data complete.
    exe_fullpath = os.path.join(exe_path, exe_name)
    starttime = (datetime.datetime.strptime(task_starttime, "%Y-%m-%d %H:%M:%S") +
                 datetime.timedelta(minutes=-scheduling_delayed_min)).strftime('%Y%m%d%H%M%S')
    endtime = starttime
    #### Launch the task through a pipe (os.system was the alternative);
    #### on an exception there may be no return value.
    cmd = 'python3' + ' ' + exe_fullpath + ' ' + category_abbr_en + ' ' + starttime + ' ' + endtime
    ret = subprocess.run(cmd,
                         shell=True,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         encoding="utf-8",
                         timeout=None)
    """
    1. ret.returncode == 0 alone does not prove the download finished; the status
       reported by the download task itself must be checked as well.
    """
    if ret.returncode == 0:
        status = 'True'
        log = '成功'
        ## ret.stdout carries everything the child printed to the screen
    else:
        status = 'False'
        ## log records only success/failure; the detailed reason goes to the task log
        log = '失败'
        loggings.debug_log('ret.returncode = %d' % ret.returncode)  # write to the log
        loggings.debug_log(ret.stderr)  # write to the log
    #### (A disabled block here used to insert the result into the data_monitor
    ####  monitoring table; the file_info table remains the authoritative record.)
    return
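#### A hypothetical convention addressing the caveat above: download.py prints a
#### final machine-readable line, and the scheduler checks it in addition to the
#### returncode. Neither side is implemented this way in the code shown; the
#### 'DOWNLOAD_STATUS' marker is an assumption.
#### In download.py, the last line printed would be:
####     print('DOWNLOAD_STATUS: OK')   # or 'DOWNLOAD_STATUS: FAIL <reason>'
def download_succeeded(ret):
    if ret.returncode != 0:
        return False
    stdout = (ret.stdout or '').strip()
    last_line = stdout.splitlines()[-1] if stdout else ''
    return last_line == 'DOWNLOAD_STATUS: OK'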
def cleanup_data(startime, endtime):
    """
    startime is best given as 2020-05-27
    endtime  is best given as 2020-05-27
    The database is queried for the full 24 hours of each day, i.e. the window
    becomes startime 00:00:00 .. endtime 23:59:59, which can be used directly in
    the query.
    """
    #### Expand the day bounds to 00:00:00 .. 23:59:59
    s_dt = datetime.datetime.strptime(startime, "%Y-%m-%d")
    e_dt = datetime.datetime.strptime(endtime, "%Y-%m-%d").replace(hour=23, minute=59, second=59)
    search_starttime = s_dt.strftime('%Y-%m-%d %H:%M:%S')
    search_endtime = e_dt.strftime('%Y-%m-%d %H:%M:%S')
    #### Initialize the monitoring status
    clean_up_tablename = 'data_file_info'
    start_time = search_starttime
    end_time = search_endtime
    ret_code = check_db(clean_up_tablename, start_time, end_time)
    if 0 == ret_code:
        print('之前有过此时间段的删除操作,不需要再进行删除操作')
        #### A deletion may still have to be rerun, e.g. data deleted in the morning
        #### and downloaded again the same day, so mark the record as in progress.
        update_time = get_current_BJT()
        config_infos = {'update_time': update_time, 'status': '2'}  # 2 = deletion in progress
        #### If no record exists for this period, it counts as already deleted.
        condition_infos = {
            'table_name': clean_up_tablename,
            'start_time': start_time,
            'end_time': end_time
        }
        update_db(config_infos, condition_infos)
    if 1 == ret_code:
        print('之前有过此时间段的删除操作,但是status状态为:1失败,2删除中,需要再进行一次删除操作')
        #### A previous deletion did not finish; mark the record as in progress again.
        update_time = get_current_BJT()
        config_infos = {'update_time': update_time, 'status': '2'}  # 2 = deletion in progress
        condition_infos = {
            'table_name': clean_up_tablename,
            'start_time': start_time,
            'end_time': end_time
        }
        update_db(config_infos, condition_infos)
    if 2 == ret_code:
        print('之前没有此时间段的删除操作,需要插入初始状态记录')
        #### No deletion recorded for this period yet; insert the initial record.
        record_time = get_current_BJT()
        update_time = get_current_BJT()
        status = '2'  #### 2 = deletion in progress
        record_db(clean_up_tablename, record_time, update_time, start_time, end_time, status)
    log_starttime = get_current_BJT()
    loggings = Loggings(log_starttime, 'cleanup_data')  # instantiate the logger
    input_rootpath = configs['rootpath']
    db_name = configs['db_name']
    table_name = 'data_file_info'
    sqlcmd = "SELECT * FROM %s WHERE start_time BETWEEN '%s' and '%s'" % (
        table_name, search_starttime, search_endtime)
    print(sqlcmd)
    pga = PostgresArchive()
    try:
        searchinfos = pga.search_db_table_usercmd(db_name, sqlcmd)
    except Exception:
        raise Exception(traceback.format_exc())
    absolute_path_list = []  ## absolute paths of the files on disk
    if searchinfos:
        for searchinfo in searchinfos:
            path = searchinfo['path']
            filename = searchinfo['filename']
            absolute_path_list.append(input_rootpath + '/' + path + '/' + filename)
            #### Delete the table record
            pga.delete_table_record(db_name, table_name, searchinfo)
    else:
        infos = '数据库 %s,产品不存在,不需要删除时间段: %s %s' % (table_name, startime, endtime)
        loggings.debug_log(infos)  # write to the log
        #### Mark the monitoring record as finished
        update_time = get_current_BJT()
        config_infos = {'update_time': update_time, 'status': '3'}  # 3 = deletion finished
        condition_infos = {
            'table_name': clean_up_tablename,
            'start_time': start_time,
            'end_time': end_time
        }
        update_db(config_infos, condition_infos)
        ## Use return, not exit(0): exit would kill the whole main process and skip
        ## whatever runs after this function.
        return
    infos = '数据库表data_file_info 记录%d条' % len(searchinfos)
    loggings.debug_log(infos)  # write to the log
    for datafile in absolute_path_list:
        ## Guard with os.path.exists; os.remove on a missing file raises
        ## FileNotFoundError: [Errno 2] No such file or directory
        if os.path.exists(datafile):
            os.remove(datafile)
    infos = '数据清理完毕.'
    loggings.debug_log(infos)  # write to the log
    #### Mark the monitoring record as finished, regardless of its previous status.
    update_time = get_current_BJT()
    config_infos = {'update_time': update_time, 'status': '3'}  # 3 = deletion finished
    condition_infos = {
        'table_name': clean_up_tablename,
        'start_time': start_time,
        'end_time': end_time
    }
    update_db(config_infos, condition_infos)
    return
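#### The cleanup status codes are bare strings; the meanings below are inferred
#### from the call sites and printed messages above (an assumption, since the
#### status table schema is not shown):
CLEANUP_STATUS_FAILED = '1'       # a previous deletion failed
CLEANUP_STATUS_IN_PROGRESS = '2'  # deletion in progress
CLEANUP_STATUS_FINISHED = '3'     # deletion finished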
    #### Sort the categories by priority, then derive the interval between downloads
    #### from the per-day download frequency, with the first download at 00:00:00
    #### each day.
    #### Run the downloads as crontab-like jobs that fire automatically on schedule.
    #### Currently only the previous day's temporary data is deleted by default; if
    #### the network is down for a whole day, yesterday's data never gets deleted,
    #### because tomorrow's run only deletes today's. Consider also deleting
    #### temporary data older than one week or one month.
    #### On each automatic download, keep only the newest file of the day for a
    #### given category.
    #### Convert %Y%m%d%H%M%S to %Y-%m-%d %H:%M:%S so the format matches the
    #### database and queries stay simple.
    #return True
    return


if __name__ == "__main__":
    #### Get the system time
    taskStarttime = get_current_BJT()
    #### 1. Start the database:          sudo service postgresql start
    #### 2. Check the configured host IP: ifconfig -a
    #### Two modes: manual download and automatic download.
    #### Automatic: no command-line arguments; the current system time drives the
    #### download tasks, run through a thread pool or serially.
    #### Manual: category code, start time and end time are passed on the command
    #### line. Multiple manual downloads are queued by priority, highest first;
    #### with parallel downloads the notion of priority disappears.
    #### Manual command line:
    #python3 /home/hffan/fanhuifeng/python_prj/YJY015/sqlmain.py CDA_TIMED_SL2b 20190919111000 20190920111000
    #### Automatic command line:
    #python3 /home/hffan/fanhuifeng/python_prj/YJY015/sqlmain.py
    ######## Databases and tables can be created manually with Navicat, or created
    ######## from Python statements.
    ######## Create the psql database.
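#### The comments above describe crontab-like automatic downloads. The sketch
#### below wires download_job and redo_download_job into APScheduler; which
#### scheduler the project actually uses is not shown, so this wiring is an
#### assumption:
from apscheduler.schedulers.blocking import BlockingScheduler

def schedule_downloads_sketch(searchinfos):
    sched = BlockingScheduler(timezone='UTC')
    for searchinfo in searchinfos:
        # One interval job per category, spaced by its configured interval.
        sched.add_job(download_job,
                      'interval',
                      minutes=int(searchinfo['scheduling_interval_min']),
                      args=[searchinfo],
                      id=searchinfo['category_abbr_en'])
    # Redo the last 3 days' failed downloads every morning at 06:00 Beijing time.
    sched.add_job(redo_download_job, 'cron', hour=6, timezone='Asia/Shanghai')
    sched.start()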