Example #1
    def record_monitor(self, log, status):
        """
        1. Write a record to the download-data monitoring table.

        """
        db_name = configs['db_name']  ##as in the other jobs; db_name is not defined locally otherwise
        table_name = 'data_monitor'
        task_name = 'Data collection ' + self.category_name_zh + ' ' + self.category_abbr_en

        ####insert into the database monitoring table
        ####should the scheduling task table also be updated? data_file_info is the table we care about most; the scheduling table only drives automatic jobs, and the information collected in data_file_info is authoritative
        create_time = get_current_BJT()  ##record creation time, Beijing time
        update_time = get_current_BJT()  ##record update time, Beijing time

        ##monitoring-table parameters
        exe_fullpath = configs['exe_fullpath']
        cmd = 'python3' + ' ' + exe_fullpath + ' ' + self.category_abbr_en + ' ' + self.starttime + ' ' + self.endtime

        pga = PostgresArchive()
        config_infos = {
            'task_name': task_name,
            'create_time': create_time,
            'update_time': update_time,
            'log': log,
            'status': status,
            'cmd': cmd,
            'data_class': self.data_class,
            'research_area': self.research_area,
            'website': self.website,
            'category_abbr_en': self.category_abbr_en
        }

        #pga.insert_db_table(database_name='task_db', table_name='t_task_monitor', config_element = config_infos)
        #pga.insert_db_table(database_name='yjy015', table_name='task_monitor', config_element = config_infos)
        pga.insert_db_table(database_name=db_name, table_name=table_name, config_element=config_infos)
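
####A hedged sketch of the insert_db_table call used above; the real
####PostgresArchive implementation is not shown in this snippet, and the
####psycopg2 wiring below is an assumption, not the project's actual code.
import psycopg2

def insert_db_table_sketch(database_name, table_name, config_element):
    columns = ', '.join(config_element)              # dict keys become column names
    slots = ', '.join(['%s'] * len(config_element))  # one placeholder per value
    sql = 'INSERT INTO %s (%s) VALUES (%s)' % (table_name, columns, slots)
    with psycopg2.connect(dbname=database_name) as conn:
        with conn.cursor() as cur:
            cur.execute(sql, list(config_element.values()))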
Example #2
def export_element_table(element_tablename, starttime, endtime,
                         output_fullpath):
    """
    starttime should be a date such as 2020-05-27; it is expanded to
    2020-05-27 00:00:00 before querying the database
    endtime should be a date such as 2020-05-27; it is expanded to
    2020-05-27 23:59:59 before querying the database
    output_fullpath is the full output path including the file name,
    e.g. /home/YJY015/data/20200527.zip

    """

    log_starttime = get_current_BJT()
    ####instantiate the logging class
    #loggings=Loggings(log_starttime,'export_element_ace_ep')
    loggings = Loggings(log_starttime, element_tablename)

    despath, name = os.path.split(output_fullpath)
    if not os.path.exists(despath):
        os.makedirs(despath)

    input_rootpath = configs['rootpath']
    db_name = configs['db_name']
    export_json_rootpath = configs['export_json_rootpath']

    #table_name  = 'element_ace_ep'
    table_name = element_tablename
    #json_file='element_ace_ep.json'
    json_file = element_tablename + '.json'

    ####expand the dates to a full-day window, 00:00:00 .. 23:59:59
    s_dt = datetime.datetime.strptime(starttime, "%Y-%m-%d")
    e_dt = datetime.datetime.strptime(endtime, "%Y-%m-%d").replace(
        hour=23, minute=59, second=59)

    search_starttime = '\'%s\'' % s_dt.strftime('%Y-%m-%d %H:%M:%S')
    search_endtime = '\'%s\'' % e_dt.strftime('%Y-%m-%d %H:%M:%S')
    #sqlcmd='SELECT path,filename FROM data_file_info WHERE start_time BETWEEN %s and %s'%(search_starttime,search_endtime)
    #sqlcmd='SELECT * FROM data_file_info WHERE start_time BETWEEN %s and %s'%(search_starttime,search_endtime)
    sqlcmd = 'SELECT * FROM %s WHERE utc_time BETWEEN %s and %s' % (
        table_name, search_starttime, search_endtime)

    pga = PostgresArchive()
    try:
        searchinfos = pga.search_db_table_usercmd(db_name, sqlcmd)
        #print (searchinfos)
    except Exception as e:
        raise Exception(traceback.format_exc())

    absolute_path_list = []  ##absolute paths on disk
    #relative_path_list=[]##relative paths as stored in the database
    if (searchinfos):
        for searchinfo in searchinfos:
            ####element tables carry no separate data files on disk; only the
            ####JSON dump below is packed, so nothing is collected here
            pass

    else:
        #pass
        infos = 'table %s: no records for period %s %s' % (table_name,
                                                           starttime, endtime)
        loggings.debug_log(infos)  #write to the log
        #raise Exception(infos)
        #exit(0)
        return

    infos = 'table %s: exported %d records' % (table_name, len(searchinfos))
    loggings.debug_log(infos)  #write to the log

    ##write the database records to a JSON file
    #json_fullpath = os.path.join(input_rootpath,json_file)
    #json_fullpath = export_json_rootpath + starttime + '/' + json_file
    export_json_path = export_json_rootpath + starttime + '/'
    if not os.path.exists(export_json_path):
        os.makedirs(export_json_path)
    json_fullpath = export_json_path + json_file

    ####option 2: dump the whole result list at once
    with open(json_fullpath, 'w', encoding='utf-8') as f:
        f.write(json.dumps(searchinfos, indent=4, ensure_ascii=False))

    ####compress
    try:
        interact_opt.zipCompressLst.zipCompress(absolute_path_list,
                                                json_fullpath, output_fullpath)
    except Exception as e:
        #loggings.debug_log(str(e))                  #write the error to the log
        #loggings.debug_log(traceback.format_exc())  #write the stack trace to the log
        #exit(traceback.format_exc())                #exit directly on error; otherwise a run with no element data could still report extraction as finished and set element_storage_status to true
        raise Exception(traceback.format_exc())

    #infos = 'element_ace_ep, compressed and exported'
    infos = '%s, compressed and exported' % table_name
    loggings.debug_log(infos)  #write to the log

    ##the temporary JSON file can be deleted once it is packed into the zip
    ##check existence first: os.remove on a missing file raises FileNotFoundError: [Errno 2] No such file or directory

    # if not os.path.exists(json_fullpath):
    # pass
    # else:
    # os.remove(json_fullpath)

    # infos = ''
    # loggings.debug_log(infos)#write to the log
    return
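
####Illustrative invocation; the table name matches the commented
####element_ace_ep default above, and the paths are examples only:
if __name__ == '__main__':
    export_element_table('element_ace_ep', '2020-05-27', '2020-05-27',
                         '/home/YJY015/test/element_20200527.zip')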
Example #3
def export_data(starttime, endtime, output_fullpath):
    """
    1. starttime should be a date such as 2020-05-27; it is expanded to
       2020-05-27 00:00:00 before querying the database
    2. endtime should be a date such as 2020-05-27; it is expanded to
       2020-05-27 23:59:59 before querying the database
    3. output_fullpath is the full output path including the file name,
       e.g. /home/YJY015/data/20200527.zip
    4. the full export is slow; an export-status table should be added so the
       front end can poll it, with a 3-day validity window -- if the export
       has not finished after 3 days it is no longer processed

    """

    log_starttime = get_current_BJT()
    ####instantiate the logging class
    loggings = Loggings(log_starttime, 'export_data')

    despath, name = os.path.split(output_fullpath)
    if not os.path.exists(despath):
        os.makedirs(despath)

    #input_rootpath             = configs['data_rootpath']
    input_rootpath = configs['rootpath']

    #db_name = 'yjy015'
    db_name = configs['db_name']
    export_json_rootpath = configs['export_json_rootpath']
    table_name = 'data_file_info'
    json_file = 'data_file_info.json'

    ####expand the dates to a full-day window, 00:00:00 .. 23:59:59
    s_dt = datetime.datetime.strptime(starttime, "%Y-%m-%d")
    e_dt = datetime.datetime.strptime(endtime, "%Y-%m-%d").replace(
        hour=23, minute=59, second=59)

    search_starttime = '\'%s\'' % s_dt.strftime('%Y-%m-%d %H:%M:%S')
    search_endtime = '\'%s\'' % e_dt.strftime('%Y-%m-%d %H:%M:%S')
    #sqlcmd='SELECT path,filename FROM data_file_info WHERE start_time BETWEEN %s and %s'%(search_starttime,search_endtime)
    ####must be SELECT *, because every field is exported
    sqlcmd = 'SELECT * FROM %s WHERE start_time BETWEEN %s and %s' % (
        table_name, search_starttime, search_endtime)

    pga = PostgresArchive()
    try:
        searchinfos = pga.search_db_table_usercmd(db_name, sqlcmd)
        #print (searchinfos)
    except Exception as e:
        raise Exception(traceback.format_exc())

    absolute_path_list = []  ##absolute paths on disk
    #relative_path_list=[]##relative paths as stored in the database
    if (searchinfos):
        for searchinfo in searchinfos:
            path = searchinfo['path']
            filename = searchinfo['filename']
            absolute_input_fullfilename = input_rootpath + '/' + path + '/' + filename
            #relative_input_fullfilename = '/' + path + '/' + filename
            absolute_path_list.append(absolute_input_fullfilename)
            #relative_path_list.append(relative_input_fullfilename)

    else:
        infos = 'table %s: no records for period %s %s' % (table_name,
                                                           starttime, endtime)
        loggings.debug_log(infos)  #write to the log
        #raise Exception(infos)
        ##exit would abort the whole main process and skip the functions after this one; if the data export fails, the product export would be skipped as well
        #exit(0)
        return
        #raise Exception('table %s: no records for period %s %s'%(table_name,starttime,endtime))
        #print ('not found in the database, %s' % category_abbr_en)
    infos = 'table %s: exported %d records' % (table_name, len(searchinfos))
    loggings.debug_log(infos)  #write to the log

    ##write the database records to a JSON file
    #json_file='data.json'
    #json_fullpath = os.path.join(despath,json_file)
    #json_fullpath = os.path.join(input_rootpath,json_file)

    export_json_path = export_json_rootpath + starttime + '/'
    if not os.path.exists(export_json_path):
        os.makedirs(export_json_path)
    json_fullpath = export_json_path + json_file

    ##write the data file
    ####option 1: one JSON document per record
    # with open(json_fullpath,'w',encoding='utf-8') as f:
    # for searchinfo in searchinfos:
    # json.dump(searchinfo,f,indent=4,ensure_ascii=False)
    # #json.dumps(searchinfo,f,indent=4,ensure_ascii=False)
    # f.write('\n')
    ####option 2: dump the whole result list at once
    with open(json_fullpath, 'w', encoding='utf-8') as f:
        #for searchinfo in searchinfos:
        f.write(json.dumps(searchinfos, indent=4, ensure_ascii=False))
        #json.dumps(searchinfo,f,indent=4,ensure_ascii=False)
        #f.write('\n')

    # print (file_list)
    # input()
    ####compress
    try:
        interact_opt.zipCompressLst.zipCompress(absolute_path_list,
                                                json_fullpath, output_fullpath)
    except Exception as e:
        #loggings.debug_log(str(e))                  #write the error to the log
        #loggings.debug_log(traceback.format_exc())  #write the stack trace to the log
        #exit(traceback.format_exc())                #exit directly on error; otherwise a run with no element data could still report extraction as finished and set element_storage_status to true
        raise Exception(traceback.format_exc())

    infos = 'collected data, compressed and exported'
    loggings.debug_log(infos)  #write to the log

    ##the temporary JSON file can be deleted once it is packed into the zip
    ##check existence first: os.remove on a missing file raises FileNotFoundError: [Errno 2] No such file or directory

    # if not os.path.exists(json_fullpath):
    # pass
    # else:
    # os.remove(json_fullpath)

    return
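
####Hedged sketch of the zipCompress helper assumed above (the real
####interact_opt.zipCompressLst module is not shown): pack the collected
####data files plus the JSON dump into one zip archive.
import os
import zipfile

def zip_compress_sketch(file_list, json_fullpath, output_fullpath):
    with zipfile.ZipFile(output_fullpath, 'w', zipfile.ZIP_DEFLATED) as zf:
        for fullpath in file_list + [json_fullpath]:
            if os.path.exists(fullpath):  # records whose file is missing on disk are skipped
                zf.write(fullpath, arcname=os.path.basename(fullpath))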
Example #4
def export_partial_data(category_abbr_en, starttime, endtime, output_fullpath):
    """
    cd /home/YJY015/code;python3 /home/YJY015/code/export.py GFZ_Kp_tab '2020-06-16 00:00:00' '2020-06-18 23:59:59' /home/YJY015/test/20200616_20200618.zip
    category_abbr_en is supplied by the user and selects this category in the database
    starttime should look like 2020-05-27 00:00:00 so it can be used in the database query directly
    endtime should look like 2020-05-27 23:59:59 so it can be used in the database query directly
    output_fullpath is the full output path including the file name, e.g. /home/YJY015/data/20200527.zip

    """

    log_starttime = get_current_BJT()
    ####instantiate the logging class
    loggings = Loggings(log_starttime, 'export_partial_data')

    despath, name = os.path.split(output_fullpath)
    if not os.path.exists(despath):
        os.makedirs(despath)

    #input_rootpath             = configs['data_rootpath']
    input_rootpath = configs['rootpath']
    #db_name = 'yjy015'
    db_name = configs['db_name']
    table_name = 'data_file_info'


    search_starttime = '\'%s\'' % starttime
    search_endtime = '\'%s\'' % endtime

    #sqlcmd='SELECT path,filename FROM data_file_info WHERE start_time BETWEEN %s and %s'%(search_starttime,search_endtime)
    #sqlcmd="SELECT * FROM %s WHERE category_abbr_en = '%s' and start_time BETWEEN %s and %s"%(table_name,category_abbr_en,search_starttime,search_endtime)
    #sqlcmd="SELECT data_file_info.*, data_category.* FROM data_file_info INNER JOIN data_category ON data_file_info.category_id = data_category.category_id WHERE data_category.category_abbr_en = '%s' and data_file_info.start_time BETWEEN %s and %s"%(category_abbr_en,search_starttime,search_endtime)
    sqlcmd = "SELECT data_file_info.path, data_file_info.filename FROM data_file_info WHERE category_abbr_en = '%s' and data_file_info.start_time BETWEEN %s and %s order by start_time" % (
        category_abbr_en, search_starttime, search_endtime)

    pga = PostgresArchive()
    try:
        searchinfos = pga.search_db_table_usercmd(db_name, sqlcmd)
        #print (searchinfos)
    except Exception as e:
        raise Exception(traceback.format_exc())

    absolute_path_list = []  ##absolute paths on disk
    #relative_path_list=[]##relative paths as stored in the database
    if (searchinfos):
        for searchinfo in searchinfos:
            path = searchinfo['path']
            filename = searchinfo['filename']
            absolute_input_fullfilename = input_rootpath + '/' + path + '/' + filename
            #relative_input_fullfilename = '/' + path + '/' + filename
            absolute_path_list.append(absolute_input_fullfilename)
            #relative_path_list.append(relative_input_fullfilename)

    else:
        #raise Exception('table %s: no records for period %s %s'%(table_name,starttime,endtime))
        #print ('not found in the database, %s' % category_abbr_en)
        # infos = sqlcmd + ' returned no data'
        # loggings.debug_log(infos)
        # exit(0)
        infos = 'table %s: no records for product %s in period %s %s' % (
            table_name, category_abbr_en, starttime, endtime)
        loggings.debug_log(infos)  #write to the log
        #raise Exception(infos)
        #exit(0)
        return

    infos = 'table data_file_info: exported %d records' % len(searchinfos)
    loggings.debug_log(infos)  #write to the log

    try:
        interact_opt.zipCompressLst.zipCompress_data(absolute_path_list,
                                                     output_fullpath)
    except Exception as e:
        #loggings.debug_log(str(e))                  #write the error to the log
        #loggings.debug_log(traceback.format_exc())  #write the stack trace to the log
        #exit(traceback.format_exc())                #exit directly on error; otherwise a run with no element data could still report extraction as finished and set element_storage_status to true
        raise Exception(traceback.format_exc())

    infos = 'collected data %s, compressed and exported for period %s,%s' % (category_abbr_en, starttime, endtime)
    loggings.debug_log(infos)  #write to the log

    return
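
####The query above is built with % string formatting; this is a hedged
####sketch of the same SELECT with psycopg2 parameter binding (direct
####psycopg2 access is an assumption -- PostgresArchive may not expose it):
import psycopg2

def search_partial_sketch(db_name, category_abbr_en, starttime, endtime):
    sql = ('SELECT path, filename FROM data_file_info '
           'WHERE category_abbr_en = %s AND start_time BETWEEN %s AND %s '
           'ORDER BY start_time')
    with psycopg2.connect(dbname=db_name) as conn:
        with conn.cursor() as cur:
            cur.execute(sql, (category_abbr_en, starttime, endtime))
            return cur.fetchall()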
Example #5
def export_avi(category_abbr_en, starttime, endtime, frame_ps,
               output_fullpath):

    log_starttime = get_current_BJT()
    ####instantiate the logging class
    loggings = Loggings(log_starttime, 'export_avi')

    despath, name = os.path.split(output_fullpath)
    if not os.path.exists(despath):
        os.makedirs(despath)

    #input_rootpath             = configs['data_rootpath']
    input_rootpath = configs['rootpath']
    #db_name = 'yjy015'
    db_name = configs['db_name']
    table_name = 'data_file_info'

    search_starttime = '\'%s\'' % starttime
    search_endtime = '\'%s\'' % endtime
    category_abbr_en = '\'%s\'' % (category_abbr_en)
    #sqlcmd='SELECT data_file_info.path,data_file_info.filename FROM data_file_info INNER JOIN data_category ON data_file_info.category_id = data_category.category_id WHERE data_category.category_abbr_en = %s and start_time BETWEEN %s and %s'%(category_abbr_en,search_starttime,search_endtime)
    sqlcmd = 'SELECT data_file_info.path,data_file_info.filename FROM data_file_info WHERE category_abbr_en = %s and start_time BETWEEN %s and %s order by start_time' % (
        category_abbr_en, search_starttime, search_endtime)

    pga = PostgresArchive()
    try:
        searchinfos = pga.search_db_table_usercmd(db_name, sqlcmd)
        #print (searchinfos)
    except Exception as e:
        raise Exception(traceback.format_exc())

    file_list = []
    if (searchinfos):
        for searchinfo in searchinfos:
            path = searchinfo['path']
            filename = searchinfo['filename']
            input_fullfilename = input_rootpath + '/' + path + '/' + filename
            ##a 0-byte jpg fed into the avi build makes OpenCV raise, but the program does not stop
            ##OpenCV(4.2.0) /io/opencv/modules/imgproc/src/resize.cpp:4045: error: (-215:Assertion failed) !ssize.empty() in function 'resize'
            file_list.append(input_fullfilename)

    else:
        infos = 'table %s: no records for product %s in period %s %s' % (
            table_name, category_abbr_en, starttime, endtime)
        loggings.debug_log(infos)  #write to the log
        #raise Exception(infos)
        #exit(0)
        return

    ####build the video
    try:
        interact_opt.make_video.img2video(file_list,
                                          output_fullpath,
                                          fps=frame_ps)
    except Exception as e:
        #loggings.debug_log(str(e))                  #write the error to the log
        #loggings.debug_log(traceback.format_exc())  #write the stack trace to the log
        #exit(traceback.format_exc())                #exit directly on error; otherwise a run with no element data could still report extraction as finished and set element_storage_status to true
        raise Exception(traceback.format_exc())

    return
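
####Hedged sketch of the img2video helper assumed above (the real
####interact_opt.make_video module is not shown); OpenCV-based, matching
####the resize error quoted in the comments, with 0-byte frames skipped.
import os
import cv2

def img2video_sketch(file_list, output_fullpath, fps=10):
    frames = [f for f in file_list
              if os.path.exists(f) and os.path.getsize(f) > 0]
    if not frames:
        return
    first = cv2.imread(frames[0])
    if first is None:
        return
    height, width = first.shape[:2]
    fourcc = cv2.VideoWriter_fourcc(*'XVID')  # AVI-compatible codec
    writer = cv2.VideoWriter(output_fullpath, fourcc, fps, (width, height))
    for f in frames:
        img = cv2.imread(f)
        if img is None:
            continue  # unreadable image, skip it
        writer.write(cv2.resize(img, (width, height)))  # uniform frame size
    writer.release()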
Example #6
def export_gif(category_abbr_en, starttime, endtime, frame_ps,
               output_fullpath):

    log_starttime = get_current_BJT()
    ####instantiate the logging class
    loggings = Loggings(log_starttime, 'export_gif')

    despath, name = os.path.split(output_fullpath)
    if not os.path.exists(despath):
        os.makedirs(despath)

    #input_rootpath             = configs['data_rootpath']
    input_rootpath = configs['rootpath']
    #db_name = 'yjy015'
    db_name = configs['db_name']
    table_name = 'data_file_info'

    search_starttime = '\'%s\'' % starttime
    search_endtime = '\'%s\'' % endtime
    category_abbr_en = '\'%s\'' % (category_abbr_en)
    #sqlcmd='SELECT data_file_info.path,data_file_info.filename FROM data_file_info INNER JOIN data_category ON data_file_info.category_id = data_category.category_id WHERE data_category.category_abbr_en = %s and start_time BETWEEN %s and %s'%(category_abbr_en,search_starttime,search_endtime)
    #sqlcmd='SELECT data_file_info.path,data_file_info.filename FROM data_file_info WHERE data_file_info.category_abbr_en = %s and start_time BETWEEN %s and %s'%(category_abbr_en,search_starttime,search_endtime)
    sqlcmd = 'SELECT data_file_info.path,data_file_info.filename FROM data_file_info WHERE data_file_info.category_abbr_en = %s and start_time BETWEEN %s and %s order by start_time' % (
        category_abbr_en, search_starttime, search_endtime)

    pga = PostgresArchive()
    try:
        searchinfos = pga.search_db_table_usercmd(db_name, sqlcmd)
        #print (searchinfos)
    except Exception as e:
        infos = traceback.format_exc()
        loggings.debug_log(infos)  #write to the log
        raise Exception(traceback.format_exc())

    file_list = []
    if (searchinfos):
        for searchinfo in searchinfos:
            path = searchinfo['path']
            filename = searchinfo['filename']
            input_fullfilename = input_rootpath + '/' + path + '/' + filename

            ####skip images that are missing locally or whose size is 0 bytes
            if not os.path.exists(input_fullfilename) or os.path.getsize(
                    input_fullfilename) == 0:
                continue
            file_list.append(input_fullfilename)

    else:
        infos = 'table %s: no records for product %s in period %s %s' % (
            table_name, category_abbr_en, starttime, endtime)
        loggings.debug_log(infos)  #write to the log
        #raise Exception(infos)
        #exit(0)
        return
        #raise Exception(infos)
        #print ('not found in the database, %s' % category_abbr_en)

    ####build the gif
    try:
        interact_opt.make_gif.img2gif(output_fullpath, file_list, fps=frame_ps)
    except Exception as e:
        #loggings.debug_log(str(e))                  #write the error to the log
        #loggings.debug_log(traceback.format_exc())  #write the stack trace to the log
        #exit(traceback.format_exc())                #exit directly on error; otherwise a run with no element data could still report extraction as finished and set element_storage_status to true
        raise Exception(traceback.format_exc())

    return
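
####Hedged sketch of the img2gif helper assumed above (the real
####interact_opt.make_gif module is not shown); imageio-based, relying on
####the existence/size filtering the caller already applies.
import imageio

def img2gif_sketch(output_fullpath, file_list, fps=5):
    frames = [imageio.imread(f) for f in file_list]
    imageio.mimsave(output_fullpath, frames, duration=1.0 / fps)  # seconds per frame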
Example #7
                e_year, e_month, e_day, e_hour, e_minute,
                e_second).strftime('%Y-%m-%d %H:%M:%S')

            ####data export: initialize the monitoring state
            start_time = search_starttime
            end_time = search_endtime
            ret_code = check_db(start_time, end_time)
            if 0 == ret_code:
                print('an export for this period already succeeded; no new export is required, but it may be repeated if needed')
                #return ##an export for this period already exists
            if 1 == ret_code:
                print('an export for this period exists but its status is False; export again')
                #pass ##a previous attempt for this period failed; run the export once more
            if 2 == ret_code:
                print('no export for this period yet; insert an initial status record')
                record_time = get_current_BJT()
                update_time = get_current_BJT()
                status = 'False'
                record_db(record_time, update_time, start_time, end_time,
                          status, filesize)  ##no export for this period yet; insert an initial status record

            ##compress the collected data and export its database records to a JSON file
            export_data(argv1, argv2, argv3)
            print('export_data finish.')
            ##compress the products from element extraction and export their database records to a JSON file
            export_product(argv1, argv2, argv3)
            print('export_product finish.')
            ##export the alert SMS messages to a JSON file
            export_alert(argv1, argv2, argv3)
            print('export_alert finish.')
            ##export the alert reports to a JSON file
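
####Hedged sketch of the check_db return-code contract implied above
####(0 = a successful export record exists, 1 = only failed records exist,
####2 = no record for this period); the record layout is an assumption.
def check_db_sketch(records, start_time, end_time):
    matches = [r for r in records
               if r['start_time'] == start_time and r['end_time'] == end_time]
    if not matches:
        return 2  # no record yet: insert an initial status row
    if any(r['status'] == 'True' for r in matches):
        return 0  # a successful export already exists
    return 1      # only failed attempts: export again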
Example #8
def download_job(searchinfo,
                 starttime,
                 endtime,
                 exe_path=os.path.dirname(os.path.abspath(__file__)),
                 exe_name='download.py'):

    #db_name = 'yjy015'
    db_name = configs['db_name']
    #table_name = 'data_monitor'

    ####parse the fields of the database record passed in
    category_abbr_en = searchinfo['category_abbr_en']
    category_name_zh = searchinfo['category_name_zh']
    #task_triggers = searchinfo['task_triggers']
    data_class = searchinfo['data_class']
    research_area = searchinfo['research_area']
    website = searchinfo['website']
    num_collect_perday = int(searchinfo['num_collect_perday'])
    num_store_perday = int(searchinfo['num_store_perday'])
    scheduling_interval_min = int(searchinfo['scheduling_interval_min'])
    scheduling_delayed_min = int(searchinfo['scheduling_delayed_min'])
    task_name = 'Data collection ' + category_name_zh + ' ' + category_abbr_en

    ####task start time in UTC, because the websites publish their data on UTC timestamps
    task_starttime = get_current_UTC()
    log_starttime = get_current_BJT()

    ####instantiate the logging class
    loggings = Loggings(log_starttime, category_abbr_en)

    ####build the command line
    exe_fullpath = os.path.join(exe_path, exe_name)
    #starttime = (datetime.datetime.strptime(taskStarttime, "%Y-%m-%d %H:%M:%S")).strftime('%Y%m%d%H%M%S')
    ##delayed download: shift the current time back by the delay before starting the task
    #starttime = (datetime.datetime.strptime(task_starttime, "%Y-%m-%d %H:%M:%S")).strftime('%Y%m%d%H%M%S')
    ####the scheduler is postponed by 5 minutes, so the effective task time is also shifted back 5 minutes

    # starttime = (datetime.datetime.strptime(task_starttime, "%Y-%m-%d %H:%M:%S")).strftime('%Y%m%d%H%M%S')
    # endtime = starttime

    ####launch the task
    cmd = 'python3' + ' ' + exe_fullpath + ' ' + category_abbr_en + ' ' + starttime + ' ' + endtime
    print(cmd)

    ####option 2: run through a pipe
    ####on an exception there may be no return value
    ret = subprocess.run(cmd,
                         shell=True,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         encoding="utf-8",
                         timeout=None)
    #ret = subprocess.run(cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,encoding="utf-8",timeout=1)
    """
    1. ret.returncode == 0 alone does not prove the download finished; the status reported by the download task must also be checked before the task counts as successful

    """
    ####almost no run satisfies all 3 conditions, so only the return code is checked
    #if ret.returncode == 0 and ret.stderr == '' and ret.stdout == '':
    if ret.returncode == 0:
        status = 'True'
        log = 'success'
        """ret.stdout captures everything the child printed to the screen"""
        #print("success:",ret)
    else:
        status = 'False'
        ##log only records success/failure; the detailed reason goes to the task log
        #log = ret.stderr
        log = 'failure'
        loggings.debug_log('ret.returncode = %d' %
                           ret.returncode)  #write the subprocess return code to the log
        loggings.debug_log(ret.stderr)  #write the subprocess stderr to the log
        #print (cmd)
        #print ("error:",ret.stderr)

    return
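
####The command above runs through the shell; a hedged sketch of the same
####call with an argument list (no shell), which passes paths and
####timestamps verbatim without quoting issues:
import subprocess

def run_download_sketch(exe_fullpath, category_abbr_en, starttime, endtime):
    ret = subprocess.run(['python3', exe_fullpath, category_abbr_en,
                          starttime, endtime],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         encoding='utf-8')
    return ret.returncode == 0, ret.stderr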
Example #9
def alert_job(geomag, electron, flare, proton, searchinfo, delay_min):

    #db_name = 'yjy015'
    db_name = configs['db_name']
    table_name = 'alert_monitor'

    ####parse the fields of the database record passed in
    category_abbr_en = searchinfo['category_abbr_en']
    category_name_zh = searchinfo['category_name_zh']
    task_name = 'Event alert ' + category_name_zh + ' ' + category_abbr_en
    scheduling_delayed_min = int(searchinfo['scheduling_delayed_min'])

    data_class = searchinfo['data_class']
    research_area = searchinfo['research_area']
    website = searchinfo['website']

    ####task start time in UTC, because the websites publish their data on UTC timestamps
    task_starttime = get_current_UTC()
    log_starttime = get_current_BJT()

    ####the scheduler is postponed by 5 minutes, so the effective task time is also shifted back 5 minutes
    # starttime = (datetime.datetime.strptime(task_starttime, "%Y-%m-%d %H:%M:%S") + datetime.timedelta( minutes=-(scheduling_delayed_min+delay_min))).strftime('%Y-%m-%d %H:%M:%S')
    # endtime = starttime

    ##a time like 2020-05-27 15:31 is stored in the database as 2020-05-27 15:31:00
    ##when searching, start and end are aligned to the minute and the seconds span 00~59

    ####align the task start time to the minute; seconds span 00~59
    task_dt = datetime.datetime.strptime(task_starttime, "%Y-%m-%d %H:%M:%S")
    shift = datetime.timedelta(minutes=-(scheduling_delayed_min + delay_min))

    ####compute the window before the delayed download, minute precision, seconds 00~59
    starttime = (task_dt.replace(second=0) + shift).strftime('%Y-%m-%d %H:%M:%S')
    endtime = (task_dt.replace(second=59) + shift).strftime('%Y-%m-%d %H:%M:%S')

    ####instantiate the Alert class
    alert = Alert(geomag, electron, flare, proton, category_abbr_en, starttime,
                  endtime)

    ####defaults so the monitor insert below also works when the category
    ####matches no alert branch; without them alert_type/monitor_log/status
    ####would be undefined there
    alert_type = ''
    monitor_log = 'no alert type for %s' % category_abbr_en
    status = 'False'

    ####pick the alert task from the data category and its start time
    ####the Kp index uses SWPC (SWPC_latest_DGD), not the German GFZ feed (GFZ_Kp_web)
    #if category_abbr_en == 'GFZ_Kp_web' or category_abbr_en == 'SWPC_latest_DGD':
    if category_abbr_en == 'SWPC_latest_DGD':
        alert_type = 'geomag_storm_docx'
        #loggings=Loggings(log_starttime,'alert_' + category_abbr_en)#instantiate the logging class
        #loggings.debug_log('SWPC_latest_DGD')

        try:
            alert.alert_geomag_storm()
            monitor_log = 'success'
            status = 'True'
        except Exception as e:
            #monitor_log = traceback.format_exc()
            monitor_log = str(e)
            #loggings.debug_log(traceback.format_exc())              #write the stack trace to the log
            status = 'False'

    if category_abbr_en == 'SWPC_GOES_IE5m':
        alert_type = 'electron_burst_docx'
        try:
            alert.alert_electron_burst()
            monitor_log = 'success'
            status = 'True'
        except Exception as e:
            monitor_log = str(e)
            status = 'False'

    if category_abbr_en == 'SWPC_GOES_XR1m':
        alert_type = 'solar_flare_docx'
        try:
            alert.alert_solar_flare()
            monitor_log = 'success'
            status = 'True'
        except Exception as e:
            monitor_log = str(e)
            status = 'False'

    if category_abbr_en == 'SWPC_GOES_IP5m':
        alert_type = 'solar_proton_docx'
        try:
            alert.alert_solar_proton()
            monitor_log = 'success'
            status = 'True'
        except Exception as e:
            monitor_log = str(e)
            status = 'False'


    create_time = get_current_BJT()  ##record creation time, Beijing time
    update_time = get_current_BJT()  ##record update time, Beijing time

    pga = PostgresArchive()
    config_infos = {
        'task_name': task_name,
        'create_time': create_time,
        'update_time': update_time,
        'log': monitor_log,
        'status': status,
        'alert_type': alert_type  ##store the event type, not the data-source type
    }

    pga.insert_db_table(database_name=db_name,
                        table_name=table_name,
                        config_element=config_infos)

    return
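
####Worked example of the minute-aligned window computed above: with
####task_starttime '2020-05-27 15:31:07', scheduling_delayed_min=5 and
####delay_min=0, the searched window is 15:26:00 .. 15:26:59.
import datetime

def alert_window_sketch(task_starttime, scheduling_delayed_min, delay_min):
    base = datetime.datetime.strptime(task_starttime, '%Y-%m-%d %H:%M:%S')
    shift = datetime.timedelta(minutes=scheduling_delayed_min + delay_min)
    start = (base.replace(second=0) - shift).strftime('%Y-%m-%d %H:%M:%S')
    end = (base.replace(second=59) - shift).strftime('%Y-%m-%d %H:%M:%S')
    return start, end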
Example #10
def product_job(searchinfo, delay_min):

    #db_name = 'yjy015'
    db_name = configs['db_name']
    table_name = 'product_monitor'

    ####parse the fields of the database record passed in
    category_abbr_en = searchinfo['category_abbr_en']
    category_name_zh = searchinfo['category_name_zh']
    task_name = 'Product generation ' + category_name_zh + ' ' + category_abbr_en
    scheduling_delayed_min = int(searchinfo['scheduling_delayed_min'])

    data_class = searchinfo['data_class']
    research_area = searchinfo['research_area']
    website = searchinfo['website']

    ####task start time in UTC, because the websites publish their data on UTC timestamps
    task_starttime = get_current_UTC()
    log_starttime = get_current_BJT()

    ####the scheduler is postponed by 5 minutes, so the effective task time is also shifted back 5 minutes
    # starttime = (datetime.datetime.strptime(task_starttime, "%Y-%m-%d %H:%M:%S") + datetime.timedelta( minutes=-(scheduling_delayed_min+delay_min))).strftime('%Y-%m-%d %H:%M:%S')
    # endtime = starttime

    ##a time like 2020-05-27 15:31 is stored in the database as 2020-05-27 15:31:00
    ##when searching, start and end are aligned to the minute and the seconds span 00~59

    ####align the task start time to the minute; seconds span 00~59
    task_dt = datetime.datetime.strptime(task_starttime, "%Y-%m-%d %H:%M:%S")
    shift = datetime.timedelta(minutes=-(scheduling_delayed_min + delay_min))

    ####compute the window before the delayed download, minute precision, seconds 00~59
    starttime = (task_dt.replace(second=0) + shift).strftime('%Y-%m-%d %H:%M:%S')
    endtime = (task_dt.replace(second=59) + shift).strftime('%Y-%m-%d %H:%M:%S')

    ####instantiate the Product class
    prd = Product(category_abbr_en, starttime, endtime)

    ####map each category to its product function; all JSOC_AIA_* bands share
    ####product_SDO_draw_grid, exactly as in the original per-category branches
    ####(branches for CDA_DSCOVR_SW -> prd.product_DSCOVR_FC and
    #### CDA_DSCOVR_MAG -> prd.product_DSCOVR_mag remain commented out)
    product_funcs = {
        'Ngdc_DSCOVR_SW': prd.product_Ngdc_DSCOVR_SW,
        'Ngdc_DSCOVR_MAG': prd.product_NGDC_DSCOVR_m1s,
        'CDA_TIMED_SL2a': prd.product_Timed_L2A,
        'CDA_GPS_TEC': prd.product_IGS_TEC,
        'JSOC_AIA_0094': prd.product_SDO_draw_grid,
        'JSOC_AIA_0131': prd.product_SDO_draw_grid,
        'JSOC_AIA_0171': prd.product_SDO_draw_grid,
        'JSOC_AIA_0193': prd.product_SDO_draw_grid,
        'JSOC_AIA_0211': prd.product_SDO_draw_grid,
        'JSOC_AIA_0304': prd.product_SDO_draw_grid,
        'JSOC_AIA_0305': prd.product_SDO_draw_grid,
        'JSOC_AIA_1600': prd.product_SDO_draw_grid,
        'JSOC_AIA_1700': prd.product_SDO_draw_grid,
        'JSOC_HMI_12m': prd.product_SDO_draw_AR,
    }

    ####defaults for categories with no product function; without them the
    ####monitor insert below would fail on an undefined log/status
    log = 'no product function for %s' % category_abbr_en
    status = 'False'
    product_func = product_funcs.get(category_abbr_en)
    if product_func is not None:
        try:
            product_func()
            log = 'success'
            status = 'True'
        except Exception as e:
            #log = traceback.format_exc()
            log = str(e)
            status = 'False'

    create_time = get_current_BJT()  ##record creation time, Beijing time
    update_time = get_current_BJT()  ##record update time, Beijing time

    pga = PostgresArchive()
    config_infos = {
        'task_name': task_name,
        'create_time': create_time,
        'update_time': update_time,
        'log': log,
        'status': status,
        'data_class': data_class,
        'research_area': research_area,
        'website': website,
        'category_abbr_en': category_abbr_en
    }

    #pga.insert_db_table(database_name='task_db', table_name='t_task_monitor', config_element = config_infos)
    #pga.insert_db_table(database_name='yjy015', table_name='task_monitor', config_element = config_infos)
    pga.insert_db_table(database_name=db_name,
                        table_name=table_name,
                        config_element=config_infos)

    return
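
####Self-contained demo of the dispatch-table pattern used above, with
####stand-in handlers (the names here are illustrative, not the Product API):
def _demo_ok():
    pass

def _demo_fail():
    raise RuntimeError('boom')

def run_product_sketch(handlers, category):
    func = handlers.get(category)
    if func is None:
        return 'no product function for %s' % category, 'False'
    try:
        func()
        return 'success', 'True'
    except Exception as e:
        return str(e), 'False'

#run_product_sketch({'ok': _demo_ok, 'bad': _demo_fail}, 'bad') -> ('boom', 'False')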
Example #11
def redo_download_job(expire_days=3):
    """
    1. 每天上午06点开始重做前3天的失败任务,可能很多失败任务,查询log失败和status为False
    2. 格式化输出',\',格式化\加',因为sql查询日期需要带单引号
    """
    #db_name = 'yjy015'
    db_name = configs['db_name']
    table_name = 'data_monitor'

    current_date = '\'%s\'' % (
        datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    expire_date = '\'%s\'' % (
        (datetime.datetime.now() +
         datetime.timedelta(days=-expire_days)).strftime("%Y-%m-%d %H:%M:%S"))
    #search_log     = '\'%s\''%'失败'
    search_status = '\'%s\'' % 'False'
    #sqlcmd='SELECT * FROM %s WHERE log = %s and status = %s and create_time BETWEEN %s and %s'%(table_name,search_log,search_status,expire_date,current_date)
    #sqlcmd='SELECT * FROM %s WHERE status = %s and create_time BETWEEN %s and %s'%(table_name,search_status,expire_date,current_date)
    ####2个表联合查询
    sqlcmd = "SELECT data_monitor.*, data_category.redo_flag FROM data_monitor INNER JOIN data_category ON data_monitor.category_abbr_en = data_category.category_abbr_en \
            WHERE data_monitor.status = %s and create_time BETWEEN %s and %s" % (
        search_status, expire_date, current_date)

    pga = PostgresArchive()
    try:
        searchinfos = pga.search_db_table_usercmd(db_name, sqlcmd)
        #print (searchinfos)
    except Exception as e:
        #loggings.debug_log(str(e))                   #write the error to the log
        #loggings.debug_log(traceback.format_exc())   #write the stack trace to the log
        raise Exception(traceback.format_exc())

    ####process the matched records
    if (searchinfos):
        for searchinfo in searchinfos:
            ####parse the fields of the database record passed in
            task_name = searchinfo['task_name']
            create_time = searchinfo['create_time']
            log = searchinfo['log']
            status = searchinfo['status']
            cmd = searchinfo['cmd']
            data_class = searchinfo['data_class']
            research_area = searchinfo['research_area']
            website = searchinfo['website']
            category_abbr_en = searchinfo['category_abbr_en']
            redo_flag = searchinfo['redo_flag']

            ####honor redo_flag from data_category: if it is False the category is not redone, because the feed is real-time -- a redo would download current data, not the missed historical data
            if ('False' == redo_flag):
                continue

            ####log start time, Beijing time
            logStarttime = get_current_BJT()

            ####instantiate the logging class
            loggings = Loggings(logStarttime, category_abbr_en)

            ####option 2: run through a pipe
            ####on an exception there may be no return value
            ret = subprocess.run(cmd,
                                 shell=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 encoding="utf-8",
                                 timeout=None)
            #ret = subprocess.run(cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,encoding="utf-8",timeout=1)
            """
            1. ret.returncode == 0 alone does not prove the download finished; the status reported by the download task must also be checked before the task counts as successful

            """

            ####the wget download needs a flag marking this as a redo task
            #if ret.returncode == 0 and ret.stderr == '' and ret.stdout == '':
            if ret.returncode == 0:
                update_status = 'True'
                update_log = 'success'
                """ret.stdout captures everything the child printed to the screen"""
                #print("success:",ret)
            else:
                update_status = 'False'
                ##update_log only records success/failure; the detailed stderr goes to the task log, and log keeps its original value for the update condition below
                update_log = 'failure'
                loggings.debug_log('ret.returncode = %d' %
                                   ret.returncode)  #write the subprocess return code to the log
                loggings.debug_log(ret.stderr)  #write the subprocess stderr to the log
                print("error:", ret.stderr)

            ####update the database monitoring table
            ####the data_file_info table is what matters most; the scheduling table only drives automatic jobs, and the information collected in data_file_info is authoritative
            update_time = get_current_BJT()  ##update time, Beijing time
            pga = PostgresArchive()
            config_infos = {
                'task_name': task_name,
                'create_time': create_time,
                'update_time': update_time,
                'log': update_log,
                'status': update_status,
                'cmd': cmd,
                'data_class': data_class,
                'research_area': research_area,
                'website': website,
                'category_abbr_en': category_abbr_en
            }

            condition_infos = {
                'task_name': task_name,
                'create_time': create_time,
                'status': status,
                'log': log
            }  #identify the failed record to update

            #pga.insert_db_table(database_name='task_db', table_name='t_task_monitor', config_element = config_infos)
            #pga.update_db_table(database_name='yjy015', table_name='task_monitor', config_element = config_infos, condition_element=condition_infos)
            pga.update_db_table(database_name=db_name,
                                table_name=table_name,
                                config_element=config_infos,
                                condition_element=condition_infos)

    return
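
####Worked sketch of the redo window used above: with expire_days=3 the
####query covers create_time between now-3d and now.
import datetime

def redo_window_sketch(expire_days=3):
    now = datetime.datetime.now()
    expire = now - datetime.timedelta(days=expire_days)
    return (expire.strftime('%Y-%m-%d %H:%M:%S'),
            now.strftime('%Y-%m-%d %H:%M:%S'))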
Example #12
def download_job(searchinfo,
                 exe_path=os.path.dirname(os.path.abspath(__file__)),
                 exe_name='download.py'):

    #db_name = 'yjy015'
    db_name = configs['db_name']
    #table_name = 'data_monitor'

    ####parse the fields of the database record passed in
    category_abbr_en = searchinfo['category_abbr_en']
    category_name_zh = searchinfo['category_name_zh']
    #task_triggers = searchinfo['task_triggers']
    data_class = searchinfo['data_class']
    research_area = searchinfo['research_area']
    website = searchinfo['website']
    num_collect_perday = int(searchinfo['num_collect_perday'])
    num_store_perday = int(searchinfo['num_store_perday'])
    scheduling_interval_min = int(searchinfo['scheduling_interval_min'])
    scheduling_delayed_min = int(searchinfo['scheduling_delayed_min'])
    task_name = 'Data collection ' + category_name_zh + ' ' + category_abbr_en

    ####task start time in UTC, because the websites publish their data on UTC timestamps
    task_starttime = get_current_UTC()
    log_starttime = get_current_BJT()

    ####instantiate the logging class
    loggings = Loggings(log_starttime, category_abbr_en)

    ####build the command line
    exe_fullpath = os.path.join(exe_path, exe_name)
    #starttime = (datetime.datetime.strptime(taskStarttime, "%Y-%m-%d %H:%M:%S")).strftime('%Y%m%d%H%M%S')
    ##delayed download: shift the current time back by the delay before starting the task
    #starttime = (datetime.datetime.strptime(task_starttime, "%Y-%m-%d %H:%M:%S")).strftime('%Y%m%d%H%M%S')
    ####the scheduler is postponed by 5 minutes, so the effective task time is also shifted back 5 minutes

    starttime = (
        datetime.datetime.strptime(task_starttime, "%Y-%m-%d %H:%M:%S") +
        datetime.timedelta(minutes=-scheduling_delayed_min)
    ).strftime('%Y%m%d%H%M%S')
    endtime = starttime

    ####launch the task
    cmd = 'python3' + ' ' + exe_fullpath + ' ' + category_abbr_en + ' ' + starttime + ' ' + endtime
    #print (cmd)

    #print (category_abbr_en)
    #print ('task_hour = %s'%task_hour)
    #print ('task_minute = %s'%task_minute)
    #print ('task_second = %s'%task_second)

    #print ('task delay starting......')
    #print ('task delayed %d seconds'%(scheduling_delayed_min*60) )
    ####start the download with a delay so the data on the site is complete
    #time.sleep(scheduling_delayed_min*60)#delayed download, in seconds
    #print ('task delay finished')
    ####option 1
    #os.system(cmd)

    ####option 2: run through a pipe
    ####on an exception there may be no return value
    ret = subprocess.run(cmd,
                         shell=True,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         encoding="utf-8",
                         timeout=None)
    #ret = subprocess.run(cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,encoding="utf-8",timeout=1)
    """
    1. ret.returncode == 0 alone does not prove the download finished; the status reported by the download task must also be checked before the task counts as successful

    """
    ####almost no run satisfies all 3 conditions, so only the return code is checked
    #if ret.returncode == 0 and ret.stderr == '' and ret.stdout == '':
    if ret.returncode == 0:
        status = 'True'
        log = 'success'
        """ret.stdout captures everything the child printed to the screen"""
        #print("success:",ret)
    else:
        status = 'False'
        ##log only records success/failure; the detailed reason goes to the task log
        #log = ret.stderr
        log = 'failure'
        loggings.debug_log('ret.returncode = %d' %
                           ret.returncode)  #write the subprocess return code to the log
        loggings.debug_log(ret.stderr)  #write the subprocess stderr to the log
        #print (cmd)
        #print ("error:",ret.stderr)


    return
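
####Worked example of the delayed start time computed above: with
####task_starttime '2020-05-27 15:31:07' and scheduling_delayed_min=5,
####the compact %Y%m%d%H%M%S result is '20200527152607'.
import datetime

def delayed_starttime_sketch(task_starttime, scheduling_delayed_min):
    base = datetime.datetime.strptime(task_starttime, '%Y-%m-%d %H:%M:%S')
    return (base - datetime.timedelta(
        minutes=scheduling_delayed_min)).strftime('%Y%m%d%H%M%S')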
Example #13
def cleanup_data(startime, endtime):
    """
    startime should be a date such as 2020-05-27
    endtime should be a date such as 2020-05-27

    the database query defaults to the full 24 hours of each day:
    startime is expanded to 2020-05-27 00:00:00 before querying
    endtime is expanded to 2020-05-27 23:59:59 before querying

    """

    ####expand the dates to a full-day window, 00:00:00 .. 23:59:59
    s_dt = datetime.datetime.strptime(startime, "%Y-%m-%d")
    e_dt = datetime.datetime.strptime(endtime, "%Y-%m-%d").replace(
        hour=23, minute=59, second=59)
    # search_starttime='\'%s\''%s_dt.strftime('%Y-%m-%d %H:%M:%S')
    # search_endtime  ='\'%s\''%e_dt.strftime('%Y-%m-%d %H:%M:%S')
    search_starttime = s_dt.strftime('%Y-%m-%d %H:%M:%S')
    search_endtime = e_dt.strftime('%Y-%m-%d %H:%M:%S')
    ####initialize the monitoring state
    clean_up_tablename = 'data_file_info'
    start_time = search_starttime
    end_time = search_endtime
    ret_code = check_db(clean_up_tablename, start_time, end_time)
    if 0 == ret_code:
        print('a cleanup for this period already ran; no new cleanup is required')
        #return ##a cleanup record for this period already exists
        ####update the monitoring state
        update_time = get_current_BJT()
        config_infos = {
            'update_time': update_time,
            'status': '2'
        }  #set the updated value; '2' marks the cleanup as in progress
        # condition_infos={
        # 'start_time':start_time,
        # 'end_time':end_time,
        # 'status':'False'}
        ####if no record exists for this cleanup date, treat it as already cleaned
        condition_infos = {
            'table_name': clean_up_tablename,
            'start_time': start_time,
            'end_time': end_time
        }
        update_db(config_infos, condition_infos)
        pass  ####a cleanup already ran earlier, e.g. the 2020-08-21 downloads were removed in the morning, but new files arrived since and must be removed again
    if 1 == ret_code:
        print('a cleanup record exists for this period but its status is 1 (failed) or 2 (in progress); clean up again')
        ####update the monitoring state
        update_time = get_current_BJT()
        config_infos = {
            'update_time': update_time,
            'status': '2'
        }  #set the updated value; '2' marks the cleanup as in progress
        # condition_infos={
        # 'start_time':start_time,
        # 'end_time':end_time,
        # 'status':'False'}
        ####if no record exists for this cleanup date, treat it as already cleaned
        condition_infos = {
            'table_name': clean_up_tablename,
            'start_time': start_time,
            'end_time': end_time
        }
        update_db(config_infos, condition_infos)
        pass  ##a previous cleanup for this period did not finish; run it once more
    if 2 == ret_code:
        print('no cleanup for this period yet; insert an initial status record')
        record_time = get_current_BJT()
        update_time = get_current_BJT()
        #status='False'
        status = '2'  ####cleanup in progress
        record_db(clean_up_tablename, record_time, update_time, start_time,
                  end_time, status)  ##no cleanup for this period yet; insert an initial status record

    log_starttime = get_current_BJT()
    ####instantiate the logging class
    loggings = Loggings(log_starttime, 'cleanup_data')

    input_rootpath = configs['rootpath']
    db_name = configs['db_name']
    table_name = 'data_file_info'

    sqlcmd = "SELECT * FROM %s WHERE start_time BETWEEN '%s' and '%s'" % (
        table_name, search_starttime, search_endtime)
    print(sqlcmd)
    pga = PostgresArchive()
    try:
        searchinfos = pga.search_db_table_usercmd(db_name, sqlcmd)
    except Exception as e:
        raise Exception(traceback.format_exc())

    absolute_path_list = []  ##absolute paths on disk
    if (searchinfos):
        for searchinfo in searchinfos:
            path = searchinfo['path']
            filename = searchinfo['filename']
            absolute_input_fullfilename = input_rootpath + '/' + path + '/' + filename
            absolute_path_list.append(absolute_input_fullfilename)

            ####delete the table record
            pga.delete_table_record(db_name, table_name, searchinfo)

    else:
        infos = 'table %s: no records, nothing to delete for period %s %s' % (
            table_name, startime, endtime)
        loggings.debug_log(infos)  #write to the log
        ####update the monitoring state
        update_time = get_current_BJT()
        config_infos = {
            'update_time': update_time,
            'status': '3'
        }  #set the updated value; '3' marks the cleanup as finished
        # condition_infos={
        # 'start_time':start_time,
        # 'end_time':end_time,
        # 'status':'False'}
        ####if no record exists for this cleanup date, treat it as already cleaned
        condition_infos = {
            'table_name': clean_up_tablename,
            'start_time': start_time,
            'end_time': end_time
        }
        update_db(config_infos, condition_infos)
        return
        #exit(0)##exit would abort the whole main program and skip the functions called after this one

    infos = 'table data_file_info: %d records' % len(searchinfos)
    loggings.debug_log(infos)  #write to the log

    for datafile in absolute_path_list:
        ##check existence first: os.remove on a missing file raises FileNotFoundError: [Errno 2] No such file or directory
        if not os.path.exists(datafile):
            pass
        else:
            os.remove(datafile)
    infos = 'data cleanup finished.'
    loggings.debug_log(infos)  #write to the log

    ####update the monitoring state
    update_time = get_current_BJT()
    config_infos = {'update_time': update_time, 'status': '3'}  #set the updated value; '3' marks the cleanup as finished
    # condition_infos={
    # 'start_time':start_time,
    # 'end_time':end_time,
    # 'status':'False'}
    ####once the deletion succeeds, set the status to '3' regardless of its previous value
    condition_infos = {
        'table_name': clean_up_tablename,
        'start_time': start_time,
        'end_time': end_time
    }
    update_db(config_infos, condition_infos)

    return
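
####Equivalent removal loop without the exists() check: suppressing
####FileNotFoundError avoids the race between the check and the remove
####(a minimal sketch of an alternative, not the original code).
import contextlib
import os

def remove_files_sketch(path_list):
    for datafile in path_list:
        with contextlib.suppress(FileNotFoundError):
            os.remove(datafile)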
Example #14
    ####order by priority, then derive the interval between downloads from the daily frequency, with the first download starting at 00:00:00 each day
    ####make it a crontab-like scheduled task that downloads automatically on time

    ####currently only the previous day's temporary data is deleted; if the network is down for a whole day, yesterday's data is never removed, because tomorrow's run only deletes today's
    ####consider also deleting temporary data older than 1 week or 1 month
    ####each automatic download keeps, per dataset, only the newest file of the day
    ####convert %Y%m%d%H%M%S to %Y-%m-%d %H:%M:%S to match the database format and simplify queries

    #return True
    return


if __name__ == "__main__":

    ####get the system time
    taskStarttime = get_current_BJT()

    #### 1. start the database: sudo service postgresql start
    #### 2. check that the host IP is configured correctly: ifconfig -a

    ####2 modes: manual download and automatic download
    ####automatic: no command-line arguments; use the current system time to run the downloads, either with a thread pool or serially
    ####manual: pass start time, end time and the data identifier on the command line; multiple manual downloads are queued by priority, highest first -- with parallel downloads the priority would be meaningless

    ####manual download command line
    #python3 /home/hffan/fanhuifeng/python_prj/YJY015/sqlmain.py CDA_TIMED_SL2b 20190919111000 20190920111000
    ####automatic download command line
    #python3 /home/hffan/fanhuifeng/python_prj/YJY015/sqlmain.py

    ########databases and tables can be created manually with navicat, or from python statements
    ########create the psql database
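
    ####Hedged sketch of creating the database from Python with psycopg2
    ####(host/user/password below are placeholders, not project values)
    def create_database_sketch():
        import psycopg2
        conn = psycopg2.connect(dbname='postgres', host='127.0.0.1',
                                user='postgres', password='postgres')
        conn.autocommit = True  # CREATE DATABASE cannot run inside a transaction
        with conn.cursor() as cur:
            cur.execute('CREATE DATABASE yjy015')
        conn.close()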