def query2(ac_type='', monthlist='', column=''): """ [数据统计-列出某人指定日期间的某项快照数据全部结果]###### 函数说明 乔晖 2018/4/22 [输入Parameters]: ac_type:string 机型 e.g. 可以是机型列表'B737-700','B737-800' start:string 开始日期 format:YYYY/MM/DD end:string 结束日期 format:YYYY/MM/DD column:string 具体分析某一个快照,如平飘距离 `DIST_LDG (feet)` ------- [返回值return] [数据统计-]###### 函数说明 乔晖 2018/3/30 列出每月指定机型的某项快照数据 """ #查询737机型某月单项数据 #B737=query("SELECT lnk.key_id,lnk.航班日期,ags.`RETARD_ALT (FT)` as 数据 FROM `flight_link_chn` lnk,`ags_snapshot` ags where ags.key_id=lnk.key_id and lnk.航班日期 between '2018/1/1' and '2018/3/31' and lnk.机型 IN ('73M','73L','73H','738','73E','737','73G','73A')") B737 = query( "SELECT lnk.key_id,lnk.航班日期,ags.`VRTG_MAX_LD (g)` as 数据 FROM `flight_link_chn` lnk,`ags_snapshot` ags where ags.key_id=lnk.key_id and lnk.航班日期 between '2018/1/1' and '2018/1/31' and lnk.机型 IN ('73L','73H','738','73E','737','73G','73A')" ) #B737=query("select flnk.`航班日期`,ags.`From`,ags.`To`,ags.`DIST_LDG (feet)` as 数据 from crew_link lnk,ags_snapshot ags,flight_link_chn flnk where ags.key_id=lnk.key_id and flnk.key_id=lnk.key_id and flnk.key_id=ags.key_id and lnk.姓名='乔晖' and flnk.航班日期 between '2018/2/1' and '2018/2/28' order by 航班日期") B737_result = B737.fetchall() B737_list = [] for B737_row in B737_result: B737_list.append(float(B737_row['数据'])) print("B737 3月份平飘距离Q3指标为:", np.percentile(B737_list, 75)) #平均值 print("平均值:", mean(B737_list)) #标准差 print("标准差:", std(B737_list, ddof=1)) #变异系数 print("变异系数:", std(B737_list, ddof=1) / mean(B737_list)) print("中位数,Q3,标准差,变异系数:", mean(B737_list), np.percentile(B737_list, 75), std(B737_list, ddof=1), std(B737_list, ddof=1) / mean(B737_list)) a = query( "select flnk.`航班日期`,ags.`From`,ags.`To`,ags.`DIST_LDG (feet)` as 数据 from crew_link lnk,ags_snapshot ags,flight_link_chn flnk where ags.key_id=lnk.key_id and flnk.key_id=lnk.key_id and flnk.key_id=ags.key_id and lnk.姓名='章磊' and date_format(flnk.航班日期,'%Y-%m')='2018-03'" ) print("共%s条有效数据:" % a.rowcount) result = a.fetchall() list = [] #print(result) #print(result) #print(11) #print(np.percentile(result,50,axis = 1)) for row in result: #data=row['数据'] #数据库里是字符串,这里必须转换成浮点数 list.append(float(row['数据'])) #print(list) #d=np.array([list]) #print(d) #print(np.percentile(d,50)) #print(list) #ls=[10, 7, 4, 3, 2, 1] #d = np.array([list]) print("某人的中位数为:", np.percentile(list, 50))
def match(): """ [数据匹配]###### 函数说明 乔晖 2018/6/11 将每月ags_event导入数据库后执行该程序,程序会进行自动数据匹配,通过和crew_link_chn表进行配合,得到事件和机组姓名的匹配 每月数据约30000条,使用commercail库,不包含本场和发动机试车数据 包含master caution数据,后续分析中会使用[ags_event_exception]排除无关数据 """ #进行匹配,取出所有未进行匹配的数据 a=query("select event_id,`Flight Date`,`A/C Tail`,`Flight No`,`Departure Airport`,`Arrival Airport` " \ "from ags_event where `A/C Tail` is not null and ags_valid is null") result = a.fetchall() count = 0 for row in result: #获取数据 event_id = row['event_id'] ags_datetime = row['Flight Date'] ags_reg = row['A/C Tail'] #print(ags_reg) ags_fn = row['Flight No'] ags_from = row['Departure Airport'] ags_to = row['Arrival Airport'] #QAR事件中的时间为实际滑出时间 #ags_datetime=datetime.strptime(ags_datetime2, "%d/%m/%Y %H:%M:%S") #GMT转化为北京时 GMT+8 ags_datetime = ags_datetime + timedelta(hours=8) #进行mysql语句层面的匹配,规则为取前后三天的航班进行时间间隔相减,如果两者时间间隔小于一小时且数据只有一条,则认为精确匹配 #sql="select key_id,航班号,timediff('%s' ,实飞) as 时间间隔 from flight_link_chn where 航班日期 between '%s' and '%s' and 机号='%s' having 时间间隔<='-01:30:00'" % (ags_datetime,ags_datetime-timedelta(days = 1),ags_datetime+timedelta(days =1),ags_reg[2:6]) sql="select key_id,航班号,timediff('%s' ,实飞) as 时间间隔 from flight_link_chn " \ "where 航班日期 between '%s' and '%s' and 机号='%s' having 时间间隔<='-01:30:00'" % (ags_datetime,ags_datetime-timedelta(days = 2),ags_datetime+timedelta(days =2),ags_reg[2:6]) b = query(sql) rst = b.fetchall() #print(sql) if b.rowcount == 1: #精确匹配 count = count + 1 for row2 in rst: fn = row2['航班号'] print('event:%s|机组排版系:%s' % (ags_fn, fn)) #查询队列置0 sql_list = [] #获取key_id key_id = row2['key_id'] #print(ags_id,key_id) #1. 更新[ags_event],进行以下操作: #1.1 ags_valid置为1 有效 #1.2 匹配模式为1 精确匹配 #1.3 ags_id和key_id关联 sql_list.append( "update ags_event set ags_valid=1,match_mode=1,key_id='%s' where event_id='%s'" % (key_id, event_id)) #2. 更新[crew_link]中的ags_id #sql_list.append("update crew_link set ags_id='%s' where key_id='%s'" % (ags_id,key_id)) #执行数据库更新操作 query_list(sql_list) print('[%s]success,%s' % (event_id, count))
def match(): """ [数据匹配]###### 函数说明 乔晖 2018/4/2 """ #进行匹配,取出所有未进行匹配的数据 a = query( "select ags_id,`Date & Time`,`A/C Tail`,`Flight No`,`From`,`To` from ags_snapshot where `A/C Tail` is not null and ags_valid is null" ) result = a.fetchall() count = 0 for row in result: #获取数据 ags_id = row['ags_id'] ags_datetime2 = row['Date & Time'] ags_reg = row['A/C Tail'] #print(ags_reg) ags_fn = row['Flight No'] ags_from = row['From'] ags_to = row['To'] #QAR数据的上传时间GMT ags_datetime = datetime.strptime(ags_datetime2, "%d/%m/%Y %H:%M:%S") #GMT转化为北京时 GMT+8 ags_datetime = ags_datetime + timedelta(hours=8) #进行mysql语句层面的匹配,规则为取前后三天的航班进行时间间隔相减,如果两者时间间隔小于一小时且数据只有一条,则认为精确匹配 sql = "select key_id,timediff('%s' ,实到) as 时间间隔 from flight_link_chn where 航班日期 between '%s' and '%s' and 机号='%s' having 时间间隔<='01:30:00' and 时间间隔>='00:00:00'" % ( ags_datetime, ags_datetime - timedelta(days=2), ags_datetime + timedelta(days=2), ags_reg[2:6]) b = query(sql) rst = b.fetchall() #print(sql) if b.rowcount == 1: #精确匹配 count = count + 1 for row2 in rst: #查询队列置0 sql_list = [] #获取key_id key_id = row2['key_id'] #print(ags_id,key_id) #1. 更新[ags_snapshot],进行以下操作: #1.1 ags_valid置为1 有效 #1.2 匹配模式为1 精确匹配 #1.3 ags_id和key_id关联 sql_list.append( "update ags_snapshot set ags_valid=1,match_mode=1,key_id='%s' where ags_id='%s'" % (key_id, ags_id)) #2. 更新[crew_link]中的ags_id sql_list.append( "update crew_link set ags_id='%s' where key_id='%s'" % (ags_id, key_id)) #执行数据库更新操作 query_list(sql_list) print('[%s]success,%s' % (ags_id, count))
def send(max_mail_list=200): """ [循环发送邮件]###### 函数说明 创建:乔晖 2018/10/6 [输入Parameters]: max_mail_list:int 最大发送数量 ------- [返回值return]: """ ###获取邮件列表 sql="select log.send_id,list.email,log.month,log.name,log.start_date,log.end_date " \ "from ags_mail_log log , ags_mail_list list " \ "where log.mail_id=list.mail_id and log.status=0 LIMIT %d" % (max_mail_list) a = query(sql) result = a.fetchall() ###获取机队信息 for row in result: #获取数据 send_id = row['send_id'] name = row['name'] email = row['email'] month = row['month'] start_date = row['start_date'] end_date = row['end_date'] #整合字符串 df_event = export_ags_event_summary(start_date=start_date, end_date=end_date, flag_csv=0) df_fleet = analyze_fleet_month_df(ac_type, month, m_parameters) text_msg = report_person_v1(name=name, month=month, df_event=df_event, df_fleet=df_fleet) #发送邮件 ml = mail(name, month, email, start_date, end_date, text_msg) rst = ml.send() if rst == True: #邮件发送成功 logging.debug("%s的%s邮件已发送至%s邮箱" % (name, month, email)) #发送列表修改为已发送 sql2 = "update ags_mail_log set status=1 where send_id=%d" % ( send_id) b = query(sql2) if b == False: logging.debug("修改已发送邮件失败,send_id号%d" % (send_id)) elif rst == False: #邮件发送失败 logging.error("邮件发送失败:send_id号:%d;姓名:%s;邮箱:%s" % (send_id, name, mail))
def import_data(): #导入数据 #通过比较flight_delay和flight_link_chn两张表的数据异同,将[flight_link_chn]中的新增数据导入[flight_delay] #新导入的数据默认valid为null,因此在caculate_time函数中,通过检查valid值来确定哪些数据需要计算,哪些已经计算过了 sql = "insert into flight_delay (key_id) (select A.key_id from flight_link_chn A where not exists (select B.key_id from flight_delay B where B.key_id=A.key_id))" a = query(sql) """
def clean(): """ [数据清洗]###### 函数说明 乔晖 2018/3/30 通过检查[ags_snapshot] """ #去除非有效数据 2018/4/3 增加双条件判断,预估每月可以排除200条左右的非有效数据 a = query( "update ags_snapshot set ags_valid=0,match_mode=0 where (`From` is null and `To` is null) or (`From` ='' and `To` ='')" ) print("共删除%d条非有效数据:" % a.rowcount)
def caculate_time(): ######【计算飞行时间】###### sql = "SELECT dly.key_id,lnk.`航班日期`,lnk.`航班号`,lnk.`计飞`,lnk.`实飞`,lnk.`计到`,lnk.`实到`,lnk.`飞行时间` FROM flight_delay AS dly ,flight_link_chn AS lnk WHERE dly.key_id = lnk.key_id AND dly.`valid` IS NULL" a = query(sql) print("本次查询总共返回结果数:", a.rowcount) result = a.fetchall() for row in result: #下面两行被注释的原因是从数据库内相关字段获取的时间字符串直接就是时间格式,strptime只接受字符串形式,所以转换失败。字段可以直接拿来做时间使用 #d1=datetime.strptime(row['计飞'], "%Y-%m-%d %H:%M:%S") #d2=datetime.strptime(row['实飞'], "%Y-%m-%d %H:%M:%S") #获取数据 d1 = row['计飞'] d2 = row['实飞'] a1 = row['计到'] a2 = row['实到'] flt_time_str = row['飞行时间'] key_id = row['key_id'] ##判断数据是否有效 if d1 is None or d2 is None or a1 is None or a2 is None: #四个时间段其中有一个为NULL,所以数据无效,设置valid为0,其余延误时间和飞行时间不做处理 query("update flight_delay set valid='%s' where key_id='%d'" % (0, key_id)) print("无效数据[", key_id, '] valid属性置0') else: #定义和计算起飞延误时间 dep_delay = (d2 - d1).total_seconds() // 60 #定义和计算落地延误时间 arr_delay = (a2 - a1).total_seconds() // 60 flt_time = flt_time_str.split(':') #将[03:52]的时间格式转换成分钟 time_min = int(flt_time[0]) * 60 + int(flt_time[1]) #将结果返回数据库 query( "update flight_delay set 起飞延误='%d',落地延误='%d',飞行时间='%d',valid=1 where key_id='%d'" % (dep_delay, arr_delay, time_min, key_id)) #print(row['key_id'],row['航班号'],dep_delay) print("结果输出完毕!")
def mail_init(month=None, start_date=None, end_date=None): """ [邮件发送列表初始化]###### 函数说明 创建:乔晖 2018/10/6 整合每月例行发送的邮件列表 校验ags_mail_log中是否存在当月发送列表,不存在则写入,存在则跳过 默认初始化中对status列置为0(0:邮件未发送 1:邮件已发送) [输入Parameters]: month string 需要导入列表的月份 如2018-10 start_date string 开始日期 end_date 结束日期 注:开始和结束日期用于定位���据���集的时间段,和发送日期无关 ------- [返回值return]: True False """ #第一步:校验是否有重复数据 sql_duplicate = query( "select count(send_id) as cnt from ags_mail_log where month='%s'" % (month)) result = sql_duplicate.fetchone() if result['cnt'] == 0: #返回结果为0,进行导入操作 sql="insert into ags_mail_log (mail_id,month,name,start_date,end_date,status) " \ "select mail_id,'%s',name,'%s','%s',0 " \ "from ags_mail_list where valid=1" % (month,start_date,end_date) sql_import = query(sql) logging.debug("%s邮件列表导入完成" % (month)) return True else: #返回结果不为0,忽略导入操作 logging.debug("%s邮件发送列表已存在,忽略导入操作!" % (month)) return False pass
def analyze_fleet_monthlist_CL(ac_type='', monthlist='', columnlist=''): """ [数据统计-列出机队按月分布的快照数据结果 【经典分析方法】###### 函数说明 创建:乔晖 2018/7/8 此为第一版统计学分析,使用经典的mysql导出数据列表,将字段信息写入list列表,再通过对list进行统计学分析得到结果 后续已被analyze_fleet_monthlist_df替换 【本版本已不再维护】 [输入Parameters]: ac_type:string 机队列表 monthlist:list 要分析的月份列表 如['2018-01','2018-02','2018-03'] columnlist:list 要分析的快照列表,如平飘距离和着陆载荷 ['`VRTG_MAX_LD (g)`','`DIST_LDG (feet)`'] 注意:一定要有``否则会出错 ------- [返回值return]: 修改说明: 2018/7/9 1. 修正sql语句,原先移植版本从事件人员角度分析,会有重复,因此采用distinct消除重复。目前版本删除crew_link表,取消人员查询,因此月度查询速度从40秒优化至4.3秒 2018/7/12 1.修改输入函数,原先一次只能处理一种快照数据,现输入项为快照列表,可以输入多���快照数据 """ #打印表头 print("机队,字段,月份,航班快照量,Q1值,中位数,Q3值,Q90,标准差,变异系数,平均值") for column in columnlist: for month in monthlist: sql="select ags.ags_id,flnk.`航班日期`,ags.`From`,ags.`To`,ags.%s as 数据 from " \ "ags_snapshot ags,flight_link_chn flnk "\ "where flnk.key_id=ags.key_id and "\ "date_format(flnk.航班日期,'%%Y-%%m')='%s' and flnk.机型 IN (%s)" % (column,month,ac_type) #print(sql) a = query(sql) if a.rowcount > 0: result = a.fetchall() list = [] for row in result: #data=row['数据'] #数据库里是字符串,这里必须转换成浮点数 list.append(float(row['数据'])) #输出结果 print(len(list)) Q1 = np.percentile(list, 25) Q2 = np.percentile(list, 50) Q3 = np.percentile(list, 75) #注:Q90在个人数据中没有意义,不做统计 Q90 = np.percentile(list, 90) m_std = std(list, ddof=1) m_mean = mean(list) m_cv = std(list, ddof=1) / m_mean #print(name,month,Q2,Q3,m_std,m_cv) #清空list列表 list = [] print("%s,%s,%s,%d,%f,%f,%f,%f,%f,%f,%f" % ("B737机队", column, month, len(list), Q1, Q2, Q3, Q90, m_std, m_cv, m_mean)) else: pass pass """ [数据统计-列出机队按月分布的快照数据结果]###### 函数说明 创建:乔晖 2018/7/8 [输入Parameters]: ac_type:string 机队列表 monthlist:list 要分析的月份列表 如['2018-01','2018-02','2018-03'] columnlist:list 要分析的快照列表,如平飘距离和着陆载荷 ['`VRTG_MAX_LD (g)`','`DIST_LDG (feet)`'] 注意:一定要有``否则会出错 ------- [返回值return]: 修改说明: 2018/7/9 1. 修正sql语句,原先移植版本从事件人员角度分析,会有重复,因此采用distinct消除重复。目前版本删除crew_link表,取消人员查询,因此月度查询速度从40秒优化至4.3秒 2018/7/12 1.修改输入函数,原先一次只能处理一种快照数据,现输入项为快照列表,可以输入多种快照数据 2.数据处理过程使用df,理论上速度更快 """ #打印表头 print("机队,字段,月份,航班快照量,Q1值,中位数,Q3值,Q90,标准差,变异系数,平均值") for column in columnlist: for month in monthlist: sql="select ags.ags_id,flnk.`航班日期`,ags.`From`,ags.`To`,ags.%s as 数据 " \ "from ags_snapshot ags,flight_link_chn flnk " \ "where flnk.key_id=ags.key_id and " \ "date_format(flnk.航班日期,'%%Y-%%m')='%s' and flnk.机型 IN (%s)" % (column,month,ac_type) df = query_df(sql) #数据类型转换 text->float df_float = df['数据'].astype('float') count = df_float.count() Q1 = df_float.quantile(0.25) Q2 = df_float.quantile(0.50) Q3 = df_float.quantile(0.75) #注:Q90在个人数据中没有意义,不做统计 Q90 = df_float.quantile(0.9) m_std = df_float.std() #m_std=std(list,ddof=1) m_mean = df_float.mean() m_cv = m_std / m_mean print("%s,%s,%s,%d,%f,%f,%f,%f,%f,%f,%f" % ("B737机队", column, month, count, Q1, Q2, Q3, Q90, m_std, m_cv, m_mean))
def import_data(): ######[导入数据]###### #函数说明 乔晖 2018/3/30 #通过检查[flight_link_chn]中的机长、航线机长、一副、二副这四个四段,把相应的机组成员导入到[crew_link]表中 #新导入的数据默认valid为null,因此在caculate_time函数中,通过检查valid值来确定哪些数据需要计算,哪些已经计算过了 #######第一步:从[flight_link_chn]中取出所有没有做过导入的数据 sql_flight_link_chn_not_input = "select key_id,联线号,机长,航线机长,一副,二副 from flight_link_chn where 机组拆分标识 is null" a = query(sql_flight_link_chn_not_input) ##print("本次查询总共返回结果数:",a.rowcount) result = a.fetchall() for row in result: #获取数据 key_id = row['key_id'] link_id = row['联线号'] cap = row['机长'] crz = row['航线机长'] f1 = row['一副'] f2 = row['二副'] #初始化sql查询语句 sql_list = [] #初始化责任机长标识 cap_flag_loop = 0 #######第二步:拆分和导入[机长]字段 if len(cap) > 0: #字段内有内容,拆分 cap_list = cap.split(",") for n in cap_list: num = len(n) name = n[0:num - 2] tech = n[num - 2:num] if cap_flag_loop == 0: cap_flag = 1 else: cap_flag = 0 cap_flag_loop = cap_flag_loop + 1 ##print("航班连线:%s|姓名:%s,|技术授权:%s,|责任机长标识=%d" % (link_id,name,tech,cap_flag)) #将sql查询语句加载到多语句列表中 sql_list.append( "insert into crew_link (key_id,link_id,姓名,机上岗位,技术授权,责任机长标识) values ('%s','%s','%s','机长','%s','%s')" % (key_id, link_id, name, tech, cap_flag)) else: #字段内无内容,跳过 pass #######第三步:拆分和导入[航线机长]字段 if len(crz) > 0: #字段内有内容,拆分 crz_list = crz.split(",") for n in crz_list: num = len(n) name = n[0:num - 2] tech = n[num - 2:num] #航线机长中直接将机长标识置为0 cap_flag = 0 ##print("航班连线:%s|姓名:%s,|技术授权:%s,|责任机长标识=%d" % (link_id,name,tech,cap_flag)) #将sql查询语句加载到多语句列表中 sql_list.append( "insert into crew_link (key_id,link_id,姓名,机上岗位,技术授权,责任机长标识) values ('%s','%s','%s','航线机长','%s','%s')" % (key_id, link_id, name, tech, cap_flag)) else: #字段内无内容,跳过 pass #######第四步:拆分和导入[一副]字段 if len(f1) > 0: #字段内有内容,拆分 f1_list = f1.split(",") for n in f1_list: num = len(n) name = n[0:num - 2] tech = n[num - 2:num] #航线机长中直接将机长标识置为0 cap_flag = 0 print("航班连线:%s|姓名:%s,|技术授权:%s,|责任机长标识=%d" % (link_id, name, tech, cap_flag)) #将sql查询语句加载到多语句列表中 sql_list.append( "insert into crew_link (key_id,link_id,姓名,机上岗位,技术授权,责任机长标识) values ('%s','%s','%s','一副','%s','%s')" % (key_id, link_id, name, tech, cap_flag)) else: #字段内无内容,跳过 pass #######第五步:拆分和导入[二副]字段 if len(f2) > 0: #字段内有内容,拆分 f2_list = f2.split(",") for n in f2_list: num = len(n) name = n[0:num - 2] tech = n[num - 2:num] #航线机长中直接将机长标识置为0 cap_flag = 0 print("航班连线:%s|姓名:%s,|技术授权:%s,|责任机长标识=%d" % (link_id, name, tech, cap_flag)) #将sql查询语句加载到多语句列表中 sql_list.append( "insert into crew_link (key_id,link_id,姓名,机上岗位,技术授权,责任机长标识) values ('%s','%s','%s','二副','%s','%s')" % (key_id, link_id, name, tech, cap_flag)) else: #字段内无内容,跳过 pass #######第六步:[flight_link_chn]中的机组拆分标识 置为1 sql_list.append( "update flight_link_chn set 机组拆分标识='1' where key_id='%d'" % (key_id)) #######第七步:执行sql语句 query_list(sql_list)