def get_impala_job_summary(from_time=None, to_time=None):
    """Render ./impala_pie.png: finished/failed Impala queries bucketed by duration.

    Counts queries in seven duration buckets over [from_time, to_time] and
    draws a pie chart of the distribution.

    from_time / to_time: window bounds; resolved at CALL time when omitted.
        (The original evaluated get_one_day_ago_from()/get_now() once at
        import, freezing the window for the life of the process.)
    """
    if from_time is None:
        from_time = get_one_day_ago_from()
    if to_time is None:
        to_time = get_now()
    base = ("select query_duration from IMPALA_QUERIES where "
            "serviceName=impala AND (query_state=FINISHED OR "
            "query_state=EXCEPTION) and query_duration")
    # (lower_exclusive, upper_inclusive) duration bounds in milliseconds;
    # None marks an open end. Order matches `types` below.
    buckets = [
        (None, 60000.0),         # 0-1m
        (60000.0, 300000.0),     # 1-5m
        (300000.0, 900000.0),    # 5-15m
        (900000.0, 1800000.0),   # 15-30m
        (1800000.0, 3600000.0),  # 30-60m
        (3600000.0, 7200000.0),  # 60-120m
        (7200000.0, None),       # >120m
    ]
    counts = []
    for lo, hi in buckets:
        if lo is None:
            cond = " <= %s" % hi
        elif hi is None:
            cond = " > %s" % lo
        else:
            cond = " > %s and query_duration <= %s" % (lo, hi)
        counts.append(get_job_count(base + cond, from_time, to_time))
    job_total = sum(counts)
    types = '0-1m', '1-5m', '5-15m', '15-30m', '30-60m', '60-120m', '>120m'
    file_path = "./impala_pie.png"
    pie_charts(counts, types, job_total, file_path)
def do_get_top_user_demo(from_time=None, to_time=None, duration=900):
    """Return Hive/YARN application records sorted by duration, longest first.

    Fetches applications from HiveInfo, flattens each into a flat dict with
    a raw-millisecond duration ('application_duration1') and a humanized
    one ('application_duration', e.g. "1.50h" / "12m" / "45s").

    from_time / to_time: window bounds; resolved at CALL time when omitted
        (the original froze them at import time via call-time defaults).
    duration: threshold passed through to HiveInfo.get_top_user_demo
        (presumably seconds — TODO confirm against HiveInfo).
    """
    if from_time is None:
        from_time = get_one_day_ago_from()
    if to_time is None:
        to_time = get_now()
    hive_info = HiveInfo()
    top_users = hive_info.get_top_user_demo(
        from_time=from_time, to_time=to_time, duration=duration)
    massage_dfs = []
    if top_users.applications:
        for app in top_users.applications:
            line = {}
            line['category'] = "YARN_APPLICATION"
            line['service_name'] = "yarn"
            line['pool'] = app.pool
            line['user'] = app.user
            try:
                line['cpu_milliseconds'] = app.attributes['cpu_milliseconds']
            except KeyError:
                # CPU counter absent on some applications; report 0.
                line['cpu_milliseconds'] = 0
            line['name'] = app.attributes['hive_query_string']
            line['entityName'] = app.applicationId
            line['time'] = zone_conversion(timestamp=app.startTime,
                                           format=u'YYYY-MM-DD HH:mm:ss')
            attr_val = round_milli_time(app.startTime, app.endTime)
            line['application_duration1'] = attr_val
            millis = int(float(attr_val))
            # Humanize: hours with 2 decimals, else whole minutes/seconds.
            # `//` keeps the original Py2 floor-division semantics.
            if millis > 60 * 60 * 1000:
                attr_val = ('%.2f' % (float(attr_val) / 60 / 60 / 1000)) + "h"
            elif millis > 60 * 1000:
                attr_val = str(millis // 60 // 1000) + "m"  # minutes
            elif millis > 1000:
                attr_val = str(millis // 1000) + "s"  # seconds
            line['application_duration'] = attr_val
            massage_dfs.append(line)
    return sorted(massage_dfs,
                  key=lambda t: t['application_duration1'], reverse=True)
def get_hive_job_summary(from_time=None, to_time=None):
    """Render ./hive_pie.png: Hive-launched YARN applications bucketed by duration.

    Counts applications (those with a hive_query_id) in six duration buckets
    over [from_time, to_time] and draws a pie chart of the distribution.

    from_time / to_time: window bounds; resolved at CALL time when omitted
        (the original froze them at import time via call-time defaults).
    """
    if from_time is None:
        from_time = get_one_day_ago_from()
    if to_time is None:
        to_time = get_now()
    base = ('select application_duration from YARN_APPLICATIONS where '
            'service_name = "yarn" and hive_query_id RLIKE ".*" '
            'and application_duration')
    # (lower_inclusive, upper_exclusive) bounds in milliseconds; None marks
    # an open end. Order matches `types` below (first bucket is 0-5m but the
    # original labels it '1-5m').
    buckets = [
        (None, 300000.0),        # 1-5m
        (300000.0, 900000.0),    # 5-15m
        (900000.0, 1800000.0),   # 15-30m
        (1800000.0, 3600000.0),  # 30-60m
        (3600000.0, 7200000.0),  # 60-120m
        (7200000.0, None),       # >120m
    ]
    counts = []
    for lo, hi in buckets:
        if lo is None:
            cond = ' < %s' % hi
        elif hi is None:
            cond = ' >= %s' % lo
        else:
            cond = ' >= %s and application_duration < %s' % (lo, hi)
        counts.append(get_job_count(base + cond, from_time, to_time))
    job_total = sum(counts)
    types = '1-5m', '5-15m', '15-30m', '30-60m', '60-120m', '>120m'
    file_path = "./hive_pie.png"
    pie_charts(counts, types, job_total, file_path)
def do_get_hive_top_email_bac(from_time=None, to_time=None, duration=900000.0):
    """Return the top-40 longest Hive/YARN applications for the report email.

    from_time / to_time: window bounds; resolved at CALL time when omitted
        (the original froze them at import time via call-time defaults).
    duration: threshold forwarded to do_get_top_user_demo (note: that
        function's own default is 900, not 900000.0 — TODO confirm units).
    """
    if from_time is None:
        from_time = get_one_day_ago_from()
    if to_time is None:
        to_time = get_now()
    top_list = do_get_top_user_demo(from_time=from_time, to_time=to_time,
                                    duration=duration)
    return top_list[:40]
def do_get_impala_top(from_time=None, to_time=None, duration=300000.0):
    """Return Impala query records slower than *duration* ms, longest first.

    Runs IMPALA_QUERY % duration over [from_time, to_time] and flattens each
    time series' metadata attributes into a dict, keeping the raw duration in
    'query_duration1' and a humanized form ("1.50h"/"12m"/"45s") in
    'query_duration'.

    from_time / to_time: window bounds; resolved at CALL time when omitted
        (the original froze them at import time via call-time defaults).
    """
    if from_time is None:
        from_time = get_last_week_to()
    if to_time is None:
        to_time = get_now()
    attrs = ['user', 'database', 'query_duration', 'thread_cpu_time',
             'category', 'executing', 'service_name', 'coordinator_host_id',
             'stats_missing', 'statement', 'entityName', 'pool']
    responses = do_query(IMPALA_QUERY % duration, from_time, to_time)
    massage_dfs = []
    for response in responses:
        if not response.timeSeries:
            continue
        for ts in response.timeSeries:
            metadata = ts.metadata
            line = {}
            if metadata.attributes:
                for attr in attrs:
                    if attr not in metadata.attributes:
                        continue
                    attr_val = metadata.attributes[attr]
                    if attr == 'query_duration':
                        millis = int(attr_val)
                        line['query_duration1'] = millis
                        # Humanize; `//` keeps the original Py2 floor division.
                        if millis > 60 * 60 * 1000:
                            attr_val = ('%.2f' % (float(attr_val)
                                                  / 60 / 60 / 1000)) + "h"
                        elif millis > 60 * 1000:
                            attr_val = str(millis // 60 // 1000) + "m"  # minutes
                        elif millis > 1000:
                            attr_val = str(millis // 1000) + "s"  # seconds
                    line[attr] = attr_val
            # Only the last data point's timestamp is kept (matches original).
            for data in ts.data:
                line['time'] = zone_conversion(timestamp=data.timestamp,
                                               format=u'YYYY-MM-DD HH:mm:ss')
            massage_dfs.append(line)
    # NOTE(review): rows without a query_duration attribute would make this
    # key lookup raise, as in the original.
    return sorted(massage_dfs,
                  key=lambda t: t['query_duration1'], reverse=True)
def do_get_hive_top(from_time=None, to_time=None, duration=900000.0):
    """Return Hive/YARN application records slower than *duration* ms, longest first.

    Runs HIVE_QUERY % duration over [from_time, to_time] and flattens each
    time series' metadata attributes into a dict, keeping the raw duration in
    'application_duration1' and a humanized form ("1.50h"/"12m"/"45s") in
    'application_duration'.

    from_time / to_time: window bounds; resolved at CALL time when omitted
        (the original froze them at import time via call-time defaults).
    """
    if from_time is None:
        from_time = get_one_day_ago_from()
    if to_time is None:
        to_time = get_now()
    attrs = ['user', 'name', 'application_duration', 'entityName', 'pool',
             'cpu_milliseconds', 'category', 'service_name']
    responses = do_query(HIVE_QUERY % duration, from_time, to_time)
    massage_dfs = []
    for response in responses:
        if not response.timeSeries:
            continue
        for ts in response.timeSeries:
            metadata = ts.metadata
            line = {}
            if metadata.attributes:
                for attr in attrs:
                    if attr not in metadata.attributes:
                        continue
                    attr_val = metadata.attributes[attr]
                    if attr == 'application_duration':
                        line['application_duration1'] = float(attr_val)
                        millis = int(float(attr_val))
                        # Humanize; `//` keeps the original Py2 floor division.
                        if millis > 60 * 60 * 1000:
                            attr_val = ('%.2f' % (float(attr_val)
                                                  / 60 / 60 / 1000)) + "h"
                        elif millis > 60 * 1000:
                            attr_val = str(millis // 60 // 1000) + "m"  # minutes
                        elif millis > 1000:
                            attr_val = str(millis // 1000) + "s"  # seconds
                    line[attr] = attr_val
            # Only the last data point's timestamp is kept (matches original).
            for data in ts.data:
                line['time'] = zone_conversion(timestamp=data.timestamp,
                                               format=u'YYYY-MM-DD HH:mm:ss')
            massage_dfs.append(line)
    return sorted(massage_dfs,
                  key=lambda t: t['application_duration1'], reverse=True)
def do_get_hdfs_used_weekly(pic=''):
    """Chart weekly HDFS usage and return summary rows for the report.

    Queries rolled-up HDFS capacity, converts each point to a common storage
    unit, optionally renders the last four weeks as ./hdfs_weekly.png, and
    builds a ring table of week-over-week growth.

    pic: truthy to skip rendering the bar chart.
    Returns (last four table rows newest-first,
             "%.1f"-formatted remaining*7 estimate,
             column header labels).
    """
    responses = _do_get_hdfs_used(get_last_n_week_from(), get_now(), 'WEEKLY')
    capacity = []
    labels = []
    colors = ['r', 'g', 'b']
    file_path = './hdfs_weekly.png'
    for response in responses:
        if response.timeSeries:
            for ts in response.timeSeries:
                bar_labels = []
                hdfs_used_values = []
                unit = ts.metadata.unitNumerators[0]
                for data in ts.data:
                    bar_labels.append(zone_conversion(data.timestamp))
                    hdfs_used_values.append(
                        check_digital_storage_without_unit(data.value, unit,
                                                           'bytes'))
                capacity.append(hdfs_used_values)
                labels = bar_labels
    if not pic:
        two_bar_charts(labels[-4:], capacity[-4:], colors, file_path)
    # `rows` was originally named `list`, shadowing the builtin.
    rows, remaining = ring_table_clo(capacity[0], labels)
    headers = [u'日期', u'已用容量(T)', u'上周容量(T)', u'增量(T)', u'周增长率(%)']
    return rows[-4:][::-1], "%.1f" % (remaining * 7), headers
def overall_report():
    """Build the full cluster report and email it via report_template2.

    Gathers capacity/top-job/small-file tables, triggers rendering of all
    chart PNGs, inlines the images, and sends the templated mail.
    """
    # Run relative to the script's own directory so the ./*.png paths work.
    os.chdir(os.path.dirname(sys.argv[0]))
    data = do_get_dfs_capacity_email()
    rows, remaining, metadata = do_get_hdfs_used_weekly()
    rows1, metadata = do_get_hdfs_used_monthly()
    rows2, metadata = do_get_hdfs_used_quarterly()
    rows3 = do_get_hive_top_email()
    rows4 = do_get_impala_top_email()
    rows5 = query_small_files()
    # Side-effect calls: each renders one of the PNGs consumed below.
    get_impala_job_summary()
    get_hive_job_summary()
    query_file_incr_info()
    do_get_dfs_cpu()
    do_get_dfs_mem()
    do_get_dfs_net()
    context = {
        'time': zone_conversion(get_now()),
        'data': data,
        'rows': rows,
        'rows1': rows1,
        'rows2': rows2,
        'rows3': rows3,
        'rows4': rows4,
        'rows5': rows5,
        'remaining': remaining,
    }
    # (template context key, PNG filename) — replaces ten copy-pasted
    # open/read/InlineImage stanzas.
    images = [
        ('cpu_image', 'cpu.png'),
        ('mem_image', 'mem.png'),
        ('net_image', 'net.png'),
        ('hdfs_weekly_image', 'hdfs_weekly.png'),
        ('hdfs_monthly_image', 'hdfs_monthly.png'),
        ('hdfs_quarterly_image', 'hdfs_quarterly.png'),
        ('file_image', 'file.png'),
        ('file2_image', 'file2.png'),
        ('impala_pie_image', 'impala_pie.png'),
        ('hive_pie_image', 'hive_pie.png'),
    ]
    for key, fname in images:
        with open('./' + fname, 'rb') as fh:
            context[key] = InlineImage(filename=fname, content=fh.read())
    send_templated_mail(
        template_name='report_template2',
        from_email='*****@*****.**',
        recipient_list=['*****@*****.**', '*****@*****.**'],
        context=context,
        # Optional:
        # cc=['*****@*****.**'],
        # bcc=['*****@*****.**'],
        # headers={'My-Custom-Header':'Custom Value'},
        # template_prefix="my_emails/",
        # template_suffix="email",
    )
    print('email send')  # parenthesized: valid in both Py2 and Py3
def do_get_impala_top_email_bac(from_time=None, to_time=None, duration=300000.0):
    """Return the top-20 slowest Impala queries for the report email.

    from_time / to_time: window bounds; resolved at CALL time when omitted
        (the original froze them at import time via call-time defaults).
    duration: minimum query duration (ms) forwarded to do_get_impala_top.
    """
    if from_time is None:
        from_time = get_last_week_to()
    if to_time is None:
        to_time = get_now()
    top_list = do_get_impala_top(from_time=from_time, to_time=to_time,
                                 duration=duration)
    return top_list[:20]
def get_top_user_demo(self):
    """Fetch the last day's Impala queries from the 'cluster' cluster.

    Looks up the 'impala' service via the wrapped API handle and returns
    its queries for the last 24 hours with no extra filter.
    """
    impala = get_service(self._api, cluster_name="cluster", name="impala")
    return impala.get_impala_queries(
        start_time=get_n_day_ago_from(n=1),
        end_time=get_now(),
        filter_str="")
def _do_get_hdfs_used(from_time, to_time=None, granularity='WEEKLY'):
    """Roll-up query of HDFS used/total capacity for the hdfs:nn-idc entity.

    to_time: resolved at CALL time when omitted (the original evaluated
        get_now() once at import, freezing the default for the process).
    granularity: roll-up period passed through to do_query_rollup.
    """
    if to_time is None:
        to_time = get_now()
    return do_query_rollup(
        "select dfs_capacity_used, dfs_capacity where entityName=hdfs:nn-idc",
        from_time, to_time, granularity)
def _get_service(self, path):
    """GET *path* over the last hour, returning a list of ApiTest2 objects."""
    window = {
        'from': get_n_hour_ago_from(n=1),
        'to': get_now(),
    }
    return call(
        method=self._api.get,
        path=path,
        ret_type=ApiTest2,
        ret_is_list=True,
        params=window)