def get_impala_job_summary(from_time=None, to_time=None):
    # Resolve time-window defaults at call time; evaluating get_now() in the
    # signature would freeze the window at import time.
    from_time = from_time or get_one_day_ago_from()
    to_time = to_time or get_now()
    query_1min_count = "select  query_duration from IMPALA_QUERIES where serviceName=impala AND (query_state=FINISHED OR query_state=EXCEPTION)  and query_duration <= 60000.0"
    query_5min_count = "select  query_duration from IMPALA_QUERIES where serviceName=impala AND (query_state=FINISHED OR query_state=EXCEPTION)  and query_duration > 60000.0 and query_duration <= 300000.0"
    query_15min_count = "select  query_duration from IMPALA_QUERIES where serviceName=impala AND (query_state=FINISHED OR query_state=EXCEPTION)  and query_duration > 300000.0 and query_duration <= 900000.0"
    query_30min_count = "select  query_duration from IMPALA_QUERIES where serviceName=impala AND (query_state=FINISHED OR query_state=EXCEPTION)  and query_duration > 900000.0 and query_duration <= 1800000.0"
    query_60min_count = "select  query_duration from IMPALA_QUERIES where serviceName=impala AND (query_state=FINISHED OR query_state=EXCEPTION)  and query_duration > 1800000.0 and query_duration <= 3600000.0"
    query_120min_count = "select  query_duration from IMPALA_QUERIES where serviceName=impala AND (query_state=FINISHED OR query_state=EXCEPTION)  and query_duration > 3600000.0 and query_duration <= 7200000.0"
    query_120min_plus_count = "select query_duration from IMPALA_QUERIES where serviceName=impala AND (query_state=FINISHED OR query_state=EXCEPTION)  and query_duration > 7200000.0"

    job_1min_count = get_job_count(query_1min_count, from_time, to_time)
    job_5min_count = get_job_count(query_5min_count, from_time, to_time)
    job_15min_count = get_job_count(query_15min_count, from_time, to_time)
    job_30min_count = get_job_count(query_30min_count, from_time, to_time)
    job_60min_count = get_job_count(query_60min_count, from_time, to_time)
    job_120min_count = get_job_count(query_120min_count, from_time, to_time)
    job_120min_plus_count = get_job_count(query_120min_plus_count, from_time,
                                          to_time)
    counts = [
        job_1min_count, job_5min_count, job_15min_count, job_30min_count,
        job_60min_count, job_120min_count, job_120min_plus_count
    ]
    job_total = sum(counts)
    types = ('0-1m', '1-5m', '5-15m', '15-30m', '30-60m', '60-120m', '>120m')
    file_path = "./impala_pie.png"
    pie_charts(counts, types, job_total, file_path)
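
# get_job_count is defined elsewhere in this module; judging from the call
# sites above, it runs a tsquery over [from_time, to_time] and counts the
# matching result streams. A minimal sketch, assuming do_query returns the
# same response objects iterated in do_get_impala_top below:
def get_job_count(query, from_time, to_time):
    count = 0
    for response in do_query(query, from_time, to_time):
        if response.timeSeries:
            count += len(response.timeSeries)
    return count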

def do_get_top_user_demo(from_time=None, to_time=None, duration=900):
    from_time = from_time or get_one_day_ago_from()
    to_time = to_time or get_now()
    hive_info = HiveInfo()
    top_users = hive_info.get_top_user_demo(from_time=from_time,
                                            to_time=to_time,
                                            duration=duration)
    massage_dfs = []
    if top_users.applications:
        for i in top_users.applications:
            line = {}
            line['category'] = "YARN_APPLICATION"
            line['service_name'] = "yarn"
            line['pool'] = i.pool
            line['user'] = i.user
            # cpu_milliseconds may be missing from the attribute map.
            line['cpu_milliseconds'] = i.attributes.get('cpu_milliseconds', 0)
            line['name'] = i.attributes['hive_query_string']
            line['entityName'] = i.applicationId
            line['time'] = zone_conversion(timestamp=i.startTime,
                                           format=u'YYYY-MM-DD HH:mm:ss')
            attr_val = round_milli_time(i.startTime, i.endTime)
            line['application_duration1'] = attr_val
            # Render the millisecond duration as hours, minutes, or seconds.
            if int(float(attr_val)) > 60 * 60 * 1000:
                attr_val = ('%.2f' % (float(attr_val) / 60 / 60 / 1000)) + "h"
            elif int(float(attr_val)) > 60 * 1000:
                attr_val = str(int(float(attr_val)) / 60 / 1000) + "m"  # minutes
            elif int(float(attr_val)) > 1000:
                attr_val = str(int(float(attr_val)) / 1000) + "s"  # seconds
            line['application_duration'] = attr_val
            massage_dfs.append(line)
    return sorted(massage_dfs,
                  key=lambda t: t['application_duration1'],
                  reverse=True)
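
# The millisecond-to-"h"/"m"/"s" branching above reappears verbatim in
# do_get_impala_top and do_get_hive_top below. A minimal consolidation
# sketch (hypothetical helper, not part of the original module):
def _format_duration_ms(ms):
    ms = float(ms)
    if ms > 60 * 60 * 1000:
        return '%.2fh' % (ms / 60 / 60 / 1000)
    elif ms > 60 * 1000:
        return '%dm' % (ms / 60 / 1000)  # whole minutes
    elif ms > 1000:
        return '%ds' % (ms / 1000)  # whole seconds
    return '%dms' % ms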

def get_hive_job_summary(from_time=None, to_time=None):
    from_time = from_time or get_one_day_ago_from()
    to_time = to_time or get_now()
    query_5min_count = "select application_duration from YARN_APPLICATIONS where service_name = \"yarn\" and hive_query_id RLIKE \".*\" and application_duration < 300000.0 "
    query_15min_count = "select application_duration from YARN_APPLICATIONS where service_name = \"yarn\" and hive_query_id RLIKE \".*\" and application_duration >= 300000.0 and application_duration < 900000.0 "
    query_30min_count = "select application_duration from YARN_APPLICATIONS where service_name = \"yarn\" and hive_query_id RLIKE \".*\" and application_duration >= 900000.0 and application_duration < 1800000.0"
    query_60min_count = "select application_duration from YARN_APPLICATIONS where service_name = \"yarn\" and hive_query_id RLIKE \".*\" and application_duration >= 1800000.0 and application_duration < 3600000.0"
    query_120min_count = "select application_duration from YARN_APPLICATIONS where service_name = \"yarn\" and hive_query_id RLIKE \".*\" and application_duration >= 3600000.0 and application_duration < 7200000.0 "
    query_120min_plus_count = "select application_duration from YARN_APPLICATIONS where service_name = \"yarn\" and hive_query_id RLIKE \".*\" and application_duration >= 7200000.0 "

    job_5min_count = get_job_count(query_5min_count, from_time, to_time)
    job_15min_count = get_job_count(query_15min_count, from_time, to_time)
    job_30min_count = get_job_count(query_30min_count, from_time, to_time)
    job_60min_count = get_job_count(query_60min_count, from_time, to_time)
    job_120min_count = get_job_count(query_120min_count, from_time, to_time)
    job_120min_plus_count = get_job_count(query_120min_plus_count, from_time,
                                          to_time)
    counts = [
        job_5min_count, job_15min_count, job_30min_count, job_60min_count,
        job_120min_count, job_120min_plus_count
    ]
    job_total = sum(counts)

    # The first bucket covers everything under five minutes.
    types = ('0-5m', '5-15m', '15-30m', '30-60m', '60-120m', '>120m')

    file_path = "./hive_pie.png"
    pie_charts(counts, types, job_total, file_path)

def do_get_hive_top_email_bac(from_time=None, to_time=None,
                              duration=900000.0):
    from_time = from_time or get_one_day_ago_from()
    to_time = to_time or get_now()
    top_list = do_get_top_user_demo(from_time=from_time,
                                    to_time=to_time,
                                    duration=duration)
    return top_list[:40]

def do_get_impala_top(from_time=None, to_time=None, duration=300000.0):
    from_time = from_time or get_last_week_to()
    to_time = to_time or get_now()
    attrs = [
        'user', 'database', 'query_duration', 'thread_cpu_time', 'category',
        'executing', 'service_name', 'coordinator_host_id', 'stats_missing',
        'statement', 'entityName', 'pool'
    ]
    responses = do_query(IMPALA_QUERY % duration, from_time, to_time)
    massage_dfs = []
    for response in responses:
        if response.timeSeries:
            for ts in response.timeSeries:
                metadata = ts.metadata
                line = {}
                if metadata.attributes:
                    for attr in attrs:
                        if attr in metadata.attributes:
                            attr_val = metadata.attributes[attr]
                            if attr == 'query_duration':
                                # Keep the raw value for sorting, then render
                                # it as hours, minutes, or seconds.
                                line['query_duration1'] = int(attr_val)
                                if int(attr_val) > 60 * 60 * 1000:
                                    attr_val = ('%.2f' %
                                                (float(attr_val) / 60 / 60 /
                                                 1000)) + "h"
                                elif int(attr_val) > 60 * 1000:
                                    attr_val = str(
                                        int(attr_val) / 60 / 1000) + "m"  # minutes
                                elif int(attr_val) > 1000:
                                    attr_val = str(
                                        int(attr_val) / 1000) + "s"  # seconds
                            line[attr] = attr_val
                for data in ts.data:
                    # Overwritten each pass; the last data point's time wins.
                    line['time'] = zone_conversion(
                        timestamp=data.timestamp,
                        format=u'YYYY-MM-DD HH:mm:ss')
                massage_dfs.append(line)
    # Some streams may lack query_duration; default those to 0 when sorting.
    return sorted(massage_dfs,
                  key=lambda t: t.get('query_duration1', 0),
                  reverse=True)
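
# IMPALA_QUERY (used above) and HIVE_QUERY (used below) are module-level
# tsquery templates parameterized by the duration threshold. Their
# definitions are not shown; by analogy with the literal queries in
# get_impala_job_summary, a plausible shape would be (assumption only):
#   IMPALA_QUERY = ("select query_duration from IMPALA_QUERIES "
#                   "where serviceName=impala and query_duration > %s")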

def do_get_hive_top(from_time=None, to_time=None, duration=900000.0):
    from_time = from_time or get_one_day_ago_from()
    to_time = to_time or get_now()
    attrs = [
        'user', 'name', 'application_duration', 'entityName', 'pool',
        'cpu_milliseconds', 'category', 'service_name'
    ]
    responses = do_query(HIVE_QUERY % duration, from_time, to_time)
    massage_dfs = []
    for response in responses:
        if response.timeSeries:
            for ts in response.timeSeries:
                metadata = ts.metadata
                line = {}
                if metadata.attributes:
                    for attr in attrs:
                        if attr in metadata.attributes:
                            attr_val = metadata.attributes[attr]
                            if attr == 'application_duration':
                                # Keep the raw value for sorting, then render
                                # it as hours, minutes, or seconds.
                                line['application_duration1'] = float(attr_val)
                                if int(float(attr_val)) > 60 * 60 * 1000:
                                    attr_val = ('%.2f' %
                                                (float(attr_val) / 60 / 60 /
                                                 1000)) + "h"
                                elif int(float(attr_val)) > 60 * 1000:
                                    attr_val = str(
                                        int(float(attr_val)) / 60 /
                                        1000) + "m"  # minutes
                                elif int(float(attr_val)) > 1000:
                                    attr_val = str(
                                        int(float(attr_val)) / 1000) + "s"  # seconds
                            line[attr] = attr_val
                for data in ts.data:
                    line['time'] = zone_conversion(
                        timestamp=data.timestamp,
                        format=u'YYYY-MM-DD HH:mm:ss')
                massage_dfs.append(line)
    # Entries without application_duration sort as 0 rather than raising.
    return sorted(massage_dfs,
                  key=lambda t: t.get('application_duration1', 0),
                  reverse=True)

def do_get_hdfs_used_weekly(pic=''):
    responses = _do_get_hdfs_used(get_last_n_week_from(), get_now(), 'WEEKLY')
    capacity = []
    labels = []
    color = ['r', 'g', 'b']
    file_path = './hdfs_weekly.png'
    for response in responses:
        if response.timeSeries:
            for ts in response.timeSeries:
                bar_labels = []
                hdfs_used_values = []
                metadata = ts.metadata
                unit = metadata.unitNumerators[0]
                for data in ts.data:
                    bar_labels.append(zone_conversion(data.timestamp))
                    hdfs_used_values.append(
                        check_digital_storage_without_unit(
                            data.value, unit, 'bytes'))
                capacity.append(hdfs_used_values)
                labels = bar_labels
    # Plot and summarize once all responses have been collected.
    if not pic:
        two_bar_charts(labels[-4:], capacity[-4:], color, file_path)
    rows, remaining = ring_table_clo(capacity[0], labels)
    headers = [u'Date', u'Used capacity (T)', u'Previous week (T)',
               u'Increase (T)', u'Weekly growth rate (%)']
    # remaining appears to be a per-day figure; scale by 7 for a weekly value.
    return rows[-4:][::-1], "%.1f" % (remaining * 7), headers
def overall_report():
    os.chdir(os.path.dirname(sys.argv[0]))
    data = do_get_dfs_capacity_email()
    rows, remaining, _ = do_get_hdfs_used_weekly()
    rows1, _ = do_get_hdfs_used_monthly()  # header lists are unused here
    rows2, _ = do_get_hdfs_used_quarterly()
    rows3 = do_get_hive_top_email()
    rows4 = do_get_impala_top_email()
    rows5 = query_small_files()
    get_impala_job_summary()
    get_hive_job_summary()
    query_file_incr_info()
    do_get_dfs_cpu()
    do_get_dfs_mem()
    do_get_dfs_net()

    # Read each chart generated above and wrap it for inline embedding.
    image_names = ['cpu.png', 'mem.png', 'net.png', 'hdfs_weekly.png',
                   'hdfs_monthly.png', 'hdfs_quarterly.png', 'file.png',
                   'file2.png', 'impala_pie.png', 'hive_pie.png']
    inline_images = {}
    for name in image_names:
        with open('./' + name, 'rb') as image_file:
            inline_images[name] = InlineImage(filename=name,
                                              content=image_file.read())

    send_templated_mail(
        template_name='report_template2',
        from_email='*****@*****.**',
        recipient_list=['*****@*****.**', '*****@*****.**'],
        context={
            'time': zone_conversion(get_now()),
            'data': data,
            'rows': rows,
            'rows1': rows1,
            'rows2': rows2,
            'rows3': rows3,
            'rows4': rows4,
            'rows5': rows5,
            'remaining': remaining,
            'cpu_image': inline_images['cpu.png'],
            'mem_image': inline_images['mem.png'],
            'net_image': inline_images['net.png'],
            'hdfs_weekly_image': inline_images['hdfs_weekly.png'],
            'hdfs_monthly_image': inline_images['hdfs_monthly.png'],
            'hdfs_quarterly_image': inline_images['hdfs_quarterly.png'],
            'file_image': inline_images['file.png'],
            'file2_image': inline_images['file2.png'],
            'impala_pie_image': inline_images['impala_pie.png'],
            'hive_pie_image': inline_images['hive_pie.png']
        },
        # Optional:
        # cc=['*****@*****.**'],
        # bcc=['*****@*****.**'],
        # headers={'My-Custom-Header':'Custom Value'},
        # template_prefix="my_emails/",
        # template_suffix="email",
    )
    print('email sent')
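
# A conventional entry-point guard (assumed; the original does not show how
# overall_report is invoked, e.g. from cron):
#   if __name__ == '__main__':
#       overall_report()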

def do_get_impala_top_email_bac(from_time=None, to_time=None,
                                duration=300000.0):
    from_time = from_time or get_last_week_to()
    to_time = to_time or get_now()
    top_list = do_get_impala_top(from_time=from_time,
                                 to_time=to_time,
                                 duration=duration)
    return top_list[:20]

# Method from a service-info helper class (hence the self parameter).
def get_top_user_demo(self):
    return get_service(self._api, cluster_name="cluster",
                       name="impala").get_impala_queries(
                           start_time=get_n_day_ago_from(n=1),
                           end_time=get_now(),
                           filter_str="")

def _do_get_hdfs_used(from_time, to_time=None, granularity='WEEKLY'):
    to_time = to_time or get_now()
    return do_query_rollup(
        "select dfs_capacity_used, dfs_capacity where entityName=hdfs:nn-idc",
        from_time, to_time, granularity)
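
# The monthly/quarterly helpers used in overall_report presumably reuse this
# rollup with 'MONTHLY'/'QUARTERLY' granularity (assumption; they are not
# shown here), e.g.:
#   _do_get_hdfs_used(some_from_time, get_now(), 'MONTHLY')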

# Method from an API helper class (hence the self parameter).
def _get_service(self, path):
    params = {
        'from': get_n_hour_ago_from(n=1),
        'to': get_now()
    }
    return call(method=self._api.get, path=path, ret_type=ApiTest2,
                ret_is_list=True, params=params)