def do_get_top_user_demo(from_time=None, to_time=None, duration=900):
    """Return YARN applications (Hive queries) sorted by run time, longest first.

    :param from_time: window start; defaults to one day ago, resolved per
        call (a call in the default expression would be frozen at import time).
    :param to_time: window end; defaults to now, resolved per call.
    :param duration: minimum duration filter forwarded to the API, in seconds.
    :return: list of dicts describing each application, sorted descending by
        raw duration in milliseconds (kept under ``application_duration1``).
    """
    # Resolve time defaults lazily so every call uses the current clock.
    if from_time is None:
        from_time = get_one_day_ago_from()
    if to_time is None:
        to_time = get_now()
    hive_info = HiveInfo()
    top_users = hive_info.get_top_user_demo(from_time=from_time,
                                            to_time=to_time,
                                            duration=duration)
    massage_dfs = []
    if top_users.applications:
        for app in top_users.applications:
            line = {}
            line['category'] = "YARN_APPLICATION"
            line['service_name'] = "yarn"
            line['pool'] = app.pool
            line['user'] = app.user
            # Some applications lack these attributes; default instead of
            # letting a KeyError abort the whole report.
            try:
                line['cpu_milliseconds'] = app.attributes['cpu_milliseconds']
            except KeyError:
                line['cpu_milliseconds'] = 0
            try:
                line['name'] = app.attributes['hive_query_string']
            except KeyError:
                line['name'] = ''
            line['entityName'] = app.applicationId
            line['time'] = zone_conversion(timestamp=app.startTime,
                                           format=u'YYYY-MM-DD HH:mm:ss')
            attr_val = round_milli_time(app.startTime, app.endTime)
            # Keep the raw millisecond value for sorting before attr_val is
            # rewritten into a human-readable "1.50h" / "12m" / "45s" form.
            line['application_duration1'] = attr_val
            if int(float(attr_val)) > 60 * 60 * 1000:
                attr_val = ('%.2f' % (float(attr_val) / 60 / 60 / 1000)) + "h"
            elif int(float(attr_val)) > 60 * 1000:
                attr_val = str(int(float(attr_val)) / 60 / 1000) + "m"  # minutes
            elif int(float(attr_val)) > 1000:
                attr_val = str(int(float(attr_val)) / 1000) + "s"  # seconds
            line['application_duration'] = attr_val
            massage_dfs.append(line)
    return sorted(massage_dfs,
                  key=lambda t: t['application_duration1'],
                  reverse=True)
def do_get_hdfs_used_quarterly(pic=''):
    """Build the quarterly HDFS-usage table, plotting a chart unless *pic* is set.

    :param pic: truthy to skip writing ./hdfs_quarterly.png.
    :return: (rows, headers) — the last four quarters, newest first, plus the
        table column headers. Returns after processing the first response.
    """
    days, quarter_from = get_last_n_quarter_from()
    responses = _do_get_hdfs_used(quarter_from, None, 'WEEKLY')
    colors = ['r', 'g', 'b']
    chart_path = './hdfs_quarterly.png'
    wanted_months = {}
    for day in days:
        wanted_months[day] = 0
    capacity = []
    labels = []
    for response in responses:
        if response.timeSeries:
            for series in response.timeSeries:
                per_month = collections.OrderedDict()
                unit = series.metadata.unitNumerators[0]
                for point in series.data:
                    month = zone_conversion(timestamp=point.timestamp,
                                            format=u'YYYY-MM')
                    # Keep only months that fall inside the quarter window.
                    if month in wanted_months:
                        per_month[month] = point.value
                month_labels = []
                used_values = []
                for month, value in per_month.items():
                    month_labels.append(month)
                    used_values.append(
                        check_digital_storage_without_unit(value, unit, 'bytes'))
                capacity.append(used_values[-5:])
                labels = month_labels[-5:]
        # Convert the YYYY-MM labels into quarter names.
        labels = [Quarter.from_string(text=x).__str__() for x in labels]
        if not pic:
            two_bar_charts(labels[-4:], capacity[-4:], colors, chart_path)
        rows, remaining = ring_table_clo(capacity[0], labels)
        headers = [u'日期', u'已用容量(T)', u'上季容量(T)', u'增量(T)', u'季度增长率(%)']
        return rows[-4:][::-1], headers
def do_get_hdfs_used_monthly(pic=''):
    """Build the monthly HDFS-usage table, plotting a chart unless *pic* is set.

    :param pic: truthy to skip writing ./hdfs_monthly.png.
    :return: (rows, headers) — the last four months, newest first, plus the
        table column headers. Returns after processing the first response.
    """
    responses = _do_get_hdfs_used(get_last_n_month_from(), None, 'WEEKLY')
    colors = ['r', 'g', 'b']
    chart_path = './hdfs_monthly.png'
    capacity = []
    labels = []
    for response in responses:
        if response.timeSeries:
            for series in response.timeSeries:
                per_month = collections.OrderedDict()
                unit = series.metadata.unitNumerators[0]
                # Later samples in the same month overwrite earlier ones, so
                # each month ends up holding its last reported value.
                for point in series.data:
                    month = zone_conversion(timestamp=point.timestamp,
                                            format=u'YYYY-MM')
                    per_month[month] = point.value
                month_labels = []
                used_values = []
                for month, value in per_month.items():
                    month_labels.append(month)
                    used_values.append(
                        check_digital_storage_without_unit(value, unit, 'bytes'))
                capacity.append(used_values[-5:])
                labels = month_labels[-5:]
        if not pic:
            two_bar_charts(labels[-4:], capacity[-4:], colors, chart_path)
        rows, remaining = ring_table_clo(capacity[0], labels)
        headers = [u'日期', u'已用容量(T)', u'上月容量(T)', u'增量(T)', u'月增长率(%)']
        return rows[-4:][::-1], headers
def do_get_dfs_net(pic=''):
    """Collect 24h of cluster network-transmit rate and optionally plot it.

    :param pic: truthy to skip writing ./net.png.
    :return: (series, unit) where series is a list of
        (timestamps, values, legend, linestyle, width) tuples.
        Returns after processing the first response.
    """
    responses = do_query_rollup("select total_bytes_transmit_rate_across_network_interfaces where category = CLUSTER",
                                get_n_hour_ago_from(n=24), None, 'HOURLY')

    file_path = "./net.png"
    code = ['-', ':', '--', '-.', '-.']
    test = []
    index = 0
    y_max = 0.0
    for response in responses:
        if response.timeSeries:
            for ts in response.timeSeries:
                x = []
                mean_values = []
                metadata = ts.metadata
                unit = metadata.unitNumerators[0].encode("utf-8")
                for data in ts.data:
                    x_time = pl.datetime.datetime.strptime(
                        zone_conversion(timestamp=data.timestamp, format=u'YYYY-MM-DD HH:mm:ss'), "%Y-%m-%d %H:%M:%S")
                    x.append(x_time)
                    mean_values.append("%.2f" % data.value)
                    # Track the axis maximum on the numeric value: taking
                    # max() over the formatted strings is lexicographic
                    # ("9.00" > "10.00") and yields a wrong y-axis.
                    y_max = max(y_max, data.value)
                    data_type = data.type
                label_mean = metadata.entityName
                # Cycle line styles instead of raising IndexError when there
                # are more series than styles.
                test.append((x, mean_values, label_mean, code[index % len(code)], 1))
                index = index + 1
        if not pic:
            n_lines_charts(test, file_path, unit, data_type, float(y_max), u'群集Net传输量')
        return test, unit
def do_get_dfs_cpu(pic=''):
    """Collect 24h of cluster CPU usage (avg, and max where available) and plot.

    :param pic: truthy to skip writing ./cpu.png.
    :return: (series, unit) where series is a list of
        (timestamps, values, legend, linestyle, width) tuples.
        Returns after processing the first response.
    """
    responses = do_query_rollup("select cpu_percent_across_hosts WHERE category = CLUSTER",
                                get_n_hour_ago_from(n=24), None, 'HOURLY')
    file_path = "./cpu.png"
    code = ['-', ':', '--', '-.', '-.']
    test = []
    index = 0
    y_max = 100  # CPU usage is a percentage, so the axis is fixed at 100.
    for response in responses:
        if response.timeSeries:
            for ts in response.timeSeries:
                x = []
                max_values = []
                mean_values = []
                metadata = ts.metadata
                unit = metadata.unitNumerators[0].encode("utf-8")
                for data in ts.data:
                    x_time = pl.datetime.datetime.strptime(
                        zone_conversion(timestamp=data.timestamp, format=u'YYYY-MM-DD HH:mm:ss'), "%Y-%m-%d %H:%M:%S")
                    x.append(x_time)
                    if data.aggregateStatistics:
                        max_values.append("%.2f" % data.aggregateStatistics.max)
                    mean_values.append("%.2f" % data.value)
                    data_type = data.type
                legend = metadata.entityName
                # Cycle line styles instead of raising IndexError when there
                # are more series than styles.
                style = code[index % len(code)]
                if max_values:
                    test.append((x, max_values, legend + "Max", style, 1))
                test.append((x, mean_values, legend + "Avg", style, 1))
                index = index + 1
        if not pic:
            n_lines_charts(test, file_path, unit, data_type, y_max, u'群集CPU使用率')
        return test, unit
def do_get_impala_top(from_time=None, to_time=None, duration=300000.0):
    """Return Impala queries longer than *duration* ms, slowest first.

    :param from_time: window start; defaults to last week, resolved per call
        (a call in the default expression would be frozen at import time).
    :param to_time: window end; defaults to now, resolved per call.
    :param duration: minimum query duration filter, in milliseconds.
    :return: list of dicts, sorted descending by raw duration in ms
        (kept under ``query_duration1``).
    """
    # Resolve time defaults lazily so every call uses the current clock.
    if from_time is None:
        from_time = get_last_week_to()
    if to_time is None:
        to_time = get_now()
    attrs = [
        'user', 'database', 'query_duration', 'thread_cpu_time', 'category',
        'executing', 'service_name', 'coordinator_host_id', 'stats_missing',
        'statement', 'entityName', 'pool'
    ]
    responses = do_query(IMPALA_QUERY % duration, from_time, to_time)
    massage_dfs = []
    for response in responses:
        if response.timeSeries:
            for ts in response.timeSeries:
                metadata = ts.metadata
                line = {}
                if metadata.attributes:
                    for attr in attrs:
                        if attr in metadata.attributes:
                            attr_val = metadata.attributes[attr]
                            if attr == 'query_duration':
                                # Keep the raw ms value for sorting before the
                                # display value is rewritten to "h"/"m"/"s".
                                line['query_duration1'] = int(attr_val)
                                if int(attr_val) > 60 * 60 * 1000:
                                    attr_val = ('%.2f' %
                                                (float(attr_val) / 60 / 60 /
                                                 1000)) + "h"
                                elif int(attr_val) > 60 * 1000:
                                    attr_val = str(
                                        int(attr_val) / 60 / 1000) + "m"  # minutes
                                elif int(attr_val) > 1000:
                                    attr_val = str(
                                        int(attr_val) / 1000) + "s"  # seconds
                            line[attr] = attr_val
                for data in ts.data:
                    line['time'] = zone_conversion(
                        timestamp=data.timestamp,
                        format=u'YYYY-MM-DD HH:mm:ss')
                massage_dfs.append(line)
    # Series that lack the query_duration attribute sort as 0 instead of
    # raising KeyError and aborting the whole report.
    return sorted(massage_dfs,
                  key=lambda t: t.get('query_duration1', 0),
                  reverse=True)
def do_get_hive_top(from_time=None, to_time=None, duration=900000.0):
    """Return Hive (YARN) applications longer than *duration* ms, slowest first.

    :param from_time: window start; defaults to one day ago, resolved per call
        (a call in the default expression would be frozen at import time).
    :param to_time: window end; defaults to now, resolved per call.
    :param duration: minimum application duration filter, in milliseconds.
    :return: list of dicts, sorted descending by raw duration in ms
        (kept under ``application_duration1``).
    """
    # Resolve time defaults lazily so every call uses the current clock.
    if from_time is None:
        from_time = get_one_day_ago_from()
    if to_time is None:
        to_time = get_now()
    attrs = [
        'user', 'name', 'application_duration', 'entityName', 'pool',
        'cpu_milliseconds', 'category', 'service_name'
    ]
    responses = do_query(HIVE_QUERY % duration, from_time, to_time)
    massage_dfs = []
    for response in responses:
        if response.timeSeries:
            for ts in response.timeSeries:
                metadata = ts.metadata
                line = {}
                if metadata.attributes:
                    for attr in attrs:
                        if attr in metadata.attributes:
                            attr_val = metadata.attributes[attr]
                            if 'application_duration' == attr:
                                # Keep the raw ms value for sorting before the
                                # display value is rewritten to "h"/"m"/"s".
                                line['application_duration1'] = float(attr_val)
                                if int(float(attr_val)) > 60 * 60 * 1000:
                                    attr_val = ('%.2f' %
                                                (float(attr_val) / 60 / 60 /
                                                 1000)) + "h"
                                elif int(float(attr_val)) > 60 * 1000:
                                    attr_val = str(
                                        int(float(attr_val)) / 60 /
                                        1000) + "m"  # minutes
                                elif int(float(attr_val)) > 1000:
                                    attr_val = str(
                                        int(float(attr_val)) / 1000) + "s"  # seconds
                            line[attr] = attr_val
                for data in ts.data:
                    line['time'] = zone_conversion(
                        timestamp=data.timestamp,
                        format=u'YYYY-MM-DD HH:mm:ss')
                massage_dfs.append(line)
    # Series that lack the application_duration attribute sort as 0 instead
    # of raising KeyError and aborting the whole report.
    return sorted(massage_dfs,
                  key=lambda t: t.get('application_duration1', 0),
                  reverse=True)
def do_get_hdfs_used_weekly(pic=''):
    """Build the weekly HDFS-usage table, plotting a chart unless *pic* is set.

    :param pic: truthy to skip writing ./hdfs_weekly.png.
    :return: (rows, remaining, headers) — the last four samples newest first,
        the remaining figure scaled by 7 (presumably days-to-week — confirm
        against ring_table_clo), and the table column headers.
        Returns after processing the first response.
    """
    responses = _do_get_hdfs_used(get_last_n_week_from(), get_now(), 'WEEKLY')
    colors = ['r', 'g', 'b']
    chart_path = './hdfs_weekly.png'
    capacity = []
    labels = []
    for response in responses:
        if response.timeSeries:
            for series in response.timeSeries:
                unit = series.metadata.unitNumerators[0]
                day_labels = [zone_conversion(point.timestamp)
                              for point in series.data]
                used_values = [
                    check_digital_storage_without_unit(point.value, unit, 'bytes')
                    for point in series.data]
                capacity.append(used_values)
                labels = day_labels
        if not pic:
            two_bar_charts(labels[-4:], capacity[-4:], colors, chart_path)
        rows, remaining = ring_table_clo(capacity[0], labels)
        headers = [u'日期', u'已用容量(T)', u'上周容量(T)', u'增量(T)', u'周增长率(%)']
        return rows[-4:][::-1], "%.1f" % (remaining * 7), headers
# Beispiel #9 (example-separator text left over from the original source;
# kept as a comment so the module parses)
def overall_report():
    os.chdir(os.path.dirname(sys.argv[0]))
    data = do_get_dfs_capacity_email()
    rows, remaining, metadata = do_get_hdfs_used_weekly()
    rows1, metadata = do_get_hdfs_used_monthly()
    rows2, metadata = do_get_hdfs_used_quarterly()
    rows3 = do_get_hive_top_email()
    rows4 = do_get_impala_top_email()
    rows5 = query_small_files()
    get_impala_job_summary()
    get_hive_job_summary()
    query_file_incr_info()
    do_get_dfs_cpu()
    do_get_dfs_mem()
    do_get_dfs_net()

    with open('./cpu.png', 'rb') as lena1:
        image1 = lena1.read()

    with open('./mem.png', 'rb') as lena2:
        image2 = lena2.read()

    with open('./net.png', 'rb') as lena3:
        image3 = lena3.read()

    with open('./hdfs_weekly.png', 'rb') as lena4:
        image4 = lena4.read()

    with open('./hdfs_monthly.png', 'rb') as lena5:
        image5 = lena5.read()

    with open('./hdfs_quarterly.png', 'rb') as lena6:
        image6 = lena6.read()

    with open('./file.png', 'rb') as lena7:
        image7 = lena7.read()

    with open('./file2.png', 'rb') as lena8:
        image8 = lena8.read()

    with open('./impala_pie.png', 'rb') as lena9:
        image9 = lena9.read()

    with open('./hive_pie.png', 'rb') as lena10:
        image10 = lena10.read()

    inline_image1 = InlineImage(filename="cpu.png", content=image1)
    inline_image2 = InlineImage(filename="mem.png", content=image2)
    inline_image3 = InlineImage(filename="net.png", content=image3)
    inline_image4 = InlineImage(filename="hdfs_weekly.png", content=image4)
    inline_image5 = InlineImage(filename="hdfs_monthly.png", content=image5)
    inline_image6 = InlineImage(filename="hdfs_quarterly.png", content=image6)
    inline_image7 = InlineImage(filename="file.png", content=image7)
    inline_image8 = InlineImage(filename="file2.png", content=image8)
    inline_image9 = InlineImage(filename="impala_pie.png", content=image9)
    inline_image10 = InlineImage(filename="hive_pie.png", content=image10)

    send_templated_mail(
        template_name='report_template2',
        from_email='*****@*****.**',
        recipient_list=['*****@*****.**', '*****@*****.**'],
        context={
            'time': zone_conversion(get_now()),
            'data': data,
            'rows': rows,
            'rows1': rows1,
            'rows2': rows2,
            'rows3': rows3,
            'rows4': rows4,
            'rows5': rows5,
            'remaining': remaining,
            'cpu_image': inline_image1,
            'mem_image': inline_image2,
            'net_image': inline_image3,
            'hdfs_weekly_image': inline_image4,
            'hdfs_monthly_image': inline_image5,
            'hdfs_quarterly_image': inline_image6,
            'file_image': inline_image7,
            'file2_image': inline_image8,
            'impala_pie_image': inline_image9,
            'hive_pie_image': inline_image10
        },
        # Optional:
        # cc=['*****@*****.**'],
        # bcc=['*****@*****.**'],
        # headers={'My-Custom-Header':'Custom Value'},
        # template_prefix="my_emails/",
        # template_suffix="email",
    )
    print 'email send'