コード例 #1
0
def rm_active_standby(active=None, back=None):
    """Return whichever ResourceManager client currently reports ``ACTIVE``.

    Args:
        active: Client for the primary ResourceManager.  When ``None`` a
            client is built from ``RM_HOST`` / ``RM_PORT``.
        back: Client for the backup ResourceManager.  When ``None`` a client
            is built from ``RM_HOST_BAK`` / ``RM_PORT``.

    Returns:
        The first client (primary first, then backup) whose cluster
        information reports ``haState == 'ACTIVE'``, or ``None`` when
        neither does.
    """
    # Honour caller-supplied clients instead of silently overwriting them.
    if active is None:
        active = ResourceManager(address=RM_HOST, port=RM_PORT)
    if back is None:
        back = ResourceManager(address=RM_HOST_BAK, port=RM_PORT)

    # haState is expected to be either ACTIVE or STANDBY.  The backup is only
    # queried when the primary is not active, so an unreachable standby can
    # no longer break the lookup of a healthy primary.
    for candidate in (active, back):
        cluster_info = candidate.cluster_information().data.get(
            'clusterInfo') or {}
        if cluster_info.get('haState') == 'ACTIVE':
            logging.info("ResourceManager host = %s is active",
                         candidate.address)
            return candidate

    logging.warning("No ResourceManager can be used, Please check again")
    return None
コード例 #2
0
    def configure(self):
        """Create the YARN API clients and load the application filters.

        Reads ``resource_manager_address`` and ``port`` from ``self.config``
        and builds a ``ResourceManager`` and an ``ApplicationMaster`` client
        with them; ``port`` is only forwarded when it is truthy.  The
        ``application_*`` filter settings are then copied from the config
        onto the instance and ``application_status_list`` is reset.
        """
        client_kwargs = {
            'address': self.config.get('resource_manager_address'),
        }
        port = self.config.get('port')
        if port:
            client_kwargs['port'] = port

        self.resource_manager = ResourceManager(**client_kwargs)
        self.app_master = ApplicationMaster(**client_kwargs)

        # Each filter attribute has the same name as its config key.
        for option in ('application_ids', 'application_status',
                       'application_tags', 'application_names'):
            setattr(self, option, self.config.get(option))
        self.application_status_list = []
コード例 #3
0
def getAllApps(rmAddress):
    """Return recently started applications plus all RUNNING applications.

    Two queries are sent to the ResourceManager at ``rmAddress``: one for
    applications started at or after ``getTime()`` and one for applications
    in state ``RUNNING``.

    Args:
        rmAddress: Address of the ResourceManager to query.

    Returns:
        The string ``"null"`` when the first query reports no applications;
        otherwise the list of recently started applications, with the
        RUNNING applications appended when there are any.
    """
    xTime = getTime()
    rm = ResourceManager(address=rmAddress, port=rmPort, timeout=30)
    recent = rm.cluster_applications(started_time_begin=str(xTime)).data
    running = rm.cluster_applications(state="RUNNING").data

    recent_apps = recent.get("apps")
    if recent_apps is None:
        # NOTE(review): RUNNING applications are discarded in this case too;
        # kept as-is because callers may rely on the "null" sentinel.
        return "null"

    running_apps = running.get("apps")
    if running_apps is None:
        return recent_apps.get("app")

    # NOTE(review): an application that is both recent and RUNNING appears
    # twice in the result - confirm whether callers expect de-duplication.
    return recent_apps.get("app") + running_apps.get("app")
コード例 #4
0
def get24MinsApp(rmAddress):
    """Return the ``com.yunchen.batch.BatchOneDay`` applications.

    Queries the ResourceManager at ``rmAddress`` for applications started at
    or after ``get0ClockTime()`` (presumably today's midnight - confirm
    against that helper) and keeps those whose ``name`` matches exactly.

    Returns the string ``"null"`` when the query reports no applications,
    otherwise a (possibly empty) list of application dicts.
    """
    since = get0ClockTime()
    client = ResourceManager(address=rmAddress, port=rmPort, timeout=30)
    payload = client.cluster_applications(started_time_begin=str(since)).data

    if payload.get("apps") is None:
        return "null"

    return [
        app for app in payload.get("apps").get("app")
        if app.get("name") == "com.yunchen.batch.BatchOneDay"
    ]
コード例 #5
0
def main():
    """Run one monitoring pass over the YARN applications.

    Builds the application map from the ResourceManager, then, for every
    switch in the ``MonitorSwitch`` config section that is set to the string
    ``'True'``:

    * FinishedSwitch / FailedSwitch / KilledSwitch - start the applications
      found in the matching state again;
    * RunningSwitch - kill the RUNNING applications selected by
      ``apps_running_duration`` (per the original author's note: those whose
      current job has been running for more than 2 minutes, so that the next
      pass restarts them).
    """
    YarnLog.writeLog('init logging')

    RM = ResourceManager(address=RM_HOST, port=RM_PORT)
    AM = ApplicationMaster(address=AM_HOST, port=AM_PORT)
    appsDict = update_applicatioins_map(RM)

    def switch_is_on(option):
        # Switches are stored as the literal string 'True'.
        return YarnConfig.getConfig('MonitorSwitch', option) == 'True'

    # (config switch, application state, banner logged before restarting)
    restart_rules = (
        ('FinishedSwitch', 'FINISHED',
         '############################ Filter FINISHED Application and Start it #################################'),
        ('FailedSwitch', 'FAILED',
         '############################ Filter FAILED Application and Start it #################################'),
        ('KilledSwitch', 'KILLED',
         '############################ Filter KILLED Application and Start it #################################'),
    )
    for option, state, banner in restart_rules:
        if not switch_is_on(option):
            continue
        logging.info(banner)
        start_spark(filter_apps_state(appsDict, state))

    if switch_is_on('RunningSwitch'):
        logging.info(
            '############################ Filter RUNNING Application and Start it #################################'
        )
        running_apps = filter_apps_state(appsDict, 'RUNNING')
        kill_spark(apps_running_duration(running_apps, AM))

    logging.info(
        '############################ Ending ###################################'
    )
コード例 #6
0
 def __init__(self, zaddr=ZABBIX_ADDR, zport=ZABBIX_PORT, iface=None):
     """Set up the REST endpoint table, the Zabbix sender and the RM client.

     Args:
         zaddr: Address handed to ``ZabbixSender``.
         zport: Port handed to ``ZabbixSender``.
         iface: Passed to ``self._getLocalIP`` to determine ``self._ip``.
     """
     # (API_ID, API_ADDRESS, KEY_PREFIX) per endpoint; the table position
     # (starting at 1) is the numeric type used as key of ``_API_TYPE``.
     endpoints = (
         ('clusterInfo',
          'http://RMADDRESS:8088/ws/v1/cluster/info',
          'Info'),
         ('clusterMetrics',
          'http://RMADDRESS:8088/ws/v1/cluster/metrics',
          'Metrics'),
         ('scheduler',
          'http://RMADDRESS:8088/ws/v1/cluster/scheduler',
          'Scheduler'),
         ('apps',
          'http://RMADDRESS:8088/ws/v1/cluster/apps',
          'Apps'),
         ('appStatInfo',
          'http://RMADDRESS:8088/ws/v1/cluster/appstatistics',
          'AppStatInfo'),
         ('nodes',
          'http://RMADDRESS:8088/ws/v1/cluster/nodes',
          'Nodes'),
     )
     self._API_TYPE = {
         type_id: {
             'API_ID': api_id,
             'API_PREFIX': 'RM',
             'API_ADDRESS': api_address,
             'KEY_PREFIX': key_prefix,
         }
         for type_id, (api_id, api_address, key_prefix)
         in enumerate(endpoints, start=1)
     }
     self._type = 1

     # Resolved after the endpoint table and default type are in place.
     self._activerm = self._get_activerm()

     self.zaddr, self.zport = zaddr, zport
     self.ret_result = []
     self.final_result_dict = {}
     self.zbserver = ZabbixSender(zaddr, zport)
     self._ip = self._getLocalIP(iface)
     self.rm = ResourceManager(address=self._activerm, timeout=10)
コード例 #7
0
    def run(self):
        """Build, print and export the per-user daily YARN usage leader board.

        Reads ``self.jobs_year``, ``self.jobs_month`` and ``self.jobs_day`` to
        define the analysed day (midnight to midnight in the local timezone
        of the process, because naive ``datetime`` objects are used).

        Steps:
            1. Query the ResourceManager for total vcores / memory and derive
               the cluster's daily capacity in vcore-seconds / MB-seconds.
            2. Fetch every application started before the end of the day and
               dump the raw list to ``app_lists/apps_<date>.csv``.
            3. Weight each application's ``vcoreSeconds`` / ``memorySeconds``
               by the fraction of its elapsed time inside the day and sum
               the weighted usage per user.
            4. Print the leader board and write it to
               ``daily_leader_boards/leader_board_<date>.csv``.

        Returns:
            None.
        """
        analysis_timestamp = str(datetime.now())
        date_suffix = '-'.join((
            str(self.jobs_year),
            str(self.jobs_month).zfill(2),
            str(self.jobs_day).zfill(2),
        ))
        output_path = os.path.join('daily_leader_boards',
                                   'leader_board_' + date_suffix + '.csv')
        app_file = 'app_lists/apps_' + date_suffix + '.csv'

        rm = ResourceManager(configuration.yarn_resource_managers)
        cluster_metrics = rm.cluster_metrics().data['clusterMetrics']
        cluster_daily_vcore_seconds = int(
            cluster_metrics['totalVirtualCores'] * 60 * 60 * 24)
        cluster_daily_megabyte_memory_seconds = int(
            cluster_metrics['totalMB'] * 60 * 60 * 24)

        begin_date = datetime(int(str(self.jobs_year)),
                              int(str(self.jobs_month)),
                              int(str(self.jobs_day)))
        end_date = begin_date + timedelta(1)
        # Day boundaries in epoch milliseconds, computed once instead of being
        # re-parsed from strings on every loop iteration.
        begin_ms = int(begin_date.timestamp() * 1000)
        end_ms = int(end_date.timestamp() * 1000)

        # Filter out jobs that started after the end of the analysed day.
        apps = rm.cluster_applications(started_time_end=str(end_ms))
        # "apps" is null in the response when no application matches, so fall
        # back to an empty list instead of failing on the subscript.
        applist = (apps.data.get('apps') or {}).get('app') or []

        pd.DataFrame(applist).to_csv(app_file)

        total_vcore_seconds = 0
        total_mb_seconds = 0
        overall_started_time_ms = 9999999999999
        overall_finished_time_ms = 0
        total_yarn_apps = 0
        users = {}

        for app in applist:
            started_time = app['startedTime']
            finished_time = app['finishedTime']
            elapsed_time = app['elapsedTime']

            # Disregard apps that haven't ever or yet consumed any resources.
            if app['state'] not in ['FINISHED', 'FAILED', 'KILLED', 'RUNNING']:
                continue

            # Disregard apps that finished before the beginning of the day
            # (finishedTime == 0 means the app has not finished yet).
            if 0 < finished_time < begin_ms:
                continue

            # Default: the job began and ended within the analysed day.
            percent_within_day = 1.0

            # With no elapsed time there is nothing to apportion, and the
            # divisions below would raise ZeroDivisionError.
            if elapsed_time > 0:
                # Began before the day, ended inside it.
                if started_time < begin_ms < finished_time < end_ms:
                    percent_within_day = (finished_time -
                                          begin_ms) / elapsed_time

                # Began before the day and continued beyond its end.
                if started_time < begin_ms and (finished_time == 0
                                                or finished_time > end_ms):
                    percent_within_day = 86400000 / elapsed_time

                # Began inside the day and continued beyond its end.
                if begin_ms < started_time < end_ms \
                        and (finished_time == 0 or end_ms < finished_time):
                    percent_within_day = (end_ms -
                                          started_time) / elapsed_time

                # A still-running app inspected before the day is over has
                # elapsed less than the window used above; never attribute
                # more than 100% of its usage to the day.
                percent_within_day = min(percent_within_day, 1.0)

            weighted_app_vcore_seconds = int(app['vcoreSeconds'] *
                                             percent_within_day)
            weighted_app_memory_seconds = int(app['memorySeconds'] *
                                              percent_within_day)

            user = users.setdefault(
                app['user'], {
                    'user_first_task_started_time_ms': 9999999999999,
                    'last_task_finished_time_ms': 0,
                    'total_vcore_seconds': 0,
                    'total_MB_seconds': 0,
                })
            total_vcore_seconds += weighted_app_vcore_seconds
            total_mb_seconds += weighted_app_memory_seconds

            user['user_first_task_started_time_ms'] = min(
                started_time, user['user_first_task_started_time_ms'])
            user['last_task_finished_time_ms'] = max(
                finished_time, user['last_task_finished_time_ms'])

            overall_started_time_ms = min(started_time,
                                          overall_started_time_ms)
            overall_finished_time_ms = max(finished_time,
                                           overall_finished_time_ms)

            total_yarn_apps += 1

            user['total_vcore_seconds'] += weighted_app_vcore_seconds
            user['total_MB_seconds'] += weighted_app_memory_seconds

        def percent_of(part, whole):
            # Percentage rounded to two decimals; 0.0 when the denominator is
            # zero (e.g. no app used any resources) instead of raising.
            return round(100 * part / whole, 2) if whole else 0.0

        header = [
            'jobs_year', 'jobs_month', 'jobs_day',
            'cluster_daily_vcore_seconds',
            'cluster_daily_megabyte_memory_seconds', 'user',
            'used_vcore_seconds', 'percent_used_of_all_used_vcore_seconds',
            'percent_used_of_total_cluster_vcore_seconds', 'used_MB_seconds',
            'percent_used_of_all_used_MB_seconds',
            'percent_used_of_total_cluster_MB_seconds',
            'user_first_task_started_time', 'user_last_task_finished_time'
        ]

        table = []

        for user_name, stats in users.items():
            # A finish timestamp of 0 means none of the user's counted tasks
            # has finished yet; report it as an empty string.
            if int(stats['last_task_finished_time_ms']) == 0:
                last_task_finished_time_string = ''
            else:
                last_task_finished_time_string = datetime.fromtimestamp(
                    stats['last_task_finished_time_ms'] /
                    1000.0).strftime('%Y-%m-%d %H:%M')

            table.append([
                self.jobs_year,
                self.jobs_month,
                self.jobs_day,
                cluster_daily_vcore_seconds,
                cluster_daily_megabyte_memory_seconds,
                user_name,
                round(stats['total_vcore_seconds'], 0),
                percent_of(stats['total_vcore_seconds'], total_vcore_seconds),
                percent_of(stats['total_vcore_seconds'],
                           cluster_daily_vcore_seconds),
                round(stats['total_MB_seconds'], 0),
                percent_of(stats['total_MB_seconds'], total_mb_seconds),
                percent_of(stats['total_MB_seconds'],
                           cluster_daily_megabyte_memory_seconds),
                datetime.fromtimestamp(
                    stats['user_first_task_started_time_ms'] /
                    1000.0).strftime('%Y-%m-%d %H:%M'),
                last_task_finished_time_string,
            ])

        df = pd.DataFrame(table, columns=header)
        df = df.sort_values(by='used_MB_seconds', ascending=False)

        print()
        print('analysis timestamp: ' + analysis_timestamp)
        print('jobs date: ' + begin_date.strftime('%Y-%m-%d'))
        print('----------------------')
        print('count of yarn apps: ' + str(total_yarn_apps))
        print(
            'overall daily jobs started time ',
            datetime.fromtimestamp(overall_started_time_ms /
                                   1000.0).strftime('%Y-%m-%d %H:%M'))
        print(
            'overall daily jobs finished time',
            datetime.fromtimestamp(overall_finished_time_ms /
                                   1000.0).strftime('%Y-%m-%d %H:%M'))
        print()

        print(tabulate(df, headers='keys', showindex=False))
        df.to_csv(output_path, index=False)
コード例 #8
0
def get_state(application_id):
    """Return the ``state`` field YARN reports for ``application_id``.

    The application record is fetched from the ResourceManager configured
    at ``config.yarn_url``.
    """
    client = ResourceManager([config.yarn_url])
    app_record = client.cluster_application(application_id).data['app']
    return app_record['state']
コード例 #9
0
ファイル: test.py プロジェクト: Biking0/big_data_old
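# File name: cpu_monitor.py
# Description:
# 1. Monitor Huawei YARN resources
# 2. Monitor disk space on hosts 119 and 120
# Description:
# Description:
# Input tables:
# Output tables:
# Author: hyn
# Created: 20191023
# Change log:
# Modified on: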
# ***************************************************************************
# Usage: python cpu_monitor.py
# ***************************************************************************

import os
import sys
import time
import json
import datetime

from yarn_api_client import ApplicationMaster, HistoryServer, NodeManager, ResourceManager

result_list = []

# The ResourceManager is addressed through a single "host:port" endpoint
# string passed as service_endpoints.
rm = ResourceManager(service_endpoints='10.93.171.97:8088')

# Fetch the cluster's applications and dump the decoded response payload.
response = rm.cluster_applications()
print(response.data)
コード例 #10
0
ファイル: demo.py プロジェクト: Biking0/big_data_old
# Author: hyn
# Created: 20200917
# Change log:
# Modified on:
# ***************************************************************************
# Usage: python yarn_monitor.py
# ***************************************************************************

import os
import sys
import json
import time

from yarn_api_client import ApplicationMaster, HistoryServer, NodeManager, ResourceManager

# The client is given both ResourceManager endpoints.
rm = ResourceManager(
    service_endpoints=[
        'http://172.19.168.100:8088',
        'http://172.19.168.4:8088',
    ])

# Filter important jobs (presumably matched against application names -
# confirm against the code that uses these values).
ats, thritf, dis = 'ats', 'Thrift', 'dis'

# 3 hours ago
# run_time = 10800

# 24 hours ago
コード例 #11
0
ファイル: YarnUtil.py プロジェクト: DoTime/PyStudy
def get_info(application_id):
    """Return the ``app`` record YARN reports for ``application_id``.

    The record is fetched from the ResourceManager configured at
    ``config.yarn_url``.
    """
    resource_manager = ResourceManager([config.yarn_url])
    payload = resource_manager.cluster_application(application_id).data
    return payload['app']