Example #1
    def get_cluster_scheduler(self):
        """
        Fetch Hadoop cluster scheduler information, normalize each queue's
        resource usage by total cluster capacity, and write the result to
        scheduler.csv / scheduler2.csv under self.file_path.
        """
        # Assumes module-level imports: os, json, urlopen (urllib.request),
        # a configured logger, and the project-local FileOperator helper.
        url = self.hadoop_url + "scheduler"
        scheduler_file = os.path.join(self.file_path, "scheduler.csv")
        scheduler_file2 = os.path.join(self.file_path, "scheduler2.csv")

        try:
            results = urlopen(url, timeout=2000).read()
            results = json.loads(results)
            results = results['scheduler']['schedulerInfo']['queues']['queue']
            logger.debug(self.memcpu_info)
            for scheduler_info in results:
                results_copy = scheduler_info.copy()
                for key, value in results_copy['resourcesUsed'].items():
                    scheduler_info[key] = value / self.memcpu_info[key]
        except KeyError as error:
            logger.error("key error {0}".format(error))
            return
        except Exception as error:
            logger.error(error)
            return

        # Append to scheduler.csv, writing the header only on the first run;
        # scheduler2.csv is truncated each time, so it always needs a header.
        write_header = True
        if FileOperator.file_exits(scheduler_file):
            write_header = False
        headers = results[0].keys()
        FileOperator.write_to_csv(results, scheduler_file,
                                  headers=headers, write_header=write_header, model="a+")
        FileOperator.write_to_csv(results, scheduler_file2,
                                  headers=headers, write_header=True, model="w+")
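Every example here calls into a project-local FileOperator helper whose source is not shown. The following is a minimal sketch consistent with the call sites above; the method names, the misspelled file_exits, and the model keyword are taken from the calls, while the bodies are assumptions:

import csv
import os


class FileOperator:
    """Minimal stand-in for the helper the examples assume."""

    @staticmethod
    def file_exits(path):
        # The original API spells it "exits"; kept verbatim.
        return os.path.exists(path)

    @staticmethod
    def write_to_csv(rows, path, headers=None, write_header=True, model="a+"):
        # "model" appears to be the file-open mode ("a+" append, "w+"/"w" truncate).
        with open(path, model, newline="") as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=list(headers))
            if write_header:
                writer.writeheader()
            writer.writerows(rows)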
Example #2
    def get_cluster_information(self):
        """
        Get cluster information from the ResourceManager metrics endpoint
        and write it to cluster.csv / cluster2.csv.
        """
        url = self.hadoop_url + "metrics"
        write_header = True
        cluster_file = os.path.join(self.file_path, "cluster.csv")
        cluster_file2 = os.path.join(self.file_path, "cluster2.csv")
        if FileOperator.file_exits(cluster_file):
            write_header = False
        try:
            results = urlopen(url, timeout=2000).read()
            results = [json.loads(results)["clusterMetrics"]]
        except Exception as error:
            logger.error(error)
            return

        # Record total cluster capacity for later normalization.
        self.memcpu_info["memory"] = results[0].get('totalMB', 0)
        self.memcpu_info["vCores"] = results[0].get('totalVirtualCores', 0)
        self.get_cluster_scheduler()
        headers = results[0].keys()

        FileOperator.write_to_csv(results, cluster_file,
                                  headers=headers, write_header=write_header, model="a+")
        FileOperator.write_to_csv(results, cluster_file2,
                                  headers=headers, model="w")
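The metrics URL here is the YARN ResourceManager REST endpoint ws/v1/cluster/metrics, which returns its payload under a clusterMetrics key. A standalone sketch of the fetch (host and port are placeholders; in the class above the prefix comes from self.hadoop_url):

import json
from urllib.request import urlopen

hadoop_url = "http://resourcemanager:8088/ws/v1/cluster/"  # placeholder address

raw = urlopen(hadoop_url + "metrics", timeout=20).read()
metrics = json.loads(raw)["clusterMetrics"]
print(metrics.get("totalMB", 0), metrics.get("totalVirtualCores", 0))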
Example #3
    def get_scheduler_info(self, running_application):
        logger.info("start get_scheduler_info")
        apps = running_application.copy(deep=True)

        # Sum allocated memory/vcores per queue, then express each as a
        # fraction of total cluster capacity.
        apps = apps.groupby('queue')[['allocatedMB', 'allocatedVCores']].sum()
        apps['queueName'] = apps.index
        apps.insert(0, 'totalMemory', self.memcpu_info['memory'])
        apps.insert(0, 'totalCpu', self.memcpu_info['vCores'])
        apps.insert(0, 'memory', apps['allocatedMB'] / apps['totalMemory'])
        apps.insert(0, 'vCores', apps['allocatedVCores'] / apps['totalCpu'])

        scheduler_file = os.path.join(self.file_path, "scheduler_summary.csv")
        write_header = True
        if FileOperator.file_exits(scheduler_file):
            write_header = False
        apps.to_csv(scheduler_file,
                    header=write_header,
                    index=False,
                    mode="a+")

        logger.info("start get_cluster_scheduler")
        url = self.hadoop_url + "scheduler"
        scheduler_file2 = os.path.join(self.file_path, "scheduler_metric.csv")

        results = urlopen(url, timeout=2000).read()
        results = json.loads(results)
        results = results['scheduler']['schedulerInfo']['queues']['queue']
        # Drop nested sub-queue data before deriving the CSV header, so the
        # header matches the columns actually written.
        for j in results:
            if 'queues' in j:
                del j['queues']
        headers = results[0].keys()
        FileOperator.write_to_csv(results,
                                  scheduler_file2,
                                  headers=headers,
                                  model="w+")
Example #4
def update_cluster_info(rmq, cfg):
    cluster_file = cfg.get_cluster_metric_path()
    if FileOperator.file_exits(cluster_file):
        total_mb = datainput.read_cluster_csv(cluster_file)
        if total_mb == 0:
            return
        queue = rmq.get_queue('root')
        queue.data.add_totalMb(total_mb)
Example #5
def update_predict_info(rmq, cfg):
    prediction_file = cfg.get_prediction_path()
    if FileOperator.file_exits(prediction_file):
        queue_wishes = datainput.read_prediction_csv(prediction_file)
        for wish in queue_wishes:
            queue = rmq.get_queue(wish.name)
            if queue is None:
                print("Unknown queue name6", wish.name)
                continue
            queue.data.update_queue_wish(wish)
Example #6
def update_app_info(rmq, cfg):
    app_file = cfg.get_job_metric_path()
    if FileOperator.file_exits(app_file):
        jobs = datainput.read_app_csv(app_file)
        for job in jobs:
            queue = rmq.get_queue(job.name)
            if queue is None:
                print("Unknown queue name4", job.name)
                continue
            queue.data.add_job(job)
Example #7
def update_scheduler_info(rmq, cfg):
    scheduler_file = cfg.get_scheduler_metric_path()
    if FileOperator.file_exits(scheduler_file):
        queue_configs = datainput.read_scheduler_csv(scheduler_file)
        for qc in queue_configs:
            queue = rmq.get_queue(qc.name)
            if queue is None:
                print("Unknown queue name", qc.name)
                continue
            queue.data.update_queue_config(qc)
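Examples 4 through 7 all follow one pattern: read a CSV via datainput, resolve the queue by name on rmq, and push the record into queue.data. A minimal stand-in for the rmq object they assume (every name in this sketch is an assumption):

class Queue:
    def __init__(self, name, data):
        self.name = name
        self.data = data


class ResourceManagerQueues:
    """Hypothetical container behind the rmq argument."""

    def __init__(self, queues):
        self._queues = {q.name: q for q in queues}

    def get_queue(self, name):
        # Returns None for unknown names, which every caller above guards against.
        return self._queues.get(name)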
Example #8
    def get_cluster_information(self):
        logger.info("start get_cluster_information")
        url = self.hadoop_url + "metrics"
        write_header = True
        cluster_file = os.path.join(self.file_path, "cluster.csv")
        if FileOperator.file_exits(cluster_file):
            write_header = False
        results = urlopen(url, timeout=2000).read()
        results = [json.loads(results)["clusterMetrics"]]
        self.memcpu_info["memory"] = results[0].get('totalMB', 0)
        self.memcpu_info["vCores"] = results[0].get('totalVirtualCores', 0)
        headers = results[0].keys()
        FileOperator.write_to_csv(results,
                                  cluster_file,
                                  headers=headers,
                                  write_header=write_header,
                                  model="a+")
        self.get_applications_information()
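Because the collectors open their CSVs with model="a+", each call appends one row, which suggests they are driven by a periodic loop. A hypothetical driver:

import time


def collect_forever(collector, interval_seconds=60):
    # collector is an instance of the class that defines get_cluster_information;
    # the polling interval is an assumption.
    while True:
        collector.get_cluster_information()
        time.sleep(interval_seconds)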