def get_cluster_scheduler(self):
    """Fetch Hadoop scheduler queue metrics and export them to CSV.

    Queries the ResourceManager ``scheduler`` REST endpoint, normalizes
    each queue's ``resourcesUsed`` values against the cluster totals held
    in ``self.memcpu_info``, then writes the rows to ``scheduler.csv``
    (appended across runs) and ``scheduler2.csv`` (fresh snapshot).
    """
    url = self.hadoop_url + "scheduler"
    scheduler_file = os.path.join(self.file_path, "scheduler.csv")
    scheduler_file2 = os.path.join(self.file_path, "scheduler2.csv")
    try:
        results = urlopen(url, timeout=2000).read()
        results = json.loads(results)
        results = results['scheduler']['schedulerInfo']['queues']['queue']
        print(self.memcpu_info)
        for scheduler_info in results:
            # Iterate over a copy so we can safely add normalized keys
            # to scheduler_info while reading resourcesUsed.
            results_copy = scheduler_info.copy()
            for key, value in results_copy['resourcesUsed'].items():
                scheduler_info[key] = value / self.memcpu_info[key]
    except KeyError as error:
        logger.error("key error {0}".format(error))
        return
    except Exception as error:
        # BUG FIX: the original fell through and used `results` below even
        # when the request itself failed, raising NameError; bail out.
        logger.error(error)
        return
    write_header = True
    if FileOperator.file_exits(scheduler_file):
        write_header = False
    headers = results[0].keys()
    FileOperator.write_to_csv(results, scheduler_file, headers=headers,
                              write_header=write_header, model="a+")
    # BUG FIX: scheduler2.csv is truncated ("w+") on every run, so it must
    # always get a header row — not only when scheduler.csv is absent.
    FileOperator.write_to_csv(results, scheduler_file2, headers=headers,
                              write_header=True, model="w+")
def get_cluster_information(self):
    """Fetch cluster-wide metrics from the ResourceManager and export them.

    Reads the ``metrics`` REST endpoint, records the cluster memory/vcore
    totals in ``self.memcpu_info``, triggers scheduler collection, and
    writes the metrics to ``cluster.csv`` (appended) and ``cluster2.csv``
    (overwritten snapshot).
    """
    url = self.hadoop_url + "metrics"
    write_header = True
    cluster_file = os.path.join(self.file_path, "cluster.csv")
    cluster_file2 = os.path.join(self.file_path, "cluster2.csv")
    if FileOperator.file_exits(cluster_file):
        write_header = False
    try:
        results = urlopen(url, timeout=2000).read()
        results = [json.loads(results)["clusterMetrics"]]
    except Exception as error:
        # BUG FIX: the original logged and fell through, then dereferenced
        # the undefined `results` (NameError); return early instead.
        logger.error(error)
        return
    self.memcpu_info["memory"] = results[0].get('totalMB', 0)
    self.memcpu_info["vCores"] = results[0].get('totalVirtualCores', 0)
    self.get_cluster_scheduler()
    headers = results[0].keys()
    FileOperator.write_to_csv(results, cluster_file, headers=headers,
                              write_header=write_header, model="a+")
    # cluster2.csv is a snapshot rewritten each run (model="w").
    FileOperator.write_to_csv(results, cluster_file2, headers=headers,
                              model="w")
def get_scheduler_info(self, running_application):
    """Summarize per-queue resource usage and export scheduler metrics.

    :param running_application: pandas DataFrame of running applications
        with at least ``queue``, ``allocatedMB`` and ``allocatedVCores``
        columns.

    Aggregates allocations per queue, normalizes them against the cluster
    totals in ``self.memcpu_info``, appends the summary to
    ``scheduler_summary.csv``, then fetches the raw scheduler queue info
    and rewrites ``scheduler_metric.csv``.
    """
    logger.info("start get_scheduler_info")
    apps = running_application.copy(deep=True)
    # BUG FIX: tuple-style column selection after groupby
    # (``['a', 'b']`` without an outer list) is deprecated/removed in
    # modern pandas; select with a list of column labels.
    apps = apps.groupby('queue')[['allocatedMB', 'allocatedVCores']].sum()
    apps['queueName'] = apps.index
    apps.insert(0, 'totalMemory', self.memcpu_info['memory'])
    apps.insert(0, 'totalCpu', self.memcpu_info['vCores'])
    apps.insert(0, 'memory', apps['allocatedMB'] / apps['totalMemory'])
    apps.insert(0, 'vCores', apps['allocatedVCores'] / apps['totalCpu'])
    scheduler_file = os.path.join(self.file_path, "scheduler_summary.csv")
    # Only write the CSV header on the first run (file does not exist yet).
    write_header = not FileOperator.file_exits(scheduler_file)
    apps.to_csv(scheduler_file, header=write_header, index=False, mode="a+")
    logger.info("start get_cluster_scheduler")
    url = self.hadoop_url + "scheduler"
    scheduler_file2 = os.path.join(self.file_path, "scheduler_metric.csv")
    results = urlopen(url, timeout=2000).read()
    results = json.loads(results)
    results = results['scheduler']['schedulerInfo']['queues']['queue']
    # Drop nested sub-queue payloads before export.
    # BUG FIX: dict.has_key() was removed in Python 3; use pop with a
    # default, which also covers entries lacking the key.
    for queue_entry in results:
        queue_entry.pop('queues', None)
    # BUG FIX: compute headers AFTER removing 'queues' so the CSV header
    # row matches the exported fields.
    headers = results[0].keys()
    FileOperator.write_to_csv(results, scheduler_file2, headers=headers,
                              model="w+")
def update_cluster_info(rmq, cfg):
    """Record the cluster's total memory on the root queue.

    Reads the cluster metric CSV named by ``cfg``; does nothing when the
    file is missing or the reported total is zero.
    """
    path = cfg.get_cluster_metric_path()
    if not FileOperator.file_exits(path):
        return
    total_mb = datainput.read_cluster_csv(path)
    if total_mb == 0:
        return
    rmq.get_queue('root').data.add_totalMb(total_mb)
def update_predict_info(rmq, cfg):
    """Apply predicted resource wishes from the prediction CSV to queues.

    Each wish is matched to a queue by name; wishes naming an unknown
    queue are reported and skipped. No-op when the file does not exist.
    """
    prediction_file = cfg.get_prediction_path()
    if not FileOperator.file_exits(prediction_file):
        return
    queue_wishes = datainput.read_prediction_csv(prediction_file)
    for wish in queue_wishes:
        queue = rmq.get_queue(wish.name)
        if queue is None:
            # BUG FIX: message carried a stray debug suffix ("name6");
            # normalized to match the sibling update_* functions.
            print("Unknown queue name", wish.name)
            continue
        queue.data.update_queue_wish(wish)
def update_app_info(rmq, cfg):
    """Register jobs from the application metric CSV on their queues.

    Each job is matched to a queue by name; jobs naming an unknown queue
    are reported and skipped. No-op when the file does not exist.
    """
    app_file = cfg.get_job_metric_path()
    if not FileOperator.file_exits(app_file):
        return
    jobs = datainput.read_app_csv(app_file)
    for job in jobs:
        queue = rmq.get_queue(job.name)
        if queue is None:
            # BUG FIX: message carried a stray debug suffix ("name4");
            # normalized to match the sibling update_* functions.
            print("Unknown queue name", job.name)
            continue
        queue.data.add_job(job)
def update_scheduler_info(rmq, cfg):
    """Push queue configurations from the scheduler metric CSV onto queues.

    Configs naming an unknown queue are reported and skipped. No-op when
    the file does not exist.
    """
    scheduler_file = cfg.get_scheduler_metric_path()
    if not FileOperator.file_exits(scheduler_file):
        return
    for qc in datainput.read_scheduler_csv(scheduler_file):
        target = rmq.get_queue(qc.name)
        if target is None:
            print("Unknown queue name", qc.name)
        else:
            target.data.update_queue_config(qc)
def get_cluster_information(self):
    """Fetch cluster metrics, record totals, export CSV, then collect apps.

    Reads the ResourceManager ``metrics`` REST endpoint, stores the
    cluster memory/vcore totals in ``self.memcpu_info``, appends the
    metrics to ``cluster.csv``, and finally triggers
    ``get_applications_information``.
    """
    logger.info("start get_cluster_information")
    url = self.hadoop_url + "metrics"
    cluster_file = os.path.join(self.file_path, "cluster.csv")
    # Only write the CSV header on the first run (file does not exist yet).
    write_header = not FileOperator.file_exits(cluster_file)
    try:
        results = urlopen(url, timeout=2000).read()
        results = [json.loads(results)["clusterMetrics"]]
    except Exception as error:
        # Robustness/consistency: the sibling collector logs request
        # failures instead of letting them propagate; do the same and
        # skip this collection cycle.
        logger.error(error)
        return
    self.memcpu_info["memory"] = results[0].get('totalMB', 0)
    self.memcpu_info["vCores"] = results[0].get('totalVirtualCores', 0)
    headers = results[0].keys()
    FileOperator.write_to_csv(results, cluster_file, headers=headers,
                              write_header=write_header, model="a+")
    self.get_applications_information()