Example #1
    def get_cluster_scheduler(self):
        """
        Collect Hadoop cluster scheduler (queue) information and write it to
        scheduler.csv (appended) and scheduler2.csv (overwritten) under self.file_path.
        """
        url = self.hadoop_url + "scheduler"
        scheduler_file = os.path.join(self.file_path, "scheduler.csv")
        scheduler_file2 = os.path.join(self.file_path, "scheduler2.csv")

        try:
            results = urlopen(url, timeout=2000).read()
            results = json.loads(results)
            results = results['scheduler']['schedulerInfo']['queues']['queue']
            logger.debug(self.memcpu_info)
            # Normalize each queue's resource usage by the cluster totals
            # collected in get_cluster_information().
            for scheduler_info in results:
                results_copy = scheduler_info.copy()
                for key, value in results_copy['resourcesUsed'].items():
                    scheduler_info[key] = value / self.memcpu_info[key]
        except KeyError as error:
            logger.error("key error {0}".format(error))
            return
        except Exception as error:
            logger.error(error)
            return

        write_header = True
        if FileOperator.file_exits(scheduler_file):
            write_header = False
        headers = results[0].keys()
        FileOperator.write_to_csv(results, scheduler_file,
                                  headers=headers, write_header=write_header, model="a+")
        FileOperator.write_to_csv(results, scheduler_file2,
                                  headers=headers, write_header=write_header, model="w+")
Example #2
    def get_cluster_information(self):
        """
        Get cluster information (YARN cluster metrics) and write it to
        cluster.csv (appended) and cluster2.csv (overwritten).
        """
        url = self.hadoop_url + "metrics"
        write_header = True
        cluster_file = os.path.join(self.file_path, "cluster.csv")
        cluster_file2 = os.path.join(self.file_path, "cluster2.csv")
        if FileOperator.file_exits(cluster_file):
            write_header = False
        try:
            results = urlopen(url, timeout=2000).read()
            results = [json.loads(results)["clusterMetrics"]]
        except Exception as error:
            logger.error(error)
            return

        # Remember the cluster totals so scheduler usage can be normalized later.
        self.memcpu_info["memory"] = results[0].get('totalMB', 0)
        self.memcpu_info["vCores"] = results[0].get('totalVirtualCores', 0)
        self.get_cluster_scheduler()
        headers = results[0].keys()

        FileOperator.write_to_csv(results, cluster_file,
                                  headers=headers, write_header=write_header, model="a+")
        FileOperator.write_to_csv(results, cluster_file2,
                                  headers=headers, model="w")
Example #3
    def get_sparkjobs_information(self, applications):
        """
        Get each application's jobs information.
        :param applications: list containing applications information
        """
        app_jobs = []
        spark_job_file = os.path.join(self.file_path, "sparkjob.json")
        self.job_metrics = self.job_metrics.replace("\n", "").split(',')
        for application_items in applications:
            application_id = application_items["id"]
            application_rest_url = self.application_url + application_id + "/1/jobs"
            try:
                application_jobs_list = HadoopUtil.request_url(application_rest_url)
                application_jobs_list = json.loads(application_jobs_list)
            except urlerror:
                logger.warning("this application {0} is not "
                               "a spark type application".format(application_items["id"]))
            else:
                # Keep only the configured job metrics and merge them with the
                # application-level fields.
                for job in application_jobs_list:
                    apps = {key: value for key, value in job.items()
                            if key in self.job_metrics}
                    app_jobs.append(dict(apps, **application_items))
        if not app_jobs:
            return
        headers = app_jobs[0].keys()
        FileOperator.write_to_json(app_jobs, spark_job_file)
        FileOperator.write_to_csv(app_jobs, spark_job_file, headers=headers)
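HadoopUtil.request_url and the urlerror exception are also not shown on this page. A plausible minimal version, assuming they are a thin wrapper around urlopen and an alias for URLError, would be:

from urllib.error import URLError as urlerror   # assumed alias used in the example above
from urllib.request import urlopen


class HadoopUtil(object):
    """Hypothetical helper: fetch a REST URL and return the raw response body."""

    @staticmethod
    def request_url(url, timeout=20):
        return urlopen(url, timeout=timeout).read()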
Example #4
    def get_scheduler_info(self, running_application):
        logger.info("start get_scheduler_info")
        apps = running_application.copy(deep=True)

        # Sum allocated resources per queue; double-bracket selection keeps a DataFrame.
        apps = apps.groupby('queue')[['allocatedMB', 'allocatedVCores']].sum()
        apps['queueName'] = apps.index
        apps.insert(0, 'totalMemory', self.memcpu_info['memory'])
        apps.insert(0, 'totalCpu', self.memcpu_info['vCores'])
        apps.insert(0, 'memory', apps['allocatedMB'] / apps['totalMemory'])
        apps.insert(0, 'vCores', apps['allocatedVCores'] / apps['totalCpu'])

        scheduler_file = os.path.join(self.file_path, "scheduler_summary.csv")
        write_header = True
        if FileOperator.file_exits(scheduler_file):
            write_header = False
        apps.to_csv(scheduler_file,
                    header=write_header,
                    index=False,
                    mode="a+")

        logger.info("start get_cluster_scheduler")
        url = self.hadoop_url + "scheduler"
        scheduler_file2 = os.path.join(self.file_path, "scheduler_metric.csv")

        results = urlopen(url, timeout=2000).read()
        results = json.loads(results)
        results = results['scheduler']['schedulerInfo']['queues']['queue']
        headers = results[0].keys()
        # Drop the nested child-queue structure, which cannot be flattened into a CSV row.
        for j in results:
            if 'queues' in j:
                del j['queues']
        FileOperator.write_to_csv(results,
                                  scheduler_file2,
                                  headers=headers,
                                  model="w+")
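The per-queue aggregation in get_scheduler_info is plain pandas. A toy illustration of the same groupby/normalization step, with made-up numbers and an assumed cluster total of 8192 MB and 8 vCores:

import pandas as pd

running = pd.DataFrame({
    "queue": ["default", "default", "etl"],
    "allocatedMB": [2048, 1024, 4096],
    "allocatedVCores": [2, 1, 4],
})

# Sum allocations per queue, then express them as a fraction of the cluster totals.
per_queue = running.groupby("queue")[["allocatedMB", "allocatedVCores"]].sum()
per_queue["memory"] = per_queue["allocatedMB"] / 8192   # assumed cluster totalMB
per_queue["vCores"] = per_queue["allocatedVCores"] / 8  # assumed cluster total vCores
print(per_queue)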
Example #5
    def get_applications_information(self, query_parametes=None):
        """
        :param query_parametes: dict of filter conditions; if omitted, all applications are collected. Supported keys:
          * state [deprecated] - state of the application
          * states - applications matching the given application states,
                specified as a comma-separated list.
          * finalStatus - the final status of the application -
                reported by the application itself
          * user - user name
          * queue - queue name
          * limit - total number of app objects to be returned
          * startedTimeBegin -
                applications with start time beginning with this time,
                specified in ms since epoch
          * startedTimeEnd -
                applications with start time ending with this time,
                specified in ms since epoch
          * finishedTimeBegin -
                applications with finish time beginning with this time,
                specified in ms since epoch
          * finishedTimeEnd -
                applications with finish time ending with this time,
                specified in ms since epoch
          * applicationTypes -
                applications matching the given application types,
                specified as a comma-separated list.
          * applicationTags -
                applications matching any of the given application tags,
                specified as a comma-separated list.
        Results are written to app.csv under self.file_path.
        example:
           query_parametes = {"finalStatus": "SUCCEEDED"}
           get_applications_information(query_parametes=query_parametes)
        """
        hadoop_rest_url = self.hadoop_url + "apps?"
        app_file = os.path.join(self.file_path, "app.csv")

        try:
            for key, value in query_parametes.items():
                hadoop_rest_url += key + "=" + str(value) + "&"
        except AttributeError:
            logger.warning("did not get any query_parametes, so collecting all apps")

        json_result = HadoopUtil.request_url(hadoop_rest_url)
        try:
            list_result = json.loads(json_result)['apps']['app']
            headers = list_result[0].keys()
        except KeyError as error:
            logger.error("key error {0}".format(error))
        except TypeError:
            logger.warning("did not get any data from parameters "
                           "{0}".format(query_parametes))
        except Exception as error:
            logger.error(error)
        else:
            FileOperator.write_to_csv(list_result, app_file, headers=headers)
            self.get_sparkjobs_information(list_result)
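As the docstring suggests, the filters are passed straight through as YARN REST query parameters. A typical call might look like the following, where "collector" stands for whatever instance holds these methods (the name is an assumption):

# Hypothetical usage: "collector" is an instance of the class these methods belong to.
query_parametes = {"states": "FINISHED", "finalStatus": "SUCCEEDED", "limit": 100}
collector.get_applications_information(query_parametes=query_parametes)
# Results are written to <file_path>/app.csv, and the matching Spark jobs
# to <file_path>/sparkjob.json via get_sparkjobs_information().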
Example #6
    def get_commonjobs_information(self):
        """
        Get (MapReduce) job information from self.job_url and write it to commonjob.csv.
        """
        commonjob_file = os.path.join(self.file_path, "commonjob.csv")

        result = HadoopUtil.request_url(self.job_url)

        result = json.loads(result)["jobs"]
        if not result:
            return
        result = result["job"]
        headers = result[0].keys()
        FileOperator.write_to_csv(result, commonjob_file, headers=headers)
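The {"jobs": {"job": [...]}} structure unwrapped here matches the MapReduce JobHistory server jobs endpoint, so self.job_url presumably looks something like the following (host and port are examples only):

# Example only: the JobHistory server web UI usually listens on port 19888.
job_url = "http://historyserver-host:19888/ws/v1/history/mapreduce/jobs"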
Example #7
    def get_cluster_information(self):
        logger.info("start get_cluster_information")
        url = self.hadoop_url + "metrics"
        write_header = True
        cluster_file = os.path.join(self.file_path, "cluster.csv")
        if FileOperator.file_exits(cluster_file):
            write_header = False
        results = urlopen(url, timeout=2000).read()
        results = [json.loads(results)["clusterMetrics"]]
        self.memcpu_info["memory"] = results[0].get('totalMB', 0)
        self.memcpu_info["vCores"] = results[0].get('totalVirtualCores', 0)
        headers = results[0].keys()
        FileOperator.write_to_csv(results,
                                  cluster_file,
                                  headers=headers,
                                  write_header=write_header,
                                  model="a+")
        self.get_applications_information()
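Taken together, this variant of get_cluster_information acts as the entry point of a collection run: it stores the cluster totals in self.memcpu_info, writes cluster.csv, and then drives the rest of the pipeline via get_applications_information(). A minimal driver sketch, where the collector class name and its constructor arguments are assumptions (the examples only show its methods):

import time

# Hypothetical driver loop; HadoopCollector and its arguments are assumed names.
collector = HadoopCollector(hadoop_url="http://resourcemanager-host:8088/ws/v1/cluster/",
                            file_path="/tmp/hadoop-metrics")
while True:
    collector.get_cluster_information()   # also triggers get_applications_information()
    time.sleep(60)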