Example No. 1
    def __init__(self, file_path):
        # Load the Hadoop REST endpoints and job metric names from the shared config file.
        self.config_util = ConfigUtil(CONFIG_FILE)
        self.hadoop_url = self.config_util.get_options("url", "hadoop_url")
        self.file_path = file_path
        self.application_url = self.config_util.get_options(
            "url", "application_url")
        self.job_metrics = self.config_util.get_options("job", "job_metrices")
        self.job_url = self.config_util.get_options("url", "job_url")
        self.memcpu_info = {}
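The CONFIG_FILE consumed above is not shown in any of the examples. Judging from the section/option pairs requested here and the value asserted by the test in Example No. 2 below, it is presumably an INI-style file along these lines; every value except hadoop_url is an illustrative placeholder:

[url]
hadoop_url = http://10.10.236.203:8088/ws/v1/cluster/
; placeholders - the real endpoints are not shown in the examples
application_url = http://10.10.236.203:18080/api/v1/applications/
job_url = http://10.10.236.203:19888/ws/v1/history/mapreduce/jobs

[job]
; placeholder list of Spark job fields to keep
job_metrices = jobId,name,status,numTasks,numCompletedTasks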
Example No. 2
    def test_get_options(self):
        """
        Test that ConfigUtil.get_options returns the configured value and
        raises KeyError for unknown sections or options.
        """
        config_file = "../conf/properties.conf"
        config_util = ConfigUtil(config_file)
        self.assertEqual(config_util.get_options("url", "hadoop_url"),
                         "http://10.10.236.203:8088/ws/v1/cluster/")
        self.assertRaises(KeyError, config_util.get_options, "url",
                          "hadoop_ur2l")
        self.assertRaises(KeyError, config_util.get_options, "url2",
                          "hadoop_url")
        self.assertRaises(KeyError, config_util.get_options, "url2")
Example No. 3
def main(text: str):
    config = ConfigUtil.get_instance().config
    # text = get_text(config)
    # text = 'お米ならいくらでも食べられます。'
    text_list = tokenize(text)
    result_tuple_list = predict(text_list, config)
    # result_tuple_list = [('バラジャーノ', 0.83023), ('ほげ', 0.82)]
    result_json = tuple_list_to_json(result_tuple_list)
    print(result_json)
    return result_json
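For completeness, main can be exercised directly with the sample sentence from the commented-out line above; tokenize, predict, tuple_list_to_json and ConfigUtil.get_instance are assumed to be defined elsewhere in the same module:

if __name__ == '__main__':
    # Prints and returns a JSON string built from (surface, score) tuples,
    # shaped roughly like the commented-out result_tuple_list above.
    main('お米ならいくらでも食べられます。')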
Example No. 4
    def __init__(self):
        conf = ConfigUtil()
        host = conf.get_config(conf='host', section='db_info')
        port = conf.get_config(conf='port', section='db_info')
        username = conf.get_config(conf='username', section='db_info')
        password = conf.get_config(conf='password', section='db_info')
        database = conf.get_config(conf='database', section='db_info')
        charset = conf.get_config(conf='charset', section='db_info')

        self.DB_CONNECT_STRING = 'mysql+mysqldb://{username}:{password}@{host}:{port}/{database}?charset={charset}'.format(
            username=username,
            password=password,
            host=host,
            port=port,
            database=database,
            charset=charset)
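The string built above is in SQLAlchemy's mysql+mysqldb URL format, so the natural (but not shown) next step is to hand it to create_engine. `Database` below is a hypothetical name for the class whose __init__ appears above:

from sqlalchemy import create_engine, text

db = Database()  # hypothetical class wrapping the __init__ shown above
engine = create_engine(db.DB_CONNECT_STRING)
with engine.connect() as connection:
    # Simple connectivity check against the configured database.
    print(connection.execute(text("SELECT 1")).scalar())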
Example No. 5
import tornado.httpserver
import tornado.web
import tornado.ioloop
from config_util import ConfigUtil

from core.handler import BaseHandler


class Application(tornado.web.Application):
    def __init__(self):
        print(type(BaseHandler))
        super(Application, self).__init__(handlers=[(r"/api/.*", BaseHandler)],
                                          debug=False)


if __name__ == '__main__':
    config = ConfigUtil(file_paths=[("server", "server.conf")])
    app = Application()
    http_server = tornado.httpserver.HTTPServer(app)
    http_server.listen(int(config.get_config("server", "tornado", "port")))
    tornado.ioloop.IOLoop.instance().start()
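Here ConfigUtil takes named (name, path) pairs and is queried with get_config("server", "tornado", "port"), which suggests server.conf holds a [tornado] section with a port option. A placeholder sketch (the actual port value is not shown in the example):

[tornado]
; illustrative value only
port = 8888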
Example No. 6
class HadoopUtil(object):
    def __init__(self, file_path):
        self.config_util = ConfigUtil(CONFIG_FILE)
        self.hadoop_url = self.config_util.get_options("url", "hadoop_url")
        self.file_path = file_path
        self.application_url = self.config_util.get_options(
            "url", "application_url")
        self.job_metrics = self.config_util.get_options("job", "job_metrices")
        self.job_url = self.config_util.get_options("url", "job_url")
        self.memcpu_info = {}

    def get_cluster_information(self):
        logger.info("start get_cluster_information")
        url = self.hadoop_url + "metrics"
        write_header = True
        cluster_file = os.path.join(self.file_path, "cluster.csv")
        if FileOperator.file_exits(cluster_file):
            write_header = False
        results = urlopen(url, timeout=2000).read()
        results = [json.loads(results)["clusterMetrics"]]
        self.memcpu_info["memory"] = results[0].get('totalMB', 0)
        self.memcpu_info["vCores"] = results[0].get('totalVirtualCores', 0)
        headers = results[0].keys()
        FileOperator.write_to_csv(results,
                                  cluster_file,
                                  headers=headers,
                                  write_header=write_header,
                                  model="a+")
        self.get_applications_information()

    def get_scheduler_info(self, running_application):
        logger.info("start get_scheduler_info")
        apps = running_application.copy(deep=True)

        apps = apps.groupby('queue')[['allocatedMB', 'allocatedVCores']].sum()
        apps['queueName'] = apps.index
        apps.insert(0, 'totalMemory', self.memcpu_info['memory'])
        apps.insert(0, 'totalCpu', self.memcpu_info['vCores'])
        apps.insert(0, 'memory', apps['allocatedMB'] / apps['totalMemory'])
        apps.insert(0, 'vCores', apps['allocatedVCores'] / apps['totalCpu'])

        scheduler_file = os.path.join(self.file_path, "scheduler_summary.csv")
        scheduler_file1 = os.path.join(self.file_path,
                                       "scheduler_summary_current.csv")
        write_header = True
        if FileOperator.file_exits(scheduler_file):
            write_header = False
        apps.to_csv(scheduler_file,
                    header=write_header,
                    index=False,
                    mode="a+")
        apps.to_csv(scheduler_file1, header=True, index=False, mode="w")

        logger.info("start get_cluster_scheduler")
        url = self.hadoop_url + "scheduler"
        scheduler_file2 = os.path.join(self.file_path, "scheduler_metric.csv")

        results = urlopen(url, timeout=2000).read()
        results = json.loads(results)
        results = results['scheduler']['schedulerInfo']['queues']['queue']
        headers = results[0].keys()
        for j in results:
            if 'queues' in j:
                del j['queues']
        FileOperator.write_to_csv(results,
                                  scheduler_file2,
                                  headers=headers,
                                  model="w+")

    @staticmethod
    def request_url(url):
        try:
            result = urlopen(url, timeout=2000).read()
        except urlerror as error:
            raise urlerror("urlopen {0} error:{1}".format(url, error.reason))
        else:
            return result

    def get_applications_information(self):
        logger.info("start get_application_information")
        hadoop_rest_url = self.hadoop_url + "apps?"
        finished_app_file = os.path.join(self.file_path, "finishedapp.csv")
        running_app1_file = os.path.join(self.file_path, "runningapp1.csv")
        running_app2_file = os.path.join(self.file_path, "runningapp2.csv")

        finished_data = HadoopUtil.request_url(
            hadoop_rest_url + "states=finished&finishedTimeEnd={0}".format(
                TIME_BEGIN - FLAGS.time_period * 1000))
        running_data = HadoopUtil.request_url(hadoop_rest_url + "states=running")
        try:
            finished_data_list = json.loads(finished_data)['apps']['app']
            finished_data_frame = DataFrame(finished_data_list)
            finished_data_frame = finished_data_frame[
                (finished_data_frame['state'] == 'FINISHED') |
                (finished_data_frame['state'] == 'finished')]
        except KeyError as error:
            logger.error("key error {0}".format(error))
        except TypeError:
            logger.warning('did not get any data for finished apps using: {0}'.format(
                hadoop_rest_url + "states=finished&finishedTimeEnd={0}".format(
                    TIME_BEGIN - FLAGS.time_period * 1000)))
        except Exception as error:
            logger.error(error)
        else:
            finished_data_frame.to_csv(finished_app_file, index=False)
            # self.get_scheduler_info(finished_data_frame)

        try:
            running_data_list = json.loads(running_data)['apps']['app']
            running_data_frame = DataFrame(running_data_list)
            running_data_frame = running_data_frame[
                (running_data_frame['state'] == 'RUNNING')
                | (running_data_frame['state'] == 'running')]
            running_data1 = running_data_frame[
                running_data_frame['startedTime'] <= (
                    TIME_BEGIN - FLAGS.time_period * 1000)]
            running_data2 = running_data_frame[
                running_data_frame['startedTime'] > (TIME_BEGIN -
                                                     FLAGS.time_period * 1000)]
        except KeyError as error:
            logger.error("key error {0}".format(error))
        except TypeError:
            logger.warning('did not get any data for running apps using: {0}'.format(
                hadoop_rest_url + "states=running"))
        except Exception as error:
            logger.error(error)
        else:
            running_data1.to_csv(running_app1_file, index=False)
            running_data2.to_csv(running_app2_file, index=False)
            self.get_scheduler_info(running_data_frame)

    def get_commonjobs_information(self):
        logger.info("start get_commonjobs_information")
        commonjob_file = os.path.join(self.file_path, "commonjob.csv")

        result = HadoopUtil.request_url(self.job_url)

        result = json.loads(result)["jobs"]
        if not result:
            return
        result = result["job"]
        headers = result[0].keys()
        FileOperator.write_to_csv(result, commonjob_file, headers=headers)
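A hedged sketch of how this collector might be driven, assuming CONFIG_FILE, TIME_BEGIN, FLAGS and logger are already configured at module level; note that get_cluster_information() chains into get_applications_information(), which in turn calls get_scheduler_info() for the running applications:

# Hypothetical driver; the output directory is a placeholder.
hadoop_util = HadoopUtil("/tmp/hadoop_metrics")
hadoop_util.get_cluster_information()   # also collects apps and scheduler data
hadoop_util.get_commonjobs_information()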
Example No. 7
    def create_config_util(self):
        # Lazily create and cache a single ConfigUtil instance.
        if self._config_util is None:
            self._config_util = ConfigUtil()
        return self._config_util
Example No. 8
class HadoopUtil(object):
    """
    获取hadoop 的相关信息,主要包括队列信息,job信息
    """

    def __init__(self, file_path):
        """
        :param hadoop_url: 初始化集群url
        """

        self.config_util = ConfigUtil(CONFIG_FILE)
        self.hadoop_url = self.config_util.get_options("url", "hadoop_url")
        self.file_path = file_path
        self.application_url = self.config_util.get_options("url", "application_url")
        self.job_metrics = self.config_util.get_options("job", "job_metrices")
        self.job_url = self.config_util.get_options("url", "job_url")
        self.memcpu_info = {}

    def get_cluster_information(self):
        """
        get cluster infromation
        """
        url = self.hadoop_url + "metrics"
        write_header = True
        cluster_file = os.path.join(self.file_path, "cluster.csv")
        cluster_file2 = os.path.join(self.file_path, "cluster2.csv")
        if FileOperator.file_exits(cluster_file):
            write_header = False
        try:
            results = urlopen(url, timeout=2000).read()
            results = [json.loads(results)["clusterMetrics"]]
        except Exception as error:
            logger.error(error)
            return

        self.memcpu_info["memory"] = results[0].get('totalMB', 0)
        self.memcpu_info["vCores"] = results[0].get('totalVirtualCores', 0)
        self.get_cluster_scheduler()
        headers = results[0].keys()

        FileOperator.write_to_csv(results, cluster_file,
                                  headers=headers, write_header=write_header,
                                  model="a+")
        FileOperator.write_to_csv(results, cluster_file2,
                                  headers=headers, model="w")

    def get_cluster_scheduler(self):
        """
        获取hadoop 集群信息
        :param file: 输出文件保存路径
        """
        url = self.hadoop_url + "scheduler"
        scheduler_file = os.path.join(self.file_path, "scheduler.csv")
        scheduler_file2 = os.path.join(self.file_path, "scheduler2.csv")

        try:
            results = urlopen(url, timeout=2000).read()
            results = json.loads(results)
            results = results['scheduler']['schedulerInfo']['queues']['queue']
            print(self.memcpu_info)
            for scheduler_info in results:
                results_copy = scheduler_info.copy()
                for key, value in results_copy['resourcesUsed'].items():
                    scheduler_info[key] = value / self.memcpu_info[key]
        except KeyError as error:
            logger.error("key error {0}".format(error))
            return
        except Exception as error:
            logger.error(error)
            return

        write_header = True
        if FileOperator.file_exits(scheduler_file):
            write_header = False
        headers = results[0].keys()
        FileOperator.write_to_csv(results, scheduler_file,
                                  headers=headers, write_header=write_header,
                                  model="a+")
        FileOperator.write_to_csv(results, scheduler_file2,
                                  headers=headers, write_header=write_header,
                                  model="w+")

    @staticmethod
    def request_url(url):
        try:
            result = urlopen(url, timeout=2000).read()
        except urlerror as error:
            raise urlerror("urlopen {0} error:{1}".format(url, error.reason))
        else:
            return result

    def get_applications_information(self, query_parameters=None):
        """
        :param query_parameters: dict of filter conditions; if None, all
            applications are collected. Supported keys:
          * state [deprecated] - state of the application
          * states - applications matching the given application states,
                specified as a comma-separated list.
          * finalStatus - the final status of the application -
                reported by the application itself
          * user - user name
          * queue - queue name
          * limit - total number of app objects to be returned
          * startedTimeBegin -
                applications with start time beginning with this time,
                specified in ms since epoch
          * startedTimeEnd -
                applications with start time ending with this time,
                specified in ms since epoch
          * finishedTimeBegin -
                applications with finish time beginning with this time,
                specified in ms since epoch
          * finishedTimeEnd -
                applications with finish time ending with this time,
                specified in ms since epoch
          * applicationTypes -
                applications matching the given application types,
                specified as a comma-separated list.
          * applicationTags -
                applications matching any of the given application tags,
                specified as a comma-separated list.
        example:
           query_parameters = {"finalStatus": "SUCCEEDED"}
           get_applications_information(query_parameters=query_parameters)
        """
        hadoop_rest_url = self.hadoop_url + "apps?"
        app_file = os.path.join(self.file_path, "app.csv")

        try:
            for key, value in query_parameters.items():
                hadoop_rest_url += key + "=" + str(value) + "&"
        except AttributeError:
            logger.warning("did not get any query_parameters, so collecting all apps")

        json_result = HadoopUtil.request_url(hadoop_rest_url)
        try:
            list_result = json.loads(json_result)['apps']['app']
            headers = list_result[0].keys()
        except KeyError as error:
            logger.error("key error {0}".format(error))
        except TypeError:
            logger.warning("did not get any data from parameters "
                           "{0}".format(query_parameters))
        except Exception as error:
            logger.error(error)
        else:
            FileOperator.write_to_csv(list_result, app_file, headers=headers)
            self.get_sparkjobs_information(list_result)

    def get_sparkjobs_information(self, applications):
        """
        get each application's jobs information
        :param applications: list contains applications information
        """
        app_jobs = []
        spark_job_file = os.path.join(self.file_path, "sparkjob.json")
        self.job_metrics = self.job_metrics.replace("\n", "").split(',')
        for application_items in applications:
            application_id = application_items["id"]
            application_rest_url = self.application_url + application_id + "/1/jobs"
            try:
                application_jobs_list = HadoopUtil.request_url(application_rest_url)
                application_jobs_list = json.loads(application_jobs_list)
            except urlerror:
                logger.warning("this application {0} is not "
                               "a spark type application".format(application_items["id"]))
            else:
                for application_job in application_jobs_list:
                    apps = {key: value for key, value in application_job.items()
                            if key in self.job_metrics}
                    app_jobs.append(dict(apps, **application_items))
        if not app_jobs:
            return
        headers = app_jobs[0].keys()
        FileOperator.write_to_json(app_jobs, spark_job_file)
        FileOperator.write_to_csv(app_jobs, spark_job_file, headers=headers)

    def get_commonjobs_information(self):
        commonjob_file = os.path.join(self.file_path, "commonjob.csv")

        result = HadoopUtil.request_url(self.job_url)

        result = json.loads(result)["jobs"]
        if not result:
            return
        result = result["job"]
        headers = result[0].keys()
        FileOperator.write_to_csv(result, commonjob_file, headers=headers)
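As with Example No. 6, a short usage sketch; the directory and the filter values are placeholders assembled only from parameters listed in the docstring of get_applications_information:

# Hypothetical call against the collector above.
hadoop_util = HadoopUtil("/tmp/hadoop_metrics")
hadoop_util.get_applications_information(
    query_parameters={"states": "FINISHED", "finalStatus": "SUCCEEDED", "limit": 50})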