Example 1
def thread_main():
    """
    Train a model and generate predictions for each queue.
    """

    cluster_df = pd.read_csv(CLUSTER_INFILE)
    # total_mem = cluster_df["totalMB"].values[0]
    # total_cpu = cluster_df["totalVirtualCores"].values[0]

    scheduler_df = pd.read_csv(SCHEDULER_INFILE)
    scheduler_df = scheduler_df.dropna(how="any", axis=0)

    # scheduler_df["memory"] = scheduler_df["memory"] / total_mem
    # scheduler_df["vCores"] = scheduler_df["vCores"] / total_cpu

    queue_names = set(scheduler_df["queueName"].values)
    scheduler_df = scheduler_df.set_index("queueName")

    FileOperator.makesure_file_exits("model_input")
    FileOperator.makesure_file_exits("model_out")
    FileOperator.makesure_file_exits("model")

    # empty the pre_file
    FileOperator.write_list_tocsv([], PRE_FILE)

    for queue_name in queue_names:
        # all rows (memory, vCores) recorded for this queue
        queue_information = scheduler_df.loc[queue_name, ["memory", "vCores"]]
        queue_information.insert(0, "times", range(queue_information.shape[0]))
        model_input_file = "./model_input/{0}.csv".format(queue_name)
        queue_information.to_csv(model_input_file, index=False, header=False)
        model_dir = "./model/{0}".format(queue_name)

        train(queue_name, model_input_file, PRE_FILE, model_dir,
              FLAGS.train_step, FLAGS.predict_step)
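The FileOperator class itself is not shown in these examples. A minimal stand-in for the two helpers used above, assuming makesure_file_exits simply creates a missing directory and write_list_tocsv (re)writes a CSV file from a list of rows, might look like this:

import os


class FileOperator:

    @staticmethod
    def makesure_file_exits(path):
        # create the directory if it does not exist yet; no-op otherwise
        os.makedirs(path, exist_ok=True)

    @staticmethod
    def write_list_tocsv(rows, csv_file):
        # overwrite csv_file with the given rows; an empty list truncates the file
        with open(csv_file, "w") as handle:
            for row in rows:
                handle.write(",".join(str(value) for value in row) + "\n")
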
Example 2
def get_logger(refresh=False):
    log_path = "./log"
    log_file = os.path.join(log_path, "logs")
    FileOperator.makesure_file_exits(log_path)
    if refresh:
        get_logger.logger = None
    # reuse the cached logger if one has already been built
    if getattr(get_logger, "logger", None):
        return get_logger.logger
    logger = logging.getLogger()
    logger.setLevel(logging.WARNING)
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.WARNING)
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.WARNING)
    formatter = logging.Formatter(
        "%(levelname)s %(asctime)s %(filename)s[line:%(lineno)d]: %(message)s")
    file_handler.setFormatter(formatter)
    stream_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.addHandler(stream_handler)
    get_logger.logger = logger
    return get_logger.logger
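A short usage sketch for the cached logger above:

logger = get_logger()
logger.warning("collector started")

# subsequent calls return the same cached logger instance
assert get_logger() is logger

# refresh=True discards the cached instance and builds the handlers again
refreshed = get_logger(refresh=True)
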
Example 3
    parser.add_argument(
        "--time_interval",
        type=int,
        default=1,
        help="to collector job's information which job's finished time begin "
             "before now.time_format:m , time_interval:20 means collectors "
             "job's information which finished in lasted 20 minutes, "
             "if time_interval < 0 then collecotrs all"
    )
    parser.add_argument(
        "--state",
        type=str,
        choices=["finished", "accepted", "running"],
        default="finished",
        help="the job's state"
    )
    parser.add_argument(
        "--time_period",
        type=int,
        default=60,
        help="the scripts run's time period"
    )

    FLAGS = parser.parse_args()
    FileOperator.makesure_file_exits(FLAGS.file_path)

    hadoop_util = HadoopUtil(FLAGS.file_path)
    main()
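The --time_interval help text above describes a cutoff on job finish time. A small sketch of how such a cutoff could be computed (the helper name is hypothetical and not part of HadoopUtil, and finish times are assumed to be millisecond epoch timestamps):

import time


def finished_within_interval(finished_time_ms, time_interval_minutes):
    # time_interval < 0 means "collect everything", so no cutoff applies
    if time_interval_minutes < 0:
        return True
    cutoff_ms = (time.time() - time_interval_minutes * 60) * 1000
    return finished_time_ms >= cutoff_ms
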
Example 4
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 11/20/17 3:24 PM
# @Author  : chenxf
import numpy as np
import os
from file_operator import FileOperator
from train_lstm import train
from project_dir import project_dir

# placeholder sample data (not used by the training call below)
memory = np.random.rand(500, 2)
data = list()

FileOperator.makesure_file_exits("../model_out")

train_file = os.path.join(project_dir, "model_input/spark.csv")
pre_file = os.path.join(project_dir, "model_out/pre.csv")
model_dir = os.path.join(project_dir, "model/")

train(queue_name='spark',
      csv_file=train_file,
      pre_file=pre_file,
      model_dir=model_dir,
      train_step=1000,
      predict_step=50)