Example #1
import logging
import os

def get_logger(refresh=False):
    log_path = "./log"
    log_file = os.path.join(log_path, "logs")
    # FileOperator is a project-specific helper; path_exits presumably
    # ensures the directory exists.
    FileOperator.path_exits(log_path)
    # Cache the logger on the function object; initialize the attribute
    # on the first call so the checks below cannot raise AttributeError.
    if not hasattr(get_logger, "logger"):
        get_logger.logger = None
    if refresh:
        get_logger.logger = None
    if get_logger.logger:
        return get_logger.logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.INFO)
    formatter = logging.Formatter(
        "%(levelname)s %(asctime)s %(filename)s[line:%(lineno)d]: %(message)s")
    file_handler.setFormatter(formatter)
    stream_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.addHandler(stream_handler)
    get_logger.logger = logger
    return get_logger.logger
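
A short usage sketch for the cached logger; the duplicate-handler caveat follows directly from the code above, since refresh=True rebuilds handlers on the same root logger:

log = get_logger()            # first call builds and caches the logger
log.info("training started")  # later calls reuse the cached instance

# refresh=True drops the cache and rebuilds; note the old handlers stay
# attached to the root logger, so output duplicates unless removed first.
log = get_logger(refresh=True)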
Example #2
import pandas as pd

def _main(flags):
    # SCHEDULER_INFILE, PRE_FILE, FileOperator and train are defined
    # elsewhere in this project.
    # on_bad_lines="skip" replaces error_bad_lines=False, which was
    # removed in pandas 2.0.
    scheduler_df = pd.read_csv(SCHEDULER_INFILE, on_bad_lines="skip")
    scheduler_df = scheduler_df.set_index("queueName")
    queue_names = pd.unique(scheduler_df.index.values)

    FileOperator.path_exits("model_input")
    FileOperator.path_exits("model_out")
    FileOperator.write_list_tocsv([], PRE_FILE)

    for queue_name in queue_names:
        print('--------------queue:{0}-----------'.format(queue_name))
        # Keep only the memory column, indexed by a running "times" counter.
        queue_information = scheduler_df.loc[queue_name, ['memory']]
        queue_information = queue_information.reset_index()
        queue_information = queue_information.loc[:, ['memory']]
        queue_information.insert(0, "times", queue_information.index.values)

        model_input_file = "./model_input/{0}.csv".format(queue_name)
        FileOperator.write_list_tocsv([], model_input_file)

        queue_information.to_csv(model_input_file, index=False, header=False)
        model_dir = "./model/{0}".format(queue_name)

        train(queue_name, model_input_file, PRE_FILE, model_dir,
              flags["train_step"], flags["predict_step"])
Example #3
import pandas as pd

def thread_main():
    """Train a model and run prediction for each queue."""
    # SCHEDULER_INFILE, PRE_FILE, FLAGS, FileOperator and train are
    # defined elsewhere in this project.

    # cluster_df = pd.read_csv(CLUSTER_INFILE)
    # total_mem = cluster_df["totalMB"].values[0]
    # total_cpu = cluster_df["totalVirtualCores"].values[1]

    # on_bad_lines="skip" replaces error_bad_lines=False, which was
    # removed in pandas 2.0.
    scheduler_df = pd.read_csv(SCHEDULER_INFILE, on_bad_lines="skip")

    # scheduler_df["memory"] = scheduler_df["memory"] / total_mem
    # scheduler_df["vCores"] = scheduler_df["vCores"] / total_cpu

    queue_names = set(scheduler_df["queueName"].values)

    scheduler_df = scheduler_df.set_index("queueName")

    FileOperator.path_exits("model_input")
    FileOperator.path_exits("model_out")
    FileOperator.path_exits("model")

    # empty the pre_file
    FileOperator.write_list_tocsv([], PRE_FILE)

    for queue_name in queue_names:
        print('--------------queue:{0}-----------'.format(queue_name))
        # .loc replaces the .ix indexer, which was removed in pandas 1.0.
        queue_information = scheduler_df.loc[queue_name, ["memory"]]
        # queue_information['memory'] = round(queue_information['memory'], 2)
        # Replace exact zeros with a small placeholder value.
        queue_information = queue_information.replace(0.0, 0.01)
        queue_information.insert(0, "times", range(queue_information.shape[0]))

        model_input_file = "./model_input/{0}.csv".format(queue_name)

        FileOperator.write_list_tocsv([], model_input_file)

        queue_information.to_csv(model_input_file, index=False, header=False)
        model_dir = "./model/{0}".format(queue_name)

        train(queue_name, model_input_file, PRE_FILE, model_dir,
              FLAGS.train_step, FLAGS.predict_step)
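
thread_main takes no arguments and reads the parsed FLAGS directly; a minimal sketch of running it off the main thread (the threading usage is an assumption suggested only by the function's name):

import threading

worker = threading.Thread(target=thread_main)
worker.start()
worker.join()  # wait for all queues to finish training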
Example #4
    # (snippet begins mid-script: parser, main, HadoopUtil and
    # FileOperator are defined earlier in the file)
    parser.add_argument("--time_format",
                        type=str,
                        choices=['w', 'd', 'h', 'm', 's'],
                        default='m',
                        help="w: week, d: day, h: hour, m: minute, s: second")
    parser.add_argument(
        "--time_interval",
        type=int,
        default=5,
        help="collect information for jobs that finished within the last "
        "time_interval units of time_format; e.g. time_format=m with "
        "time_interval=20 collects jobs finished in the last 20 minutes; "
        "if time_interval < 0, collect all jobs")
    parser.add_argument("--states",
                        type=str,
                        choices=["finished", "accepted", "running"],
                        default='running',
                        help="job state to filter on")
    parser.add_argument("--time_period",
                        type=int,
                        default=300,
                        help="period, in seconds, between script runs")
    FLAGS = parser.parse_args()
    FileOperator.path_exits(FLAGS.file_path)

    hadoop_util = HadoopUtil(FLAGS.file_path)
    # t = threading.Timer(FLAGS.time_period, main)
    # t.start()
    main()
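
The commented-out threading.Timer above would fire main() only once; a rough sketch of periodic execution, assuming a re-arming wrapper (hypothetical, not part of the original script):

import threading

def periodic_main():
    # Hypothetical wrapper: run main() and re-arm the timer so it
    # repeats every FLAGS.time_period seconds.
    main()
    threading.Timer(FLAGS.time_period, periodic_main).start()

periodic_main()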