def get_logger(refresh=False):
    """Return a cached logger, rebuilding it when refresh=True."""
    log_path = "./log"
    log_file = os.path.join(log_path, "logs")
    FileOperator.path_exits(log_path)
    if refresh:
        get_logger.logger = None
    # Use getattr so the first call does not raise AttributeError
    # before the function attribute has been set.
    if getattr(get_logger, "logger", None):
        return get_logger.logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.INFO)
    formatter = logging.Formatter(
        "%(levelname)s %(asctime)s %(filename)s[line:%(lineno)d]: %(message)s")
    file_handler.setFormatter(formatter)
    stream_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.addHandler(stream_handler)
    get_logger.logger = logger
    return get_logger.logger
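
# Usage sketch (assumption: get_logger is imported and called from other
# modules in this project). The logger is cached on the function object,
# so repeated calls return the same instance; refresh=True rebuilds it,
# though the old handlers stay attached to the root logger unless removed.
def _demo_get_logger():
    logger = get_logger()
    logger.info("collector started")
    assert logger is get_logger()  # cached: same instance on second call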
def _main(flags):
    scheduler_df = pd.read_csv(SCHEDULER_INFILE, error_bad_lines=False)
    scheduler_df = scheduler_df.set_index("queueName")
    queue_names = pd.unique(scheduler_df.index.values)
    FileOperator.path_exits("model_input")
    FileOperator.path_exits("model_out")
    # Empty the prediction file before appending per-queue results.
    FileOperator.write_list_tocsv([], PRE_FILE)
    for queue_name in queue_names:
        print('--------------queue:{0}-----------'.format(queue_name))
        queue_information = scheduler_df.loc[queue_name, ['memory']]
        queue_information = queue_information.reset_index()
        queue_information = queue_information.loc[:, ['memory']]
        queue_information.insert(0, "times", queue_information.index.values)
        model_input_file = "./model_input/{0}.csv".format(queue_name)
        FileOperator.write_list_tocsv([], model_input_file)
        queue_information.to_csv(model_input_file, index=False, header=False)
        model_dir = "./model/{0}".format(queue_name)
        train(queue_name, model_input_file, PRE_FILE, model_dir,
              flags["train_step"], flags["predict_step"])
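
# A minimal, self-contained sketch of the per-queue reshaping _main
# performs: select one queue's "memory" column and prepend a "times"
# counter column, producing the two-column (times, memory) CSV that
# train() consumes. The sample data below is illustrative only.
def _demo_reshape():
    import pandas as pd
    df = pd.DataFrame({"queueName": ["root.a", "root.a", "root.b"],
                       "memory": [2048, 4096, 1024]}).set_index("queueName")
    q = df.loc["root.a", ["memory"]].reset_index().loc[:, ["memory"]]
    q.insert(0, "times", q.index.values)
    print(q)  # two columns: times (0, 1, ...), memory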
def thread_main():
    """For each queue, train the model and predict."""
    # cluster_df = pd.read_csv(CLUSTER_INFILE)
    # total_mem = cluster_df["totalMB"].values[0]
    # total_cpu = cluster_df["totalVirtualCores"].values[1]
    scheduler_df = pd.read_csv(SCHEDULER_INFILE, error_bad_lines=False)
    # scheduler_df["memory"] = scheduler_df["memory"] / total_mem
    # scheduler_df["vCores"] = scheduler_df["vCores"] / total_cpu
    queue_names = set(scheduler_df["queueName"].values)
    scheduler_df = scheduler_df.set_index("queueName")
    FileOperator.path_exits("model_input")
    FileOperator.path_exits("model_out")
    FileOperator.path_exits("model")
    # Empty the prediction file before appending per-queue results.
    FileOperator.write_list_tocsv([], PRE_FILE)
    for queue_name in queue_names:
        print('--------------queue:{0}-----------'.format(queue_name))
        # .ix was removed from pandas; .loc is the label-based equivalent.
        queue_information = scheduler_df.loc[queue_name, ["memory"]]
        # queue_information['memory'] = round(queue_information['memory'], 2)
        queue_information = queue_information.replace(0.0, 0.01)
        queue_information.insert(0, "times", range(queue_information.shape[0]))
        model_input_file = "./model_input/{0}.csv".format(queue_name)
        FileOperator.write_list_tocsv([], model_input_file)
        queue_information.to_csv(model_input_file, index=False, header=False)
        model_dir = "./model/{0}".format(queue_name)
        train(queue_name, model_input_file, PRE_FILE, model_dir,
              FLAGS.train_step, FLAGS.predict_step)
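
# The replace(0.0, 0.01) step above keeps the series strictly positive;
# a plausible reason (an assumption, not stated in the source) is that
# the downstream model cannot handle all-zero usage values. Illustration
# of the transform on toy data:
def _demo_zero_fill():
    import pandas as pd
    s = pd.DataFrame({"memory": [0.0, 2048.0, 0.0]})
    print(s.replace(0.0, 0.01))  # -> 0.01, 2048.0, 0.01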
parser.add_argument("--time_format", type=str,
                    choices=['w', 'd', 'h', 'm', 's'],
                    default='m',
                    help="w: week, d: day, h: hour, m: minute, s: second")
parser.add_argument(
    "--time_interval", type=int, default=5,
    help="collect information for jobs whose finish time falls within "
         "the last time_interval units of time_format; e.g. "
         "time_format=m, time_interval=20 collects jobs finished in "
         "the last 20 minutes; if time_interval < 0, collect all jobs")
parser.add_argument("--states", type=str,
                    choices=["finished", "accepted", "running"],
                    default='running',
                    help="the job state to collect")
parser.add_argument("--time_period", type=int, default=300,
                    help="the period, in seconds, at which the script runs")
FLAGS = parser.parse_args()
FileOperator.path_exits(FLAGS.file_path)
hadoop_util = HadoopUtil(FLAGS.file_path)
# t = threading.Timer(
#     FLAGS.time_period, main)
# t.start()
main()
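
# Example invocation (the script name and the --file_path flag are
# assumptions; --file_path appears to be defined earlier in this parser):
#   python collector.py --file_path ./data --time_format m \
#       --time_interval 20 --states finished --time_period 300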