def __init__(self, model, loss, resume, config, train_logger=None):
    self.config = config
    self.logger = logging.getLogger(self.__class__.__name__)
    self.model = model
    self.loss = loss
    self.name = config['name']
    self.epochs = config['trainer']['epochs']
    self.save_freq = config['trainer']['save_freq']
    self.verbosity = config['trainer']['verbosity']
    self.summary_writer = SummaryWriter()

    # check cuda available
    if torch.cuda.is_available():
        if config['cuda']:
            self.with_cuda = True
            self.gpus = {i: item for i, item in enumerate(self.config['gpus'])}
            device = 'cuda'
            if torch.cuda.device_count() > 1 and len(self.gpus) > 1:
                self.model.parallelize()
            torch.cuda.empty_cache()
        else:
            self.with_cuda = False
            device = 'cpu'
    else:
        self.logger.warning(
            "Warning: There's no CUDA support on this machine, training is performed on CPU.")
        self.with_cuda = False
        device = 'cpu'
    self.device = torch.device(device)
    self.model.to(self.device)

    # log
    self.logger.debug('Model is initialized.')
    self._log_memory_useage()
    self.train_logger = train_logger

    # optimizer
    self.optimizer = self.model.optimize(config['optimizer_type'], config['optimizer'])

    # train monitor
    self.monitor = config['trainer']['monitor']
    self.monitor_mode = config['trainer']['monitor_mode']
    assert self.monitor_mode == 'min' or self.monitor_mode == 'max'
    self.monitor_best = math.inf if self.monitor_mode == 'min' else -math.inf

    # checkpoint path
    self.start_epoch = 1
    self.checkpoint_dir = os.path.join(config['trainer']['save_dir'], self.name)
    make_dir(self.checkpoint_dir)
    if resume:
        self._resume_checkpoint(resume)
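The make_dir helper used above and throughout the snippets below is not shown in this section. A minimal sketch, assuming it is just a thin wrapper around os.makedirs that tolerates directories that already exist (the snippets appear to come from different projects, so the real helpers may differ):

import os

def make_dir(path):
    # Create the directory (and any missing parents); do nothing if it already exists.
    os.makedirs(path, exist_ok=True)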
def __compute_hmean(self):
    self.model.eval()
    temp_dir = 'temp'
    make_dir(temp_dir)
    test_img_dir = pathlib.Path(self.root_dataset) / 'test_images'
    res = main_evaluate(self.model, test_img_dir, temp_dir, self.with_gpu, False)
    return res
def main(args: argparse.Namespace):
    output_dir = "outputs"
    make_dir(output_dir)

    model_path = args.model
    input_dir = args.input_dir
    with_image = args.save_img
    with_gpu = torch.cuda.is_available()

    if with_image:
        make_dir(os.path.join(output_dir, 'img'))

    model = load_model(model_path, with_gpu)
    print(main_evaluate(model, input_dir, output_dir, with_image, with_gpu))
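main() expects an argparse.Namespace with model, input_dir and save_img attributes. A hypothetical parser that would produce such a namespace (flag names are inferred from the attribute accesses, not taken from the original repository) might look like:

import argparse

def parse_args():
    # Hypothetical CLI wiring for main(); the flag names below are assumptions.
    parser = argparse.ArgumentParser(description='Run evaluation on a directory of images')
    parser.add_argument('--model', required=True, help='path to the trained model checkpoint')
    parser.add_argument('--input-dir', dest='input_dir', required=True, help='directory of test images')
    parser.add_argument('--save-img', dest='save_img', action='store_true', help='also save rendered result images')
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_args())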
def get_dir_to_save_plots(logs_path, dir_to_save_plots):
    # Replace the last path component of dir_to_save_plots with the last
    # component of logs_path, then make sure that directory exists.
    dir_to_save_plots = (Path(dir_to_save_plots)
                         .parent
                         .joinpath(logs_path.as_posix().rsplit("/", 1)[1])
                         ).as_posix()
    make_dir(dir_to_save_plots)
    return dir_to_save_plots
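A quick usage sketch (the paths are hypothetical): the plots directory is named after the final component of the logs path, while the parent of the supplied plots path is kept.

from pathlib import Path

# Hypothetical inputs:
#   logs_path         = Path('runs/exp_042')
#   dir_to_save_plots = 'plots/placeholder'
plots_dir = get_dir_to_save_plots(Path('runs/exp_042'), 'plots/placeholder')
print(plots_dir)  # -> plots/exp_042 (created on disk by make_dir)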
def __init__(self, tb_log_dir, exp_string):
    """Initialize the summary writer (only on the rank-0 process)."""
    self.exp_string = exp_string
    self.tb_log_dir = tb_log_dir
    self.val_img_dir = os.path.join(self.tb_log_dir, 'val_image')
    if CONFIG.local_rank == 0:
        util.make_dir(self.tb_log_dir)
        util.make_dir(self.val_img_dir)
        self.writer = SummaryWriter(os.path.join(self.tb_log_dir, self.exp_string))
    else:
        self.writer = None
def process_config(config):
    # make the directories needed to save logs, checkpoints and outputs
    # (time_stamp is only used by the commented-out path variants below)
    time_stamp = datetime.datetime.now().strftime('%m%d_%H%M%S')
    config['trainer']['args']['log_dir'] = ''.join(
        (config['trainer']['args']['log_dir'], config['task_name'], '/'))  # , '.%s/' % (time_stamp)))
    config['trainer']['args']['save_dir'] = ''.join(
        (config['trainer']['args']['save_dir'], config['task_name'], '/'))  # , '.%s/' % (time_stamp)))
    config['trainer']['args']['output_dir'] = ''.join(
        (config['trainer']['args']['output_dir'], config['task_name'], '/'))  # , '.%s/' % (time_stamp)))
    make_dir(config['trainer']['args']['log_dir'])
    make_dir(config['trainer']['args']['save_dir'])
    make_dir(config['trainer']['args']['output_dir'])
    return config
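A short worked example of what process_config does to the directory settings (all values below are invented for illustration): each base directory gets the task name appended as a subdirectory, and the resulting directories are created.

# Hypothetical input config (only the fields touched by process_config):
config = {
    'task_name': 'east_icdar',
    'trainer': {'args': {'log_dir': 'logs/', 'save_dir': 'saved/', 'output_dir': 'outputs/'}},
}
config = process_config(config)
# config['trainer']['args']['log_dir']    -> 'logs/east_icdar/'
# config['trainer']['args']['save_dir']   -> 'saved/east_icdar/'
# config['trainer']['args']['output_dir'] -> 'outputs/east_icdar/'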
def get_config(use_cmd_config=True):
    '''Method to prepare the config for all downstream tasks'''
    # Read the config file
    config = _read_config()
    if use_cmd_config:
        config = argument_parser(config)

    if config[GENERAL][BASE_PATH] == "":
        base_path = os.getcwd().split('/SelfPlay')[0]
        config[GENERAL][BASE_PATH] = base_path
    if config[GENERAL][DEVICE] == "":
        config[GENERAL][DEVICE] = CPU
    for key in [SEED]:
        config[GENERAL][key] = int(config[GENERAL][key])
    key = ID
    if config[GENERAL][key] == "":
        config[GENERAL][key] = str(config[GENERAL][SEED])

    # Model Params
    for key in [NUM_EPOCHS, BATCH_SIZE, PERSIST_PER_EPOCH, EARLY_STOPPING_PATIENCE,
                NUM_OPTIMIZERS, LOAD_TIMESTAMP, MAX_STEPS_PER_EPISODE,
                MAX_STEPS_PER_EPISODE_SELFPLAY, TARGET_TO_SELFPLAY_RATIO,
                EPISODE_MEMORY_SIZE]:
        config[MODEL][key] = int(config[MODEL][key])
    for key in [LEARNING_RATE, GAMMA, LAMBDA, LEARNING_RATE_ACTOR,
                LEARNING_RATE_CRITIC, REWARD_SCALE]:
        config[MODEL][key] = float(config[MODEL][key])
    for key in [USE_BASELINE, LOAD, IS_SELF_PLAY, IS_SELF_PLAY_WITH_MEMORY]:
        config[MODEL][key] = _get_boolean_value(config[MODEL][key])

    agent = config[MODEL][AGENT]
    if agent not in get_supported_agents():
        config[MODEL][AGENT] = REINFORCE
    env = config[MODEL][ENV]
    if env not in get_supported_envs():
        config[MODEL][ENV] = MAZEBASE
    optimiser = config[MODEL][OPTIMISER]
    if optimiser not in get_supported_optimisers():
        config[MODEL][OPTIMISER] = ADAM

    if config[MODEL][SAVE_DIR] == "":
        config[MODEL][SAVE_DIR] = os.path.join(config[GENERAL][BASE_PATH], "model")
    elif config[MODEL][SAVE_DIR][0] != "/":
        config[MODEL][SAVE_DIR] = os.path.join(config[GENERAL][BASE_PATH],
                                               config[MODEL][SAVE_DIR])
    make_dir(config[MODEL][SAVE_DIR])
    if config[MODEL][LOAD_PATH] == "":
        config[MODEL][LOAD_PATH] = os.path.join(config[GENERAL][BASE_PATH], "model")
    elif config[MODEL][LOAD_PATH][0] != "/":
        config[MODEL][LOAD_PATH] = os.path.join(config[GENERAL][BASE_PATH],
                                                config[MODEL][LOAD_PATH])

    # TB Params
    config[TB][DIR] = os.path.join(config[TB][BASE_PATH],
                                   datetime.now().strftime('%b%d_%H-%M-%S'))
    config[TB][SCALAR_PATH] = os.path.join(config[TB][BASE_PATH], "all_scalars.json")

    # Log Params
    key = FILE_PATH
    if config[LOG][key] == "":
        config[LOG][key] = os.path.join(
            config[GENERAL][BASE_PATH], "SelfPlay",
            "log_{}.txt".format(str(config[GENERAL][SEED])))

    # Plot Params
    if config[PLOT][BASE_PATH] == "":
        config[PLOT][BASE_PATH] = os.path.join(config[GENERAL][BASE_PATH],
                                               "plot", config[GENERAL][ID])
    make_dir(path=config[PLOT][BASE_PATH])

    return config
    def console(self, info, error=False, append=False):
        # NOTE: the def line of this method was missing from the snippet;
        # the signature above is reconstructed from the calls and flags used below.
        logging.info(info)
        if error:
            console_append = f"<font color={self.error_color}><b>{info}</b></font>"
        if append:
            console_append = info
        return self.output.append(console_append)

    def error_handler(self, exc_type, exc_value, exc_traceback):
        logging.error("Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback))
        self.console("CRITICAL ERROR: Check log file for full details", error=True)

    def closeEvent(self, event: QtGui.QCloseEvent):
        logging.info("App shutting down...")


if __name__ == "__main__":
    make_dir(user_dir())
    make_dir(os.path.join(user_dir(), "CSV Files"))
    log_file = os.path.join(user_dir(), "stellar-csv-creator.log")
    logging.basicConfig(filename=log_file,
                        format="%(asctime)s:%(levelname)s:%(message)s",
                        datefmt="%Y-%m-%dT%H:%M:%SZ",
                        level=logging.INFO)
    logging.info("App started...")
    setup_config()

    app = QtWidgets.QApplication(sys.argv)
    ui = CSVCreator()
    ui.show()
    sys.exit(app.exec_())