def start_api():
    logger_flask = logging.getLogger('werkzeug')
    for hdlr in logger_flask.handlers[:]:  # remove all old handlers
        logger_flask.removeHandler(hdlr)
    config.setup_logger(logger_flask,
                        config.get_value(AppConfig.API_CONFIGURATION, AppConfig.ATTR_LOG_LEVEL),
                        AppConfig.API_CONFIGURATION)
    config.setup_logger(rest_app.logger,
                        config.get_value(AppConfig.API_CONFIGURATION, AppConfig.ATTR_LOG_LEVEL),
                        AppConfig.API_CONFIGURATION)

    host = config.get_value(AppConfig.API_CONFIGURATION, AppConfig.API_HOST)
    port = config.get_value(AppConfig.API_CONFIGURATION, AppConfig.API_PORT)
    debug = config.is_value_active(AppConfig.API_CONFIGURATION, AppConfig.API_DEBUG)
    ssl = 'adhoc' if config.is_value_active(AppConfig.API_CONFIGURATION, AppConfig.API_SSL) else None

    logger.info('REST API starting')
    # Pass the configured values to the server; previously they were computed but unused.
    # ssl_context is forwarded to the underlying Werkzeug development server (assumption:
    # the Werkzeug/threading backend is the one in use).
    socketio.run(rest_app, host=host, port=port, debug=debug, ssl_context=ssl)
def main():
    args = extend_arguments(get_parser()).parse_args()
    configs = common.config.get_config(args.env, args.experiment_name)

    assert args.alg in ['a2c', 'ppo', 'acktr', 'sac']
    if args.recurrent_policy:
        assert args.alg in ['a2c', 'ppo'], 'Recurrent policy is not implemented for ACKTR'
    if args.test:
        args.num_processes = 1
        args.use_wandb = False

    logger = setup_logger(args.verbose, args.model_name, configs.log_directory)
    torch.set_num_threads(1)

    # set seed values
    seed = args.seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    if args.use_wandb:
        import wandb
        resume_wandb = args.wandb_resume_id is not None
        wandb.init(config=args, resume=resume_wandb, id=args.wandb_resume_id,
                   project='rl', name=args.experiment_name)

    # make environments (env_vector[0] is used for evaluation)
    envs, env_vector = make_vec_envs_pytorch(args.env, return_evn_vector=True,
                                             device=device, log_dir=configs.log_directory,
                                             **vars(args))
    eval_envs = wrap_env_pytorch(env_vector[0], args.gamma, device)

    actor_critic = Policy(envs.observation_space.shape, envs.action_space,
                          base_kwargs={'recurrent': args.recurrent_policy,
                                       'hidden_size': args.hidden_layer_size})

    # load model
    if args.load_path is not None:
        logger.info('loading model: {}'.format(args.load_path))
        actor_critic = torch.load(args.load_path)
    actor_critic.to(device)

    if args.test:
        test(eval_envs, actor_critic, args, logger)
    else:
        train(envs, env_vector, eval_envs, actor_critic, args, configs, logger)
def main():
    args = extend_arguments(get_parser()).parse_args()
    configs = common.config.get_config(args.env, args.experiment_name)

    if args.test:
        args.num_processes = 1
        args.use_wandb = False

    logger = setup_logger(args.verbose, args.experiment_name, configs.log_directory)
    torch.set_num_threads(1)

    # set seed values
    seed = args.seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    if args.use_wandb:
        import wandb
        resume_wandb = args.wandb_resume_id is not None
        wandb.init(config=args, resume=resume_wandb, id=args.wandb_resume_id,
                   project='rl', name=args.experiment_name)

    env = gym.make(args.env, **vars(args))

    # Agent
    global_episodes = 0
    agent = SAC(env.observation_space.shape[0], env.action_space, args)
    if args.load_path:
        global_episodes = agent.load_model(args.load_path, args.load_optim) * int(
            not args.reset_global_episode)
        logger.info(f'Agent loaded: {args.load_path} @{global_episodes}')

    memory = None
    if args.memory_load_path:
        with open(args.memory_load_path, 'rb') as memory_file:
            memory = pickle.load(memory_file)
        logger.info(f'Memory loaded: {args.memory_load_path}')
        logger.info(f'Loaded memory length: {len(memory)}')
        logger.warning('There is something wrong with loading experiences from memory and '
                       'the training becomes unstable. Be extra careful when using this feature!')

    if args.test:
        test(env, agent, args)
    else:
        train(env, agent, args, configs, memory, global_episodes)
def test(env, agent, args):
    logger = setup_logger()
    env.seed(args.seed)
    avg_reward, infos = _test(env, agent, args.test_episode)
    logger.info('Test trial complete. Writing results...')

    results_path = args.load_path + '_test_results'
    if args.env.startswith('JawEnv'):
        from artisynth_envs.envs.jaw_env import write_infos, calculate_convex_hull, \
            maximum_occlusal_force
        write_infos(infos, results_path)

        # Derived metrics
        maximum_occlusal_force(env, results_path)
        calculate_convex_hull(results_path)

    logger.info(f'Results written to: {results_path}')
    env.close()
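# NOTE (illustrative sketch, not the repository implementation): `_test` is called by
# test() above and by train() below, but its body is not part of this listing.  The
# helper below shows one plausible shape, assuming agent.select_action() accepts an
# `evaluate` flag for deterministic actions and that per-step info values are numeric
# and averaged per key.
def _test_sketch(env, agent, num_episodes):
    total_reward = 0.0
    info_sums, info_counts = {}, {}
    for _ in range(num_episodes):
        state = env.reset()
        done = False
        while not done:
            action = agent.select_action(state, evaluate=True)  # deterministic (mean) action
            state, reward, done, info = env.step(action)
            total_reward += reward
            for key, val in info.items():
                info_sums[key] = info_sums.get(key, 0.0) + val
                info_counts[key] = info_counts.get(key, 0) + 1
    avg_infos = {key: info_sums[key] / info_counts[key] for key in info_sums}
    return total_reward / num_episodes, avg_infos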
import os

import numpy as np
import torch

from common import constants as c
from common.utilities import Bunch
from common.config import setup_logger
from artisynth_envs.artisynth_base_env import ArtiSynthBase

logger = setup_logger()


class JawEnv(ArtiSynthBase):
    def __init__(self, wait_action, reset_step, goal_threshold, goal_reward, **kwargs):
        self.args = Bunch(kwargs)
        super().__init__(**kwargs)

        self.episode_counter = 0
        self.action_size = 0
        self.obs_size = 0

        self.goal_threshold = float(goal_threshold)
        self.reset_step = int(reset_step)
        self.wait_action = float(wait_action)
        self.goal_reward = goal_reward

        self.action_size, self.obs_size = self.init_spaces(
            incremental_actions=self.incremental_actions)
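# NOTE (illustrative sketch, not the repository implementation): Bunch is imported
# from common.utilities and is not shown in this listing.  Helpers with this name are
# conventionally thin wrappers that expose a dict's keys as attributes, which is how
# self.args is used above (self.args.some_key instead of kwargs['some_key']).
class BunchSketch:
    def __init__(self, mapping):
        # copy the keyword arguments into the instance namespace
        self.__dict__.update(mapping)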
def main():
    args = extend_arguments(get_parser()).parse_args()
    configs = common.config.get_config(args.env, args.experiment_name)

    setup_tensorflow()
    get_custom_objects().update({'SmoothLogistic': Activation(smooth_logistic)})

    save_path = os.path.join(configs.trained_directory,
                             args.alg + "-" + args.env + ".h5f")
    log_file_name = args.model_name
    logger = setup_logger(args.verbose, log_file_name, configs.log_directory)

    import artisynth_envs.envs  # imported here to avoid the conflict with tensorflow's logger
    env = gym.make(args.env, **vars(args))
    env.seed(args.seed)

    training = False  # defined before the try block so the except handler can always read it
    try:
        nb_actions = env.action_space.shape[0]
        memory = SequentialMemory(limit=MEMORY_SIZE, window_length=1)

        mu_model = get_mu_model(env)
        v_model = get_v_model(env)
        l_model = get_l_model(env)

        random_process = OrnsteinUhlenbeckProcess(
            size=nb_actions, theta=THETA, mu=MU, sigma=SIGMA, dt=DT,
            sigma_min=SIGMA_MIN, n_steps_annealing=NUM_STEPS_ANNEALING)
        agent = MuscleNAFAgent(nb_actions=nb_actions, V_model=v_model,
                               L_model=l_model, mu_model=mu_model,
                               memory=memory, nb_steps_warmup=WARMUP_STEPS,
                               random_process=random_process, gamma=GAMMA,
                               target_model_update=UPDATE_TARGET_MODEL_STEPS)

        agent.compile(Adam(lr=args.lr), metrics=['mse'])
        env.agent = agent

        if args.load_path is not None:
            agent.load_weights(args.load_path)
            logger.info(f'Weights loaded from: {args.load_path}')

        callbacks = []
        if args.use_tensorboard:
            from rl.callbacks import RlTensorBoard
            tensorboard = RlTensorBoard(
                log_dir=os.path.join(configs.tensorboard_log_directory, log_file_name),
                histogram_freq=1, batch_size=BATCH_SIZE,
                write_graph=True, write_grads=True, write_images=False,
                embeddings_freq=0, embeddings_layer_names=None,
                embeddings_metadata=None, agent=agent)
            callbacks.append(tensorboard)
        if args.use_csvlogger:
            csv_logger = keras.callbacks.CSVLogger(
                os.path.join(configs.agent_log_directory, log_file_name),
                append=False, separator=',')
            callbacks.append(csv_logger)

        if not args.test:  # train code
            training = True
            agent.fit(env, nb_steps=NUM_TRAINING_STEPS, visualize=False,
                      verbose=args.verbose, nb_max_episode_steps=args.reset_step,
                      callbacks=callbacks)
            logger.info('Training complete')
            agent.save_weights(save_path)
        else:  # test code
            logger.info("Testing")
            training = False
            env.log_to_file = False
            history = agent.test(env, nb_episodes=args.test_episode,
                                 nb_max_episode_steps=args.reset_step)
            logger.info(history.history)
            logger.info('Average last distance: %s',
                        np.mean(history.history['last_distance']))
            logger.info('Mean reward: %s',
                        np.mean(history.history['episode_reward']))
    except Exception as e:
        if training:
            agent.save_weights(save_path)
        logger.exception('Error in main code: %s', str(e))
        raise
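# NOTE (illustrative sketch, not the repository implementation): get_mu_model,
# get_v_model and get_l_model are not part of this listing.  For keras-rl's NAF
# agent the value network maps an observation window to a single scalar V(s); a
# minimal version, assuming SequentialMemory(window_length=1) as above and a
# hypothetical hidden size, could look like this.
def get_v_model_sketch(env, hidden_units=64):
    from keras.models import Sequential
    from keras.layers import Dense, Flatten

    model = Sequential()
    # keras-rl feeds observations with shape (window_length,) + observation_shape
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(hidden_units, activation='relu'))
    model.add(Dense(hidden_units, activation='relu'))
    model.add(Dense(1, activation='linear'))  # scalar state value V(s)
    return model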
def train(env, agent, args, configs, memory=None, global_episodes=0):
    logger = setup_logger()

    # TensorboardX
    if args.use_tensorboard:
        writer = SummaryWriter(logdir='{}/{}_SAC_{}_{}_{}'.format(
            configs.tensorboard_log_directory,
            datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"),
            args.env, args.policy,
            "autotune" if args.automatic_entropy_tuning else ""))

    # Memory
    memory = memory or ReplayMemory(args.replay_size)

    # Training loop
    global_steps = 0
    for global_episodes in itertools.count(start=global_episodes, step=1):
        episode_reward = 0
        episode_steps = 0
        done = False
        state = env.reset()

        critic_1_loss_total = 0
        critic_2_loss_total = 0
        policy_loss_total = 0
        ent_loss_total = 0
        alpha_total = 0

        while not done:
            action = agent.select_action(state)

            if len(memory) > args.batch_size and global_steps > args.start_steps:
                for i in range(args.updates_per_step):  # number of updates per environment step
                    # update all parameters
                    critic_1_loss, critic_2_loss, policy_loss, ent_loss, alpha = \
                        agent.update_parameters(memory, args.batch_size)

                    critic_1_loss_total += critic_1_loss
                    critic_2_loss_total += critic_2_loss
                    policy_loss_total += policy_loss
                    ent_loss_total += ent_loss
                    alpha_total += alpha

            next_state, reward, done, _ = env.step(action)  # step
            episode_steps += 1
            global_steps += 1
            episode_reward += reward

            # Ignore the "done" signal if it comes from hitting the time horizon.
            # (https://github.com/openai/spinningup/blob/master/spinup/algos/sac/sac.py)
            mask = 1 if episode_steps == env.reset_step else float(not done)
            memory.push(state, action, reward, next_state, mask)  # append transition to memory

            state = next_state

        # end of episode
        # These averages are slightly off for the first episodes because no updates
        # happen while len(memory) <= batch_size.
        critic_1_loss_total /= (episode_steps * args.updates_per_step)
        critic_2_loss_total /= (episode_steps * args.updates_per_step)
        policy_loss_total /= (episode_steps * args.updates_per_step)
        ent_loss_total /= (episode_steps * args.updates_per_step)
        alpha_total /= (episode_steps * args.updates_per_step)
        episode_reward /= episode_steps

        if global_episodes % args.episode_log_interval == 0:
            print('Episode: {}, total numsteps: {}, episode steps: {}, reward: {}'.format(
                global_episodes, global_steps, episode_steps, round(episode_reward, 2)))

        if args.use_tensorboard:
            writer.add_scalar('reward/train', episode_reward, global_episodes)
            writer.add_scalar('loss/critic_1', critic_1_loss_total, global_episodes)
            writer.add_scalar('loss/critic_2', critic_2_loss_total, global_episodes)
            writer.add_scalar('loss/policy', policy_loss_total, global_episodes)
            writer.add_scalar('loss/entropy_loss', ent_loss_total, global_episodes)
            writer.add_scalar('entropy_temperature/alpha', alpha_total, global_episodes)

        if args.use_wandb:
            import wandb
            wandb.log({'episode_reward': episode_reward,
                       'loss/critic_1': critic_1_loss_total,
                       'loss/critic_2': critic_2_loss_total,
                       'loss/policy': policy_loss_total,
                       'loss/entropy_loss': ent_loss_total,
                       'entropy_temperature/alpha': alpha_total,
                       'lr': get_lr_pytorch(agent.policy_optim)},
                      step=global_episodes)

        if global_episodes % args.eval_interval == args.eval_interval - 1:
            avg_reward, infos = _test(env, agent, args.eval_episode)
            if args.use_tensorboard:
                writer.add_scalar('eval/avg_reward', avg_reward, global_episodes)
                for key, val in infos.items():
                    writer.add_scalar(f'eval/{key}', val, global_episodes)
            if args.use_wandb:
                import wandb
                wandb.log({'eval/avg_reward': avg_reward}, step=global_episodes)
                for key, val in infos.items():
                    wandb.log({f'eval/{key}': val}, step=global_episodes)

        if global_episodes % args.save_interval == args.save_interval - 1:
            # TODO: replace the following hack by saving to a temporary file and
            # copying it to the destination.
            test_save_path = os.path.join(configs.trained_directory, 'test_file')
            with open(test_save_path, 'w') as test_file:
                test_file.write(
                    "This is just to make sure we have enough disk space to fully save "
                    "the file not to screw up the agent or the memory! " * 1000)

            agent_save_path = os.path.join(configs.trained_directory, 'agent')
            agent.global_episode = global_episodes + 1
            # torch.save(agent, agent_save_path)
            agent.save_model(agent_save_path, global_episodes)
            logger.info(f'model saved: {agent_save_path}')

            memory_path = os.path.join(configs.trained_directory, 'memory')
            with open(memory_path, 'wb') as memory_file:
                pickle.dump(memory, memory_file)
            logger.info(f'memory saved: {memory_path}')
            print('------------------')

        if global_steps > args.num_steps:  # end of training
            break

    env.close()
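# NOTE (illustrative sketch, not the repository implementation): ReplayMemory is
# imported from elsewhere.  A minimal circular buffer compatible with how train()
# uses it (push, len, pickling, and sampling fixed-size batches inside
# agent.update_parameters) could look like this.
import random

import numpy as np


class ReplayMemorySketch:
    def __init__(self, capacity):
        self.capacity = capacity
        self.buffer = []
        self.position = 0

    def push(self, state, action, reward, next_state, mask):
        # overwrite the oldest transition once the buffer is full
        if len(self.buffer) < self.capacity:
            self.buffer.append(None)
        self.buffer[self.position] = (state, action, reward, next_state, mask)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        batch = random.sample(self.buffer, batch_size)
        state, action, reward, next_state, mask = map(np.stack, zip(*batch))
        return state, action, reward, next_state, mask

    def __len__(self):
        return len(self.buffer)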