def train(self):
    args = self.args
    torch.manual_seed(args.seed)
    env = grid2op.make(args.env_name, test=args.for_test, reward_class=L2RPNReward)
    shared_model = ActorCritic(env.observation_space.size(), self.action_space, args.hidden_size)
    shared_model.share_memory()

    if args.no_shared:
        optimizer = None
    else:
        optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=args.lr)
        optimizer.share_memory()

    processes = []
    counter = mp.Value('i', 0)
    lock = mp.Lock()

    # Validation process: evaluates the shared model without updating it.
    p = mp.Process(target=self.do_test, args=(args.num_processes, args, shared_model, counter))
    p.start()
    processes.append(p)

    # Training workers: each updates the shared model asynchronously.
    for rank in range(0, args.num_processes):
        p = mp.Process(target=self.do_train, args=(rank, args, shared_model, counter, lock, optimizer))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()
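# The SharedAdam used above comes from a my_optim module not shown here. A
# minimal sketch of the usual pattern (Adam whose per-parameter state tensors
# live in shared memory, so every worker process reads and writes the same
# statistics) is given below. This is an illustrative assumption, not this
# repo's exact implementation.
import math

import torch
import torch.optim as optim

class SharedAdam(optim.Adam):
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        super().__init__(params, lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        # Pre-allocate the Adam state so it can be placed in shared memory.
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = torch.zeros(1)
                state['exp_avg'] = torch.zeros_like(p.data)
                state['exp_avg_sq'] = torch.zeros_like(p.data)

    def share_memory(self):
        # Move every state tensor into shared memory, mirroring
        # shared_model.share_memory() above.
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'].share_memory_()
                state['exp_avg'].share_memory_()
                state['exp_avg_sq'].share_memory_()

    def step(self, closure=None):
        # Re-implement Adam's update because 'step' is stored as a tensor.
        loss = closure() if closure is not None else None
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                state = self.state[p]
                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']
                state['step'] += 1
                if group['weight_decay'] != 0:
                    grad = grad.add(p.data, alpha=group['weight_decay'])
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                denom = exp_avg_sq.sqrt().add_(group['eps'])
                bias_correction1 = 1 - beta1 ** state['step'].item()
                bias_correction2 = 1 - beta2 ** state['step'].item()
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
                p.data.addcdiv_(exp_avg, denom, value=-step_size)
        return loss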
def run(args):
    device = torch.device("cpu")
    env = gym.make('SpaceInvaders-v0')
    state_size = env.observation_space.shape
    action_size = env.action_space.n

    model = ActorCritic([1, 4, 84, 84], action_size).to(device)
    opt = SharedRMSprop(model.parameters(), lr=args.lr, alpha=args.alpha, eps=1e-8,
                        weight_decay=args.weight_decay, momentum=args.momentum,
                        centered=False)
    opt_lock = mp.Lock()
    scheduler = LRScheduler(args)

    if args.load_fp:
        checkpoint = torch.load(args.load_fp)
        model.load_state_dict(checkpoint['model_state_dict'])
        opt.load_state_dict(checkpoint['optimizer_state_dict'])

    if args.train:
        start = time.time()
        model.share_memory()
        model.train()

        step_counter, max_reward, ma_reward, ma_loss = [
            mp.Value('d', 0.0) for _ in range(4)
        ]
        processes = []
        if args.num_procs == -1:
            args.num_procs = mp.cpu_count()
        for rank in range(args.num_procs):
            p = mp.Process(target=train,
                           args=(rank, args, device, model, opt, opt_lock, scheduler,
                                 step_counter, max_reward, ma_reward, ma_loss))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()

        if args.verbose > 0:
            print(f"Seconds taken: {time.time() - start}")
        if args.save_fp:
            torch.save(
                {
                    'model_state_dict': model.state_dict(),
                    # 'optimizer_state_dict': opt.state_dict(),
                }, args.save_fp)

    if args.test:
        model.eval()
        test(args, device, model)
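# The opt_lock created above suggests that updates to the shared optimizer are
# serialized inside each worker. A minimal sketch of that pattern follows; the
# real train() is defined elsewhere, and apply_update with this signature is
# an illustrative assumption.
def apply_update(model, loss, opt, opt_lock):
    with opt_lock:  # only one worker steps the shared optimizer at a time
        opt.zero_grad()
        loss.backward()
        opt.step()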
parser.add_argument('--num-steps', type=int, default=20, metavar='NS',
                    help='number of forward steps in A3C (default: 20)')
parser.add_argument('--max-episode-length', type=int, default=10000, metavar='M',
                    help='maximum length of an episode (default: 10000)')
parser.add_argument('--env-name', default='PongDeterministic-v3', metavar='ENV',
                    help='environment to train on (default: PongDeterministic-v3)')

if __name__ == '__main__':
    args = parser.parse_args()
    torch.manual_seed(args.seed)
    env = create_atari_env(args.env_name)
    shared_model = ActorCritic(env.observation_space.shape[0], env.action_space)
    shared_model.share_memory()

    processes = []
    p = mp.Process(target=test, args=(args.num_processes, args, shared_model))
    p.start()
    processes.append(p)
    for rank in range(0, args.num_processes):
        p = mp.Process(target=train, args=(rank, args, shared_model))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
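# In A3C main scripts of this style, each train worker keeps a local copy of
# the network and copies its gradients into the shared model before stepping
# the optimizer. A common helper for that is sketched below; this snippet's
# train() lives in another file, so the helper is an illustrative assumption.
def ensure_shared_grads(model, shared_model):
    for param, shared_param in zip(model.parameters(), shared_model.parameters()):
        if shared_param.grad is not None:
            # Gradient tensors are already shared after the first assignment.
            return
        shared_param._grad = param.grad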
            loss.backward()
            # Push the worker's local gradients onto the global network, step
            # the shared optimizer, then pull the updated weights back down.
            for local_param, global_param in zip(
                    self.local_actor_critic.parameters(),
                    self.global_actor_critic.parameters()):
                global_param._grad = local_param.grad
            self.optimizer.step()
            self.local_actor_critic.load_state_dict(self.global_actor_critic.state_dict())
            self.local_actor_critic.clear_memory()
        t_step += 1
        observation = observation_
    with self.episode_index.get_lock():
        self.episode_index.value += 1
    print(self.name, 'episode ', self.episode_index.value, 'reward %.1f' % score)

if __name__ == '__main__':
    lr = 1e-4
    env_id = 'CartPole-v0'
    nb_actions = 2
    input_dims = [4]
    global_actor_critic = ActorCritic(input_dims, nb_actions)
    global_actor_critic.share_memory()
    optim = SharedAdam(global_actor_critic.parameters(), lr=lr, betas=(0.92, 0.999))
    global_ep = mp.Value('i', 0)

    workers = [
        Agent(global_actor_critic, optim, input_dims, nb_actions, gamma=0.99,
              lr=lr, name=i, global_ep_index=global_ep, env_id=env_id)
        for i in range(mp.cpu_count())]
    [w.start() for w in workers]
    [w.join() for w in workers]
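# The Agent workers above follow the pattern of subclassing
# torch.multiprocessing.Process. A minimal skeleton is sketched below; the
# constructor arguments are taken from the call above, ActorCritic is assumed
# to be importable from this repo's model module, and the run() body is only
# indicated.
import gym
import torch.multiprocessing as mp

class Agent(mp.Process):
    def __init__(self, global_actor_critic, optimizer, input_dims, n_actions,
                 gamma, lr, name, global_ep_index, env_id):
        super().__init__()
        self.global_actor_critic = global_actor_critic
        self.optimizer = optimizer
        self.local_actor_critic = ActorCritic(input_dims, n_actions)
        self.episode_index = global_ep_index
        self.name = 'w%02i' % name
        self.env = gym.make(env_id)
        self.gamma = gamma

    def run(self):
        # Interact with the environment, accumulate a short rollout, then
        # sync gradients with the global network as in the loop above.
        ...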
                    help='environment to train on (default: Breakout-v0)')
parser.add_argument('--render', default=False, action='store_true',
                    help='render the environment')

if __name__ == '__main__':
    args = parser.parse_args()
    # torch.manual_seed(args.seed)
    torch.set_num_threads(1)
    env = gym.make(args.env_name)
    global_model = ActorCritic(env.action_space.n)
    global_model.share_memory()
    local_model = ActorCritic(env.action_space.n)
    optimizer = AsyncAdam(global_model.parameters(), local_model.parameters(), lr=args.lr)

    processes = []
    for rank in range(args.num_processes):
        p = mp.Process(target=train, args=(rank, args, global_model, local_model, optimizer))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
from args_ali import Args
from agent import agent
from coordinator import coordinator
from model import ActorCritic
from test_ali import test

if __name__ == '__main__':
    os.environ['OMP_NUM_THREADS'] = '1'
    torch.set_num_threads(1)
    args = Args()
    torch.manual_seed(args.seed)

    model = ActorCritic()
    model.share_memory()

    # Inter-process communication queues
    exp_queues = []
    model_params = []
    for i in range(args.num_processes):
        exp_queues.append(mp.Queue(1))
        model_params.append(mp.Queue(1))

    p = mp.Process(target=test, args=(args, model))
    p.start()

    # Create a process for the coordinator; a distinct variable name avoids
    # shadowing the imported coordinator function.
    coordinator_process = mp.Process(target=coordinator,
                                     args=(args.num_processes, args, model,
                                           exp_queues, model_params))
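# A sketch of the queue protocol implied above: the coordinator broadcasts the
# latest weights through the model_params queues and collects one rollout per
# agent from the exp_queues. The body below is an illustrative assumption, not
# the actual coordinator.py.
def coordinator(num_agents, args, model, exp_queues, model_params):
    for q in model_params:
        q.put(model.state_dict())      # hand every agent the initial weights
    while True:
        rollouts = [exp_queues[i].get() for i in range(num_agents)]
        # ... compute gradients from the rollouts and update `model` ...
        for q in model_params:
            q.put(model.state_dict())  # redistribute the updated weights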
        self.tau = 1.
        self.seed = 1
        self.num_processes = 16
        self.num_steps = 20
        self.max_episode_length = 10000
        self.env_name = 'Breakout-v0'

# Main run
os.environ['OMP_NUM_THREADS'] = '1'  # 1 thread per core
params = Params()  # Params object holding all the model parameters
torch.manual_seed(params.seed)  # setting the seed (not essential)
env = create_atari_env(params.env_name)  # optimized (wrapped) Atari environment
shared_model = ActorCritic(env.observation_space.shape[0], env.action_space)  # the model shared by the agents running in different processes
shared_model.share_memory()  # keep the model in shared memory so every process can access it, even across cores
optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=params.lr)  # the optimizer is also shared because it acts on the shared model
optimizer.share_memory()  # likewise, its state lives in shared memory
processes = []
# The 'test' process evaluates the shared model but never updates it;
# mp.Process runs the target function in an independent process.
p = mp.Process(target=test, args=(params.num_processes, params, shared_model))
p.start()
processes.append(p)
# Launch the training processes, which update the shared model.
for rank in range(0, params.num_processes):
    p = mp.Process(target=train, args=(rank, params, shared_model, optimizer))
    p.start()
    processes.append(p)
for p in processes:
    p.join()  # wait on every worker so the program can stop safely
# os.environ['OPENAI_REMOTE_VERBOSE'] = '0'

# Setup
args = parser.parse_args()
print(' ' * 26 + 'Options')
for k, v in vars(args).items():
    print(' ' * 26 + k + ': ' + str(v))
args.env = 'CartPole-v1'  # TODO: Remove hardcoded environment when code is more adaptable
torch.manual_seed(args.seed)
T = Counter()  # Global shared counter

# Create shared network
env = gym.make(args.env)
shared_model = ActorCritic(env.observation_space, env.action_space, args.hidden_size)
shared_model.share_memory()
if args.model and os.path.isfile(args.model):
    # Load pretrained weights
    shared_model.load_state_dict(torch.load(args.model))

# Create average network
shared_average_model = ActorCritic(env.observation_space, env.action_space, args.hidden_size)
shared_average_model.load_state_dict(shared_model.state_dict())
shared_average_model.share_memory()
for param in shared_average_model.parameters():
    param.requires_grad = False

# Create optimiser for shared network parameters with shared statistics
optimiser = SharedRMSprop(shared_model.parameters(), lr=args.lr, alpha=args.rmsprop_decay)
optimiser.share_memory()
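# shared_average_model above is ACER's slowly moving average of the policy
# network, used for its trust-region update. After each optimiser step the
# trainers typically nudge it toward the shared model with a soft (Polyak)
# update along these lines; this is a sketch, and the decay constant is an
# assumption (the ACER paper uses a value close to 1).
def update_average_model(shared_model, shared_average_model, decay=0.99):
    for param, avg_param in zip(shared_model.parameters(),
                                shared_average_model.parameters()):
        # average <- decay * average + (1 - decay) * online
        avg_param.data.mul_(decay).add_(param.data, alpha=1 - decay)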
def run_acer(variant):
    # BLAS setup
    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['MKL_NUM_THREADS'] = '1'

    # Setup
    # args = parser.parse_args()

    # Creating directories.
    save_dir = os.path.join('results', 'results')
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    print(' ' * 26 + 'Options')
    """
    # Saving parameters
    with open(os.path.join(save_dir, 'params.txt'), 'w') as f:
        for k, v in vars(args).items():
            print(' ' * 26 + k + ': ' + str(v))
            f.write(k + ' : ' + str(v) + '\n')
    """
    # args.env = 'CartPole-v1'  # TODO: Remove hardcoded environment when code is more adaptable
    # mp.set_start_method(platform.python_version()[0] == '3' and 'spawn' or 'fork')  # Force true spawning (not forking) if available
    torch.manual_seed(variant['seed'])
    T = Counter()  # Global shared counter
    # gym.logger.set_level(gym.logger.ERROR)  # Disable Gym warnings

    # Create shared network
    env = gym.make(variant['env'])
    shared_model = ActorCritic(env.observation_space, env.action_space, variant['hidden_size'])
    shared_model.share_memory()
    """
    if args.model and os.path.isfile(args.model):
        # Load pretrained weights
        shared_model.load_state_dict(torch.load(args.model))
    """
    # Create average network
    shared_average_model = ActorCritic(env.observation_space, env.action_space, variant['hidden_size'])
    shared_average_model.load_state_dict(shared_model.state_dict())
    shared_average_model.share_memory()
    for param in shared_average_model.parameters():
        param.requires_grad = False

    # Create optimiser for shared network parameters with shared statistics
    optimiser = SharedRMSprop(shared_model.parameters(), lr=variant['lr'], alpha=0.99)
    optimiser.share_memory()
    env.close()

    fields = ['t', 'rewards', 'avg_steps', 'time']
    with open(os.path.join(save_dir, 'test_results.csv'), 'w') as f:
        writer = csv.writer(f)
        writer.writerow(fields)

    # Start validation agent
    processes = []
    p = mp.Process(target=test, args=(0, variant, T, shared_model))
    p.start()
    processes.append(p)

    if not variant['evaluate']:
        # Start training agents
        for rank in range(1, variant['num-processes'] + 1):
            p = mp.Process(target=train,
                           args=(rank, variant, T, shared_model, shared_average_model, optimiser))
            p.start()
            print('Process ' + str(rank) + ' started')
            processes.append(p)

    # Clean up
    for p in processes:
        p.join()
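# A sketch of how the validation process might append a row to the
# test_results.csv created above. The column order follows `fields`; the real
# test() lives in another file, and log_test_result is an illustrative name.
def log_test_result(save_dir, t, rewards, avg_steps, elapsed):
    with open(os.path.join(save_dir, 'test_results.csv'), 'a') as f:
        csv.writer(f).writerow([t, rewards, avg_steps, elapsed])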
        self.seed = 1
        self.num_processes = 16
        self.num_steps = 20
        self.max_episode_length = 10000
        self.env_name = 'Breakout-v0'  # We can train on other games simply by changing the game name.

os.environ['OMP_NUM_THREADS'] = '1'  # 1 thread per core
params = Params()  # Build a Params object with the default parameters.
torch.manual_seed(params.seed)  # Set the seed.
env = create_atari_env(params.env_name)  # Optimized game environment
print(env.observation_space.shape)
shared_model = ActorCritic(
    env.observation_space.shape[0], env.action_space
)  # shared_model is shared by all agents (different processes on different cores).
shared_model.share_memory()  # Keep the model in shared memory so every process can use it, even across cores.
optimizer = shared_adam.SharedAdam(shared_model.parameters(), lr=params.lr)
optimizer.share_memory()  # The optimizer acts on the shared model, so its state is kept in shared memory too.

processes = []  # process list
# The 'test' process evaluates the shared model but never updates it.
p = mp.Process(target=test, args=(params.num_processes, params, shared_model))
p.start()  # Start process p.
processes.append(p)  # Add the started process to the process list.
for rank in range(0, params.num_processes):  # Launch every training process; each updates the shared model.
    p = mp.Process(target=train, args=(rank, params, shared_model, optimizer))
def main():
    """ Train an A3C agent """
    os.environ['OMP_NUM_THREADS'] = '1'

    # Command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--max_timesteps', default=5000000, type=int,
                        help="How many total timesteps to run between all environments")
    parser.add_argument('--batch_size', default=20, type=int,
                        help="How many steps to do before reflecting on the batch")
    parser.add_argument('--env_name', default='PongNoFrameskip-v4', type=str,
                        help="Which environment to train on")
    parser.add_argument('--discount_factor', default=0.99, type=float,
                        help=("The discount factor, also called gamma, used for "
                              "discounting future returns"))
    parser.add_argument('--gae', default=1., type=float,
                        help="Parameter for use in GAE, also called tau")
    parser.add_argument('--actor_coef', default=1., type=float,
                        help="How much weight to give the actor when updating")
    parser.add_argument('--critic_coef', default=0.5, type=float,
                        help="How much weight to give the critic when updating")
    parser.add_argument('--entropy_coef', default=0.01, type=float,
                        help="How much weight to give entropy when updating")
    parser.add_argument('--learning_rate', default=0.0001, type=float,
                        help="Optimizer learning rate")
    parser.add_argument('--no_of_workers', default=16, type=int,
                        help="Number of parallel processes to run")
    parser.add_argument('--feature_type', default='cnn', type=str,
                        help="The feature extractor to use on the network input. "
                             "Options are: cnn, mlp")
    args = parser.parse_args()
    print(f"Args: {args}")

    hyperparams = HyperParams(max_timesteps=args.max_timesteps,
                              batch_size=args.batch_size,
                              discount_factor=args.discount_factor,
                              gae=args.gae,
                              actor_coef=args.actor_coef,
                              critic_coef=args.critic_coef,
                              entropy_coef=args.entropy_coef,
                              env_name=args.env_name,
                              learning_rate=args.learning_rate,
                              no_of_workers=args.no_of_workers,
                              feature_type=args.feature_type)

    # Make temporary directory for logging
    directory = './runs/{}'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M"))
    if not os.path.exists(directory):
        os.makedirs(directory)

    # Shared model
    atari = hyperparams.feature_type == 'cnn'
    temp_env = create_environment(args.env_name, monitor=False, atari=atari)
    shared_model = ActorCritic(temp_env.observation_space.shape,
                               temp_env.action_space.n,
                               hyperparams.feature_type)
    shared_model.share_memory()

    # Frame counter
    frame_counter = Value('i')

    # Optimizer
    optimizer = SharedAdam(shared_model.parameters(), lr=hyperparams.learning_rate)
    optimizer.share_memory()

    # Monitor
    monitor = Monitor(directory, hyperparams)

    processes = []
    monitor_process = Process(target=monitor.monitor,
                              args=(frame_counter, hyperparams.max_timesteps))
    monitor_process.start()
    processes.append(monitor_process)

    for i in range(hyperparams.no_of_workers):
        process = Process(target=train,
                          args=(shared_model, directory, hyperparams, frame_counter,
                                optimizer, monitor.queue, i))
        process.start()
        processes.append(process)

    # Single-process variant kept for debugging:
    # train(
    #     shared_model=shared_model,
    #     directory=directory,
    #     hyperparams=hyperparams,
    #     frame_counter=frame_counter,
    #     optimizer=optimizer,
    #     monitor_queue=monitor.queue,
    #     process_number=0
    # )

    for process in processes:
        process.join()
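# Each worker launched above presumably advances the shared frame_counter.
# The standard multiprocessing.Value pattern for that is sketched below;
# count_frames is an illustrative name, and the real train() is defined
# elsewhere. Value('i') starts at 0, and get_lock() makes the increment
# atomic across processes.
def count_frames(frame_counter, n=1):
    with frame_counter.get_lock():
        frame_counter.value += n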