Example #1
    def train(self):
        args = self.args
        torch.manual_seed(args.seed)
        env = grid2op.make(args.env_name,
                           test=args.for_test,
                           reward_class=L2RPNReward)
        shared_model = ActorCritic(env.observation_space.size(),
                                   self.action_space, args.hidden_size)
        shared_model.share_memory()

        if args.no_shared:
            optimizer = None
        else:
            optimizer = my_optim.SharedAdam(shared_model.parameters(),
                                            lr=args.lr)
            optimizer.share_memory()

        processes = []

        counter = mp.Value('i', 0)
        lock = mp.Lock()

        p = mp.Process(target=self.do_test,
                       args=(args.num_processes, args, shared_model, counter))
        p.start()
        processes.append(p)

        for rank in range(0, args.num_processes):
            p = mp.Process(target=self.do_train,
                           args=(rank, args, shared_model, counter, lock,
                                 optimizer))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()
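Note: `my_optim.SharedAdam` is not shown in this excerpt. Below is a minimal sketch of a shared-memory Adam in this style, assuming the usual pattern of pre-allocating the optimizer state and moving it into shared memory; the actual module may differ (for example, by also overriding `step()`).

import torch
import torch.optim as optim


class SharedAdam(optim.Adam):
    """Adam whose state tensors can be placed in shared memory so all worker processes update the same statistics."""

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        super().__init__(params, lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        # Pre-create the per-parameter state so it exists before the worker processes fork.
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = torch.zeros(1)
                state['exp_avg'] = torch.zeros_like(p.data)
                state['exp_avg_sq'] = torch.zeros_like(p.data)

    def share_memory(self):
        # Mirror model.share_memory(): move the optimizer state into shared memory.
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'].share_memory_()
                state['exp_avg'].share_memory_()
                state['exp_avg_sq'].share_memory_()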
Example #2
def run(args):
    device = torch.device("cpu")
    env = gym.make('SpaceInvaders-v0')
    state_size = env.observation_space.shape
    action_size = env.action_space.n

    model = ActorCritic([1, 4, 84, 84], action_size).to(device)
    opt = SharedRMSprop(model.parameters(),
                        lr=args.lr,
                        alpha=args.alpha,
                        eps=1e-8,
                        weight_decay=args.weight_decay,
                        momentum=args.momentum,
                        centered=False)
    opt_lock = mp.Lock()
    scheduler = LRScheduler(args)

    if args.load_fp:
        checkpoint = torch.load(args.load_fp)
        model.load_state_dict(checkpoint['model_state_dict'])
        opt.load_state_dict(checkpoint['optimizer_state_dict'])

    if args.train:
        start = time.time()

        model.share_memory()
        model.train()

        step_counter, max_reward, ma_reward, ma_loss = [
            mp.Value('d', 0.0) for _ in range(4)
        ]

        processes = []
        if args.num_procs == -1:
            args.num_procs = mp.cpu_count()
        for rank in range(args.num_procs):
            p = mp.Process(target=train,
                           args=(rank, args, device, model, opt, opt_lock,
                                 scheduler, step_counter, max_reward,
                                 ma_reward, ma_loss))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()

        if args.verbose > 0:
            print(f"Seconds taken: {time.time() - start}")
        if args.save_fp:
            torch.save(
                {
                    'model_state_dict': model.state_dict(),
                    # 'optimizer_state_dict': opt.state_dict(),
                },
                args.save_fp)

    if args.test:
        model.eval()
        test(args, device, model)
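Note: the `train` worker launched above is not part of this excerpt. The following is a rough sketch of how such a worker might use the shared model, the optimizer lock, and the `mp.Value` counters; `compute_loss`, `args.seed`, and `args.max_steps` are placeholders/assumptions, not the actual code.

import gym
import torch


def train(rank, args, device, model, opt, opt_lock, scheduler,
          step_counter, max_reward, ma_reward, ma_loss):
    torch.manual_seed(args.seed + rank)    # give each worker its own seed (assumed field)
    env = gym.make('SpaceInvaders-v0')     # each worker owns a private environment

    while step_counter.value < args.max_steps:    # assumed stopping criterion
        loss = compute_loss(model, env)           # placeholder rollout + loss routine

        with opt_lock:          # serialize updates to the shared optimizer
            opt.zero_grad()
            loss.backward()
            opt.step()

        with step_counter.get_lock():             # mp.Value updates need the lock
            step_counter.value += 1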
Example #3
parser.add_argument('--num-steps', type=int, default=20, metavar='NS',
                    help='number of forward steps in A3C (default: 20)')
parser.add_argument('--max-episode-length', type=int, default=10000, metavar='M',
                    help='maximum length of an episode (default: 10000)')
parser.add_argument('--env-name', default='PongDeterministic-v3', metavar='ENV',
                    help='environment to train on (default: PongDeterministic-v3)')


if __name__ == '__main__':
    args = parser.parse_args()

    torch.manual_seed(args.seed)

    env = create_atari_env(args.env_name)
    shared_model = ActorCritic(
        env.observation_space.shape[0], env.action_space)
    shared_model.share_memory()

    processes = []

    p = mp.Process(target=test, args=(args.num_processes, args, shared_model))
    p.start()
    processes.append(p)

    for rank in range(0, args.num_processes):
        p = mp.Process(target=train, args=(rank, args, shared_model))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
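Note: these launcher scripts do not show how a worker pushes its gradients into the shared model. A common sketch, assuming the worker keeps a local copy of the network, is a small helper along these lines:

def ensure_shared_grads(local_model, shared_model):
    # Point the shared parameters' .grad at the worker's gradients; since the
    # parameters themselves live in shared memory, the shared optimizer can then step.
    for local_param, shared_param in zip(local_model.parameters(),
                                         shared_model.parameters()):
        if shared_param.grad is not None:
            return
        shared_param._grad = local_param.grad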
Example #4
                    loss.backward()
                    for local_param, global_param in zip(
                            self.local_actor_critic.parameters(),
                            self.global_actor_critic.parameters()):
                        global_param._grad = local_param.grad
                    self.optimizer.step()
                    self.local_actor_critic.load_state_dict(self.global_actor_critic.state_dict())
                    self.local_actor_critic.clear_memory()
                t_step += 1
                observation = observation_
            with self.episode_index.get_lock():
                self.episode_index.value += 1
            print(self.name, 'episode ', self.episode_index.value, 'reward %.1f' % score)


if __name__ == '__main__':
    lr = 1e-4
    env_id = 'CartPole-v0'
    nb_actions = 2
    input_dims = [4]
    global_actor_critic = ActorCritic(input_dims, nb_actions)
    global_actor_critic.share_memory()
    optim = SharedAdam(global_actor_critic.parameters(), lr=lr, betas=(0.92, 0.999))
    global_ep = mp.Value('i', 0)

    workers = [
        Agent(global_actor_critic, optim, input_dims, nb_actions, gamma=0.99, lr=lr, name=i,
              global_ep_index=global_ep,
              env_id=env_id) for i in range(mp.cpu_count())]
    [w.start() for w in workers]
    [w.join() for w in workers]
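Note: the `Agent` workers above are started and joined like processes. A minimal sketch of such a class, with the constructor signature read off the call above and the body assumed (the excerpt earlier in this example shows part of the actual training loop):

import gym
import torch.multiprocessing as mp


class Agent(mp.Process):
    def __init__(self, global_actor_critic, optimizer, input_dims, n_actions,
                 gamma, lr, name, global_ep_index, env_id):
        super().__init__()
        self.global_actor_critic = global_actor_critic
        self.optimizer = optimizer
        self.local_actor_critic = ActorCritic(input_dims, n_actions)  # per-process copy
        self.episode_index = global_ep_index
        self.name = f'worker_{name}'
        self.gamma = gamma
        self.env_id = env_id

    def run(self):
        env = gym.make(self.env_id)  # create the environment inside the child process
        # The training loop from the excerpt above lives here: roll out episodes,
        # compute the loss, copy local gradients onto the global model, step the
        # shared optimizer, then reload the global weights into the local copy.
        ...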
Example #5
                    help='environment to train on (default: Breakout-v0)')
parser.add_argument('--render',
                    default=False,
                    action='store_true',
                    help='render the environment')

if __name__ == '__main__':
    args = parser.parse_args()

    #torch.manual_seed(args.seed)
    torch.set_num_threads(1)

    env = gym.make(args.env_name)

    global_model = ActorCritic(env.action_space.n)
    global_model.share_memory()
    local_model = ActorCritic(env.action_space.n)

    optimizer = AsyncAdam(global_model.parameters(),
                          local_model.parameters(),
                          lr=args.lr)

    processes = []
    for rank in range(args.num_processes):
        p = mp.Process(target=train,
                       args=(rank, args, global_model, local_model, optimizer))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
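Note: `AsyncAdam` is not shown here; it is constructed from both the global and the local parameter sets. A hypothetical sketch of such an optimizer, assuming it copies local gradients onto the global parameters before stepping (the real implementation may differ):

import torch.optim as optim


class AsyncAdam(optim.Adam):
    def __init__(self, global_params, local_params, lr=1e-4):
        self.local_params = list(local_params)
        super().__init__(list(global_params), lr=lr)

    def step(self, closure=None):
        # Apply the worker's gradients to the global (shared) parameters.
        for global_p, local_p in zip(self.param_groups[0]['params'], self.local_params):
            if local_p.grad is not None:
                global_p._grad = local_p.grad
        return super().step(closure)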
Example #6
from args_ali import Args
from agent import agent
from coordinator import coordinator
from model import ActorCritic
from test_ali import test

if __name__ == '__main__':
    os.environ['OMP_NUM_THREADS'] = '1'
    torch.set_num_threads(1)

    args = Args()
    torch.manual_seed(args.seed)

    model = ActorCritic()
    model.share_memory()

    # inter-process communication queues
    exp_queues = []
    model_params = []
    for i in range(args.num_processes):
        exp_queues.append(mp.Queue(1))
        model_params.append(mp.Queue(1))

    p = mp.Process(target=test, args=(args, model))
    p.start()

    # create a process for the coordinator
    coordinator = mp.Process(target=coordinator,
                             args=(args.num_processes, args, model, exp_queues,
                                   model_params))
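Note: the `agent` and `coordinator` bodies are not shown. The following is a rough sketch of the exchange implied by the single-slot queues; the `agent` signature and the `collect_experience`/`update_model` helpers are assumptions, not the actual code.

def agent(rank, args, exp_queue, param_queue):
    local_model = ActorCritic()
    while True:
        local_model.load_state_dict(param_queue.get())   # block until fresh weights arrive
        batch = collect_experience(local_model)          # placeholder rollout
        exp_queue.put(batch)                             # single-slot queue applies back-pressure


def coordinator(num_workers, args, model, exp_queues, model_params):
    for q in model_params:
        q.put(model.state_dict())                        # publish the initial weights
    while True:
        batches = [q.get() for q in exp_queues]          # wait for one batch per worker
        update_model(model, batches)                     # placeholder gradient update
        for q in model_params:
            q.put(model.state_dict())                    # broadcast the updated weights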
Example #7
        self.tau = 1.
        self.seed = 1
        self.num_processes = 16
        self.num_steps = 20
        self.max_episode_length = 10000
        self.env_name = 'Breakout-v0'


# Main run
os.environ['OMP_NUM_THREADS'] = '1'  # 1 thread per core
params = Params()  # creating the params object from the Params class, that sets all the model parameters
torch.manual_seed(params.seed)  # setting the seed (not essential)
env = create_atari_env(params.env_name)  # we create an optimized environment thanks to universe
shared_model = ActorCritic(env.observation_space.shape[0],
                           env.action_space)  # the model shared by all agent processes
shared_model.share_memory()  # place the model's parameters in shared memory so every process sees the same tensors
optimizer = my_optim.SharedAdam(shared_model.parameters(),
                                lr=params.lr)  # the optimizer is also shared because it acts on the shared model
optimizer.share_memory()  # keep the optimizer state in shared memory as well, so all agents update the same statistics
processes = []  # initializing the processes with an empty list
p = mp.Process(target=test, args=(params.num_processes, params,
                                  shared_model))  # the 'test' process evaluates the shared model without updating it; torch.multiprocessing.Process runs the target function in a separate process
p.start()  # starting the created process p
processes.append(p)  # adding the created process p to the list of processes
for rank in range(0,
                  params.num_processes):  # launch the training processes, each of which updates the shared model
    p = mp.Process(target=train, args=(rank, params, shared_model, optimizer))
    p.start()
    processes.append(p)
for p in processes:  # wait for every process to finish so the program exits cleanly
    p.join()
Example #8
    # os.environ['OPENAI_REMOTE_VERBOSE'] = '0'

    # Setup
    args = parser.parse_args()
    print(' ' * 26 + 'Options')
    for k, v in vars(args).items():
        print(' ' * 26 + k + ': ' + str(v))
    args.env = 'CartPole-v1'  # TODO: Remove hardcoded environment when code is more adaptable
    torch.manual_seed(args.seed)
    T = Counter()  # Global shared counter

    # Create shared network
    env = gym.make(args.env)
    shared_model = ActorCritic(env.observation_space, env.action_space,
                               args.hidden_size)
    shared_model.share_memory()
    if args.model and os.path.isfile(args.model):
        # Load pretrained weights
        shared_model.load_state_dict(torch.load(args.model))
    # Create average network
    shared_average_model = ActorCritic(env.observation_space, env.action_space,
                                       args.hidden_size)
    shared_average_model.load_state_dict(shared_model.state_dict())
    shared_average_model.share_memory()
    for param in shared_average_model.parameters():
        param.requires_grad = False
    # Create optimiser for shared network parameters with shared statistics
    optimiser = SharedRMSprop(shared_model.parameters(),
                              lr=args.lr,
                              alpha=args.rmsprop_decay)
    optimiser.share_memory()
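Note: the `Counter` used for the global shared step count `T` is not defined in this excerpt. A minimal sketch of such a wrapper around `mp.Value` (an assumption about its interface):

import torch.multiprocessing as mp


class Counter:
    def __init__(self):
        self.val = mp.Value('i', 0)
        self.lock = mp.Lock()

    def increment(self):
        with self.lock:
            self.val.value += 1

    def value(self):
        with self.lock:
            return self.val.value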
Example #9
def run_acer(variant):
    # BLAS setup
    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['MKL_NUM_THREADS'] = '1'

    # Setup
    # args = parser.parse_args()
    # Creating directories.
    save_dir = os.path.join('results', 'results')
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    print(' ' * 26 + 'Options')
    """
  # Saving parameters
  with open(os.path.join(save_dir, 'params.txt'), 'w') as f:
    for k, v in vars(args).items():
      print(' ' * 26 + k + ': ' + str(v))
      f.write(k + ' : ' + str(v) + '\n')
  """
    # args.env = 'CartPole-v1'  # TODO: Remove hardcoded environment when code is more adaptable
    # mp.set_start_method(platform.python_version()[0] == '3' and 'spawn' or 'fork')  # Force true spawning (not forking) if available
    torch.manual_seed(variant['seed'])
    T = Counter()  # Global shared counter
    # gym.logger.set_level(gym.logger.ERROR)  # Disable Gym warnings

    # Create shared network
    env = gym.make(variant['env'])
    shared_model = ActorCritic(env.observation_space, env.action_space,
                               variant['hidden_size'])
    shared_model.share_memory()
    """
  if args.model and os.path.isfile(args.model):
    # Load pretrained weights
    shared_model.load_state_dict(torch.load(args.model))
  """
    # Create average network
    shared_average_model = ActorCritic(env.observation_space, env.action_space,
                                       variant['hidden_size'])
    shared_average_model.load_state_dict(shared_model.state_dict())
    shared_average_model.share_memory()
    for param in shared_average_model.parameters():
        param.requires_grad = False
    # Create optimiser for shared network parameters with shared statistics
    optimiser = SharedRMSprop(shared_model.parameters(),
                              lr=variant['lr'],
                              alpha=0.99)
    optimiser.share_memory()
    env.close()

    fields = ['t', 'rewards', 'avg_steps', 'time']
    with open(os.path.join(save_dir, 'test_results.csv'), 'w') as f:
        writer = csv.writer(f)
        writer.writerow(fields)
    # Start validation agent
    processes = []
    p = mp.Process(target=test, args=(0, variant, T, shared_model))
    p.start()
    processes.append(p)

    if not variant['evaluate']:
        # Start training agents
        for rank in range(1, variant['num-processes'] + 1):
            p = mp.Process(target=train,
                           args=(rank, variant, T, shared_model,
                                 shared_average_model, optimiser))
            p.start()
            print('Process ' + str(rank) + ' started')
            processes.append(p)

    # Clean up
    for p in processes:
        p.join()
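Note: since `run_acer` takes a plain `variant` dictionary instead of parsed arguments, a call might look like the following; the key names are read off the function body above, the values are only illustrative.

if __name__ == '__main__':
    run_acer({
        'seed': 1,
        'env': 'CartPole-v1',
        'hidden_size': 32,     # illustrative
        'lr': 1e-4,            # illustrative
        'evaluate': False,     # False also launches the training workers
        'num-processes': 4,    # note the hyphenated key, as used in the loop above
    })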
Example #10
        self.seed = 1
        self.num_processes = 16
        self.num_steps = 20
        self.max_episode_length = 10000
        self.env_name = 'Breakout-v0'  # We can work with other games just by changing the game name.


os.environ['OMP_NUM_THREADS'] = '1'  # 1 thread per core
params = Params()  # create the Params object with the default parameters
torch.manual_seed(params.seed)  # set the seed
env = create_atari_env(params.env_name)  # optimized game environment
print(env.observation_space.shape)
shared_model = ActorCritic(
    env.observation_space.shape[0], env.action_space
)  # shared_model is shared by the agents (different processes on different cores)
shared_model.share_memory()  # keep the model in shared memory so every worker can access it, even on different cores

optimizer = shared_adam.SharedAdam(shared_model.parameters(), lr=params.lr)
optimizer.share_memory()  # the optimizer state is kept in shared memory as well, since it acts on the shared model

processes = []  # list of processes
p = mp.Process(target=test, args=(params.num_processes, params, shared_model))
# the 'test' process evaluates the shared model without updating it
p.start()  # start process p
processes.append(p)  # add the started process to the list

for rank in range(
        0, params.num_processes
):  # all training processes are launched to update the shared model
    p = mp.Process(target=train, args=(rank, params, shared_model, optimizer))
Example #11
def main():
    """
    Train an A3C agent
    """
    os.environ['OMP_NUM_THREADS'] = '1'
    # Command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--max_timesteps',
        default=5000000,
        type=int,
        help="How many total timesteps to run between all environments")
    parser.add_argument(
        '--batch_size',
        default=20,
        type=int,
        help="How many steps to do before reflecting on the batch")
    parser.add_argument('--env_name',
                        default='PongNoFrameskip-v4',
                        type=str,
                        help="Which environment to train on")
    parser.add_argument(
        '--discount_factor',
        default=0.99,
        type=float,
        help=("The disount factor, also called gamma, used for discounting "
              "future returns"))
    parser.add_argument('--gae',
                        default=1.,
                        type=float,
                        help="Parameter for use in GAE, also called tau")
    parser.add_argument('--actor_coef',
                        default=1.,
                        type=float,
                        help="How much weight to give the actor when updating")
    parser.add_argument(
        '--critic_coef',
        default=0.5,
        type=float,
        help="How much weight to give the critic when updating")
    parser.add_argument('--entropy_coef',
                        default=0.01,
                        type=float,
                        help="How much weight to give entropy when updating")
    parser.add_argument('--learning_rate',
                        default=0.0001,
                        type=float,
                        help="Optimizer learning rate")
    parser.add_argument('--no_of_workers',
                        default=16,
                        type=int,
                        help="Number of parallel processes to run")
    parser.add_argument(
        '--feature_type',
        default='cnn',
        type=str,
        help="""The feature extractor to use on the network input.
        Options are: cnn, mlp""")
    args = parser.parse_args()
    print(f"Args: {args}")

    hyperparams = HyperParams(max_timesteps=args.max_timesteps,
                              batch_size=args.batch_size,
                              discount_factor=args.discount_factor,
                              gae=args.gae,
                              actor_coef=args.actor_coef,
                              critic_coef=args.critic_coef,
                              entropy_coef=args.entropy_coef,
                              env_name=args.env_name,
                              learning_rate=args.learning_rate,
                              no_of_workers=args.no_of_workers,
                              feature_type=args.feature_type)

    # Make temporary directory for logging
    directory = './runs/{}'.format(
        datetime.datetime.now().strftime("%Y%m%d-%H%M"))
    if not os.path.exists(directory):
        os.makedirs(directory)

    # Shared model
    atari = hyperparams.feature_type == 'cnn'
    temp_env = create_environment(args.env_name, monitor=False, atari=atari)
    shared_model = ActorCritic(temp_env.observation_space.shape,
                               temp_env.action_space.n,
                               hyperparams.feature_type)
    shared_model.share_memory()

    # Frame counter
    frame_counter = Value('i')

    # Optimizer
    optimizer = SharedAdam(shared_model.parameters(),
                           lr=hyperparams.learning_rate)
    optimizer.share_memory()

    # Monitor
    monitor = Monitor(directory, hyperparams)

    processes = []
    monitor_process = Process(target=monitor.monitor,
                              args=(frame_counter, hyperparams.max_timesteps))
    monitor_process.start()
    processes.append(monitor_process)
    for i in range(hyperparams.no_of_workers):
        process = Process(target=train,
                          args=(shared_model, directory, hyperparams,
                                frame_counter, optimizer, monitor.queue, i))
        process.start()
        processes.append(process)

    # train(
    #     shared_model=shared_model,
    #     directory=directory,
    #     hyperparams=hyperparams,
    #     frame_counter=frame_counter,
    #     optimizer=optimizer,
    #     monitor_queue=monitor.queue,
    #     process_number=0
    # )

    for process in processes:
        process.join()
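Note: `HyperParams` is constructed purely from keyword arguments above; a compatible definition could be as simple as a namedtuple (an assumption, any container with these fields would work).

from collections import namedtuple

# Field names mirror the keyword arguments passed in main().
HyperParams = namedtuple('HyperParams', [
    'max_timesteps', 'batch_size', 'discount_factor', 'gae', 'actor_coef',
    'critic_coef', 'entropy_coef', 'env_name', 'learning_rate',
    'no_of_workers', 'feature_type'
])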