Example #1

import os
import time
from multiprocessing import Process

# train_PG is assumed to be defined earlier in this script.


def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('env_name', type=str)
    # parser.add_argument('--env_name', type=str, default='CartPole-v0')
    parser.add_argument('--exp_name', type=str, default='vpg')
    parser.add_argument('--render', action='store_true')
    parser.add_argument('--discount', type=float, default=1.0)
    parser.add_argument('--n_iter', '-n', type=int, default=100)
    parser.add_argument('--batch_size', '-b', type=int, default=1000)
    parser.add_argument('--ep_len', '-ep', type=float, default=-1.)
    parser.add_argument('--learning_rate', '-lr', type=float, default=5e-3)
    parser.add_argument('--reward_to_go', '-rtg', action='store_true')
    # parser.add_argument('--reward_to_go', '-rtg', type=bool, default=True)
    parser.add_argument('--dont_normalize_advantages',
                        '-dna',
                        action='store_true')
    parser.add_argument('--nn_baseline', '-bl', action='store_true')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--n_experiments', '-e', type=int, default=1)
    parser.add_argument('--n_layers', '-l', type=int, default=2)
    parser.add_argument('--size', '-s', type=int, default=64)
    parser.add_argument('--dir', '-d', type=str, default='test')
    args = parser.parse_args()

    if not os.path.exists(args.dir):
        os.makedirs(args.dir)
    logdir = args.exp_name + '_' + args.env_name + '_' + time.strftime(
        "%d-%m-%Y_%H-%M-%S")
    logdir = os.path.join(args.dir, logdir)
    if not os.path.exists(logdir):
        os.makedirs(logdir)

    max_path_length = args.ep_len if args.ep_len > 0 else None

    processes = []

    for e in range(args.n_experiments):
        seed = args.seed + 10 * e
        print('Running experiment with seed %d' % seed)

        def train_func():
            train_PG(exp_name=args.exp_name,
                     env_name=args.env_name,
                     n_iter=args.n_iter,
                     gamma=args.discount,
                     min_timesteps_per_batch=args.batch_size,
                     max_path_length=max_path_length,
                     learning_rate=args.learning_rate,
                     reward_to_go=args.reward_to_go,
                     animate=args.render,
                     logdir=os.path.join(logdir, '%d' % seed),
                     normalize_advantages=not args.dont_normalize_advantages,
                     nn_baseline=args.nn_baseline,
                     seed=seed,
                     n_layers=args.n_layers,
                     size=args.size)

        p = Process(target=train_func, args=tuple())
        p.start()
        processes.append(p)
        # if you uncomment the line below, the loop will block
        # until this process finishes
        # p.join()

    for p in processes:
        p.join()
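A portability note on the pattern above: train_func is a closure defined inside the loop, which works with the default fork start method on Linux but cannot be pickled under the spawn start method (the default on Windows and macOS). A sketch of the more portable variant, passing per-experiment values to a module-level function (train_PG and its keyword arguments as assumed above; most are elided here):

def run_experiment(args, seed, logdir):
    # Module-level functions are picklable, so this also works under spawn;
    # the remaining train_PG keyword arguments are elided for brevity.
    train_PG(exp_name=args.exp_name,
             env_name=args.env_name,
             seed=seed,
             logdir=os.path.join(logdir, '%d' % seed))

# inside the loop, instead of the closure:
# p = Process(target=run_experiment, args=(args, seed, logdir))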
Example #2

import os
import torch
import torch.distributed as dist
from torch.multiprocessing import Process


def run(rank, size):
    """ Distributed function to be implemented later. """
    pass


def init_process(rank, size, fn, backend='gloo'):
    """ Initialize the distributed environment. """
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = '29500'
    dist.init_process_group(backend, rank=rank, world_size=size)
    fn(rank, size)


if __name__ == "__main__":
    print(torch.distributed.is_available())
    print(torch.distributed.is_nccl_available())
    size = 2
    processes = []
    for rank in range(size):
        p = Process(target=init_process, args=(rank, size, run))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()
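run is left as a stub. A minimal body in the spirit of this setup, summing a tensor across both ranks with all_reduce (illustrative, not part of the original snippet):

def run(rank, size):
    """ Sum a one-element tensor across all processes. """
    tensor = torch.ones(1)
    dist.all_reduce(tensor, op=dist.ReduceOp.SUM)
    print(f'Rank {rank} has data {tensor[0]}')  # sums to 2 when size == 2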
Example #3

import argparse

from multiprocessing import Process

# init_processes and run are assumed to be defined earlier in this script.

if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("--world", default=1, type=int)
    parser.add_argument("--rank", default=0, type=int)
    parser.add_argument("--gpu", type=str)
    parser.add_argument("--master", default="127.0.0.1", type=str)
    parser.add_argument("--port", default="14007", type=str)
    parser.add_argument("--dataset", default="ImageTarDataset", type=str)
    parser.add_argument("--batch", default=256, type=int)
    parser.add_argument("--datapath", type=str)
    parser.add_argument("--data_dir", type=str)
    args = parser.parse_args()

    world_size = args.world
    node_id = args.rank
    node_size = int(args.gpu)  # number of GPUs (worker processes) on this node
    batch_size = args.batch
    processes = []
    for local_rank in range(node_size):
        p = Process(target=init_processes,
                    args=(args.master, args.port, args.dataset, args.datapath,
                          batch_size, (node_id * node_size) + local_rank,
                          node_size * world_size, run))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()
Example #4

        sac_trainer.policy_net.share_memory()  # share model parameters across processes
        sac_trainer.log_alpha.share_memory_()  # share the entropy-temperature variable
        ShareParameters(sac_trainer.soft_q_optimizer1)
        ShareParameters(sac_trainer.soft_q_optimizer2)
        ShareParameters(sac_trainer.policy_optimizer)
        ShareParameters(sac_trainer.alpha_optimizer)

        # used to collect rewards from all worker processes and plot the curve
        rewards_queue = mp.Queue()

        num_workers = 2  # or: mp.cpu_count()
        processes = []
        rewards = []

        for i in range(num_workers):
            # the args contain both shared and non-shared objects
            process = Process(target=worker,
                              args=(i, sac_trainer, ENV, rewards_queue,
                                    replay_buffer, max_episodes, max_steps,
                                    batch_size, explore_steps, update_itr,
                                    AUTO_ENTROPY, DETERMINISTIC, hidden_dim,
                                    model_path))
            process.daemon = True  # all processes are closed when the main process stops
            processes.append(process)

        for p in processes:
            p.start()
        while True:  # keep getting episode rewards from the queue
            r = rewards_queue.get()
            if r is not None:
                rewards.append(r)
            else:
                break

            if len(rewards) % 20 == 0 and len(rewards) > 0:
                plot(rewards)

        for p in processes:  # workers finish at roughly the same time
            p.join()
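The while loop above only terminates when a worker puts None on rewards_queue, so the workers must follow a sentinel protocol. A minimal sketch of that protocol (the worker internals here are an assumption):

def worker_reward_protocol(rewards_queue, episode_rewards):
    # Each worker pushes one reward per finished episode...
    for r in episode_rewards:
        rewards_queue.put(r)
    # ...and a None sentinel when training ends, breaking the loop above.
    rewards_queue.put(None)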
Example #5

    'L': False,
    'SO': False,
    'OPT': 'Adam',
    'gpu_ids': [1, 1, 1, 1, 1, 1, 1, 1],
    'env_ids': [0, 1, 2, 3, 4, 5, 6, 8],
    'mr': 1,
    'mn': 'tdw_ppo_a3c_model_relation',
    'lmn': 'tdw_ppo_a3c_model_relation',
    'loss': 'relation_TDW_reward.p'
}

if __name__ == '__main__':
    # loadarguments, args, optimizer, shared_model, and step_loss are assumed
    # to be defined/imported elsewhere in this script.
    torch.backends.cudnn.benchmark = False
    processes = []
    loadarguments()
    torch.manual_seed(args['seed'])
    torch.cuda.manual_seed(args['seed'])

    mp.set_start_method('spawn')
    # train(args, optimizer, 0, shared_model)

    for rank in range(0, args['W']):
        p = Process(target=train,
                    args=(args, optimizer, rank, shared_model, step_loss))
        p.start()
        processes.append(p)
        time.sleep(10)
    for p in processes:
        p.join()
        time.sleep(10)
Example #6

            # (tail of slave_routine: poll the parameter queue, run rollouts,
            # and push results to the result queue)
            if p_queue.empty():
                sleep(.1)
            else:
                s_id, params = p_queue.get()
                r_queue.put((s_id, r_gen.rollout(params)))


################################################################################
#                Define queues and start workers                               #
################################################################################
p_queue = Queue()
r_queue = Queue()
e_queue = Queue()

for p_index in range(num_workers):
    Process(target=slave_routine,
            args=(p_queue, r_queue, e_queue, p_index)).start()


################################################################################
#                           Evaluation                                         #
################################################################################
def evaluate(solutions, results, rollouts=100):
    """ Give current controller evaluation.

    Evaluation is minus the cumulated reward averaged over rollout runs.

    :args solutions: CMA set of solutions
    :args results: corresponding results
    :args rollouts: number of rollouts

    :returns: minus averaged cumulated reward
    """
Example #7

def register(freq, func, args):
    """Run func(*args) in a background daemon process (see sketch below)."""
    print(f'Registered {func} with freq: {freq}')
    p = Process(target=periodic_executor, args=(freq, func, args))
    p.daemon = True
    p.start()
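periodic_executor is not shown in this snippet; a minimal sketch of what such a helper might look like (the name and behavior are assumptions based on how register uses it):

import time

def periodic_executor(freq, func, args):
    # Hypothetical helper: call func(*args) every `freq` seconds, forever.
    # The enclosing daemon process dies with the main process.
    while True:
        func(*args)
        time.sleep(freq)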
Example #8

    logger.info(f'Rank {rank} Terminated')
    logger.info(f'Rank {rank} Total Time:')
    logger.info(total_train_time)


def init_processes(rank, size, fn, backend='gloo'):
    global DEVICE, logger
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = '29500'
    dist.init_process_group(backend, rank=rank, world_size=size)

    # Configuring multiple GPU
    if not debug_mode_enabled and isinstance(gpu, list):
        DEVICE = "cuda:{}".format(gpu[rank])
        torch.cuda.set_device(gpu[rank])

    logger = hlsgd_logging.init_logger(args, rank)
    fn(rank, size)


if __name__ == "__main__":
    processes = []
    for rank in range(world_size):
        p = Process(target=init_processes, args=(rank, world_size, run))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()

Example #9

def train_ai2thor(model, args, rank=0, b=None):

    seed = args.seed + 10000 * rank
    torch.manual_seed(seed)
    np.random.seed(seed)

    # torch.cuda.set_device(rank)
    device = torch.device(f'cuda:{rank}')
    os.environ['DISPLAY'] = f':{rank}'

    model = model.to(device)
    model.share_memory()

    # Experience buffer
    storage = PPOBuffer(model.obs_shape,
                        args.steps,
                        args.num_workers,
                        args.state_size,
                        args.gamma,
                        device=device)
    storage.share_memory()

    # torch.multiprocessing.set_start_method('spawn')
    # start multiple worker processes
    ready_to_works = [Event() for _ in range(args.num_workers)]
    exit_flag = Value('i', 0)
    queue = SimpleQueue()

    processes = []
    task_config_file = "config_files/multiMugTaskTrain.json"
    # start workers
    for worker_id in range(args.num_workers):
        p = Process(target=worker,
                    args=(worker_id, model, storage, ready_to_works[worker_id],
                          queue, exit_flag, task_config_file))
        p.start()
        processes.append(p)

    # start trainer
    train_params = {
        "epochs": args.epochs,
        "steps": args.steps,
        "world_size": args.world_size,
        "num_workers": args.num_workers
    }
    ppo_params = {
        "clip_param": args.clip_param,
        "train_iters": args.train_iters,
        "mini_batch_size": args.mini_batch_size,
        "value_loss_coef": args.value_loss_coef,
        "entropy_coef": args.entropy_coef,
        "rnn_steps": args.rnn_steps,
        "lr": args.lr,
        "max_kl": args.max_kl
    }

    distributed = False
    if args.world_size > 1:
        distributed = True
        # Initialize Process Group, distributed backend type
        dist_backend = 'nccl'
        # Url used to setup distributed training
        dist_url = "tcp://127.0.0.1:23456"
        print("Initialize Process Group... pid:", os.getpid())
        dist.init_process_group(backend=dist_backend,
                                init_method=dist_url,
                                rank=rank,
                                world_size=args.world_size)
        # Make model DistributedDataParallel
        model = DistributedDataParallel(model,
                                        device_ids=[rank],
                                        output_device=rank)

    learner(model, storage, train_params, ppo_params, ready_to_works, queue,
            exit_flag, rank, distributed, b)

    for p in processes:
        p.join()
        print("process ", p.pid, " joined")
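A note on the commented-out set_start_method call above: child processes that touch CUDA tensors generally cannot be created with fork, so the caller of train_ai2thor has to select the spawn start method before creating workers. A minimal sketch of that guard (how the actual caller does this is an assumption):

import torch.multiprocessing as mp

if __name__ == "__main__":
    # CUDA contexts do not survive fork(); use spawn (or forkserver) when
    # child processes work with CUDA tensors.
    mp.set_start_method('spawn', force=True)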
Example #10

    ac_kwargs['action_space'] = env.action_space
    ac_kwargs['state_size'] = args.state_size
    env.close()
    # Main model
    print("Initialize Model...")
    # Construct Model
    ac_model = ActorCritic(obs_shape=obs_dim, **ac_kwargs)
    if args.model_path:
        ac_model.load_state_dict(torch.load(args.model_path))
    # Count variables
    var_counts = tuple(
        count_vars(m) for m in
        [ac_model.policy, ac_model.value_function, ac_model.feature_base])
    print('\nNumber of parameters: \t pi: %d, \t v: %d \tbase: %d\n' %
          var_counts)

    if args.world_size > 1:
        processes = []
        b = Barrier(args.world_size)
        for rank in range(args.world_size):
            p = Process(target=train_ai2thor, args=(ac_model, args, rank, b))
            p.start()
            processes.append(p)

        for p in processes:
            p.join()
            print("process ", p.pid, " joined")
    else:
        train_ai2thor(ac_model, args)
    print("main exits")
Example #11

    parser.add_argument('--seed', type=int, default=1,
                        help='seed used for initialization')
    parser.add_argument('--master_address', type=str, default='127.0.0.1',
                        help='address for master')
    parser.add_argument("--x", default=50, type=int)
    parser.add_argument("--y", default=99, type=int)
    parser.add_argument("--t", default=3, type=int)
    parser.add_argument('--source_folder', default="/Users/alessandrozonta/Desktop/",
                        help="root where to store data")

    args = parser.parse_args()
    utils.create_exp_dir(args.save)

    size = args.world_size

    if size > 1:
        args.distributed = True
        processes = []
        for rank in range(size):
            args.local_rank = rank
            p = Process(target=init_processes, args=(rank, size, main, args))
            p.start()
            processes.append(p)

        for p in processes:
            p.join()
    else:
        # for debugging
        print('starting in debug mode')
        args.distributed = True
        init_processes(0, size, main, args)
Example #12

                        default=1,
                        help='seed used for initialization')
    args = parser.parse_args()
    args.save = args.root + '/eval-' + args.save
    utils.create_exp_dir(args.save)

    size = args.num_process_per_node

    if size > 1:
        args.distributed = True
        processes = []
        for rank in range(size):
            args.local_rank = rank
            global_rank = rank + args.node_rank * args.num_process_per_node
            global_size = args.num_proc_node * args.num_process_per_node
            args.global_rank = global_rank
            print('Node rank %d, local proc %d, global proc %d' %
                  (args.node_rank, rank, global_rank))
            p = Process(target=init_processes,
                        args=(global_rank, global_size, main, args))
            p.start()
            processes.append(p)

        for p in processes:
            p.join()
    else:
        # for debugging
        print('starting in debug mode')
        args.distributed = True
        init_processes(0, size, main, args)


def local_process(target, args):
    return Process(target=target, args=args)
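local_process is a thin wrapper around multiprocessing.Process; a usage sketch (the target function here is illustrative):

def say_hello(name):
    print(f'hello, {name}')

if __name__ == '__main__':
    p = local_process(target=say_hello, args=('world',))
    p.start()
    p.join()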