Example #1
def main(args):
    # the checkpoint sits in an epoch-numbered directory; recover that count
    # so training can resume from it
    epoch_dir = os.path.split(args.network_file)[0]
    initial_count = int(os.path.split(epoch_dir)[-1])
    network_file = args.network_file
    optimizer_file = args.optimizer_file
    args_file_path = args.args_file
    mts = args.max_train_steps
    with open(args.args_file, 'r') as args_file:
        args = dotdict(json.load(args_file))

    print_ascii_logo()
    log_id = make_log_id(args.tag, args.mode_name, args.agent, args.vision_network + args.network_body)
    log_id_dir = os.path.join(args.log_dir, args.env_id, log_id)

    os.makedirs(log_id_dir)
    logger = make_logger('Local', os.path.join(log_id_dir, 'train_log.txt'))
    summary_writer = SummaryWriter(log_id_dir)
    saver = ModelSaver(args.nb_top_model, log_id_dir)

    log_args(logger, args)
    write_args_file(log_id_dir, args)
    logger.info('Resuming training from {} epoch {}'.format(args_file_path, initial_count))

    # construct env
    env = make_env(args, args.seed)

    # construct network
    torch.manual_seed(args.seed)
    network_head_shapes = get_head_shapes(env.action_space, env.engine, args.agent)
    network = make_network(env.observation_space, network_head_shapes, args)
    network.load_state_dict(torch.load(network_file))

    # construct agent
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    torch.backends.cudnn.benchmark = True
    agent = make_agent(network, device, env.engine, env.gpu_preprocessor, args)

    # construct optimizer (restore saved optimizer state when resuming)
    def make_optimizer(params):
        opt = torch.optim.RMSprop(params, lr=args.learning_rate, eps=1e-5, alpha=0.99)
        if optimizer_file is not None:
            opt.load_state_dict(torch.load(optimizer_file))
        return opt

    # construct the Container
    container = Local(
        agent,
        env,
        make_optimizer,
        args.epoch_len,
        args.nb_env,
        logger,
        summary_writer,
        args.summary_frequency,
        saver
    )
    try:
        container.run(mts + initial_count, initial_count)
    finally:
        env.close()
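
main() above reads four attributes from its args namespace before swapping in the saved training args. A minimal CLI sketch that would supply them; the flag names are an assumption inferred from those attributes, not the project's actual entry point:

import argparse

if __name__ == '__main__':
    # Hypothetical wrapper; flag names are inferred from the attributes read
    # in main() (network_file, optimizer_file, args_file, max_train_steps).
    parser = argparse.ArgumentParser(description='Resume training from a saved epoch')
    parser.add_argument('--network-file', required=True,
                        help='model checkpoint inside an epoch-numbered directory')
    parser.add_argument('--optimizer-file', default=None,
                        help='optional saved optimizer state to restore')
    parser.add_argument('--args-file', required=True,
                        help='args.json written by the original run')
    parser.add_argument('--max-train-steps', type=int, default=1000000,
                        help='additional steps to train past the resume point')
    main(parser.parse_args())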
Example #2
def main(args):
    print_ascii_logo()
    print('Saving replays... Press Ctrl+C to stop.')

    with open(args.args_file, 'r') as args_file:
        train_args = dotdict(json.load(args_file))
    train_args.nb_env = 1

    # construct env
    replay_dir = os.path.split(args.network_file)[0]

    def env_fn(seed):
        return DummyVecEnv([
            make_sc2_env(train_args.env_id,
                         seed,
                         replay_dir=replay_dir,
                         render=args.render)
        ], Engines.SC2)

    # build a temporary env only to query its spaces; the replay container
    # constructs its own env from env_fn
    env = env_fn(args.seed)
    env.close()

    # construct network
    network_head_shapes = get_head_shapes(env.action_space, env.engine,
                                          train_args.agent)
    network = make_network(env.observation_space, network_head_shapes,
                           train_args)
    network.load_state_dict(torch.load(args.network_file))

    # create an agent (add act_eval method)
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    torch.backends.cudnn.benchmark = True
    agent = make_agent(network, device, env.engine, env.gpu_preprocessor,
                       train_args)

    # create a rendering container
    # TODO: could terminate after a configurable number of replays instead of running indefinitely
    renderer = ReplayGenerator(agent, env_fn, device, args.seed)
    try:
        renderer.run()
    finally:
        env.close()
Example #3
def main(args):
    # construct logging objects
    print_ascii_logo()
    print('Rendering... Press Ctrl+C to stop.')

    with open(args.args_file, 'r') as args_file:
        train_args = dotdict(json.load(args_file))
    train_args.nb_env = 1

    # construct env
    def env_fn(seed):
        return atari_from_args(train_args, seed, subprocess=False)

    env = env_fn(args.seed)
    env.close()

    # construct network
    network_head_shapes = get_head_shapes(env.action_space, env.engine,
                                          train_args.agent)
    network = make_network(env.observation_space, network_head_shapes,
                           train_args)
    network.load_state_dict(
        torch.load(args.network_file,
                   map_location=lambda storage, loc: storage))

    # create an agent (add act_eval method)
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    agent = make_agent(network, device, env.engine, env.gpu_preprocessor,
                       train_args)

    # create a rendering container
    renderer = Renderer(agent, env_fn, device, args.seed)
    try:
        renderer.run()
    finally:
        env.close()
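
Example #3 remaps CUDA-saved tensors onto CPU storage with a map_location lambda; passing a device string to torch.load does the same thing and reads more clearly (both are standard PyTorch usage):

# equivalent to the lambda form above: remap any CUDA storages to CPU
state_dict = torch.load(args.network_file, map_location='cpu')
network.load_state_dict(state_dict)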
Example #4
File: impala.py  Project: yushu-liu/adeptRL
def main(args):
    # host needs to broadcast timestamp so all procs create the same log dir
    if rank == 0:
        timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        log_id = make_log_id_from_timestamp(
            args.tag, args.mode_name, args.agent,
            args.vision_network + args.network_body, timestamp)
        log_id_dir = os.path.join(args.log_dir, args.env_id, log_id)
        os.makedirs(log_id_dir)
        saver = SimpleModelSaver(log_id_dir)
        print_ascii_logo()
    else:
        timestamp = None
    timestamp = comm.bcast(timestamp, root=0)

    if rank != 0:
        log_id = make_log_id_from_timestamp(
            args.tag, args.mode_name, args.agent,
            args.vision_network + args.network_body, timestamp)
        log_id_dir = os.path.join(args.log_dir, args.env_id, log_id)

    comm.Barrier()

    # construct env
    seed = args.seed if rank == 0 else args.seed + (
        args.nb_env * (rank - 1))  # unique seed per process
    env = make_env(args, seed)

    # construct network
    torch.manual_seed(args.seed)
    network_head_shapes = get_head_shapes(env.action_space, env.engine,
                                          args.agent)
    network = make_network(env.observation_space, network_head_shapes, args)

    # sync network params
    if rank == 0:
        for v in network.parameters():
            comm.Bcast(v.detach().cpu().numpy(), root=0)
        print('Root variables synced')
    else:
        # can just use the numpy buffers
        variables = [v.detach().cpu().numpy() for v in network.parameters()]
        for v in variables:
            comm.Bcast(v, root=0)
        for shared_v, model_v in zip(variables, network.parameters()):
            model_v.data.copy_(torch.from_numpy(shared_v), non_blocking=True)
        print('{} variables synced'.format(rank))

    # construct agent
    # host is always the first gpu, workers are distributed evenly across the rest
    if len(args.gpu_id) > 1:  # nargs is always a list
        if rank == 0:
            gpu_id = args.gpu_id[0]
        else:
            gpu_id = args.gpu_id[1:][(rank - 1) % len(args.gpu_id[1:])]
    else:
        gpu_id = args.gpu_id[-1]
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # cudnn benchmarking autotunes kernels for fixed input shapes, so turn it
    # off when the host trains with dynamic batch sizes
    cudnn = True
    if rank == 0 and args.max_dynamic_batch > 0:
        cudnn = False
    torch.backends.cudnn.benchmark = cudnn
    agent = make_agent(network, device, env.engine, env.gpu_preprocessor, args)

    # workers
    if rank != 0:
        logger = make_logger(
            'ImpalaWorker{}'.format(rank),
            os.path.join(log_id_dir, 'train_log{}.txt'.format(rank)))
        summary_writer = SummaryWriter(os.path.join(log_id_dir, str(rank)))
        container = ImpalaWorker(agent,
                                 env,
                                 args.nb_env,
                                 logger,
                                 summary_writer,
                                 use_local_buffers=args.use_local_buffers)

        # Run the container
        if args.profile:
            try:
                from pyinstrument import Profiler
            except ImportError:
                raise ImportError(
                    'You must install pyinstrument to use profiling.')
            profiler = Profiler()
            profiler.start()
            container.run()
            profiler.stop()
            print(profiler.output_text(unicode=True, color=True))
        else:
            container.run()
        env.close()
    # host
    else:
        logger = make_logger(
            'ImpalaHost',
            os.path.join(log_id_dir, 'train_log{}.txt'.format(rank)))
        summary_writer = SummaryWriter(os.path.join(log_id_dir, str(rank)))
        log_args(logger, args)
        write_args_file(log_id_dir, args)
        logger.info('Network Parameter Count: {}'.format(
            count_parameters(network)))

        # no need for the env anymore
        env.close()

        # Construct the optimizer
        def make_optimizer(params):
            opt = torch.optim.RMSprop(params,
                                      lr=args.learning_rate,
                                      eps=1e-5,
                                      alpha=0.99)
            return opt

        container = ImpalaHost(agent,
                               comm,
                               make_optimizer,
                               summary_writer,
                               args.summary_frequency,
                               saver,
                               args.epoch_len,
                               args.host_training_info_interval,
                               use_local_buffers=args.use_local_buffers)

        # Run the container
        if args.profile:
            try:
                from pyinstrument import Profiler
            except ImportError:
                raise ImportError(
                    'You must install pyinstrument to use profiling.')
            profiler = Profiler()
            profiler.start()
            if args.max_dynamic_batch > 0:
                container.run(args.max_dynamic_batch,
                              args.max_queue_length,
                              args.max_train_steps,
                              dynamic=True,
                              min_dynamic_batch=args.min_dynamic_batch)
            else:
                container.run(args.num_rollouts_in_batch,
                              args.max_queue_length, args.max_train_steps)
            profiler.stop()
            print(profiler.output_text(unicode=True, color=True))
        else:
            if args.max_dynamic_batch > 0:
                container.run(args.max_dynamic_batch,
                              args.max_queue_length,
                              args.max_train_steps,
                              dynamic=True,
                              min_dynamic_batch=args.min_dynamic_batch)
            else:
                container.run(args.num_rollouts_in_batch,
                              args.max_queue_length, args.max_train_steps)
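
Examples #4 and #6 use rank and comm without defining them inside main(); in an MPI-launched script they would normally be module-level mpi4py handles. A sketch of the assumed setup (not shown in the source):

# Assumed module-level MPI setup for the impala.py / towered.py style scripts;
# lowercase bcast moves Python objects, uppercase Bcast fills numpy buffers.
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

# launched with one host process plus N workers, e.g.:
#   mpirun -np 5 python impala.py ...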
Example #5
File: local.py  Project: yushu-liu/adeptRL
def main(args):
    # construct logging objects
    print_ascii_logo()
    log_id = make_log_id(args.tag, args.mode_name, args.agent,
                         args.vision_network + args.network_body)
    log_id_dir = os.path.join(args.log_dir, args.env_id, log_id)

    os.makedirs(log_id_dir)
    logger = make_logger('Local', os.path.join(log_id_dir, 'train_log.txt'))
    summary_writer = SummaryWriter(log_id_dir)
    saver = SimpleModelSaver(log_id_dir)

    log_args(logger, args)
    write_args_file(log_id_dir, args)

    # construct env
    env = make_env(args, args.seed)

    # construct network
    torch.manual_seed(args.seed)
    network_head_shapes = get_head_shapes(env.action_space, env.engine,
                                          args.agent)
    network = make_network(env.observation_space, network_head_shapes, args)
    logger.info('Network Parameter Count: {}'.format(
        count_parameters(network)))

    # construct agent
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    torch.backends.cudnn.benchmark = True
    agent = make_agent(network, device, env.engine, env.gpu_preprocessor, args)

    # Construct the Container
    def make_optimizer(params):
        opt = torch.optim.RMSprop(params,
                                  lr=args.learning_rate,
                                  eps=1e-5,
                                  alpha=0.99)
        return opt

    container = Local(agent, env, make_optimizer, args.epoch_len, args.nb_env,
                      logger, summary_writer, args.summary_frequency, saver)

    # if running an eval thread create eval env, agent, & logger
    if args.nb_eval_env > 0:
        # replace args num envs & seed
        eval_args = deepcopy(args)
        eval_args.seed = args.seed + args.nb_env

        # env and agent
        eval_args.nb_env = args.nb_eval_env
        eval_env = make_env(eval_args, eval_args.seed)
        eval_net = make_network(eval_env.observation_space,
                                network_head_shapes, eval_args)
        eval_agent = make_agent(eval_net, device, eval_env.engine,
                                eval_env.gpu_preprocessor, eval_args)
        eval_net.load_state_dict(network.state_dict())

        # logger
        eval_logger = make_logger('LocalEval',
                                  os.path.join(log_id_dir, 'eval_log.txt'))

        evaluation_container = EvaluationThread(
            network,
            eval_agent,
            eval_env,
            args.nb_eval_env,
            eval_logger,
            summary_writer,
            args.eval_step_rate,
            # wire the local container's step count into the eval thread
            override_step_count_fn=lambda: container.local_step_count
        )
        evaluation_container.start()

    # Run the container
    if args.profile:
        try:
            from pyinstrument import Profiler
        except ImportError:
            raise ImportError(
                'You must install pyinstrument to use profiling.')
        profiler = Profiler()
        profiler.start()
        container.run(10e3)
        profiler.stop()
        print(profiler.output_text(unicode=True, color=True))
    else:
        container.run(args.max_train_steps)
    env.close()

    if args.nb_eval_env > 0:
        evaluation_container.stop()
        eval_env.close()
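
Each example hands its container a make_optimizer factory instead of a constructed optimizer, so the container can bind it to whichever parameters it owns. A minimal sketch of how such a factory might be consumed; the container name and method below are hypothetical stand-ins for Local / ImpalaHost / ToweredHost, not the project's actual classes:

import torch


class HypotheticalContainer:
    def __init__(self, network, make_optimizer):
        self.network = network
        # the factory is called with the parameters the container manages,
        # keeping optimizer choice decoupled from network construction
        self.optimizer = make_optimizer(self.network.parameters())

    def train_step(self, loss):
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()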
Example #6
def main(args):
    # host needs to broadcast timestamp so all procs create the same log dir
    if rank == 0:
        timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        log_id = make_log_id_from_timestamp(
            args.tag, args.mode_name, args.agent,
            args.vision_network + args.network_body, timestamp)
        log_id_dir = os.path.join(args.log_dir, args.env_id, log_id)
        os.makedirs(log_id_dir)
        saver = SimpleModelSaver(log_id_dir)
        print_ascii_logo()
    else:
        timestamp = None
    timestamp = comm.bcast(timestamp, root=0)

    if rank != 0:
        log_id = make_log_id_from_timestamp(
            args.tag, args.mode_name, args.agent,
            args.vision_network + args.network_body, timestamp)
        log_id_dir = os.path.join(args.log_dir, args.env_id, log_id)

    comm.Barrier()

    # construct env
    seed = args.seed if rank == 0 else args.seed + (
        args.nb_env * (rank - 1))  # unique seed per process
    # don't make a ton of envs if host
    if rank == 0:
        env_args = deepcopy(args)
        env_args.nb_env = 1
        env = make_env(env_args, seed)
    else:
        env = make_env(args, seed)

    # construct network
    torch.manual_seed(args.seed)
    network_head_shapes = get_head_shapes(env.action_space, env.engine,
                                          args.agent)
    network = make_network(env.observation_space, network_head_shapes, args)

    # sync network params
    if rank == 0:
        for v in network.parameters():
            comm.Bcast(v.detach().cpu().numpy(), root=0)
        print('Root variables synced')
    else:
        # can just use the numpy buffers
        variables = [v.detach().cpu().numpy() for v in network.parameters()]
        for v in variables:
            comm.Bcast(v, root=0)
        for shared_v, model_v in zip(variables, network.parameters()):
            model_v.data.copy_(torch.from_numpy(shared_v), non_blocking=True)
        print('{} variables synced'.format(rank))

    # host is rank 0
    if rank != 0:
        # construct logger
        logger = make_logger(
            'ToweredWorker{}'.format(rank),
            os.path.join(log_id_dir, 'train_log_rank{}.txt'.format(rank)))
        summary_writer = SummaryWriter(
            os.path.join(log_id_dir, 'rank{}'.format(rank)))

        # construct agent
        # distribute evenly across gpus
        if isinstance(args.gpu_id, list):
            gpu_id = args.gpu_id[(rank - 1) % len(args.gpu_id)]
        else:
            gpu_id = args.gpu_id
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        torch.backends.cudnn.benchmark = True
        agent = make_agent(network, device, env.engine, env.gpu_preprocessor,
                           args)

        # construct container
        container = ToweredWorker(agent, env, args.nb_env, logger,
                                  summary_writer, args.summary_frequency)

        # Run the container
        try:
            container.run()
        finally:
            env.close()
    # host
    else:
        logger = make_logger(
            'ToweredHost',
            os.path.join(log_id_dir, 'train_log_rank{}.txt'.format(rank)))
        log_args(logger, args)
        write_args_file(log_id_dir, args)
        logger.info('Network Parameter Count: {}'.format(
            count_parameters(network)))

        # no need for the env anymore
        env.close()

        # Construct the optimizer
        def make_optimizer(params):
            opt = torch.optim.RMSprop(params,
                                      lr=args.learning_rate,
                                      eps=1e-5,
                                      alpha=0.99)
            return opt

        container = ToweredHost(comm, args.num_grads_to_drop, network,
                                make_optimizer, saver, args.epoch_len, logger)

        # Run the container
        if args.profile:
            try:
                from pyinstrument import Profiler
            except ImportError:
                raise ImportError(
                    'You must install pyinstrument to use profiling.')
            profiler = Profiler()
            profiler.start()
            container.run(10e3)
            profiler.stop()
            print(profiler.output_text(unicode=True, color=True))
        else:
            container.run(args.max_train_steps)
Example #7
def main(args):
    print_ascii_logo()
    logger = make_logger('Eval',
                         os.path.join(args.log_id_dir, 'evaluation_log.txt'))
    log_args(logger, args)

    # checkpoint directories are named by integer epoch id; skip per-rank dirs
    epoch_ids = sorted([
        int(d) for d in os.listdir(args.log_id_dir)
        if os.path.isdir(os.path.join(args.log_id_dir, d)) and 'rank' not in d
    ])

    with open(os.path.join(args.log_id_dir, 'args.json'), 'r') as args_file:
        train_args = dotdict(json.load(args_file))
    train_args.nb_env = 1

    # construct env
    def env_fn(seed):
        return make_env(train_args, seed, subprocess=False, render=args.render)

    env = env_fn(args.seed)
    env.close()
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    network_head_shapes = get_head_shapes(env.action_space, env.engine,
                                          train_args.agent)
    network = make_network(env.observation_space, network_head_shapes,
                           train_args)

    results = []
    selected_models = []
    for epoch_id in epoch_ids:
        network_path = os.path.join(args.log_id_dir, str(epoch_id),
                                    'model*.pth')
        network_files = glob(network_path)

        best_mean = -float('inf')
        best_std_dev = 0.
        selected_model = None
        for network_file in network_files:
            # load new network
            network.load_state_dict(
                torch.load(network_file,
                           map_location=lambda storage, loc: storage))

            # construct agent
            agent = make_agent(network, device, env.engine,
                               env.gpu_preprocessor, train_args)

            # container
            container = Evaluation(agent, env_fn, device, args.seed,
                                   args.render)

            # Run the container
            mean_reward, std_dev = container.run(args.nb_episode)

            if mean_reward >= best_mean:
                best_mean = mean_reward
                best_std_dev = std_dev
                selected_model = os.path.split(network_file)[-1]

        result = Result(epoch_id, best_mean, best_std_dev)
        selected_model = SelectedModel(epoch_id, selected_model)
        logger.info(str(result) + ' ' + str(selected_model))
        results.append(np.asarray(result))
        selected_models.append(selected_model)

    # save results
    results = np.stack(results)
    np.savetxt(os.path.join(args.log_id_dir, 'eval.csv'),
               results,
               delimiter=',',
               fmt=['%d', '%.3f', '%.3f'])

    # save selected models
    with open(os.path.join(args.log_id_dir, 'selected_models.txt'), 'w') as f:
        for sm in selected_models:
            f.write(str(sm) + '\n')

    env.close()
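
Example #7 converts each Result straight into a NumPy row and stacks the rows for np.savetxt, which only works if Result is a flat numeric record; a plausible definition consistent with that usage (an assumption, the real classes are not shown here):

# Assumed shapes for the records used above: three numeric fields per Result
# (one CSV row of %d, %.3f, %.3f) and a model filename per SelectedModel.
from collections import namedtuple

Result = namedtuple('Result', ['epoch_id', 'mean_reward', 'std_dev'])
SelectedModel = namedtuple('SelectedModel', ['epoch_id', 'model_file'])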