Example #1
def test(args, shared_model, env_conf):
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_{2}workers_log'.format(args.log_dir, args.env, args.workers))
    log['{}_log'.format(args.env)] = logging.getLogger(
        '{}_log'.format(args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    env = atari_env(args.env, env_conf)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.model = A3Clstm(
        player.env.observation_space.shape[0], player.env.action_space)
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    player.model.eval()
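    # Evaluation loop: play greedily with the current shared policy. On each
    # episode end, reload the latest shared weights, log episode reward /
    # length / running mean, checkpoint when the score exceeds
    # args.save_score_level, then sleep before resetting the environment.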

    while True:
        if player.done:
            player.model.load_state_dict(shared_model.state_dict())

        player.action_test()
        reward_sum += player.reward

        if player.done:
            num_tests += 1
            player.current_life = 0
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}".
                format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean))

            if reward_sum > args.save_score_level:
                player.model.load_state_dict(shared_model.state_dict())
                state_to_save = player.model.state_dict()
                torch.save(state_to_save, '{0}{1}.dat'.format(
                    args.save_model_dir, args.env))

            reward_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            time.sleep(60)
            player.state = torch.from_numpy(state).float()
Example #2
    def test(self, iteration, show='none', save_max=False):
        env = create_env(self.args)
 
        player = Agent(None, env, self.args, None)
        player.gpu_id = self.gpu_id
        if self.args.model == 'MLP':
            player.model = A3C_MLP(
                player.env.observation_space.shape[0], player.env.action_space, self.args.stack_frames)
        if self.args.model == 'CONV':
            player.model = A3C_CONV(self.args.stack_frames, player.env.action_space)

        # load the input model
        if self.gpu_id >= 0:
            with torch.cuda.device(self.gpu_id):
                player.model.load_state_dict(self.shared_model.state_dict())
        else:
            player.model.load_state_dict(self.shared_model.state_dict())
        
        player.state = player.env.reset(self.args)
        player.state = torch.from_numpy(player.state).float()
        if self.gpu_id >= 0:
            with torch.cuda.device(self.gpu_id):
                player.model = player.model.cuda()
                player.state = player.state.cuda()
        player.model.eval()
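        # Run a single evaluation episode with a snapshot of the shared model
        # and return the cumulative test reward accumulated across calls.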

        while True:
            player.action_test()
            if self.args.show != 'none' or show != 'none':
                player.env.render()

            self.reward_sum += player.reward

            if player.done:
                self.num_tests += 1
                self.reward_total_sum += self.reward_sum
                reward_mean = self.reward_total_sum / self.num_tests
                self.reward_sum = 0
                player.eps_len = 0
                state = player.env.reset(self.args)
                player.state = torch.from_numpy(state).float()
                if self.gpu_id >= 0:
                    with torch.cuda.device(self.gpu_id):
                        player.state = player.state.cuda()
                if self.args.show != 'none' or show != 'none':
                    player.env.close()
                break
        return self.reward_total_sum
Example #3
def test(args, shared_model, env_conf):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger(
        '{}_log'.format(args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    print("test proc:")
    env = AllowBacktracking(make_local_env(env_conf['game'], env_conf['level'], stack=False, scale_rew=False))
    print("test got env:", env.observation_space)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    player.model = A3Clstm(
        player.env.observation_space.shape[0], player.env.action_space)

    player.state = player.env.reset()
    player.eps_len += 2
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    flag = True
    max_score = 0
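    # Main test loop: `flag` forces a reload of the shared weights at the start
    # of every episode; an episode ends on done or max_length, after which the
    # running mean reward is logged and, with args.save_max, the best-scoring
    # model is saved.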
    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward

        """
        if player.done and player.info['ale.lives'] > 0 and not player.max_length:
            state = player.env.reset()
            player.eps_len += 2
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
        """
        if player.done or player.max_length:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}".
                format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean))

            if args.save_max and reward_sum >= max_score:
                max_score = reward_sum
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(state_to_save, '{0}{1}.dat'.format(args.save_model_dir, args.env))
                else:
                    state_to_save = player.model.state_dict()
                    torch.save(state_to_save, '{0}{1}.dat'.format(args.save_model_dir, args.env))

            reward_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            player.eps_len += 2
            time.sleep(10)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
Example #4
def test(args, shared_model, env_conf, shared_counter):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    device = torch.device('cuda:{}'.format(gpu_id) if gpu_id >= 0 else 'cpu')

    log = {}
    setup_logger(
        '{}_log'.format(args.env),
        os.path.join(args.log_dir, '{}-{}_log'.format(args.env,
                                                      args.exp_name)))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    env = atari_env(args.env, env_conf, args)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None, gpu_id=gpu_id)
    player.model = A3Clstm(player.env.observation_space.shape[0],
                           player.env.action_space)
    player.model.apply(weights_init)

    player.state = player.env.reset()
    player.eps_len += 2
    player.state = torch.from_numpy(player.state).to(torch.float32)

    player.model = player.model.to(device)
    player.state = player.state.to(device)

    flag = True
    max_score = 0
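    # Test loop: reload shared weights per episode and treat player.info as the
    # true end-of-episode signal (done without info appears to be a lost life
    # rather than a finished episode). The shared step counter stops the tester
    # once args.interact_steps is exceeded.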
    while True:
        if flag:
            player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward

        if player.done and not player.info:
            state = player.env.reset()
            player.eps_len += 2
            player.state = torch.from_numpy(state).to(torch.float32)
            player.state = player.state.to(device)
        elif player.info:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}, alpha {4:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean,
                    player.model.log_alpha.exp().detach().item()))

            if args.save_max and reward_sum >= max_score:
                max_score = reward_sum
                torch.save(
                    player.model.state_dict(),
                    os.path.join(args.save_model_dir,
                                 '{}-{}.dat'.format(args.env, args.exp_name)))

            with shared_counter.get_lock():
                shared_counter.value += player.eps_len
                if shared_counter.value > args.interact_steps:
                    break
            reward_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            player.eps_len += 2
            time.sleep(10)

            player.state = torch.from_numpy(state).to(torch.float32)
            player.state = player.state.to(device)
Example #5
def test(args, shared_model):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger(
        '{}_log'.format(args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    env = create_env(args.env, args)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    if args.model == 'MLP':
        player.model = A3C_MLP(
            player.env.observation_space.shape[0], player.env.action_space, args.stack_frames)
    if args.model == 'CONV':
        player.model = A3C_CONV(args.stack_frames, player.env.action_space)

    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()
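    # Same evaluation pattern as Example #1, with optional GPU placement:
    # weights are re-synced from shared_model whenever an episode finishes.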
    while True:
        if player.done:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())

        player.action_test()
        reward_sum += player.reward

        if player.done:
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}".
                format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean))

            if reward_sum > args.save_score_level:
                player.model.load_state_dict(shared_model.state_dict())
                state_to_save = player.model.state_dict()
                torch.save(state_to_save, '{0}{1}.dat'.format(
                    args.save_model_dir, args.env))

            reward_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            time.sleep(60)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
Example #6
File: test.py  Project: yanjiangyue123/RL
def test(rank, args, shared_model):
    writer = SummaryWriter('8_27_test')
    model_buffer = Model_Buffer(args)
    test_episodes = args.test_episodes
    ptitle('Test Agent')
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    print("logfile check", r'{0} {1}_log'.format(args.log_dir, args.env))

    print("logs in test", args.log_dir)

    log['{}_log'.format(args.env)] = logging.getLogger(  # keep the logger in a dict
        '{}_log'.format(args.env))
    d_args = vars(args)  # vars() returns the object's attributes as a dict
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(
            k, d_args[k]))  # log every argument value

    # for i in range(100):
    #     log['{}_log'.format(args.env)].info('{0}'.format(i))

    # print('we prefix seed = -1 when testing')
    # args.seed = -1
    torch.manual_seed(args.seed)
    env = create_env(args.env, args.seed)
    # env = gym.make(args.env)
    # env.seed(args.seed)

    start_time = time.time()
    num_tests = 0  # number of test episodes completed so far
    player = Agent(None, env, args, None, rank)
    player.model = A3C_MLP(player.env.observation_space,
                           player.env.action_space,
                           args.stack_frames)  # build the model
    player.state = player.env.reset()  # initial state
    player.state = torch.from_numpy(player.state).float()
    player.done = True

    player.model.eval()  # switch to evaluation mode

    is_model_empty = True
    is_testing = False
    training_steps = 0  # initialized here so the stop check below is safe before the first model arrives
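    # Outer loop: keep caching the shared model into model_buffer; every
    # `test_episodes` episodes a cached snapshot is loaded and evaluated, then
    # average reward and success rate are written to TensorBoard and the log.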
    while True:
        model_buffer.put(shared_model)
        # at the start of each evaluation batch (every test_episodes episodes), re-initialize
        if player.done and np.mod(num_tests,
                                  test_episodes) == 0 and not is_testing:
            reward_episode = 0
            success_rate = 0
            load_model = model_buffer.get()  # fetch the shared model snapshot
            model_queue_size = model_buffer.qsize()
            if load_model:
                is_testing = True
                is_model_empty = False
                training_steps = load_model[1]
                training_episodes = load_model[2]
                # load the shared parameters stored in load_model[0] into the player's model
                player.model.load_state_dict(load_model[0])
            else:
                is_model_empty = True  # no model available yet
                time.sleep(10)

        if not is_model_empty:
            player.action_test()
            # log['{}_log'.format(args.env)].info("test steps {}".format(1))
            reward_episode += player.reward
            if 'is_success' in player.info.keys():  # check whether the episode ended in success
                success_rate += 1
            if player.done:  # done when reaching the goal, crashing, or drifting too far; the episode ends
                # print("crash detected")
                # eps_len_temp = player.eps_len #?
                num_tests += 1  # one more test episode finished
                player.eps_len = 0  # reset the step counter for the next episode
                state = player.env.reset()
                player.state = torch.from_numpy(state).float()

                if np.mod(num_tests, test_episodes) == 0:  # a full evaluation batch is done; aggregate the statistics
                    is_testing = False
                    reward_episode = reward_episode / test_episodes
                    writer.add_scalar('success_num/Test', success_rate,
                                      training_steps)
                    success_rate = success_rate / test_episodes
                    log['{}_log'.format(args.env)].info(
                        "Time {0}, training episodes {1}, training steps {2}, reward episode {3}, success_rate {4}, "
                        "model cached {5}".format(
                            time.strftime(
                                "%Hh %Mm %Ss",
                                time.gmtime(time.time() - start_time)),
                            training_episodes, training_steps, reward_episode,
                            success_rate, model_queue_size))

                    writer.add_scalar('success_rate/Test', success_rate,
                                      training_steps)
                    # save model:
                    state_to_save = player.model.state_dict()
                    # torch.save(state_to_save, '{0}{1}.dat'.format(args.save_model_dir, args.env))
                    # torch.save(state_to_save, '{0}{1}_pre.dat'.format(args.save_model_dir, args.env))

                    torch.save(state_to_save,
                               '{0}{1}.dat'.format(args.log_dir, args.env))
                    torch.save(state_to_save,
                               '{0}{1}_pre.dat'.format(args.log_dir, args.env))
        if training_steps > args.training_steps:
            break
Example #7
def test(rank, args, shared_model, train_modes, n_iters, device):
    writer = SummaryWriter(
        os.path.join(args.log_dir, 'Test Agent:{}'.format(rank)))
    ptitle('Test Agent: {}'.format(rank))
    torch.manual_seed(args.seed + rank)
    n_iter = 0

    log = {}
    setup_logger('{}_log'.format(args.env), r'{0}/logger'.format(args.log_dir))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)

    env = create_env(args.env, args)

    start_time = time.time()
    num_tests = 1
    n_step = 0
    player = Agent(None, env, args, None, None, device)
    player.model = build_model(player.env.observation_space,
                               player.env.action_space, args,
                               device).to(device)

    player.state = player.env.reset()
    if 'Unreal' in args.env:
        player.cam_pos = player.env.env.env.env.cam_pose
        player.collect_state = player.env.env.env.env.current_states

    player.set_cam_info()
    player.state = torch.from_numpy(player.state).float().to(device)

    player.model.eval()
    max_score = -100
    reward_sum = np.zeros(player.num_agents)
    reward_total_sum = np.zeros(player.num_agents)
    reward_sum_ep = np.zeros(player.num_agents)

    success_rate_sum_ep = np.zeros(player.num_agents)

    fps_counter = 0
    t0 = time.time()
    cross_entropy_loss = nn.CrossEntropyLoss()

    len_sum = 0
    seed = args.seed

    count_eps = 0
    eps_length = 0
    rate = 0
    rates = [0, 0]
    step_rates = [0, 0]
    mean_rates = [0, 0]

    visible_steps = 0
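    # Multi-agent evaluation loop: besides episode rewards, it measures how
    # well the predicted gates match the ground-truth gates (per-class and mean
    # accuracy, plus a cross-entropy loss) and logs everything to TensorBoard.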
    while True:
        if player.done:
            count_eps += 1

            t0 = time.time()
            eps_length = 0

            player.model.load_state_dict(shared_model.state_dict())

        player.action_test()
        eps_length += 1
        n_step += 1

        fps_counter += 1
        reward_sum_ep += player.reward
        success_rate_sum_ep += player.success_rate

        gate_ids, gate_probs, gt_gates = [], [], []
        for k1 in range(len(player.rewards)):
            for k2 in range(player.num_agents):
                _, max_id = torch.max(player.gates[k1][k2], 0)
                gate_probs.append(player.gates[k1][k2])
                gate_ids.append(max_id)
                gt_gates.append(player.gate_gts[k1][k2])

        gate_probs = torch.cat(gate_probs).view(-1, 2).to(device)
        gate_gt_ids = torch.Tensor(gt_gates).view(
            1, -1).squeeze().long().to(device)
        gate_loss = cross_entropy_loss(gate_probs, gate_gt_ids)

        visible_steps += sum(np.array(gt_gates).squeeze()) / 4

        gate_ids = np.array(
            [gate_ids[i].cpu().detach().numpy() for i in range(4)])
        gt_gates = np.array(
            [gt_gates[i].cpu().detach().numpy() for i in range(4)])
        one_step_rate = sum(gate_ids == gt_gates) / player.num_agents
        rate += one_step_rate
        for id in range(2):
            right_num = sum(gate_ids[i] == gt_gates[i] == id for i in range(4))
            num = sum(gt_gates[i] == id for i in range(4))
            step_rate = right_num / num if num != 0 else 0
            if step_rate > 0:
                rates[id] += step_rate
                step_rates[id] += 1
                mean_rates[id] = rates[id] / step_rates[id]

        mean_rate = rate / n_step

        if player.done:
            player.state = player.env.reset()
            player.state = torch.from_numpy(player.state).float().to(device)
            player.set_cam_info()

            reward_sum += reward_sum_ep

            len_sum += player.eps_len
            fps = fps_counter / (time.time() - t0)
            n_iter = 0
            for n in n_iters:
                n_iter += n
            for i in range(player.num_agents):
                writer.add_scalar('test/reward' + str(i), reward_sum_ep[i],
                                  n_iter)

            writer.add_scalar('test/fps', fps, n_iter)
            writer.add_scalar('test/eps_len', player.eps_len, n_iter)
            writer.add_scalar('test/unvisible_acc', mean_rates[0], n_iter)
            writer.add_scalar('test/visible_acc', mean_rates[1], n_iter)
            writer.add_scalar('test/mean_acc', mean_rate, n_iter)
            writer.add_scalar('test/gate_loss', gate_loss, n_iter)

            player.eps_len = 0
            fps_counter = 0
            reward_sum_ep = np.zeros(player.num_agents)
            t0 = time.time()
            count_eps += 1
            if count_eps % args.test_eps == 0:
                player.max_length = True
            else:
                player.max_length = False

        if player.done and not player.max_length:
            seed += 1
            player.env.seed(seed)
            player.state = player.env.reset()
            player.set_cam_info()
            player.state = torch.from_numpy(player.state).float().to(device)

            player.eps_len += 2

        elif player.done and player.max_length:
            ave_reward_sum = reward_sum / args.test_eps
            reward_total_sum += ave_reward_sum
            reward_mean = reward_total_sum / num_tests
            len_mean = len_sum / args.test_eps
            reward_step = reward_sum / len_sum
            log['{}_log'.format(args.env)].info(
                "Time {0}, ave eps reward {1}, ave eps length {2}, reward mean {3}, reward step {4}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    ave_reward_sum, len_mean, reward_mean, reward_step))

            if ave_reward_sum.mean() >= max_score:
                print('save best! in %d iters' % n_step)
                max_score = ave_reward_sum.mean()
                model_dir = os.path.join(
                    args.log_dir,
                    '{0}-gate-all-model-best-{1}.dat'.format(args.env, n_step))
            else:
                model_dir = os.path.join(args.log_dir,
                                         '{0}-new.dat'.format(args.env))

            if args.gpu_ids[-1] >= 0:
                with torch.cuda.device(args.gpu_ids[-1]):
                    state_to_save = player.model.state_dict()
                    torch.save(state_to_save, model_dir)
            else:
                state_to_save = player.model.state_dict()
                torch.save(state_to_save, model_dir)

            num_tests += 1
            reward_sum = 0
            len_sum = 0
            seed += 1
            player.env.seed(seed)

            player.state = player.env.reset()
            if 'Unreal' in args.env:
                player.cam_pos = player.env.env.env.env.cam_pose
                player.collect_state = player.env.env.env.env.current_states
            player.set_cam_info()
            player.state = torch.from_numpy(player.state).float().to(device)
            player.input_actions = torch.Tensor(
                np.zeros((player.num_agents, 9)))

            time.sleep(args.sleep_time)

            if n_iter > args.max_step:
                env.close()
                for id in range(0, args.workers):
                    train_modes[id] = -100
                break

        player.clear_actions()
Example #8
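# Excerpt: assumes player, gpu_id, args, num_tests, reward_total_sum and the
# '{}_mon_log' logger have already been set up earlier in the same file.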
player.model.eval()
for i_episode in range(args.num_episodes):
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.state = player.state.cuda()
    player.eps_len += 2
    reward_sum = 0
    while True:
        if args.render:
            if i_episode % args.render_freq == 0:
                player.env.render()

        player.action_test()
        reward_sum += player.reward

        if player.done and not player.info:
            state = player.env.reset()
            player.eps_len += 2
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
        elif player.info:
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_mon_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}".
Example #9
def test(args, shared_model, env_conf):
    ptitle('Valid agent')

    if args.valid_gpu < 0:
        gpu_id = args.gpu_ids[-1]
    else:
        gpu_id = args.valid_gpu
    env_conf["env_gpu"] = gpu_id

    log = {}
    logger = Logger(args.log_dir)

    create_dir(args.log_dir + "models/")

    os.system("cp *.sh " + args.log_dir)
    os.system("cp *.py " + args.log_dir)
    os.system("cp models/models.py " + args.log_dir + "models/")
    os.system("cp models/basic_modules.py " + args.log_dir + "models/")

    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    env_conf_log = env_conf

    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))
    for k in env_conf_log.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(
            k, env_conf_log[k]))

    torch.manual_seed(args.seed)

    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    env = database_env(env_conf, seed=0, dstype="test")
    env.max_step = 900

    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0

    player = Agent(None, env, args, None, gpu_id)
    player.gpu_id = gpu_id

    player.model = get_model(args,
                             args.model,
                             env_conf["observation_shape"],
                             args.features,
                             env_conf["num_actions"],
                             gpu_id=0,
                             lstm_feats=args.lstm_feats)

    with torch.cuda.device(gpu_id):
        player.model = player.model.cuda()

    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()

    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.state = player.state.cuda()
    player.model.eval()

    flag = True
    create_dir(args.save_model_dir)

    recent_episode_scores = ScalaTracker(100)
    max_score = 0
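    # Validation loop: track the mean of the last 100 episode scores with
    # ScalaTracker, save the best model when that mean improves (args.save_max),
    # and checkpoint / print extra diagnostics every save_period / log_period tests.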

    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()

        reward_sum += player.reward.mean()

        if player.done:
            flag = True
            num_tests += 1

            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests

            log['{}_log'.format(args.env)].info(
                "VALID: Time {0}, episode reward {1}, num tests {4}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean, num_tests))

            recent_episode_scores.push(reward_sum)

            if args.save_max and recent_episode_scores.mean() >= max_score:
                max_score = recent_episode_scores.mean()
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                'best_model_' + args.env))

            if num_tests % args.save_period == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save, '{0}{1}.dat'.format(
                                args.save_model_dir,
                                args.env + '_' + str(num_tests)))

            if num_tests % args.log_period == 0:
                print("------------------------------------------------")
                print(args.env)
                print("Log test #:", num_tests)
                print("sum rewards: ", player.env.sum_reward)
                print("action_history\n", player.env.action_his)
                print()
                print("------------------------------------------------")

                log_info = {
                    'mean_reward': reward_mean,
                    '100_mean_reward': recent_episode_scores.mean()
                }
                for tag, value in log_info.items():
                    logger.scalar_summary(tag, value, num_tests)

            reward_sum = 0
            player.eps_len = 0

            player.clear_actions()
            state = player.env.reset()

            time.sleep(15)

            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
Example #10
def test(args, shared_models, env_conf):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    env = atari_env(args.env, env_conf, args)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(env, args, gpu_id)
    player.state = player.env.reset()
    player.eps_len += 2
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.state = player.state.cuda()
    flag = True
    max_score = 0
    prev_reward = 0
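    # Two shared models (an "early" and a "late" network) are evaluated
    # together; both are re-synced per episode and both are checkpointed as
    # *_early.dat / *_late.dat when a new best episode reward is reached.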
    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.models[0].load_state_dict(
                        shared_models[0].state_dict())
                    player.models[1].load_state_dict(
                        shared_models[1].state_dict())
            else:
                player.models[0].load_state_dict(shared_models[0].state_dict())
                player.models[1].load_state_dict(shared_models[1].state_dict())
            player.models[0].eval()
            player.models[1].eval()
            flag = False

        player.action_test()
        reward_sum += player.reward

        if player.done and not player.info:
            state = player.env.reset()
            player.eps_len += 2
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
        elif player.info:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean))
            with open('./results', 'a') as f:
                line = f"{reward_total_sum - prev_reward}\n"
                f.write(line)
                prev_reward = reward_total_sum
            player.episodic_reward = 0
            if args.save_max and reward_sum >= max_score:
                max_score = reward_sum
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.models[0].state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}_early.dat'.format(args.save_model_dir,
                                                      args.env))
                        state_to_save = player.models[1].state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}_late.dat'.format(args.save_model_dir,
                                                     args.env))
                else:
                    state_to_save = player.models[0].state_dict()
                    torch.save(
                        state_to_save,
                        '{0}{1}_early.dat'.format(args.save_model_dir,
                                                  args.env))
                    state_to_save = player.models[1].state_dict()
                    torch.save(
                        state_to_save,
                        '{0}{1}_late.dat'.format(args.save_model_dir,
                                                 args.env))

            reward_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            player.eps_len += 2
            time.sleep(10)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
Example #11
def test_func(args, shared_model, env_conf, datasets):
    ptitle('Valid agent')

    gpu_id = args.gpu_ids[-1]

    env_conf["env_gpu"] = gpu_id

    if not args.deploy:
        logger = Logger(args.log_dir)

        saved_src_dir = args.log_dir + "/src/"
        create_dir(saved_src_dir)
        os.system("cp *.py " + saved_src_dir)
        os.system("cp -r Models " + saved_src_dir)
        os.system("cp -r run_scripts " + saved_src_dir)
        os.system("cp -r Utils " + saved_src_dir)

    torch.manual_seed(args.seed)

    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    env = Debug_env(datasets, env_conf)

    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0

    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id

    nChan = 3
    if args.is3D:
        nChan = 4
    if args.alpha_only:
        nChan = 1

    if not args.is3D:
        player.model = get_model(args,
                                 "ENet",
                                 input_shape=env_conf["obs_shape"],
                                 num_actions=args.num_actions * nChan)
    elif not args.obs3D:
        player.model = get_model(args,
                                 "ENet",
                                 input_shape=env_conf["obs_shape"],
                                 num_actions=args.num_actions * nChan)
    elif args.obs3D:
        player.model = get_model(args,
                                 "Net3D",
                                 input_shape=env_conf["obs_shape"],
                                 num_actions=args.num_actions * nChan)

    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()

    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()

    flag = True
    if not args.deploy:
        create_dir(args.save_model_dir)

    recent_episode_scores = ScalaTracker(100)
    recent_rand_i = ScalaTracker(100)

    renderlist = []
    renderlist.append(player.env.render())
    max_score = 0
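    # In deploy mode the function hands control to deploy() and exits;
    # otherwise it runs the usual validation loop, rendering every step so the
    # frames can be logged as an image summary every log_period tests.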

    if args.deploy:
        deploy(args, shared_model, player, gpu_id)
        exit()

    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward.mean()
        renderlist.append(player.env.render())

        if player.done:
            flag = True
            num_tests += 1

            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests

            print(
                "VALID: Time {0}, episode reward {1}, num tests {4}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean, num_tests))

            recent_episode_scores.push(reward_sum)

            if num_tests % args.save_period == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                str(num_tests)))

            if num_tests % args.log_period == 0:
                print(
                    "----------------------VALID SET--------------------------"
                )
                print(args.env)
                print("Log test #:", num_tests)
                print("rewards: ", player.reward.mean())
                print("sum rewards: ", reward_sum)
                log_rewards = [
                    int(rew * 100) for rew in player.env.sum_rewards
                ]
                print("rewards:", log_rewards)
                print("action: ", player.env.actions)
                print("reward history: ", player.env.rewards)
                print("------------------------------------------------")

                log_img = np.concatenate(renderlist, 0)
                log_info = {"valid_sample": log_img}

                for tag, img in log_info.items():
                    img = img[None]
                    logger.image_summary(tag, img, num_tests)

                if not args.deploy:
                    log_info = {
                        'mean_valid_reward': reward_mean,
                        '100_mean_reward': recent_episode_scores.mean(),
                    }

                    for tag, value in log_info.items():
                        logger.scalar_summary(tag, value, num_tests)

            if args.save_sample:
                deploy_list = player.env.deploy
                print(len(deploy_list))
                for stepi, (vol, ref_img, lut, _) in enumerate(deploy_list):
                    io.imsave(
                        args.log_dir + "/" + str(num_tests) + "_vol_" +
                        str(stepi) + ".tif", vol)
                    io.imsave(
                        args.log_dir + "/" + str(num_tests) + "_ref_" +
                        str(stepi) + ".tif", ref_img)
                    plt.figure(figsize=(10, 10))
                    plt.plot(range(256), lut[..., 2], 'b')
                    plt.plot(range(256), lut[..., 1], 'g')
                    plt.plot(range(256), lut[..., 0], 'r')
                    plt.plot(range(256), lut[..., 3], 'gray')
                    plt.ylabel('Mapping value')
                    plt.xlabel('Voxel intensity')
                    plt.title("Transfer function visualization")
                    plt.savefig("Ref_LUT" + "_" + str(num_tests) + "_" +
                                str(stepi) + ".png")

            renderlist = []
            reward_sum = 0
            player.eps_len = 0

            player.clear_actions()
            state = player.env.reset()
            renderlist.append(player.env.render())

            time.sleep(15)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
Example #12
def test(args, shared_model, train_modes, n_iters):
    ptitle('Test Agent')
    n_iter = 0
    writer = SummaryWriter(os.path.join(args.log_dir, 'Test'))
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env), r'{0}/logger'.format(args.log_dir))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
        device = torch.device('cuda:' + str(gpu_id))
    else:
        device = torch.device('cpu')

    if args.env_base is None:
        env = create_env(args.env, args)
    else:
        env = create_env(args.env_base, args)
    env.seed(args.seed)
    start_time = time.time()
    count_eps = 0

    player = Agent(None, env, args, None, device)
    player.gpu_id = gpu_id
    player.model = build_model(player.env.observation_space,
                               player.env.action_space, args,
                               device).to(device)
    player.model.eval()
    max_score = -100
    seed = args.seed
    last_iter = 0
    iter_th = args.init_step
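    # Each pass runs args.test_eps evaluation episodes, adjusts the workers'
    # train_modes from the global iteration count, logs the averages, and
    # saves the full / tracker / target models (best or latest).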
    while True:
        reward_sum = np.zeros(2)
        len_sum = 0
        for i_episode in range(args.test_eps):
            player.model.load_state_dict(shared_model.state_dict())
            player.env.seed(seed)
            # seed += 1
            player.reset()
            reward_sum_ep = np.zeros(player.num_agents)
            fps_counter = 0
            t0 = time.time()
            count_eps += 1
            while True:
                if args.render:
                    if 'Unreal' in args.env:
                        cv2_show(env, False)
                    else:
                        env.render()
                player.action_test()
                fps_counter += 1
                reward_sum_ep += player.reward
                if player.done:
                    reward_sum += reward_sum_ep[:2]
                    len_sum += player.eps_len
                    fps = fps_counter / (time.time() - t0)
                    n_iter = 0
                    for n in n_iters:
                        n_iter += n

                    for rank in range(len(n_iters)):
                        if n_iter < args.init_step:
                            train_modes[rank] = 0
                        elif args.train_mode == 2 and n_iter - last_iter > iter_th:
                            train_modes[rank] = 1 - train_modes[rank]
                            last_iter = n_iter
                            iter_th = args.init_step if train_modes[
                                rank] == 0 else args.adv_step
                        else:
                            train_modes[rank] = args.train_mode

                    for i, r_i in enumerate(reward_sum_ep):
                        writer.add_scalar('test/reward' + str(i), r_i, n_iter)

                    writer.add_scalar('test/fps', fps, n_iter)
                    writer.add_scalar('test/eps_len', player.eps_len, n_iter)
                    break

        ave_reward_sum = reward_sum / args.test_eps
        len_mean = len_sum / args.test_eps
        reward_step = reward_sum / len_sum
        log['{}_log'.format(args.env)].info(
            "Time {0}, ave eps reward {1}, ave eps length {2}, reward step {3}"
            .format(
                time.strftime("%Hh %Mm %Ss",
                              time.gmtime(time.time() - start_time)),
                ave_reward_sum, len_mean, reward_step))

        # save model
        if ave_reward_sum[0] >= max_score:
            print('Save best!')
            max_score = ave_reward_sum[0]
            model_dir = os.path.join(args.log_dir,
                                     'all-best-{0}.dat'.format(n_iter))
            tracker_model_dir = os.path.join(args.log_dir, 'tracker-best.dat')
            target_model_dir = os.path.join(args.log_dir, 'target-best.dat')
        else:
            model_dir = os.path.join(args.log_dir, 'all-new.dat')
            tracker_model_dir = os.path.join(args.log_dir, 'tracker-new.dat')
            target_model_dir = os.path.join(args.log_dir, 'target-new.dat')

        torch.save(player.model.state_dict(), model_dir)
        if args.split:
            torch.save(player.model.player0.state_dict(), tracker_model_dir)
            if not args.single:
                torch.save(player.model.player1.state_dict(), target_model_dir)

        time.sleep(args.sleep_time)
        if n_iter > args.max_step:
            env.close()
            for id in range(0, args.workers):
                train_modes[id] = -100
            break
Example #13
def test(rank, args, shared_model):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    writer = SummaryWriter(log_dir=args.log_dir + 'tb_test')
    log = {}
    setup_logger('{}_log'.format('Test_' + str(rank)),
                 r'{0}{1}_log'.format(args.log_dir, 'Test_' + str(rank)))
    log['{}_log'.format('Test_' + str(rank))] = logging.getLogger(
        '{}_log'.format('Test_' + str(rank)))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format('Test_' + str(rank))].info('{0}: {1}'.format(
            k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    env = atari_env(env_id=rank, args=args, type='train')
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    num_inside_target_room = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    player.model = A3Clstm(player.env.observation_space.shape[2],
                           player.env.action_space.n)

    player.state = player.env.reset()
    player.state = normalize_rgb_obs(player.state)
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()

    player.model.eval()

    action_times = 0
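    # Every step's RGB frame is written to disk; when an episode ends the
    # frames are stitched into an .mp4 with frame_to_video, and per-episode
    # stats (including the rate of reaching the target room) are logged.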
    while True:
        action_times += 1
        if player.done:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())

        player.action_test()
        reward_sum += player.reward

        if not os.path.exists(args.log_dir + "video/" + str(rank) + "_" +
                              str(num_tests)):
            os.makedirs(args.log_dir + "video/" + str(rank) + "_" +
                        str(num_tests))

        cv2.imwrite(args.log_dir + "video/" + str(rank) + "_" +
                    str(num_tests) + "/" + str(action_times) + ".png",
                    player.env.get_rgb())  # (90, 120, 3)

        if player.done:
            frame_to_video(fileloc=args.log_dir + "video/" + str(rank) + "_" +
                           str(num_tests) + "/%d.png",
                           t_w=120,
                           t_h=90,
                           destination=args.log_dir + "video/" + str(rank) +
                           "_" + str(num_tests) + ".mp4")
            shutil.rmtree(args.log_dir + "video/" + str(rank) + "_" +
                          str(num_tests))
            action_times = 0
            num_tests += 1
            num_inside_target_room += player.env.inside_target_room
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            success_rate = num_inside_target_room / num_tests
            log['{}_log'.format('Test_' + str(rank))].info(
                "Time {0}, Tester {1}, test counter {2}, episode reward {3}, episode length {4}, reward mean {5:.4f}, success rate {6}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)), rank,
                    num_tests, reward_sum, player.eps_len, reward_mean,
                    success_rate))
            # Tensorboard
            writer.add_scalar("data/episode_reward", reward_sum, num_tests)
            writer.add_scalar("data/episode_length", player.eps_len, num_tests)
            writer.add_scalar("data/reward_mean", reward_mean, num_tests)
            writer.add_scalar("data/success_rate", success_rate, num_tests)

            if reward_sum > args.save_score_level:
                # player.model.load_state_dict(shared_model.state_dict())
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}_{2}.dat'.format(args.save_model_dir,
                                                    'Test_' + str(rank),
                                                    reward_sum))
                else:
                    state_to_save = player.model.state_dict()
                    torch.save(
                        state_to_save,
                        '{0}{1}_{2}.dat'.format(args.save_model_dir,
                                                'Test_' + str(rank),
                                                reward_sum))

            reward_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            time.sleep(10)
            state = normalize_rgb_obs(state)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
Example #14
File: test.py  Project: hsuyunyu/a3c_modelB
def test(args, shared_model, env_conf):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    env = Environment(Config.SHOW_MODE)  # Config.SHOW_MODE is True or False
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    num_actions = env.get_num_actions()

    player.model = A3Clstm(Config.STACKED_FRAMES, num_actions)

    player.state, available = player.env.reset()
    # player.eps_len += 1
    player.state = torch.from_numpy(player.state).float()
    player.available = torch.from_numpy(available).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
            player.available = player.available.cuda()
    flag = True
    max_score = 0
    results_logger = open(Config.RESULTS_FILENAME, 'a')
    rolling_frame_count = 0
    rolling_reward = 0
    results_q = queueQueue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
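    # A fixed-size queue keeps the last STAT_ROLLING_MEAN_WINDOW episodes so
    # the log can report rolling mean episode length and reward alongside the
    # overall mean; results are also appended to Config.RESULTS_FILENAME.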
    while True:
        if flag:  # first load state
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()  # switch the model to evaluation mode
            flag = False

        player.action_test()
        reward_sum += player.reward

        if player.done and not player.info:
            state, available = player.env.reset()
            # player.eps_len += 1
            player.state = torch.from_numpy(state).float()
            player.available = torch.from_numpy(available).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
                    player.available = player.available.cuda()
        elif player.info:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests

            rolling_frame_count += player.eps_len
            rolling_reward += reward_sum

            if results_q.full():
                old_length, old_reward = results_q.get()
                rolling_frame_count -= old_length
                rolling_reward -= old_reward
            results_q.put((player.eps_len, reward_sum))

            episode_time = int(
                time.time() - start_time
            )  # time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time))
            log['{}_log'.format(args.env)].info(
                "Time {0:10d}, episode {1}, reward {2}, Step {3}, reward mean {4:.4f}, Rstep {5:.4f}, Rreward {6:.4f}"
                .format(episode_time, num_tests, reward_sum, player.eps_len,
                        reward_mean, (rolling_frame_count / results_q.qsize()),
                        (rolling_reward / results_q.qsize())))
            results_logger.write(
                '%d, %d, %10.4f, %d, %10.4f, %10.4f\n' %
                (episode_time, num_tests, reward_sum, player.eps_len,
                 player.envs_mean, player.envs_std))
            results_logger.flush()

            if args.save_max and reward_sum >= max_score:
                max_score = reward_sum
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir, args.env))
                else:
                    state_to_save = player.model.state_dict()
                    torch.save(
                        state_to_save,
                        '{0}{1}.dat'.format(args.save_model_dir, args.env))

            reward_sum = 0
            player.eps_len = 0
            state, available = player.env.reset()
            # player.eps_len += 1
            time.sleep(1)
            player.state = torch.from_numpy(state).float()
            player.available = torch.from_numpy(available).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
                    player.available = player.available.cuda()
    results_logger.close()  # note: unreachable, the evaluation loop above never exits
예제 #15
0
def test(args, shared_model, env_conf, datasets):
    ptitle('Test agent')
    gpu_id = args.gpu_ids[-1]
    log = {}

    logger = Logger(args.log_dir)

    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)

    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    raw, gt_lbl = datasets
    env = EM_env(raw, gt_lbl, env_conf)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0

    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    # player.model = A3Clstm (env.observation_space.shape, env_conf["num_action"], args.hidden_feat)
    player.model = SimpleCNN(env.observation_space.shape,
                             env_conf["num_action"])
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()

    flag = True
    create_dir(args.save_model_dir)

    recent_episode_scores = []
    renderlist = []
    renderlist.append(player.env.render())
    max_score = 0
    while True:
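        # when flag is set (initially, and after every finished episode) pull the
        # latest weights from the shared model before continuing evaluation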
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward
        renderlist.append(player.env.render())

        if player.done:
            flag = True
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()

            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, num tests {4}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean, num_tests))

            recent_episode_scores += [reward_sum]
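            # keep only the 200 most recent episode scores for the save-max criterion below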
            if len(recent_episode_scores) > 200:
                recent_episode_scores.pop(0)

            if args.save_max and np.mean(recent_episode_scores) >= max_score:
                max_score = np.mean(recent_episode_scores)
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                'best_model_' + args.env))

            if num_tests % args.save_period == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save, '{0}{1}.dat'.format(
                                args.save_model_dir,
                                args.env + '_' + str(num_tests)))

            if num_tests % args.log_period == 0:
                print("------------------------------------------------")
                print("Log test #:", num_tests)
                print("Prob: ")
                for i in range(player.env.agent_out_shape[1]):
                    for j in range(player.env.agent_out_shape[2]):
                        print("{:.3f}\t".format(player.prob_cpu[0, i, j]),
                              end='')
                    print()
                print("Actions :", player.actions)
                print("Actions transformed: ")
                print(player.actions_explained)
                print("rewards: ", player.rewards)
                print("sum rewards: ", reward_sum)
                print("------------------------------------------------")
                log_img = np.concatenate(renderlist, 0)
                log_info = {"test: traning_sample": log_img}
                for tag, img in log_info.items():
                    img = img[None]
                    logger.image_summary(tag, img, num_tests)

                log_info = {'test: mean_reward': reward_mean}
                for tag, value in log_info.items():
                    logger.scalar_summary(tag, value, num_tests)

            renderlist = []
            reward_sum = 0
            player.eps_len = 0
            time.sleep(30)
            player.clear_actions()
            state = player.env.reset()
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
예제 #16
0
def train_rep(args, shared_model, env_conf):
    batch_size = 16
    train_times = args.rep_train_time
    trace = []
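    # temporal-distance buckets: each tuple is a [low, high) range of frame offsets;
    # the classifier is trained to predict which bucket separates a pair of states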
    td_class = [(0, 1), (1, 2), (2, 3), (3, 5), (5, 7), (7, 9)]
    loss_fn = nn.CrossEntropyLoss()
    optimizer_r = Adam(shared_model.r_net.parameters(), lr=args.rl_r)
    optimizer_c = Adam(shared_model.c_net.parameters(), lr=args.rl_r)
    ptitle('Train rep')
    gpu_id = args.gpu_ids[-1]

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    env = atari_env(args.env, env_conf, args)
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    player.model = A3Clstm(player.env.observation_space.shape[0],
                           player.env.action_space)

    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
            # player.model.r_net = player.model.r_net.cuda()
            # player.model.c_net = player.model.c_net.cuda()
    flag = True
    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.train()
            flag = False

        player.action_test()
        trace.append(player.state)
        if len(trace) > args.trace_length:
            # train a few hundred times on the collected trace
            for _ in range(train_times):
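                # sample a distance bucket, then two frame batches from the trace
                # separated by TD steps; the bucket index is the classification target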
                range_c = np.random.randint(0, len(td_class))
                TD = np.random.randint(td_class[range_c][0],
                                       td_class[range_c][1])
                begin = np.random.randint(0, len(trace) - TD - batch_size)
                former = torch.stack(trace[begin:begin + batch_size], dim=0)
                latter = torch.stack(trace[begin + TD:begin + TD + batch_size],
                                     dim=0)
                target = torch.zeros(batch_size, dtype=torch.long) + range_c
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        former = former.cuda()
                        latter = latter.cuda()
                        target = target.cuda()

                rep_f, rep_l = player.model.r_net(former), player.model.r_net(
                    latter)
                output = player.model.c_net(rep_f, rep_l, False)
                loss = loss_fn(output, target)
                optimizer_r.zero_grad()
                optimizer_c.zero_grad()
                loss.backward()
                ensure_shared_grads(player.model.r_net,
                                    shared_model.r_net,
                                    gpu=gpu_id >= 0)
                ensure_shared_grads(player.model.c_net,
                                    shared_model.c_net,
                                    gpu=gpu_id >= 0)
                optimizer_r.step()
                optimizer_c.step()
            trace = []
        if player.done and not player.info:
            state = player.env.reset()
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
        elif player.info:
            flag = True

            state = player.env.reset()
            time.sleep(10)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
예제 #17
0
def test(args, shared_model, env_conf):
    #   print('IN TEST')
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    setup_logger('{}_map_log'.format(args.env),
                 r'{0}{1}_map_log'.format(args.log_dir, args.env))
    log['{}_map_log'.format(args.env)] = logging.getLogger('{}_map_log'.format(
        args.env))

    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    if 'micropolis' in args.env.lower():
        import gym_micropolis
        env = micropolis_env(args.env, env_conf, args)
    else:
        #      print('using atari env for test')
        env = atari_env(args.env, env_conf, args)
    reward_sum = 0
    entropy_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    if 'micropolis' in args.env.lower():
        modelInit = getattr(model, args.design_head)
        player.model = modelInit(player.env.observation_space.shape[0],
                                 player.env.action_space,
                                 player.env.env.env.MAP_X)
        player.lstm_sizes = player.model.getMemorySizes()
        if 'arcade' not in args.env.lower():
            player.lstm_size = (1, 16, player.env.env.env.MAP_X,
                                env.env.env.MAP_Y)
    else:
        player.model = A3Clstm(player.env.observation_space.shape[0],
                               player.env.action_space)

    player.state = player.env.reset()
    player.eps_len += 2
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    flag = True
    max_score = 0
    i = 0
    while True:

        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward
        entropy_sum += player.entropy.data.item()

        if player.done and not player.info:
            state = player.env.reset()
            player.eps_len += 2
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
        elif player.info:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1:1.5e}, entropy {4:1.5e} episode length {2}, reward mean {3:1.5e}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean, entropy_sum))
            import numpy as np
            np.set_printoptions(threshold=400)
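            # log the final zone map (values shifted by 2) as a compact grid in the map log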
            log['{}_map_log'.format(args.env)].info('\n{}'.format(
                np.array2string(
                    np.add(
                        player.env.env.env.micro.map.zoneMap[-1],
                        np.full((player.env.env.env.MAP_X,
                                 player.env.env.env.MAP_Y),
                                2))).replace('\n ',
                                             '').replace('][', ']\n[').replace(
                                                 '[[', '[').replace(']]',
                                                                    ']')))

            if args.save_max and reward_sum >= max_score:
                max_score = reward_sum
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}best_{1}.dat'.format(args.save_model_dir,
                                                     args.env))
                else:
                    state_to_save = player.model.state_dict()
                    torch.save(
                        state_to_save,
                        '{0}best_{1}.dat'.format(args.save_model_dir,
                                                 args.env))
            if i % 10 == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}latest_{1}.dat'.format(args.save_model_dir,
                                                       args.env))
                else:
                    state_to_save = player.model.state_dict()
                    torch.save(
                        state_to_save,
                        '{0}latest_{1}.dat'.format(args.save_model_dir,
                                                   args.env))
            reward_sum = 0
            entropy_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            player.eps_len += 2
            i += 1
            time.sleep(10)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
예제 #18
0
def test(args, shared_model, env_conf, datasets=None, hasLbl=True):
    if hasLbl:
        ptitle('Valid agent')
    else:
        ptitle("Test agent")

    gpu_id = args.gpu_ids[-1]
    env_conf["env_gpu"] = gpu_id
    log = {}
    logger = Logger(args.log_dir)

    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)

    if hasLbl:
        for k in d_args.keys():
            log['{}_log'.format(args.env)].info('{0}: {1}'.format(
                k, d_args[k]))

    torch.manual_seed(args.seed)

    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    if "EM_env" in args.env:
        raw_list, gt_lbl_list = datasets
        env = EM_env(raw_list, env_conf, type="train", gt_lbl_list=gt_lbl_list)
    else:
        env = Voronoi_env(env_conf)

    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0

    player = Agent(None, env, args, None)

    player.gpu_id = gpu_id

    if args.model == "UNet":
        player.model = UNet(env.observation_space.shape[0], args.features, 2)
    elif args.model == "FusionNetLstm":
        player.model = FusionNetLstm(env.observation_space.shape,
                                     args.features, 2, args.hidden_feat)
    elif args.model == "FusionNet":
        player.model = FusionNet(env.observation_space.shape[0], args.features,
                                 2)
    elif (args.model == "UNetLstm"):
        player.model = UNetLstm(env.observation_space.shape, args.features, 2,
                                args.hidden_feat)

    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()

    flag = True

    create_dir(args.save_model_dir)

    recent_episode_scores = []
    renderlist = []
    renderlist.append(player.env.render())
    max_score = 0
    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward.mean()
        renderlist.append(player.env.render())

        if player.done:
            flag = True

            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            if hasLbl:
                log['{}_log'.format(args.env)].info(
                    "VALID: Time {0}, episode reward {1}, num tests {4}, episode length {2}, reward mean {3:.4f}"
                    .format(
                        time.strftime("%Hh %Mm %Ss",
                                      time.gmtime(time.time() - start_time)),
                        reward_sum, player.eps_len, reward_mean, num_tests))

            recent_episode_scores += [reward_sum]
            if len(recent_episode_scores) > 200:
                recent_episode_scores.pop(0)

            if args.save_max and np.mean(recent_episode_scores) >= max_score:
                max_score = np.mean(recent_episode_scores)
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                'best_model_' + args.env))

            if num_tests % args.save_period == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save, '{0}{1}.dat'.format(
                                args.save_model_dir,
                                args.env + '_' + str(num_tests)))

            if num_tests % args.log_period == 0:
                if hasLbl:
                    print(
                        "----------------------VALID SET--------------------------"
                    )
                    print("Log test #:", num_tests)
                    print("rewards: ", player.reward.mean())
                    print("sum rewards: ", reward_sum)
                    print("------------------------------------------------")

                log_img = np.concatenate(renderlist, 0)
                if hasLbl:
                    log_info = {"valid_sample": log_img}
                else:
                    log_info = {"test_sample": log_img}

                for tag, img in log_info.items():
                    img = img[None]
                    logger.image_summary(tag, img, num_tests)

                if hasLbl:
                    log_info = {'mean_valid_reward': reward_mean}
                    for tag, value in log_info.items():
                        logger.scalar_summary(tag, value, num_tests)

            renderlist = []
            reward_sum = 0
            player.eps_len = 0

            player.clear_actions()
            state = player.env.reset()
            renderlist.append(player.env.render())
            time.sleep(15)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
예제 #19
0
def test(args, shared_model, env_conf, lock, counter):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger(
        '{}_log'.format(args.env),
        r'{0}{1}-{2}_log'.format(args.log_dir, args.env, args.log_target))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    env = atari_env(args.env, env_conf, args)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    player.model = A3Clstm(player.env.observation_space.shape[0],
                           player.env.action_space)

    player.state = player.env.reset()
    player.eps_len += 2
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    flag = True
    max_score = 0
    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward

        if player.done and not player.info:
            state = player.env.reset()
            player.eps_len += 2
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
        elif player.info:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
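            # increment the shared counter under the lock; its value is logged below
            # as the update step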
            with lock:
                counter.value += 1
            log['{}_log'.format(args.env)].info(
                "UpdateStep {0} Time {1}, episode reward {2}, episode length {3}, reward mean {4:.4f}"
                .format(
                    counter.value,
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean))

            if args.save_max and reward_sum >= max_score:
                max_score = reward_sum
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}_{2}.dat'.format(args.save_model_dir,
                                                    args.env, args.log_target))
                else:
                    state_to_save = player.model.state_dict()
                    torch.save(
                        state_to_save,
                        '{0}{1}_{2}.dat'.format(args.save_model_dir, args.env,
                                                args.log_target))

            reward_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            player.eps_len += 2
            time.sleep(10)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
예제 #20
0
def test(args, shared_model, optimizer, train_modes, n_iters):
    ptitle('Test Agent')
    n_iter = 0
    writer = SummaryWriter(os.path.join(args.log_dir, 'Test'))
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env), r'{0}/logger'.format(args.log_dir))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
        device = torch.device('cuda:' + str(gpu_id))
    else:
        device = torch.device('cpu')

    env = create_env(args.env, args)
    env.seed(args.seed)
    start_time = time.time()
    count_eps = 0

    player = Agent(None, env, args, None, device)
    player.gpu_id = gpu_id
    player.model = build_model(player.env.observation_space,
                               player.env.action_space, args,
                               device).to(device)
    player.model.eval()
    max_score = -100

    while True:
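        # evaluate the latest shared weights for args.test_eps episodes, then aggregate
        # rewards, episode lengths and FPS into the summary statistics logged below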
        AG = 0
        reward_sum = np.zeros(player.num_agents)
        reward_sum_list = []
        len_sum = 0
        for i_episode in range(args.test_eps):
            player.model.load_state_dict(shared_model.state_dict())
            player.reset()
            reward_sum_ep = np.zeros(player.num_agents)
            rotation_sum_ep = 0

            fps_counter = 0
            t0 = time.time()
            count_eps += 1
            fps_all = []
            while True:
                player.action_test()
                fps_counter += 1
                reward_sum_ep += player.reward
                rotation_sum_ep += player.rotation
                if player.done:
                    AG += reward_sum_ep[0] / rotation_sum_ep * player.num_agents
                    reward_sum += reward_sum_ep
                    reward_sum_list.append(reward_sum_ep[0])
                    len_sum += player.eps_len
                    fps = fps_counter / (time.time() - t0)
                    n_iter = 0
                    for n in n_iters:
                        n_iter += n

                    for i, r_i in enumerate(reward_sum_ep):
                        writer.add_scalar('test/reward' + str(i), r_i, n_iter)

                    fps_all.append(fps)
                    writer.add_scalar('test/fps', fps, n_iter)
                    writer.add_scalar('test/eps_len', player.eps_len, n_iter)
                    break

        # player.max_length:
        ave_AG = AG / args.test_eps
        ave_reward_sum = reward_sum / args.test_eps
        len_mean = len_sum / args.test_eps
        reward_step = reward_sum / len_sum
        mean_reward = np.mean(reward_sum_list)
        std_reward = np.std(reward_sum_list)

        log['{}_log'.format(args.env)].info(
            "Time {0}, ave eps reward {1}, ave eps length {2}, reward step {3}, FPS {4}, "
            "mean reward {5}, std reward {6}, AG {7}".format(
                time.strftime("%Hh %Mm %Ss",
                              time.gmtime(time.time() - start_time)),
                np.around(ave_reward_sum, decimals=2),
                np.around(len_mean, decimals=2),
                np.around(reward_step, decimals=2),
                np.around(np.mean(fps_all), decimals=2), mean_reward,
                std_reward, np.around(ave_AG, decimals=2)))

        # save model
        if ave_reward_sum[0] >= max_score:
            print('save best!')
            max_score = ave_reward_sum[0]
            model_dir = os.path.join(args.log_dir, 'best.pth')
        else:
            model_dir = os.path.join(args.log_dir, 'new.pth')
        state_to_save = {
            "model": player.model.state_dict(),
            "optimizer": optimizer.state_dict()
        }
        torch.save(state_to_save, model_dir)

        time.sleep(args.sleep_time)
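        # once the global step budget is exceeded, signal every training worker to stop
        # (train_modes set to -100) and end the test process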
        if n_iter > args.max_step:
            env.close()
            for id in range(0, args.workers):
                train_modes[id] = -100
            break
예제 #21
0
File: test.py Project: hvcl/ColorRL
def test_func(args,
              shared_model,
              env_conf,
              datasets=None,
              tests=None,
              shared_dict=None):
    ptitle('Valid agent')

    if args.valid_gpu < 0:
        gpu_id = args.gpu_ids[-1]
    else:
        gpu_id = args.valid_gpu

    env_conf["env_gpu"] = gpu_id

    if not args.deploy:
        log = {}

        logger = Logger(args.log_dir)

        create_dir(args.log_dir + "models/")
        create_dir(args.log_dir + "tifs/")
        create_dir(args.log_dir + "tifs_test/")

        os.system("cp *.py " + args.log_dir)
        os.system("cp *.sh " + args.log_dir)
        os.system("cp models/*.py " + args.log_dir + "models/")

        setup_logger('{}_log'.format(args.env),
                     r'{0}{1}_log'.format(args.log_dir, args.env))
        log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
            args.env))
        d_args = vars(args)
        env_conf_log = env_conf

    if tests is not None:
        if args.testlbl:
            test_env = EM_env(tests[0],
                              env_conf,
                              type="test",
                              gt_lbl_list=tests[1])
        else:
            test_env = EM_env(tests[0], env_conf, type="test")

    if not args.deploy:
        for k in d_args.keys():
            log['{}_log'.format(args.env)].info('{0}: {1}'.format(
                k, d_args[k]))
        for k in env_conf_log.keys():
            log['{}_log'.format(args.env)].info('{0}: {1}'.format(
                k, env_conf_log[k]))

    torch.manual_seed(args.seed)

    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    raw_list, gt_lbl_list = datasets
    env = EM_env(raw_list, env_conf, type="train", gt_lbl_list=gt_lbl_list)

    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0

    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    player.model = get_model(args,
                             args.model,
                             env_conf["observation_shape"],
                             args.features,
                             atrous_rates=args.atr_rate,
                             num_actions=2,
                             split=args.data_channel,
                             gpu_id=gpu_id,
                             multi=args.multi)

    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()

    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()

    flag = True
    if not args.deploy:
        create_dir(args.save_model_dir)

    recent_episode_scores = ScalaTracker(100)
    recent_FgBgDice = ScalaTracker(100)
    recent_bestDice = ScalaTracker(100)
    recent_diffFG = ScalaTracker(100)

    recent_MUCov = ScalaTracker(100)
    recent_MWCov = ScalaTracker(100)
    recent_AvgFP = ScalaTracker(100)
    recent_AvgFN = ScalaTracker(100)

    recent_rand_i = ScalaTracker(100)

    renderlist = []
    renderlist.append(player.env.render())
    max_score = 0

    # ----------------------------------------- Deploy / Inference -----------------------------------------
    if args.deploy:
        with torch.cuda.device(gpu_id):
            player.model.load_state_dict(shared_model.state_dict())

        # inference (args, None, player.model, tests [0], test_env, gpu_id, player.env.rng, len (tests [0]))
        if len(tests) == 4:
            inference(args, None, player.model, tests[0], test_env, gpu_id,
                      player.env.rng, len(tests[0]), tests[3])
        else:
            inference(args, None, player.model, tests[0], test_env, gpu_id,
                      player.env.rng, len(tests[0]))

        return
    # ----------------------------------------- End Deploy / Inference -----------------------------------------

    merge_ratios = []
    split_ratios = []

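    # "s2m" weight control: step the merge weight and the per-radius split weight along
    # the schedule, presumably shifting the reward emphasis from splitting toward merging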
    if args.wctrl == "s2m":
        schedule = args.wctrl_schedule

        delta = (shared_dict['spl_w'] - shared_dict['mer_w']) / (2 *
                                                                 len(schedule))

        mer_w_delta = delta
        mer_w_var = shared_dict['mer_w']
        mer_w_scheduler = Scheduler(mer_w_var, schedule, mer_w_delta)

        split_delta = -delta / len(args.out_radius)
        split_var = shared_dict['spl_w'] / len(args.out_radius)
        spl_w_scheduler = Scheduler(split_var, schedule, split_delta)

    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward.mean()
        renderlist.append(player.env.render())

        if player.done:
            flag = True
            num_tests += 1

            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests

            log['{}_log'.format(args.env)].info(
                "VALID: Time {0}, episode reward {1}, num tests {4}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean, num_tests))

            recent_episode_scores.push(reward_sum)

            if args.save_max and recent_episode_scores.mean() >= max_score:
                max_score = recent_episode_scores.mean()
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                'best_model_' + args.env))

            if num_tests % args.save_period == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                str(num_tests)))

            if num_tests % args.log_period == 0:
                if tests is not None and not args.DEBUG:
                    inference(args, logger, player.model, tests[0], test_env,
                              gpu_id, player.env.rng, num_tests)

                if (np.max(env.lbl) != 0 and np.max(env.gt_lbl) != 0):
                    bestDice, FgBgDice, diffFG, MWCov, MUCov, AvgFP, AvgFN, rand_i = evaluate(
                        args, player.env)

                    recent_FgBgDice.push(FgBgDice)
                    recent_diffFG.push(abs(diffFG))
                    recent_bestDice.push(bestDice)

                    recent_MWCov.push(MWCov)
                    recent_MUCov.push(MUCov)
                    recent_AvgFP.push(AvgFP)
                    recent_AvgFN.push(AvgFN)

                    recent_rand_i.push(rand_i)

                    log_info = {
                        "bestDice": recent_bestDice.mean(),
                        "FgBgDice": recent_FgBgDice.mean(),
                        "diffFG": recent_diffFG.mean(),
                        "MWCov": recent_MWCov.mean(),
                        "MUCov": recent_MUCov.mean(),
                        "AvgFP": recent_AvgFP.mean(),
                        "AvgFN": recent_AvgFN.mean(),
                        "rand_i": recent_rand_i.mean()
                    }

                    for tag, value in log_info.items():
                        logger.scalar_summary(tag, value, num_tests)
                else:
                    bestDice, FgBgDice, diffFG = 0, 0, 0
                    MWCov, MUCov, AvgFP, AvgFN = 0, 0, 0, 0
                    rand_i = 0

                print(
                    "----------------------VALID SET--------------------------"
                )
                print(args.env)
                print("bestDice:", bestDice, "FgBgDice:", FgBgDice, "diffFG:",
                      diffFG, "MWCov:", MWCov, "MUCov:", MUCov, "AvgFP:",
                      AvgFP, "AvgFN:", AvgFN, "rand_i:", rand_i)
                # print ("mean bestDice")
                print("Log test #:", num_tests)
                print("rewards: ", player.reward.mean())
                print("sum rewards: ", reward_sum)
                print("#gt_values:", len(np.unique(player.env.gt_lbl)))
                print("values:")
                values = player.env.unique()
                print(np.concatenate([values[0][None], values[1][None]], 0))
                print("------------------------------------------------")

                log_img = np.concatenate(renderlist[::-1], 0)

                if not "3D" in args.data:
                    for i in range(3):
                        player.probs.insert(0, np.zeros_like(player.probs[0]))
                    while (len(player.probs) - 3 < args.max_episode_length):
                        player.probs.append(np.zeros_like(player.probs[0]))

                    probslist = [
                        np.repeat(np.expand_dims(prob, -1), 3, -1)
                        for prob in player.probs
                    ]
                    probslist = np.concatenate(probslist, 1)
                    probslist = (probslist * 256).astype(np.uint8, copy=False)
                    # log_img = renderlist [-1]
                    print(probslist.shape, log_img.shape)
                    log_img = np.concatenate([probslist, log_img], 0)

                log_info = {"valid_sample": log_img}

                print(log_img.shape)
                io.imsave(
                    args.log_dir + "tifs/" + str(num_tests) + "_sample.tif",
                    log_img.astype(np.uint8))
                io.imsave(
                    args.log_dir + "tifs/" + str(num_tests) + "_pred.tif",
                    player.env.lbl.astype(np.uint8))
                io.imsave(args.log_dir + "tifs/" + str(num_tests) + "_gt.tif",
                          player.env.gt_lbl.astype(np.int32))

                if args.seg_scale:
                    log_info["scaler"] = player.env.scaler

                for tag, img in log_info.items():
                    img = img[None]
                    logger.image_summary(tag, img, num_tests)

                if not args.deploy:
                    log_info = {
                        'mean_valid_reward':
                        reward_mean,
                        '100_mean_reward':
                        recent_episode_scores.mean(),
                        'split_ratio':
                        player.env.split_ratio_sum.sum() /
                        np.count_nonzero(player.env.gt_lbl),
                        'merge_ratio':
                        player.env.merge_ratio_sum.sum() /
                        np.count_nonzero(player.env.gt_lbl),
                    }

                    if args.wctrl == 's2m':
                        log_info.update({
                            'mer_w':
                            mer_w_scheduler.value(),
                            'spl_w':
                            spl_w_scheduler.value() * len(args.out_radius),
                        })

                    merge_ratios.append(player.env.merge_ratio_sum.sum() /
                                        np.count_nonzero(player.env.gt_lbl))
                    split_ratios.append(player.env.split_ratio_sum.sum() /
                                        np.count_nonzero(player.env.gt_lbl))

                    print("split ratio: ", np.max(player.env.split_ratio_sum),
                          np.min(player.env.split_ratio_sum))
                    print("merge ratio: ", np.max(player.env.merge_ratio_sum),
                          np.min(player.env.merge_ratio_sum))

                    print("merge ratio: ", merge_ratios)
                    print("split ratio: ", split_ratios)

                    for tag, value in log_info.items():
                        logger.scalar_summary(tag, value, num_tests)

            renderlist = []
            reward_sum = 0
            player.eps_len = 0

            if args.wctrl == "s2m":
                shared_dict["spl_w"] = spl_w_scheduler.next()
                shared_dict["mer_w"] = mer_w_scheduler.next()
                player.env.config["spl_w"] = shared_dict["spl_w"]
                player.env.config["mer_w"] = shared_dict["mer_w"]

            player.clear_actions()
            state = player.env.reset(player.model, gpu_id)
            renderlist.append(player.env.render())

            time.sleep(15)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
예제 #22
0
def test(args, shared_model, env_conf):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    env = GymEnvironment(env_name='Pong-v4')
    action_size = env.get_action_size('Pong-v4')

    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(model=None,
                   env=env,
                   action_size=action_size,
                   args=args,
                   state=None)
    player.gpu_id = gpu_id
    player.model = UNREALModule(3,
                                action_size=action_size,
                                enable_pixel_control=True)

    player.state = player.env.last_state
    player.eps_len += 2
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    flag = True
    max_score = 0
    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward

        if player.done and not player.info:
            player.env.reset()
            state = player.env.last_state
            player.eps_len += 2
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
        elif player.info:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean))

            if args.save_max and reward_sum >= max_score:
                max_score = reward_sum
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir, args.env))
                else:
                    state_to_save = player.model.state_dict()
                    torch.save(
                        state_to_save,
                        '{0}{1}.dat'.format(args.save_model_dir, args.env))

            reward_sum = 0
            player.eps_len = 0
            player.env.reset()
            state = player.env.last_state
            player.eps_len += 2
            time.sleep(10)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()