Example #1
File: test.py  Project: hvcl/ColorRL
def test_func(args,
              shared_model,
              env_conf,
              datasets=None,
              tests=None,
              shared_dict=None):
    ptitle('Valid agent')

    if args.valid_gpu < 0:
        gpu_id = args.gpu_ids[-1]
    else:
        gpu_id = args.valid_gpu

    env_conf["env_gpu"] = gpu_id

    if not args.deploy:
        log = {}

        logger = Logger(args.log_dir)

        create_dir(args.log_dir + "models/")
        create_dir(args.log_dir + "tifs/")
        create_dir(args.log_dir + "tifs_test/")

        os.system("cp *.py " + args.log_dir)
        os.system("cp *.sh " + args.log_dir)
        os.system("cp models/*.py " + args.log_dir + "models/")

        setup_logger('{}_log'.format(args.env),
                     r'{0}{1}_log'.format(args.log_dir, args.env))
        log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
            args.env))
        d_args = vars(args)
        env_conf_log = env_conf

    if tests is not None:
        if args.testlbl:
            test_env = EM_env(tests[0],
                              env_conf,
                              type="test",
                              gt_lbl_list=tests[1])
        else:
            test_env = EM_env(tests[0], env_conf, type="test")

    if not args.deploy:
        for k in d_args.keys():
            log['{}_log'.format(args.env)].info('{0}: {1}'.format(
                k, d_args[k]))
        for k in env_conf_log.keys():
            log['{}_log'.format(args.env)].info('{0}: {1}'.format(
                k, env_conf_log[k]))

    torch.manual_seed(args.seed)

    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    raw_list, gt_lbl_list = datasets
    env = EM_env(raw_list, env_conf, type="train", gt_lbl_list=gt_lbl_list)

    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0

    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    player.model = get_model(args,
                             args.model,
                             env_conf["observation_shape"],
                             args.features,
                             atrous_rates=args.atr_rate,
                             num_actions=2,
                             split=args.data_channel,
                             gpu_id=gpu_id,
                             multi=args.multi)

    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()

    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()

    flag = True
    if not args.deploy:
        create_dir(args.save_model_dir)

    recent_episode_scores = ScalaTracker(100)
    recent_FgBgDice = ScalaTracker(100)
    recent_bestDice = ScalaTracker(100)
    recent_diffFG = ScalaTracker(100)

    recent_MUCov = ScalaTracker(100)
    recent_MWCov = ScalaTracker(100)
    recent_AvgFP = ScalaTracker(100)
    recent_AvgFN = ScalaTracker(100)

    recent_rand_i = ScalaTracker(100)

    renderlist = []
    renderlist.append(player.env.render())
    max_score = 0

    # ----------------------------------------- Deploy / Inference -----------------------------------------
    if args.deploy:
        with torch.cuda.device(gpu_id):
            player.model.load_state_dict(shared_model.state_dict())

        # inference (args, None, player.model, tests [0], test_env, gpu_id, player.env.rng, len (tests [0]))
        if len(tests) == 4:
            inference(args, None, player.model, tests[0], test_env, gpu_id,
                      player.env.rng, len(tests[0]), tests[3])
        else:
            inference(args, None, player.model, tests[0], test_env, gpu_id,
                      player.env.rng, len(tests[0]))

        return
    # ----------------------------------------- End Deploy / Inference -----------------------------------------

    merge_ratios = []
    split_ratios = []

    if args.wctrl == "s2m":
        schedule = args.wctrl_schedule

        delta = (shared_dict['spl_w'] - shared_dict['mer_w']) / (2 *
                                                                 len(schedule))

        mer_w_delta = delta
        mer_w_var = shared_dict['mer_w']
        mer_w_scheduler = Scheduler(mer_w_var, schedule, mer_w_delta)

        split_delta = -delta / len(args.out_radius)
        split_var = shared_dict['spl_w'] / len(args.out_radius)
        spl_w_scheduler = Scheduler(split_var, schedule, split_delta)

    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward.mean()
        renderlist.append(player.env.render())

        if player.done:
            flag = True
            num_tests += 1

            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests

            log['{}_log'.format(args.env)].info(
                "VALID: Time {0}, episode reward {1}, num tests {4}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean, num_tests))

            recent_episode_scores.push(reward_sum)

            if args.save_max and recent_episode_scores.mean() >= max_score:
                max_score = recent_episode_scores.mean()
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                'best_model_' + args.env))

            if num_tests % args.save_period == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                str(num_tests)))

            if num_tests % args.log_period == 0:
                if tests is not None and not args.DEBUG:
                    inference(args, logger, player.model, tests[0], test_env,
                              gpu_id, player.env.rng, num_tests)

                if (np.max(env.lbl) != 0 and np.max(env.gt_lbl) != 0):
                    bestDice, FgBgDice, diffFG, MWCov, MUCov, AvgFP, AvgFN, rand_i = evaluate(
                        args, player.env)

                    recent_FgBgDice.push(FgBgDice)
                    recent_diffFG.push(abs(diffFG))
                    recent_bestDice.push(bestDice)

                    recent_MWCov.push(MWCov)
                    recent_MUCov.push(MUCov)
                    recent_AvgFP.push(AvgFP)
                    recent_AvgFN.push(AvgFN)

                    recent_rand_i.push(rand_i)

                    log_info = {
                        "bestDice": recent_bestDice.mean(),
                        "FgBgDice": recent_FgBgDice.mean(),
                        "diffFG": recent_diffFG.mean(),
                        "MWCov": recent_MWCov.mean(),
                        "MUCov": recent_MUCov.mean(),
                        "AvgFP": recent_AvgFP.mean(),
                        "AvgFN": recent_AvgFN.mean(),
                        "rand_i": recent_rand_i.mean()
                    }

                    for tag, value in log_info.items():
                        logger.scalar_summary(tag, value, num_tests)
                else:
                    bestDice, FgBgDice, diffFG = 0, 0, 0
                    MWCov, MUCov, AvgFP, AvgFN = 0, 0, 0, 0
                    rand_i = 0

                print(
                    "----------------------VALID SET--------------------------"
                )
                print(args.env)
                print("bestDice:", bestDice, "FgBgDice:", FgBgDice, "diffFG:",
                      diffFG, "MWCov:", MWCov, "MUCov:", MUCov, "AvgFP:",
                      AvgFP, "AvgFN:", AvgFN, "rand_i:", rand_i)
                # print ("mean bestDice")
                print("Log test #:", num_tests)
                print("rewards: ", player.reward.mean())
                print("sum rewards: ", reward_sum)
                print("#gt_values:", len(np.unique(player.env.gt_lbl)))
                print("values:")
                values = player.env.unique()
                print(np.concatenate([values[0][None], values[1][None]], 0))
                print("------------------------------------------------")

                log_img = np.concatenate(renderlist[::-1], 0)

                if not "3D" in args.data:
                    for i in range(3):
                        player.probs.insert(0, np.zeros_like(player.probs[0]))
                    while (len(player.probs) - 3 < args.max_episode_length):
                        player.probs.append(np.zeros_like(player.probs[0]))

                    probslist = [
                        np.repeat(np.expand_dims(prob, -1), 3, -1)
                        for prob in player.probs
                    ]
                    probslist = np.concatenate(probslist, 1)
                    probslist = (probslist * 256).astype(np.uint8, copy=False)
                    # log_img = renderlist [-1]
                    print(probslist.shape, log_img.shape)
                    log_img = np.concatenate([probslist, log_img], 0)

                log_info = {"valid_sample": log_img}

                print(log_img.shape)
                io.imsave(
                    args.log_dir + "tifs/" + str(num_tests) + "_sample.tif",
                    log_img.astype(np.uint8))
                io.imsave(
                    args.log_dir + "tifs/" + str(num_tests) + "_pred.tif",
                    player.env.lbl.astype(np.uint8))
                io.imsave(args.log_dir + "tifs/" + str(num_tests) + "_gt.tif",
                          player.env.gt_lbl.astype(np.int32))

                if args.seg_scale:
                    log_info["scaler"] = player.env.scaler

                for tag, img in log_info.items():
                    img = img[None]
                    logger.image_summary(tag, img, num_tests)

                if not args.deploy:
                    log_info = {
                        'mean_valid_reward': reward_mean,
                        '100_mean_reward': recent_episode_scores.mean(),
                        'split_ratio': player.env.split_ratio_sum.sum() /
                                       np.count_nonzero(player.env.gt_lbl),
                        'merge_ratio': player.env.merge_ratio_sum.sum() /
                                       np.count_nonzero(player.env.gt_lbl),
                    }

                    if args.wctrl == 's2m':
                        log_info.update({
                            'mer_w': mer_w_scheduler.value(),
                            'spl_w': spl_w_scheduler.value() * len(args.out_radius),
                        })

                    merge_ratios.append(player.env.merge_ratio_sum.sum() /
                                        np.count_nonzero(player.env.gt_lbl))
                    split_ratios.append(player.env.split_ratio_sum.sum() /
                                        np.count_nonzero(player.env.gt_lbl))

                    print("split ratio: ", np.max(player.env.split_ratio_sum),
                          np.min(player.env.split_ratio_sum))
                    print("merge ratio: ", np.max(player.env.merge_ratio_sum),
                          np.min(player.env.merge_ratio_sum))

                    print("merge ratio: ", merge_ratios)
                    print("split ratio: ", split_ratios)

                    for tag, value in log_info.items():
                        logger.scalar_summary(tag, value, num_tests)

            renderlist = []
            reward_sum = 0
            player.eps_len = 0

            if args.wctrl == "s2m":
                shared_dict["spl_w"] = spl_w_scheduler.next()
                shared_dict["mer_w"] = mer_w_scheduler.next()
                player.env.config["spl_w"] = shared_dict["spl_w"]
                player.env.config["mer_w"] = shared_dict["mer_w"]

            player.clear_actions()
            state = player.env.reset(player.model, gpu_id)
            renderlist.append(player.env.render())

            time.sleep(15)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
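
Example #1 leans on two small helpers that are not listed on this page: ScalaTracker, which keeps running averages of the last 100 episode scores and metrics, and Scheduler, which steps the merge/split reward weights along args.wctrl_schedule. Their real implementations live elsewhere in hvcl/ColorRL; the minimal sketch below only assumes the behavior visible in the calls above (push()/mean() on the tracker, value()/next() on the scheduler, stepping by a fixed delta at each milestone).

from collections import deque


class ScalaTracker:
    # Running tracker over the most recent `capacity` scalar values (assumed behavior).
    def __init__(self, capacity):
        self.values = deque(maxlen=capacity)

    def push(self, value):
        self.values.append(float(value))

    def mean(self):
        return sum(self.values) / len(self.values) if self.values else 0.0


class Scheduler:
    # Hypothetical stand-in: value() returns the current weight and next(), called
    # once per episode, moves it by `delta` whenever a milestone in `schedule` is passed.
    def __init__(self, init_value, schedule, delta):
        self.val = init_value
        self.milestones = sorted(schedule)
        self.delta = delta
        self.episode = 0

    def value(self):
        return self.val

    def next(self):
        self.episode += 1
        while self.milestones and self.episode >= self.milestones[0]:
            self.milestones.pop(0)
            self.val += self.delta
        return self.val
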
Example #2
def train (rank, args, shared_model, optimizer, env_conf, datasets=None):
    ptitle('Training Agent: {}'.format(rank))
    print ('Start training agent: ', rank)
    
    if rank == 0:
        logger = Logger (args.log_dir)
        train_step = 0

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    env_conf ["env_gpu"] = gpu_id
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)
    if "EM_env" in args.env:
        raw, lbl, prob, gt_lbl = datasets
        env = EM_env (raw, lbl, prob, env_conf, 'train', gt_lbl)
    else:
        env = Voronoi_env (env_conf)

    if optimizer is None:
        if args.optimizer == 'RMSprop':
            optimizer = optim.RMSprop (shared_model.parameters (), lr=args.lr)
        if args.optimizer == 'Adam':
            optimizer = optim.Adam (shared_model.parameters (), lr=args.lr, amsgrad=args.amsgrad)

        # env.seed (args.seed + rank)
    if not args.continuous:
        player = Agent (None, env, args, None)
    else:
        player = Agent_continuous (None, env, args, None)
    player.gpu_id = gpu_id
    if not args.continuous:
        player.model = A3Clstm (env.observation_space.shape, env_conf["num_action"], args.hidden_feat)
    else:
        player.model = A3Clstm_continuous (env.observation_space.shape, env_conf["num_action"], args.hidden_feat)

    player.state = player.env.reset ()
    player.state = torch.from_numpy (player.state).float ()
    old_score = player.env.old_score
    final_score = 0

    if gpu_id >= 0:
        with torch.cuda.device (gpu_id):
            player.state = player.state.cuda ()
            player.model = player.model.cuda ()
    player.model.train ()

    if rank == 0:
        eps_reward = 0
        pinned_eps_reward = 0
        mean_log_prob = 0

    # print ("rank: ", rank)

    while True:
        if gpu_id >= 0:
            with torch.cuda.device (gpu_id):
                player.model.load_state_dict (shared_model.state_dict ())
        else:
            player.model.load_state_dict (shared_model.state_dict ())
        
        if player.done:
            player.eps_len = 0
            if rank == 0:
                if 0 <= (train_step % args.train_log_period) < args.max_episode_length:
                    print ("train: step", train_step, "\teps_reward", eps_reward, 
                        "\timprovement", final_score - old_score)
                old_score = player.env.old_score
                pinned_eps_reward = eps_reward
                eps_reward = 0
                mean_log_prob = 0
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.cx = Variable(torch.zeros(1, args.hidden_feat).cuda())
                    player.hx = Variable(torch.zeros(1, args.hidden_feat).cuda())
            else:
                player.cx = Variable(torch.zeros(1, args.hidden_feat))
                player.hx = Variable(torch.zeros(1, args.hidden_feat))
        else:
            player.cx = Variable(player.cx.data)
            player.hx = Variable(player.hx.data)

        for step in range(args.num_steps):
            player.action_train ()
            if rank == 0:
                # if 0 <= (train_step % args.train_log_period) < args.max_episode_length:
                #     print ("train: step", train_step, "\taction = ", player.action)
                eps_reward += player.reward
                # print (eps_reward)
                mean_log_prob += player.log_probs [-1] / env_conf ["T"]
            if player.done:
                break

        if player.done:
            # if rank == 0:
            #     print ("----------------------------------------------")
            final_score = player.env.old_score
            state = player.env.reset ()
            player.state = torch.from_numpy (state).float ()
            if gpu_id >= 0:
                with torch.cuda.device (gpu_id):
                    player.state = player.state.cuda ()

        R = torch.zeros (1, 1)
        if not player.done:
            if not args.continuous:
                value, _, _ = player.model((Variable(player.state.unsqueeze(0)),
                                        (player.hx, player.cx)))
            else:
                value, _, _, _ = player.model((Variable(player.state.unsqueeze(0)),
                                        (player.hx, player.cx)))
            R = value.data

        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                R = R.cuda()

        player.values.append(Variable(R))
        policy_loss = 0
        value_loss = 0
        gae = torch.zeros(1, 1)
        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                gae = gae.cuda()
        R = Variable(R)

        for i in reversed(range(len(player.rewards))):
            R = args.gamma * R + player.rewards[i]
            advantage = R - player.values[i]
            value_loss = value_loss + 0.5 * advantage.pow(2)

            delta_t = player.values[i + 1].data * args.gamma + player.rewards[i] - \
                        player.values[i].data

            gae = gae * args.gamma * args.tau + delta_t
            # print (player.rewards [i])
            if not args.continuous:
                policy_loss = policy_loss - \
                    player.log_probs[i] * \
                    Variable(gae) - 0.01 * player.entropies[i]
            else:
                policy_loss = policy_loss - \
                    player.log_probs[i].sum () * Variable(gae) - \
                    0.01 * player.entropies[i].sum ()

        player.model.zero_grad ()
        sum_loss = (policy_loss + value_loss)

        sum_loss.backward ()
        ensure_shared_grads (player.model, shared_model, gpu=gpu_id >= 0)
        optimizer.step ()
        player.clear_actions ()

        if rank == 0:
            train_step += 1
            if train_step % args.log_period == 0:
                log_info = {
                    # 'train: sum_loss': sum_loss, 
                    'train: value_loss': value_loss, 
                    'train: policy_loss': policy_loss, 
                    'train: advantage': advantage,
                    # 'train: entropy': entropy,
                    'train: eps reward': pinned_eps_reward,
                    # 'train: mean log prob': mean_log_prob
                }

                for tag, value in log_info.items ():
                    logger.scalar_summary (tag, value, train_step)
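
The reversed loop at the end of Example #2 is the standard A3C update with Generalized Advantage Estimation: R is the bootstrapped n-step return, the critic minimizes 0.5 * (R - V)^2, and the actor minimizes -log_prob * GAE with a 0.01 entropy bonus. The standalone function below restates that recurrence for scalar rewards; names and coefficients follow the example above, and it is a reference sketch rather than code from the project.

import torch


def a3c_losses(values, log_probs, entropies, rewards, R, gamma, tau, entropy_coef=0.01):
    # `values` holds V(s_0)..V(s_{T-1}) plus the bootstrap value appended at the end,
    # exactly how player.values is used in the loop above.
    policy_loss, value_loss = 0, 0
    gae = torch.zeros(1, 1)
    for i in reversed(range(len(rewards))):
        R = gamma * R + rewards[i]                         # n-step return
        advantage = R - values[i]
        value_loss = value_loss + 0.5 * advantage.pow(2)   # critic term

        # GAE accumulates discounted temporal-difference residuals
        delta_t = rewards[i] + gamma * values[i + 1].data - values[i].data
        gae = gae * gamma * tau + delta_t

        # actor term: policy gradient weighted by GAE plus entropy regularization
        policy_loss = policy_loss - log_probs[i] * gae - entropy_coef * entropies[i]
    return policy_loss, value_loss
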
Example #3
def test(args, shared_model, env_conf, datasets):
    ptitle('Test agent')
    gpu_id = args.gpu_ids[-1]
    log = {}

    logger = Logger(args.log_dir)

    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)

    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    raw, gt_lbl = datasets
    env = EM_env(raw, gt_lbl, env_conf)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0

    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    # player.model = A3Clstm (env.observation_space.shape, env_conf["num_action"], args.hidden_feat)
    player.model = SimpleCNN(env.observation_space.shape,
                             env_conf["num_action"])
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()

    flag = True
    create_dir(args.save_model_dir)

    recent_episode_scores = []
    renderlist = []
    renderlist.append(player.env.render())
    max_score = 0
    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward
        renderlist.append(player.env.render())

        if player.done:
            flag = True
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()

            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, num tests {4}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean, num_tests))

            recent_episode_scores += [reward_sum]
            if len(recent_episode_scores) > 200:
                recent_episode_scores.pop(0)

            if args.save_max and np.mean(recent_episode_scores) >= max_score:
                max_score = np.mean(recent_episode_scores)
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                'best_model_' + args.env))

            if num_tests % args.save_period == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save, '{0}{1}.dat'.format(
                                args.save_model_dir,
                                args.env + '_' + str(num_tests)))

            if num_tests % args.log_period == 0:
                print("------------------------------------------------")
                print("Log test #:", num_tests)
                print("Prob: ")
                for i in range(player.env.agent_out_shape[1]):
                    for j in range(player.env.agent_out_shape[2]):
                        print("{:.3f}\t".format(player.prob_cpu[0, i, j]),
                              end='')
                    print()
                print("Actions :", player.actions)
                print("Actions transformed: ")
                print(player.actions_explained)
                print("rewards: ", player.rewards)
                print("sum rewards: ", reward_sum)
                print("------------------------------------------------")
                log_img = np.concatenate(renderlist, 0)
                log_info = {"test: traning_sample": log_img}
                for tag, img in log_info.items():
                    img = img[None]
                    logger.image_summary(tag, img, num_tests)

                log_info = {'test: mean_reward': reward_mean}
                for tag, value in log_info.items():
                    logger.scalar_summary(tag, value, num_tests)

            renderlist = []
            reward_sum = 0
            player.eps_len = 0
            time.sleep(30)
            player.clear_actions()
            state = player.env.reset()
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
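
Several of the examples call create_dir and setup_logger before the loop starts; neither helper is shown on this page. A minimal sketch consistent with how they are used (create the directory if missing, attach a file handler to a named logger that is later fetched with logging.getLogger) could look like the following; treat it as an assumption rather than the project's exact code.

import logging
import os


def create_dir(path):
    # Create the directory (and any missing parents) if it does not exist yet.
    os.makedirs(path, exist_ok=True)


def setup_logger(logger_name, log_file, level=logging.INFO):
    # Attach a file handler (plus console echo) to a named logger, so that
    # logging.getLogger(logger_name) elsewhere writes to `log_file`.
    logger = logging.getLogger(logger_name)
    formatter = logging.Formatter('%(asctime)s : %(message)s')
    file_handler = logging.FileHandler(log_file, mode='w')
    file_handler.setFormatter(formatter)
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(formatter)
    logger.setLevel(level)
    logger.addHandler(file_handler)
    logger.addHandler(stream_handler)
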
Example #4
def train_func(rank,
               args,
               shared_model,
               optimizer,
               env_conf,
               datasets=None,
               shared_dict=None):
    if args.deploy:
        return
    ptitle('Train {0}'.format(rank))
    print('Start training agent: ', rank)

    if rank == 0:
        logger = Logger(args.log_dir[:-1] + '_losses/')
        train_step = 0

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    env_conf["env_gpu"] = gpu_id
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    raw_list, gt_lbl_list = datasets
    env = EM_env(raw_list,
                 env_conf,
                 type="train",
                 gt_lbl_list=gt_lbl_list,
                 seed=args.seed + rank)

    if optimizer is None:
        if args.optimizer == 'RMSprop':
            optimizer = optim.RMSprop(shared_model.parameters(), lr=args.lr)
        if args.optimizer == 'Adam':
            optimizer = optim.Adam(shared_model.parameters(),
                                   lr=args.lr,
                                   amsgrad=args.amsgrad)

    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    player.model = get_model(args,
                             args.model,
                             env.observation_space.shape,
                             args.features,
                             atrous_rates=args.atr_rate,
                             num_actions=2,
                             split=args.data_channel,
                             gpu_id=gpu_id,
                             multi=args.multi)
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()

    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.state = player.state.cuda()
            player.model = player.model.cuda()
    player.model.train()

    if rank == 0:
        eps_reward = 0
        pinned_eps_reward = 0

    while True:
        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                player.model.load_state_dict(shared_model.state_dict())
        else:
            player.model.load_state_dict(shared_model.state_dict())

        if player.done:
            player.eps_len = 0

            if rank == 0:
                if train_step % args.train_log_period == 0 and train_step > 0:
                    print("train: step", train_step, "\teps_reward",
                          eps_reward)
                if train_step > 0:
                    pinned_eps_reward = player.env.sum_reward.mean()
                    eps_reward = 0

            if args.lstm_feats:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        player.cx, player.hx = player.model.lstm.init_hidden(
                            batch_size=1, use_cuda=True)
                else:
                    player.cx, player.hx = player.model.lstm.init_hidden(
                        batch_size=1, use_cuda=False)
        elif args.lstm_feats:
            player.cx = Variable(player.cx.data)
            player.hx = Variable(player.hx.data)

        for step in range(args.num_steps):

            if rank < args.lbl_agents:
                player.action_train(use_lbl=True)
            else:
                player.action_train()

            if rank == 0:
                eps_reward = player.env.sum_reward.mean()
            if player.done:
                break

        if player.done:
            state = player.env.reset(player.model, gpu_id)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()

        if "3D" in args.data:
            R = torch.zeros(1, 1, env_conf["size"][0], env_conf["size"][1],
                            env_conf["size"][2])
        else:
            R = torch.zeros(1, 1, env_conf["size"][0], env_conf["size"][1])

        if args.lowres:
            R = torch.zeros(1, 1, env_conf["size"][0] // 2,
                            env_conf["size"][1] // 2)

        if not player.done:
            if args.lstm_feats:
                value, _, _ = player.model(
                    (Variable(player.state.unsqueeze(0)), (player.hx,
                                                           player.cx)))
            else:
                value, _ = player.model(Variable(player.state.unsqueeze(0)))
            R = value.data

        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                R = R.cuda()

        player.values.append(Variable(R))
        policy_loss = 0
        value_loss = 0

        if "3D" in args.data:
            gae = torch.zeros(1, 1, env_conf["size"][0], env_conf["size"][1],
                              env_conf["size"][2])
        else:
            gae = torch.zeros(1, 1, env_conf["size"][0], env_conf["size"][1])

        if args.rew_drop:
            keep_map = torch.tensor(player.env.keep_map)
        if args.lowres:
            gae = torch.zeros(1, 1, env_conf["size"][0] // 2,
                              env_conf["size"][1] // 2)

        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                gae = gae.cuda()
                if args.rew_drop:
                    keep_map = keep_map.cuda()
        R = Variable(R)

        for i in reversed(range(len(player.rewards))):
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    reward_i = torch.tensor(player.rewards[i]).cuda()
            else:
                reward_i = torch.tensor(player.rewards[i])

            R = args.gamma * R + reward_i
            if args.rew_drop:
                advantage = R - player.values[i]
                value_loss = value_loss + (0.5 * advantage * advantage *
                                           keep_map).mean()
                delta_t = player.values[
                    i + 1].data * args.gamma + reward_i - player.values[i].data
                gae = gae * args.gamma * args.tau + delta_t
            else:
                advantage = R - player.values[i]
                value_loss = value_loss + (0.5 * advantage * advantage).mean()
                delta_t = player.values[
                    i + 1].data * args.gamma + reward_i - player.values[i].data
                gae = gae * args.gamma * args.tau + delta_t
            if args.noisy:
                policy_loss = policy_loss - \
                    (player.log_probs[i] * Variable(gae)).mean ()
            else:
                if args.rew_drop:
                    policy_loss = policy_loss - \
                        (player.log_probs[i] * Variable(gae) * keep_map).mean () - \
                        (args.entropy_alpha * player.entropies[i] * keep_map).mean ()
                else:
                    policy_loss = policy_loss - \
                        (player.log_probs[i] * Variable(gae)).mean () - \
                        (args.entropy_alpha * player.entropies[i]).mean ()

        player.model.zero_grad()
        sum_loss = (policy_loss + value_loss)

        curtime = time.time()
        # print ("backward curtime:", curtime)
        sum_loss.backward()
        # print ("backward done", time.time () - curtime)
        ensure_shared_grads(player.model, shared_model, gpu=gpu_id >= 0)

        curtime = time.time()
        # print ("optim curtime:", curtime)
        optimizer.step()
        # print ("optim done", time.time () - curtime)

        player.clear_actions()
        if args.wctrl == "s2m":
            player.env.config["spl_w"] = shared_dict["spl_w"]
            player.env.config["mer_w"] = shared_dict["mer_w"]

        if rank == 0:
            train_step += 1
            if train_step % args.log_period == 0 and train_step > 0:
                log_info = {
                    'train: value_loss': value_loss,
                    'train: policy_loss': policy_loss,
                    'train: eps reward': pinned_eps_reward,
                }

                if "EX" in args.model:
                    log_info["cell_prob_loss"] = cell_prob_loss

                for tag, value in log_info.items():
                    logger.scalar_summary(tag, value, train_step)
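
Every training worker on this page calls ensure_shared_grads(player.model, shared_model, gpu=...) between backward() and optimizer.step(). The helper itself is not listed; in A3C implementations of this family it copies the worker's gradients onto the shared model so that the shared optimizer applies the worker's update. A sketch under that assumption:

def ensure_shared_grads(model, shared_model, gpu=False):
    # Point the shared model's gradient buffers at this worker's gradients so that
    # optimizer.step() over shared_model.parameters() uses them. When the worker ran
    # on a GPU, gradients are moved to the CPU copy of the model first.
    for param, shared_param in zip(model.parameters(), shared_model.parameters()):
        if shared_param.grad is not None and not gpu:
            # CPU case: the shared grads were already linked on an earlier step.
            return
        if param.grad is None:
            continue
        shared_param._grad = param.grad if not gpu else param.grad.cpu()
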
Example #5
def test(args, shared_model, env_conf, datasets=None, hasLbl=True):
    if hasLbl:
        ptitle('Valid agent')
    else:
        ptitle("Test agent")

    gpu_id = args.gpu_ids[-1]
    env_conf["env_gpu"] = gpu_id
    log = {}
    logger = Logger(args.log_dir)

    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)

    if hasLbl:
        for k in d_args.keys():
            log['{}_log'.format(args.env)].info('{0}: {1}'.format(
                k, d_args[k]))

    torch.manual_seed(args.seed)

    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    if "EM_env" in args.env:
        raw_list, gt_lbl_list = datasets
        env = EM_env(raw_list, env_conf, type="train", gt_lbl_list=gt_lbl_list)
    else:
        env = Voronoi_env(env_conf)

    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0

    player = Agent(None, env, args, None)

    player.gpu_id = gpu_id

    if args.model == "UNet":
        player.model = UNet(env.observation_space.shape[0], args.features, 2)
    elif args.model == "FusionNetLstm":
        player.model = FusionNetLstm(env.observation_space.shape,
                                     args.features, 2, args.hidden_feat)
    elif args.model == "FusionNet":
        player.model = FusionNet(env.observation_space.shape[0], args.features,
                                 2)
    elif (args.model == "UNetLstm"):
        player.model = UNetLstm(env.observation_space.shape, args.features, 2,
                                args.hidden_feat)

    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()

    flag = True

    create_dir(args.save_model_dir)

    recent_episode_scores = []
    renderlist = []
    renderlist.append(player.env.render())
    max_score = 0
    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward.mean()
        renderlist.append(player.env.render())

        if player.done:
            flag = True

            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            if hasLbl:
                log['{}_log'.format(args.env)].info(
                    "VALID: Time {0}, episode reward {1}, num tests {4}, episode length {2}, reward mean {3:.4f}"
                    .format(
                        time.strftime("%Hh %Mm %Ss",
                                      time.gmtime(time.time() - start_time)),
                        reward_sum, player.eps_len, reward_mean, num_tests))

            recent_episode_scores += [reward_sum]
            if len(recent_episode_scores) > 200:
                recent_episode_scores.pop(0)

            if args.save_max and np.mean(recent_episode_scores) >= max_score:
                max_score = np.mean(recent_episode_scores)
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                'best_model_' + args.env))

            if num_tests % args.save_period == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save, '{0}{1}.dat'.format(
                                args.save_model_dir,
                                args.env + '_' + str(num_tests)))

            if num_tests % args.log_period == 0:
                if hasLbl:
                    print(
                        "----------------------VALID SET--------------------------"
                    )
                    print("Log test #:", num_tests)
                    print("rewards: ", player.reward.mean())
                    print("sum rewards: ", reward_sum)
                    print("------------------------------------------------")

                log_img = np.concatenate(renderlist, 0)
                if hasLbl:
                    log_info = {"valid_sample": log_img}
                else:
                    log_info = {"test_sample": log_img}

                for tag, img in log_info.items():
                    img = img[None]
                    logger.image_summary(tag, img, num_tests)

                if hasLbl:
                    log_info = {'mean_valid_reward': reward_mean}
                    for tag, value in log_info.items():
                        logger.scalar_summary(tag, value, num_tests)

            renderlist = []
            reward_sum = 0
            player.eps_len = 0

            player.clear_actions()
            state = player.env.reset()
            renderlist.append(player.env.render())
            time.sleep(15)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
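
The Logger object used throughout these examples exposes scalar_summary(tag, value, step) and image_summary(tag, images, step); its implementation is not part of the listing. A TensorBoard-backed stand-in with the same interface (an assumption about the interface only, not the project's class) could be:

import numpy as np
from torch.utils.tensorboard import SummaryWriter


class Logger:
    # Minimal stand-in that writes TensorBoard event files under `log_dir`.
    def __init__(self, log_dir):
        self.writer = SummaryWriter(log_dir)

    def scalar_summary(self, tag, value, step):
        # `value` may be a Python float or a one-element torch tensor.
        self.writer.add_scalar(tag, float(value), step)

    def image_summary(self, tag, images, step):
        # `images` is a batch of H x W x C arrays, as produced by `img[None]` above.
        for i, img in enumerate(np.asarray(images)):
            self.writer.add_image('{}/{}'.format(tag, i), img, step, dataformats='HWC')
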
Example #6
def train_func(rank, args, shared_model, optimizer, env_conf, datasets):
    if args.deploy:
        return
    ptitle('Train {0}'.format(rank))
    print('Start training agent: ', rank)

    if rank == 0:
        logger = Logger(args.log_dir[:-1] + '_losses/')
        train_step = 0

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    env_conf["env_gpu"] = gpu_id
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    env = Debug_env(datasets, env_conf, seed=args.seed + rank)

    if optimizer is None:
        if args.optimizer == 'RMSprop':
            optimizer = optim.RMSprop(shared_model.parameters(), lr=args.lr)
        if args.optimizer == 'Adam':
            optimizer = optim.Adam(shared_model.parameters(),
                                   lr=args.lr,
                                   amsgrad=args.amsgrad)

    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    nChan = 3
    if args.is3D:
        nChan = 4
    if args.alpha_only:
        nChan = 1
    if not args.is3D:
        player.model = get_model(args,
                                 "ENet",
                                 input_shape=env_conf["obs_shape"],
                                 num_actions=args.num_actions * nChan)
    elif not args.obs3D:
        player.model = get_model(args,
                                 "ENet",
                                 input_shape=env_conf["obs_shape"],
                                 num_actions=args.num_actions * nChan)
    elif args.obs3D:
        player.model = get_model(args,
                                 "Net3D",
                                 input_shape=env_conf["obs_shape"],
                                 num_actions=args.num_actions * nChan)

    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()

    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.state = player.state.cuda()
            player.model = player.model.cuda()
    player.model.train()

    if rank == 0:
        eps_reward = 0
        pinned_eps_reward = 0

    while True:
        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                player.model.load_state_dict(shared_model.state_dict())
        else:
            player.model.load_state_dict(shared_model.state_dict())

        if player.done:
            player.eps_len = 0

            if rank == 0:
                if train_step % args.train_log_period == 0 and train_step > 0:
                    print("train: step", train_step, "\teps_reward",
                          eps_reward)
                if train_step > 0:
                    pinned_eps_reward = player.env.sum_rewards.mean()
                    eps_reward = 0

        for step in range(args.num_steps):
            player.action_train()
            if rank == 0:
                eps_reward = player.env.sum_rewards.mean()
            if player.done:
                break

        if player.done:
            state = player.env.reset()
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()

        if not args.alpha_only:
            if not args.is3D:
                R = torch.zeros(1, 1, args.num_actions * 3)
            else:
                R = torch.zeros(1, 1, args.num_actions * 4)
        else:
            R = torch.zeros(1, 1, args.num_actions)

        if not player.done:
            value, _ = player.model(Variable(player.state.unsqueeze(0)))
            R = value.data

        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                R = R.cuda()

        player.values.append(Variable(R))
        policy_loss = 0
        value_loss = 0

        if not args.alpha_only:
            if not args.is3D:
                gae = torch.zeros(1, 1, args.num_actions * 3)
            else:
                gae = torch.zeros(1, 1, args.num_actions * 4)
        else:
            gae = torch.zeros(1, 1, args.num_actions)

        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                gae = gae.cuda()
        R = Variable(R)

        for i in reversed(range(len(player.rewards))):
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    reward_i = torch.tensor(player.rewards[i]).cuda()
            else:
                reward_i = torch.tensor(player.rewards[i])

            R = args.gamma * R + reward_i

            advantage = R - player.values[i]
            value_loss = value_loss + (0.5 * advantage * advantage).mean()
            delta_t = player.values[
                i + 1].data * args.gamma + reward_i - player.values[i].data
            gae = gae * args.gamma * args.tau + delta_t

            policy_loss = policy_loss - \
                (player.log_probs[i] * Variable(gae)).mean () - \
                (args.entropy_alpha * player.entropies[i]).mean ()

        player.model.zero_grad()
        sum_loss = (policy_loss + value_loss)

        sum_loss.backward()
        ensure_shared_grads(player.model, shared_model, gpu=gpu_id >= 0)
        optimizer.step()

        player.clear_actions()

        if rank == 0:
            train_step += 1
            if train_step % (args.log_period * 10) == 0 and train_step > 0:  # every 10 * log_period steps
                log_info = {
                    'train: value_loss': value_loss,
                    'train: policy_loss': policy_loss,
                    'train: eps reward': pinned_eps_reward,
                }

                for tag, value in log_info.items():
                    logger.scalar_summary(tag, value, train_step)
Example #7
def test(args, shared_model, env_conf):
    ptitle('Valid agent')

    if args.valid_gpu < 0:
        gpu_id = args.gpu_ids[-1]
    else:
        gpu_id = args.valid_gpu
    env_conf["env_gpu"] = gpu_id

    log = {}
    logger = Logger(args.log_dir)

    create_dir(args.log_dir + "models/")

    os.system("cp *.sh " + args.log_dir)
    os.system("cp *.py " + args.log_dir)
    os.system("cp models/models.py " + args.log_dir + "models/")
    os.system("cp models/basic_modules.py " + args.log_dir + "models/")

    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    env_conf_log = env_conf

    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))
    for k in env_conf_log.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(
            k, env_conf_log[k]))

    torch.manual_seed(args.seed)

    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    env = database_env(env_conf, seed=0, dstype="test")
    env.max_step = 900

    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0

    player = Agent(None, env, args, None, gpu_id)
    player.gpu_id = gpu_id

    player.model = get_model(args,
                             args.model,
                             env_conf["observation_shape"],
                             args.features,
                             env_conf["num_actions"],
                             gpu_id=0,
                             lstm_feats=args.lstm_feats)

    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()

    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()

    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.state = player.state.cuda()
    player.model.eval()

    flag = True
    create_dir(args.save_model_dir)

    recent_episode_scores = ScalaTracker(100)
    max_score = 0

    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()

        reward_sum += player.reward.mean()

        if player.done:
            flag = True
            num_tests += 1

            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests

            log['{}_log'.format(args.env)].info(
                "VALID: Time {0}, episode reward {1}, num tests {4}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean, num_tests))

            recent_episode_scores.push(reward_sum)

            if args.save_max and recent_episode_scores.mean() >= max_score:
                max_score = recent_episode_scores.mean()
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                'best_model_' + args.env))

            if num_tests % args.save_period == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save, '{0}{1}.dat'.format(
                                args.save_model_dir,
                                args.env + '_' + str(num_tests)))

            if num_tests % args.log_period == 0:
                print("------------------------------------------------")
                print(args.env)
                print("Log test #:", num_tests)
                print("sum rewards: ", player.env.sum_reward)
                print("action_history\n", player.env.action_his)
                print()
                print("------------------------------------------------")

                log_info = {
                    'mean_reward': reward_mean,
                    '100_mean_reward': recent_episode_scores.mean()
                }
                for tag, value in log_info.items():
                    logger.scalar_summary(tag, value, num_tests)

            reward_sum = 0
            player.eps_len = 0

            player.clear_actions()
            state = player.env.reset()

            time.sleep(15)

            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
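
Each validation worker periodically stores the policy weights with torch.save(player.model.state_dict(), '...dat'). Restoring such a checkpoint later only requires rebuilding the same architecture and loading the state dict; a small self-contained usage sketch (the Sequential network and file name below are placeholders, not the project's model):

import torch
import torch.nn as nn

# Placeholder network; the real code would rebuild the model with get_model(...)
# using the same arguments as during training.
model = nn.Sequential(nn.Conv2d(1, 8, 3, padding=1), nn.ReLU())

# Saving, as done in the examples above:
torch.save(model.state_dict(), 'best_model_demo.dat')

# Restoring into a freshly constructed model of the same architecture:
restored = nn.Sequential(nn.Conv2d(1, 8, 3, padding=1), nn.ReLU())
restored.load_state_dict(torch.load('best_model_demo.dat', map_location='cpu'))
restored.eval()  # evaluation mode, matching player.model.eval() in the test loops
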
Example #8
def train(rank, args, shared_model, optimizer, env_conf):
    ptitle('Train {0}: {1}'.format(args.env, rank))
    print('Start training agent: ', rank)

    if rank == 0:
        logger = Logger(args.log_dir + '_losses/')
        train_step = 0

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    env_conf["env_gpu"] = gpu_id
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    env = database_env(env_conf, seed=0)

    if optimizer is None:
        if args.optimizer == 'RMSprop':
            optimizer = optim.RMSprop(shared_model.parameters(), lr=args.lr)
        if args.optimizer == 'Adam':
            optimizer = optim.Adam(shared_model.parameters(),
                                   lr=args.lr,
                                   amsgrad=args.amsgrad)

    player = Agent(None, env, args, None, gpu_id)
    player.gpu_id = gpu_id
    player.model = get_model(args,
                             args.model,
                             env_conf["observation_shape"],
                             args.features,
                             env_conf["num_actions"],
                             gpu_id=0,
                             lstm_feats=args.lstm_feats)
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()

    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.state = player.state.cuda()
            player.model = player.model.cuda()
    player.model.train()

    if rank == 0:
        eps_reward = 0
        pinned_eps_reward = 0

    while True:
        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                player.model.load_state_dict(shared_model.state_dict())
        else:
            player.model.load_state_dict(shared_model.state_dict())

        if player.done:
            player.eps_len = 0

            if rank == 0:
                if train_step % args.train_log_period == 0 and train_step > 0:
                    print("train: step", train_step, "\teps_reward",
                          eps_reward)
                if train_step > 0:
                    pinned_eps_reward = player.env.sum_reward
                    eps_reward = 0

            if args.lstm_feats:
                player.cx, player.hx = init_linear_lstm(
                    args.lstm_feats, gpu_id)

        elif args.lstm_feats:
            player.cx = Variable(player.cx.data)
            player.hx = Variable(player.hx.data)

        for step in range(args.num_steps):
            player.action_train()
            if rank == 0:
                eps_reward = player.env.sum_reward
            if player.done:
                break

        if player.done:
            if rank == 0:
                if train_step % args.train_log_period == 0 and train_step > 0:
                    print("train: step", train_step, "\teps_reward",
                          eps_reward)
                    # print ("rewards: ", player.env.rewards)
                    # print ("actions: ", player.actions)

        if player.done:
            state = player.env.reset()
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()

        R = torch.zeros(1, 1, 1, 1)

        if not player.done:
            if args.lstm_feats:
                value, _, _ = player.model(
                    (Variable(player.state.unsqueeze(0)), (player.hx,
                                                           player.cx)))
            else:
                value, _ = player.model(Variable(player.state.unsqueeze(0)))
            R = value.data

        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                R = R.cuda()

        player.values.append(Variable(R))
        policy_loss = 0
        value_loss = 0

        gae = torch.zeros(1, 1, 1, 1)
        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                gae = gae.cuda()
        R = Variable(R)

        for i in reversed(range(len(player.rewards))):
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    reward_i = torch.tensor(player.rewards[i]).cuda()
            else:
                reward_i = torch.tensor(player.rewards[i])

            R = args.gamma * R + reward_i
            advantage = R - player.values[i]
            value_loss = value_loss + (0.5 * advantage * advantage).mean()
            delta_t = player.values[
                i + 1].data * args.gamma + reward_i - player.values[i].data
            gae = gae * args.gamma * args.tau + delta_t
            policy_loss = policy_loss - \
                    (player.log_probs[i] * Variable(gae)).mean () - \
                    (args.entropy_alpha * player.entropies[i]).mean ()

        player.model.zero_grad()
        sum_loss = (policy_loss + value_loss)

        sum_loss.backward()
        ensure_shared_grads(player.model, shared_model, gpu=gpu_id >= 0)
        optimizer.step()
        player.clear_actions()

        if rank == 0:
            train_step += 1
            if train_step % args.log_period == 0 and train_step > 0:
                log_info = {
                    'sum_loss': sum_loss,
                    'value_loss': value_loss,
                    'policy_loss': policy_loss,
                    'advantage': advantage,
                    'train eps reward': pinned_eps_reward,
                }

                for tag, value in log_info.items():
                    logger.scalar_summary(tag, value, train_step)
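
All of the train/test functions on this page are written as A3C workers: each one receives a shared_model kept in shared memory, optionally a shared optimizer, and reloads shared_model.state_dict() at the top of every iteration. The launcher that starts them is not included here; a typical wiring with torch.multiprocessing, sketched as an assumption (argument lists vary slightly between the examples, and args.workers is a hypothetical flag for the number of training processes), looks like this:

import torch.multiprocessing as mp


def launch(args, shared_model, optimizer, env_conf, datasets, train_func, test_func):
    # With CUDA, mp.set_start_method('spawn') is usually required before this point.
    shared_model.share_memory()  # workers read and update the same parameter tensors

    processes = []

    # One validation/test worker ...
    p = mp.Process(target=test_func, args=(args, shared_model, env_conf, datasets))
    p.start()
    processes.append(p)

    # ... and one training worker per rank.
    for rank in range(args.workers):
        p = mp.Process(target=train_func,
                       args=(rank, args, shared_model, optimizer, env_conf, datasets))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()
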
Example #9
def test_func(args, shared_model, env_conf, datasets):
    ptitle('Valid agent')

    gpu_id = args.gpu_ids[-1]

    env_conf["env_gpu"] = gpu_id

    if not args.deploy:
        logger = Logger(args.log_dir)

        saved_src_dir = args.log_dir + "/src/"
        create_dir(saved_src_dir)
        os.system("cp *.py " + saved_src_dir)
        os.system("cp -r Models " + saved_src_dir)
        os.system("cp -r run_scripts " + saved_src_dir)
        os.system("cp -r Utils " + saved_src_dir)

    torch.manual_seed(args.seed)

    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    env = Debug_env(datasets, env_conf)

    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0

    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id

    nChan = 3
    if args.is3D:
        nChan = 4
    if args.alpha_only:
        nChan = 1

    # Net3D is only used for 3D data with 3D observations; otherwise fall back to ENet.
    if args.is3D and args.obs3D:
        player.model = get_model(args,
                                 "Net3D",
                                 input_shape=env_conf["obs_shape"],
                                 num_actions=args.num_actions * nChan)
    else:
        player.model = get_model(args,
                                 "ENet",
                                 input_shape=env_conf["obs_shape"],
                                 num_actions=args.num_actions * nChan)

    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()

    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()

    flag = True
    if not args.deploy:
        create_dir(args.save_model_dir)

    recent_episode_scores = ScalaTracker(100)
    recent_rand_i = ScalaTracker(100)

    renderlist = []
    renderlist.append(player.env.render())
    max_score = 0

    if args.deploy:
        deploy(args, shared_model, player, gpu_id)
        exit()

    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward.mean()
        renderlist.append(player.env.render())

        if player.done:
            flag = True
            num_tests += 1

            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests

            print(
                "VALID: Time {0}, episode reward {1}, num tests {4}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean, num_tests))

            recent_episode_scores.push(reward_sum)

            if num_tests % args.save_period == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                str(num_tests)))

            if num_tests % args.log_period == 0:
                print(
                    "----------------------VALID SET--------------------------"
                )
                print(args.env)
                print("Log test #:", num_tests)
                print("rewards: ", player.reward.mean())
                print("sum rewards: ", reward_sum)
                log_rewards = [
                    int(rew * 100) for rew in player.env.sum_rewards
                ]
                print("rewards:", log_rewards)
                print("action: ", player.env.actions)
                print("reward history: ", player.env.rewards)
                print("------------------------------------------------")

                log_img = np.concatenate(renderlist, 0)
                log_info = {"valid_sample": log_img}

                for tag, img in log_info.items():
                    img = img[None]
                    logger.image_summary(tag, img, num_tests)

                if not args.deploy:
                    log_info = {
                        'mean_valid_reward': reward_mean,
                        '100_mean_reward': recent_episode_scores.mean(),
                    }

                    for tag, value in log_info.items():
                        logger.scalar_summary(tag, value, num_tests)

            if args.save_sample:
                deploy_list = player.env.deploy
                print(len(deploy_list))
                for stepi, (vol, ref_img, lut, _) in enumerate(deploy_list):
                    io.imsave(
                        args.log_dir + "/" + str(num_tests) + "_vol_" +
                        str(stepi) + ".tif", vol)
                    io.imsave(
                        args.log_dir + "/" + str(num_tests) + "_ref_" +
                        str(stepi) + ".tif", ref_img)
                    plt.figure(figsize=(10, 10))
                    plt.plot(range(256), lut[..., 2], 'b')
                    plt.plot(range(256), lut[..., 1], 'g')
                    plt.plot(range(256), lut[..., 0], 'r')
                    plt.plot(range(256), lut[..., 3], 'gray')
                    plt.ylabel('Mapping value')
                    plt.xlabel('Voxel intensity')
                    plt.title("Transfer function visualization")
                    plt.savefig("Ref_LUT" + "_" + str(num_tests) + "_" +
                                str(stepi) + ".png")
                    plt.close()  # a new figure is created per step; close it to avoid leaks

            renderlist = []
            reward_sum = 0
            player.eps_len = 0

            player.clear_actions()
            state = player.env.reset()
            renderlist.append(player.env.render())

            time.sleep(15)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
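
The validation worker above never updates weights itself: it reloads the shared model at the start of each episode, evaluates, and periodically writes a checkpoint. Below is a minimal sketch of that sync-then-save pattern, using a toy nn.Linear in place of the project's model; the module and file name are placeholders, not part of the original code.

# Hypothetical sketch of the sync-and-checkpoint pattern used by the valid agent.
import torch
import torch.nn as nn

shared_model = nn.Linear(4, 2)   # stands in for the shared A3C model
local_model = nn.Linear(4, 2)    # stands in for player.model

# Sync: copy the latest shared weights before evaluating.
local_model.load_state_dict(shared_model.state_dict())
local_model.eval()

# Periodic checkpoint, mirroring the num_tests % args.save_period branch.
torch.save(local_model.state_dict(), 'example_checkpoint.dat')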
Example #10
0
def train(rank, args, shared_model, optimizer, env_conf, datasets):
    ptitle('Training Agent: {}'.format(rank))
    print('Start training agent: ', rank)

    if rank == 0:
        logger = Logger(args.log_dir)
        train_step = 0

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)
    raw, gt_lbl = datasets
    env = EM_env(raw, gt_lbl, env_conf)
    if optimizer is None:
        if args.optimizer == 'RMSprop':
            optimizer = optim.RMSprop(shared_model.parameters(), lr=args.lr)
        if args.optimizer == 'Adam':
            optimizer = optim.Adam(shared_model.parameters(),
                                   lr=args.lr,
                                   amsgrad=args.amsgrad)
    gamma = torch.tensor(1.0)
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            gamma = gamma.cuda()
        # env.seed (args.seed + rank)
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    # player.model = A3Clstm (env.observation_space.shape, env_conf["num_action"], args.hidden_feat)
    player.model = SimpleCNN(env.observation_space.shape,
                             env_conf["num_action"])

    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()

    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.state = player.state.cuda()
            player.model = player.model.cuda()
    player.model.train()

    if rank == 0:
        eps_reward = 0
        pinned_eps_reward = 0
        mean_log_prob = 0

    while True:
        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                player.model.load_state_dict(shared_model.state_dict())
        else:
            player.model.load_state_dict(shared_model.state_dict())

        if player.done:
            player.eps_len = 0
            if rank == 0:
                pinned_eps_reward = eps_reward
                eps_reward = 0
                mean_log_prob = 0

        for step in range(args.num_steps):
            player.action_train()
            # print ('step: ', step, 'reward_len: ', len (player.rewards))
            if rank == 0:
                eps_reward += player.reward
                # mean_log_prob += player.log_probs [-1]
            if player.done:
                break

        if player.done:
            state = player.env.reset()
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()

        R = torch.zeros(1, 1)
        if not player.done:
            value, _ = player.model(Variable(player.state.unsqueeze(0)))
            R = value.data

        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                R = R.cuda()

        player.values.append(Variable(R))
        policy_loss = 0
        value_loss = 0
        gae = torch.zeros(1, 1)
        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                gae = gae.cuda()
        R = Variable(R)
        # print ("updating -------------------")
        # print ("values:", player.values)
        # print ("gamma:", args.gamma)
        # print ("rewards:", player.rewards)
        for i in reversed(range(len(player.rewards))):
            R = args.gamma * R + player.rewards[i]
            advantage = R - player.values[i]
            value_loss = value_loss + 0.5 * advantage.pow(2)

            # print ("advatage: ", advantage)
            # print ("value_loss: ", value_loss)
            # print ("delta_t: ", player.values[i + 1].data + player.rewards[i])
            # Generalized Advantage Estimation
            delta_t = player.values[i + 1].data * args.gamma + player.rewards[i] - \
                        player.values[i].data

            gae = gae * args.gamma * args.tau + delta_t

            policy_loss = policy_loss - \
                player.log_probs[i] * \
                Variable(gae) - 0.01 * player.entropies[i]

        player.model.zero_grad()
        sum_loss = (policy_loss + value_loss)
        sum_loss.backward()
        ensure_shared_grads(player.model, shared_model, gpu=gpu_id >= 0)
        optimizer.step()
        player.clear_actions()

        if rank == 0:
            train_step += 1
            if train_step % args.log_period == 0:
                log_info = {
                    'train: sum_loss': sum_loss,
                    'train: value_loss': value_loss,
                    'train: policy_loss': policy_loss,
                    'train: advantage': advantage,
                    # 'entropy': entropy,
                    'train: eps reward': pinned_eps_reward,
                    # 'mean log prob': mean_log_prob
                }

                for tag, value in log_info.items():
                    logger.scalar_summary(tag, value, train_step)
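
ensure_shared_grads is called in both training loops but is not included in this listing. In A3C-style code it usually hands the worker's freshly computed gradients to the shared model before optimizer.step(); the sketch below shows that common pattern and is an assumption, not necessarily this project's implementation.

# Common A3C gradient hand-off (assumed implementation, for illustration only).
def ensure_shared_grads(model, shared_model, gpu=False):
    for param, shared_param in zip(model.parameters(),
                                   shared_model.parameters()):
        if shared_param.grad is not None and not gpu:
            # CPU workers: keep the gradient that is already there.
            return
        if not gpu:
            shared_param._grad = param.grad
        else:
            # GPU workers copy their gradients back to the shared (CPU) model.
            shared_param._grad = param.grad.cpu()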
Example #11
0
def train(model, args, name):
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.999)
    gpu_id = args.gpu_id
    batch_size = args.batch_size

    if args.loss == 'WBCE':
        loss_func = Losses.weighted_binary_cross_entropy
    else:
        loss_func = nn.BCELoss()

    print('Preparing dataset ...')
    train_data, test_data = prepare_dataset(model, args)
    print('Finished preparing dataset, starting training')
    logger = Logger('log_dir/' + name + '/')
    save_path = 'checkpoints/' + name + '/'

    create_dir(save_path)

    i_iter = 0
    for i_epoch in range(10000000):  # effectively run until the job is stopped
        epoch_loss = 0
        for i_batch, sample in enumerate(train_data):
            if i_batch == len(train_data):
                break
            with torch.cuda.device(gpu_id):
                raw_t = torch.tensor(sample['raw'], dtype=torch.float32).cuda() / 255.0
                target_t = torch.tensor(sample['lbl'], dtype=torch.float32).cuda() / 255.0
                pred_t = model(raw_t)
                if args.loss == 'WBCE':
                    if args.weights is not None:
                        weights = args.weights
                    else:
                        # Weight each class by the frequency of the other class,
                        # estimated from this batch.
                        EPS = 1e-5
                        neg_weight = torch.sum(target_t) / (1.0 * np.prod(target_t.shape)) + EPS
                        weights = [neg_weight, 1 - neg_weight]
                    loss = loss_func(pred_t, target_t, weights)
                else:
                    loss = loss_func(pred_t, target_t)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            epoch_loss += loss.item() / len(train_data)
            lr_scheduler.step()
            i_iter += 1

        print('type: {}\tTrain Epoch: {} \tLoss: {:.6f}'.format(
            args.type, i_epoch, epoch_loss))

        info = {'loss': epoch_loss, 'learning_rate': lr_scheduler.get_lr()[0]}
        for tag, value in info.items():
            logger.scalar_summary(tag, value, i_iter)
        visual_log(raw_t, target_t, pred_t, logger, i_iter, 'train')

        if (i_epoch + 1) % LOG_PERIOD == 0:
            raw_t, pred_t = eval(test_data, loss_func, hasTarget=False, model=model, gpu_id=gpu_id)
            visual_log(raw_t, None, pred_t, logger, i_iter, 'test', hasTarget=False)

        if i_epoch % SAVE_PERIOD == 0:
            torch.save({
                'i_iter': i_iter,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }, save_path + 'checkpoint_' + str(i_iter) + '.pth.tar')
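
Losses.weighted_binary_cross_entropy is referenced above but not part of this listing. A minimal sketch consistent with how it is called, loss_func(pred_t, target_t, weights) with weights = [negative_term_weight, positive_term_weight], could look like the following; the function name, the epsilon clamp, and the mean reduction are assumptions.

# Hypothetical weighted BCE matching the loss_func(pred, target, weights) call above.
import torch

def weighted_binary_cross_entropy(pred, target, weights, eps=1e-7):
    # weights[0] scales the negative (background) term,
    # weights[1] scales the positive (foreground) term.
    pred = pred.clamp(eps, 1.0 - eps)
    loss = weights[1] * target * torch.log(pred) + \
           weights[0] * (1.0 - target) * torch.log(1.0 - pred)
    return -loss.mean()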