Example #1
File: test.py Project: hvcl/ColorRL
def test_func(args,
              shared_model,
              env_conf,
              datasets=None,
              tests=None,
              shared_dict=None):
    ptitle('Valid agent')

    if args.valid_gpu < 0:
        gpu_id = args.gpu_ids[-1]
    else:
        gpu_id = args.valid_gpu

    env_conf["env_gpu"] = gpu_id

    if not args.deploy:
        log = {}

        logger = Logger(args.log_dir)

        create_dir(args.log_dir + "models/")
        create_dir(args.log_dir + "tifs/")
        create_dir(args.log_dir + "tifs_test/")

        os.system("cp *.py " + args.log_dir)
        os.system("cp *.sh " + args.log_dir)
        os.system("cp models/*.py " + args.log_dir + "models/")

        setup_logger('{}_log'.format(args.env),
                     r'{0}{1}_log'.format(args.log_dir, args.env))
        log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
            args.env))
        d_args = vars(args)
        env_conf_log = env_conf

    if tests is not None:
        if args.testlbl:
            test_env = EM_env(tests[0],
                              env_conf,
                              type="test",
                              gt_lbl_list=tests[1])
        else:
            test_env = EM_env(tests[0], env_conf, type="test")

    if not args.deploy:
        for k in d_args.keys():
            log['{}_log'.format(args.env)].info('{0}: {1}'.format(
                k, d_args[k]))
        for k in env_conf_log.keys():
            log['{}_log'.format(args.env)].info('{0}: {1}'.format(
                k, env_conf_log[k]))

    torch.manual_seed(args.seed)

    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    raw_list, gt_lbl_list = datasets
    env = EM_env(raw_list, env_conf, type="train", gt_lbl_list=gt_lbl_list)

    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0

    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    player.model = get_model(args,
                             args.model,
                             env_conf["observation_shape"],
                             args.features,
                             atrous_rates=args.atr_rate,
                             num_actions=2,
                             split=args.data_channel,
                             gpu_id=gpu_id,
                             multi=args.multi)

    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()

    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()

    flag = True
    if not args.deploy:
        create_dir(args.save_model_dir)

    recent_episode_scores = ScalaTracker(100)
    recent_FgBgDice = ScalaTracker(100)
    recent_bestDice = ScalaTracker(100)
    recent_diffFG = ScalaTracker(100)

    recent_MUCov = ScalaTracker(100)
    recent_MWCov = ScalaTracker(100)
    recent_AvgFP = ScalaTracker(100)
    recent_AvgFN = ScalaTracker(100)

    recent_rand_i = ScalaTracker(100)

    renderlist = []
    renderlist.append(player.env.render())
    max_score = 0

    # ----------------------------------------- Deploy / Inference -----------------------------------------
    if args.deploy:
        with torch.cuda.device(gpu_id):
            player.model.load_state_dict(shared_model.state_dict())

        # inference (args, None, player.model, tests [0], test_env, gpu_id, player.env.rng, len (tests [0]))
        if len(tests) == 4:
            inference(args, None, player.model, tests[0], test_env, gpu_id,
                      player.env.rng, len(tests[0]), tests[3])
        else:
            inference(args, None, player.model, tests[0], test_env, gpu_id,
                      player.env.rng, len(tests[0]))

        return
    # ----------------------------------------- End Deploy / Inference -----------------------------------------

    merge_ratios = []
    split_ratios = []

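    # Split-to-merge ("s2m") weight control: the schedulers below step the merge
    # weight and the per-radius split weight toward each other by `delta` per
    # scheduled step; they are advanced at the end of every episode further down.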
    if args.wctrl == "s2m":
        schedule = args.wctrl_schedule

        delta = (shared_dict['spl_w'] - shared_dict['mer_w']) / (2 * len(schedule))

        mer_w_delta = delta
        mer_w_var = shared_dict['mer_w']
        mer_w_scheduler = Scheduler(mer_w_var, schedule, mer_w_delta)

        split_delta = -delta / len(args.out_radius)
        split_var = shared_dict['spl_w'] / len(args.out_radius)
        spl_w_scheduler = Scheduler(split_var, schedule, split_delta)

    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward.mean()
        renderlist.append(player.env.render())

        if player.done:
            flag = True
            num_tests += 1

            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests

            log['{}_log'.format(args.env)].info(
                "VALID: Time {0}, episode reward {1}, num tests {2}, episode length {3}, reward mean {4:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, num_tests, player.eps_len, reward_mean))

            recent_episode_scores.push(reward_sum)

            if args.save_max and recent_episode_scores.mean() >= max_score:
                max_score = recent_episode_scores.mean()
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                'best_model_' + args.env))

            if num_tests % args.save_period == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                str(num_tests)))

            if num_tests % args.log_period == 0:
                if tests is not None and not args.DEBUG:
                    inference(args, logger, player.model, tests[0], test_env,
                              gpu_id, player.env.rng, num_tests)

                if np.max(env.lbl) != 0 and np.max(env.gt_lbl) != 0:
                    bestDice, FgBgDice, diffFG, MWCov, MUCov, AvgFP, AvgFN, rand_i = evaluate(
                        args, player.env)

                    recent_FgBgDice.push(FgBgDice)
                    recent_diffFG.push(abs(diffFG))
                    recent_bestDice.push(bestDice)

                    recent_MWCov.push(MWCov)
                    recent_MUCov.push(MUCov)
                    recent_AvgFP.push(AvgFP)
                    recent_AvgFN.push(AvgFN)

                    recent_rand_i.push(rand_i)

                    log_info = {
                        "bestDice": recent_bestDice.mean(),
                        "FgBgDice": recent_FgBgDice.mean(),
                        "diffFG": recent_diffFG.mean(),
                        "MWCov": recent_MWCov.mean(),
                        "MUCov": recent_MUCov.mean(),
                        "AvgFP": recent_AvgFP.mean(),
                        "AvgFN": recent_AvgFN.mean(),
                        "rand_i": recent_rand_i.mean()
                    }

                    for tag, value in log_info.items():
                        logger.scalar_summary(tag, value, num_tests)
                else:
                    bestDice, FgBgDice, diffFG = 0, 0, 0
                    MWCov, MUCov, AvgFP, AvgFN = 0, 0, 0, 0
                    rand_i = 0

                print(
                    "----------------------VALID SET--------------------------"
                )
                print(args.env)
                print("bestDice:", bestDice, "FgBgDice:", FgBgDice, "diffFG:",
                      diffFG, "MWCov:", MWCov, "MUCov:", MUCov, "AvgFP:",
                      AvgFP, "AvgFN:", AvgFN, "rand_i:", rand_i)
                # print ("mean bestDice")
                print("Log test #:", num_tests)
                print("rewards: ", player.reward.mean())
                print("sum rewards: ", reward_sum)
                print("#gt_values:", len(np.unique(player.env.gt_lbl)))
                print("values:")
                values = player.env.unique()
                print(np.concatenate([values[0][None], values[1][None]], 0))
                print("------------------------------------------------")

                log_img = np.concatenate(renderlist[::-1], 0)

                if not "3D" in args.data:
                    for i in range(3):
                        player.probs.insert(0, np.zeros_like(player.probs[0]))
                    while len(player.probs) - 3 < args.max_episode_length:
                        player.probs.append(np.zeros_like(player.probs[0]))

                    probslist = [
                        np.repeat(np.expand_dims(prob, -1), 3, -1)
                        for prob in player.probs
                    ]
                    probslist = np.concatenate(probslist, 1)
                    probslist = (probslist * 256).astype(np.uint8, copy=False)
                    # log_img = renderlist [-1]
                    print(probslist.shape, log_img.shape)
                    log_img = np.concatenate([probslist, log_img], 0)

                log_info = {"valid_sample": log_img}

                print(log_img.shape)
                io.imsave(
                    args.log_dir + "tifs/" + str(num_tests) + "_sample.tif",
                    log_img.astype(np.uint8))
                io.imsave(
                    args.log_dir + "tifs/" + str(num_tests) + "_pred.tif",
                    player.env.lbl.astype(np.uint8))
                io.imsave(args.log_dir + "tifs/" + str(num_tests) + "_gt.tif",
                          player.env.gt_lbl.astype(np.int32))

                if args.seg_scale:
                    log_info["scaler"] = player.env.scaler

                for tag, img in log_info.items():
                    img = img[None]
                    logger.image_summary(tag, img, num_tests)

                if not args.deploy:
                    log_info = {
                        'mean_valid_reward': reward_mean,
                        '100_mean_reward': recent_episode_scores.mean(),
                        'split_ratio': player.env.split_ratio_sum.sum() /
                                       np.count_nonzero(player.env.gt_lbl),
                        'merge_ratio': player.env.merge_ratio_sum.sum() /
                                       np.count_nonzero(player.env.gt_lbl),
                    }

                    if args.wctrl == 's2m':
                        log_info.update({
                            'mer_w': mer_w_scheduler.value(),
                            'spl_w': spl_w_scheduler.value() * len(args.out_radius),
                        })

                    merge_ratios.append(player.env.merge_ratio_sum.sum() /
                                        np.count_nonzero(player.env.gt_lbl))
                    split_ratios.append(player.env.split_ratio_sum.sum() /
                                        np.count_nonzero(player.env.gt_lbl))

                    print("split ratio: ", np.max(player.env.split_ratio_sum),
                          np.min(player.env.split_ratio_sum))
                    print("merge ratio: ", np.max(player.env.merge_ratio_sum),
                          np.min(player.env.merge_ratio_sum))

                    print("merge ratio: ", merge_ratios)
                    print("split ratio: ", split_ratios)

                    for tag, value in log_info.items():
                        logger.scalar_summary(tag, value, num_tests)

            renderlist = []
            reward_sum = 0
            player.eps_len = 0

            if args.wctrl == "s2m":
                shared_dict["spl_w"] = spl_w_scheduler.next()
                shared_dict["mer_w"] = mer_w_scheduler.next()
                player.env.config["spl_w"] = shared_dict["spl_w"]
                player.env.config["mer_w"] = shared_dict["mer_w"]

            player.clear_actions()
            state = player.env.reset(player.model, gpu_id)
            renderlist.append(player.env.render())

            time.sleep(15)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
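
A minimal, hypothetical launch sketch (not taken from the repo): in an A3C-style setup, a validation worker like test_func is usually started as a separate process that shares the model with the training workers. The helper name launch_valid_worker and the argument wiring are assumptions, not part of the original project.

import torch.multiprocessing as mp

def launch_valid_worker(args, shared_model, env_conf, datasets, tests, shared_dict):
    # daemon process, so it exits together with the parent training script
    p = mp.Process(target=test_func,
                   args=(args, shared_model, env_conf, datasets, tests, shared_dict))
    p.daemon = True
    p.start()
    return p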
Example #2
class MetaA2CModel:
    """
    The Meta A2C (Advantage Actor Critic) model class

    :param gamma: (float) Discount factor
    :param vf_coef: (float) Value function coefficient for the loss calculation
    :param ent_coef: (float) Entropy coefficient for the loss calculation
    :param max_grad_norm: (float) The maximum value for the gradient clipping
    :param learning_rate: (float) The learning rate
    :param alpha: (float) RMSProp decay parameter (default: 0.99)
    :param epsilon: (float) RMSProp epsilon (stabilizes square root computation in denominator of RMSProp update)
        (default: 1e-5)
    :param lr_schedule: (str) The type of scheduler for the learning rate update ('linear', 'constant',
                              'double_linear_con', 'middle_drop' or 'double_middle_drop')
    :param verbose: (int) the verbosity level: 0 none, 1 training information, 2 tensorflow debug
    :param _init_setup_model: (bool) Whether or not to build the network at the creation of the instance
                              (setting it to False is only used when loading a saved model)
    """

    def __init__(self, input_length: int, output_length: int, n_steps: int, window_size: int, seed=3, gamma=0.8, vf_coef=0.25, ent_coef=0,
                 max_grad_norm=0.5, learning_rate=1e-3, alpha=0.99, epsilon=1e-5, lr_schedule='linear', verbose=0, _init_setup_model=True):

        self.policy = MetaLstmActorCriticPolicy
        self.verbose = verbose
        self.input_length = input_length
        self.output_length = output_length
        self.num_train_steps = 0
        self.n_steps = n_steps
        self.window_size = window_size
        self.total_timesteps = config.max_timesteps/10_000

        self.gamma = gamma
        self.vf_coef = vf_coef
        self.ent_coef = ent_coef
        self.max_grad_norm = max_grad_norm
        self.alpha = alpha
        self.epsilon = epsilon
        self.lr_schedule = lr_schedule
        self.learning_rate = learning_rate

        self.graph = None
        self.sess = None
        self.learning_rate_ph = None
        self.actions_ph = None
        self.advs_ph = None
        self.rewards_ph = None
        self.pg_loss = None
        self.vf_loss = None
        self.entropy = None
        self.apply_backprop = None
        self.policy_model = None
        self.step = None
        self.value = None
        self.learning_rate_schedule = None
        self.trainable_variables = None

        self.layers = config.meta_layers
        self.lstm_units = config.meta_lstm_units

        if seed is not None:
            set_global_seeds(seed)

        self.learning_rate_schedule = Scheduler(initial_value=self.learning_rate, n_values=self.total_timesteps,
                                                schedule=self.lr_schedule, init_step=self.num_train_steps)

        if _init_setup_model:
            self.setup_model()

    def setup_model(self):
        """
        Create all the functions and tensorflow graphs necessary to train the model
        """

        assert issubclass(self.policy, MetaLstmActorCriticPolicy), \
            "Error: the policy for the Meta A2C model must be a subclass of MetaLstmActorCriticPolicy."

        self.graph = tf.Graph()
        with self.graph.as_default():
            self.sess = tf_utils.make_session(graph=self.graph)

            # no separate step model is needed here because the step (n_batch) is the same, so it would be redundant
            policy_model = self.policy(sess=self.sess, input_length=self.input_length, output_length=self.output_length, n_steps=self.n_steps,
                                       window_size=self.window_size, layers=self.layers, lstm_units=self.lstm_units)

            with tf.variable_scope("loss", reuse=False):
                self.actions_ph = policy_model.pdtype.sample_placeholder([self.n_steps], name="action_ph")
                self.advs_ph = tf.placeholder(tf.float32, [self.n_steps], name="advs_ph")
                self.rewards_ph = tf.placeholder(tf.float32, [self.n_steps], name="rewards_ph")
                self.learning_rate_ph = tf.placeholder(tf.float32, [], name="learning_rate_ph")

                neglogpac = policy_model.proba_distribution.neglogp(self.actions_ph)
                self.entropy = tf.reduce_mean(policy_model.proba_distribution.entropy())
                self.pg_loss = tf.reduce_mean(self.advs_ph * neglogpac)
                self.vf_loss = mse(tf.squeeze(policy_model.value_fn), self.rewards_ph)
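                # standard A2C objective: policy-gradient loss, minus an entropy
                # bonus weighted by ent_coef, plus the value loss weighted by vf_coef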
                loss = self.pg_loss - self.entropy * self.ent_coef + self.vf_loss * self.vf_coef

                self.trainable_variables = tf_utils.find_trainable_variables("model")
                grads = tf.gradients(loss, self.trainable_variables)
                if self.max_grad_norm is not None:
                    grads, _ = tf.clip_by_global_norm(grads, self.max_grad_norm)
                grads = list(zip(grads, self.trainable_variables))

            trainer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate_ph, decay=self.alpha,
                                                epsilon=self.epsilon)
            self.apply_backprop = trainer.apply_gradients(grads)
            self.step = policy_model.step
            self.policy_model = policy_model
            self.value = self.policy_model.value
            tf.global_variables_initializer().run(session=self.sess)

    def train_step(self, inputs: np.ndarray, discounted_rewards, actions, values):
        """
        applies a training step to the model
        """
        advs = discounted_rewards - values

        cur_lr = None
        for _ in range(self.n_steps):
            cur_lr = self.learning_rate_schedule.value()

        td_map = {self.policy_model.input_ph: inputs, self.actions_ph: actions, self.advs_ph: advs,
                  self.rewards_ph: discounted_rewards, self.learning_rate_ph: cur_lr}

        policy_loss, value_loss, policy_entropy, _ = self.sess.run(
            [self.pg_loss, self.vf_loss, self.entropy, self.apply_backprop], td_map)

        return policy_loss, value_loss, policy_entropy

    def save(self, save_path: str, id: str):
        """
        Save the current parameters to file

        :param save_path: (str or file-like object) the save location
        :param id: (str) identifier passed to the model file writer
        """

        params = {
            "gamma": self.gamma,
            "vf_coef": self.vf_coef,
            "ent_coef": self.ent_coef,
            "max_grad_norm": self.max_grad_norm,
            "learning_rate": self.learning_rate,
            "alpha": self.alpha,
            "epsilon": self.epsilon,
            "lr_schedule": self.lr_schedule,
            "verbose": self.verbose,
            "policy": self.policy,
            "num_train_steps": self.num_train_steps,
            "input_length": self.input_length,
            "output_length": self.output_length,
            "n_steps": self.n_steps,
            "window_size": self.window_size,
            "total_timesteps": self.total_timesteps,
            "layers": self.layers,
            "lstm_units": self.lstm_units,
        }

        json_params = {
            "input_length": self.input_length,
            "output_length": self.output_length,
            "n_steps": self.n_steps,
            "window_size": self.window_size,
            "total_timesteps": self.total_timesteps,
            "gamma": self.gamma,
            "vf_coef": self.vf_coef,
            "ent_coef": self.ent_coef,
            "max_grad_norm": self.max_grad_norm,
            "learning_rate": self.learning_rate,
            "alpha": self.alpha,
            "epsilon": self.epsilon,
            "lr_schedule": self.lr_schedule,
            "layers": self.layers,
            "lstm_units": self.lstm_units,
        }

        weights = self.sess.run(self.trainable_variables)

        utils._save_model_to_file(save_path, id, 'meta', json_params=json_params, weights=weights, params=params)

    @classmethod
    def load(cls, model_id: str, input_len: int, output_len: int):
        """
        Load the model from file
        """
        load_path = os.path.join(config.model_path, model_id)
        weights, params = utils._load_model_from_file(load_path, 'meta')

        if params['input_length'] != input_len or params['output_length'] != output_len:
            raise ValueError("The input and output lengths must match those of the model being loaded.")

        model = cls(input_length=params["input_length"], output_length=params["output_length"],
                    n_steps=params["n_steps"], window_size=params["window_size"], _init_setup_model=False)
        model.__dict__.update(params)
        model.setup_model()

        restores = []
        for param, loaded_weight in zip(model.trainable_variables, weights):
            restores.append(param.assign(loaded_weight))
        model.sess.run(restores)

        return model
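
A minimal, hypothetical usage sketch for MetaA2CModel (the dimensions, the dummy rollout arrays, and the exact layout expected by policy_model.input_ph are assumptions; how the save path and id are combined depends on utils._save_model_to_file and config.model_path):

import numpy as np

# assumed dimensions, not taken from the original project
INPUT_LEN, OUTPUT_LEN, N_STEPS, WINDOW = 16, 4, 5, 10

model = MetaA2CModel(input_length=INPUT_LEN, output_length=OUTPUT_LEN,
                     n_steps=N_STEPS, window_size=WINDOW)

# one dummy rollout: `inputs` must match policy_model.input_ph (layout assumed
# here), the remaining arrays hold one value per step
inputs = np.zeros((N_STEPS, WINDOW, INPUT_LEN), dtype=np.float32)
discounted_rewards = np.zeros(N_STEPS, dtype=np.float32)
actions = np.zeros(N_STEPS, dtype=np.int32)
values = np.zeros(N_STEPS, dtype=np.float32)

pg_loss, vf_loss, entropy = model.train_step(inputs, discounted_rewards,
                                             actions, values)

model.save(config.model_path, id='demo')   # writes weights + params
restored = MetaA2CModel.load('demo', INPUT_LEN, OUTPUT_LEN)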