    def take_action(self, state, is_train, is_start):

        if is_train:
            if is_start:
                self.train_ep_count += 1

            if self.use_external_exploration:
                _, greedy_action = self.hydra_network.predict_action(np.expand_dims(state, 0), False)
                chosen_action = self.exploration_policy.generate(greedy_action[0], self.train_global_steps)

            else:
                # single state so first idx
                chosen_action = self.hydra_network.sample_action(np.expand_dims(state, 0), False, is_single_sample=True)[0][0]

            self.train_global_steps += 1

            if self.write_log:
                write_summary(self.writer, self.train_global_steps, chosen_action[0], tag='train/action_taken')

            if self.write_plot:
                alpha, mean, sigma = self.hydra_network.getModalStats()
                func1 = self.hydra_network.getQFunction(state)
                func2 = self.hydra_network.getPolicyFunction(alpha, mean, sigma)

                old_greedy_action, greedy_action = self.hydra_network.predict_action(np.expand_dims(state, 0), False)

                if self.hydra_network.use_better_q_gd:
                    greedy_action = self.hydra_network.q_gradient_ascent(np.expand_dims(state, 0), greedy_action, True, is_better_q_gd=True)

                old_greedy_action = old_greedy_action[0]
                greedy_action = greedy_action[0]

                utils.plot_utils.plotFunction("ActorExpert", [func1, func2], state, [greedy_action, old_greedy_action, mean], chosen_action,
                                              self.action_min, self.action_max,
                                              display_title='Actor-Expert+ , steps: ' + str(
                                                  self.train_global_steps),
                                              save_title='steps_' + str(self.train_global_steps),
                                              save_dir=self.writer.get_logdir(), ep_count=self.train_ep_count,
                                              show=False)
        else:

            old_greedy_action, greedy_action = self.hydra_network.predict_action(np.expand_dims(state, 0), False)

            if self.hydra_network.use_better_q_gd:
                greedy_action = self.hydra_network.q_gradient_ascent(np.expand_dims(state, 0), greedy_action, True,
                                                                     is_better_q_gd=True)

            old_greedy_action = old_greedy_action[0]
            greedy_action = greedy_action[0]

            if is_start:
                self.eval_ep_count += 1

            chosen_action = greedy_action
            self.eval_global_steps += 1

            if self.write_log:
                write_summary(self.writer, self.eval_global_steps, chosen_action[0], tag='eval/action_taken')

        return chosen_action
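
Several of these examples call self.exploration_policy.generate(greedy_action, step) without showing the policy itself. As a rough illustration only, a minimal Gaussian-noise policy with that call signature might look like the sketch below; the noise scale, linear decay schedule, and clipping to [action_min, action_max] are assumptions for illustration, not the repository's actual implementation.

    import numpy as np

    class GaussianExploration(object):
        """Hypothetical exploration policy matching the generate(action, step) calls above."""

        def __init__(self, action_min, action_max, scale=0.1, decay=1e-5):
            self.action_min = np.asarray(action_min)
            self.action_max = np.asarray(action_max)
            self.scale = scale    # initial std of the additive noise
            self.decay = decay    # per-step linear decay of the noise std

        def generate(self, greedy_action, step):
            # anneal the noise scale with the global training step
            std = max(self.scale * (1.0 - self.decay * step), 0.0)
            noisy = greedy_action + np.random.normal(0.0, std, size=np.shape(greedy_action))
            # keep the perturbed action inside the valid action range
            return np.clip(noisy, self.action_min, self.action_max)
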
Example #2
    def take_action(self, state, is_train, is_start):

        greedy_action, action_points = self.network.predict_action(
            state.reshape(-1, self.state_dim))

        # train
        if is_train:
            if is_start:
                self.train_ep_count += 1
            self.train_global_steps += 1

            if self.use_external_exploration:
                chosen_action = self.exploration_policy.generate(
                    greedy_action, self.train_global_steps)

            else:
                chosen_action = greedy_action

            if self.write_log:
                write_summary(self.writer,
                              self.train_global_steps,
                              chosen_action[0],
                              tag='train/action_taken')

            if self.write_plot:
                func1 = self.network.getQFunction(state)

                utils.plot_utils.plotFunction(
                    "WireFitting", [func1],
                    state,
                    greedy_action,
                    chosen_action,
                    self.action_min,
                    self.action_max,
                    display_title='WireFitting, steps: ' +
                    str(self.train_global_steps),
                    save_title='steps_' + str(self.train_global_steps),
                    save_dir=self.writer.get_logdir(),
                    ep_count=self.train_ep_count,
                    show=False)

            return chosen_action

        # eval
        else:
            if is_start:
                self.eval_ep_count += 1
            self.eval_global_steps += 1
            chosen_action = greedy_action

            if self.write_log:
                write_summary(self.writer,
                              self.eval_global_steps,
                              chosen_action[0],
                              tag='eval/action_taken')

            return chosen_action
Example #3
    def take_action(self, state, is_train, is_start):

        # Train
        if is_train:
            if is_start:
                self.train_ep_count += 1
            self.train_global_steps += 1

            greedy_action = self.network.take_action(np.expand_dims(state,
                                                                    0))[0]
            if self.use_external_exploration:
                chosen_action = self.exploration_policy.generate(
                    greedy_action, self.train_global_steps)

            else:
                # Get action from network
                chosen_action = greedy_action
                # print('train', chosen_action)

            if self.write_log:
                raise NotImplementedError

            if self.write_plot:
                q_func = self.network.getQFunction(state)
                utils.plot_utils.plotFunction(
                    "SoftQlearning", [q_func],
                    state,
                    greedy_action,
                    chosen_action,
                    self.action_min,
                    self.action_max,
                    display_title='SoftQlearning, steps: ' +
                    str(self.train_global_steps),
                    save_title='steps_' + str(self.train_global_steps),
                    save_dir=self.writer.get_logdir(),
                    ep_count=self.train_ep_count,
                    show=False)
        # Eval
        else:

            # greedy action (mean)
            chosen_action = self.network.take_action(np.expand_dims(state,
                                                                    0))[0]

            if is_start:
                self.eval_ep_count += 1
            self.eval_global_steps += 1

            if self.write_log:
                write_summary(self.writer,
                              self.eval_global_steps,
                              chosen_action[0],
                              tag='eval/action_taken')

        return chosen_action
Example #4
    def take_action(self, state, is_train, is_start):

        greedy_action = self.hydra_network.predict_action(
            np.expand_dims(state, 0), False)[0]

        if is_train:
            if is_start:
                self.train_ep_count += 1
            self.train_global_steps += 1

            if self.use_external_exploration:
                chosen_action = self.exploration_policy.generate(
                    greedy_action, self.train_global_steps)
            else:
                chosen_action = greedy_action

            if self.write_log:
                write_summary(self.writer,
                              self.train_global_steps,
                              chosen_action[0],
                              tag='train/action_taken')

            if self.write_plot:
                func1 = self.hydra_network.getQFunction(state)

                utils.plot_utils.plotFunction(
                    "DDPG", [func1],
                    state,
                    greedy_action,
                    chosen_action,
                    self.action_min,
                    self.action_max,
                    display_title='DDPG, steps: ' +
                    str(self.train_global_steps),
                    save_title='steps_' + str(self.train_global_steps),
                    save_dir=self.writer.get_logdir(),
                    ep_count=self.train_ep_count,
                    show=False)

        else:
            if is_start:
                self.eval_ep_count += 1
            self.eval_global_steps += 1

            chosen_action = greedy_action

            if self.write_log:
                write_summary(self.writer,
                              self.eval_global_steps,
                              chosen_action[0],
                              tag='eval/action_taken')

        return chosen_action
Example #5
    def take_action(self, state, is_train, is_start):

        _, max_action_batch_target = self.q_network.get_max_action(
            np.expand_dims(state, 0), use_target=False, is_train=False)

        greedy_action = max_action_batch_target[0]
        if is_train:
            if is_start:
                self.train_ep_count += 1
            self.train_global_steps += 1

            if self.use_external_exploration:
                chosen_action = self.exploration_policy.generate(
                    greedy_action, self.train_global_steps)
            else:
                chosen_action = greedy_action

            if self.write_log:
                write_summary(self.writer,
                              self.train_global_steps,
                              chosen_action[0],
                              tag='train/action_taken')

            if self.write_plot:
                func1 = self.q_network.getQFunction(state)

                raise NotImplementedError
                # utils.plot_utils.plotFunction("OptimalQ", [func1], state, greedy_action, chosen_action, self.action_min,
                #                               self.action_max,
                #                               display_title='OptimalQ, steps: ' + str(
                #                                   self.train_global_steps),
                #                               save_title='steps_' + str(self.train_global_steps),
                #                               save_dir=self.writer.get_logdir(), ep_count=self.train_ep_count,
                #                               show=False)

        else:
            if is_start:
                self.eval_ep_count += 1
            self.eval_global_steps += 1

            chosen_action = greedy_action

            if self.write_log:
                write_summary(self.writer,
                              self.eval_global_steps,
                              chosen_action[0],
                              tag='eval/action_taken')

        return chosen_action
Example #6
    def take_action(self, state, is_train, is_start):

        if is_train:

            sample, greedy_action, weight_mean_var = self.qt_opt_network.sample_action(
                np.expand_dims(state, 0))

            greedy_action = greedy_action[0]
            means = weight_mean_var[0][1]

            if self.use_external_exploration:
                chosen_action = self.exploration_policy.generate(
                    greedy_action, self.train_global_steps)

            else:
                chosen_action = np.clip(sample[0][0], self.action_min,
                                        self.action_max)

            if is_start:
                self.train_ep_count += 1
            self.train_global_steps += 1

            if self.write_log:
                # only good for 1 dim action
                write_summary(self.writer,
                              self.train_global_steps,
                              chosen_action[0],
                              tag='train/action_taken')

            if self.write_plot:

                func1 = self.qt_opt_network.getQFunction(state)
                func2 = self.qt_opt_network.getPolicyFunction(
                    weight_mean_var[0])

                utils.plot_utils.plotFunction(
                    "QT_OPT", [func1, func2],
                    state, [greedy_action, means],
                    chosen_action,
                    self.action_min,
                    self.action_max,
                    display_title='QT-Opt, steps: ' +
                    str(self.train_global_steps),
                    save_title='steps_' + str(self.train_global_steps),
                    save_dir=self.writer.get_logdir(),
                    ep_count=self.train_ep_count,
                    show=False)

            return chosen_action
        else:
            greedy_action = self.qt_opt_network.predict_action(
                np.expand_dims(state, 0))[0]
            if is_start:
                self.eval_ep_count += 1
            self.eval_global_steps += 1

            if self.write_log:
                write_summary(self.writer,
                              self.eval_global_steps,
                              greedy_action[0],
                              tag='eval/action_taken')

            return greedy_action
Example #7
    def take_action(self, state, is_train, is_start):

        greedy_action = self.hydra_network.predict_action(
            np.expand_dims(state, 0), False)
        greedy_action = greedy_action[0]

        if is_train:
            if is_start:
                self.train_ep_count += 1

            if self.use_external_exploration:
                chosen_action = self.exploration_policy.generate(
                    greedy_action, self.train_global_steps)
            else:
                # single state so first idx
                # single sample so first idx
                _, chosen_action = self.hydra_network.sample_action(
                    np.expand_dims(state, 0), False, is_single_sample=True)
                chosen_action = chosen_action[0][0]

            self.train_global_steps += 1

            if self.write_log:
                raise NotImplementedError

            if self.write_plot:
                alpha, mean, sigma = self.hydra_network.getModalStats()
                if self.use_true_q:
                    func1 = self.hydra_network.getTrueQFunction(state)
                else:
                    func1 = self.hydra_network.getQFunction(state)
                func2 = self.hydra_network.getPolicyFunction(
                    alpha, mean, sigma)

                utils.plot_utils.plotFunction(
                    "ActorCritic_unimodal", [func1, func2],
                    state, [greedy_action, mean],
                    chosen_action,
                    self.action_min,
                    self.action_max,
                    display_title='Actor-Critic, steps: ' +
                    str(self.train_global_steps),
                    save_title='steps_' + str(self.train_global_steps),
                    save_dir=self.writer.get_logdir(),
                    ep_count=self.train_ep_count,
                    show=False)
        else:
            if is_start:
                self.eval_ep_count += 1

            if self.sample_for_eval:
                # single state so first idx
                # single sample so first idx
                _, chosen_action = self.hydra_network.sample_action(
                    np.expand_dims(state, 0), False, is_single_sample=True)
                chosen_action = chosen_action[0][0]

            else:
                chosen_action = greedy_action

            self.eval_global_steps += 1

            if self.write_log:
                write_summary(self.writer,
                              self.eval_global_steps,
                              chosen_action[0],
                              tag='eval/action_taken')

        # print('chosen_action: {}'.format(chosen_action))
        return chosen_action
Example #8
    def take_action(self, state, is_train, is_start):

        greedy_action = self.actor_network.predict_action(np.expand_dims(state, 0), False)
        greedy_action = greedy_action[0]

        if is_train:
            if is_start:
                self.train_ep_count += 1

            if self.use_external_exploration:
                chosen_action = self.exploration_policy.generate(greedy_action, self.train_global_steps)
            else:
                # single state so first idx
                # single sample so first idx
                chosen_action = self.actor_network.sample_action(np.expand_dims(state, 0), False, is_single_sample=True)[0]

            self.train_global_steps += 1

            if self.write_log:
                write_summary(self.writer, self.train_global_steps, chosen_action[0], tag='train/action_taken')

                alpha, mean, sigma = self.actor_network.getModalStats()

                write_summary(self.writer, self.train_global_steps, alpha[0], tag='train/alpha0')
                write_summary(self.writer, self.train_global_steps, alpha[1], tag='train/alpha1')
                write_summary(self.writer, self.train_global_steps, mean[0], tag='train/mean0')
                write_summary(self.writer, self.train_global_steps, mean[1], tag='train/mean1')
                write_summary(self.writer, self.train_global_steps, sigma[0], tag='train/sigma0')
                write_summary(self.writer, self.train_global_steps, sigma[1], tag='train/sigma1')

            if self.write_plot:
                alpha, mean, sigma = self.actor_network.getModalStats()
                func1 = self.critic_network.getQFunction(state)
                func2 = self.actor_network.getPolicyFunction(alpha, mean, sigma)

                utils.plot_utils.plotFunction("ActorCritic", [func1, func2], state, [greedy_action, mean], chosen_action,
                                              self.action_min, self.action_max,
                                              display_title='Actor-Critic, steps: ' + str(self.train_global_steps),
                                              save_title='steps_' + str(self.train_global_steps),
                                              save_dir=self.writer.get_logdir(), ep_count=self.train_ep_count,
                                              show=False)
        else:
            if is_start:
                self.eval_ep_count += 1

            if self.sample_for_eval:
                # single state so first idx
                # single sample so first idx
                chosen_action = self.actor_network.sample_action(np.expand_dims(state, 0), False, is_single_sample=True)[0]

            else:
                chosen_action = greedy_action

            self.eval_global_steps += 1

            if self.write_log:
                write_summary(self.writer, self.eval_global_steps, chosen_action[0], tag='eval/action_taken')

        return chosen_action
Example #9
    def take_action(self, state, is_train, is_start):

        # Train
        if is_train:
            if is_start:
                self.train_ep_count += 1
            self.train_global_steps += 1

            if self.use_external_exploration:
                greedy_action = self.network.predict_action(
                    np.expand_dims(state, 0))
                chosen_action = self.exploration_policy.generate(
                    greedy_action[0], self.train_global_steps)

            else:
                # Get action from network
                chosen_action = self.network.sample_action(
                    np.expand_dims(state, 0))[0]
                # print('train', chosen_action)

            if self.write_log:
                raise NotImplementedError

            if self.write_plot:

                if self.use_true_q:
                    # loaded network approximates the true Q
                    q_func = self.network.getQFunction(state)
                    # q_func = self.network.getTrueQFunction(state)
                    # raise NotImplementedError
                else:
                    q_func = self.network.getQFunction(state)
                pi_func = self.network.getPolicyFunction(state)
                greedy_action = self.network.predict_action(
                    np.expand_dims(state, 0))[0]

                utils.plot_utils.plotFunction(
                    "SoftCEM", [q_func, pi_func],
                    state,
                    greedy_action,
                    chosen_action,
                    self.action_min,
                    self.action_max,
                    display_title='SoftCEM, steps: ' +
                    str(self.train_global_steps),
                    save_title='steps_' + str(self.train_global_steps),
                    save_dir=self.writer.get_logdir(),
                    ep_count=self.train_ep_count,
                    show=False)
        # Eval
        else:
            if self.sample_for_eval:
                # sample action
                chosen_action = self.network.sample_action(
                    np.expand_dims(state, 0))[0]

            else:
                # greedy action (mean)
                chosen_action = self.network.predict_action(
                    np.expand_dims(state, 0))[0]

            if is_start:
                self.eval_ep_count += 1
            self.eval_global_steps += 1

            if self.write_log:
                write_summary(self.writer,
                              self.eval_global_steps,
                              chosen_action[0],
                              tag='eval/action_taken')

        return chosen_action
Example #10
    def take_action(self, state, is_train, is_start):

        greedy_action = self.network.predict_action(
            state.reshape(-1, self.state_dim))

        # train
        if is_train:
            if is_start:
                self.train_ep_count += 1
            self.train_global_steps += 1

            if self.use_external_exploration:
                chosen_action = self.exploration_policy.generate(
                    greedy_action, self.train_global_steps)
                covmat = None

            else:
                chosen_action, covmat = self.network.sample_action(
                    np.expand_dims(state, 0), greedy_action)

            if self.write_log:
                write_summary(self.writer,
                              self.train_global_steps,
                              chosen_action[0],
                              tag='train/action_taken')

            # currently doesn't handle external exploration
            if self.write_plot:
                assert covmat is not None
                func1 = self.network.getQFunction(state)
                func2 = self.network.getPolicyFunction(greedy_action, covmat)

                utils.plot_utils.plotFunction(
                    "NAF", [func1, func2],
                    state,
                    greedy_action,
                    chosen_action,
                    self.action_min,
                    self.action_max,
                    display_title='NAF, steps: ' +
                    str(self.train_global_steps),
                    save_title='steps_' + str(self.train_global_steps),
                    save_dir=self.writer.get_logdir(),
                    ep_count=self.train_ep_count,
                    show=False)

            return chosen_action

        # eval
        else:
            if is_start:
                self.eval_ep_count += 1
            chosen_action = greedy_action.reshape(-1)
            self.eval_global_steps += 1

            if self.write_log:
                write_summary(self.writer,
                              self.eval_global_steps,
                              chosen_action[0],
                              tag='eval/action_taken')

            return chosen_action
Example #11
    def take_action(self, state, is_train, is_start):
        # initialize the starting action for the inference/optimization step
        if self.inference == 'bundle_entropy':
            action_init = np.expand_dims(
                (self.rng.uniform(self.action_min, self.action_max) -
                 self.action_min) * 1.0 / (self.action_max - self.action_min),
                0)
            action_init = np.clip(action_init, 0.0001, 0.9999)

        elif self.inference == 'adam':
            action_init = np.expand_dims(
                self.rng.uniform(self.action_min, self.action_max), 0)
        else:
            raise ValueError('Unknown inference method: {}'.format(self.inference))

        greedy_action = self.entropy_network.alg_opt(np.expand_dims(state, 0),
                                                     action_init,
                                                     self.inference_max_steps,
                                                     False)[0]

        if is_train:
            if is_start:
                self.train_ep_count += 1
            self.train_global_steps += 1

            if self.use_external_exploration:
                chosen_action = self.exploration_policy.generate(
                    greedy_action, self.train_global_steps)
            else:
                chosen_action = greedy_action

            if self.write_log:
                write_summary(self.writer,
                              self.train_global_steps,
                              chosen_action[0],
                              tag='train/action_taken')
            if self.write_plot:
                func1 = self.entropy_network.getQFunction(state)

                utils.plot_utils.plotFunction(
                    "PICNN", [func1],
                    state,
                    greedy_action,
                    chosen_action,
                    self.action_min,
                    self.action_max,
                    display_title='PICNN, steps: ' +
                    str(self.train_global_steps),
                    save_title='steps_' + str(self.train_global_steps),
                    save_dir=self.writer.get_logdir(),
                    ep_count=self.train_ep_count,
                    show=False)

        else:
            if is_start:
                self.eval_ep_count += 1
            self.eval_global_steps += 1

            chosen_action = greedy_action

            if self.write_log:
                write_summary(self.writer,
                              self.eval_global_steps,
                              chosen_action[0],
                              tag='eval/action_taken')

        return chosen_action
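
All of the variants above share the same calling convention: the caller passes the raw state, a flag for train vs. eval mode, and a flag marking the first step of an episode. A minimal driver loop under those assumptions might look like the sketch below; a Gym-style environment with reset()/step() is assumed, and none of the agent constructors shown above are required for the sketch.

    def run_episode(agent, env, is_train):
        # take_action(state, is_train, is_start) is the only agent call assumed here
        state = env.reset()
        is_start = True
        total_reward, done = 0.0, False
        while not done:
            action = agent.take_action(state, is_train, is_start)
            state, reward, done, _ = env.step(action)
            total_reward += reward
            is_start = False    # only the first call of the episode sets is_start
        return total_reward
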