class DQNAgent(Agent):
    """DQN-based trading agent: builds a gym trading env, wraps a keras-rl
    DQN around a Keras model, and maps Q-argmax actions to trade signals.

    Relies on config attributes supplied by the ``Agent`` base class
    (``env_name``, ``csv_list``, ``trading_cost``, ``time_cost``, ``market``,
    ``seed``, ``split_ratio``, ``memory_len``, ``eps``,
    ``target_model_update``, ``lr``) — TODO confirm against ``Agent``.
    """

    def __init__(self, config, save_name=None):
        super().__init__(config, save_name)

    def load_model(self, model_path):
        """Load a previously saved Keras model from ``model_path``.

        Note: ``load_model`` in the body resolves to the module-level Keras
        loader, not this method (method names do not shadow globals inside
        the body), so there is no recursion here.
        """
        self.model = load_model(model_path)

    def init_env(self):
        """Create, wrap, and seed the trading environment.

        Also derives ``steps_per_episode`` (training-split length) and
        ``nb_actions`` from the environment.
        """
        self.env = gym.make(
            self.env_name,
            csv_list=self.csv_list,
            trading_cost=self.trading_cost,
            time_cost=self.time_cost,
            market=self.market,
        )
        self.env = MyWrapper(self.env)
        # Seed every RNG source in play for reproducibility.
        np.random.seed(self.seed)
        self.env.seed(self.seed)
        self.env.action_space.seed(self.seed)
        random.seed(self.seed)
        tf.random.set_random_seed(self.seed)  # TF 1.x API
        # Steps per episode = rows in the processed data times the train split.
        self.steps_per_episode = int(
            self.env.processed_array.shape[0] * self.split_ratio
        )
        self.nb_actions = self.env.action_space.n

    def create_agent(self):
        """Build the Keras model and the keras-rl DQN agent around it.

        Uses a single-threaded TF 1.x session so runs are reproducible with
        the seeds set in ``init_env``.
        """
        session_conf = tf.ConfigProto(
            intra_op_parallelism_threads=1,
            inter_op_parallelism_threads=1,
        )
        sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
        K.set_session(sess)
        self.model = Model.simple(self.env)
        memory = SequentialMemory(limit=self.memory_len, window_length=1)
        policy = EpsGreedyQPolicy(eps=self.eps)
        # BUG FIX: this wrapper class shadows keras-rl's DQNAgent at module
        # level, so the original bare `DQNAgent(model=...)` call recursively
        # instantiated THIS class with arguments its __init__ does not accept.
        # Import the library class under an alias and use that instead.
        from rl.agents.dqn import DQNAgent as KerasRLDQNAgent
        self.agent = KerasRLDQNAgent(
            model=self.model,
            nb_actions=self.nb_actions,
            memory=memory,
            nb_steps_warmup=self.steps_per_episode,
            target_model_update=self.target_model_update,
            policy=policy,
        )
        # Old Keras optimizer API (`lr=`), consistent with the TF 1.x session
        # usage above.
        self.agent.compile(Adam(lr=self.lr))

    def predict(self, df):
        """Transform ``df`` with the env's feature extractor and emit the
        trade signal for the latest row."""
        # .iloc[:] takes a defensive copy before the feature transform.
        data = self.env.fe.transform(df.iloc[:]).values
        # NOTE(review): stock keras-rl DQNAgent exposes `forward`, not
        # `select_action(..., do_train=...)` — presumably a project fork of
        # keras-rl; confirm against the installed `rl` package.
        val = np.argmax(self.agent.select_action(data[-1:], do_train=False))
        self.signal_transform(val)

    def signal_transform(self, val):
        """Print the human-readable trading signal for action index ``val``:
        0 -> Buy, 1 -> Sell, anything else -> Close/Hold."""
        if val == 0:
            print('Buy')
        elif val == 1:
            print('Sell')
        else:
            print('Close/Hold')