Example #1
class DQN(BaseAgent):
  def __init__(self, model, processor, policy, test_policy, num_actions):
    # Replay memory
    memory = SequentialMemory(limit=opt.dqn_replay_memory_size,
                              window_length=opt.dqn_window_length)
    self.agent = DQNAgent(model=model,
                          nb_actions=num_actions,
                          policy=policy,
                          test_policy=test_policy,
                          memory=memory,
                          processor=processor,
                          batch_size=opt.dqn_batch_size,
                          nb_steps_warmup=opt.dqn_nb_steps_warmup,
                          gamma=opt.dqn_gamma,
                          target_model_update=opt.dqn_target_model_update,
                          enable_double_dqn=opt.enable_double_dqn,
                          enable_dueling_network=opt.enable_dueling_network,
                          train_interval=opt.dqn_train_interval,
                          delta_clip=opt.dqn_delta_clip)
    self.agent.compile(optimizer=keras.optimizers.Adam(lr=opt.dqn_learning_rate), metrics=['mae'])

  def fit(self, env, num_steps, weights_path=None, visualize=False):
    callbacks = []
    if weights_path is not None:
      callbacks += [ModelIntervalCheckpoint(weights_path, interval=50000, verbose=1)]
    self.agent.fit(env=env,
                   nb_steps=num_steps,
                   action_repetition=opt.dqn_action_repetition,
                   callbacks=callbacks,
                   log_interval=opt.log_interval,
                   test_interval=opt.test_interval,
                   test_nb_episodes=opt.test_nb_episodes,
                   test_action_repetition=opt.dqn_action_repetition,
                   visualize=visualize,
                   test_visualize=visualize,
                   verbose=1)

  def test(self, env, num_episodes, visualize=False):
    self.agent.test(env=env,
                    nb_episodes=num_episodes,
                    action_repetition=opt.dqn_action_repetition,
                    verbose=2,
                    visualize=visualize)

  def save(self, out_dir):
    self.agent.save_weights(out_dir, overwrite=True)

  def load(self, out_dir):
    self.agent.load_weights(out_dir)
def train_dqn_model(layers, rounds=10000, run_test=False, use_score=False):
    ENV_NAME = 'malware-score-v0' if use_score else 'malware-v0'
    env = gym.make(ENV_NAME)
    env.seed(123)
    nb_actions = env.action_space.n
    window_length = 1  # "experience" consists of where we were, where we are now

    # generate a policy model
    model = generate_dense_model((window_length,) + env.observation_space.shape, layers, nb_actions)

    # configure and compile our agent
    # BoltzmannQPolicy selects an action stochastically with a probability generated by soft-maxing Q values
    policy = BoltzmannQPolicy()
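    # Roughly, the Boltzmann policy turns the Q-values into a distribution
    #   p(a_i) = exp(Q(a_i) / tau) / sum_j exp(Q(a_j) / tau)
    # and samples from it, so higher-valued actions are favoured without the
    # choice ever collapsing to a hard argmax (keras-rl also clips Q / tau for
    # numerical stability).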

    # replay memory stores past transitions so training batches can be sampled from them
    # for this, we only consider a single malware sample (window_length=1) for each "experience"
    memory = SequentialMemory(limit=32, ignore_episode_boundaries=False, window_length=window_length)
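    # window_length=1 feeds the network only the most recent observation; a
    # larger value would make SequentialMemory stack that many consecutive
    # observations into a single input (frame stacking).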

    # DQN agent as described in Mnih et al. (2013), with Double DQN as in van Hasselt et al. (2015).
    # http://arxiv.org/pdf/1312.5602.pdf
    # http://arxiv.org/abs/1509.06461
    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=16,
                     enable_double_dqn=True, enable_dueling_network=True, dueling_type='avg',
                     target_model_update=1e-2, policy=policy, batch_size=16)
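    # Note: keras-rl treats target_model_update < 1 as a soft-update rate (the
    # target network is nudged toward the online network every step), while
    # values >= 1 mean a hard copy every that many steps.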

    # keras-rl allows one to use any built-in Keras optimizer
    agent.compile(RMSprop(lr=1e-3), metrics=['mae'])

    # play the game. learn something!
    agent.fit(env, nb_steps=rounds, visualize=False, verbose=2)

    history_train = env.history
    history_test = None

    if run_test:
        # Set up the testing environment
        TEST_NAME = 'malware-score-test-v0' if use_score else 'malware-test-v0'
        test_env = gym.make(TEST_NAME)

        # evaluate the agent on a few episodes, drawing randomly from the test samples
        agent.test(test_env, nb_episodes=100, visualize=False)
        history_test = test_env.history

    return agent, model, history_train, history_test
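
# A hypothetical invocation of train_dqn_model(); the layer sizes and round
# count are illustrative assumptions, and the malware gym environments plus
# generate_dense_model() must be importable for this to run.
if __name__ == '__main__':
    agent, model, history_train, history_test = train_dqn_model(
        layers=[1024, 256],   # hidden-layer widths passed to generate_dense_model
        rounds=50000,         # total training steps for agent.fit
        run_test=True,        # also evaluate on the held-out test environment
        use_score=False)      # False -> 'malware-v0', True -> 'malware-score-v0'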
Example #3
               delta_clip=1.)
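# delta_clip=1. gives a Huber loss on the TD error (quadratic within +/- 1,
# linear outside), which keeps occasional large errors from producing huge
# gradient steps.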
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env,
            callbacks=callbacks,
            nb_steps=1750000,
            log_interval=10000,
            visualize=True)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)
Example #4
outputFile = open("2105.csv", "w+")
outputFile.write(
    "iteration,trainAccuracy,trainCoverage,trainReward,validationAccuracy,validationCoverage,validationReward\n"
)
iteration = 0

for i in range(0, 100):
    dqn.fit(trainEnv,
            nb_steps=3000,
            visualize=False,
            callbacks=[trainer],
            verbose=0)
    (episodes, trainCoverage, trainAccuracy, trainReward) = trainer.getInfo()
    dqn.test(validationEnv,
             nb_episodes=300,
             verbose=0,
             callbacks=[validator],
             visualize=False)
    (episodes, validCoverage, validAccuracy, validReward) = validator.getInfo()
    outputFile.write(
        str(iteration) + "," + str(trainAccuracy) + "," + str(trainCoverage) +
        "," + str(trainReward) + "," + str(validAccuracy) + "," +
        str(validCoverage) + "," + str(validReward) + "\n")
    print(
        str(iteration) + " TRAIN:  acc: " + str(trainAccuracy) + " cov: " +
        str(trainCoverage) + " rew: " + str(trainReward) + " VALID:  acc: " +
        str(validAccuracy) + " cov: " + str(validCoverage) + " rew: " +
        str(validReward))
    iteration += 1
    validator.reset()
    trainer.reset()
Example #5
class Agent(object):
    name = 'DQN'

    def __init__(self,
                 number_of_training_steps=1e5,
                 gamma=0.999,
                 load_weights=False,
                 visualize=False,
                 dueling_network=True,
                 double_dqn=True,
                 nn_type='mlp',
                 **kwargs):
        """
        Agent constructor
        :param step_size: int, number of steps to take in env for a given simulation step
        :param window_size: int, number of lags to include in observation
        :param max_position: int, maximum number of positions able to be held in inventory
        :param fitting_file: str, file used for z-score fitting
        :param testing_file: str,file used for dqn experiment
        :param env: environment name
        :param seed: int, random seed number
        :param action_repeats: int, number of steps to take in environment between actions
        :param number_of_training_steps: int, number of steps to train agent for
        :param gamma: float, value between 0 and 1 used to discount future DQN returns
        :param format_3d: boolean, format observation as matrix or tensor
        :param train: boolean, train or test agent
        :param load_weights: boolean, import existing weights
        :param z_score: boolean, standardize observation space
        :param visualize: boolean, visiualize environment
        :param dueling_network: boolean, use dueling network architecture
        :param double_dqn: boolean, use double DQN for Q-value approximation
        """
        # Agent arguments
        # self.env_name = id
        self.neural_network_type = nn_type
        self.load_weights = load_weights
        self.number_of_training_steps = number_of_training_steps
        self.visualize = visualize

        # Create environment
        self.env = gym.make(**kwargs)
        self.env_name = self.env.env.id

        # Create agent
        # NOTE: 'Keras-RL' uses its own frame-stacker
        self.memory_frame_stack = 1  # Number of frames to stack e.g., 1.
        self.model = self.create_model(name=self.neural_network_type)
        self.memory = SequentialMemory(limit=10000,
                                       window_length=self.memory_frame_stack)
        self.train = self.env.env.training
        self.cwd = os.path.dirname(os.path.realpath(__file__))

        # create the agent
        self.agent = DQNAgent(model=self.model,
                              nb_actions=self.env.action_space.n,
                              memory=self.memory,
                              processor=None,
                              nb_steps_warmup=500,
                              enable_dueling_network=dueling_network,
                              dueling_type='avg',
                              enable_double_dqn=double_dqn,
                              gamma=gamma,
                              target_model_update=1000,
                              delta_clip=1.0)
        self.agent.compile(Adam(lr=3e-4), metrics=['mae'])

    def __str__(self):
        # msg = '\n'
        # return msg.join(['{}={}'.format(k, v) for k, v in self.__dict__.items()])
        return 'Agent = {} | env = {} | number_of_training_steps = {}'.format(
            Agent.name, self.env_name, self.number_of_training_steps)

    def create_model(self, name='cnn'):
        print("creating model for {}".format(name))
        if name == 'cnn':
            return self._create_cnn_model()
        elif name == 'mlp':
            return self._create_mlp_model()

    def _create_cnn_model(self):
        """
        Create a convolutional neural network with a dense layer at the end
        :return: keras model
        """
        features_shape = (self.memory_frame_stack,
                          *self.env.observation_space.shape)
        model = Sequential()
        conv = Conv2D
        model.add(
            conv(input_shape=features_shape,
                 filters=16,
                 kernel_size=[10, 1],
                 padding='same',
                 activation='relu',
                 strides=[5, 1],
                 data_format='channels_first'))
        model.add(
            conv(filters=16,
                 kernel_size=[6, 1],
                 padding='same',
                 activation='relu',
                 strides=[3, 1],
                 data_format='channels_first'))
        model.add(
            conv(filters=16,
                 kernel_size=[4, 1],
                 padding='same',
                 activation='relu',
                 strides=[2, 1],
                 data_format='channels_first'))
        model.add(Flatten())
        model.add(Dense(256, activation='relu'))
        model.add(Dense(self.env.action_space.n, activation='softmax'))
        print(model.summary())
        return model

    def _create_mlp_model(self):
        """
        Create a dense (fully connected) neural network
        :return: keras model
        """
        features_shape = (self.memory_frame_stack,
                          *self.env.observation_space.shape)
        model = Sequential()
        model.add(
            Dense(units=256, input_shape=features_shape, activation='relu'))
        model.add(Dense(units=256, activation='relu'))
        model.add(Flatten())
        model.add(Dense(self.env.action_space.n, activation='softmax'))
        print(model.summary())
        return model

    def start(self):
        """
        Entry point for agent training and testing
        :return: (void)
        """
        output_directory = os.path.join(self.cwd, 'dqn_weights')
        if not os.path.exists(output_directory):
            print('{} does not exist. Creating Directory.'.format(
                output_directory))
            os.mkdir(output_directory)

        weight_name = 'dqn_{}_{}_weights.h5f'.format(self.env_name,
                                                     self.neural_network_type)
        weights_filename = os.path.join(output_directory, weight_name)
        print("weights_filename: {}".format(weights_filename))

        if self.load_weights:
            print('...loading weights for {} from\n{}'.format(
                self.env_name, weights_filename))
            self.agent.load_weights(weights_filename)

        if self.train:
            step_chkpt = '{step}.h5f'
            step_chkpt = 'dqn_{}_weights_{}'.format(self.env_name, step_chkpt)
            checkpoint_weights_filename = os.path.join(self.cwd, 'dqn_weights',
                                                       step_chkpt)
            print("checkpoint_weights_filename: {}".format(
                checkpoint_weights_filename))
            log_filename = os.path.join(
                self.cwd, 'dqn_weights',
                'dqn_{}_log.json'.format(self.env_name))
            print('log_filename: {}'.format(log_filename))

            callbacks = [
                ModelIntervalCheckpoint(checkpoint_weights_filename,
                                        interval=250000)
            ]
            callbacks += [FileLogger(log_filename, interval=100)]

            print('Starting training...')
            self.agent.fit(self.env,
                           callbacks=callbacks,
                           nb_steps=self.number_of_training_steps,
                           log_interval=10000,
                           verbose=0,
                           visualize=self.visualize)
            print("training over.")
            print('Saving AGENT weights...')
            self.agent.save_weights(weights_filename, overwrite=True)
            print("AGENT weights saved.")
        else:
            print('Starting TEST...')
            self.agent.test(self.env, nb_episodes=2, visualize=self.visualize)
def main():
    ml_variables = FXU.getMLVariables()
    sqlEngine = FXU.getSQLEngine()
    reinforce_test_tablename = "reinforcetests"
    actions_table_details = {
        'name': 'metaactions',
        'col': ['Action', 'Time'],
        'type': ['VARCHAR(20)', 'datetime'],
        'null': [False, False]
    }
    ### Clear the actions table

    FXU.execute_query_db("DELETE FROM metaactions", sqlEngine)
    env = ForexEnv(type="train", inputSymbol="EURUSD", show_trade=True)
    env_test = ForexEnv(type="test", inputSymbol="EURUSD", show_trade=True)

    n_actions = env.action_space.n
    print("Number of actions : ", n_actions)
    model = create_model(shape=env.observation_space.shape,
                         n_actions=n_actions)
    print(model.summary())

    #### Configuring the agent
    memory = SequentialMemory(limit=100000, window_length=env.window_size)
    policy = EpsGreedyQPolicy()

    # enable the dueling network
    # you can specify the dueling_type to one of {'avg','max','naive'}
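    # keras-rl combines the value stream V and advantage stream A as
    #   'avg':   Q = V + A - mean(A)
    #   'max':   Q = V + A - max(A)
    #   'naive': Q = V + A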
    dqn = DQNAgent(model=model,
                   nb_actions=n_actions,
                   memory=memory,
                   nb_steps_warmup=100,
                   enable_dueling_network=True,
                   dueling_type='avg',
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-4), metrics=['mae'])

    minPortfolioThreshold = 0.4

    training_episodes_n = int(ml_variables['TrainingEpisodesNumber'])

    ##### Load weights if available to resume previous learning
    if ml_variables['LoadWeights'] != 'no':
        model_file_name = "model\\dqnTrainingWeights_{0}.h5f".format(
            env_test.symbol)

        if ospath.isfile(model_file_name):
            print("Weights from a previous session exist, so loading them")
            dqn.load_weights(model_file_name)

    while True:

        ####### Load the best weights if available ####################
        """
        if ml_variables['LoadWeights'] != 'no':
            ##### Get from DB the best Profit #########################
            rs = FXU.getTableRows_db(
                "SELECT * FROM {0} WHERE Symbol = '{1}' AND MinPortfolio > {2} ORDER BY TotalProfit DESC".format(reinforce_test_tablename, env_test.symbol, (
                    minPortfolioThreshold * env_test.starting_balance)))
            firstRow = -1
            for row in rs:
                firstRow = row
                break
            if firstRow != -1:
                print("Best value : ", firstRow['TotalProfit'])
                model_file_name = "model\\duel_dqn_reward_{0}_{1}.h5f".format(env_test.symbol, int(firstRow['TotalProfit']))
                if ospath.isfile(model_file_name):
                    print("Weights for the best profit : {0} exist, so Going to load the weights".format(int(firstRow['TotalProfit'])))
                    dqn.load_weights(model_file_name)
            """

        # Train :
        dqn.fit(env,
                nb_steps=(env.split_point * training_episodes_n),
                nb_max_episode_steps=60000,
                visualize=False,
                verbose=2)
        dqn.save_weights('./model/dqnTrainingWeights_{0}.h5f'.format(
            env.symbol),
                         overwrite=True)
        try:
            info = dqn.test(env_test, nb_episodes=1, visualize=False)
            #reward = info.history['episode_reward']
            reward = env_test.portfolio - env_test.starting_balance
            print("Total Profit : ", reward)
            now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            # if reward > int(max_reward) and int(reward) != 0 and env_test.minPortfolio > (minPortfolioThreshold * env_test.starting_balance):
            #    max_reward = int(reward)

            #np.array([info.history]).dump('./info/duel_dqn_reward_{0}_{1}.info'.format(env_test.symbol, max_reward))
            if reward > 500 and env_test.minPortfolio > (
                    minPortfolioThreshold * env_test.starting_balance):
                dqn.save_weights('./model/duel_dqn_reward_{0}_{1}.h5f'.format(
                    env_test.symbol, int(reward)),
                                 overwrite=True)
            #print("Info of testing : ",info.history)
            FXU.execute_query_db(
                "INSERT INTO {0}(Symbol,StartingBalance,TotalProfit,Time,MinPortfolio) VALUES('{1}','{2}','{3}','{4}','{5}')"
                .format(reinforce_test_tablename, env_test.symbol,
                        env_test.starting_balance, reward, now,
                        env_test.minPortfolio), sqlEngine)
            #n_buys, n_lostBuys, n_sells, n_lostSells, portfolio = info['buys'], info['lostBuys'], info['sells'], info['lostBuys']
            #np.array([info]).dump('./info/duel_dqn_{0}_weights_{1}LS_{2}_{3}.info'.format(env_test.symbol, portfolio, n_buys, n_sells))
        except KeyboardInterrupt:
            return
def main():
    ml_variables = FXU.getMLVariables()
    actions_table_details = {
        'name': 'metaactions',
        'col': ['Action', 'Time'],
        'type': ['VARCHAR(20)', 'datetime'],
        'null': [False, False]
    }
    ### Clear the actions table

    FXU.execute_query_db("DELETE FROM metaactions")
    env = ForexEnv(type="train", inputSymbol="EURUSD", show_trade=True)
    env_test = ForexEnv(type="test", inputSymbol="EURUSD", show_trade=True)

    n_actions = env.action_space.n
    print("Number of actions : ", n_actions)
    model = create_model(shape=env.observation_space.shape,
                         n_actions=n_actions)
    print(model.summary())

    #### Configuring the agent
    memory = SequentialMemory(limit=100000, window_length=env.window_size)
    policy = EpsGreedyQPolicy()

    # enable the dueling network
    # you can specify the dueling_type to one of {'avg','max','naive'}
    dqn = DQNAgent(model=model,
                   nb_actions=n_actions,
                   memory=memory,
                   nb_steps_warmup=100,
                   enable_dueling_network=True,
                   dueling_type='naive',
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-4), metrics=['mse'])
    if ml_variables['LoadWeights'] != 'no':
        path = 'model\\' + ml_variables['LoadWeights'] + ".h5f"
        if ospath.isfile(path):
            print("Weights exist, so Going to load the weights")
            dqn.load_weights(path)
    max_reward = -1000000

    while True:

        # Train :
        dqn.fit(env,
                nb_steps=env.split_point,
                nb_max_episode_steps=60000,
                visualize=False,
                verbose=2)

        try:
            info = dqn.test(env_test, nb_episodes=1, visualize=False)
            #reward = info.history['episode_reward']
            reward = env_test.balance - env_test.starting_balance
            print("reward : ", reward)
            if reward > int(max_reward) and int(reward) != 0:
                max_reward = int(reward)
                np.array([info.history
                          ]).dump('./info/duel_dqn_reward_{0}_{1}.info'.format(
                              env_test.symbol, max_reward))
                dqn.save_weights('./model/duel_dqn_reward_{0}_{1}.h5f'.format(
                    env_test.symbol, max_reward))
            #print("Info of testing : ",info.history)

            #n_buys, n_lostBuys, n_sells, n_lostSells, portfolio = info['buys'], info['lostBuys'], info['sells'], info['lostBuys']
            #np.array([info]).dump('./info/duel_dqn_{0}_weights_{1}LS_{2}_{3}.info'.format(env_test.symbol, portfolio, n_buys, n_sells))
        except KeyboardInterrupt:
            return

        ##### Saving weights after each fitting to resume afterwards ###############
        if ml_variables['LoadWeights'] != 'no':
            dqn.save_weights(filepath='model\\' + ml_variables['LoadWeights'] +
                             ".h5f",
                             overwrite=True)
else:
    raise ValueError('Please select DQN, DUEL_DQN, SARSA, or CEM for your method type.')


callbacks = []
# callbacks += [ModelIntervalCheckpoint(CHECKPOINT_WEIGHTS_FILENAME, interval=10000)]
callbacks += [FileLogger(LOG_FILENAME, interval=100)]
# callbacks += [TensorBoard(log_dir='logs', histogram_freq=0, write_graph=True, write_images=False)]
callbacks += [ExploreExploit()]

# Optionally, we can reload a previous model's weights and continue training from there
# LOAD_WEIGHTS_FILENAME = 'weights/duel_dqn_planar_crane-v0_weights_1024_4_50000_2017-07-12_160853.h5f'
# # # Load the model weights
# agent.load_weights(LOAD_WEIGHTS_FILENAME)

# Okay, now it's time to learn something! Visualizing the training would slow it
# down quite a lot, so it is disabled here. You can always safely abort the
# training prematurely using Ctrl + C.
agent.fit(env, nb_steps=NUM_STEPS, callbacks=callbacks, action_repetition=5, visualize=False, verbose=1, log_interval=LOG_INTERVAL, nb_max_episode_steps=500)

# After training is done, we save the final weights.
agent.save_weights(WEIGHT_FILENAME, overwrite=True)

# We'll also save a copy under a simpler name so that a test run can be started
# immediately after training.
filename = 'weights/{}_{}_weights.h5f'.format(METHOD, ENV_NAME)
agent.save_weights(filename, overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True,  action_repetition=5) #nb_max_episode_steps=500,
model.add(Activation('relu'))
model.add(Dense(nb_actions, activation='linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
# enable the dueling network
# you can specify the dueling_type to one of {'avg','max','naive'}
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               enable_dueling_network=True,
               dueling_type='avg',
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! Visualizing the training would slow it
# down quite a lot, so it is disabled here. You can always safely abort the
# training prematurely using Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('duel_dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=False)

dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
               processor=processor, enable_double_dqn=True, enable_dueling_network=True, nb_steps_warmup=1000, gamma=.99, target_model_update=10000,
               train_interval=4, delta_clip=1.)

#Prioritized Memories typically use lower learning rates
dqn.compile(Adam(lr=.00025/4), metrics=['mae'])
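# (Prioritized replay over-samples high-error transitions, so the effective
# update size grows; dividing the learning rate by 4 is a common compensation
# that follows the prioritized experience replay paper.)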

folder_path = './'

mode = 'train'

if mode == 'train':
    weights_filename = folder_path + 'pdd_dqn_{}_weights.h5f'.format(env_name)
    checkpoint_weights_filename = folder_path + 'pdd_dqn_' + env_name + '_weights_{step}.h5f'
    log_filename = folder_path + 'pdd_dqn_' + env_name + '_REWARD_DATA.txt'
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=500000)]
    callbacks += [TrainEpisodeLogger()]
    dqn.fit(env, callbacks=callbacks, nb_steps=10000000, verbose=0, nb_max_episode_steps=20000)


elif mode == 'test':
    weights_filename = folder_path + 'pdd_dqn_MsPacmanDeterministic-v4_weights_10000000.h5f'
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True, nb_max_start_steps=80)

Example #11
def attempt(lr, numTrainSteps, fnamePrefix, activation, exportVid, visualize):
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)

    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n

    print("env.observation_space.shape: " + str(env.observation_space.shape))

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation(activation))
    model.add(Dense(13))
    model.add(Activation(activation))
    model.add(Dense(10))
    model.add(Activation(activation))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=100,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=lr), metrics=['mae'])
    if not os.path.exists(fnamePrefix):
        os.makedirs(fnamePrefix)
    weights_fname = '%s/weights.h5f' % fnamePrefix
    if os.path.isfile(weights_fname):
        print("Loading weights from before")
        print("Skipping training")
        dqn.load_weights(weights_fname)
    else:
        # Okay, now it's time to learn something! Visualizing the training would slow it
        # down quite a lot, so it is disabled here. You can always safely abort the
        # training prematurely using Ctrl + C.
        dqn.fit(env, nb_steps=numTrainSteps, visualize=False, verbose=1)

        # After training is done, we save the final weights.
        dqn.save_weights(weights_fname, overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    env.reset()
    env.close()
    if exportVid:
        if not visualize:
            # print to stderr, since trainAll redirects stdout
            eprint(
                "Error: I don't think the video export works unless you choose visualize=True"
            )
        videoFname = fnamePrefix + '/videos/' + str(time())
        if not os.path.exists(videoFname):
            os.makedirs(videoFname)
        env = wrappers.Monitor(env, videoFname, force=True)
    result = dqn.test(env, nb_episodes=1, visualize=visualize)
    if exportVid:
        print("Video saved to %s" % videoFname)
    means = {'reward': mean(result.history['episode_reward'])}
    json_fname = fnamePrefix + '/result.json'
    with open(json_fname, "w") as f:
        json.dump(result.history, f)
    return (means)
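
# A hypothetical call to attempt(); the learning rate, step count and output
# prefix below are illustrative assumptions rather than values from the
# original runs:
#   means = attempt(lr=1e-3, numTrainSteps=50000, fnamePrefix='runs/relu_lr1e-3',
#                   activation='relu', exportVid=False, visualize=False)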
Example #12
    checkpoint_weights_filename = 'dqn_' + Snake_env.name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(Snake_env.name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=1000)
    ]
    callbacks += [FileLogger(log_filename, interval=1000)]

    weights = "dqn_" + Snake_env.name + "_weights_" + str(step) + ".h5f"
    #if weights:
    #    weights_filename_1 = weights
    #dqn.load_weights(weights_filename_1)

    # Start training
    dqn.fit(Snake_env,
            callbacks=callbacks,
            nb_steps=step,
            log_interval=1000,
            verbose=1)

    # Save the weights
    dqn.save_weights(weights_filename, overwrite=True)

elif mode == 'test':

    # Load the weights
    weights = "dqn_" + Snake_env.name + "_weights_" + str(step) + ".h5f"
    if weights:
        weights_filename = weights
    dqn.load_weights(weights_filename)
    dqn.test(Snake_env, nb_episodes=10, visualize=True)
Example #13
class Agent(object):
    name = 'DQN'

    def __init__(
            self,
            step_size=1,
            window_size=20,
            max_position=5,
            fitting_file='ETH-USD_2018-12-31.xz',
            testing_file='ETH-USD_2018-01-01.xz',
            env='market-maker-v0',
            seed=1,
            action_repeats=4,
            number_of_training_steps=1e5,
            gamma=0.999,
            format_3d=False,  # add 3rd dimension for CNNs
            train=True,
            weights=True,
            z_score=True,
            visualize=False,
            dueling_network=True,
            double_dqn=True):
        """
        Agent constructor
        :param step_size: int, number of steps to take in env for a given simulation step
        :param window_size: int, number of lags to include in observation
        :param max_position: int, maximum number of positions able to be held in inventory
        :param fitting_file: str, file used for z-score fitting
        :param testing_file: str, file used for dqn experiment
        :param env: environment name
        :param seed: int, random seed number
        :param action_repeats: int, number of steps to take in environment between actions
        :param number_of_training_steps: int, number of steps to train agent for
        :param gamma: float, value between 0 and 1 used to discount future DQN returns
        :param format_3d: boolean, format observation as matrix or tensor
        :param train: boolean, train or test agent
        :param weights: boolean, import existing weights
        :param z_score: boolean, standardize observation space
        :param visualize: boolean, visualize environment
        :param dueling_network: boolean, use dueling network architecture
        :param double_dqn: boolean, use double DQN for Q-value approximation
        """
        self.env_name = env
        self.env = gym.make(self.env_name,
                            fitting_file=fitting_file,
                            testing_file=testing_file,
                            step_size=step_size,
                            max_position=max_position,
                            window_size=window_size,
                            seed=seed,
                            action_repeats=action_repeats,
                            training=train,
                            z_score=z_score,
                            format_3d=format_3d)
        # Number of frames to stack e.g., 1.
        # NOTE: 'Keras-RL' uses its own frame-stacker
        self.memory_frame_stack = 1
        self.model = self.create_model()
        self.memory = SequentialMemory(limit=10000,
                                       window_length=self.memory_frame_stack)
        self.train = train
        self.number_of_training_steps = number_of_training_steps
        self.weights = weights
        self.cwd = os.path.dirname(os.path.realpath(__file__))
        self.visualize = visualize

        # create the agent
        self.agent = DQNAgent(model=self.model,
                              nb_actions=self.env.action_space.n,
                              memory=self.memory,
                              processor=None,
                              nb_steps_warmup=500,
                              enable_dueling_network=dueling_network,
                              dueling_type='avg',
                              enable_double_dqn=double_dqn,
                              gamma=gamma,
                              target_model_update=1000,
                              delta_clip=1.0)
        self.agent.compile(Adam(lr=3e-4), metrics=['mae'])

    def __str__(self):
        # msg = '\n'
        # return msg.join(['{}={}'.format(k, v) for k, v in self.__dict__.items()])
        return 'Agent = {} | env = {} | number_of_training_steps = {}'.format(
            Agent.name, self.env_name, self.number_of_training_steps)

    def create_model(self):
        """
        Create a convolutional neural network with a dense layer at the end
        :return: keras model
        """
        features_shape = (self.memory_frame_stack,
                          *self.env.observation_space.shape)
        model = Sequential()
        conv = Conv2D

        model.add(
            conv(input_shape=features_shape,
                 filters=16,
                 kernel_size=[10, 1],
                 padding='same',
                 activation='relu',
                 strides=[5, 1],
                 data_format='channels_first'))
        model.add(
            conv(filters=16,
                 kernel_size=[6, 1],
                 padding='same',
                 activation='relu',
                 strides=[3, 1],
                 data_format='channels_first'))
        model.add(
            conv(filters=16,
                 kernel_size=[4, 1],
                 padding='same',
                 activation='relu',
                 strides=[2, 1],
                 data_format='channels_first'))
        model.add(Flatten())
        model.add(Dense(512))
        model.add(Activation('linear'))
        model.add(Dense(self.env.action_space.n))
        model.add(Activation('softmax'))

        print(model.summary())
        return model

    def start(self):
        """
        Entry point for agent training and testing
        :return: (void)
        """
        weights_filename = '{}/dqn_weights/dqn_{}_weights.h5f'.format(
            self.cwd, self.env_name)

        if self.weights:
            self.agent.load_weights(weights_filename)
            print('...loading weights for {}'.format(self.env_name))

        if self.train:
            checkpoint_weights_filename = 'dqn_{}'.format(self.env_name) + \
                                          '_weights_{step}.h5f'
            checkpoint_weights_filename = '{}/dqn_weights/'.format(self.cwd) + \
                                          checkpoint_weights_filename
            log_filename = '{}/dqn_weights/dqn_{}_log.json'.format(
                self.cwd, self.env_name)
            print('FileLogger: {}'.format(log_filename))

            callbacks = [
                ModelIntervalCheckpoint(checkpoint_weights_filename,
                                        interval=250000)
            ]
            callbacks += [FileLogger(log_filename, interval=100)]

            print('Starting training...')
            self.agent.fit(self.env,
                           callbacks=callbacks,
                           nb_steps=self.number_of_training_steps,
                           log_interval=10000,
                           verbose=0,
                           visualize=self.visualize)
            print('Saving AGENT weights...')
            self.agent.save_weights(weights_filename, overwrite=True)
        else:
            print('Starting TEST...')
            self.agent.test(self.env, nb_episodes=2, visualize=self.visualize)
memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)

processor = AtariProcessor()

policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05, nb_steps=1250000)
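# eps is annealed linearly from 1.0 down to 0.1 over the first 1,250,000
# training steps and then held at 0.1; value_test=.05 is the epsilon used
# during dqn.test().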


dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
               processor=processor, enable_double_dqn=False, enable_dueling_network=False, nb_steps_warmup=1000, gamma=.99, target_model_update=10000,
               train_interval=4, delta_clip=1.)

dqn.compile(Adam(lr=.00025), metrics=['mae'])

folder_path = './machine-learning/pacman/'

mode = 'test'

if mode == 'train':
    weights_filename = folder_path + 'dqn_{}_weights.h5f'.format(env_name)
    checkpoint_weights_filename = folder_path + 'dqn_' + env_name + '_weights_{step}.h5f'
    log_filename = folder_path + 'dqn_' + env_name + '_REWARD_DATA.txt'
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=500000)]
    callbacks += [TrainEpisodeLogger()]
    dqn.fit(env, callbacks=callbacks, nb_steps=10000000, verbose=1, nb_max_episode_steps=20000)

elif mode == 'test':
    weights_filename = folder_path + 'dqn_MsPacman-v0_weights_10000000.h5f'
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=20, visualize=False, nb_max_start_steps=80)
Example #15
class BaseAgent:

    dqn: DQNAgent

    def __init__(self, stock: str):
        self.env = gym.make('stockenv-v0', df=read_daily_data(stock))

        print(self.env)
        print(self.env.action_space)
        print(self.env.observation_space)

        self.env.seed(123)
        self.stock = stock

        memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
        policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                      attr='eps',
                                      value_max=1.,
                                      value_min=.1,
                                      value_test=.05,
                                      nb_steps=1000000)

        processor = StockProcessor(stock)
        model = self.create_model(30)
        print("output:", model.output.shape)
        print("output2:", self.env.action_space.shape)

        print(list(model.output.shape))
        print(list((None, self.env.action_space.shape)))

        self.dqn = DQNAgent(model=model,
                            nb_actions=self.env.action_space.n,
                            policy=policy,
                            memory=memory,
                            processor=processor,
                            nb_steps_warmup=50000,
                            gamma=.99,
                            target_model_update=10000,
                            train_interval=4,
                            delta_clip=1.)
        self.dqn.compile(Adam(lr=.00025), metrics=['mae'])

    def train(self):

        # Okay, now it's time to learn something! We capture the interrupt exception so that training
        # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
        weights_filename = self.get_weight_path(self.stock)
        checkpoint_weights_filename = self.get_weight_path(
            self.stock) + '_{step}.h5f'
        log_filename = 'dqn_{}_log.json'.format(self.stock)

        callbacks = [
            ModelIntervalCheckpoint(checkpoint_weights_filename,
                                    interval=250000)
        ]
        callbacks += [FileLogger(log_filename, interval=100)]
        # callbacks += [WandbLogger(
        #     project = "stock-bot-v0"
        # )]

        self.dqn.fit(self.env,
                     callbacks=callbacks,
                     nb_steps=1750000,
                     log_interval=10000)

        # After training is done, we save the final weights one more time.
        self.dqn.save_weights(weights_filename, overwrite=True)

        # Finally, evaluate our algorithm for 10 episodes.
        self.dqn.test(self.env, nb_episodes=10, visualize=False)

    def test(self):
        weights_filename = self.get_weight_path(self.stock)
        self.dqn.load_weights(weights_filename)
        self.dqn.test(self.env, nb_episodes=10, visualize=True)

    def get_weight_path(self, name: str) -> str:
        """Get weight path"""
        pass

    def create_model(self, input_size: int):
        """abstract"""
        pass
model.add(Activation('linear'))

print(model.summary())

# Export the model architecture to JSON
model_json_str = model.to_json()
with open('dqn_{}_model.json'.format(ENV_NAME), 'w') as f:
    f.write(model_json_str)

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=100,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)
# compute_q_values() expects a state, i.e. a window of observations:
q_values = dqn.compute_q_values([env.reset()])
Example #17
               nb_steps_warmup=50000,
               gamma=.99,
               target_model_update=10000,
               train_interval=4,
               delta_clip=1.)
dqn.compile(RMSprop(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that now you can use the built-in tensorflow.keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=20000000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=50, visualize=False, nb_max_episode_steps=100)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2, 
               policy=train_policy, test_policy=test_policy)
              
    filename = 'weights/duel_dqn_{}_weights_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE,  NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
else:
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=train_policy, test_policy=test_policy)
    
    filename = 'weights/dqn_{}_weights_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)


dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Optionally, we can reload a previous model's weights and continue training from there
# FILENAME = 'weights/duel_dqn_variable_pendulum-v0_weights_4096_4_50000_2017-07-11_140316.h5f'
# Load the model weights
# dqn.load_weights(FILENAME)


# Okay, now it's time to learn something! Visualizing the training would slow it
# down quite a lot, so it is disabled here. You can always safely abort the
# training prematurely using Ctrl + C.
dqn.fit(env, nb_steps=NUM_STEPS, visualize=False, verbose=2, nb_max_episode_steps=500)

# After training is done, we save the final weights.
dqn.save_weights(filename, overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, nb_max_episode_steps=500, visualize=True)
Example #19
def dqn_rt(nonLinearLayers=3,
           neuronsPerLayer=4,
           epsilon=0.3,
           tau=1,
           exploration="tau",
           gamma=0.5):

    ENV_NAME = 'RtSimulationEnv-v0'

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    for x in range(0, nonLinearLayers):
        model.add(Activation('relu'))
        model.add(Dense(neuronsPerLayer))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=50000, window_length=1)
    if (exploration == "epsilon"):
        policy = EpsGreedyQPolicy(eps=epsilon)
    else:
        policy = BoltzmannQPolicy(tau=tau)
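    # EpsGreedyQPolicy takes a uniformly random action with probability eps and
    # the greedy (argmax-Q) action otherwise, whereas BoltzmannQPolicy samples
    # actions in proportion to softmax(Q / tau).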

    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=10,
                   target_model_update=1e-2,
                   policy=policy,
                   gamma=gamma)
    #dqn = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=policy)
    #dqn = DDPGAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! If we visualize the training here for show, this
    # slows down training quite a lot. We can also always safely abort the training prematurely using
    # Ctrl + C.
    env.testing = False
    foo = dqn.fit(env, nb_steps=1000, visualize=False, verbose=3)

    d = datetime.utcnow()
    unixtime = calendar.timegm(d.utctimetuple())
    ###save env.unwrapped.totalStates and env.unwrapped.actions as: [state,action] pairs
    with open('simulatedRTs_' + str(unixtime) + '.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows([
            env.unwrapped.totalStates, env.unwrapped.rewards,
            env.unwrapped.actions
        ])

    # After training is done, we save the final weights.
    # dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    env.testing = True
    dqn.test(env, nb_episodes=5, visualize=False)
Example #20
          log_interval=1000)

# Do early stopping fit

test_scores = []
train_scores = []
no_improvement = 0

f = open('log', 'w')
f.write('best'.rjust(10) + 'test'.rjust(10) + 'train'.rjust(10) + '\n')

f.flush()

# Find test score
agent.test(test_env,
           nb_episodes=100,
           nb_max_episode_steps=n_steps,
           visualize=False)
test_score = np.mean(test_env.value_history[-100:])
test_scores.append(test_score)

# Find train score
agent.test(train_env,
           nb_episodes=100,
           nb_max_episode_steps=n_steps,
           visualize=False)
train_score = np.mean(train_env.value_history[-100:])
train_scores.append(train_score)

best_score = min(test_score, train_score)

f.write('{0:.5f}'.format(best_score).rjust(10) +
ENV_NAME = 'FrozenLake-v0'

env = gym.make(ENV_NAME)
np.random.seed(1)
env.seed(1)
Actions = env.action_space.n

model = Sequential()
model.add(Embedding(16, 4, input_length=1))
model.add(Reshape((4,)))
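# With FrozenLake's 16 discrete states and 4 actions, Embedding(16, 4) learns
# one 4-vector of Q-values per state and Reshape((4,)) exposes it to the agent,
# which makes this network effectively a learned tabular Q-function.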
print(model.summary())

memory = SequentialMemory(limit=10000, window_length=1)
policy = BoltzmannQPolicy()
Dqn = DQNAgent(model=model, nb_actions=Actions,
               memory=memory, nb_steps_warmup=500,
               target_model_update=1e-2, policy=policy,
               enable_double_dqn=False, batch_size=512
               )
Dqn.compile(Adam())


Dqn.fit(env, nb_steps=1e5, visualize=False, verbose=1, log_interval=10000)

Dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

Dqn.test(env, nb_episodes=20, visualize=False)

Example #22
def training_game():
    env = Environment(
        map_name="ForceField",
        visualize=True,
        game_steps_per_episode=150,
        agent_interface_format=features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=64, minimap=32)))

    input_shape = (_SIZE, _SIZE, 1)
    nb_actions = 12  # Number of actions

    model = neural_network_model(input_shape, nb_actions)
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)

    processor = SC2Proc()

    # Policy

    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr="eps",
                                  value_max=1,
                                  value_min=0.2,
                                  value_test=.0,
                                  nb_steps=1e2)

    # Agent

    dqn = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        enable_double_dqn=True,
        enable_dueling_network=True,
        # 2019-07-12 GU Zhan (Sam)
        #                   nb_steps_warmup=500, target_model_update=1e-2, policy=policy,
        nb_steps_warmup=1500,
        target_model_update=1e-2,
        policy=policy,
        batch_size=150,
        processor=processor,
        delta_clip=1)

    dqn.compile(Adam(lr=.001), metrics=["mae", "acc"])

    # Tensorboard callback

    timestamp = f"{datetime.datetime.now():%Y-%m-%d %I:%M%p}"
    # 2019-07-12 GU Zhan (Sam) folder name for Lunux:
    #    callbacks = keras.callbacks.TensorBoard(log_dir='./Graph/'+ timestamp, histogram_freq=0,
    #                                write_graph=True, write_images=False)

    # 2019-07-12 GU Zhan (Sam) folder name for Windows:
    callbacks = keras.callbacks.TensorBoard(log_dir=r'.\Graph\issgz',
                                            histogram_freq=0,
                                            write_graph=True,
                                            write_images=False)

    # Save the parameters and upload them when needed

    name = "agent"
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"

    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"

    log_file = "training_w_{}_log.json".format(name)

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    class Saver(Callback):
        def on_episode_end(self, episode, logs={}):
            if episode % 200 == 0:
                self.model.save_weights(w_file, overwrite=True)

    s = Saver()
    logs = FileLogger('DQN_Agent_log.csv', interval=1)

    #    dqn.fit(env, callbacks=[callbacks,s,logs], nb_steps=600, action_repetition=2,
    dqn.fit(env,
            callbacks=[callbacks, s, logs],
            nb_steps=3000,
            action_repetition=2,
            log_interval=1e4,
            verbose=2)

    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
Example #23
import rl.callbacks


class EpisodeLogger(rl.callbacks.Callback):
    def __init__(self):
        self.observations = {}
        self.rewards = {}
        self.actions = {}

    def on_episode_begin(self, episode, logs):
        self.observations[episode] = []
        self.rewards[episode] = []
        self.actions[episode] = []

    def on_step_end(self, step, logs):
        episode = logs['episode']
        self.observations[episode].append(logs['observation'])
        self.rewards[episode].append(logs['reward'])
        self.actions[episode].append(logs['action'])


cb_ep = EpisodeLogger()
dqn.test(env, nb_episodes=10, visualize=False, callbacks=[cb_ep])

import matplotlib.pyplot as plt

for obs in cb_ep.observations.values():
    plt.plot([o[0] for o in obs])
plt.xlabel("step")
plt.ylabel("pos")
Example #24
class DistopiaDQN:
    def __init__(self,
                 env_name='distopia-initial4-v0',
                 in_path=None,
                 out_path=None,
                 terminate_on_fail=False,
                 reconstruct=False):
        self.ENV_NAME = env_name
        self.filename = self.ENV_NAME
        self.init_paths(in_path, out_path)
        self.init_env(terminate_on_fail)
        self.init_model(reconstruct)
        self.compile_agent()

    def init_paths(self, in_path, out_path):
        self.in_path = in_path  #if self.in_path != None else './'
        self.out_path = out_path if out_path != None else './'
        self.log_path = "./logs/{}".format(time.time())
        os.mkdir(self.log_path)

    def init_env(self, terminate_on_fail):
        self.env = gym.make(self.ENV_NAME)
        self.env.terminate_on_fail = terminate_on_fail
        self.env.record_path = "{}/ep_".format(self.log_path)
        self.env = gym.wrappers.Monitor(self.env, "recording", force=True)
        np.random.seed(234)
        self.env.seed(234)
        self.nb_actions = np.sum(self.env.action_space.nvec)
        self.num_actions = self.env.NUM_DIRECTIONS
        self.num_blocks = self.env.NUM_DISTRICTS * self.env.BLOCKS_PER_DISTRICT

    def init_model(self, reconstruct=False):
        if self.in_path is not None:
            if reconstruct:
                self.construct_model()
            else:
                with open("{}/{}.yaml".format(self.in_path, self.filename),
                          'r') as yaml_file:
                    model_yaml = yaml_file.read()
                self.model = model_from_yaml(model_yaml)
            self.model.load_weights("{}/{}.h5".format(self.in_path,
                                                      self.filename))
        else:
            # Next, we build a very simple model.
            self.construct_model()
        self.save_model()
        print(self.model.summary())

    def construct_model(self):
        self.model = Sequential()
        self.model.add(
            Flatten(input_shape=(1, ) + self.env.observation_space.shape))
        self.model.add(Dense(64))
        self.model.add(Activation('relu'))
        self.model.add(Dense(64))
        self.model.add(Activation('relu'))
        # self.model.add(Dense(16))
        # self.model.add(Activation('relu'))
        self.model.add(Dense(self.nb_actions))
        self.model.add(Activation('linear'))

    def save_model(self):
        if self.out_path is not None:
            with open(self.filename + ".yaml", 'w+') as yaml_file:
                yaml_file.write(self.model.to_yaml())
            self.model.save_weights('{}/{}.h5'.format(self.out_path,
                                                      self.ENV_NAME))

    def compile_agent(self):
        # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
        # even the metrics!
        processor = DistopiaProcessor(self.num_blocks, self.num_actions)
        memory = SequentialMemory(limit=50000, window_length=1)
        #policy = PatchedBoltzmannQPolicy(num_actions = self.num_actions, num_blocks = self.num_blocks)
        #test_policy = PatchedGreedyQPolicy(num_actions = self.num_actions, num_blocks = self.num_blocks)
        policy = RandomPolicy()
        test_policy = GreedyQPolicy()
        self.dqn = DQNAgent(model=self.model,
                            processor=processor,
                            nb_actions=self.nb_actions,
                            memory=memory,
                            nb_steps_warmup=1000,
                            enable_double_dqn=True,
                            target_model_update=1e-2,
                            policy=policy,
                            test_policy=test_policy,
                            gamma=0.9)
        self.dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    def train(self, max_steps=100, episodes=100):
        # Okay, now it's time to learn something! We visualize the training here for show, but this
        # slows down training quite a lot. You can always safely abort the training prematurely using
        # Ctrl + C.
        self.env._max_steps = max_steps
        #for i in range(episodes):
        self.env.current_step = 0
        n_steps = max_steps * episodes
        logger = FileLogger(
            filepath='{}/{}.json'.format(self.out_path, self.ENV_NAME))
        self.dqn.fit(self.env,
                     nb_steps=n_steps,
                     nb_max_episode_steps=max_steps,
                     visualize=False,
                     verbose=1,
                     callbacks=[logger])
        #self.env.reset()

        # After episode is done, we save the final weights.
        self.dqn.save_weights('{}/{}.h5'.format(self.out_path, self.ENV_NAME),
                              overwrite=True)

    def test(self):
        # Finally, evaluate our algorithm for 5 episodes.
        self.dqn.test(self.env,
                      nb_episodes=5,
                      nb_max_start_steps=0,
                      visualize=True)
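# A hedged usage sketch of the class above: it assumes the distopia gym
# environments are installed and registered; the environment name and the
# step/episode counts are illustrative only.
if __name__ == '__main__':
    trainer = DistopiaDQN(env_name='distopia-initial4-v0',
                          out_path='.',
                          terminate_on_fail=False)
    trainer.train(max_steps=100, episodes=100)  # 100 episodes of up to 100 steps
    trainer.test()                              # evaluates for 5 episodes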
Example #25
0
print('connector shape', connector.shape)

## Environment parameters
observation_shape = market.observation_space.shape
nb_actions = market.action_space.n
print('state =', observation_shape, '| actions =', nb_actions)

## Init ML-model for agent
model = simple_model(observation_shape, nb_actions)

## Init RL-method parameters
memory = SequentialMemory(limit=10000, window_length=1)
policy = BoltzmannQPolicy()

## Init RL agent
agent = DQNAgent(model=model, nb_actions=nb_actions,
                 memory=memory, nb_steps_warmup=1000,
                 target_model_update=1e-2, policy=policy,
                 # enable_dueling_network=True, dueling_type='avg'
                 )
agent.compile(Adam(lr=1e-3), metrics=['mae'])

## Train and evaluation
# agent.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))

agent.fit(market, nb_steps=100000, visualize=False, verbose=2)

agent.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

agent.test(market, nb_episodes=5, visualize=False)
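## Note: the script above assumes a simple_model helper that is not shown in
## this snippet. A minimal sketch of such a builder, assuming a flat
## observation space (layer sizes are illustrative):
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten

def simple_model(observation_shape, nb_actions):
    model = Sequential()
    # keras-rl prepends the window dimension (window_length=1 above)
    model.add(Flatten(input_shape=(1,) + observation_shape))
    model.add(Dense(32))
    model.add(Activation('relu'))
    model.add(Dense(32))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    return model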
Example #26
0
               target_model_update=10000,
               train_interval=4,
               delta_clip=1.)

#Prioritized Memories typically use lower learning rates
dqn.compile(Adam(lr=.00025 / 4), metrics=['mae'])

folder_path = '../model_saves/PDD/'

if args.mode == 'train':
    weights_filename = folder_path + 'pdd_dqn_{}_weights.h5f'.format(
        args.env_name)
    checkpoint_weights_filename = folder_path + 'pdd_dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = folder_path + 'pdd_dqn_' + args.env_name + '_REWARD_DATA.txt'
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=500000)
    ]
    callbacks += [TrainEpisodeLogger(log_filename)]
    dqn.fit(env,
            callbacks=callbacks,
            nb_steps=10000000,
            verbose=0,
            nb_max_episode_steps=20000)

elif args.mode == 'test':
    weights_filename = folder_path + 'pdd_dqn_MsPacmanDeterministic-v4_weights_10000000.h5f'
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True, nb_max_start_steps=80)
Example #27
0
#policy = BoltzmannQPolicy()
# The dueling head must be requested at construction time; keras-rl builds it
# inside DQNAgent.__init__, so setting the attribute afterwards has no effect.
dqn = DQNAgent(model=model,
               nb_actions=num_actions,
               memory=memory,
               nb_steps_warmup=window * 3,
               enable_dueling_network=True,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
if model_exist:
    dqn.load_weights(model_path)
    dqn.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                      attr='eps',
                                      value_max=.5,
                                      value_min=.1,
                                      value_test=.05,
                                      nb_steps=5000)

env.set_data_interval(train_start, train_end)
train_history = dqn.fit(env,
                        nb_steps=5000,
                        visualize=False,
                        verbose=2,
                        action_repetition=5)
env.set_data_interval(train_start, test_end)
print('Whole')
test_history = dqn.test(env, nb_episodes=2)
dqn.save_weights(model_path, overwrite=True)
env.save_action_plot('action_validate.csv')
plt.axvline(x=train_end - train_start)
plt.show()
Example #28
0
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=5000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=2500, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(Monitor(env, '.'), nb_episodes=5, visualize=True)
Example #29
0
# Okay, now it's time to learn something!
# We visualize the training here for show, but this
# slows down training quite a lot.
# You can always safely abort the training prematurely using Ctrl + C.
history_0 = dqn.fit(env,
                    nb_steps=175000,
                    visualize=False,
                    verbose=2,
                    nb_max_episode_steps=10000)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
history_1 = dqn.test(env, nb_episodes=10, visualize=False)

scipy.io.savemat('history_0.mat',
                 history_0.history,
                 appendmat=True,
                 format='5',
                 long_field_names=False,
                 do_compression=False,
                 oned_as='row')
scipy.io.savemat('history_1.mat',
                 history_1.history,
                 appendmat=True,
                 format='5',
                 long_field_names=False,
                 do_compression=False,
                 oned_as='row')
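# The histories can be read back later with scipy.io.loadmat for plotting;
# a short sketch (the 'episode_reward' key follows keras-rl's fit() history,
# and oned_as='row' above means values come back as row vectors):
import scipy.io
import matplotlib.pyplot as plt

train_hist = scipy.io.loadmat('history_0.mat')
rewards = train_hist['episode_reward'].ravel()
plt.plot(rewards)
plt.xlabel('episode')
plt.ylabel('training episode reward')
plt.show()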
Example #30
0
class DQNKeras(AbstractAgent):
    def __init__(self,
                 env,
                 callbacks=None,
                 timesteps_per_episode=60,
                 batch_size=32):
        super().__init__(env, timesteps_per_episode)
        self.action_size = env.action_space.n
        self.state_size = env.num_states
        self.callbacks = callbacks
        np.random.seed(123)
        if hasattr(env, '_seed'):
            env._seed(123)
        # Build networks
        self.model = self._build_compile_model()
        memory = SequentialMemory(limit=50000, window_length=1)
        policy = EpsGreedyQPolicy()
        self.dqn_only_embedding = DQNAgent(model=self.model,
                                           nb_actions=self.action_size,
                                           memory=memory,
                                           nb_steps_warmup=500,
                                           target_model_update=1e-2,
                                           policy=policy)

    def _build_compile_model(self):
        model = Sequential()
        model.add(Embedding(self.state_size, 10, input_length=1))  # 600000
        model.add(Reshape((10, )))
        # model.add(Flatten())
        model.add(Dense(50, activation='relu'))
        model.add(Dense(50, activation='relu'))
        model.add(Dense(50, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        # print(model.summary())
        return model

    def run(self) -> {str: float}:
        """
        The agent's training method.
        Returns: a dictionary - {"episode_reward_mean": __, "episode_reward_min": __, "episode_reward_max": __,
        "episode_len_mean": __}
        """
        self.dqn_only_embedding.compile(Adam(lr=1e-3), metrics=['mae'])
        history = self.dqn_only_embedding.fit(self.env,
                                              nb_steps=ITER_NUM,
                                              visualize=False,
                                              verbose=1,
                                              nb_max_episode_steps=ITER_NUM,
                                              log_interval=10000)

        # history = self.dqn_only_embedding.fit(self.env, nb_steps=2000, visualize=False, verbose=1,
        #                                       nb_max_episode_steps=60, log_interval=2000)
        result = {
            EPISODE_REWARD_MEAN: np.array(history.history["episode_reward"]),
            EPISODE_STEP_NUM_MEAN:
            np.array(history.history["nb_episode_steps"]),
            EPISODE_REWARD_MIN: np.empty([]),
            EPISODE_REWARD_MAX: np.empty([]),
            EPISODE_VARIANCE: np.empty([])
        }
        return result

    def compute_action(self, state) -> int:
        """
        Computes the best action from a given state.
        Returns: a int that represents the best action.
        """
        # self.epsilon *= self.epsilon_decay
        # self.epsilon = max(self.epsilon_min, self.epsilon)
        # if np.random.random() < self.epsilon:
        #     return self.env.action_space.sample()
        state = np.array([[state]])
        return int(np.argmax(self.model.predict(state)))

    def stop_episode(self):
        pass

    def episode_callback(self, state, action, reward, next_state, terminated):
        pass

    def evaluate(self, visualize=True):
        self.dqn_only_embedding.test(self.env,
                                     nb_episodes=1,
                                     visualize=visualize,
                                     nb_max_episode_steps=70)

    def load_existing_agent(self, dir_path):
        self.model.load_weights(dir_path)
        self.dqn_only_embedding.compile(Adam(lr=1e-3), metrics=['mae'])
        return self
Example #31
0
#   class TestCallback(Callback):
#       def on_epoch_end(self, epoch, logs=None):
#           test_env = gym.make(args.env_name)
#           test_env.setMapSize(MAP_X,MAP_Y)
#           dqn.test(test_env, nb_episodes=1, visualize=True, nb_max_start_steps=100)
#           test_env.win1.destroy()
#           test_env.close()
#           del(test_env)
#   callbacks += [TestCallback()]
#   if args.loadmodel:
#       dqn.model.load(args.loadmodel)
    if args.weights:
        dqn.load_weights(args.weights)

    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)
  # dqn.save_model(model_filename)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)
 #  gtk.main()

elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True, nb_max_start_steps=0)
Example #32
0
class Agent(object):
    name = 'DQN'

    def __init__(
            self,
            step_size=1,
            window_size=20,
            train=True,
            max_position=5,
            weights=True,
            fitting_file='ETH-USD_2018-12-31.xz',
            testing_file='ETH-USD_2018-01-01.xz',
            format_3d=False,  # add 3rd dimension for CNNs
            env='market-maker-v0',
            seed=1,
            action_repeats=4,
            number_of_training_steps=1e5,
            visualize=False):
        self.env_name = env
        self.env = gym.make(self.env_name,
                            training=train,
                            fitting_file=fitting_file,
                            testing_file=testing_file,
                            step_size=step_size,
                            max_position=max_position,
                            window_size=window_size,
                            seed=seed,
                            action_repeats=action_repeats,
                            format_3d=format_3d)
        # Number of frames to stack e.g., 1; Keras-RL uses its own stacker
        self.memory_frame_stack = 1
        self.model = self.create_model()
        self.memory = SequentialMemory(limit=10000,
                                       window_length=self.memory_frame_stack)
        self.train = train
        self.number_of_training_steps = number_of_training_steps
        self.weights = weights
        self.cwd = os.path.dirname(os.path.realpath(__file__))
        self.visualize = visualize

        # create the agent
        self.agent = DQNAgent(model=self.model,
                              nb_actions=self.env.action_space.n,
                              memory=self.memory,
                              processor=None,
                              nb_steps_warmup=500,
                              enable_dueling_network=True,
                              dueling_type='avg',
                              enable_double_dqn=True,
                              gamma=0.999,
                              target_model_update=1000,
                              delta_clip=1.0)

        self.agent.compile(RMSprop(lr=0.00048), metrics=['mae'])

    def __str__(self):
        # msg = '\n'
        # return msg.join(['{}={}'.format(k, v) for k, v in self.__dict__.items()])
        return 'Agent = {} | env = {} | number_of_training_steps = {}'.format(
            Agent.name, self.env_name, self.number_of_training_steps)

    def create_model(self):

        features_shape = (self.memory_frame_stack,
                          *self.env.observation_space.shape)
        model = Sequential()
        conv = Conv2D

        model.add(
            conv(input_shape=features_shape,
                 filters=16,
                 kernel_size=8,
                 padding='same',
                 activation='relu',
                 strides=4,
                 data_format='channels_first'))
        model.add(
            conv(filters=32,
                 kernel_size=4,
                 padding='same',
                 activation='relu',
                 strides=2,
                 data_format='channels_first'))
        model.add(
            conv(filters=32,
                 kernel_size=2,
                 padding='same',
                 activation='relu',
                 strides=1,
                 data_format='channels_first'))
        model.add(Flatten())
        model.add(Dense(256))
        model.add(Activation('linear'))
        model.add(Dense(self.env.action_space.n))
        model.add(Activation('softmax'))

        print(model.summary())
        return model

    def start(self):
        weights_filename = '{}/dqn_weights/dqn_{}_weights.h5f'.format(
            self.cwd, self.env_name)
        if self.weights:
            self.agent.load_weights(weights_filename)
            print('...loading weights for {}'.format(self.env_name))

        if self.train:
            checkpoint_weights_filename = 'dqn_' + self.env_name + \
                                          '_weights_{step}.h5f'
            checkpoint_weights_filename = '{}/dqn_weights/'.format(self.cwd) + \
                                          checkpoint_weights_filename
            log_filename = '{}/dqn_weights/dqn_{}_log.json'.format(
                self.cwd, self.env_name)
            print('FileLogger: {}'.format(log_filename))

            callbacks = [
                ModelIntervalCheckpoint(checkpoint_weights_filename,
                                        interval=250000)
            ]
            callbacks += [FileLogger(log_filename, interval=100)]

            print('Starting training...')
            self.agent.fit(self.env,
                           callbacks=callbacks,
                           nb_steps=self.number_of_training_steps,
                           log_interval=10000,
                           verbose=0,
                           visualize=self.visualize)
            print('Saving AGENT weights...')
            self.agent.save_weights(weights_filename, overwrite=True)
        else:
            print('Starting TEST...')
            self.agent.test(self.env, nb_episodes=2, visualize=self.visualize)
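# A hedged driver sketch for the Agent class above: it assumes the
# market-maker-v0 environment and the fitting/testing data files are
# available locally; the argument values mirror the constructor defaults
# and are illustrative.
if __name__ == '__main__':
    agent = Agent(env='market-maker-v0',
                  train=True,
                  weights=False,                  # start from scratch instead of loading weights
                  number_of_training_steps=1e5,
                  visualize=False)
    print(agent)
    agent.start()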
Example #33
0
class DeepQTrading:
    
    #Class constructor
    #model: Keras model considered
    #explorations: a vector of (epsilon, epochs) pairs giving the probability of random
    # actions and how many epochs the algorithm runs for each epsilon (the algorithm is run over several iterations)
    #trainSize: size of the training set
    #validationSize: size of the validation set
    #testSize: size of the testing set 
    #outputFile: name of the file to print results
    #begin: Initial date
    #end: final date
    #nbActions: number of decisions (0-Hold 1-Long 2-Short) 
    #nOutput: the number of walks (Tonio set it to 20, but in reality 5 walks are used)
    #operationCost: Price for the transaction
    #telegramToken: token used for the bot that will send messages
    #telegramChatID: ID of messager receiver in Telegram
    #ensemble.py runs the ensemble
    def __init__(self, model, explorations, trainSize, validationSize, testSize, outputFile, begin, end, nbActions, nOutput=1, operationCost=0,telegramToken="",telegramChatID=""):
        
        #If both the telegram token for the bot and the telegram id of the receiver are provided,
        #prepare the bot to send messages; otherwise report an error
        if(telegramToken!="" and telegramChatID!=""):
            self.chatID=telegramChatID
            self.telegramOutput=True
            try:
                self.bot = telegram.Bot(token=telegramToken)
            except:
                print("Error with Telegram Bot")
        
        #If either of them is empty, disable the Telegram output
        else:
            self.telegramOutput=False

        #Define the policy, explorations, actions and model as received by parameters
        self.policy = EpsGreedyQPolicy()
        self.explorations=explorations
        self.nbActions=nbActions
        self.model=model

        #Define the memory
        self.memory = SequentialMemory(limit=10000, window_length=1)

        #Instantiate the agent with parameters received
        self.agent = DQNAgent(model=self.model, policy=self.policy,  nb_actions=self.nbActions, memory=self.memory, nb_steps_warmup=200, target_model_update=1e-1,
                                    enable_double_dqn=True,enable_dueling_network=True)
        
        #Compile the agent with the adam optimizer and with the mean absolute error metric
        self.agent.compile(Adam(lr=1e-3), metrics=['mae'])

        #Save the weights of the agents in the q.weights file
        #Save random weights
        self.agent.save_weights("q.weights", overwrite=True)

        #Define the current starting point as the initial date
        self.currentStartingPoint = begin

        #Define the training, validation and testing size as informed by the call
        #Train: five years
        #Validation: 6 months
        #Test: 6 months
        self.trainSize=trainSize
        self.validationSize=validationSize
        self.testSize=testSize
        
        #The walk size is simply summing up the train, validation and test sizes
        self.walkSize=trainSize+validationSize+testSize
        
        #Define the ending point as the final date (January 1st of 2010)
        self.endingPoint=end

        #Read the hourly dataset
        #Data from different files is joined; here we read the hourly file
        self.dates= pd.read_csv('./dataset/'+MK+'Hour.csv')

        #Read the hourly dataset
        self.sp = pd.read_csv('./dataset/'+MK+'Hour.csv')
        #Convert the pandas format to date and time format
        self.sp['Datetime'] = pd.to_datetime(self.sp['Date'] + ' ' + self.sp['Time'])
        #Set an index to Datetime on the pandas loaded dataset. Register will be indexes through this value
        self.sp = self.sp.set_index('Datetime')
        #Drop Time and Date from the Dataset
        self.sp = self.sp.drop(['Time','Date'], axis=1)
        #Just the index will be important, because date and time will be used to define the train, validation and test 
        #for each walk
        self.sp = self.sp.index

        #Receives the operation cost which is 0
        #Operation cost is the cost for long and short. It is defined as zero
        self.operationCost = operationCost
        
        #Call the callback for training, validation and test in order to show the results for each episode 
        self.trainer=ValidationCallback()
        self.validator=ValidationCallback()
        self.tester=ValidationCallback()
        
        #Initiate the output file
        self.outputFile=[]
        
        #Write in the file
        for i in range(0,nOutput):
            
          
            self.outputFile.append(open(outputFile+str(i+1)+".csv", "w+"))

            #Write the fields in the file
            self.outputFile[i].write(
            "Iteration,"+
            "trainAccuracy,"+
            "trainCoverage,"+
            "trainReward,"+
            "trainLong%,"+
            "trainShort%,"+
            "trainLongAcc,"+
            "trainShortAcc,"+
            "trainLongPrec,"+
            "trainShortPrec,"+

            "validationAccuracy,"+
            "validationCoverage,"+
            "validationReward,"+
            "validationLong%,"+
            "validationShort%,"+
            "validationLongAcc,"+
            "validationShortAcc,"+
            "validLongPrec,"+
            "validShortPrec,"+
            
            "testAccuracy,"+
            "testCoverage,"+
            "testReward,"+
            "testLong%,"+
            "testShort%,"+
            "testLongAcc,"+
            "testShortAcc,"+
            "testLongPrec,"+
            "testShortPrec\n")
        

    def run(self):

        #Initialize placeholders for the training, validation and testing environments
        trainEnv=validEnv=testEnv=" "

        iteration=-1

        #While we did not pass through all the dates (i.e., while all the walks were not finished)
        #walk size is train+validation+test size
        #currentStarting point begins with begin date
        while(self.currentStartingPoint+self.walkSize <= self.endingPoint):

            #Each iteration corresponds to one walk
            iteration+=1

            #Send to the receiver the current walk
            if(self.telegramOutput):
                self.bot.send_message(chat_id=self.chatID, text="Walk "+str(iteration + 1 )+" started.")
            
            #Empty the memory and agent
            del(self.memory)
            del(self.agent)

            #Define the memory and agent
            #Memory is Sequential
            self.memory = SequentialMemory(limit=10000, window_length=1)
            #Agent is initiated as passed through parameters
            self.agent = DQNAgent(model=self.model, policy=self.policy,  nb_actions=self.nbActions, memory=self.memory, nb_steps_warmup=200, target_model_update=1e-1,
                                    enable_double_dqn=True,enable_dueling_network=True)
            #Compile the agent with Adam initialization
            self.agent.compile(Adam(lr=1e-3), metrics=['mae'])
            
            #Load the weights saved before in a random way if it is the first time
            self.agent.load_weights("q.weights")
            
            ########################################TRAINING STAGE########################################################
            
            #trainMinLimit is the index of the current starting date.
            #If that date is not present in the index, shift the starting point forward by 1 hour and try again
            trainMinLimit=None
            while(trainMinLimit is None):
                try:
                    trainMinLimit = self.sp.get_loc(self.currentStartingPoint)
                except:
                    self.currentStartingPoint+=datetime.timedelta(0,0,0,0,0,1,0)

            #trainMaxLimit is the index of the starting date plus the training size.
            #If that date is not present in the index, shift the starting point forward by 1 hour and try again
            trainMaxLimit=None
            while(trainMaxLimit is None):
                try:
                    trainMaxLimit = self.sp.get_loc(self.currentStartingPoint+self.trainSize)
                except:
                    self.currentStartingPoint+=datetime.timedelta(0,0,0,0,0,1,0)
            
            ########################################VALIDATION STAGE#######################################################
            
            #The ValidMinLimit will be loaded as the TrainMax limit
            validMinLimit=trainMaxLimit+1

            #validMaxLimit is the index of the starting date plus the training and validation sizes.
            #If that date is not present in the index, shift the starting point forward by 1 hour and try again
            validMaxLimit=None
            while(validMaxLimit is None):
                try:
                    validMaxLimit = self.sp.get_loc(self.currentStartingPoint+self.trainSize+self.validationSize)
                except:
                    self.currentStartingPoint+=datetime.timedelta(0,0,0,0,0,1,0)

            ########################################TESTING STAGE######################################################## 
            #The TestMinLimit will be loaded as the ValidMaxlimit 
            testMinLimit=validMaxLimit+1

            #testMaxLimit is the index of the starting date plus the training, validation and test sizes.
            #If that date is not present in the index, shift the starting point forward by 1 hour and try again
            testMaxLimit=None
            while(testMaxLimit is None):
                try:
                    testMaxLimit = self.sp.get_loc(self.currentStartingPoint+self.trainSize+self.validationSize+self.testSize)
                except:
                    self.currentStartingPoint+=datetime.timedelta(0,0,0,0,0,1,0)

            #Separate the Validation and testing data according to the limits found before
            #Prepare the training and validation files for saving them later 
            ensambleValid=pd.DataFrame(index=self.dates[validMinLimit:validMaxLimit].loc[:,'Date'].drop_duplicates().tolist())
            ensambleTest=pd.DataFrame(index=self.dates[testMinLimit:testMaxLimit].loc[:,'Date'].drop_duplicates().tolist())
            
            #Put the name of the index for validation and testing
            ensambleValid.index.name='Date'
            ensambleTest.index.name='Date'
            
           
            #Each entry in explorations is an (epsilon, number of epochs) pair
            for eps in self.explorations:

                #eps[0] sets the policy's epsilon: with eps[0]=0.2, random actions are taken with 20% probability
                self.policy.eps = eps[0]
                
                #run eps[1] iterations (e.g. 100) with this epsilon
                for i in range(0,eps[1]):
                    
                    del(trainEnv)

                    #Define the training, validation and testing environments with their respective callbacks
                    trainEnv = SpEnv(operationCost=self.operationCost,minLimit=trainMinLimit,maxLimit=trainMaxLimit,callback=self.trainer)
                    del(validEnv)
                    validEnv=SpEnv(operationCost=self.operationCost,minLimit=validMinLimit,maxLimit=validMaxLimit,callback=self.validator,ensamble=ensambleValid,columnName="iteration"+str(i))
                    del(testEnv)
                    testEnv=SpEnv(operationCost=self.operationCost,minLimit=testMinLimit,maxLimit=testMaxLimit,callback=self.tester,ensamble=ensambleTest,columnName="iteration"+str(i))

                    #Reset the callback
                    self.trainer.reset()
                    self.validator.reset()
                    self.tester.reset()

                    #Reset the training environment
                    trainEnv.resetEnv()
                    #Train the agent
                    self.agent.fit(trainEnv,nb_steps=floor(self.trainSize.days-self.trainSize.days*0.2),visualize=False,verbose=0)
                    #Get the info from the train callback
                    (_,trainCoverage,trainAccuracy,trainReward,trainLongPerc,trainShortPerc,trainLongAcc,trainShortAcc,trainLongPrec,trainShortPrec)=self.trainer.getInfo()
                    #Print Callback values on the screen
                    print(str(i) + " TRAIN:  acc: " + str(trainAccuracy)+ " cov: " + str(trainCoverage)+ " rew: " + str(trainReward))

                    #Reset the validation environment
                    validEnv.resetEnv()
                    #Test the agent on validation data
                    self.agent.test(validEnv,nb_episodes=floor(self.validationSize.days-self.validationSize.days*0.2),visualize=False,verbose=0)
                    #Get the info from the validation callback
                    (_,validCoverage,validAccuracy,validReward,validLongPerc,validShortPerc,validLongAcc,validShortAcc,validLongPrec,validShortPrec)=self.validator.getInfo()
                    #Print callback values on the screen
                    print(str(i) + " VALID:  acc: " + str(validAccuracy)+ " cov: " + str(validCoverage)+ " rew: " + str(validReward))

                    #Reset the testing environment
                    testEnv.resetEnv()
                    #Test the agent on testing data
                    self.agent.test(testEnv,nb_episodes=floor(self.testSize.days-self.testSize.days*0.2),visualize=False,verbose=0)
                    #Get the info from the testing callback
                    (_,testCoverage,testAccuracy,testReward,testLongPerc,testShortPerc,testLongAcc,testShortAcc,testLongPrec,testShortPrec)=self.tester.getInfo()
                    #Print callback values on the screen
                    print(str(i) + " TEST:  acc: " + str(testAccuracy)+ " cov: " + str(testCoverage)+ " rew: " + str(testReward))
                    print(" ")
                    
                    #write the walk data on the text file
                    self.outputFile[iteration].write(
                        str(i)+","+
                        str(trainAccuracy)+","+
                        str(trainCoverage)+","+
                        str(trainReward)+","+
                        str(trainLongPerc)+","+
                        str(trainShortPerc)+","+
                        str(trainLongAcc)+","+
                        str(trainShortAcc)+","+
                        str(trainLongPrec)+","+
                        str(trainShortPrec)+","+
                        
                        str(validAccuracy)+","+
                        str(validCoverage)+","+
                        str(validReward)+","+
                        str(validLongPerc)+","+
                        str(validShortPerc)+","+
                        str(validLongAcc)+","+
                        str(validShortAcc)+","+
                        str(validLongPrec)+","+
                        str(validShortPrec)+","+
                        
                        str(testAccuracy)+","+
                        str(testCoverage)+","+
                        str(testReward)+","+
                        str(testLongPerc)+","+
                        str(testShortPerc)+","+
                        str(testLongAcc)+","+
                        str(testShortAcc)+","+
                        str(testLongPrec)+","+
                        str(testShortPrec)+"\n")

            #Close the file                
            self.outputFile[iteration].close()

            #For the next walk, the current starting point will be the current starting point + the test size
            #It means that, for the next walk, the training data will start 6 months after the training data of 
            #the previous walk   
            self.currentStartingPoint+=self.testSize

            #Write validation and Testing Data into files
            #Save the files for processing later with the ensemble
            ensambleValid.to_csv("./Output/ensemble/walk"+str(iteration)+"ensemble_valid.csv")
            ensambleTest.to_csv("./Output/ensemble/walk"+str(iteration)+"ensemble_test.csv")

    #Function to end the Agent
    def end(self):
        #Close the files where the results were written
        for outputFile in self.outputFile:
            outputFile.close()
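# A hedged driver sketch for DeepQTrading: the dates, sizes and the Keras
# model below are illustrative, and MK, SpEnv and ValidationCallback are
# assumed to be defined elsewhere in the project (as they are for the class).
import datetime
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten

model = Sequential()
model.add(Flatten(input_shape=(1, 68)))    # 68 is a placeholder feature count
model.add(Dense(35, activation='relu'))
model.add(Dense(3, activation='linear'))   # 3 actions: Hold, Long, Short

trader = DeepQTrading(model=model,
                      explorations=[(0.2, 100)],             # (epsilon, epochs) pairs
                      trainSize=datetime.timedelta(days=5 * 365),
                      validationSize=datetime.timedelta(days=182),
                      testSize=datetime.timedelta(days=182),
                      outputFile="./Output/csv/walk",
                      begin=datetime.datetime(2004, 1, 1),
                      end=datetime.datetime(2010, 1, 1),
                      nbActions=3,
                      nOutput=5)
trader.run()
trader.end()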
Example #34
0
policy = LinearAnnealedPolicy(policy,
                              attr='eps',
                              value_max=eps,
                              value_min=0,
                              value_test=0,
                              nb_steps=nb_steps)
test_policy = GreedyQPolicy()

####################################################################

dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=window_length + batch_size,
               target_model_update=0.02,
               policy=policy,
               test_policy=test_policy,
               batch_size=batch_size,
               train_interval=train_interval,
               gamma=gamma)

dqn.compile(Adam(lr=0.00025), metrics=['mae'])

dqn.load_weights('dqn_{}_weights.h5f'.format('lunar'))

####################################################################

nb_episodes = 10

history = dqn.test(env, nb_episodes=nb_episodes)
Example #35
0
def build_train_test(args, timesteps):

    # Get the environment and extract the number of actions.
    env = gym.make(args.env_name)
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n

    # Next, we build our model. We use the same model that was described by Mnih et al. (2015).
    input_shape = (WINDOW_LENGTH, ) + INPUT_SHAPE
    model = Sequential()
    if K.image_dim_ordering() == 'tf':
        # (width, height, channels)
        model.add(Permute((2, 3, 1), input_shape=input_shape))
    elif K.image_dim_ordering() == 'th':
        # (channels, width, height)
        model.add(Permute((1, 2, 3), input_shape=input_shape))
    else:
        raise RuntimeError('Unknown image_dim_ordering.')
    model.add(Convolution2D(32, (8, 8), strides=(4, 4)))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, (4, 4), strides=(2, 2)))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, (3, 3), strides=(1, 1)))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = AtariProcessor()

    # Select a policy. We use eps-greedy action selection, which means that a random action is selected
    # with probability eps. We anneal eps from 1.0 to 0.1 over the course of 1M steps. This is done so that
    # the agent initially explores the environment (high eps) and then gradually sticks to what it knows
    # (low eps). We also set a dedicated eps value that is used during testing. Note that we set it to 0.05
    # so that the agent still performs some random actions. This ensures that the agent cannot get stuck.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr='eps',
                                  value_max=1.,
                                  value_min=.1,
                                  value_test=.05,
                                  nb_steps=1000000)

    # The trade-off between exploration and exploitation is difficult and an on-going research topic.
    # If you want, you can experiment with the parameters or use a different policy. Another popular one
    # is Boltzmann-style exploration:
    # policy = BoltzmannQPolicy(tau=1.)
    # Feel free to give it a try!

    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   policy=policy,
                   memory=memory,
                   processor=processor,
                   nb_steps_warmup=50000,
                   gamma=.99,
                   target_model_update=10000,
                   train_interval=4,
                   delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])

    if args.mode == 'train':
        # Okay, now it's time to learn something! We capture the interrupt exception so that training
        # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
        weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
        checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
        log_filename = 'dqn_{}_log.json'.format(args.env_name)
        callbacks = [
            ModelIntervalCheckpoint(checkpoint_weights_filename,
                                    interval=250000)
        ]
        callbacks += [FileLogger(log_filename, interval=100)]
        dqn.fit(env,
                callbacks=callbacks,
                nb_steps=timesteps,
                log_interval=10000)

        # After training is done, we save the final weights one more time.
        dqn.save_weights(weights_filename, overwrite=True)

        # Finally, evaluate our algorithm for 10 episodes.
        result = dqn.test(env, nb_episodes=10, visualize=False)
    elif args.mode == 'test':
        weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
        if args.weights:
            weights_filename = args.weights
        dqn.load_weights(weights_filename)
        result = dqn.test(env, nb_episodes=10, visualize=False)
    return result
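# A hedged sketch of the command-line entry point this function expects:
# args must carry env_name, mode and weights. WINDOW_LENGTH, INPUT_SHAPE and
# AtariProcessor are referenced above but not shown here; the values below
# follow the usual Atari DQN preprocessing and are assumptions.
import argparse

INPUT_SHAPE = (84, 84)
WINDOW_LENGTH = 4

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', choices=['train', 'test'], default='train')
    parser.add_argument('--env-name', type=str, default='BreakoutDeterministic-v4')
    parser.add_argument('--weights', type=str, default=None)
    args = parser.parse_args()

    result = build_train_test(args, timesteps=1750000)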
Example #36
0
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!

dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, window_length=WINDOW_LENGTH, memory=memory,
    processor=processor, nb_steps_warmup=50000, gamma=.99, delta_range=(-1., 1.), reward_range=(-1., 1.),
    target_model_update=10000, train_interval=4)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)
Example #37
0
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=100,
                   enable_dueling_network=True,
                   dueling_type='avg',
                   target_model_update=1e-2,
                   policy=train_policy,
                   test_policy=test_policy)

    filename = 'weights/duel_dqn_{}_weights_{}_{}_{}.h5f'.format(
        ENV_NAME, LAYER_SIZE, NUM_STEPS, TRIAL_ID)
else:
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=100,
                   target_model_update=1e-2,
                   policy=train_policy,
                   test_policy=test_policy)

    filename = 'weights/dqn_{}_weights_{}_{}_{}.h5f'.format(
        ENV_NAME, LAYER_SIZE, NUM_STEPS, TRIAL_ID)

dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Load the model weights
dqn.load_weights(FILENAME)

# Finally, evaluate our algorithm for 1 episode.
dqn.test(env, nb_episodes=1, visualize=True, nb_max_episode_steps=500)
Example #38
0
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
import gym
from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

env = gym.make('MountainCar-v0')
nb_actions = env.action_space.n

model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))

memory = SequentialMemory(limit=30000, window_length=1)

policy = EpsGreedyQPolicy(eps=0.001)
dqn = DQNAgent(model=model, nb_actions=nb_actions, gamma=0.99, memory=memory,
               nb_steps_warmup=10, target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

history = dqn.fit(env, nb_steps=30000, visualize=False, verbose=2)

dqn.test(env, nb_episodes=1, visualize=True)
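# A hedged variation: instead of the fixed eps=0.001 greedy policy above, the
# epsilon can be annealed from 1.0 down to 0.1 during training, mirroring the
# Atari example earlier in this collection (values here are illustrative).
from rl.policy import LinearAnnealedPolicy

annealed_policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                       attr='eps',
                                       value_max=1.0,
                                       value_min=0.1,
                                       value_test=0.01,
                                       nb_steps=20000)
# dqn = DQNAgent(model=model, nb_actions=nb_actions, gamma=0.99, memory=memory,
#                nb_steps_warmup=10, target_model_update=1e-2,
#                policy=annealed_policy)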
Example #39
0
dqn.fit(env, nb_steps=50000, visualize=False, verbose=2, nb_max_episode_steps=300)

import rl.callbacks
class EpisodeLogger(rl.callbacks.Callback):
    def __init__(self):
        self.observations = {}
        self.rewards = {}
        self.actions = {}

    def on_episode_begin(self, episode, logs):
        self.observations[episode] = []
        self.rewards[episode] = []
        self.actions[episode] = []

    def on_step_end(self, step, logs):
        episode = logs['episode']
        self.observations[episode].append(logs['observation'])
        self.rewards[episode].append(logs['reward'])
        self.actions[episode].append(logs['action'])

cb_ep = EpisodeLogger()
dqn.test(env, nb_episodes=10, visualize=False, callbacks=[cb_ep])


import matplotlib.pyplot as plt

for obs in cb_ep.observations.values():
    plt.plot([o[0] for o in obs])
plt.xlabel("step")
plt.ylabel("pos")