def train_model(seed=1, setup=0):
    np.random.seed(seed)

    if setup == 0:
        env = CameraControlEnv(a_p=0, a_r=0, e_thres=0)
    elif setup == 1:
        env = CameraControlEnv(a_p=0, a_r=0, e_thres=0.8)
    else:
        env = CameraControlEnv(a_p=0.5, a_r=0.2, e_thres=0.8)

    env.seed(seed)

    model = define_model(actions=7)

    memory = SequentialMemory(limit=10000, window_length=1)

    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.0, value_min=0.1, value_test=0.05,
                                  nb_steps=95000)
    dqn = DQNAgent(model=model, nb_actions=7, policy=policy, memory=memory, processor=None,
                   nb_steps_warmup=500, gamma=0.95, delta_clip=1, target_model_update=0.001, batch_size=32)
    dqn.compile(RMSprop(lr=.0001), metrics=['mae'])

    log_filename = 'results/drone_camera_control_log_' + str(setup) + '.json'
    model_checkpoint_filename = 'results/drone_camera_cnn_weights_' + str(setup) + '_{step}.model'
    callbacks = [ModelIntervalCheckpoint(model_checkpoint_filename, interval=5000)]
    callbacks += [FileLogger(log_filename, interval=1)]

    dqn.fit(env, nb_steps=100000, nb_max_episode_steps=100, verbose=2, visualize=False, log_interval=1,
            callbacks=callbacks)

    # After training is done, save the final weights.
    model_filename = 'models/drone_camera_cnn_' + str(setup) + '.model'
    dqn.save_weights(model_filename, overwrite=True)
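
The helper define_model is not shown in this snippet. A minimal sketch of what such a builder could look like, assuming 64x64 single-channel camera frames and window_length=1 (the input shape and layer sizes here are illustrative guesses, not the original architecture):

from keras.models import Sequential
from keras.layers import Reshape, Conv2D, Flatten, Dense

def define_model(actions=7, frame_shape=(64, 64, 1)):
    # keras-rl feeds states as (window_length,) + frame_shape; with window_length=1
    # we drop that axis and apply a small CNN ending in one Q-value per action.
    model = Sequential()
    model.add(Reshape(frame_shape, input_shape=(1,) + frame_shape))
    model.add(Conv2D(16, (3, 3), strides=2, activation='relu'))
    model.add(Conv2D(32, (3, 3), strides=2, activation='relu'))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model
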
Example #2
def train_1():
    n = 2
    env = Puzzle(n)

    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(env.action_space.n, activation='linear'))

    memory = SequentialMemory(limit=10000, window_length=1)
    policy = BoltzmannQPolicy()

    agent = DQNAgent(model=model,
                     nb_actions=env.action_space.n,
                     memory=memory,
                     nb_steps_warmup=100,
                     target_model_update=1e-2,
                     enable_dueling_network=True,
                     policy=policy)
    agent.compile(Adam(lr=1e-3), metrics=['mse'])

    agent.fit(env,
              nb_steps=500000,
              nb_max_episode_steps=50,
              visualize=False,
              verbose=2,
              callbacks=[TensorBoard(log_dir='temp')])

    agent.save_weights('model/puzzle_2x2.h5')
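
To evaluate the trained agent later, the same model/memory/policy setup must be rebuilt before loading the weights. A minimal usage sketch (test_1 is a hypothetical helper; it assumes agent and env were constructed exactly as in train_1 above):

def test_1(agent, env):
    # agent must be the same DQNAgent configuration that was used for training.
    agent.load_weights('model/puzzle_2x2.h5')
    agent.test(env, nb_episodes=10, nb_max_episode_steps=50, visualize=False)
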
Example #3
def train_1():
    env = MazeEnv(maze_file='data/maze_5x5.npy')

    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(env.action_space.n, activation='linear'))

    memory = SequentialMemory(limit=10000, window_length=1)
    policy = BoltzmannQPolicy()

    agent = DQNAgent(model=model,
                     nb_actions=env.action_space.n,
                     memory=memory,
                     nb_steps_warmup=100,
                     enable_double_dqn=False,
                     enable_dueling_network=False,
                     policy=policy)
    agent.compile(Adam(lr=1e-3), metrics=['mse'])

    agent.fit(env,
              nb_steps=100000,
              nb_max_episode_steps=200,
              visualize=True,
              verbose=2,
              callbacks=[TensorBoard(log_dir='temp')])

    agent.save_weights('model/maze_5x5_1.h5')
def main():
    #logging.basicConfig(level=logging.DEBUG)
    ENV_NAME = "MineRLTreechop-v0"
    env = gym.make(ENV_NAME)  # A MineRLTreechop-v0 env
    nb_actions = 9

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(64, 64, 3)))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    # DQNAgent requires nb_actions and a replay memory in addition to the model and policy.
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=2500, visualize=True, verbose=2)
    print(model.summary())

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
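
Note that MineRL environments return dict observations (the camera image lives under the 'pov' key) and expect dict actions, while the flat model above assumes plain arrays and 9 discrete actions. One way to bridge the observation side is a keras-rl Processor; a hedged sketch (the action mapping, which would go in process_action, is not shown):

import numpy as np
from rl.core import Processor

class TreechopProcessor(Processor):
    def process_observation(self, observation):
        # Extract the 64x64x3 point-of-view frame and scale it to [0, 1].
        pov = observation['pov'] if isinstance(observation, dict) else observation
        return np.asarray(pov, dtype=np.float32) / 255.0

This would be passed to the agent as DQNAgent(..., processor=TreechopProcessor()).
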
Example #5
class ExpAgent:
    """
    ExpAgent: Experiment RL Agent
    Args:
        weights: (optional) the path to the pretrained weights
        env: the environment that the agent interacts with
    """
    def __init__(self, env, weights=None):
        # init D-QN model
        # based on the environment
        model = Sequential()
        model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
        model.add(Dense(128))
        model.add(Activation('relu'))
        model.add(Dense(env.action_space.n))
        model.add(Activation('linear'))
        print(model.summary())

        memory = SequentialMemory(limit=50000, window_length=1)
        self.dqn = DQNAgent(model=model,
                            nb_actions=env.action_space.n,
                            memory=memory,
                            nb_steps_warmup=1000,
                            target_model_update=1e-3,
                            policy=EpsGreedyQPolicy())
        self.dqn.compile(Adam(lr=1e-3), metrics=['mae'])

        if weights:
            self.dqn.load_weights(weights)

    def save_model(self, path):
        self.dqn.save_weights(filepath=path)
        print("{} saved.".format(path))
Example #6
def main():
    env = retro.make(game=ENV_NAME, state=STATE_NAME, use_restricted_actions=retro.Actions.DISCRETE)
    nb_actions = env.action_space.n
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(8, 8), strides=4, activation="relu", input_shape=(1,) + (128, 100), data_format='channels_first'))
    model.add(Conv2D(64, kernel_size=(4, 4), strides=2, activation="relu"))
    model.add(Conv2D(64, (3, 3), activation="relu"))
    model.add(Flatten())
    model.add(Dense(512, activation="relu"))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    print(env.observation_space)

    # Load the model weights from a previous run if a checkpoint file exists
    if os.path.exists('./weights/dqn_cnn_{}_weights.h5f'.format(STATE_NAME)):
        model.load_weights('./weights/dqn_cnn_{}_weights.h5f'.format(STATE_NAME))
    dqn = DQNAgent(processor=CNNProcessor(), model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10000,
               target_model_update=1e-3, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    dqn.fit(env, nb_steps=1000000, visualize=True, verbose=2, callbacks=[InfoCallbackTrain()], action_repetition=4)
    dqn.save_weights('./weights/dqn_cnn_{}_weights.h5f'.format(STATE_NAME), overwrite=True)
    plot_wins()
    #plot_reward(training_history)

    # Finally, evaluate the trained agent for a few episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
Example #7
def train_dqn(env, args):
    from src.Agents import create_dqn_model, dqn_controls, EnvironmentWrapper
    from keras.optimizers import Adam

    from rl.agents.dqn import DQNAgent
    from rl.policy import EpsGreedyQPolicy
    from rl.memory import SequentialMemory

    env = EnvironmentWrapper(dqn_controls, env)

    model = create_dqn_model(env)

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = EpsGreedyQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=env.nb_actions,
                   memory=memory,
                   nb_steps_warmup=2000,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    try:
        dqn.load_weights(args.ai_in)
    except OSError:
        pass

    dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)
    dqn.save_weights(args.ai_out, overwrite=True)
    dqn.test(env, nb_episodes=1, visualize=False)
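
create_dqn_model and EnvironmentWrapper live in the project's own src.Agents module and are not shown here. A plausible minimal version of the model builder, assuming the wrapper exposes observation_space and nb_actions (layer sizes are guesses):

from keras.models import Sequential
from keras.layers import Flatten, Dense

def create_dqn_model(env):
    # Small fully connected Q-network over the flattened observation window.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(env.nb_actions, activation='linear'))
    return model
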
Example #8
class SimpleDQN:
    def __init__(self, observation_shape, nb_actions, eps_steps):
        # First, we build a very simple NN model.

        model = Sequential()
        model.add(Flatten(input_shape=(1, ) + observation_shape))
        model.add(Dense(16))
        model.add(Activation("relu"))
        model.add(Dense(16))
        model.add(Activation("relu"))
        model.add(Dense(16))
        model.add(Activation("relu"))
        model.add(Dense(nb_actions))
        model.add(Activation("linear"))
        print(model.summary())

        # Next, we configure and compile our agent. You can use every
        # built-in Keras optimizer and even the metrics!
        memory = SequentialMemory(limit=50000, window_length=1)

        # policy = BoltzmannQPolicy()
        policy = LinearAnnealedPolicy(
            EpsGreedyQPolicy(),
            attr="eps",
            value_max=1.0,
            value_min=0.1,
            value_test=0.05,
            nb_steps=eps_steps,
        )
        self.dqn = DQNAgent(
            model=model,
            nb_actions=nb_actions,
            memory=memory,
            nb_steps_warmup=1000,
            target_model_update=1e-2,
            enable_dueling_network=True,  # Enable dueling
            dueling_type="avg",
            enable_double_dqn=True,  # Enable double DQN
            policy=policy,
        )

        self.dqn.compile(Adam(lr=1e-3), metrics=["mae"])

    def train(self, env, steps, log_interval=5000):
        # Note: the dueling/double-DQN options are set on the agent above;
        # fit() does not accept them.
        self.dqn.fit(
            env,
            callbacks=[FileLogger("dqn_log.json")],
            log_interval=log_interval,
            nb_steps=steps,
            verbose=1,
            visualize=False,
        )

        # After training is done, we save the final weights.
        self.dqn.save_weights("dqn_weights.h5f", overwrite=True)

    def test(self, env, episodes):
        # Finally, evaluate our algorithm for 5 episodes.
        self.dqn.load_weights("dqn_weights.h5f")
        self.dqn.test(env, nb_episodes=episodes, visualize=False)
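
A usage sketch of SimpleDQN on a standard Gym task (CartPole is only an example; any discrete-action env with a flat observation works):

import gym

env = gym.make('CartPole-v0')
agent = SimpleDQN(observation_shape=env.observation_space.shape,
                  nb_actions=env.action_space.n,
                  eps_steps=10000)
agent.train(env, steps=20000)
agent.test(env, episodes=5)
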
Example #9
def main():
    # Create env
    np.random.seed(SEED)    
    env = PentagoEnv(SIZE, agent_starts = AGENT_STARTS)
    env.seed(SEED)
    nb_actions = env.action_space.n

    # Define model
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(128, activation='sigmoid'))
    model.add(Dense(nb_actions))
    print(model.summary())

    # Configure and compile  agent
    memory = SequentialMemory(limit=5000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                    target_model_update=1000, policy=policy)
    optimizer=RMSprop(lr=0.00025, epsilon=0.01)
    dqn.compile(optimizer)

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=True, verbose=1)

    # After training is done, we save the final weights.
    dqn.save_weights('weights/dqn-{}-weights-{}.h5f'.format(TAG, datetime.datetime.now()))    
Example #10
def main():
    np.random.seed(123)    
    env = PentagoEnv(SIZE)
    env.seed(123)
    nb_actions = env.action_space.n

    model = Sequential()
    #model.add(Reshape((SIZE ** 2,), input_shape=(SIZE, SIZE)))
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(128, activation='sigmoid'))
    model.add(Dense(nb_actions))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=5000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                    target_model_update=1e-2, policy=policy)
    optimizer=RMSprop(lr=0.00025, epsilon=0.01)
    dqn.compile(optimizer)

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=True, verbose=1)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)    
Example #11
def main():
    # OPTIONS
    ENV_NAME = 'OHLCV-v0'
    TIME_STEP = 30

    # Get the environment and extract the number of actions.
    PATH_TRAIN = "./data/train/"
    PATH_TEST = "./data/test/"
    env = OhlcvEnv(TIME_STEP, path=PATH_TRAIN)
    env_test = OhlcvEnv(TIME_STEP, path=PATH_TEST)

    # random seed
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n
    model = create_model(shape=env.shape, nb_actions=nb_actions)
    #print(model.summary())
    model.summary()

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=50000, window_length=TIME_STEP)
    # policy = BoltzmannQPolicy()
    policy = EpsGreedyQPolicy()
    # enable the dueling network
    # you can specify the dueling_type to one of {'avg','max','naive'}
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=200,
                   enable_dueling_network=True,
                   dueling_type='avg',
                   target_model_update=1e-2,
                   policy=policy,
                   processor=NormalizerProcessor())
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    while True:
        # train
        dqn.fit(env,
                nb_steps=5500,
                nb_max_episode_steps=10000,
                visualize=False,
                verbose=2)
        try:
            # validate
            info = dqn.test(env_test, nb_episodes=1, visualize=False)
            n_long, n_short, total_reward, portfolio = info['n_trades'][
                'long'], info['n_trades']['short'], info['total_reward'], int(
                    info['portfolio'])
            np.array([info]).dump(
                './info/duel_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.info'.format(
                    ENV_NAME, portfolio, n_long, n_short, total_reward))
            dqn.save_weights(
                './model/duel_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.h5f'.format(
                    ENV_NAME, portfolio, n_long, n_short, total_reward),
                overwrite=True)
        except KeyboardInterrupt:
            continue
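
NormalizerProcessor is project code that is not shown above. A minimal sketch of one way such a processor could be written, standardizing each state batch before it reaches the network (the exact normalization scheme used by the original is an assumption):

import numpy as np
from rl.core import Processor

class NormalizerProcessor(Processor):
    def process_state_batch(self, batch):
        # Standardize each batch of stacked OHLCV observations (zero mean, unit variance).
        batch = np.asarray(batch, dtype=np.float32)
        return (batch - batch.mean()) / (batch.std() + 1e-8)
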
def main():
    # OPTIONS
    ENV_NAME = 'OHLCV-v0'
    TIME_STEP = 30
    WINDOW_LENGTH = TIME_STEP
    ADDITIONAL_STATE = 4

    # Get the environment and extract the number of actions.
    PATH_TRAIN = "./data/train/"
    PATH_TEST = "./data/test/"
    env = OhlcvEnv(TIME_STEP, path=PATH_TRAIN)
    env_test = OhlcvEnv(TIME_STEP, path=PATH_TEST)

    # random seed
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n

    model = Sequential()
    model.add(CuDNNLSTM(64, input_shape=env.shape, return_sequences=True))
    model.add(CuDNNLSTM(64))
    model.add(Dense(32))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=50000, window_length=TIME_STEP)
    # policy = BoltzmannQPolicy()
    policy = EpsGreedyQPolicy()
    # enable the dueling network
    # you can specify the dueling_type to one of {'avg','max','naive'}
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=200,
                   enable_dueling_network=True,
                   dueling_type='avg',
                   target_model_update=1e-2,
                   policy=policy,
                   processor=NormalizerProcessor())
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    ### now only test
    dqn.load_weights(
        "./model/duel_dqn_OHLCV-v0_weights_49112166LS_184_297_4.033341265853485.h5f"
    )
    # validate
    info = dqn.test(env_test, nb_episodes=1, visualize=False)
    n_long, n_short, total_reward, portfolio = info['n_trades']['long'], info[
        'n_trades']['short'], info['total_reward'], int(info['portfolio'])
    np.array([info]).dump(
        './model/duel_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.info'.format(
            ENV_NAME, portfolio, n_long, n_short, total_reward))
    dqn.save_weights(
        './info/duel_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.h5f'.format(
            ENV_NAME, portfolio, n_long, n_short, total_reward),
        overwrite=True)
def main():
    fleet_size = 2000
    surge = 2
    perc_k = 1
    bonus = 0
    pro_s = 0
    percent_false_demand = 0

    config = {
        "fleet_size": 2000,
        "surge": 2,
        "perc_k": 1,
        "bonus": 0,
        "pro_s": 0,
        "percent_false_demand": 0
    }

    # m = Model(ZONE_IDS, DEMAND_SOURCE, WARMUP_TIME_HOUR, ANALYSIS_TIME_HOUR, FLEET_SIZE=fleet_size, PRO_SHARE=pro_s,
    #         SURGE_MULTIPLIER=surge, BONUS=bonus, percent_false_demand=percent_false_demand, percentage_know_fare = perc_k)

    # make one veh to be AV
    # veh  = m.vehilcs[-1]
    # veh.is_AV = True
    #
    # env = RebalancingEnv(m, penalty=-10, config=config )
    env = RebalancingEnv(penalty=-10, config=config)

    nb_actions = env.action_space.n
    input_shape = (1, ) + env.state.shape
    input_dim = env.input_dim

    model = Sequential()
    model.add(Flatten(input_shape=input_shape))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))

    memory = SequentialMemory(limit=2000, window_length=1)
    policy = EpsGreedyQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=100,
                   target_model_update=1e-2,
                   policy=policy,
                   gamma=0.99)
    dqn.compile(Adam(lr=0.001, epsilon=0.05, decay=0.0), metrics=['mae'])

    dqn.load_weights('dqn_weights_%s.h5f' % (3000))

    history = dqn.fit(env,
                      nb_steps=10000,
                      action_repetition=1,
                      visualize=False,
                      verbose=2)

    dqn.save_weights('dqn_weights_%s.h5f' % (10000), overwrite=True)

    history_dict = history.history
    json.dump(history_dict, open(output_path + "history_10000.json", 'w'))
def main():
    weight_path = 'models/cartpole/keras_weights.h5'

    env = gym.make('CartPole-v0')

    env = gym.wrappers.Monitor(env, "./gym-results", force=True)

    input_shape = (1, ) + env.observation_space.shape
    output = env.action_space.n
    model = create_model(input_shape, output)

    model.summary()

    # https://qiita.com/goodclues/items/9b2b618ac5ba4c3be1c5
    dqn = DQNAgent(
        model=model,
        # number of outputs = number of discrete actions
        nb_actions=output,
        # discount factor https://github.com/keras-rl/keras-rl/blob/master/rl/agents/dqn.py#L307
        gamma=0.99,
        # experience replay:
        # store actions, rewards, observations, etc. in memory as experiences,
        # then replay them in random order later for learning
        memory=SequentialMemory(
            # maximum memory size
            limit=5000,
            # how many observations to concatenate into a single state, e.g. to
            # combine several consecutive time-series observations into one state
            window_length=1,
        ),
        # number of warm-up steps; training is unstable at first, so the learning
        # rate is ramped up gradually over this period
        nb_steps_warmup=10,
        # Bellman equation / target model update:
        # a value < 1 means a soft update
        # a value >= 1 means a hard update (weights fully replaced every that many steps)
        target_model_update=1e-2,
        # how actions are chosen in the environment
        # GreedyQPolicy (default): explore vs. exploit, lowering the exploration rate as training progresses
        # BoltzmannQPolicy: a softmax policy based on the Boltzmann distribution
        policy=BoltzmannQPolicy(),
    )
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    if os.path.exists(weight_path):
        dqn.load_weights(weight_path)

    try:
        dqn.fit(
            env,
            nb_steps=5000,  # 3min
            visualize=False,
            log_interval=1000,
        )
    except KeyboardInterrupt:
        pass
    finally:
        dqn.save_weights(weight_path, overwrite=True)
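
create_model is defined elsewhere; a minimal sketch of a compatible builder, assuming a small fully connected Q-network is sufficient for CartPole (layer sizes are illustrative):

from keras.models import Sequential
from keras.layers import Flatten, Dense

def create_model(input_shape, output):
    # input_shape is (1,) + observation shape; output is the number of discrete actions.
    model = Sequential()
    model.add(Flatten(input_shape=input_shape))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(output, activation='linear'))
    return model
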
Example #15
def playGame(train_indicator=0):    #1 means Train, 0 means simply Run
    BUFFER_SIZE = 100000
    BATCH_SIZE = 32
    GAMMA = 0.99
    TAU = 0.001     # Target network hyperparameter
    LRA = 0.0001    # Learning rate for Actor
    LRC = 0.001     # Learning rate for Critic

    action_dim = 4  # Steering/Acceleration/Brake
    state_dim = 29  # number of sensor inputs

    np.random.seed(1337)

    vision = False

    EXPLORE = 100000.
    episode_count = 2000
    max_steps = 100000
    reward = 0
    done = False
    step = 0
    epsilon = 1
    indicator = 0

    # Generate a Torcs environment
    env = TorcsEnv(vision=vision, throttle=False, gear_change=False)
    nb_actions = 3  # left, nothing, right, brake

    model = Sequential()
    model.add(Flatten(input_shape=(window_length,29)))
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))
    print(model.summary())

    memory = SequentialMemory(limit=1000000, window_length=window_length)
    policy = BoltzmannQPolicy(tau=1.)
    processor=MyProcessor()

    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
                enable_dueling_network=True, dueling_type='avg',
                target_model_update=1e-2, policy=policy,
                processor=processor)
    dqn.compile(RMSprop(lr=1e-3), metrics=['mae'])
    dqn.load_weights('duel_dqn_{}_weights.h5f'.format(ENV_NAME))
    dqn.fit(env, nb_steps=500000, visualize=False, verbose=2)

    # After training is done, we save the final weights.
    dqn.save_weights('duel_dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=False)
class KerasDDQNAgent(object):
    '''
    classdocs
    '''
    def __init__(self, opts):
        self.metadata = {
            'discrete_actions': True,
        }

        self.opts = opts

    def configure(self, observation_space_shape, nb_actions):
        # Next, we build a simple model.
        model = Sequential()
        model.add(Flatten(input_shape=(1, ) +
                          observation_space_shape))  # input layer
        model.add(Dense(32))  # Just your regular fully connected NN layer
        model.add(Activation('tanh'))  # tanh activation layer
        model.add(
            Dense(16))  # more model capacity through fully connected NN layers
        model.add(Activation('relu'))  # Rectified Linear Units
        model.add(
            Dense(16))  # more model capacity through fully connected NN layers
        model.add(Activation('relu'))  # Rectified Linear Units
        model.add(Dense(nb_actions)
                  )  # fully connected NN layer with one output for each action
        model.add(
            Activation('linear'))  # we want linear activations in the end
        print(model.summary())

        memory = SequentialMemory(limit=50000, window_length=1)
        policy = BoltzmannQPolicy()
        self.agent = DQNAgent(enable_double_dqn=True,
                              model=model,
                              nb_actions=nb_actions,
                              memory=memory,
                              nb_steps_warmup=10,
                              target_model_update=1e-2,
                              policy=policy)
        self.agent.compile(Adam(lr=1e-3), metrics=['mae'])

    def train(self, env, nb_steps, visualize, verbosity):
        self.agent.fit(env,
                       nb_steps=nb_steps,
                       visualize=visualize,
                       verbose=verbosity)

    def test(self, env, nb_episodes, visualize):
        self.agent.test(env, nb_episodes=nb_episodes, visualize=visualize)

    def load_weights(self, load_file):
        self.agent.load_weights(load_file)

    def save_weights(self, save_file, overwrite):
        self.agent.save_weights(save_file, overwrite)
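
A usage sketch of KerasDDQNAgent (opts is whatever option object the surrounding project passes in; the environment is only a stand-in):

import gym

env = gym.make('CartPole-v0')
agent = KerasDDQNAgent(opts={})
agent.configure(env.observation_space.shape, env.action_space.n)
agent.train(env, nb_steps=20000, visualize=False, verbosity=2)
agent.test(env, nb_episodes=5, visualize=False)
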
Example #17
def startLearning(Env, max_board_size=7, loadFileNumber=-1, gpuToUse=None, memoryAllocation=800000):
    # Set to use GPU explicitly
    if gpuToUse is not None:
        environ["CUDA_VISIBLE_DEVICES"] = gpuToUse
    else:
        environ["CUDA_VISIBLE_DEVICES"] = "0"

    env = Env
    nb_actions = env.action_space.n

    # Init size based on max_board_size
    if max_board_size not in [11, 7, 19]:
        raise EnvironmentError

    layer0Size = 4096
    layer1Size = 4096
    layer2Size = 4096
    layer3Size = 0
    layer4Size = 0
    layer5Size = 0

    # Next, we build a very simple model. 
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(layer0Size))
    model.add(LeakyReLU(alpha=0.003))
    model.add(Dense(layer1Size))
    model.add(LeakyReLU(alpha=0.003))
    model.add(Dense(layer2Size))
    model.add(LeakyReLU(alpha=0.003))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    #A little diagnosis of the model summary
    print(model.summary())

    # Finally, we configure and compile our agent.
    memory = SequentialMemory(limit=memoryAllocation, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, batch_size=32, nb_actions=nb_actions, memory=memory, policy=policy, enable_dueling_network=True, gamma=.97)
    dqn.compile(nadam(lr=0.01), metrics=['mae']) 


    # Here we load from a file an old agent save if specified.
    if loadFileNumber >= 0:
        loadFile = "Larger_Memeory_BOARDSIZE_" + str(max_board_size) + "_DQN_LAYERS_" + str(layer0Size) + "_" + str(layer1Size) + "_" + str(layer2Size) + "_" + str(layer3Size) + "_" + str(layer4Size) + "_" + str(layer5Size) +  "_SAVENUMBER_" + str(loadFileNumber) + ".h5f"
        dqn.load_weights(loadFile)

    saveFileNumberCounter = 0
    while True:
        dqn.fit(env, nb_steps=100010, visualize=False, verbose=1)
        saveFileNumberCounter+=1
        saveFile = "Larger_Memeory_BOARDSIZE_" + str(max_board_size) + "_DQN_LAYERS_" + str(layer0Size) + "_" + str(layer1Size) + "_" + str(layer2Size) + "_" + str(layer3Size) + "_" + str(layer4Size) + "_" + str(layer5Size) + "_SAVENUMBER_" + str(loadFileNumber + saveFileNumberCounter) + ".h5f"
        dqn.save_weights(saveFile, overwrite=True)
Example #18
def main():
    ENV_NAME = 'OHLCV-v0'
    TIME_STEP = 30

    TRAIN_PATH = "./data/train"
    TEST_PATH = "./data/test"
    env_train = OhlcvEnv(TIME_STEP, path=TRAIN_PATH)
    env_test = OhlcvEnv(TIME_STEP, path=TEST_PATH)

    np.random.seed(456)
    env_train.seed(562)

    nb_actions = env_train.action_space.n
    model = model_create(shape=env_train.shape, nb_actions=nb_actions)
    print(model.summary())

    # finally, we configure and compile our agent
    memory = SequentialMemory(limit=50000, window_length=TIME_STEP)
    # policy = BoltzmannQPolicy()
    policy = EpsGreedyQPolicy()
    # enable the dueling network
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=200,
                   enable_dueling_network=True,
                   dueling_type='avg',
                   target_model_update=1e-2,
                   policy=policy,
                   processor=Normalizerprocessor())
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    while True:
        # train
        # dqn.load_weights('')  # optionally resume from previously saved weights
        dqn.fit(env_train,
                nb_steps=5500,
                nb_max_episode_steps=10000,
                visualize=True,
                verbose=2)
        #validate
        info = dqn.test(env_test, nb_episodes=1, visualize=True)
        n_long, n_short, total_reward, account = info['n_trades'][
            'long'], info['n_trades']['short'], info['total_reward'], int(
                info['account'])
        np.array([info]).dump(
            './info/duel_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.info'.format(
                ENV_NAME, account, n_long, n_short, total_reward))
        dqn.save_weights(
            './model/duel_LSTM_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.h5f'.format(
                ENV_NAME, account, n_long, n_short, total_reward),
            overwrite=True)
Example #19
def keras_rl(env,
             model_name,
             saved_model_name="model",
             steps=50000,
             test_steps=5,
             visualize=False,
             hidden_layers=3,
             critic_hidden_layers=3):
    nb_actions = 0
    if (model_name == "DQN" or model_name == "SARSA"):
        nb_actions = env.action_space.n
    elif (model_name == "DDPG"):
        nb_actions = env.action_space.shape[0]

    model_structure = define_layers(env,
                                    nb_actions,
                                    num_of_hidden_layers=hidden_layers)
    memory = define_memory()
    policy = define_policy(model_name)

    if (model_name == "DQN"):
        model = DQNAgent(model=model_structure,
                         nb_actions=nb_actions,
                         memory=memory,
                         nb_steps_warmup=100,
                         enable_double_dqn=True,
                         dueling_type='avg',
                         target_model_update=1e-2)
    elif (model_name == "SARSA"):
        model = SARSAAgent(model=model_structure,
                           nb_actions=nb_actions,
                           nb_steps_warmup=10,
                           policy=policy)
    elif (model_name == "DDPG"):
        action_input, critic_layers = define_critic_layers(
            env, num_of_hidden_layers=critic_hidden_layers)
        random_process = define_random_process(nb_actions)
        model = DDPGAgent(nb_actions=nb_actions,
                          actor=model_structure,
                          critic=critic_layers,
                          critic_action_input=action_input,
                          memory=memory,
                          nb_steps_warmup_critic=100,
                          nb_steps_warmup_actor=100,
                          random_process=random_process,
                          gamma=.99,
                          target_model_update=1e-3)

    model.compile(Adam(lr=1e-3), metrics=['mae'])
    model.fit(env, nb_steps=steps, visualize=False, verbose=2)
    model.save_weights('{}.h5f'.format(model_name), overwrite=True)
    model.test(env, nb_episodes=test_steps, visualize=visualize)
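
define_layers, define_memory, define_policy, define_critic_layers and define_random_process are project helpers that are not shown. Rough sketches of the first three, assuming they follow the usual keras-rl patterns (layer sizes and policy choices are guesses):

from keras.models import Sequential
from keras.layers import Flatten, Dense
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy

def define_layers(env, nb_actions, num_of_hidden_layers=3, units=64):
    # Fully connected network; for DDPG this plays the role of the actor.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    for _ in range(num_of_hidden_layers):
        model.add(Dense(units, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))
    return model

def define_memory(limit=50000, window_length=1):
    return SequentialMemory(limit=limit, window_length=window_length)

def define_policy(model_name):
    # Only the SARSA branch above actually passes the policy to its agent.
    return BoltzmannQPolicy()
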
Example #20
    def learn(self):
        memory = SequentialMemory(limit=50000, window_length=1)
        policy = BoltzmannQPolicy()
        dqn = DQNAgent(model=self.model,
                       nb_actions=self.nb_actions,
                       memory=memory,
                       nb_steps_warmup=2000,
                       target_model_update=1e-2,
                       policy=policy)
        dqn.compile(Adam(lr=1e-3), metrics=['mae'])

        dqn.fit(self.env, nb_steps=50000, visualize=True, verbose=2)
        dqn.save_weights('dqn_weights.h5f', overwrite=True)
def do_train(dqn: DQNAgent, env, save_path):
    # Okay, now it's time to learn something! Visualization is disabled here because it slows
    # down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    history = dqn.fit(env, nb_steps=200000, visualize=False, verbose=0)
    history = history.history

    # After training is done, we save the final weights.
    dqn.save_weights(save_path, overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    # dqn.test(env, nb_episodes=5, visualize=True)

    return history
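
do_train returns the keras-rl training history dict; a short usage sketch that trains and then plots per-episode reward (dqn and env are assumed to be built elsewhere, and the plotting is an illustrative addition):

import matplotlib.pyplot as plt

history = do_train(dqn, env, save_path='dqn_weights.h5f')
plt.plot(history['episode_reward'])
plt.xlabel('episode')
plt.ylabel('reward')
plt.savefig('training_reward.png')
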
Example #22
def main() -> None:
    env = gym.make(ENV_NAME)
    nb_actions = env.action_space.n

    model = tf.keras.Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(
        Dense(512, activation='relu',
              kernel_regularizer=regularizers.l2(0.01)))
    model.add(
        Dense(512, activation='relu',
              kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dense(nb_actions, activation='linear'))
    print(model.summary())

    memory = SequentialMemory(limit=100000, window_length=1)
    policy = CustomEpsGreedy(max_eps=0.6, min_eps=0.1, eps_decay=0.9997)

    agent = DQNAgent(nb_actions=nb_actions,
                     model=model,
                     memory=memory,
                     policy=policy,
                     gamma=0.99,
                     batch_size=64)

    agent.compile(optimizer=Adam(lr=1e-3), metrics=['mae'])

    history = agent.fit(env,
                        nb_steps=100000,
                        visualize=False,
                        nb_max_episode_steps=300,
                        log_interval=300,
                        verbose=1)

    kill_all_node()

    dt_now = datetime.datetime.now()
    agent.save_weights(MODELS_PATH + 'dpg_{}_weights_{}{}{}.h5f'.format(
        ENV_NAME, dt_now.month, dt_now.day, dt_now.hour),
                       overwrite=True)
    # agent.test(env, nb_episodes=5, visualize=False)

    fig = plt.figure()
    plt.plot(history.history['episode_reward'])
    plt.xlabel("episode")
    plt.ylabel("reward")

    plt.savefig(FIGURES_PATH + 'learning_results_{}{}{}.png'.format(
        dt_now.month, dt_now.day, dt_now.hour))
Example #23
class DQN(BaseAgent):
  def __init__(self, model, processor, policy, test_policy, num_actions):
    # Replay memory
    memory = SequentialMemory(limit=opt.dqn_replay_memory_size,
                              window_length=opt.dqn_window_length)
    self.agent = DQNAgent(model=model,
                          nb_actions=num_actions,
                          policy=policy,
                          test_policy=test_policy,
                          memory=memory,
                          processor=processor,
                          batch_size=opt.dqn_batch_size,
                          nb_steps_warmup=opt.dqn_nb_steps_warmup,
                          gamma=opt.dqn_gamma,
                          target_model_update=opt.dqn_target_model_update,
                          enable_double_dqn=opt.enable_double_dqn,
                          enable_dueling_network=opt.enable_dueling_network,
                          train_interval=opt.dqn_train_interval,
                          delta_clip=opt.dqn_delta_clip)
    self.agent.compile(optimizer=keras.optimizers.Adam(lr=opt.dqn_learning_rate), metrics=['mae'])

  def fit(self, env, num_steps, weights_path=None, visualize=False):
    callbacks = []
    if weights_path is not None:
      callbacks += [ModelIntervalCheckpoint(weights_path, interval=50000, verbose=1)]
    self.agent.fit(env=env,
                   nb_steps=num_steps,
                   action_repetition=opt.dqn_action_repetition,
                   callbacks=callbacks,
                   log_interval=opt.log_interval,
                   test_interval=opt.test_interval,
                   test_nb_episodes=opt.test_nb_episodes,
                   test_action_repetition=opt.dqn_action_repetition,
                   visualize=visualize,
                   test_visualize=visualize,
                   verbose=1)

  def test(self, env, num_episodes, visualize=False):
    self.agent.test(env=env,
                    nb_episodes=num_episodes,
                    action_repetition=opt.dqn_action_repetition,
                    verbose=2,
                    visualize=visualize)

  def save(self, out_dir):
    self.agent.save_weights(out_dir, overwrite=True)

  def load(self, out_dir):
    self.agent.load_weights(out_dir)
Example #24
def learn():
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)

    # Action space details
    nb_devices = env.action_space.spaces["device"].n
    nb_durations = env.action_space.spaces["duration"].n
    nb_actions = nb_devices * nb_durations

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1, )))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in
    # Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=50000, window_length=1)
    processor = CounterTrafficProcessor()
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   processor=processor,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=1000,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for
    # show, but this slows down training quite a lot. You can always safely
    # abort the training prematurely using Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
    #dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))

    # Finally, evaluate our algorithm
    dqn.test(env, nb_episodes=5, visualize=True)
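
CounterTrafficProcessor is not shown. Since the agent flattens the Dict action space into nb_devices * nb_durations discrete actions, the processor presumably maps the flat index back to a {'device', 'duration'} action. A hedged sketch of that mapping (the original is constructed without arguments, so the index ordering and the explicit nb_durations parameter here are assumptions):

from rl.core import Processor

class CounterTrafficProcessor(Processor):
    def __init__(self, nb_durations):
        self.nb_durations = nb_durations

    def process_action(self, action):
        # Flat index chosen by the DQN -> the env's Dict action space,
        # assuming action = device * nb_durations + duration.
        device, duration = divmod(int(action), self.nb_durations)
        return {'device': device, 'duration': duration}
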
Example #25
class dqn():
    def __init__(self, Env):
        self.env = Env
        nb_actions = self.env.action_space.shape[0]

        model = Sequential()
        model.add(Flatten(input_shape=(1, ) +
                          self.env.observation_space.shape))
        model.add(Dense(64))
        model.add(Activation('relu'))
        model.add(Dense(64))
        model.add(Activation('relu'))
        model.add(Dense(64))
        model.add(Activation('relu'))
        model.add(Dense(64))
        model.add(Activation('relu'))
        model.add(Dense(nb_actions))
        model.add(Activation('linear'))

        memory = SequentialMemory(limit=50000, window_length=1)
        policy = BoltzmannQPolicy()
        self.model = DQNAgent(model=model,
                              nb_actions=nb_actions,
                              memory=memory,
                              nb_steps_warmup=10,
                              target_model_update=1e-2,
                              policy=policy,
                              gamma=0)
        self.model.processor = ShowActionProcessor(self.model, self.env)
        self.model.compile(Adam(lr=1e-2), metrics=['mae'])

    def fit(self):
        self.model.fit(self.env,
                       nb_steps=30000,
                       visualize=False,
                       verbose=2,
                       nb_max_episode_steps=10000)

    def save_weights(self):
        self.model.save_weights(
            './store/dqn_{}_weights.h5f'.format('porfolio'), overwrite=True)

    def test(self):
        self.model.test(self.env,
                        nb_episodes=1,
                        visualize=False,
                        nb_max_episode_steps=10000)
Example #26
def main(options):
    env = gym.make(ENV_NAME)
    if options.gui:
        env.nogui = False
    options.prediction_type = options.type
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n
    model = make_model(env, nb_actions)

    # Configure and compile the agent
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=options.training_warmup,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Begin training
    print(
        "=================== Starting training.. =============================="
    )
    dqn.fit(env,
            nb_steps=options.training_steps,
            visualize=False,
            verbose=2,
            nb_max_episode_steps=options.training_max_steps)

    # After training is done, save the weights
    print(
        "=================== Finished training, saving weights.. =============="
    )
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Evaluate the model
    print(
        "=================== Finished saving weights, evaluating model ========"
    )
    res = dqn.test(env,
                   nb_episodes=options.eval_episodes,
                   visualize=False,
                   nb_max_episode_steps=options.eval_max_steps,
                   verbose=1)
    pprint(res.history)
Example #27
def run_dqn():

    global N_NODE_NETWORK

    env = SnakeGymDiscrete()
    nb_actions = env.action_space.n

    # initialize randomness
    np.random.seed(123)
    env.seed(123)

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(Dense(N_NODE_NETWORK))
    model.add(Activation('relu'))
    model.add(Dense(N_NODE_NETWORK))
    model.add(Activation('relu'))
    model.add(Dense(N_NODE_NETWORK))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=10,
                   target_model_update=1e-2,
                   policy=policy)

    adam = Adam(lr=1e-3)
    # setattr(adam, "_name", "Adam")
    dqn.compile(adam, metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)
    dqn.save_weights('dqn_SnakeGymDiscrete_weights.h5f', overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
Example #28
def main():
    weight_path = 'models/breakout/keras_weights.h5'

    env = gym.make('BreakoutDeterministic-v4')
    nb_actions = env.action_space.n
    input_shape = (WINDOW_LENGTH, ) + INPUT_SHAPE

    model = create_model(input_shape, nb_actions)

    print(model.summary())

    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = AtariProcessor()

    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr='eps',
                                  value_max=1.,
                                  value_min=.1,
                                  value_test=.05,
                                  nb_steps=1000000)

    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   policy=policy,
                   memory=memory,
                   processor=processor,
                   nb_steps_warmup=50000,
                   gamma=.99,
                   target_model_update=10000,
                   train_interval=4,
                   delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])

    if os.path.exists(weight_path):
        dqn.load_weights(weight_path)

    try:
        dqn.fit(
            env,
            nb_steps=1750000,  # 8h
            visualize=False,
        )
    except KeyboardInterrupt:
        pass
    finally:
        dqn.save_weights(weight_path, overwrite=True)
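
AtariProcessor, INPUT_SHAPE and WINDOW_LENGTH are defined elsewhere; a sketch following the standard keras-rl Atari conventions (84x84 grayscale frames stored as uint8, rewards clipped to [-1, 1]) that this script appears to assume:

import numpy as np
from PIL import Image
from rl.core import Processor

INPUT_SHAPE = (84, 84)
WINDOW_LENGTH = 4

class AtariProcessor(Processor):
    def process_observation(self, observation):
        # Resize the raw RGB frame and convert it to 8-bit grayscale.
        img = Image.fromarray(observation).resize(INPUT_SHAPE).convert('L')
        return np.array(img, dtype='uint8')

    def process_state_batch(self, batch):
        # Rescale lazily at batch time so replay memory stays compact (uint8 storage).
        return batch.astype('float32') / 255.

    def process_reward(self, reward):
        return np.clip(reward, -1., 1.)
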
def main():
    env = gym.make("balancebot-v0")
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(12))
    model.add(Activation('relu'))
    model.add(Dense(9))
    model.add(Activation('softmax'))
    # print(model.summary())

    memory = SequentialMemory(limit=100000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=env.action_space.n,
                   memory=memory,
                   nb_steps_warmup=10,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    dqn.fit(env, nb_steps=15000, visualize=True, verbose=2, callbacks=None)

    # act = deepq.learn(env,
    #     q_func=model,
    #     lr=1e-3,
    #     max_timesteps=100000,
    #     buffer_size=100000,
    #     exploration_fraction=0.1,
    #     exploration_final_eps=0.02,
    #     print_freq=10,
    #     callback=callback
    # )
    print("Saving model to balance.pkl")
    # After training is done, we save the final weights.
    dqn.save_weights('balance.pkl', overwrite=True)
    print("================================================")
    print('\n')

    #Load the saved weights to dqn
    dqn.load_weights('balance.pkl')

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
Example #30
def Mainthread():
    for k in range(10):
        if (k == 0):
            Gen_C = Genetic.Chromosomes_Offset()
            Gen_List = Gen_C.initGen(8)
            for i in range(len(Gen_List)):
                f = open('./CrS/' + str(i) + '.txt', 'w')
                f.write(Gen_List[i][0])
                f.write(Gen_List[i][1])
                f.close()
            Mgen = []
            Sgen = []
            for i in range(len(Gen_List)):
                Mgen.append(Gen_List[i][0])
                Sgen.append(Gen_List[i][1])
            for i in range(len(Mgen)):
                Model = Model_Converter.GeneticModel(Mgen[i], Sgen[i]).model
                model_json = Model.to_json()
                f = open('./model/model' + str(i) + '.json', 'w')
                f.write(model_json)
                f.close()
        else:
            Gen_M = gen_main.GenMain()
            Gen_M.main()

        for j in range(8):
            json_file = open("./model/model" + str(i) + ".json", "r")
            loaded_model_json = json_file.read()
            json_file.close()
            loaded_model = keras.models.model_from_json(loaded_model_json)
            memory = SequentialMemory(limit=50000, window_length=1)
            policy = BoltzmannQPolicy()
            dqn = DQNAgent(model=loaded_model,
                           nb_actions=nb_actions,
                           memory=memory,
                           nb_steps_warmup=10,
                           target_model_update=1e-2,
                           policy=policy)
            dqn.compile(Adam(lr=1e-3), metrics=['mae'])
            dqn.fit(env, nb_steps=30000, visualize=False, verbose=2)
            dqn.save_weights('t_score/dqn_' + str(k) + '_' + str(j) +
                             '{}_weights.h5f'.format(env_name),
                             overwrite=True)
            Calc_E_Cons_and_Perfomance(dqn, j)
Example #31
def main():

    # Get the environment and extract the number of actions.
    environment_name = "lawnmower-medium-obstacles-v0"
    environment = gym.make(environment_name)
    environment.print_description()
    nb_actions = environment.action_space.n

    # Build the model.
    model = build_model_cnn((WINDOW_LENGTH,) + INPUT_SHAPE, nb_actions)
    print(model.summary())

    # Create sequential memory for memory replay.
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)

    # Process environment inputs and outputs.
    processor = LawnmowerProcessor()

    # Use epsilon-greedy as our policy.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05,
                                  nb_steps=int(STEPS * 0.8))

    # Instantiate and compile our agent.
    dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
                   processor=processor, nb_steps_warmup=50000, gamma=.99, target_model_update=10000,
                   train_interval=4, delta_clip=1.)
    dqn.compile(optimizers.Adam(lr=.00025), metrics=['mae'])

    # Set up some callbacks for training.
    checkpoint_weights_filename = 'dqn_' + environment_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(environment_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [TensorboardCallback(os.path.join("tensorboard", datetime_string))]
    callbacks += [FileLogger(log_filename, interval=100)]

    # Train the agent.
    dqn.fit(environment, callbacks=callbacks, nb_steps=STEPS, log_interval=10000)

    # Save the final network after training.
    weights_filename = 'dqn_{}_weights.h5f'.format(environment_name)
    dqn.save_weights(weights_filename, overwrite=True)

    # Run the agent.
    dqn.test(environment, nb_episodes=10, visualize=False)
Example #32
def cartpole():


	ENV_NAME = 'CartPole-v0'

	# Get the environment and extract the number of actions.
	env = gym.make(ENV_NAME)
	np.random.seed(123)
	env.seed(123)
	nb_actions = env.action_space.n

	# Next, we build a very simple model.
	model = Sequential()
	model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
	model.add(Dense(16))
	model.add(Activation('relu'))
	model.add(Dense(16))
	model.add(Activation('relu'))
	model.add(Dense(16))
	model.add(Activation('relu'))
	model.add(Dense(nb_actions))
	model.add(Activation('linear'))

	print(model.summary())

	# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
	# even the metrics!
	memory = SequentialMemory(limit=50000, window_length=1)
	policy = BoltzmannQPolicy()
	dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
				   target_model_update=1e-2, policy=policy)
	dqn.compile(Adam(lr=1e-3), metrics=['mae'])

	# Okay, now it's time to learn something! We visualize the training here for show, but this
	# slows down training quite a lot. You can always safely abort the training prematurely using
	# Ctrl + C.
	dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

	# After training is done, we save the final weights.
	dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

	# Finally, evaluate our algorithm for 5 episodes.
	dqn.test(env, nb_episodes=5, visualize=True)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2, 
               policy=train_policy, test_policy=test_policy)
              
    filename = 'weights/duel_dqn_{}_weights_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE,  NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
else:
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=train_policy, test_policy=test_policy)
    
    filename = 'weights/dqn_{}_weights_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)


dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Optionally, we can reload a previous model's weights and continue training from there
# FILENAME = 'weights/duel_dqn_variable_pendulum-v0_weights_4096_4_50000_2017-07-11_140316.h5f'
# Load the model weights
# dqn.load_weights(FILENAME)


# Okay, now it's time to learn something! Visualization is disabled here because it slows
# down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=NUM_STEPS, visualize=False, verbose=2, nb_max_episode_steps=500)

# After training is done, we save the final weights.
dqn.save_weights(filename, overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, nb_max_episode_steps=500, visualize=True)
Example #34
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)
else:
    raise ValueError('Please select DQN, DUEL_DQN, SARSA, or CEM for your method type.')


callbacks = []
# callbacks += [ModelIntervalCheckpoint(CHECKPOINT_WEIGHTS_FILENAME, interval=10000)]
callbacks += [FileLogger(LOG_FILENAME, interval=100)]
# callbacks += [TensorBoard(log_dir='logs', histogram_freq=0, write_graph=True, write_images=False)]
callbacks += [ExploreExploit()]

# Optionally, we can reload a previous model's weights and continue training from there
# LOAD_WEIGHTS_FILENAME = 'weights/duel_dqn_planar_crane-v0_weights_1024_4_50000_2017-07-12_160853.h5f'
# # # Load the model weights
# agent.load_weights(LOAD_WEIGHTS_FILENAME)

# Okay, now it's time to learn something! Visualization is disabled here because it slows
# down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=NUM_STEPS, callbacks=callbacks, action_repetition=5, visualize=False, verbose=1, log_interval=LOG_INTERVAL, nb_max_episode_steps=500)

# After training is done, we save the final weights.
agent.save_weights(WEIGHT_FILENAME, overwrite=True)

# We'll also save a simply named copy to make it easier to run a test
# immediately after training.
filename = 'weights/{}_{}_weights.h5f'.format(METHOD, ENV_NAME)
agent.save_weights(filename, overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True,  action_repetition=5) #nb_max_episode_steps=500,