Example #1
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model,
                   memory=memory,
                   policy=policy,
                   nb_actions=actions,
                   nb_steps_warmup=10,
                   target_model_update=1e-2)
    return dqn
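
build_agent returns the agent uncompiled, so a caller still has to compile and train it. A minimal usage sketch under assumed names (the CartPole-v0 environment, the step counts, and the model-building helper are placeholders, not part of the original example):

import gym
from keras.optimizers import Adam

env = gym.make('CartPole-v0')                   # placeholder environment
model = build_model(env)                        # hypothetical helper that builds a matching Keras model
dqn = build_agent(model, env.action_space.n)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])     # compile before fit, as in the other examples on this page
dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)
dqn.test(env, nb_episodes=5, visualize=False)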
Example #2
    def eval(self, grn):
        self.eval_count += 1
        self.env.seed(self.seed + 123)
        np.random.seed(self.seed + 123)
        model = self.get_model(grn)

        memory = SequentialMemory(limit=self.nsteps,
                                  window_length=self.window_length)
        policy = BoltzmannQPolicy()
        dqn = DQNAgent(model=model,
                       nb_actions=self.nb_actions,
                       memory=memory,
                       nb_steps_warmup=self.warmup,
                       processor=self.processor,
                       gamma=0.99,
                       train_interval=self.window_length,
                       delta_clip=1.0,
                       target_model_update=1e-2,
                       policy=policy,
                       custom_model_objects={
                           'GRNLayer': GRNLayer,
                           'FixedGRNLayer': FixedGRNLayer
                       })
        dqn.compile(Adam(lr=1e-3), metrics=['mae'])

        history = dqn.fit(self.env,
                          nb_steps=self.nsteps,
                          visualize=False,
                          verbose=0)
        final = dqn.test(self.env, nb_episodes=3, visualize=False)
        fit = float(np.mean(final.history['episode_reward']))
        if self.env_name == "Acrobot-v1":
            fit += 500
        elif self.env_name == "MountainCar-v0":
            fit += 200

        with open(self.log_file, 'a') as f:
            if 'episode_reward' in history.history:
                for i in range(len(history.history['episode_reward'])):
                    f.write(
                        'L,%s,%s,%d,%d,%d,%d,%d,%f\n' %
                        (datetime.now().isoformat(), self.env_name, self.seed,
                         self.generation, self.eval_count, i, self.nsteps,
                         history.history['episode_reward'][i]))
            f.write('M,%s,%s,%d,%d,%d,%d,%f\n' %
                    (datetime.now().isoformat(), self.env_name, self.seed,
                     self.generation, self.eval_count, self.nsteps, fit))

        del model
        K.clear_session()
        np.random.seed(self.seed + self.eval_count)
        return fit
Example #3
def initialize_agent(model):
    memory = SequentialMemory(limit=1000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=COLUMNS_AMOUNT,
                   memory=memory,
                   nb_steps_warmup=100,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    return dqn
Example #4
def train_dqn_model(layers, rounds=10000, run_test=False, use_score=False):
    ENV_NAME = 'malware-score-v0' if use_score else 'malware-v0'
    env = gym.make(ENV_NAME)
    env.seed(123)
    nb_actions = env.action_space.n
    window_length = 1  # "experience" consists of where we were, where we are now

    # generate a policy model
    model = generate_dense_model(
        (window_length, ) + env.observation_space.shape, layers, nb_actions)

    # configure and compile our agent
    # BoltzmannQPolicy selects an action stochastically, with probabilities obtained by soft-maxing the Q values (see the sketch after this example)
    policy = BoltzmannQPolicy()

    # memory can help a model during training
    # for this, we only consider a single malware sample (window_length=1) for each "experience"
    memory = SequentialMemory(limit=32,
                              ignore_episode_boundaries=False,
                              window_length=window_length)

    # DQN agent as described in Mnih et al. (2013); double DQN as in van Hasselt et al. (2015).
    # http://arxiv.org/pdf/1312.5602.pdf
    # http://arxiv.org/abs/1509.06461
    agent = DQNAgent(model=model,
                     nb_actions=nb_actions,
                     memory=memory,
                     nb_steps_warmup=16,
                     enable_double_dqn=True,
                     enable_dueling_network=True,
                     dueling_type='avg',
                     target_model_update=1e-2,
                     policy=policy,
                     batch_size=16)

    # keras-rl allows one to use any built-in Keras optimizer
    agent.compile(RMSprop(lr=1e-3), metrics=['mae'])

    # play the game. learn something!
    agent.fit(env, nb_steps=rounds, visualize=False, verbose=2)

    history_train = env.history
    history_test = None

    if run_test:
        # Set up the testing environment
        TEST_NAME = 'malware-score-test-v0' if use_score else 'malware-test-v0'
        test_env = gym.make(TEST_NAME)

        # evaluate the agent on a few episodes, drawing randomly from the test samples
        agent.test(test_env, nb_episodes=100, visualize=False)
        history_test = test_env.history

    return agent, model, history_train, history_test
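
The BoltzmannQPolicy comment in the example above can be made concrete: the policy soft-maxes the Q-values, scaled by a temperature tau, into a probability distribution and samples an action from it. A minimal standalone sketch of that idea (an illustration, not keras-rl's exact implementation):

import numpy as np

def boltzmann_probabilities(q_values, tau=1.0):
    # Subtract the max before exponentiating for numerical stability.
    q = np.asarray(q_values, dtype=np.float64)
    exp_q = np.exp((q - q.max()) / tau)
    return exp_q / exp_q.sum()

# Low tau -> nearly greedy; high tau -> nearly uniform exploration.
probs = boltzmann_probabilities([1.0, 2.0, 0.5], tau=1.0)
action = np.random.choice(len(probs), p=probs)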
Example #5
def startLearning(Env, max_board_size=7, loadFileNumber=-1, gpuToUse=None, memoryAllocation=800000):
    # Set to use GPU explicitly
    if gpuToUse is not None:
        environ["CUDA_VISIBLE_DEVICES"] = gpuToUse
    else:
        environ["CUDA_VISIBLE_DEVICES"] = "0"

    env = Env
    nb_actions = env.action_space.n

    # Init size based on max_board_size
    if max_board_size not in [11, 7, 19]:
        raise EnvironmentError("max_board_size must be one of 7, 11 or 19")

    layer0Size = 4096
    layer1Size = 4096
    layer2Size = 4096
    layer3Size = 0
    layer4Size = 0
    layer5Size = 0

    # Next, we build a very simple model. 
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(layer0Size))
    model.add(LeakyReLU(alpha=0.003))
    model.add(Dense(layer1Size))
    model.add(LeakyReLU(alpha=0.003))
    model.add(Dense(layer2Size))
    model.add(LeakyReLU(alpha=0.003))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    # A quick diagnostic look at the model summary
    print(model.summary())

    # Finally, we configure and compile our agent.
    memory = SequentialMemory(limit=memoryAllocation, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, batch_size=32, nb_actions=nb_actions, memory=memory, policy=policy, enable_dueling_network=True, gamma=.97)
    dqn.compile(nadam(lr=0.01), metrics=['mae']) 


    # Here we load from a file an old agent save if specified.
    if loadFileNumber >= 0:
        loadFile = "Larger_Memeory_BOARDSIZE_" + str(max_board_size) + "_DQN_LAYERS_" + str(layer0Size) + "_" + str(layer1Size) + "_" + str(layer2Size) + "_" + str(layer3Size) + "_" + str(layer4Size) + "_" + str(layer5Size) +  "_SAVENUMBER_" + str(loadFileNumber) + ".h5f"
        dqn.load_weights(loadFile)

    saveFileNumberCounter = 0
    while True:
        dqn.fit(env, nb_steps=100010, visualize=False, verbose=1)
        saveFileNumberCounter+=1
        saveFile = "Larger_Memeory_BOARDSIZE_" + str(max_board_size) + "_DQN_LAYERS_" + str(layer0Size) + "_" + str(layer1Size) + "_" + str(layer2Size) + "_" + str(layer3Size) + "_" + str(layer4Size) + "_" + str(layer5Size) + "_SAVENUMBER_" + str(loadFileNumber + saveFileNumberCounter) + ".h5f"
        dqn.save_weights(saveFile, overwrite=True)
Example #6
    def __init__(self, n=100, recycle=True):
        print('Creating the model.')
        self.train_interval_logger = None

        # Get the environment and extract the number of actions.
        self.env = Secretary(n=n)
        self.env_name = 'secretary'
        self.weightfile = self.__class__.weightfile.format(self.env_name)
        self.nb_actions = self.env.action_space.n

        # Next, we build a very simple model.
        self.model = Sequential()
        self.model.add(
            Flatten(input_shape=(1, ) + self.env.observation_space.shape))
        self.model.add(Dense(256))
        self.model.add(Activation('relu'))
        self.model.add(Dense(256))
        self.model.add(Activation('relu'))
        self.model.add(Dense(256))
        self.model.add(Activation('relu'))
        self.model.add(Dense(self.nb_actions))
        self.model.add(Activation('linear'))
        #print(self.model.summary())

        # Finally, we configure and compile our agent.
        # You can use every built-in Keras optimizer and even the metrics!
        memory = SequentialMemory(limit=50000, window_length=1)
        policy = BoltzmannQPolicy(tau=1.)
        self.dqn = DQNAgent(model=self.model,
                            nb_actions=self.nb_actions,
                            memory=memory,
                            nb_steps_warmup=1000,
                            target_model_update=1e-2,
                            policy=policy)
        self.dqn.compile(Adam(lr=1e-3), metrics=[])

        self.__istrained = False
        print('Model created.')

        if recycle:
            if exists(self.weightfile):
                try:
                    print('Loading the trained weights.')
                    self.dqn.load_weights(self.weightfile)
                    self.__istrained = True
                    print('Loaded the trained weights.')
                    return None
                except:
                    print('An error occurred while loading the trained weights.')
                    print('Unexpected error:', exc_info()[0])
                    raise
            else:
                print('No trained weights found. Please train the model first.')
Example #7
def get_agent(model, nb_actions, learning_rate):
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=10,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=learning_rate), metrics=['mae'])

    return dqn
Example #8
def create_agent_dqn(env, args):
    model = create_model(env, args)
    memory = SequentialMemory(limit=args.memory, window_length=args.window)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=env.action_space.n,
                   memory=memory,
                   nb_steps_warmup=args.memory,
                   target_model_update=1e-4,
                   policy=policy)
    dqn.compile(RMSprop(lr=1e-4), metrics=['mae'])
    return dqn
Example #9
 def run_test(self, weight_file):
     self.model.load_weights(weight_file)
     memory = SequentialMemory(limit=50000, window_length=1)
     policy = BoltzmannQPolicy()
     dqn = DQNAgent(model=self.model,
                    nb_actions=self.nb_actions,
                    memory=memory,
                    nb_steps_warmup=1000,
                    target_model_update=1e-2,
                    policy=policy)
     dqn.compile(Adam(lr=1e-3), metrics=['mae'])
     dqn.test(self.env, nb_episodes=5, visualize=True)
Example #10
    def train_implementation(self, train_context: core.StepsTrainContext):
        assert train_context
        dc: core.StepsTrainContext = train_context
        train_env = self._create_env()
        keras_model = self._create_model(gym_env=train_env,
                                         activation='linear')
        self.log_api(f'SequentialMemory',
                     f'(limit={dc.max_steps_in_buffer}, window_length=1)')
        memory = SequentialMemory(limit=dc.max_steps_in_buffer,
                                  window_length=1)
        self.log_api(f'BoltzmannQPolicy', f'()')
        policy = BoltzmannQPolicy()
        num_actions = train_env.action_space.n
        self.log_api(
            f'DQNAgent', f'(nb_actions={num_actions}, ' +
            f'enable_double_dqn={self._enable_double_dqn}, ' +
            f'enable_dueling_network={self._enable_dueling_network}, ' +
            f'nb_steps_warmup={dc.num_steps_buffer_preload}, target_model_update=1e-2,'
            +
            f'gamma={dc.reward_discount_gamma}, batch_size={dc.num_steps_sampled_from_buffer}, '
            +
            f'train_interval={dc.num_steps_per_iteration}, model=..., memory=..., policy=...)'
        )
        self._agent = KerasRlDqnAgent.DQNAgentWrapper(
            enable_double_dqn=self._enable_double_dqn,
            enable_dueling_network=self._enable_dueling_network,
            model=keras_model,
            nb_actions=num_actions,
            memory=memory,
            nb_steps_warmup=dc.num_steps_buffer_preload,
            target_model_update=1e-2,
            gamma=dc.reward_discount_gamma,
            batch_size=dc.num_steps_sampled_from_buffer,
            train_interval=dc.num_steps_per_iteration,
            policy=policy)
        self.log_api(f'agent.compile', f'(Adam(lr=1e-3), metrics=["mae"])')
        self._agent.compile(Adam(lr=1e-3), metrics=['mae'])
        num_steps = dc.num_iterations * dc.num_steps_per_iteration

        loss_metric_idx = None
        if 'loss' in self._agent.metrics_names:
            loss_metric_idx = self._agent.metrics_names.index("loss")
        dqn_callback = KerasRlDqnAgent.DqnCallback(self, dc, loss_metric_idx)
        self.on_train_iteration_begin()
        self.log_api(f'agent.fit', f'(train_env, nb_steps={num_steps})')
        self._agent.fit(train_env,
                        nb_steps=num_steps,
                        visualize=False,
                        verbose=0,
                        callbacks=[dqn_callback])
        if not dc.training_done:
            self.on_train_iteration_end(math.nan)
Example #11
    def learn(self):
        memory = SequentialMemory(limit=50000, window_length=1)
        policy = BoltzmannQPolicy()
        dqn = DQNAgent(model=self.model,
                       nb_actions=self.nb_actions,
                       memory=memory,
                       nb_steps_warmup=2000,
                       target_model_update=1e-2,
                       policy=policy)
        dqn.compile(Adam(lr=1e-3), metrics=['mae'])

        dqn.fit(self.env, nb_steps=50000, visualize=True, verbose=2)
        dqn.save_weights('dqn_weights.h5f', overwrite=True)
Example #12
def setup_dqn(model, nb_actions):
    # Finally, we configure and compile our agent. You can use every built-in tensorflow.keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=10,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    return dqn
Example #13
def get_rl_agent(agent):
    import tensorflow as tf
    from keras.backend.tensorflow_backend import set_session

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))

    WINDOW_LENGTH = 1

    num_actions = 3
    view_shape = (21, 21)
    input_shape = (WINDOW_LENGTH,) + view_shape

    model = Sequential()

    model.add(Permute((2, 3, 1), input_shape=input_shape))

    model.add(Conv2D(32, (5, 5), padding="same", strides=(3, 3)))
    model.add(Activation("relu"))

    model.add(Conv2D(64, (4, 4), padding="same", strides=(2, 2)))
    model.add(Activation("relu"))

    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(Flatten())

    model.add(Dense(1028))
    model.add(Activation("relu"))

    model.add(Dense(num_actions, activation="linear"))

    model.summary()

    np.random.seed(2363)

    policy = LinearAnnealedPolicy(BoltzmannQPolicy(), attr='tau', value_max=2.,
                                  value_min=.1, value_test=.1, nb_steps=1000000 // TRAIN_DIV)

    processor = TronProcessor()

    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)

    dqn = DQNAgent(model, nb_actions=num_actions, policy=policy, memory=memory, processor=processor,
                   nb_steps_warmup=50000 // TRAIN_DIV, gamma=.9, target_model_update=1e-2,
                   train_interval=4, delta_clip=1., enable_dueling_network=True, dueling_type="avg")

    dqn.compile(Adam(), metrics=["mae"])

    return dqn
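
Here the Boltzmann temperature is not fixed: LinearAnnealedPolicy anneals the wrapped policy's tau attribute from value_max to value_min over nb_steps of training and uses value_test at test time. A rough sketch of that schedule (for illustration only, under the parameters used above):

def annealed_tau(step, value_max=2.0, value_min=0.1, nb_steps=1000000):
    # Linear decay from value_max to value_min, then held at value_min.
    slope = (value_min - value_max) / float(nb_steps)
    return max(value_min, value_max + slope * step)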
Example #14
def buildModel(weight_path, num_actions):
    model = Sequential()
    # Conv1 32 32 (3) => 30 30 (32)
    # model.add(Conv2D(32, (3, 3), input_shape=X_shape[1:]))
    model.add(
        Conv2D(32,
               kernel_size=(8, 8),
               strides=4,
               activation="relu",
               input_shape=(1, ) + (128, 100),
               data_format='channels_first'))
    model.add(Activation('relu'))
    # Conv2 30 30 (32) => 28 28 (32)
    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    # Pool1 28 28 (32) => 14 14 (32)
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Conv3 14 14 (32) => 12 12 (64)
    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    # Conv4 12 12 (64) => 6 6 (64)
    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    # Pool2 6 6 (64) => 3 3 (64)
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # FC layers 3 3 (64) => 576
    model.add(Flatten())
    # Dense1 576 => 256
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128))
    model.add(Activation('relu'))
    # Dense2 256 => 10
    model.add(Dense(num_actions))
    model.add(Activation('softmax'))

    # replay memory (number of past steps to store) and the exploration policy used for learning
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    # policy = MaxBoltzmannQPolicy()

    if weight_path is not None:
        model.load_weights(weight_path)

    return (model, memory, policy)
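
buildModel returns the model, memory, and policy but stops short of building an agent. A hedged sketch of how a caller might wire the three together, mirroring the other examples on this page (the action count, warmup, and learning rate are placeholders):

from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent

model, memory, policy = buildModel(weight_path=None, num_actions=4)   # num_actions is illustrative
dqn = DQNAgent(model=model,
               nb_actions=4,
               memory=memory,
               nb_steps_warmup=100,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])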
Example #15
def get_agent(agent_type, model_type, lr):
    if agent_type == "sarsa":
        policy = BoltzmannQPolicy()
        model = get_model(model_type)
        agent = SARSAAgent(model=model,
                           policy=policy,
                           nb_actions=nb_actions,
                           nb_steps_warmup=10,
                           gamma=0.99)
        agent.compile(Adam(lr), metrics=['mae'])
        return agent
    elif agent_type == "dqn":
        policy = BoltzmannQPolicy()
        model = get_model(model_type)
        memory = SequentialMemory(limit=50000, window_length=1)
        agent = DQNAgent(model=model,
                         policy=policy,
                         nb_actions=nb_actions,
                         memory=memory,
                         nb_steps_warmup=10,
                         target_model_update=1e-2,
                         enable_double_dqn=True)
        agent.compile(Adam(lr), metrics=['mae'])
        return agent
    elif agent_type == "a2c":
        agent = A2CAgent(nb_actions,
                         len(env.observation_space.high),
                         nb_steps_warmup=10,
                         actor_lr=0.001,
                         critic_lr=0.005)
        agent.compile(Adam(lr))
        return agent
    elif agent_type == "ppo":
        pass
    else:
        print("Unsupported model")
        exit(1)
Example #16
def main():

    # nb_actions = cpst._action_space
    nb_actions = 2
    # Next, we build a very simple model.
    model = Sequential()
    #n_os = cpst._observation_space.shape

    n_os = 4
    model.add(Flatten(input_shape=[1] + [n_os]))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    print(model.summary())
    model._make_predict_function()

    # SARSA does not require a replay memory (see the sketch after this example).
    policy = BoltzmannQPolicy()
    sarsa = SARSAAgent(model=model,
                       nb_actions=nb_actions,
                       nb_steps_warmup=10,
                       policy=policy)
    sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

    cart_pole = CartPole(name='cp')

    log = logging.getLogger('bact2')

    RE = RunEngine({})
    RE.log.setLevel('DEBUG')
    cart_pole.log = RE.log

    stm = [cart_pole.x, cart_pole.x_dot, cart_pole.theta, cart_pole.theta_dot]
    cpst = CartPoleEnv(detectors=[cart_pole],
                       motors=[cart_pole],
                       state_motors=stm,
                       user_kwargs={'mode_var': cart_pole.rl_mode})

    np.random.seed(123)
    cpst.seed(123)

    partial = functools.partial(run_test, sarsa, cpst, log=RE.log)
    RE(run_environement(cpst, partial, log=RE.log))
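
SARSA gets by without a memory because it is on-policy: it updates from each consecutive (s, a, r, s', a') transition as it is experienced, instead of sampling stored transitions from a replay buffer the way DQNAgent samples from SequentialMemory. The tabular form of that update, as a tiny illustrative sketch:

def sarsa_update(Q, s, a, r, s2, a2, alpha=0.1, gamma=0.99):
    # One on-policy temporal-difference update for a single (s, a, r, s', a') transition;
    # SARSAAgent applies the same target to a neural Q-function instead of a table.
    td_target = r + gamma * Q[s2][a2]
    Q[s][a] += alpha * (td_target - Q[s][a])
    return Q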
Example #17
def create_agent(config: Config, env, model):
    memory = SequentialMemory(limit=config.memory_size,
                              window_length=config.window_length)
    # Action policy: the orthodox choice is epsilon-greedy; BoltzmannQPolicy, which sets each action's probability from its Q-value, is also available (a sketch of the difference follows this example).
    # policy = EpsGreedyQPolicy(eps=0.1)
    policy = BoltzmannQPolicy()

    dqn = DQNAgent(model=model,
                   nb_actions=env.action_space.n,
                   memory=memory,
                   nb_steps_warmup=100,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    return dqn
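
The commented-out EpsGreedyQPolicy and the BoltzmannQPolicy used above explore differently: epsilon-greedy takes a uniformly random action with probability eps and the greedy action otherwise, while the Boltzmann policy weights every action by its soft-maxed Q-value. A minimal sketch of the epsilon-greedy rule for comparison (illustrative, not the keras-rl source):

import numpy as np

def eps_greedy_action(q_values, eps=0.1):
    # With probability eps explore uniformly at random, otherwise exploit the best Q-value.
    if np.random.rand() < eps:
        return np.random.randint(len(q_values))
    return int(np.argmax(q_values))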
Example #18
def create_dqn_agent(env):
    ''' create dqn agent '''
    model = create_deep_model(env)
    nb_actions = env.action_space.n
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        nb_steps_warmup=500,
        target_model_update=1e-2,
        policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    return dqn
Example #19
def learn():
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)

    # Action space details
    nb_devices = env.action_space.spaces["device"].n
    nb_durations = env.action_space.spaces["duration"].n
    nb_actions = nb_devices * nb_durations

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1, )))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in
    # Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=50000, window_length=1)
    processor = CounterTrafficProcessor()
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   processor=processor,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=1000,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for
    # show, but this slows down training quite a lot. You can always safely
    # abort the training prematurely using Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
    #dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))

    # Finally, evaluate our algorithm
    dqn.test(env, nb_episodes=5, visualize=True)
Example #20
def main(options):
    env = gym.make(ENV_NAME)
    if options.gui:
        env.nogui = False
    options.prediction_type = options.type
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n
    model = make_model(env, nb_actions)

    # Configure and compile the agent
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=options.training_warmup,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Begin training
    print(
        "=================== Starting training.. =============================="
    )
    dqn.fit(env,
            nb_steps=options.training_steps,
            visualize=False,
            verbose=2,
            nb_max_episode_steps=options.training_max_steps)

    # After training is done, save the weights
    print(
        "=================== Finished training, saving weights.. =============="
    )
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Evaluate the model
    print(
        "=================== Finished saving weights, evaluating model ========"
    )
    res = dqn.test(env,
                   nb_episodes=options.eval_episodes,
                   visualize=False,
                   nb_max_episode_steps=options.eval_max_steps,
                   verbose=1)
    pprint(res.history)
Example #21
def run_dqn():

    global N_NODE_NETWORK

    env = SnakeGymDiscrete()
    nb_actions = env.action_space.n

    # initialize randomness
    np.random.seed(123)
    env.seed(123)

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(Dense(N_NODE_NETWORK))
    model.add(Activation('relu'))
    model.add(Dense(N_NODE_NETWORK))
    model.add(Activation('relu'))
    model.add(Dense(N_NODE_NETWORK))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=10,
                   target_model_update=1e-2,
                   policy=policy)

    adam = Adam(lr=1e-3)
    # setattr(adam, "_name", "Adam")
    dqn.compile(adam, metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)
    dqn.save_weights('dqn_SnakeGymDiscrete_weights.h5f', overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
Example #22
    def build_agent(self, mem_file=None, w_file=None):
        #Create a dummy env to get size of input/output.
        #Makes it simpler if we ever choose to update env shapes.
        env = TradingEnv([], "", [])
        np.random.seed(314)
        env.seed(314)

        nb_actions = env.action_space.n
        obs_dim = env.observation_space.shape[0]
        model = Sequential()
        model.add(
            LSTM(5, input_shape=(7, 4),
                 return_sequences=True))  # 4 features + 1 bias term. 5 neurons
        model.add(Activation('tanh'))
        model.add(LSTM(4))
        model.add(Activation('tanh'))
        model.add(Dropout(0.2))
        model.add(Dense(4))
        model.add(Activation('relu'))
        model.add(Dense(nb_actions))
        model.add(Activation('linear'))  # Best activation for BoltzmannQPolicy

        #policy = EpsGreedyQPolicy(eps=EPS_VAL) #Off policy
        policy = BoltzmannQPolicy()  #Off-policy
        test_policy = MaxBoltzmannQPolicy()  #On-policy
        memory = None
        if mem_file is None:
            memory = SequentialMemory(
                limit=50000,
                window_length=7)  ## returns observations of len (7,)
        else:
            (memory, memory.actions, memory.rewards, memory.terminals,
             memory.observations) = pickle.load(open(mem_file, "rb"))

        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       gamma=GAMMA_VAL,
                       nb_steps_warmup=100,
                       policy=policy,
                       test_policy=test_policy)
        dqn.compile("adam", metrics=['mse'])

        if w_file is not None:
            model.load_weights(w_file)

        return dqn, env, memory
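
The mem_file branch above unpickles a previously saved SequentialMemory together with its internal ring buffers. A hypothetical counterpart that writes the file in the same tuple layout the loader expects might look like:

import pickle

def save_memory(memory, mem_file):
    # Persist the memory object plus the ring buffers that build_agent re-attaches on load.
    with open(mem_file, 'wb') as f:
        pickle.dump((memory, memory.actions, memory.rewards,
                     memory.terminals, memory.observations), f)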
Example #23
def train_cartpole_nnet():
    #from test import CartPoleContEnv

    ENV_NAME = 'CartPole-v0'
    gym.undo_logger_setup()

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)

    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=60000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=100,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=60000, visualize=False, verbose=2)

    # get model weights
    weights = model.get_weights()

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
    return weights
Example #24
 def __init__(self, agent_weights=None, *args, **kwargs):
     super(ReusePolicy, self).__init__(*args, **kwargs)
     self.nb_actions = 3
     self.model = Sequential()
     self.model.add(Flatten(input_shape=(1,) + (4,)))
     self.model.add(Dense(64))
     self.model.add(Activation('relu'))
     self.model.add(Dense(32))
     self.model.add(Activation('relu'))
     self.model.add(Dense(self.nb_actions))
     self.model.add(Activation('linear'))
     self.memory = SequentialMemory(limit=500, window_length=1)
     self.policy = BoltzmannQPolicy()
     self.dqn = DQNAgent(model=self.model, nb_actions=self.nb_actions, memory=self.memory, nb_steps_warmup=5,
                    target_model_update=1e-2, policy=self.policy)
     self.dqn.compile(Adam(lr=1e-3), metrics=['mae'])
     self.dqn.load_weights(agent_weights)
Example #25
 def compile_agent(self):
     # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
     # even the metrics!
     processor = DistopiaProcessor(self.num_blocks, self.num_actions)
     #memory = SequentialMemory(limit=50000, window_length=1)
     #policy = PatchedBoltzmannQPolicy(num_actions = self.num_actions, num_blocks = self.num_blocks)
     #test_policy = PatchedGreedyQPolicy(num_actions = self.num_actions, num_blocks = self.num_blocks)
     policy = BoltzmannQPolicy()
     test_policy = GreedyQPolicy()
     self.sarsa = SARSAAgent(model=self.model,
                             processor=processor,
                             nb_actions=self.nb_actions,
                             nb_steps_warmup=1000,
                             policy=policy,
                             test_policy=test_policy,
                             gamma=0.9)
     self.sarsa.compile(Adam(lr=1e-3), metrics=['mae'])
Example #26
def main():
    # with ServerProxy("http://127.0.0.1:8000/", verbose=False, allow_none=True) as proxy:
    if True:
        pass

    #D:\Devel\github\keras-rl;D:\Devel\github\Devel\hz-b\naus
    # set PYTHONPATH=D:\Devel\github\keras-rl;D:\Devel\github\Devel\hz-b\naus
    # & python d:\Devel\github\Devel\hz-b\naus\examples\rl\cart_pole\sarsa_cartpole.py

    def stop_my_application():
        print('Stopping application')

    with allow_interrupt():
        # main polling loop.

        env = EnvironmentProxyForClient(receiver=None)
        np.random.seed(1974)
        env.seed(1974)

        env.reset()

        # nb_actions = cpst._action_space
        nb_actions = 2
        # Next, we build a very simple model.
        model = Sequential()
        #n_os = cpst._observation_space.shape

        n_os = 4
        model.add(Flatten(input_shape=[1] + [n_os]))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(nb_actions))
        model.add(Activation('linear'))
        print(model.summary())

        # SARSA does not require a memory.
        policy = BoltzmannQPolicy()
        sarsa = SARSAAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=policy)
        sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

        run_test(sarsa, env, log=log)
Example #27
def main():
    env = gym.make("balancebot-v0")
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(12))
    model.add(Activation('relu'))
    model.add(Dense(9))
    model.add(Activation('softmax'))
    # print(model.summary())

    memory = SequentialMemory(limit=100000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=env.action_space.n,
                   memory=memory,
                   nb_steps_warmup=10,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    dqn.fit(env, nb_steps=15000, visualize=True, verbose=2, callbacks=None)

    # act = deepq.learn(env,
    #     q_func=model,
    #     lr=1e-3,
    #     max_timesteps=100000,
    #     buffer_size=100000,
    #     exploration_fraction=0.1,
    #     exploration_final_eps=0.02,
    #     print_freq=10,
    #     callback=callback
    # )
    print("Saving model to balance.pkl")
    # After training is done, we save the final weights.
    dqn.save_weights('balance.pkl', overwrite=True)
    print("================================================")
    print('\n')

    #Load the saved weights to dqn
    dqn.load_weights('balance.pkl')

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
Example #28
def get_dqn(layer1, layer2, layer3, dropout):
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(layer1, activation='relu'))
    model.add(Dense(layer2, activation='relu'))
    model.add(Dense(layer3, activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(nb_actions, activation='linear'))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=200000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10000,
                   target_model_update=args.target_model_update, policy=policy)
    dqn.compile(Adam(lr=args.learning_rate), metrics=['mae'])
    return dqn
Example #29
def getSmallDuelDQNModel():
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + (NUM_OBSERVATIONS, )))
    model.add(Dense(170, activation='relu', use_bias=False))
    model.add(Dense(nb_actions, activation='linear', use_bias=False))
    memory = SequentialMemory(limit=100000, window_length=1)
    policy = BoltzmannQPolicy()
    tmpDQN = DQNAgent(model=model,
                      nb_actions=nb_actions,
                      memory=memory,
                      nb_steps_warmup=1000,
                      enable_dueling_network=True,
                      dueling_type='avg',
                      enable_double_dqn=True,
                      target_model_update=1e-2,
                      policy=policy)
    tmpDQN.compile(Adam(lr=0.003), metrics=['mae'])
    return tmpDQN
Example #30
def Mainthread():
    for k in range(10):
        if (k == 0):
            Gen_C = Genetic.Chromosomes_Offset()
            Gen_List = Gen_C.initGen(8)
            for i in range(len(Gen_List)):
                f = open('./CrS/' + str(i) + '.txt', 'w')
                f.write(Gen_List[i][0])
                f.write(Gen_List[i][1])
                f.close()
            Mgen = []
            Sgen = []
            for i in range(len(Gen_List)):
                Mgen.append(Gen_List[i][0])
                Sgen.append(Gen_List[i][1])
            for i in range(len(Mgen)):
                Model = Model_Converter.GeneticModel(Mgen[i], Sgen[i]).model
                model_json = Model.to_json()
                f = open('./model/model' + str(i) + '.json', 'w')
                f.write(model_json)
                f.close()
        else:
            Gen_M = gen_main.GenMain()
            Gen_M.main()

        for j in range(8):
            json_file = open("./model/model" + str(j) + ".json", "r")
            loaded_model_json = json_file.read()
            json_file.close()
            loaded_model = keras.models.model_from_json(loaded_model_json)
            memory = SequentialMemory(limit=50000, window_length=1)
            policy = BoltzmannQPolicy()
            dqn = DQNAgent(model=loaded_model,
                           nb_actions=nb_actions,
                           memory=memory,
                           nb_steps_warmup=10,
                           target_model_update=1e-2,
                           policy=policy)
            dqn.compile(Adam(lr=1e-3), metrics=['mae'])
            dqn.fit(env, nb_steps=30000, visualize=False, verbose=2)
            dqn.save_weights('t_score/dqn_' + str(k) + '_' + str(j) +
                             '{}_weights.h5f'.format(env_name),
                             overwrite=True)
            Calc_E_Cons_and_Perfomance(dqn, j)