Example #1
# Feed-forward policy network with a softmax head over the discrete actions
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))

print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = EpisodeParameterMemory(limit=1000, window_length=1)

cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               batch_size=50,
               nb_steps_warmup=2000,
               train_interval=50,
               elite_frac=0.05)
cem.compile()

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
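
This excerpt assumes `env`, `nb_actions`, and the keras / keras-rl imports are already defined earlier in the file. A minimal sketch of that missing setup, assuming the CartPole environment used by keras-rl's own CEM example (the environment name and seed are assumptions, not part of the original):

import gym
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from rl.agents.cem import CEMAgent
from rl.memory import EpisodeParameterMemory

ENV_NAME = 'CartPole-v0'          # assumed environment
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n   # number of discrete actions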
Example #2
    observation_n = env.observation_space.shape[0]

    # create a model
    model = Sequential()
    model.add(Flatten(input_shape=(args.batch_size, observation_n)))
    # Complex Deep NN Model
    for i in range(args.hidden_layers):
        model.add(Dense(args.hidden_units))
        model.add(Activation(args.activation_function))
    model.add(Dense(nb_actions))
    model.add(Activation('softmax'))
    print(model.summary())

    Agent = AGENT_DIC[args.agent]
    if args.agent == 'cem':
        memory = EpisodeParameterMemory(limit=args.memory_limit,
                                        window_length=args.batch_size)
        agent = Agent(model=model,
                      nb_actions=nb_actions,
                      memory=memory,
                      batch_size=args.batch_size,
                      nb_steps_warmup=args.steps_warmup,
                      train_interval=1,
                      elite_frac=args.elite_frac)
        agent.compile()
    elif args.agent == 'dqn':
        memory = SequentialMemory(limit=args.memory_limit,
                                  window_length=args.batch_size)
        policy = BoltzmannQPolicy()
        agent = DQNAgent(model=model,
                         nb_actions=nb_actions,
                         memory=memory,
Example #3
def main():
    """Build model and train on environment."""
    #env = MarketEnv(("ES", "FUT", "GLOBEX", "USD"), obs_xform=xform.BinaryDelta(3), episode_steps=STEPS_PER_EPISODE, client_id=3)
    #env = MarketEnv("BTC-USD", max_quantity = 10, quantity_increment = 1, obs_type = 'time', obs_size = 30, obs_xform=xform.BinaryDelta(3), episode_steps=STEPS_PER_EPISODE, client_id=2, loglevel=logging.DEBUG)
    env = gym.make('trading-v0').env
    env.initialise(symbol='000001',
                   start='2012-01-01',
                   end='2017-01-01',
                   days=252)
    #env = MarketEnv(("AAPL", "STK", "SMART", "USD"), obs_xform=xform.BinaryDelta(3), episode_steps=STEPS_PER_EPISODE, client_id=4)
    nb_actions = 3  # Keras-RL CEM is a discrete agent

    # Option 1 : Simple model
    # model = Sequential([
    #     Flatten(input_shape=(1,) + env.observation_space.shape),
    #     Dense(nb_actions),
    #     Activation('softmax')
    # ])

    # Option 2: deep network
    # One hidden node per element of the (flattened) observation
    hidden_nodes = reduce(operator.mul, env.observation_space.shape, 1)
    model = Sequential([
        Flatten(input_shape=(1, ) + env.observation_space.shape),
        Dense(hidden_nodes),
        Activation('relu'),
        Dense(hidden_nodes),
        Activation('relu'),
        Dense(hidden_nodes),
        Activation('relu'),
        Dense(nb_actions),
        Activation('softmax')
    ])

    print(model.summary())

    param_logger = CEMParamLogger('cem_{}_params.json'.format('aaaa'))
    callbacks = [
        param_logger,
        FileLogger('cem_{}_log.json'.format('aaaa'),
                   interval=STEPS_PER_EPISODE)
    ]

    # Start with the last saved params if present
    theta_init = param_logger.read_params()
    if theta_init is not None:
        print('Starting with parameters from {}:\n{}'.format(
            param_logger.params_filename, theta_init))

    memory = EpisodeParameterMemory(
        limit=EPISODES, window_length=1
    )  # Remember the parameters and rewards for the last `limit` episodes.
    cem = CEMAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   batch_size=EPISODES,
                   nb_steps_warmup=WARMUMP_EPISODES * STEPS_PER_EPISODE,
                   train_interval=TRAIN_INTERVAL_EPISODES,
                   elite_frac=0.2,
                   theta_init=theta_init,
                   processor=DiscreteProcessor(),
                   noise_decay_const=0,
                   noise_ampl=0)
    """
    :param memory: Remembers the parameters and rewards for the last `limit` episodes.
    :param int batch_size: Randomly sample this many episode parameters from memory before taking the top `elite_frac` to construct the next gen parameters from.
    :param int nb_steps_warmup: Run for this many steps (total) to fill memory before training
    :param int train_interval: Train (update parameters) every this many episodes
    :param float elite_frac: Take this top fraction of the `batch_size` randomly sampled parameters from the episode memory to construct new parameters.
    """
    cem.compile()
    cem.fit(env,
            nb_steps=STEPS_PER_EPISODE * EPISODES,
            visualize=False,
            verbose=2,
            callbacks=callbacks)
    # cem.save_weights('cem_{}_weights.h5f'.format(env.instrument.symbol), overwrite=True)
    cem.test(env, nb_episodes=2, visualize=True)
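
The module-level names used above (the uppercase constants, plus `reduce` and `operator` for the hidden-node count) are defined outside this excerpt. A sketch with hypothetical values, purely to make the excerpt self-contained; the real source may use different numbers:

import operator
from functools import reduce      # reduce() is not a builtin in Python 3

# Hypothetical values for the constants referenced above.
STEPS_PER_EPISODE = 252           # e.g. one trading year of daily bars
EPISODES = 100                    # episodes sampled per CEM batch / kept in memory
WARMUMP_EPISODES = 5              # (sic) spelling kept to match the snippet
TRAIN_INTERVAL_EPISODES = 10      # update CEM parameters every N episodes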
Example #4
def train():
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n
    obs_dim = env.observation_space.shape[0]

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    # Option 1 : Simple model
    # model = Sequential()
    # model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    # model.add(Dense(nb_actions))
    # model.add(Activation('softmax'))

    # Option 2: deep network
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('softmax'))

    model.summary()

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = EpisodeParameterMemory(limit=1000, window_length=1)

    if REWARD == "normal":
        cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
                       batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
        cem.compile()
        history_normal = cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
        cem.save_weights(os.path.join(LOG_DIR, 'cem_normal_{}_params.h5f'.format(ENV_NAME)), overwrite=True)
        cem.test(env, nb_episodes=5, visualize=False)

        pandas.DataFrame(history_normal.history).to_csv(os.path.join(LOG_DIR, "normal.csv"))

    elif REWARD == "noisy":
        if not SMOOTH:
            processor_noisy = CartpoleProcessor(e_=ERR_N, e=ERR_P, smooth=False, surrogate=False)
        else:
            processor_noisy = CartpoleProcessor(e_=ERR_N, e=ERR_P, smooth=True, surrogate=False)

        # processor_surrogate = CartpoleSurrogateProcessor(e_=ERR_N, e=ERR_P, surrogate=False)
        cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
                       batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05,
                       processor=processor_noisy)
        cem.compile()
        history_noisy = cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
        if not SMOOTH:
            cem.save_weights(os.path.join(LOG_DIR, 'cem_noisy_{}_params.h5f'.format(ENV_NAME)), overwrite=True)
            pandas.DataFrame(history_noisy.history).to_csv(os.path.join(LOG_DIR, "noisy.csv"))

        else:
            cem.save_weights(os.path.join(LOG_DIR, 'cem_noisy_smooth_{}_params.h5f'.format(ENV_NAME)), overwrite=True)
            pandas.DataFrame(history_noisy.history).to_csv(os.path.join(LOG_DIR, "noisy_smooth.csv"))

        cem.test(env, nb_episodes=5, visualize=False)

    elif REWARD == "surrogate":
        if not SMOOTH:
            processor_surrogate = CartpoleProcessor(e_=ERR_N, e=ERR_P, smooth=False, surrogate=True)
        else:
            processor_surrogate = CartpoleProcessor(e_=ERR_N, e=ERR_P, smooth=True, surrogate=True)

        # processor_surrogate = CartpoleSurrogateProcessor(e_=ERR_N, e=ERR_P, surrogate=True)
        cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
                       batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05,
                       processor=processor_surrogate)
        cem.compile()
        history_surrogate = cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
        if not SMOOTH:
            cem.save_weights(os.path.join(LOG_DIR, 'cem_surrogate_{}_params.h5f'.format(ENV_NAME)), overwrite=True)
            pandas.DataFrame(history_surrogate.history).to_csv(os.path.join(LOG_DIR, "surrogate.csv"))
        else:
            cem.save_weights(os.path.join(LOG_DIR, 'cem_surrogate_smooth_{}_params.h5f'.format(ENV_NAME)), overwrite=True)
            pandas.DataFrame(history_surrogate.history).to_csv(os.path.join(LOG_DIR, "surrogate_smooth.csv"))

        cem.test(env, nb_episodes=5, visualize=False)

    else:
        raise NotImplementedError
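
The three REWARD branches above differ only in the processor flags and the output file tag, so they can be collapsed; a sketch using only names that already appear in the snippet (the helper name itself is hypothetical):

def build_processor_and_tag(reward, smooth):
    """Map REWARD/SMOOTH to a keras-rl processor and a file-name tag (sketch)."""
    if reward == "normal":
        return None, "normal"
    if reward not in ("noisy", "surrogate"):
        raise NotImplementedError
    processor = CartpoleProcessor(e_=ERR_N, e=ERR_P, smooth=smooth,
                                  surrogate=(reward == "surrogate"))
    return processor, reward + ("_smooth" if smooth else "")

# Usage sketch: processor, tag = build_processor_and_tag(REWARD, SMOOTH), then a
# single CEMAgent(..., processor=processor) and file names built from the tag,
# e.g. 'cem_{}_{}_params.h5f'.format(tag, ENV_NAME) and '{}.csv'.format(tag).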
Example #5
File: pem.py  Project: mintxtinm/IGPM-PEM
sys.path.append(".")

from patternmatching.gray.incremental.query_call import load_graph, parse_args
from patternmatching.gray.incremental.rl_model import GraphEnv

logging.basicConfig(level=logging.INFO)

policies = {
    "bqp": BoltzmannQPolicy(),  # Unstable
    "gqp": GreedyQPolicy(),
    "egqp": EpsGreedyQPolicy(eps=0.1)  # eps should be around 0.1
}

window_length = 5  # Should be less than 20 (too large a value keeps the Q-values from converging)
memories = {
    "epm": EpisodeParameterMemory(limit=20,
                                  window_length=window_length),  # Non-episodic
    "sm": SequentialMemory(limit=20,
                           window_length=window_length)  # should use this
}

argv = sys.argv
if len(argv) < 4:
    print("Usage: python %s [ConfFile] [Policy] [Memory]" % argv[0])
    exit(1)

policy_name = argv[2]
if policy_name not in policies:
    print("Please specify a correct policy name: %s" % str(policies.keys()))
    exit(1)
policy = policies[policy_name]
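
The excerpt stops after the policy lookup; the usage string above also expects a memory name, so the continuation presumably mirrors the same pattern. A sketch, not the original code:

# Sketch: select the memory the same way the policy is selected above.
memory_name = argv[3]
if memory_name not in memories:
    print("Please specify a correct memory name: %s" % str(memories.keys()))
    exit(1)
memory = memories[memory_name]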
Example #6
# Option 2: deep network
model = Sequential()
model.add(Dense(16, input_dim=obs_dim))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))

print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = EpisodeParameterMemory(limit=1000, window_length=1)

cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               batch_size=50,
               nb_steps_warmup=2000,
               train_interval=50,
               elite_frac=0.05)
cem.compile()

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
Example #7
def run_agent(agent):
    print("started new process")

    import tensorflow as tf
    from keras.backend.tensorflow_backend import set_session

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))

    WINDOW_LENGTH = 1

    num_actions = 3
    view_shape = (21, 21)
    input_shape = (WINDOW_LENGTH, ) + view_shape

    env = RestrictedViewTronEnv(agent, 10)

    model = Sequential()

    model.add(Permute((2, 3, 1), input_shape=input_shape))

    model.add(Conv2D(16, (3, 3), padding="same"))
    model.add(Activation("relu"))

    model.add(Conv2D(32, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(Flatten())

    model.add(Dense(256))
    model.add(Activation("relu"))

    model.add(Dense(num_actions))
    model.add(Activation('softmax'))
    np.random.seed(2363)

    #policy = LinearAnnealedPolicy(BoltzmannQPolicy(), attr='tau', value_max=2.,
    #                              value_min=.1, value_test=.1, nb_steps=1000000 // 10)

    processor = TronProcessor()

    memory = EpisodeParameterMemory(limit=1000000, window_length=WINDOW_LENGTH)

    cem = CEMAgent(model,
                   nb_actions=num_actions,
                   memory=memory,
                   processor=processor,
                   nb_steps_warmup=50000 // 5,
                   train_interval=4)

    #dqn.compile(Adam(lr=.00025), metrics=["mae"])
    cem.compile()

    weights_filename = 'tmp/dqn_test_weights.h5f'
    checkpoint_weights_filename = 'tmp/dqn_test_weights_{step}.h5f'
    log_filename = 'tmp/dqn_test_log.json'
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename,
                                interval=250000 // 10)
    ]
    callbacks += [FileLogger(log_filename, interval=10000)]

    def train(transfer=False):
        print(cem.get_config())  # todo save to file

        if transfer:
            cem.load_weights(weights_filename)

        cem.fit(env,
                callbacks=callbacks,
                nb_steps=1750000 // 10,
                log_interval=10000)
        cem.save_weights(weights_filename, overwrite=True)
        cem.test(env, nb_episodes=20, visualize=True)

    def opponent():
        cem.load_weights('tmp/dqn_test_weights.h5f')
        cem.test(env, nb_episodes=200000, visualize=False)

    def test():
        cem.load_weights('tmp/dqn_test_weights.h5f')
        cem.test(env, nb_episodes=20, visualize=True)

    # opponent()
    train()  # True
Example #8
def main(params=None):
    """
    performs training and evaluation of params
    :return: model
    """
    if params is None:
        params = {
            'model_type': 'dqn_agent',
            'l1_out': 128,
            'l2_out': 64,
            'gamma': 0.5,
            'target_model_update': 1,
            'delta_clip': 0.01,
            'nb_steps_warmup': 1000,
            # Assumed defaults for keys referenced later in this function;
            # the original source defines them elsewhere.
            'enable_double_dqn__': True,
            'dueling_type__': 'avg',
            'name': 'dqn_agent'
        }

    model_type = 'dqn_agent'
    env_player = SimpleRLPlayer(battle_format="gen8randombattle")
    # print('env_player',env_player)
    # print('help', help(env_player))
    env_player2 = SimpleRLPlayer(battle_format="gen8randombattle")

    opponent = RandomPlayer(battle_format="gen8randombattle")
    second_opponent = MaxDamagePlayer(battle_format="gen8randombattle")

    # Output dimension
    n_action = len(env_player.action_space)

    # model_params = {
    #     'n_actions': n_action,
    #     'l1_out': 128,
    #     'l2_out': 64,
    #     'model_type': params['model_type']
    # }
    model_params = params
    model_params['n_actions'] = n_action

    model = get_model(model_params)

    # print('first model summary')
    # print(model.summary())
    # model = Sequential()
    # model.add(Dense(128, activation="elu", input_shape=(1, 10)))
    #
    # # Our embedding have shape (1, 10), which affects our hidden layer
    # # dimension and output dimension
    # # Flattening resolve potential issues that would arise otherwise
    # model.add(Flatten())
    # model.add(Dense(64, activation="elu"))
    # model.add(Dense(n_action, activation="linear"))

    # elu activation is similar to relu
    # https://ml-cheatsheet.readthedocs.io/en/latest/activation_functions.html#elu

    # determine memory type
    if params['model_type'] in {'dqn_agent', 'sarsa_agent'}:
        # memory = SequentialMemory(limit=10000, window_length=1)
        memory = SequentialMemory(limit=NB_TRAINING_STEPS, window_length=1)
    else:
        memory = EpisodeParameterMemory(limit=10000, window_length=1)

    # Simple epsilon greedy
    # What is a linear annealed policy?
    # - this policy gives gradually decreasing thresholds for the epsilon greedy policy
    # - it acts as a wrapper around epsilon greedy to feed in a custom threshold
    pol_steps = NB_TRAINING_STEPS
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr="eps",
        value_max=1.0,
        value_min=0.05,
        value_test=0,
        nb_steps=pol_steps,
    )
    # pol_steps = NB_TRAINING_STEPS
    policy_boltz = BoltzmannQPolicy(tau=1)
    # policy = LinearAnnealedPolicy(
    #     BoltzmannQPolicy(),
    #     attr="tau",
    #     value_max=1.0,
    #     value_min=0.05,
    #     value_test=0,
    #     nb_steps=pol_steps,
    # )
    policy = policy_boltz

    # Defining our DQN
    # model = tf.keras.models.load_model('dqn_v_dqn')

    if params['model_type'] == 'dqn_agent':
        dqn = DQNAgent(
            model=model,
            nb_actions=len(env_player.action_space),
            policy=policy,
            memory=memory,
            nb_steps_warmup=params['nb_steps_warmup'],
            gamma=params['gamma'],
            target_model_update=params['target_model_update'],
            # delta_clip=0.01,
            delta_clip=params['delta_clip'],
            enable_double_dqn=params['enable_double_dqn__'],
            enable_dueling_network=params['enable_double_dqn__'],
            dueling_type=params['dueling_type__'])
        dqn.compile(Adam(lr=0.00025), metrics=["mae"])

    elif params['model_type'] == 'sarsa_agent':
        dqn = SARSAAgent(model=model,
                         nb_actions=len(env_player.action_space),
                         policy=policy,
                         nb_steps_warmup=params['nb_steps_warmup'],
                         gamma=params['gamma'],
                         delta_clip=params['delta_clip'])
        dqn.compile(Adam(lr=0.00025), metrics=["mae"])
    else:
        # CEMAgent
        # https://towardsdatascience.com/cross-entropy-method-for-reinforcement-learning-2b6de2a4f3a0
        dqn = CEMAgent(model=model,
                       nb_actions=len(env_player.action_space),
                       memory=memory,
                       nb_steps_warmup=params['nb_steps_warmup'])
        # different compile function
        dqn.compile()

    # dqn.compile(Adam(lr=0.00025), metrics=["mae"])
    # opponent dqn
    dqn_opponent = DQNAgent(
        model=model,
        nb_actions=len(env_player.action_space),
        policy=policy,
        memory=memory,
        nb_steps_warmup=params['nb_steps_warmup'],
        gamma=params['gamma'],
        target_model_update=params['target_model_update'],
        # delta_clip=0.01,
        delta_clip=params['delta_clip'],
        enable_double_dqn=params['enable_double_dqn__'],
        enable_dueling_network=params['enable_double_dqn__'],
        dueling_type=params['dueling_type__'])
    dqn_opponent.compile(Adam(lr=0.00025), metrics=["mae"])
    # NB_TRAINING_STEPS = NB_TRAINING_STEPS

    # rl_opponent = TrainedRLPlayer(model)
    # Training
    rounds = 4
    n_steps = NB_TRAINING_STEPS // rounds

    for k in range(rounds):
        env_player.play_against(
            env_algorithm=dqn_training,
            opponent=opponent,
            env_algorithm_kwargs={
                "dqn": dqn,
                "nb_steps": n_steps
            },
        )
        env_player.play_against(
            env_algorithm=dqn_training,
            opponent=second_opponent,
            env_algorithm_kwargs={
                "dqn": dqn,
                "nb_steps": n_steps
            },
        )

    name = params["name"] + "_model"
    model.save(name)

    # loaded_model = tf.keras.models.load_model(name)

    # Evaluation
    print("Results against random player:")
    env_player.play_against(
        env_algorithm=dqn_evaluation,
        opponent=opponent,
        env_algorithm_kwargs={
            "dqn": dqn,
            "nb_episodes": NB_EVALUATION_EPISODES
        },
    )

    print("\nResults against max player:")
    env_player.play_against(
        env_algorithm=dqn_evaluation,
        opponent=second_opponent,
        env_algorithm_kwargs={
            "dqn": dqn,
            "nb_episodes": NB_EVALUATION_EPISODES
        },
    )

    return model
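
main() depends on dqn_training and dqn_evaluation callbacks defined elsewhere in the project; a minimal sketch of what they typically look like in poke-env's keras-rl examples (an assumption, not the author's exact code):

def dqn_training(player, dqn, nb_steps):
    # Train the agent against the given opponent for nb_steps environment steps.
    dqn.fit(player, nb_steps=nb_steps)
    player.complete_current_battle()

def dqn_evaluation(player, dqn, nb_episodes):
    # Reset battle statistics, then evaluate without further training.
    player.reset_battles()
    dqn.test(player, nb_episodes=nb_episodes, visualize=False, verbose=False)
    print("Won %d battles out of %d episodes" % (player.n_won_battles, nb_episodes))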
Example #9
# Standard DQN model architecture.
input_shape = (WINDOW_LENGTH * INPUT_SHAPE[0], )
frame = Input(shape=input_shape)
dense = Dense(512, activation='relu')(frame)
dense = Dense(512, activation='relu')(dense)
buttons = Dense(nb_actions, activation='linear')(dense)
buttons = Softmax()(buttons)
model = Model(inputs=frame, outputs=buttons)
print(model.summary())

processor = AtariProcessor()

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = EpisodeParameterMemory(limit=100000, window_length=WINDOW_LENGTH)

cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               processor=processor,
               batch_size=50,
               nb_steps_warmup=2000,
               train_interval=50,
               elite_frac=0.05)
cem.compile()

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
cem.fit(env, nb_steps=1000000, visualize=False, verbose=2)
Example #10
# Feed-forward policy network with a softmax head over the discrete actions
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))

print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = EpisodeParameterMemory(limit=MEMORY_LIMIT,
                                window_length=WINDOW_LENGHT)

cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               batch_size=BATCH_SIZE,
               nb_steps_warmup=NB_STEPS_WARMUP,
               train_interval=TRAIN_INTERVAL,
               elite_frac=ELITE_FRAC)
cem.compile()

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
cem.fit(env, nb_steps=NB_STEPS, visualize=VISUALIZE_TRAIN, verbose=VERBOSE)
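
The uppercase constants are defined elsewhere in the source file; hypothetical values that reproduce the literals used inline in Example #1 would be:

# Hypothetical constant values; Example #1 uses these same numbers inline.
MEMORY_LIMIT = 1000
WINDOW_LENGHT = 1          # (sic) name kept as spelled in the snippet above
BATCH_SIZE = 50
NB_STEPS_WARMUP = 2000
TRAIN_INTERVAL = 50
ELITE_FRAC = 0.05
NB_STEPS = 100000
VISUALIZE_TRAIN = False
VERBOSE = 2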