import sys

from drlbox.trainer import make_trainer
# make_env, make_feature, and state_to_input are project-local helpers
# defined elsewhere in this repository.


def main():
    trainer = make_trainer(
        'a3c',
        env_maker=lambda: make_env(*sys.argv[1:]),
        feature_maker=lambda o: make_feature(o, num_hid=100),
        state_to_input=state_to_input,
        num_parallel=12,
        train_steps=100000000,
        interval_save=100000,
        save_dir='output',
        catch_signal=True,
        verbose=True,
    )
    trainer.run()
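

# The helpers assumed above are not shown in this snippet. Below is a
# minimal sketch of plausible definitions, assuming a plain gym environment
# and a Keras-style API; names, shapes, and dtypes here are illustrative
# assumptions, not the original implementations.
import gym
import numpy as np
from tensorflow.keras.layers import Activation, Dense, Input


def make_env(name, *args):
    # Sketch: ignore any extra CLI arguments and build the named environment.
    return gym.make(name)


def make_feature(observation_space, num_hid=100):
    # Sketch: one fully-connected hidden layer over the flat observation.
    inp_state = Input(shape=observation_space.shape)
    feature = Activation('relu')(Dense(num_hid)(inp_state))
    return inp_state, feature


def state_to_input(state):
    # Sketch: convert an environment state to a network-ready array.
    return np.asarray(state, dtype=np.float32)


# Entry-point guard (assumed; the original snippet ends before reaching it).
if __name__ == '__main__':
    main()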
Example 2
# gym, make_trainer, make_model, and PATH_TO_MODEL are defined in parts of
# this file that the snippet omits.

# Commented-out remnant of an alternative model head: a Dense(512) feature
# layer followed by actor (policy logits) and critic (value) output streams.
# feature = layers.Dense(512)(conv_flat)
# feature = layers.Activation('relu')(feature)
# logits = layers.Dense(4)(feature)
# value = layers.Dense(4)(feature)
# return models.Model(inputs=ph_state, outputs=[logits, value])

if __name__ == '__main__':
    trainer = make_trainer(
        algorithm='acer',
        env_maker=lambda: gym.make("EuroDolTrain-v0"),
        model_maker=make_model,
        num_parallel=1,
        train_steps=1000,
        online_learning=False,
        verbose=True,
        batch_size=1,
        save_dir=PATH_TO_MODEL,
    )
    trainer.run()

Example 3
from drlbox.evaluator import make_evaluator
from drlbox.trainer import make_trainer
# from baselines import deepq
# gym and the Keras layers (Input, Dense, Activation) used below are
# expected to come from the project's own module:
from tesr import *


# Stub callback in the style of baselines.deepq; returning False means
# training never stops early. Unused by the drlbox trainer below.
def callback(lcl, _glb):
    return False


def make_feature(observation_space, num_hid_list):
    # Note: Input expects the shape tuple, not the space object itself.
    inp_state = Input(shape=observation_space.shape)
    print('\n observation space:', observation_space)
    feature = inp_state
    for num_hid in num_hid_list:
        feature = Dense(num_hid)(feature)
        feature = Activation('relu')(feature)
    return inp_state, feature


if __name__ == '__main__':
    trainer = make_trainer(
        algorithm='impala',
        env_maker=lambda: gym.make("EuroDolTrain-v0"),
        feature_maker=lambda obs_space: make_feature(obs_space, [200, 100]),
        num_parallel=1,
        train_steps=2000,
        verbose=True,
        batch_size=2,
        save_dir="dir",
    )
    trainer.run()
Example 4
'''
DQN on Atari (default: Pong-v0)
'''
import argparse

from drlbox.trainer import make_trainer
# make_env, make_model, and state_to_input are project-local helpers
# defined elsewhere in this repository.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--envname', type=str, default='Pong-v0')
    parser.add_argument('--nbsteps', type=int, default=1750000)
    parser.add_argument('--exp',
                        choices=['eps', 'bq', 'bgq', 'leps', 'noisy'],
                        default='eps')
    args = parser.parse_args()

    ENV_NAME = args.envname
    POL = args.exp
    nb_steps = args.nbsteps

    trainer = make_trainer('dqn',
                           env_maker=lambda: make_env(ENV_NAME),
                           model_maker=make_model,
                           state_to_input=state_to_input,
                           train_steps=nb_steps,
                           rollout_maxlen=4,
                           batch_size=32,
                           verbose=True,
                           dqn_double=False,
                           noisynet='fg',
                           num_parallel=2,
                           replay_type='uniform',
                           replay_kwargs=dict(maxlen=1000000))
    trainer.run()
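

# The make_model helper is not shown. A minimal sketch, assuming a
# tensorflow.keras API, an 84x84x4 stacked-frame input, and that the DQN
# trainer expects a model whose single output holds the per-action Q-values
# (all assumptions; the sketch also ignores the noisynet option above):
from tensorflow.keras import layers, models


def make_model(env):
    # Nature-DQN-style conv stack ending in one Q-value per action.
    ph_state = layers.Input(shape=(84, 84, 4))
    net = layers.Conv2D(32, (8, 8), strides=(4, 4), activation='relu')(ph_state)
    net = layers.Conv2D(64, (4, 4), strides=(2, 2), activation='relu')(net)
    net = layers.Conv2D(64, (3, 3), strides=(1, 1), activation='relu')(net)
    net = layers.Flatten()(net)
    net = layers.Dense(512, activation='relu')(net)
    q_values = layers.Dense(env.action_space.n)(net)
    return models.Model(inputs=ph_state, outputs=q_values)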
Example 5
import gym
from drlbox.trainer import make_trainer
# Assuming a Keras-style API for the layers used below:
from tensorflow.keras.layers import Activation, Dense, Input


'''
Input arguments:
    observation_space: Observation space of the environment;
    num_hid_list:      List of hidden unit numbers in the fully-connected net.
'''
def make_feature(observation_space, num_hid_list):
    inp_state = Input(shape=observation_space.shape)
    feature = inp_state
    for num_hid in num_hid_list:
        feature = Dense(num_hid)(feature)
        feature = Activation('relu')(feature)
    return inp_state, feature


'''
A3C, CartPole-v0
'''
if __name__ == '__main__':
    trainer = make_trainer(
        algorithm='a3c',
        env_maker=lambda: gym.make('CartPole-v0'),
        feature_maker=lambda obs_space: make_feature(obs_space, [200, 100]),
        num_parallel=1,
        train_steps=1000,
        verbose=True,
    )
    trainer.run()
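

# After training, a saved model can be played back with drlbox's evaluator
# (see Example 6 for the full parameter set). A minimal sketch; the model
# path below is hypothetical and assumes a save_dir was passed to the
# trainer above.
from drlbox.evaluator import make_evaluator

evaluator = make_evaluator(
    algorithm='a3c',
    env_maker=lambda: gym.make('CartPole-v0'),
    load_model='output/model.h5',  # hypothetical path to a saved model
    num_episode=5,
    verbose=True,
)
evaluator.run()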
Example 6
import gym

from drlbox.evaluator import make_evaluator
from drlbox.trainer import make_trainer
# make_feature and the EuroDol environments are defined and registered
# elsewhere in this project.

if __name__ == '__main__':
    trainer = make_trainer(
        algorithm='acer',
        env_maker=lambda: gym.make("EuroDolTrain-v0"),
        feature_maker=lambda obs_space: make_feature(obs_space, [200, 100]),
        num_parallel=1,
        train_steps=1000,
        online_learning=False,
        verbose=True,
        batch_size=1,
        save_dir="F:\\model.h5",
    )
    trainer.run()

    evaluator = make_evaluator(
        env_maker=lambda: gym.make("EuroDolEval-v0"),
        render_timestep=1,
        load_model="F:\\dir\\model.h5",
        render_end=False,
        num_episode=1,
        algorithm='acer',
        verbose=True,
    )
    evaluator.run()
Example 7
from tensorflow.keras import initializers, layers, models

from drlbox.trainer import make_trainer
# make_env and state_to_input are project-local helpers defined elsewhere.


def make_model(env):
    # The original snippet began mid-function; the input layer and first two
    # conv layers are reconstructed here as the usual DQN-style stack, with
    # an assumed 84x84x4 stacked-frame input shape.
    ph_state = layers.Input(shape=(84, 84, 4))
    conv1 = layers.Conv2D(32, (8, 8), strides=(4, 4))(ph_state)
    conv1 = layers.Activation('relu')(conv1)
    conv2 = layers.Conv2D(64, (4, 4), strides=(2, 2))(conv1)
    conv2 = layers.Activation('relu')(conv2)
    conv3 = layers.Conv2D(64, (3, 3), strides=(1, 1))(conv2)
    conv3 = layers.Activation('relu')(conv3)
    conv_flat = layers.Flatten()(conv3)
    feature = layers.Dense(512)(conv_flat)
    feature = layers.Activation('relu')(feature)

    # actor (policy) and critic (value) streams
    size_logits = size_value = env.action_space.n
    logits_init = initializers.RandomNormal(stddev=1e-3)
    logits = layers.Dense(size_logits, kernel_initializer=logits_init)(feature)
    value = layers.Dense(size_value)(feature)
    return models.Model(inputs=ph_state, outputs=[logits, value])


'''
ACER on Breakout-v0
'''
if __name__ == '__main__':
    trainer = make_trainer(
        'acer',
        env_maker=lambda: make_env('Breakout-v0'),
        model_maker=make_model,
        state_to_input=state_to_input,
        num_parallel=1,
        train_steps=1000,
        rollout_maxlen=4,
        batch_size=8,
        verbose=True,
    )
    trainer.run()
Example 8
import os

import gym
from drlbox.trainer import make_trainer
from tensorflow.keras.layers import Activation, Dense, Input

'''
Input arguments:
    observation_space: Observation space of the environment;
    num_hid_list:      List of hidden unit numbers in the fully-connected net.
'''


def make_feature(observation_space, num_hid_list):
    inp_state = Input(shape=observation_space.shape)
    feature = inp_state
    for num_hid in num_hid_list:
        feature = Dense(num_hid)(feature)
        feature = Activation('relu')(feature)
    return inp_state, feature


if __name__ == '__main__':
    current_dir = os.getcwd()
    config_path = os.path.join(current_dir, "../configs/iris.config")
    os.environ["GYMFC_CONFIG"] = config_path
    env_name = 'AttFC_GyroErr-MotorVel_M4_Con-v0'
    #     env = RewScale(gym.make(env_name), 0.1)
    trainer = make_trainer(
        algorithm='a3c',
        # Build a fresh environment per parallel worker; sharing one
        # pre-built env instance across five workers is unsafe.
        env_maker=lambda: gym.make(env_name),
        feature_maker=lambda obs_space: make_feature(obs_space, [64, 128, 64]),
        num_parallel=5,
        train_steps=1000,
        verbose=True,
    )
    trainer.run()