Example #1
def train(env_id, num_timesteps, seed):
    from baselines.ppo1 import pposgd_simple, cnn_policy
    import baselines.common.tf_util as U
    rank = MPI.COMM_WORLD.Get_rank()
    sess = U.single_threaded_session()
    sess.__enter__()
    if rank == 0:
        logger.configure()
    else:
        logger.configure(format_strs=[])
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()
    set_global_seeds(workerseed)
    #env = make_atari(env_id)

    env = gym_super_mario_bros.make('SuperMarioBros-v1')
    # env = gym_super_mario_bros.make('SuperMarioBrosNoFrameskip-v3')

    env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)
    env = ProcessFrame84(env)

    env = FrameMemoryWrapper(env)

    def policy_fn(name, ob_space, ac_space): #pylint: disable=W0613
        return cnn_policy.CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space)
    env = bench.Monitor(env, logger.get_dir() and
        osp.join(logger.get_dir(), str(rank)))
    #env = wrap_deepmind(env)
    env.seed(workerseed)

    def render_callback(lcl, _glb):
        # Called by the learner once per iteration with its local and global
        # scopes; here it is only used to render the environment.
        # total_steps = lcl['env'].total_steps
        # if total_steps % 1000 == 0:
        #     print("Saving model to mario_model.pkl")
        #     act.save("../models/mario_model_{}.pkl".format(modelname))
        env.render()

    pposgd_simple.learn(env, policy_fn,
        max_timesteps=int(num_timesteps * 1.1),
        timesteps_per_actorbatch=2048,
        clip_param=0.2, entcoeff=0.01,
        optim_epochs=4,
        optim_stepsize=1e-3, # 3e-4
        optim_batchsize=64, #256
        gamma=0.99, lam=0.95,
        schedule='linear',
        callback=render_callback
    )
    env.close()
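
# The snippet above assumes the surrounding file's module-level imports; a
# minimal sketch of what those might look like (an assumption, not the original
# file header; import paths can differ between library versions).
# ProcessFrame84 and FrameMemoryWrapper are custom observation wrappers
# defined elsewhere in the project and are not shown here.
import os.path as osp

from mpi4py import MPI

import gym_super_mario_bros
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from nes_py.wrappers import BinarySpaceToDiscreteSpaceEnv

from baselines import bench, logger
from baselines.common import set_global_seeds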
Example #2
		exploration=exploration_schedule,
		replay_buffer_size=REPLAY_BUFFER_SIZE,
		batch_size=BATCH_SIZE,
		gamma=GAMMA,
		learning_starts=LEARNING_STARTS,
		learning_freq=LEARNING_FREQ,
		frame_history_len=FRAME_HISTORY_LEN,
		target_update_freq=TARGET_UPDATE_FREQ
	)

if __name__ == '__main__':

	# Initialize the environment using gym_super_mario_bros
	env = gym_super_mario_bros.make('SuperMarioBros-1-1-v3')
	env = BinarySpaceToDiscreteSpaceEnv(env, COMPLEX_MOVEMENT)

	# set global seeds
	env.seed(SEED)
	torch.manual_seed(SEED)
	np.random.seed(SEED)
	random.seed(SEED)

	# monitor & wrap the game
	env = wrap_mario(env)

	expt_dir = 'video/mario'
	env = wrappers.Monitor(env, expt_dir, force=True, video_callable=lambda count: count % 10 == 0)

	# main
	main(env)
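
# wrap_mario is defined elsewhere in the project and is not shown above. A
# plausible sketch of such a preprocessing helper, following the usual DeepMind
# Atari setup (an assumption, not the original implementation):
import cv2
import gym
import numpy as np


class ResizeGrayFrame(gym.ObservationWrapper):
    """Convert RGB frames to 84x84 grayscale observations."""

    def __init__(self, env):
        super().__init__(env)
        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)

    def observation(self, obs):
        gray = cv2.cvtColor(obs, cv2.COLOR_RGB2GRAY)
        resized = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_AREA)
        return resized[:, :, None]


def wrap_mario(env):
    # Grayscale + downscale only; frame stacking is handled separately by the
    # learner's frame_history_len parameter above.
    return ResizeGrayFrame(env)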
Example #3
    ['NOOP'],
    ['right'],
    ['right', 'A'],
    ['right', 'B'],
    ['right', 'A', 'B'],
    ['A'],
    ['left'],
]"""

state_size = env.observation_space.shape
action_size = env.action_space.n
print("state_size", state_size)
print("action_size", action_size)

"""Set random seed"""
env.seed(random_seed)
np.random.seed(random_seed)
tf.set_random_seed(random_seed)

# create dqn agent
sess = tf.Session()
dqn = DQNAgent(sess, state_size, action_size)

sess.run(tf.global_variables_initializer())

if args.load_from is not None:
    dqn.load_model(args.load_from)


def train():
    total_step = 1
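
# DQNAgent is defined elsewhere in the project. The visible part of the snippet
# only relies on an interface roughly like the following sketch (an assumption,
# not the original class):
class DQNAgent:
    def __init__(self, sess, state_size, action_size):
        # Build the online and target Q-networks inside the given TF session.
        self.sess = sess
        self.state_size = state_size
        self.action_size = action_size

    def load_model(self, path):
        # Restore previously saved network weights, e.g. with tf.train.Saver.
        pass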
Example #4
    def process_reward(self, reward):
        return np.clip(reward, -1., 1.)


parser = argparse.ArgumentParser()
parser.add_argument('--mode', choices=['train', 'test'], default='train')
parser.add_argument('--env-name', type=str, default='SuperMarioBros-v0')
parser.add_argument('--weights', type=str, default=None)
args = parser.parse_args()

# Get the environment and extract the number of actions.
env = gym_super_mario_bros.make('SuperMarioBros-v0')
env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

# Next, we build our model. We use the same model that was described by Mnih et al. (2015).
input_shape = (WINDOW_LENGTH, ) + INPUT_SHAPE
model = Sequential()
if K.image_dim_ordering() == 'tf':
    # (width, height, channels)
    model.add(Permute((2, 3, 1), input_shape=input_shape))
elif K.image_dim_ordering() == 'th':
    # (channels, width, height)
    model.add(Permute((1, 2, 3), input_shape=input_shape))
else:
    raise RuntimeError('Unknown image_dim_ordering.')
model.add(Convolution2D(32, (8, 8), strides=(4, 4)))
model.add(Activation('relu'))
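
# The example is truncated after the first convolution; the Mnih et al. (2015)
# network it refers to typically continues as follows (a sketch of the
# remaining layers, not the original file; Flatten and Dense are assumed to be
# imported from keras.layers alongside Convolution2D).
model.add(Convolution2D(64, (4, 4), strides=(2, 2)))
model.add(Activation('relu'))
model.add(Convolution2D(64, (3, 3), strides=(1, 1)))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))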
use_cuda = torch.cuda.is_available()

device = torch.device("cuda:0" if use_cuda else "cpu")
Tensor = torch.Tensor
LongTensor = torch.LongTensor

env = gym_super_mario_bros.make('SuperMarioBros-v0')
env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)


directory = './resVideos/'
# record a video of every episode (episode_id % 1 is always 0)
env = gym.wrappers.Monitor(env, directory, video_callable=lambda episode_id: episode_id % 1 == 0)


seed_value = 23
env.seed(seed_value)
torch.manual_seed(seed_value)
random.seed(seed_value)

###### PARAMS ######
learning_rate = 0.0001
num_episodes = 5000
startNum = 500
newModel = False

gamma = 0.99

hidden_layer = 512

replay_mem_size = 100000
batch_size = 32
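
# The parameters above typically feed a replay memory for DQN training; a
# minimal sketch of such a buffer (an assumption, since the original training
# loop is not shown):
import random
from collections import deque


class ReplayMemory:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        batch = random.sample(self.buffer, batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)
        return states, actions, rewards, next_states, dones

    def __len__(self):
        return len(self.buffer)


memory = ReplayMemory(replay_mem_size)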