Example 1
# generate_model(), env, and io are defined elsewhere in the original script;
# numpy is the only import this excerpt itself needs.
import numpy as np

model = generate_model()

# env.action_space.sample() = a random action index, e.g. 0, 1, 2, 3, ...
# state = the raw RGB frame; a numpy array with shape (240, 256, 3)
# reward = a number, e.g. 0, 1, 2, ... (can also be negative, as the checks below assume)
# done = False or True
# info = {'coins': 0, 'flag_get': False, 'life': 3, 'score': 0, 'stage': 1, 'status': 'small', 'time': 400, 'world': 1, 'x_pos': 40}
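# For reference, a minimal sketch of the gym step call that yields the values
# described above (standard gym API; the env construction from the full script
# is assumed):
#   state, reward, done, info = env.step(env.action_space.sample())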

done = True
last_state = None
last_action = None
identity = np.identity(
    env.action_space.n)  # to quickly get a one-hot vector, e.g. 0001000000000000
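# Illustration (the action index 3 is arbitrary, not from the original script):
#   identity[3] -> array([0., 0., 0., 1., ...]), a one-hot encoding of action 3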

x_pos = 0
max_x_pos = io.read_settings("max_x_pos", 0)
perfect_model = model
reward = 0
failure_mode = False
max_attempts_in_failure_mode = 50
history_rewards = []
while True:
    model = perfect_model

    for step in range(1000):
        if done:
            state = env.reset()
            model = perfect_model

        if reward < 0:
            ratio = 1
Example 2
# done = False or True
# info = {'coins': 0, 'flag_get': False, 'life': 3, 'score': 0, 'stage': 1, 'status': 'small', 'time': 400, 'world': 1, 'x_pos': 40}

identity = np.identity(
    env.action_space.n)  # to quickly get a one-hot vector, e.g. 0001000000000000

state = None
reward = 0
done = True
info = None

last_state = None
last_info = None

max_x_pos = io.read_settings(
    "max_x_pos", 0
)  # the personal-best x position, persisted so we can save the best model when we hit our own limit
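# Note: `io` here is the project's own settings helper, not Python's stdlib
# io module. A hypothetical minimal implementation, for context only:
#   import json
#   def read_settings(key, default):
#       try:
#           with open("settings.json") as f:
#               return json.load(f).get(key, default)
#       except FileNotFoundError:
#           return default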
history_x_pos_list_where_I_die = [0]  # x positions where past runs died
learning_area = 50  # how wide each "death area" is; we learn from past failures inside it
history_data = []  # training buffer used when passing a point where we previously died
last_place_I_die = 0
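
# A sketch (not in the original script) of how the death areas could be used
# inside the loop below: treat the run as "near a past failure" when the
# current x position is within `learning_area` of any recorded death point.
#   in_death_area = any(
#       abs(info['x_pos'] - d) <= learning_area
#       for d in history_x_pos_list_where_I_die
#   )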
while True:
    for step in range(1000):
        if done or reward < -5:
            state = env.reset()
            if last_info is not None:
                if len(history_x_pos_list_where_I_die) > 0:
                    if last_info['x_pos'] < history_x_pos_list_where_I_die[0]:
                        history_x_pos_list_where_I_die.insert(
                            0, last_info['x_pos'])
                else: