model = generate_model()

# env.action_space.sample() = numbers, for example, 0, 1, 2, 3...
# state = RGB of the raw picture; a numpy array with shape (240, 256, 3)
# reward = int; for example, 0, 1, 2, ...
# done = False or True
# info = {'coins': 0, 'flag_get': False, 'life': 3, 'score': 0, 'stage': 1, 'status': 'small', 'time': 400, 'world': 1, 'x_pos': 40}

done = True
last_state = None
last_action = None
identity = np.identity(env.action_space.n)  # to quickly get a one-hot vector, like 0001000000000000
x_pos = 0
max_x_pos = io.read_settings("max_x_pos", 0)
perfect_model = model
reward = 0
failer_mode = False
max_attemps_in_failer_mode = 50
history_rewards = []

while 1:
    model = perfect_model
    for step in range(1000):
        if done:
            state = env.reset()
            model = perfect_model
            if reward < 0:
                ratio = 1
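For reference, env is not created in these snippets. Below is a minimal setup sketch, assuming the gym-super-mario-bros package (the info dict and the (240, 256, 3) frame shape above match that library), the SIMPLE_MOVEMENT action set, and the older gym step API that returns a 4-tuple, which is how the loops here use env; generate_model() and io.read_settings() are this project's own helpers and are assumed to be defined elsewhere.

import numpy as np
import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

env = gym_super_mario_bros.make('SuperMarioBros-v0')
env = JoypadSpace(env, SIMPLE_MOVEMENT)  # restrict the NES buttons to a small action set

state = env.reset()                           # RGB frame, shape (240, 256, 3)
action = env.action_space.sample()            # a random action index, e.g. 0, 1, 2, ...
state, reward, done, info = env.step(action)  # one frame of play; info holds x_pos, life, etc.

identity = np.identity(env.action_space.n)
one_hot = identity[action]                    # one-hot encoding of the action, like 000100000000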
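The second version of the loop, below, additionally records the x positions where Mario died (history_x_pos_list_where_I_die) and treats a learning_area of 50 pixels around each as a "death area" worth extra training. A hypothetical helper sketching that check (the function name is illustrative, not part of the original code):

def in_learning_area(x_pos, death_points, learning_area=50):
    # True when x_pos is within learning_area pixels of any recorded death position
    return any(abs(x_pos - point) <= learning_area for point in death_points)

# Example: with a death recorded at x_pos 380, positions 330..430 count as the death area,
# so in_learning_area(400, [0, 380]) returns True.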
# done = False or True
# info = {'coins': 0, 'flag_get': False, 'life': 3, 'score': 0, 'stage': 1, 'status': 'small', 'time': 400, 'world': 1, 'x_pos': 40}

identity = np.identity(env.action_space.n)  # to quickly get a one-hot vector, like 0001000000000000

state = None
reward = 0
done = True
info = None
last_state = None
last_info = None

max_x_pos = io.read_settings("max_x_pos", 0)  # the personal record, so we can save the best model when we hit our own limit
history_x_pos_list_where_I_die = [0]  # the points where I failed
learning_area = 50  # how wide the death area is; we will learn from past failures
history_data = []  # for training when passing a point where I died
last_place_I_die = 0

while 1:
    for step in range(1000):
        if done or reward < -5:
            state = env.reset()
            if last_info != None:
                if len(history_x_pos_list_where_I_die) > 0:
                    if last_info['x_pos'] < history_x_pos_list_where_I_die[0]:
                        history_x_pos_list_where_I_die.insert(0, last_info['x_pos'])
                    else: