# NOTE(review): this chunk was collapsed onto one physical line; the indentation
# below is reconstructed and should be confirmed against the original file.
# The chunk begins mid-branch — the `if` header matching the `elif` below is
# outside the visible source.
    # Reinforce the action just taken: one training step on the previous frame,
    # using a one-hot target row sliced from `identity` (presumably an NxN
    # identity matrix used as one-hot labels — TODO confirm).
    model.train_on_batch(x=np.expand_dims(last_state, axis=0), y=identity[action:action + 1])
    print(
        f" training happens: {reward}\nx_pos:{info['x_pos']}"
    )
elif reward < 0:
    # Negative reward: sample a random action instead and train toward it.
    action = env.action_space.sample()
    model.train_on_batch(x=np.expand_dims(last_state, axis=0), y=identity[action:action + 1])
    print(
        f" training happens: {reward}\nx_pos:{info['x_pos']}"
    )
env.render()
# Track the best horizontal progress so far and persist it via settings I/O.
x_pos = info["x_pos"]
if x_pos > max_x_pos:
    max_x_pos = x_pos
    io.write_settings("max_x_pos", int(max_x_pos))
# life == 2 presumably means Mario has lost no life yet — TODO confirm against
# gym-super-mario-bros `info` semantics. Treat this as the "good" model snapshot.
if info["life"] == 2:
    failer_mode = False
    perfect_model = model
    model.save(model_file_path)
# Reaching stage 2 is treated as success: persist progress, save the final
# model, pause for the user, then terminate the whole process.
if info["stage"] == 2:
    io.write_settings("max_x_pos", int(max_x_pos))
    model.save(final_model_file_path)
    input("congraduations!")
    exit()
# NOTE(review): `env.close()` most likely sits after the enclosing loop (it
# follows an unconditional exit path above) — confirm its dedent level.
env.close()
# NOTE(review): this chunk was collapsed onto one physical line; indentation
# below is reconstructed. It appears to run inside an outer loop, and the
# trailing `while 1:` body is cut off at the end of the visible source.
last_info = info
action = 3  # hard-coded index into SIMPLE_MOVEMENT — TODO confirm which move
state, reward, done, info = env.step(action)
# Append to the rolling histories, keeping only the most recent 200 entries.
history_actions.append(action)
history_actions = history_actions[-200:]
history_x_pos.append(info['x_pos'])
history_x_pos = history_x_pos[-200:]
history_y_pos.append(info['y_pos'])
history_y_pos = history_y_pos[-200:]
# Only train once we have a valid previous frame and at least 101 history
# entries (the slices below take the 100 steps preceding the current one).
if isinstance(last_state, (np.ndarray, np.generic)):
    if len(history_actions) >= 101 and len(history_x_pos) >= 101 and len(history_y_pos) >= 101:
        training_couting += 1
        io.write_settings("training_couting", int(training_couting))
        print(f" learning happend with action: {SIMPLE_MOVEMENT[action]}")
        # `train_once` is defined elsewhere in the file; it receives the prior
        # frame, the last 100 actions/x/y positions (excluding the current
        # step), and the current action/reward.
        train_once(last_state, history_actions[-101:-1], history_x_pos[-101:-1], history_y_pos[-101:-1], action, reward)
temp = 15  # NOTE(review): purpose of `temp` is not visible in this chunk
while 1:
    # Guard: only step while the last observation is a numpy array and the
    # env returned a real info dict.
    if isinstance(state, (np.ndarray, np.generic)) and info != None:
        last_state = state
        last_info = info
        action = 2  # hard-coded index into SIMPLE_MOVEMENT — TODO confirm
        state, reward, done, info = env.step(action)
        history_actions.append(action)
        history_actions = history_actions[-200:]
        # NOTE(review): the loop body continues beyond this visible chunk.