q_mask[2] -= 1000 actions_value_mask = [0, 0, 0, 0, 0] for i in range(len(actions_value)): actions_value_mask[i] = actions_value[i] + q_mask[i] return actions_value_mask logger = logging.getLogger(__name__) logger.setLevel(level=logging.INFO) handler = logging.FileHandler("log.txt") handler.setLevel(logging.INFO) logger.addHandler(handler) my_env = env.TrafficEnv() #bt.saver.restore(bt.sess,"./model/my_light_model/my-model.ckpt-3500") reward_list = [] for i_episode in range(1000000): # listener() s = my_env.reset() N_others = 12 * 10 s_pre_others = np.zeros((N_others)) s_pre_others2 = np.array(s[1] + s[2]) #print(s_pre_others2) for i in range(N_others): s_pre_others[i] = s_pre_others2[i % 12] s_sliding, s_others = s[0], s_pre_others #print(s_sliding,s_others)
# Smoke-test driver: steps a TrafficEnv with uniformly random actions, forever,
# resetting the environment whenever step() reports one of two end flags.
import env
import time
import random

# Values of the third element returned by TrafficEnv.step() that this script
# reacts to. Their meaning is inferred only from the messages printed below
# ("end! " / "arrive!") -- TODO confirm against env.TrafficEnv.
END_FLAG_FINISHED = 1
END_FLAG_ARRIVED = 100

myenv = env.TrafficEnv()
myenv.reset()

# There is no exit condition: the loop runs until the process is interrupted.
while True:
    # randint() is inclusive on both ends, so the action is drawn from 0..4.
    action = random.randint(0, 4)

    # step() yields four values; only the end flag (`end`) is used here.
    # NOTE(review): s/r/dis look like state, reward and a distance/info value,
    # but nothing in this script reads them -- confirm against TrafficEnv.
    s, r, end, dis = myenv.step(action)

    # Optional throttle, disabled in the original script:
    # time.sleep(1)

    if end == END_FLAG_FINISHED:
        myenv.reset()
        print("end! ")
    elif end == END_FLAG_ARRIVED:
        myenv.reset()
        print("arrive!")