# NOTE(review): this chunk was whitespace-mangled (collapsed onto one physical
# line); line breaks and indentation below are reconstructed — confirm against
# the original file. All tokens are unchanged.

# Readout model mapping the shared input `Inr` to output head `r2`
# (both defined earlier in the file, outside this view).
model_r = Model(input=Inr, output=r2)
# NOTE(review): `input=`/`output=` and `Adam(lr=...)` are legacy Keras 1.x-era
# keywords; modern Keras uses `inputs=`/`outputs=` and `learning_rate=` —
# left untouched since the file appears to target the old API. Verify.
model_r.compile(optimizer=Adam(lr=0.000003), loss='kullback_leibler_divergence')  # KL divergence

# --- hyperparameters ---
capacity = 200000              # replay-buffer capacity
TAU = 0.01                     # presumably a target-network soft-update rate — TODO confirm (unused in this view)
alpha = 0.6                    # purpose not visible in this chunk — TODO confirm
GAMMA = 0.98                   # discount factor (conventional name; usage not visible here)
episode_before_train = 2000    # warm-up episodes before training starts
i_episode = 0                  # episode counter
mini_batch = 10                # training mini-batch size (usage not visible here)
loss, score = 0, 0             # running loss / score accumulators
num = 0                        # count of packets that reached their target (see loop below)
times = [0] * n_data           # per-packet step counters; `n_data` defined outside this view
total_time = 0                 # aggregate time accumulator (usage not visible here)
buff = ReplayBuffer(capacity)  # experience replay store
# NOTE(review): file handle opened without `with`/close in the visible chunk.
f = open('log_router_gqn.txt', 'w')

# playing
# while (1):
# NOTE(review): the loop header above appears commented out in the mangled
# source; the statements below sit at top level as reconstructed. In the
# original this was likely an infinite play/training loop — confirm.
i_episode += 1
for i in range(n_data):
    times[i] = times[i] + 1
    # Packet i has arrived at its destination: respawn it at a random
    # source/target pair and release the load it held on its current edge.
    if data[i].now == data[i].target:
        num += 1
        data[i].now = np.random.randint(n_router)
        data[i].target = np.random.randint(n_router)
        data[i].time = 0
        # edge == -1 presumably means "not on any edge" — TODO confirm
        if data[i].edge != -1:
            edges[data[i].edge].load -= data[i].size
# NOTE(review): this chunk was whitespace-mangled (collapsed onto one physical
# line); line breaks below are reconstructed — confirm against the original
# file. All tokens are unchanged. It appears to be a *different* script from
# the router chunk above (MAgent "battle" scenario setup).

# --- hyperparameters ---
alpha = 0.6                  # purpose not visible in this chunk — TODO confirm
GAMMA = 0.96                 # discount factor (conventional name; usage not visible here)
n_episode = 100000           # total training episodes
max_steps = 300              # per-episode step cap (usage not visible here)
episode_before_train = 200   # warm-up episodes before training starts
n_agent = 20                 # agents per side — TODO confirm against env config

# --- MAgent environment ---
magent.utility.init_logger("battle")
env = magent.GridWorld("battle", map_size=30)
env.set_render_dir("build/render")   # frames written here for offline rendering
handles = env.get_handles()          # one handle per group/team

# --- TensorFlow 1.x session wired into Keras ---
sess = tf.Session()
K.set_session(sess)

n = len(handles)                                  # number of groups
n_actions = env.get_action_space(handles[0])[0]   # action-space size of group 0
i_episode = 0
# NOTE(review): `capacity` is not defined in this visible chunk — presumably
# set earlier in this file; verify.
buff = ReplayBuffer(capacity)
# NOTE(review): `l = 40` here vs `l=4` passed to the attention models below —
# looks intentional (different quantities) but the shared name is confusing;
# confirm what each refers to.
l = 40
print(env.get_action_space(handles[0])[0])
print(env.get_action_space(handles[1])[0])
#f = open('log.txt','w')

######build the model#########
cnn = MLP()                          # per-agent feature encoder (defined outside this view)
m1 = MultiHeadsAttModel(l=4)         # first attention layer; l presumably = neighborhood size — TODO confirm
m2 = MultiHeadsAttModel(l=4)         # second attention layer
q_net = Q_Net(action_dim=9)
# One-hot-style selector vector; first element set to 1.
vec = np.zeros((1, 4))
vec[0][0] = 1
In = []                              # model input collector, filled below (outside this view)