Example #1
0
# Build a model mapping the input(s) `Inr` to the output `r2` (both are defined
# outside this excerpt) and compile it for training.
# NOTE(review): `Model`/`Adam` look like the Keras classes, and `input=`/`output=`
# and `lr=` are the legacy keyword spellings (newer Keras uses `inputs=`/`outputs=`
# and `learning_rate=`) -- confirm against the pinned library version.
model_r = Model(input=Inr, output=r2)
model_r.compile(optimizer=Adam(lr=0.000003),
                loss='kullback_leibler_divergence')  # KL divergence

# Hyperparameters and mutable bookkeeping state for the run. Several of these
# are not used within this excerpt; the notes on their meaning are assumptions.
capacity = 200000  # passed to ReplayBuffer below
TAU = 0.01  # presumably a soft target-update rate -- TODO confirm
alpha = 0.6  # meaning not visible in this excerpt
GAMMA = 0.98  # presumably the reward discount factor -- TODO confirm
episode_before_train = 2000  # presumably warm-up episodes before training starts
i_episode = 0  # incremented once per pass of the main loop
mini_batch = 10
loss, score = 0, 0
num = 0  # incremented in the main loop whenever an item's `now` equals its `target`
times = [0] * n_data  # one counter per data item; each is bumped on every loop pass
total_time = 0
buff = ReplayBuffer(capacity)
# Opened in 'w' mode (truncates any existing file); no matching close() is
# visible in this excerpt.
f = open('log_router_gqn.txt', 'w')

# playing #
# Main loop. As written here it never terminates (no break is visible); the
# loop body continues beyond this excerpt.
while (1):

    i_episode += 1
    for i in range(n_data):
        # Advance this item's step counter.
        times[i] = times[i] + 1
        # The item is at its target: count it and re-initialise the item.
        if data[i].now == data[i].target:
            num += 1
            # New position and target are drawn independently from
            # [0, n_router), so they may coincide (assuming `np` is NumPy).
            data[i].now = np.random.randint(n_router)
            data[i].target = np.random.randint(n_router)
            data[i].time = 0
            # -1 appears to be the "not on any edge" sentinel; otherwise remove
            # this item's size from the load of the edge it was occupying.
            if data[i].edge != -1:
                edges[data[i].edge].load -= data[i].size
Example #2
0
# Hyperparameters for the "battle" experiment. Most are not used within this
# excerpt; the notes on their meaning are assumptions.
alpha = 0.6  # meaning not visible in this excerpt
GAMMA = 0.96  # presumably the reward discount factor -- TODO confirm
n_episode = 100000
max_steps = 300
episode_before_train = 200
n_agent = 20

# Create the MAgent "battle" grid world with map_size=30 and send render
# output to build/render.
magent.utility.init_logger("battle")
env = magent.GridWorld("battle", map_size=30)
env.set_render_dir("build/render")
handles = env.get_handles()

# Create a session and register it with the backend `K`.
# NOTE(review): `tf.Session` is the TensorFlow 1.x API -- confirm the pinned version.
sess = tf.Session()
K.set_session(sess)

n = len(handles)  # number of handles returned by the environment
# First element of the action space reported for the first handle.
n_actions = env.get_action_space(handles[0])[0]
i_episode = 0
# `capacity` is not defined in this excerpt; it must be set earlier in the file.
buff = ReplayBuffer(capacity)
l = 40  # purpose not visible here; the attention models below are built with l=4, not this value

# Print the first action-space entry for the first two handles.
print(env.get_action_space(handles[0])[0])
print(env.get_action_space(handles[1])[0])
#f = open('log.txt','w')

######build the model#########
# Instantiate the network components; their classes are defined outside this excerpt.
cnn = MLP()  # note: despite the name, this is bound to an MLP instance
m1 = MultiHeadsAttModel(l=4)
m2 = MultiHeadsAttModel(l=4)
# NOTE(review): action_dim is hard-coded to 9 rather than using the n_actions
# value queried from the environment -- confirm the two agree.
q_net = Q_Net(action_dim=9)
# 1x4 array with a one in the first slot: [[1., 0., 0., 0.]].
vec = np.zeros((1, 4))
vec[0][0] = 1

# Starts empty; presumably filled with model inputs later in the file.
In = []