def playGame(DDPG_config, train_indicator=1):    #1 means Train, 0 means simply Run
    """Run (or train) a DDPG agent against an OMNeT++ network environment.

    DDPG_config: dict of experiment hyperparameters and settings (keys such as
        'ENV', 'RSEED', 'MU', 'THETA', 'SIGMA', 'BUFFER_SIZE', ... are read below).
    train_indicator: 1 = train the networks, 0 = only run a saved experiment.

    NOTE(review): if train_indicator is negative, `folder` is never assigned and
    the later uses of it will raise UnboundLocalError — presumably callers only
    pass 0 or 1; confirm.
    """
    # SETUP STARTS HERE
    if train_indicator > 0:
        folder = setup_run(DDPG_config)
    elif train_indicator == 0:
        folder = DDPG_config['EXPERIMENT']

    # RSEED == 0 is treated as "no fixed seed" (np.random.seed(None) reseeds
    # from the OS entropy source).
    if DDPG_config['RSEED'] == 0:
        DDPG_config['RSEED'] = None
    np.random.seed(DDPG_config['RSEED'])

    ACTIVE_NODES = DDPG_config['ACTIVE_NODES']

    # Generate an environment
    # NOTE(review): an unrecognized 'ENV' value leaves `env` unassigned and the
    # next line raises UnboundLocalError — config is assumed pre-validated.
    if DDPG_config['ENV'] == 'balancing':
        env = OmnetBalancerEnv(DDPG_config, folder)
    elif DDPG_config['ENV'] == 'label':
        env = OmnetLinkweightEnv(DDPG_config, folder)

    action_dim, state_dim = env.a_dim, env.s_dim

    # Ornstein-Uhlenbeck exploration-noise parameters.
    MU = DDPG_config['MU']
    THETA = DDPG_config['THETA']
    SIGMA = DDPG_config['SIGMA']

    ou = OU(action_dim, MU, THETA, SIGMA)    #Ornstein-Uhlenbeck Process

    BUFFER_SIZE = DDPG_config['BUFFER_SIZE']
    BATCH_SIZE = DDPG_config['BATCH_SIZE']
    GAMMA = DDPG_config['GAMMA']
    EXPLORE = DDPG_config['EXPLORE']
    EPISODE_COUNT = DDPG_config['EPISODE_COUNT']
    MAX_STEPS = DDPG_config['MAX_STEPS']

    # EXPLORE <= 1 is interpreted as a *fraction* of the total number of steps;
    # convert it to an absolute step count for the epsilon decay below.
    if EXPLORE <= 1:
        EXPLORE = EPISODE_COUNT * MAX_STEPS * EXPLORE

    # SETUP ENDS HERE

    reward = 0        # NOTE(review): unused below; kept as-is
    done = False
    wise = False      # early-stop flag; only ever set by the commented-out L2 check
    step = 0
    epsilon = 1       # exploration scale, decayed by 1/EXPLORE every step
    indicator = 0     # NOTE(review): unused below; kept as-is

    #Tensorflow GPU optimization
    # (TF1-style session; allow_growth avoids grabbing all GPU memory up front)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    actor = ActorNetwork(sess, state_dim, action_dim, DDPG_config)
    critic = CriticNetwork(sess, state_dim, action_dim, DDPG_config)
    buff = ReplayBuffer(BUFFER_SIZE)    #Create replay buffer

    # Layer names whose weight drift (L2 norm between consecutive steps) is
    # logged to weightsL2Log.csv — presumably actor (a_*) and critic (c_*)
    # layers; confirm against the network definitions.
    ltm = ['a_h0', 'a_h1', 'a_V', 'c_w1', 'c_a1', 'c_h1', 'c_h3', 'c_V']
    layers_to_mind = {}
    L2 = {}
    for k in ltm:
        layers_to_mind[k] = 0
        L2[k] = 0
    # write the CSV header row (mode 'w' truncates any previous log)
    vector_to_file(ltm, folder + 'weightsL2' + 'Log.csv', 'w')

    #Now load the weight
    # Best-effort: a fresh experiment has no saved weights, so failures are
    # swallowed and training starts from scratch.
    try:
        actor.model.load_weights(folder + "actormodel.h5")
        critic.model.load_weights(folder + "criticmodel.h5")
        actor.target_model.load_weights(folder + "actormodel.h5")
        critic.target_model.load_weights(folder + "criticmodel.h5")
        print("Weight load successfully")
    except:
        print("Cannot find the weight")

    print("OMNeT++ Experiment Start.")

    # initial state of simulator
    s_t = env.reset()
    loss = 0

    for i in range(EPISODE_COUNT):

        print("Episode : " + str(i) + " Replay Buffer " + str(buff.count()))

        total_reward = 0

        for j in range(MAX_STEPS):
            print('step ', j)
            epsilon -= 1.0 / EXPLORE
            a_t = np.zeros([1, action_dim])
            noise_t = np.zeros([1, action_dim])

            # deterministic action from the current policy
            a_t_original = actor.model.predict(s_t.reshape(1, s_t.shape[0]))

            # Add OU noise only while training and while epsilon is positive;
            # (step % 1000) // 100 != 9 suppresses noise on steps 900-999 of
            # every 1000 — presumably a periodic noise-free evaluation window.
            if train_indicator and epsilon > 0 and (step % 1000) // 100 != 9:
                noise_t[0] = epsilon * ou.evolve()

            a = a_t_original[0]
            n = noise_t[0]
            # If a+n stays inside (0, 1) use it, otherwise reflect the noise
            # (a-n); clip to [0, 1] either way so the action is always valid.
            a_t[0] = np.where((a + n > 0) & (a + n < 1), a + n, a - n).clip(min=0, max=1)

            # execute action
            s_t1, r_t, done = env.step(a_t[0], j)
            # print(s_t1)
            print('reward ', r_t)

            buff.add(s_t, a_t[0], r_t, s_t1, done)    #Add replay buffer

            scale = lambda x: x    # identity placeholder — hook for state/reward scaling

            #Do the batch update
            batch = buff.getBatch(BATCH_SIZE)

            states = scale(np.asarray([e[0] for e in batch]))
            actions = scale(np.asarray([e[1] for e in batch]))
            rewards = scale(np.asarray([e[2] for e in batch]))
            new_states = scale(np.asarray([e[3] for e in batch]))
            dones = np.asarray([e[4] for e in batch])
            y_t = np.zeros([len(batch), action_dim])

            # Q(s', mu'(s')) from the *target* networks (standard DDPG target)
            target_q_values = critic.target_model.predict([new_states, actor.target_model.predict(new_states)])

            # Bellman targets: r for terminal transitions, r + gamma*Q' otherwise
            for k in range(len(batch)):
                if dones[k]:
                    y_t[k] = rewards[k]
                else:
                    y_t[k] = rewards[k] + GAMMA * target_q_values[k]

            # Only update once the buffer can supply a full batch.
            if train_indicator and len(batch) >= BATCH_SIZE:
                loss = critic.model.train_on_batch([states, actions], y_t)
                a_for_grad = actor.model.predict(states)
                grads = critic.gradients(states, a_for_grad)
                # does this give an output like train_on_batch above? NO
                actor.train(states, grads)
                actor.target_train()
                critic.target_train()
                with open(folder + 'lossLog.csv', 'a') as file:
                    file.write(pretty(loss) + '\n')

            total_reward += r_t
            s_t = s_t1

            # Track how much each monitored layer's weights moved this step
            # (L2 norm of the flattened delta), then remember the new weights.
            for layer in actor.model.layers + critic.model.layers:
                if layer.name in layers_to_mind.keys():
                    L2[layer.name] = np.linalg.norm(np.ravel(layer.get_weights()[0]) - layers_to_mind[layer.name])
                    # vector_to_file(np.ravel(layer.get_weights()[0]), folder + 'weights_' + layer.name + 'Log.csv', 'a')
                    layers_to_mind[layer.name] = np.ravel(layer.get_weights()[0])

            # if max(L2.values()) <= 0.02:
            #     wise = True

            if train_indicator and len(batch) >= BATCH_SIZE:
                vector_to_file([L2[x] for x in ltm], folder + 'weightsL2' + 'Log.csv', 'a')

            vector_to_file(a_t_original[0], folder + 'actionLog.csv', 'a')
            vector_to_file(noise_t[0], folder + 'noiseLog.csv', 'a')

            if 'PRINT' in DDPG_config.keys() and DDPG_config['PRINT']:
                print("Episode", "%5d" % i, "Step", "%5d" % step, "Reward", "%.6f" % r_t)
                print("Epsilon", "%.6f" % max(epsilon, 0))
                # Re-shape the flat action vector into an ACTIVE_NODES x
                # ACTIVE_NODES matrix, inserting -1 on the diagonal
                # (a node has no action toward itself).
                att_ = np.split(a_t[0], ACTIVE_NODES)
                for _ in range(ACTIVE_NODES):
                    att_[_] = np.insert(att_[_], _, -1)
                att_ = np.concatenate(att_)
                print("Action\n", att_.reshape(ACTIVE_NODES, ACTIVE_NODES))
                print(max(L2, key=L2.get), pretty(max(L2.values())))

            step += 1
            if done or wise:
                break

            if step % 1000 == 0:    # writes at every 1000 step
                if (train_indicator):
                    # rolling checkpoint + numbered snapshot + architecture JSON
                    actor.model.save_weights(folder + "actormodel.h5", overwrite=True)
                    actor.model.save_weights(folder + "actormodel" + str(step) + ".h5")
                    with open(folder + "actormodel.json", "w") as outfile:
                        outfile.write(actor.model.to_json(indent=4) + '\n')

                    critic.model.save_weights(folder + "criticmodel.h5", overwrite=True)
                    critic.model.save_weights(folder + "criticmodel" + str(step) + ".h5")
                    with open(folder + "criticmodel.json", "w") as outfile:
                        outfile.write(critic.model.to_json(indent=4) + '\n')

        print("TOTAL REWARD @ " + str(i) + "-th Episode  : Reward " + str(total_reward))
        print("Total Step: " + str(step))
        print("")

    env.end()  # This is for shutting down
    print("Finish.")
class Traffic():
    """Generator of nodes_num x nodes_num traffic matrices.

    The generation strategy is selected by the ``type`` string (e.g. 'NORM',
    'UNI', 'STAT:<csv values>', 'FILE:<name>', 'DIR:<path>'); ``generate()``
    dispatches on the part before the first ':'.
    """

    def __init__(self, nodes_num, type, capacity):
        """
        nodes_num: number of network nodes (matrix is nodes_num x nodes_num).
        type: strategy selector, optionally with a ':'-separated argument.
        capacity: nominal per-node capacity; rescaled by n/(n-1) because the
            diagonal (self-traffic) carries no real load.
        """
        self.nodes_num = nodes_num
        self.prev_traffic = None
        self.type = type
        self.capacity = capacity * nodes_num / (nodes_num - 1)
        # Dispatch table: strategy keyword -> bound generator method.
        self.dictionary = {}
        self.dictionary['NORM'] = self.normal_traffic
        self.dictionary['UNI'] = self.uniform_traffic
        self.dictionary['CONTROLLED'] = self.controlled_uniform_traffic
        self.dictionary['EXP'] = self.exp_traffic
        self.dictionary['OU'] = self.ou_traffic
        self.dictionary['STAT'] = self.stat_traffic
        self.dictionary['STATEQ'] = self.stat_eq_traffic
        self.dictionary['FILE'] = self.file_traffic
        self.dictionary['DIR'] = self.dir_traffic
        self.dictionary['STATIC'] = self.static_traffic
        if self.type.startswith('DIR:'):
            # Pre-sort directory entries in natural (human) order; dir_traffic
            # consumes them one per call.
            self.dir = sorted(listdir(self.type.split('DIR:')[-1]), key=lambda x: natural_key((x)))
        self.static = None  # lazy cache for STAT/STATEQ/FILE strategies
        # OU processes: one for total volume, one per node pair.
        self.total_ou = OU(1, self.capacity/2, 0.1, self.capacity/2)
        self.nodes_ou = OU(self.nodes_num**2, 1, 0.1, 1)

    def normal_traffic(self):
        """Random total volume ~ N(capacity/2, capacity/2), split by softmax."""
        t = np.random.normal(self.capacity/2, self.capacity/2)
        return np.asarray(t * softmax(np.random.randn(self.nodes_num, self.nodes_num))).clip(min=0.001)

    def uniform_traffic(self):
        """Random total volume ~ U(0, 1.25*capacity), split by softmax."""
        t = np.random.uniform(0, self.capacity*1.25)
        return np.asarray(t * softmax(np.random.uniform(0, 1, size=[self.nodes_num]*2))).clip(min=0.001)

    def controlled_uniform_traffic(self):
        """Like uniform_traffic, but resamples exactly one randomly chosen
        entry per call, keeping the rest of the previous matrix."""
        t = np.random.uniform(0, self.capacity*1.25)
        if self.prev_traffic is None:
            self.prev_traffic = np.asarray(t * softmax(np.random.uniform(0, 1, size=[self.nodes_num]*2))).clip(min=0.001)
        # One-hot choice mask: a single 1 among nodes_num**2 candidates.
        dist = [1]
        dist += [0]*(self.nodes_num**2 - 1)
        ch = np.random.choice(dist, [self.nodes_num]*2)
        tt = np.multiply(self.prev_traffic, 1 - ch)    # keep unselected entries
        nt = np.asarray(t * softmax(np.random.uniform(0, 1, size=[self.nodes_num]*2))).clip(min=0.001)
        nt = np.multiply(nt, ch)                       # fresh value for the selected entry
        self.prev_traffic = tt + nt
        return self.prev_traffic

    def exp_traffic(self):
        """Outer product of exponential samples, rescaled to a random
        exponential mean; diagonal entries are marked -1 (self-traffic)."""
        a = np.random.exponential(size=self.nodes_num)
        b = np.random.exponential(size=self.nodes_num)
        T = np.outer(a, b)
        np.fill_diagonal(T, -1)
        T[T!=-1] = np.asarray(np.random.exponential()*T[T!=-1]/np.average(T[T!=-1])).clip(min=0)
        return T

    def static_traffic(self):
        """All-zero traffic matrix."""
        a = np.full([self.nodes_num]*2, 0, dtype=float)
        # BUG FIX: the original built `a` but fell off the end, returning None,
        # so the 'STATIC' strategy produced no matrix at all.
        return a

    def stat_traffic(self):
        """Constant matrix parsed once from 'STAT:v1,v2,...' and cached."""
        if self.static is None:
            string = self.type.split('STAT:')[-1]
            v = np.asarray(tuple(float(x) for x in string.split(',')[:self.nodes_num**2]))
            M = np.split(v, self.nodes_num)
            self.static = np.vstack(M)
        return self.static

    def stat_eq_traffic(self):
        """Constant matrix with every entry equal to the 'STATEQ:' value."""
        if self.static is None:
            value = float(self.type.split('STATEQ:')[-1])
            self.static = np.full([self.nodes_num]*2, value, dtype=float)
        return self.static

    def ou_traffic(self):
        """Temporally correlated traffic: OU total volume split by a softmax
        over per-pair OU processes."""
        t = self.total_ou.evolve()[0]
        nt = t * softmax(self.nodes_ou.evolve())
        i = np.split(nt, self.nodes_num)
        return np.vstack(i).clip(min=0.001)

    def file_traffic(self):
        """Constant matrix loaded once from 'traffic/<FILE: arg>' (CSV)."""
        if self.static is None:
            fname = 'traffic/' + self.type.split('FILE:')[-1]
            v = np.loadtxt(fname, delimiter=',')
            self.static = np.split(v, self.nodes_num)
        return self.static

    def dir_traffic(self):
        """Load the next '.txt' matrix from the pre-sorted directory listing.

        Returns False when the listing is exhausted (callers use this as an
        end-of-data sentinel).
        """
        while len(self.dir) > 0:
            tm = self.dir.pop(0)
            if not tm.endswith('.txt'):
                continue
            fname = self.type.split('DIR:')[-1] + '/' + tm
            v = np.loadtxt(fname, delimiter=',')
            return np.split(v, self.nodes_num)
        return False

    def generate(self):
        """Produce the next traffic matrix using the configured strategy."""
        return self.dictionary[self.type.split(":")[0]]()