def drawer(self, con, is_first):
    """Reply to every state message on ``con`` with the fixed action ``[256, 256, 0]``.

    Each message is read with ``util64.recv_msg`` and unpickled. A ``'reg'``
    message is passed to ``self.init_episode``, acknowledged with ``b'ok'``
    and ends the loop. For any other message the method returns at once
    unless ``is_first == 1``.

    Args:
        con: Connection handed to ``util64.recv_msg`` / ``util64.send_msg``;
            ``con.send`` is also called directly for the ``'reg'`` reply.
        is_first: Non-``'reg'`` messages are only served when this equals 1.
    """
    while True:
        try:
            packet = pickle.loads(util64.recv_msg(con))

            if packet.type == 'reg':
                self.init_episode(packet)
                con.send(b'ok')
                break

            if is_first != 1:
                return

            observation = packet.msg
            # State and mask are still computed for every message even
            # though the reply below does not depend on them.
            state = self.units.msg2state(
                self.mapSet.find_map(self.mapName), observation)
            action_mask = self.units.msg2mask(
                self.mapSet.find_map(self.mapName), observation)

            # Long pause once the kill counter reads exactly 1.
            if observation.myInfo.killCount == 1:
                time.sleep(100)

            util64.send_msg(con, pickle.dumps([256, 256, 0]))
        except EOFError:
            break
def controller(self, con, is_first):
    """Serve a fixed action and train ``self.units`` on a distance-to-geyser target.

    Messages are read from ``con`` with ``util64.recv_msg`` and unpickled
    (NOTE: ``pickle.loads`` on socket data is only safe when the peer is
    trusted).

    * ``'reg'``: ``self.init_episode`` is called, ``b'ok'`` is sent and the
      loop ends.
    * ``'terminal'``: ``self.units.predict_ans_masked(X, mask, True)`` is
      called once and the loop ends.
    * anything else: the fixed action ``[256, 256, 0]`` is sent. When
      ``is_first == 1`` a target tensor is built whose channel 1 holds
      ``-distance / 256.0`` from every window cell to the (window-relative)
      position of the last ``'Resource_Vespene_Geyser'`` in
      ``msg.resources``; ``self.units.train`` is run on it and the first
      loss value is appended to ``self.ferr``.

    If no geyser is present the reference position stays ``0``, i.e. the
    distance is measured from the window origin (unchanged from the
    previous behaviour).

    Args:
        con: Connection used with ``util64.recv_msg`` / ``util64.send_msg``.
        is_first: Training and loss logging only happen when this equals 1.
    """
    # The lock handles are requested but never used in this method.
    # NOTE(review): kept in case handle creation registers with self.lock.
    rl = self.lock.genRlock()
    wl = self.lock.genWlock()
    half_window = WINDOW_SIZE // 2

    while True:
        try:
            data = pickle.loads(util64.recv_msg(con))
            if data.type == 'reg':
                self.init_episode(data)
                con.send(b'ok')
                break

            msg = data.msg
            X = self.units.msg2state(
                self.mapSet.find_map(self.mapName), msg)
            mask = self.units.msg2mask(
                self.mapSet.find_map(self.mapName), msg)

            if data.type == 'terminal':
                self.units.predict_ans_masked(X, mask, True)
                break

            # Window-relative geyser position; the last geyser wins. A new
            # list is built so the incoming message is not modified.
            pos = 0
            for resource in msg.resources:
                if resource.type == 'Resource_Vespene_Geyser':
                    pos = [
                        resource.coord[0] - msg.myInfo.coord[0] + half_window,
                        resource.coord[1] - msg.myInfo.coord[1] + half_window,
                    ]

            util64.send_msg(con, pickle.dumps([256, 256, 0]))

            if is_first == 1:
                Y = numpy.zeros(
                    [WINDOW_SIZE, WINDOW_SIZE, self.units._out_channel])
                # (row, col) index of every cell, shape (W, W, 2); one
                # vectorised norm replaces a Python loop over all cells.
                cells = numpy.moveaxis(
                    numpy.indices((WINDOW_SIZE, WINDOW_SIZE)), 0, -1)
                Y[:, :, 1] = -numpy.linalg.norm(cells - pos, axis=-1) / 256.0

                history = self.units.train(
                    X.reshape([
                        -1, WINDOW_SIZE, WINDOW_SIZE, self.units._in_channel
                    ]),
                    Y.reshape([
                        -1, WINDOW_SIZE, WINDOW_SIZE, self.units._out_channel
                    ]))
                self.ferr.write(str(history.history['loss'][0]) + '\n')
                self.ferr.flush()
                os.fsync(self.ferr.fileno())
        except (ConnectionError, EOFError):
            # EOFError is what unpickling raises when the stream runs out;
            # the other handlers in this file stop on it as well.
            break
def controller(self, con, is_first):
    """Roll out one episode with ``self.actor`` and store its transitions.

    Messages are read from ``con`` with ``util64.recv_msg`` and unpickled
    (NOTE: ``pickle.loads`` on socket data is only safe when the peer is
    trusted). A ``'reg'`` message calls ``self.init_episode``, replies
    ``b'ok'`` and ends the loop. For every other message an action is
    sampled with ``self.actor.sample_ans_masked`` (under the read lock) and
    sent back.

    From the second message on, a transition
    ``[last_state, last_act, next_state, flag, data.value]`` is appended to a
    local list: ``flag`` is ``1`` for ordinary steps and ``0`` for the
    ``'terminal'`` step, where ``next_state`` repeats ``last_state``. After
    the loop ends the list is appended to ``self.memory`` and
    ``self.mapName`` to ``self.memory_map``.

    When ``is_first == 1`` the critic's prediction for each state (from the
    second message on) is appended to ``cVal.txt`` and the terminal value to
    ``reward.txt``. Both files are closed when the loop finishes, including
    when it finishes by an exception.

    Args:
        con: Connection used with ``util64.recv_msg`` / ``util64.send_msg``.
        is_first: Enables the file logging described above when equal to 1.
    """
    rl = self.lock.genRlock()
    last_state = None
    last_act = None
    last_val = None
    memory = []
    fval = None
    frwd = None

    try:
        if is_first == 1:
            fval = open('cVal.txt', 'a')
            frwd = open('reward.txt', 'a')

        while True:
            try:
                data = pickle.loads(util64.recv_msg(con))
                if data.type == 'reg':
                    self.init_episode(data)
                    con.send(b'ok')
                    break

                X = self.actor.msg2state(
                    self.mapSet.find_map(self.mapName), data.msg)
                mask = self.actor.msg2mask(
                    self.mapSet.find_map(self.mapName), data.msg)

                rl.acquire()
                try:
                    act = self.actor.sample_ans_masked(X, mask)
                finally:
                    # Release even if sampling raises, so other workers
                    # are not blocked forever.
                    rl.release()

                if is_first == 1 and last_val is not None:
                    fval.write(str(self.critic.predict([X])[0, 0]) + '\n')
                    fval.flush()
                    os.fsync(fval.fileno())

                util64.send_msg(con, pickle.dumps(act))

                if last_state is not None:
                    if data.type == 'terminal':
                        memory.append([
                            last_state, last_act, last_state, 0, data.value
                        ])
                        if is_first == 1:
                            frwd.write(str(data.value) + '\n')
                            frwd.flush()
                            os.fsync(frwd.fileno())
                        break
                    memory.append([
                        last_state, last_act, data.msg, 1, data.value
                    ])

                last_val = data.value
                last_state = data.msg
                last_act = act
            except EOFError:
                break
    finally:
        # Previously these handles were never closed.
        for handle in (fval, frwd):
            if handle is not None:
                handle.close()

    rl.acquire()
    try:
        self.memory.append(memory)
        self.memory_map.append(self.mapName)
    finally:
        rl.release()
def asyncController(self, con, is_first):
    """Epsilon-greedy worker that accumulates Q-learning gradients.

    Messages are read from ``con`` with ``util64.recv_msg`` and unpickled
    (NOTE: ``pickle.loads`` on socket data is only safe when the peer is
    trusted). A ``'reg'`` message calls ``self.init_episode``, replies
    ``b'ok'`` and ends the loop. There is no exception handler here: an
    error from receiving or unpickling propagates to the caller.

    For every other message:

    * with probability ``epsilon`` (``self.epsilon`` scaled once by a
      uniform factor in [0.8, 1.2)) a random non-zero mask position is
      chosen, otherwise ``self.units.predict_ans_masked`` decides;
    * the action is sent back;
    * from the second message on, the prediction for the previous state is
      copied, the entry of the previous action is replaced by
      ``data.value - last_value + self.discount * maxNext`` and the
      resulting gradient is appended to a local list;
    * ``self.learn_epoch`` and a local step counter are incremented; every
      ``ETARGET`` global steps the target network copies the weights of
      ``self.units`` and every ``EUPDATE`` local steps the collected
      gradients are applied and the list is emptied.

    Args:
        con: Connection used with ``util64.recv_msg`` / ``util64.send_msg``.
        is_first: Accepted for signature parity with the other controllers;
            not used here.
    """
    ETARGET = 150
    EUPDATE = 50
    epoch = 0
    # ``async`` is a reserved keyword since Python 3.7, so ``self.async = 1``
    # no longer parses; setattr stores the same attribute name.
    setattr(self, 'async', 1)
    last_state = None
    last_action = None
    last_value = 0
    gradients = []
    epsilon = self.epsilon * numpy.random.uniform(0.8, 1.2)

    while True:
        data = pickle.loads(util64.recv_msg(con))
        if data.type == 'reg':
            self.init_episode(data)
            con.send(b'ok')
            break

        msg = data.msg
        # 'terminal' messages get no special handling: they fall through
        # to the regular step below.
        X = self.units.msg2state(self.mapSet.find_map(self.mapName), msg)
        places = self.units.msg2mask(
            self.mapSet.find_map(self.mapName), msg)

        if numpy.random.random() < epsilon:
            ini, inj, ink = numpy.nonzero(places)
            ind = numpy.random.choice(len(ini))
            ans = [ini[ind], inj[ind], ink[ind]]
        else:
            ans = self.units.predict_ans_masked(X, places)
        util64.send_msg(con, pickle.dumps(ans))

        maxNext = self.target.predict_max_masked(X, places)
        if last_state is not None:
            # Encode the previous state once and reuse it for both the
            # prediction and the gradient.
            prev_X = self.units.msg2state(
                self.mapSet.find_map(self.mapName), last_state)
            Y_ = numpy.copy(self.units.predict_all(prev_X))
            Y_[0, last_action[0], last_action[1], last_action[2]] = (
                data.value - last_value + self.discount * maxNext)
            gradient = self.units.gradient(Y_, prev_X)
            print([i.eval() for i in gradient])
            gradients += [gradient]

        last_state = msg
        last_action = ans
        last_value = data.value
        self.learn_epoch += 1
        epoch += 1

        if self.learn_epoch % ETARGET == 0:
            self.target.set_weights(self.units.get_weights())
        if epoch % EUPDATE == 0:
            self.units.apply_gradient(gradients)
            # Reset to an empty list; the former ``gradients = 0`` made the
            # next ``gradients += [gradient]`` raise TypeError.
            gradients = []
def exploiter(self, con, is_first):
    """Answer each state message with the action predicted by ``self.units``.

    Messages are read with ``util64.recv_msg`` and unpickled. A ``'reg'``
    message is passed to ``self.init_episode``, acknowledged with ``b'ok'``
    and ends the loop. Every other message is converted to a state and a
    mask, ``self.units.predict_ans_masked`` is called, and element 0 of its
    result is pickled and sent back. The loop also ends on ``EOFError``.

    Args:
        con: Connection used with ``util64.recv_msg`` / ``util64.send_msg``.
        is_first: ``is_first == 1`` is forwarded as the third argument of
            ``predict_ans_masked`` and also enables the 'exploiting' print.
    """
    while True:
        try:
            packet = pickle.loads(util64.recv_msg(con))

            if packet.type == 'reg':
                self.init_episode(packet)
                con.send(b'ok')
                break

            observation = packet.msg
            state = self.units.msg2state(
                self.mapSet.find_map(self.mapName), observation)
            valid = self.units.msg2mask(
                self.mapSet.find_map(self.mapName), observation)

            prediction = self.units.predict_ans_masked(
                state, valid, is_first == 1)
            if is_first == 1:
                print('exploiting', prediction[0], prediction[1])

            # Only the first element of the prediction goes over the wire.
            util64.send_msg(con, pickle.dumps(prediction[0]))
        except EOFError:
            break
def controller(self, con, is_first):
    """Run an epsilon-greedy control loop on ``con`` and record transitions in ``self.buf``.

    Messages are read with ``util64.recv_msg`` and unpickled. A ``'reg'``
    message prints the explore/exploit counters, calls ``self.init_episode``,
    replies ``b'ok'`` and ends the loop. A ``'terminal'`` message (once an
    action has been taken) stores a final transition and ends the loop.
    Every other message produces an action that is pickled and sent back.

    The value stored with each transition is the change in
    ``k.value - self.exploration_weight * unvisited`` since the previous
    step, where ``unvisited`` counts the map cells the unit has not stood on
    yet during this call.

    Args:
        con: Connection used with ``util64.recv_msg`` / ``util64.send_msg``.
        is_first: When equal to 1, rewards are appended to ``rewards.txt``,
            Q-values to ``Qvals.txt``, and progress is printed.
    """
    last_state = None
    last_action = None
    last_value = 0
    # 1x1 placeholder; swapped for a map-sized visit counter on the first
    # non-terminal message (see the shape check below).
    visited = numpy.zeros([1, 1])
    unvisited = 0
    rl = self.lock.genRlock()
    # 0 is a placeholder; real file objects exist only when is_first == 1.
    feval = 0
    fq = 0
    # Chance that an exploration step sends [256, 256, -1] instead of a
    # random valid action.
    pSticky = 0.2
    if (is_first == 1):
        feval = open('rewards.txt', 'a')
        fq = open('Qvals.txt', 'a')
    while (True):
        try:
            data = util64.recv_msg(con)
            k = pickle.loads(data)
            if (k.type == 'reg'):
                print(self.explore_count, self.exploit_count)
                self.init_episode(k)
                con.send(b'ok')
                break
            else:
                msg = k.msg
                X = self.units.msg2state(
                    self.mapSet.find_map(self.mapName), msg)
                if (k.type == 'terminal' and last_action is not None):
                    # Terminal transition: next state repeats last_state and
                    # the fifth argument is 1 (0 for ordinary steps below).
                    self.buf.add(
                        last_state, last_action, last_state,
                        (k.value - self.exploration_weight * unvisited -
                         last_value), 1, self.mapName)
                    if (is_first == 1):
                        feval.write(str(k.value) + '\n')
                        feval.flush()
                        os.fsync(feval.fileno())
                        # NOTE(review): nesting reconstructed from a
                        # whitespace-collapsed source -- confirm that the
                        # epsilon decay belongs under is_first == 1.
                        self.epsilon *= 0.98
                        print(self.epsilon)
                    break
                if (visited.shape[0] == 1):
                    # First step: size the visit counter from the map's
                    # regions array and start with every cell unvisited.
                    visited = numpy.zeros(
                        self.mapSet.find_map(self.mapName).regions.shape)
                    unvisited = visited.shape[0] * visited.shape[1]
                    last_value = -self.exploration_weight * unvisited
                # print(k)
                visited[msg.myInfo.coord[0], msg.myInfo.coord[1]] += 1
                # A count of exactly 1 means this cell was just reached for
                # the first time.
                if (visited[msg.myInfo.coord[0], msg.myInfo.coord[1]] == 1):
                    unvisited -= 1
                if (numpy.random.random() < self.epsilon):
                    self.explore_count += 1
                    if (numpy.random.random() < pSticky):
                        print('sticky')
                        ans = [256, 256, -1]
                    else:
                        places = self.units.msg2mask(
                            self.mapSet.find_map(self.mapName), msg)
                        # Pick a move type uniformly among those that have at
                        # least one valid cell, then a cell for that type.
                        _, _, ink = numpy.nonzero(places)
                        moveType = numpy.random.choice(numpy.unique(ink))
                        ini, inj = numpy.nonzero(places[:, :, moveType])
                        ind = numpy.random.choice(len(ini))
                        ans = [ini[ind], inj[ind], moveType]
                    # NOTE(review): nesting reconstructed -- confirm this
                    # logging covers both the sticky and the random case.
                    if (is_first == 1):
                        print('exploring', ans)
                        fq.write('None\n')
                        fq.flush()
                        os.fsync(fq.fileno())
                    # print(ans)
                else:
                    mask = self.units.msg2mask(
                        self.mapSet.find_map(self.mapName), msg)
                    # NOTE(review): the lock is not released if
                    # predict_ans_masked raises.
                    rl.acquire()
                    ans = self.units.predict_ans_masked(
                        X, mask, is_first == 1)
                    rl.release()
                    if (is_first == 1):
                        print('exploiting', ans[0], ans[1])
                        self.exploit_count += 1
                        fq.write(str(ans[1]) + '\n')
                        fq.flush()
                        os.fsync(fq.fileno())
                    # Element 0 is what gets sent; element 1 is what was
                    # logged to Qvals.txt above.
                    ans = ans[0]
                util64.send_msg(con, pickle.dumps(ans))
                if (last_action is not None):
                    self.buf.add(
                        last_state, last_action, msg,
                        (k.value - self.exploration_weight * unvisited -
                         last_value), 0, self.mapName)
                last_state = msg
                last_action = ans
                last_value = k.value - self.exploration_weight * unvisited
        except EOFError:
            print('exception found')
            break
    # NOTE(review): reached only when the loop ends via break; any exception
    # other than EOFError leaves both files open.
    if (is_first == 1):
        feval.close()
        fq.close()