Beispiel #1
0
    def drawer(self, con, is_first):
        """Answer every step message on ``con`` with the action [256, 256, 0].

        The loop ends when a ``'reg'`` message arrives (the episode is
        re-initialised through ``self.init_episode`` and ``b'ok'`` is sent
        back) or when ``EOFError`` is raised while handling a message.  When
        ``is_first`` is not 1 the method returns as soon as it receives a
        message that is not ``'reg'``.
        """
        while True:
            try:
                # NOTE(review): unpickling data received from ``con``; this
                # is only safe if the peer is trusted.
                packet = pickle.loads(util64.recv_msg(con))
                if packet.type == 'reg':
                    self.init_episode(packet)
                    con.send(b'ok')
                    break
                if is_first != 1:
                    return
                observation = packet.msg
                # The state and mask are not consumed below; the calls are
                # retained so the sequence of calls on ``self.units`` and
                # ``self.mapSet`` stays unchanged.
                _state = self.units.msg2state(
                    self.mapSet.find_map(self.mapName), observation)
                _mask = self.units.msg2mask(
                    self.mapSet.find_map(self.mapName), observation)
                if observation.myInfo.killCount == 1:
                    # Pause for 100 seconds once the kill count equals 1.
                    time.sleep(100)
                util64.send_msg(con, pickle.dumps([256, 256, 0]))
            except EOFError:
                break
    def controller(self, con, is_first):
        """Serve one episode on ``con`` and, for the first worker, train online.

        Every message that is neither ``'reg'`` nor ``'terminal'`` is
        converted into a state and a mask and answered with the fixed action
        ``[256, 256, 0]``.  When ``is_first`` is 1 and the message lists a
        resource of type ``'Resource_Vespene_Geyser'``, ``self.units`` is
        trained on a target whose channel 1 holds, for every window cell, the
        negated Euclidean distance (divided by 256) to the geyser coordinate
        shifted by the unit's own coordinate and ``WINDOW_SIZE // 2``.  The
        first loss value of the training history is written to ``self.ferr``
        and synced to disk.

        The loop ends on a ``'reg'`` message (episode re-initialised,
        ``b'ok'`` sent), on a ``'terminal'`` message (one final
        ``predict_ans_masked`` call) or on ``ConnectionError``.

        Args:
            con: connection object used with ``util64.recv_msg`` /
                ``util64.send_msg`` and ``con.send``.
            is_first: training and loss logging happen only when this is 1.
        """
        # NOTE(review): neither lock handle is used in this method; the calls
        # are kept in case creating a handle has an effect on ``self.lock``
        # -- TODO confirm and remove if not.
        rl = self.lock.genRlock()
        wl = self.lock.genWlock()
        while True:
            try:
                # NOTE(review): unpickling data received from ``con``; this
                # is only safe if the peer is trusted.
                data = pickle.loads(util64.recv_msg(con))
                if data.type == 'reg':
                    self.init_episode(data)
                    con.send(b'ok')
                    break
                msg = data.msg
                X = self.units.msg2state(
                    self.mapSet.find_map(self.mapName), msg)
                mask = self.units.msg2mask(
                    self.mapSet.find_map(self.mapName), msg)
                if data.type == 'terminal':
                    self.units.predict_ans_masked(X, mask, True)
                    break

                # The last listed geyser wins, as before.  ``None`` (instead
                # of the former ``0``) marks "no geyser in this message" so
                # that case no longer fails with a TypeError before the
                # reply is sent.
                geyser = None
                for resource in msg.resources:
                    if resource.type == 'Resource_Vespene_Geyser':
                        geyser = resource.coord

                ans = [256, 256, 0]
                util64.send_msg(con, pickle.dumps(ans))

                if is_first != 1 or geyser is None:
                    continue

                # Build the shifted position as new values rather than
                # writing into ``resource.coord`` in place.
                half = WINDOW_SIZE // 2
                target_row = geyser[0] - msg.myInfo.coord[0] + half
                target_col = geyser[1] - msg.myInfo.coord[1] + half

                Y = numpy.zeros(
                    [WINDOW_SIZE, WINDOW_SIZE, self.units._out_channel])
                # Distance from every cell (row, col) to the target, computed
                # for the whole window at once.
                rows, cols = numpy.indices((WINDOW_SIZE, WINDOW_SIZE))
                Y[:, :, 1] = -numpy.hypot(rows - target_row,
                                          cols - target_col) / 256.0

                history = self.units.train(
                    X.reshape([
                        -1, WINDOW_SIZE, WINDOW_SIZE, self.units._in_channel
                    ]),
                    Y.reshape([
                        -1, WINDOW_SIZE, WINDOW_SIZE, self.units._out_channel
                    ]))
                self.ferr.write(str(history.history['loss'][0]) + '\n')
                self.ferr.flush()
                os.fsync(self.ferr.fileno())
            except ConnectionError:
                break
Beispiel #3
0
    def controller(self, con, is_first):
        """Sample actions for one episode and record its transitions.

        For every message that is not ``'reg'`` an action is sampled from
        ``self.actor`` (while holding the lock handle obtained from
        ``self.lock.genRlock()``) and sent back over ``con``.  From the second
        message on, a transition
        ``[last_state, last_act, next_state, flag, value]`` is recorded, with
        ``flag`` 0 and ``next_state = last_state`` for a ``'terminal'``
        message and ``flag`` 1 otherwise.

        When ``is_first`` is 1 the critic's prediction for each state after
        the first is appended to ``cVal.txt`` and the terminal value to
        ``reward.txt``.

        The loop ends on ``'reg'`` (episode re-initialised, ``b'ok'`` sent),
        on ``'terminal'`` or on ``EOFError``.  Afterwards, if any transition
        was recorded, the episode is appended once to ``self.memory`` together
        with the map name seen during the episode in ``self.memory_map``.

        Args:
            con: connection object used with ``util64.recv_msg`` /
                ``util64.send_msg`` and ``con.send``.
            is_first: file logging happens only when this is 1.
        """
        rl = self.lock.genRlock()
        last_state = None
        last_act = None
        last_val = None
        memory = []
        # Captured while stepping so a later ``init_episode`` cannot change
        # the map name that gets stored with this episode.
        episode_map = self.mapName
        fval = None
        frwd = None
        try:
            if is_first == 1:
                fval = open('cVal.txt', 'a')
                frwd = open('reward.txt', 'a')
            while True:
                try:
                    # NOTE(review): unpickling data received from ``con``;
                    # this is only safe if the peer is trusted.
                    data = pickle.loads(util64.recv_msg(con))
                    if data.type == 'reg':
                        self.init_episode(data)
                        con.send(b'ok')
                        break
                    episode_map = self.mapName
                    X = self.actor.msg2state(
                        self.mapSet.find_map(self.mapName), data.msg)
                    mask = self.actor.msg2mask(
                        self.mapSet.find_map(self.mapName), data.msg)
                    rl.acquire()
                    try:
                        act = self.actor.sample_ans_masked(X, mask)
                    finally:
                        # Release even if sampling raises, so other users of
                        # the lock are not blocked forever.
                        rl.release()
                    if is_first == 1 and last_val is not None:
                        fval.write(str(self.critic.predict([X])[0, 0]) + '\n')
                        fval.flush()
                        os.fsync(fval.fileno())
                    util64.send_msg(con, pickle.dumps(act))
                    if last_state is not None:
                        if data.type == 'terminal':
                            memory.append([
                                last_state, last_act, last_state, 0,
                                data.value
                            ])
                            if is_first == 1:
                                frwd.write(str(data.value) + '\n')
                                frwd.flush()
                                os.fsync(frwd.fileno())
                            break
                        memory.append([
                            last_state, last_act, data.msg, 1, data.value
                        ])
                    last_val = data.value
                    last_state = data.msg
                    last_act = act
                except EOFError:
                    break
        finally:
            # The log files used to be left open when the method returned.
            for handle in (fval, frwd):
                if handle is not None:
                    handle.close()

        # Publish the episode once, after the loop.  Previously this block
        # sat inside the loop and appended the same list object on every
        # step.
        if memory:
            rl.acquire()
            try:
                self.memory.append(memory)
                self.memory_map.append(episode_map)
            finally:
                rl.release()
Beispiel #4
0
 def asyncController(self, con, is_first):
     """Run an epsilon-greedy loop on ``con`` and accumulate gradients.

     For each message that is not ``'reg'`` an action is chosen -- uniformly
     among the non-zero mask entries with probability ``epsilon``, otherwise
     through ``self.units.predict_ans_masked`` -- and sent back.  From the
     second message on, the prediction for the previous state is copied, the
     entry of the previous action is replaced by
     ``data.value - last_value + self.discount * maxNext`` and the gradient
     returned by ``self.units.gradient`` is collected.

     Every ``ETARGET`` increments of ``self.learn_epoch`` the weights of
     ``self.units`` are copied into ``self.target``; every ``EUPDATE`` local
     steps the collected gradients are handed to
     ``self.units.apply_gradient`` and the collection is reset.

     The loop ends only on a ``'reg'`` message (episode re-initialised,
     ``b'ok'`` sent); exceptions from the connection are not caught here.

     Args:
         con: connection object used with ``util64.recv_msg`` /
             ``util64.send_msg`` and ``con.send``.
         is_first: not used by this method.
     """
     ETARGET = 150
     EUPDATE = 50
     epoch = 0
     # ``async`` is a reserved keyword since Python 3.7, so the former
     # ``self. async = 1`` no longer parses.  ``setattr`` keeps the attribute
     # name unchanged for any code that reads it via ``getattr``.
     setattr(self, 'async', 1)
     last_state = None
     last_action = None
     last_value = 0
     gradients = []
     epsilon = self.epsilon * numpy.random.uniform(0.8, 1.2)
     while True:
         # NOTE(review): unpickling data received from ``con``; this is only
         # safe if the peer is trusted.
         data = pickle.loads(util64.recv_msg(con))
         if data.type == 'reg':
             self.init_episode(data)
             con.send(b'ok')
             break
         msg = data.msg
         # TODO: 'terminal' messages get no special treatment yet (the
         # original left an "apply gradients" placeholder here).
         X = self.units.msg2state(self.mapSet.find_map(self.mapName), msg)
         places = self.units.msg2mask(
             self.mapSet.find_map(self.mapName), msg)
         if numpy.random.random() < epsilon:
             ini, inj, ink = numpy.nonzero(places)
             ind = numpy.random.choice(len(ini))
             ans = [ini[ind], inj[ind], ink[ind]]
         else:
             ans = self.units.predict_ans_masked(X, places)
         util64.send_msg(con, pickle.dumps(ans))
         maxNext = self.target.predict_max_masked(X, places)
         if last_state is not None:
             # Convert the previous message once and reuse it for both the
             # prediction and the gradient.
             prev_X = self.units.msg2state(
                 self.mapSet.find_map(self.mapName), last_state)
             Y = self.units.predict_all(prev_X)
             Y_ = numpy.copy(Y)
             Y_[0, last_action[0], last_action[1], last_action[2]] = (
                 data.value - last_value + self.discount * maxNext)
             gradient = self.units.gradient(Y_, prev_X)
             print([i.eval() for i in gradient])
             gradients += [gradient]
         last_state = msg
         last_action = ans
         last_value = data.value
         self.learn_epoch += 1
         epoch += 1
         if self.learn_epoch % ETARGET == 0:
             self.target.set_weights(self.units.get_weights())
         if epoch % EUPDATE == 0:
             self.units.apply_gradient(gradients)
             # Reset to an empty list; the former ``gradients = 0`` made the
             # next ``gradients += [gradient]`` raise TypeError.
             gradients = []
Beispiel #5
0
 def exploiter(self, con, is_first):
     """Answer step messages on ``con`` with the network's masked prediction.

     Each message that is not ``'reg'`` is turned into a state and a mask
     and passed to ``self.units.predict_ans_masked`` together with the flag
     ``is_first == 1``.  When that flag is true the result is indexed: both
     ``[0]`` and ``[1]`` are printed and only ``[0]`` is sent back;
     otherwise the result is sent as returned.

     The loop ends on a ``'reg'`` message (episode re-initialised, ``b'ok'``
     sent) or on ``EOFError``.
     """
     verbose = (is_first == 1)
     while True:
         try:
             # NOTE(review): unpickling data received from ``con``; this is
             # only safe if the peer is trusted.
             packet = pickle.loads(util64.recv_msg(con))
             if packet.type != 'reg':
                 observation = packet.msg
                 state = self.units.msg2state(
                     self.mapSet.find_map(self.mapName), observation)
                 legal = self.units.msg2mask(
                     self.mapSet.find_map(self.mapName), observation)
                 choice = self.units.predict_ans_masked(state, legal, verbose)
                 if verbose:
                     print('exploiting', choice[0], choice[1])
                     choice = choice[0]
                 util64.send_msg(con, pickle.dumps(choice))
                 continue
             self.init_episode(packet)
             con.send(b'ok')
             break
         except EOFError:
             break
Beispiel #6
0
    def controller(self, con, is_first):
        """Run one epsilon-greedy episode and feed transitions to ``self.buf``.

        The value stored with each transition is
        ``k.value - self.exploration_weight * unvisited`` minus the same
        quantity from the previous step, where ``unvisited`` counts the cells
        of the map's ``regions`` array that the unit coordinate has not yet
        been on.

        Action selection per message:
          * with probability ``self.epsilon`` explore -- either the "sticky"
            action ``[256, 256, -1]`` (probability ``pSticky``) or a uniformly
            chosen legal position of a uniformly chosen legal move type;
          * otherwise exploit through ``self.units.predict_ans_masked`` while
            holding the lock handle from ``self.lock.genRlock()``.

        When ``is_first`` is 1, episode values go to ``rewards.txt``, Q values
        (or ``None`` for random moves) go to ``Qvals.txt`` and
        ``self.epsilon`` is multiplied by 0.98 at the end of the episode.

        The loop ends on ``'reg'`` (episode re-initialised, ``b'ok'`` sent),
        on a ``'terminal'`` message that follows at least one action, or on
        ``EOFError``.

        Args:
            con: connection object used with ``util64.recv_msg`` /
                ``util64.send_msg`` and ``con.send``.
            is_first: logging and epsilon decay happen only when this is 1.
        """
        last_state = None
        last_action = None
        last_value = 0
        # ``None`` marks "visit counts not created yet".  The former
        # ``shape[0] == 1`` test would re-create the array on every step for
        # a map with a single row.
        visited = None
        unvisited = 0
        rl = self.lock.genRlock()
        feval = None
        fq = None
        pSticky = 0.2
        try:
            if is_first == 1:
                feval = open('rewards.txt', 'a')
                fq = open('Qvals.txt', 'a')
            while True:
                try:
                    # NOTE(review): unpickling data received from ``con``;
                    # this is only safe if the peer is trusted.
                    k = pickle.loads(util64.recv_msg(con))
                    if k.type == 'reg':
                        print(self.explore_count, self.exploit_count)
                        self.init_episode(k)
                        con.send(b'ok')
                        break
                    msg = k.msg
                    X = self.units.msg2state(
                        self.mapSet.find_map(self.mapName), msg)
                    if k.type == 'terminal' and last_action is not None:
                        self.buf.add(
                            last_state, last_action, last_state,
                            (k.value - self.exploration_weight * unvisited -
                             last_value), 1, self.mapName)
                        if is_first == 1:
                            feval.write(str(k.value) + '\n')
                            feval.flush()
                            os.fsync(feval.fileno())
                            self.epsilon *= 0.98
                            print(self.epsilon)
                        break
                    if visited is None:
                        visited = numpy.zeros(
                            self.mapSet.find_map(self.mapName).regions.shape)
                        unvisited = visited.shape[0] * visited.shape[1]
                        last_value = -self.exploration_weight * unvisited
                    row, col = msg.myInfo.coord[0], msg.myInfo.coord[1]
                    visited[row, col] += 1
                    if visited[row, col] == 1:
                        # First time on this cell.
                        unvisited -= 1
                    if numpy.random.random() < self.epsilon:
                        self.explore_count += 1
                        if numpy.random.random() < pSticky:
                            print('sticky')
                            ans = [256, 256, -1]
                        else:
                            places = self.units.msg2mask(
                                self.mapSet.find_map(self.mapName), msg)
                            _, _, ink = numpy.nonzero(places)
                            moveType = numpy.random.choice(numpy.unique(ink))
                            ini, inj = numpy.nonzero(places[:, :, moveType])
                            ind = numpy.random.choice(len(ini))
                            ans = [ini[ind], inj[ind], moveType]
                            if is_first == 1:
                                print('exploring', ans)
                                fq.write('None\n')
                                fq.flush()
                                os.fsync(fq.fileno())
                    else:
                        mask = self.units.msg2mask(
                            self.mapSet.find_map(self.mapName), msg)
                        rl.acquire()
                        try:
                            ans = self.units.predict_ans_masked(
                                X, mask, is_first == 1)
                        finally:
                            # Release even if the prediction raises.
                            rl.release()
                        if is_first == 1:
                            print('exploiting', ans[0], ans[1])
                            self.exploit_count += 1
                            fq.write(str(ans[1]) + '\n')
                            fq.flush()
                            os.fsync(fq.fileno())
                            ans = ans[0]
                    util64.send_msg(con, pickle.dumps(ans))
                    if last_action is not None:
                        self.buf.add(
                            last_state, last_action, msg,
                            (k.value - self.exploration_weight * unvisited -
                             last_value), 0, self.mapName)
                    last_state = msg
                    last_action = ans
                    last_value = (
                        k.value - self.exploration_weight * unvisited)
                except EOFError:
                    print('exception found')
                    break
        finally:
            # Close the logs on every exit path, including exceptions other
            # than EOFError, which used to leave them open.
            for handle in (feval, fq):
                if handle is not None:
                    handle.close()