Example #1
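The four snippets below are methods of a DQN-style agent class; each one builds supervised training targets from stored transitions. They assume roughly the following imports (module sources inferred from the calls; skMSE is presumably sklearn's mean_squared_error under a local alias):

import copy
import random
from operator import itemgetter

import numpy as np
import torch
# Assumption: skMSE is an alias for sklearn's mean_squared_error.
from sklearn.metrics import mean_squared_error as skMSE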
def buildTrainData(self, currState, nextState, reward, done, action):
    # One forward pass over both states: q[0] holds the Q-values of the
    # current state, q[1] those of the next state.
    states = np.asarray([currState, nextState])
    q = self.model.predict(np.reshape(self.nState(states), (-1, 2)))
    self.qValues = q[0]
    qVal = q[1]
    qMax = np.max(qVal)
    # Deep-copy so the target vector is not a reference into qValues.
    Y = copy.deepcopy(self.qValues)
    if done:
        y = reward  # terminal transition: target is the raw reward
    else:
        y = reward + self.discount * qMax  # Bellman target
    # TODO: verify that only the chosen action's entry is replaced and
    # that the initial loss is higher than the loss after one epoch;
    # also check that values are copied rather than referenced.
    Y[action] = y
    self.trainX.append(self.nState(currState))
    self.trainY.append(Y)
    return skMSE(Y, self.qValues)
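This single-transition variant predicts Q-values for the current and next state in one model.predict call, copies the current prediction, and overwrites only the taken action's entry with the Bellman target (the bare reward on terminal steps). A minimal sketch of the loop that might drive it; the env object follows the classic gym step() API and chooseAction is a hypothetical helper, neither of which appears in the source:

def runEpisode(env, agent):
    # Hypothetical driver for buildTrainData; env.step() and the assumed
    # agent.chooseAction helper are illustrations, not the project's API.
    currState = env.reset()
    done = False
    while not done:
        action = agent.chooseAction(currState)
        nextState, reward, done, _ = env.step(action)
        agent.buildTrainData(currState, nextState, reward, done, action)
        currState = nextState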
Example #2
def buildMiniBatchTrainData(self):
    # Sample a minibatch of indices into the parallel transition lists,
    # falling back to every stored transition while the buffer is small.
    if len(self.curState) > self.batchSize:
        ndxs = random.sample(range(len(self.curState)), self.batchSize)
    else:
        ndxs = range(len(self.curState))

    # itemgetter returns a tuple of the selected entries (note: it would
    # return a bare element, not a tuple, if ndxs ever had length 1).
    c = itemgetter(*ndxs)(self.curState)
    n = itemgetter(*ndxs)(self.nxtState)
    r = np.asarray(itemgetter(*ndxs)(self.rwdList))
    d = np.asarray(itemgetter(*ndxs)(self.doneList), dtype=bool)
    a_ = np.asarray(itemgetter(*ndxs)(self.actnList))
    # Pair each row index with the action taken in that transition so the
    # targets can be written with one fancy-indexing assignment below.
    a = np.vstack((np.arange(len(a_)), a_))

    # Stack next states and current states together so a single forward
    # pass serves both inferences.
    X = torch.stack(n + c)

    self.model.eval()
    qVal = self.model(X.float()).cpu().detach().numpy()

    # Split the stacked output back apart. Use the actual sample count,
    # not self.batchSize, so the split stays correct when the buffer
    # holds fewer transitions than a full batch.
    hIndx = len(ndxs)
    qVal_n = qVal[:hIndx]
    qMax_n = np.max(qVal_n, axis=1)
    qVal_c = qVal[hIndx:]

    # Targets start as the current predictions; only the entries of the
    # actions actually taken are overwritten with Bellman targets.
    Y = copy.deepcopy(qVal_c)
    y = np.zeros(r.shape)
    ndx = np.where(d)
    y[ndx] = r[ndx]  # terminal transitions: reward only
    ndx = np.where(~d)
    y[ndx] = r[ndx] + self.discount * qMax_n[ndx]  # reward + discounted max
    Y[a[0], a[1]] = y
    self.trainX = X[hIndx:]
    self.trainY = torch.from_numpy(Y).to(self.device)

    return skMSE(Y, qVal_c)
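The distinctive step here is stacking next and current states into one tensor so the model runs a single forward pass, after which Y[a[0], a[1]] = y uses NumPy fancy indexing to replace exactly one Q-value per row. A self-contained illustration of that indexing pattern with made-up numbers:

import numpy as np

qVal_c = np.array([[0.1, 0.9],
                   [0.4, 0.6],
                   [0.7, 0.3]])        # 3 states, 2 actions
actions = np.array([1, 0, 1])          # action taken in each transition
targets = np.array([1.5, -0.2, 0.8])   # Bellman targets for those actions

Y = qVal_c.copy()
Y[np.arange(len(actions)), actions] = targets
# Y matches qVal_c everywhere except the three (row, action) entries,
# which now hold the Bellman targets.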
Example #3
def buildMiniBatchTrainData(self):
    c, n, r, d, a = [], [], [], [], []
    # Sample a minibatch of transitions from the replay memory, falling
    # back to the whole memory while it is still small.
    if len(self.replayMemory) > self.batchSize:
        minibatch = random.sample(self.replayMemory, self.batchSize)
    else:
        minibatch = self.replayMemory
    for ndx, [currState, nextState, reward, done,
              action] in enumerate(minibatch):
        c.append(currState)
        n.append(nextState)
        r.append(reward)
        d.append(done)
        # Keep (row, action) pairs for the fancy-indexed target write.
        a.append([ndx, action])
    c = np.asanyarray(c)
    n = np.asanyarray(n)
    r = np.asanyarray(r)
    d = np.asanyarray(d, dtype=bool)
    a = np.asanyarray(a).T
    self.model.eval()
    # Next-state Q-values supply the max term of the Bellman target.
    X = torch.from_numpy(np.reshape(self.nState(n),
                                    (-1, 2))).to(self.device)
    qVal_n = self.model(X.float()).cpu().detach().numpy()
    qMax_n = np.max(qVal_n, axis=1)
    # Current-state Q-values become the base of the training targets.
    X = torch.from_numpy(np.reshape(self.nState(c),
                                    (-1, 2))).to(self.device)
    qVal_c = self.model(X.float()).cpu().detach().numpy()
    Y = copy.deepcopy(qVal_c)
    y = np.zeros(r.shape)
    ndx = np.where(d)
    y[ndx] = r[ndx]  # terminal transitions: reward only
    ndx = np.where(~d)
    y[ndx] = r[ndx] + self.discount * qMax_n[ndx]  # reward + discounted max
    Y[a[0], a[1]] = y
    self.trainX = c
    self.trainY = Y
    return skMSE(Y, qVal_c)
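This variant draws 5-element transitions from a single replayMemory instead of parallel per-field lists. A minimal sketch of how such a memory is commonly kept; the deque with maxlen is an assumption about the surrounding class, not something shown in the example:

from collections import deque
import random

replayMemory = deque(maxlen=10000)  # evicts the oldest transition when full

# Store transitions in the layout the loop above unpacks.
for step in range(100):
    currState, nextState = [float(step), 0.0], [float(step + 1), 0.0]
    reward, done, action = 1.0, step == 99, step % 2
    replayMemory.append([currState, nextState, reward, done, action])

# Sampling mirrors the guard at the top of buildMiniBatchTrainData.
batchSize = 32
minibatch = (random.sample(replayMemory, batchSize)
             if len(replayMemory) > batchSize else replayMemory)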
Example #4
def buildMiniBatchTrainData(self):
    # Sample a minibatch of indices into the parallel transition lists,
    # falling back to every stored transition while the buffer is small.
    if len(self.curState) > self.batchSize:
        ndxs = random.sample(range(len(self.curState)), self.batchSize)
    else:
        ndxs = range(len(self.curState))

    bSize = len(ndxs)

    c = np.asarray(itemgetter(*ndxs)(self.curState))
    n = np.asarray(itemgetter(*ndxs)(self.nxtState))
    r = np.asarray(itemgetter(*ndxs)(self.rwdList))
    d = np.asarray(itemgetter(*ndxs)(self.doneList), dtype=bool)
    a_ = np.asarray(itemgetter(*ndxs)(self.actnList))
    # Pair each row index with its taken action for the fancy-indexed
    # target write below.
    a = np.vstack((np.arange(bSize), a_))

    self.model.eval()
    # Next-state Q-values supply the max term of the Bellman target.
    X = torch.from_numpy(np.reshape(n, (bSize, -1))).to(self.device)
    qVal_n = self.model(X.float()).cpu().detach().numpy()
    qMax_n = np.max(qVal_n, axis=1)
    # Current-state Q-values become the base of the training targets.
    X = torch.from_numpy(np.reshape(c, (bSize, -1))).to(self.device)
    qVal_c = self.model(X.float()).cpu().detach().numpy()

    Y = copy.deepcopy(qVal_c)
    y = np.zeros(r.shape)
    ndx = np.where(d)
    y[ndx] = r[ndx]  # terminal transitions: reward only
    ndx = np.where(~d)
    y[ndx] = r[ndx] + self.discount * qMax_n[ndx]  # reward + discounted max
    Y[a[0], a[1]] = y
    self.trainX = c
    self.trainY = Y

    return skMSE(Y, qVal_c)
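Unlike Example #2's stacked single pass, this version runs two separate forward passes and sizes every reshape from bSize = len(ndxs), so a partially filled buffer is handled correctly. A sketch of the gradient step such targets typically feed; the trainStep name, optimizer, and loss choice are assumptions, since the example only builds trainX and trainY:

import torch
import torch.nn as nn

def trainStep(model, optimizer, trainX, trainY, device):
    # Hypothetical training step: fit the network's current-state
    # predictions to the Bellman targets built above.
    model.train()
    X = torch.from_numpy(trainX.reshape(len(trainX), -1)).float().to(device)
    Y = torch.from_numpy(trainY).float().to(device)
    optimizer.zero_grad()
    loss = nn.functional.mse_loss(model(X), Y)
    loss.backward()
    optimizer.step()
    return loss.item()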