def _updateKernelParameters(self, S, A, random=True, normalize=True):
    """Re-pick the kernel reference rows and refresh both feature kernels.

    The state-action matrix is built from ``S`` and ``A`` via
    ``self._getStateActionMatrix``. Reference rows are then chosen for the
    state kernel (stored in ``self.MuS``) and for the state-action kernel
    (stored in ``self.MuSA``). Finally the bandwidths and weighting of
    ``self.kernelS`` and ``self.kernelSA`` are updated from those rows.

    Args:
        S: State matrix; its columns from ``self.NUM_NON_KB_DIM`` onwards
            are handed to ``self._reshapeKbPositions``.
        A: Action matrix, only used to build the state-action matrix.
        random: When true, rows are chosen with ``Helper.getRandomSubset``;
            otherwise with ``Helper.getRepresentativeRows``.
        normalize: Forwarded to ``Helper.getRepresentativeRows``; unused
            when ``random`` is true.
    """
    stateActions = self._getStateActionMatrix(S, A)

    # Select reference rows: states first, state-action pairs second.
    if not random:
        self.MuS = Helper.getRepresentativeRows(
            S, self.numFeatures, normalize)
        self.MuSA = Helper.getRepresentativeRows(
            stateActions, self.numFeatures, normalize)
    else:
        self.MuS = Helper.getRandomSubset(S, self.numFeatures)
        self.MuSA = Helper.getRandomSubset(stateActions, self.numFeatures)

    # Sample count handed to Helper.getBandwidth for every estimate below.
    bwSampleCount = 500

    def configureKernel(kernel, centres, splitCol,
                        factorNonKb, factorKb, weighting):
        # Columns left of splitCol form the non-kilobot part; the rest are
        # reshaped into kilobot positions before their bandwidth is taken.
        nonKbBandwidth = Helper.getBandwidth(
            centres[:, :splitCol], bwSampleCount, factorNonKb)
        kbPositions = self._reshapeKbPositions(centres[:, splitCol:])
        kbBandwidth = Helper.getBandwidth(
            kbPositions, bwSampleCount, factorKb)
        kernel.setBandwidth(nonKbBandwidth, kbBandwidth)
        kernel.setWeighting(weighting)

    # Kernel over states (PHI_S).
    configureKernel(
        self.kernelS, self.MuS, self.NUM_NON_KB_DIM,
        self.bwFactorNonKbS, self.bwFactorKbS, self.weightNonKbS)

    # Kernel over state-action pairs (PHI_SA).
    # NOTE(review): the split is shifted by 2 columns, presumably the action
    # dimensionality added by _getStateActionMatrix -- confirm.
    configureKernel(
        self.kernelSA, self.MuSA, self.NUM_NON_KB_DIM + 2,
        self.bwFactorNonKbSA, self.bwFactorKbSA, self.weightNonKbSA)
def _updateBandwidthsGP(self, Ssub):
    """Refresh the bandwidths and weighting of the policy's kernel.

    ``Ssub`` is split column-wise at ``self.NUM_NON_KB_DIM``. A bandwidth is
    obtained from ``Helper.getBandwidth`` for the leading columns and for
    the trailing columns after they pass through
    ``self._reshapeKbPositions``. Both are then set on
    ``self.policy.kernel`` together with ``self.weightNonKbGP``.

    Args:
        Ssub: Two-dimensional state matrix (it is indexed as
            ``Ssub[:, a:b]``); its row count ``Ssub.shape[0]`` is passed to
            ``Helper.getBandwidth`` as the sample count.
    """
    sampleCount = Ssub.shape[0]
    splitCol = self.NUM_NON_KB_DIM

    # Non-kilobot part: the leading columns.
    nonKbBandwidth = Helper.getBandwidth(
        Ssub[:, :splitCol], sampleCount, self.bwFactorNonKbGP)

    # Kilobot part: the remaining columns, reshaped before the estimate.
    kbPositions = self._reshapeKbPositions(Ssub[:, splitCol:])
    kbBandwidth = Helper.getBandwidth(
        kbPositions, sampleCount, self.bwFactorKbGP)

    gpKernel = self.policy.kernel
    gpKernel.setBandwidth(nonKbBandwidth, kbBandwidth)
    gpKernel.setWeighting(self.weightNonKbGP)