def predict(self):
    """Run the prediction step up to the current time.

    Rebuilds the state transition matrix from the time elapsed since
    ``self.t``, stores the new timestamp, propagates ``self.x`` and
    ``self.P`` through the transition and then scales ``self.P`` by a
    constant diagonal factor of 1.3.

    Returns element ``[0][0]`` of the predicted state's value.
    """
    now = time()
    elapsed = now - self.t
    self.t = now

    # Constant-velocity style transition: the first state component is
    # advanced by the second one times the elapsed time.
    self.F = matrix([[1., elapsed], [0, 1.]])
    transition = self.F

    self.x = transition * self.x + self.u

    propagated = transition * self.P * transition.transpose()
    # Inflate the propagated covariance by 1.3 on the diagonal.
    self.P = propagated * matrix([[1.3, 0], [0, 1.3]])

    return self.x.getValue()[0][0]
def update(self, measurement):
    """Run the measurement-update step with a single scalar reading.

    The state transition matrix ``self.F`` is refreshed from the time
    elapsed since ``self.t`` (``self.t`` itself is left unchanged), then
    ``self.x`` and ``self.P`` are corrected with the gain computed from
    ``self.H``, ``self.P`` and ``self.R``. ``self.G`` is not applied.

    measurement -- scalar wrapped into a 1x1 matrix before use.
    """
    elapsed = time() - self.t
    self.F = matrix([[1., elapsed], [0, 1.]])

    observed = matrix([[measurement]])
    H_transposed = self.H.transpose()

    # Innovation and its covariance.
    residual = observed - self.H * self.x
    residual_cov = self.H * self.P * H_transposed + self.R

    gain = self.P * H_transposed * residual_cov.inverse()

    self.x = self.x + (gain * residual)
    self.P = (self.I - gain * self.H) * self.P
def test():
    """Run a fixed 2-D localisation scenario and print the resulting belief.

    Starts from a uniform distribution over the colour grid, applies each
    motion followed by the matching measurement through ``move`` and
    ``sense``, then prints the final distribution wrapped in a ``matrix``.
    """
    colors = [['red', 'green', 'green', 'red', 'red'],
              ['red', 'red', 'green', 'red', 'red'],
              ['red', 'red', 'green', 'green', 'red'],
              ['red', 'red', 'red', 'red', 'red']]
    measurements = ['green', 'green', 'green', 'green', 'green']
    motions = [[0, 0], [0, 1], [1, 0], [1, 0], [0, 1]]
    sensor_right = 0.7
    p_move = 0.8

    # Uniform prior: every cell gets 1 / (number of cells).
    n_rows, n_cols = len(colors), len(colors[0])
    cell_count = n_rows * n_cols
    p = [[1. / cell_count for _ in range(n_cols)] for _ in range(n_rows)]

    for motion, measurement in zip(motions, measurements):
        p = move(p, motion, p_move)
        p = sense(p, colors, measurement, sensor_right)

    # The matrix helper lives in a sibling directory, so extend the path
    # before importing it.
    import sys
    sys.path.append('../basic')
    from Matrix import matrix
    print(matrix(p))
def getXWithGBBasis(X, BE, loss, printTree=False): '''approximates BE by gradient boosting of specified loss function''' Xnew = [] print "bellman errors,: ", BE s = [] y = [] for item in X.value: s.append(item) y.append(BE[X.value.index(item)]) s, y = np.array(s), np.array(y) s = s.astype(np.float32) params = { 'n_estimators': 10, 'max_depth': 2, 'min_samples_split': 2, 'learning_rate': 0.01, 'loss': loss } reg = ensemble.GradientBoostingRegressor(**params) reg.fit(s, y) BEapprox = reg.predict(s) for item in X.value: Xnew.append(item + [BEapprox[X.value.index(item)]]) if printTree: dt = DecisionTreeRegressor(random_state=0) dt.fit(X.value, BEapprox) visualize_tree(dt, ["Sum", "dealerFaceCard"]) raw_input("press key to continue") return matrix(Xnew)
def getCountMatrix(count):
    '''returns a diagonal matrix of counts for each state

    count -- mapping from state to its count; entry k of the diagonal is
             the count of the k-th key in the mapping's iteration order.
    '''
    size = len(count)
    diagonal = [[0] * size for _ in range(size)]
    for idx, state in enumerate(count):
        diagonal[idx][idx] = count[state]
    return matrix(diagonal)
def applyProjection(self,s,FA):
    '''projects to span of basis using the function approximator

    Builds a training set from every state of s except the last one
    (factored state as input, current value as target), hands it to FA,
    and writes FA's predictions back into self.value.

    s  -- sequence of states
    FA -- function approximator providing setXY(X, Y) and predict()
    '''
    domain = self.domObj
    # One (features, [target]) pair per state, last state excluded.
    samples = [(domain.factored(state), [self.getValue(str(state))])
               for state in s[:-1]]
    X = matrix([features for features, _ in samples])
    Y = matrix([target for _, target in samples])

    FA.setXY(X,Y)
    predictions = FA.predict()
    if not predictions:
        # Nothing usable came back; leave the stored values untouched.
        return
    for idx in range(len(predictions)):
        self.value[str(s[idx])] = predictions[idx]
def getCountMatrix(self):
    '''returns a diagonal matrix of counts for each state

    Diagonal entry k holds self.count[key] for the k-th key in the
    iteration order of self.count; all other entries are 0.
    '''
    size = len(self.count)
    rows = [[self.count[state] if col == row else 0 for col in range(size)]
            for row, state in enumerate(self.count)]
    return matrix(rows)
def getPhiNew(self, X, BE):
    '''appends bellman error as new basis

    Returns a matrix with one row per entry of self.value: row k is
    X.value[k] extended by BE[k].
    '''
    augmented = [X.value[k] + [BE[k]] for k in range(len(self.value))]
    return matrix(augmented)
def updateValues(self, t, discount): '''update values based on trajectory''' n = len(t) print "updating value of state sequence: ", t R = [self.grid.reward(s[0], s[1]) for s in t] X = [] Y = [] for i in range(n - 1, -1, -1): self.setCount(t[i]) X.append(self.grid.factored(t[i][0], t[i][1])) exponent = (n - 1) - i if i == n - 1: self.setValue(t[i], R[i], discount, exponent) Y.append(R[i]) else: transitionValue = self.getValue(t[i + 1]) self.setValue(t[i], R[i] + transitionValue, discount, exponent) Y.append(R[i] + transitionValue) return (matrix(X), matrix([[j] for j in Y]))
def __init__(self, numInput, numHidden, numOutput):
    """Set up layer sizes, learning rate and randomly initialised parameters.

    numInput  -- number of input nodes
    numHidden -- number of hidden nodes
    numOutput -- number of output nodes

    Every weight and bias matrix is filled via ``randomize(-100, 100, 1)``
    and then has each entry divided by 100.
    """
    def _randomize_scaled(m):
        # Same two-step initialisation for every parameter matrix.
        m.randomize(-100, 100, 1)
        m.map_fn_matrix(lambda x: x / 100)

    self.input_nodes = numInput
    self.hidden_nodes = numHidden
    self.output_nodes = numOutput
    self.learning_rate = 0.1

    # Weights: (hidden x input) and (output x hidden).
    self.weights_input_hidden = matrix(self.hidden_nodes, self.input_nodes)
    self.weights_hidden_output = matrix(self.output_nodes, self.hidden_nodes)
    _randomize_scaled(self.weights_input_hidden)
    _randomize_scaled(self.weights_hidden_output)

    # Biases: one column per layer.
    self.bias_hidden = matrix(self.hidden_nodes, 1)
    self.bias_output = matrix(self.output_nodes, 1)
    _randomize_scaled(self.bias_hidden)
    _randomize_scaled(self.bias_output)
def __init__(self):
    """Initialise the filter matrices and record the creation time."""
    initial_variance = 1000.

    # State: location and speed, both starting at zero.
    self.x = matrix([[0.], [0.]])
    # Initial uncertainty of the state.
    self.P = matrix([[initial_variance, 0.], [0., initial_variance]])
    # External motion (steering); none by default.
    self.u = matrix([[0.], [0.]])
    # State transition matrix.
    self.F = matrix([[1., 1.], [0, 1.]])
    # Measurement function: selects the first state component.
    self.H = matrix([[1., 0.]])
    # Measurement uncertainty.
    self.R = matrix([[.01]])
    # Gain multiplier.
    self.G = matrix([[2.]])
    # 2x2 identity.
    self.I = matrix([[1., 0], [0, 1.]])
    # Timestamp taken at construction.
    self.t = time()
def filter(x, P):
    #Implements the Kalman Filter function for measurement
    # update and prediction step
    #
    # x -- state estimate, updated locally for each measurement
    # P -- covariance of the estimate, updated alongside x
    #
    # Reads the module-level `measurements`, `u`, `H`, `F`, `R` and `I`.
    # Nothing is returned; the results are only shown via x.show() / P.show().
    for n in range(len(measurements)):
        # The globals are only read here, never rebound.
        global u, H, F, R, I
        # measurement update
        Z = matrix([[measurements[n]]])
        Y = Z.transpose() - H * x  # difference between measurement and H * x
        S = H * P * H.transpose() + R
        K = P * H.transpose() * S.inverse()  # gain
        x = x + (K * Y)
        P = (I - K * H) * P
        # prediction
        x = F * x + u
        P = F * P * F.transpose()
        # NOTE(review): the report is assumed to run once per measurement,
        # i.e. inside the loop -- confirm against the intended layout.
        print 'x='
        x.show()
        print 'P='
        P.show()
def filter(x,P):
    #Implements the Kalman Filter function for measurement
    # update and prediction step
    #
    # x -- state estimate, updated locally for each measurement
    # P -- covariance of the estimate, updated alongside x
    #
    # Reads the module-level `measurements`, `u`, `H`, `F`, `R` and `I`.
    # Nothing is returned; the results are only shown via x.show() / P.show().
    for n in range(len(measurements)):
        # The globals are only read here, never rebound.
        global u,H,F,R,I
        # measurement update
        Z = matrix([[measurements[n]]])
        Y = Z.transpose() - H * x  # difference between measurement and H * x
        S = H * P * H.transpose() + R
        K = P * H.transpose() * S.inverse()  # gain
        x = x + (K*Y)
        P = (I - K*H) * P
        # prediction
        x = F * x + u
        P = F * P * F.transpose()
        # NOTE(review): the report is assumed to run once per measurement,
        # i.e. inside the loop -- confirm against the intended layout.
        print 'x='
        x.show()
        print 'P='
        P.show()
def reset(self):
    """Restore the covariance and the state to their starting values."""
    # Large diagonal covariance for the fresh estimate.
    initial_covariance = matrix([[1000., 0.], [0., 1000.]])
    self.P = initial_covariance
    # Both state components go back to zero.
    initial_state = matrix([[0.], [0.]])
    self.x = initial_state
#Class exercise from Matrix import matrix measurements = [1, 2, 3] x = matrix([[0.], [0.]]) #initial state (location and velocity) P = matrix([[1000., 0.], [0., 1000.]]) #initial uncertainty u = matrix([[0.], [0.]]) #external motion F = matrix([[1., 1.], [0., 1.]]) #next state function H = matrix([[1., 0.]]) #measurement function R = matrix([[1.]]) #measurement uncertainty I = matrix([[1., 0.], [0., 1.]]) #identity matrix def filter(x, P): #Implements the Kalman Filter function for measurement # update and prediction step for n in range(len(measurements)): global u, H, F, R, I # measurement update Z = matrix([[measurements[n]]]) Y = Z.transpose() - H * x S = H * P * H.transpose() + R K = P * H.transpose() * S.inverse() x = x + (K * Y) P = (I - K * H) * P # prediction x = F * x + u P = F * P * F.transpose()
def initStateVector():
    '''initializes the state vector

    Stores a 1x4 matrix of zeros on the KalmanFilter class itself.
    '''
    zero_row = [0.] * 4
    KalmanFilter.state = matrix([zero_row])
# -*- coding: utf-8 -*- __author__ = 'Jiapeng Hong' import sys sys.path.append('../basic') from Matrix import matrix # global params dt = 0.1 u = matrix([[0.], [0.], [0.], [0.]]) F = matrix([[1., 0., dt, 0,], [0., 1., 0., dt], [0., 0., 1., 0.], [0., 0., 0., 1.]]) H = matrix([[1., 0., 0., 0.], [0., 1., 0., 0.]]) R = matrix([[0.1, 0.], [0., 0.1]]) I = matrix.eye(4) def Kalman_filter(x, P, measurements): for i in range(len(measurements)): # motion update x = F * x + u P = F * P * F.transpose() # measurement update y = - H * x + measurements[i] S = H * P * H.transpose() + R K = P * H.transpose() * S.inverse()
def getSamples(self, N, discount=1, action=False, approx=False, method='LSReg', init=False):
    '''generates N samples

    Runs N iterations. Each iteration draws a trajectory with self.sample;
    when the trajectory is shorter than MAX (grid.size ** 4) the state
    values are updated from it and, if approx is true, re-fitted with the
    selected method ('LSReg', 'GB' or 'NN').

    Returns a dict that maps an index to a deep copy of self.value.
    self.value is reset to an empty dict at the start.

    The init parameter is only referenced inside a disabled (string
    literal) code block below.
    '''
    MAX = self.grid.size**4
    values = {}
    self.value = {}
    for i in range(N):
        print "=" * 20 + " trajectory " + str(i + 1) + " " + "=" * 20
        print "generating sample: ", i
        t = self.sample(discount, action, MAX)
        # Trajectories that reached MAX states are ignored.
        if len(t) < MAX:
            # (X, Y) pair returned by updateValues; only the GB branch uses it.
            dataSet = deepcopy(self.updateValues(t, discount))
            print "value before: ", self.value
            if approx:
                if method == 'LSReg':
                    print "method is LSReg"
                    if len(self.value) > 0:
                        C = self.getCountMatrix()
                        X = self.getDataMatrix()
                        Y = self.getRegressionValues()
                        W = self.getWeight(X, Y, C)
                        # No weights available: skip the rest of this iteration.
                        if not W:
                            continue
                        # Residual of every stored value against the first fit.
                        BE = []
                        for key in self.value:
                            x = [[v] for v in self.grid.factored(key[0], key[1])]
                            Y_hat = (W.transpose() * matrix(x)).value[0][0]
                            BE.append(self.value[key] - Y_hat)
                        # Refit with the residual appended as an extra column.
                        X = self.getPhiNew(X, BE)
                        W = self.getWeight(X, Y, C)
                        if not W:
                            continue
                        # Replace every stored value by the second fit's prediction.
                        j = 0
                        for key in self.value:
                            Xj = X.value[j]
                            Xj = matrix([[item] for item in Xj])
                            Y_hat = (W.transpose() * Xj).value[0][0]
                            self.value[key] = Y_hat
                            j += 1
                elif method == 'GB':
                    print "method is GB"
                    if len(self.value) > 0:
                        X = dataSet[0]
                        Y = dataSet[1]
                        #X = self.getDataMatrix()
                        #Y = self.getRegressionValues()
                        Y_hat = self.GBFit(X, Y, loss="ls")
                        # Disabled alternative kept as a string literal.
                        ''' if i ==0 and init: Y_hat = self.GBFit(X,Y,loss="ls",initModel=init) elif i == N-1 or i == N-2: Y_hat = self.GBFit(X,Y,loss="ls",printTree=True,treeName="basis"+str(i)) else: Y_hat = self.GBFit(X,Y,loss="ls") '''
                        j = 0  # not used in this branch
                        nY = len(Y_hat)
                        # NOTE(review): this loop reuses the name i of the outer
                        # trajectory loop, so values[i] further down is keyed by
                        # the last inner index after a GB fit -- confirm intent.
                        for i in range(nY):
                            unfactoredState = self.grid.unfactored(X.value[i])
                            print "compressed state: ", unfactoredState
                            print "keys: ", self.value
                            print unfactoredState in self.value.keys()
                            if unfactoredState in self.value.keys():
                                # NOTE(review): Y_hat is indexed by the state, not
                                # by i; presumably GBFit returns a mapping keyed by
                                # state -- confirm.
                                self.value[unfactoredState] = Y_hat[unfactoredState]
                        # Disabled alternative kept as a string literal.
                        ''' for key in self.value: y_hat = self.grid.factored(key[0],key[1]) self.value[key] = Y_hat[j] j += 1 '''
                elif method == 'NN':
                    print "method is NN"
                    if len(self.value) > 0:
                        X = self.getDataMatrix()
                        Y = self.getRegressionValues()
                        Y_hat = self.NNFit(X, Y)
                        # Predictions are assigned in the iteration order of self.value.
                        j = 0
                        for key in self.value:
                            self.value[key] = Y_hat[j]
                            j += 1
            # NOTE(review): the snapshot is assumed to be taken only for
            # trajectories shorter than MAX -- confirm the intended nesting.
            print "value after: ", self.value
            values[i] = deepcopy(self.value)
    return values
def getRegressionValues(value):
    '''returns the values of each state

    value -- mapping from state to value. The result is a column matrix
             with one single-element row per key, in iteration order.
    '''
    column = [[value[state]] for state in value]
    return matrix(column)
def getDataMatrix(value):
    '''returns the data matrix of values

    value -- mapping whose keys are indexable; the first two components
             of every key are converted to float and form one row.
    '''
    rows = [[float(state[0]), float(state[1])] for state in value]
    return matrix(rows)
def loop(): count = 1 while not stopped.wait(interval): print("cycle={}".format(count)) count += 1 print(nn.predict([0, 0])) print(nn.predict([1, 0])) print(nn.predict([0, 1])) print(nn.predict([1, 1])) print("\n") Thread(target=loop).start() return stopped.set mat = matrix(2, 3) mat2 = matrix(3, 2) mat.randomize(3, 44) mat2.randomize(5) mat2.map_fn_matrix(lambda x: x * -1) print(mat) print(mat2) mat2.scale(1.5) mat3 = matrix.multiply(m1=mat, m2=mat2) print(mat3) mat.transpose_in_place() print(mat) mat4 = matrix.transpose(mat2) print(mat4)
def getXWithNewBasis(X, BE):
    '''appends the bellman error as a new dimension to learn weights on

    X  -- matrix-like object exposing its rows as the list X.value
    BE -- sequence of Bellman errors; BE[k] belongs to row k of X.value

    Returns a new matrix whose row k is X.value[k] extended by BE[k].
    '''
    # Pair rows and errors by position. Looking the position up with
    # X.value.index(row) would scan the list for every row and would give
    # duplicate rows the error of their first occurrence.
    return matrix([row + [BE[k]] for k, row in enumerate(X.value)])
from Matrix import Matrix from Matrix import matrix p = matrix() class Jugador1: def variable(self, movimientoDerecha, movimientoIzquierda, cutderecha, cutizquierda): self.x = [] self.y = [] self.respuesta = [] self.movimientoDerecha = movimientoDerecha self.movimientoIzquierda = movimientoIzquierda self.cutderecha = cutderecha self.cutizquierda = cutizquierda def movimientoJ1(self): self.movimientoDerecha = False self.movimientoIzquierda = False self.cutderecha = False self.cutizquierda = False respuesta = " " print("X turno ") x = int(input("Entra el numero fila:")) y = int(input("Entra el numero columna:")) if (str(Matrix[x][y]) == "X"): if (not y == 7 and not y == 0): if (str(Matrix[x + 1][y + 1]) == " "): self.movimientoIzquierda = True if (str(Matrix[x + 1][y - 1]) == " "):
def main():
    '''main method

    Reads -numberOfPlays and -discount from argv, generates that many
    plays while refitting the value approximation after each one, then
    writes sample hands to pos.txt / facts.txt and shows a plot.
    '''
    value = {}
    count = {}
    # Each option's value is the argv entry right after its flag.
    numberOfPlays = int(argv[argv.index("-numberOfPlays") + 1])
    discount = float(argv[argv.index("-discount") + 1])
    for i in range(numberOfPlays):
        print "=" * 80
        print "Trajectory number: ", i
        print "=" * 80
        cards = makeCardDeck()
        # value and count are passed in and read back afterwards.
        generatePlay(cards, value, count, discount)
        for key in value:
            print "Sum and dealer card: ", key, "Value: ", value[key], "Number of times states visited: ", count[key]
        C = getCountMatrix(count)
        X = getDataMatrix(value)
        print "C =", C
        print "X =", X
        Y = getRegressionValues(value)
        print "Y =", Y
        W = getWeight(X, Y, C)
        # No weights available: move on to the next play.
        if not W:
            continue
        print "W =", W
        # Residual of each stored value against the first fit.
        BE = []
        for key in value:
            approxValue = (
                W.transpose() * matrix([[float(key[0])], [float(key[1])]])).value[0][0]
            be = value[key] - approxValue
            BE.append(be)
            print "state: ", key, "true value: ", value[key], "approx value: ", approxValue, "bellman error: ", be
        #X = getXWithNewBasis(X,BE)
        # The tree is only visualised on play number 500.
        if i == 500:
            X = getXWithGBBasis(X, BE, "ls", True)
        else:
            X = getXWithGBBasis(X, BE, "ls")
        print "Xnew =", X
        W = getWeight(X, Y, C)
        if not W:
            continue
        print "Wnew =", W
        # Report the residual after adding the new basis column.
        j = 0
        for key in value:
            Xj = X.value[j]
            Xj = matrix([[item] for item in Xj])
            newApproxValue = (W.transpose() * Xj).value[0][0]
            be = value[key] - newApproxValue
            print "state: ", Xj, "true value: ", value[key], "approx value: ", newApproxValue, "new bellman error after basis addition: ", be
            j += 1
        #raw_input("Press key to move to next sample")
    # NOTE(review): everything from here on is assumed to run once, after
    # the loop over plays -- confirm the intended nesting.
    samples = getSampleHands(1000, value)  #['d6', 'h6', -0.32290554988213827]
    # Both files are opened in append mode and closed at the very end.
    pos = open("pos.txt", "a")
    facts = open("facts.txt", "a")
    for sample in samples:
        # samples.index(sample) yields the position of the first equal sample.
        makeInput(samples.index(sample), sample, pos, facts, makeActualCardDeck())
    X, Y, Z = [], [], []
    for key in value:
        X += [float(key[0])]
        Y += [float(key[1])]
        Z += [value[key]]
    vAct = [96.6, 97.9, 99.0, 94.96, -50, 100, 94.7, 97.6, 99.0]
    # The hard-coded list above is immediately replaced by the stored values.
    vAct = Z
    # Each series is vAct shifted down by a fixed constant.
    vLS = [(item - 0.01) for item in vAct]
    vGBls = [(item - 0.05) for item in vAct]
    vGBlad = [(item - 0.02) for item in vAct]
    vGBHuber = [(item - 0.019) for item in vAct]
    vNN = [(item - 0.015) for item in vAct]
    x = range(len(vAct))
    diffLS = []
    diffGBls = []
    diffGBlad = []
    diffGBHuber = []
    diffNN = []
    for i in x:
        diffLS += [vAct[i] - vLS[i]]
        diffGBls += [vAct[i] - vGBls[i]]
        diffGBlad += [vAct[i] - vGBlad[i]]
        diffGBHuber += [vAct[i] - vGBHuber[i]]
        diffNN += [vAct[i] - vNN[i]]
    # max() raises ValueError when value (and therefore every diff list) is empty.
    maxLS = max(diffLS)
    maxGBls = max(diffGBls)
    maxGBlad = max(diffGBlad)
    maxGBHuber = max(diffGBHuber)
    maxNN = max(diffNN)
    yLS = []
    yGBls = []
    yGBlad = []
    yGBHuber = []
    yNN = []
    N = 100
    # Every curve is its maximum difference decayed by 0.97 per step.
    for i in range(N):
        yLS += [(0.97**(i)) * maxLS]
        yGBls += [(0.97**(i)) * maxGBls]
        yGBlad += [(0.97**(i)) * maxGBlad]
        yGBHuber += [(0.97**(i)) * maxGBHuber]
        yNN += [(0.97**(i)) * maxNN]
    plt.plot(range(N), yLS, label='LS')
    plt.plot(range(N), yGBls, label='GBls')
    plt.plot(range(N), yGBlad, label='GBlad')
    plt.plot(range(N), yGBHuber, label='GBHuber')
    plt.plot(range(N), yNN, label='deepNBatch')
    # NOTE(review): the x axis carries range(N) and the y axis the decayed
    # differences, so the two axis labels look swapped -- confirm.
    plt.xlabel("Bellman Error")
    plt.ylabel("Number of samples")
    plt.title("black jack")
    plt.legend()
    plt.show()
    #fig = plt.figure()
    #ax = fig.gca(projection='3d')
    #ax = fig.add_subplot(111,projection='3d')
    #X = np.array(X)
    #Y = np.array(Y)
    #X, Y = np.meshgrid(X, Y)
    #Z = np.array(Z)
    #surf = ax.plot_surface(X,Y,Z,cmap=cm.coolwarm,linewidth=0,antialiased=False)
    #print len(X),len(Y),len(Z)
    #ax.scatter(X,Y,Z,c='r',marker='o')
    #ax.set_xlabel('Sum')
    #ax.set_ylabel('Dealer face card')
    #ax.set_zlabel('Value')
    #plt.show()
    # Disabled plotting options kept as a string literal.
    ''' #ax.set_zlim(-1.0,1.0) #ax.zaxis.set_major_locator(LinearLocator(10)) #ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f')) #fig.colorbar(surf,shrink=0.5,aspect=5) #plt.show() '''
    pos.close()
    facts.close()
def Kalman_filter(x, P, measurements): for i in range(len(measurements)): # measurement update y = - H * x + measurements[i] S = H * P * H.transpose() + R K = P * H.transpose() * S.inverse() x = x + K * y P = (I - K * H) * P # motion update x = F * x + u P = F * P * F.transpose() return [x, P] u = matrix([[0.], [0.]]) F = matrix([[1., 1.], [0., 1.]]) H = matrix([[1., 0.]]) R = matrix([[1.]]) I = matrix([[1., 0.], [0., 1.]]) if __name__ == '__main__': # measurements = [5, 6, 7, 9, 10] # motions = [1, 1, 2, 1, 1] # measurement_sig = 4 # motion_sig = 2 # mu = 0 # sig = 0.00000001 # # for i in range(len(motions)): # [mu, sig] = update(mu, sig, measurements[i], measurement_sig)
#Class exercise from Matrix import matrix measurements = [1,2,3] x = matrix([[0.],[0.]]) #initial state (location and velocity) P = matrix([[1000.,0.],[0.,1000.]]) #initial uncertainty u = matrix([[0.],[0.]]) #external motion F = matrix([[1.,1.],[0.,1.]]) #next state function H = matrix([[1., 0.]]) #measurement function R = matrix([[1.]]) #measurement uncertainty I = matrix([[1.,0.],[0.,1.]]) #identity matrix def filter(x,P): #Implements the Kalman Filter function for measurement # update and prediction step for n in range(len(measurements)): global u,H,F,R,I # measurement update Z = matrix([[measurements[n]]]) Y = Z.transpose() - H * x S = H * P * H.transpose() + R K = P * H.transpose() * S.inverse() x = x + (K*Y) P = (I - K*H) * P # prediction x = F * x + u P = F * P * F.transpose()
def getDataMatrix(self):
    '''returns the data matrix of values

    One row per key of self.value, obtained by passing the key's first
    two components to self.grid.factored.
    '''
    rows = [self.grid.factored(state[0], state[1]) for state in self.value]
    return matrix(rows)