def redraw():
    # Clear the screen and draw a new frame of the cube with updated coordinates
    background.fill(black)
    global cube_points
    # perspective projection
    distance = 5 * scale
    z = distance + cube_points[:, 2]  # - np.mean(cube_points[:, 2])
    z = distance / z
    projected_points = T(np.matmul(ortho_mat, T(cube_points)))
    projected_points[:, 0] *= z
    projected_points[:, 1] *= z
    projected_points += win_width / 2
    for points in projected_points:
        x = int(points[0])
        y = int(points[1])
        pygame.draw.circle(background, white, (x, y), 5)
    for i in range(4):
        connect(i, (i + 1) % 4, projected_points)            # edges of one face
        connect(i, i + 4, projected_points)                  # edges joining the two faces
        connect(i + 4, ((i + 1) % 4) + 4, projected_points)  # edges of the opposite face
    pygame.display.update()
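# The connect helper used above is not defined in this snippet; a minimal
# sketch (hypothetical implementation) that draws an edge between two
# projected vertices could look like this:
def connect(i, j, points):
    # draw a line from projected vertex i to projected vertex j
    pygame.draw.line(background, white,
                     (int(points[i][0]), int(points[i][1])),
                     (int(points[j][0]), int(points[j][1])))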
def find_covariance(self):
    # loop counter
    index = 0
    # a temporary matrix whose first row holds (x_i - miu_k), i in 1..n, k in {0, 1}
    temp_matrix = np.zeros((self.m, self.m))
    # an all-zero matrix to accumulate the covariance
    covariance = np.zeros((self.m, self.m))
    for element in self.y:
        # compute (x_i - miu_k), multiply it by its transpose,
        # and add the resulting outer product to the covariance matrix
        if element == 0:
            temp_matrix[0] = np.subtract(self.X[index], self.miu_0)
            covariance = np.add(covariance, np.matmul(T(temp_matrix), temp_matrix))
        elif element == 1:
            temp_matrix[0] = np.subtract(self.X[index], self.miu_1)
            covariance = np.add(covariance, np.matmul(T(temp_matrix), temp_matrix))
        else:
            print("ERROR: Binary Data ONLY")
        # increment index
        index += 1
    # unbiased pooled covariance matrix
    self.covariance = np.multiply(covariance, 1 / (self.N0 + self.N1 - 2))
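# A minimal vectorized sketch of the same pooled-covariance computation,
# assuming X is an (n x m) array, y an (n,) array of 0/1 labels, and
# miu_0 / miu_1 the per-class means; this is not part of the original class.
import numpy as np

def pooled_covariance(X, y, miu_0, miu_1):
    # center each row by its class mean
    centered = np.where((y == 1)[:, None], X - miu_1, X - miu_0)
    n0, n1 = np.sum(y == 0), np.sum(y == 1)
    # sum of outer products, divided by the unbiased denominator
    return centered.T @ centered / (n0 + n1 - 2)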
def find_log_odds(self, X_input):
    x_n = X_input.shape[0]
    index = 0
    log_odds = 0
    y_output = np.zeros(x_n)
    first_term = np.log(self.PY1 / self.PY0)
    second_term = 1 / 2 * np.dot(
        np.dot(T(self.miu_1), inv(self.covariance)), self.miu_1)
    third_term = 1 / 2 * np.dot(
        np.dot(T(self.miu_0), inv(self.covariance)), self.miu_0)
    fourth_term_part_2 = np.subtract(self.miu_1, self.miu_0)
    for element in X_input:
        fourth_term_part_1 = np.dot(T(X_input[index]), inv(self.covariance))
        fourth_term = np.dot(fourth_term_part_1, fourth_term_part_2)
        # calculate the log odds for each example
        log_odds = np.add(np.add(first_term, -second_term),
                          np.add(third_term, fourth_term))
        # classify according to the log odds ratio:
        # if the result > 0 -> class 1, class 0 otherwise
        if log_odds > 0:
            y_output[index] = 1
        elif log_odds <= 0:
            y_output[index] = 0
        else:
            print("ERROR: Log odds ratio cannot be processed")
        index += 1
    # output the binary result
    return y_output
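# A self-contained toy sketch of the same LDA log-odds decision rule on
# made-up 2-D data; miu_0, miu_1, covariance, PY0, and PY1 below stand in
# for the fitted attributes used in the method above.
import numpy as np
from numpy import transpose as T
from numpy.linalg import inv

miu_0, miu_1 = np.array([0.0, 0.0]), np.array([2.0, 2.0])
covariance = np.eye(2)
PY0, PY1 = 0.5, 0.5
x = np.array([1.5, 1.2])
log_odds = (np.log(PY1 / PY0)
            - 0.5 * T(miu_1) @ inv(covariance) @ miu_1
            + 0.5 * T(miu_0) @ inv(covariance) @ miu_0
            + T(x) @ inv(covariance) @ (miu_1 - miu_0))
print(1 if log_odds > 0 else 0)  # log_odds = 1.4 > 0, so this point gets class 1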
def lmlgh(params, y, R):
    # gradient of the negative log marginal likelihood w.r.t. (k1, k2)
    k1, k2 = params
    al = mul(Kyinvh, y)
    dKdk1 = Kfh * (1 / k1)
    dKdk2 = sqexp(x, None, k1, k2**0.5)[1].reshape(n, n)
    lmlg1 = -(0.5 * np.trace(mul(mul(al, T(al)) - Kyinvh, dKdk1)))
    lmlg2 = -(0.5 * np.trace(mul(mul(al, T(al)) - Kyinvh, dKdk2)))
    return np.array([lmlg1, lmlg2], dtype=float)
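# A self-contained sketch of validating an analytic marginal-likelihood
# gradient such as lmlgh against finite differences; the kernel and nll
# below are toy stand-ins (a 1-D squared-exponential GP with the constant
# term dropped), not the original sqexp/mul helpers.
import numpy as np
from numpy.linalg import inv, slogdet

rng = np.random.default_rng(0)
x = np.linspace(0, 1, 10)[:, None]
y = np.sin(6 * x[:, 0]) + 0.1 * rng.standard_normal(10)

def kernel(k1, k2):
    d2 = (x - x.T) ** 2
    return k1 * np.exp(-0.5 * d2 / k2)

def nll(k1, k2, sig=0.1):
    # negative log marginal likelihood, up to an additive constant
    Ky = kernel(k1, k2) + sig**2 * np.eye(len(y))
    return 0.5 * y @ inv(Ky) @ y + 0.5 * slogdet(Ky)[1]

eps = 1e-6
num_grad = (nll(1.0 + eps, 0.5) - nll(1.0 - eps, 0.5)) / (2 * eps)
print(num_grad)  # compare against the analytic lmlg1-style term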
def predict_probabilities(self, X_new):
    """
    Returns a probabilistic prediction using the model parameters.

    inputs:
    @ self
    @ X_new : (n' x m) input vector in list or numpy format.
    """
    X_new = np.array(X_new)
    input_shape = X_new.shape
    print(input_shape)
    # If input is a single example (row vector)
    if X_new.shape[0] == 1:
        # If the input length doesn't match
        if (X_new.shape[1] + 1) != self.m:
            message = "Input number of features doesn't match model number of parameters"
            message += "\nInput has {} features but model has {} features".format(
                X_new.shape[1], self.m - 1)
            raise Exception(message)
        else:
            print("vector")
            x = np.insert(X_new, 0, 1)  # insert an extra one at the beginning (bias)
            wTx = float(np.matmul(T(self.w), x))
            sigm_wTx = self.sigmoid(wTx)
            print("sigm_wTx", sigm_wTx)
            return [sigm_wTx]
    # if input is a matrix of new examples
    elif input_shape[0] > 1 and input_shape[1] > 1:
        print("matrix")
        # if the number of attributes doesn't match
        if (X_new.shape[1] + 1) != self.m:
            message = "Input dimensions don't match"
            message += "\nInput matrix contains {} features, but the model has {} fitted features".format(
                X_new.shape[1], self.m - 1)
            raise Exception(message)
        # right dimensions
        else:
            pred_probs = np.zeros((X_new.shape[0], 1))  # to store the probabilities
            X_0 = np.ones((X_new.shape[0], 1))  # n-dim vector of ones (bias)
            X_new = np.c_[X_0, X_new]  # concatenate
            # since X_new is a matrix, we have to loop over each of its rows
            for i in range(len(X_new)):
                x_i = X_new[i]  # row = example
                wTx = float(np.matmul(T(self.w), x_i))  # w^T x
                sigm_wTx = self.sigmoid(wTx)
                pred_probs[i] = sigm_wTx
            return pred_probs
def lmlh(params, y, R, y_gn):
    # print(params)  # show progress of fit
    [k1, k2] = params
    global Kfh
    Kfh = sqexp(x, None, k1, k2**0.5)[0]
    # print(np.size(Kfh))
    Ky = Kfh + R  # calculate initial kernel with noise
    global Kyinvh
    Kyinvh = inv(Ky)
    # sum of the two negative log marginal likelihoods - (5.8)
    return (-(-0.5 * mul(mul(T(y), Kyinvh), y)
              - 0.5 * np.log(det(Ky))
              - 0.5 * n * np.log(2 * np.pi))
            - (-0.5 * mul(mul(T(y_gn), Kyinvh), y_gn)
               - 0.5 * np.log(det(Ky))
               - 0.5 * n * np.log(2 * np.pi)))
def GPRfith(xs, k1, k2, R, Rs):
    # Kst = RBF2(xtest, x, k1, k2)[0]
    Ky = RBF(x, None, k1, k2**0.5)[0] + R
    Ks = RBF(xs, x, k1, k2**0.5)
    Kss = RBF(xs, None, k1, k2)[0]
    L = cholesky(Ky)
    al = solve(T(L), solve(L, y))
    fmst = mul(Ks, al)
    varfmst = np.empty([n, 1])
    for i in range(np.size(xs)):
        v = solve(L, T(Ks[:, i]))
        varfmst[i] = Kss[i, i] + Rs[i, i] - mul(T(v), v)
    # log-determinant via the Cholesky diagonal (avoids log of off-diagonal zeros)
    lmlopt = (-0.5 * mul(T(y), al)
              - np.sum(np.log(np.diag(L)))
              - 0.5 * n * np.log(2 * np.pi))
    return fmst, varfmst[::-1], lmlopt
def GPRfit(xs, k1, k2, sig):
    Ky = sqexp(x, None, k1, k2**0.5)[0] + (sig**2) * np.identity(n)
    Ks = sqexp(xs, x, k1, k2**0.5)
    Kss = sqexp(xs, None, k1, k2**0.5)[0]
    L = cholesky(Ky)
    al = solve(T(L), solve(L, y))
    fmst = mul(Ks, al)
    varfmst = np.empty([n, 1])
    for i in range(np.size(xs)):
        v = solve(L, T(Ks[:, i]))
        varfmst[i] = Kss[i, i] - mul(T(v), v) + sig**2
    # log-determinant via the Cholesky diagonal (avoids log of off-diagonal zeros)
    lmlopt = (-0.5 * mul(T(y), al)
              - np.sum(np.log(np.diag(L)))
              - 0.5 * n * np.log(2 * np.pi))
    # return fmst, varfmst[::-1], lmlopt
    return fmst, varfmst, lmlopt
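# A self-contained sketch of the same Cholesky-based GP prediction
# (Rasmussen & Williams, Algorithm 2.1) with an inline squared-exponential
# kernel; the globals x, y, n and the sqexp helper above are replaced by
# local toy data.
import numpy as np
from numpy.linalg import cholesky, solve

rng = np.random.default_rng(0)
x = np.linspace(0, 5, 20)
y = np.sin(x) + 0.1 * rng.standard_normal(x.size)
xs = np.linspace(0, 5, 50)
k1, k2, sig = 1.0, 1.0, 0.1

def sqexp_kernel(a, b):
    return k1 * np.exp(-0.5 * (a[:, None] - b[None, :])**2 / k2)

Ky = sqexp_kernel(x, x) + sig**2 * np.eye(x.size)
Ks = sqexp_kernel(xs, x)
L = cholesky(Ky)
al = solve(L.T, solve(L, y))
fmst = Ks @ al                                 # posterior mean at xs
v = solve(L, sqexp_kernel(x, xs))              # one column per test point
varfmst = k1 + sig**2 - np.sum(v * v, axis=0)  # pointwise predictive variance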
def new_feats(X,
              only_quadratic=False,
              only_interactions=False,
              all_interactions=False,
              exponential=False,
              correlation=0.6):
    """
    Adds second-order terms to the dataset: either only quadratic terms,
    only interaction terms (X_i != X_j), or both, if the correlation is
    higher than the specified 'correlation' threshold. If 'exponential' is
    set, exponentiated copies of the features are appended as well.
    """
    # copy the original feature set
    new_X = X.copy()
    # try exponential features
    if exponential:
        new_feats_X = pd.DataFrame(new_X)
        for col in new_feats_X:
            new_feats_X[col] = new_feats_X[col].apply(lambda x: np.exp(x))
        new_feats_X = np.array(new_feats_X)
        new_X = np.c_[new_X, new_feats_X]
    for col1 in T(X):
        for col2 in T(X):
            # all of these keep the original dataset
            if only_interactions:
                if stats.pearsonr(col1, col2)[0] >= correlation and not np.array_equal(
                        col1, col2):
                    new_feat = np.multiply(col1, col2)
                    new_X = np.c_[new_X, new_feat]
            elif only_quadratic:
                if stats.pearsonr(col1, col2)[0] >= correlation and np.array_equal(
                        col1, col2):
                    new_feat = np.multiply(col1, col2)
                    new_X = np.c_[new_X, new_feat]
            elif all_interactions:
                if stats.pearsonr(col1, col2)[0] >= correlation:
                    new_feat = np.multiply(col1, col2)
                    new_X = np.c_[new_X, new_feat]
    return new_X
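# A quick toy usage sketch of new_feats, assuming the numpy/pandas/scipy
# imports the original module relies on; the 3-column matrix is made up.
import numpy as np
import pandas as pd
from scipy import stats
from numpy import transpose as T

X = np.array([[1.0, 2.0, 0.5],
              [2.0, 4.1, 0.4],
              [3.0, 6.2, 0.3]])
X_aug = new_feats(X, only_quadratic=True, correlation=0.5)
print(X_aug.shape)  # (3, 6): the square of each column is appended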
def gradient(self, norm='none', C=1.0):
    """
    Calculates the gradient for the Logistic Regression model
    """
    grad = np.zeros((self.m, ))  # initialize gradient
    # calculate the gradient of each example and add them together
    for i in range(self.n):
        x_i = self.X[i]
        y_i = self.y[i]
        wTx = float(np.matmul(T(self.w), x_i))  # w^T x
        sigm_wTx = self.sigmoid(wTx)
        grad += x_i * (y_i - sigm_wTx)  # add to previous grad
    # TESTING
    if norm == 'l1':
        grad += C * np.sign(self.w).reshape(self.w.shape[0], )
    elif norm == 'l2':
        grad += 2 * C * np.array(self.w).reshape(self.w.shape[0], )
    # TESTING
    return grad.reshape((len(grad), 1))
def main():
    sizes = []
    diags = []
    for line in open("data.csv").readlines():
        values = line.split(',')
        if len(values) > 3:
            sizes.append(values[2])
            if values[1] == 'M':
                diags.append(1.)
            else:
                diags.append(0.)
    # for i in range(1, 31):
    #     print("Size {} was diagnosed as {}".format(sizes[i], diags[i]))
    X = np.asarray(sizes[1:], dtype=float)
    Y = np.asarray(diags[1:], dtype=float)
    clm1 = np.ones(np.shape(X), dtype=float)
    trSet = T(np.vstack((clm1, X, Y)))
    initTheta = np.asarray([0.1, 0.7])
    # model for the boundary between benign and malignant
    theta = gradDesc(initTheta, trSet)
    print(theta)
    Xarr = np.arange(5., 30., 0.5)
    Yarr = np.asarray(list(map(lambda x: theta[0] + x * theta[1], Xarr)))
    pp.scatter(X, Y)
    pp.plot(Xarr, Yarr, c='r')
    pp.show()
def gradDesc(theta, trSet, alpha=0.003, minErr=1 / 10**5):
    k = alpha / len(trSet)
    # print(len(trSet))
    count = 0
    converged = convergeVec
    converging = True
    while converging:
        temp = np.zeros(np.shape(theta))
        # p = np.concatenate((theta, [-1]), axis=0)
        inner = np.reciprocal(
            1 + np.exp(-np.sum(theta * trSet[:, :-1], axis=1))) - trSet[:, -1]
        # print(np.shape(inner))
        outer = inner * T(trSet[:, :-1])
        # print(np.shape(outer))
        delta = np.sum(outer, axis=1)
        # print(delta)
        # print(inner[:30])
        temp = theta - k * delta
        if converged(temp, theta, minErr):
            converging = False
        count += 1
        theta = temp
    print("Logistic regression converged after {} iterations.".format(count))
    return theta
def get_mnist():
    rows = 28
    cols = 28
    categories = 10
    (X_Train, Y_Train), (X_Test, Y_Test) = mnist.load_data()
    # Reshape to (N, 1, rows, cols), then transpose to channels-last (N, rows, cols, 1)
    X_Train = (T(X_Train.reshape(X_Train.shape[0], 1, rows, cols),
                 axes=[0, 2, 3, 1])).astype('float32')
    X_Test = (T(X_Test.reshape(X_Test.shape[0], 1, rows, cols),
                axes=[0, 2, 3, 1])).astype('float32')
    X_Train /= 255
    X_Test /= 255
    # Get the binary class matrices (one-hot) from the class/category vectors
    Y_Train = util.to_categorical(Y_Train, categories)
    Y_Test = util.to_categorical(Y_Test, categories)
    return (X_Train[0:10000], X_Test[0:1000], Y_Train[0:10000],
            Y_Test[0:1000], rows, cols, categories)
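# A brief usage sketch, assuming the module's own imports
# (from keras.datasets import mnist; from keras import utils as util):
X_tr, X_te, Y_tr, Y_te, rows, cols, categories = get_mnist()
print(X_tr.shape, Y_tr.shape)  # (10000, 28, 28, 1) (10000, 10)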
def lml(params, y, sig):
    # print(params)  # show progress of fit
    [k1, k2] = params
    global Kf
    # Kf = RBF(x, x, k1, k2)[0]
    Kf = RBF(x, None, k1, k2**0.5)[0]
    Ky = Kf + (sig**2) * np.identity(n)  # calculate initial kernel with noise
    global Kyinv
    Kyinv = inv(Ky)
    # negative log marginal likelihood - (5.8)
    return -(-0.5 * mul(mul(T(y), Kyinv), y)
             - 0.5 * np.log(det(Ky))
             - 0.5 * n * np.log(2 * np.pi))
def lmlh(params, y, R):
    # print(params)  # show progress of fit
    [k1, k2] = params
    global Kfh
    # Kf = RBF(x, x, k1, k2)[0]
    Kfh = RBF(x, None, k1, k2**0.5)[0]
    Ky = Kfh + R  # calculate initial kernel with noise
    global Kyinvh
    Kyinvh = inv(Ky)
    # negative log marginal likelihood - (5.8)
    return -(-0.5 * mul(mul(T(y), Kyinvh), y)
             - 0.5 * np.log(det(Ky))
             - 0.5 * n * np.log(2 * np.pi))
def gradDesc(theta, TrainingSet, alpha=0.003, minDelta=1 / 10**10):
    ## Currently only implemented for linear regression.
    ## TrainingSet is a 2-dimensional array whose rows are [X0, X1, X2...Xn, Y].
    ## theta is the [P0, P1, P2...Pn] hypothesis.
    ## If convergence fails, try a lower alpha value.
    converging = True
    k = alpha / np.shape(TrainingSet)[0]
    converged = convVec  # currently implemented only for numpy arrays
    count = 0
    while converging:
        temp = np.zeros(np.shape(theta))
        p = np.concatenate(
            (theta, [-1]), axis=0
        )  # append -1 to the theta vector for '-Y', the last column in the set
        # print(p)
        inner = np.sum(TrainingSet * p, 1)  # calculates the model mismatch
        # print(inner)
        outer = inner * T(
            TrainingSet[:, :-1])  # multiply with the X-vector to get the gradient
        # print(outer)
        delta = np.sum(outer, axis=1)  # sums the gradients over all m examples
        # print(delta)
        # input("Press Enter to continue...")
        temp = theta - k * delta  # subtract an alpha portion of the mean gradient of the error (k = alpha/m)
        if converged(temp, theta, minDelta):
            # calls the designated function to check for convergence
            converging = False
        theta = temp
        count += 1
    print("Gradient descent finished after {} iterations.".format(count))
    return theta
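# convVec is referenced above but not shown; a minimal sketch of such a
# convergence check (hypothetical implementation) could compare successive
# parameter vectors element-wise:
def convVec(new_theta, old_theta, minDelta):
    # converged when no parameter moved more than minDelta
    return np.all(np.abs(new_theta - old_theta) < minDelta)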
def cross_entropy_loss(self, verbose=False, norm='none'):
    losses = []
    # for each datapoint
    for i in range(self.n):
        x_i = self.X[i]
        y_i = self.y[i]
        wTx = float(np.matmul(T(self.w), x_i))  # w^T x
        sigm_wTx = self.sigmoid(wTx)
        if verbose:
            print("wTx: ", wTx)
            print("sigm_wTx ", sigm_wTx)
            print("log(sigm_wTx) ", math.log(sigm_wTx))
        if y_i == 1:
            losses.append(math.log(sigm_wTx + 0.0001))
        else:
            losses.append(math.log(1 - sigm_wTx + 0.0001))
    total_loss = -1 * np.sum(np.array(losses))
    if norm == 'l1':
        abs_w = [np.abs(w_j) for w_j in self.w]  # calculate the l1 norm
        total_loss += np.sum(abs_w)  # add to the loss
    elif norm == 'l2':
        w_2 = [float(np.power(w_j, 2)) for w_j in self.w]  # calculate the l2 norm
        total_loss += np.sum(w_2)  # add to the loss
    if verbose:
        print(losses)
        print("Model loss: ", total_loss)
    return total_loss
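# A tiny worked check of the same cross-entropy formula on made-up numbers:
# with y = 1 and a predicted probability of 0.8, the per-example loss is
# -log(0.8) ~ 0.223; with y = 0 it would be -log(1 - 0.8) ~ 1.609.
import math
print(-math.log(0.8 + 0.0001))  # ~0.223
print(-math.log(0.2 + 0.0001))  # ~1.609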
def minimizza(self):
    # least-squares fit via the normal equations: w = (A^T A)^{-1} A^T y
    self.coefficienti = np.dot(
        np.dot(inv(np.dot(T(self.A), self.A)), T(self.A)), self.y)
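# A self-contained sketch of the same normal-equations solve on toy data;
# the design matrix A (with a bias column) and the targets y are made up.
import numpy as np
from numpy import transpose as T
from numpy.linalg import inv

A = np.c_[np.ones(4), np.array([0.0, 1.0, 2.0, 3.0])]
y = np.array([1.0, 3.0, 5.0, 7.0])
coeff = np.dot(np.dot(inv(np.dot(T(A), A)), T(A)), y)
print(coeff)  # ~[1, 2]: intercept 1, slope 2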
def fn(A, x, b):
    # computes A^T x - b
    v1 = T(A)
    v2 = dot(v1, x)
    v3 = v2 - b
    return v3
def gradient(self):
    pred = self.predict(self.X)  # Xw
    diff = self.y - pred  # y - Xw
    return -2 * np.matmul(T(self.X), diff)  # -2 X^T (y - Xw)
def MSE(self):
    # (y - Xw)^T (y - Xw): note this is the sum of squared errors, not divided by n
    pred = self.predict(self.X)
    diff = self.y - pred
    return np.matmul(T(diff), diff)[0][0]
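# A self-contained sketch of gradient-descent linear regression using the
# gradient and loss formulas above; the surrounding class is not shown, so
# plain arrays stand in for self.X, self.y, and self.w.
import numpy as np
from numpy import transpose as T

rng = np.random.default_rng(0)
X = np.c_[np.ones((50, 1)), rng.standard_normal((50, 1))]
y = X @ np.array([[1.0], [2.0]]) + 0.1 * rng.standard_normal((50, 1))
w = np.zeros((2, 1))
lr = 0.01
for _ in range(500):
    diff = y - X @ w                   # y - Xw
    grad = -2 * np.matmul(T(X), diff)  # -2 X^T (y - Xw)
    w -= lr * grad / len(X)
print(w.ravel())  # approaches [1, 2]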
import numpy as np
from numpy import transpose as T
from matplotlib import pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

np.random.seed(1)

def f(x):
    return x * np.sin(x)

X = T(np.atleast_2d([1., 3., 5., 6., 7., 8., 10.]))
y = np.ravel(f(X))
x = T(np.atleast_2d(np.linspace(0, 10, 1000)))
kernel = C(1.0, (1e-13, 1e3)) * RBF(10, (1e-2, 1e2))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
gpfit = gp.fit(X, y)
y_pred, sigma = gp.predict(x, return_std=True)

plt.figure()
plt.plot(x, f(x), 'r:', label=r'$f(x)=x\,\sin(x)$')
plt.plot(X, y, 'r.', markersize=10, label=u'Observations')
plt.plot(x, y_pred, 'b-', label=u'Prediction')
plt.fill(np.concatenate([x, x[::-1]]),
         np.concatenate(
             [y_pred - 1.9600 * sigma, (y_pred + 1.9600 * sigma)[::-1]]),
         alpha=.5,
         fc='b',
         ec='None',
         label='95% confidence interval')
plt.legend(loc='upper left')
plt.show()