def FM_on_diabetes(learningRate=0.0004, k=5, maxIter=100000, tol=1.e-7):
    """Train and evaluate a factorization machine on the diabetes dataset.

    Generalized to accept the same hyperparameters as FM_on_sp (defaults
    reproduce the previous hard-coded values), so callers can tune the run
    without editing this function.

    Parameters
    ----------
    learningRate : float
        Step size passed to gradientDescent.
    k : int
        Latent-factor dimensionality for the FM model.
    maxIter : int
        Maximum number of gradient-descent iterations.
    tol : float
        Convergence tolerance for gradient descent.
    """
    train_df, test_df = read_data('diabetes_train.txt', 'diabetes_test.txt')
    x_train, y_train, x_test, y_test = split_X_Y(train_df, test_df)
    x_train = normalize_columns(x_train)
    # NOTE(review): test set is normalized with its own statistics rather than
    # the training set's — presumably intentional here, but worth confirming.
    x_test = normalize_columns(x_test)
    model = createFMModel(x_train.shape[1], k)
    gradientDescent(x_train, y_train, x_test, y_test, model,
                    learningRate=learningRate, maxIter=maxIter, tol=tol)
def FM_on_sp(learningRate=0.00000001, k=5, maxIter=100000, tol=1.e-7):
    """Train and evaluate a factorization machine on the sp dataset.

    Loads the space-separated train/validation files, normalizes features,
    removes column 12 from both splits, then fits an FM model with the
    given gradient-descent hyperparameters.

    Parameters
    ----------
    learningRate : float
        Step size for gradient descent.
    k : int
        Latent-factor dimensionality for the FM model.
    maxIter : int
        Maximum number of gradient-descent iterations.
    tol : float
        Convergence tolerance.
    """
    frames = read_data('tr.rx.app.sp', 'va.rx.app.sp', sep=' ')
    features_tr, labels_tr, features_va, labels_va = split_X_Y_sp(*frames)

    # Normalize each split, then discard column 12.
    # NOTE(review): column 12 is dropped after normalization and the
    # validation split is scaled independently of training — confirm both
    # are intended.
    features_tr = normalize_columns(features_tr).drop([12], axis=1)
    features_va = normalize_columns(features_va).drop([12], axis=1)

    fm = createFMModel(features_tr.shape[1], k)
    gradientDescent(features_tr, labels_tr, features_va, labels_va,
                    fm, learningRate, maxIter, tol)
def test_gradientDescent(self):
    """Gradient descent on the fixture data converges to the known theta,
    and the fitted hypothesis predicts the expected values."""
    start = np.zeros((2, 1))
    fitted = gradientDescent(self.X, self.y, start, 0.01, 1500)

    # Converged parameters (reference values to 2 decimal places).
    for idx, expected in ((0, -3.63), (1, 1.17)):
        self.assertAlmostEqual(fitted[idx], expected, places=2)

    # Predictions at two sample points using the fitted parameters.
    for point, expected in (([1, 3.5], 0.45), ([1, 7], 4.53)):
        self.assertAlmostEqual(hypothesis(np.array(point), fitted),
                               expected, places=2)
def test_gradient_descent(self):
    """Multivariate gradient descent with feature normalization predicts
    the expected price for a 1650 sq-ft, 3-bedroom house."""
    n_iterations = 400
    step = 0.01  # candidates tried: [0.01, 0.03, 0.1, 0.3, 1.0]
    _, n_features = self.X.shape
    params = np.zeros((n_features + 1, 1))

    # Prepend the intercept column x_0, then normalize the remaining
    # feature columns in place on self.X.
    self.X = np.concatenate([np.ones((self.m, 1)), self.X], axis=1)
    self.X[:, 1:n_features + 1], mu, sigma = \
        feature_normalization(self.X[:, 1:n_features + 1])

    params = gradientDescent(self.X, self.y, params, step, n_iterations)

    # Build one query point and scale its features with the training
    # mean/std before predicting.
    query = np.array([1.0, 1650.0, 3.0]).reshape((3, 1))
    query[1:, :] = (query[1:, :] - mu) / sigma
    query = query.reshape((1, 3))  # m=1 sample, n=2 features (plus x_0)

    self.assertAlmostEqual(hypothesis(query, params), 289314.62, places=2)