def test_logistic_regression(self):
    """Check the signature, return type and return value of `logistic_regression`."""
    # Input parameters tests
    # NOTE(review): `getargspec` is resolved outside this view; presumably
    # `inspect.getargspec` (removed in Python 3.11, where
    # `inspect.getfullargspec` is the replacement) -- confirm at the import.
    # Index 0 is read as the positional argument names and index 3 as the
    # defaults (expected to be None, i.e. no default values).
    args = getargspec(logistic_regression)
    self.assertEqual(len(args[0]), 4,
                     "Expected arguments %d, Given %d" % (4, len(args[0])))
    self.assertEqual(
        args[3], None,
        "Expected default values do not match given default values")

    # Return data types
    loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv')
    # `axis` must be passed by keyword: the positional form was deprecated
    # in pandas 1.3 and raises TypeError from pandas 2.0 onwards.
    loan_data = loan_data.drop('Loan_ID', axis=1)
    loan_data = outlier_removal(loan_data)
    X, y, X_train, X_test, y_train, y_test = data_cleaning(loan_data)
    X_train, X_test, y_train, y_test = data_cleaning_2(
        X_train, X_test, y_train, y_test)
    cm = logistic_regression(X_train, X_test, y_train, y_test)
    self.assertIsInstance(
        cm, numpy.ndarray,
        "Expected data type for return value is `numpy.ndarray`, you are returning %s" % (type(cm)))

    # Return value tests
    # The largest cell of the returned matrix is pinned to 89.
    self.assertEqual(
        cm.max(), 89,
        "Expected return value does not match given return value")
def test_data_cleaning_2(self):
    """Check the signature, return types and return values of `data_cleaning_2`."""
    # Input parameters tests
    # NOTE(review): `getargspec` is resolved outside this view; presumably
    # `inspect.getargspec` (removed in Python 3.11, where
    # `inspect.getfullargspec` is the replacement) -- confirm at the import.
    # Index 0 is read as the positional argument names and index 3 as the
    # defaults (expected to be None, i.e. no default values).
    args = getargspec(data_cleaning_2)
    self.assertEqual(len(args[0]), 4,
                     "Expected arguments %d, Given %d" % (4, len(args[0])))
    self.assertEqual(
        args[3], None,
        "Expected default values do not match given default values")

    # Return data types
    loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv')
    # `axis` must be passed by keyword: the positional form was deprecated
    # in pandas 1.3 and raises TypeError from pandas 2.0 onwards.
    loan_data = loan_data.drop('Loan_ID', axis=1)
    loan_data = outlier_removal(loan_data)
    X, y, X_train, X_test, y_train, y_test = data_cleaning(loan_data)
    X_train, X_test, y_train, y_test = data_cleaning_2(
        X_train, X_test, y_train, y_test)

    self.assertIsInstance(
        X_test, pd.core.frame.DataFrame,
        "Expected data type for return value is `pandas DataFrame`, you are returning %s" % (type(X_test)))
    self.assertIsInstance(
        X_train, pd.core.frame.DataFrame,
        "Expected data type for return value is `pandas DataFrame`, you are returning %s" % (type(X_train)))
    # The targets are checked against Series, so the message names Series
    # rather than DataFrame.
    self.assertIsInstance(
        y_train, pd.core.series.Series,
        "Expected data type for return value is `pandas Series`, you are returning %s" % (type(y_train)))
    self.assertIsInstance(
        y_test, pd.core.series.Series,
        "Expected data type for return value is `pandas Series`, you are returning %s" % (type(y_test)))

    # Return value tests
    # Frequency counts of two of the columns present in the returned frames.
    train_val = X_train['Dependents_1'].value_counts()
    train_val1 = X_train['Property_Area_Urban'].value_counts()
    test_val = X_test['Property_Area_Urban'].value_counts()
    test_val1 = X_test['Dependents_1'].value_counts()

    self.assertEqual(
        list(train_val), [343, 65],
        "Return value counts does not match expected value counts")
    self.assertEqual(
        list(train_val1), [277, 131],
        "Return value counts does not match expected value counts")
    self.assertEqual(
        list(test_val), [87, 50],
        "Return value counts does not match expected value counts")
    self.assertEqual(
        list(test_val1), [114, 23],
        "Return value counts does not match expected value counts")

    self.assertEqual(X_train.shape, (408, 14),
                     "Return value shape does not match expected value")
    self.assertEqual(X_test.shape, (137, 14),
                     "Return value shape does not match expected value")
# %load q03_logistic_regression/build.py
# Default Imports
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from greyatomlib.logistic_regression_project.q01_outlier_removal.build import outlier_removal
from greyatomlib.logistic_regression_project.q02_data_cleaning_all.build import data_cleaning
from greyatomlib.logistic_regression_project.q02_data_cleaning_all_2.build import data_cleaning_2

# Module-level data preparation: runs on import and leaves the cleaned
# train/test splits available as module globals.
loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv')
# `axis` must be passed by keyword: the positional form was deprecated in
# pandas 1.3 and raises TypeError from pandas 2.0 onwards.
loan_data = loan_data.drop('Loan_ID', axis=1)
loan_data = outlier_removal(loan_data)
X, y, X_train, X_test, y_train, y_test = data_cleaning(loan_data)
X_train, X_test, y_train, y_test = data_cleaning_2(X_train, X_test, y_train, y_test)


def logistic_regression(X_train, X_test, y_train, y_test):
    """Fit a logistic regression on scaled features and return its confusion matrix.

    The columns 'ApplicantIncome', 'CoapplicantIncome' and 'LoanAmount' are
    standardised with `StandardScaler`; every other column is passed to the
    model unchanged. The caller's frames are not modified.

    Args:
        X_train: Training features; must contain the three columns above.
        X_test: Test features with the same columns as ``X_train``.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        The result of ``confusion_matrix(y_test, y_pred)`` for the
        predictions made on ``X_test``.
    """
    column_transform = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']

    # Work on copies so the caller's DataFrames are left untouched; this also
    # avoids pandas' chained-assignment warnings when the inputs are slices
    # of a larger frame.
    X_train = X_train.copy()
    X_test = X_test.copy()

    stand_scale = StandardScaler()
    # Whole-column assignment replaces the columns outright, so an integer
    # column can take the float scaled values without an in-place dtype cast.
    X_train[column_transform] = stand_scale.fit_transform(X_train[column_transform])
    # NOTE(review): the scaler is re-fitted on the test split here, so train
    # and test are scaled with different statistics. The usual practice is
    # `stand_scale.transform(...)`; this is kept as-is because existing tests
    # pin the resulting confusion matrix -- confirm before changing.
    X_test[column_transform] = stand_scale.fit_transform(X_test[column_transform])

    lr = LogisticRegression(random_state=9)
    lr.fit(X_train, y_train)
    y_pred = lr.predict(X_test)

    return confusion_matrix(y_test, y_pred)