def test_logistic_regression(self):
    """Check the signature, return type and return value of `logistic_regression`."""
    # Input parameters tests
    args = getargspec(logistic_regression)
    self.assertEqual(len(args[0]), 4,
                     "Expected arguments %d, Given %d" % (4, len(args[0])))
    self.assertEqual(
        args[3], None,
        "Expected default values do not match given default values")

    # Build the inputs by running the earlier pipeline steps on the raw CSV.
    loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv')
    # `axis` is passed by keyword: pandas 2.0 no longer accepts it positionally.
    loan_data = loan_data.drop('Loan_ID', axis=1)
    loan_data = outlier_removal(loan_data)
    X, y, X_train, X_test, y_train, y_test = data_cleaning(loan_data)
    X_train, X_test, y_train, y_test = data_cleaning_2(
        X_train, X_test, y_train, y_test)

    # Return data types
    cm = logistic_regression(X_train, X_test, y_train, y_test)
    self.assertIsInstance(
        cm, numpy.ndarray,
        "Expected data type for return value is `numpy.ndarray`, you are returning %s"
        % (type(cm)))

    # Return value tests
    self.assertEqual(cm.max(), 89,
                     "Expected return value does not match given return value")
def test_data_cleaning_2(self):
    """Check the signature, return types and return values of `data_cleaning_2`."""
    # Input parameters tests
    args = getargspec(data_cleaning_2)
    self.assertEqual(len(args[0]), 4,
                     "Expected arguments %d, Given %d" % (4, len(args[0])))
    self.assertEqual(
        args[3], None,
        "Expected default values do not match given default values")

    # Build the inputs by running the earlier pipeline steps on the raw CSV.
    loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv')
    # `axis` is passed by keyword: pandas 2.0 no longer accepts it positionally.
    loan_data = loan_data.drop('Loan_ID', axis=1)
    loan_data = outlier_removal(loan_data)
    X, y, X_train, X_test, y_train, y_test = data_cleaning(loan_data)
    X_train, X_test, y_train, y_test = data_cleaning_2(
        X_train, X_test, y_train, y_test)

    # Return data types: feature sets must be DataFrames, targets must be Series.
    for frame in (X_test, X_train):
        self.assertIsInstance(
            frame, pd.core.frame.DataFrame,
            "Expected data type for return value is `pandas DataFrame`, you are returning %s"
            % (type(frame)))
    for series in (y_train, y_test):
        self.assertIsInstance(
            series, pd.core.series.Series,
            "Expected data type for return value is `pandas Series`, you are returning %s"
            % (type(series)))

    # Return value tests: counts of selected columns, checked in the
    # order the original assertions ran.
    expected_counts = (
        (X_train, 'Dependents_1', [343, 65]),
        (X_train, 'Property_Area_Urban', [277, 131]),
        (X_test, 'Property_Area_Urban', [87, 50]),
        (X_test, 'Dependents_1', [114, 23]),
    )
    for frame, column, expected in expected_counts:
        self.assertEqual(
            list(frame[column].value_counts()), expected,
            "Return value counts does not match expected value counts")

    self.assertEqual(X_train.shape, (408, 14),
                     "Return value shape does not match expected value")
    self.assertEqual(X_test.shape, (137, 14),
                     "Return value shape does not match expected value")
def test_data_cleaning(self):
    """Check the signature, return types and NaN-free output of `data_cleaning`."""
    # Input parameters tests
    args = getargspec(data_cleaning)
    self.assertEqual(len(args[0]), 1,
                     "Expected arguments %d, Given %d" % (1, len(args[0])))
    self.assertEqual(
        args[3], None,
        "Expected default values do not match given default values")

    loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv')
    # `axis` is passed by keyword: pandas 2.0 no longer accepts it positionally.
    loan_data = loan_data.drop('Loan_ID', axis=1)
    loan_data = outlier_removal(loan_data)
    X, y, X_train, X_test, y_train, y_test = data_cleaning(loan_data)

    # Return data types: feature sets must be DataFrames, targets must be Series.
    for frame in (X_test, X_train, X):
        self.assertIsInstance(
            frame, pd.core.frame.DataFrame,
            "Expected data type for return value is `pandas DataFrame`, you are returning %s"
            % (type(frame)))
    for series in (y, y_train, y_test):
        self.assertIsInstance(
            series, pd.core.series.Series,
            "Expected data type for return value is `pandas Series`, you are returning %s"
            % (type(series)))

    # Return value tests: neither split may still contain missing values.
    train_has_nan = X_train.isnull().values.any()
    test_has_nan = X_test.isnull().values.any()
    self.assertEqual(train_has_nan, False, "Return value contains NaN values")
    self.assertEqual(test_has_nan, False, "Return value contains NaN values")
# Default Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from greyatomlib.logistic_regression_project.q01_outlier_removal.build import outlier_removal
from sklearn.preprocessing import Imputer

loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv')
# `axis` is passed by keyword: pandas 2.0 no longer accepts it positionally.
loan_data = loan_data.drop('Loan_ID', axis=1)
loan_data = outlier_removal(loan_data)


# Write your solution here :
def data_cleaning(data):
    """Fill missing values in the loan data and split it into train/test sets.

    `LoanAmount` is filled with the column mean; `Gender`, `Married`,
    `Dependents`, `Self_Employed`, `Loan_Amount_Term` and `Credit_History`
    are filled with their most frequent value. The last column is used as
    the target and every other column as a feature.

    Note: the fills are assigned back onto `data`, so the frame passed in
    is modified in place.

    Args:
        data: DataFrame containing the columns listed above, with the
            target in the last column.

    Returns:
        Tuple ``(X, y, X_train, X_test, y_train, y_test)`` where the split
        holds out 25% of the rows and uses ``random_state=9``.
    """
    # Mean imputation for the loan amount column.
    imp_mean = Imputer(missing_values='NaN', strategy='mean')
    data['LoanAmount'] = imp_mean.fit_transform(data[['LoanAmount']])

    # Mode imputation: `mode()` may return several values, take the first.
    mode_filled_columns = (
        'Gender',
        'Married',
        'Dependents',
        'Self_Employed',
        'Loan_Amount_Term',
        'Credit_History',
    )
    for column in mode_filled_columns:
        data[column] = data[column].fillna(data[column].mode()[0])

    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]
    # The fixed random_state is what makes the split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=9)
    return X, y, X_train, X_test, y_train, y_test