Exemple #1
0
    def test_logistic_regression(self):
        """Check the signature, return type and result of `logistic_regression`."""

        # Input parameters tests
        args = getargspec(logistic_regression)
        self.assertEqual(len(args[0]), 4,
                         "Expected arguments %d, Given %d" % (4, len(args[0])))
        # The function is expected to declare no default values.
        self.assertIsNone(
            args[3],
            "Expected default values do not match given default values")

        # Return data types
        loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv')
        # `axis` is passed by keyword: newer pandas releases no longer accept
        # it positionally in `DataFrame.drop`.
        loan_data = loan_data.drop('Loan_ID', axis=1)
        loan_data = outlier_removal(loan_data)
        # Only the train/test splits are needed here.
        _, _, X_train, X_test, y_train, y_test = data_cleaning(loan_data)
        X_train, X_test, y_train, y_test = data_cleaning_2(
            X_train, X_test, y_train, y_test)
        cm = logistic_regression(X_train, X_test, y_train, y_test)
        self.assertIsInstance(
            cm, numpy.ndarray,
            "Expected data type for return value is `numpy.ndarray`, you are returning %s"
            % (type(cm)))

        # Return value tests: the largest cell of the returned matrix is pinned.
        self.assertEqual(
            cm.max(), 89,
            "Expected return value does not match given return value")
    def test_data_cleaning_2(self):
        """Check the signature, return types, value counts and shapes of `data_cleaning_2`."""

        # Input parameters tests
        args = getargspec(data_cleaning_2)
        self.assertEqual(len(args[0]), 4,
                         "Expected arguments %d, Given %d" % (4, len(args[0])))
        # The function is expected to declare no default values.
        self.assertIsNone(
            args[3],
            "Expected default values do not match given default values")

        # Return data types
        loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv')
        # `axis` is passed by keyword: newer pandas releases no longer accept
        # it positionally in `DataFrame.drop`.
        loan_data = loan_data.drop('Loan_ID', axis=1)
        loan_data = outlier_removal(loan_data)
        # Only the train/test splits are needed here.
        _, _, X_train, X_test, y_train, y_test = data_cleaning(loan_data)
        X_train, X_test, y_train, y_test = data_cleaning_2(
            X_train, X_test, y_train, y_test)

        self.assertIsInstance(
            X_test, pd.DataFrame,
            "Expected data type for return value is `pandas DataFrame`, you are returning %s"
            % (type(X_test)))
        self.assertIsInstance(
            X_train, pd.DataFrame,
            "Expected data type for return value is `pandas DataFrame`, you are returning %s"
            % (type(X_train)))
        # The targets are checked against Series, so the message names Series.
        self.assertIsInstance(
            y_train, pd.Series,
            "Expected data type for return value is `pandas Series`, you are returning %s"
            % (type(y_train)))
        self.assertIsInstance(
            y_test, pd.Series,
            "Expected data type for return value is `pandas Series`, you are returning %s"
            % (type(y_test)))

        # Return value tests
        counts_message = "Return value counts does not match expected value counts"
        train_val = X_train['Dependents_1'].value_counts()
        train_val1 = X_train['Property_Area_Urban'].value_counts()
        test_val = X_test['Property_Area_Urban'].value_counts()
        test_val1 = X_test['Dependents_1'].value_counts()

        self.assertEqual(list(train_val), [343, 65], counts_message)
        self.assertEqual(list(train_val1), [277, 131], counts_message)
        self.assertEqual(list(test_val), [87, 50], counts_message)
        self.assertEqual(list(test_val1), [114, 23], counts_message)

        self.assertEqual(X_train.shape, (408, 14),
                         "Return value shape does not match expected value")
        self.assertEqual(X_test.shape, (137, 14),
                         "Return value shape does not match expected value")
# %load q03_logistic_regression/build.py
# Default Imports
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from greyatomlib.logistic_regression_project.q01_outlier_removal.build import outlier_removal
from greyatomlib.logistic_regression_project.q02_data_cleaning_all.build import data_cleaning
from greyatomlib.logistic_regression_project.q02_data_cleaning_all_2.build import data_cleaning_2

# Load the raw loan data and run it through the shared preparation helpers so
# the train/test splits are available at module level.
loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv')
# `axis` is passed by keyword: newer pandas releases no longer accept it
# positionally in `DataFrame.drop`.
loan_data = loan_data.drop('Loan_ID', axis=1)
loan_data = outlier_removal(loan_data)
X, y, X_train, X_test, y_train, y_test = data_cleaning(loan_data)
X_train, X_test, y_train, y_test = data_cleaning_2(X_train, X_test, y_train, y_test)

def logistic_regression(X_train, X_test, y_train, y_test):
    """Fit a logistic regression on scaled features and return its confusion matrix.

    The columns 'ApplicantIncome', 'CoapplicantIncome' and 'LoanAmount' are
    standardised with `StandardScaler` before a
    `LogisticRegression(random_state=9)` is fitted on the training split and
    evaluated on the test split.

    Args:
        X_train: Training features; must contain the three columns above.
        X_test: Test features; must contain the three columns above.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        The result of `confusion_matrix(y_test, y_pred)` for the predictions
        made on the scaled test features.

    The input frames are not modified; scaling is applied to copies.
    """
    column_transform = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']

    # Work on copies so the caller's frames keep their original values.
    X_train = X_train.copy()
    X_test = X_test.copy()

    stand_scale = StandardScaler()
    # Whole-column assignment replaces the columns, so the scaled float values
    # do not have to fit into the dtype of the columns they replace.
    X_train[column_transform] = stand_scale.fit_transform(X_train[column_transform])
    # NOTE(review): the scaler is re-fitted on the test split instead of
    # reusing the training statistics via `transform`, so the two splits are
    # scaled differently. Kept as-is because existing tests pin the resulting
    # matrix; re-derive the expected values before switching to `transform`.
    X_test[column_transform] = stand_scale.fit_transform(X_test[column_transform])

    lr = LogisticRegression(random_state=9)
    lr.fit(X_train, y_train)

    y_pred = lr.predict(X_test)
    return confusion_matrix(y_test, y_pred)