Ejemplo n.º 1
0
    def test_logistic_regression(self):
        """Check the signature, return type and result of ``logistic_regression``.

        The loan CSV is run through ``outlier_removal``, ``data_cleaning``
        and ``data_cleaning_2`` before the function under test is called on
        the resulting train/test split.
        """
        # Input parameters tests
        spec = getargspec(logistic_regression)
        n_args = len(spec[0])
        self.assertEqual(n_args, 4,
                         "Expected arguments %d, Given %d" % (4, n_args))
        # Index 3 of the argspec holds the defaults; None means no defaults.
        self.assertIsNone(
            spec[3],
            "Expected default values do not match given default values")

        # Return data types
        loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv')
        # `axis` is passed by keyword: pandas >= 2.0 rejects it positionally.
        loan_data = loan_data.drop('Loan_ID', axis=1)
        loan_data = outlier_removal(loan_data)
        _, _, X_train, X_test, y_train, y_test = data_cleaning(loan_data)
        X_train, X_test, y_train, y_test = data_cleaning_2(
            X_train, X_test, y_train, y_test)
        cm = logistic_regression(X_train, X_test, y_train, y_test)
        self.assertIsInstance(
            cm, numpy.ndarray,
            "Expected data type for return value is `numpy.ndarray`, you are returning %s"
            % (type(cm)))

        # Return value tests: the largest entry of the returned array is pinned.
        self.assertEqual(
            cm.max(), 89,
            "Expected return value does not match given return value")
    def test_data_cleaning_2(self):
        """Check the signature, return types and output of ``data_cleaning_2``.

        The loan CSV is run through ``outlier_removal`` and ``data_cleaning``
        first; the resulting split is then handed to ``data_cleaning_2`` and
        the returned frames are compared against pinned counts and shapes.
        """
        # Input parameters tests
        spec = getargspec(data_cleaning_2)
        n_args = len(spec[0])
        self.assertEqual(n_args, 4,
                         "Expected arguments %d, Given %d" % (4, n_args))
        # Index 3 of the argspec holds the defaults; None means no defaults.
        self.assertIsNone(
            spec[3],
            "Expected default values do not match given default values")

        # Return data types
        loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv')
        # `axis` is passed by keyword: pandas >= 2.0 rejects it positionally.
        loan_data = loan_data.drop('Loan_ID', axis=1)
        loan_data = outlier_removal(loan_data)
        _, _, X_train, X_test, y_train, y_test = data_cleaning(loan_data)
        X_train, X_test, y_train, y_test = data_cleaning_2(
            X_train, X_test, y_train, y_test)

        frame_msg = ("Expected data type for return value is "
                     "`pandas DataFrame`, you are returning %s")
        series_msg = ("Expected data type for return value is "
                      "`pandas Series`, you are returning %s")
        self.assertIsInstance(
            X_test, pd.core.frame.DataFrame, frame_msg % (type(X_test)))
        self.assertIsInstance(
            X_train, pd.core.frame.DataFrame, frame_msg % (type(X_train)))
        self.assertIsInstance(
            y_train, pd.core.series.Series, series_msg % (type(y_train)))
        self.assertIsInstance(
            y_test, pd.core.series.Series, series_msg % (type(y_test)))

        # Return value tests
        counts_msg = "Return value counts does not match expected value counts"
        train_dependents = X_train['Dependents_1'].value_counts()
        train_urban = X_train['Property_Area_Urban'].value_counts()
        test_urban = X_test['Property_Area_Urban'].value_counts()
        test_dependents = X_test['Dependents_1'].value_counts()

        self.assertEqual(list(train_dependents), [343, 65], counts_msg)
        self.assertEqual(list(train_urban), [277, 131], counts_msg)
        self.assertEqual(list(test_urban), [87, 50], counts_msg)
        self.assertEqual(list(test_dependents), [114, 23], counts_msg)

        shape_msg = "Return value shape does not match expected value"
        self.assertEqual(X_train.shape, (408, 14), shape_msg)
        self.assertEqual(X_test.shape, (137, 14), shape_msg)
Ejemplo n.º 3
0
    def test_data_cleaning(self):
        """Check the signature, return types and NaN handling of ``data_cleaning``.

        The loan CSV is passed through ``outlier_removal`` and then to
        ``data_cleaning``; the six returned objects are type-checked and the
        train/test feature frames must not contain missing values.
        """
        # Input parameters tests
        spec = getargspec(data_cleaning)
        n_args = len(spec[0])
        self.assertEqual(n_args, 1,
                         "Expected arguments %d, Given %d" % (1, n_args))
        # Index 3 of the argspec holds the defaults; None means no defaults.
        self.assertIsNone(
            spec[3],
            "Expected default values do not match given default values")

        loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv')
        # `axis` is passed by keyword: pandas >= 2.0 rejects it positionally.
        loan_data = loan_data.drop('Loan_ID', axis=1)
        loan_data = outlier_removal(loan_data)
        X, y, X_train, X_test, y_train, y_test = data_cleaning(loan_data)

        # Return data types
        frame_msg = ("Expected data type for return value is "
                     "`pandas DataFrame`, you are returning %s")
        series_msg = ("Expected data type for return value is "
                      "`pandas Series`, you are returning %s")
        for frame in (X_test, X_train, X):
            self.assertIsInstance(
                frame, pd.core.frame.DataFrame, frame_msg % (type(frame)))
        for series in (y, y_train, y_test):
            self.assertIsInstance(
                series, pd.core.series.Series, series_msg % (type(series)))

        # Return value tests
        train_has_nan = X_train.isnull().values.any()
        test_has_nan = X_test.isnull().values.any()

        self.assertFalse(train_has_nan, "Return value contains NaN values")
        self.assertFalse(test_has_nan, "Return value contains NaN values")
Ejemplo n.º 4
0
# Default Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from greyatomlib.logistic_regression_project.q01_outlier_removal.build import outlier_removal
from sklearn.preprocessing import Imputer

# Load the raw loan data at import time, drop the identifier column and run
# the frame through outlier_removal so it is ready for data_cleaning.
loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv')
# `axis` is passed by keyword: pandas >= 2.0 rejects it positionally.
loan_data = loan_data.drop('Loan_ID', axis=1)
loan_data = outlier_removal(loan_data)


# Write your solution here :
def data_cleaning(data):
    """Fill missing values in the loan data and split it into train/test sets.

    ``LoanAmount`` gaps are replaced with the column mean; ``Gender``,
    ``Married``, ``Dependents``, ``Self_Employed``, ``Loan_Amount_Term`` and
    ``Credit_History`` gaps are replaced with each column's most frequent
    value. The filled columns are written back into ``data``, so the
    caller's frame is modified in place.

    Args:
        data: DataFrame containing the columns listed above. Every column
            except the last is used as a feature; the last one is the target.

    Returns:
        Tuple ``(X, y, X_train, X_test, y_train, y_test)`` where ``X``/``y``
        are the full features/target and the remaining four are a 75/25
        split of them made with ``random_state=9``.
    """
    mean_imputer = Imputer(missing_values='NaN', strategy='mean')
    data['LoanAmount'] = mean_imputer.fit_transform(data[['LoanAmount']])

    # These columns are filled with their mode rather than a mean.
    mode_filled_columns = ('Gender', 'Married', 'Dependents', 'Self_Employed',
                           'Loan_Amount_Term', 'Credit_History')
    for column in mode_filled_columns:
        data[column] = data[column].fillna(data[column].mode()[0])

    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]
    # The fixed random_state keeps the split reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=9)
    return X, y, X_train, X_test, y_train, y_test