def test_training_set():
    """Check the shapes of the four arrays returned by ``training_set``.

    The imported data is expected to yield 8 training rows and 2 test rows,
    with 3 feature columns and 1 dependent-variable column.
    """
    features, dependent = features_and_dependent_vars(import_data())
    x_train, x_test, y_train, y_test = training_set(features, dependent)

    # Feature matrices.
    assert x_train.shape == (8, 3)
    assert x_test.shape == (2, 3)
    # Dependent-variable columns.
    assert y_train.shape == (8, 1)
    assert y_test.shape == (2, 1)
def test_feature_scaling():
    """Check that ``feature_scaling`` keeps the training matrix's shape."""
    raw_matrix, raw_dep_vars = cleanup_data(import_data())
    encoded_matrix = encode_feature(raw_matrix)
    encoded_dep_vars = encode_feature(raw_dep_vars)

    # Only the feature halves of the split are needed by this test.
    x_train, x_test, _, _ = training_set(encoded_matrix, encoded_dep_vars)
    scaled_train, _ = feature_scaling(x_train, x_test)

    assert scaled_train.shape == x_train.shape
def test_training_and_test_sets():
    """Check the first element of each split produced with ``test_size=1/3``."""
    dataset = dp.import_data(DATA_FILE)
    features, dependent = dp.features_and_dependent_vars(dataset)
    x_train, x_test, y_train, y_test = dp.training_set(
        features, dependent, test_size=1 / 3)

    # First feature value of each split.
    assert x_train[0][0] == 2.9
    assert x_test[0][0] == 1.5
    # First dependent value of each split.
    assert y_train[0][0] == 56642
    assert y_test[0][0] == 37731
def test_predict():
    """Check that predictions on the training features stay within tolerance.

    The model is trained on the training split and asked to predict that same
    split; the maximum error must be below 0.2 and the mean of the ``'err'``
    column below 0.1.
    """
    dataset = dp.import_data(DATA_FILE)
    features, dependent = dp.features_and_dependent_vars(dataset)
    x_train, _x_test, y_train, _y_test = dp.training_set(
        features, dependent, test_size=1 / 3)

    model = train_the_machine(x_train, y_train)
    predictions = predict(model, x_train)
    report, worst_error = error(y_train, predictions)

    assert worst_error < 0.2
    assert report['err'].mean() < 0.1
def test_train_the_machine():
    """Check that ``train_the_machine`` returns a ``LinearRegression``."""
    dataset = dp.import_data(DATA_FILE)
    features, dependent = dp.features_and_dependent_vars(dataset)
    # Only the training halves of the split are used here.
    x_train, _, y_train, _ = dp.training_set(
        features, dependent, test_size=1 / 3)

    model = train_the_machine(x_train, y_train)

    assert isinstance(model, LinearRegression)
def test_read_data():
    """Check the first values of the imported data and of its two parts."""
    frame = dp.import_data(DATA_FILE)
    assert frame.iloc[0, 0] == 1.1

    features, dependent = dp.features_and_dependent_vars(frame)
    # Checked in the same order as before: features first, then dependent.
    assert features[0][0] == 1.1
    assert dependent[0][0] == 39343
def test_encode_data():
    """Check the three values returned by ``encode_data`` for the imported data.

    Asserts that element ``[6, 2]`` of the first returned array is not NaN,
    that the dummy-encoded matrix has shape ``(10, 5)``, and that the first
    dependent variable equals 0.
    """
    original_data, data_with_dummies, dependent_vars = encode_data(
        import_data())

    cell = original_data[6, 2]
    # ``cell is not np.nan`` compares object identity, so a NaN that is not
    # the very same object as ``np.nan`` would slip through unnoticed. Test
    # the value instead; the type guard keeps ``np.isnan`` from raising on
    # non-float cells (e.g. strings in an object array).
    assert not (isinstance(cell, (float, np.floating)) and np.isnan(cell))

    assert data_with_dummies.shape == (10, 5)
    assert dependent_vars[0] == 0
def test_categorical_data():
    """Check label encoding of column 0 and the shape after dummy expansion."""
    cleaned_matrix = cleanup_data(import_data())[0]
    all_rows = slice(None, None)
    encoded = encode_feature(cleaned_matrix, all_rows, 0)

    # Column 0 must contain exactly the three codes 0, 1 and 2.
    codes = set(encoded[:, 0])
    assert codes == {0, 1, 2}

    with_dummies = create_dummy_variables(encoded, [0])
    assert with_dummies.shape == (10, 5)
def test_cleanup_data():
    """Check the value ``cleanup_data`` leaves at position ``[4, 2]``."""
    expected_value = 63777.77777777778
    cleaned_matrix = cleanup_data(import_data())[0]
    assert cleaned_matrix[4, 2] == expected_value
def test_import_data():
    """Check that ``import_data`` returns something other than ``None``."""
    assert import_data() is not None
def test_features_matrix():
    """Check the first entry of the feature matrix and of the dependent vars."""
    features, dependent = features_and_dependent_vars(import_data())

    first_feature = features[0][0]
    assert first_feature == 'France'

    first_dependent = dependent[0][0]
    assert first_dependent == 'No'