def predict(self, this_dict):
    """Predict the dependent variable for one observation.

    ``this_dict`` must hold a value under ``self.first_variable``. For every
    column of ``self.df`` other than ``self.first_variable`` and
    ``self.dependent_variable``, the text after the first ``'^'`` in the
    column name (up to any second ``'^'``) is parsed as a float exponent, and
    the first variable's value raised to that exponent is stored under the
    column name. The completed inputs are passed to the ``predict`` method of
    a ``LinearRegressor`` built from ``self.df`` and
    ``self.dependent_variable``, and its result is returned.

    The derived terms are written to a copy, so the caller's dictionary is
    not modified.

    Raises:
        KeyError: if ``this_dict`` has no entry for ``self.first_variable``.
        IndexError: if a derived column name contains no ``'^'``.
        ValueError: if the text after ``'^'`` is not a valid float.
    """
    # Work on a shallow copy: the original implementation added the derived
    # power terms directly to the caller's dict as a side effect.
    inputs = dict(this_dict)
    base_value = inputs[self.first_variable]
    untouched = (self.first_variable, self.dependent_variable)
    for column in self.df.columns:
        if column in untouched:
            continue
        exponent = float(column.split('^')[1])
        inputs[column] = base_value ** exponent
    # NOTE(review): a new LinearRegressor is constructed on every call, as in
    # the original; whether that repeats an expensive fit depends on its
    # constructor, which is not visible here.
    linear_regressor = LinearRegressor(self.df, self.dependent_variable)
    return linear_regressor.predict(inputs)
# Regression checks for a `regressor` object created outside this fragment:
# a copy of its coefficients, rounded to 8 decimal places, and one prediction
# (also rounded to 8 places) are compared against hard-coded expected values.
# Afterwards a beef/pb/rating DataFrame is built, an interaction term is added
# with create_interaction_terms, and the resulting columns and rows are checked.
# NOTE(review): the text below is collapsed onto a single line and its final
# assert is cut off mid-list in the visible source.
print('Does all the linear_regressor stuff work') reg_coeff = regressor.coefficients.copy() for (key, value) in reg_coeff.items(): reg_coeff[key] = round(value, 8) assert reg_coeff == { 'constant': 0.19252336, 'scoops of chocolate': -0.05981308, 'scoops of vanilla': 0.13271028 }, 'No, coefficients does not work' assert round( regressor.predict({ 'scoops of chocolate': 2, 'scoops of vanilla': 3 }), 8) == 0.47102804, 'No, predict does not work' print('Yes they do', "\n") df = DataFrame.from_array( [[0, 0, 1], [1, 0, 2], [2, 0, 4], [4, 0, 8], [6, 0, 9], [0, 2, 2], [0, 4, 5], [0, 6, 7], [0, 8, 6], [2, 2, 0], [3, 4, 0]], columns=['beef', 'pb', 'rating']) df = df.create_interaction_terms('beef', 'pb') assert df.columns == ['beef', 'pb', 'rating', 'beef * pb'] assert df.to_array() == [[0, 0, 1, 0], [1, 0, 2, 0], [2, 0, 4, 0], [4, 0, 8, 0], [6, 0, 9, 0], [0, 2, 2, 0], [0, 4, 5,
# Demo script: fit a LinearRegressor on a small ratings table, then print the
# fitted coefficients followed by two predictions.
import sys
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor

# Each row follows the order of `rating_columns` below.
rating_rows = [
    [0, 0, 1],
    [1, 0, 2],
    [2, 0, 4],
    [4, 0, 8],
    [6, 0, 9],
    [0, 2, 2],
    [0, 4, 5],
    [0, 6, 7],
    [0, 8, 6],
]
rating_columns = ['slices of roast beef', 'tbsp of peanut butter', 'rating']

df = DataFrame.from_array(rating_rows, columns=rating_columns)
regressor = LinearRegressor(df, dependent_variable='rating')
print(regressor.coefficients)

# Predict with 5 slices of roast beef, first with 0 and then with 5 tbsp of
# peanut butter.
for peanut_butter in (0, 5):
    observation = {
        'slices of roast beef': 5,
        'tbsp of peanut butter': peanut_butter,
    }
    print(regressor.predict(observation))
# Script section: make the project modules under 'src' importable, fit a
# LinearRegressor on a ratings table, print its coefficients and two
# predictions, then rebuild `df` with short column names plus an interaction
# term between 'beef' and 'pb'.
sys.path.append('src')
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor
from matrix import Matrix
from dataframe import DataFrame

# Each row follows the order of `rating_columns` below.
rating_rows = [
    [0, 0, 1],
    [1, 0, 2],
    [2, 0, 4],
    [4, 0, 8],
    [6, 0, 9],
    [0, 2, 2],
    [0, 4, 5],
    [0, 6, 7],
    [0, 8, 6],
]
rating_columns = [
    'slices of roast beef',
    'tablespoons of peanut butter',
    'rating',
]

df = DataFrame.from_array(rating_rows, columns=rating_columns)
regressor = LinearRegressor(df, dependent_variable='rating')
print(regressor.coefficients)

# Predict with 5 slices of roast beef, first with 0 and then with 5
# tablespoons of peanut butter.
for peanut_butter in (0, 5):
    observation = {
        'slices of roast beef': 5,
        'tablespoons of peanut butter': peanut_butter,
    }
    print(regressor.predict(observation))

# Second table: the same nine rows plus two extra rows, under short column
# names, extended with an interaction term.
interaction_rows = [
    [0, 0, 1],
    [1, 0, 2],
    [2, 0, 4],
    [4, 0, 8],
    [6, 0, 9],
    [0, 2, 2],
    [0, 4, 5],
    [0, 6, 7],
    [0, 8, 6],
    [2, 2, 0],
    [3, 4, 0],
]
df = DataFrame.from_array(interaction_rows, columns=['beef', 'pb', 'rating'])
df = df.create_interaction_terms('beef', 'pb')
} == { 'constant': 0.19252336, 'scoops of chocolate': -0.05981308, 'scoops of vanilla': 0.13271028 }, { key: round(regressor.coefficients[key], 7) for key in regressor.coefficients } print('PASSED') # these coefficients are rounded, you should only round # in your assert statement print('Testing method predict...') assert round( regressor.predict({ 'scoops of chocolate': 2, 'scoops of vanilla': 3 }), 8) == 0.47102804 print('PASSED') df = DataFrame.from_array( [[0, 0, [], 1], [0, 0, ['mayo'], 1], [0, 0, ['jelly'], 4], [0, 0, ['mayo', 'jelly'], 0], [5, 0, [], 4], [5, 0, ['mayo'], 8], [5, 0, ['jelly'], 1], [5, 0, ['mayo', 'jelly'], 0], [0, 5, [], 5], [0, 5, ['mayo'], 0], [0, 5, ['jelly'], 9], [0, 5, ['mayo', 'jelly'], 0], [5, 5, [], 0], [5, 5, ['mayo'], 0], [5, 5, ['jelly'], 0], [5, 5, ['mayo', 'jelly'], 0]], columns=['beef', 'pb', 'condiments', 'rating']) df = df.create_dummy_variables('condiments') df = df.create_interaction_terms('beef', 'pb')
'rating': [1, 1, 4, 0, 4, 8, 1, 0, 5, 0, 9, 0, 0, 0, 0, 0] }) linear_regressor = LinearRegressor(df, prediction_column = 'rating') print("\n Testing Coefficients of LinearRegressor") assert linear_regressor.coefficients == {'beef': 0.25, 'pb': 0.4, 'mayo': -1.25, 'jelly': 1.5, 'beef_pb': -0.21, 'beef_mayo': 1.05, 'beef_jelly': -0.85, 'pb_mayo': -0.65, 'pb_jelly': 0.65, 'mayo_jelly': -3.25, 'constant': 2.19}, 'Incorrect coefficients for Linear Regressor' print(" passed") print("\n Testing Gathering all inputs of LinearRegressor") assert linear_regressor.gather_all_inputs({ 'beef': 5, 'pb': 5, 'mayo': 1, 'jelly': 1}) == {'beef': 5, 'pb': 5, 'mayo': 1, 'jelly': 1, 'beef_pb': 25, 'beef_mayo': 5, 'beef_jelly': 5, 'pb_mayo': 5, 'pb_jelly': 5, 'mayo_jelly': 1, 'constant': 1}, 'Incorrect gather_all_inputs' print(" passed") print("\n Testing Prediction #1") assert linear_regressor.predict({'beef': 5, 'pb': 5, 'mayo': 1, 'jelly': 1}) == -1.81, 'Incorrect prediction from linear regressor' print(" passed") print("\n Testing Prediction #2") assert linear_regressor.predict({ 'beef': 0, 'pb': 3, 'mayo': 0, 'jelly': 1, }) == 6.84 , 'Incorrect prediction from linear regressor' print(" passed") print("\n Testing Prediction #3") assert linear_regressor.predict({ 'beef': 1, 'pb': 1, 'mayo': 1, 'jelly': 0,
'mayo': [1], 'jelly': [1], 'beef_pb': [25], 'beef_mayo': [5], 'beef_jelly': [5], 'pb_mayo': [5], 'pb_jelly': [5], 'mayo_jelly': [1], 'constant': [1] } assert linear_regressor.current_input.data_dict == input_dict, 'Wrong INputs' assert linear_regressor.predict({ 'beef': 5, 'pb': 5, 'mayo': 1, 'jelly': 1, }) == -1.8125, 'Nah' assert linear_regressor.predict({ 'beef': 0, 'pb': 3, 'mayo': 0, 'jelly': 1, }) == 6.8375, 'Nah' assert linear_regressor.predict({ 'beef': 1, 'pb': 1, 'mayo': 1, 'jelly': 0,
# Builds new_df from new_dict/new_columns (both defined outside this fragment),
# takes row indices 0-500 as the training set and the remaining rows as the
# test set, then fits a LinearRegressor for 'Survived' on the training set's
# 'Sex' and 'Survived' columns and prints its coefficients.
# For each training row, the prediction is made from every selected column
# except the last, rounded, capped at 1 when it exceeds 1, and counted as
# correct when it equals that row's 'Survived' value; the resulting fraction is
# printed as 'train accuracy:'. The same loop is then started for the test set.
# NOTE(review): the text below is collapsed onto a single line and the test-set
# loop is cut off mid-comprehension in the visible source.
new_df = DataFrame(new_dict, new_columns) training_df = new_df.select_rows([i for i in range(501)]) testing_df = new_df.select_rows( [i for i in range(501, len(new_df.to_array()))]) print('\nSex:') sex_df = training_df.select(['Sex', 'Survived']) test_sex_df = testing_df.select(['Sex', 'Survived']) survival_regressor = LinearRegressor(sex_df, 'Survived') print(survival_regressor.coefficients) correct_classifications = 0 for i in range(len(training_df.data_dict['Sex'])): prediction = round( survival_regressor.predict( {var: sex_df.data_dict[var][i] for var in sex_df.columns[:-1]})) if prediction > 1: prediction = 1 if prediction == sex_df.data_dict['Survived'][i]: correct_classifications += 1 print('train accuracy:', correct_classifications / len(training_df.data_dict['Sex'])) correct_classifications = 0 for i in range(len(testing_df.data_dict['Sex'])): prediction = round( survival_regressor.predict({ var: test_sex_df.data_dict[var][i] for var in test_sex_df.columns[:-1]