def main():
    """Fit a logistic regressor and print its training and test error rates.

    The data comes from ``preprocess_data``, is split by
    ``train_test_splitter`` with ``ratio=0.8``, and the per-iteration losses
    returned by ``fit`` are handed to ``plot_losses`` with ``savefig=True``.
    """
    # The first value returned by preprocess_data is not used in this function.
    _, features, targets = preprocess_data()
    X_train, X_test, y_train, y_test = train_test_splitter(
        X=features, y=targets, ratio=0.8
    )

    model = LogisticRegressor(
        alpha=0.05, c=0.01, T=1000, random_seed=0, intercept=True
    )
    loss_history = model.fit(X_train, y_train)
    plot_losses(losses=loss_history, savefig=True)

    # Score the fitted model on each split: training first, then test.
    train_error = error_rate(y_train, model.predict(X_train))
    test_error = error_rate(y_test, model.predict(X_test))
    print('Training Error Rate: %f' % train_error)
    print('Test Error Rate: %f' % test_error)
[0, 0, ['mayo', 'jelly'], 0], [5, 0, [], 4], [5, 0, ['mayo'], 8], [5, 0, ['jelly'], 1], [5, 0, ['mayo', 'jelly'], 0], [0, 5, [], 5], [0, 5, ['mayo'], 0], [0, 5, ['jelly'], 9], [0, 5, ['mayo', 'jelly'], 0], [5, 5, [], 0], [5, 5, ['mayo'], 0], [5, 5, ['jelly'], 0], [5, 5, ['mayo', 'jelly'], 0]], columns=['beef', 'pb', 'condiments', 'rating']) df = df.create_dummy_variables('condiments') df = df.create_interaction_terms('beef', 'pb') df = df.create_interaction_terms('beef', 'mayo') df = df.create_interaction_terms('beef', 'jelly') df = df.create_interaction_terms('pb', 'mayo') df = df.create_interaction_terms('pb', 'jelly') df = df.create_interaction_terms('mayo', 'jelly') log_df = DataFrame(df.data_dict, df.columns) logistic_regressor = LogisticRegressor(log_df, 10, dependent_variable='rating') # test 8 slices of beef + mayo observation = {'beef': 8, 'mayo': 1} assert round(logistic_regressor.predict(observation), 2) == 9.72 # test 4 tbsp of pb + 8 slices of beef + mayo observation = {'beef': 8, 'pb': 4, 'mayo': 1} assert round(logistic_regressor.predict(observation), 2) == 0.77 # test 8 slices of beef + mayo + jelly observation = {'beef': 8, 'mayo': 1, 'jelly': 1} assert round(logistic_regressor.predict(observation), 2) == 0.79
['mayo', 'jelly'], [], ['mayo'], ['jelly'], ['mayo', 'jelly']], 'rating': [1, 1, 4, 0, 4, 8, 1, 0, 5, 0, 9, 0, 0, 0, 0, 0] } df = DataFrame(data_dict, column_order=['beef', 'pb', 'condiments']) df = df.create_dummy_variables() df = df.append_pairwise_interactions() df = df.append_columns({ 'constant': [1 for _ in range(len(data_dict['rating']))], 'rating': data_dict['rating'] }) df = df.apply('rating', lambda x: 0.1 if x == 0 else x) regressor = LogisticRegressor(df, prediction_column='rating', max_val=10) assert regressor.multipliers == [ -0.039, -0.0205, 1.7483, -0.3978, 0.1497, -0.7485, 0.4682, 0.3296, -0.5288, 2.6441, 1.0125 ], 'Wong multipliers' assert regressor.predict({ 'beef': 5, 'pb': 5, 'mayo': 1, 'jelly': 1, }) == 0.02342, 'Nah bruh' assert regressor.predict({ 'beef': 0, 'pb': 3,
print 'Plotting data with green circle indicating (y=1) examples and red circle indicating (y=0) examples ...' plot_utils.plot_twoclass_data(X,y,'Exam 1 score', 'Exam 2 score',['Not Admitted','Admitted']) plt.savefig('fig1.pdf') ######################################################################## ##================ Part 1: Compute cost and gradient ==================# ######################################################################## # set up the X matrix with the column of ones as intercept XX = np.vstack([np.ones((X.shape[0],)),X.T]).T # set up a logistic regression model log_reg1 = LogisticRegressor() # test the loss and gradient function theta = np.zeros((XX.shape[1],)) loss = log_reg1.loss(theta,XX,y) print "Loss on all-zeros theta vector (should be around 0.693) = ", loss grad = log_reg1.grad_loss(theta,XX,y) print "Gradient of loss wrt all-zeros theta vector (should be around [-0.1, -12.01, -11.26]) = ", grad # run fmin on the loss function and gradient implemented in logistic_regressor.py theta_opt = log_reg1.train(XX,y,num_iters=400) # print the theta found
from dataframe import DataFrame
from logistic_regressor import LogisticRegressor

# Keys are inserted in this order: ACT, extra, Bias, interaction, accepted.
students_dict = {
    'ACT': [33, 34, 35, 30, 36, 29, 36, 31, 36, 32],
    'extra': [1, 0, 1, 1, 1, 1, 1, 1, 0, 0],
    'Bias': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
}

# Interaction column: element-wise product of the ACT and extra columns.
students_dict['interaction'] = [
    act * extra
    for act, extra in zip(students_dict['ACT'], students_dict['extra'])
]

# Target column, one acceptance value per student.
students_dict['accepted'] = [
    0.95, 0.001, 0.95, 0.001, 0.95, 0.001, 0.95, 0.001, 0.95, 0.001
]

df = DataFrame(students_dict)
regressor = LogisticRegressor(df, 'accepted', 0.999)

# Disabled debugging aid kept for reference:
# predictor = {'ACT': 36, 'extra': 0}
# df_bruh = DataFrame(predictor)
# print(df_bruh.gather_all_inputs())
# print(regressor.predict(predictor))
print(regressor.multipliers)
# Fit one regressor per delta and collect a curve of predictions for each.
delta_table = [0.1, 0.01, 0.001, 0.0001]
all_coords = []
for delta_low in delta_table:
    # An earlier, now disabled, version rewrote the labels in list_data by
    # hand (0 -> delta, otherwise 1 - delta); delta is passed to the
    # regressor instead.
    df = DataFrame.from_array(list_data, columns=['x', 'y'])
    regressor = LogisticRegressor(
        df, prediction_column='y', max_value=1, delta=delta_low)

    xs, ys = [], []
    for x in range(20):
        # NOTE(review): the plotted abscissa is x / 100 while the prediction
        # is evaluated at x itself -- confirm this scaling is intended.
        xs.append(x / 100)
        ys.append(regressor.predict({'constant': 1, 'x': x}))
    coords = [xs, ys]
    all_coords.append(coords)
print(all_coords)

# One line per delta, in the same order as delta_table.
plt.style.use('bmh')
for coords in all_coords:
    plt.plot(*coords, linewidth=2.5)
plt.legend(['0.1', '0.01', '0.001', '0.0001'])
plt.savefig('logistic_regressor_109.png')
# dfgd = DataFrame.from_array(
reg = LogisticRegressor(df, dependent_variable='y', premade = True) reg.set_coefficients({'constant': 0.5, 'x': 0.5}) print(reg.calc_rss()) print(reg.calc_gradient(delta)) reg.gradient_descent(alpha, delta, num_steps) print(reg.coefficients) ''' df = DataFrame.from_array([[2, 1], [3, 0]], columns=['x', 'y']) alpha = 0.2 delta = 0.1 num_steps = 20000 reg = LogisticRegressor(df, dependent_variable='y', premade=True) reg.set_coefficients({'constant': 1, 'x': 1}) print(reg.calc_rss()) print(reg.calc_gradient(delta)) #reg.gradient_descent(alpha, delta, num_steps) #print(reg.coefficients) '''' import matplotlib.pyplot as plt plt.style.use('bmh') points = {'x': [], 'y': []} x = -5
print('regressor with interaction terms') print(regressor.coefficients) print(regressor.predict({'beef': 5, 'pb': 0, 'beef * pb': 0})) print(regressor.predict({'beef': 5, 'pb': 5, 'beef * pb': 25})) df = DataFrame.from_array( [[0, 0, 1], [1, 0, 2], [2, 0, 4], [4, 0, 8], [6, 0, 9], [0, 2, 2], [0, 4, 5], [0, 6, 7], [0, 8, 6], [2, 2, 0.1], [3, 4, 0.1]], columns=['beef', 'pb', 'rating']) df = df.create_interaction_terms('beef', 'pb') regressor = LogisticRegressor(df, 'rating', 10) print('Logistic regressor with interaction terms') print(regressor.coefficients) print(regressor.predict({'beef': 5, 'pb': 0, 'beef * pb': 0})) print(regressor.predict({'beef': 12, 'pb': 0, 'beef * pb': 0})) print(regressor.predict({'beef': 5, 'pb': 5, 'beef * pb': 25})) print('-----------Assignment 52-----------') df = DataFrame.from_array( [[0, 0, [], 1], [0, 0, ['mayo'], 1], [0, 0, ['jelly'], 4], [0, 0, ['mayo', 'jelly'], 0], [5, 0, [], 4], [5, 0, ['mayo'], 8],
import matplotlib.pyplot as plt
plt.style.use('bmh')
import sys
sys.path.append('src')  # must precede the project imports below
from dataframe import DataFrame
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor

# Each row is an [x, y] pair with a 0/1 label in y.
arr = [[1, 0], [2, 0], [3, 0], [2, 1], [3, 1], [4, 1]]
df = DataFrame.from_array(arr, columns=['x', 'y'])

# Run with: python tests/test_regressors.py

reg = LogisticRegressor(df, dependent_variable='y', upper_bound=1)
reg.set_coefficients({'constant': 0.5, 'x': 0.5})  # starting point for the descent

alpha = 0.01
delta = 0.01
num_steps = 20000
reg.gradient_descent(alpha, delta, num_steps)

print("\nreg.coefficients:", reg.coefficients)
# should be {'constant': 2.7911, 'x': -1.1165}

# Split the raw points into coordinate lists.
x = [row[0] for row in arr]
y = [row[1] for row in arr]

# Grid from 1.00 to 4.00 in steps of 0.01, with a prediction at each point.
lots_of_xs = [step / 100 for step in range(100, 401)]
prediction = [reg.predict({'x': grid_x}) for grid_x in lots_of_xs]
'condiments': [[], ['mayo'], ['jelly'], ['mayo', 'jelly'], [], ['mayo'], ['jelly'], ['mayo', 'jelly'], [], ['mayo'], ['jelly'], ['mayo', 'jelly'], [], ['mayo'], ['jelly'], ['mayo', 'jelly']], 'rating': [1, 1, 4, 0, 4, 8, 1, 0, 5, 0, 9, 0, 0, 0, 0, 0] } df = DataFrame(data_dict, column_order=['beef', 'pb', 'condiments']) df = df.create_dummy_variables() df = df.append_pairwise_interactions() df = df.append_columns({ 'constant': [1 for _ in range(len(data_dict['beef']))], 'rating': [1, 1, 4, 0, 4, 8, 1, 0, 5, 0, 9, 0, 0, 0, 0, 0] }) df = df.apply('rating', lambda x: 0.1 if x == 0 else x) regressor = LogisticRegressor(df, prediction_column='rating', max_value=10) # print("\n Testing multipliers") # assert regressor.multipliers == { # 'beef': -0.03900793, # 'pb': -0.02047944, # 'mayo': 1.74825378, # 'jelly': -0.39777219, # 'beef_pb': 0.14970983, # 'beef_mayo': -0.74854916, # 'beef_jelly': 0.46821312, # 'pb_mayo': 0.32958369, # 'pb_jelly': -0.5288267, # 'mayo_jelly': 2.64413352, # 'constant': 1.01248436 # }, 'Incorrect multipliers an is instead:'+str(regressor.multipliers)
if index in dataframe_indices] dataframe.append_columns({'constant': [1 for _ in dataframe_indices]}) linear_regressor = LinearRegressor(dataframe, ratings, prediction_column='Survived') linear_regressor.solve_coefficients() linear_regressor_classifications = get_classifications(linear_regressor, testing_dataframe) for row in linear_regressor_classifications: print(row) print('\n') logistic_regressor = LogisticRegressor(dataframe, ratings, prediction_column='Survived') logistic_regressor.solve_coefficients() logistic_regressor_classifications = get_classifications( logistic_regressor, testing_dataframe) for row in logistic_regressor_classifications: print(row) dataframe.remove_columns(['constant']) dataframe.append_columns({ 'Survived': [ did_survive for index, did_survive in enumerate(survived_people) if index in dataframe_indices ] })
} df = DataFrame(data_dict, column_order=['percentile', 'ACT', 'extracurricular']) # print(df.ordered_dict) df = df.append_pairwise_interactions() # print(df.ordered_dict) df = df.append_columns({ 'constant': [1 for _ in range(len(data_dict['percentile']))], 'acceptance': [0.999, 0.001, 0.999, 0.001, 0.999, 0.001, 0.999, 0.001, 0.999, 0.001] }) # print(df.ordered_dict) df = df.apply('acceptance', lambda x: 0.1 if x == 0 else x) # print(df.ordered_dict) regressor = LogisticRegressor(df, prediction_column='acceptance', max_value=1) print(regressor.coefficients) print( "Martha: " + str(regressor.predict({ 'percentile': 95, 'ACT': 33, 'extracurricular': 1 }))) print( "Jeremy: " + str(regressor.predict({ 'percentile': 95, 'ACT': 34, 'extracurricular': 0 })))
import sys
sys.path.append('src')  # must precede the project imports below
from logistic_regressor import LogisticRegressor
from dataframe import DataFrame

# Three [x, y] observations.
data = [[10, 0.05], [100, 0.35], [1000, 0.95]]
df = DataFrame.from_array(data, ['x', 'y'])

# Predict column 'y'; the third positional argument is 1.
regressor = LogisticRegressor(df, 'y', 1)
print(regressor.coefficients)

prediction_at_500 = regressor.predict({'x': 500})
print(prediction_at_500)
import sys
sys.path.append('src')  # must precede the project imports below
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor

# Rows are [beef, pb, rating, interactive]; in every row the 'interactive'
# value equals beef * pb.
df = DataFrame.from_array(
    [
        [0, 0, 1, 0],
        [1, 0, 2, 0],
        [2, 0, 4, 0],
        [4, 0, 8, 0],
        [6, 0, 9, 0],
        [0, 2, 2, 0],
        [0, 4, 5, 0],
        [0, 6, 7, 0],
        [0, 8, 6, 0],
        [2, 2, 0.1, 4],
        [3, 4, 0.1, 12],
    ],
    columns=['beef', 'pb', 'rating', 'interactive'],
)

log_reg = LogisticRegressor(df, 10, dependent_variable='rating')

# Print one prediction per observation, in this order.
for observation in (
    {'beef': 5, 'pb': 0, 'interactive': 0},
    {'beef': 12, 'pb': 0, 'interactive': 0},
    {'beef': 5, 'pb': 5, 'interactive': 25},
):
    print(log_reg.predict(observation))
print(linear_regressor.predict({'beef': 0, 'pb': 4 , 'mayo': 1, 'jelly': 0, 'beef * pb': 0, 'beef * mayo': 0, 'beef * jelly': 0, 'pb * mayo': 4, 'pb * jelly': 0, 'mayo * jelly': 0})) print(linear_regressor.predict({'beef': 8, 'pb': 4 , 'mayo': 1, 'jelly': 0, 'beef * pb': 32, 'beef * mayo': 8, 'beef * jelly': 0, 'pb * mayo': 4, 'pb * jelly': 0, 'mayo * jelly': 0})) print(linear_regressor.predict({'beef': 8, 'pb': 0 , 'mayo': 1, 'jelly': 1, 'beef * pb': 0, 'beef * mayo': 8, 'beef * jelly': 8, 'pb * mayo': 0, 'pb * jelly': 0, 'mayo * jelly': 1}))''' df = DataFrame.from_array( [[0, 0, [], 1], [0, 0, ['mayo'], 1], [0, 0, ['jelly'], 4], [0, 0, ['mayo', 'jelly'], 0], [5, 0, [], 4], [5, 0, ['mayo'], 8], [5, 0, ['jelly'], 1], [5, 0, ['mayo', 'jelly'], 0], [0, 5, [], 5], [0, 5, ['mayo'], 0], [0, 5, ['jelly'], 9], [0, 5, ['mayo', 'jelly'], 0], [5, 5, [], 0], [5, 5, ['mayo'], 0], [5, 5, ['jelly'], 0], [5, 5, ['mayo', 'jelly'], 0]], columns=['beef', 'pb', 'condiments', 'rating']) df = df.create_dummy_variables('condiments') df = df.create_interaction_terms('beef', 'pb') df = df.create_interaction_terms('beef', 'mayo') df = df.create_interaction_terms('beef', 'jelly') df = df.create_interaction_terms('pb', 'mayo') df = df.create_interaction_terms('pb', 'jelly') df = df.create_interaction_terms('mayo', 'jelly') log_df = DataFrame(df.data_dict, df.columns) logistic_regressor = LogisticRegressor(log_df, 10, dependent_variable='rating') ''' print('logistic_regressor') print(logistic_regressor.predict({'beef': 8, 'pb': 0 , 'mayo': 1, 'jelly': 0, 'beef * pb': 0, 'beef * mayo': 8, 'beef * jelly': 0, 'pb * mayo': 0, 'pb * jelly': 0, 'mayo * jelly': 0})) print(logistic_regressor.predict({'beef': 0, 'pb': 4 , 'mayo': 0, 'jelly': 1, 'beef * pb': 0, 'beef * mayo': 0, 'beef * jelly': 0, 'pb * mayo': 0, 'pb * jelly': 4, 'mayo * jelly': 0})) print(logistic_regressor.predict({'beef': 0, 'pb': 4 , 'mayo': 1, 'jelly': 0, 'beef * pb': 0, 'beef * mayo': 0, 'beef * jelly': 0, 'pb * mayo': 4, 'pb * 
jelly': 0, 'mayo * jelly': 0})) print(logistic_regressor.predict({'beef': 8, 'pb': 4 , 'mayo': 1, 'jelly': 0, 'beef * pb': 32, 'beef * mayo': 8, 'beef * jelly': 0, 'pb * mayo': 4, 'pb * jelly': 0, 'mayo * jelly': 0})) print(logistic_regressor.predict({'beef': 8, 'pb': 0 , 'mayo': 1, 'jelly': 1, 'beef * pb': 0, 'beef * mayo': 8, 'beef * jelly': 8, 'pb * mayo': 0, 'pb * jelly': 0, 'mayo * jelly': 1}))'''
# The two splits together must account for every image.
assert len(train_imgs) + len(val_imgs) == n_imgs

# Bind the three utils transforms into a single callable via utils.compose.
trafos = partial(
    utils.compose,
    transforms=[utils.to_channel_first, utils.normalize, utils.to_tensor],
)
train_data = utils.DatasetWithTransform(
    train_imgs, train_labels, transform=trafos)
val_data = utils.DatasetWithTransform(
    val_imgs, val_labels, transform=trafos)

print("N Training: ", len(train_imgs))
print("N Val: ", len(val_imgs))

# Model input size is taken from the first image's `size` attribute
# (presumably its total element count -- confirm against how images is built).
n_pixels = images[0].size
n_classes = 10
model = LogisticRegressor(n_pixels, n_classes)
model.to(device)

# Only the training loader shuffles.
train_batch_size = 4
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=train_batch_size, shuffle=True)
val_batch_size = 25
val_loader = torch.utils.data.DataLoader(
    val_data, batch_size=val_batch_size)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

loss = torch.nn.NLLLoss()
loss.to(device)

tb_logger = torch.utils.tensorboard.SummaryWriter('runs/log_reg')
import sys
sys.path.append('src')  # must precede the project imports below
from logistic_regressor import LogisticRegressor
from matrix import Matrix
from dataframe import DataFrame

# Single-variable check: fit on three points, then predict at x = 5.
df = DataFrame.from_array(
    [[1, 0.2], [2, 0.25], [3, 0.5]],
    columns=['x', 'y'])
log_reg = LogisticRegressor(df, 'y', 1)

print('Testing method predict...')
assert round(log_reg.predict({'x': 5}), 3) == 0.777
print('PASSED')

# Sandwich data: rows are [beef, pb, condiments, rating].
df = DataFrame.from_array(
    [
        [0, 0, [], 1],
        [0, 0, ['mayo'], 1],
        [0, 0, ['jelly'], 4],
        [0, 0, ['mayo', 'jelly'], 0.1],
        [5, 0, [], 4],
        [5, 0, ['mayo'], 8],
        [5, 0, ['jelly'], 1],
        [5, 0, ['mayo', 'jelly'], 0.1],
        [0, 5, [], 5],
        [0, 5, ['mayo'], 0.1],
        [0, 5, ['jelly'], 9],
        [0, 5, ['mayo', 'jelly'], 0.1],
        [5, 5, [], 0.1],
        [5, 5, ['mayo'], 0.1],
        [5, 5, ['jelly'], 0.1],
        [5, 5, ['mayo', 'jelly'], 0.1],
    ],
    columns=['beef', 'pb', 'condiments', 'rating'])

# Expand 'condiments' into dummy columns, then add every pairwise interaction
# among beef, pb, mayo and jelly, in the same order as the original
# step-by-step reassignments.
df = (
    df.create_dummy_variables('condiments')
    .create_interaction_terms('beef', 'pb')
    .create_interaction_terms('beef', 'mayo')
    .create_interaction_terms('beef', 'jelly')
    .create_interaction_terms('pb', 'mayo')
    .create_interaction_terms('pb', 'jelly')
    .create_interaction_terms('mayo', 'jelly')
)
points = [[1, 0], [2, 0], [3, 0], [2, 1], [3, 1], [4, 1]] df = DataFrame.from_array(points, ['x', 'y']) def change_1s_0s_to(x, zero_val, one_val): if x == 0: return zero_val elif x == 1: return one_val return x df1 = df.apply('y', (lambda x: change_1s_0s_to(x, 0.1, 0.9))) regressor1 = LogisticRegressor(df1, 'y', 1) df2 = df.apply('y', (lambda x: change_1s_0s_to(x, 0.01, 0.99))) regressor2 = LogisticRegressor(df2, 'y', 1) df3 = df.apply('y', (lambda x: change_1s_0s_to(x, 0.001, 0.999))) regressor3 = LogisticRegressor(df3, 'y', 1) df4 = df.apply('y', (lambda x: change_1s_0s_to(x, 0.0001, 0.9999))) regressor4 = LogisticRegressor(df4, 'y', 1) plt.clf() plt.style.use('bmh') plt.plot([point[0] for point in points], [point[1] for point in points]) plt.plot([x / 1000 for x in range(5001)],
sys.path.append('src')  # must precede the project imports below
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor

# test 1
# Set the flag to True to run the check below; it is skipped while False.
Test_1 = False
if Test_1:
    df = DataFrame.from_array(
        [[1, 0.2], [2, 0.25], [3, 0.5]],
        columns=['x', 'y'])
    log_reg = LogisticRegressor(df, dependent_variable='y')
    # Expected prediction at x = 5, rounded to three decimals.
    assert round(log_reg.predict({'x': 5}), 3) == 0.777