Exemple #1
0
def main():
    """Train a logistic regressor and print its train/test error rates.

    Obtains the features and labels from ``preprocess_data``, splits them
    with ``train_test_splitter`` (``ratio=0.8``), fits a
    ``LogisticRegressor``, hands the losses returned by ``fit`` to
    ``plot_losses`` (with ``savefig=True``), and finally prints the error
    rate measured on each partition.
    """
    # The first element returned by preprocess_data is not used here.
    _, features, labels = preprocess_data()
    X_train, X_test, y_train, y_test = train_test_splitter(X=features,
                                                           y=labels,
                                                           ratio=0.8)

    hyperparameters = dict(alpha=0.05,
                           c=0.01,
                           T=1000,
                           random_seed=0,
                           intercept=True)
    model = LogisticRegressor(**hyperparameters)
    plot_losses(losses=model.fit(X_train, y_train), savefig=True)

    train_error = error_rate(y_train, model.predict(X_train))
    test_error = error_rate(y_test, model.predict(X_test))

    print('Training Error Rate: %f' % train_error)
    print('Test Error Rate: %f' % test_error)
Exemple #2
0
     [0, 0, ['mayo', 'jelly'], 0], [5, 0, [], 4], [5, 0, ['mayo'], 8],
     [5, 0, ['jelly'], 1], [5, 0, ['mayo', 'jelly'], 0], [0, 5, [], 5],
     [0, 5, ['mayo'], 0], [0, 5, ['jelly'], 9], [0, 5, ['mayo', 'jelly'], 0],
     [5, 5, [], 0], [5, 5, ['mayo'], 0], [5, 5, ['jelly'], 0],
     [5, 5, ['mayo', 'jelly'], 0]],
    columns=['beef', 'pb', 'condiments', 'rating'])
# Dummy-encode the 'condiments' column, then append the six pairwise
# interaction terms listed below (in this exact order).
df = df.create_dummy_variables('condiments')
for first, second in (('beef', 'pb'), ('beef', 'mayo'), ('beef', 'jelly'),
                      ('pb', 'mayo'), ('pb', 'jelly'), ('mayo', 'jelly')):
    df = df.create_interaction_terms(first, second)
log_df = DataFrame(df.data_dict, df.columns)

logistic_regressor = LogisticRegressor(log_df, 10, dependent_variable='rating')

# Each case pairs an observation with its expected prediction, rounded to
# two decimal places.
expected_predictions = (
    # 8 slices of beef + mayo
    ({'beef': 8, 'mayo': 1}, 9.72),
    # 4 tbsp of pb + 8 slices of beef + mayo
    ({'beef': 8, 'pb': 4, 'mayo': 1}, 0.77),
    # 8 slices of beef + mayo + jelly
    ({'beef': 8, 'mayo': 1, 'jelly': 1}, 0.79),
)
for observation, expected in expected_predictions:
    assert round(logistic_regressor.predict(observation), 2) == expected
                   ['mayo', 'jelly'], [], ['mayo'], ['jelly'],
                   ['mayo', 'jelly']],
    'rating': [1, 1, 4, 0, 4, 8, 1, 0, 5, 0, 9, 0, 0, 0, 0, 0]
}

# Build the regression table: dummy-encode, append pairwise interactions,
# then append the constant and rating columns.
df = DataFrame(data_dict, column_order=['beef', 'pb', 'condiments'])
df = df.create_dummy_variables()
df = df.append_pairwise_interactions()
df = df.append_columns({
    'constant': [1 for _ in range(len(data_dict['rating']))],
    'rating': data_dict['rating']
})

# Zero ratings are replaced with 0.1 before fitting.
# NOTE(review): presumably because the logistic transform is undefined at 0
# -- confirm against LogisticRegressor.
df = df.apply('rating', lambda x: 0.1 if x == 0 else x)

regressor = LogisticRegressor(df, prediction_column='rating', max_val=10)

# The failure messages report the actual value so a mismatch can be
# diagnosed from the traceback alone.
assert regressor.multipliers == [
    -0.039, -0.0205, 1.7483, -0.3978, 0.1497, -0.7485, 0.4682, 0.3296, -0.5288,
    2.6441, 1.0125
], 'Wrong multipliers: ' + str(regressor.multipliers)

prediction = regressor.predict({
    'beef': 5,
    'pb': 5,
    'mayo': 1,
    'jelly': 1,
})
assert prediction == 0.02342, 'Wrong prediction: ' + str(prediction)
assert regressor.predict({
    'beef': 0,
    'pb': 3,
Exemple #4
0
# The print calls below use the Python 3 function form; the original
# Python 2 print statements are a SyntaxError on Python 3. The output text
# is unchanged (print separates its arguments with a single space).
print('Plotting data with green circle indicating (y=1) examples and red circle indicating (y=0) examples ...')
plot_utils.plot_twoclass_data(X,y,'Exam 1 score', 'Exam 2 score',['Not Admitted','Admitted'])
plt.savefig('fig1.pdf')

########################################################################
##================ Part 1: Compute cost and gradient ==================#
########################################################################

# set up the X matrix with the column of ones as intercept

XX = np.vstack([np.ones((X.shape[0],)),X.T]).T

# set up a logistic regression model

log_reg1 = LogisticRegressor()

# test the loss and gradient function

theta = np.zeros((XX.shape[1],))
loss = log_reg1.loss(theta,XX,y)
print("Loss on all-zeros theta vector (should be around 0.693) = ", loss)
grad = log_reg1.grad_loss(theta,XX,y)
print("Gradient of loss wrt all-zeros theta vector (should be around [-0.1, -12.01, -11.26]) = ", grad)

# run fmin on the loss function and gradient implemented in logistic_regressor.py

theta_opt = log_reg1.train(XX,y,num_iters=400)

# print the theta found
Exemple #5
0
from dataframe import DataFrame
from logistic_regressor import LogisticRegressor

# Raw applicant data; the 'Bias' column is a constant 1 for every row.
students_dict = {
    'ACT': [33, 34, 35, 30, 36, 29, 36, 31, 36, 32],
    'extra': [1, 0, 1, 1, 1, 1, 1, 1, 0, 0],
    'Bias': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
}

# Interaction column: element-wise product of 'ACT' and 'extra'.
students_dict['interaction'] = [
    act * extra
    for act, extra in zip(students_dict['ACT'], students_dict['extra'])
]

# Target column, added last so it follows the predictors in the dict.
students_dict['accepted'] = [
    0.95, 0.001, 0.95, 0.001, 0.95, 0.001, 0.95, 0.001, 0.95, 0.001
]

df = DataFrame(students_dict)

regressor = LogisticRegressor(df, 'accepted', 0.999)

# predictor = {'ACT': 36, 'extra': 0}

# df_bruh = DataFrame(predictor)
# print(df_bruh.gather_all_inputs())
# print(regressor.predict(predictor))
print(regressor.multipliers)
Exemple #6
0
# Fit one LogisticRegressor per delta value and plot the resulting
# prediction curves on a single figure.
delta_table = [0.1, 0.01, 0.001, 0.0001]
all_coords = []  # one [x_values, predictions] pair per delta

for delta_low in delta_table:
    # new_list=[]
    # for pair in list_data:
    #     if pair[1] == 0:
    #         new_list.append([pair[0],delta])
    #     else:
    #         new_list.append([pair[0],1-delta])

    # The frame is rebuilt from list_data on every iteration.
    df = DataFrame.from_array(list_data, columns=['x', 'y'])

    regressor = LogisticRegressor(df,
                                  prediction_column='y',
                                  max_value=1,
                                  delta=delta_low)

    coords = [[], []]
    for x in range(20):
        # NOTE(review): the plotted x value is x / 100, but the prediction
        # below is evaluated at x itself -- confirm this scaling is intended.
        coords[0].append(x / 100)
        coords[1].append(regressor.predict({'constant': 1, 'x': x}))
    all_coords.append(coords)
print(all_coords)
plt.style.use('bmh')
for coords in all_coords:
    plt.plot(coords[0], coords[1], linewidth=2.5)
# Legend labels are hard-coded in the same order as delta_table.
plt.legend(['0.1', '0.01', '0.001', '0.0001'])
plt.savefig('logistic_regressor_109.png')

# dfgd = DataFrame.from_array(
Exemple #7
0
reg = LogisticRegressor(df, dependent_variable='y', premade = True)

reg.set_coefficients({'constant': 0.5, 'x': 0.5})

print(reg.calc_rss())
print(reg.calc_gradient(delta))
reg.gradient_descent(alpha, delta, num_steps)
print(reg.coefficients)
'''
df = DataFrame.from_array([[2, 1], [3, 0]], columns=['x', 'y'])

alpha = 0.2
delta = 0.1
num_steps = 20000

reg = LogisticRegressor(df, dependent_variable='y', premade=True)

reg.set_coefficients({'constant': 1, 'x': 1})

print(reg.calc_rss())
print(reg.calc_gradient(delta))
#reg.gradient_descent(alpha, delta, num_steps)
#print(reg.coefficients)
''''
import matplotlib.pyplot as plt
plt.style.use('bmh')

points = {'x': [], 'y': []}

x = -5
Exemple #8
0
print('regressor with interaction terms')

print(regressor.coefficients)
print(regressor.predict({'beef': 5, 'pb': 0, 'beef * pb': 0}))

print(regressor.predict({'beef': 5, 'pb': 5, 'beef * pb': 25}))

# Rows are [beef, pb, rating].
training_rows = [[0, 0, 1], [1, 0, 2], [2, 0, 4], [4, 0, 8], [6, 0, 9],
                 [0, 2, 2], [0, 4, 5], [0, 6, 7], [0, 8, 6], [2, 2, 0.1],
                 [3, 4, 0.1]]
df = DataFrame.from_array(training_rows, columns=['beef', 'pb', 'rating'])
df = df.create_interaction_terms('beef', 'pb')

regressor = LogisticRegressor(df, 'rating', 10)

print('Logistic regressor with interaction terms')
print(regressor.coefficients)

# Print one prediction per observation, in this order.
for observation in ({'beef': 5, 'pb': 0, 'beef * pb': 0},
                    {'beef': 12, 'pb': 0, 'beef * pb': 0},
                    {'beef': 5, 'pb': 5, 'beef * pb': 25}):
    print(regressor.predict(observation))

print('-----------Assignment 52-----------')

df = DataFrame.from_array(
    [[0, 0, [], 1], [0, 0, ['mayo'], 1], [0, 0, ['jelly'], 4],
     [0, 0, ['mayo', 'jelly'], 0], [5, 0, [], 4], [5, 0, ['mayo'], 8],
Exemple #9
0
# The print calls below use the Python 3 function form; the original
# Python 2 print statements are a SyntaxError on Python 3. The output text
# is unchanged (print separates its arguments with a single space).
print('Plotting data with green circle indicating (y=1) examples and red circle indicating (y=0) examples ...')
plot_utils.plot_twoclass_data(X,y,'Exam 1 score', 'Exam 2 score',['Not Admitted','Admitted'])
plt.savefig('fig1.pdf')

########################################################################
##================ Part 1: Compute cost and gradient ==================#
########################################################################

# set up the X matrix with the column of ones as intercept

XX = np.vstack([np.ones((X.shape[0],)),X.T]).T

# set up a logistic regression model

log_reg1 = LogisticRegressor()

# test the loss and gradient function

theta = np.zeros((XX.shape[1],))
loss = log_reg1.loss(theta,XX,y)
print("Loss on all-zeros theta vector (should be around 0.693) = ", loss)
grad = log_reg1.grad_loss(theta,XX,y)
print("Gradient of loss wrt all-zeros theta vector (should be around [-0.1, -12.01, -11.26]) = ", grad)

# run fmin on the loss function and gradient implemented in logistic_regressor.py

theta_opt = log_reg1.train(XX,y,num_iters=400)

# print the theta found
import matplotlib.pyplot as plt
plt.style.use('bmh')
import sys
sys.path.append('src')
from dataframe import DataFrame
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor

# Rows are [x, y] pairs with a 0/1 y value.
arr = [[1, 0], [2, 0], [3, 0], [2, 1], [3, 1], [4, 1]]
df = DataFrame.from_array(arr, columns=['x', 'y'])
# The bare string below is a no-op expression recording how to run the file.
'''
python tests/test_regressors.py
'''

reg = LogisticRegressor(df, dependent_variable='y', upper_bound=1)
# Starting coefficients handed to the regressor before gradient descent.
reg.set_coefficients({'constant': 0.5, 'x': 0.5})

# Arguments passed positionally to gradient_descent below.
alpha = 0.01
delta = 0.01
num_steps = 20000
reg.gradient_descent(alpha, delta, num_steps)

print("\nreg.coefficients:",
      reg.coefficients)  # should be {'constant': 2.7911, 'x': -1.1165}

# Split the training pairs into separate x and y lists.
x = [pair[0] for pair in arr]
y = [pair[1] for pair in arr]

# 301 evenly spaced inputs from 1.00 to 4.00 in steps of 0.01. The
# comprehension variable `x` is local to each comprehension (Python 3), so
# the list `x` defined above is not overwritten.
lots_of_xs = [x / 100 for x in range(100, 401)]
prediction = [reg.predict({'x': x}) for x in lots_of_xs]
    'condiments': [[], ['mayo'], ['jelly'], ['mayo', 'jelly'], [], ['mayo'],
                   ['jelly'], ['mayo', 'jelly'], [], ['mayo'], ['jelly'],
                   ['mayo', 'jelly'], [], ['mayo'], ['jelly'],
                   ['mayo', 'jelly']],
    'rating': [1, 1, 4, 0, 4, 8, 1, 0, 5, 0, 9, 0, 0, 0, 0, 0]
}
# Dummy-encode and append pairwise interactions in one chained expression.
df = DataFrame(
    data_dict,
    column_order=['beef', 'pb', 'condiments'],
).create_dummy_variables().append_pairwise_interactions()

# One constant entry per row of the 'beef' column.
row_count = len(data_dict['beef'])
df = df.append_columns({
    'constant': [1] * row_count,
    'rating': [1, 1, 4, 0, 4, 8, 1, 0, 5, 0, 9, 0, 0, 0, 0, 0]
})

# Zero ratings are replaced with 0.1 before the regressor is built.
df = df.apply('rating', lambda x: 0.1 if x == 0 else x)
regressor = LogisticRegressor(df, prediction_column='rating', max_value=10)

# print("\n Testing multipliers")
# assert regressor.multipliers == {
#     'beef': -0.03900793,
#     'pb': -0.02047944,
#     'mayo': 1.74825378,
#     'jelly': -0.39777219,
#     'beef_pb': 0.14970983,
#     'beef_mayo': -0.74854916,
#     'beef_jelly': 0.46821312,
#     'pb_mayo': 0.32958369,
#     'pb_jelly': -0.5288267,
#     'mayo_jelly': 2.64413352,
#     'constant': 1.01248436
# }, 'Incorrect multipliers an is instead:'+str(regressor.multipliers)
Exemple #12
0
           if index in dataframe_indices]

# Add a constant column with one entry per selected index.
# NOTE(review): the return value is discarded here -- confirm that
# append_columns mutates `dataframe` in place.
dataframe.append_columns({'constant': [1 for _ in dataframe_indices]})

# Fit the linear model and print its classification of the testing frame,
# one row per line.
linear_regressor = LinearRegressor(dataframe,
                                   ratings,
                                   prediction_column='Survived')
linear_regressor.solve_coefficients()
linear_regressor_classifications = get_classifications(linear_regressor,
                                                       testing_dataframe)
for row in linear_regressor_classifications:
    print(row)

print('\n')
# Same procedure with the logistic model, built from identical arguments.
logistic_regressor = LogisticRegressor(dataframe,
                                       ratings,
                                       prediction_column='Survived')
logistic_regressor.solve_coefficients()
logistic_regressor_classifications = get_classifications(
    logistic_regressor, testing_dataframe)
for row in logistic_regressor_classifications:
    print(row)

# NOTE(review): return value discarded again -- confirm remove_columns
# mutates in place.
dataframe.remove_columns(['constant'])

# Append a 'Survived' column holding only the entries of survived_people
# whose position is in dataframe_indices.
dataframe.append_columns({
    'Survived': [
        did_survive for index, did_survive in enumerate(survived_people)
        if index in dataframe_indices
    ]
})
Exemple #13
0
}
feature_columns = ['percentile', 'ACT', 'extracurricular']
df = DataFrame(data_dict, column_order=feature_columns)
df = df.append_pairwise_interactions()

# One constant entry per row of the 'percentile' column.
row_count = len(data_dict['percentile'])
df = df.append_columns({
    'constant': [1] * row_count,
    'acceptance':
    [0.999, 0.001, 0.999, 0.001, 0.999, 0.001, 0.999, 0.001, 0.999, 0.001]
})
# Any acceptance value equal to 0 is replaced with 0.1.
df = df.apply('acceptance', lambda x: 0.1 if x == 0 else x)

regressor = LogisticRegressor(df, prediction_column='acceptance', max_value=1)
print(regressor.coefficients)

# Two applicants to score with the fitted regressor.
martha = {'percentile': 95, 'ACT': 33, 'extracurricular': 1}
jeremy = {'percentile': 95, 'ACT': 34, 'extracurricular': 0}
print("Martha: " + str(regressor.predict(martha)))
print("Jeremy: " + str(regressor.predict(jeremy)))
import sys

sys.path.append('src')
from logistic_regressor import LogisticRegressor
from dataframe import DataFrame

# Three [x, y] samples used to fit the regressor.
data = [[10, 0.05], [100, 0.35], [1000, 0.95]]
df = DataFrame.from_array(data, ['x', 'y'])
regressor = LogisticRegressor(df, 'y', 1)

# Report the fitted coefficients, then the prediction at x = 500.
coefficients = regressor.coefficients
print(coefficients)
prediction_at_500 = regressor.predict({'x': 500})
print(prediction_at_500)
Exemple #15
0
import sys
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor

 df = DataFrame.from_array(
[[0, 0, 1, 0], 
[1, 0, 2, 0], 
[2, 0, 4, 0], 
[4, 0, 8, 0], 
[6, 0, 9, 0], 
[0, 2, 2, 0], 
[0, 4, 5, 0], 
[0, 6, 7, 0], 
[0, 8, 6, 0],
[2, 2, 0.1, 4],
[3, 4, 0.1, 12]],
columns = ['beef', 'pb', 'rating', 'interactive']
)
log_reg = LogisticRegressor(df,10, dependent_variable = 'rating')
print(log_reg.predict({'beef': 5, 'pb': 0 , 'interactive':0}))
print(log_reg.predict({'beef': 12, 'pb': 0 , 'interactive':0}))
print(log_reg.predict({'beef': 5, 'pb': 5 , 'interactive':25}))
print(linear_regressor.predict({'beef': 0, 'pb': 4 , 'mayo': 1, 'jelly': 0, 'beef * pb': 0, 'beef * mayo': 0, 'beef * jelly': 0, 'pb * mayo': 4, 'pb * jelly': 0, 'mayo * jelly': 0}))
print(linear_regressor.predict({'beef': 8, 'pb': 4 , 'mayo': 1, 'jelly': 0, 'beef * pb': 32, 'beef * mayo': 8, 'beef * jelly': 0, 'pb * mayo': 4, 'pb * jelly': 0, 'mayo * jelly': 0}))
print(linear_regressor.predict({'beef': 8, 'pb': 0 , 'mayo': 1, 'jelly': 1, 'beef * pb': 0, 'beef * mayo': 8, 'beef * jelly': 8, 'pb * mayo': 0, 'pb * jelly': 0, 'mayo * jelly': 1}))'''

# Rows are [beef, pb, condiments, rating].
sandwich_rows = [
    [0, 0, [], 1], [0, 0, ['mayo'], 1], [0, 0, ['jelly'], 4],
    [0, 0, ['mayo', 'jelly'], 0], [5, 0, [], 4], [5, 0, ['mayo'], 8],
    [5, 0, ['jelly'], 1], [5, 0, ['mayo', 'jelly'], 0], [0, 5, [], 5],
    [0, 5, ['mayo'], 0], [0, 5, ['jelly'], 9], [0, 5, ['mayo', 'jelly'], 0],
    [5, 5, [], 0], [5, 5, ['mayo'], 0], [5, 5, ['jelly'], 0],
    [5, 5, ['mayo', 'jelly'], 0],
]
df = DataFrame.from_array(sandwich_rows,
                          columns=['beef', 'pb', 'condiments', 'rating'])

# Dummy-encode 'condiments', then append the six pairwise interaction
# terms in this exact order.
df = df.create_dummy_variables('condiments')
for first, second in (('beef', 'pb'), ('beef', 'mayo'), ('beef', 'jelly'),
                      ('pb', 'mayo'), ('pb', 'jelly'), ('mayo', 'jelly')):
    df = df.create_interaction_terms(first, second)
log_df = DataFrame(df.data_dict, df.columns)

logistic_regressor = LogisticRegressor(log_df, 10, dependent_variable='rating')
'''
print('logistic_regressor')
print(logistic_regressor.predict({'beef': 8, 'pb': 0 , 'mayo': 1, 'jelly': 0, 'beef * pb': 0, 'beef * mayo': 8, 'beef * jelly': 0, 'pb * mayo': 0, 'pb * jelly': 0, 'mayo * jelly': 0}))
print(logistic_regressor.predict({'beef': 0, 'pb': 4 , 'mayo': 0, 'jelly': 1, 'beef * pb': 0, 'beef * mayo': 0, 'beef * jelly': 0, 'pb * mayo': 0, 'pb * jelly': 4, 'mayo * jelly': 0}))
print(logistic_regressor.predict({'beef': 0, 'pb': 4 , 'mayo': 1, 'jelly': 0, 'beef * pb': 0, 'beef * mayo': 0, 'beef * jelly': 0, 'pb * mayo': 4, 'pb * jelly': 0, 'mayo * jelly': 0}))
print(logistic_regressor.predict({'beef': 8, 'pb': 4 , 'mayo': 1, 'jelly': 0, 'beef * pb': 32, 'beef * mayo': 8, 'beef * jelly': 0, 'pb * mayo': 4, 'pb * jelly': 0, 'mayo * jelly': 0}))
print(logistic_regressor.predict({'beef': 8, 'pb': 0 , 'mayo': 1, 'jelly': 1, 'beef * pb': 0, 'beef * mayo': 8, 'beef * jelly': 8, 'pb * mayo': 0, 'pb * jelly': 0, 'mayo * jelly': 1}))'''
Exemple #17
0
# Sanity check: the train and validation sets together account for n_imgs.
assert len(train_imgs) + len(val_imgs) == n_imgs

# Bind the transform list to utils.compose so the datasets receive a single
# callable as their `transform`.
trafos = [utils.to_channel_first, utils.normalize, utils.to_tensor]
trafos = partial(utils.compose, transforms=trafos)

train_data = utils.DatasetWithTransform(train_imgs,
                                        train_labels,
                                        transform=trafos)
val_data = utils.DatasetWithTransform(val_imgs, val_labels, transform=trafos)

print("N Training: ", len(train_imgs))
print("N Val: ", len(val_imgs))

# Model dimensions: input size taken from the first image, 10 output classes.
n_pixels = images[0].size
n_classes = 10
model = LogisticRegressor(n_pixels, n_classes)
model.to(device)

# Only the training loader shuffles.
train_batch_size = 4
train_loader = torch.utils.data.DataLoader(train_data,
                                           batch_size=train_batch_size,
                                           shuffle=True)

val_batch_size = 25
val_loader = torch.utils.data.DataLoader(val_data, batch_size=val_batch_size)

optimizer = torch.optim.Adam(model.parameters(), lr=1.e-3)
# NOTE(review): NLLLoss expects log-probabilities as input, so the model's
# forward pass presumably ends in a log-softmax -- confirm.
loss = torch.nn.NLLLoss()
loss.to(device)

# TensorBoard event files are written under 'runs/log_reg'.
tb_logger = torch.utils.tensorboard.SummaryWriter('runs/log_reg')
Exemple #18
0
import sys
sys.path.append('src')
from logistic_regressor import LogisticRegressor
from matrix import Matrix
from dataframe import DataFrame

# Three-point fit used to check predict().
df = DataFrame.from_array([[1, 0.2], [2, 0.25], [3, 0.5]], columns=['x', 'y'])
log_reg = LogisticRegressor(df, 'y', 1)

print('Testing method predict...')
rounded_prediction = round(log_reg.predict({'x': 5}), 3)
assert rounded_prediction == 0.777
print('PASSED')

# Rows are [beef, pb, condiments, rating].
sandwich_rows = [
    [0, 0, [], 1], [0, 0, ['mayo'], 1], [0, 0, ['jelly'], 4],
    [0, 0, ['mayo', 'jelly'], 0.1], [5, 0, [], 4], [5, 0, ['mayo'], 8],
    [5, 0, ['jelly'], 1], [5, 0, ['mayo', 'jelly'], 0.1], [0, 5, [], 5],
    [0, 5, ['mayo'], 0.1], [0, 5, ['jelly'], 9],
    [0, 5, ['mayo', 'jelly'], 0.1], [5, 5, [], 0.1], [5, 5, ['mayo'], 0.1],
    [5, 5, ['jelly'], 0.1], [5, 5, ['mayo', 'jelly'], 0.1],
]
df = DataFrame.from_array(sandwich_rows,
                          columns=['beef', 'pb', 'condiments', 'rating'])

df = df.create_dummy_variables('condiments')

# Append the six pairwise interaction terms in this exact order.
for first, second in (('beef', 'pb'), ('beef', 'mayo'), ('beef', 'jelly'),
                      ('pb', 'mayo'), ('pb', 'jelly'), ('mayo', 'jelly')):
    df = df.create_interaction_terms(first, second)
Exemple #19
0
points = [[1, 0], [2, 0], [3, 0], [2, 1], [3, 1], [4, 1]]

df = DataFrame.from_array(points, ['x', 'y'])


def change_1s_0s_to(x, zero_val, one_val):
    """Map a 0 to ``zero_val`` and a 1 to ``one_val``; pass anything else through.

    The comparison with 0 is tried before the comparison with 1, and both
    use ``==``, so values that compare equal to them (``0.0``, ``False``,
    ``True``) are replaced as well.
    """
    for label, replacement in ((0, zero_val), (1, one_val)):
        if x == label:
            return replacement
    return x


def _fit_with_clamp(zero_val, one_val):
    """Return (clamped frame, regressor) with y's 0/1 mapped to the given values."""
    clamped = df.apply('y', lambda x: change_1s_0s_to(x, zero_val, one_val))
    return clamped, LogisticRegressor(clamped, 'y', 1)


# One regressor per clamping level, from loosest to tightest.
df1, regressor1 = _fit_with_clamp(0.1, 0.9)
df2, regressor2 = _fit_with_clamp(0.01, 0.99)
df3, regressor3 = _fit_with_clamp(0.001, 0.999)
df4, regressor4 = _fit_with_clamp(0.0001, 0.9999)

# Start from a clean figure and draw the raw points first.
plt.clf()
plt.style.use('bmh')
point_xs = [pt[0] for pt in points]
point_ys = [pt[1] for pt in points]
plt.plot(point_xs, point_ys)

plt.plot([x / 1000 for x in range(5001)],
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor
#test 1
Test_1 = False
if Test_1 == True:
    df = DataFrame.from_array([[1, 0.2], [2, 0.25], [3, 0.5]],
                              columns=['x', 'y'])

    log_reg = LogisticRegressor(df, dependent_variable='y')
    assert round(log_reg.predict({'x': 5}), 3) == 0.777