'condiments': [[], ['mayo'], ['jelly'], ['mayo', 'jelly'], [], ['mayo'],
                   ['jelly'], ['mayo', 'jelly'], [], ['mayo'], ['jelly'],
                   ['mayo', 'jelly'], [], ['mayo'], ['jelly'],
                   ['mayo', 'jelly']],
    'rating': [1, 1, 4, 0, 4, 8, 1, 0, 5, 0, 9, 0, 0, 0, 0, 0]
}

# Assemble the regression input in one pass: start from the three predictor
# columns, run create_dummy_variables() and append_pairwise_interactions(),
# then attach an all-ones 'constant' column and the 'rating' target column.
df = (
    DataFrame(data_dict, column_order=['beef', 'pb', 'condiments'])
    .create_dummy_variables()
    .append_pairwise_interactions()
    .append_columns({
        'constant': [1] * len(data_dict['rating']),
        'rating': data_dict['rating'],
    })
    # Ratings equal to 0 are swapped for 0.1 before the regressor sees them
    # (presumably so the logistic transform stays finite -- TODO confirm).
    .apply('rating', lambda x: x if x != 0 else 0.1)
)

# Ratings are capped at 10, which is passed as the regressor's max_val.
regressor = LogisticRegressor(df, prediction_column='rating', max_val=10)

# Reference multipliers, compared with exact equality.
expected_multipliers = [
    -0.039, -0.0205, 1.7483, -0.3978, 0.1497, -0.7485,
    0.4682, 0.3296, -0.5288, 2.6441, 1.0125,
]
# NOTE(review): the message below looks like a typo for 'Wrong multipliers'.
assert regressor.multipliers == expected_multipliers, 'Wong multipliers'

# Spot-check a single prediction: 5 beef, 5 pb, with both condiments.
sandwich = {'beef': 5, 'pb': 5, 'mayo': 1, 'jelly': 1}
assert regressor.predict(sandwich) == 0.02342, 'Nah bruh'
assert regressor.predict({
# --- Column selection -------------------------------------------------------
# Pick two columns out of df1, in the requested (not original) order.
df2 = df1.select(['Sarah', 'Pete'])

print('Testing method "select_columns"...')
expected_selected_rows = [
    [3, 1],
    [1, 0],
    [4, 1],
    [0, 0],
]
assert df2.to_array() == expected_selected_rows
assert df2.columns == ['Sarah', 'Pete']
print('PASSED')

# --- Row selection ----------------------------------------------------------
# Keep only the rows at indices 1 and 3 of df1.
df3 = df1.select_rows([1, 3])
print('Testing method "select_rows"...')
expected_kept_rows = [
    [0, 1, 1],
    [0, 2, 0],
]
assert df3.to_array() == expected_kept_rows
print('PASSED')

# --- Column-wise apply ------------------------------------------------------
# Fresh fixture: three integer columns of four entries each.
data_dict = {
    'Pete': [1, 0, 1, 0],
    'John': [2, 1, 0, 2],
    'Sarah': [3, 1, 4, 0],
}

df1 = DataFrame(data_dict, column_order=['Pete', 'John', 'Sarah'])

# Multiply every entry of the 'John' column by 7; the check below expects the
# other two columns to come back unchanged.
df2 = df1.apply('John', lambda x: 7 * x)
print('Testing method "apply"...')
expected_after_apply = {
    'Pete': [1, 0, 1, 0],
    'John': [14, 7, 0, 14],
    'Sarah': [3, 1, 4, 0],
}
assert df2.data_dict == expected_after_apply
print('PASSED')

# --- Row filtering fixture --------------------------------------------------
# Build a DataFrame from a row-major array; each record is
# [firstname, lastname, age], matching the order of `columns`.
columns = ['firstname', 'lastname', 'age']
arr = [
    ['Kevin', 'Fray', 5],
    ['Charles', 'Trapp', 17],
    ['Anna', 'Smith', 13],
    ['Sylvia', 'Mendez', 9],
]
df = DataFrame.from_array(arr, columns)

# NOTE(review): the banner says "select_rows_where" while the check that
# follows calls df.where(...) -- confirm which name is current.
print('Testing method "select_rows_where"...')
assert df.where(lambda row: len(row['firstname']) >= len(row['lastname']) and
Exemple #3
0
    'extracurricular': [1, 0, 1, 1, 1, 1, 1, 1, 0, 0],
    'acceptance':
    [0.999, 0.001, 0.999, 0.001, 0.999, 0.001, 0.999, 0.001, 0.999, 0.001]
}
# Assemble the regression input: take the three predictor columns, run
# append_pairwise_interactions(), then attach an all-ones 'constant' column
# and the 'acceptance' target (ten values alternating 0.999 / 0.001).
df = (
    DataFrame(data_dict,
              column_order=['percentile', 'ACT', 'extracurricular'])
    .append_pairwise_interactions()
    .append_columns({
        'constant': [1] * len(data_dict['percentile']),
        'acceptance': [0.999, 0.001] * 5,
    })
    # Any acceptance value equal to 0 would be replaced by 0.1.
    # NOTE(review): none of the values supplied above is 0, so this looks
    # like a no-op for this data set -- confirm it is still wanted.
    .apply('acceptance', lambda x: x if x != 0 else 0.1)
)

# Acceptance is capped at 1, which is passed as the regressor's max_value.
regressor = LogisticRegressor(df, prediction_column='acceptance', max_value=1)
print(regressor.coefficients)

# Prediction for an applicant at the 95th percentile with ACT 33 and
# extracurricular = 1.
martha_prediction = regressor.predict({
    'percentile': 95,
    'ACT': 33,
    'extracurricular': 1,
})
print("Martha: " + str(martha_prediction))
print(
    "Jeremy: " +
    str(regressor.predict({
        'percentile': 95,