def mark(self):
    """Divide ``self.points`` into a grid of square cells and build one Block per cell.

    The bounding box comes from ``get_xy_lim(self.points)``; its lower-left
    corner is stored in ``self.origin``.  The cell edge length is
    ``self.resolution``.  A cell holding fewer than
    ``self.LOWEST_POINTS_COUNT`` points gets a ``Block`` whose parameter is
    ``None``; every other cell gets the first entry of the regressor's
    parameters after processing the cell's points.

    Returns:
        A list of rows, each row being a list of ``Block`` objects.
    """
    limits = get_xy_lim(self.points)  # [x_min, x_max, y_min, y_max]
    x_min, x_max, y_min, y_max = limits[0], limits[1], limits[2], limits[3]
    self.origin = [x_min, y_min]

    cell = self.resolution
    n_rows = int(ceil((y_max - y_min) / cell))
    n_cols = int(ceil((x_max - x_min) / cell))
    regressor = LinearRegressor()  # one instance reused for every cell

    grid = []
    for r in range(n_rows):
        current_row = []
        for c in range(n_cols):
            position = [r, c]
            corner = self.pos2cordinate(position)
            inside = filter_points(self.points, corner[0], corner[0] + cell,
                                   corner[1], corner[1] + cell)
            if len(inside) >= self.LOWEST_POINTS_COUNT:
                regressor.process(inside)
                fitted = regressor.get_parameters()[0]
                k2slope(fitted[0])  # the returned value is not used here
            else:
                # very few points in the block, param is None
                fitted = None
            current_row.append(Block(fitted, inside, position))
        grid.append(current_row)
    return grid
    def predict(self, this_dict):
        """Predict the dependent variable for a single observation.

        ``this_dict`` must contain a value for ``self.first_variable``.  Every
        other column of ``self.df`` (except the dependent variable) is read as
        ``<name>^<exponent>``; its input value is the first variable's value
        raised to that exponent.

        The completed inputs are assembled in a copy, so the caller's
        dictionary is not modified (the derived keys used to be written
        straight into it).

        Returns:
            Whatever ``LinearRegressor.predict`` returns for the completed
            inputs.

        Raises:
            KeyError: if ``self.first_variable`` is missing from ``this_dict``.
            IndexError: if a derived column name contains no ``'^'``.
        """
        inputs = dict(this_dict)
        base_value = inputs[self.first_variable]
        skipped = (self.first_variable, self.dependent_variable)

        for column in self.df.columns:
            if column not in skipped:
                # The piece after the first '^' in the column name is the power.
                exponent = float(column.split('^')[1])
                inputs[column] = base_value**exponent

        linear_regressor = LinearRegressor(self.df, self.dependent_variable)
        return linear_regressor.predict(inputs)
 def calculate_coefficients(self):
   """Return linear-regression coefficients for the transformed dependent variable.

   Each dependent value ``y`` is replaced by ``log(upper_bound / y - 1)`` and
   a ``LinearRegressor`` is fitted to the result.

   The transform is applied to a deep copy of ``self.dataframe``.  Previously
   the stored column itself was overwritten, so a second call transformed
   values that had already been transformed.

   Raises (for plain Python numbers):
     ZeroDivisionError: if a dependent value is 0.
     ValueError: if ``upper_bound / y - 1`` is not positive.
   """
   import copy  # local import so the block does not depend on the file header

   df = copy.deepcopy(self.dataframe)
   observed = df.data_dict[self.dependent_variable]
   df.data_dict[self.dependent_variable] = [
     math.log(self.upper_bound / y - 1) for y in observed
   ]

   return LinearRegressor(df, self.dependent_variable).calculate_coefficients()
예제 #4
0
 def calc_coefficients(self):
     """Fit a linear regression to the transformed dependent variable.

     A shallow copy of ``self.df.data_dict`` is made, the ``self.dv`` column
     in that copy is replaced by ``log(up_bound / value - 1)`` for each value,
     and a new ``DataFrame`` with the original column order is handed to
     ``LinearRegressor``.

     Returns:
         The ``coefficients`` attribute of the fitted regressor.
     """
     target = self.dv
     columns = dict(self.df.data_dict)

     transformed = []
     for value in columns[target]:
         transformed.append(math.log((self.up_bound / value) - 1))
     columns[target] = transformed

     frame = DataFrame(columns, self.df.columns)
     return LinearRegressor(frame, target).coefficients
 def generate(self, points):
     """Build the output for every non-isolated part of ``points``.

     A ``RegressionController`` receives the points, is split into parts with
     a ``PointsDivider`` and the value 0.5, and is fitted with a fresh
     ``LinearRegressor``.  For each part that is not flagged ``isolated``,
     ``self.generate_for_line`` is called with the first parameter entry, the
     part's intersections and the line width from ``self.get_linewidth``.

     Returns:
         A flat list with the items produced for all such parts.
     """
     divider = PointsDivider()
     controller = RegressionController()
     controller.set_points(points)
     controller.set_parts(divider, 0.5)
     controller.fit(LinearRegressor())

     result = []
     fitted_parts = zip(controller.parts, controller.parameters,
                        controller.intersections)
     for part, param, ends in fitted_parts:
         if part.isolated:
             continue
         width = self.get_linewidth(part.points, param[0])
         result.extend(self.generate_for_line(param[0], ends, width))
     return result
예제 #6
0
  def __init__(self, dataframe, upperbound, dependent_variable):
    """Fit a linear regression to the transformed dependent variable.

    Each dependent value is replaced by ``log(upperbound / value - 1)``
    (a value of 0 is first replaced by 0.1), the column is renamed to
    ``<dependent_variable>_transfromed``, and a ``LinearRegressor`` is fitted
    on the resulting ``DataFrame``.  Its coefficients are stored in
    ``self.coefficients``.

    The caller's ``dataframe`` is left untouched.  Earlier the column list and
    the data dictionary were edited in place, so the caller's frame lost its
    dependent column and had it renamed.

    Raises:
      KeyError: if ``dependent_variable`` is not a key of the data dictionary.
      ValueError: if ``dependent_variable`` is not in ``dataframe.columns``,
        or (for plain Python numbers) if ``upperbound / value - 1`` is not
        positive.
    """
    self.upperbound = upperbound
    self.dependent_variable = dependent_variable

    # Zeros are swapped for 0.1 so that upperbound / value can be evaluated.
    observed = [0.1 if value == 0 else value
                for value in dataframe.data_dict[dependent_variable]]

    # The spelling of the suffix is kept as is: it ends up as a column key.
    dependent_transformed = dependent_variable + "_transfromed"

    # Work on a copy of the column list; only the first match is renamed.
    new_columns = list(dataframe.columns)
    new_columns[new_columns.index(dependent_variable)] = dependent_transformed

    # New dictionary without the original dependent column; the transformed
    # column is added last, matching the key order produced before.
    transformed_data_dict = {
      key: values
      for key, values in dataframe.data_dict.items()
      if key != dependent_variable
    }
    transformed_data_dict[dependent_transformed] = [
      math.log(self.upperbound / value - 1) for value in observed
    ]

    transformed_dataframe = DataFrame(transformed_data_dict, new_columns)
    linear_regressor = LinearRegressor(transformed_dataframe, dependent_transformed)
    self.coefficients = linear_regressor.coefficients
예제 #7
0
def main():
    """Fit the model to each generated data set and plot the results.

    For the "not disturbed" and "disturbed" generators the model is fitted
    twice (with the ``'l2'`` and ``'l1'`` options), the returned coefficient
    pairs are printed, and the targets, the ground truth and both predictions
    are drawn, saved to ``{PATH_PLOTS}{name}.png`` and shown.

    Each data set is drawn on its own figure, so curves from an earlier
    iteration cannot end up in a later image when ``plt.show()`` does not
    dispose of the current figure.
    """
    start, end, num_points = -1.8, 2.0, 20

    x = np.linspace(start, end, num_points)
    y_truth = data_generators.ground_truth(x)
    y_generators_dict = {
        "not disturbed": data_generators.data_values,
        "disturbed": data_generators.data_values_disturbed
    }
    model = LinearRegressor()

    for name, generator in y_generators_dict.items():
        y_target = generator(x)

        # eval() is called right after each fit, before the next fit replaces it.
        b_0_l2, b_1_l2 = model.fit(x, y_target, 'l2')
        y_pred_l2 = model.eval(x)

        b_0_l1, b_1_l1 = model.fit(x, y_target, 'l1')
        y_pred_l1 = model.eval(x)

        print(name)
        print(f"b_0_l2 = {b_0_l2} ; b_1_l2 = {b_1_l2} \n "
              f"b_0_l1 = {b_0_l1} ; b_1_l1 = {b_1_l1}")

        plt.figure()
        plt.plot(x, y_target, 'ko', mfc='none')
        plt.plot(x, y_truth)
        plt.plot(x, y_pred_l2)
        plt.plot(x, y_pred_l1)
        # Legend labels are Russian; roughly: sample, model, least squares,
        # least absolute deviations.
        plt.legend(('Выборка', 'Модель', 'МНК', 'МНМ'))
        plt.xlabel('x')
        plt.ylabel('y')
        plt.savefig(f"{PATH_PLOTS}{name}.png")
        plt.show()
예제 #8
0
                data_dict[str(y)].append(new_data[x][y])
            else:
                data_dict[str(y)] = [new_data[x][y]]
    return data_dict


# (x, y) samples used for the polynomial fit.
polynomial_data = [
    (0.0, 4.0), (0.2, 8.9), (0.4, 17.2), (0.6, 28.3), (0.8, 41.6),
    (1.0, 56.5), (1.2, 72.4), (1.4, 88.7), (1.6, 104.8), (1.8, 120.1),
    (2.0, 134.0), (2.2, 145.9), (2.4, 155.2), (2.6, 161.3), (2.8, 163.6),
    (3.0, 161.5), (3.2, 154.4), (3.4, 141.7), (3.6, 122.8), (3.8, 97.1),
    (4.0, 64.0), (4.2, 22.9), (4.4, -26.8), (4.6, -85.7), (4.8, -154.4),
]

# (x, y) samples used for the trigonometric fit.
trigonometry_data = [
    (0.0, 7.0), (0.2, 5.6), (0.4, 3.56), (0.6, 1.23), (0.8, -1.03),
    (1.0, -2.89), (1.2, -4.06), (1.4, -4.39), (1.6, -3.88), (1.8, -2.64),
    (2.0, -0.92), (2.2, 0.95), (2.4, 2.63), (2.6, 3.79), (2.8, 4.22),
    (3.0, 3.8), (3.2, 2.56), (3.4, 0.68), (3.6, -1.58), (3.8, -3.84),
    (4.0, -5.76), (4.2, -7.01), (4.4, -7.38), (4.6, -6.76), (4.8, -5.22),
]

# Polynomial fit: the transformed columns are regressed with column "4" as
# the dependent variable.
data_dict = transform_polynomial_data(polynomial_data, 3)
df = DataFrame(data_dict)
regression = LinearRegressor(df, "4")
print(f"polynomial_data{regression.coefficients!s}")

# Trigonometric fit: same steps with the trigonometric transform.
data_dict = transform_trigonometric_data(trigonometry_data)
df = DataFrame(data_dict)
regression = LinearRegressor(df, "4")
print(f"trigonometry_data{regression.coefficients!s}")
예제 #9
0
                           (4.8, -5.22)],
                          columns=['x', 'y'])
# Derive the four trigonometric columns from 'x', one apply_add call each.
df = (
    df.apply_add('x', lambda x: math.sin(x), 'sin(x)')
    .apply_add('x', lambda x: math.cos(x), 'cos(x)')
    .apply_add('x', lambda x: math.sin(2 * x), 'sin(2*x)')
    .apply_add('x', lambda x: math.cos(2 * x), 'cos(2*x)')
)

# Keep copies of the raw x and y columns before 'x' is removed.
x_values, y_values = (list(df.data_dict[name]) for name in ('x', 'y'))

# Drop 'x' so that only the derived columns and 'y' go into the regression.
df = df.del_column('x')

# Fit and report the coefficients.
linear_regressor = LinearRegressor(df, dependent_variable='y')
coefficients = linear_regressor.coefficients
print(coefficients)


def apply_function(x, function):
    """Call ``function`` with ``x`` as its only argument and return the result."""
    result = function(x)
    return result


# Show the saved x values (label and list on separate lines) and their count.
print('x_values', x_values, sep='\n')
print('x_values length', len(x_values))

# Filled by the loop that follows.
new_y_values = []
for x in x_values:
    new_y_values.append(
        apply_function(
}
df = DataFrame(data_dict, column_order=['beef', 'pb', 'condiments'])
assert df.columns == ['beef', 'pb', 'condiments'], 'Wrong columns'

# Three chained steps; the assert below lists the columns expected afterwards.
df = (
    df.create_dummy_variables()
    .append_pairwise_interactions()
    .append_columns({
        'constant': [1] * len(data_dict['rating']),
        'rating': data_dict['rating'],
    })
)
assert df.columns == [
    'beef', 'pb', 'mayo', 'jelly',
    'beef_pb', 'beef_mayo', 'beef_jelly',
    'pb_mayo', 'pb_jelly', 'mayo_jelly',
    'constant', 'rating',
], 'Wrong Columns'

linear_regressor = LinearRegressor(df, prediction_column='rating')

# Coefficients the fit is expected to reproduce exactly.
coeffs = dict(
    beef=0.25,
    pb=0.4,
    mayo=-1.25,
    jelly=1.5,
    beef_pb=-0.21,
    beef_mayo=1.05,
    beef_jelly=-0.85,
    pb_mayo=-0.65,
    pb_jelly=0.65,
    mayo_jelly=-3.25,
    constant=2.1875,
)

assert linear_regressor.coefficients == coeffs, 'Wrong Coeffs'
# (x, y) samples.
data = [
    (0.0, 7.0), (0.2, 5.6), (0.4, 3.56), (0.6, 1.23), (0.8, -1.03),
    (1.0, -2.89), (1.2, -4.06), (1.4, -4.39), (1.6, -3.88), (1.8, -2.64),
    (2.0, -0.92), (2.2, 0.95), (2.4, 2.63), (2.6, 3.79), (2.8, 4.22),
    (3.0, 3.8), (3.2, 2.56), (3.4, 0.68), (3.6, -1.58), (3.8, -3.84),
    (4.0, -5.76), (4.2, -7.01), (4.4, -7.38), (4.6, -6.76), (4.8, -5.22),
]

columns = ['y', 'sin(x)', 'cos(x)', 'sin(2x)', 'cos(2x)']

# One row per sample: y followed by sin and cos of x, then sin and cos of 2x,
# in the same order as `columns`.
new_data = [
    [y, *(trig(k * x) for k in (1, 2) for trig in (math.sin, math.cos))]
    for x, y in data
]

df = DataFrame.from_array(new_data, columns)

regressor = LinearRegressor(df, 'y')

print(regressor.coefficients)
'''
import matplotlib.pyplot as plt
plt.style.use('bmh')

x_points = []
predicted_points = []

x = 0
while x <= 5 :
    data_dict = {'sin(x)' : math.sin(x), 'cos(x)' : math.cos(x), 'sin(2x)' : math.sin(2 * x), 'cos(2x)' : math.cos(2 * x)}
    x_points.append(x)
    predicted_points.append(regressor.predict(data_dict))
    x+=0.1
예제 #12
0
def replace_none(element):
    """Return ``avg_age`` for a missing (``None``) entry, otherwise the entry itself."""
    # Identity test: `== None` would defer to the element's own __eq__.
    return avg_age if element is None else element


df = df.apply('Age', replace_none)

# Indicator column: 1 where SibSp is 0, else 0.  The third argument of
# add_data is the index right after 'SibSp' (presumably the insert position).
sibsp_0 = df.apply('SibSp', (lambda element: 1 if
                             (element == 0) else 0)).data_dict['SibSp']
df = df.add_data('SibSp=0', sibsp_0, df.columns.index('SibSp') + 1)

# Same 0/1 indicator for Parch, passed with the name 'Parch=0'.
df = df.apply('Parch', (lambda element: 1 if (element == 0) else 0), 'Parch=0')

df = df.create_dummy_variables(initial_key='CabinType', add_on='CabinType=')

df = df.create_dummy_variables(initial_key='Embarked', add_on='Embarked=')

# NOTE(review): df_1 is selected with only 'Sex', yet the regressor below is
# asked for 'Survived' as the dependent variable - confirm that
# select_columns/LinearRegressor handle this, or add 'Survived' here.
df_1 = df.select_columns(['Sex'])

titanic_reg_1 = LinearRegressor(df_1, 'Survived')
''''
df_1 = df.select_columns(['Sex'])

titanic_reg_1 = LinearRegressor(df_1,'Survived')

df_1 = df.select_columns(['Sex'])

titanic_reg_1 = LinearRegressor(df_1,'Survived')

df_1 = df.select_columns(['Sex'])

titanic_reg_1 = LinearRegressor(df_1,'Survived')

df_1 = df.select_columns(['Sex'])
import sys
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor

# Column names shared by the frame and by the prediction inputs.
_BEEF = 'slices of roast beef'
_PB = 'tbsp of peanut butter'

# Each row: [slices of roast beef, tbsp of peanut butter, rating].
df = DataFrame.from_array(
    [
        [0, 0, 1], [1, 0, 2], [2, 0, 4], [4, 0, 8], [6, 0, 9],
        [0, 2, 2], [0, 4, 5], [0, 6, 7], [0, 8, 6],
    ],
    columns=[_BEEF, _PB, 'rating'],
)

regressor = LinearRegressor(df, dependent_variable='rating')
print(regressor.coefficients)

# Predictions for 5 slices of beef, without and with 5 tbsp of peanut butter.
print(regressor.predict({_BEEF: 5, _PB: 0}))
print(regressor.predict({_BEEF: 5, _PB: 5}))
예제 #14
0
    "PassengerId", "Survived", "Pclass", "Surname", "Sex", "Age", "SibSp",
    "SibSp=0", "Parch=0", "TicketType", "TicketNumber", "Fare", "CabinType=A",
    "CabinType=B", "CabinType=C", "CabinType=D", "CabinType=E", "CabinType=F",
    "CabinType=G", "CabinType=None", "CabinType=T", "CabinNumber",
    "Embarked=C", "Embarked=None", "Embarked=Q", "Embarked=S"
]

new_df = DataFrame(new_dict, new_columns)

# Rows 0..500 form the training set; all remaining rows form the test set.
training_df = new_df.select_rows(list(range(501)))
testing_df = new_df.select_rows(list(range(501, len(new_df.to_array()))))

print('\nSex:')
sex_df = training_df.select(['Sex', 'Survived'])
test_sex_df = testing_df.select(['Sex', 'Survived'])
survival_regressor = LinearRegressor(sex_df, 'Survived')
print(survival_regressor.coefficients)

# Every column except the last one is fed to predict().
predictor_names = sex_df.columns[:-1]

correct_classifications = 0
for i in range(len(training_df.data_dict['Sex'])):
    observation = {var: sex_df.data_dict[var][i] for var in predictor_names}
    # Rounded prediction, capped at 1 (there is no lower cap).
    prediction = min(round(survival_regressor.predict(observation)), 1)
    if prediction == sex_df.data_dict['Survived'][i]:
        correct_classifications += 1

print('train accuracy:',
      correct_classifications / len(training_df.data_dict['Sex']))
                   [],['mayo'],['jelly'],['mayo','jelly']],
}
df = DataFrame(data_dict)
print("\n Testing columns of DataFrame")
assert df.columns == ['beef', 'pb', 'condiments'], 'Incorrect columns for DataFrame'
print("     passed")

# Three chained steps ending with the constant column and the ratings.
df = (
    df.create_dummy_variables()
    .append_pairwise_interactions()
    .append_columns({
        'constant': [1] * len(data_dict['beef']),
        'rating': [1, 1, 4, 0, 4, 8, 1, 0, 5, 0, 9, 0, 0, 0, 0, 0],
    })
)


linear_regressor = LinearRegressor(df, prediction_column='rating')
print("\n Testing Coefficients of LinearRegressor")
assert linear_regressor.coefficients == {
    'beef': 0.25,
    'pb': 0.4,
    'mayo': -1.25,
    'jelly': 1.5,
    'beef_pb': -0.21,
    'beef_mayo': 1.05,
    'beef_jelly': -0.85,
    'pb_mayo': -0.65,
    'pb_jelly': 0.65,
    'mayo_jelly': -3.25,
    'constant': 2.19,
}, 'Incorrect coefficients for Linear Regressor'
print("     passed")

print("\n Testing Gathering all inputs of LinearRegressor")
# A fresh input dictionary is passed to each call on purpose.
assert linear_regressor.gather_all_inputs(
    {'beef': 5, 'pb': 5, 'mayo': 1, 'jelly': 1}
) == {
    'beef': 5,
    'pb': 5,
    'mayo': 1,
    'jelly': 1,
    'beef_pb': 25,
    'beef_mayo': 5,
    'beef_jelly': 5,
    'pb_mayo': 5,
    'pb_jelly': 5,
    'mayo_jelly': 1,
    'constant': 1,
}, 'Incorrect gather_all_inputs'
print("     passed")


print("\n Testing Prediction #1")
assert linear_regressor.predict(
    {'beef': 5, 'pb': 5, 'mayo': 1, 'jelly': 1}
) == -1.81, 'Incorrect prediction from linear regressor'
print("     passed")
print("\n Testing Prediction #2")
assert linear_regressor.predict({
    'beef': 0,
import sys
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor

# (x, y) samples.
dataset = [
    (0.0, 4.0), (0.2, 8.9), (0.4, 17.2), (0.6, 28.3), (0.8, 41.6),
    (1.0, 56.5), (1.2, 72.4), (1.4, 88.7), (1.6, 104.8), (1.8, 120.1),
    (2.0, 134.0), (2.2, 145.9), (2.4, 155.2), (2.6, 161.3), (2.8, 163.6),
    (3.0, 161.5), (3.2, 154.4), (3.4, 141.7), (3.6, 122.8), (3.8, 97.1),
    (4.0, 64.0), (4.2, 22.9), (4.4, -26.8), (4.6, -85.7), (4.8, -154.4),
]

# Each sample becomes (x, x^2, x^3, y), matching the column names below.
new_columns = ['x', 'x^2', 'x^3', 'y']
new_dataset = [(x, x**2, x**3, y) for x, y in dataset]

df = DataFrame.from_array(new_dataset, new_columns)
polynomial_regressor = LinearRegressor(df, 'y')
polynomial_regressor_coefficients = polynomial_regressor.coefficients
print("polynomial_regressor_coefficients:", polynomial_regressor_coefficients)
예제 #17
0
     [5, 0, ['jelly'], 1], [5, 0, ['mayo', 'jelly'], 0], [0, 5, [], 5],
     [0, 5, ['mayo'], 0], [0, 5, ['jelly'], 9], [0, 5, ['mayo', 'jelly'], 0],
     [5, 5, [], 0], [5, 5, ['mayo'], 0], [5, 5, ['jelly'], 0],
     [5, 5, ['mayo', 'jelly'], 0]],
    columns=['beef', 'pb', 'condiments', 'rating'])
# Dummy variables for 'condiments', then one create_interaction_terms call
# per column pair, applied in this order.
df = (
    df.create_dummy_variables('condiments')
    .create_interaction_terms('beef', 'pb')
    .create_interaction_terms('beef', 'mayo')
    .create_interaction_terms('beef', 'jelly')
    .create_interaction_terms('pb', 'mayo')
    .create_interaction_terms('pb', 'jelly')
    .create_interaction_terms('mayo', 'jelly')
)

# Separate DataFrame built from the same data dictionary and column list.
lin_df = DataFrame(df.data_dict, df.columns)

linear_regressor = LinearRegressor(lin_df, dependent_variable='rating')
print('linear_regressor')
'''
print(linear_regressor.predict({'beef': 8, 'pb': 0 , 'mayo': 1, 'jelly': 0, 'beef * pb': 0, 'beef * mayo': 8, 'beef * jelly': 0, 'pb * mayo': 0, 'pb * jelly': 0, 'mayo * jelly': 0}))
print(linear_regressor.predict({'beef': 0, 'pb': 4 , 'mayo': 0, 'jelly': 1, 'beef * pb': 0, 'beef * mayo': 0, 'beef * jelly': 0, 'pb * mayo': 0, 'pb * jelly': 4, 'mayo * jelly': 0}))
print(linear_regressor.predict({'beef': 0, 'pb': 4 , 'mayo': 1, 'jelly': 0, 'beef * pb': 0, 'beef * mayo': 0, 'beef * jelly': 0, 'pb * mayo': 4, 'pb * jelly': 0, 'mayo * jelly': 0}))
print(linear_regressor.predict({'beef': 8, 'pb': 4 , 'mayo': 1, 'jelly': 0, 'beef * pb': 32, 'beef * mayo': 8, 'beef * jelly': 0, 'pb * mayo': 4, 'pb * jelly': 0, 'mayo * jelly': 0}))
print(linear_regressor.predict({'beef': 8, 'pb': 0 , 'mayo': 1, 'jelly': 1, 'beef * pb': 0, 'beef * mayo': 8, 'beef * jelly': 8, 'pb * mayo': 0, 'pb * jelly': 0, 'mayo * jelly': 1}))'''

df = DataFrame.from_array(
    [[0, 0, [], 1], [0, 0, ['mayo'], 1], [0, 0, ['jelly'], 4],
     [0, 0, ['mayo', 'jelly'], 0], [5, 0, [], 4], [5, 0, ['mayo'], 8],
     [5, 0, ['jelly'], 1], [5, 0, ['mayo', 'jelly'], 0], [0, 5, [], 5],
     [0, 5, ['mayo'], 0], [0, 5, ['jelly'], 9], [0, 5, ['mayo', 'jelly'], 0],
     [5, 5, [], 0], [5, 5, ['mayo'], 0], [5, 5, ['jelly'], 0],
     [5, 5, ['mayo', 'jelly'], 0]],
예제 #18
0
import sys

sys.path.append('src')
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor
from matrix import Matrix
from dataframe import DataFrame

# Column names shared by the first frame and by the prediction inputs.
_BEEF = 'slices of roast beef'
_PB = 'tablespoons of peanut butter'

# Each row: [slices of roast beef, tablespoons of peanut butter, rating].
df = DataFrame.from_array(
    [
        [0, 0, 1], [1, 0, 2], [2, 0, 4], [4, 0, 8], [6, 0, 9],
        [0, 2, 2], [0, 4, 5], [0, 6, 7], [0, 8, 6],
    ],
    columns=[_BEEF, _PB, 'rating'],
)
regressor = LinearRegressor(df, dependent_variable='rating')

print(regressor.coefficients)

# Predictions for 5 slices of beef, without and with 5 tablespoons of
# peanut butter.
print(regressor.predict({_BEEF: 5, _PB: 0}))

print(regressor.predict({_BEEF: 5, _PB: 5}))

# Second frame: the same nine rows plus two rows where both ingredients are
# present, under the short column names.
df = DataFrame.from_array(
    [
        [0, 0, 1], [1, 0, 2], [2, 0, 4], [4, 0, 8], [6, 0, 9],
        [0, 2, 2], [0, 4, 5], [0, 6, 7], [0, 8, 6],
        [2, 2, 0], [3, 4, 0],
    ],
    columns=['beef', 'pb', 'rating'],
)
예제 #19
0
            guess = 1
        else:
            guess = 0
        classifications.append((testing_passenger_ids[index], guess))
    return classifications


print('\n')

# One single-element row per entry of survived_people whose position is
# contained in dataframe_indices.
ratings = [[survived] for index, survived in enumerate(survived_people)
           if index in dataframe_indices]

# Constant column of ones, one entry per element of dataframe_indices.
# NOTE(review): the return value of append_columns is discarded here -
# confirm that it modifies `dataframe` in place rather than returning a copy.
dataframe.append_columns({'constant': [1 for _ in dataframe_indices]})

# Linear model: build, solve, classify the test frame and print each result.
linear_regressor = LinearRegressor(dataframe,
                                   ratings,
                                   prediction_column='Survived')
linear_regressor.solve_coefficients()
linear_regressor_classifications = get_classifications(linear_regressor,
                                                       testing_dataframe)
for row in linear_regressor_classifications:
    print(row)

print('\n')
# Logistic model: same inputs and the same steps as the linear model above.
logistic_regressor = LogisticRegressor(dataframe,
                                       ratings,
                                       prediction_column='Survived')
logistic_regressor.solve_coefficients()
logistic_regressor_classifications = get_classifications(
    logistic_regressor, testing_dataframe)
for row in logistic_regressor_classifications:
예제 #20
0
import sys
sys.path.append('src')
from linear_regressor import LinearRegressor
from matrix import Matrix
from dataframe import DataFrame

# First data set; each row: [hours worked, progress].
df = DataFrame.from_array(
    [
        [1, 0.2],
        [2, 0.25],
        [3, 0.5],
    ],
    columns=['hours worked', 'progress'],
)
regressor = LinearRegressor(df, dependent_variable='progress')

# Checks for the first data set, currently disabled:
# print('Testing attribute coefficients...')
# assert [round(i,5) for i in regressor.coefficients] == [0.01667, 0.15]
# print('PASSED')

# print('Testing method predict...')
# assert round(regressor.predict({'hours worked': 4}),5) == 0.61667
# print('PASSED')

# Assignment 40
# Second data set; each row: [scoops of chocolate, scoops of vanilla,
# taste rating].  `df` and `regressor` are rebound to it.
df = DataFrame.from_array(
    [
        [0, 0, 0.1],
        [1, 0, 0.2],
        [0, 2, 0.5],
        [4, 5, 0.6],
    ],
    columns=['scoops of chocolate', 'scoops of vanilla', 'taste rating'],
)
regressor = LinearRegressor(df, dependent_variable='taste rating')

print('Testing attribute coefficients...')
assert {
    key: round(regressor.coefficients[key], 8)
    for key in regressor.coefficients
} == {
    'constant': 0.19252336,
    'scoops of chocolate': -0.05981308,
regressor = LinearRegressor(df, dependent_variable='progress')

print('Does all the linear_regressor stuff work')

assert regressor.coefficients == [0.01667, 0.15], 'No, coefficients does not work'

assert regressor.predict({'hours worked': 4}) == 0.61667, 'No, predict does not work'

print('Yes they do', "\n")
'''

# Each row: [scoops of chocolate, scoops of vanilla, taste rating].
df = DataFrame.from_array(
    [
        [0, 0, 0.1],
        [1, 0, 0.2],
        [0, 2, 0.5],
        [4, 5, 0.6],
    ],
    columns=['scoops of chocolate', 'scoops of vanilla', 'taste rating'],
)

regressor = LinearRegressor(df, dependent_variable='taste rating')

print('Does all the linear_regressor stuff work')

# Compare against the expected values after rounding to 8 decimal places;
# the regressor's own coefficients are left unrounded.
reg_coeff = {
    name: round(coefficient, 8)
    for name, coefficient in regressor.coefficients.items()
}

assert reg_coeff == {
    'constant': 0.19252336,
    'scoops of chocolate': -0.05981308,
    'scoops of vanilla': 0.13271028,
}, 'No, coefficients does not work'

assert round(
    regressor.predict({
    def solve_coefficients(self):
        """Fit a ``LinearRegressor`` and keep its coefficients on this object.

        The regressor is built from ``self.df`` and ``self.dependent_variable``;
        its ``coefficients`` attribute is stored as ``self.coefficients``.
        """

        linear_regressor = LinearRegressor(self.df, self.dependent_variable)
        self.coefficients = linear_regressor.coefficients