Python DataFrame.from_array 예제들, dataframe.DataFrame.from_array Python 예제들

예제 #1

0

파일 보기

파일: decision_tree.py 프로젝트: DrM00G/machine-learning

 def split(self, max_depth, depth):
     """Grow the tree below this node by at most one level.

     A pure node (impurity 0), or one for which ``max_depth`` no longer
     allows growth, is only flagged as not unsplit. Otherwise an unsplit
     node is partitioned on ``self.best_split`` into ``self.low`` and
     ``self.high``; a node that already has children forwards the call to
     each impure child while the next depth is still allowed.
     ``max_depth == False`` disables the depth limit.
     """
     if self.impurity == 0 or not (max_depth > depth
                                   or max_depth == False):
         self.unsplit = False
         return

     if not self.unsplit:
         # Children already exist: descend only if another level is allowed.
         if max_depth > depth + 1 or max_depth == False:
             for child in (self.low, self.high):
                 if child.impurity != 0:
                     child.split(max_depth, depth + 1)
         return

     # best_split[0] names the feature ('x' -> column 0, anything else ->
     # column 1) and best_split[1] is the threshold.
     column = 0 if self.best_split[0] == 'x' else 1
     threshold = self.best_split[1]
     below, at_or_above = [], []
     for row in self.df.to_array():
         value = row[column]
         if value < threshold:
             below.append(row)
         elif value >= threshold:
             at_or_above.append(row)

     header = self.df.columns
     self.low = Node(DataFrame.from_array(below, header), self.split_metric)
     self.high = Node(DataFrame.from_array(at_or_above, header),
                      self.split_metric)
     self.unsplit = False

예제 #2

0

파일 보기

파일: polynomial_regressor.py 프로젝트: charliebrown364/machine-learning

    def fit(self, dataframe, dependent_variable):
        """Build the polynomial feature table for this regressor.

        The first column of ``dataframe`` is taken as the independent
        variable. Each row is read by position as (independent, dependent)
        and expanded to the powers 1..``self.degree`` of the independent
        value followed by the dependent value. The result is stored in
        ``self.df``.
        """
        self.first_variable = dataframe.columns[0]
        self.dependent_variable = dependent_variable

        # Degree 0 has no predictor columns at all; otherwise the first
        # power keeps the plain variable name and higher powers get '^k'.
        if self.degree == 0:
            new_columns = []
        else:
            new_columns = [self.first_variable] + [
                self.first_variable + '^' + str(power)
                for power in range(2, self.degree + 1)
            ]
        new_columns.append(self.dependent_variable)

        new_dataset = [
            [pair[0]**power for power in range(1, self.degree + 1)] +
            [pair[1]]
            for pair in dataframe.to_array()
        ]

        self.df = DataFrame.from_array(new_dataset, new_columns)

예제 #3

0

파일 보기

파일: logistic_regressor.py 프로젝트: DrM00G/machine-learning

 def __init__(self,
              data_class,
              prediction_column,
              max_value,
              delta,
              constant=True):
     """Prepare the data and solve for the regression multipliers.

     Keeps a copy of the input (with a leading 'constant' column of ones)
     in ``self.original_data``, stores in ``self.data`` the input with
     ``set_bound_replacements(delta, value)`` applied to the prediction
     column, optionally adds the same 'constant' column to it, and then
     calls ``solve_coefficients``. Progress is printed with the '#0',
     '#1' and '#2' prefixes.
     """
     super().__init__(data_class, prediction_column)
     self.prediction = prediction_column
     self.current_input = None
     self.max_val = max_value

     # Independent copy of the incoming data, printed before modification.
     self.original_data = DataFrame.from_array(data_class.to_array(),
                                               data_class.columns)
     print("#0" + str(self.original_data.to_array()))
     original_ones = [1] * len(data_class.to_array())
     self.original_data = self.original_data.append_columns(
         {'constant': original_ones}, ['constant'] + data_class.columns)

     def replace_bounds(value):
         return self.set_bound_replacements(delta, value)

     self.data = data_class.apply(self.prediction_column, replace_bounds)
     if constant:
         data_ones = [1] * len(self.data.to_array())
         self.data = self.data.append_columns(
             {'constant': data_ones}, ['constant'] + self.data.columns)

     self.multipliers = self.solve_coefficients()
     print("#1" + str(self.multipliers))
     print("#2" + str(self.original_data.to_array()))

예제 #4

0

파일 보기

 def nearest_neighbors(self, observation):
     """Return the distance table reordered by repeated selection.

     Rows from ``compute_distances`` are removed one at a time at the
     index chosen by ``sort_closest_cookie``; the removal order is then
     reversed and returned as a ('distance', 'Cookie Type') DataFrame.
     """
     remaining = self.compute_distances(observation).to_array()
     picked = []
     while remaining:
         position = self.sort_closest_cookie(remaining)
         picked.append(remaining.pop(position))
     picked.reverse()
     return DataFrame.from_array(picked,
                                 columns=['distance', 'Cookie Type'])

예제 #5

0

파일 보기

파일: decision_tree.py 프로젝트: DrM00G/machine-learning

 def calc_goodness(self, split, axis):
     """Return the impurity reduction of splitting column ``axis`` at ``split``.

     Rows are divided into those below the threshold and those at or
     above it; each side becomes a temporary Node whose impurity, weighted
     by its share of this node's rows, is subtracted from this node's
     impurity. The result is rounded to 3 decimals.
     """
     below, at_or_above = [], []
     for row in self.df.to_array():
         value = row[axis]
         if value < split:
             below.append(row)
         elif value >= split:
             at_or_above.append(row)

     header = self.df.columns
     children = (
         Node(DataFrame.from_array(below, header), self.split_metric),
         Node(DataFrame.from_array(at_or_above, header), self.split_metric),
     )

     remaining = self.impurity
     for child in children:
         weight = len(child.row_indices) / len(self.row_indices)
         remaining -= weight * child.impurity
     return round(remaining, 3)

예제 #6

0

파일 보기

파일: k_nearest_neighbors.py 프로젝트: geomeza/machine-learning

 def compute_distances(self, observation):
     """Return a ('Distance', 'Cookie Type') table for ``observation``.

     For every stored row the Euclidean distance is taken over the keys of
     ``observation``; the first entry of the row is used as its label.
     """
     rows = self.dataframe.to_array()
     by_column = self.dataframe.data_dict
     table = []
     for index, row in enumerate(rows):
         squared_gaps = [(observation[key] - by_column[key][index])**2
                         for key in observation]
         table.append([sum(squared_gaps)**(0.5), row[0]])
     return DataFrame.from_array(table, ['Distance', 'Cookie Type'])

예제 #7

0

파일 보기

 def split(self, if_once=False, depth_needed=None):
     """Split this node, or pass the request down to existing children.

     Nothing happens once ``self.depth`` has reached ``depth_needed`` (when
     given), when the node is marked as a final split, or when no best
     split is found. A leaf is otherwise partitioned on its best split,
     the split is recorded in ``self.tree.splits`` under the depth as a
     string, and, unless ``if_once`` is set, both new children are split
     further.
     """
     if depth_needed is not None and not self.depth < depth_needed:
         return

     if self.low is not None or self.high is not None:
         # Already split: forward the request to whichever children exist.
         if self.low is not None:
             self.low.split(if_once, depth_needed=depth_needed)
         if self.high is not None:
             self.high.split(if_once, depth_needed=depth_needed)
         return

     if self.final_split is not False:
         return

     self.possible_splits = self.get_possible_splits()
     self.get_best_split()
     if self.best_split is None:
         return

     self.tree.splits.setdefault(str(self.depth), []).append(self.best_split)

     below, at_or_above = [], []
     for row in self.df.to_array():
         value = row[self.best_split_index]
         if value < self.best_split[1]:
             below.append(row)
         elif value >= self.best_split[1]:
             at_or_above.append(row)

     header = self.df.columns
     self.low = Node(DataFrame.from_array(below, header),
                     self.split_metric, (self.depth + 1),
                     tree=self.tree)
     self.high = Node(DataFrame.from_array(at_or_above, header),
                      self.split_metric, (self.depth + 1),
                      tree=self.tree)
     if if_once:
         return
     self.low.split(depth_needed=depth_needed)
     self.high.split(depth_needed=depth_needed)

예제 #8

0

파일 보기

파일: csv_training.py 프로젝트: geomeza/machine-learning

def run_tests(training_set, testing_set, decision_tree, forest = False):
    """Fit a model on ``training_set`` and score it on ``testing_set``.

    Rows are read by position as (bmi, weight, class). With ``forest`` set
    the model's ``predict`` is used, otherwise its ``classify``. Returns
    ``(number of correct predictions, number of test rows)``.
    """
    decision_tree.fit(
        DataFrame.from_array(training_set, ['bmi', 'weight', 'class']))

    def guess(sample):
        features = {'bmi' : sample[0], 'weight' : sample[1]}
        if forest:
            return decision_tree.predict(features)
        return decision_tree.classify(features)

    hits = sum(1 for sample in testing_set if guess(sample) == sample[2])
    return hits, len(testing_set)

예제 #9

0

파일 보기

 def compute_distances(self, observation):
     """Return a ('distance', 'Cookie Type') table for ``observation``.

     Each stored row contributes one output row: the value of
     ``compute_distance(observation, features)``, where ``features`` is the
     row without the prediction column, followed by the row's value in the
     prediction column.
     """
     # Evaluated once up front: the previous version re-ran to_array() and
     # columns.index() for every row, making the method quadratic in the
     # number of rows.
     rows = self.df.to_array()
     label_position = self.df.columns.index(self.prediction_column)

     result = []
     for row in rows:
         features = [
             value for position, value in enumerate(row)
             if position != label_position
         ]
         result.append([
             self.compute_distance(observation, features),
             row[label_position]
         ])
     return DataFrame.from_array(result,
                                 columns=['distance', 'Cookie Type'])

예제 #10

0

파일 보기

 def calc_goodness(self, split, axis_index):
     """Return the impurity reduction of splitting ``axis_index`` at ``split``.

     Rows below the threshold and rows at or above it are wrapped in
     temporary child Nodes (one level deeper, with ``check_splits=False``);
     each child's impurity, weighted by its share of this node's rows, is
     subtracted from this node's impurity.
     """
     below, at_or_above = [], []
     for row in self.df.to_array():
         value = row[axis_index]
         if value < split:
             below.append(row)
         elif value >= split:
             at_or_above.append(row)

     header = self.df.columns
     shared = dict(check_splits=False, tree=self.tree)
     low_node = Node(DataFrame.from_array(below, header),
                     self.split_metric,
                     depth=int(self.depth) + 1,
                     **shared)
     high_node = Node(DataFrame.from_array(at_or_above, header),
                      self.split_metric,
                      depth=(self.depth + 1),
                      **shared)

     remaining = self.impurity
     for child in (low_node, high_node):
         weight = len(child.row_indices) / len(self.row_indices)
         remaining -= weight * child.impurity
     return remaining

예제 #11

0

파일 보기

파일: decision_tree.py 프로젝트: DrM00G/machine-learning

 def calc_possible_splits(self):
     """Return every candidate split on 'x' and 'y' with its goodness.

     For each feature the distinct values are taken in first-seen order
     and a candidate threshold is placed midway between each consecutive
     pair. The result is a DataFrame with the columns 'feature', 'value'
     and 'goodness of split'.
     """
     # NOTE(review): values are not sorted here, so midpoints are taken
     # between neighbours in stored order; presumably ordered_dict already
     # holds them in a meaningful order, confirm against the DataFrame.
     candidates = []
     for axis, feature in enumerate(('x', 'y')):
         # dict.fromkeys drops duplicates in one pass while keeping
         # first-seen order (the old list-membership test was quadratic).
         distinct = list(dict.fromkeys(self.df.ordered_dict[feature]))
         for lower, upper in zip(distinct, distinct[1:]):
             midpoint = (lower + upper) / 2
             candidates.append(
                 [feature, midpoint,
                  self.calc_goodness(midpoint, axis)])
     return DataFrame.from_array(candidates,
                                 ['feature', 'value', 'goodness of split'])

예제 #12

0

파일 보기

 def get_possible_splits(self):
     """List candidate splits as (axis, split_value, goodness) rows.

     Feature columns are all columns except 'class' and 'indices'. For
     each feature a threshold is placed midway between consecutive entries
     of ``self.distinct_values``. With the 'random' split metric only the
     candidates of one randomly chosen axis are kept, and a plain empty
     list is returned when there are no candidates at all.
     """
     feature_names = [
         name for name in self.df.columns
         if name not in ('class', 'indices')
     ]
     candidates = []
     for column, values in enumerate(self.distinct_values):
         for lower, upper in zip(values, values[1:]):
             midpoint = (lower + upper) / 2
             candidates.append([
                 feature_names[column], midpoint,
                 self.calc_goodness(midpoint, column)
             ])

     if self.split_metric == 'random':
         available = list({entry[0] for entry in candidates})
         if not available:
             return []
         chosen = random.choice(available)
         candidates = [entry for entry in candidates if entry[0] == chosen]

     return DataFrame.from_array(
         candidates, ['axis', 'split_value', 'goodness of split'])

예제 #13

0

파일 보기

파일: test_dataframe.py 프로젝트: anton-perez/machine-learning

data_dict = {'Pete': [1, 0, 1, 0], 'John': [2, 1, 0, 2], 'Sarah': [3, 1, 4, 0]}

df1 = DataFrame(data_dict, column_order=['Pete', 'John', 'Sarah'])
df2 = df1.apply('John', lambda x: 7 * x)
print('Testing method "apply"...')
assert df2.data_dict == {
    'Pete': [1, 0, 1, 0],
    'John': [14, 7, 0, 14],
    'Sarah': [3, 1, 4, 0]
}
print('PASSED')

columns = ['firstname', 'lastname', 'age']
arr = [['Kevin', 'Fray', 5], ['Charles', 'Trapp', 17], ['Anna', 'Smith', 13],
       ['Sylvia', 'Mendez', 9]]
df = DataFrame.from_array(arr, columns)

print('Testing method "select_rows_where"...')
assert df.where(lambda row: len(row['firstname']) >= len(row['lastname']) and
                row['age'] > 10).to_array() == [['Charles', 'Trapp', 17]]
print('PASSED')

print('Testing method "order_by"...')
assert df.order_by('age', order="ASC").to_array() == [['Kevin', 'Fray', 5],
                                                      ['Sylvia', 'Mendez', 9],
                                                      ['Anna', 'Smith', 13],
                                                      ['Charles', 'Trapp', 17]]

assert df.order_by('firstname',
                   order="DESC").to_array() == [['Sylvia', 'Mendez', 9],
                                                ['Kevin', 'Fray', 5],

예제 #14

0

파일 보기

파일: polynomial_regression.py 프로젝트: charliebrown364/machine-learning

import sys
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor

# Sample (x, y) points used for the fit below.
dataset = [(0.0, 4.0), (0.2, 8.9), (0.4, 17.2), (0.6, 28.3), (0.8, 41.6),
           (1.0, 56.5), (1.2, 72.4), (1.4, 88.7), (1.6, 104.8), (1.8, 120.1),
           (2.0, 134.0), (2.2, 145.9), (2.4, 155.2), (2.6, 161.3),
           (2.8, 163.6), (3.0, 161.5), (3.2, 154.4), (3.4, 141.7),
           (3.6, 122.8), (3.8, 97.1), (4.0, 64.0), (4.2, 22.9), (4.4, -26.8),
           (4.6, -85.7), (4.8, -154.4)]

# Expand every (x, y) pair into the row (x, x^2, x^3, y) so that a linear
# regression over these columns amounts to a cubic fit in x.
new_columns = ['x', 'x^2', 'x^3', 'y']
new_dataset = [(pair[0], pair[0]**2, pair[0]**3, pair[1]) for pair in dataset]

df = DataFrame.from_array(new_dataset, new_columns)
# Second argument names the column to predict (presumably the dependent
# variable, as in the keyword form used elsewhere -- confirm).
polynomial_regressor = LinearRegressor(df, 'y')
polynomial_regressor_coefficients = polynomial_regressor.coefficients
print("polynomial_regressor_coefficients:", polynomial_regressor_coefficients)

예제 #15

0

파일 보기

import sys
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor
import math
import matplotlib.pyplot as plt

# Sampled (x, y) signal that will be modelled with sine/cosine terms.
df = DataFrame.from_array([(0.0, 7.0), (0.2, 5.6), (0.4, 3.56), (0.6, 1.23),
                           (0.8, -1.03), (1.0, -2.89), (1.2, -4.06),
                           (1.4, -4.39), (1.6, -3.88), (1.8, -2.64),
                           (2.0, -0.92), (2.2, 0.95), (2.4, 2.63), (2.6, 3.79),
                           (2.8, 4.22), (3.0, 3.8), (3.2, 2.56), (3.4, 0.68),
                           (3.6, -1.58), (3.8, -3.84), (4.0, -5.76),
                           (4.2, -7.01), (4.4, -7.38), (4.6, -6.76),
                           (4.8, -5.22)],
                          columns=['x', 'y'])
# Derive one column per basis function from 'x' (apply_add presumably
# appends a column named by its third argument -- confirm in DataFrame).
df = df.apply_add('x', lambda x: math.sin(x), 'sin(x)')
df = df.apply_add('x', lambda x: math.cos(x), 'cos(x)')
df = df.apply_add('x', lambda x: math.sin(2 * x), 'sin(2*x)')
df = df.apply_add('x', lambda x: math.cos(2 * x), 'cos(2*x)')
# Keep copies of the raw x and y values before 'x' is dropped below.
x_values = list(df.data_dict['x'])
y_values = list(df.data_dict['y'])

# Drop raw 'x' so only the derived columns remain alongside 'y'.

df = df.del_column('x')
# Fit 'y' against the remaining columns.
linear_regressor = LinearRegressor(df, dependent_variable='y')

예제 #16

0

파일 보기

#   #   [ 5,  0,  1,  0,  0,  5,  0,  0,  0,  0,  1,  8],
#   #   [ 5,  0,  0,  1,  0,  0,  5,  0,  0,  0,  1,  1],
#   #   [ 5,  0,  1,  1,  0,  5,  5,  0,  0,  1,  1,  0],
#   #   [ 0,  5,  0,  0,  0,  0,  0,  0,  0,  0,  1,  5],
#   #   [ 0,  5,  1,  0,  0,  0,  0,  5,  0,  0,  1,  0],
#   #   [ 0,  5,  0,  1,  0,  0,  0,  0,  5,  0,  1,  9],
#   #   [ 0,  5,  1,  1,  0,  0,  0,  5,  5,  1,  1,  0],
#   #   [ 5,  5,  0,  0, 25,  0,  0,  0,  0,  0,  1,  0],
#   #   [ 5,  5,  1,  0, 25,  5,  0,  5,  0,  0,  1,  0],
#   #   [ 5,  5,  0,  1, 25,  0,  5,  0,  5,  0,  1,  0],
#   #   [ 5,  5,  1,  1, 25,  5,  5,  5,  5,  1,  1,  0]]

columns = ['firstname', 'lastname', 'age']
arr = [['Kevin', 'Fray', 5], ['Charles', 'Trapp', 17], ['Anna', 'Smith', 13],
       ['Sylvia', 'Mendez', 9]]
df = DataFrame.from_array(arr, columns)

print(df.to_array())

print(df.select_columns(['firstname', 'age']).to_array())
# [['Kevin', 5],
# ['Charles', 17],
# ['Anna', 13],
# ['Sylvia', 9]]

print(df.select_rows([1, 3]).to_array())
# [['Charles', 'Trapp', 17],
# ['Sylvia', 'Mendez', 9]]

print(
    df.select_rows_where(lambda row: len(row['firstname']) >= len(row[

예제 #17

0

파일 보기

파일: poly_regression.py 프로젝트: snowthesprite/machine-learning

 (1.0, 56.5),
 (1.2, 72.4),
 (1.4, 88.7),
 (1.6, 104.8),
 (1.8, 120.1),
 (2.0, 134.0),
 (2.2, 145.9),
 (2.4, 155.2),
 (2.6, 161.3),
 (2.8, 163.6),
 (3.0, 161.5),
 (3.2, 154.4),
 (3.4, 141.7),
 (3.6, 122.8),
 (3.8, 97.1),
 (4.0, 64.0),
 (4.2, 22.9),
 (4.4, -26.8),
 (4.6, -85.7),
 (4.8, -154.4)]

df = DataFrame.from_array(arr, ['x', 'y'])

df = df.create_interaction_terms('x', 'x')

df = df.create_interaction_terms('x * x', 'x')

regressor = LinearRegressor(df, 'y')

print(regressor.coefficients)

예제 #18

0

파일 보기

import matplotlib.pyplot as plt

# (x, y) training pairs with a 0/1 outcome in the second position.
list_data = [[1, 0], [2, 0], [3, 0], [2, 1], [3, 1], [4, 1]]

# One regressor is fitted per delta; each produces one curve in all_coords.
delta_table = [0.1, 0.01, 0.001, 0.0001]
all_coords = []

for delta_low in delta_table:
    # Earlier manual replacement of the 0/1 outcomes by delta / 1-delta,
    # left disabled; delta is now passed to LogisticRegressor instead.
    # new_list=[]
    # for pair in list_data:
    #     if pair[1] == 0:
    #         new_list.append([pair[0],delta])
    #     else:
    #         new_list.append([pair[0],1-delta])

    df = DataFrame.from_array(list_data, columns=['x', 'y'])

    regressor = LogisticRegressor(df,
                                  prediction_column='y',
                                  max_value=1,
                                  delta=delta_low)

    # coords[0] collects plotted x positions, coords[1] the predictions.
    coords = [[], []]
    for x in range(20):
        # NOTE(review): the plotted position is x / 100 while the prediction
        # is made at x itself -- confirm this scaling mismatch is intended.
        coords[0].append(x / 100)
        coords[1].append(regressor.predict({'constant': 1, 'x': x}))
    all_coords.append(coords)
print(all_coords)
plt.style.use('bmh')
# One line per delta value.
for coords in all_coords:
    plt.plot(coords[0], coords[1], linewidth=2.5)

예제 #19

0

파일 보기

파일: test_dataframe.py 프로젝트: charliebrown364/machine-learning

import sys
sys.path.append('src')
from dataframe import DataFrame
sys.path.append('kaggle/titanic')
from parse_line import parse_line

df = DataFrame.from_array([['Kevin', 'Fray', 5], ['Charles', 'Trapp', 17],
                           ['Anna', 'Smith', 13], ['Sylvia', 'Mendez', 9]],
                          columns=['firstname', 'lastname', 'age'])

assert df.query(
    "SELECT lastname, firstname, age ORDER BY age DESC").to_array() == [[
        'Trapp', 'Charles', 17
    ], ['Smith', 'Anna', 13], ['Mendez', 'Sylvia', 9], ['Fray', 'Kevin', 5]]

print("\npassed test 1")

assert df.query("SELECT firstname ORDER BY lastname ASC").to_array() == [[
    'Kevin'
], ['Sylvia'], ['Anna'], ['Charles']]

print("\npassed test 2")

df = DataFrame.from_array(
    [['Kevin', 'Fray', 5], ['Melvin', 'Fray', 5], ['Charles', 'Trapp', 17],
     ['Carl', 'Trapp', 17], ['Anna', 'Smith', 13], ['Hannah', 'Smith', 13],
     ['Sylvia', 'Mendez', 9], ['Cynthia', 'Mendez', 9]],
    columns=['firstname', 'lastname', 'age'])

assert df.query(
    "SELECT lastname, firstname, age ORDER BY age ASC, firstname DESC"

예제 #20

0

파일 보기

파일: signal_separation.py 프로젝트: snowthesprite/machine-learning

import math

# Sampled (x, y) signal to be decomposed into sine/cosine components.
data = [(0.0, 7.0), (0.2, 5.6), (0.4, 3.56), (0.6, 1.23), (0.8, -1.03),
        (1.0, -2.89), (1.2, -4.06), (1.4, -4.39), (1.6, -3.88), (1.8, -2.64),
        (2.0, -0.92), (2.2, 0.95), (2.4, 2.63), (2.6, 3.79), (2.8, 4.22),
        (3.0, 3.8), (3.2, 2.56), (3.4, 0.68), (3.6, -1.58), (3.8, -3.84),
        (4.0, -5.76), (4.2, -7.01), (4.4, -7.38), (4.6, -6.76), (4.8, -5.22)]

# Column order matches the rows built below: y first, then the four terms.
columns = ['y', 'sin(x)', 'cos(x)', 'sin(2x)', 'cos(2x)']

# Replace each (x, y) pair by [y, sin x, cos x, sin 2x, cos 2x].
new_data = [[y, math.sin(x),
             math.cos(x),
             math.sin(2 * x),
             math.cos(2 * x)] for (x, y) in data]

df = DataFrame.from_array(new_data, columns)

# Fit 'y' against the trigonometric columns and show the coefficients.
regressor = LinearRegressor(df, 'y')

print(regressor.coefficients)
'''
import matplotlib.pyplot as plt
plt.style.use('bmh')

x_points = []
predicted_points = []

x = 0
while x <= 5 :
    data_dict = {'sin(x)' : math.sin(x), 'cos(x)' : math.cos(x), 'sin(2x)' : math.sin(2 * x), 'cos(2x)' : math.cos(2 * x)}
    x_points.append(x)

예제 #21

0

파일 보기

파일: rocket_takeoff_regression.py 프로젝트: anton-perez/machine-learning

import sys
sys.path.append('src')
from polynomial_regressor import PolynomialRegressor
from dataframe import DataFrame

# (time, distance) measurements.
data = [(1, 3.1), (2, 10.17), (3, 20.93), (4, 38.71), (5, 60.91), (6, 98.87),
        (7, 113.92), (8, 146.95), (9, 190.09), (10, 232.65)]

df = DataFrame.from_array(data, ['time', 'distance'])

# Degree-2 fit of distance against time.
quadratic_regressor = PolynomialRegressor(degree=2)
quadratic_regressor.fit(df, 'distance')
print('Quadratic Regressor:')
print(quadratic_regressor.coefficients)

# Predictions at two times inside the measured range and one far outside.
for t in [5, 10, 200]:
    print('Distance after ' + str(t) + ' seconds:',
          quadratic_regressor.predict({'time': t}))

# Fresh DataFrame from the same data for the second fit.
df = DataFrame.from_array(data, ['time', 'distance'])

# Degree-3 fit of the same data, evaluated at the same times.
cubic_regressor = PolynomialRegressor(degree=3)
cubic_regressor.fit(df, 'distance')
print('Cubic Regressor:')
print(cubic_regressor.coefficients)

for t in [5, 10, 200]:
    print('Distance after ' + str(t) + ' seconds:',
          cubic_regressor.predict({'time': t}))

예제 #22

0

파일 보기

파일: sandwich_rating.py 프로젝트: NateOwl1108/machine-learning

import sys
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor

# Each row is [slices of roast beef, tbsp of peanut butter, rating].
df = DataFrame.from_array(
    [[0, 0, 1], [1, 0, 2], [2, 0, 4], [4, 0, 8], [6, 0, 9], [0, 2, 2],
     [0, 4, 5], [0, 6, 7], [0, 8, 6]],
    columns=['slices of roast beef', 'tbsp of peanut butter', 'rating'])

# Fit the rating against both ingredient amounts.
regressor = LinearRegressor(df, dependent_variable='rating')
print(regressor.coefficients)
# Predicted rating for roast beef only ...
print(
    regressor.predict({
        'slices of roast beef': 5,
        'tbsp of peanut butter': 0
    }))
# ... and for both ingredients together.
print(
    regressor.predict({
        'slices of roast beef': 5,
        'tbsp of peanut butter': 5
    }))

예제 #23

0

파일 보기

import sys
sys.path.append('src')
from dataframe import DataFrame
from decision_tree import DecisionTree
from random_forest import RandomForest

# Each row is [x, y, class]; many points are repeated on purpose.
data = [[2,13,'B'],[2,13,'B'],[2,13,'B'],[2,13,'B'],[2,13,'B'],[2,13,'B'],
        [3,13,'B'],[3,13,'B'],[3,13,'B'],[3,13,'B'],[3,13,'B'],[3,13,'B'],
        [2,12,'B'],[2,12,'B'],
        [3,12,'A'],[3,12,'A'],
        [3,11,'A'],[3,11,'A'],
        [3,11.5,'A'],[3,11.5,'A'],
        [4,11,'A'],[4,11,'A'],
        [4,11.5,'A'],[4,11.5,'A'],
        [2,10.5,'A'],[2,10.5,'A'],
        [3,10.5,'B'],
        [4,10.5,'A'],
        [3, 9.5, 'A'],
        [2,10,'A']]

df = DataFrame.from_array(data, columns = ['x', 'y', 'class'])

# RandomForest(10): the argument is presumably the number of trees -- confirm.
r = RandomForest(10)
r.fit(df)
# Classify a point that is not part of the training data.
print(r.predict({'x': 3, 'y': 10}))

예제 #24

0

파일 보기

import sys
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor

 df = DataFrame.from_array(
[[0, 0, 1, 0], 
[1, 0, 2, 0], 
[2, 0, 4, 0], 
[4, 0, 8, 0], 
[6, 0, 9, 0], 
[0, 2, 2, 0], 
[0, 4, 5, 0], 
[0, 6, 7, 0], 
[0, 8, 6, 0],
[2, 2, 0.1, 4],
[3, 4, 0.1, 12]],
columns = ['beef', 'pb', 'rating', 'interactive']
)
log_reg = LogisticRegressor(df,10, dependent_variable = 'rating')
print(log_reg.predict({'beef': 5, 'pb': 0 , 'interactive':0}))
print(log_reg.predict({'beef': 12, 'pb': 0 , 'interactive':0}))
print(log_reg.predict({'beef': 5, 'pb': 5 , 'interactive':25}))

예제 #25

0

파일 보기

파일: test_linear_regressor.py 프로젝트: snowthesprite/machine-learning

df = DataFrame.from_array([[1,0.2], [2,0.25], [3,0.5]], columns = ['hours worked', 'progress'])

regressor = LinearRegressor(df, dependent_variable='progress')

print('Does all the linear_regressor stuff work')

assert regressor.coefficients == [0.01667, 0.15], 'No, coefficients does not work'

assert regressor.predict({'hours worked': 4}) == 0.61667, 'No, predict does not work'

print('Yes they do', "\n")
'''

df = DataFrame.from_array(
    [[0, 0, 0.1], [1, 0, 0.2], [0, 2, 0.5], [4, 5, 0.6]],
    columns=['scoops of chocolate', 'scoops of vanilla', 'taste rating'])

regressor = LinearRegressor(df, dependent_variable='taste rating')

print('Does all the linear_regressor stuff work')

reg_coeff = regressor.coefficients.copy()
for (key, value) in reg_coeff.items():
    reg_coeff[key] = round(value, 8)

assert reg_coeff == {
    'constant': 0.19252336,
    'scoops of chocolate': -0.05981308,
    'scoops of vanilla': 0.13271028
}, 'No, coefficients does not work'

예제 #26

0

파일 보기

파일: test_k_nearest_neighbors_classifier.py 프로젝트: anton-perez/machine-learning

import sys
sys.path.append('src')
from k_nearest_neighbors_classifier import KNearestNeighborsClassifier
from dataframe import DataFrame

# Each row is [cookie type, egg, butter, sugar, flour portions].
df = DataFrame.from_array(
    [['Shortbread', 0.14, 0.14, 0.28, 0.44],
     ['Shortbread', 0.10, 0.18, 0.28, 0.44],
     ['Shortbread', 0.12, 0.10, 0.33, 0.45],
     ['Shortbread', 0.10, 0.25, 0.25, 0.40], ['Sugar', 0.00, 0.10, 0.40, 0.50],
     ['Sugar', 0.00, 0.20, 0.40, 0.40], ['Sugar', 0.10, 0.08, 0.35, 0.47],
     ['Sugar', 0.00, 0.05, 0.30, 0.65], ['Fortune', 0.20, 0.00, 0.40, 0.40],
     ['Fortune', 0.25, 0.10, 0.30, 0.35], ['Fortune', 0.22, 0.15, 0.50, 0.13],
     ['Fortune', 0.15, 0.20, 0.35, 0.30], ['Fortune', 0.22, 0.00, 0.40, 0.38]],
    columns=[
        'Cookie Type', 'Portion Eggs', 'Portion Butter', 'Portion Sugar',
        'Portion Flour'
    ])

# Classifier with k=5, predicting 'Cookie Type' from the portion columns.
knn = KNearestNeighborsClassifier(k=5)
knn.fit(df, dependent_variable='Cookie Type')
# Recipe to compare against; keys are the four portion columns.
observation = {
    'Portion Eggs': 0.10,
    'Portion Butter': 0.15,
    'Portion Sugar': 0.30,
    'Portion Flour': 0.45
}

print(knn.compute_distances(observation).to_array())
# Returns a dataframe representation of the following array:

예제 #27

0

파일 보기

파일: test_random_forest.py 프로젝트: geomeza/machine-learning

# for i in range(len(pos_neg)):
#     correct_class = pos_neg[i][3]
#     observation = into_new_observation(pos_neg[i])
#     prediction = r.predict(observation)
#     if prediction == correct_class:
#         correct += 1

# assert correct/len(pos_neg) * 100 == 100, 'WRONG ACCURACY BRUH'

# Class 'A': every integer point of [-5, 5]^3 with no zero coordinate
# (10 * 10 * 10 = 1000 rows).
points = [[x, y, z, 'A'] for z in range(-5, 6) for y in range(-5, 6)
          for x in range(-5, 6) if x * y * z != 0]
# Class 'B': the 125 points of [1, 5]^3, added twice (250 rows). Each of
# those positions therefore appears once as 'A' and twice as 'B'.
points.extend([[x, y, z, 'B'] for z in range(1, 6) for y in range(1, 6)
               for x in range(1, 6) if x * y * z != 0])
points.extend([[x, y, z, 'B'] for z in range(1, 6) for y in range(1, 6)
               for x in range(1, 6) if x * y * z != 0])

df = DataFrame.from_array(points, columns=['x', 'y', 'z', 'class'])
r = RandomForest(100, depth=None)
r.fit(df)
correct = 0

# Score the forest on its own training rows.
for i in range(len(points)):
    correct_class = points[i][3]
    observation = into_new_observation(points[i])
    prediction = r.predict(observation)
    if prediction == correct_class:
        correct += 1

# Answering 'B' on the shared positions gets 875 + 250 = 1125 of the 1250
# rows right, which is the 90% expected here.
assert correct / len(points) * 100 == 90, 'WRONG ACCURACY BRUH'

print('passed')

예제 #28

0

파일 보기

import sys
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor

# Each row is [beef, pb, list of condiments, rating].
df = DataFrame.from_array(
    [[0, 0, [], 1], [0, 0, ['mayo'], 1], [0, 0, ['jelly'], 4],
     [0, 0, ['mayo', 'jelly'], 0], [5, 0, [], 4], [5, 0, ['mayo'], 8],
     [5, 0, ['jelly'], 1], [5, 0, ['mayo', 'jelly'], 0], [0, 5, [], 5],
     [0, 5, ['mayo'], 0], [0, 5, ['jelly'], 9], [0, 5, ['mayo', 'jelly'], 0],
     [5, 5, [], 0], [5, 5, ['mayo'], 0], [5, 5, ['jelly'], 0],
     [5, 5, ['mayo', 'jelly'], 0]],
    columns=['beef', 'pb', 'condiments', 'rating'])
# Presumably turns the condiment lists into per-condiment indicator columns
# ('mayo', 'jelly' are referenced below) -- confirm in DataFrame.
df = df.create_dummy_variables('condiments')
df = df.create_interaction_terms('beef', 'pb')

# Interaction terms for every remaining pair of predictors.
df = df.create_interaction_terms('beef', 'mayo')
df = df.create_interaction_terms('beef', 'jelly')
df = df.create_interaction_terms('pb', 'mayo')
df = df.create_interaction_terms('pb', 'jelly')
df = df.create_interaction_terms('mayo', 'jelly')
# Rebuild a DataFrame from the same data and column order.
log_df = DataFrame(df.data_dict, df.columns)

logistic_regressor = LogisticRegressor(log_df, 10, dependent_variable='rating')

# test 8 slices of beef + mayo
observation = {'beef': 8, 'mayo': 1}

assert round(logistic_regressor.predict(observation), 2) == 9.72

예제 #29

0

파일 보기

import sys
sys.path.append('src')
from dataframe import DataFrame
from polynomial_regressor import PolynomialRegressor

# Six (x, y) samples shared by the three fits below. The expected outputs
# are kept as comments; they used to be bare expression statements that
# were evaluated and thrown away.
df = DataFrame.from_array(
    [(0,1), (1,2), (2,5), (3,10), (4,20), (5,30)],
    columns = ['x', 'y']
)

# Degree 0: constant-only model.
constant_regressor = PolynomialRegressor(degree=0)
constant_regressor.fit(df, dependent_variable='y')
print(constant_regressor.coefficients)
# expected: {'constant': 11.3333}
print(constant_regressor.predict({'x': 2}))
# expected: 11.3333

# Degree 1: straight line.
linear_regressor = PolynomialRegressor(degree=1)

linear_regressor.fit(df, dependent_variable='y')
print(linear_regressor.coefficients)
# expected: {'constant': -3.2381, 'x': 5.8286}
print(linear_regressor.predict({'x': 2}))
# expected: 8.4190



# Degree 2: parabola.
quadratic_regressor = PolynomialRegressor(degree=2)
quadratic_regressor.fit(df, dependent_variable='y')
print(quadratic_regressor.coefficients)
# expected: {'constant': 1.1071, 'x': -0.6893, 'x^2': 1.3036}

예제 #30

0

파일 보기

파일: test_decision_tree.py 프로젝트: geomeza/machine-learning

# print('\nTesting root low high indices')
# assert dt.root.low.high.row_indices == [6]
# print('passed')

# print('\nTesting root low low impurity')
# assert dt.root.low.low.impurity == 0
# print('passed')

# print('\nTesting root low high impurity')
# assert dt.root.low.high.impurity == 0
# print('passed')

print('Splitting Tests')
# Seven [x, y, class] rows; row positions 0..6 are the indices asserted below.
df = DataFrame.from_array(
    [[1, 11, 'A'], [1, 12, 'A'], [2, 11, 'A'], [1, 13, 'B'], [2, 13, 'B'],
     [3, 13, 'B'], [3, 11, 'B']],
    columns=['x', 'y', 'class'])

# Build the tree step by step: two explicit split() calls.
dt = DecisionTree(split_metric='gini')
dt.initialize(df)
dt.split()
dt.split()

# After two splits: the three 'B' rows with y == 13 sit on the high side,
# and the low side has been divided into rows 0-2 and row 6.
assert dt.root.high.row_indices == [3, 4, 5]
assert dt.root.low.low.row_indices == [0, 1, 2]
assert dt.root.low.high.row_indices == [6]
print('passed')
# fit() is expected to arrive at the same structure in one call.
dt = DecisionTree(split_metric='gini')
dt.fit(df)
assert dt.root.high.row_indices == [3, 4, 5]
assert dt.root.low.low.row_indices == [0, 1, 2]