import numpy as np
import numpy.linalg as la

# LinearRegression (the shared task wrapper used by every solver below) is
# assumed to be defined elsewhere in this module.


class OrdinaryLeastSquares():
    def __init__(self, data, num_iter, verbosity):
        self.task = LinearRegression(data, num_iter, verbosity)

    def fit(self):
        x, y, theta = self.task.reset()
        # Closed-form solve via the normal equation: theta = (X^T X)^{-1} X^T y.
        sym_inv = la.inv(np.dot(x.T, x))
        pseudo_inverse = np.dot(sym_inv, x.T)
        pseudo_inverse_canonical = la.pinv(x)   # numerically safer equivalent
        # np.testing.assert_allclose(pseudo_inverse, pseudo_inverse_canonical, rtol=1e-5)
        theta = np.dot(pseudo_inverse, y)
        return theta, self.task.compute_loss_directly(x, y, theta)
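
# A minimal, self-contained check of the normal-equation solve above; the
# helper name and synthetic data are illustrative, not part of the class API.
# It assumes a design matrix whose first column is the intercept term.
def _ols_normal_equation_demo():
    rng = np.random.default_rng(0)
    x = np.hstack([np.ones((100, 1)), rng.random((100, 1))])  # [1, x] design matrix
    true_theta = np.array([[3.0], [2.0]])
    y = np.dot(x, true_theta)                                 # noiseless targets
    theta = np.dot(la.pinv(x), y)                             # pseudo-inverse solve
    assert np.allclose(theta, true_theta)                     # recovers [3, 2]
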
class GradientDescent2D_Vectorized():
    def __init__(self, data, num_iter, learning_rate, verbosity):
        self.task = LinearRegression(data, num_iter, verbosity)
        self.learning_rate = learning_rate

    def fit(self):
        x, y, theta = self.task.reset()
        m = x.shape[0]

        for iter in range(0, self.task.num_iters):
            prediction = np.dot(x, theta)                                   # (m,2)*(2,1) -> (m,1)
            error = prediction - y                                          # (m,1)-(m,1) -> (m,1)
            loss = np.sum(np.square(error)) / len(error)                    # mean squared error -> scalar
            gradient = (2/m) * np.dot(x.T, error)                           # (2,m)*(m,1) -> (2,1)
            theta = theta - (self.learning_rate * gradient)                 # (2,1) -> (2,1)
            self.task.save_result(iter, theta, loss)
        return theta, self.task.compute_loss_directly(x, y, theta)
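
# A hedged, standalone sketch of the same vectorized update outside the class,
# on illustrative synthetic data (a bias column plus one feature); for a small
# enough learning rate it converges toward the OLS solution.
def _vectorized_gd_demo(steps=1000, lr=0.1):
    rng = np.random.default_rng(0)
    x = np.hstack([np.ones((50, 1)), rng.random((50, 1))])
    y = np.dot(x, np.array([[1.0], [4.0]]))       # targets from theta = [1, 4]
    theta = np.zeros((2, 1))
    m = x.shape[0]
    for _ in range(steps):
        gradient = (2 / m) * np.dot(x.T, np.dot(x, theta) - y)  # d(MSE)/d(theta)
        theta = theta - lr * gradient
    return theta                                  # approaches [[1.0], [4.0]]
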
class GradientDescent2D():
    def __init__(self, data, num_iter, learning_rate, verbosity):
        self.task = LinearRegression(data, num_iter, verbosity)
        self.learning_rate = learning_rate

    def fit(self):
        x, y, theta = self.task.reset()
        m = x.shape[0]
        for iter in range(0, self.task.num_iters):
            loss, gradient = 0, [0, 0]
            for i in range(0, m):
                prediction = theta[1] * x[i][1] + theta[0] * x[i][0]
                error = y[i] - prediction
                loss = loss + (1/m) * error ** 2
                gradient[0] = gradient[0] - (2/m) * x[i][0] * error
                gradient[1] = gradient[1] - (2/m) * x[i][1] * error
            theta[0] = theta[0] - self.learning_rate * gradient[0]
            theta[1] = theta[1] - self.learning_rate * gradient[1]
            # theta is updated in place above, so pass a copy in case
            # save_result keeps a reference to it across iterations.
            self.task.save_result(iter, theta.copy(), loss)
        return theta, self.task.compute_loss_directly(x, y, theta)
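
# The explicit 2D loop and the vectorized class compute identical gradients; a
# hedged equivalence check on one synthetic batch (helper name is illustrative).
def _gradient_equivalence_check():
    rng = np.random.default_rng(1)
    x = np.hstack([np.ones((10, 1)), rng.random((10, 1))])
    y = rng.random((10, 1))
    theta = np.zeros((2, 1))
    m = x.shape[0]
    vec_grad = (2 / m) * np.dot(x.T, np.dot(x, theta) - y)      # vectorized form
    loop_grad = np.zeros((2, 1))
    for i in range(m):                                          # per-sample form
        error = y[i] - (theta[0] * x[i][0] + theta[1] * x[i][1])
        loop_grad[0] -= (2 / m) * x[i][0] * error
        loop_grad[1] -= (2 / m) * x[i][1] * error
    assert np.allclose(vec_grad, loop_grad)
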
class RandomSearch():
    def __init__(self, data, num_iter, verbosity, param_range):
        self.task = LinearRegression(data, num_iter, verbosity)
        self.range = param_range

    def fit(self):
        x, y, _ = self.task.get_initial_data()
        n = x.shape[1]
        min_theta = np.zeros((n, 1))
        min_loss = np.inf
        for i in range(self.task.num_iters):
            theta = np.random.rand(n, 1)          # uniform candidate in [0, 1)^n
            loss = self.task.compute_loss_directly(x, y, theta)
            if loss < min_loss:
                min_loss = loss
                min_theta = theta
            # Save the best loss alongside the best theta so the two agree.
            self.task.save_result(i, min_theta, min_loss)

        return min_theta, min_loss
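
# The class stores param_range but samples from [0, 1); a hedged sketch of how
# candidates could instead be drawn from an arbitrary (low, high) window. The
# tuple unpacking of param_range is an assumption, not part of the original API.
def _sample_in_range(n, param_range):
    low, high = param_range                       # assumed (low, high) tuple
    return low + (high - low) * np.random.rand(n, 1)
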
class GradientDescent():
    def __init__(self, data, num_iter, learning_rate, verbosity):
        self.task = LinearRegression(data, num_iter, verbosity)
        self.learning_rate = learning_rate

    def fit(self):
        x, y, theta = self.task.reset()
        m, n = x.shape[0], x.shape[1]
        loss = 0
        for iter in range(0, self.task.num_iters):
            loss, gradient = 0, np.zeros((n, 1))
            for i in range(0, m):
                prediction = 0
                for j in range(0, n):
                    prediction = prediction + theta[j] * x[i][j]
                error = y[i] - prediction
                loss = loss + (1/m) * error ** 2
                for j in range(0, n):
                    gradient[j] = gradient[j] - (2/m) * x[i][j] * error
            for k in range(0, n):
                theta[k] = theta[k] - self.learning_rate * gradient[k]
            # theta is mutated in place, so pass a copy in case save_result
            # keeps a reference to it across iterations.
            self.task.save_result(iter, theta.copy(), loss)
        return theta, loss
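
# The three nested loops above collapse to two matrix products per iteration; a
# hedged sketch of one n-feature update in vectorized form (illustrative helper,
# assuming x already carries its bias column).
def _nd_gd_step(x, y, theta, learning_rate):
    m = x.shape[0]
    error = np.dot(x, theta) - y                  # (m,n)@(n,1) - (m,1) -> (m,1)
    gradient = (2 / m) * np.dot(x.T, error)       # (n,m)@(m,1) -> (n,1)
    return theta - learning_rate * gradient       # one full parameter update
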
class GridSearch():
    def __init__(self, data, num_iter, verbosity, param_range):
        self.task = LinearRegression(data, num_iter, verbosity)
        self.range = param_range

    def fit(self):
        x, y, _ = self.task.get_initial_data()
        n = x.shape[1]
        min_theta = np.zeros((n, 1))
        min_loss = np.inf
        nx, ny = (3, 2)                           # grid resolution per parameter
        iter = 0
        for theta0 in np.linspace(0, 1, nx):      # don't shadow the data arrays
            for theta1 in np.linspace(0, 1, ny):
                theta = np.array([[theta0], [theta1]])
                loss = self.task.compute_loss_directly(x, y, theta)
                if loss < min_loss:
                    min_loss = loss
                    min_theta = theta
                self.task.save_result(iter, min_theta, min_loss)
                iter = iter + 1

        return min_theta, min_loss
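
# With more than two parameters the nested loops generalize to a Cartesian
# product of per-axis grids; a hedged sketch (the helper name and the fixed
# [0, 1] window are illustrative assumptions).
def _grid_candidates(points_per_axis, n_params):
    from itertools import product
    axes = [np.linspace(0, 1, points_per_axis)] * n_params
    for combo in product(*axes):
        yield np.array(combo).reshape(-1, 1)      # one (n_params, 1) candidate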