Example #1
def test_sanity():
    X = np.array([[1, 1], [2, 2], [3, 3]], dtype=np.float32).reshape(-1, 2)
    y = np.array([3, 5, 7], dtype=np.float32).reshape(-1, 1)
    lin_reg = LinearRegression()
    lin_reg.fit(X, y)
    y_pred = lin_reg.predict(X)
    assert np.allclose(y, y_pred, rtol=0.1)  # every prediction must be close, not just one
Example #2
# These estimators follow scikit-learn's API (predict_log_proba is a
# LogisticRegression method), so the import below is assumed.
from sklearn.linear_model import LinearRegression, LogisticRegression


class ChanceModel:
    def __init__(self, margin_mapping):
        self.margin_mapping = margin_mapping
        # keep the two estimators under separate names: the logistic model
        # backs getChanceLog(), the linear model backs getChanceLinear()
        self.log_reg = LogisticRegression()
        self.lin_reg = LinearRegression()

    def getChance(self, margin):
        if margin not in self.margin_mapping:
            return 0

        return self.margin_mapping[margin].getWinFraction()

    def getChanceLinear(self, margin):
        # scikit-learn expects a 2D array: one sample with one feature
        return self.lin_reg.predict([[margin]])

    def getChanceLog(self, X):
        return self.log_reg.predict_log_proba(X)

    def fitRegression(self):
        x_list = []
        y_list = []

        for key, value in self.margin_mapping.items():
            x_list.append([key])  # one-feature rows for the regressor
            y_list.append(value.getWinFraction())

        self.lin_reg.fit(x_list, y_list)
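
A quick way to exercise the class; MarginRecord below is a hypothetical stand-in for whatever the original margin_mapping stores (anything exposing getWinFraction()):

class MarginRecord:
    # hypothetical record type: the class above only requires getWinFraction()
    def __init__(self, wins, games):
        self.wins, self.games = wins, games

    def getWinFraction(self):
        return self.wins / self.games


mapping = {1: MarginRecord(3, 10), 5: MarginRecord(6, 10), 10: MarginRecord(9, 10)}
model = ChanceModel(mapping)
model.fitRegression()
print(model.getChance(5))        # empirical win fraction: 0.6
print(model.getChanceLinear(7))  # win fraction interpolated by the linear fit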
Example #3
def test_LinearRegression_init():
    """ Given a pandas dataframe, test the creation of a regression class.  """
    some = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])

    m = LinearRegression(some)
    data_2 = m.getData()
    assert some.equals(data_2)
Example #4
 def input_data_file(self):
     self.clear_window()
     self.datafile = input("\nEnter the data file name (.csv): ")
     if not os.path.exists(self.datafile):
         raise Exception("File does not exist.")
     self.rl = LinearRegression()
     self.rl.input_data(self.datafile)
Example #5
def linear_regression():
  line = Line()
  line.slanting_line()
  sample = line.generate_sample(N)

  lr = LinearRegression()
  lr.learn(sample)

  # in-sample error
  e_in = lr.calculate_error(sample)
  # Plotting in-sample graph
  #plt = lr.plot(sample)  # plot the samples
  #plt.plot([-lr.weight[0]/lr.weight[1] for y in range(-1, 2)], [y for y in range(-1, 2)])  # Add the x intercept line
  #plt.show()

  # out-sample error
  sample = line.generate_sample(1000)
  e_out = lr.calculate_error(sample)
  # Plotting out-sample graph
  #plt = lr.plot(sample)  # plot the samples
  #plt.plot([-lr.weight[0]/lr.weight[1] for y in range(-1, 2)], [y for y in range(-1, 2)])  # Add the x intercept line
  #plt.show()

  #print("Line: slope=", line.slope, " intercept=", line.intercept)
  #print("W_Vec: weight=", lr.weight[1], " threshold=", lr.weight[0])

  return e_in, e_out
Example #6
def linear_regression():
  transformation = NonlinearTransformation()
  sample = transformation.generate_sample_two(N).add_noise().get_sample()

  lr = LinearRegression()
  lr.learn(sample)

  return lr.weight.flat
Example #7
def test_LinearRegression_train():
    """
    Test that the regression class implements a working train method
    """
    some = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])

    some_2 = pd.DataFrame([1, 2, 3, 4])
    m = LinearRegression(some)
    assert m.train(some_2)
Example #8
def main():
    #Example
    linear_regression = LinearRegression([5, 15, 20, 25, 10, 30, 38],
                                         [375, 450, 460, 500, 400, 568, 610])
    print(linear_regression.calculate(35))

    tf_linear_regression = TensorFlowLinearRegression(
        [5, 15, 20, 25, 10, 30, 38], [375, 450, 460, 500, 400, 568, 610])
    print(tf_linear_regression.calculate(35))
Example #9
def main():
    raw_data = pd.read_csv("student_data.csv")
    x = np.array(raw_data.iloc[:, 1:])
    y = np.array(raw_data.iloc[:, :1], dtype=int)
    leave_one_out(x, y)
    linear_regression = LinearRegression(x, y, lr=0.00001)
    linear_regression.train(epoch_cnt=10000)
    print(linear_regression.inference([[168, 51, 38]]).item())
    print(linear_regression.inference([[179, 70, 42]]).item())
Example #10
def leave_one_out(x, y):
    for i in range(len(x)):
        train_x = np.delete(x, i, 0)
        train_y = np.delete(y, i, 0)
        test_x = x[i]
        test_y = y[i]
        linear_regression = LinearRegression(train_x, train_y, lr=0.00001)
        linear_regression.train(epoch_cnt=10000)
        test_result = linear_regression.inference([test_x])
        print(f"{test_y} => {test_result.item()}")
Example #11
def output(part_id):
    # Random Test Cases
    X = np.column_stack((np.ones(10),
                          (np.sin(np.arange(1, 16, 1.5))),
                          (np.cos(np.arange(1, 16, 1.5)))))
    y = np.sin(np.arange(1, 30, 3))

    Xval = np.column_stack((np.ones(10),
                          (np.sin(np.arange(0, 14, 1.5))),
                          (np.cos(np.arange(0, 14, 1.5)))))
    yval = np.sin(np.arange(1, 11))

    lr = LinearRegression()
    lr.fit(X, y)

    if part_id == 1:
        J, _ = lr.costFunction(X, y, np.array([0.1, 0.2, 0.3]), 0.5)
        return sprintf('%0.5f ', J)
    elif part_id == 2:
        _, grad = lr.costFunction(X, y, np.array([0.1, 0.2, 0.3]), 0.5)
        return sprintf('%0.5f ', grad)
    elif part_id == 3:
        error_train, error_val = lr.learningCurve(X, y, Xval, yval, 1)
        return sprintf('%0.5f ', np.hstack((error_train, error_val)))
    elif part_id == 4:
        X_poly = lr.polyFeatures(X[1, :].T, 8)
        return sprintf('%0.5f ', X_poly)
    elif part_id == 5:
        lambda_vec, error_train, error_val = lr.validationCurve(X, y,
                                                                Xval, yval)
        return sprintf('%0.5f', np.hstack((lambda_vec, error_train, error_val)))
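
sprintf is not a Python builtin; the snippet presumably defines it elsewhere to mimic Octave's sprintf. A minimal stand-in consistent with how it is called above:

import numpy as np

def sprintf(fmt, arr):
    # stand-in for the Octave-style helper assumed above: apply the format
    # to every element and join the pieces
    return ''.join(fmt % e for e in np.asarray(arr).ravel())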
Example #12
def lr_booting_preceptron():
  line = Line()
  line.slanting_line()
  sample = line.generate_sample(N)

  lr = LinearRegression()
  lr.learn(sample)

  p = Preceptron(lr.weight)
  p.learn(sample)

  return p.count
Example #13
def testExample():
    x,y = np.loadtxt("mydata1", delimiter=',', unpack=True)
    m = y.size
    x = x[np.newaxis].T
    x = np.c_[np.ones((m,1)), x]
    n = x.shape[1]
    theta  = np.zeros((n,))
    lr = LinearRegression(theta,x,y)
    alpha = 0.001
    iterations = 20000
    lr.gradientDescent(alpha, iterations)
    # gradientDescent presumably updates theta in place (NumPy arrays are
    # mutable); otherwise these prints would still show the initial zeros
    print("theta0: " + str(theta[0]))
    print("theta1: " + str(theta[1]))
Example #14
    def __init__(self):
        self._initGraph()
        linearReg = LinearRegression(self._days, self._remaningPoints)
        poliReg = PolinomialRegression(self._days, self._remaningPoints, 3)

        plt.plot(self._xval, self._remaningPoints, label="Original Data")
        plt.plot(linearReg.getXAxis(),
                 linearReg.getFunction(),
                 label="Linear Function")
        plt.plot(poliReg.getXAxis(),
                 poliReg.getFunction(),
                 label="Polynomial Function")
        plt.legend()
        self._plotGraph()
Example #15
 def __init__(self, degree=1, method="bgd", lamb=0):
     assert method in ("bgd", "sgd", "normal", "cd", "cd_pure", "mgd", "pgd", "pgd_acc", "admm"), \
         "No such method, please select from bgd, sgd, normal, cd, cd_pure, mgd, pgd, pgd_acc and admm"
     assert lamb >= 0
     assert degree >= 1
     self.lamb = lamb  # lambda hyperparameter (regularization strength)
     self.degree = degree  # order of the polynomial regression
     self.method = method  # minimization method
     self._lin_reg = LinearRegression()  # the underlying linear regressor
     self._poly_fea = None  # PolynomialFeatures
     self._std_scaler = None  # StandardScaler
     self.theta = None  # coefficient vector
     self.pca = None
Example #16
def start_linear_regression(training_records, output):
    """
    In this method, we compare the weights calculated by our gradient descent approach with sklearn's output.

    `Our method`
    >>> regressor = LinearRegression(iterations=NUM_OF_ITERATIONS, learning_rate=LEARNING_RATE)
    >>> weights_table, mse_costs, predicted_outputs = regressor.calculate_weights(training_records, output)

    As you can see above, our approach returns three tables.

    1. weights_table - This is where we store the history of the weights from iteration 0 to the last iteration.
       To access the set of weights in the last iteration simply use `weights_table[-1]`

    2. mse_costs - Table which stores the mean square error for each iteration.

    3. predicted_outputs - This is the predicted output using our machine (i.e., the weights).

    The following code fragment shows how to invoke sklearn's Linear regression.
    `sklearn's method`
    >>> clf = linear_model.LinearRegression(fit_intercept=False)
    >>> clf.fit(training_records, output)

    Lastly, we just print the weights and it is left to the user to visually compare them.

    :parameter training_records - N X P matrix of training samples.
    :parameter output - N X 1 vector of output.

    :return:
    """
    regressor = LinearRegression(iterations=NUM_OF_ITERATIONS,
                                 learning_rate=LEARNING_RATE)
    print(np.shape(training_records))
    print(np.shape(output))
    weights_table, mse_costs, predicted_outputs = regressor.calculate_weights(
        training_records, output)

    clf = linear_model.LinearRegression(fit_intercept=False)
    clf.fit(training_records, output)
    print("Starting gradient descent with {0} iterations and a learning rate of {1}".format(
        NUM_OF_ITERATIONS, LEARNING_RATE))
    print("Running...")
    final_weights = [
        weights_table[-1][i] for i in range(0, NUM_OF_FEATURES + 1)
    ]
    print("After %s iterations of Gradient Descent (our implementation), the final weights are : %s" % (
        NUM_OF_ITERATIONS, final_weights))

    print("Using Sklearn's Linear Regression, the weights are : %s" % clf.coef_)
    return weights_table, mse_costs
Example #17
def linear_regression():
  transformation = NonlinearTransformation()
  sample = transformation.generate_sample_two(N).add_noise().get_sample()

  lr = LinearRegression()
  lr.learn(sample)

  # in-sample error
  e_in = lr.calculate_error(sample)

  # out-sample error
  sample = transformation.generate_sample_two(1000).add_noise().get_sample()
  e_out = lr.calculate_error(sample)

  return e_in, e_out
Example #18
def best_params():
    lr_list = [0.1, 0.01, 0.05, 0.001, 0.005, 0.0001, 0.0005]
    mse_min = float("inf")  # sentinel: any real MSE will be smaller
    lr_min = 0
    n_iter_min = 0
    for lr_val in lr_list:
        for iteration in range(1000, 10000, 10):
            reg = LinearRegression(learning_rate=lr_val, n_iters=iteration)
            reg.fit(X_train, Y_train)
            predicted = reg.predict(X_test)
            mse_val = mse(Y_test, predicted)
            if mse_val < mse_min:
                mse_min = mse_val
                lr_min = lr_val
                n_iter_min = iteration
    return (lr_min, n_iter_min)
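
The mse helper used in the grid search above is not part of the snippet; a minimal stand-in:

import numpy as np

def mse(y_true, y_pred):
    # mean squared error; a plausible version of the helper assumed above
    return np.mean((np.asarray(y_true) - np.asarray(y_pred)) ** 2)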
Example #19
def returnClassify(a):
    testList = a.split(",")  # split() already returns a list
    print(testList)
    lr = LinearRegression(testList).loadFiles()
    bin_dtree = DecisionTree(testList).loadFiles()

    return jsonify({"lr": str(lr), "bin": str(bin_dtree)})
Example #20
def test_LinearRegression_test():
    """
    Test that the regression class implements a working test method
    """
    some = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])

    some_2 = pd.DataFrame([1.0, 2.0, 3.0, 4.0])

    m = LinearRegression(some)
    m.train(some_2)
    test = m.test(some)

    for i in range(3):
        assert round(test[0][i], 3) == some_2[0][i]

    print(test[0])
    print(some_2[0])
Example #21
def test_LinearRegression_dtype():
    """
    Test that initializing a regression class raises a TypeError for
    things that are not pandas dataframes.
    """
    some = "A wrong data type of type string"
    with pytest.raises(TypeError):
        LinearRegression(some)
Example #22
 def testLinearRegression(self):
     from LinearRegression import LinearRegression
     boston = load_boston()  # note: load_boston was removed in scikit-learn 1.2
     X_train = boston.data
     y_train = boston.target
     lr = LinearRegression()
     mse = np.mean(cross_validation(lr, X_train, y_train))
     print("Linear Regression with closed solution method: MSE =", mse)
Example #23
    def fitdata(self, *argv):

        x, y = argv[0], argv[1]
        [x1, x2] = LinearRegression(x, y)

        y_fit = []
        for fit in y:
            y_fit.append(x1 + x2 * fit)

        plt.scatter(y, x)
        plt.plot(y_fit, x)
        return plt.show()
Example #24
def main():
    data = np.array([[1, 1, 1, 3, 3],
                     [1, 2, 2, 2, 2],
                     [1, 3, 3, 1, 1]])
    result = np.array([[1], [2], [3]])

    Lr = LinearRegression(5)
    print(Lr.predict(np.array([1, 2, 2, 2, 2])))
    # materialize the (sample, target) pairs; zip() is a one-shot iterator in Python 3
    Lr.train(list(zip(data, result)), learningRate=0.1)
    print(Lr.predict(np.array([1, 2, 2, 2, 2])))
Example #25
def main():
    itemsMatrix = np.loadtxt("ex1data1.txt", delimiter=',', unpack=True)
    x = itemsMatrix[:-1, :].T
    # y is always a 1D array in this implementation, that is, a row vector. So we won't transpose it.
    y = itemsMatrix[-1, :]
    m = y.size  # m = number of training examples

    # Insert the usual column of 1's into the "x" matrix
    x = np.c_[np.ones((m, 1), dtype=float), x]  # matlab like solution to column inserting
    n = x.shape[1]  # number of features
    theta = np.zeros((n,))

#    storedStds, storedMeans = [], []
#    for sub in range(x.shape[1]):  # sub = subscript = column number = feature number
#        currentFeatures = x[:, sub]
#        mean = currentFeatures.mean()
#        std = currentFeatures.std()
#        storedMeans.append(mean)
#        storedStds.append(std)
#        if std == 0: # avoids division by zero and also avoids applying feature normalization to x0, which we normally don't
#            continue
#        x[:, sub] = (x[:, sub] - mean) / std

    alpha = 0.01
    lr = LinearRegression(theta, x, y)
    lr.gradientDescentTillConvergence(alpha)

    # uncomment here to see the result
#    fig, ax = plt.subplots()
#    ax.set_xlabel('Population of City in 10,000s')
#    ax.set_ylabel('Profit in $10,000s')
#    xWithoutOnes = x[:,1]
#    print(xWithoutOnes)
#    ax.plot(xWithoutOnes,y,'x')
#    hresults = theta[0] + theta[1]*x
#    xmin = xWithoutOnes.min()
#    xmax = xWithoutOnes.max()
#    ax.set_xlim([xmin-0.2,xmax+0.2])
#    ax.plot(x,hresults)
#    plt.show()

    lr.plotCostFunction(type="surface")
Example #26
    def __init__(self, parent):
        """
        Parameters
        ----------
        parent : MainApplication
            A reference to the MainApplication object which created the plot.
            This is used to enable access to the PlotData object inside the main app.
        """

        self.fig, self.ax = plt.subplots()
        plt.show(block=False)

        self.parent = parent

        self.check_state = False
        self.cid = self.fig.canvas.mpl_connect("button_press_event",
                                               self.on_click)

        self.points = []
        self.lin_reg = LinearRegression()

        self.ax.set(xlim=(-10, 10), ylim=(-10, 10))
Example #27
def LinearRegressionTest():
    X, y, w_org, b_org = gen_data()

    learning_rate = 0.1
    max_itr = 500
    # flatten the generated data into plain lists
    X = X[0].tolist()
    y = y[0].tolist()
    m = len(X)
    index = list(range(100))
    np.random.shuffle(index)  # shuffle the indices
    # build the training set from the first 70 shuffled indices
    train_idxes = index[0:70]
    train_X = [X[i] for i in train_idxes]
    train_y = [y[i] for i in train_idxes]
    mean = np.mean(train_X)
    std = np.std(train_X)
    train_X = (train_X - mean) / std
    # build the test set from the remaining 30, scaled with the training statistics
    test_idxes = index[70:100]
    test_X = [X[i] for i in test_idxes]
    test_y = [y[i] for i in test_idxes]
    test_X = (test_X - mean) / std
    # train
    lr = LinearRegression()
    w = lr.train(train_X,
                 train_y,
                 l_rate=learning_rate,
                 max_itr=max_itr,
                 batch_size=50)
    print('test loss is {0}'.format(lr.test(test_X, test_y)))
    x_ax = np.arange(0, 100, 10)  # np.arange (not range), so the scaling below broadcasts
    x_ax = (x_ax - mean) / std
    y_ax = w[0] * x_ax + w[1]

    plt.scatter(train_X, train_y, marker='o', c='b')
    plt.scatter(test_X, test_y, marker='o', c='r')
    plt.plot(x_ax, y_ax)
    plt.show()
Example #28
def train_lr(degree):
    data = Data(FILENAME)
    model = LinearRegression(data, degree, method="CF")
    model.train()
    return {
        "tre": model.training_MSE(),
        "tse": model.test_MSE()
    }
Example #29
def print_linear_regressions(degrees):
    for degree in degrees:
        data = Data(FILENAME)
        model = LinearRegression(data, degree, method="CF")
        model.train()
        print "degree = %d" % degree
        print "w: \n", model.W.T
        print "train error: ", model.training_MSE()
        print "test error: ", model.test_MSE()
Example #30
    def fitdata(*argv):

        import matplotlib.pyplot as plt

        from LinearRegression import LinearRegression

        x, y = argv[0], argv[1]
        [x1, x2] = LinearRegression(x, y)

        y_fit = []
        for fit in y:
            y_fit.append(x1 + x2 * fit)

        plt.scatter(y, x)
        plt.plot(y_fit, x)
        return plt.show()
Example #31
 def predict(self, _x_val):
     # choose simple or multiple regression based on the input shape
     if not isinstance(_x_val, list):
         mr = LinearRegression(self.X, self.Y)
     else:
         mr = MultipleRegression(self.X, self.Y)
     mr.calculateCoeff()
     y_val = mr.predict(_x_val)
     prob_val = sigmoid(y_val)
     return 0 if prob_val < 0.5 else 1
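
The sigmoid helper is referenced but not defined in this snippet; the standard logistic function is the obvious candidate:

import numpy as np

def sigmoid(z):
    # logistic function mapping the regression output to (0, 1)
    return 1.0 / (1.0 + np.exp(-z))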
Example #32
def calculate_weights(training_records, output):
    mse_costs = []
    weights = np.random.rand(training_records.shape[1])
    # copy so the first entry is not aliased to the array mutated below
    weights_table = [weights.copy()]
    predicted_outputs = []
    itr = 0
    prevErr = 0
    for i in range(NUM_OF_ITERATIONS):
        predicted_output = np.dot(training_records, weights)
        predicted_outputs.append(predicted_output)
        mse_cost, error = LinearRegression.mse_cost_function(predicted_output, output)
        mse_costs.append(mse_cost)
        slope = training_records.T.dot(error) / len(output)
        weights -= LEARNING_RATE * slope
        weights_table.append(weights.copy())
        if abs(prevErr - mse_cost) < 0.0001:  # early stop once the cost plateaus
            itr = i
            return itr, mse_costs
        prevErr = mse_cost
    return itr, mse_costs
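
LinearRegression.mse_cost_function is not shown; a plausible version (written here as a free function), assuming it returns the scalar cost together with the error vector that the gradient step consumes:

import numpy as np

def mse_cost_function(predicted_output, output):
    # hypothetical companion helper: scalar MSE plus the raw error vector
    error = predicted_output - output
    return np.mean(error ** 2), error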
Example #33
# loading data
data = pd.read_csv('Salary_Data.csv')
data = featureScale(data)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# splitting data to test and train sets

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=1)

# creating the model
model = LinearRegression()

# training the model
MSE, theta = model.train(X_train, y_train, Lambda=0.01)

# using the model to predict
y_pred = model.predict(X_test)

# calculating R2 score for my model
R2 = r2_score(y_test, y_pred)
print("R2 score = ", R2)

# plotting cost function
plt.grid()
plt.plot(MSE)
plt.show()
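
featureScale is applied to the dataframe above but never defined in the snippet; a minimal min-max version, assuming column-wise scaling:

def featureScale(df):
    # hypothetical helper: min-max scale every column to [0, 1]
    return (df - df.min()) / (df.max() - df.min())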
Example #34
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
from scipy import interpolate
from LinearRegression import LinearRegression

data_matrix = np.loadtxt('./exdata1.txt')

x = data_matrix[:, 0:1]
y = data_matrix[:, 1:2]


lin_reg = LinearRegression(x, y) 
cost_function = lin_reg.gradient_descent()

predicted_x = []
predicted_y = []

# sample the fitted hypothesis every 0.5 units across the data range
i = 0

while i < 23.50:
    predicted_x.append(i)
    hypoth_of_i = np.matmul([1, i], lin_reg.thetas)[0]
    predicted_y.append(hypoth_of_i)
    i += 0.5

# Our data set with hypothesis function
data_set_graph = plt.figure()
plt.plot(x, y, 'ro')
plt.plot(predicted_x, predicted_y)
plt.ylabel('Profit in $10,000s')
Example #35
import NumericalFeatureExtractor as nfe
import PosTagging as pt
from LinearRegression import LinearRegression 

x = nfe.getAvgWrdLength()
y = nfe.vocabError()
z = nfe.getWordCount()

r = x+y+z

lr = LinearRegression()
mat = lr.buildFeatureMatrix(r)
print(mat)
Example #36
class Application(object):
    def __init__(self):
        self.line_lenght = 30

    def clear_window(self, wait=False):
        if wait:
            input("\n[Enter] Return to the main menu.")
        return os.system('clear')

    def run(self):
        self.input_data_file()
        self.display_options()

    def input_data_file(self):
        self.clear_window()
        self.datafile = input("\nEnter the data file name (.csv): ")
        if not os.path.exists(self.datafile):
            raise Exception("File does not exist.")
        self.rl = LinearRegression()
        self.rl.input_data(self.datafile)

    def display_options(self):
        self.clear_window()
        while True:
            try:
                print("-".ljust(90, "-"))
                print("Linear Regression Application - Case: Thermal Expansion Coefficient of Steel.")
                print("-".ljust(90, "-"))
                print("1. Show loaded data.")
                print("2. Show plot of Temperature vs Thermal Expansion Coefficient.")
                print("3. Show the linear regression calculation.")
                print("4. Show the linear regression plot.")
                print("0. Exit.")
                self.option = int(input(" -> "))
            except Exception:
                self.clear_window()
                continue  # invalid input: redraw the menu instead of dispatching
            self.switch_options()
            if self.option == 0:
                break

    def switch_options(self):
        if self.option == 1:
            self.clear_window()
            self.rl.show_data()
            self.clear_window(True)
        elif self.option == 2:
            self.clear_window()
            self.rl.show_graph_data()
            self.clear_window(True)
        elif self.option == 3:
            self.clear_window()
            self.rl.calculate_rl()
            self.rl.show_calculate_rl()
            self.clear_window(True)
        elif self.option == 4:
            self.clear_window()
            self.rl.show_graph_linear_regresion()
            self.clear_window(True)
        elif self.option == 0:
            self.clear_window()
        elif self.option > 3:
            self.clear_window()
        else:
            self.clear_window()
Example #37
'''
Created on Sep 11, 2012

@author: masumadmin
'''

from Utility import read_data
from LinearRegression import LinearRegression

if __name__ == '__main__':
    lr = LinearRegression()
    lr.train(read_data('data/linear_in.arff'), read_data('data/linear_out.arff'))
    y = lr.predict([1, 1, 1])
    print("y1: ", y[0])
    print("y2: ", y[1])
Example #38
def main():

    # 1.1
    data = np.genfromtxt('girls_train.csv', delimiter=',')
    lr = LinearRegression()

    ones = np.ones(len(data))
    x = np.array([ones, data[:,0]]).transpose()
    y = data[:,1]

    plt.scatter(x[:,1], y)
    plt.ylabel('Height')
    plt.xlabel('Age')

    # 1.2
    
    m, n = x.shape
    learning_rate = 0.05
    number_of_iterations = 1500
    theta = np.zeros(n)
    theta = lr.gradient_descent(x, y, theta, learning_rate, number_of_iterations)

    # 1.3 
    prediction = np.dot(x, theta)
    plt.plot(x[:,1], prediction, label = "%fx + %f" % (theta[1], theta[0]))
    plt.legend()

    theta0_vals = np.linspace(-1.0, 1.0, 100)
    theta1_vals = np.linspace(-1.0, 1.0, 100)
     
    Z = np.zeros(shape=(theta0_vals.size, theta1_vals.size))
     
    for t1, element in enumerate(theta0_vals):
        for t2, element2 in enumerate(theta1_vals):
            thetaT = np.zeros(shape=(2, 1))
            thetaT[0][0] = element
            thetaT[1][0] = element2

            guess = np.dot(x, thetaT).flatten()
            loss = guess - y

            Z[t1, t2] = lr.calculate_cost(loss, m)
    
    X, Y = np.meshgrid(theta0_vals, theta1_vals)
    
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(X, Y, Z)

    # 1.4
    
    print "Model: %fx + %f" % (theta[1], theta[0])
    print "Mean square error for training: %f" % lr.calculate_cost(prediction-y, m)
    print "Predicted height for a 4.5 years old girl: %f" % (4.5*theta[1] + theta[0])

    test_data = np.genfromtxt('girls_test.csv', delimiter=',')
    test_x = np.array([np.ones(len(test_data)), test_data[:,0]]).transpose()
    test_y = test_data[:,1]
    test_prediction = np.dot(test_x, theta)
    test_error = lr.calculate_cost(test_prediction - test_y, len(test_data))
    
    print "Mean square error for testing: %f" % (test_error)


    plt.show()
Example #39
def main():

    # 2
    data = np.genfromtxt('girls_age_weight_height_2_8.csv', delimiter=',')
    lr = LinearRegression()

    ones = np.ones(len(data))
    x = np.array([ones, data[:,0], data[:,1]]).transpose()
    y = data[:,2]

    # 2.1

    age_mean = np.mean(x[:,1])
    age_std = np.std(x[:,1])

    weight_mean = np.mean(x[:,2])
    weight_std = np.std(x[:,2])

    print "Feature 'age' - Mean: %f, STD: %f" % (age_mean, age_std)
    print "Feature 'weight' - Mean: %f, STD: %f" % (weight_mean, weight_std)

    x_scaled = np.array(x)

    x_scaled[:,0] = ones
    x_scaled[:,1] = (x[:,1] - age_mean)/age_std
    x_scaled[:,2] = (x[:,2] - weight_mean)/weight_std
    
    m, n = x.shape

    # 2.3
    
    alphas = [0.005, 0.001, 0.05, 0.1, 0.5, 1.0]
    iterations_n = 50
    iterations = np.arange(iterations_n)
    risk = np.zeros(shape = (iterations_n, len(alphas))).T

    for alpha_i in range(0, len(alphas)):
        theta_sim = np.zeros(n)
        for iteration_n in iterations:
            theta_sim = lr.gradient_descent(x_scaled, y, theta_sim, alphas[alpha_i], iteration_n)
            prediction = np.dot(x_scaled, theta_sim)
            loss = prediction - y
            risk[alpha_i][iteration_n] = lr.calculate_cost(loss, m)

    for alpha_i in range(0, len(alphas)):
        plt.plot(iterations, risk[alpha_i], label='Alpha: %f' % alphas[alpha_i])

    theta = lr.gradient_descent(x_scaled,y,np.zeros(n), 1.0, iterations_n)
    prediction = np.dot(x_scaled, theta)


    point_to_guess = [1.0, (5.0-age_mean)/age_std, (20.0-weight_mean)/weight_std]
    guess = np.sum(np.dot(theta, point_to_guess))

    print "Betas: %s" % (theta)
    print "The 5 year girl weighting 20 is approximately %f m tall." % (guess)
    print "error %f" % lr.calculate_cost(prediction -y, m)

    # 2.4
    
    p = np.matrix(x)
    theta = lr.normal_equation(p, y)
    theta = np.ravel(theta)
    guess = np.sum(np.dot(theta, [1.0, 5.0, 20.0]))
    prediction = np.dot(x, theta.flatten())

    print "Betas: %s" % (theta)
    print "The 5 year girl weighting 20 is approximately %f m tall." % (guess)
    print "error %f" % lr.calculate_cost(prediction - y, m)

    plt.ylabel('Risk')
    plt.xlabel('# of Iterations')
    plt.legend()
    plt.show()
Example #40
    axarr[1].axis([0, num_iters, 0, np.max(lr.loss_history)])

    plt.interactive(False)
    plt.show(block=True)
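
Only the tail of the plotting helper survives above; the test code below calls it as show_result(lr, title). A speculative sketch of the full helper, assuming lr exposes X, y, theta and loss_history (all attribute names here are guesses):

import numpy as np
import matplotlib.pyplot as plt

def show_result(lr, title):
    # hypothetical reconstruction of the truncated helper: left panel shows
    # the data with the fitted line, right panel shows the loss curve
    num_iters = len(lr.loss_history)
    fig, axarr = plt.subplots(1, 2)
    fig.suptitle(title)
    x = lr.X[:, -1]  # assumes the last column holds the raw feature
    axarr[0].scatter(x, lr.y)
    axarr[0].plot(x, lr.X.dot(lr.theta), 'r')
    axarr[1].plot(range(num_iters), lr.loss_history)
    axarr[1].axis([0, num_iters, 0, np.max(lr.loss_history)])
    plt.show(block=True)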

# Test for Linear Regression
# load data
data = pd.read_csv("./lsd.dat", header=None, sep=r"\s+")
data = data.as_matrix()
X = data[:, 0: -1]
y = data[:, -1]


# invoke batch gradient descent
lr = LinearRegression(X, y, tolerance=1e-4)
lr.batch_gradient_decent(0.05, 1e5)
print('LinearRegression, theta of BGD: ', lr.theta.T, ', num of iterations: ', len(lr.loss_history))
show_result(lr, "Method: batch_gradient_decent")


# invoke stochastic gradient descent
lr = LinearRegression(X, y, tolerance=1e-4)
lr.stochastic_gradient_descent(0.03, 1e3)
print('LinearRegression, theta of SGD: ', lr.theta.T, ', num of iterations: ', len(lr.loss_history))
show_result(lr, "Method: stochastic_gradient_descent")


# invoke the general Newton method
lr = LinearRegression(X, y, tolerance=1e-4)
lr.newton_general()
Example #41
import sys
sys.path.append('../models')

# include the OLS class
from LinearRegression import LinearRegression

data = pd.read_csv('data/machine.data.txt', header=None)

# column 9 is the target; drop columns 0, 1 and 9 so X keeps the remaining attributes
y = data[9]
X = data.drop([0, 1, 9], axis=1)

X = X.values
y = y.values

reg = linear_model.LinearRegression(normalize=True)  # normalize= requires scikit-learn < 1.2
reg.fit(X, y)
skout = reg.predict(X)
plt.scatter(y, skout, color='r', alpha=0.4, label='scikit-learn')
plt.plot([-200, 0], [0, 0], 'k--', lw=1)
plt.plot([0, 0], [-200, 0], 'k--', lw=1)


regr = LinearRegression()
regr.fit(X, y)
outs = regr.predict(X)
plt.scatter(y, outs, color='g', alpha=0.5, label='pyLinear')

plt.legend()
plt.show()