Example #1
def test_LinearRegression_init():
    """ Given a pandas dataframe, test the creation of a regression class.  """
    some = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])

    m = LinearRegression(some)
    data_2 = m.getData()
    assert some.equals(data_2)
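
The tests in Examples 1, 9, 10 and 24 target a small DataFrame-backed wrapper class rather than scikit-learn's estimator. The class itself is not shown on this page; below is a minimal sketch of the constructor behaviour those tests assume (the getData name and the TypeError check are inferred from the tests, not from the original source):

import pandas as pd

class LinearRegression:
    """Minimal sketch of the wrapper the surrounding tests appear to target."""

    def __init__(self, data):
        # test_LinearRegression_dtype expects a TypeError for non-DataFrame input
        if not isinstance(data, pd.DataFrame):
            raise TypeError("data must be a pandas DataFrame")
        self._data = data

    def getData(self):
        # test_LinearRegression_init expects the original DataFrame back
        return self._data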
Example #2
def test_sanity():
    X = np.array([[1, 1], [2, 2], [3, 3]], dtype=np.float32).reshape(-1, 2)
    y = np.array([3, 5, 7], dtype=np.float32).reshape(-1, 1)
    lin_reg = LinearRegression()
    lin_reg.fit(X, y)
    y_pred = lin_reg.predict(X)
    assert np.isclose(y, y_pred, rtol=0.1).all()  # every prediction must be close, not just one
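
test_sanity only needs an estimator exposing fit and predict. A minimal least-squares implementation that satisfies it, shown as a sketch rather than the project's actual code:

import numpy as np

class LinearRegression:
    """Ordinary least squares via numpy.linalg.lstsq (illustrative sketch)."""

    def fit(self, X, y):
        Xb = np.c_[np.ones((X.shape[0], 1)), X]  # prepend a bias column
        # lstsq is numerically safer than inverting X^T X explicitly
        self.theta, *_ = np.linalg.lstsq(Xb, y, rcond=None)
        return self

    def predict(self, X):
        Xb = np.c_[np.ones((X.shape[0], 1)), X]
        return Xb @ self.theta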
Example #3
def returnClassify(a):
    testList = a.split(",")
    print(testList)
    lr = LinearRegression(testList).loadFiles()
    bin_dtree = DecisionTree(testList).loadFiles()

    return jsonify({"lr": str(lr), "bin": str(bin_dtree)})
Example #4
    def input_data_file(self):
        self.clear_window()
        self.datafile = input("\nEnter the data file name (.csv): ")
        if not os.path.exists(self.datafile):
            raise FileNotFoundError("File does not exist.")
        self.rl = LinearRegression()
        self.rl.input_data(self.datafile)
Example #5
def output(part_id):
    # Random Test Cases
    X = np.column_stack((np.ones(10),
                         np.sin(np.arange(1, 16, 1.5)),
                         np.cos(np.arange(1, 16, 1.5))))
    y = np.sin(np.arange(1, 30, 3))

    Xval = np.column_stack((np.ones(10),
                            np.sin(np.arange(0, 14, 1.5)),
                            np.cos(np.arange(0, 14, 1.5))))
    yval = np.sin(np.arange(1, 11))

    lr = LinearRegression()
    lr.fit(X, y)

    if part_id == 1:
        J, _ = lr.costFunction(X, y, np.array([0.1, 0.2, 0.3]), 0.5)
        return sprintf('%0.5f ', J)
    elif part_id == 2:
        _, grad = lr.costFunction(X, y, np.array([0.1, 0.2, 0.3]), 0.5)
        return sprintf('%0.5f ', grad)
    elif part_id == 3:
        error_train, error_val = lr.learningCurve(X, y, Xval, yval, 1)
        return sprintf('%0.5f ', np.hstack((error_train, error_val)))
    elif part_id == 4:
        X_poly = lr.polyFeatures(X[1, :].T, 8)
        return sprintf('%0.5f ', X_poly)
    elif part_id == 5:
        lambda_vec, error_train, error_val = lr.validationCurve(X, y,
                                                                Xval, yval)
        return sprintf('%0.5f', np.hstack((lambda_vec, error_train, error_val)))
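
sprintf is not a Python built-in, so the project this excerpt comes from must define its own MATLAB-style helper. A plausible definition, offered as an assumption since the helper itself is not shown:

import numpy as np

def sprintf(fmt, values):
    # Format a scalar or array element-wise, MATLAB/Octave style (assumed helper).
    return ''.join(fmt % v for v in np.ravel(values))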
Example #6
def main():
    itemsMatrix = np.loadtxt("ex1data1.txt", delimiter=',', unpack=True)
    x = itemsMatrix[:-1, :].T
    # y is always a 1D array (a row vector) in this implementation, so we don't transpose it.
    y = itemsMatrix[-1, :]
    m = y.size  # m = number of training examples

    # Insert the usual column of 1's into the "x" matrix
    x = np.c_[np.ones((m, 1), dtype=float), x]  # MATLAB-like column insertion
    n = x.shape[1]  # number of features
    theta = np.zeros((n,))

    # Uncomment to apply feature normalization:
#    storedStds, storedMeans = [], []
#    for sub in range(x.shape[1]):  # sub = subscript = column number = feature number
#        currentFeatures = x[:, sub]
#        mean = currentFeatures.mean()
#        std = currentFeatures.std()
#        storedMeans.append(mean)
#        storedStds.append(std)
#        if std == 0:  # avoids dividing by zero and skips normalizing x0, as usual
#            continue
#        x[:, sub] = (x[:, sub] - mean) / std

    alpha = 0.01
    lr = LinearRegression(theta, x, y)
    lr.gradientDescentTillConvergence(alpha)

    # Uncomment to plot the fitted line:
#    fig, ax = plt.subplots()
#    ax.set_xlabel('Population of City in 10,000s')
#    ax.set_ylabel('Profit in $10,000s')
#    xWithoutOnes = x[:, 1]
#    print(xWithoutOnes)
#    ax.plot(xWithoutOnes, y, 'x')
#    hresults = theta[0] + theta[1] * xWithoutOnes
#    xmin = xWithoutOnes.min()
#    xmax = xWithoutOnes.max()
#    ax.set_xlim([xmin - 0.2, xmax + 0.2])
#    ax.plot(xWithoutOnes, hresults)
#    plt.show()

    lr.plotCostFunction(type="surface")
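
gradientDescentTillConvergence is defined elsewhere in that project. Assuming the usual squared-error cost, each iteration presumably applies the batch update sketched below:

import numpy as np

def gradient_descent_step(theta, x, y, alpha):
    # One batch step: theta <- theta - alpha * (1/m) * x^T (x @ theta - y)
    m = y.size
    gradient = x.T @ (x @ theta - y) / m
    return theta - alpha * gradient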
Example #7
def train_lr(degree):
    data = Data(FILENAME)
    model = LinearRegression(data, degree, method="CF")
    model.train()
    return {
        "tre": model.training_MSE(),
        "tse": model.test_MSE()
    }
Example #8
    def testLinearRegression(self):
        from LinearRegression import LinearRegression
        boston = load_boston()
        X_train = boston.data
        y_train = boston.target
        lr = LinearRegression()
        mse = np.mean(cross_validation(lr, X_train, y_train))
        print("Linear Regression with closed-form solution: MSE =", mse)
Example #9
def test_LinearRegression_dtype():
    """
    Test that initializing the regression class raises a TypeError for
    anything that is not a pandas DataFrame.
    """
    some = "A wrong data type of type string"
    with pytest.raises(TypeError):
        LinearRegression(some)
Example #10
def test_LinearRegression_train():
    """
    Test that regression has a working train abstract method
    """
    some = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])

    some_2 = pd.DataFrame([1, 2, 3, 4])
    m = LinearRegression(some)
    assert m.train(some_2)
Example #11
def main():
    raw_data = pd.read_csv("student_data.csv")
    x = np.array(raw_data.iloc[:, 1:])
    y = np.array(raw_data.iloc[:, :1], dtype=int)
    leave_one_out(x, y)
    linear_regression = LinearRegression(x, y, lr=0.00001)
    linear_regression.train(epoch_cnt=10000)
    print(linear_regression.inference([[168, 51, 38]]).item())
    print(linear_regression.inference([[179, 70, 42]]).item())
Example #12
def print_linear_regressions(degrees):
    for degree in degrees:
        data = Data(FILENAME)
        model = LinearRegression(data, degree, method="CF")
        model.train()
        print("degree = %d" % degree)
        print("w: \n", model.W.T)
        print("train error: ", model.training_MSE())
        print("test error: ", model.test_MSE())
Example #13
def main():
    #Example
    linear_regression = LinearRegression([5, 15, 20, 25, 10, 30, 38],
                                         [375, 450, 460, 500, 400, 568, 610])
    print(linear_regression.calculate(35))

    tf_linear_regression = TensorFlowLinearRegression(
        [5, 15, 20, 25, 10, 30, 38], [375, 450, 460, 500, 400, 568, 610])
    print(tf_linear_regression.calculate(35))
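
As a quick cross-check of what both calculate(35) calls should roughly return, fitting the same seven points with np.polyfit gives the least-squares line directly:

import numpy as np

xs = [5, 15, 20, 25, 10, 30, 38]
ys = [375, 450, 460, 500, 400, 568, 610]
slope, intercept = np.polyfit(xs, ys, 1)  # degree-1 least-squares fit
print(intercept + slope * 35)             # expected ballpark for calculate(35)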
Example #14
def get_multiple_estimates(X, y, learning_rate=[0.0000000001], max_iter=1000, iteration_threshold=100, plotlabels=None,
                           reg_strength=0, regularization="Ridge", method="GD", minibatch_size=1, plot_by_lr=True, plot_by_mb=False,
                           learning_rate_decay=False, cost_threshold=None):
    cost_by_lr = []
    iterations = []
    if plotlabels is None:
        plotlabels = []

    if plot_by_lr:
        for lr in learning_rate:
            estimator_linReg = LinearRegression(learning_rate=lr, reg_strength=reg_strength, regularization=regularization,
                                                max_iter=max_iter, iteration_threshold=iteration_threshold, method=method,
                                                learning_rate_decay=learning_rate_decay, cost_threshold=cost_threshold)
            estimator_linReg.fit(X, y)

            cost_by_lr.append(estimator_linReg.cost_by_iteration.tolist())
            iterations.append(estimator_linReg.iterations.tolist())
            plotlabels.append("Learning rate = " + str(lr))
    elif plot_by_mb:
        for mb in minibatch_size:
            estimator_linReg = LinearRegression(learning_rate=learning_rate, reg_strength=reg_strength, regularization=regularization,
                                                max_iter=max_iter, iteration_threshold=iteration_threshold,
                                                method=method, minibatch_size=mb, learning_rate_decay=learning_rate_decay,
                                                cost_threshold=cost_threshold)
            estimator_linReg.fit(X, y)

            cost_by_lr.append(estimator_linReg.cost_by_iteration.tolist())
            iterations.append(estimator_linReg.iterations.tolist())
            plotlabels.append("Minibatch size = " + str(mb))
    else:
        cnt = 0
        for pl in plotlabels:
            estimator_linReg = LinearRegression(learning_rate=learning_rate[cnt], reg_strength=reg_strength,
                                                regularization=regularization,
                                                max_iter=max_iter, iteration_threshold=iteration_threshold,
                                                method=pl, minibatch_size=minibatch_size, learning_rate_decay=learning_rate_decay,
                                                cost_threshold=cost_threshold)
            estimator_linReg.fit(X, y)

            cost_by_lr.append(estimator_linReg.cost_by_iteration.tolist())
            iterations.append(estimator_linReg.iterations.tolist())
            cnt += 1

    return cost_by_lr, iterations, plotlabels
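
A typical call plots one cost curve per learning rate. A hedged usage sketch, assuming X and y are already loaded and the project's LinearRegression is importable:

import matplotlib.pyplot as plt

costs, iters, labels = get_multiple_estimates(
    X, y, learning_rate=[1e-4, 1e-3, 1e-2], max_iter=500)

for cost, it, label in zip(costs, iters, labels):
    plt.plot(it, cost, label=label)  # cost history for one learning rate
plt.xlabel("iteration")
plt.ylabel("cost")
plt.legend()
plt.show()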
Example #15
def leave_one_out(x, y):
    for i in range(len(x)):
        train_x = np.delete(x, i, 0)
        train_y = np.delete(y, i, 0)
        test_x = x[i]
        test_y = y[i]
        linear_regression = LinearRegression(train_x, train_y, lr=0.00001)
        linear_regression.train(epoch_cnt=10000)
        test_result = linear_regression.inference([test_x])
        print(f"{test_y} => {test_result.item()}")
Example #16
    def fitdata(self, *argv):

        x, y = argv[0], argv[1]
        [x1, x2] = LinearRegression(x, y)

        y_fit = []
        for fit in y:
            y_fit.append(x1 + x2 * fit)

        plt.scatter(y, x)
        plt.plot(y_fit, x)
        return plt.show()
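
This example (and Example 22 below) unpacks two values straight from the LinearRegression(x, y) call, so in that project it behaves as a function returning the intercept and slope. A minimal stand-in with the same calling convention, offered as a sketch rather than the original:

import numpy as np

def LinearRegression(x, y):
    # Return [intercept, slope] of the least-squares line through (x, y).
    slope, intercept = np.polyfit(x, y, 1)
    return [intercept, slope]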
Example #17
def testExample():
    x, y = np.loadtxt("mydata1", delimiter=',', unpack=True)
    m = y.size
    x = x[np.newaxis].T
    x = np.c_[np.ones((m, 1)), x]
    n = x.shape[1]
    theta = np.zeros((n,))
    lr = LinearRegression(theta, x, y)
    alpha = 0.001
    iterations = 20000
    lr.gradientDescent(alpha, iterations)
    print("theta0: " + str(theta[0]))
    print("theta1: " + str(theta[1]))
Example #18
    def predict(self, _x_val):
        # Scalars go through simple linear regression; lists use multiple regression.
        if not isinstance(_x_val, list):
            mr = LinearRegression(self.X, self.Y)
        else:
            mr = MultipleRegression(self.X, self.Y)
        mr.calculateCoeff()
        y_val = mr.predict(_x_val)
        prob_val = sigmoid(y_val)
        return 0 if prob_val < 0.5 else 1
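
The sigmoid used above is assumed to be a module-level helper; its standard definition:

import numpy as np

def sigmoid(z):
    # Logistic function: maps any real value into (0, 1).
    return 1.0 / (1.0 + np.exp(-z))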
Example #19
    def __init__(self, degree=1, method="bgd", lamb=0):
        assert method in ("bgd", "sgd", "normal", "cd", "cd_pure", "mgd", "pgd", "pgd_acc", "admm"), \
            "No such method, please select from bgd, sgd, normal, cd, cd_pure, mgd, pgd, pgd_acc and admm"
        assert lamb >= 0
        assert degree >= 1
        self.lamb = lamb  # lambda (regularization) hyperparameter
        self.degree = degree  # order of the polynomial regression
        self.method = method  # minimization method
        self._lin_reg = LinearRegression()  # underlying linear regressor
        self._poly_fea = None  # PolynomialFeatures
        self._std_scaler = None  # StandardScaler
        self.theta = None  # coefficient vector
        self.pca = None
Example #20
    def __init__(self):
        self._initGraph()
        linearReg = LinearRegression(self._days, self._remaningPoints)
        poliReg = PolinomialRegression(self._days, self._remaningPoints, 3)

        plt.plot(self._xval, self._remaningPoints, label="Original Data")
        plt.plot(linearReg.getXAxis(),
                 linearReg.getFunction(),
                 label="Linear Function")
        plt.plot(poliReg.getXAxis(),
                 poliReg.getFunction(),
                 label="Polynomial Function")
        plt.legend()
        self._plotGraph()
Example #21
def start_linear_regression(training_records, output):
    """
    In this method, we compare the weights calculated using our gradient descent approach with the sklearn's output.

    `Our method`
    >>> regressor = LinearRegression(iterations=NUM_OF_ITERATIONS, learning_rate=LEARNING_RATE)
    >>> weights_table, mse_costs, predicted_outputs = regressor.calculate_weights(training_records, output)

    As you see above there are 3 tables returned from our approach.

    1. weights_table - This is where we store the history of the weights from iteration 0 to the last iteration.
       To access the set of weights in the last iteration simply use `weights_table[-1]`

    2. mse_costs - Table which stores the mean square error for each iteration.

    3. predicted_outputs - This is the predicted output using our model (i.e. the learned weights).

    The following code fragment shows how to invoke sklearn's Linear regression.
    `sklearn's method`
    >>> clf = linear_model.LinearRegression(fit_intercept=False)
    >>> clf.fit(training_records, output)

    Lastly, we just print the weights and it is left to the user to visually compare them.

    :parameter training_records - N X P matrix of training samples.
    :parameter output - N X 1 vector of output.

    :return:
    """
    regressor = LinearRegression(iterations=NUM_OF_ITERATIONS,
                                 learning_rate=LEARNING_RATE)
    print(np.shape(training_records))
    print(np.shape(output))
    weights_table, mse_costs, predicted_outputs = regressor.calculate_weights(
        training_records, output)

    clf = linear_model.LinearRegression(fit_intercept=False)
    clf.fit(training_records, output)
    print("Starting gradient descent with {0} iterations and a learning rate of {1}".format(
        NUM_OF_ITERATIONS, LEARNING_RATE))
    print("Running...")
    final_weights = [
        weights_table[-1][i] for i in range(0, NUM_OF_FEATURES + 1)
    ]
    print("After %s iterations of Gradient Descent (our implementation), the final weights are : %s" % (
        NUM_OF_ITERATIONS, final_weights))

    print("Using Sklearn's Linear Regression, the weights are : %s" % clf.coef_)
    return weights_table, mse_costs
Example #22
    def fitdata(*argv):

        import matplotlib.pyplot as plt

        from LinearRegression import LinearRegression

        x, y = argv[0], argv[1]
        [x1, x2] = LinearRegression(x, y)

        y_fit = []
        for fit in y:
            y_fit.append(x1 + x2 * fit)

        plt.scatter(y, x)
        plt.plot(y_fit, x)
        return plt.show()
Example #23
def best_params():
    lr_list = [0.1, 0.01, 0.05, 0.001, 0.005, 0.0001, 0.0005]
    mse_min = float("inf")
    lr_min = 0
    n_iter_min = 0
    for lr_val in lr_list:
        for iteration in range(1000, 10000, 10):
            reg = LinearRegression(learning_rate=lr_val, n_iters=iteration)
            reg.fit(X_train, Y_train)
            predicted = reg.predict(X_test)
            mse_val = mse(Y_test, predicted)
            if mse_val < mse_min:
                mse_min = mse_val
                lr_min = lr_val
                n_iter_min = iteration
    return (lr_min, n_iter_min)
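
best_params is an exhaustive grid search over learning rate and iteration count. A usage sketch that trains a final model with the winning pair, assuming the same X_train/Y_train/X_test/Y_test globals and mse helper the function itself relies on:

lr_best, n_iters_best = best_params()
final_model = LinearRegression(learning_rate=lr_best, n_iters=n_iters_best)
final_model.fit(X_train, Y_train)
print("best lr:", lr_best, "best n_iters:", n_iters_best)
print("test MSE:", mse(Y_test, final_model.predict(X_test)))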
Example #24
def test_LinearRegression_test():
    """
    Test that regression has a working test abstract method
    """
    some = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])

    some_2 = pd.DataFrame([1.0, 2.0, 3.0, 4.0])

    m = LinearRegression(some)
    m.train(some_2)
    test = m.test(some)

    for i in range(3):
        assert round(test[0][i], 3) == some_2[0][i]

    print(test[0])
    print(some_2[0])
Example #25
    def __init__(self, parent):
        """
        Parameters
        ----------
        parent : MainApplication
            A reference to the MainApplication object that created the plot.
            This is used to access the PlotData object inside the main app.
        """

        self.fig, self.ax = plt.subplots()
        plt.show(block=False)

        self.parent = parent

        self.check_state = False
        self.cid = self.fig.canvas.mpl_connect("button_press_event",
                                               self.on_click)

        self.points = []
        self.lin_reg = LinearRegression()

        self.ax.set(xlim=(-10, 10), ylim=(-10, 10))
Example #26
def LinearRegressionTest():
    X, y, w_org, b_org = gen_data()

    learning_rate = 0.1
    max_itr = 500
    # build the training set
    X = X[0].tolist()
    y = y[0].tolist()
    m = len(X)
    index = list(range(100))
    np.random.shuffle(index)  # shuffle the indices
    train_idxes = index[0:70]
    train_X = [X[i] for i in train_idxes]
    train_y = [y[i] for i in train_idxes]
    mean = np.mean(train_X)
    std = np.std(train_X)
    train_X = (train_X - mean) / std
    # build the test set
    test_idxes = index[70:100]
    test_X = [X[i] for i in test_idxes]
    test_y = [y[i] for i in test_idxes]
    test_X = (test_X - mean) / std  # normalize with the training-set statistics
    # train
    lr = LinearRegression()
    w = lr.train(train_X,
                 train_y,
                 l_rate=learning_rate,
                 max_itr=max_itr,
                 batch_size=50)
    print('test loss is {0}'.format(lr.test(test_X, test_y)))
    x_ax = np.arange(0, 100, 10)  # np.arange so the normalization below broadcasts
    x_ax = (x_ax - mean) / std
    y_ax = w[0] * x_ax + w[1]

    plt.scatter(train_X, train_y, marker='o', c='b')
    plt.scatter(test_X, test_y, marker='o', c='r')
    plt.plot(x_ax, y_ax)
    plt.show()
Example #27
# loading data
data = pd.read_csv('Salary_Data.csv')
data = featureScale(data)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# splitting data to test and train sets

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=1)

# creating the model
model = LinearRegression()

# training the model
MSE, theta = model.train(X_train, y_train, Lambda=0.01)

# using the model to predict
y_pred = model.predict(X_test)

# calculating R2 score for my model
R2 = r2_score(y_test, y_pred)
print("R2 score = ", R2)

# plotting cost function
plt.grid()
plt.plot(MSE)
plt.show()
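
The train call above returns both a per-iteration cost history and the learned parameters, which suggests gradient descent with an L2 penalty (Lambda) under the hood. A sketch of such a method under that assumption, not the model's actual implementation:

import numpy as np

def train(X, y, Lambda=0.01, alpha=0.01, iterations=1000):
    # Ridge-style gradient descent; returns (cost history, theta). Sketch only.
    m = X.shape[0]
    Xb = np.c_[np.ones(m), X]  # bias column
    theta = np.zeros(Xb.shape[1])
    history = []
    for _ in range(iterations):
        err = Xb @ theta - y
        history.append((err @ err + Lambda * theta[1:] @ theta[1:]) / (2 * m))
        grad = Xb.T @ err / m
        grad[1:] += (Lambda / m) * theta[1:]  # leave the bias unpenalized
        theta -= alpha * grad
    return history, theta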
Example #28
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
from scipy import interpolate
from LinearRegression import LinearRegression

data_matrix = np.loadtxt('./exdata1.txt')

x = data_matrix[:, 0:1]
y = data_matrix[:, 1:2]


lin_reg = LinearRegression(x, y) 
cost_function = lin_reg.gradient_descent()

predicted_x = []
predicted_y = []

i = 0

while i < 23.50:
    predicted_x.append(i)
    hypoth_of_i = np.matmul([1, i], lin_reg.thetas)[0]
    predicted_y.append(hypoth_of_i)
    i += 0.5

# Our data set with hypothesis function
data_set_graph = plt.figure()
plt.plot(x, y, 'ro')
plt.plot(predicted_x, predicted_y)
plt.ylabel('Profit in $10,000s')
Example #29
from utils import printNumericTable
from daal.data_management import HomogenNumericTable
import numpy as np

nFeatures = 10
nDependentVariables = 2

seeded = np.random.RandomState(42)
trainData = HomogenNumericTable(seeded.rand(200, nFeatures))
trainDependentVariables = HomogenNumericTable(
    seeded.rand(200, nDependentVariables))
testData = HomogenNumericTable(seeded.rand(50, nFeatures))
testGroundTruth = HomogenNumericTable(seeded.rand(50, nDependentVariables))

# Instantiate the Linear Regression object
lr = LinearRegression()
# Training
trainingResult = lr.training(trainData, trainDependentVariables)
# Prediction
pred_array = lr.predict(trainingResult, trainData)
# Serialize
lr.serialize(trainingResult, fileName='trainingResult.npy')
# Deserialize
de_trainingResult = lr.deserialize(fileName="trainingResult.npy")
# Predict with quality metrics
predRes, predResRed, singleBeta, groupBeta = lr.predictWithQualityMetrics(
    trainingResult,
    trainData,
    trainDependentVariables,
    reducedBetaIndex=[2, 10])
# Print metrics results
Example #30
    # print(np.unique(y))

    X = iris_df.drop(['class_b', 'class'], axis=1)
    print(type(X), type(y))
    return X, y

X, y = load_file0(path)

plt.scatter(X, y)
plt.show()

print(X.ndim, y.shape)

X_train, X_test, y_train, y_test = train_test_split(X, y)

linReg = LinearRegression()

print(type(X_train))

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

linReg.fit_normal(X_train, y_train)

print("特征参数:", linReg.coef_)

print("截距:",linReg.intercept_)

predictor = linReg.predict(X_test)

print("预测结果:", predictor)