Example #1
def linear_test():
    X, y = make_regression(n_features=1, noise=20, random_state=1234)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=1234)

    lr = LinearRegression()
    lr.fit(X_train, y_train)
    y_pred = lr.predict(X_test)

    plt.figure(1, figsize=(5, 4))
    plt.scatter(X_test, y_test, c="black")
    plt.plot(X_test, lr.theta * X_test + lr.bias, linewidth=1, c="red")
    plt.axhline(0.5, color=".5")

    plt.ylabel("y")
    plt.xlabel("X")
    plt.legend(
        ("Linear Regression Model", ),
        loc="lower right",
        fontsize="small",
    )
    plt.tight_layout()
    plt.show()
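The LinearRegression used in this example is a project class rather than sklearn's: it is fit the same way, but the plotting code reads learned theta and bias attributes. A minimal sketch consistent with those calls, assuming a closed-form single-feature least-squares fit (not the project's actual source), is:

import numpy as np

class LinearRegression:
    """Hypothetical single-feature least-squares model (sketch only)."""

    def fit(self, X, y):
        x = np.asarray(X).ravel()
        # Closed-form simple linear regression: slope, then intercept.
        self.theta = np.cov(x, y, bias=True)[0, 1] / np.var(x)
        self.bias = np.mean(y) - self.theta * np.mean(x)

    def predict(self, X):
        return self.theta * np.asarray(X).ravel() + self.bias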
Example #2
def test_LinearRegression(dim):
    model_name = "LinearRegression"

    x, y = make_regression(n_samples=1000, n_features=dim)

    model = LinearRegression(dim)
    check_model(model, model_name, x, y, category="regression")
Example #3
def test_linreg():
    '''
        Helper function that tests LinearRegression.

        @param:
            None
        @return:
            None
    '''
    # Scratch sanity checks on slicing and bias padding
    m = np.array([[2, 3], [1, 0]])
    mm = np.array([[2, 3, 4, 5], [1, 1, 3, 0]])
    for l in range(2):
        print(mm[l, range(2)])
    n = np.append(m, np.ones((len(m), 1)), axis=1)
    s = LinearRegression(m.shape[1])

    X_train, X_test, Y_train, Y_test = import_wine(WINE_FILE_PATH)
    num_features = X_train.shape[1]

    # Padding the inputs with a bias
    X_train_b = np.append(X_train, np.ones((len(X_train), 1)), axis=1)
    X_test_b = np.append(X_test, np.ones((len(X_test), 1)), axis=1)
    #### Matrix Inversion ######
    print('---- LINEAR REGRESSION w/ Matrix Inversion ---')
    solver_model = LinearRegression(num_features)
    solver_model.train(X_train_b, Y_train)
    print('Average Training Loss:',
          solver_model.average_loss(X_train_b, Y_train))
    print('Average Testing Loss:', solver_model.average_loss(X_test_b, Y_test))
Example #4
def one_hot_regression(proc, data):
    """
    Linear regression using a one-hot representation.

    :param proc: processing object
    :param data: tuple containing train/test splits (one-hot encoded space)
    :return: fitted linear regression object
    """
    print('one hot regression...')
    # train and test
    trainOneHotX, trainY, testOneHotX, testY = data

    ######### linear regression #########
    linear_models = 'ridge'
    print('ridge regression with one hot representation...')
    linReg = LinearRegression(model=linear_models)
    linReg.fit(trainOneHotX, trainY)
    preds = linReg.predict(testOneHotX)
    print('test r2 score: ', metrics.r2_score(testY, preds))
    print('test mse: ', metrics.mse(testY, preds))

    return linReg
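LinearRegression(model='ridge') shows that this, too, is a project wrapper rather than sklearn's class. One plausible minimal shape, assuming a thin delegation to sklearn.linear_model (the structure is a guess, not the project's actual code), is:

from sklearn import linear_model

class LinearRegression:
    """Hypothetical wrapper that selects an sklearn backend by name."""

    def __init__(self, model='linear'):
        # 'ridge' selects L2-regularized least squares; anything else
        # falls back to ordinary least squares.
        self._impl = (linear_model.Ridge() if model == 'ridge'
                      else linear_model.LinearRegression())

    def fit(self, X, y):
        self._impl.fit(X, y)
        return self

    def predict(self, X):
        return self._impl.predict(X)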
Example #5
def _initialize_models(self, data_generator):
    """Initializes models prior to training."""
    models = {
        "Linear Regression": LinearRegression(),
        "Logistic Regression": LogisticRegression(),
        "Quadratic Regression": QuadraticRegression(),
        "Naive Bayes": NaiveBayes(std_X=data_generator.std_X,
                                  m0=data_generator.m0s,
                                  m1=data_generator.m1s),
        "kNN CV": kNNCV(n_folds=self.n_folds)
    }
    return models
Example #6
def test_models(dataset, epochs, test_size=0.2):
    '''
        Tests LinearRegression, OneLayerNN, and TwoLayerNN on a given dataset.

        :param dataset: The path to the dataset
        :param epochs: Number of training epochs for the neural network models
        :param test_size: Fraction of the data held out for testing
        :return: None
    '''

    # Check if the file exists
    if not os.path.exists(dataset):
        print('The file {} does not exist'.format(dataset))
        exit()

    # Load in the dataset
    data = np.loadtxt(dataset, skiprows=1)
    X, Y = data[:, 1:], data[:, 0]

    # Normalize the features
    X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)

    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=test_size)

    print('Running models on {} dataset'.format(dataset))

    #### Linear Regression ######
    print('----- LINEAR REGRESSION -----')
    # Add a bias
    X_train_b = np.append(X_train, np.ones((len(X_train), 1)), axis=1)
    X_test_b = np.append(X_test, np.ones((len(X_test), 1)), axis=1)
    regmodel = LinearRegression()
    regmodel.train(X_train_b, Y_train)
    print('Average Training Loss:', regmodel.average_loss(X_train_b, Y_train))
    print('Average Testing Loss:', regmodel.average_loss(X_test_b, Y_test))

    #### 1-Layer NN ######
    print('----- 1-Layer NN -----')
    nnmodel = OneLayerNN()
    nnmodel.train(X_train_b, Y_train, epochs=epochs, print_loss=False)
    print('Average Training Loss:', nnmodel.average_loss(X_train_b, Y_train))
    print('Average Testing Loss:', nnmodel.average_loss(X_test_b, Y_test))

    #### 2-Layer NN ######
    print('----- 2-Layer NN -----')
    model = TwoLayerNN(5)
    # Use X without a bias, since we learn a bias in the 2 layer NN.
    model.train(X_train, Y_train, epochs=epochs, print_loss=False)
    print('Average Training Loss:', model.average_loss(X_train, Y_train))
    print('Average Testing Loss:', model.average_loss(X_test, Y_test))
Example #7
def main(grid):
	# Get Clean Data
	X, Y = read_clean_data()
	# Linear Regression
	try:
		LinearRegression(X, Y, grid)
	except Exception as e:
		print(e)
	# Binarize Y
	Y_binary = BinaryY(Y)
	# Logistic Regression
	try:
		LogisticRegression(X, Y_binary, grid)
	except Exception as e:
		print(e)
	# Decision Tree
	try:
		DecisionTree(X, Y_binary, grid)
	except Exception as e:
		print(e)
	# Support Vector Machine
	try:
		SVM(X, Y_binary, grid)
	except Exception as e:
		print(e)
	# Random Forest
	try:
		RandomForest(X, Y_binary, grid)
	except Exception as e:
		print(e)
	# Bagging Classifier
	try:
		Bagging(X, Y_binary, grid)
	except Exception as e:
		print(e)
	# Neural Network
	try:
		NeuralNet(X, Y_binary, grid)
	except Exception as e:
		print(e)
Example #8
def test_linreg():
    '''
        Helper function that tests LinearRegression.

        @param:
            None
        @return:
            None
    '''

    X_train, X_test, Y_train, Y_test = import_wine(WINE_FILE_PATH)

    num_features = X_train.shape[1]

    # Padding the inputs with a bias
    X_train_b = np.append(X_train, np.ones((len(X_train), 1)), axis=1)
    X_test_b = np.append(X_test, np.ones((len(X_test), 1)), axis=1)

    #### Matrix Inversion ######
    print('---- LINEAR REGRESSION w/ Matrix Inversion ---')
    solver_model = LinearRegression(num_features)
    solver_model.train(X_train_b, Y_train)
    print('Average Training Loss:', solver_model.average_loss(X_train_b, Y_train))
    print('Average Testing Loss:', solver_model.average_loss(X_test_b, Y_test))
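The "Matrix Inversion" banner suggests a normal-equations solver, and Examples #3, #6, and #8 all call the same train/average_loss interface on bias-padded inputs. The class itself is not shown; a minimal sketch matching those calls, assuming the pseudo-inverse form of the normal equations, is:

import numpy as np

class LinearRegression:
    """Hypothetical normal-equations solver (sketch only)."""

    def __init__(self, num_features):
        # +1 for the bias column the callers append to X.
        self.weights = np.zeros(num_features + 1)

    def train(self, X, Y):
        # Normal equations: w = (X^T X)^+ X^T Y; pinv for numerical safety.
        self.weights = np.linalg.pinv(X.T @ X) @ X.T @ Y

    def average_loss(self, X, Y):
        # Mean squared error of the linear predictions.
        return np.mean((X @ self.weights - Y) ** 2)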
Example #9
import pandas as pd
import matplotlib.pyplot as plt
import models.LinearRegression as lr


df = pd.read_csv('Datasets/house.csv')
df.columns = ['size', 'price']
x = df['size']
y = df['price']

# Fit by gradient descent; thetas holds [intercept, slope]
thetas = lr.gradient(x, y)

y_fit = thetas[0] + thetas[1] * x
plt.figure(figsize=(20, 3))

plt.scatter(x, y, s=20)
plt.plot(x, y_fit, color="red")

plt.show()
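gradient comes from models.LinearRegression and, per the usage above, returns [intercept, slope]. A sketch of a plain batch gradient-descent version (the learning rate and epoch count are illustrative assumptions; unscaled housing data usually needs a very small step or prior feature scaling) is:

import numpy as np

def gradient(x, y, lr=1e-7, epochs=1000):
    """Hypothetical re-implementation: fit y ~ theta0 + theta1 * x."""
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    theta0 = theta1 = 0.0
    for _ in range(epochs):
        err = theta0 + theta1 * x - y
        theta0 -= lr * 2.0 * err.mean()        # d(MSE)/d(theta0)
        theta1 -= lr * 2.0 * (err * x).mean()  # d(MSE)/d(theta1)
    return theta0, theta1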
Example #10
if __name__ == "__main__":

    # N is the number of training points.
    # D_in is input dimension
    # D_out is output dimension.
    N, D_in, D_out = 64, 1, 1

    # Add some noise to the observations
    noise_var = 0.5

    # Create random input and output data; lhs draws N Latin hypercube
    # samples in [0, 1]^D_in (e.g., pyDOE's lhs)
    X = lhs(D_in, N)
    y = 5 * X + noise_var * np.random.randn(N, D_out)

    # Define the model
    model = LinearRegression(X, y)

    # Define an optimizer
    optimizer = SGD(model.num_params, lr=1e-3, momentum=0.9)
    #    optimizer = Adam(model.num_params, lr = 1e-3)
    #    optimizer = RMSprop(model.num_params, lr = 1e-3)

    # Train the model
    model.train(10000, optimizer)

    # Print the learned parameters (theta[-1] stores the log noise variance)
    print('w = %e, sigma_sq = %e' %
          (float(model.theta[:-1]), float(np.exp(model.theta[-1]))))

    # Make predictions
    y_pred = model.predict(X)
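The SGD, Adam, and RMSprop optimizers here operate on a flat parameter vector of length model.num_params. Their API is not shown; the sketch below implements a standard momentum SGD update consistent with that constructor, where the step(params, grads) signature is an assumption of this sketch:

import numpy as np

class SGD:
    """Hypothetical momentum SGD over a flat parameter vector."""

    def __init__(self, num_params, lr=1e-3, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.velocity = np.zeros(num_params)

    def step(self, params, grads):
        # v <- momentum * v - lr * grad;  theta <- theta + v
        self.velocity = self.momentum * self.velocity - self.lr * grads
        return params + self.velocity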
Example #11
def main():
    # NOTE: this code targets the legacy (pre-0.4) PyTorch API:
    # Variable, volatile=True, and loss.data[0] instead of loss.item().
    use_cuda = torch.cuda.is_available()
    opt = parser.parse_args()

    print(opt)

    input_size = 784
    #num_classes = 10
    output_size = 10

    net = LinearRegression(input_size, output_size)

    if use_cuda:
        net.cuda()
        net = torch.nn.DataParallel(net,
                                    device_ids=range(
                                        torch.cuda.device_count()))
        cudnn.benchmark = True

    criterion = loss_fn(opt)
    train_loader, test_loader = data_set(opt)
    # optimizer = get_optimizer(net, opt, opt.lr)

    epoch_step = json.loads(opt.epoch_step)

    optimizer = get_optimizer(net, opt, opt.lr)

    base_ = str(random.randint(1, 100000)) + '_' + str(
        random.randint(1, 100000))
    progress = KeepProgress(net, opt, base=base_)
    print(base_)

    def train(epoch):

        if epoch in epoch_step:
            lr_adjust(optimizer, epoch, opt, epoch_step)

        net.train()
        train_loss = 0
        correct = 0
        total = 0
        for t, (images, labels) in enumerate(train_loader):
            inputs, targets = Variable(images.view(-1,
                                                   28 * 28)), Variable(labels)
            use_cuda = torch.cuda.is_available()
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            def closure():
                net.zero_grad()
                outputs = net(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                return loss

            loss = optimizer.step(closure)
            outputs = net(inputs)
            #loss = closure()
            train_loss += loss.data[0]
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum()

            if t % 100 == 0:
                print(t)
                progress.train_progress({
                    'train_accuracy': 100 * correct / total,
                    'train_loss': loss.data[0]
                })

    def test(epoch):
        correct = 0
        total = 0
        test_loss = 0
        for inputs, targets in test_loader:

            inputs = Variable(inputs.view(-1, 28 * 28), volatile=True)
            targets = Variable(targets)

            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            outputs = net(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.data[0]

            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum()

        print('Accuracy of the model on the 10000 test images: %d %%' %
              (100 * correct / total))

        progress.test_progress({
            'test_loss':
            test_loss / len(test_loader.dataset),
            'test_accuracy':
            100 * correct / total
        })

    for epoch in range(opt.epochs):
        train(epoch)
        test(epoch)
Example #12
import numpy as np
from helperFunctions import *
import matplotlib.pyplot as plt
from models import LinearRegression

# Test script
lm = LinearRegression()
n = 50

# Generate some noisy train data
noise = np.random.randn(n)
x = np.linspace(0, 20, n)
y = 32.3 + 5.2 * (x + noise)

x = scaleFeature(x)
y = scaleFeature(y)

x = x.reshape((-1, 1))

# Train the lm and print out the learned parameters
# (positional hyperparameters assumed: learning rate 0.5, 100 iterations,
# verbosity 0; the custom train() signature is not shown here)
lm.train(x, y, 0.5, 100, 0)
print(lm.parameters)

# Plot the fit of the linear model
y_hat = lm.predict(x)

# Plot the fit of line and train data
plt.plot(x, y, 'o')
plt.plot(x, y_hat)
plt.ylabel("y")
plt.xlabel("x")
Example #13
import time
import numpy as np
import matplotlib.pyplot as plt
from dataset_income import Data
from metrics import MSE
from models import ConstantModel, LinearRegression, LinearRegressionWithB
from gradient_descent import stochastic_gradient_descent, gradient_descent, mini_batch_gradient_descent

if __name__ == '__main__':
    dataset = Data(
        r'C:\Users\Lautaro\PycharmProjects\ceia_intro_a_IA\clase_3\ejercicios\data\income.csv'
    )

    X_train, X_test, y_train, y_test = dataset.split(0.8)

    linear_regression = LinearRegression()
    linear_regression.fit(X_train, y_train)
    lr_y_hat = linear_regression.predict(X_test)

    linear_regression_b = LinearRegressionWithB()
    linear_regression_b.fit(X_train, y_train)
    lrb_y_hat = linear_regression_b.predict(X_test)

    constant_model = ConstantModel()
    constant_model.fit(X_train, y_train)
    ct_y_hat = constant_model.predict(X_test)

    mse = MSE()
    lr_mse = mse(y_test, lr_y_hat)
    lrb_mse = mse(y_test, lrb_y_hat)
    ct_mse = mse(y_test, ct_y_hat)

    print('LinearRegression MSE:      ', lr_mse)
    print('LinearRegressionWithB MSE: ', lrb_mse)
    print('ConstantModel MSE:         ', ct_mse)
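MSE here is a metric object from the local metrics module. A plausible minimal form, assuming a simple callable (a sketch, not the module's actual code), is:

import numpy as np

class MSE:
    def __call__(self, target, prediction):
        # Mean squared error between targets and predictions.
        target = np.asarray(target)
        prediction = np.asarray(prediction)
        return np.mean((target - prediction) ** 2)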
Example #14
def main():
    """
    Trains classifiers on each dataset, corrupts the held-out data with
    error generators, and fits a meta-classifier to predict the resulting
    performance scores.
    """
    path = get_resource_path()

    classifiers = [
        # DecisionTree(),
        # RandomForest(size=40),
        # ExtremelyRandomizedTrees(size=40),
        # XGB(),
        # SVM(),
        # LinearSVM(),
        # KNN(n_neighbors=7),
        LogRegression(),
        # GausNB(),
        # BaggingRandomForest(size=40),
        # MLPC(input_size=[16, 32, 16, 8])
    ]

    error_generators = [
        Anomalies(),
        Typos(),
        ExplicitMissingValues(),
        ImplicitMissingValues(),
        SwapFields()
    ]

    # TODO: dataset size as a hyperparameter
    # TODO: random_state as a hyperparameter
    hyperparams = {
        'train_ratio': .7,
        'val_ratio': .1,
        'test_ratio': .1,
        'target_ratio': .1,
        'random_state': [0],
        # 'row_fraction': [0.01, 0.05, 0.1, 0.2, 0.3, 0.5, 0.8],
        'row_fraction': [0.2],
        'classifier': classifiers,
        # Ordering of error generators
        # 'mask': [(0, 0, 1, 0, 0), (0, 0, 0, 1, 0), (0, 0, 0, 0, 1),
        #          (0, 2, 0, 0, 1)],
        'mask': [(0, 0, 0, 1, 0)],
        'testset_size': 100
    }

    datasets = pd.read_csv(os.path.join(path, 'datasets.csv'))

    for dataset_info in datasets.values:
        filepath, name, target_feature, task = tuple(dataset_info)
        data = pd.read_csv(os.path.join(path, 'data', filepath))

        for state in HyperParameterHolder(hyperparams):
            print("HyperParam : %s" % str(state))
            # Dataset Split
            (X_train, y_train, X_val, y_val, X_test, y_test, X_target,
             y_target) = split_dataset(data, target_feature, state)

            tuning_done = False
            while not tuning_done:
                # ML Pipeline Training Procedure
                model = BlackBox().train(state['classifier'], X_train, y_train)

                # ML Pipeline Validation Procedures
                predicted = model.predict(X_val)
                score = performance_metric(y_val, predicted)
                print("Validation : accuracy = %.4f" % round(score, 4))
                tuning_done = True

            # ML Pipeline final performance score
            predicted = model.predict(X_test)
            score = performance_metric(y_test, predicted)
            print("Test       : accuracy = %.4f" % round(score, 4))

            # Meta Classifier Training Procedure
            error_gen_strat = ErrorGenerationStrategy(error_generators, state)
            # TODO: so far, X_test/y_test is used for training

            # prepare a dataset based on X_test and repeated error generation
            # NB: returns a python list, not a numpy array or pandas dataframe
            list_of_corrupted_X_test = error_gen_strat.on(X_test, state)

            try:
                meta_classifier = MetaClassifier(model, LinearRegression())
                print(str(meta_classifier))
                meta_classifier.fit(list_of_corrupted_X_test, y_test)

                # Meta Classifier Evaluation Procedure
                list_of_corrupted_X_target = error_gen_strat.on(
                    X_target, state)
                predicted_scores = meta_classifier.predict(
                    list_of_corrupted_X_target)
                actual_scores = [
                    performance_metric(y_target, model.predict(x))
                    for x in list_of_corrupted_X_target
                ]
                plt.plot(range(len(actual_scores)), actual_scores, 'g^')
                plt.plot(range(len(predicted_scores)), predicted_scores, 'ro')
                plt.gca().legend(('ground truth', 'predicted scores'))
                plt.grid(True)
                plt.show()
                result = distance_metric(actual_scores, predicted_scores)

                print("Evaluation : distance metric = %.4f" % round(result, 4))
                print()
            except Exception as e:
                print("\nException  : %s\n%s\n" % (str(error_gen_strat), e))
Example #15
def linear_regression_model():
    return LinearRegression()
Example #16
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
import config_loader
# NOTE: this excerpt also assumes project-local imports for DataLoader and
# the custom LinearRegression model, which are not shown.

window_size = config_loader.get_window_size()
num_days = config_loader.get_num_predicted_days()

# Load the training data with n days held out
print("Loading training data...")
X_train, y_train, y_test = DataLoader.get_date_separated_testing_data(
    window_size,
    num_days,
)
# train the model (and save it to file)
print("Training the model...")
model = LinearRegression()
model.train(X_train, y_train)

# get metrics for the multi-day predictions in each state
states = DataLoader.get_states()
all_errors = np.empty((0, num_days))
all_predictions = np.empty((0, num_days))
all_actual = np.empty((0, num_days))
all_control = np.empty((0, num_days))
for state_name, state_abbrev in states.items():
    if state_abbrev != "US":
        # get the multi-day predictions
        case_df = DataLoader.get_daily_cases_df(state_abbrev)[:-num_days]
        vax_df = DataLoader.get_daily_vaccinations_df(state_abbrev)[:-num_days]
        future_vaccinations = DataLoader.get_assumed_vaccinations_dict(
            vax_df, num_days, multiplier=1)
Example #17
import sys

from models import LinearRegression

try:
    passes = int(sys.argv[1])
    split = float(sys.argv[2])
except (IndexError, ValueError):
    print("Using default number of passes and split ratio")
    passes = 2
    split = 0.72
model = LinearRegression('data/iris.data')
model.fit(passes, split)
Example #18
def sin_fitting_example():
    # y = sin(x)
    amt_points = 36
    x = np.linspace(0, 360, num=amt_points)
    y = np.sin(x * np.pi / 180.)
    noise = np.random.normal(0, .1, y.shape)
    noisy_y = y + noise

    X_train = x
    y_train = noisy_y

    regression = LinearRegression()

    # linear
    X_linear = np.vstack((X_train, np.ones(len(X_train)))).T
    regression.fit(X_linear, y_train.reshape(-1, 1))
    W_linear = regression.model
    y_linear = W_linear[0] * x + W_linear[1]

    # quadratic
    X_quadratic = np.vstack((np.power(X_train, 2), X_train,
                             np.ones(len(X_train)))).T
    regression.fit(X_quadratic, y_train.reshape(-1, 1))
    W_quadratic = regression.model
    y_quadratic = (W_quadratic[0] * np.power(x, 2) + W_quadratic[1] * x +
                   W_quadratic[2])

    # cubic
    X_cubic = np.vstack((np.power(X_train, 3), np.power(X_train, 2), X_train,
                         np.ones(len(X_train)))).T
    regression.fit(X_cubic, y_train.reshape(-1, 1))
    W_cubic = regression.model
    y_cubic = (W_cubic[0] * np.power(x, 3) + W_cubic[1] * np.power(x, 2) +
               W_cubic[2] * x + W_cubic[3])

    # 10th-degree polynomial
    X_10 = np.vstack([np.power(X_train, p) for p in range(10, 0, -1)] +
                     [np.ones(len(X_train))]).T
    regression.fit(X_10, y_train.reshape(-1, 1))
    W_10 = regression.model
    y_10 = sum(W_10[i] * np.power(x, 10 - i) for i in range(10)) + W_10[10]

    # PLOTS
    plt.figure()
    plt.subplot(1, 1, 1)
    plt.gca().set_title('Sin(x) - Fitting curves')

    # original
    plt.plot(x, noisy_y, 'o')

    # linear
    plt.plot(x, y_linear, '-')

    # quadratic
    plt.plot(x, y_quadratic, '-')

    # cubic
    plt.plot(x, y_cubic, '-')

    # 10 power
    plt.plot(x, y_10, '-')

    plt.legend(['noisy signal', 'linear', 'quadratic', 'cubic', '10th power'])
    plt.show()
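As a side note, the stacked-powers design matrices built with np.vstack above can also be produced with np.vander, which yields the same descending-powers column layout (poly_design is a hypothetical helper name):

import numpy as np

def poly_design(x, degree):
    # Columns: x**degree, ..., x, 1 (matches the manual np.vstack(...).T)
    return np.vander(np.asarray(x, dtype=float), degree + 1)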
Example #19
def logistic_test():
    n_samples = 100
    np.random.seed(0)
    # NOTE: these hand-built samples are discarded below, where X_train and
    # y_train are reassigned from the make_classification split.
    X_train = np.random.normal(size=n_samples)
    y_train = (X_train > 0).astype(float)
    X_train[X_train > 0] *= 4
    X_train += 0.3 * np.random.normal(size=n_samples)

    X_train = X_train[:, np.newaxis]

    X, y = make_classification(
        n_features=1,
        n_classes=2,
        n_redundant=0,
        n_informative=1,
        n_clusters_per_class=1,
        class_sep=0.75,
        shuffle=True,
        random_state=0,
    )
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=0)

    df_test = pd.DataFrame(data=[X_test.flatten(), y_test]).T
    df_test.columns = ["X", "y"]

    lr = LogisticRegression()
    lr.fit(X_train, y_train)
    y_pred = lr.predict(X_test)

    score = [1 if yi == yi_pred else 0 for yi, yi_pred in zip(y_test, y_pred)]
    print(np.sum(score) / len(score))

    # and plot the result
    plt.figure(1, figsize=(4, 3))
    plt.clf()
    plt.scatter(X_train.ravel(), y_train, color="black", zorder=20)

    df_test["loss"] = expit(X_test * lr.theta + lr.bias).ravel()
    df_test = df_test.sort_values("X")
    plt.plot(df_test["X"], df_test["loss"], color="red", linewidth=3)

    ols = LinearRegression()
    ols.fit(X_train, y_train)
    plt.plot(X_test, ols.theta * X_test + ols.bias, linewidth=1)
    plt.axhline(0.5, color=".5")

    plt.ylabel("y")
    plt.xlabel("X")
    plt.xticks(range(-5, 10))
    plt.yticks([0, 0.5, 1])
    plt.ylim(-0.25, 1.25)
    plt.xlim(-2, 2)
    plt.legend(
        ("Logistic Regression Model", "Linear Regression Model"),
        loc="lower right",
        fontsize="small",
    )
    plt.tight_layout()
    plt.show()
Example #20
import numpy as np

from models import LinearRegression, LinearRegression2
from sklearn import datasets
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics

if __name__ == '__main__':
    iris = datasets.load_iris()
    train_x, test_x, train_y, test_y = model_selection.train_test_split(iris.data, iris.target, test_size=0.4)

    regressor = LinearRegression()
    regressor.fit(train_x, train_y)
    pred_y = regressor.predict(test_x)

    mse = metrics.mean_squared_error(test_y, pred_y)
    print(mse)

    regressor2 = LinearRegression2(train_x.shape[1])
    regressor2.fit(train_x, train_y)
    pred_y = regressor2.predict(test_x)

    mse = metrics.mean_squared_error(test_y, pred_y)
    print(mse)