import numpy as np

# PolynomialRegression and visualize are assumed to be provided elsewhere in
# this project; illustrative sketches of both are given at the end of this
# file.


def compare_dataset_size(train_datasets, test_dataset, K=10, out_dir="."):
    """Generate a plot comparing the effect of training-set size on error.

    Args:
    - train_datasets (list of dict): Training datasets drawn from the same
      distribution, each with keys "X" and "Y".
    - test_dataset (dict): The test dataset, with keys "X" and "Y".
    - K (int): The degree of the polynomial to fit. Note: 1 <= K <= 10.
    - out_dir (str): Directory in which to save the figure.
    """
    model = PolynomialRegression(K=K)

    title = "Comparing Effects of Dataset Size"
    x_label = "Dataset size"
    y_label = "Error (Log Scale)"

    # One for training error, one for testing error
    labels = ("Train Error", "Test Error")
    x_s = [[], []]
    y_s = [[], []]

    test_X = test_dataset["X"]
    test_Y = test_dataset["Y"]

    for dataset in train_datasets:
        train_X = dataset["X"]
        train_Y = dataset["Y"]

        num_samples = len(train_X)

        x_s[0].append(num_samples)
        x_s[1].append(num_samples)

        model.fit(train_X, train_Y)

        train_loss = model.compute_mse(train_X, train_Y)
        test_loss = model.compute_mse(test_X, test_Y)

        y_s[0].append(np.log(train_loss))
        y_s[1].append(np.log(test_loss))

    visualize(x_s,
              y_s,
              labels,
              title,
              x_label,
              y_label,
              savefig=True,
              out_dir=out_dir)
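
# A sketch of how compare_dataset_size might be called, assuming a synthetic
# quadratic ground truth with Gaussian noise; make_dataset and the constants
# below are illustrative assumptions, not part of the project:
#
#     rng = np.random.default_rng(0)
#
#     def make_dataset(n):
#         X = rng.uniform(-1, 1, size=n)
#         Y = 1.0 + 2.0 * X - 3.0 * X ** 2 + rng.normal(0.0, 0.1, size=n)
#         return {"X": X, "Y": Y}
#
#     train_datasets = [make_dataset(n) for n in (20, 50, 100, 500, 1000)]
#     compare_dataset_size(train_datasets, make_dataset(1000), K=5)
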
def compare_regularization(train_dataset,
                           test_dataset,
                           K,
                           l2_coefs,
                           title_prefix="",
                           out_dir="."):
    """ Generate plot to compare effects of model complexity
    """

    title = f"{title_prefix}Comparing Effects of Regularization"
    x_label = "L2 Coefficient (Lambda Term) 1e-2"
    y_label = "Error (Log Scale)"

    labels = ("Train Error", "Test Error")
    x_s = [[], []]
    y_s = [[], []]

    train_X = train_dataset["X"]
    train_Y = train_dataset["Y"]

    test_X = test_dataset["X"]
    test_Y = test_dataset["Y"]

    for l2_coef in l2_coefs:
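        # Scale by 1e2 so the plotted value times 1e-2 recovers lambda
        # (see x_label above).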
        x_s[0].append(l2_coef * 1e2)
        x_s[1].append(l2_coef * 1e2)

        model = PolynomialRegression(K)
        model.fit_with_l2_regularization(train_X, train_Y, l2_coef)

        train_loss = model.compute_mse(train_X, train_Y)
        test_loss = model.compute_mse(test_X, test_Y)

        y_s[0].append(np.log(train_loss))
        y_s[1].append(np.log(test_loss))

    visualize(x_s,
              y_s,
              labels,
              title,
              x_label,
              y_label,
              savefig=True,
              out_dir=out_dir)
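
# A sketch of how compare_regularization might be called; the grid of
# coefficients below is an illustrative assumption:
#
#     l2_coefs = [i * 1e-2 for i in range(11)]   # 0.00, 0.01, ..., 0.10
#     compare_regularization(train_dataset, test_dataset, K=10,
#                            l2_coefs=l2_coefs)
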
def compare_model_complexity(train_dataset,
                             test_dataset,
                             Ks,
                             title_prefix="",
                             out_dir="."):
    """ Generate plot to compare effects of model complexity
    """

    title = f"{title_prefix}Comparing Effects of Model Complexity"
    x_label = "Model Complexity (Degree of Polynomial)"
    y_label = "Error (Log Scale)"

    labels = ("Train Error", "Test Error")
    x_s = [[], []]
    y_s = [[], []]

    train_X = train_dataset["X"]
    train_Y = train_dataset["Y"]

    test_X = test_dataset["X"]
    test_Y = test_dataset["Y"]

    for K in Ks:
        x_s[0].append(K)
        x_s[1].append(K)

        model = PolynomialRegression(K)
        model.fit(train_X, train_Y)

        train_loss = model.compute_mse(train_X, train_Y)
        test_loss = model.compute_mse(test_X, test_Y)

        y_s[0].append(np.log(train_loss))
        y_s[1].append(np.log(test_loss))

    visualize(x_s,
              y_s,
              labels,
              title,
              x_label,
              y_label,
              savefig=True,
              out_dir=out_dir)
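
# A sketch of how compare_model_complexity might be called; the degree range
# is an illustrative assumption:
#
#     compare_model_complexity(train_dataset, test_dataset, Ks=range(1, 11))


# The helpers used above are assumed to be defined elsewhere in the project.
# The sketches below show one plausible implementation: they match the call
# signatures used above, but are illustrative assumptions, not the project's
# actual code.
import os

import matplotlib.pyplot as plt


class PolynomialRegression:
    """Degree-K polynomial regression fit by least squares (sketch)."""

    def __init__(self, K):
        self.K = K
        self.weights = None

    def _features(self, X):
        # Vandermonde features: columns [1, x, x^2, ..., x^K].
        return np.vander(np.asarray(X).reshape(-1), N=self.K + 1,
                         increasing=True)

    def fit(self, X, Y):
        # Ordinary least squares via the pseudo-inverse.
        Phi = self._features(X)
        self.weights = np.linalg.pinv(Phi) @ np.asarray(Y).reshape(-1)

    def fit_with_l2_regularization(self, X, Y, l2_coef):
        # Ridge regression: w = (Phi^T Phi + lambda * I)^-1 Phi^T y.
        Phi = self._features(X)
        A = Phi.T @ Phi + l2_coef * np.eye(Phi.shape[1])
        self.weights = np.linalg.solve(A, Phi.T @ np.asarray(Y).reshape(-1))

    def compute_mse(self, X, Y):
        preds = self._features(X) @ self.weights
        return float(np.mean((preds - np.asarray(Y).reshape(-1)) ** 2))


def visualize(x_s, y_s, labels, title, x_label, y_label,
              savefig=False, out_dir="."):
    """Plot one labeled curve per (x, y) pair (sketch)."""
    plt.figure()
    for x, y, label in zip(x_s, y_s, labels):
        plt.plot(x, y, marker="o", label=label)
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.legend()
    if savefig:
        plt.savefig(os.path.join(out_dir, f"{title}.png"))
    plt.show()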