def test_tensor(self):
        loader = DataLoader(TensorDataset(), batch_size=256)

        x, = dl_utils.flatten(loader)

        assert torch.is_tensor(x)
        assert x.shape == torch.Size([DATASET_SIZE, DATA_SIZE, DATA_SIZE])
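The dl_utils.flatten helper exercised by these tests is defined elsewhere in the repo. A minimal sketch consistent with the assertions (hypothetical; the real implementation may differ) collects each element of the batch tuples across all batches and concatenates along the batch dimension:

import torch

def flatten(loader):
    # Collect the tensors at each tuple position across all batches.
    columns = None
    for batch in loader:
        if not isinstance(batch, (list, tuple)):
            batch = (batch,)
        if columns is None:
            columns = [[] for _ in batch]
        for parts, tensor in zip(columns, batch):
            parts.append(tensor)
    # One concatenated tensor per tuple position, stacked along dim 0.
    return tuple(torch.cat(parts, dim=0) for parts in columns)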
    def train(self, dl_train: DataLoader):
        """
        Trains the KNN model. KNN training is memorizing the training data.
        Or, equivalently, the model parameters are the training data itself.
        :param dl_train: A DataLoader with labeled training samples (should
            return tuples).
        :return: self
        """

        # TODO:
        #  Convert the input dataloader into x_train, y_train and n_classes.
        #  1. You should join all the samples returned from the dataloader into
        #     the (N,D) matrix x_train and all the labels into the (N,) vector
        #     y_train.
        #  2. Save the number of classes as n_classes.
        # ====== YOUR CODE: ======
        x_train, y_train = dataloader_utils.flatten(dl_train)
        n_classes = torch.unique(y_train).numel()

        # ========================

        self.x_train = x_train
        self.y_train = y_train
        self.n_classes = n_classes
        return self
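A usage sketch for the method above, with toy data invented for illustration (the real datasets and the KNNClassifier constructor may differ):

import torch
from torch.utils.data import DataLoader, TensorDataset

ds = TensorDataset(torch.randn(100, 4), torch.randint(0, 3, (100,)))
knn = KNNClassifier(k=3).train(DataLoader(ds, batch_size=32))
print(knn.x_train.shape)  # torch.Size([100, 4]); the model *is* the data
print(knn.n_classes)      # 3, assuming all labels appear in the sample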
    def test_two_tuple(self):
        loader = DataLoader(TensorTwoTupleDataset(), batch_size=256)

        x, y = dl_utils.flatten(loader)

        assert torch.is_tensor(x)
        assert torch.is_tensor(y)
        assert x.shape == torch.Size([DATASET_SIZE, DATA_SIZE, DATA_SIZE])
        assert y.shape == torch.Size([DATASET_SIZE, DATA_SIZE, 1])
    def train(
        self,
        dl_train: DataLoader,
        dl_valid: DataLoader,
        loss_fn: ClassifierLoss,
        learn_rate=0.1,
        weight_decay=0.001,
        max_epochs=100,
    ):
        """
        Trains the model for up to max_epochs epochs over dl_train,
        evaluating on dl_valid after every epoch. Returns per-epoch
        (accuracy, loss) records for both sets.
        """
        Result = namedtuple("Result", "accuracy loss")
        train_res = Result(accuracy=[], loss=[])
        valid_res = Result(accuracy=[], loss=[])

        print("Training", end="")
        for epoch_idx in range(max_epochs):

            # TODO:
            #  Implement model training loop.
            #  1. At each epoch, evaluate the model on the entire training set
            #     (batch by batch) and update the weights.
            #  2. Each epoch, also evaluate on the validation set.
            #  3. Accumulate average loss and total accuracy for both sets.
            #     The train/valid_res variables should hold the average loss
            #     and accuracy per epoch.
            #  4. Don't forget to add a regularization term to the loss,
            #     using the weight_decay parameter.

            # ====== YOUR CODE: ======
            train_loss = 0
            train_acc = 0
            num_examples = 0
            for x, y in dl_train:
                y_pred, class_scores = self.predict(x)
                train_loss += loss_fn(x, y, class_scores, y_pred) * y.shape[0]
                grad = loss_fn.grad()
                self.weights = self.weights - learn_rate * (
                    grad + weight_decay * self.weights)
                num_examples += y.shape[0]
                train_acc += self.evaluate_accuracy(y, y_pred) * y.shape[0]
            train_res.loss.append(train_loss / num_examples)
            train_res.accuracy.append(train_acc / num_examples)

            x_valid, y_valid = dl_utils.flatten(dl_valid)
            y_pred, class_scores = self.predict(x_valid)
            valid_res.accuracy.append(self.evaluate_accuracy(y_valid, y_pred))
            valid_res.loss.append(
                loss_fn(x_valid, y_valid, class_scores, y_pred))
            # ========================
            print(".", end="")

        print("")
        return train_res, valid_res
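Note that the update above adds weight_decay * self.weights to the gradient, which is the derivative of an L2 penalty (weight_decay / 2) * ||W||^2 implicitly added to the loss, as the TODO requests. The evaluate_accuracy helper called in the loop is not shown; a plausible sketch (hypothetical) is the fraction of correct predictions:

    def evaluate_accuracy(self, y: torch.Tensor, y_pred: torch.Tensor):
        # Fraction of samples whose predicted class matches the label.
        return (y == y_pred).to(torch.float32).mean().item()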
    def test_three_tuple(self):
        loader = DataLoader(TensorThreeTupleDataset(), batch_size=128)

        x, y, z = dl_utils.flatten(loader)

        assert torch.is_tensor(x)
        assert torch.is_tensor(y)
        assert torch.is_tensor(z)
        assert x.shape == torch.Size([DATASET_SIZE, DATA_SIZE, DATA_SIZE])
        assert x.shape == y.shape
        assert z.shape == torch.Size([DATASET_SIZE, DATA_SIZE, 1])
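The TensorDataset, TensorTwoTupleDataset and TensorThreeTupleDataset classes used by these tests are test-local fixtures defined elsewhere; a hypothetical definition consistent with the shape assertions above (DATASET_SIZE and DATA_SIZE are constants from the test module) would be:

import torch

class TensorThreeTupleDataset(torch.utils.data.Dataset):
    # Each item is (x, y, z); after flattening, shapes become
    # (DATASET_SIZE, DATA_SIZE, DATA_SIZE) and (DATASET_SIZE, DATA_SIZE, 1).
    def __len__(self):
        return DATASET_SIZE

    def __getitem__(self, idx):
        x = torch.randn(DATA_SIZE, DATA_SIZE)
        y = torch.randn(DATA_SIZE, DATA_SIZE)
        z = torch.randn(DATA_SIZE, 1)
        return x, y, z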
def find_best_k(ds_train: Dataset, k_choices, num_folds):
    """
    Use cross validation to find the best K for the kNN model.

    :param ds_train: Training dataset.
    :param k_choices: A sequence of possible values of k for the kNN model.
    :param num_folds: Number of folds for cross-validation.
    :return: tuple (best_k, accuracies) where:
        best_k: the value of k with the highest mean accuracy across folds
        accuracies: The accuracies per fold for each k (list of lists).
    """

    accuracies = []
    for i, k in enumerate(k_choices):
        model = KNNClassifier(k)

        # TODO:
        #  Train model num_folds times with different train/val data.
        #  Don't use any third-party libraries.
        #  You can use your train/validation splitter from part 1 (note that
        #  then it won't be exactly k-fold CV since it will be a
        #  random split each iteration), or implement something else.

        # ====== YOUR CODE: ======
        indices = np.array_split(list(range(len(ds_train))), num_folds)
        acc = []
        for j in range(num_folds):
            train_subset = torch.utils.data.Subset(
                ds_train, np.concatenate(indices[0:j] + indices[j + 1:]))
            valid_subset = torch.utils.data.Subset(ds_train, indices[j])
            dl_train = torch.utils.data.DataLoader(train_subset)
            dl_valid = torch.utils.data.DataLoader(valid_subset)
            model.train(dl_train)
            x_test, y_test = dataloader_utils.flatten(dl_valid)
            y_pred = model.predict(x_test)
            acc.append(accuracy(y_test, y_pred))
        accuracies.append(acc)
        # ========================

    best_k_idx = np.argmax([np.mean(acc) for acc in accuracies])
    best_k = k_choices[best_k_idx]

    return best_k, accuracies
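The module-level accuracy helper used above is not shown; a plausible sketch, followed by a usage example (k_choices and num_folds invented for illustration, with ds_train a Dataset as in the signature):

def accuracy(y: torch.Tensor, y_pred: torch.Tensor) -> float:
    # Hypothetical: fraction of correct predictions.
    return (y == y_pred).to(torch.float32).mean().item()

best_k, accuracies = find_best_k(ds_train, k_choices=[1, 3, 5, 11],
                                 num_folds=4)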
def find_best_k(ds_train: Dataset, k_choices, num_folds):
    """
    Use cross validation to find the best K for the kNN model.

    :param ds_train: Training dataset.
    :param k_choices: A sequence of possible values of k for the kNN model.
    :param num_folds: Number of folds for cross-validation.
    :return: tuple (best_k, accuracies) where:
        best_k: the value of k with the highest mean accuracy across folds
        accuracies: The accuracies per fold for each k (list of lists).
    """

    accuracies = []

    for i, k in enumerate(k_choices):
        model = KNNClassifier(k)

        # TODO:
        #  Train model num_folds times with different train/val data.
        #  Don't use any third-party libraries.
        #  You can use your train/validation splitter from part 1 (note that
        #  then it won't be exactly k-fold CV since it will be a
        #  random split each iteration), or implement something else.

        # ====== YOUR CODE: ======
        cur_k_accuracies = torch.empty(num_folds)

        for j in range(num_folds):
            dl_train, dl_valid = dataloaders.create_train_validation_loaders(
                dataset=ds_train, validation_ratio=1 / num_folds)
            x_valid, y_valid = dataloader_utils.flatten(dl_valid)
            y_pred = model.train(dl_train).predict(x_valid)
            acc = accuracy(y_valid, y_pred)
            cur_k_accuracies[j] = acc

        accuracies.append(cur_k_accuracies.numpy())
        # ========================

    best_k_idx = np.argmax([np.mean(acc) for acc in accuracies])
    best_k = k_choices[best_k_idx]

    return best_k, accuracies
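This variant reuses dataloaders.create_train_validation_loaders from part 1 of the exercise, so each "fold" is an independent random split rather than a disjoint k-fold partition, as the TODO notes. A minimal sketch of that splitter (hypothetical; the real one may differ, and the batch size here is chosen arbitrarily):

import numpy as np
import torch
from torch.utils.data import DataLoader

def create_train_validation_loaders(dataset, validation_ratio, batch_size=100):
    # Randomly permute the indices and carve off the tail for validation.
    idx = np.random.permutation(len(dataset))
    n_valid = int(validation_ratio * len(dataset))
    train_sampler = torch.utils.data.SubsetRandomSampler(idx[:-n_valid].tolist())
    valid_sampler = torch.utils.data.SubsetRandomSampler(idx[-n_valid:].tolist())
    dl_train = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
    dl_valid = DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler)
    return dl_train, dl_valid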