Example no. 1
    def compute_accuracy(self, X, y):
        """
        Computes accuracy on provided data using mini-batches
        """
        indices = np.arange(X.shape[0])
        sections = np.arange(self.batch_size, X.shape[0], self.batch_size)
        batches_indices = np.array_split(indices, sections)

        pred = np.zeros_like(y)

        for batch_indices in batches_indices:
            batch_X = X[batch_indices]

            pred_batch = self.model.predict(batch_X)
            pred[batch_indices] = pred_batch

        return multiclass_accuracy(pred, y)
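
# Illustrative aside (not from the original notebook): how np.array_split
# with explicit section boundaries yields the mini-batches used above.
# With 10 samples and batch_size=4, sections = [4, 8], so the indices are
# split into [0..3], [4..7] and a smaller final batch [8, 9].
import numpy as np

indices = np.arange(10)
sections = np.arange(4, 10, 4)            # array([4, 8])
print(np.array_split(indices, sections))  # [array([0,1,2,3]), array([4,5,6,7]), array([8,9])]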
Example no. 2
    def fit(self,
            X,
            y,
            batch_size=100,
            learning_rate=1e-7,
            reg=1e-5,
            epochs=1):
        """
        Trains linear classifier

        Arguments:
          X, np array (num_samples, num_features) - training data
          y, np array of int (num_samples) - labels
          batch_size, int - batch size to use
          learning_rate, float - learning rate for gradient descent
          reg, float - L2 regularization strength
          epochs, int - number of epochs
        """

        num_train = X.shape[0]
        num_features = X.shape[1]
        num_classes = np.max(y) + 1
        if self.W is None:
            self.W = 0.001 * np.random.randn(num_features, num_classes)

        loss_history = []
        loss = 0
        for epoch in range(epochs):
            shuffled_indices = np.arange(num_train)
            np.random.shuffle(shuffled_indices)
            sections = np.arange(batch_size, num_train, batch_size)
            batches_indices = np.array_split(shuffled_indices, sections)
            for batch_indices in batches_indices:
                # Compute loss and gradient on the mini-batch, not on the
                # full training set, then add the L2 regularization term.
                loss, dW_dL = linear_softmax(X[batch_indices], self.W,
                                             y[batch_indices])
                reg_loss, dW_dR = l2_regularization(self.W, reg)
                self.W -= learning_rate * (dW_dL + dW_dR)
            print("Epoch %i, loss: %f, accuracy: %f" %
                  (epoch, loss, multiclass_accuracy(self.predict(X), y)))
            loss_history.append(loss)

        return loss_history
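
# A minimal sketch of what linear_softmax is assumed to compute above (the
# real implementation lives elsewhere in the assignment): mean cross-entropy
# loss of a linear softmax classifier and its gradient with respect to W.
import numpy as np

def linear_softmax_sketch(X, W, y):
    # Class scores; subtract the row max for numerical stability.
    scores = X.dot(W)
    scores -= scores.max(axis=1, keepdims=True)
    exp_scores = np.exp(scores)
    probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)
    batch = X.shape[0]
    # Mean cross-entropy loss over the batch.
    loss = -np.log(probs[np.arange(batch), y]).mean()
    # Gradient of the loss through scores = X.dot(W).
    dscores = probs
    dscores[np.arange(batch), y] -= 1
    dW = X.T.dot(dscores) / batch
    return loss, dW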
Example no. 3
    def compute_accuracy(self, X, y):
        '''
        Computes accuracy on provided data using mini-batches
        '''
        indices = np.arange(X.shape[0])

        sections = np.arange(self.batch_size, X.shape[0], self.batch_size)
        batches_indices = np.array_split(indices, sections)

        pred = np.zeros_like(y)

        for batch_indices in batches_indices:
            batch_X = X[batch_indices]
            pred_batch = self.model.predict(batch_X)
            pred[batch_indices] = pred_batch
        # print(f"\n prediction {np.unique(pred, return_counts=True)}")
        # print(f"ground through {np.unique(y, return_counts=True)} \n")

        # param_ = self.model.Conv1.W
        # before_opt = param_.value[:2, :2]
        # print(f"PREDICT stage Conv1_W value: \n {before_opt} \n")
        # print(f"PREDICT stage Conv1_dW: \n {param_.grad[:2, :2]} \n")

        return multiclass_accuracy(pred, y)
Example no. 4
assert np.greater(
    loss_with_reg, loss
), "Loss with regularization (%2.4f) should be higher than without it (%2.4f)!" % (
    loss_with_reg, loss)

check_model_gradient(model_with_reg, train_X[:2], train_y[:2])

#%% [markdown]
# Also implement the prediction function (computing the model's output) on new data.
#
# What accuracy do we expect to see before training starts?

#%%
# Finally, implement the predict function!

# What value do we expect? With 10 classes and random initial weights,
# accuracy should be near chance level, i.e. about 0.1.
multiclass_accuracy(model_with_reg.predict(train_X[:30]), train_y[:30])

#%% [markdown]
# # Let's finish the code for the training process

#%%
from trainer import Trainer, Dataset
from optim import SGD
from modelGridSearch import search_model

model = TwoLayerNet(n_input=train_X.shape[1],
                    n_output=10,
                    hidden_layer_size=100,
                    reg=1e-1)
dataset = Dataset(train_X, train_y, val_X, val_y)
trainer = Trainer(model, dataset, SGD())
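
# Presumably the next step is to run the trainer. The exact signature of
# Trainer.fit, and that it returns loss/accuracy histories, is an assumption
# here, not something shown in the snippet above.
loss_history, train_history, val_history = trainer.fit()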
Example no. 5
loss_history = classifier.fit(train_X,
                              train_y,
                              epochs=10,
                              learning_rate=default_learning_rate,
                              batch_size=batch_size,
                              reg=default_reg_strength)

#%%
# Let's look at the loss history!
plt.plot(loss_history)

#%%
# Let's check how it performs on validation set

pred = classifier.predict(val_X)
accuracy = multiclass_accuracy(pred, val_y)
print("Accuracy: ", accuracy)

# Now, let's train more and see if it performs better
loss_history = classifier.fit(train_X,
                              train_y,
                              epochs=num_epochs,
                              learning_rate=default_learning_rate,
                              batch_size=batch_size,
                              reg=default_reg_strength)
pred = classifier.predict(val_X)
accuracy = multiclass_accuracy(pred, val_y)
print("Accuracy after training for {} epochs: {}".format(num_epochs, accuracy))

#%% [markdown]
# ### As before, we use cross-validation to select hyperparameters.
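
#%%
# A sketch of that hyperparameter search, scoring each (learning rate,
# regularization) pair by validation accuracy. The candidate grids and the
# LinearSoftmaxClassifier constructor name are assumptions for illustration.
learning_rates = [1e-3, 1e-4, 1e-5]
reg_strengths = [1e-4, 1e-5, 1e-6]

best_classifier = None
best_val_accuracy = 0
for lr in learning_rates:
    for reg in reg_strengths:
        candidate = LinearSoftmaxClassifier()
        candidate.fit(train_X, train_y, epochs=num_epochs, learning_rate=lr,
                      batch_size=batch_size, reg=reg)
        val_accuracy = multiclass_accuracy(candidate.predict(val_X), val_y)
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            best_classifier = candidate
print("Best validation accuracy: %f" % best_val_accuracy)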
Example no. 6
    prediction = best_knn_classifier.predict(binary_test_X, num_loops=1)

    precision, recall, f1, accuracy = binary_classification_metrics(
        prediction, binary_test_y)
    print("Best KNN with k = %s" % best_k)
    print("Accuracy: %4.2f, Precision: %4.2f, Recall: %4.2f, F1: %4.2f" %
          (accuracy, precision, recall, f1))

train_X = train_X.reshape(train_X.shape[0], -1)
test_X = test_X.reshape(test_X.shape[0], -1)

knn_classifier = KNN(k=1)
knn_classifier.fit(train_X, train_y)

predict = knn_classifier.predict(test_X, num_loops=1)
accuracy = multiclass_accuracy(predict, test_y)
print("Accuracy: %4.2f" % accuracy)

num_folds = 5

fold_step = train_X.shape[0] // num_folds
# Fold boundaries; append the endpoint explicitly, otherwise the last fold
# would be dropped when the dataset size is a multiple of fold_step.
ranges = list(range(0, num_folds * fold_step, fold_step)) + [train_X.shape[0]]
train_folds_X = [
    train_X[ranges[i]:ranges[i + 1]] for i in range(len(ranges) - 1)
]
train_folds_y = [
    train_y[ranges[i]:ranges[i + 1]] for i in range(len(ranges) - 1)
]
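
# Sketch (not from the original) of how one cross-validation round could use
# these folds: hold fold i out for validation and concatenate the rest for
# training.
i = 0  # index of the held-out fold
cv_train_X = np.concatenate(train_folds_X[:i] + train_folds_X[i + 1:])
cv_train_y = np.concatenate(train_folds_y[:i] + train_folds_y[i + 1:])
cv_val_X, cv_val_y = train_folds_X[i], train_folds_y[i]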
Example no. 7
test_X = test_X.reshape(test_X.shape[0], -1)

knn_classifier = KNN(k=1)
knn_classifier.fit(train_X, train_y)

#%%
dists = knn_classifier.compute_distances_no_loops(test_X)
num_test = dists.shape[0]
# argpartition places the k smallest distances in the first k positions of
# each row (in arbitrary order), which is enough to select the neighbors.
k_closest_indices = np.argpartition(dists, knn_classifier.k, axis=1)[:, :knn_classifier.k]
k_closest_indices.shape
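
# Aside (an illustration, not part of the original cell): the k nearest
# indices above could be turned into predictions by majority vote over the
# training labels; assumes the labels passed to fit are available as train_y.
k_closest_labels = train_y[k_closest_indices]            # (num_test, k)
manual_pred = np.array([np.bincount(row).argmax()
                        for row in k_closest_labels])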

#%%
predict = knn_classifier.predict(test_X)

#%%
accuracy = multiclass_accuracy(predict, test_y)
print("Accuracy: %4.2f" % accuracy)

#%% [markdown]
# Cross-validation again. Accuracy is now our main metric, and we will average it across all folds as well.

#%%
# Find the best k using cross-validation based on accuracy
num_folds = 5
train_folds_X = []
train_folds_y = []

#%%
data_to_fold = train_X
answers_to_fold = train_y
fold_size = train_y.shape[0] // num_folds
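
# A possible continuation of this cell (an assumption, not shown in the
# original): slice the data into num_folds consecutive chunks of fold_size.
train_folds_X = [data_to_fold[i * fold_size:(i + 1) * fold_size]
                 for i in range(num_folds)]
train_folds_y = [answers_to_fold[i * fold_size:(i + 1) * fold_size]
                 for i in range(num_folds)]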
Example no. 8
# Now let's use all 10 classes
train_X = train_X.reshape(train_X.shape[0], -1)
test_X = test_X.reshape(test_X.shape[0], -1)

knn_classifier = KNN(k=1)
knn_classifier.fit(train_X, train_y)

# In[43]:

# TODO: Implement predict_labels_multiclass
predict = knn_classifier.predict(test_X)

# In[44]:

# TODO: Implement multiclass_accuracy
accuracy = multiclass_accuracy(predict, test_y)
print("Accuracy: %4.2f" % accuracy)

# Cross-validation again. Accuracy is now our main metric, and we will average it across all folds as well.

# In[47]:

# Find the best k using cross-validation based on accuracy
num_folds = 5
train_folds_X = []
train_folds_y = []

# TODO: split the training data in 5 folds and store them in train_folds_X/train_folds_y
train_folds_X = np.array_split(train_X, num_folds)
train_folds_y = np.array_split(train_y, num_folds)
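
# Illustrative sketch of the selection loop that typically follows: for each
# candidate k, train on four folds, validate on the fifth, and average the
# accuracy across folds. The k_choices grid is an assumption.
k_choices = [1, 2, 3, 5, 8, 10, 15, 20]
k_to_accuracy = {}
for k in k_choices:
    fold_accuracies = []
    for i in range(num_folds):
        cv_train_X = np.concatenate(train_folds_X[:i] + train_folds_X[i + 1:])
        cv_train_y = np.concatenate(train_folds_y[:i] + train_folds_y[i + 1:])
        cv_classifier = KNN(k=k)
        cv_classifier.fit(cv_train_X, cv_train_y)
        cv_pred = cv_classifier.predict(train_folds_X[i])
        fold_accuracies.append(multiclass_accuracy(cv_pred, train_folds_y[i]))
    k_to_accuracy[k] = np.mean(fold_accuracies)
print(k_to_accuracy)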