def compute_accuracy(self, X, y):
    """
    Computes accuracy on provided data using mini-batches
    """
    indices = np.arange(X.shape[0])
    sections = np.arange(self.batch_size, X.shape[0], self.batch_size)
    batches_indices = np.array_split(indices, sections)
    pred = np.zeros_like(y)
    for batch_indices in batches_indices:
        batch_X = X[batch_indices]
        pred_batch = self.model.predict(batch_X)
        pred[batch_indices] = pred_batch
    return multiclass_accuracy(pred, y)
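# A standalone sketch (not part of the class) illustrating the batching trick
# above: np.array_split with boundaries from np.arange yields batches of
# batch_size elements, plus a smaller final batch when sizes don't divide evenly.
import numpy as np

idx = np.arange(10)
print(np.array_split(idx, np.arange(4, 10, 4)))
# -> [array([0, 1, 2, 3]), array([4, 5, 6, 7]), array([8, 9])]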
def fit(self, X, y, batch_size=100, learning_rate=1e-7, reg=1e-5, epochs=1):
    """
    Trains linear classifier

    Arguments:
      X, np array (num_samples, num_features) - training data
      y, np array of int (num_samples) - labels
      batch_size, int - batch size to use
      learning_rate, float - learning rate for gradient descent
      reg, float - L2 regularization strength
      epochs, int - number of epochs
    """
    num_train = X.shape[0]
    num_features = X.shape[1]
    num_classes = np.max(y) + 1
    if self.W is None:
        self.W = 0.001 * np.random.randn(num_features, num_classes)

    loss_history = []
    loss = 0
    for epoch in range(epochs):
        shuffled_indices = np.arange(num_train)
        np.random.shuffle(shuffled_indices)
        sections = np.arange(batch_size, num_train, batch_size)
        batches_indices = np.array_split(shuffled_indices, sections)

        for batch_indices in batches_indices:
            # Compute loss and gradient on the mini-batch, not the full dataset
            batch_X = X[batch_indices]
            batch_y = y[batch_indices]
            loss, dW_dL = linear_softmax(batch_X, self.W, batch_y)
            reg_loss, dW_dR = l2_regularization(self.W, reg)
            self.W -= learning_rate * (dW_dL + dW_dR)

        print("Epoch %i, loss: %f, accuracy: %f" %
              (epoch, loss, multiclass_accuracy(self.predict(X), y)))
        loss_history.append(loss)
    return loss_history
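# For reference, a minimal sketch of what l2_regularization is assumed to
# compute above (the standard L2 penalty; the actual course implementation
# may differ in details):
import numpy as np

def l2_regularization_sketch(W, reg_strength):
    # loss = reg * sum(W_ij^2), gradient = 2 * reg * W
    loss = reg_strength * np.sum(W ** 2)
    grad = 2 * reg_strength * W
    return loss, grad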
def compute_accuracy(self, X, y):
    '''
    Computes accuracy on provided data using mini-batches
    '''
    indices = np.arange(X.shape[0])
    sections = np.arange(self.batch_size, X.shape[0], self.batch_size)
    batches_indices = np.array_split(indices, sections)
    pred = np.zeros_like(y)
    for batch_indices in batches_indices:
        batch_X = X[batch_indices]
        pred_batch = self.model.predict(batch_X)
        pred[batch_indices] = pred_batch
    # Debug output, left commented out:
    # print(f"\n prediction {np.unique(pred, return_counts=True)}")
    # print(f"ground truth {np.unique(y, return_counts=True)} \n")
    # param_ = self.model.Conv1.W
    # before_opt = param_.value[:2, :2]
    # print(f"PREDICT stage Conv1_W value: \n {before_opt} \n")
    # print(f"PREDICT stage Conv1_dW: \n {param_.grad[:2, :2]} \n")
    return multiclass_accuracy(pred, y)
    loss_with_reg, loss
), "Loss with regularization (%2.4f) should be higher than without it (%2.4f)!" % (
    loss_with_reg, loss)

check_model_gradient(model_with_reg, train_X[:2], train_y[:2])

#%% [markdown]
# Let's also implement the predict function, which computes the model's
# outputs on new data.
#
# What accuracy do we expect to see before training starts?

#%%
# Finally, implement predict function!
# What would be the value we expect?
multiclass_accuracy(model_with_reg.predict(train_X[:30]), train_y[:30])

#%% [markdown]
# # Let's finish the code for the training process

#%%
from trainer import Trainer, Dataset
from optim import SGD
from modelGridSearch import search_model

model = TwoLayerNet(n_input=train_X.shape[1],
                    n_output=10,
                    hidden_layer_size=100,
                    reg=1e-1)
dataset = Dataset(train_X, train_y, val_X, val_y)
trainer = Trainer(model, dataset, SGD())
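#%%
# A quick sanity sketch for the question above (my own aside, not part of the
# assignment): with 10 classes and randomly initialized weights, predictions
# are essentially random, so accuracy should hover around chance level.
num_classes = 10
print("Expected accuracy before training: ~%.2f" % (1.0 / num_classes))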
loss_history = classifier.fit(train_X, train_y,
                              epochs=10,
                              learning_rate=default_learning_rate,
                              batch_size=batch_size,
                              reg=default_reg_strength)

#%%
# let's look at the loss history!
plt.plot(loss_history)

#%%
# Let's check how it performs on validation set
pred = classifier.predict(val_X)
accuracy = multiclass_accuracy(pred, val_y)
print("Accuracy: ", accuracy)

# Now, let's train more and see if it performs better
loss_history = classifier.fit(train_X, train_y,
                              epochs=num_epochs,
                              learning_rate=default_learning_rate,
                              batch_size=batch_size,
                              reg=default_reg_strength)
pred = classifier.predict(val_X)
accuracy = multiclass_accuracy(pred, val_y)
print("Accuracy after training for {} epochs: {}".format(num_epochs, accuracy))

#%% [markdown]
# ### As before, we use cross-validation to tune the hyperparameters.
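#%%
# A possible sketch of the search (the learning_rates / reg_strengths grids
# are my own placeholders, not values fixed by the assignment): train a fresh
# classifier for every combination and keep the best validation accuracy.
learning_rates = [1e-3, 1e-4, 1e-5]
reg_strengths = [1e-4, 1e-5, 1e-6]

best_classifier = None
best_val_accuracy = -1.0
for lr in learning_rates:
    for reg in reg_strengths:
        # Fresh instance of the same class (assumes a zero-argument constructor)
        candidate = type(classifier)()
        candidate.fit(train_X, train_y, epochs=num_epochs,
                      learning_rate=lr, batch_size=batch_size, reg=reg)
        val_accuracy = multiclass_accuracy(candidate.predict(val_X), val_y)
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            best_classifier = candidate

print("Best validation accuracy: ", best_val_accuracy)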
prediction = best_knn_classifier.predict(binary_test_X, num_loops=1)
precision, recall, f1, accuracy = binary_classification_metrics(
    prediction, binary_test_y)
print("Best KNN with k = %s" % best_k)
print("Accuracy: %4.2f, Precision: %4.2f, Recall: %4.2f, F1: %4.2f" %
      (accuracy, precision, recall, f1))

train_X = train_X.reshape(train_X.shape[0], -1)
test_X = test_X.reshape(test_X.shape[0], -1)

knn_classifier = KNN(k=1)
knn_classifier.fit(train_X, train_y)

predict = knn_classifier.predict(test_X, num_loops=1)
accuracy = multiclass_accuracy(predict, test_y)
print("Accuracy: %4.2f" % accuracy)

num_folds = 5
# Split the training set into exactly num_folds folds, keeping every sample
# (folds may differ in size by one element if sizes don't divide evenly)
train_folds_X = np.array_split(train_X, num_folds)
train_folds_y = np.array_split(train_y, num_folds)
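# A sketch of how the folds could be used to pick k (the k_choices grid and
# the loop structure are my own; the assignment may organize this differently):
# for every candidate k, hold out each fold in turn, train on the rest, and
# average the validation accuracy across folds.
import numpy as np

k_choices = [1, 2, 3, 5, 8, 10]
k_to_accuracy = {}
for k in k_choices:
    fold_accuracies = []
    for fold in range(num_folds):
        # Everything except the held-out fold becomes the training set
        cv_train_X = np.concatenate(train_folds_X[:fold] + train_folds_X[fold + 1:])
        cv_train_y = np.concatenate(train_folds_y[:fold] + train_folds_y[fold + 1:])
        cv_classifier = KNN(k=k)
        cv_classifier.fit(cv_train_X, cv_train_y)
        cv_pred = cv_classifier.predict(train_folds_X[fold], num_loops=1)
        fold_accuracies.append(multiclass_accuracy(cv_pred, train_folds_y[fold]))
    k_to_accuracy[k] = np.mean(fold_accuracies)

for k in sorted(k_to_accuracy):
    print("k = %d, mean accuracy = %.4f" % (k, k_to_accuracy[k]))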
test_X = test_X.reshape(test_X.shape[0], -1)

knn_classifier = KNN(k=1)
knn_classifier.fit(train_X, train_y)

#%%
dists = knn_classifier.compute_distances_no_loops(test_X)
num_test = dists.shape[0]
k_closest_indices = np.argpartition(dists, knn_classifier.k,
                                    axis=1)[:, :knn_classifier.k]
k_closest_indices.shape

#%%
predict = knn_classifier.predict(test_X)

#%%
accuracy = multiclass_accuracy(predict, test_y)
print("Accuracy: %4.2f" % accuracy)

#%% [markdown]
# Cross-validation again. This time our main metric is accuracy, and we will
# also average it over all the folds.

#%%
# Find the best k using cross-validation based on accuracy
num_folds = 5
train_folds_X = []
train_folds_y = []

#%%
data_to_fold = train_X
answers_to_fold = train_y
fold_size = train_y.shape[0] // num_folds
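#%%
# An aside on the np.argpartition call above (standalone sketch): unlike a
# full argsort, argpartition only guarantees that the k smallest distances
# land in the first k positions (in arbitrary order), which costs O(n) per
# row instead of O(n log n).
import numpy as np

demo_dists = np.array([[0.9, 0.1, 0.5, 0.3]])
print(np.argpartition(demo_dists, 2, axis=1)[:, :2])  # indices of the 2 smallest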
# Now let's use all 10 classes
train_X = train_X.reshape(train_X.shape[0], -1)
test_X = test_X.reshape(test_X.shape[0], -1)

knn_classifier = KNN(k=1)
knn_classifier.fit(train_X, train_y)

# In[43]:

# TODO: Implement predict_labels_multiclass
predict = knn_classifier.predict(test_X)

# In[44]:

# TODO: Implement multiclass_accuracy
accuracy = multiclass_accuracy(predict, test_y)
print("Accuracy: %4.2f" % accuracy)

# Cross-validation again. This time our main metric is accuracy, and we will
# also average it over all the folds.

# In[47]:

# Find the best k using cross-validation based on accuracy
num_folds = 5
train_folds_X = []
train_folds_y = []

# TODO: split the training data in 5 folds and store them in train_folds_X/train_folds_y
# Use the full multiclass training set here, not the binary subset
train_folds_X = np.array_split(train_X, num_folds)
train_folds_y = np.array_split(train_y, num_folds)
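# In[ ]:

# For reference, a minimal sketch of the multiclass_accuracy used throughout
# (assuming the standard definition; the actual course implementation may
# differ in details):
import numpy as np

def multiclass_accuracy_sketch(prediction, ground_truth):
    # Fraction of samples where the predicted label equals the true label
    return np.mean(prediction == ground_truth)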