def on_epoch_end(self, epoch, logs={}):
    self.counter += 1
    # Re-optimize the cut points on the training predictions each epoch.
    train_predictions = self.model.predict(self.X_train, verbose=0)
    cpo = CutPointOptimizer(train_predictions, self.Y_train)
    self.cutPoints = optimize.fmin(cpo.qwk, self.cutPoints)
    # Score the validation data with the new cut points.
    p = self.model.predict(self.X_val, verbose=0)
    p = np.searchsorted(self.cutPoints, p) + 1
    current = quadratic_weighted_kappa.quadratic_weighted_kappa(
        self.y_val.values.ravel(), p)
    print('Epoch %d Kappa: %f | Best Kappa: %f \n' % (epoch, current, self.best))
    if current > self.best:
        # Improvement over the best score so far: checkpoint the weights.
        self.best = current
        self.best_rounds = self.counter
        self.wait = 0
        self.model.save_weights(self.filepath, overwrite=True)
    else:
        if self.wait >= self.patience:
            # No more patience: stop training and retrieve the best model.
            self.model.stop_training = True
            print('Best number of rounds: %d \nKappa: %f \n' %
                  (self.best_rounds, self.best))
            self.model.load_weights(self.filepath)
        self.wait += 1  # increment the number of epochs without improvement
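# A minimal sketch of the state the callback above relies on, assuming it
# lives in a keras.callbacks.Callback subclass; the class name
# KappaEarlyStopping and this constructor are assumptions, not from the source.
class KappaEarlyStopping(Callback):
    def __init__(self, X_train, Y_train, X_val, y_val, cutPoints,
                 filepath, patience=5):
        super(KappaEarlyStopping, self).__init__()
        self.X_train, self.Y_train = X_train, Y_train
        self.X_val, self.y_val = X_val, y_val
        self.cutPoints = cutPoints  # initial guesses for optimize.fmin
        self.filepath = filepath    # where the best weights are checkpointed
        self.patience = patience    # epochs to wait without improvement
        self.counter = 0
        self.best = -1.0            # kappa lies in [-1, 1]
        self.best_rounds = 0
        self.wait = 0

# Hypothetical usage: model.fit(..., callbacks=[KappaEarlyStopping(...)])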
def do(m, dimension, n_components, FIX_INVERTED=True, FIX_RIGHT_LEFT=True,
       SAVE=True, n_components_min=0):
    # m = 1000
    # dimension = 256
    (images, y) = pre_process.extract(m, dimension, FIX_INVERTED,
                                      FIX_RIGHT_LEFT, SAVE)
    # n_components = 100
    # images_reduced = pca.fit_transform(m, dimension, images, n_components,
    #                                    SAVE, n_components_min)
    # (pred, svm_score) = svm.predict(m, dimension, images_reduced, y, SAVE)
    (pred, svm_score) = svm.predict(m, dimension, images, y, SAVE)
    # First half of the samples is treated as train, second half as test.
    kappa_score_train = quadratic_weighted_kappa(pred[:m / 2], y[:m / 2],
                                                 min_rating=0, max_rating=4)
    kappa_score_test = quadratic_weighted_kappa(pred[m / 2:], y[m / 2:],
                                                min_rating=0, max_rating=4)
    kappa_score_all = quadratic_weighted_kappa(pred, y,
                                               min_rating=0, max_rating=4)
    print "kappa score for train: ", kappa_score_train
    print "kappa score for test: ", kappa_score_test
    print "kappa score for all data: ", kappa_score_all
    print "svm score: ", svm_score
def kappa(y_true, y_pred):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    # Collapse one-hot / probability matrices to expected ratings.
    if len(y_true.shape) > 1 and y_true.shape[1] > 1:
        y_true = y_true.dot(range(y_true.shape[1]))
    if len(y_pred.shape) > 1 and y_pred.shape[1] > 1:
        y_pred = y_pred.dot(range(y_pred.shape[1]))
    try:
        return quadratic_weighted_kappa(y_true, y_pred)
    except IndexError:
        return np.nan
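# Every snippet in this file delegates to quadratic_weighted_kappa; below is
# a minimal NumPy sketch of the metric itself, assuming integer ratings in
# [min_rating, max_rating]. It is a reference sketch, not the implementation
# these snippets import.
def quadratic_weighted_kappa_sketch(y_true, y_pred, min_rating=0, max_rating=4):
    n = max_rating - min_rating + 1
    # Observed rating confusion matrix.
    O = np.zeros((n, n))
    for t, p in zip(y_true, y_pred):
        O[int(t) - min_rating, int(p) - min_rating] += 1
    # Matrix expected under chance agreement, scaled to the same total count.
    E = np.outer(O.sum(axis=1), O.sum(axis=0)) / O.sum()
    # Quadratic disagreement weights grow with squared rating distance.
    W = np.fromfunction(lambda i, j: ((i - j) ** 2) / (n - 1) ** 2, (n, n))
    return 1.0 - (W * O).sum() / (W * E).sum()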
def qwk(self, cutPoints):
    # Discretize raw predictions into ratings via the cut points, then
    # return the negated kappa so scipy's minimizers can maximize it.
    transformedPredictions = np.searchsorted(cutPoints, self.predicted) + 1
    return -1 * quadratic_weighted_kappa.quadratic_weighted_kappa(
        transformedPredictions, self.actual)
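# A minimal sketch of the enclosing class, assuming qwk above only needs the
# raw predictions and true labels; this constructor is an assumption, not
# taken from the source.
class CutPointOptimizer(object):
    def __init__(self, predicted, actual):
        self.predicted = np.asarray(predicted).ravel()
        self.actual = np.asarray(actual).ravel()

# Hypothetical usage: minimize the negated kappa over the cut points.
# cpo = CutPointOptimizer(raw_predictions, y_true)
# cutPoints = optimize.fmin(cpo.qwk, [0.5, 1.5, 2.5, 3.5])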
FOLDER = sys.argv[1]

predict = []
true = []
for filename in os.listdir(FOLDER):
    if not filename.startswith("part"):
        continue
    for line in open(os.path.join(FOLDER, filename)):
        data = json.loads(line.strip())
        predict.append(int(float(data[PREDICTION])))
        if data[PREDICTION] != "0.0":  # flag any non-zero prediction
            print "!"
        true.append(int(float(data["label"])))


def get_ans():
    # Sample a rating from the (hard-coded) empirical class distribution.
    n = random.random()
    if n <= 0.728:
        return 0
    elif n <= 0.794:
        return 1
    elif n <= 0.953:
        return 2
    elif n <= 0.979:
        return 3
    else:
        return 4


print quadratic_weighted_kappa(predict, true, 0, 4)
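# A chance-level sanity check, assuming get_ans encodes the empirical class
# prior: predictions drawn independently of the inputs should score a kappa
# near zero, which is exactly the agreement-by-chance floor QWK corrects for.
# baseline = [get_ans() for _ in true]
# print quadratic_weighted_kappa(baseline, true, 0, 4)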
def qwkerror(self, preds, dtrain):
    # Custom XGBoost eval metric: discretize the raw predictions with the
    # current cut points, then report negated kappa (lower is better).
    labels = dtrain.get_label()
    preds = np.searchsorted(self.cutPoints, preds) + 1
    kappa = quadratic_weighted_kappa.quadratic_weighted_kappa(labels, preds)
    return 'kappa', -1 * kappa
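# A sketch of plugging qwkerror into xgboost training as a custom evaluation
# function; the params/data/instance names are placeholders. Because the
# kappa is negated, XGBoost's default minimization (maximize=False) and
# early stopping both work in the right direction.
# bst = xgb.train(params, dtrain, num_boost_round=500,
#                 evals=[(dvalid, 'valid')], feval=optimizer.qwkerror,
#                 early_stopping_rounds=50)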
def scorer(estimator, X, y):
    return quadratic_weighted_kappa(y, estimator.predict(X))
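# scorer already matches scikit-learn's (estimator, X, y) scorer signature,
# so it can be passed straight to model-selection helpers; a usage sketch
# with placeholder names:
# scores = cross_val_score(pipeline, X, y, cv=5, scoring=scorer)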
skf = StratifiedKFold(train["median_relevance"], 5)
i = 0
y = train["median_relevance"]
y2 = train["relevance_variance"]
X = train.loc
for train_index, test_index in skf:
    print 'fold', i
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    print metrics.classification_report(y_test, y_pred)
    print quadratic_weighted_kappa(y_test, y_pred)
    print metrics.confusion_matrix(y_test, y_pred)
    print
    i += 1

# scores = cross_val_score(pipeline, train, train['median_relevance'],
#                          cv=5, scoring=scorer)
# print scores, scores.mean(), scores.std()

# Refit on the full training set and write out the submission.
pipeline.fit(train, train["median_relevance"])
predictions = pipeline.predict(test)

submission = pd.DataFrame({"id": test["id"], "prediction": predictions})
submission.to_csv("python_benchmark.csv", index=False)
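# An equivalent way to build the scorer used in the commented-out
# cross_val_score call above is scikit-learn's make_scorer wrapper (a sketch;
# quadratic_weighted_kappa is assumed importable here):
# from sklearn.metrics import make_scorer
# kappa_scorer = make_scorer(quadratic_weighted_kappa, greater_is_better=True)
# scores = cross_val_score(pipeline, train, train["median_relevance"],
#                          cv=5, scoring=kappa_scorer)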
def train_model(self, num_epochs, log_nth):
    training_start_time = time.time()
    optimizer = self.optimizer
    self._reset_histories()
    if self.host_device == 'gpu':
        self.model.cuda()
    iter_per_epoch = len(self.train_dataset_loader)
    logging.info("Start training")
    logging.info(
        f"Size of training data: "
        f"{len(self.train_dataset_loader.sampler) * self.train_dataset_loader.batch_size}"
    )
    for i_epoch in range(num_epochs):
        logging.info("Starting new epoch...")
        running_loss = 0.
        all_y = []
        all_y_pred = []
        # scheduler step for exp and step schedulers
        if (not isinstance(self.scheduler,
                           torch.optim.lr_scheduler.ReduceLROnPlateau)):
            self.scheduler.step()
            logging.info(f"Learning rate is {self.scheduler.get_lr()}")
        for i_batch, batch in enumerate(self.train_dataset_loader):
            x, y = batch
            x, y = Variable(x), Variable(y)
            if self.host_device == 'gpu':
                x, y = x.cuda(), y.cuda()
            optimizer.zero_grad()
            outputs = self.model(x)
            if self.host_device == 'gpu':
                train_loss = self.loss_func(outputs.cuda(), y)
            else:
                train_loss = self.loss_func(outputs, y)
            train_loss.backward()
            optimizer.step()
            running_loss += train_loss.data[0]
            _, y_pred = torch.max(outputs.data, 1)
            all_y.append(y)
            all_y_pred.append(y_pred)
            if not log_nth == 0 and (i_batch % log_nth) == 0:
                logging.info(
                    f'[Iteration {i_batch}/{iter_per_epoch}] '
                    f'TRAIN loss: {running_loss / sum(curr_y.shape[0] for curr_y in all_y):.3f}'
                )
        self.train_loss_history.append(running_loss)
        y = torch.cat(all_y)
        y_pred = torch.cat(all_y_pred)
        train_qwk = quadratic_weighted_kappa(y_pred, y.data)
        logging.info(
            f'[Epoch {i_epoch+1}/{num_epochs}] '
            f'TRAIN QWK: {train_qwk:.3f}; loss: {running_loss / y.shape[0]:.3f}'
        )
        self.train_qwk_history.append(train_qwk)
        running_loss = 0.
        all_y = []
        all_y_pred = []
        for x, y in self.valid_dataset_loader:
            x, y = Variable(x), Variable(y)
            if self.host_device == 'gpu':
                x, y = x.cuda(), y.cuda()
            outputs = self.model(x)
            if self.host_device == 'gpu':
                val_loss = self.loss_func(outputs.cuda(), y)
            else:
                val_loss = self.loss_func(outputs, y)
            running_loss += val_loss.data[0]
            _, y_pred = torch.max(outputs.data, 1)
            all_y.append(y)
            all_y_pred.append(y_pred)
        y = torch.cat(all_y)
        y_pred = torch.cat(all_y_pred)
        val_qwk = quadratic_weighted_kappa(y_pred, y.data)
        logging.info(
            f'[Epoch {i_epoch+1}/{num_epochs}] '
            f'VAL QWK: {val_qwk:.3f}; loss: {running_loss / y.shape[0]:.3f}'
        )
        self.val_qwk_history.append(val_qwk)
        self.val_loss_history.append(running_loss)
        training_time = time.time() - training_start_time
        logging.info(
            f"Epoch {i_epoch+1} - Training Time - {training_time} seconds")
        # scheduler step for plateau scheduler
        val_loss_scheduler = running_loss
        if (isinstance(self.scheduler,
                       torch.optim.lr_scheduler.ReduceLROnPlateau)):
            self.scheduler.step(val_loss_scheduler)
        if val_qwk > self.best_qwk:
            logging.info(f'New best validation QWK score: {val_qwk}')
            self.best_qwk = val_qwk
            self.best_model = deepcopy(self.model)
            self.wait = 0
            logging.info('Storing best model...')
            torch.save(self.best_model, self.model_path)
            logging.info('Done storing')
        else:
            self.wait += 1
            if self.wait >= self.patience:
                logging.info('Stopped after epoch %d' % (i_epoch))
                break
    training_time = time.time() - training_start_time
    logging.info(f"Full Training Time - {training_time} seconds")
def validate(n_epochs, n_models, n_steps=5, activations=False):
    with h5py.File(constants.train_features_scaled_strat_file, "r") as fi:
        labels_train = fi.get("y_train")[:60000]
        X_train = fi.get("X_train")[:60000]
        y_train, _ = preprocess_labels(labels_train,
                                       categorical=(net_type == 'softmax'))
        labels_test = fi.get("y_test")[()]
        X_test = fi.get("X_test")[()]
        y_test, _ = preprocess_labels(labels_test,
                                      categorical=(net_type == 'softmax'))
    # Rescale the targets into the network's output range.
    y_train = y_train / 5.0 / 2 + 0.5
    y_test = y_test / 5.0 / 2 + 0.5
    if net_type == 'softmax':
        n_classes = y_train.shape[1]
    elif net_type == 'regression':
        n_classes = 1
    print(n_classes, 'classes')
    n_dims = X_train.shape[1]
    print(n_dims, 'dims')

    cum_blend = 0
    models = range(1, n_models + 1)
    for i in models:
        print("\n-------------- Model %d --------------\n" % i)
        model = model_factory(n_classes, n_dims, net_type)
        for n in range(0, n_epochs, n_steps):
            model.fit(X_train, y_train, nb_epoch=n_steps, batch_size=128,
                      verbose=2)  # , validation_data=(X_test, y_test))

            # validate individual net
            if net_type == 'softmax':
                y_pred = model.predict_classes(X_test, verbose=0)
            elif net_type == 'regression':
                # Map the scaled outputs back to integer ratings in [0, 4].
                y_pred = model.predict(X_test, verbose=0)
                y_pred = np.floor((y_pred - 0.5) * 2 * 5.0).flatten()
                y_pred[y_pred < 0] = 0
                y_pred[y_pred > 4] = 4
            print('Epoch: %d. Accuracy: %0.2f%%. Kappa: %0.2f' %
                  (n + n_steps,
                   100 * accuracy_score(labels_test, y_pred),
                   quadratic_weighted_kappa.quadratic_weighted_kappa(
                       labels_test, y_pred)))

        # validate ensemble
        if net_type == 'softmax':
            cum_blend += model.predict_proba(X_test, verbose=0)
            y_pred = np.argmax(cum_blend, axis=1)
        elif net_type == 'regression':
            cum_blend += model.predict(X_test, verbose=0)
            y_pred = np.floor((cum_blend / i - 0.5) * 2 * 5.0).flatten()
            y_pred[y_pred < 0] = 0
            y_pred[y_pred > 4] = 4
        print('\nBlend %d. Accuracy: %0.2f%%. Kappa: %0.2f' %
              (i,
               100 * accuracy_score(labels_test, y_pred),
               quadratic_weighted_kappa.quadratic_weighted_kappa(
                   labels_test, y_pred)))
        print('Confusion matrix:\n', confusion_matrix(labels_test, y_pred))

        fitted = fit2distribution(labels_test, cum_blend)
        print('\nFitted. Accuracy: %0.2f%%. Kappa: %0.2f' %
              (100 * accuracy_score(labels_test, fitted),
               quadratic_weighted_kappa.quadratic_weighted_kappa(
                   labels_test, fitted)))
        print('Confusion matrix:\n', confusion_matrix(labels_test, fitted))

        if activations:
            F_train = pick_activations(model, X_train, net_type)
            F_test = pick_activations(model, X_test, net_type)
            fout = os.path.join(
                constants.features_NN_dir,
                features_NN_prefix + format(i, '02d') + '.hd5')
            with h5py.File(fout, "w") as fo:
                fo.create_dataset("X_train", data=F_train)
                fo.create_dataset("y_train", data=labels_train)
                fo.create_dataset("X_test", data=F_test)
                fo.create_dataset("y_test", data=labels_test)
            with h5py.File(fout, "r") as fi:
                X = fi.get("X_train")
                y = fi.get("y_train")
                XX = fi.get("X_test")
                yy = fi.get("y_test")
                print(X.shape, y.shape, XX.shape, yy.shape)
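# A hedged sketch of what a fit2distribution helper typically does in these
# pipelines (the real helper may differ): rank the blended scores and assign
# ratings so the predicted histogram matches the reference labels' histogram.
# This version assumes one scalar score per example, i.e. the regression case.
def fit2distribution_sketch(labels_ref, scores):
    scores = np.asarray(scores, dtype=float).ravel()
    order = np.argsort(scores)                       # ascending raw scores
    counts = np.bincount(np.asarray(labels_ref, dtype=int), minlength=5)
    fitted = np.empty(len(scores), dtype=int)
    start = 0
    for rating, count in enumerate(counts):
        fitted[order[start:start + count]] = rating  # fill each rating's quota
        start += count
    return fitted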