import numpy as np
from sklearn.model_selection import KFold


def find_best_model(X, Y, param_grids, n_folds=10, target_value=1,
                    normalize_features=True, shuffle=True):
    """Grid-search several model classes with nested cross validation and
    return the best result per class plus the overall winner."""
    if shuffle:
        indices = np.arange(len(X))
        np.random.shuffle(indices)
        X = X[indices]
        Y = Y[indices]

    auc_per_class = {}
    results_per_class = {}

    # param_grids maps each model class to its hyperparameter grid
    for i, (model_class, param_grid) in enumerate(param_grids.items()):
        model_class_name = model_class.__name__
        print("Param_grid #%d/%d for %s: %s" %
              (i + 1, len(param_grids), model_class_name, param_grid))

        for param_combination in all_combinations(param_grid):
            for k, v in param_combination.items():
                assert not isinstance(v, list), \
                    "Can't give a list of parameters for %s" % k

            curr_model = model_class(**param_combination)
            if normalize_features:
                # Normalizer here is assumed to be a wrapper that
                # feature-normalizes the data before fit/predict
                normalized = Normalizer(curr_model)
            else:
                normalized = curr_model

            # nested cross validation over just the current training set
            # to evaluate each parameter set's performance
            nested_kfold = KFold(n_splits=n_folds, shuffle=False)
            curr_aucs = []
            for nested_train_idx, nested_test_idx in nested_kfold.split(X):
                normalized.fit(X[nested_train_idx, :], Y[nested_train_idx])
                X_nested = X[nested_test_idx, :]
                Y_nested = Y[nested_test_idx]

                # skip iterations where all Y's are the same
                if (Y_nested == Y_nested[0]).all():
                    continue

                curr_auc = roc_auc(normalized, X_nested,
                                   Y_nested == target_value)
                curr_aucs.append(curr_auc)

            if not curr_aucs:
                # every fold was degenerate (single class), skip this setting
                continue
            curr_auc = np.mean(curr_aucs)

            if curr_auc > auc_per_class.get(model_class_name, 0):
                print("-- improved %s AUC to %0.4f with %s" %
                      (model_class_name, curr_auc, curr_model))
                auc_per_class[model_class_name] = curr_auc
                results_per_class[model_class_name] = CrossValResults(
                    auc=curr_auc,
                    model=normalized,
                    params=param_combination)
            else:
                print("-- no improvement from %s %s, AUC = %0.4f" %
                      (model_class_name, param_combination, curr_auc))

    best_model_name, overall_best_results = max(results_per_class.items(),
                                                key=lambda pair: pair[1].auc)
    print("== Best Model: %s, AUC = %0.4f\n" %
          (overall_best_results.model, overall_best_results.auc))
    return overall_best_results, results_per_class
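# The helpers referenced above (`all_combinations`, `CrossValResults`,
# `roc_auc`) are not shown in this section. The following is a minimal sketch
# of what they are assumed to look like, inferred from how they are called in
# `find_best_model`; names and signatures are assumptions, not the original
# implementations.
from collections import namedtuple
from itertools import product

from sklearn.metrics import roc_auc_score

CrossValResults = namedtuple("CrossValResults", ["auc", "model", "params"])


def all_combinations(param_grid):
    """Yield one parameter dict per point in the cross product of the grid."""
    keys = sorted(param_grid.keys())
    for values in product(*(param_grid[k] for k in keys)):
        yield dict(zip(keys, values))


def roc_auc(model, X, y_binary):
    """Score a fitted model by ROC AUC on a binary target (assumed helper)."""
    if hasattr(model, "decision_function"):
        scores = model.decision_function(X)
    else:
        scores = model.predict_proba(X)[:, 1]
    return roc_auc_score(y_binary, scores)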
import time
from copy import deepcopy

import torch
import torch.nn as nn
import torch.optim as optim


class Trainer:
    loss_dispatcher = {"l2": nn.MSELoss}

    def __init__(self, **params):
        self.num_epochs = params["num_epochs"]
        self.early_stop_tolerance = params["early_stop_tolerance"]
        self.norm_method = params["norm_method"]
        self.loss_type = params["loss_type"]
        self.learning_rate = params["learning_rate"]
        self.l2_reg = params["l2_reg"]
        self.clip = params["clip"]
        self.device = params["device"]

        self.input_normalizer = Normalizer(self.norm_method)
        self.output_normalizer = Normalizer(self.norm_method)

    def fit(self, model, batch_generator):
        """
        :param TrajGRU model:
        :param BatchGenerator batch_generator:
        :return:
        """
        print('Training starts...')
        model.to(self.device)

        train_loss = []
        val_loss = []
        tolerance = 0
        best_epoch = 0
        best_val_loss = 1e6
        evaluation_val_loss = best_val_loss
        best_dict = model.state_dict()

        # fit the normalizers on the whole training set
        data_list = []
        label_list = []
        for x, y in batch_generator.generate('train'):
            data_list.append(x.reshape(-1, *x.shape[2:]))
            label_list.append(y.reshape(-1, *y.shape[2:]))
        self.input_normalizer.fit(torch.cat(data_list))
        self.output_normalizer.fit(torch.cat(label_list))

        optimizer = optim.Adam(model.parameters(),
                               lr=self.learning_rate,
                               weight_decay=self.l2_reg)

        for epoch in range(self.num_epochs):
            # train and validation loop
            start_time = time.time()

            # train
            running_train_loss = self.step_loop(
                model=model,
                batch_generator=batch_generator,
                step_fun=self.train_step,
                loss_fun=self.loss_fun,
                dataset_type='train',
                optimizer=optimizer,
                denormalize=False)

            # validate
            running_val_loss = self.step_loop(
                model=model,
                batch_generator=batch_generator,
                step_fun=self.eval_step,
                loss_fun=self.loss_fun,
                dataset_type='validation',
                optimizer=None,
                denormalize=False)

            epoch_time = time.time() - start_time

            message_str = "\nEpoch: {}, Train_loss: {:.5f}, Validation_loss: {:.5f}, Took {:.3f} seconds."
            print(message_str.format(epoch + 1, running_train_loss,
                                     running_val_loss, epoch_time))

            # save the losses
            train_loss.append(running_train_loss)
            val_loss.append(running_val_loss)

            if running_val_loss < best_val_loss:
                best_epoch = epoch + 1
                best_val_loss = running_val_loss
                # deep copy so later epochs cannot mutate the snapshot
                best_dict = deepcopy(model.state_dict())
                tolerance = 0
            else:
                tolerance += 1

            if tolerance > self.early_stop_tolerance or epoch == self.num_epochs - 1:
                # restore the best weights and compute the evaluation metric
                model.load_state_dict(best_dict)
                evaluation_val_loss = self.step_loop(
                    model=model,
                    batch_generator=batch_generator,
                    step_fun=self.eval_step,
                    loss_fun=self.loss_fun_evaluation,
                    dataset_type='validation',
                    optimizer=None,
                    denormalize=True)

                message_str = "Early exiting from epoch: {}, Validation error: {:.5f}."
                print(message_str.format(best_epoch, evaluation_val_loss))
                break

        print('Training finished')
        return train_loss, val_loss, evaluation_val_loss

    def step_loop(self, model, batch_generator, step_fun, loss_fun,
                  dataset_type, optimizer, denormalize):
        count = 0
        running_loss = 0.0
        dataset = batch_generator.dataset_dict[dataset_type]
        hidden = self.reset_per_epoch(model=model,
                                      batch_size=batch_generator.batch_size)

        for count, (input_data, output_data) in enumerate(
                batch_generator.generate(dataset_type)):
            print("\r{:.2f}%".format(dataset.count * 100 / len(dataset)),
                  flush=True, end='')

            input_data_shape = input_data.shape
            input_data = self.input_normalizer.transform(
                input_data.reshape(-1, *input_data.shape[2:]))
            input_data = input_data.reshape(input_data_shape).to(self.device)

            output_data_shape = output_data.shape
            output_data = self.output_normalizer.transform(
                output_data.reshape(-1, *output_data.shape[2:]))
            output_data = output_data.reshape(output_data_shape).to(
                self.device)

            loss = step_fun(model=model,
                            input_tensor=input_data,
                            output_tensor=output_data,
                            hidden=hidden,
                            loss_fun=loss_fun,
                            optimizer=optimizer,
                            denormalize=denormalize)  # many-to-one

            hidden = self.repackage_hidden(hidden)
            running_loss += loss.item()

        running_loss /= (count + 1)
        return running_loss

    def train_step(self, model, input_tensor, output_tensor, hidden,
                   loss_fun, optimizer, denormalize):
        def closure():
            optimizer.zero_grad()
            predictions = model.forward(input_tensor, hidden)
            loss = loss_fun(predictions, output_tensor)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), self.clip)
            return loss

        loss = optimizer.step(closure)
        return loss

    def eval_step(self, model, input_tensor, output_tensor, hidden,
                  loss_fun, optimizer, denormalize):
        predictions = model.forward(input_tensor, hidden)
        if denormalize:
            predictions = self.output_normalizer.inverse_transform(
                predictions.to('cpu'))
            output_tensor = self.output_normalizer.inverse_transform(
                output_tensor.to('cpu'))

        loss = loss_fun(predictions, output_tensor)
        return loss

    def loss_fun(self, predictions, labels):
        """
        :param predictions: BxD_out
        :param labels: BxD_out
        :return:
        """
        loss_obj = self.loss_dispatcher[self.loss_type]()
        loss = loss_obj(predictions, labels)
        return loss

    def loss_fun_evaluation(self, predictions, labels):
        """
        :param predictions:
        :param labels:
        :return:
        """
        predictions = predictions.detach().numpy()
        labels = labels.detach().numpy()
        loss = haversine_dist(predictions, labels).mean()
        return loss

    def reset_per_epoch(self, model, batch_size):
        """
        Called at the beginning of every epoch.
        :param model:
        :param batch_size:
        :return:
        """
        hidden_list = model.init_hidden(batch_size=batch_size,
                                        device=self.device)
        return hidden_list

    def repackage_hidden(self, h):
        """
        Wraps hidden states in new Tensors, to detach them from their history.
        :param h: list of states, e.g. [state, state, ...]
        """
        if isinstance(h, torch.Tensor):
            return h.detach()
        else:
            return tuple(self.repackage_hidden(v) for v in h)
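# `Normalizer` (used by `Trainer` above and `LSTM` below) is a project-level
# utility that is not part of this section. The sketch below only illustrates
# the fit / transform / inverse_transform interface the training code relies
# on; the supported method names ("standard", "minmax") and the assumption
# that inputs are torch tensors are guesses, not the original implementation.
class Normalizer:
    def __init__(self, method="standard"):
        self.method = method
        self.stats = None

    def fit(self, x):
        # statistics are computed feature-wise over the first (batch) dimension
        if self.method == "minmax":
            self.stats = (x.min(dim=0)[0], x.max(dim=0)[0])
        else:  # "standard"
            self.stats = (x.mean(dim=0), x.std(dim=0))
        return self

    def transform(self, x):
        a, b = self.stats
        if self.method == "minmax":
            return (x - a) / (b - a + 1e-8)
        return (x - a) / (b + 1e-8)

    def inverse_transform(self, x):
        a, b = self.stats
        if self.method == "minmax":
            return x * (b - a + 1e-8) + a
        return x * (b + 1e-8) + a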
class LSTM(nn.Module):
    optimizer_dispatcher = {"adam": torch.optim.Adam,
                            "sgd": torch.optim.SGD}
    loss_dispatcher = {"l2": nn.MSELoss}
    activation_dispatcher = {
        "leaky_relu": nn.LeakyReLU,
        "tanh": nn.Tanh,
        "sigmoid": nn.Sigmoid
    }

    def __init__(self, input_dim, output_dim, **params):
        super(LSTM, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim

        self.loss_type = params["loss_type"]
        self.learning_rate = params["learning_rate"]
        self.optimizer_type = params["optimizer_type"]
        self.grad_clip = params["grad_clip"]
        self.l2_reg = params["l2_reg"]
        self.dropout_rate = params["dropout_rate"]
        self.num_epochs = params["num_epochs"]
        self.early_stop_tolerance = params["early_stop_tolerance"]
        self.hidden_dim_list = params["hidden_dim_list"]
        self.num_layers = len(self.hidden_dim_list)
        self.final_act_type = params["final_act_type"]
        self.relu_alpha = params["relu_alpha"]
        self.input_norm_method = params["input_norm_method"]
        self.output_norm_method = params["output_norm_method"]

        self.__create_rnn_cell_list()
        self.__create_dropout_layer()
        self.__create_dense_layer()
        self.__create_final_act_layer()

        self.optimizer = self.optimizer_dispatcher[self.optimizer_type](
            self.parameters(),
            lr=self.learning_rate,
            weight_decay=self.l2_reg)

        self.input_normalizer = Normalizer(self.input_norm_method)
        self.output_normalizer = Normalizer(self.output_norm_method)

    def __create_rnn_cell_list(self):
        # stack of LSTM cells; each layer's input is the previous layer's hidden state
        cell_list = []
        input_dim = self.input_dim
        for i in range(self.num_layers):
            hidden_dim = self.hidden_dim_list[i]
            cell_list.append(
                nn.LSTMCell(input_size=input_dim, hidden_size=hidden_dim))
            input_dim = self.hidden_dim_list[i]
        self.cell_list = nn.ModuleList(cell_list)

    def __create_dropout_layer(self):
        self.dropout_layer = nn.Dropout(p=self.dropout_rate)

    def __create_dense_layer(self):
        self.dense_layer = nn.Linear(in_features=self.hidden_dim_list[-1],
                                     out_features=self.output_dim)

    def __create_final_act_layer(self):
        if self.final_act_type == "leaky_relu":
            self.final_act_layer = self.activation_dispatcher[
                self.final_act_type](self.relu_alpha)
        else:
            self.final_act_layer = self.activation_dispatcher[
                self.final_act_type]()

    def __init_hidden_states(self, batch_size):
        # plain tensors suffice; torch.autograd.Variable is deprecated
        self.h_list = []
        self.c_list = []
        for cell in self.cell_list:
            self.h_list.append(torch.zeros(batch_size, cell.hidden_size))
            self.c_list.append(torch.zeros(batch_size, cell.hidden_size))

    @staticmethod
    def repackage_hidden(h):
        """Wraps hidden states in new Tensors, to detach them from their history."""
        if isinstance(h, torch.Tensor):
            return h.detach()
        else:
            return tuple(LSTM.repackage_hidden(v) for v in h)

    def forward(self, input_tensor):
        batch_size = input_tensor.shape[0]
        num_steps = input_tensor.shape[1]

        self.__init_hidden_states(batch_size)

        for step in range(num_steps):
            x = input_tensor[:, step]
            for layer_idx, cell in enumerate(self.cell_list):
                h, c = cell(x, (self.h_list[layer_idx],
                                self.c_list[layer_idx]))
                self.h_list[layer_idx] = h
                self.c_list[layer_idx] = c
                x = h

        # many-to-one: only the last step's top-layer hidden state is used
        x = self.dropout_layer(x)
        x = self.dense_layer(x)
        x = self.final_act_layer(x)

        return x

    def fit(self, batch_generator):
        print('Training starts...')

        train_loss = []
        val_loss = []
        tolerance = 0
        best_epoch = 0
        best_val_loss = 1e6
        evaluation_val_loss = best_val_loss
        best_dict = self.state_dict()

        self.input_normalizer.fit(batch_generator.dataset_dict["train"].data)
        self.output_normalizer.fit(batch_generator.dataset_dict["train"].label)

        for epoch in range(self.num_epochs):
            # train and validation loop
            start_time = time.time()

            running_train_loss = self.step_loop(batch_generator,
                                                self.train_step,
                                                self.loss_fun,
                                                'train',
                                                denormalize=False)
            running_val_loss = self.step_loop(batch_generator,
                                              self.eval_step,
                                              self.loss_fun,
                                              'validation',
                                              denormalize=False)

            epoch_time = time.time() - start_time

            message_str = "Epoch: {}, Train_loss: {:.5f}, Validation_loss: {:.5f}, Took {:.3f} seconds."
            print(message_str.format(epoch + 1, running_train_loss,
                                     running_val_loss, epoch_time))

            # save the losses
            train_loss.append(running_train_loss)
            val_loss.append(running_val_loss)

            if running_val_loss < best_val_loss:
                best_epoch = epoch + 1
                best_val_loss = running_val_loss
                # deep copy so later epochs cannot mutate the snapshot
                best_dict = deepcopy(self.state_dict())
                tolerance = 0
            else:
                tolerance += 1

            if tolerance > self.early_stop_tolerance or epoch == self.num_epochs - 1:
                self.load_state_dict(best_dict)
                evaluation_val_loss = self.step_loop(batch_generator,
                                                     self.eval_step,
                                                     self.loss_fun_evaluation,
                                                     'validation',
                                                     denormalize=True)
                message_str = "Early exiting from epoch: {}, Validation error: {:.5f}."
                print(message_str.format(best_epoch, evaluation_val_loss))
                break

        print('Training finished')
        return train_loss, val_loss, evaluation_val_loss

    def step_loop(self, batch_generator, step_fun, loss_fun, dataset_type,
                  denormalize):
        count = 0
        running_loss = 0.0
        for count, (input_data, output_data) in enumerate(
                batch_generator.generate(dataset_type)):
            input_data = self.input_normalizer.transform(input_data)
            output_data = self.output_normalizer.transform(output_data)

            loss = step_fun(input_data,
                            output_data[:, -1],
                            loss_fun,
                            denormalize)  # many-to-one

            # the training step returns a tensor, the evaluation metric a scalar
            if torch.is_tensor(loss):
                running_loss += loss.item()
            else:
                running_loss += float(loss)

        running_loss /= (count + 1)
        return running_loss

    def train_step(self, input_tensor, output_tensor, loss_fun, denormalize):
        def closure():
            self.optimizer.zero_grad()
            predictions = self.forward(input_tensor)
            loss = loss_fun(predictions, output_tensor)
            loss.backward()
            nn.utils.clip_grad_norm_(self.parameters(), self.grad_clip)
            return loss

        loss = self.optimizer.step(closure)
        return loss

    def eval_step(self, input_tensor, output_tensor, loss_fun, denormalize):
        predictions = self.forward(input_tensor)
        if denormalize:
            predictions = self.output_normalizer.inverse_transform(predictions)
            output_tensor = self.output_normalizer.inverse_transform(
                output_tensor)

        loss = loss_fun(predictions, output_tensor)
        return loss

    def loss_fun(self, predictions, labels):
        """
        :param predictions: BxD_out
        :param labels: BxD_out
        :return:
        """
        loss_obj = self.loss_dispatcher[self.loss_type]()
        loss = loss_obj(predictions, labels)
        return loss

    def loss_fun_evaluation(self, predictions, labels):
        """
        :param predictions:
        :param labels:
        :return:
        """
        predictions = predictions.detach().numpy()
        labels = labels.detach().numpy()
        loss = haversine_dist(predictions, labels).mean()
        return loss
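# `haversine_dist`, used by the evaluation loss of both `Trainer` and `LSTM`,
# is a project helper that is not shown here. A minimal sketch under the
# assumption that the arrays hold (latitude, longitude) pairs in degrees and
# that distances are returned in kilometres:
import numpy as np


def haversine_dist(pred, target, earth_radius_km=6371.0):
    """Element-wise great-circle distance between Nx2 arrays of (lat, lon)."""
    lat1, lon1 = np.radians(pred[:, 0]), np.radians(pred[:, 1])
    lat2, lon2 = np.radians(target[:, 0]), np.radians(target[:, 1])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    # haversine formula: a = sin^2(dlat/2) + cos(lat1) * cos(lat2) * sin^2(dlon/2)
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    return 2 * earth_radius_km * np.arcsin(np.sqrt(a))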