def evaluate(beam_size):
    """Score the captioning model on the 'test' split with beam search.

    For every test image the encoder output is decoded with
    ``decoder.beam_search``; the predicted word ids and all reference
    captions (with ``<start>``, ``<end>`` and ``<pad>`` ids stripped) are
    collected and handed to ``Metrics``.

    Relies on module-level ``encoder``, ``decoder``, ``device``,
    ``word_map``, ``rev_word_map``, ``data_folder``, ``data_name`` and
    ``normalize``.

    Args:
        beam_size: Beam width passed to ``decoder.beam_search``.

    Returns:
        Whatever ``Metrics.all_metrics()`` returns for the collected
        references and predictions.
    """
    # DataLoader
    loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder, data_name, 'test',
                       transform=transforms.Compose([normalize])),
        # TODO: batched beam search
        # therefore, DO NOT use a batch_size greater than 1 - IMPORTANT!
        batch_size=1,
        shuffle=True,
        num_workers=1,
        pin_memory=True)

    # Ids stripped from both references and predictions; built once instead
    # of once per word.
    special_tokens = {
        word_map['<start>'], word_map['<end>'], word_map['<pad>']
    }

    # store ground truth captions and predicted captions (word id) of each image
    # for n images, each of them has one prediction and multiple ground truths (a, b, c...):
    # prediction = [ [pred1], [pred2], ..., [predn] ]
    # ground_truth = [ [ [gt1a], [gt1b], [gt1c] ], ..., [ [gtna], [gtnb] ] ]
    ground_truth = []
    prediction = []

    # Pure inference: no gradients are needed, so don't record a graph.
    with torch.no_grad():
        # for each image
        for image, _caps, _caplens, allcaps in tqdm(
                loader, desc="Evaluating at beam size " + str(beam_size)):
            # move to GPU device, if available
            image = image.to(device)  # (1, 3, 256, 256)

            # forward encoder
            encoder_out = encoder(image)

            # ground_truth: batch_size is 1, so index 0 holds every
            # reference caption of the current image
            img_caps = allcaps[0].tolist()
            img_captions = [
                [w for w in caption if w not in special_tokens]
                for caption in img_caps
            ]
            ground_truth.append(img_captions)

            # prediction (beam search)
            seq, _, _, _ = decoder.beam_search(encoder_out, beam_size,
                                               word_map)
            prediction.append([w for w in seq if w not in special_tokens])

            assert len(ground_truth) == len(prediction)

    # calculate metrics
    metrics = Metrics(ground_truth, prediction, rev_word_map)
    return metrics.all_metrics()
def __init__(self, env, config: DDPGConfig):
    """Set up the replay buffer, actor/critic networks and optimizers.

    Args:
        env: Environment; ``state_size`` and ``action_size`` are read from
            it to size the networks.
        config: Hyper-parameters (``buffer_size``, ``batch_size``,
            ``fc1_units``, ``fc2_units``, ``learning_rate``).
    """
    super().__init__(env)
    self.config = config
    self.replay_buffer = ReplayBuffer(config.buffer_size, config.batch_size)

    # Every network is built from the same four sizing arguments.
    net_args = (env.state_size, env.action_size, config.fc1_units,
                config.fc2_units)
    step_size = config.learning_rate

    # Actor: online network, its target twin, and the optimizer that
    # updates the online one.
    self.actor_current = Actor(*net_args).to(device)
    self.actor_target = Actor(*net_args).to(device)
    self.actor_optimizer = torch.optim.Adam(
        self.actor_current.parameters(), lr=step_size)

    # Critic: same layout as the actor.
    self.critic_current = Critic(*net_args).to(device)
    self.critic_target = Critic(*net_args).to(device)
    self.critic_optimizer = torch.optim.Adam(
        self.critic_current.parameters(), lr=step_size)

    self.metrics = Metrics()
def __init__(self, env, config: DQNConfig):
    """Set up the replay buffer, online/target Q-networks and optimizer.

    Args:
        env: Environment; ``state_size`` and ``action_size`` are read from
            it to size the networks.
        config: Hyper-parameters (``buffer_size``, ``batch_size``,
            ``fc1_units``, ``fc2_units``, ``learning_rate``).
    """
    super().__init__(env)
    self.config = config
    self.replay_buffer = ReplayBuffer(config.buffer_size, config.batch_size)

    # Both Q-networks are built from the same sizing arguments.
    net_args = (env.state_size, env.action_size, config.fc1_units,
                config.fc2_units)
    self.qnet_current = QNetwork(*net_args).to(device)
    self.qnet_target = QNetwork(*net_args).to(device)

    # Only the online network is handed to the optimizer.
    self.optimizer = torch.optim.Adam(
        self.qnet_current.parameters(), lr=config.learning_rate)

    self.metrics = Metrics()
def validate(loader, num_classes, device, net, criterion):
    """Evaluate ``net`` on ``loader`` without updating its weights.

    Args:
        loader: Iterable yielding ``(images, masks, tiles)`` batches.
        num_classes: Number of classes; ``Metrics`` is built from
            ``range(num_classes)``.
        device: Device the batches are moved to.
        net: Model to evaluate; switched to eval mode.
        criterion: Loss callable invoked as ``criterion(outputs, masks)``.

    Returns:
        Dict with keys ``"loss"``, ``"miou"``, ``"fg_iou"`` and ``"mcc"``.

    Raises:
        AssertionError: If the spatial size of a batch's images and masks
            differ.
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    num_samples = 0
    running_loss = 0

    metrics = Metrics(range(num_classes))

    with torch.no_grad():
        net.eval()

        for images, masks, _tiles in tqdm(loader,
                                          desc="Validate",
                                          unit="batch",
                                          ascii=True):
            images = images.to(device)
            masks = masks.to(device)

            # The message describes the failure (it is only shown when the
            # check does NOT hold).
            assert images.size()[2:] == masks.size()[1:], (
                "resolutions for images and masks are out of sync: "
                f"{tuple(images.size()[2:])} vs {tuple(masks.size()[1:])}")

            num_samples += int(images.size(0))

            outputs = net(images)
            loss = criterion(outputs, masks)

            # NOTE(review): one loss value is added per batch, but the total
            # is divided by the sample count below - confirm the criterion's
            # reduction makes this the intended normalisation.
            running_loss += loss.item()

            for mask, output in zip(masks, outputs):
                metrics.add(mask, output)

    return {
        "loss": running_loss / num_samples,
        "miou": metrics.get_miou(),
        "fg_iou": metrics.get_fg_iou(),
        "mcc": metrics.get_mcc(),
    }
def train(loader, num_classes, device, net, optimizer, criterion):
    """Run one optimisation pass of ``net`` over ``loader``.

    Args:
        loader: Iterable yielding ``(images, masks, tiles)`` batches.
        num_classes: Number of classes; ``Metrics`` is built from
            ``range(num_classes)``.
        device: Device the batches are moved to.
        net: Model to train; switched to train mode.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, masks)``.

    Returns:
        Dict with keys ``"loss"``, ``"miou"``, ``"fg_iou"`` and ``"mcc"``.

    Raises:
        AssertionError: If the spatial size of a batch's images and masks
            differ.
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    num_samples = 0
    running_loss = 0

    # always two classes in our case
    metrics = Metrics(range(num_classes))

    # initialized model
    net.train()

    # training loop
    for images, masks, _tiles in tqdm(loader,
                                      desc="Train",
                                      unit="batch",
                                      ascii=True):
        images = images.to(device)
        masks = masks.to(device)

        # The message describes the failure (it is only shown when the
        # check does NOT hold).
        assert images.size()[2:] == masks.size()[1:], (
            "resolutions for images and masks are out of sync: "
            f"{tuple(images.size()[2:])} vs {tuple(masks.size()[1:])}")

        num_samples += int(images.size(0))

        optimizer.zero_grad()
        outputs = net(images)

        loss = criterion(outputs, masks)
        loss.backward()

        optimizer.step()

        # NOTE(review): one loss value is added per batch, but the total is
        # divided by the sample count below - confirm the criterion's
        # reduction makes this the intended normalisation.
        running_loss += loss.item()

        for mask, output in zip(masks, outputs):
            # Detach so the metrics never hold on to the autograd graph.
            metrics.add(mask, output.detach())

    return {
        "loss": running_loss / num_samples,
        "miou": metrics.get_miou(),
        "fg_iou": metrics.get_fg_iou(),
        "mcc": metrics.get_mcc(),
    }
class DDPGAgent(Agent):
    """Actor-critic agent with online/target network pairs and a replay buffer.

    The online ("current") actor and critic are trained from sampled
    experience; the target copies track them through ``soft_update``.
    """

    def __init__(self, env, config: DDPGConfig):
        """Build the replay buffer, the four networks and both optimizers.

        Args:
            env: Environment; ``state_size`` and ``action_size`` size the
                networks.
            config: Hyper-parameters (``buffer_size``, ``batch_size``,
                ``fc1_units``, ``fc2_units``, ``learning_rate``, and later
                ``gamma`` and ``tau``).
        """
        super().__init__(env)
        self.config = config
        self.replay_buffer = ReplayBuffer(config.buffer_size,
                                          config.batch_size)

        # Actor
        self.actor_current = Actor(env.state_size, env.action_size,
                                   config.fc1_units,
                                   config.fc2_units).to(device)
        self.actor_target = Actor(env.state_size, env.action_size,
                                  config.fc1_units,
                                  config.fc2_units).to(device)
        self.actor_optimizer = torch.optim.Adam(
            self.actor_current.parameters(), lr=config.learning_rate)

        # Critic
        self.critic_current = Critic(env.state_size, env.action_size,
                                     config.fc1_units,
                                     config.fc2_units).to(device)
        self.critic_target = Critic(env.state_size, env.action_size,
                                    config.fc1_units,
                                    config.fc2_units).to(device)
        self.critic_optimizer = torch.optim.Adam(
            self.critic_current.parameters(), lr=config.learning_rate)

        self.metrics = Metrics()

    def restore(self, actor_file, critic_file):
        """Load saved actor and critic weights.

        The weights go into the online AND the target networks. Loading
        only the online ones would leave the targets at their random
        initial values, so TD targets would be meaningless if training
        resumed from the checkpoint.

        Args:
            actor_file: Path or file object accepted by ``torch.load``.
            critic_file: Path or file object accepted by ``torch.load``.
        """
        # Read each file once; a file object cannot be read twice.
        actor_state = torch.load(actor_file)
        critic_state = torch.load(critic_file)

        self.actor_current.load_state_dict(actor_state)
        self.critic_current.load_state_dict(critic_state)
        self.actor_target.load_state_dict(actor_state)
        self.critic_target.load_state_dict(critic_state)

    def compute_action(self, state, epsilon=0):
        """Return the actor's action for ``state``, optionally with noise.

        With probability ``epsilon``, Gaussian noise scaled by ``epsilon``
        is added. The result is always clipped to ``[-1, 1]``.
        """
        action = self.actor_current.action_values_for(state)
        if np.random.random() < epsilon:
            action += np.random.randn(self.env.action_size) * epsilon
        action = np.clip(action, -1, 1)
        return action

    def train(self,
              n_steps,
              update_every,
              print_every,
              epsilon_init=1.0,
              epsilon_decay=0.995,
              epsilon_min=0.01):
        """Interact with the environment and train for up to ``n_steps``.

        Args:
            n_steps: Maximum number of environment steps.
            update_every: Run a batch update every this many steps.
            print_every: Print progress every this many steps.
            epsilon_init: Initial exploration rate.
            epsilon_decay: Multiplicative per-step decay of epsilon.
            epsilon_min: Lower bound for epsilon.
        """
        epsilon = epsilon_init
        # Make sure the buffer can serve at least one full batch.
        state = self._warmup(epsilon)
        self.metrics.plot()
        for t_step in range(1, n_steps + 1):
            state = self._step(state, epsilon)
            epsilon = max(epsilon_min, epsilon * epsilon_decay)
            if t_step % update_every == 0:
                self._batch_train()
                if self._check_solved():
                    break
            if t_step % print_every == 0:
                print(
                    f"Step #{t_step}" +
                    f", Running score {self.metrics.running_score():.2f}" +
                    f", Total episodes {self.metrics.episode_count}")

    def _warmup(self, epsilon):
        """Step the environment until the buffer holds one batch; return the state."""
        state = self.env.reset(train_mode=True)
        needed_experiences = max(
            0, self.replay_buffer.batch_size - len(self.replay_buffer))
        for _ in range(needed_experiences):
            state = self._step(state, epsilon)
        return state

    def _step(self, state, epsilon):
        """Take one environment step, record it, and return the next state.

        When the episode ends the environment is reset and the fresh
        initial state is returned instead.
        """
        action = self.compute_action(state, epsilon)
        next_state, reward, done = self.env.step(action)
        self.replay_buffer.add(
            Experience(state, action, reward, next_state, done))
        self.metrics.on_step(reward, done)
        if done:
            return self.env.reset(train_mode=True)
        return next_state

    def _batch_train(self):
        """Run one critic update and one actor update on a sampled batch."""
        states, actions, rewards, next_states, dones = \
            self.replay_buffer.sample()

        # Update Critic
        target_actions_next = self.actor_target(next_states)
        # NOTE(review): max over dim 1 is presumably a no-op if the critic
        # emits a single value per sample - confirm against Critic.
        target_values_next = self.critic_target(
            next_states, target_actions_next).detach().max(1)[0].unsqueeze(1)
        # (1 - dones) removes the bootstrap term for terminal transitions.
        target_values = rewards + (self.config.gamma * target_values_next *
                                   (1 - dones))
        expected_values = self.critic_current(states, actions)
        critic_loss = F.mse_loss(expected_values, target_values)
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        self.critic_optimizer.step()
        self.critic_target.soft_update(self.critic_current, self.config.tau)

        # Update Actor: maximise the critic's value of the actor's actions.
        current_actions = self.actor_current(states)
        actor_loss = -self.critic_current(states, current_actions).mean()
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()
        self.actor_target.soft_update(self.actor_current, self.config.tau)

    def _check_solved(self):
        """Save both models and return True once the running score reaches 30."""
        if self.metrics.running_score() >= 30:
            print(
                f"\nEnvironment solved in {self.metrics.episode_count} episodes!\t"
                + f"Average Score: {self.metrics.running_score():.2f}")
            torch.save(self.actor_current.state_dict(), "actor_model.pt")
            torch.save(self.critic_current.state_dict(), "critic_model.pt")
            return True
        return False
def init_args():
    """Parse and return the arguments."""
    parser = argparse.ArgumentParser(description="Simple Gmail Analyzer")

    # One (flag, keyword options) entry per option, registered in order.
    option_table = (
        ("--top", {
            "type": int,
            "default": 10,
            "help": "Number of results to show",
        }),
        ("--user", {
            "type": str,
            "default": "me",
            "help": "User ID to fetch data for",
        }),
        ("--verbose", {
            "action": "store_true",
            "help": "Verbose output, helpful for debugging",
        }),
        ("--version", {
            "action": "store_true",
            "help": "Display version and exit",
        }),
    )
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    namespace = parser.parse_args()
    return vars(namespace)


if __name__ == "__main__":
    colorama.init()
    args = init_args()

    if not args["version"]:
        # Normal run: hand the parsed options to the analyzer.
        Metrics(args).start()
    else:
        # --version short-circuits everything else.
        print("gmail analyzer v{}".format(VERSION))
        sys.exit()
def train_cv(self, Xt, yt, epochs, opts, ohe=None):
    """
    Train and validate once per cross-validation fold.

    The splitter method is looked up on ``cvs(Xt, yt)`` by the name stored
    in ``self.cv_fn``. For ``'k_fold_strat'`` the targets are first
    inverse-transformed with ``ohe`` before splitting and each fold's
    targets are transformed back afterwards.

    Returns a dict holding the per-fold train/validation losses, weights,
    biases, validation metrics, the CV method name and the true/predicted
    validation targets.
    """
    stratified = self.cv_fn == 'k_fold_strat'

    if stratified:
        # Transform to 1D to work with k-fold strat
        yt = ohe.inverse_transform(yt)

    splitter = getattr(cvs(Xt, yt), self.cv_fn)
    splits, [X_train, y_train, X_val, y_val] = splitter(opts)

    if stratified:
        # Reverse back by transforming with earlier defined OneHotEncoder
        for fold_idx, _ in enumerate(y_train):
            y_train[fold_idx] = ohe.transform(y_train[fold_idx]).toarray()
            y_val[fold_idx] = ohe.transform(y_val[fold_idx]).toarray()

    # Per-fold accumulators.
    train_losses, val_losses = [], []
    predictions, truths = [], []
    fold_metrics = []
    weights_by_fold, bias_by_fold = {}, {}

    for fold in range(splits):
        fold_no = fold + 1
        print('\nFold: {}\n'.format(fold_no))

        # Train network training set
        outcome = self.train(X_train[fold], y_train[fold], epochs)
        train_losses.append(outcome['loss']['train'])

        # Keep this fold's weights/bias under its 1-based fold number
        weights_by_fold[fold_no] = outcome['weights']['split']
        bias_by_fold[fold_no] = outcome['bias']['split']

        # Test network with validation set: MSE + predictions
        fold_mse, fold_pred = self.test(X_val[fold], y_val[fold], self.fnt,
                                        self.norm_pred)

        # Metrics: element 1 of the confusion() result is kept per fold
        MetricsCV = Metrics(self.loss_str)
        MetricsCV.load(y_val[fold], self.yclass, fold_pred, self.btm)
        confusion = MetricsCV.confusion(plot=False)
        fold_metrics.append(confusion[1])

        val_losses.append(fold_mse)
        truths.append(y_val[fold])
        predictions.append(fold_pred)

    losses = {'train': train_losses, 'validation': val_losses}
    data = {'true': truths, 'prediction': predictions}
    return {
        'loss': losses,
        'weights': {
            'cv': weights_by_fold
        },
        'bias': {
            'cv': bias_by_fold
        },
        'validation_metrics': fold_metrics,
        'cross_val': self.cv_fn,
        'data': data
    }
if method_opt == 'single': test_error, pred = NN.test(X_test, y_test, user_test_metric, user_norm_pred) #%% Metrics # Save pearson/pairwise plots first MG.save_plot({ 'pearson': pearson_result['figure'].get_figure(), 'pairwise': pairwise_result['figure'] }) # For testing if method_opt == 'single': # Call Metrics and load the test and predictions MetricsNN = Metrics(NN.loss_str) MetricsNN.load(y_test, y_classes, pred, btm=bin_to_multi) # Save X_test and predictions as npy if save_test_data: # Save unscaled test data for RC np.save( os.sep.join([MG.current_output_dir, 'X_test.npy']), np.concatenate([ X_test_ns, np.reshape(np.array(MetricsNN.y_1D), (1, np.array(MetricsNN.y_1D).size)).T ], axis=1)) # Save predictions np.save(os.sep.join([MG.current_output_dir, 'y_hat.npy']), pred)
class Trainer:
    """ Class to train, validate, and test a model.

    Progress logs (``train_loss``, ``val_loss``, ``metrics``) are kept per
    instance, so separate trainers never share history.
    """

    # Best validation loss seen so far (immutable default, safe to share)
    best_val = 1e10
    # Callable used as ``self.evalu(batch, preds)`` during validation
    evalu = Metrics()

    def __init__(self,
                 model,
                 train_dir,
                 val_dir,
                 test_dir=None,
                 lr=1e-3,
                 batch_size=10,
                 visualize=True):
        """ Store the configuration and build the Adam optimizer.

        Args:
            model: Model to train; only parameters with ``requires_grad``
                are optimized.
            train_dir: Directory training batches are sampled from.
            val_dir: Directory used by ``train_epoch`` for validation.
            test_dir: Optional test directory (only stored).
            lr: Learning rate for Adam.
            batch_size: Documents per batch / validation chunk.
            visualize: Plot metrics after each validation when True.
        """
        # Explicit assignments instead of ``self.__dict__.update(locals())``,
        # which also stored a ``self.self`` reference cycle.
        self.model = model
        self.train_dir = train_dir
        self.val_dir = val_dir
        self.test_dir = test_dir
        self.lr = lr
        self.batch_size = batch_size
        self.visualize = visualize

        # Per-instance logs: as class attributes these mutable containers
        # were shared by every Trainer.
        self.train_loss = []
        self.val_loss = []
        self.metrics = defaultdict(list)

        self.optimizer = optim.Adam(
            params=[p for p in self.model.parameters() if p.requires_grad],
            lr=self.lr)

    def train(self, num_epochs, *args, **kwargs):
        """ Train a model for ``num_epochs`` epochs; extra arguments are
        forwarded to ``train_epoch`` """
        for epoch in range(1, num_epochs + 1):
            self.train_epoch(epoch, *args, **kwargs)

    def train_epoch(self, epoch, steps=25, val_ckpt=5):
        """ Train the model for one epoch.

        Args:
            epoch: 1-based epoch number (for logging and validation
                scheduling).
            steps: Number of sampled batches per epoch.
            val_ckpt: Validate every ``val_ckpt`` epochs.
        """
        self.val_ckpt = val_ckpt

        # Enable dropout, any learnable regularization
        self.model.train()

        epoch_loss, epoch_sents = [], []
        for step in tqdm(range(1, steps + 1)):

            # Zero out gradients
            self.optimizer.zero_grad()

            # Compute a train batch, backpropagate
            (batch_loss, num_sents, segs_correct, texts_correct, total_segs,
             total_texts) = self.train_batch()
            batch_loss.backward()

            # Log progress (Loss is reported as average loss per sentence)
            print(
                'Step: %d | Loss: %f | Num. sents: %d | Segs correct: %d / %d | Texts correct: %d / %d'
                % (step, batch_loss.item() / num_sents, num_sents,
                   segs_correct, total_segs, texts_correct, total_texts))

            # For logging purposes
            epoch_loss.append(batch_loss.item())
            epoch_sents.append(num_sents)

            # Step the optimizer
            self.optimizer.step()

        epoch_loss = np.mean(epoch_loss)
        epoch_sents = np.mean(epoch_sents)

        # Log progress (Loss is reported as average loss per sentence)
        print('\nEpoch: %d | Loss: %f | Avg. num sents: %d\n' %
              (epoch, epoch_loss / epoch_sents, epoch_sents))

        self.train_loss.append(epoch_loss / epoch_sents)

        # Validation set performance
        if epoch % val_ckpt == 0:

            metrics_dict, val_loss = self.validate(self.val_dir)

            # Log progress
            self.val_loss.append(val_loss)
            for key, val in metrics_dict.items():
                self.metrics[key].append(val)

            # Keep a snapshot of the best model so far
            if val_loss < self.best_val:
                self.best_val = val_loss
                self.best_model = deepcopy(self.model.eval())

            # Log progress
            print('Validation loss: %f | Best val loss: %f\n' %
                  (val_loss, self.best_val))

            if self.visualize:
                self.viz_metrics()

    def train_batch(self):
        """ Train the model using one batch.

        Returns:
            ``(loss, num_sents, segs_correct, texts_correct, total_segs,
            total_texts)`` where ``loss`` is the summed cross-entropy.
        """
        # Sample a batch of documents
        batch = sample_and_batch(self.train_dir, self.batch_size, TRAIN=True)

        # Get predictions for each document in the batch
        preds = self.model(batch)

        # Compute loss, IGNORING last entry as it ALWAYS ends a subsection
        # (reduction='sum' replaces the deprecated size_average=False)
        batch_loss = F.cross_entropy(preds[:-1],
                                     batch.labels[:-1],
                                     reduction='sum',
                                     weight=self.weights(batch))

        # Number of boundaries correctly predicted
        segs_correct, texts_correct, total_segs, total_texts = self.debugging(
            preds, batch)

        return (batch_loss, len(batch), segs_correct, texts_correct,
                total_segs, total_texts)

    def validate(self, dirname):
        """ Evaluate using SegEval text segmentation metrics.

        Args:
            dirname: Directory whose documents are evaluated.

        Returns:
            ``(eval_metrics, loss_per_sentence)``.

        Raises:
            ValueError: If ``dirname`` contains no documents.
        """
        print('Evaluating across SegEval metrics.')

        # Disable dropout, any learnable regularization
        self.model.eval()

        # Initialize val directory files, dictionaries list
        files, dicts_list = list(crawl_directory(dirname)), []
        if not files:
            raise ValueError('No documents found in %s' % dirname)

        eval_loss, num_sents = 0.0, 0

        # No gradients are needed here; without this every chunk's graph
        # stayed alive through the accumulated loss tensor.
        with torch.no_grad():
            # Break into chunks for memory constraints
            for chunk in chunk_list(files, self.batch_size):

                # Batchify documents
                batch = Batch([read_document(f, TRAIN=False) for f in chunk])

                # Predict the batch
                preds, logits = self.predict_batch(batch)

                # Compute validation loss, add number of sentences
                eval_loss += F.cross_entropy(
                    logits,
                    batch.labels,
                    reduction='sum',
                    weight=self.weights(batch)).item()
                num_sents += len(batch)

                # Evaluate across SegEval metrics, save batch performance
                dicts_list.append(self.evalu(batch, preds))

        # Average dictionaries
        eval_metrics = avg_dicts(dicts_list)

        # Normalize eval loss
        normd_eval_loss = eval_loss / num_sents

        return eval_metrics, normd_eval_loss

    def viz_metrics(self):
        """ Visualize progress: train loss, val loss, word- sent-level metrics """
        # Initialize plot
        _, axes = plt.subplots(ncols=2, nrows=2, sharex='col', sharey='col')
        val, word, train, sent = axes.ravel()

        # Plot validation loss
        val.plot(self.val_loss, c='g')
        val.set_ylabel('Val Loss')
        val.set_ylim([0, max(max(self.val_loss), max(self.train_loss)) + 0.1])

        # Plot training loss
        train.plot(self.train_loss, c='r')
        train.set_ylabel('Train Loss')

        for key, values in self.metrics.items():

            # Plot word-level metrics
            if key.startswith('w_'):
                word.plot(values, label=key)

            # Plot sent-level metrics
            elif key.startswith('s_'):
                sent.plot(values, label=key)

        # Fix y axis limits, y label, legend for word-level metrics
        word.set_ylim([0, 1])
        word.set_ylabel('Word metrics')
        word.legend(bbox_to_anchor=(1.04, 0.5),
                    loc="center left",
                    borderaxespad=0)

        # Fix again but this time for sent-level
        sent.set_ylabel('Sent metrics')
        sent.legend(bbox_to_anchor=(1.04, 0.5),
                    loc="center left",
                    borderaxespad=0)

        # Give the plots some room to breathe
        plt.subplots_adjust(left=None,
                            bottom=4,
                            right=2,
                            top=5,
                            wspace=None,
                            hspace=None)

        # Display the plot
        plt.show()

    def debugging(self, preds, batch, show_probs=True):
        """ Check how many segment boundaries were correctly predicted.

        Returns:
            ``(segs_correct, texts_correct, total_segs, total_texts)``:
            correctly predicted label-1 and label-0 entries, followed by
            the totals of each label in the batch.
        """
        labels = batch.labels
        probs = F.softmax(preds, dim=1)
        _, outputs = torch.max(probs, dim=1)

        # Vectorised counts (previously a Python loop over tensor elements)
        segs_correct = ((labels == 1) & (outputs == 1)).sum().item()
        texts_correct = ((labels == 0) & (outputs == 0)).sum().item()

        total_segs = labels.sum().item()
        total_texts = (labels == 0).sum().item()

        if show_probs:
            means = probs.mean(dim=0)
            print('Label 0: %f | Label 1: %f' %
                  (means[0].item(), means[1].item()))

        return segs_correct, texts_correct, total_segs, total_texts

    def predict(self, document):
        """ Given a document, predict segmentations """
        return self.predict_batch(Batch([document]))

    def predict_batch(self, batch, THETA=0.50):
        """ Given a batch, predict segmentation boundaries thresholded
        by min probability THETA, which needs to be tuned.

        Returns:
            ``(preds, logits)``: a list of booleans and the raw model
            output.
        """
        # Predict
        logits = self.model(batch)

        # Softmax for probabilities
        probs = F.softmax(logits, dim=1)

        # If greater than threshold theta, make it a boundary
        boundaries = probs[:, 1] > THETA

        # Convert from tensor to list
        preds = boundaries.tolist()

        return preds, logits

    def weights(self, batch):
        """ Class weight loss from batch.

        Label 0 is weighted by the fraction of label-1 entries in the
        batch; label 1 always has weight 1.
        """
        labels = batch.labels
        zero_weight = 1 / (len(labels) / labels.sum().float())
        # ones_like keeps dtype and device aligned with zero_weight, so the
        # stack also works when the labels are not on the CPU.
        one_weight = torch.ones_like(zero_weight)
        return torch.stack([zero_weight, one_weight])

    def save_model(self, savepath):
        """ Save model state dictionary to ``savepath + '.pth'`` """
        torch.save(self.model.state_dict(), savepath + '.pth')

    def load_model(self, loadpath):
        """ Load state dictionary into model """
        state = torch.load(loadpath)
        self.model.load_state_dict(state)
        self.model = to_cuda(self.model)
class DQNAgent(Agent):
    """Q-learning agent with an online/target network pair and a replay buffer.

    The online ("current") network is trained from sampled experience; the
    target network tracks it through ``soft_update``.
    """

    def __init__(self, env, config: DQNConfig):
        """Build the replay buffer, both Q-networks and the optimizer.

        Args:
            env: Environment; ``state_size`` and ``action_size`` size the
                networks.
            config: Hyper-parameters (``buffer_size``, ``batch_size``,
                ``fc1_units``, ``fc2_units``, ``learning_rate``, and later
                ``gamma``, ``tau`` and ``episode_max_length``).
        """
        super().__init__(env)
        self.config = config
        self.replay_buffer = ReplayBuffer(config.buffer_size,
                                          config.batch_size)
        self.qnet_current = QNetwork(env.state_size, env.action_size,
                                     config.fc1_units,
                                     config.fc2_units).to(device)
        self.qnet_target = QNetwork(env.state_size, env.action_size,
                                    config.fc1_units,
                                    config.fc2_units).to(device)
        self.optimizer = torch.optim.Adam(self.qnet_current.parameters(),
                                          lr=config.learning_rate)
        self.metrics = Metrics()

    def restore(self, file):
        """Load saved Q-network weights.

        The weights go into the online AND the target network. Loading
        only the online one would leave the target at its random initial
        values, so TD targets would be meaningless if training resumed
        from the checkpoint.

        Args:
            file: Path or file object accepted by ``torch.load``.
        """
        # Read the file once; a file object cannot be read twice.
        state = torch.load(file)
        self.qnet_current.load_state_dict(state)
        self.qnet_target.load_state_dict(state)

    def compute_action(self, state, epsilon=0):
        """Return an epsilon-greedy action index for ``state``."""
        if np.random.random() < epsilon:
            return np.random.randint(self.env.action_size)
        action_values = self.qnet_current.action_values_for(state)
        return np.argmax(action_values)

    def train(self,
              n_steps,
              update_every,
              print_every,
              epsilon_init=1.0,
              epsilon_decay=0.995,
              epsilon_min=0.01):
        """Interact with the environment and train for up to ``n_steps``.

        Args:
            n_steps: Maximum number of environment steps.
            update_every: Run a batch update every this many steps.
            print_every: Print progress every this many steps.
            epsilon_init: Initial exploration rate.
            epsilon_decay: Multiplicative per-step decay of epsilon.
            epsilon_min: Lower bound for epsilon.
        """
        epsilon = epsilon_init
        # Make sure the buffer can serve at least one full batch.
        state = self._warmup(epsilon)
        self.metrics.plot()
        for t_step in range(1, n_steps + 1):
            state = self._step(state, epsilon)
            epsilon = max(epsilon_min, epsilon * epsilon_decay)
            if t_step % update_every == 0:
                self._batch_train()
                if self._check_solved():
                    break
            if t_step % print_every == 0:
                print(
                    f"Step #{t_step}" +
                    f", Running score {self.metrics.running_score():.2f}" +
                    f", Total steps {self.metrics.step_count}" +
                    f", Total episodes {self.metrics.episode_count}")

    def _warmup(self, epsilon):
        """Step the environment until the buffer holds one batch; return the state."""
        state = self.env.reset(train_mode=True)
        needed_experiences = max(
            0, self.replay_buffer.batch_size - len(self.replay_buffer))
        for _ in range(needed_experiences):
            state = self._step(state, epsilon)
        return state

    def _step(self, state, epsilon):
        """Take one environment step, record it, and return the next state.

        When the episode ends, or is cut off at
        ``config.episode_max_length``, the environment is reset and the
        fresh initial state is returned instead.
        """
        action = self.compute_action(state, epsilon)
        next_state, reward, done = self.env.step(action)
        # The experience is stored with the environment's own `done`, before
        # the length cut-off below can override it.
        self.replay_buffer.add(
            Experience(state, action, reward, next_state, done))
        if self.metrics.current_episode_length >= self.config.episode_max_length:
            done = True
        self.metrics.on_step(reward, done)
        if done:
            return self.env.reset(train_mode=True)
        return next_state

    def _batch_train(self):
        """Run one TD update of the online network on a sampled batch."""
        states, actions, rewards, next_states, dones = \
            self.replay_buffer.sample()

        # Highest target-network value along dim 1 for each next state.
        Q_targets_next = self.qnet_target(next_states).detach().max(
            1)[0].unsqueeze(1)
        # (1 - dones) removes the bootstrap term for terminal transitions.
        Q_targets = rewards + (self.config.gamma * Q_targets_next *
                               (1 - dones))
        # Online-network values at the indices of the actions actually taken.
        Q_expected = self.qnet_current(states).gather(1, actions)

        loss = F.mse_loss(Q_expected, Q_targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.qnet_target.soft_update(self.qnet_current, self.config.tau)

    def _check_solved(self):
        """Save the model and return True once the running score reaches 13."""
        if self.metrics.running_score() >= 13:
            print(
                f"\nEnvironment solved in {self.metrics.episode_count} episodes!\t"
                + f"Average Score: {self.metrics.running_score():.2f}")
            torch.save(self.qnet_current.state_dict(), "model.pt")
            return True
        return False