def mnist_classification(data, epochs=10000, epochs_without_progress=2000,
                         mini_batch_size=40):
    """Plots and writes out the performance of neural and logistic models on a
    classification problem for the MNIST dataset."""
    total_steps = epochs * len(data['x_train']) // mini_batch_size
    learning_rates = [
        learning_rate.Learning_rate(base=base,
                                    decay=decay).ramp_up(10).compile(total_steps)
        for base, decay in [(3e-3, 2.5e-5), (2e-3, 5e-5)]]
    neural = {
        'layers': [{'height': 64},
                   {'height': 32},
                   {'height': 10,
                    'activations': activations.sigmoids,
                    'd_func': lambda a, y, _: y - a}],
        'learning_rate': learning_rates[0],
        'momentum': 0.6}
    logistic = {
        'name': 'Logistic',
        'layers': [{'height': 64},
                   {'height': 10,
                    'activations': activations.softmaxs,
                    'd_func': lambda a, y, _: y - a}],
        'learning_rate': learning_rates[1],
        'momentum': 0.6}
    neural_model_ = neural_model.Network(**neural)
    logistic_model = neural_model.Network(**logistic)
    subplots = [([neural_model_, logistic_model],
                 {'mini_batch_size': mini_batch_size})]
    sgd_on_models_kwargs = {
        'epochs': epochs,
        'epochs_without_progress': epochs_without_progress,
        'metric': metrics.accuracy_loss}
    data_args = [data['x_train'], data['x_validate'],
                 data['y_train'], data['y_validate']]
    errors, subtitle, subplots, metric_string = sgd.sgd_on_models(
        *data_args, *subplots, **sgd_on_models_kwargs)
    title = ['Neural model, Classification test', 'neural_classification',
             subtitle]
    sgd.plot_sgd_errors(errors, title, metric_string)
    classification_subplots = [(subplots[0][0], subplots[1]),
                               (subplots[0][1], subplots[1])]
    metrics.classification_accuracy(classification_subplots, data)
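# Illustrative usage sketch (an assumption, not part of the original module):
# `mnist_classification` only needs `data` to expose the four keys read into
# `data_args` above. The synthetic arrays below are hypothetical placeholders,
# shaped to match the 64-input / 10-class layers configured in the function.
def _demo_mnist_classification(n_train=1000, n_val=200, seed=0):
    import numpy as np  # local import so the sketch stays self-contained
    rng = np.random.default_rng(seed)
    x_train = rng.random((n_train, 64))
    x_validate = rng.random((n_val, 64))
    # one-hot targets over 10 classes, matching the output layer height
    y_train = np.eye(10)[rng.integers(0, 10, n_train)]
    y_validate = np.eye(10)[rng.integers(0, 10, n_val)]
    data = {'x_train': x_train, 'x_validate': x_validate,
            'y_train': y_train, 'y_validate': y_validate}
    mnist_classification(data, epochs=10)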
def create_predictions_df(query_img_paths, query_labels, qry_lbl_sim_matrix,
                          topk, device):
    # convert raw similarity scores to probabilities
    scores = qry_lbl_sim_matrix.to(device)
    probs = scores.softmax(1)
    max_probs, indexes = torch.max(probs, dim=1)
    max_scores, _ = torch.max(scores, dim=1)
    # debug: shapes of the intermediate tensors
    print(scores.shape)
    print(indexes.shape)
    print(query_labels.shape)
    print(probs.shape)
    print(max_probs.shape)
    print(max_scores.shape)

    predictions_list = []
    predictions_list += zip(
        query_img_paths,
        indexes.cpu().numpy(),
        max_probs.cpu().numpy(),
        query_labels.data.cpu().numpy(),
        probs.data.cpu().numpy(),
        scores.data.cpu().numpy(),
        max_scores.cpu().numpy())
    # NOTE: zip only iterates up to the shortest input, so verify nothing was
    # silently dropped.
    assert len(predictions_list) == qry_lbl_sim_matrix.shape[0]
    assert len(predictions_list) == len(query_img_paths)

    accuracies = classification_accuracy(scores, query_labels, topk=topk)
    metrics_results = {'top1-acc': accuracies[0].item(),
                       'top5-acc': accuracies[1].item()}
    np.set_printoptions(threshold=sys.maxsize)
    predictions_df = pd.DataFrame(
        predictions_list,
        columns=['img_path', 'pred_index', 'prob', 'correct_index',
                 'score', 'similarity', 'pred_similarity'])
    # TODO: fix column names - a bit confusing
    #   score:           np array, normalized scores/similarities (softmax)
    #   prob:            scalar, max of 'score'
    #   similarity:      np array, raw scores/similarities before normalization
    #   pred_similarity: scalar, max of 'similarity'
    return predictions_df, metrics_results
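# `classification_accuracy` is defined elsewhere in the project; the sketch
# below is only a minimal reference for the behavior assumed throughout this
# file (standard top-k accuracy over raw scores, returned as percentages,
# one tensor per k). The real implementation may differ.
def _classification_accuracy_sketch(output, target, topk=(1,)):
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)
        # top-k predicted class indices per sample, transposed to (maxk, batch)
        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        return [correct[:k].reshape(-1).float().sum(0).mul_(100.0 / batch_size)
                for k in topk]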
def train_model(model, criterion, optimizer, scheduler, device, dataloaders,
                label_encoder, num_epochs=100, earlystop_patience=2):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    has_waited = 0
    stop_training = False
    epoch_metrics = MetricsCollection()

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and a validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()   # set model to training mode
            else:
                model.eval()    # set model to evaluation mode

            batch_metrics = MetricsCollection()
            predictions_list = []

            # Iterate over data.
            loader = dataloaders[phase]
            # tqdm disable=None for Azure ML (no progress bar for non-tty)
            pbar = tqdm(loader, total=len(loader),
                        desc="Epoch {} {}".format(epoch, phase),
                        ncols=0, disable=None)
            for batch_index, batch_data in enumerate(pbar):
                inputs = batch_data['image'].to(device)
                labels = batch_data['label'].to(device)
                img_paths = batch_data['image_name']

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    scores = outputs.softmax(1)

                    # backward + optimize only in the training phase
                    if phase == 'train':
                        loss.backward()
                        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
                        optimizer.step()

                batch_metrics.add(phase, 'loss', loss.item(), inputs.size(0))
                accuracies = classification_accuracy(outputs, labels,
                                                     topk=(1, 5))
                batch_metrics.add(phase, 'top1-acc', accuracies[0].item(),
                                  inputs.size(0))
                batch_metrics.add(phase, 'top5-acc', accuracies[1].item(),
                                  inputs.size(0))
                pbar.set_postfix(**{
                    k: "{:.5f}".format(meter.avg)
                    for k, meter in batch_metrics[phase].items()})

            # copy the average batch metrics into the epoch-level collection
            for key, meter in batch_metrics[phase].items():
                epoch_metrics.add(phase, key, meter.avg, 1)
                run.log('{}_{}'.format(phase, key), meter.avg)

            if phase == 'val':
                # monitor the validation metrics for early stopping
                best_epoch_index = epoch_metrics['val']['top1-acc'].best()[1]
                if best_epoch_index == epoch:
                    has_waited = 1
                    best_model_wts = copy.deepcopy(model.state_dict())
                    print("Saving the best model state dict")
                else:
                    if has_waited >= earlystop_patience:
                        print("** Early stop in training: {} waits **".format(
                            has_waited))
                        stop_training = True
                    has_waited += 1

                if isinstance(scheduler,
                              torch.optim.lr_scheduler.ReduceLROnPlateau):
                    scheduler.step(epoch_metrics['val']['loss'].value)
                else:
                    scheduler.step()

        print()  # end of epoch

        if stop_training:
            break

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    best_acc, best_epoch = epoch_metrics['val']['top1-acc'].best()
    best_metrics = {
        'top1-acc': best_acc,
        'top5-acc': epoch_metrics['val']['top5-acc'].history[best_epoch],
    }
    for key, meter in epoch_metrics['val'].items():
        best_value, best_epoch = meter.best()
        train_value = epoch_metrics['train'][key].history[best_epoch]
        print('* Best val-{} at epoch {}: {:.4f} (train-{}: {:.4f}) *'.format(
            key, best_epoch, best_value, key, train_value))

    # load the best model weights
    model.load_state_dict(best_model_wts)
    return model, best_metrics
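# `MetricsCollection` and its meters come from elsewhere in the project. The
# sketch below only documents the interface train_model relies on
# (add / [phase][key] / avg / value / history / best); it is an assumption,
# and the real implementation may differ.
from collections import defaultdict

class _MeterSketch:
    """Running weighted average with a per-update() history."""

    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.value = 0.0   # most recent value passed to update()
        self.history = []  # one entry per update (per epoch for epoch_metrics)

    @property
    def avg(self):
        return self.sum / max(self.count, 1)

    def update(self, value, n=1):
        self.value = value
        self.sum += value * n
        self.count += n
        self.history.append(value)

    def best(self, mode='max'):
        """Return (best_value, index_of_best) over the recorded history."""
        arg = max if mode == 'max' else min
        index = arg(range(len(self.history)), key=lambda i: self.history[i])
        return self.history[index], index

class _MetricsCollectionSketch:
    """Two-level mapping: phase -> metric name -> meter."""

    def __init__(self):
        self._phases = defaultdict(lambda: defaultdict(_MeterSketch))

    def __getitem__(self, phase):
        return self._phases[phase]

    def add(self, phase, key, value, n):
        self._phases[phase][key].update(value, n)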
def eval_model(self, device, dataloader, do_pr_metrics=False, topk=(1, 5)):
    since = time.time()
    self.model.eval()   # set model to evaluation mode

    batch_metrics = MetricsCollection()
    predictions_list = []

    # Iterate over data.
    for batch_data in tqdm(dataloader, disable=None):
        inputs = batch_data['image'].to(device)
        labels = batch_data['label'].to(device)
        img_paths = batch_data['image_name']

        # no gradient tracking during evaluation
        with torch.set_grad_enabled(False):
            model_outputs = self.model(inputs)
            if isinstance(model_outputs, dict):
                outputs = model_outputs['classification_outputs']
            else:
                outputs = model_outputs
            loss = self.criterion(outputs, labels)
            probs = outputs.softmax(1)
            max_probs, indexes = torch.max(probs, dim=1)
            predictions_list += zip(
                img_paths,
                indexes.cpu().numpy(),
                max_probs.cpu().numpy(),
                labels.data.cpu().numpy(),
                probs.data.cpu().numpy())

            if self.results_dir is not None:
                # debug visualization
                if hasattr(self.model, 'visualize_preds'):
                    prob_of_corrects, positions_of_corrects = \
                        probability_of_correct_class(outputs, labels)
                    fig_titles = [
                        "prob-of-correct-class {:.4f}, top {} position".format(
                            x[0], x[1])
                        for x in zip(prob_of_corrects, positions_of_corrects)]
                    self.model.visualize_preds(
                        inputs.cpu(), model_outputs,
                        save_dir=self.results_dir,
                        file_names=[os.path.basename(x) for x in img_paths],
                        titles=fig_titles)

        # statistics
        batch_metrics.add('eval', 'loss', loss.item(), inputs.size(0))
        accuracies = classification_accuracy(outputs, labels, topk=(1, 5))
        batch_metrics.add('eval', 'top1-acc', accuracies[0].item(),
                          inputs.size(0))
        batch_metrics.add('eval', 'top5-acc', accuracies[1].item(),
                          inputs.size(0))

    time_elapsed = time.time() - since
    print('Evaluation complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    metrics_results = {}
    for key, meter in batch_metrics['eval'].items():
        metrics_results[key] = meter.avg
    print(metrics_results)

    predictions_df = pd.DataFrame(
        predictions_list,
        columns=['img_path', 'pred_index', 'prob', 'correct_index', 'score'])

    if do_pr_metrics:
        precision_metrics = microavg_precision(predictions_df)
        metrics_results.update(precision_metrics)

    return metrics_results, predictions_df
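# `probability_of_correct_class` (used for the fig_titles above) is assumed to
# return, per sample, the softmax probability of the ground-truth class and
# that class's 1-based rank among all class predictions. A hedged sketch:
def _probability_of_correct_class_sketch(outputs, labels):
    probs = outputs.softmax(1)
    # probability the model assigns to the correct class, per sample
    prob_of_correct = probs.gather(1, labels.view(-1, 1)).squeeze(1)
    # 1-based rank: how many classes score strictly higher, plus one
    positions = (probs > prob_of_correct.unsqueeze(1)).sum(dim=1) + 1
    return prob_of_correct.tolist(), positions.tolist()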
def hneg_train_model(model, optimizer, scheduler, device, dataloaders,
                     results_dir, label_encoder, criterion,
                     num_epochs=100, earlystop_patience=7,
                     simul_sidepairs=False, train_with_side_labels=True,
                     sidepairs_agg='post_mean',
                     metric_evaluator_type='euclidean',
                     val_evaluator='metric'):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    has_waited = 0
    stop_training = False
    epoch_metrics = MetricsCollection()

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        evaluator = MetricEmbeddingEvaluator(
            model, simul_sidepairs=simul_sidepairs,
            sidepairs_agg_method=sidepairs_agg,
            metric_evaluator_type=metric_evaluator_type)
        logit_evaluator = LogitEvaluator(
            model, simul_sidepairs=simul_sidepairs,
            sidepairs_agg_method=sidepairs_agg)

        # Each epoch has a training and a validation phase
        for phase in ['train', 'val']:
            print('Phase: {}'.format(phase))
            if phase == 'train':
                model.train()   # set model to training mode
            else:
                model.eval()    # set model to evaluation mode

            batch_metrics = MetricsCollection()
            distance_records = defaultdict(list)

            # Iterate over data.
            loader = dataloaders[phase]
            # tqdm disable=None for Azure ML (no progress bar for non-tty)
            pbar = tqdm(loader, total=len(loader),
                        desc="Epoch {} {}".format(epoch, phase),
                        ncols=0, disable=None)
            for batch_index, batch_data in enumerate(pbar):
                if DEBUG and batch_index > 10:
                    break
                inputs = batch_data['image'].to(device)
                labels = batch_data['label'].to(device)
                if DEBUG:
                    print("labels", labels)
                    print(batch_data['is_front'])
                    print(batch_data['is_ref'])

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    all_outputs = model(inputs, labels)
                    loss_outputs = criterion(
                        all_outputs, labels,
                        is_front=batch_data['is_front'],
                        is_ref=batch_data['is_ref'])
                    if loss_outputs is None:
                        warnings.warn(
                            "loss_outputs is None, skipping this minibatch. "
                            f"labels: {labels}, "
                            f"is_front: {batch_data.get('is_front', None)}, "
                            f"is_ref: {batch_data.get('is_ref', None)}")
                        continue

                    logits = all_outputs['logits']
                    if train_with_side_labels:
                        # front/back sides are treated as different classes
                        logits = model.shift_label_indexes(logits)
                    accuracies = classification_accuracy(logits, labels,
                                                         topk=(1, 5))
                    batch_metrics.add(phase, 'acc1', accuracies[0].item(),
                                      inputs.size(0))
                    batch_metrics.add(phase, 'acc5', accuracies[1].item(),
                                      inputs.size(0))

                    for prefix in ['triplet_', 'contrastive_']:
                        for n in ['distances', 'targets']:
                            k = prefix + n
                            if k in loss_outputs:
                                distance_records[k].append(loss_outputs[k])

                    # backward + optimize only in the training phase
                    if phase == 'train':
                        loss_outputs['loss'].backward()
                        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
                        optimizer.step()
                        lr = get_current_lr(optimizer)
                        batch_metrics.add(phase, 'lr', lr, inputs.size(0))

                    for k in ['loss', 'metric_loss', 'ce', 'arcface',
                              'contrastive', 'triplet', 'focal']:
                        if k in loss_outputs:
                            batch_metrics.add(phase, k,
                                              loss_outputs[k].item(),
                                              inputs.size(0))
                    for prefix in ['triplet', 'contrastive']:
                        for pn in ['pos', 'neg']:
                            k = f"{prefix}_{pn}_distances"
                            if k not in loss_outputs:
                                continue
                            batch_metrics.add(phase, k,
                                              loss_outputs[k].mean().item(),
                                              loss_outputs[k].size(0))

                pbar.set_postfix(**{
                    k.replace("contrastive", "cont")
                     .replace("triplet", "trip")
                     .replace("distances", "dist"):
                        ("{:.1e}" if k.endswith('lr')
                         else "{:.2f}").format(meter.avg)
                    for k, meter in batch_metrics[phase].items()})

            # finished all batches: copy the average batch metrics
            for key, meter in batch_metrics[phase].items():
                epoch_metrics.add(phase, key, meter.avg, 1)
                run.log('{}_{}'.format(phase, key), meter.avg)

            # average absolute difference between pos/neg distances
            for prefix in ['triplet_', 'contrastive_']:
                pos_k = f"{prefix}pos_distances"
                neg_k = f"{prefix}neg_distances"
                if (pos_k not in epoch_metrics[phase]
                        or neg_k not in epoch_metrics[phase]):
                    continue
                avg_dist_diff = (epoch_metrics[phase][neg_k].history[epoch]
                                 - epoch_metrics[phase][pos_k].history[epoch])
                epoch_metrics.add(phase, prefix + 'dist_diff', avg_dist_diff, 1)
                run.log('{}_{}'.format(phase, prefix + 'dist_diff'),
                        avg_dist_diff)

                distances = torch.cat(distance_records[prefix + 'distances'], 0)
                targets = torch.cat(distance_records[prefix + 'targets'], 0)
                precision_metrics = microavg_precision_from_dists(targets,
                                                                  distances)
                epoch_metrics.add(phase, prefix + 'pw-avg-precision',
                                  precision_metrics['avg-precision'], 1)
                run.log('{}_{}'.format(phase, prefix + 'pw-avg-precision'),
                        precision_metrics['avg-precision'])

            # evaluate only every few epochs: the pandas DataFrame in the
            # evaluator has a memory leak
            checkpoint = 5
            if phase == 'val' and epoch % checkpoint == 0:
                print("#### Checkpoint ###")
                eval_logit = 'logit' in val_evaluator
                if eval_logit:
                    print("Evaluating logit metrics")
                    logit_evaluator.multihead_model = model
                    metrics_results, _ = logit_evaluator.eval_model(device,
                                                                    dataloaders)
                    for key, value in metrics_results.items():
                        if not isinstance(value, (int, float)):
                            continue
                        epoch_metrics.add(phase, key, value, 1)
                        run.log('{}_{}_logit'.format(phase, key), value)
                    del metrics_results
                    gc.collect()

                # evaluate using embedding distances
                eval_metric_embedding = 'metric' in val_evaluator
                if eval_metric_embedding:
                    print("Evaluating metric metrics")
                    evaluator.siamese_model = model.embedding_model  # DataParallel
                    metrics_results, _ = evaluator.eval_model(device,
                                                              dataloaders)
                    for key, value in metrics_results.items():
                        if not isinstance(value, (int, float)):
                            continue
                        epoch_metrics.add(phase, key, value, 1)
                        run.log('{}_{}_metric'.format(phase, key), value)
                    del metrics_results
                    gc.collect()

                best_value, best_checkpoint_index = \
                    epoch_metrics['val']['micro-ap'].best(mode='max')
                if best_checkpoint_index + 1 == len(
                        epoch_metrics['val']['micro-ap'].history):
                    has_waited = 1
                    best_model_wts = copy.deepcopy(model.state_dict())
                    print(f"Saving the best model state dict, "
                          f"{best_value}, {best_checkpoint_index}")
                else:
                    if has_waited >= earlystop_patience:
                        print("** Early stop in training: {} waits **".format(
                            has_waited))
                        stop_training = True
                    has_waited += 1

                if isinstance(scheduler, lr_scheduler.ReduceLROnPlateau):
                    # ReduceLROnPlateau minimizes, so negate micro-ap
                    scheduler.step(-1.0 * epoch_metrics['val']['micro-ap'].value)
                else:
                    scheduler.step()

        print()  # end of epoch

        if stop_training:
            break

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    _, best_epoch = epoch_metrics['val']['micro-ap'].best(mode='max')
    best_metrics = {'best_epoch': best_epoch}
    for k, v in epoch_metrics['val'].items():
        try:
            best_metrics[k] = v.history[best_epoch]
        except IndexError:
            # some metrics are only recorded at checkpoint epochs and may
            # have no value at best_epoch
            pass

    # load the best model weights
    model.load_state_dict(best_model_wts)
    # model = model.module  # DataParallel
    return model, best_metrics
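# `get_current_lr` (logged in the batch loop above) is assumed to read the
# optimizer's live learning rate; a minimal sketch using the standard
# torch.optim param_groups layout:
def _get_current_lr_sketch(optimizer):
    # every torch.optim optimizer exposes lr per parameter group
    return optimizer.param_groups[0]['lr']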
def eval_model(self, device, eval_ref_dataloaders, topk=(1, 5),
               do_pr_metrics=True, add_single_side_eval=False):
    since = time.time()
    self.multihead_model.eval()   # set the whole model to evaluation mode

    # Iterate over the eval data.
    eval_dataloader = eval_ref_dataloaders['eval']
    query_results = self.create_embeddings_tensor(eval_dataloader, device)
    query_outputs = query_results['output_tensor']
    query_labels = query_results['label_tensor']
    query_img_paths = query_results['img_name_list']
    print('query_img_paths', len(query_img_paths),
          'nunique', len(np.unique(query_img_paths)), query_img_paths[0])
    print('query_labels', len(query_labels),
          'nunique', len(np.unique(query_labels.cpu().numpy())))

    all_metrics_results = {}
    accuracies = classification_accuracy(query_outputs, query_labels,
                                         topk=(1, 5))
    all_metrics_results['raw-top1-acc'] = accuracies[0].item()
    all_metrics_results['raw-top5-acc'] = accuracies[1].item()

    # create pairs and aggregate if simul_sidepairs
    if self.do_agg_distance:
        # creates index pairs simulating 2-side (front/back) queries
        query_pair_idxs, query_labels = create_simul_query_pairids(
            query_labels, query_results['sidelbl_tensor'])
        nunique_labels = len(np.unique(query_labels.numpy()))
        query_labels = query_labels.to(device)
        query_num = len(query_pair_idxs)
        query_img_paths = [[query_img_paths[p[0]], query_img_paths[p[1]]]
                           for p in query_pair_idxs]
        nunique_imgs = len(np.unique(
            [item for sublist in query_img_paths for item in sublist]))  # flatten
        print(f'Evaluation will be performed on {len(query_pair_idxs)} '
              f'simulated 2-side pairs from {nunique_imgs} consumer images '
              f'of {nunique_labels} labels.')
    else:
        query_num = query_outputs.size(0)

    label_num = query_outputs.shape[1]
    print('query_outputs.shape', query_outputs.shape)
    assert label_num == self.multihead_model.get_original_n_classes()

    # TODO: include a package with scatter_min support; see whether
    # *_dist_2ref can be computed vectorized outside the loop
    print(f"Calculate distance matrix between all the labels {label_num} "
          f"and consumer queries {query_num}")
    since_queries = time.time()

    if self.do_agg_distance:
        # no need to aggregate the ref axis (done in multihead_model)
        # each matrix is (# queries, # labels)
        qf_lbl_sim_matrix = query_outputs[query_pair_idxs[:, 0], :]
        qb_lbl_sim_matrix = query_outputs[query_pair_idxs[:, 1], :]

        if add_single_side_eval:
            # calculate single-side metrics
            f_query_img_paths = [q[0] for q in query_img_paths]
            b_query_img_paths = [q[1] for q in query_img_paths]
            # front side
            f_predictions_df, f_metrics_results = create_predictions_df(
                f_query_img_paths, query_labels, qf_lbl_sim_matrix, topk,
                device)
            f_precision_metrics = all_avg_precision(f_predictions_df,
                                                    per_class=False)
            f_metrics_results.update(f_precision_metrics)
            all_metrics_results.update(
                {'f_' + k: v for k, v in f_metrics_results.items()})
            # back side
            b_predictions_df, b_metrics_results = create_predictions_df(
                b_query_img_paths, query_labels, qb_lbl_sim_matrix, topk,
                device)
            b_precision_metrics = all_avg_precision(b_predictions_df,
                                                    per_class=False)
            b_metrics_results.update(b_precision_metrics)
            all_metrics_results.update(
                {'b_' + k: v for k, v in b_metrics_results.items()})
            # single side (both front and back)
            s_predictions_df = pd.concat([f_predictions_df, b_predictions_df],
                                         ignore_index=True)
            s_precision_metrics = all_avg_precision(s_predictions_df,
                                                    per_class=False)
            all_metrics_results.update(
                {'s_' + k: v for k, v in s_precision_metrics.items()})
            del s_predictions_df
            del f_predictions_df
            del b_predictions_df
            import gc
            gc.collect()

        # aggregate the front and back sides
        q2sides_lbl_sim_matrix = torch.stack(
            [qf_lbl_sim_matrix, qb_lbl_sim_matrix], dim=2)
        if 'post_mean' in self.sidepairs_agg_method:
            qry_lbl_sim_matrix = q2sides_lbl_sim_matrix.mean(dim=2).squeeze()
        elif 'post_max' in self.sidepairs_agg_method:
            qry_lbl_sim_matrix = q2sides_lbl_sim_matrix.max(dim=2)[0].squeeze()
        else:
            raise ValueError(f"{self.sidepairs_agg_method} not supported")
    else:
        qry_lbl_sim_matrix = query_outputs

    print('qry_lbl_sim_matrix.shape', qry_lbl_sim_matrix.shape)
    avg_time_elapsed_qry = (time.time() - since_queries) / query_num
    print('Avg. time elapsed per metric query {:.0f}m {:.0f}s'.format(
        avg_time_elapsed_qry // 60, avg_time_elapsed_qry % 60))

    predictions_df, metrics_results = create_predictions_df(
        query_img_paths, query_labels, qry_lbl_sim_matrix, topk, device)
    all_metrics_results.update(metrics_results)

    time_elapsed = time.time() - since
    print('Evaluation complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    if do_pr_metrics:
        precision_metrics = all_avg_precision(predictions_df)
        all_metrics_results.update(precision_metrics)

    return all_metrics_results, predictions_df
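# Toy illustration (not part of the original module) of the post_mean /
# post_max side-pair aggregation above: the front and back query-vs-label
# similarity matrices are stacked along a new dim=2 and reduced over it,
# yielding one fused (num_pairs, num_labels) matrix.
def _demo_sidepair_aggregation():
    qf = torch.tensor([[0.9, 0.1], [0.2, 0.8]])  # front side, (2 pairs, 2 labels)
    qb = torch.tensor([[0.7, 0.3], [0.4, 0.6]])  # back side
    stacked = torch.stack([qf, qb], dim=2)       # (pairs, labels, sides)
    post_mean = stacked.mean(dim=2)              # [[0.8, 0.2], [0.3, 0.7]]
    post_max = stacked.max(dim=2)[0]             # [[0.9, 0.3], [0.4, 0.8]]
    return post_mean, post_max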