def check_distribution(loader, args):
    """Check data distribution

    Args:
        loader: initialized data loader
        args: input arguments
    """
    # Allocate memory: 3 label slots x 6 columns (last column counts NaNs)
    all_n_samples = np.zeros(len(loader))
    all_duration = np.zeros(len(loader))
    all_labels = np.zeros([3, 6], dtype=int)

    # Prepare environment
    if args.append:
        fout = open(os.path.join(args.out_dir, 'viz.html'), 'a')
    else:
        fout = open(os.path.join(args.out_dir, 'viz.html'), 'w')
    fmt = '<img src="{}"></img><br>\n'
    pbar = MiscUtils.gen_pbar(max_value=len(loader),
                              msg='Scanning {}: '.format(args.exp_name))

    # Scan through dataset
    for i, (samples, labels, msr_ids) in enumerate(loader):
        # Retrieve data
        samples = samples.numpy().squeeze()
        labels = labels.numpy().squeeze()

        # Collect length information
        all_n_samples[i] = samples.shape[0]
        all_duration[i] = samples[-1, 0] - samples[0, 0]

        # Collect label information; NaN labels go into the last column
        for j in range(3):
            if np.isnan(labels[j]):
                all_labels[j, -1] += 1
            else:
                all_labels[j, int(labels[j])] += 1

        # Update progress
        pbar.update(i + 1)
    pbar.finish()

    # Normalize counts to fractions of the dataset
    all_labels = all_labels / len(loader)

    # Write to html file
    fout.write('{}<br>\n'.format(args.exp_name))
    img_fname = viz_length_distribution(all_n_samples, all_duration, args)
    fout.write(fmt.format(img_fname))
    img_fname = viz_label_distribution(all_labels, args)
    fout.write(fmt.format(img_fname))
    fout.write('<hr>\n')
    fout.close()
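# check_distribution delegates the actual plotting to viz_length_distribution
# and viz_label_distribution, which are defined elsewhere in the project. A
# minimal sketch of what they might look like follows; the histogram layout,
# bin counts, and output file names are assumptions, not taken from the
# project.
import os

import matplotlib
matplotlib.use('Agg')  # headless backend, since figures are only saved
import matplotlib.pyplot as plt
import numpy as np


def viz_length_distribution(all_n_samples, all_duration, args):
    """Plot histograms of sample counts and durations; return the image path"""
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
    axes[0].hist(all_n_samples, bins=50)
    axes[0].set_title('samples per measurement')
    axes[1].hist(all_duration, bins=50)
    axes[1].set_title('duration (s)')
    img_fname = os.path.join(args.out_dir, 'imgs', 'length_dist.png')
    fig.savefig(img_fname)
    plt.close(fig)
    # Return a path relative to out_dir so the <img> tag resolves in viz.html
    return img_fname.replace(args.out_dir, '.')


def viz_label_distribution(all_labels, args):
    """Plot per-class label fractions, one bar chart per label slot"""
    fig, axes = plt.subplots(1, 3, figsize=(12, 4))
    for j in range(3):
        axes[j].bar(np.arange(all_labels.shape[1]), all_labels[j])
        axes[j].set_title('label {} (last bar = NaN)'.format(j))
    img_fname = os.path.join(args.out_dir, 'imgs', 'label_dist.png')
    fig.savefig(img_fname)
    plt.close(fig)
    return img_fname.replace(args.out_dir, '.')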
def train_one_epoch(model, optimizer, criterion, train_loader, device,
                    writer, run_iter):
    """Training routine for one epoch

    Args:
        model: model to train
        optimizer: optimizer to optimize the loss function
        criterion: loss function
        train_loader: data loader for training set
        device: id of the device for torch to allocate objects
        writer: summary writer to log training progress every few iterations
        run_iter: number of iterations already run

    Return:
        train_loss: training loss of the epoch
    """
    # Switch to train mode
    model.train()

    # Set up progressbar
    pbar = MiscUtils.gen_pbar(max_value=len(train_loader), msg='Training: ')

    # Go through all samples of the training data
    train_loss, n_samples, start_time = 0.0, 0, time.time()
    for i, (samples, labels) in enumerate(train_loader):
        # Place data on the corresponding device
        samples = samples.to(device)
        labels = labels.to(device)

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = model(samples)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Statistics; detach the loss so the graph is not kept alive
        n_samples += labels.size(0)
        train_loss += loss.item()

        # Monitor the training progress
        pbar.update(i + 1, loss=loss.item())
        run_iter += 1
        if run_iter % 100 == 0:
            writer.add_scalar('train_loss_per_iter', loss.item(), run_iter)
    pbar.finish()

    # Average the per-batch losses over the epoch
    train_loss /= len(train_loader)
    logger.info('Training loss: %.4f' % train_loss)
    logger.info('Epoch running time: %.4fs' % (time.time() - start_time))
    return train_loss
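# A minimal usage sketch for train_one_epoch with toy data; the model, loss,
# and log directory below are placeholders, and MiscUtils / logger are assumed
# to be importable from the project, as inside the function itself.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter

model = nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = nn.MSELoss()
train_loader = DataLoader(TensorDataset(torch.randn(64, 10),
                                        torch.randn(64, 1)),
                          batch_size=8)
writer = SummaryWriter(log_dir='runs/example')

train_loss = train_one_epoch(model, optimizer, criterion, train_loader,
                             device='cpu', writer=writer, run_iter=0)
writer.close()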
def viz(loader, args):
    """Visualize data

    Args:
        loader: initialized data loader
        args: input arguments
    """
    # Prepare the environment
    img_dir = os.path.join(args.out_dir, 'imgs')
    if not os.path.isdir(img_dir):
        os.makedirs(img_dir)
    out_fname = os.path.join(args.out_dir, 'viz.html')
    fout = open(out_fname, 'w')
    fmt = '<img src="{}"></img><br>'
    pbar = MiscUtils.gen_pbar(max_value=args.n_samples, msg='Plotting: ')

    # Loop through n_samples and plot
    for i, (samples, labels, msr_ids) in enumerate(loader):
        # Retrieve data
        samples = samples.numpy().squeeze()
        labels = labels.numpy().squeeze()
        n_samples = len(samples)
        duration = samples[-1, 0] - samples[0, 0]
        sampling_freq = 1. * n_samples / duration

        # Plot the figure
        fig, axes = plt.subplots(5, 1, figsize=(12, 12),
                                 constrained_layout=True)
        viz_time_series(samples, axes[0])
        viz_gradient(samples, axes[1])
        viz_spectrogram(samples, sampling_freq, axes[2:5], fig,
                        args.min_band, args.max_band)
        fig.suptitle(parse_labels(labels, args.lbl_type))

        # Save the figure
        img_fname = os.path.join(img_dir, msr_ids[0] + '.png')
        fig.savefig(img_fname)
        plt.close(fig)

        # Add to html, with the image path relative to out_dir
        fout.write(msr_ids[0] + '<br>')
        fout.write('sampling frequency = {} <br>'.format(sampling_freq))
        img_fname = img_fname.replace(args.out_dir, '.')
        fout.write(fmt.format(img_fname))
        fout.write('<hr>')

        # Update progress
        pbar.update(i + 1)
        if i >= args.n_samples - 1:
            break
    pbar.finish()

    # Close the html file
    fout.close()
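# viz_time_series, viz_gradient, viz_spectrogram, and parse_labels are project
# helpers that are not shown here. Sketches of the first two follow, assuming
# (consistent with the duration computation above) that column 0 of `samples`
# holds timestamps and the remaining columns hold signal channels.
import numpy as np


def viz_time_series(samples, ax):
    """Plot each signal channel against the timestamp column"""
    t = samples[:, 0]
    for ch in range(1, samples.shape[1]):
        ax.plot(t, samples[:, ch], label='ch{}'.format(ch))
    ax.set_xlabel('time (s)')
    ax.legend()


def viz_gradient(samples, ax):
    """Plot the first-order difference of each signal channel"""
    t = samples[1:, 0]
    for ch in range(1, samples.shape[1]):
        ax.plot(t, np.diff(samples[:, ch]), label='d(ch{})'.format(ch))
    ax.set_xlabel('time (s)')
    ax.legend()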
def reid_evaluate(emb_query, emb_gallery, lb_ids_query, lb_ids_gallery,
                  cmc_rank=1, top_k=100, is_reranking=False):
    """Evaluate re-identification results with mAP and CMC

    Args:
        emb_query: embeddings of the query images
        emb_gallery: embeddings of the gallery images
        lb_ids_query: person ID labels of the query images
        lb_ids_gallery: person ID labels of the gallery images
        cmc_rank: rank at which the CMC score is reported
        top_k: number of nearest gallery images to keep per query
        is_reranking: whether to apply re-ranking on the distance matrix

    Return:
        indices, mAP, cmc, dist_mtx
    """
    # Calculate distance matrix between query images and gallery images
    dist_mtx = pdist_torch(emb_query, emb_gallery).cpu().detach().numpy()
    if is_reranking:
        print('Reranking is applied!')
        dist_mtx = re_ranking(emb_query, emb_gallery)
    n_q, n_g = dist_mtx.shape

    # Sort gallery indices in ascending order of distance
    indices = np.argsort(dist_mtx, axis=1)[:, :top_k]
    matches = lb_ids_gallery[indices] == lb_ids_query[:, np.newaxis]
    matches = matches.astype(np.int32)
    all_aps = []
    all_cmcs = []

    # Setup progressbar
    pbar = MiscUtils.gen_pbar(max_value=n_q, msg='Evaluating: ')
    for qidx in range(n_q):
        qpid = lb_ids_query[qidx]
        # qcam = lb_cams_query[qidx]
        order = indices[qidx]
        pid_diff = lb_ids_gallery[order] != qpid
        # cam_diff = lb_cams_gallery[order] != qcam
        useful = lb_ids_gallery[order] != -1
        # keep = np.logical_or(pid_diff, cam_diff)
        # keep = np.logical_and(keep, useful)
        # match = matches[qidx][keep]
        match = matches[qidx]
        if not np.any(match):
            continue

        # CMC: count queries with a correct prediction within cmc_rank
        cmc = match.cumsum()
        cmc[cmc > 1] = 1
        all_cmcs.append(cmc[:cmc_rank])

        # AP: average of the precision values at each correct match
        num_real = match.sum()
        match_cum = match.cumsum()
        match_cum = [el / (1.0 + i) for i, el in enumerate(match_cum)]
        match_cum = np.array(match_cum) * match
        ap = match_cum.sum() / num_real
        all_aps.append(ap)

        # Monitor progress
        pbar.update(qidx + 1)
    pbar.finish()

    assert len(all_aps) > 0, 'NO QUERY MATCHED'
    mAP = sum(all_aps) / len(all_aps)
    all_cmcs = np.array(all_cmcs, dtype=np.float32)
    cmc = np.mean(all_cmcs, axis=0)
    return indices, mAP, cmc, dist_mtx
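# pdist_torch and re_ranking come from elsewhere in the project. A common
# implementation of the pairwise Euclidean distance used here is sketched
# below, via ||x - y||^2 = ||x||^2 + ||y||^2 - 2 x.y; the project's own
# version may differ in detail.
import torch


def pdist_torch(emb1, emb2):
    """Pairwise Euclidean distances between two sets of embeddings

    Args:
        emb1: tensor of shape (m, d)
        emb2: tensor of shape (n, d)

    Return:
        (m, n) tensor of distances
    """
    m, n = emb1.size(0), emb2.size(0)
    emb1_sq = torch.pow(emb1, 2).sum(dim=1, keepdim=True).expand(m, n)
    emb2_sq = torch.pow(emb2, 2).sum(dim=1, keepdim=True).expand(n, m).t()
    dist_sq = emb1_sq + emb2_sq - 2 * emb1.mm(emb2.t())
    # Clamp to avoid sqrt of tiny negative values from floating-point error
    return dist_sq.clamp(min=1e-12).sqrt()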
def viz(loader, args):
    """Visualize data

    Args:
        loader: initialized data loader
        args: input arguments
    """
    # Prepare the environment
    if not os.path.isdir(args.out_dir):
        os.makedirs(args.out_dir)
    pbar = MiscUtils.gen_pbar(max_value=args.n_samples, msg='Plotting: ')

    # Loop through n_samples and plot
    for i, (samples, labels, msr_ids) in enumerate(loader):
        # Retrieve data
        samples = samples.numpy().squeeze()
        labels = labels.numpy().squeeze()
        n_samples = len(samples)
        duration = samples[-1, 0] - samples[0, 0]
        sampling_freq = 1. * n_samples / duration

        # Save the STFT of the measurement; the figure-plotting and html
        # writing of the other viz() variant are disabled in this version
        out_file = os.path.join(args.out_dir, msr_ids[0] + '.npy')
        save_stft(samples, sampling_freq, out_file)
        # save_sample(samples, out_file)

        # Update progress
        pbar.update(i + 1)
        if i >= args.n_samples - 1:
            break
    pbar.finish()
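# save_stft is defined elsewhere; a minimal sketch using scipy.signal.stft is
# shown below. The window length (nperseg) and saving the magnitude rather
# than the complex STFT are assumptions, not taken from the project.
import numpy as np
from scipy import signal


def save_stft(samples, sampling_freq, out_file):
    """Compute a per-channel STFT magnitude and save it as a .npy file"""
    specs = []
    for ch in range(1, samples.shape[1]):  # column 0 is assumed to be time
        _, _, zxx = signal.stft(samples[:, ch], fs=sampling_freq, nperseg=256)
        specs.append(np.abs(zxx))
    np.save(out_file, np.stack(specs))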
def evaluation_loop(self):
    """Loop through the evaluation data loader and evaluate each batch"""
    assert self.eval_loader is not None, 'Evaluation loader is not specified'

    # Setup progressbar
    pbar = MiscUtils.gen_pbar(max_value=len(self.eval_loader),
                              msg=self.eval_mess)
    with torch.no_grad():
        for i, (samples, labels) in enumerate(self.eval_loader):
            # Evaluate the current batch
            self.batch_evaluation(samples, labels)

            # Monitor progress
            pbar.update(i + 1)
    pbar.finish()
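# batch_evaluation is defined on the same class; a sketch of its expected
# shape (device transfer, forward pass, metric accumulation) is given below.
# The attribute names (self.model, self.criterion, self.eval_loss) are
# assumptions, not taken from the project.
def batch_evaluation(self, samples, labels):
    """Evaluate a single batch and accumulate the loss"""
    samples = samples.to(self.device)
    labels = labels.to(self.device)
    outputs = self.model(samples)
    self.eval_loss += self.criterion(outputs, labels).item()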
def embed_imgs(self, imgloader, name=''):
    """Embed all images in the given set to feature vectors"""
    # Setup progressbar
    pbar = MiscUtils.gen_pbar(max_value=len(imgloader),
                              msg='Embedding %s: ' % name)
    que_emb = []
    que_lbl = []
    with torch.no_grad():
        for i, (samples, labels) in enumerate(imgloader):
            samples = samples.to(self.device)
            que_emb.append(self.model(samples)['feat'])
            que_lbl.append(labels)

            # Monitor progress
            pbar.update(i + 1)
    pbar.finish()
    return que_emb, que_lbl
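# The per-batch lists returned by embed_imgs are typically concatenated before
# computing distances, e.g. for reid_evaluate above. A usage sketch, where
# `trainer` and the two loaders are hypothetical names:
import torch

que_emb, que_lbl = trainer.embed_imgs(query_loader, name='query')
gal_emb, gal_lbl = trainer.embed_imgs(gallery_loader, name='gallery')

emb_query = torch.cat(que_emb, dim=0)
emb_gallery = torch.cat(gal_emb, dim=0)
lb_ids_query = torch.cat(que_lbl, dim=0).cpu().numpy()
lb_ids_gallery = torch.cat(gal_lbl, dim=0).cpu().numpy()

indices, mAP, cmc, dist_mtx = reid_evaluate(emb_query, emb_gallery,
                                            lb_ids_query, lb_ids_gallery)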
def make_nonan(loader, output_pth):
    """Collect the IDs of measurements whose labels are not NaN

    Args:
        loader: initialized data loader
        output_pth: path of the output text file, one measurement ID per line
    """
    pbar = MiscUtils.gen_pbar(max_value=len(loader), msg='Scanning: ')

    # Scan through dataset
    nonan_lst = []
    for i, (_, labels, msr_ids) in enumerate(loader):
        # Retrieve data and keep only measurements with a valid label
        labels = labels.numpy().squeeze()
        if not np.isnan(labels):
            nonan_lst.append(msr_ids[0])

        # Update progress
        pbar.update(i + 1)
    pbar.finish()

    with open(output_pth, 'w') as fout:
        fout.write('\n'.join(nonan_lst))
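# Usage sketch: the loader is assumed to yield (samples, labels, msr_ids) with
# batch size 1, matching the squeeze() and msr_ids[0] accesses above; the
# output path is a placeholder.
make_nonan(loader, 'splits/nonan_ids.txt')
with open('splits/nonan_ids.txt') as fin:
    keep_ids = fin.read().splitlines()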
def train(model, optimizer, criterion, loaders, logdir, train_mode,
          train_params, device, pretrained_model_path, infer_fn):
    """Training routine

    Args:
        model: model to train
        optimizer: optimizer to optimize the loss function
        criterion: loss function
        loaders: dictionary of data loaders for training and validation
            'loaders[train]' for training
            'loaders[val]' for validation
            ...
        logdir: where to store the logs
        train_mode: how to start the training. Only accepts:
            'from_scratch': start the training from scratch
            'from_pretrained': start the training with a pretrained model
            'resume': resume an interrupted training
        train_params: training parameters as a dictionary
        device: id of the device for torch to allocate objects
        pretrained_model_path: path to the pretrained model, used when
            train_mode is 'from_pretrained'
        infer_fn: BaseInference object; calculates additional metrics and
            saves predictions
    """
    # Set up the training starting point
    if train_mode == 'from_scratch':
        start_epoch = 0
        lr = train_params['init_lr']
    elif train_mode == 'from_pretrained':
        model.load_model(pretrained_model_path)
        start_epoch = 0
        lr = train_params['init_lr']
    elif train_mode == 'resume':
        prefix = MiscUtils.get_lastest_checkpoint(logdir)
        lr, start_epoch = MiscUtils.load_progress(model, optimizer, prefix)
    else:
        raise ValueError('Unsupported train_mode: {}'.format(train_mode))

    # Set up some variables
    infer_fn.init_best_model_score()
    # writer = SummaryWriter(log_dir=logdir, purge_step=start_epoch)
    writer = SummaryWriter(log_dir=logdir)

    # Go through training epochs
    for epoch in range(start_epoch, train_params['n_epochs']):
        logger.info('epoch: %d/%d' % (epoch + 1, train_params['n_epochs']))

        # Training phase
        train_loader = loaders['train']
        run_iter = epoch * len(train_loader)
        train_loss = train_one_epoch(model, optimizer, criterion,
                                     train_loader, device, writer, run_iter)

        # Validation phase
        val_loss, val_score = test(model, criterion, loaders, device,
                                   infer_fn)

        # Log using Tensorboard
        writer.add_scalars('losses', {'train': train_loss, 'val': val_loss},
                           epoch)
        writer.add_scalar('val_score', val_score, epoch)

        # Save training results when necessary
        if (epoch + 1) % train_params['n_epochs_to_log'] == 0:
            MiscUtils.save_progress(model, optimizer, logdir, epoch)

        # Back up the best model
        if infer_fn.is_better_model(val_loss, val_score):
            model.save_model(os.path.join(logdir, 'best.model'))

        # Decay the learning rate
        if epoch + 1 in train_params['decay_epochs']:
            lr *= train_params['lr_decay']
            logger.info('Change learning rate to: %.5f' % lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
                logger.info('%.5f' % param_group['lr'])
        logger.info('-' * 80)
    writer.close()
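# A sketch of the train_params dictionary expected by train(); the key names
# are taken from the usages inside the function, while the values are purely
# illustrative. model, optimizer, criterion, loaders, and infer_fn are assumed
# to be constructed elsewhere.
train_params = {
    'init_lr': 1e-3,           # initial learning rate
    'n_epochs': 50,            # total number of training epochs
    'n_epochs_to_log': 5,      # checkpoint frequency (in epochs)
    'decay_epochs': [20, 40],  # epochs after which the learning rate decays
    'lr_decay': 0.1,           # multiplicative decay factor
}

train(model, optimizer, criterion, loaders, logdir='logs/run0',
      train_mode='from_scratch', train_params=train_params, device='cuda:0',
      pretrained_model_path=None, infer_fn=infer_fn)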