import random
from os import path

import torch
from torchvision import transforms

# NOTE: helpers such as set_net_train, embeddings_device_dim, log, save_uuid,
# unique_str, anneal, fold_batches, mini_batch_gen, test_descriptor_net and
# get_pos_couples are assumed to be imported from this project's other modules.


def get_similarities(P, get_embeddings, net, dataset):
    set_net_train(net, False)
    n = len(dataset)
    d, o = embeddings_device_dim(P, net, n, sim_matrix=True)
    embeddings = get_embeddings(net, dataset, d, o)
    similarities = torch.mm(embeddings, embeddings.t())
    set_net_train(net, True, bn_train=P.train_bn)
    return similarities, d
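
# A minimal, self-contained sketch (not part of the original module) of why
# test_print_descriptor below reports squared distances as "2 - 2 * sim":
# for L2-normalized embeddings a and b, ||a - b||^2 = ||a||^2 + ||b||^2
# - 2 a.b = 2 - 2 * (a . b), so the similarity matrix produced by
# get_similarities directly yields squared Euclidean distances.
def _sim_to_sq_dist_demo():
    import torch.nn.functional as F
    a = F.normalize(torch.randn(4, 8), dim=1)
    sim = torch.mm(a, a.t())
    sq_dist = ((a.unsqueeze(1) - a.unsqueeze(0)) ** 2).sum(dim=2)
    assert torch.allclose(sq_dist, 2 - 2 * sim, atol=1e-5)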
def test_print_descriptor(train_type, P, net, testset_tuple, get_embeddings,
                          best_score=0, epoch=0):
    def print_stats(prefix, p1, c, t, avg_pos, avg_neg, avg_max, mAP):
        s1 = 'Correct: {0} / {1} - acc: {2:.4f} - mAP:{3:.4f}\n'.format(
            c, t, p1, mAP)
        s2 = ('AVG cosine sim (sq dist) values: pos: {0:.4f} ({1:.4f}), '
              'neg: {2:.4f} ({3:.4f}), max: {4:.4f} ({5:.4f})').format(
            avg_pos, 2 - 2 * avg_pos, avg_neg, 2 - 2 * avg_neg,
            avg_max, 2 - 2 * avg_max)
        log(P, prefix + s1 + s2)

    test_set, test_ref_set = testset_tuple
    set_net_train(net, False)
    prec1, correct, tot, sum_pos, sum_neg, sum_max, mAP, lab_dict = \
        test_descriptor_net(P, get_embeddings, net, test_set, test_ref_set)
    # could save labels dictionary (predicted labels for all test labels)
    # TODO
    num_pos = sum(test_label == ref_label
                  for _, test_label, _ in test_set
                  for _, ref_label, _ in test_ref_set)
    num_neg = len(test_set) * len(test_ref_set) - num_pos
    if correct > best_score:
        best_score = correct
        prefix = '{0}, EPOCH:{1}, SCORE:{2}'.format(train_type, epoch, correct)
        save_uuid(P, prefix)
        torch.save(net.state_dict(),
                   path.join(P.save_dir, unique_str(P) + "_best_siam.pth.tar"))
    print_stats('TEST - ', prec1, correct, tot, sum_pos / num_pos,
                sum_neg / num_neg, sum_max / len(test_set), mAP)
    torch.save(net.state_dict(),
               path.join(P.save_dir, "model_siam_" + str(epoch) + ".pth.tar"))

    # training set accuracy (choose second highest value,
    # as highest should almost certainly be the same image)
    # choose train samples with at least 2 other images for the query
    couples = get_pos_couples(test_ref_set)
    train_test_set = random.sample(test_ref_set,
                                   max(1, len(test_ref_set) // 10))
    # wrap in list() so the filtered set can be iterated and measured below
    # (filter() is a lazy iterator in Python 3)
    train_test_set = list(filter(lambda x: len(couples[x[1]]) >= 3,
                                 train_test_set))
    prec1, correct, tot, sum_pos, sum_neg, sum_max, mAP, _ = \
        test_descriptor_net(P, get_embeddings, net, train_test_set,
                            test_ref_set, kth=2)
    num_pos = sum(test_label == ref_label
                  for _, test_label, _ in train_test_set
                  for _, ref_label, _ in test_ref_set)
    num_neg = len(train_test_set) * len(test_ref_set) - num_pos
    print_stats('TRAIN - ', prec1, correct, tot, sum_pos / num_pos,
                sum_neg / num_neg, sum_max / len(train_test_set), mAP)
    set_net_train(net, True, bn_train=P.train_bn)
    return best_score
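
# Hedged sketch (not from the original module) of the kth logic assumed above:
# test_descriptor_net and its precision computation are defined elsewhere in
# this project, but kth=2 presumably means "predict from the kth most similar
# reference", so the TRAIN evaluation skips the top match, which is almost
# certainly the query image itself.
def _precision_kth_sketch(sim, test_set, ref_set, kth=1):
    correct = 0
    for i, (_, test_label, _) in enumerate(test_set):
        # reference indices sorted by decreasing similarity to query i
        order = sim[i].sort(descending=True)[1]
        pred_label = ref_set[int(order[kth - 1])][1]
        correct += int(pred_label == test_label)
    return float(correct) / len(test_set), correct, len(test_set)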
def main(dataset_full, model, weights, device, dba):
    # training and test sets
    dataset_id = parse_dataset_id(dataset_full)
    match_labels = match_label_functions[dataset_id]
    train_set_full = get_images_labels(dataset_full, match_labels)
    test_set_full = get_images_labels(dataset_full + '/test', match_labels)
    labels_list = [t[1] for t in train_set_full]
    # set up global params so that the testing functions work properly
    labels.extend(sorted(set(labels_list)))
    P.test_pre_proc = True  # we always pre-process images
    P.cuda_device = device
    P.preload_net = weights
    P.cnn_model = model
    P.feature_size2d = feature_sizes[model, image_sizes[dataset_id]]
    P.bn_model = ''  # only useful for training

    print('Loading and transforming train/test sets.')
    # open the images (and transform them already if possible)
    # do that only if it fits in memory!
    m, s = read_mean_std(mean_std_files[dataset_id])
    test_trans = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize(m, s)])
    test_set, test_train_set = [], []
    for im, lab in train_set_full:
        im_o = imread_rgb(im)
        test_train_set.append((test_trans(im_o), lab, im))
    for im, lab in test_set_full:
        if lab not in labels:
            continue
        im_o = imread_rgb(im)
        test_set.append((test_trans(im_o), lab, im))

    print('Testing network on dataset with ID {0}'.format(dataset_id))
    class_net = get_class_net()
    set_net_train(class_net, False)
    c, t = test_classif_net(class_net, test_set)
    print('Classification (TEST): {0} / {1} - acc: {2:.4f}'.format(
        c, t, float(c) / t))
    test_embeddings = get_embeddings(class_net, test_set, device, len(labels))
    ref_embeddings = get_embeddings(class_net, test_train_set, device,
                                    len(labels))
    sim = torch.mm(test_embeddings, ref_embeddings.t())
    prec1, c, t, _, _ = precision1(sim, test_set, test_train_set)
    mAP = mean_avg_precision(sim, test_set, test_train_set)
    print('Descriptor (TEST): {0} / {1} - acc: {2:.4f} - mAP:{3:.4f}'.format(
        c, t, prec1, mAP))
    if dba == 0:
        return
    print('Testing using instance feature augmentation')
    dba_embeddings, dba_set = instance_avg(device, ref_embeddings,
                                           test_train_set, labels, dba)
    sim = torch.mm(test_embeddings, dba_embeddings.t())
    prec1, c, t, _, _ = precision1(sim, test_set, dba_set)
    mAP = mean_avg_precision(sim, test_set, dba_set)
    print('Descriptor (TEST DBA k={4}): {0} / {1} - acc: {2:.4f} - '
          'mAP:{3:.4f}'.format(c, t, prec1, mAP, dba))
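
# Hedged sketch (not from the original module) of what instance_avg may do:
# database-side augmentation (DBA) typically replaces each reference embedding
# with the L2-normalized average of itself and its k most similar references.
# The real instance_avg also receives device and labels, so its exact variant
# may differ; this is only a plausible reading of the k parameter above.
def _dba_sketch(ref_embeddings, ref_set, k):
    import torch.nn.functional as F
    sim = torch.mm(ref_embeddings, ref_embeddings.t())
    # for normalized embeddings, the k+1 top neighbors include the embedding
    # itself (self-similarity is maximal)
    _, idx = sim.topk(k + 1, dim=1)
    augmented = F.normalize(ref_embeddings[idx].mean(dim=1), dim=1)
    return augmented, ref_set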
def test_print_classif(net, testset_tuple, labels, best_score=0, epoch=0):
    test_set, test_train_set = testset_tuple
    set_net_train(net, False)
    c, t = test_classif_net(net, test_set, labels, P.classif_test_batch_size)
    if c > best_score:
        best_score = c
        prefix = 'CLASSIF, EPOCH:{0}, SCORE:{1}'.format(epoch, c)
        P.save_uuid(prefix)
        torch.save(net.state_dict(),
                   path.join(P.save_dir,
                             P.unique_str() + "_best_classif.pth.tar"))
    P.log('TEST - correct: {0} / {1} - acc: {2}'.format(c, t, float(c) / t))
    c, t = test_classif_net(net, test_train_set, labels,
                            P.classif_test_batch_size)
    torch.save(net.state_dict(),
               path.join(P.save_dir,
                         "model_classif_" + str(epoch) + ".pth.tar"))
    P.log('TRAIN - correct: {0} / {1} - acc: {2}'.format(c, t, float(c) / t))
    set_net_train(net, True, bn_train=P.classif_train_bn)
    return best_score
def test_print_classif(train_type, P, net, testset_tuple, test_net,
                       best_score=0, epoch=0):
    test_set, test_train_set = testset_tuple
    set_net_train(net, False)
    c, t = test_net(net, test_set)
    if c > best_score:
        best_score = c
        prefix = '{0}, EPOCH:{1}, SCORE:{2}'.format(train_type, epoch, c)
        save_uuid(P, prefix)
        torch.save(net.state_dict(),
                   path.join(P.save_dir,
                             unique_str(P) + "_best_classif.pth.tar"))
    log(P, 'TEST - correct: {0} / {1} - acc: {2}'.format(c, t, float(c) / t))
    c, t = test_net(net, test_train_set)
    torch.save(net.state_dict(),
               path.join(P.save_dir,
                         "model_classif_" + str(epoch) + ".pth.tar"))
    log(P, 'TRAIN - correct: {0} / {1} - acc: {2}'.format(c, t, float(c) / t))
    set_net_train(net, True, bn_train=P.train_bn)
    return best_score
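
# Hedged sketch (not from the original module) of the (correct, total)
# contract the test_net callable above is expected to satisfy;
# test_classif_net is defined elsewhere in this project, and the batch_size
# default here is only illustrative.
def _test_classif_sketch(net, test_set, labels, batch_size=64):
    correct, total = 0, 0
    with torch.no_grad():
        for start in range(0, len(test_set), batch_size):
            batch = test_set[start:start + batch_size]
            inputs = torch.stack([im for im, _, _ in batch])
            # predicted class = arg-max over the network's output scores
            preds = net(inputs).max(dim=1)[1]
            for p, (_, lab, _) in zip(preds, batch):
                total += 1
                correct += int(int(p) == labels.index(lab))
    return correct, total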
def train_gen(train_type, P, test_print, test_net, net, train_set,
              testset_tuple, optimizer, create_epoch, create_batch,
              create_loss, best_score=0):
    set_net_train(net, True, bn_train=P.train_bn)
    for epoch in range(P.train_epochs):
        # annealing
        optimizer = anneal(net, optimizer, epoch, P.train_annealing)
        dataset, batch_args = create_epoch(epoch, train_set, testset_tuple)
        micro_args = {
            'P': P,
            'net': net,
            'create_batch': create_batch,
            'batch_args': batch_args,
            'create_loss': create_loss
        }
        mini_args = {
            'train_type': train_type,
            'P': P,
            'test_print': test_print,
            'test_net': test_net,
            'net': net,
            'optimizer': optimizer,
            'testset_tuple': testset_tuple,
            'epoch': epoch,
            'micro_args': micro_args
        }
        init = 0, best_score, 0.0  # batch count, score, running loss
        _, best_score, _ = fold_batches(mini_batch_gen, init, dataset,
                                        P.train_batch_size, cut_end=True,
                                        add_args=mini_args)
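
# Hedged sketch (not from the original module) of fold_batches as used above;
# the real implementation lives elsewhere in this project. It is presumably a
# left fold over the dataset in fixed-size batches, threading an accumulator
# through the batch function f; cut_end=True would drop the final incomplete
# batch, and add_args would be forwarded to f as keyword arguments.
def _fold_batches_sketch(f, init, dataset, batch_size, cut_end=False,
                         add_args=None):
    add_args = add_args or {}
    acc = init
    for start in range(0, len(dataset), batch_size):
        batch = dataset[start:start + batch_size]
        if cut_end and len(batch) < batch_size:
            break
        acc = f(acc, batch, **add_args)
    return acc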
def train_gen(is_classif, net, train_set, test_set, optimizer, params,
              create_epoch, create_batch, output_stats, create_loss,
              best_score=0):
    if is_classif:
        bn_train = params.classif_train_bn
        n_epochs = params.classif_train_epochs
        annealing_dict = params.classif_annealing
        mini_batch_size = params.classif_train_batch_size
        micro_batch_size = params.classif_train_micro_batch
        loss_avg = params.classif_loss_avg
        loss2_avg, loss2_alpha = None, None
    else:
        bn_train = params.siam_train_bn
        n_epochs = params.siam_train_epochs
        annealing_dict = params.siam_annealing
        mini_batch_size = params.siam_train_batch_size
        micro_batch_size = params.siam_train_micro_batch
        loss_avg = params.siam_loss_avg
        loss2_avg = params.siam_do_loss2_avg
        loss2_alpha = params.siam_do_loss2_alpha
    set_net_train(net, True, bn_train=bn_train)
    for epoch in range(n_epochs):
        # annealing
        optimizer = anneal(net, optimizer, epoch, annealing_dict)
        dataset, batch_args, stats_args = create_epoch(epoch, train_set,
                                                       test_set)
        micro_args = {
            'net': net,
            'create_batch': create_batch,
            'batch_args': batch_args,
            'create_loss': create_loss,
            'loss_avg': loss_avg,
            'loss2_avg': loss2_avg,
            'loss2_alpha': loss2_alpha
        }
        mini_args = {
            'net': net,
            'optimizer': optimizer,
            'micro_batch_size': micro_batch_size,
            'output_stats': output_stats,
            'stats_args': stats_args,
            'test_set': test_set,
            'epoch': epoch,
            'micro_args': micro_args
        }
        init = 0, best_score, 0.0  # batch count, score, running loss
        _, best_score, _ = fold_batches(mini_batch_gen, init, dataset,
                                        mini_batch_size, cut_end=True,
                                        add_args=mini_args)
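
# Hedged sketch (not from the original module) of anneal as used in both
# train_gen variants; the real function is defined elsewhere. The annealing
# dict presumably maps epoch numbers to multiplicative learning-rate factors
# applied when that epoch starts (the real anneal also takes net, so it may
# instead rebuild the optimizer rather than mutate it in place).
def _anneal_sketch(net, optimizer, epoch, annealing_dict):
    if epoch in annealing_dict:
        for group in optimizer.param_groups:
            group['lr'] *= annealing_dict[epoch]
    return optimizer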