def forward(self, student, teacher):
    # Teacher distances are fixed targets, so no gradient is needed for them.
    with torch.no_grad():
        t_d = pdist(teacher, squared=False)

    d = pdist(student, squared=False)
    # Match student distances to (scaled) teacher distances with a Huber loss.
    loss = F.smooth_l1_loss(d, self.alpha * t_d, reduction='mean')
    return loss
def forward(self, student, teacher):
    with torch.no_grad():
        t_d = pdist(teacher, squared=False)
        # Normalize by the mean L2 norm of the teacher embeddings.
        t_d = t_d / teacher.pow(2).sum(dim=1).sqrt().mean()

    d = pdist(student, squared=False)
    # Apply the same normalization on the student side.
    d = d / student.pow(2).sum(dim=1).sqrt().mean()

    loss = F.smooth_l1_loss(d, t_d, reduction='mean')
    return loss
def forward(self, student, teacher):
    with torch.no_grad():
        t_d = pdist(teacher, squared=False)
        # Normalize by the mean of the nonzero (off-diagonal) distances.
        mean_td = t_d[t_d > 0].mean()
        t_d = t_d / mean_td

    d = pdist(student, squared=False)
    mean_d = d[d > 0].mean()
    d = d / mean_d

    loss = F.smooth_l1_loss(d, t_d, reduction='mean')
    return loss
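# The three losses above assume a single-argument pdist that returns the full
# N x N matrix of pairwise Euclidean distances between rows of e. A minimal
# sketch of such a helper follows; this is an assumption about the interface,
# not necessarily the project's actual implementation.
import torch

def pdist(e, squared=False, eps=1e-12):
    # ||a - b||^2 = ||a||^2 - 2<a, b> + ||b||^2, computed for all pairs at once.
    e_square = e.pow(2).sum(dim=1)
    prod = e @ e.t()
    res = (e_square.unsqueeze(1) + e_square.unsqueeze(0) - 2 * prod).clamp(min=eps)

    if not squared:
        res = res.sqrt()

    # Zero the diagonal, which is exactly 0 up to numerical noise.
    res = res.clone()
    res[range(len(e)), range(len(e))] = 0
    return res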
def get_ss_ftr(self, channels, smp_loc=None):
    """
    Compute self-similarity features

    :param channels: shrunk channels for self-similarity features
    :param smp_loc: shrunk sample locations (None for all)
    :return: self-similarity features
    """
    shrink = self.options["shrink"]
    p_size = self.options["p_size"] // shrink
    n_r, n_c, n_ch = channels.shape
    ss_ftr = view_as_windows(channels, (p_size, p_size, n_ch))

    if smp_loc is not None:
        # Keep only the patches centered at the requested sample locations.
        ss_ftr = ss_ftr.reshape((n_r - p_size + 1, n_c - p_size + 1,
                                 p_size ** 2, n_ch))
        r_pos = [r - p_size // 2 for r, _ in smp_loc]
        c_pos = [c - p_size // 2 for _, c in smp_loc]
        ss_ftr = ss_ftr[r_pos, c_pos]
    else:
        ss_ftr = ss_ftr.reshape((-1, p_size ** 2, n_ch))

    # Subsample a regular n_cell x n_cell grid of cells within each patch.
    n_cell = self.options["n_cell"]
    half_cell_size = int(round(p_size / (2.0 * n_cell)))
    grid_pos = [int(round((i + 1) * (p_size + 2 * half_cell_size - 1) /
                          (n_cell + 1.0) - half_cell_size))
                for i in range(n_cell)]
    grid_pos = [r * p_size + c for r in grid_pos for c in grid_pos]
    ss_ftr = ss_ftr[:, grid_pos]

    # Self-similarity: pairwise differences between the sampled grid cells.
    ss_ftr = pdist(ss_ftr)
    return ss_ftr.reshape((ss_ftr.shape[0], -1))
def eval_graph(net, loader, ep):
    net.eval()
    graph_net.eval()
    test_iter = tqdm(loader)
    embeddings_all, labels_all = [], []

    test_iter.set_description("[Eval][Epoch %d]" % ep)
    with torch.no_grad():
        # Embed the whole evaluation set.
        for images, labels in test_iter:
            images, labels = images.cuda(), labels.cuda()
            embedding = net(images)
            embeddings_all.append(embedding.data)
            labels_all.append(labels.data)
        embeddings_all = torch.cat(embeddings_all)
        labels_all = torch.cat(labels_all)

        # For each sample, the indices of its 10 nearest neighbors
        # (column 0 of topk is the sample itself and is dropped).
        d = pdist(embeddings_all)
        pos_idx = d.topk(11, dim=1, largest=False)[1][:, 1:]

        # Refine each embedding with the graph network over its neighborhood.
        graph_embedding = []
        for i, e in enumerate(embeddings_all):
            pos_embedding = embeddings_all[pos_idx[i][1:]]
            neg_embedding = embeddings_all[
                torch.cat([pos_idx[j] for j in range(i - 3, i - 1)])]
            e = torch.cat((e.unsqueeze(0), pos_embedding, neg_embedding), dim=0)
            e = graph_net(e)
            graph_embedding.append(e[0])
        graph_embedding = torch.stack(graph_embedding, dim=0)

        rec = recall(graph_embedding, labels_all)
        print('[Epoch %d] Recall@1: [%.4f]\n' % (ep, 100 * rec))
    return rec
def evaluate(args):
    ## logging
    FORMAT = '%(levelname)s %(filename)s:%(lineno)d: %(message)s'
    logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
    logger = logging.getLogger(__name__)

    ## load embeddings
    logger.info('loading gallery embeddings')
    with open(args.gallery_embs, 'rb') as fr:
        gallery_dict = pickle.load(fr)
        emb_gallery, lb_ids_gallery, lb_cams_gallery = (
            gallery_dict['embeddings'],
            gallery_dict['label_ids'],
            gallery_dict['label_cams'])
    logger.info('loading query embeddings')
    with open(args.query_embs, 'rb') as fr:
        query_dict = pickle.load(fr)
        emb_query, lb_ids_query, lb_cams_query = (
            query_dict['embeddings'],
            query_dict['label_ids'],
            query_dict['label_cams'])

    ## compute and clean distance matrix
    dist_mtx = pdist(emb_query, emb_gallery)
    n_q, n_g = dist_mtx.shape
    indices = np.argsort(dist_mtx, axis=1)
    matches = lb_ids_gallery[indices] == lb_ids_query[:, np.newaxis]
    matches = matches.astype(np.int32)

    all_aps = []
    all_cmcs = []
    logger.info('starting evaluating ...')
    for qidx in tqdm(range(n_q)):
        qpid = lb_ids_query[qidx]
        qcam = lb_cams_query[qidx]

        order = indices[qidx]
        pid_diff = lb_ids_gallery[order] != qpid
        cam_diff = lb_cams_gallery[order] != qcam
        useful = lb_ids_gallery[order] != -1

        # Drop gallery images of the same identity taken by the same camera,
        # as well as junk images (id == -1).
        keep = np.logical_or(pid_diff, cam_diff)
        keep = np.logical_and(keep, useful)
        match = matches[qidx][keep]

        if not np.any(match):
            continue

        # CMC curve: 1 from the first correct match onwards.
        cmc = match.cumsum()
        cmc[cmc > 1] = 1
        all_cmcs.append(cmc[:args.cmc_rank])

        # Average precision: precision at each rank, kept only at the hits.
        num_real = match.sum()
        match_cum = match.cumsum()
        match_cum = [el / (1.0 + i) for i, el in enumerate(match_cum)]
        match_cum = np.array(match_cum) * match
        ap = match_cum.sum() / num_real
        all_aps.append(ap)

    assert len(all_aps) > 0, "NO QUERY MATCHED"

    mAP = sum(all_aps) / len(all_aps)
    all_cmcs = np.array(all_cmcs, dtype=np.float32)
    cmc = np.mean(all_cmcs, axis=0)

    print('mAP is: {}, cmc is: {}'.format(mAP, cmc))
def forward(self, student, teacher):
    # Similarity scores: negated distances raised to beta, scaled by alpha.
    score_teacher = -1 * self.alpha * pdist(teacher, squared=False).pow(self.beta)
    score_student = -1 * self.alpha * pdist(student, squared=False).pow(self.beta)

    # Teacher's ranking of the permute_len most similar candidates
    # (column 0 is the anchor itself and is skipped).
    permute_idx = score_teacher.sort(
        dim=1, descending=True)[1][:, 1:(self.permute_len + 1)]
    ordered_student = torch.gather(score_student, 1, permute_idx)

    # Log-likelihood of the teacher's ranking under the student scores.
    log_prob = (ordered_student - torch.stack(
        [torch.logsumexp(ordered_student[:, i:], dim=1)
         for i in range(permute_idx.size(1))], dim=1)).sum(dim=1)
    loss = (-1 * log_prob).mean()
    return loss
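# For reference, the stacked logsumexp terms above compute the Plackett-Luce
# (listwise) log-likelihood of the teacher's ranking under the student scores.
# With s_1..s_k the student scores gathered in teacher-rank order:
#
#     log P(pi) = sum_{i=1..k} ( s_i - log sum_{j=i..k} exp(s_j) )
#
# i.e. the probability of picking the teacher's i-th choice from the
# remaining candidates, multiplied over all k positions.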
def __init__(self, p=2, margin=0.2, sampler=None, reduce=True, size_average=True):
    super().__init__()
    self.p = p
    self.margin = margin

    # update distance function accordingly
    self.sampler = sampler
    self.sampler.dist_func = lambda e: pdist(e, squared=(p == 2))

    self.reduce = reduce
    self.size_average = size_average
def __call__(self, embeds, labels):
    dist_mtx = pdist(embeds, embeds).detach().cpu().numpy()
    labels = labels.contiguous().cpu().numpy().reshape((-1, 1))
    num = labels.shape[0]

    # Same-label mask with the diagonal (self-pairs) excluded.
    dia_inds = np.diag_indices(num)
    lb_eqs = labels == labels.T
    lb_eqs[dia_inds] = False

    # Hardest positive: the farthest embedding with the same label.
    dist_same = dist_mtx.copy()
    dist_same[~lb_eqs] = -np.inf
    pos_idxs = np.argmax(dist_same, axis=1)

    # Hardest negative: the closest embedding with a different label.
    dist_diff = dist_mtx.copy()
    lb_eqs[dia_inds] = True
    dist_diff[lb_eqs] = np.inf
    neg_idxs = np.argmin(dist_diff, axis=1)

    pos = embeds[pos_idxs].contiguous().view(num, -1)
    neg = embeds[neg_idxs].contiguous().view(num, -1)
    return embeds, pos, neg
def forward(self, embeddings, labels):
    with torch.no_grad():
        pos_mask, neg_mask = pos_neg_mask(labels)
        pos_pair_idx = pos_mask.nonzero()
        anchor_idx = pos_pair_idx[:, 0]
        pos_idx = pos_pair_idx[:, 1]

        d = embeddings.size(1)
        # Add the identity before sqrt so the zero diagonal stays numerically
        # safe, then clamp small distances to the cutoff.
        dist = (pdist(embeddings, squared=True)
                + torch.eye(embeddings.size(0), device=embeddings.device,
                            dtype=torch.float32)).sqrt()
        dist = dist.clamp(min=self.cut_off)

        # Log of the inverse density of pairwise distances on the unit
        # d-sphere; sampling from it yields distance-weighted negatives.
        log_weight = ((2.0 - d) * dist.log()
                      - ((d - 3.0) / 2.0) * (1.0 - 0.25 * (dist * dist)).log())
        weight = (log_weight - log_weight.max(dim=1, keepdim=True)[0]).exp()
        weight = weight * (neg_mask * (dist < self.nonzero_loss_cutoff)).float()

        # Fall back to uniform sampling over negatives if every weight is zero.
        weight = weight + ((weight.sum(dim=1, keepdim=True) == 0) * neg_mask).float()
        weight = weight / weight.sum(dim=1, keepdim=True)
        weight = weight[anchor_idx]
        neg_idx = torch.multinomial(weight, 1).squeeze(1)

    return anchor_idx, pos_idx, neg_idx
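# Hedged usage sketch: the sampled index triples are typically fed to a
# standard triplet loss. The names below ("sampler", "embeddings", "labels")
# are illustrative placeholders, not from the source.
import torch.nn.functional as F

anchor_idx, pos_idx, neg_idx = sampler(embeddings, labels)
loss = F.triplet_margin_loss(embeddings[anchor_idx],
                             embeddings[pos_idx],
                             embeddings[neg_idx],
                             margin=0.2)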
def pdist(A, B):  # test covered
    """Pairwise Euclidean distance"""
    return utils.pdist(A, B)
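# The delegated utils.pdist(A, B) presumably returns the len(A) x len(B)
# matrix of Euclidean distances between rows of A and rows of B. A NumPy
# sketch of that convention, as an assumption rather than the project's
# actual implementation:
import numpy as np

def pdist_two_arg(A, B):
    # ||a - b||^2 = ||a||^2 - 2<a, b> + ||b||^2, vectorized over all pairs.
    sq_a = np.square(A).sum(axis=1)[:, np.newaxis]
    sq_b = np.square(B).sum(axis=1)[np.newaxis, :]
    sq_dist = np.clip(sq_a - 2.0 * (A @ B.T) + sq_b, 0.0, None)
    return np.sqrt(sq_dist)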
    return self.gamma * (logit - logit.max()) / self.t


if __name__ == "__main__":
    from utils import pdist

    # Tiny gradient-descent sanity check for SoftDPMatch.
    gamma = torch.nn.Parameter(torch.as_tensor(10.0))
    t = torch.nn.Parameter(torch.as_tensor(10.0))
    fun = SoftDPMatch(gamma, t)

    x = torch.arange(3, dtype=torch.float32)[None, None, :]
    y = torch.arange(4, dtype=torch.float32)[None, None, :]
    y = torch.nn.Parameter(y)
    opt = torch.optim.Adam([gamma, t, y], 1e-3)

    # Reference alignment (not used in the loop below).
    gt = torch.as_tensor([[1, 1, 0, 0],
                          [0, 0, 1, 0],
                          [0, 0, 0, 1]], dtype=torch.float32).unsqueeze(0)

    for i in range(10000):
        opt.zero_grad()
        d = pdist(x, y)
        logit = fun(-d)
        # Expected distance under the (soft) match distribution.
        loss = (d * logit.exp()).mean()
        loss.backward()
        opt.step()

    print(f"{loss.data=}")
    print(f"{y=}")
    print(f"{gamma=}")
    print(f"{t=}")
    print(f"{logit.exp()=}")
model = LinearEmbedding(base_model,
                        output_size=base_model.output_size,
                        embedding_size=opts.embedding_size,
                        normalize=not opts.no_normalize).cuda()

if opts.load is not None:
    model.load_state_dict(torch.load(opts.load))
    print("Loaded Model from %s" % opts.load)

criterion = loss.MarginLoss(
    alpha=opts.alpha,
    beta=opts.beta,
    nu=opts.nu,
    beta_classes=torch.zeros(len(dataset_train.classes)),
    sampler=opts.sample(dist_func=lambda x: pdist(x, False))).cuda()
print(type(criterion))

if opts.optim == "sgd":
    # Lower learning rate for the pretrained base, full rate for the new head.
    optimizer = optim.SGD([
        {'params': model.base.parameters(), 'lr': opts.lr * 0.1},
        {'params': model.linear.parameters(), 'lr': opts.lr}
    ], momentum=0.9, weight_decay=1e-5)
elif opts.optim == "adam":
    optimizer = optim.Adam([
        {'params': model.base.parameters(),
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--coco-path', type=str, help='', default='')
    parser.add_argument('--train-set-name', type=str, help='', default='training')
    parser.add_argument('--test-set-name', type=str, help='', default='validation_wo_occlusion')
    parser.add_argument('--target-size', type=int, help='Resize/pad input image to target-size.', default=224)
    parser.add_argument('--snapshot', type=str, help='', default=None)
    parser.add_argument('--emb-size', type=int, help='Embedding size', default=2048)
    parser.add_argument('--backbone', type=str, help='ResNet18/34/50/101/152', default='ResNet50')
    parser.add_argument('--snapshot-path', type=str, help='Path to save snapshot', default='.')
    parser.add_argument('--num-workers', type=int, help='Number of workers for data loader', default=1)
    parser.add_argument('--n-neighbors', type=int, help='Number of neighbors for KNN classifier', default=1)
    args = parser.parse_args()

    # Set up data loader parameters
    kwargs = {'num_workers': args.num_workers, 'pin_memory': True} if cuda else {}

    transforms_args = [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]
    train_dataset = CoCoDataset(args.coco_path, args.train_set_name,
                                target_size=args.target_size,
                                transform=transforms.Compose(transforms_args))
    test_dataset = CoCoDataset(args.coco_path, args.test_set_name,
                               target_size=args.target_size,
                               transform=transforms.Compose(transforms_args))
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=False, **kwargs)
    test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, **kwargs)

    # Init model
    model, _, _ = load_model(args.backbone, args.snapshot)
    if cuda:
        model.cuda()

    train_embeddings, train_labels = extract_embeddings(
        train_loader, model, embedding_size=args.emb_size, cuda=cuda)
    test_embeddings, test_labels = extract_embeddings(
        test_loader, model, embedding_size=args.emb_size, cuda=cuda)

    # Nearest-neighbor prediction: label of the closest training embedding.
    dist_mtx = pdist(test_embeddings, train_embeddings)
    indices = np.argmin(dist_mtx, axis=1)
    y_pred = train_labels[indices]

    # Fit and persist a KNN classifier on the training embeddings.
    clf = KNeighborsClassifier(n_neighbors=args.n_neighbors, metric='l2',
                               n_jobs=-1, weights="distance")
    clf.fit(train_embeddings, train_labels)
    pickle.dump(
        clf,
        open(os.path.join(args.snapshot_path,
                          '%s.pkl' % os.path.basename(args.snapshot).split(".")[0]),
             'wb'))

    print(classification_report(test_labels, y_pred))

    # Plot random test images next to their nearest training images.
    plt.figure(figsize=(20, 20))
    columns = 4
    rows = 4
    for i in range(rows * 2):
        # plot test image
        index = np.random.randint(len(test_dataset))
        image, label = test_dataset._load_image(index)
        ax = plt.subplot(rows, columns, i * 2 + 1)
        ax.imshow(image)
        ax.title.set_text(
            test_dataset.coco_label_to_name(
                test_dataset.label_to_coco_label(label)))

        # plot most similar image from train_dataset
        gt_index = indices[index]
        gt_image, gt_label = train_dataset._load_image(gt_index)
        ax = plt.subplot(rows, columns, i * 2 + 2)
        ax.imshow(gt_image)
        ax.title.set_text(
            train_dataset.coco_label_to_name(
                train_dataset.label_to_coco_label(gt_label)))

    plt.tight_layout()
    plt.savefig("evaluation.png")
def main(args):
    model_path = args.model_path
    save_dir = args.save_dir
    vec_dim = 128

    data_type = ['validation'] if args.phase == 'test' else ['train', 'validation']
    img_list, base_path, item_dict = read_data("DeepFashion2", bbox_gt=True,
                                               type_list=data_type)

    # model = ResNetbasedNet(vec_dim=vec_dim, max_pool=True, load_path=model_path, clf2_num=2, adv_eta=1e-4)
    model = ResNetbasedNet(vec_dim=vec_dim, max_pool=True, load_path=model_path, clf2_num=2)

    domain_adap = args.domain_adap
    adv_train = args.adv_train
    is_cud = torch.cuda.is_available()
    device = torch.device("cuda" if is_cud else "cpu")
    if is_cud:
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
        model.to(device)
    kwargs = {'num_workers': 8, 'pin_memory': True} if is_cud else {}

    if args.phase == 'train':
        train_dataset = DeepFashionDataset(img_list['train'], root=base_path, augment=True)
        train_batch_sampler = BalancedBatchSampler(train_dataset.labels, train_dataset.source,
                                                   n_classes=64, n_samples=4)
        online_train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_sampler=train_batch_sampler, **kwargs)

        test_dataset = DeepFashionDataset(img_list['validation'], root=base_path)
        test_batch_sampler = BalancedBatchSampler(test_dataset.labels, test_dataset.source,
                                                  n_classes=64, n_samples=4)
        online_test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_sampler=test_batch_sampler, **kwargs)

        margin = 0.2
        loss_fn = OnlineTripletLoss(margin, HardestNegativeTripletSelector(margin), domain_adap)
        # loss_fn = AllTripletLoss(margin)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=1e-5, weight_decay=5e-4)
        # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", patience=4, threshold=0.001,
        #                                            cooldown=2, min_lr=1e-4 / (10 * 2))
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer, mode="max", patience=4, threshold=1, cooldown=2,
            min_lr=1e-5 / (10 * 2))
        n_epochs = 300
        log_interval = 200

        fit(online_train_loader, online_test_loader, model, loss_fn, optimizer, scheduler,
            n_epochs, is_cud, log_interval, save_dir,
            metrics=[AverageNonzeroTripletsMetric()], start_epoch=200,
            criterion=criterion, domain_adap=domain_adap, adv_train=adv_train)
        # fit(online_train_loader, online_test_loader, model, loss_fn, optimizer, scheduler, n_epochs,
        #     is_cud, log_interval, save_dir, metrics=[AverageNonzeroTripletsMetric()], start_epoch=0,
        #     criterion=criterion, adv_train=True, adv_epsilon=0.01, adv_alph=0.007, adv_iter=1)

    else:
        with torch.no_grad():
            model.eval()
            test_dataset = DeepFashionDataset(img_list['validation'], root=base_path)
            test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=256,
                                                      shuffle=False, num_workers=4)

            embedding_mtx = torch.zeros((len(test_dataset), vec_dim))
            labels = np.zeros(len(test_dataset))
            top_k = 500
            idx_ = 0
            start_time = time.time()
            # Confusion counts:
            # predict_user_real_user / predict_user_real_shop / predict_shop_real_user / predict_shop_real_shop
            cf_mtx = np.zeros(4, dtype=float)

            for idx, (data, target, _, source) in enumerate(test_loader):
                emb_vecs = model(data.cuda())
                embedding_mtx[idx_:idx_ + len(data)] = emb_vecs[0]
                predict = torch.argmax(emb_vecs[1], dim=1).cpu().numpy()
                real = source.cpu().numpy()
                cf_mtx[0] += np.sum((predict == 0) & (real == 0))
                cf_mtx[1] += np.sum((predict == 0) & (real == 1))
                cf_mtx[2] += np.sum((predict == 1) & (real == 0))
                cf_mtx[3] += np.sum((predict == 1) & (real == 1))
                labels[idx_:idx_ + len(data)] = np.asarray(target)
                idx_ += len(data)
                if idx % 20 == 0:
                    print('processing {}/{}... elapsed time {}s'.format(
                        idx + 1, len(test_loader), time.time() - start_time))

            print('Total: {}, Domain Classification Acc: {:.5f}'.format(
                np.sum(cf_mtx), (cf_mtx[0] + cf_mtx[3]) / np.sum(cf_mtx)))
            print('Recall User Photo: {:.5f}'.format(cf_mtx[0] / (cf_mtx[0] + cf_mtx[2])))
            print('Recall Shop Photo: {:.5f}'.format(cf_mtx[3] / (cf_mtx[1] + cf_mtx[3])))

            np.save(os.path.join(save_dir, 'emb_mtx.npy'), embedding_mtx)
            with open(os.path.join(save_dir, 'file_info.txt'), 'w') as f:
                for i in range(len(test_dataset)):
                    f.write('{},{},{},{}\n'.format(img_list['validation'][i][0],
                                                   test_dataset[i][1],
                                                   test_dataset[i][2],
                                                   test_dataset[i][3]))
            print('save files!')

            # Rank all items by embedding distance (excluding self) and mark,
            # for each of the top_k neighbors, whether its label matches.
            distance_mtx = pdist(embedding_mtx)
            sorted_idx = torch.argsort(distance_mtx, dim=1).cpu().numpy()
            result_arr = np.zeros((sorted_idx.shape[0], top_k))
            for idx in range(sorted_idx.shape[0]):
                result_arr[idx] = sorted_idx[idx][sorted_idx[idx] != idx][:top_k]
                result_arr[idx] = labels[result_arr[idx].astype(int)] == labels[idx]
                if idx % 1000 == 0:
                    print(idx)

            # Top-k retrieval accuracy: a hit if any of the k nearest matches.
            for k in [1, 5, 10, 20, 100, 200, 500]:
                topk_accuracy = np.sum(
                    np.sum(result_arr[:, :k], axis=1) > 0) / result_arr.shape[0]
                print('Top-{} Accuracy: {:.5f}'.format(k, topk_accuracy))