def batch(last, i, is_final, batch):
    """Compute the embedding of one test image and store it at index ``i``.

    ``last`` is the embeddings container handed over from the previous
    call; it is written to in place and returned. ``batch`` is processed
    one image at a time: only ``batch[0][0]`` is read. ``is_final`` is
    accepted but not used here.
    """
    image = batch[0][0]
    # add a leading dimension of size 1 before moving to the target device
    net_input = move_device(test_trans(image).unsqueeze(0), P.cuda_device)
    # volatile Variable: forward pass only
    descriptor = net(Variable(net_input, volatile=True)).data
    last[i] = descriptor[0]
    return last
def create_batch(batch, n, epoch, similarities):
    """Build the three network inputs and the label for one positive couple.

    ``batch`` always has size 1: ``batch[0]`` unpacks to
    ``(label, (i1, i2), (im1, im2))``. A third image (the negative) is
    chosen with the help of ``similarities``, falling back to
    ``choose_rand_neg`` when no candidate is left. ``n`` is not used here.

    Returns ``([in1, in2, in3], [labels_in])``.
    """
    # one image couple at a time
    lab, (i1, i2), (im1, im2) = batch[0]

    labels_in = tensor_t(torch.LongTensor, P.cuda_device, 1)
    labels_in[0] = labels.index(lab)

    # Semi-hard negative selection (see the FaceNet paper by Schroff et
    # al.): pick the hardest negative that is still easier than the
    # positive, which should avoid collapsing the model at the beginning
    # of training.
    excluded = lab_indicators[lab]
    sim_pos = similarities[i1, i2]
    if epoch < P.train_epoch_switch:
        # besides the positives, also rule out everything at least as
        # similar to the anchor as the positive is
        excluded = excluded | similarities[i1].ge(sim_pos)

    # NOTE(review): from the switch epoch on, only same-label items are
    # excluded, i.e. the hardest negative overall is taken - confirm this
    # is the intended nesting of the checks below.
    im3 = None
    n_tot = similarities.size(0)
    if excluded.sum() < n_tot:
        # similarities must be in [-1, 1]: pushing the excluded entries
        # down to -2 makes the max land on the most similar allowed item
        candidates = similarities[i1].clone()
        candidates[excluded] = -2
        _, best = candidates.max(0)
        im3 = train_set[best[0]][0]
    else:
        # everything is excluded: report it, a random negative is used
        n_pos = lab_indicators[lab].sum()
        n_ge = similarities[i1].ge(sim_pos).sum()
        print(
            '{0} {1}-{2}-{3} (#pos:{4}, #ge:{5}, #total:{6}), {7}'.format(
                'cant find semi-hard neg for', i1, i2, lab,
                n_pos, n_ge, n_tot, 'falling back to random neg'))

    if im3 is None:
        # default to random negative
        im3 = choose_rand_neg(train_set, lab)

    # one image at a time: each input gets a leading dimension of size 1
    inputs = [
        move_device(train_trans(im).unsqueeze(0), P.cuda_device)
        for im in (im1, im2, im3)
    ]
    return inputs, [labels_in]
def create_batch(batch, n):
    """Build the per-scale inputs and the label tensor for one image.

    Images are handled one by one (they have different input sizes);
    ``batch[0][0]`` holds the same image at several scales and
    ``batch[0][1]`` its label. Scale ``j`` is transformed with
    ``trans_scales[j]``. ``n`` is not used here.

    Returns ``(list_of_scale_inputs, [labels_in])``.
    """
    sample = batch[0]
    scales = sample[0]

    labels_in = tensor_t(torch.LongTensor, P.cuda_device, 1)
    labels_in.fill_(labels.index(sample[1]))

    # one input per scale, each with a leading dimension of size 1
    train_in_scales = [
        move_device(trans_scales[j](im).unsqueeze(0), P.cuda_device)
        for j, im in enumerate(scales)
    ]
    return train_in_scales, [labels_in]
def get_class_net():
    """Create the classification network and move it to ``P.cuda_device``.

    The backbone is ``models.resnet152`` when ``P.cnn_model`` equals
    ``'resnet152'`` (case-insensitive) and ``models.alexnet`` otherwise,
    built with ``pretrained=True`` and wrapped in ``TuneClassif`` with
    ``len(labels)`` classes. If ``P.preload_net`` is set, the state dict
    stored at that path is loaded into the wrapped network.
    """
    def keep_on_cpu(storage, location):
        # map every loaded storage to CPU, whatever device it was saved from
        return storage.cpu()

    wants_resnet = P.cnn_model.lower() == 'resnet152'
    backbone = models.resnet152 if wants_resnet else models.alexnet

    net = TuneClassif(
        backbone(pretrained=True),
        len(labels),
        untrained=P.untrained_blocks)

    if P.preload_net:
        state = torch.load(P.preload_net, map_location=keep_on_cpu)
        net.load_state_dict(state)

    return move_device(net, P.cuda_device)
def eval_batch_test(last, i, is_final, batch):
    """Classify one test image and update the running accuracy counters.

    ``last`` is the ``(correct, total)`` pair from the previous call; the
    updated pair is returned. Only ``batch[0]`` is read: element 0 is the
    image, element 1 its label. ``i`` and ``is_final`` are not used here.
    """
    correct, total = last
    sample = batch[0]

    net_input = move_device(trans(sample[0]).unsqueeze(0), P.cuda_device)
    # element 0 of the network output, as raw tensor data
    scores = net(Variable(net_input, volatile=True))[0].data

    # take the maximal classification value (and its class) along dim 1,
    # then let the spatial region with the highest such value decide
    region_best, region_class = torch.max(scores, 1)
    _, top_region = torch.max(region_best.view(-1), 0)
    predicted = region_class.view(-1)[top_region[0]]

    total += 1
    correct += (labels.index(sample[1]) == predicted)
    return correct, total
def batch(last, i, is_final, batch):
    """Store the L2-normalized descriptor of one test image at index ``i``.

    The descriptor is the vector of classification values taken at the
    location of the highest maximal activation. ``last`` is the embeddings
    container from the previous call; it is written to in place and
    returned. Only ``batch[0][0]`` is read; ``is_final`` is not used here.
    """
    image = batch[0][0]
    net_input = move_device(test_trans(image).unsqueeze(0), P.cuda_device)
    # element 0 of the network output, as raw tensor data
    activations = net(Variable(net_input, volatile=True))[0].data

    # Locate the highest maximal activation.
    # NOTE(review): chaining max(1) -> max(2) -> max(3) only works if each
    # reduction keeps the reduced dimension - presumably written against a
    # PyTorch version where that was the default; confirm before upgrading.
    peak, _ = activations.max(1)
    peak_dim2, idx_dim2 = peak.max(2)
    _, idx_dim3 = peak_dim2.max(3)
    pos3 = idx_dim3.view(-1)[0]
    pos2 = idx_dim2.view(-1)[pos3]

    # classification values at that location, then L2 normalization
    picked = activations[:, :, pos2, pos3]
    normalized = NormalizeL2Fun()(Variable(picked, volatile=True)).data

    last[i] = normalized[0]
    return last
def get_siamese_net():
    """Create the descriptor network and move it to ``P.cuda_device``.

    A ``TuneClassif`` network with ``P.num_classes`` classes is built on a
    pretrained backbone (``models.resnet152`` when ``P.cnn_model`` equals
    ``'resnet152'`` case-insensitively, ``models.alexnet`` otherwise) and
    optionally initialized from ``P.classif_model``. It is then wrapped in
    ``DescriptorNet``, which is optionally initialized from
    ``P.preload_net``.
    """
    def keep_on_cpu(storage, location):
        # map every loaded storage to CPU, whatever device it was saved from
        return storage.cpu()

    wants_resnet = P.cnn_model.lower() == 'resnet152'
    backbone = models.resnet152 if wants_resnet else models.alexnet

    class_net = TuneClassif(
        backbone(pretrained=True),
        P.num_classes,
        untrained=P.untrained_blocks)
    if P.classif_model:
        classif_state = torch.load(P.classif_model, map_location=keep_on_cpu)
        class_net.load_state_dict(classif_state)

    net = DescriptorNet(
        class_net,
        P.feature_dim,
        P.feature_size2d,
        untrained=P.untrained_blocks)
    if P.preload_net:
        net_state = torch.load(P.preload_net, map_location=keep_on_cpu)
        net.load_state_dict(net_state)

    return move_device(net, P.cuda_device)
def get_class_net():
    """Create the ``TuneClassifSub`` network and move it to ``P.cuda_device``.

    The backbone constructor is ``models.resnet152`` when ``P.cnn_model``
    equals ``'resnet152'`` (case-insensitive) and ``models.alexnet``
    otherwise. If ``P.bn_model`` is set, the base network is a
    ``TuneClassif`` (on a backbone built without ``pretrained``) loaded
    from that path; otherwise it is the plain backbone built with
    ``pretrained=True``. If ``P.preload_net`` is set, the state dict
    stored at that path is loaded into the final network.
    """
    def keep_on_cpu(storage, location):
        # map every loaded storage to CPU, whatever device it was saved from
        return storage.cpu()

    wants_resnet = P.cnn_model.lower() == 'resnet152'
    backbone = models.resnet152 if wants_resnet else models.alexnet

    if P.bn_model:
        base = TuneClassif(backbone(), len(labels))
        base.load_state_dict(
            torch.load(P.bn_model, map_location=keep_on_cpu))
        # NOTE: a separate copy_bn_all(...) step used to follow here and
        # is disabled; the loaded network is handed on as a whole.
    else:
        base = backbone(pretrained=True)

    net = TuneClassifSub(
        base,
        len(labels),
        P.feature_size2d,
        untrained=P.untrained_blocks)

    if P.preload_net:
        state = torch.load(P.preload_net, map_location=keep_on_cpu)
        net.load_state_dict(state)

    return move_device(net, P.cuda_device)