def forward(self, x):
    max_sample = x.size()[1]
    x = x.view(-1, self.feature_size)
    assignment = th.matmul(x, self.clusters)
    if self.add_batch_norm:
        assignment = self.batch_norm(assignment)
    assignment = F.softmax(assignment, dim=1)
    assignment = assignment.view(-1, max_sample, self.cluster_size)
    assignment = assignment.transpose(1, 2)

    x = x.view(-1, max_sample, self.feature_size)
    rvlad = th.matmul(assignment, x)
    rvlad = rvlad.transpose(-1, 1)

    # L2 intra norm
    rvlad = F.normalize(rvlad)

    # flattening + L2 norm
    rvlad = rvlad.view(-1, self.cluster_size * self.feature_size)
    rvlad = F.normalize(rvlad)
    return rvlad
def forward(self, x):
    max_sample = x.size()[1]
    x = x.view(-1, self.feature_size)
    assignment = th.matmul(x, self.clusters)
    if self.add_batch_norm:
        assignment = self.batch_norm(assignment)
    assignment = F.softmax(assignment, dim=1)
    assignment = assignment.view(-1, max_sample, self.cluster_size)

    a_sum = th.sum(assignment, -2, keepdim=True)
    a = a_sum * self.clusters2
    assignment = assignment.transpose(1, 2)

    x = x.view(-1, max_sample, self.feature_size)
    vlad = th.matmul(assignment, x)
    vlad = vlad.transpose(1, 2)
    vlad = vlad - a

    # L2 intra norm
    vlad = F.normalize(vlad)

    # flattening + L2 norm
    vlad = vlad.view(-1, self.cluster_size * self.feature_size)
    vlad = F.normalize(vlad)
    return vlad
def pairwise_cosine_similarities(embeddings1, embeddings2):
    assert len(embeddings1.size()) == len(embeddings2.size()) == 2
    embedding_dim = embeddings1.size(1)
    assert embeddings2.size(1) == embedding_dim
    embeddings1_ = F.normalize(embeddings1, dim=1)
    embeddings2_ = F.normalize(embeddings2, dim=1)
    sims = torch.mm(embeddings1_, embeddings2_.t())
    return sims  # num_embeddings1 x num_embeddings2
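# A minimal usage sketch (hedged; random data, assumed shapes): entry (i, j)
# of the result is the cosine similarity between embeddings1[i] and embeddings2[j].
import torch
import torch.nn.functional as F

e1 = torch.randn(4, 128)
e2 = torch.randn(6, 128)
sims = pairwise_cosine_similarities(e1, e2)
assert sims.shape == (4, 6)
assert sims.abs().max() <= 1.0 + 1e-6  # cosine values lie in [-1, 1]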
def forward(self, input, label):
    # --------------------------- cos(theta) & phi(theta) ---------------------------
    cosine = F.linear(F.normalize(input), F.normalize(self.weight))
    phi = cosine - self.m
    # --------------------------- convert label to one-hot ---------------------------
    one_hot = torch.zeros(cosine.size(), device='cuda')
    # one_hot = one_hot.cuda() if cosine.is_cuda else one_hot
    one_hot.scatter_(1, label.view(-1, 1).long(), 1)
    # ------------- torch.where(out_i = {x_i if condition_i else y_i}) -------------
    output = (one_hot * phi) + ((1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
    output *= self.s
    return output
def forward(self, x, label):
    cosine = F.linear(F.normalize(x), F.normalize(self.weight))
    sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
    phi = cosine * self.cos_m - sine * self.sin_m
    if self.easy_margin:
        phi = torch.where(cosine > 0, phi, cosine)
    else:
        phi = torch.where((cosine - self.th) > 0, phi, cosine - self.mm)
    one_hot = torch.zeros(cosine.size(), device='cuda')
    one_hot.scatter_(1, label.view(-1, 1).long(), 1)
    output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
    output *= self.s
    return output
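# A hedged sketch of the module state the ArcFace-style forward above assumes
# (s, m, cos_m, sin_m, th, mm follow the common ArcFace recipe; the actual
# __init__ is not part of this snippet):
import math
import torch
import torch.nn as nn

class ArcMarginProduct(nn.Module):
    def __init__(self, in_features, out_features, s=64.0, m=0.50, easy_margin=False):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(out_features, in_features))
        self.s, self.m = s, m
        self.easy_margin = easy_margin
        self.cos_m, self.sin_m = math.cos(m), math.sin(m)
        self.th = math.cos(math.pi - m)      # threshold for the monotonicity fix-up
        self.mm = math.sin(math.pi - m) * m  # fallback margin past the threshold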
def compute_weight(self, module):
    weight = module._parameters[self.name + '_org']
    u = module._buffers[self.name + '_u']
    height = weight.size(0)
    weight_mat = weight.view(height, -1)
    for _ in range(self.n_power_iterations):
        # The spectral norm of the weight equals `u^T W v`, where `u` and `v`
        # are the first left and right singular vectors.
        # This power iteration produces approximations of `u` and `v`.
        v = normalize(torch.matmul(weight_mat.t(), u), dim=0, eps=self.eps)
        u = normalize(torch.matmul(weight_mat, v), dim=0, eps=self.eps)
    sigma = torch.dot(u, torch.matmul(weight_mat, v))
    # Divide out of place: the original `weight.data /= sigma` mutated the
    # stored parameter, shrinking it a little further on every call.
    weight = weight / sigma
    return weight, u
def compute_weight(self, module):
    weight = getattr(module, self.name + '_org')
    u = getattr(module, self.name + '_u')
    height = weight.size(0)
    weight_mat = weight.view(height, -1)
    with torch.no_grad():
        for _ in range(self.n_power_iterations):
            # The spectral norm of the weight equals `u^T W v`, where `u` and `v`
            # are the first left and right singular vectors.
            # This power iteration produces approximations of `u` and `v`.
            v = normalize(torch.matmul(weight_mat.t(), u), dim=0, eps=self.eps)
            u = normalize(torch.matmul(weight_mat, v), dim=0, eps=self.eps)
    sigma = torch.dot(u, torch.matmul(weight_mat, v))
    weight = weight / sigma
    return weight, u
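# Sanity sketch (hedged; assumes a recent PyTorch with torch.linalg): after
# enough power-iteration steps, sigma should approach the largest singular
# value of the weight matrix.
import torch
import torch.nn.functional as F

W = torch.randn(64, 64)
u = F.normalize(torch.randn(64), dim=0)
for _ in range(100):
    v = F.normalize(W.t() @ u, dim=0)
    u = F.normalize(W @ v, dim=0)
sigma = torch.dot(u, W @ v)
print(sigma.item(), torch.linalg.svdvals(W)[0].item())  # these should nearly match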
def forward(self, x, geneexpr):
    # if sparse_in:  # (?, 600, 4)
    #     in_seq = to_one_hot(x, n_dims=4).permute(0, 3, 1, 2).squeeze()
    # else:
    #     in_seq = x.squeeze()
    x = F.pad(x, (9, 9))
    out = F.relu(self.conv1(x))    # (?, 4, 580)
    out = self.maxpool_3(out)      # (?, 30, 145)
    out = F.pad(out, (5, 5))
    out = F.relu(self.conv2(out))  # (?, 300, 140)
    out = self.maxpool_4(out)      # (?, 300, 35)
    out = F.pad(out, (3, 3))
    out = F.relu(self.conv3(out))  # (?, 500, 32)
    out = F.pad(out, (1, 1))
    out = self.maxpool_4(out)      # (?, 500, 8)
    out = out.view(-1, 200 * 13)   # (?, 200*13); must match the flattened conv output size
    if self.gdl == 0:
        geneexpr = self.dropout(geneexpr)
        geneexpr = F.relu(self.genelinear(geneexpr))
    elif self.gdl == 1:
        geneexpr = F.relu(self.genelinear(geneexpr))  # (?, 500)
        geneexpr = self.dropout(geneexpr)
    elif self.gdl == 2:
        geneexpr = F.normalize(self.genelinear(geneexpr), p=2, dim=1)
    out = torch.cat([out, geneexpr], dim=1)  # (?, 200*13+500)
    out = F.relu(self.linear1(out))  # (?, 800)
    out = self.dropout(out)
    out = F.relu(self.linear2(out))  # (?, 800)
    out = self.dropout(out)
    return self.output(out)  # (?, 1)
def get_arguments():
    cfg = parse_arguments()
    # these stay fixed
    cfg.sampleN = 100
    cfg.renderDepth = 1.0
    cfg.BNepsilon = 1e-5
    cfg.BNdecay = 0.999
    cfg.inputViewN = 24
    # ------ below automatically set ------
    cfg.device = torch.device(
        f"cuda:{cfg.gpu}" if torch.cuda.is_available() else "cpu")
    cfg.inH, cfg.inW = [int(x) for x in cfg.inSize.split("x")]
    cfg.outH, cfg.outW = [int(x) for x in cfg.outSize.split("x")]
    cfg.H, cfg.W = [int(x) for x in cfg.predSize.split("x")]
    cfg.Khom3Dto2D = torch.Tensor([[cfg.W, 0, 0, cfg.W / 2],
                                   [0, -cfg.H, 0, cfg.H / 2],
                                   [0, 0, -1, 0],
                                   [0, 0, 0, 1]]).float().to(cfg.device)
    cfg.Khom2Dto3D = torch.Tensor([[cfg.outW, 0, 0, cfg.outW / 2],
                                   [0, -cfg.outH, 0, cfg.outH / 2],
                                   [0, 0, -1, 0],
                                   [0, 0, 0, 1]]).float().to(cfg.device)
    cfg.fuseTrans = F.normalize(
        torch.from_numpy(
            np.load(f"{cfg.path}/trans_fuse{cfg.outViewN}.npy")),
        p=2, dim=1).to(cfg.device)
    print(f"EXPERIMENT: {cfg.model}_{cfg.experiment}")
    print("------------------------------------------")
    print(f"input:{cfg.inH}x{cfg.inW}, output:{cfg.outH}x{cfg.outW}, pred:{cfg.H}x{cfg.W}")
    print(f"viewN:{cfg.outViewN}(out), upscale:{cfg.upscale}, novelN:{cfg.novelN}")
    print(f"Device: {cfg.device}, depth_loss weight:{cfg.lambdaDepth}")
    print("------------------------------------------")
    return cfg
def forward(self, x):
    x = self.fc(x)
    x = self.cg(x)
    x = F.normalize(x)
    return x
def forward(self, x):
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.conv3(x)
    x = self.pool3(x)
    x = self.inception4a(x)
    x = self.inception4b(x)
    x = self.inception5a(x)
    x = self.inception5b(x)
    x = self.inception6a(x)
    x = self.inception6b(x)
    if self.cut_at_pooling:
        return x
    x = self.avgpool(x)
    x = x.view(x.size(0), -1)
    if self.has_embedding:
        x = self.feat(x)
        x = self.feat_bn(x)
    if self.norm:
        x = F.normalize(x)
    elif self.has_embedding:
        x = F.relu(x)
    if self.dropout > 0:
        x = self.drop(x)
    if self.num_classes > 0:
        x = self.classifier(x)
    return x
def compute_weight(self, module):
    weight = getattr(module, self.name + '_orig')
    u = getattr(module, self.name + '_u')
    weight_mat = weight
    if self.dim != 0:
        # permute dim to front
        weight_mat = weight_mat.permute(
            self.dim, *[d for d in range(weight_mat.dim()) if d != self.dim])
    height = weight_mat.size(0)
    weight_mat = weight_mat.reshape(height, -1)
    with torch.no_grad():
        for _ in range(self.n_power_iterations):
            v = F.normalize(torch.matmul(weight_mat.t(), u), dim=0, eps=self.eps)
            u = F.normalize(torch.matmul(weight_mat, v), dim=0, eps=self.eps)
    sigma = torch.dot(u, torch.matmul(weight_mat, v))
    weight = weight / sigma
    return weight, u
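# Context (hedged): the dim-permuting power iteration above mirrors what
# torch.nn.utils.spectral_norm does internally; a minimal sketch of the
# public API for comparison:
import torch
import torch.nn as nn

layer = nn.utils.spectral_norm(nn.Linear(20, 40), n_power_iterations=1)
y = layer(torch.randn(8, 20))
print(layer.weight_orig.shape, layer.weight_u.shape)  # '_orig' and '_u' registered by the hook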
def read(self, k, b):
    # k: (1, M_DIM*kr), b: (1, kr)
    kr = b.size(1)
    K = k.view(kr, M_DIM)
    _K = F.normalize(K, eps=EPS)
    _M = F.normalize(self.M, eps=EPS)
    C = T.matmul(_K, T.transpose(_M, 0, 1))
    B = b.repeat(N_mem, 1)  # beta
    B = T.transpose(B, 0, 1)
    if kr == Kr:
        self.W_predictor = F.softmax(B * C, dim=1)  # B*C: elementwise multiplication
        M = T.matmul(self.W_predictor, self.M)
    elif kr == Krp:
        self.W_policy = F.softmax(B * C, dim=1)
        M = T.matmul(self.W_policy, self.M)
    else:
        raise ValueError
    return M.view(1, -1)
def forward(self, input, label):
    # --------------------------- cos(theta) & phi(theta) ---------------------------
    cosine = F.linear(F.normalize(input), F.normalize(self.weight))
    sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
    phi = cosine * self.cos_m - sine * self.sin_m
    if self.easy_margin:
        phi = torch.where(cosine > 0, phi, cosine)
    else:
        phi = torch.where(cosine > self.th, phi, cosine - self.mm)
    # --------------------------- convert label to one-hot ---------------------------
    # one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda')
    one_hot = torch.zeros(cosine.size(), device='cuda')
    one_hot.scatter_(1, label.view(-1, 1).long(), 1)
    # ------------- torch.where(out_i = {x_i if condition_i else y_i}) -------------
    output = (one_hot * phi) + ((1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
    output *= self.s
    return output
def apply(module, name, n_power_iterations, eps):
    fn = SpectralNorm(name, n_power_iterations, eps)
    weight = module._parameters[name]
    height = weight.size(0)
    u = normalize(weight.new_empty(height).normal_(0, 1), dim=0, eps=fn.eps)
    module.register_parameter(fn.name + "_org", weight)
    module.register_buffer(fn.name + "_u", u)
    module.register_forward_pre_hook(fn)
    return fn
def forward(self, input, label):
    # --------------------------- cos(theta) & phi(theta) ---------------------------
    if self.device_id is None:
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
    else:
        x = input
        sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
        temp_x = x.cuda(self.device_id[0])
        weight = sub_weights[0].cuda(self.device_id[0])
        cosine = F.linear(F.normalize(temp_x), F.normalize(weight))
        for i in range(1, len(self.device_id)):
            temp_x = x.cuda(self.device_id[i])
            weight = sub_weights[i].cuda(self.device_id[i])
            cosine = torch.cat(
                (cosine,
                 F.linear(F.normalize(temp_x),
                          F.normalize(weight)).cuda(self.device_id[0])), dim=1)
    sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
    phi = cosine * self.cos_m - sine * self.sin_m
    if self.easy_margin:
        phi = torch.where(cosine > 0, phi, cosine)
    else:
        phi = torch.where(cosine > self.th, phi, cosine - self.mm)
    # --------------------------- convert label to one-hot ---------------------------
    one_hot = torch.zeros(cosine.size())
    if self.device_id is not None:
        one_hot = one_hot.cuda(self.device_id[0])
    one_hot.scatter_(1, label.view(-1, 1).long(), 1)
    # ------------- torch.where(out_i = {x_i if condition_i else y_i}) -------------
    output = (one_hot * phi) + ((1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
    output *= self.s
    return output
def _get_patches(z, patch_idxs, normalize=False):
    """
    z: CxHxW
    patch_idxs: K
    """
    c = z.size(0)
    patches = z.view(c, -1).t()[patch_idxs]
    if normalize:
        patches = F.normalize(patches, dim=1)
    return patches
def forward(self, input, label):
    # lambda = max(lambda_min, base * (1 + gamma * iteration) ** (-power))
    self.iter += 1
    self.lamb = max(self.LambdaMin,
                    self.base * (1 + self.gamma * self.iter) ** (-1 * self.power))
    # --------------------------- cos(theta) & phi(theta) ---------------------------
    cos_theta = F.linear(F.normalize(input), F.normalize(self.weight))
    cos_theta = cos_theta.clamp(-1, 1)
    cos_m_theta = self.mlambda[self.m](cos_theta)
    theta = cos_theta.data.acos()
    k = (self.m * theta / 3.14159265).floor()
    phi_theta = ((-1.0) ** k) * cos_m_theta - 2 * k
    NormOfFeature = torch.norm(input, 2, 1)
    # --------------------------- convert label to one-hot ---------------------------
    one_hot = torch.zeros(cos_theta.size())
    one_hot = one_hot.cuda() if cos_theta.is_cuda else one_hot
    one_hot.scatter_(1, label.view(-1, 1), 1)
    # --------------------------- calculate output ---------------------------
    output = (one_hot * (phi_theta - cos_theta) / (1 + self.lamb)) + cos_theta
    output *= NormOfFeature.view(-1, 1)
    return output
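# A hedged sketch of the `mlambda` table indexed above (standard in SphereFace
# implementations; the actual __init__ is not part of this snippet). Each entry
# is the multiple-angle expansion of cos(m*theta) as a polynomial in cos(theta):
mlambda = [
    lambda x: x ** 0,
    lambda x: x ** 1,
    lambda x: 2 * x ** 2 - 1,
    lambda x: 4 * x ** 3 - 3 * x,
    lambda x: 8 * x ** 4 - 8 * x ** 2 + 1,
    lambda x: 16 * x ** 5 - 20 * x ** 3 + 5 * x,
]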
def apply(module, name, n_power_iterations, eps):
    fn = SpectralNorm(name, n_power_iterations, eps)
    weight = module._parameters[name]
    height = weight.size(0)
    u = normalize(weight.new_empty(height).normal_(0, 1), dim=0, eps=fn.eps)
    delattr(module, fn.name)
    module.register_parameter(fn.name + "_orig", weight)
    # We still need to assign weight back as fn.name because all sorts of
    # things may assume that it exists, e.g., when initializing weights.
    # However, we can't directly assign as it could be an nn.Parameter and
    # gets added as a parameter. Instead, we assign weight.data, which will
    # just be added as a plain attribute, and also supports nn.init due to
    # shared storage.
    setattr(module, fn.name, weight.data)
    module.register_buffer(fn.name + "_u", u)
    module.register_forward_pre_hook(fn)
    return fn
def forward(self, joint_feature):
    orig_feature_size = len(joint_feature.size())
    if orig_feature_size == 2:
        joint_feature = torch.unsqueeze(joint_feature, dim=1)
    batch_size, num_loc, dim = joint_feature.size()
    if dim % self.pool_size != 0:
        exit("the dim %d is not a multiple of pool_size %d" % (dim, self.pool_size))
    joint_feature_reshape = joint_feature.view(
        batch_size, num_loc, int(dim / self.pool_size), self.pool_size)
    # N x 100 x 1000 x 1
    iatt_iq_sumpool = torch.sum(joint_feature_reshape, 3)
    iatt_iq_sqrt = torch.sqrt(F.relu(iatt_iq_sumpool)) - torch.sqrt(F.relu(-iatt_iq_sumpool))
    iatt_iq_sqrt = iatt_iq_sqrt.view(batch_size, -1)  # N x 100000
    iatt_iq_l2 = F.normalize(iatt_iq_sqrt)
    iatt_iq_l2 = iatt_iq_l2.view(batch_size, num_loc, int(dim / self.pool_size))
    if orig_feature_size == 2:
        iatt_iq_l2 = torch.squeeze(iatt_iq_l2, dim=1)
    return iatt_iq_l2
def forward(self, x):
    for name, module in self.base._modules.items():
        if name == 'avgpool':
            break
        x = module(x)
    if self.cut_at_pooling:
        return x
    x = F.avg_pool2d(x, x.size()[2:])
    x = x.view(x.size(0), -1)
    if self.has_embedding:
        x = self.feat(x)
        x = self.feat_bn(x)
    if self.norm:
        x = F.normalize(x)
    elif self.has_embedding:
        x = F.relu(x)
    if self.dropout > 0:
        x = self.drop(x)
    if self.num_classes > 0:
        x = self.classifier(x)
    return x
def forward(self, input, label):
    # lambda = max(lambda_min, base * (1 + gamma * iteration) ** (-power))
    self.iter += 1
    self.lamb = max(self.LambdaMin,
                    self.base * (1 + self.gamma * self.iter) ** (-1 * self.power))
    # --------------------------- cos(theta) & phi(theta) ---------------------------
    if self.device_id is None:
        cos_theta = F.linear(F.normalize(input), F.normalize(self.weight))
    else:
        x = input
        sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
        temp_x = x.cuda(self.device_id[0])
        weight = sub_weights[0].cuda(self.device_id[0])
        cos_theta = F.linear(F.normalize(temp_x), F.normalize(weight))
        for i in range(1, len(self.device_id)):
            temp_x = x.cuda(self.device_id[i])
            weight = sub_weights[i].cuda(self.device_id[i])
            cos_theta = torch.cat(
                (cos_theta,
                 F.linear(F.normalize(temp_x),
                          F.normalize(weight)).cuda(self.device_id[0])), dim=1)
    cos_theta = cos_theta.clamp(-1, 1)
    cos_m_theta = self.mlambda[self.m](cos_theta)
    theta = cos_theta.data.acos()
    k = (self.m * theta / 3.14159265).floor()
    phi_theta = ((-1.0) ** k) * cos_m_theta - 2 * k
    NormOfFeature = torch.norm(input, 2, 1)
    # --------------------------- convert label to one-hot ---------------------------
    one_hot = torch.zeros(cos_theta.size())
    if self.device_id is not None:
        one_hot = one_hot.cuda(self.device_id[0])
    one_hot.scatter_(1, label.view(-1, 1), 1)
    # --------------------------- calculate output ---------------------------
    output = (one_hot * (phi_theta - cos_theta) / (1 + self.lamb)) + cos_theta
    output *= NormOfFeature.view(-1, 1)
    return output
codebook_quantized_reuse_NN = codebook_reusable_NN.sign()
pairwise_dist_neural_quantized, d_min_quantized = pairwise_distances(codebook_quantized_reuse_NN.data.cpu())

codebook_plotkin = correct_second_order_encoder_Plotkin_RM_leaves(all_msg_bits)
pairwise_dist_plotkin, d_min_plotkin = pairwise_distances(codebook_plotkin.data.cpu())

print("Neural Codebook with d_min: {0: .4f} is \n {1}".format(d_min_reusable_NN, codebook_reusable_NN.data.cpu().numpy()))
print("Quantized Neural Codebook with d_min: {0: .4f} is \n {1}".format(d_min_quantized, codebook_quantized_reuse_NN.data.cpu().numpy()))
print("Plotkin Codebook with d_min: {0: .4f} is \n {1}".format(d_min_plotkin, codebook_plotkin))

Gaussian_codebook = F.normalize(torch.randn(codebook_size, 2**args.m), p=2, dim=1) * np.sqrt(2**args.m)
pairwise_dist_Gaussian, d_min_Gaussian = pairwise_distances(Gaussian_codebook)

total_pairwise_dist = len(pairwise_dist_neural)
min_stuff = np.min([np.min(pairwise_dist_neural), np.min(pairwise_dist_Gaussian)])
max_stuff = np.max([np.max(pairwise_dist_neural), np.max(pairwise_dist_Gaussian)])
bins = np.linspace(min_stuff, max_stuff, 1000)

n_neural, bins_neural = np.histogram(pairwise_dist_neural, bins=bins, density=True)
n_neural_quantized, bins_neural_quantized = np.histogram(pairwise_dist_neural_quantized, bins=bins, density=False)
n_RM, bins_RM = np.histogram(pairwise_dist_plotkin, bins=bins, density=False)
def forward(self, x):
    x = F.normalize(x, dim=1)
    return self.net(x)
def normalize(self, arr):
    if self.complex_normalize:
        return self._complex_normalize(arr)
    return F.normalize(arr, dim=-1)
def forward(self, x):
    return F.normalize(x, self.p, self.dim, eps=1e-8)
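# A hedged sketch of the enclosing module (p and dim are assumed constructor
# arguments, inferred from the forward above), plus a one-line usage check:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Normalize(nn.Module):
    def __init__(self, p=2, dim=1):
        super().__init__()
        self.p, self.dim = p, dim

    def forward(self, x):
        return F.normalize(x, self.p, self.dim, eps=1e-8)

norm = Normalize()
assert torch.allclose(norm(torch.randn(4, 16)).norm(dim=1), torch.ones(4), atol=1e-4)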
def forward(self, x):
    feat = self.encoder(x)
    feat = F.normalize(self.head(feat), dim=1)
    return feat
def forward(self, x: Tensor, msg: Tensor, p: int = 2):
    msg = F.normalize(msg, p=p, dim=-1)
    x_norm = x.norm(p=p, dim=-1, keepdim=True)
    return msg * x_norm * self.scale
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--exp_name', default='ijba_eval')
    parser.add_argument('-g', '--gpu', type=int, default=0)
    parser.add_argument('-d', '--data_dir',
                        default='/home/renyi/arunirc/data1/datasets/CS2')
    parser.add_argument('-p', '--protocol_dir',
                        default='/home/renyi/arunirc/data1/datasets/IJB-A/IJB-A_11_sets/')
    parser.add_argument('--fold', type=int, default=1, choices=[1, 10])
    parser.add_argument('--sqrt', action='store_true', default=False,
                        help='Add signed sqrt normalization')
    parser.add_argument('--cosine', action='store_true', default=False,
                        help='Use cosine similarity instead of L2 distance')
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('-m', '--model_path', default=MODEL_PATH,
                        help='Path to pre-trained model')
    parser.add_argument('--model_type', default=MODEL_TYPE,
                        choices=['resnet50', 'resnet101',
                                 'resnet101-512d', 'resnet101-512d-norm'])
    args = parser.parse_args()

    # CUDA setup
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    cuda = torch.cuda.is_available()
    torch.manual_seed(1337)
    if cuda:
        torch.cuda.manual_seed(1337)
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True  # enable if all images are same size

    # -----------------------------------------------------------------------------
    # 1. Model
    # -----------------------------------------------------------------------------
    num_class = 8631
    if args.model_type == 'resnet50':
        model = torchvision.models.resnet50(pretrained=False)
        model.fc = torch.nn.Linear(2048, num_class)
    elif args.model_type == 'resnet101':
        model = torchvision.models.resnet101(pretrained=False)
        model.fc = torch.nn.Linear(2048, num_class)
    elif args.model_type == 'resnet101-512d':
        model = torchvision.models.resnet101(pretrained=False)
        layers = []
        layers.append(torch.nn.Linear(2048, 512))
        layers.append(torch.nn.Linear(512, num_class))
        model.fc = torch.nn.Sequential(*layers)
    elif args.model_type == 'resnet101-512d-norm':
        model = torchvision.models.resnet101(pretrained=False)
        layers = []
        layers.append(torch.nn.Linear(2048, 512))
        layers.append(models.NormFeat(scale_factor=50.0))
        layers.append(torch.nn.Linear(512, num_class))
        model.fc = torch.nn.Sequential(*layers)
    else:
        raise NotImplementedError

    checkpoint = torch.load(args.model_path)
    if checkpoint['arch'] == 'DataParallel':
        model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3, 4])
        model.load_state_dict(checkpoint['model_state_dict'])
        model = model.module  # get network module from inside its DataParallel wrapper
    else:
        model.load_state_dict(checkpoint['model_state_dict'])
    if cuda:
        model = model.cuda()

    # Convert the trained network into a "feature extractor"
    feature_map = list(model.children())
    if args.model_type == 'resnet101-512d' or args.model_type == 'resnet101-512d-norm':
        model.eval()
        extractor = model
        extractor.fc = nn.Sequential(extractor.fc[0])
    else:
        feature_map.pop()
        extractor = nn.Sequential(*feature_map)
    extractor.eval()  # ALWAYS set to evaluation mode (fixes BatchNorm, dropout, etc.)

    # -----------------------------------------------------------------------------
    # 2. Dataset
    # -----------------------------------------------------------------------------
    fold_id = 1
    file_ext = '.jpg'
    RGB_MEAN = [0.485, 0.456, 0.406]
    RGB_STD = [0.229, 0.224, 0.225]
    test_transform = transforms.Compose([
        # transforms.Scale(224),
        # transforms.CenterCrop(224),
        transforms.Scale((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=RGB_MEAN, std=RGB_STD),
    ])

    pairs_path = osp.join(args.protocol_dir, 'split%d' % fold_id,
                          'verify_comparisons_%d.csv' % fold_id)
    pairs = utils.read_ijba_pairs(pairs_path)
    protocol_file = osp.join(args.protocol_dir, 'split%d' % fold_id,
                             'verify_metadata_%d.csv' % fold_id)
    metadata = utils.get_ijba_1_1_metadata(protocol_file)  # dict
    assert np.all(np.unique(pairs) == np.unique(metadata['template_id']))  # sanity-check
    path_list = np.array([osp.join(args.data_dir, str(x) + file_ext)
                          for x in metadata['sighting_id']])  # face crops saved as <sighting_id.jpg>

    # Create data loader
    test_loader = torch.utils.data.DataLoader(
        data_loader.IJBADataset(path_list, test_transform, split=fold_id),
        batch_size=args.batch_size, shuffle=False)

    # testing
    # for i in range(len(test_loader.dataset)):
    #     img = test_loader.dataset.__getitem__(i)
    #     sz = img.shape
    #     if sz[0] != 3:
    #         print sz

    # -----------------------------------------------------------------------------
    # 3. Feature extraction
    # -----------------------------------------------------------------------------
    print 'Feature extraction...'
    cache_dir = osp.join(here, 'cache-' + args.model_type)
    if not osp.exists(cache_dir):
        os.makedirs(cache_dir)

    feat_path = osp.join(cache_dir, 'feat-fold-%d.mat' % fold_id)
    if not osp.exists(feat_path):
        features = []
        for batch_idx, images in tqdm.tqdm(enumerate(test_loader),
                                           total=len(test_loader),
                                           desc='Extracting features'):
            x = Variable(images, volatile=True)  # test-time memory conservation
            if cuda:
                x = x.cuda()
            feat = extractor(x)
            if cuda:
                feat = feat.data.cpu()  # free up GPU
            else:
                feat = feat.data
            features.append(feat)
        features = torch.cat(features, dim=0)  # (n_batch*batch_sz) x 512
        sio.savemat(feat_path, {'feat': features.cpu().numpy()})
    else:
        dat = sio.loadmat(feat_path)
        features = torch.FloatTensor(dat['feat'])
        del dat
        print 'Loaded.'

    # -----------------------------------------------------------------------------
    # 4. Verification
    # -----------------------------------------------------------------------------
    scores = []
    labels = []

    # labels: is_same_subject
    print 'Computing pair labels . . . '
    for pair in tqdm.tqdm(pairs):  # TODO - check tqdm
        sel_t0 = np.where(metadata['template_id'] == pair[0])
        sel_t1 = np.where(metadata['template_id'] == pair[1])
        subject0 = np.unique(metadata['subject_id'][sel_t0])
        subject1 = np.unique(metadata['subject_id'][sel_t1])
        labels.append(int(subject0 == subject1))
    labels = np.array(labels)
    print 'done'

    # templates: average pool, then L2-normalize
    print 'Pooling templates . . . '
    pooled_features = []
    template_set = np.unique(metadata['template_id'])
    for tid in tqdm.tqdm(template_set):
        sel = np.where(metadata['template_id'] == tid)
        # pool template: 1 x n x 512 -> 1 x 512
        feat = features[sel, :].mean(1)
        if args.sqrt:
            # signed-square-root normalization
            feat = torch.mul(torch.sign(feat), torch.sqrt(torch.abs(feat) + 1e-12))
        pooled_features.append(F.normalize(feat, p=2, dim=1))
    pooled_features = torch.cat(pooled_features, dim=0)  # (n_batch*batch_sz) x 512
    print 'done'

    print 'Computing pair distances . . . '
    for pair in tqdm.tqdm(pairs):
        sel_t0 = np.where(template_set == pair[0])
        sel_t1 = np.where(template_set == pair[1])
        if args.cosine:
            feat_dist = torch.dot(torch.squeeze(pooled_features[sel_t0]),
                                  torch.squeeze(pooled_features[sel_t1]))
        else:
            feat_dist = (pooled_features[sel_t0] - pooled_features[sel_t1]).norm(p=2, dim=1)
            feat_dist = -torch.squeeze(feat_dist)  # score: negative of L2-distance
        feat_dist = feat_dist.numpy()
        scores.append(feat_dist)
    scores = np.array(scores)

    # Metrics: TAR (tpr) at FAR (fpr)
    fpr, tpr, thresholds = sklearn.metrics.roc_curve(labels, scores)
    fpr_levels = [0.0001, 0.001, 0.01, 0.1]
    f_interp = interpolate.interp1d(fpr, tpr)
    tpr_at_fpr = [f_interp(x) for x in fpr_levels]
    for (far, tar) in zip(fpr_levels, tpr_at_fpr):
        print 'TAR @ FAR=%.4f : %.4f' % (far, tar)

    res = {}
    res['TAR'] = tpr_at_fpr
    res['FAR'] = fpr_levels
    with open(osp.join(cache_dir, 'result-1-1-fold-%d.yaml' % fold_id), 'w') as f:
        yaml.dump(res, f, default_flow_style=False)
    sio.savemat(osp.join(cache_dir, 'roc-1-1-fold-%d.mat' % fold_id),
                {'fpr': fpr, 'tpr': tpr,
                 'thresholds': thresholds,
                 'tpr_at_fpr': tpr_at_fpr})
def init_source(self, center):
    center = F.normalize(center, p=2, dim=-1)
    self.source_memo = center
    self.num_src = center.shape[0]
def init(self, center):
    center = F.normalize(center, p=2, dim=-1)
    self.memory = center
def forward(self, feature_map):
    down = F.avg_pool2d(feature_map, 2, stride=2)
    return self.conv(F.normalize(self.activation(self.fattener(feature_map)))) + self.resfat(down)
def infer(self, embedding):
    weight = F.normalize(self.fc, p=2, dim=1)
    x = F.linear(embedding, weight).view(-1, self._n_class, self._n_center)
    prob = self.softmax(self._inv_gamma * x)
    return (prob * x).sum(dim=2), weight
def forward(self, feature_map):
    up = F.upsample(feature_map, scale_factor=2)
    return self.thinner(F.normalize(self.activation(self.upconv(feature_map)))) + self.resthin(up)
def closest_to_mean(features):
    # F.normalize is not in-place; the original discarded its result.
    features = F.normalize(features)
    class_mean = torch.mean(features, dim=0, keepdim=False)
    # The original ended with a bare `return`; returning the index of the
    # feature closest to the class mean is an assumed completion that matches
    # the function's name.
    return torch.argmin((features - class_mean).norm(dim=1))
def similarity(a, b):
    a = F.normalize(a, dim=-1)
    b = F.normalize(b, dim=-1)
    return (1 - (a * b).sum(dim=-1)).mean()
def to_embedding(x):
    return F.normalize(x.view(x.size(1), -1).view(x.size(0), -1))
def evaluate(qf, qf2, qpl, ql, qc, gf, gf2, gpl, gl, gc):
    if isinstance(qf, np.ndarray):
        qf = torch.from_numpy(qf)  # [6, 2048]
    qf = qf.cuda()
    if isinstance(gf, np.ndarray):
        gf = torch.from_numpy(gf)  # [17661, 6, 2048]
    gf = gf.cuda()
    if isinstance(gpl, np.ndarray):
        gpl = torch.from_numpy(gpl)
    gpl = gpl.cuda()
    if isinstance(qpl, np.ndarray):
        qpl = torch.from_numpy(qpl)
    qpl = qpl.cuda()
    if isinstance(qf2, np.ndarray):
        qf2 = torch.from_numpy(qf2)
    qf2 = qf2.cuda()
    if isinstance(gf2, np.ndarray):
        gf2 = torch.from_numpy(gf2)
    gf2 = gf2.cuda()

    # Calculate the distance of pose-guided global features
    query2 = qf2
    qf2 = qf2.expand_as(gf2)
    q2 = F.normalize(qf2, p=2, dim=1)
    g2 = F.normalize(gf2, p=2, dim=1)
    s2 = q2 * g2
    s2 = s2.sum(1)  # calculate the cosine distance
    s2 = (s2 + 1.) / 2  # convert cosine distance range from [-1,1] to [0,1], because occluded part distance is set to 0

    # Calculate the distance of partial features
    query = qf
    overlap = gpl * qpl
    overlap = overlap.view(-1, gpl.size(1))  # calculate the shared region part label
    qf = qf.expand_as(gf)
    q = F.normalize(qf, p=2, dim=2)
    g = F.normalize(gf, p=2, dim=2)
    s = q * g
    s = s.sum(2)  # calculate the cosine distance
    s = (s + 1.) / 2  # convert cosine distance range from [-1,1] to [0,1]
    s = s * overlap  # [17661, 2048] [17661, 6]
    s = (s.sum(1) + s2) / (overlap.sum(1) + 1)
    s = s.data.cpu()

    score = s.numpy()
    index = np.argsort(score)  # from small to large
    index = index[::-1]
    # good index
    query_index = np.argwhere(gl == ql)
    camera_index = np.argwhere(gc == qc)
    good_index = np.setdiff1d(query_index, camera_index, assume_unique=True)
    junk_index1 = np.argwhere(gl == -1)
    junk_index2 = np.intersect1d(query_index, camera_index)
    junk_index = np.append(junk_index2, junk_index1)  # .flatten()
    CMC_tmp = compute_mAP(index, good_index, junk_index)
    return CMC_tmp, index
def at(x):
    return F.normalize(x.pow(2).mean(1).view(x.size(0), -1))
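# Context (hedged): `at` computes the activation-based attention map used in
# attention-transfer distillation: square the activations, average over the
# channel dimension, flatten spatially, then L2-normalize per sample. Shape check:
import torch
import torch.nn.functional as F

fmap = torch.randn(8, 64, 14, 14)  # assumed N x C x H x W feature map
att = at(fmap)
assert att.shape == (8, 14 * 14)
assert torch.allclose(att.norm(dim=1), torch.ones(8), atol=1e-4)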
def row_norm(input):
    return F.normalize(input, p=1, dim=1)
def forward(self, input):
    return self.scale_factor * F.normalize(input, p=2, dim=1)
def col_norm(input):
    return F.normalize(input, p=1, dim=0)
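# Quick sanity sketch (hedged; non-negative random input): after row_norm each
# row sums to 1 under the L1 norm, and after col_norm each column does.
import torch

x = torch.rand(3, 4) + 0.1
assert torch.allclose(row_norm(x).sum(dim=1), torch.ones(3), atol=1e-6)
assert torch.allclose(col_norm(x).sum(dim=0), torch.ones(4), atol=1e-6)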
def forward(self, x):
    out = F.normalize(x, dim=1).mm(F.normalize(self.weight, dim=0))
    return out
def forward(self, input):
    return F.normalize(input)
def train(self, replay_buffer, batch_size=100):
    self.total_it += 1

    # Sample replay buffer
    state, action, next_state, reward, not_done = replay_buffer.sample(batch_size)
    state.requires_grad = True

    # Get current Q estimates
    current_Q1, current_Q2 = self.critic_1(state, action), self.critic_2(state, action)
    current_Q1_target = self.critic_target_1(state, action)
    current_Q2_target = self.critic_target_2(state, action)

    with torch.no_grad():
        # Select action according to policy and add clipped noise
        noise = (torch.randn_like(action) * self.policy_noise).clamp(
            -self.noise_clip, self.noise_clip)
        next_action = (self.actor_target(next_state) + noise).clamp(
            -self.max_action, self.max_action)

        # Compute the target Q value
        target_Q1 = self.critic_target_1(next_state, next_action)
        target_Q2 = self.critic_target_2(next_state, next_action)
        target_Q = torch.min(target_Q1, target_Q2)

    # get adversarial target Q
    state.requires_grad = True
    adv_loss_1 = current_Q1_target.mean()
    self.critic_target_1.zero_grad()
    adv_loss_1.backward(retain_graph=True)
    adv_perturb_1 = F.normalize(state.grad.data)

    state.requires_grad = True
    adv_loss_2 = current_Q2_target.mean()
    self.critic_target_2.zero_grad()
    adv_loss_2.backward(retain_graph=True)
    adv_perturb_2 = F.normalize(state.grad.data)

    # get Q1 Q2 adversarial estimation
    adv_q1 = self.critic_1(state - self.adv_epsilon * adv_perturb_1, action)
    adv_q2 = self.critic_2(state - self.adv_epsilon * adv_perturb_2, action)
    adv_error_1 = torch.clamp(current_Q1 - adv_q1, 0.0, 1000.0)
    adv_error_2 = torch.clamp(current_Q2 - adv_q2, 0.0, 1000.0)

    target_Q1 = reward - self.alpha * adv_error_1 + not_done * self.discount * target_Q
    target_Q2 = reward - self.alpha * adv_error_2 + not_done * self.discount * target_Q
    target_Q1, target_Q2 = target_Q1.detach(), target_Q2.detach()

    # Compute critic loss
    critic_loss_1 = F.mse_loss(current_Q1, target_Q1)
    critic_loss_2 = F.mse_loss(current_Q2, target_Q2)

    # Optimize critic Q1
    self.critic_optimizer_1.zero_grad()
    critic_loss_1.backward()
    self.critic_optimizer_1.step()

    # Optimize critic Q2
    self.critic_optimizer_2.zero_grad()
    critic_loss_2.backward()
    self.critic_optimizer_2.step()

    # Delayed policy updates
    if self.total_it % self.policy_freq == 0:
        # Compute actor loss
        actor_loss = -self.critic_1(state, self.actor(state)).mean()

        # Optimize the actor
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

        # Update the frozen target models
        for param, target_param in zip(self.critic_1.parameters(),
                                       self.critic_target_1.parameters()):
            target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data)
        for param, target_param in zip(self.critic_2.parameters(),
                                       self.critic_target_2.parameters()):
            target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data)
        for param, target_param in zip(self.actor.parameters(),
                                       self.actor_target.parameters()):
            target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data)

    # [ ONLY FOR DEBUG ]
    if self.total_it % 100 == 2:
        reward_mean = reward.mean().float()
        adv_error_mean = 0.5 * adv_error_1.mean().float() + 0.5 * adv_error_2.mean().float()
        q_loss_mean = 0.5 * critic_loss_1.float() + 0.5 * critic_loss_2.float()
        pi_loss_mean = actor_loss.float()
        sumstr = tf.Summary(value=[
            tf.Summary.Value(tag='agent/reward', simple_value=reward_mean),
            tf.Summary.Value(tag='agent/adv_error', simple_value=adv_error_mean),
            tf.Summary.Value(tag='agent/qloss', simple_value=q_loss_mean),
            tf.Summary.Value(tag='agent/pi_loss', simple_value=pi_loss_mean),
        ])
        self.writer.add_summary(sumstr, global_step=self.total_it)
# nicer euclidean similarity matrix at https://discuss.pytorch.org/t/build-your-own-loss-function-in-pytorch/235/7
# np.sqrt(sum((mat[valtestdex[i]]-mat[traindex[j]])**2))
# TODO: better euclidean knn implementation!
valtestdex = np.concatenate([val.expn_dex, test.expn_dex])
traindex = train.expn_dex
simmat = torch.zeros(7, 49)
# mat = pinned_lookup.weight
mat = torch.Tensor(np.vstack([np.zeros((1, 30)), np.load('mu.npy')])).cuda()
for i in range(7):
    for j in range(49):
        simmat[i, j] = F.cosine_similarity(mat[valtestdex[i]], mat[traindex[j]], dim=0).item()
k_weights, k_nearest = simmat.sort(descending=False)
# args.num_k=1
k_weights, k_nearest = k_weights[:, :args.num_k], k_nearest[:, :args.num_k]
k_weights = F.normalize(k_weights, p=1, dim=1)
tensor1 = torch.zeros(7, 49)
tensor1.scatter_(1, k_nearest, k_weights)
tensor2 = torch.zeros(57, 49)
tensor2[valtestdex, :] = tensor1
tensor2 = tensor2.cuda()  # take the 7x49 thing and make it bigger so it's easy to index into with geneexpr

# criterion = torch.nn.MultiLabelSoftMarginLoss()  # Loss function
criterion = torch.nn.BCEWithLogitsLoss(size_average=False)
# model_files = sorted(glob.glob('bassetnorm_*.pkl'))
# for mf in model_files:
model.eval()
losses = []
def forward(self, x):
    x = F.normalize(x, p=2, dim=1)
    return x
def forward(self, x):
    x = F.normalize(x, dim=1)
    scale = self.weight[None, :, None, None]
    return scale * x
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--exp_name', default='lfw_eval')
    parser.add_argument('-g', '--gpu', type=int, default=0)
    parser.add_argument('-d', '--dataset_path',
                        default='/srv/data1/arunirc/datasets/lfw-deepfunneled')
    parser.add_argument('--fold', type=int, default=0, choices=[0, 10])
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('-m', '--model_path', default=None, required=True,
                        help='Path to pre-trained model')
    parser.add_argument('--model_type', default='resnet50',
                        choices=['resnet50', 'resnet101', 'resnet101-512d'])
    args = parser.parse_args()

    # CUDA setup
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    cuda = torch.cuda.is_available()
    torch.manual_seed(1337)
    if cuda:
        torch.cuda.manual_seed(1337)
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True  # enable if all images are same size

    if args.fold == 0:
        pairs_path = './lfw/data/pairsDevTest.txt'
    else:
        pairs_path = './lfw/data/pairs.txt'

    # -----------------------------------------------------------------------------
    # 1. Dataset
    # -----------------------------------------------------------------------------
    file_ext = 'jpg'  # observe, no '.' before jpg
    num_class = 8631
    pairs = utils.read_pairs(pairs_path)
    path_list, issame_list = utils.get_paths(args.dataset_path, pairs, file_ext)

    # Define data transforms
    RGB_MEAN = [0.485, 0.456, 0.406]
    RGB_STD = [0.229, 0.224, 0.225]
    test_transform = transforms.Compose([
        transforms.Scale((250, 250)),  # make 250x250
        transforms.CenterCrop(150),    # then take 150x150 center crop
        transforms.Scale((224, 224)),  # resized to the network's required input size
        transforms.ToTensor(),
        transforms.Normalize(mean=RGB_MEAN, std=RGB_STD),
    ])

    # Create data loader
    test_loader = torch.utils.data.DataLoader(
        data_loader.LFWDataset(path_list, issame_list, test_transform),
        batch_size=args.batch_size, shuffle=False)

    # -----------------------------------------------------------------------------
    # 2. Model
    # -----------------------------------------------------------------------------
    if args.model_type == 'resnet50':
        model = torchvision.models.resnet50(pretrained=False)
        model.fc = torch.nn.Linear(2048, num_class)
    elif args.model_type == 'resnet101':
        model = torchvision.models.resnet101(pretrained=False)
        model.fc = torch.nn.Linear(2048, num_class)
    elif args.model_type == 'resnet101-512d':
        model = torchvision.models.resnet101(pretrained=False)
        layers = []
        layers.append(torch.nn.Linear(2048, 512))
        layers.append(torch.nn.Linear(512, num_class))
        model.fc = torch.nn.Sequential(*layers)
    else:
        raise NotImplementedError

    checkpoint = torch.load(args.model_path)
    if checkpoint['arch'] == 'DataParallel':
        # if we trained and saved our model using DataParallel
        model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3, 4])
        model.load_state_dict(checkpoint['model_state_dict'])
        model = model.module  # get network module from inside its DataParallel wrapper
    else:
        model.load_state_dict(checkpoint['model_state_dict'])
    if cuda:
        model = model.cuda()

    # Convert the trained network into a "feature extractor"
    feature_map = list(model.children())
    if args.model_type == 'resnet101-512d':
        model.eval()
        extractor = model
        extractor.fc = nn.Sequential(extractor.fc[0])
    else:
        feature_map.pop()
        extractor = nn.Sequential(*feature_map)
    extractor.eval()  # set to evaluation mode (fixes BatchNorm, dropout, etc.)

    # -----------------------------------------------------------------------------
    # 3. Feature extraction
    # -----------------------------------------------------------------------------
    features = []
    for batch_idx, images in tqdm.tqdm(enumerate(test_loader),
                                       total=len(test_loader),
                                       desc='Extracting features'):
        x = Variable(images, volatile=True)  # test-time memory conservation
        if cuda:
            x = x.cuda()
        feat = extractor(x)
        if cuda:
            feat = feat.data.cpu()
        else:
            feat = feat.data
        features.append(feat)

    features = torch.stack(features)
    sz = features.size()
    features = features.view(sz[0] * sz[1], sz[2])
    features = F.normalize(features, p=2, dim=1)  # L2-normalize
    # TODO - cache features

    # -----------------------------------------------------------------------------
    # 4. Verification
    # -----------------------------------------------------------------------------
    num_feat = features.size()[0]
    feat_pair1 = features[np.arange(0, num_feat, 2), :]
    feat_pair2 = features[np.arange(1, num_feat, 2), :]
    feat_dist = (feat_pair1 - feat_pair2).norm(p=2, dim=1)
    feat_dist = feat_dist.numpy()

    # Eval metrics
    scores = -feat_dist
    gt = np.asarray(issame_list)

    if args.fold == 0:
        fig_path = osp.join(here, args.exp_name + '_' + args.model_type + '_lfw_roc_devTest.png')
        roc_auc = sklearn.metrics.roc_auc_score(gt, scores)
        fpr, tpr, thresholds = sklearn.metrics.roc_curve(gt, scores)
        print 'ROC-AUC: %.04f' % roc_auc

        # Plot and save ROC curve
        fig = plt.figure()
        plt.title('ROC - lfw dev-test')
        plt.plot(fpr, tpr, lw=2, label='ROC (auc = %0.4f)' % roc_auc)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.grid()
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc='lower right')
        plt.tight_layout()
    else:  # 10 fold
        fold_size = 600  # 600 pairs in each fold
        roc_auc = np.zeros(10)
        roc_eer = np.zeros(10)
        fig = plt.figure()
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.grid()
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        for i in tqdm.tqdm(range(10)):
            start = i * fold_size
            end = (i + 1) * fold_size
            scores_fold = scores[start:end]
            gt_fold = gt[start:end]
            roc_auc[i] = sklearn.metrics.roc_auc_score(gt_fold, scores_fold)
            fpr, tpr, _ = sklearn.metrics.roc_curve(gt_fold, scores_fold)
            # EER calc: https://yangcha.github.io/EER-ROC/
            roc_eer[i] = brentq(
                lambda x: 1. - x - interpolate.interp1d(fpr, tpr)(x), 0., 1.)
            plt.plot(fpr, tpr, alpha=0.4, lw=2, color='darkgreen',
                     label='ROC(auc=%0.4f, eer=%0.4f)' % (roc_auc[i], roc_eer[i]))
        plt.title('AUC: %0.4f +/- %0.4f, EER: %0.4f +/- %0.4f' %
                  (np.mean(roc_auc), np.std(roc_auc),
                   np.mean(roc_eer), np.std(roc_eer)))
        plt.tight_layout()
        fig_path = osp.join(here, args.exp_name + '_' + args.model_type + '_lfw_roc_10fold.png')

    plt.savefig(fig_path, bbox_inches='tight')
    print 'ROC curve saved at: ' + fig_path
def test_emb(
        cfg,
        data_cfg,
        weights,
        batch_size=16,
        iou_thres=0.5,
        conf_thres=0.3,
        nms_thres=0.45,
        print_interval=40,
):
    # Configure run
    f = open(data_cfg)
    data_cfg_dict = json.load(f)
    f.close()
    test_paths = data_cfg_dict['test_emb']
    dataset_root = data_cfg_dict['root']
    cfg_dict = parse_model_cfg(cfg)
    img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]

    # Initialize model
    model = Darknet(cfg_dict, test_emb=True)

    # Load weights
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location='cpu')['model'], strict=False)
    else:  # darknet format
        load_darknet_weights(model, weights)
    model = torch.nn.DataParallel(model)
    model.cuda().eval()

    # Get dataloader
    transforms = T.Compose([T.ToTensor()])
    dataset = JointDataset(dataset_root, test_paths, img_size,
                           augment=False, transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                             shuffle=False, num_workers=8,
                                             drop_last=False, collate_fn=collate_fn)
    embedding, id_labels = [], []
    print('Extracting pedestrian features...')
    for batch_i, (imgs, targets, paths, shapes, targets_len) in enumerate(dataloader):
        t = time.time()
        output = model(imgs.cuda(), targets.cuda(), targets_len.cuda()).squeeze()
        for out in output:
            feat, label = out[:-1], out[-1].long()
            if label != -1:
                embedding.append(feat)
                id_labels.append(label)
        if batch_i % print_interval == 0:
            print('Extracting {}/{}, # of instances {}, time {:.2f} sec.'.format(
                batch_i, len(dataloader), len(id_labels), time.time() - t))

    print('Computing pairwise similarity...')
    if len(embedding) < 1:
        return None
    embedding = torch.stack(embedding, dim=0).cuda()
    id_labels = torch.LongTensor(id_labels)
    n = len(id_labels)
    print(n, len(embedding))
    assert len(embedding) == n

    embedding = F.normalize(embedding, dim=1)
    pdist = torch.mm(embedding, embedding.t()).cpu().numpy()
    gt = id_labels.expand(n, n).eq(id_labels.expand(n, n).t()).numpy()

    up_triangle = np.where(np.triu(pdist) - np.eye(n) * pdist != 0)
    pdist = pdist[up_triangle]
    gt = gt[up_triangle]

    far_levels = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
    far, tar, threshold = metrics.roc_curve(gt, pdist)
    interp = interpolate.interp1d(far, tar)
    tar_at_far = [interp(x) for x in far_levels]
    for f, fa in enumerate(far_levels):
        print('TPR@FAR={:.7f}: {:.4f}'.format(fa, tar_at_far[f]))
    return tar_at_far
        1)  # standard normal distribution

    # use GPU or not
    if NETWORKS_PARAMETERS['GPU']:
        voice, voice_identity_label, voice_emotion_label = voice.cuda(), voice_identity_label.cuda(), voice_emotion_label.cuda()
        face, face_identity_label, face_emotion_label = face.cuda(), face_identity_label.cuda(), face_emotion_label.cuda()
        real_label, fake_label = real_label.cuda(), fake_label.cuda()
        noise = noise.cuda()
        D_loss_positive, D_loss_negative = D_loss_positive.cuda(), D_loss_negative.cuda()

    # get embeddings and generated faces
    embeddings = e_net(voice)
    embeddings = F.normalize(embeddings)
    # introduce some permutations
    embeddings = embeddings + noise
    embeddings = F.normalize(embeddings)
    embeddings = embeddings.squeeze()  # squeeze dims from (64, 128, 1, 1) to (64, 128)

    # expand dims from (64, 1) to (64, 8, 128, 128); cf. nn.Embedding(emotion_class_num, emotion_class_num)
    face_EM_label_ = torch.zeros(
        (DATASET_PARAMETERS['batch_size'], emotion_class_num)).scatter_(
            1, face_emotion_label.type(torch.LongTensor).unsqueeze(1), 1)
    face_EM_label_ = face_EM_label_.unsqueeze(2).unsqueeze(3).expand(
        DATASET_PARAMETERS['batch_size'], emotion_class_num, face.size(2), face.size(3))
    face_EM_label_ = face_EM_label_.cuda()
    voice_EM_label_ = torch.zeros(
def forward(self, spo_feat, sbj_labels=None, obj_labels=None, sbj_feat=None, obj_feat=None):
    device_id = spo_feat.get_device()
    if sbj_labels is not None:
        sbj_labels = Variable(torch.from_numpy(sbj_labels.astype('int64'))).cuda(device_id)
    if obj_labels is not None:
        obj_labels = Variable(torch.from_numpy(obj_labels.astype('int64'))).cuda(device_id)

    if cfg.MODEL.RUN_BASELINE:
        assert sbj_labels is not None and obj_labels is not None
        prd_cls_scores = self.freq_bias.rel_index_with_labels(
            torch.stack((sbj_labels, obj_labels), 1))
        prd_cls_scores = F.softmax(prd_cls_scores, dim=1)
        return prd_cls_scores, None, None, None, None, None

    if spo_feat.dim() == 4:
        spo_feat = spo_feat.squeeze(3).squeeze(2)

    sbj_vis_embeddings = self.so_vis_embeddings(sbj_feat)
    obj_vis_embeddings = self.so_vis_embeddings(obj_feat)

    prd_hidden = self.prd_feats(spo_feat)
    prd_features = torch.cat(
        (sbj_vis_embeddings.detach(), prd_hidden, obj_vis_embeddings.detach()), dim=1)
    prd_vis_embeddings = self.prd_vis_embeddings(prd_features)

    ds_obj_vecs = self.obj_vecs
    ds_obj_vecs = Variable(torch.from_numpy(ds_obj_vecs.astype('float32'))).cuda(device_id)
    so_sem_embeddings = self.so_sem_embeddings(ds_obj_vecs)
    so_sem_embeddings = F.normalize(so_sem_embeddings, p=2, dim=1)  # (#prd, 1024)
    so_sem_embeddings.t_()

    sbj_vis_embeddings = F.normalize(sbj_vis_embeddings, p=2, dim=1)  # (#bs, 1024)
    sbj_sim_matrix = torch.mm(sbj_vis_embeddings, so_sem_embeddings)  # (#bs, #prd)
    sbj_cls_scores = cfg.MODEL.NORM_SCALE * sbj_sim_matrix

    obj_vis_embeddings = F.normalize(obj_vis_embeddings, p=2, dim=1)  # (#bs, 1024)
    obj_sim_matrix = torch.mm(obj_vis_embeddings, so_sem_embeddings)  # (#bs, #prd)
    obj_cls_scores = cfg.MODEL.NORM_SCALE * obj_sim_matrix

    if not cfg.MODEL.USE_SEM_CONCAT:
        ds_prd_vecs = self.prd_vecs
        ds_prd_vecs = Variable(torch.from_numpy(ds_prd_vecs.astype('float32'))).cuda(device_id)
        prd_sem_embeddings = self.prd_sem_embeddings(ds_prd_vecs)
        prd_sem_embeddings = F.normalize(prd_sem_embeddings, p=2, dim=1)  # (#prd, 1024)
        prd_vis_embeddings = F.normalize(prd_vis_embeddings, p=2, dim=1)  # (#bs, 1024)
        prd_sim_matrix = torch.mm(prd_vis_embeddings, prd_sem_embeddings.t_())  # (#bs, #prd)
        prd_cls_scores = cfg.MODEL.NORM_SCALE * prd_sim_matrix
    else:
        ds_prd_vecs = self.prd_vecs
        ds_prd_vecs = Variable(torch.from_numpy(ds_prd_vecs.astype('float32'))).cuda(device_id)
        prd_sem_hidden = self.prd_sem_hidden(ds_prd_vecs)  # (#prd, 1024)

        # get sbj sem embeddings and expand to (#bs, #prd, 1024)
        sbj_vecs = self.obj_vecs[sbj_labels]  # (#bs, 300)
        sbj_vecs = Variable(torch.from_numpy(sbj_vecs.astype('float32'))).cuda(device_id)
        if len(list(sbj_vecs.size())) == 1:  # sbj_vecs should be 2d
            sbj_vecs.unsqueeze_(0)
        sbj_sem_embeddings = self.so_sem_embeddings(sbj_vecs)  # (#bs, 1024)
        sbj_sem_embeddings = sbj_sem_embeddings.unsqueeze(1).expand(
            sbj_vecs.shape[0], ds_prd_vecs.shape[0], 1024)
        # (#bs, 1024) --> (#bs, 1, 1024) --> (#bs, #prd, 1024)

        # get obj sem embeddings and expand to (#bs, #prd, 1024)
        obj_vecs = self.obj_vecs[obj_labels]  # (#bs, 300)
        obj_vecs = Variable(torch.from_numpy(obj_vecs.astype('float32'))).cuda(device_id)
        if len(list(obj_vecs.size())) == 1:  # obj_vecs should be 2d
            obj_vecs.unsqueeze_(0)
        obj_sem_embeddings = self.so_sem_embeddings(obj_vecs)  # (#bs, 1024)
        obj_sem_embeddings = obj_sem_embeddings.unsqueeze(1).expand(
            obj_vecs.shape[0], ds_prd_vecs.shape[0], 1024)
        # (#bs, 1024) --> (#bs, 1, 1024) --> (#bs, #prd, 1024)

        # expand prd hidden feats to (#bs, #prd, 1024)
        prd_sem_hidden = prd_sem_hidden.unsqueeze(0).expand(
            sbj_vecs.shape[0], ds_prd_vecs.shape[0], 1024)
        # (#prd, 1024) --> (1, #prd, 1024) --> (#bs, #prd, 1024)

        # now feed semantic SPO features into the last prd semantic layer
        spo_sem_feat = torch.cat(
            (sbj_sem_embeddings.detach(), prd_sem_hidden, obj_sem_embeddings.detach()),
            dim=2)  # (#bs, #prd, 3 * 1024)
        # get prd scores
        prd_sem_embeddings = self.prd_sem_embeddings(spo_sem_feat)  # (#bs, #prd, 1024)
        prd_sem_embeddings = F.normalize(prd_sem_embeddings, p=2, dim=2)  # (#bs, #prd, 1024)
        prd_vis_embeddings = F.normalize(prd_vis_embeddings, p=2, dim=1)  # (#bs, 1024)
        prd_vis_embeddings = prd_vis_embeddings.unsqueeze(-1)  # (#bs, 1024) --> (#bs, 1024, 1)
        prd_sim_matrix = torch.bmm(prd_sem_embeddings, prd_vis_embeddings).squeeze(-1)
        # bmm((#bs, #prd, 1024), (#bs, 1024, 1)) = (#bs, #prd, 1) --> (#bs, #prd)
        prd_cls_scores = cfg.MODEL.NORM_SCALE * prd_sim_matrix

    if cfg.MODEL.USE_FREQ_BIAS:
        assert sbj_labels is not None and obj_labels is not None
        prd_cls_scores = prd_cls_scores + self.freq_bias.rel_index_with_labels(
            torch.stack((sbj_labels, obj_labels), 1))

    if not self.training:
        sbj_cls_scores = F.softmax(sbj_cls_scores, dim=1)
        obj_cls_scores = F.softmax(obj_cls_scores, dim=1)
        prd_cls_scores = F.softmax(prd_cls_scores, dim=1)

    return prd_cls_scores, sbj_cls_scores, obj_cls_scores
def cosine_loss(a: T, b: T) -> T:
    a = F.normalize(a, dim=-1)
    b = F.normalize(b, dim=-1)
    neg_cos_sim = -(a * b).sum(dim=-1).mean()
    return neg_cos_sim
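# Cross-check sketch (hedged): the loss above equals the negative mean cosine
# similarity, so F.cosine_similarity gives an equivalent reference value.
import torch
import torch.nn.functional as F

a, b = torch.randn(16, 256), torch.randn(16, 256)
ref = -F.cosine_similarity(a, b, dim=-1).mean()
print(torch.allclose(cosine_loss(a, b), ref, atol=1e-6))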
def single(patch):
    return F.normalize(siam(patch, params))
def sp(x):
    Q = x.view(x.size(0), -1)
    G = F.normalize(torch.mm(Q, Q.permute(1, 0)))
    return G
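# Context (hedged): `sp` forms the batch similarity (Gram) matrix used in
# similarity-preserving distillation, then L2-normalizes each row
# (F.normalize defaults to dim=1). A tiny shape check:
import torch

feats = torch.randn(8, 64, 4, 4)  # assumed N x C x H x W activations
G = sp(feats)
assert G.shape == (8, 8)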
def single(patch):
    return F.normalize(streams(patch, params))
def __getitem__(self, index):
    # Get image_id, which serves as a primary key for the current instance.
    image_id = self.image_ids[index]

    # Get image features for this image_id using the hdf reader.
    image_features = self.hdf_reader[image_id]
    image_features = torch.tensor(image_features)
    # Normalize image features at zero-th dimension (since there's no batch
    # dimension).
    if self.config["img_norm"]:
        image_features = normalize(image_features, dim=0, p=2)

    # Retrieve the instance for this image_id using the json reader.
    visdial_instance = self.dialogs_reader[image_id]
    caption = visdial_instance["caption"]
    dialog = visdial_instance["dialog"]

    # Convert word tokens of caption, question, answer and answer options
    # to integers.
    caption = self.vocabulary.to_indices(caption)
    for i in range(len(dialog)):
        dialog[i]["question"] = self.vocabulary.to_indices(dialog[i]["question"])
        if self.add_boundary_toks:
            dialog[i]["answer"] = self.vocabulary.to_indices(
                [self.vocabulary.SOS_TOKEN]
                + dialog[i]["answer"]
                + [self.vocabulary.EOS_TOKEN])
        else:
            dialog[i]["answer"] = self.vocabulary.to_indices(dialog[i]["answer"])

        if self.return_options:
            for j in range(len(dialog[i]["answer_options"])):
                if self.add_boundary_toks:
                    dialog[i]["answer_options"][j] = self.vocabulary.to_indices(
                        [self.vocabulary.SOS_TOKEN]
                        + dialog[i]["answer_options"][j]
                        + [self.vocabulary.EOS_TOKEN])
                else:
                    dialog[i]["answer_options"][j] = self.vocabulary.to_indices(
                        dialog[i]["answer_options"][j])

    questions, question_lengths = self._pad_sequences(
        [dialog_round["question"] for dialog_round in dialog])
    history, history_lengths = self._get_history(
        caption,
        [dialog_round["question"] for dialog_round in dialog],
        [dialog_round["answer"] for dialog_round in dialog],
    )
    answers_in, answer_lengths = self._pad_sequences(
        [dialog_round["answer"][:-1] for dialog_round in dialog])
    answers_out, _ = self._pad_sequences(
        [dialog_round["answer"][1:] for dialog_round in dialog])

    # Collect everything as tensors for ``collate_fn`` of dataloader to
    # work seamlessly; questions, history, etc. are converted to
    # LongTensors, for nn.Embedding input.
    item = {}
    item["img_ids"] = torch.tensor(image_id).long()
    item["img_feat"] = image_features
    item["ques"] = questions.long()
    item["hist"] = history.long()
    item["ans_in"] = answers_in.long()
    item["ans_out"] = answers_out.long()
    item["ques_len"] = torch.tensor(question_lengths).long()
    item["hist_len"] = torch.tensor(history_lengths).long()
    item["ans_len"] = torch.tensor(answer_lengths).long()
    item["num_rounds"] = torch.tensor(visdial_instance["num_rounds"]).long()

    if self.return_options:
        if self.add_boundary_toks:
            answer_options_in, answer_options_out = [], []
            answer_option_lengths = []
            for dialog_round in dialog:
                options, option_lengths = self._pad_sequences(
                    [option[:-1] for option in dialog_round["answer_options"]])
                answer_options_in.append(options)
                options, _ = self._pad_sequences(
                    [option[1:] for option in dialog_round["answer_options"]])
                answer_options_out.append(options)
                answer_option_lengths.append(option_lengths)
            answer_options_in = torch.stack(answer_options_in, 0)
            answer_options_out = torch.stack(answer_options_out, 0)

            item["opt_in"] = answer_options_in.long()
            item["opt_out"] = answer_options_out.long()
            item["opt_len"] = torch.tensor(answer_option_lengths).long()
        else:
            answer_options = []
            answer_option_lengths = []
            for dialog_round in dialog:
                options, option_lengths = self._pad_sequences(
                    dialog_round["answer_options"])
                answer_options.append(options)
                answer_option_lengths.append(option_lengths)
            answer_options = torch.stack(answer_options, 0)

            item["opt"] = answer_options.long()
            item["opt_len"] = torch.tensor(answer_option_lengths).long()

        if "test" not in self.split:
            answer_indices = [dialog_round["gt_index"] for dialog_round in dialog]
            item["ans_ind"] = torch.tensor(answer_indices).long()

    # Gather dense annotations.
    if "val" in self.split or "dense" in self.split:
        dense_annotations = self.annotations_reader[image_id]
        item["gt_relevance"] = torch.tensor(
            dense_annotations["gt_relevance"]).float()
        item["round_id"] = torch.tensor(
            dense_annotations["round_id"]).long()
        if self.return_adjusted_gt_relevance:
            item["adjusted_gt_relevance"] = torch.tensor(
                dense_annotations["adjusted_gt_relevance"]).float()

    return item
def export(images_list, model, checkpoint, keypoints_type, num_keypoints,
           detection_thresh, extension):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    config = base_config
    config['name'] = model

    # Load the model
    net = get_model(config['name'])(None, config, device)
    net.load(checkpoint, Mode.EXPORT)
    net._net.eval()

    # Load the keypoint network if necessary
    if keypoints_type == 'superpoint':
        kp_net = load_SP_net(conf_thresh=detection_thresh)

    # Parse the data, predict the features, and export them in an npz file
    with open(images_list, 'r') as f:
        image_files = f.readlines()
    image_files = [path.strip('\n') for path in image_files]
    for img_path in tqdm(image_files):
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_size = img.shape
        if img_size[2] != 3:
            sys.exit('Export only available for RGB images.')
        cpu_gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        img = torch.tensor(img, dtype=torch.float, device=device)
        img = img.permute(2, 0, 1).unsqueeze(0) / 255.

        # Predict keypoints
        if keypoints_type == 'sift':
            cpu_gray_img = np.uint8(cpu_gray_img)
            keypoints = SIFT_detect(cpu_gray_img, nfeatures=num_keypoints,
                                    contrastThreshold=0.04)
        if keypoints_type == 'superpoint':
            keypoints = SP_detect(cpu_gray_img, kp_net)
        scores = keypoints[:, 2]
        grid_points = keypoints_to_grid(
            torch.tensor(keypoints[:, :2], dtype=torch.float, device=device),
            img_size[:2])
        keypoints = keypoints[:, [1, 0]]

        # Predict the corresponding descriptors
        inputs = {'image0': img}
        with torch.no_grad():
            outputs = net._forward(inputs, Mode.EXPORT, config)
        descs = outputs['descriptors']
        meta_descs = outputs['meta_descriptors']
        descriptors, meta_descriptors = [], []
        for k in descs.keys():
            desc = func.normalize(
                func.grid_sample(descs[k], grid_points),
                dim=1).squeeze().cpu().numpy().transpose(1, 0)
            descriptors.append(desc)
            meta_descriptors.append(meta_descs[k].squeeze().cpu().numpy())
        descriptors = np.stack(descriptors, axis=1)
        meta_descriptors = np.stack(meta_descriptors, axis=0)
        grid_points = grid_points.cpu().numpy()

        with open(img_path + extension, 'wb') as output_file:
            np.savez(output_file,
                     keypoints=keypoints,
                     descriptors=descriptors,
                     scores=scores,
                     meta_descriptors=meta_descriptors,
                     grid_points=grid_points)
def forward(self, x):
    x_norm = F.normalize(x)
    w_norm = F.normalize(self.weight)
    cosine = F.linear(x_norm, w_norm, None)
    out = cosine  # * self.scale
    return out
def loss_function(model, batch, device, margin=1, safe_radius=4,
                  scaling_steps=3, plot=False):
    output = model({
        'image1': batch['image1'].to(device),
        'image2': batch['image2'].to(device)
    })

    loss = torch.tensor(np.array([0], dtype=np.float32), device=device)
    has_grad = False
    n_valid_samples = 0
    for idx_in_batch in range(batch['image1'].size(0)):
        # Annotations
        depth1 = batch['depth1'][idx_in_batch].to(device)  # [h1, w1]
        intrinsics1 = batch['intrinsics1'][idx_in_batch].to(device)  # [3, 3]
        pose1 = batch['pose1'][idx_in_batch].view(4, 4).to(device)  # [4, 4]
        bbox1 = batch['bbox1'][idx_in_batch].to(device)  # [2]

        depth2 = batch['depth2'][idx_in_batch].to(device)
        intrinsics2 = batch['intrinsics2'][idx_in_batch].to(device)
        pose2 = batch['pose2'][idx_in_batch].view(4, 4).to(device)
        bbox2 = batch['bbox2'][idx_in_batch].to(device)

        # Network output
        dense_features1 = output['dense_features1'][idx_in_batch]
        c, h1, w1 = dense_features1.size()
        scores1 = output['scores1'][idx_in_batch].view(-1)

        dense_features2 = output['dense_features2'][idx_in_batch]
        _, h2, w2 = dense_features2.size()
        scores2 = output['scores2'][idx_in_batch]

        all_descriptors1 = F.normalize(dense_features1.view(c, -1), dim=0)
        descriptors1 = all_descriptors1

        all_descriptors2 = F.normalize(dense_features2.view(c, -1), dim=0)

        # Warp the positions from image 1 to image 2
        fmap_pos1 = grid_positions(h1, w1, device)
        hOrig, wOrig = int(batch['image1'].shape[2] / 8), int(batch['image1'].shape[3] / 8)
        fmap_pos1Orig = grid_positions(hOrig, wOrig, device)
        pos1 = upscale_positions(fmap_pos1Orig, scaling_steps=scaling_steps)

        # ORB feature detection (SIFT/SURF alternatives kept commented out)
        imgNp1 = imshow_image(batch['image1'][idx_in_batch].cpu().numpy(),
                              preprocessing=batch['preprocessing'])
        imgNp1 = cv2.cvtColor(imgNp1, cv2.COLOR_BGR2RGB)
        # surf = cv2.xfeatures2d.SIFT_create(100)
        # surf = cv2.xfeatures2d.SURF_create(80)
        orb = cv2.ORB_create(nfeatures=100, scoreType=cv2.ORB_FAST_SCORE)
        kp = orb.detect(imgNp1, None)
        keyP = [(kp[i].pt) for i in range(len(kp))]
        keyP = np.asarray(keyP).T
        keyP[[0, 1]] = keyP[[1, 0]]
        keyP = np.floor(keyP) + 0.5
        pos1 = torch.from_numpy(keyP).to(pos1.device).float()

        try:
            pos1, pos2, ids = warp(pos1, depth1, intrinsics1, pose1, bbox1,
                                   depth2, intrinsics2, pose2, bbox2)
        except EmptyTensorError:
            continue
        ids = idsAlign(pos1, device, h1, w1)

        # cv2.drawKeypoints(imgNp1, kp, imgNp1)
        # cv2.imshow('Keypoints', imgNp1)
        # cv2.waitKey(0)
        # drawTraining(batch['image1'], batch['image2'], pos1, pos2, batch,
        #              idx_in_batch, output, save=False)
        # exit(1)

        # Top view homography adjustment
        # H1 = output['H1'][idx_in_batch]
        # H2 = output['H2'][idx_in_batch]
        # try:
        #     pos1, pos2 = homoAlign(pos1, pos2, H1, H2, device)
        # except IndexError:
        #     continue
        # ids = idsAlign(pos1, device, h1, w1)
        # img_warp1 = tgm.warp_perspective(batch['image1'].to(device), H1, dsize=(400, 400))
        # img_warp2 = tgm.warp_perspective(batch['image2'].to(device), H2, dsize=(400, 400))
        # drawTraining(img_warp1, img_warp2, pos1, pos2, batch, idx_in_batch, output)

        fmap_pos1 = fmap_pos1[:, ids]
        descriptors1 = descriptors1[:, ids]
        scores1 = scores1[ids]

        # Skip the pair if not enough GT correspondences are available
        if ids.size(0) < 128:
            print(ids.size(0))
            continue

        # Descriptors at the corresponding positions
        fmap_pos2 = torch.round(
            downscale_positions(pos2, scaling_steps=scaling_steps)).long()
        descriptors2 = F.normalize(
            dense_features2[:, fmap_pos2[0, :], fmap_pos2[1, :]], dim=0)
        positive_distance = 2 - 2 * (descriptors1.t().unsqueeze(1)
                                     @ descriptors2.t().unsqueeze(2)).squeeze()
        # positive_distance = getPositiveDistance(descriptors1, descriptors2)

        all_fmap_pos2 = grid_positions(h2, w2, device)
        position_distance = torch.max(torch.abs(
            fmap_pos2.unsqueeze(2).float() - all_fmap_pos2.unsqueeze(1)), dim=0)[0]
        is_out_of_safe_radius = position_distance > safe_radius
        distance_matrix = 2 - 2 * (descriptors1.t() @ all_descriptors2)
        # distance_matrix = getDistanceMatrix(descriptors1, all_descriptors2)
        negative_distance2 = torch.min(
            distance_matrix + (1 - is_out_of_safe_radius.float()) * 10., dim=1)[0]
        # negative_distance2 = semiHardMine(distance_matrix, is_out_of_safe_radius,
        #                                   positive_distance, margin)

        all_fmap_pos1 = grid_positions(h1, w1, device)
        position_distance = torch.max(torch.abs(
            fmap_pos1.unsqueeze(2).float() - all_fmap_pos1.unsqueeze(1)), dim=0)[0]
        is_out_of_safe_radius = position_distance > safe_radius
        distance_matrix = 2 - 2 * (descriptors2.t() @ all_descriptors1)
        # distance_matrix = getDistanceMatrix(descriptors2, all_descriptors1)
        negative_distance1 = torch.min(
            distance_matrix + (1 - is_out_of_safe_radius.float()) * 10., dim=1)[0]
        # negative_distance1 = semiHardMine(distance_matrix, is_out_of_safe_radius,
        #                                   positive_distance, margin)

        diff = positive_distance - torch.min(negative_distance1, negative_distance2)

        scores2 = scores2[fmap_pos2[0, :], fmap_pos2[1, :]]
        loss = loss + (torch.sum(scores1 * scores2 * F.relu(margin + diff)) /
                       torch.sum(scores1 * scores2))

        has_grad = True
        n_valid_samples += 1

        if plot and batch['batch_idx'] % batch['log_interval'] == 0:
            print("Inside plot.")
            drawTraining(batch['image1'], batch['image2'], pos1, pos2, batch,
                         idx_in_batch, output, save=True)
            # drawTraining(img_warp1, img_warp2, pos1, pos2, batch,
            #              idx_in_batch, output, save=True)

    if not has_grad:
        raise NoGradientError
    loss = loss / n_valid_samples
    return loss