def test_indices_ivfflat(self):
    """64-bit ids round-trip through GpuIndexIVFFlat; 32-bit storage strips the high bits."""
    rng = np.random.RandomState(567)
    dim, num_vecs, num_lists = 128, 5000, 10
    vecs = rng.rand(num_vecs, dim).astype('float32')
    base_ids = np.arange(num_vecs, dtype=np.int64)
    # Offset by 2**32 so the ids cannot be represented in an int32.
    big_ids = (base_ids + 4294967296).astype('int64')

    resources = faiss.StandardGpuResources()
    cfg = faiss.GpuIndexIVFFlatConfig()

    # Default (64-bit) id storage: the full ids must come back.
    index = faiss.GpuIndexIVFFlat(resources, dim, num_lists, faiss.METRIC_L2, cfg)
    index.train(vecs)
    index.add_with_ids(vecs, big_ids)
    _, labels = index.search(vecs[10:20], 5)
    self.assertTrue(np.array_equal(big_ids[10:20], labels[:, 0]))

    # Same data stored with 32-bit indices instead.
    cfg.indicesOptions = faiss.INDICES_32_BIT
    index = faiss.GpuIndexIVFFlat(resources, dim, num_lists, faiss.METRIC_L2, cfg)
    index.train(vecs)
    index.add_with_ids(vecs, big_ids)
    _, labels = index.search(vecs[10:20], 5)
    # 32-bit storage strips the high bit, so only the base ids survive.
    self.assertTrue(np.array_equal(base_ids[10:20], labels[:, 0]))
def inference(discriminator, dev_src, dev_tgt):
    """Evaluate on (dev_src, dev_tgt); return batch-averaged (precision, recall, f1).

    Mirrors the per-batch pipeline in ``train_epoch`` but without gradient
    updates. Relies on module-level state: ``batch_size``, ``encoder``,
    ``vqcrf``, ``args``, ``USE_CUDA``, ``load_kb``, ``batchize``,
    ``evaluate_acc`` — NOTE(review): ``discriminator`` is only switched to
    eval mode here; the forward pass goes through the global ``encoder``.
    """
    discriminator.eval()
    datapairslist = batchize(dev_src, dev_tgt, batch_size, volatile=True)
    len_datalist = len(datapairslist)
    prec, rec, f1 = 0, 0, 0

    # Build a GPU IVF-Flat index over the KB concept embeddings so each
    # token embedding can be matched to its nearest KB concepts.
    _, e_v = load_kb.loadvec()
    flat_config = faiss.GpuIndexIVFFlatConfig()
    flat_config.device = 0
    res = faiss.StandardGpuResources()
    index = faiss.GpuIndexIVFFlat(res, args.concept_size, 1000,
                                  faiss.METRIC_L2, flat_config)
    index.train(e_v)
    index.add(e_v)

    for batch in datapairslist:
        src_seqs, tgt_seqs, mask = batch

        # Encode, then zero out padding positions via the mask.
        embbed, pre_kb_emb = encoder(src_seqs)
        embbed = embbed * mask.unsqueeze(-1)
        pre_kb_emb = (pre_kb_emb * mask.unsqueeze(-1)).permute(1, 0, 2)
        if not isinstance(pre_kb_emb, np.ndarray):
            pre_kb_emb = pre_kb_emb.data.cpu().numpy()

        # For every sequence, fetch the top `args.num_kb` KB vectors per token.
        # (Renamed from `item`, which shadowed the outer loop variable.)
        v_list = []
        for seq_emb in pre_kb_emb:
            _, I = index.search(seq_emb, args.num_kb)
            v_list.append(torch.from_numpy(e_v[I]))
        v = Variable(torch.stack(v_list, 0))
        if USE_CUDA:
            v = v.cuda()
        v = v * mask.transpose(1, 0).unsqueeze(-1).unsqueeze(-1)

        scores, preds = vqcrf.inference(embbed, v, mask)
        micro_prec, micro_rec, micro_f1 = evaluate_acc(tgt_seqs, preds)
        prec += micro_prec
        rec += micro_rec
        f1 += micro_f1

    return prec / len_datalist, rec / len_datalist, f1 / len_datalist
def train_epoch(discriminator, train_src, train_tgt, epoch_index, lr):
    """Train for one epoch over (train_src, train_tgt); return mean loss.

    Relies on module-level state: ``batch_size``, ``encoder``, ``vqcrf``,
    ``encoder_optimizer``, ``vqcrf_optimizer``, ``args``, ``USE_CUDA``,
    ``print_every_train``, ``len_trainset``, ``load_kb``, ``batchize``,
    ``shuffle``.

    Args:
        discriminator: switched into train mode; parameter updates go through
            the module-level encoder/vqcrf optimizers.
        train_src, train_tgt: parallel training data handed to ``batchize``.
        epoch_index: epoch number, used only in log output.
        lr: unused here; kept for interface compatibility with callers.

    Returns:
        epoch_loss / len_trainset — average negative log score per example.
    """
    discriminator.train()
    datapairslist = batchize(train_src, train_tgt, batch_size)
    epoch_loss = 0
    start_time = time.time()
    len_traintensorlist = len(train_src)
    shuffle(datapairslist)

    # Build a GPU IVF-Flat index over the KB concept embeddings so each
    # token embedding can be matched to its nearest KB concepts.
    _, e_v = load_kb.loadvec()
    flat_config = faiss.GpuIndexIVFFlatConfig()
    flat_config.device = 0
    res = faiss.StandardGpuResources()
    index = faiss.GpuIndexIVFFlat(res, args.concept_size, 1000,
                                  faiss.METRIC_L2, flat_config)
    index.train(e_v)
    index.add(e_v)

    for i, batch in enumerate(datapairslist):
        src_seqs, tgt_seqs, mask = batch
        encoder.zero_grad()
        vqcrf.zero_grad()

        # Encode, then zero out padding positions via the mask.
        embbed, pre_kb_emb = encoder(src_seqs)
        embbed = embbed * mask.unsqueeze(-1)
        pre_kb_emb = (pre_kb_emb * mask.unsqueeze(-1)).permute(1, 0, 2)
        if not isinstance(pre_kb_emb, np.ndarray):
            pre_kb_emb = pre_kb_emb.data.cpu().numpy()

        # For every sequence, fetch the top `args.num_kb` KB vectors per token.
        # (Renamed from `item`, which shadowed the outer loop variable.)
        v_list = []
        for seq_emb in pre_kb_emb:
            _, I = index.search(seq_emb, args.num_kb)
            v_list.append(torch.from_numpy(e_v[I]))
        v = Variable(torch.stack(v_list, 0))
        if USE_CUDA:
            v = v.cuda()
        v = v * mask.transpose(1, 0).unsqueeze(-1).unsqueeze(-1)

        neglogscore = vqcrf(embbed, v, tgt_seqs, mask).mean()
        neglogscore.backward()
        torch.nn.utils.clip_grad_norm(vqcrf.parameters(), args.clip)
        torch.nn.utils.clip_grad_norm(encoder.parameters(), args.clip)
        encoder_optimizer.step()
        vqcrf_optimizer.step()

        epoch_loss += neglogscore.data[0]
        print_loss = neglogscore.data[0] / len(tgt_seqs)
        # NOTE(review): the last-batch check compares i against len(train_src),
        # not len(datapairslist) — confirm upstream that these are equal.
        if (i % print_every_train == 0 and i != 0) or (len_traintensorlist - 1 == i):
            using_time = time.time() - start_time
            print('| epoch %3d | %4d/%5d batches | ms/batch %5.5f | '
                  'loss %5.15f | ppl: %5.2f |}' %
                  (epoch_index, i, len_trainset // batch_size,
                   using_time * 1000 / print_every_train, print_loss,
                   math.exp(print_loss)))
            print_loss = 0
            start_time = time.time()

    epoch_loss = epoch_loss / len_trainset
    return epoch_loss
pin_data_drop = np.array(pin_data_dropid).astype('float32') #这里量会限制 #pin_data_drop[:10] d = len(pca_pin_data_drop[0]) pin_data_drop_new = np.ascontiguousarray(pca_pin_data_drop) nlist = 256 m = 3 # PQ才有 列方向划分个数,必须能被d整除, 特征向量分组,这个很影响速率 k = 10 res = [faiss.StandardGpuResources() for i in range(ngpus)] # first we get StandardGpuResources of each GPU # ngpu is the num of GPUs flat_config = [] for i in range(ngpus): cfg = faiss.GpuIndexIVFFlatConfig( ) #faiss.GpuIndexFlatConfig() faiss.GpuIndexIVFPQConfig() cfg.useFloat16 = False cfg.device = i flat_config.append(cfg) #indexes = [faiss.GpuIndexFlatL2(res[i],d,flat_config[i]) for i in range(ngpus)] #可行,速度快,不需要train,直接计算L2距离 #indexes = [faiss.GpuIndexIVFPQ(res[i],d,nlist, m,4,faiss.METRIC_L2,flat_config[i]) for i in range(ngpus)] indexes = [ faiss.GpuIndexIVFFlat(res[i], d, nlist, faiss.METRIC_L2, flat_config[i]) for i in range(ngpus) ] # then we make an Index array # useFloat16 is a boolean value index = faiss.IndexProxy()
def main(args):
    """Training entry point: per epoch, embed all images, mine hard negative
    triplets with faiss, then fine-tune the feature extractor on triplet loss.

    Expects `args` to carry: sm.scratch_dir, data, dset_name, gen, imsize,
    batch_size, num_workers, pretrained, arch, num_epochs — presumably set by
    an argument parser not visible in this chunk; confirm against the caller.
    """
    cudnn.benchmark = True
    # Derive all working directories under the scratch dir and create them.
    args.dset_root = os.path.join(args.sm.scratch_dir, args.data, args.dset_name)
    args.gen_root = os.path.join(args.sm.scratch_dir, args.gen)
    args.ckpt_dir = os.path.join(args.gen_root, 'ckpt')
    utils.mkdir_p(args.gen_root)
    utils.mkdir_p(args.ckpt_dir)
    # print(arg.dset_root, os.path.exists(args.dset_root))

    # Transforms: train adds a random horizontal flip; both resize, convert
    # to tensor and normalize with the standard ImageNet statistics.
    transforms_train = transforms.Compose([
        transforms.Resize(args.imsize),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    transforms_val = transforms.Compose([
        transforms.Resize(args.imsize),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    # Dataset
    args.dset_root_train = os.path.join(args.dset_root, 'train')
    train_dset = ImageFolderWithFilenames(args.dset_root_train, transforms_train)
    args.dset_root_val = os.path.join(args.dset_root, 'val')
    val_dset = ImageFolderWithFilenames(args.dset_root_val, transforms_val)

    # Data Loader
    train_loader = data.DataLoader(train_dset, batch_size=args.batch_size,
                                   shuffle=True, num_workers=args.num_workers,
                                   pin_memory=True)
    val_loader = data.DataLoader(val_dset, batch_size=args.batch_size,
                                 shuffle=False, num_workers=args.num_workers,
                                 pin_memory=True)
    # Uncomment to test data loader
    # itr = iter(train_loader)
    # img, target = next(itr)
    # print(img)
    # print(target)
    # pdb.set_trace()

    # Model: torchvision backbone (optionally with pretrained weights),
    # wrapped in a feature extractor exposing embeddings instead of logits.
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()
    model = model.cuda()
    model_fe = ResNetFeatureExtractor(model)

    # swap=True enables the distance-swap variant of the triplet loss.
    criterion = nn.TripletMarginLoss(margin=1.0, p=2, eps=1e-6, swap=True)
    criterion = criterion.cuda()
    optimizer = optim.Adam(model_fe.parameters(), lr=1e-3, betas=(0.9, 0.999),
                           eps=1e-8, weight_decay=0)
    # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
    #                                                  mode='min',
    #                                                  factor=0.1,
    #                                                  patience=10,
    #                                                  verbose=True,
    #                                                  threshold=1e-4,
    #                                                  threshold_mode='rel',
    #                                                  cooldown=0,
    #                                                  min_lr=0,
    #                                                  eps=1e-8
    #                                                  )

    # Track the norm of every parameter matrix per epoch (sanity check that
    # weights actually update; printed after training).
    num_param_matrix = len(list(model_fe.parameters()))
    pnorm = np.zeros((args.num_epochs, num_param_matrix))

    # NOTE(review): the extractor is put in eval mode here and never switched
    # back inside this function; presumably `learn` toggles train mode — confirm.
    model_fe.train(False)
    args.emb_fv_file = os.path.join(
        args.ckpt_dir, 'fv_{:s}_{:s}.npy'.format(args.dset_name, args.arch))

    # Shared faiss GPU resources/configs reused by mine_triplets every epoch.
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = 0
    ivfflat_config = faiss.GpuIndexIVFFlatConfig()

    for epoch in range(args.num_epochs):
        logging.error('Epoch {:04d}'.format(epoch))

        ## STEP 1 : EMBED IMAGES INTO FEATURE VECTORS USING CURRENT MODEL
        train_embedding, train_img_fnames, train_embedding_id, train_index_id, train_embedding_neg_id, train_index_neg_id = \
            embed(args, train_dset, train_loader, model_fe)
        val_embedding, val_img_fnames, val_embedding_id, val_index_id, val_embedding_neg_id, val_index_neg_id = \
            embed(args, val_dset, val_loader, model_fe)

        ## STEP 2: HARD NEGATIVE TRIPLET MINING
        train_hard = mine_triplets(args, res, flat_config, ivfflat_config,
                                   train_embedding_id, train_index_id,
                                   train_embedding_neg_id, train_index_neg_id)
        val_hard = mine_triplets(args, res, flat_config, ivfflat_config,
                                 val_embedding_id, val_index_id,
                                 val_embedding_neg_id, val_index_neg_id)

        ## STEP 3: TRAIN / EVAL NETWORK
        train_loss_epoch = learn(args, True, model_fe, criterion, optimizer,
                                 train_hard, train_dset)
        val_loss_epoch = learn(args, False, model_fe, criterion, optimizer,
                               val_hard, val_dset)
        print('Epoch {:d} : Train Loss = {:f} Val Loss = {:f}'.format(
            epoch, train_loss_epoch, val_loss_epoch))
        # print('Epoch {:d} : Train Loss = {:f}'.format(epoch, train_loss_epoch))

        # Norm of parameter matrices at each layer
        for pnorm_idx, param in enumerate(list(model_fe.parameters())):
            pnorm[epoch, pnorm_idx] = param.norm().clone().data[0]
        # scheduler.step()
        gc.collect()

    # Check to see if weights update
    print(pnorm)