def get_losses_for_batch(self, batch):
    indices, inputs1, inputs2, _ = batch
    # query features come from the gradient-tracked encoder
    outputs1 = self.forward(inputs1)
    with torch.no_grad():
        # key features come from the momentum (EMA) encoder
        self._momentum_update_key_encoder()
        if self.use_ddp or self.use_ddp2:
            # shuffle across GPUs to avoid batch-norm information leakage
            inputs2, idx_unshuffle = self._batch_shuffle_ddp(inputs2)
        outputs2 = self.model_k(inputs2)
        if self.use_ddp or self.use_ddp2:
            outputs2 = self._batch_unshuffle_ddp(outputs2, idx_unshuffle)
    loss_fn = MoCo(outputs1, outputs2, self.moco_queue.clone().detach(),
                   t=self.config.loss_params.t)
    loss = loss_fn.get_loss()
    with torch.no_grad():
        # push the new keys into the negative queue and refresh the memory bank
        outputs2 = l2_normalize(outputs2, dim=1)
        self._dequeue_and_enqueue(outputs2)
        outputs1 = l2_normalize(outputs1, dim=1)
        self.memory_bank.update(indices, outputs1)
    return loss
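# `_momentum_update_key_encoder` is called above but not shown. A minimal
# sketch of the usual exponential-moving-average update; the momentum
# attribute `self.m` (e.g. 0.999) is an assumption, not taken from this repo:
@torch.no_grad()
def _momentum_update_key_encoder(self):
    for param_q, param_k in zip(self.model.parameters(),
                                self.model_k.parameters()):
        # key weights drift slowly toward the query weights
        param_k.data = param_k.data * self.m + param_q.data * (1. - self.m)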
def __init__(self, outputs1, outputs2, queue, t=0.07):
    super().__init__()
    self.outputs1 = l2_normalize(outputs1, dim=1)  # queries
    self.outputs2 = l2_normalize(outputs2, dim=1)  # keys
    self.queue = queue.detach()                    # (k x out_dim) negatives
    self.t = t                                     # softmax temperature
    self.k = queue.size(0)
    self.device = self.outputs1.device
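# The constructor above only stores its inputs; `get_loss` is not shown. A
# minimal sketch following the standard MoCo InfoNCE formulation -- an
# assumption about the missing body, not necessarily this repo's exact code
# (assumes `import torch` and `import torch.nn.functional as F`):
def get_loss(self):
    # positive logit: each query against its matching key -- shape (n, 1)
    l_pos = torch.sum(self.outputs1 * self.outputs2, dim=1, keepdim=True)
    # negative logits against the queue -- shape (n, k)
    l_neg = self.outputs1 @ self.queue.t()
    logits = torch.cat([l_pos, l_neg], dim=1) / self.t
    # the positive is always column 0
    labels = torch.zeros(logits.size(0), dtype=torch.long, device=self.device)
    return F.cross_entropy(logits, labels)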
def get_losses_for_batch(self, batch):
    indices, inputs1, inputs2, _ = batch
    outputs1 = self.forward(inputs1)
    outputs2 = self.forward(inputs2)
    loss_fn = SimCLR(outputs1, outputs2, t=self.config.loss_params.t)
    loss = loss_fn.get_loss()
    with torch.no_grad():  # for nearest neighbor
        new_data_memory = (l2_normalize(outputs1, dim=1) +
                           l2_normalize(outputs2, dim=1)) / 2.
        self.memory_bank.update(indices, new_data_memory)
    return loss
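# The averaged embeddings stored above are typically consumed by a
# nearest-neighbor probe of representation quality. A rough sketch of such a
# lookup -- `knn_labels` and its arguments are hypothetical, not from this repo:
def knn_labels(query, bank, train_labels, k=5):
    # query: (n, dim) unit-norm embeddings; bank: (size, dim), also unit norm
    sims = query @ bank.t()           # cosine similarity to every stored example
    _, idx = sims.topk(k, dim=1)      # k nearest stored examples per query
    return train_labels[idx]          # (n, k) candidate labels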
def _create(self):
    # initialize random weights uniformly in [-std_dev, std_dev]
    mb_init = torch.rand(self.size, self.dim, device=self.device)
    std_dev = 1. / np.sqrt(self.dim / 3)
    mb_init = mb_init * (2 * std_dev) - std_dev
    # L2-normalize so that every row has unit norm
    mb_init = l2_normalize(mb_init, dim=1)
    return mb_init.detach()  # detach so it is not trainable
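# `at_idxs` and `update` are called on the memory bank elsewhere in this file
# but not shown. A minimal sketch of how they might look, assuming the tensor
# returned by `_create` is stored as `self._bank` (the attribute name is a
# guess):
def at_idxs(self, idxs):
    # fetch the stored embeddings for a batch of dataset indices
    return self._bank[idxs]

def update(self, idxs, new_vals):
    # overwrite the rows for these dataset indices in place
    self._bank[idxs] = new_vals.detach()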
def __init__(self, indices, outputs, memory_bank, k=4096, t=0.07, m=0.5):
    super().__init__()
    self.k, self.t, self.m = k, t, m  # negatives, temperature, memory momentum
    self.indices = indices.detach()
    self.outputs = l2_normalize(outputs, dim=1)
    self.memory_bank = memory_bank
    self.device = outputs.device
    self.data_len = memory_bank.size
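# As with the MoCo loss, `get_loss` is not shown here. A sketch of a standard
# instance-discrimination (NPID-style) loss with k negatives sampled from the
# memory bank -- an assumption about the missing body, and it further assumes
# `at_idxs` accepts 2-D index tensors and `torch.nn.functional` is imported
# as `F`:
def get_loss(self):
    batch_size = self.outputs.size(0)
    # witness score for each example against its own memory-bank entry
    pos = torch.sum(self.outputs * self.memory_bank.at_idxs(self.indices),
                    dim=1, keepdim=True)
    # k random memory-bank rows serve as negatives for every example
    neg_idx = torch.randint(self.data_len, (batch_size, self.k),
                            device=self.device)
    neg = torch.bmm(self.memory_bank.at_idxs(neg_idx),      # (n, k, dim)
                    self.outputs.unsqueeze(2)).squeeze(2)   # -> (n, k)
    logits = torch.cat([pos, neg], dim=1) / self.t
    labels = torch.zeros(batch_size, dtype=torch.long, device=self.device)
    return F.cross_entropy(logits, labels)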
def __init__(self, config):
    super().__init__(config)
    self.model_k = self.create_encoder()
    for param_q, param_k in zip(self.model.parameters(),
                                self.model_k.parameters()):
        param_k.data.copy_(param_q.data)  # initialize key encoder from query encoder
        param_k.requires_grad = False     # updated by momentum, not by SGD
    # create queue of negatives (k x out_dim)
    moco_queue = torch.randn(
        self.config.loss_params.k,
        self.config.model_params.out_dim,
    )
    self.register_buffer("moco_queue", moco_queue)
    self.moco_queue = l2_normalize(moco_queue, dim=1)
    self.register_buffer("moco_queue_ptr", torch.zeros(1, dtype=torch.long))
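# `_dequeue_and_enqueue`, called from `get_losses_for_batch`, is also not
# shown. A sketch of the standard ring-buffer update over `moco_queue`,
# assuming the queue size is divisible by the batch size (under DDP the keys
# would normally be gathered across GPUs first):
@torch.no_grad()
def _dequeue_and_enqueue(self, keys):
    batch_size = keys.size(0)
    ptr = int(self.moco_queue_ptr)
    # overwrite the oldest keys at the current pointer position
    self.moco_queue[ptr:ptr + batch_size] = keys
    # advance the pointer, wrapping around the queue
    self.moco_queue_ptr[0] = (ptr + batch_size) % self.moco_queue.size(0)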
if __name__ == "__main__":
    if len(sys.argv) > 1:
        checkpoint_dir = sys.argv[1]
        print('loading from checkpoint in {}'.format(
            constant.SAVE_DIR + '/' + checkpoint_dir))
        checkpoint = load_checkpoint(checkpoint=checkpoint_dir)
        args = checkpoint['args']
        args.embedding_size = args.glove_embedding_size + args.other_embedding_size
        # args is an argparse Namespace, so go through vars() to iterate it
        state = {k: v for k, v in vars(args).items()}
        print(args)
        dm = datamanager.TextDataManager(args)
        args.n_embed = dm.vocab.n_words
        model = text_model.TextClassifier(config=args)
        model.glove_embed.weight.data = l2_normalize(
            torch.Tensor(dm.vocab.get_glove_embed_vectors()))
        model.other_embed.weight.data = l2_normalize(
            torch.Tensor(dm.vocab.get_medw2v_embed_vectors()))
        if args.cuda:
            model.cuda()
        # number of trainable parameters
        print("number of trainable parameters found {}".format(sum(
            param.nelement() for param in model.parameters()
            if param.requires_grad)))
        # up-weight each positive class in proportion to its rarity
        pos_weight = torch.sum(1 - dm.train_labels, dim=0) / torch.sum(dm.train_labels, dim=0)
        pos_weight = torch.clamp(pos_weight, min=0.1, max=10)
        if state['balance_loss']:
            criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
        else:
            criterion = nn.BCEWithLogitsLoss()  # assumed default: unweighted loss
def __init__(self, outputs1, outputs2, t=0.07):
    super().__init__()
    self.outputs1 = l2_normalize(outputs1, dim=1)
    self.outputs2 = l2_normalize(outputs2, dim=1)
    self.t = t
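# `get_loss` for this class is not shown either. A simplified NT-Xent-style
# sketch in which each example's positive is its other view and the remaining
# views in the batch act as negatives -- an assumption, not necessarily the
# exact formulation used here (assumes `torch.nn.functional` imported as `F`):
def get_loss(self):
    batch_size = self.outputs1.size(0)
    # (n, n) similarity matrix between the two views; the diagonal holds positives
    sim = self.outputs1 @ self.outputs2.t() / self.t
    labels = torch.arange(batch_size, device=self.outputs1.device)
    return F.cross_entropy(sim, labels)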
def updated_new_data_memory(self):
    data_memory = self.memory_bank.at_idxs(self.indices)
    # exponential moving average between the stored entry and the new output
    new_data_memory = data_memory * self.m + (1 - self.m) * self.outputs
    return l2_normalize(new_data_memory, dim=1)
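# Typical usage in a training step: compute the loss, then refresh the memory
# bank with the momentum-averaged embeddings. A sketch in which the loss class
# is called `NCE` -- a hypothetical name, since the real one is not shown:
loss_fn = NCE(indices, outputs, self.memory_bank,
              k=self.config.loss_params.k, t=self.config.loss_params.t,
              m=self.config.loss_params.m)
loss = loss_fn.get_loss()
with torch.no_grad():
    self.memory_bank.update(indices, loss_fn.updated_new_data_memory())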