Example #1
    def forward(  # type: ignore
            self,
            anchors: TextFieldTensors,
            positives: TextFieldTensors = None) -> Dict[str, torch.Tensor]:
        """
        # Parameters

        anchors : TextFieldTensors
            From a `TextField`. The anchor spans.
        positives : TextFieldTensors, optional (default = None)
            From a `TextField`. The positive spans paired with each anchor, used to compute the
            contrastive loss during training.

        # Returns

        An output dictionary consisting of:

        embeddings : torch.FloatTensor
            A tensor of shape `(batch_size, self._seq2vec_encoder.get_output_dim())`, which is the
            representation for the given `anchors` output by the encoder. The encoder is composed of
            `self._text_field_embedder` and `self._seq2vec_encoder`, in that order.
        projections : torch.FloatTensor
            A tensor of shape `(batch_size, self._feedforward.get_output_dim())`, which is the
            non-linear projection of the learned representation for the given `anchors` output
            by the projection head. This field will only be included if `self._feedforward` is not
            `None`.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimized.
        """
        output_dict: Dict[str, torch.Tensor] = {}

        # If multiple anchors were sampled, we need to unpack them.
        anchors = unpack_batch(anchors)
        # Mask anchor input ids and get labels required for MLM.
        if self.training and self._masked_language_modeling:
            anchors = mask_tokens(anchors, self._tokenizer)
        # This is the textual representation learned by a model and used for downstream tasks.
        masked_lm_loss, embedded_anchors = self._forward_internal(
            anchors, output_dict)

        # If positives are supplied by DataLoader and we are training, compute a contrastive loss.
        if self.training:
            output_dict["loss"] = 0
            # TODO: We should raise a ValueError if no positives are provided but `self._loss` is
            # not `None`.
            if self._loss is not None:
                # Like the anchors, if we sampled multiple positives, we need to unpack them.
                positives = unpack_batch(positives)
                # Positives are represented by their mean embedding a la
                # https://arxiv.org/abs/1902.09229.
                _, embedded_positives = self._forward_internal(positives)
                # Shape: (num_anchors, num_positives_per_anchor, embedding_dim)
                embedded_positives = torch.reshape(
                    embedded_positives,
                    (embedded_anchors.size(0), -1, embedded_anchors.size(-1)),
                )
                # Shape: (num_anchors, embedding_dim)
                embedded_positives = torch.mean(embedded_positives, dim=1)

                # If we are training on multiple GPUs using DistributedDataParallel, then a naive
                # application would result in 2 * (batch_size/n_gpus - 1) number of negatives per
                # GPU. To avoid this, we need to gather the anchors/positives from each replica on
                # every other replica in order to generate the correct number of negatives,
                # i.e. 2 * (batch_size - 1), before computing the contrastive loss.
                embedded_anchors, embedded_positives = all_gather_anchor_positive_pairs(
                    embedded_anchors, embedded_positives)
                # Get embeddings into the format that the PyTorch Metric Learning library expects
                # before computing the loss (with an optional mining step).
                embeddings, labels = self._loss.get_embeddings_and_labels(
                    embedded_anchors, embedded_positives)
                indices_tuple = self._miner(
                    embeddings, labels) if self._miner is not None else None
                contrastive_loss = self._loss(embeddings, labels,
                                              indices_tuple)
                # Loss needs to be scaled by world size when using DistributedDataParallel
                # See: https://amsword.medium.com/gradient-backpropagation-with-torch-distributed-all-gather-9f3941a381f8
                if util.is_distributed() and self._scale_fix:
                    contrastive_loss *= dist.get_world_size()
                output_dict["loss"] += contrastive_loss
            # Loss may be derived from contrastive objective, MLM objective or both.
            if masked_lm_loss is not None:
                output_dict["loss"] += masked_lm_loss

        return output_dict
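
For readers unfamiliar with the convention used above, here is a minimal sketch of the pairing step that the PyTorch Metric Learning library expects. The function get_embeddings_and_labels below is a hypothetical re-implementation of what self._loss.get_embeddings_and_labels appears to do (concatenate anchors and positives and give each anchor/positive pair a shared integer label); the choice of NTXentLoss and MultiSimilarityMiner is purely illustrative, since the example does not say which loss or miner self._loss and self._miner wrap.

import torch
from pytorch_metric_learning import losses, miners


def get_embeddings_and_labels(anchors: torch.Tensor, positives: torch.Tensor):
    """Sketch (assumed): pair anchors and positives for PyTorch Metric Learning.

    `anchors` and `positives` have shape (batch_size, embedding_dim); row i of
    `positives` is the positive example for row i of `anchors`.
    """
    embeddings = torch.cat([anchors, positives], dim=0)
    # Anchor i and positive i share a label, so the library treats them as a
    # positive pair and every other row in the batch as a negative.
    indices = torch.arange(anchors.size(0), device=anchors.device)
    labels = torch.cat([indices, indices], dim=0)
    return embeddings, labels


# Toy usage with a real loss and miner from pytorch-metric-learning.
anchors = torch.randn(4, 8)
positives = torch.randn(4, 8)
embeddings, labels = get_embeddings_and_labels(anchors, positives)
miner = miners.MultiSimilarityMiner()
loss_fn = losses.NTXentLoss(temperature=0.05)
indices_tuple = miner(embeddings, labels)
loss = loss_fn(embeddings, labels, indices_tuple)
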
Example #2
    def forward(  # type: ignore
            self,
            anchors: TextFieldTensors,
            positives: TextFieldTensors = None,
            label: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        """
        # Parameters

        anchors : TextFieldTensors
            From a `TextField`. The anchor spans.
        positives : TextFieldTensors, optional (default = None)
            From a `TextField`. The positive spans paired with each anchor, used to compute the
            contrastive loss during training.
        label : torch.LongTensor, optional (default = None)
            Binary labels used to partition the gathered anchor/positive pairs into two groups
            before computing the contrastive loss.

        # Returns

        An output dictionary consisting of:

        embeddings : torch.FloatTensor
            A tensor of shape `(batch_size, self._seq2vec_encoder.get_output_dim())`, which is the
            representation for the given `anchors` output by the encoder. The encoder is composed of
            `self._text_field_embedder` and `self._seq2vec_encoder`, in that order.
        projections : torch.FloatTensor
            A tensor of shape `(batch_size, self._feedforward.get_output_dim())`, which is the
            non-linear projection of the learned representation for the given `anchors` output
            by the projection head. This field will only be included if `self._feedforward` is not
            `None`.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimized.
        """
        output_dict: Dict[str, torch.Tensor] = {}

        # If multiple anchors were sampled, we need to unpack them.
        anchors = unpack_batch(anchors)
        # Mask anchor input ids and get labels required for MLM.
        if self.training and self._masked_language_modeling:
            anchors = mask_tokens(anchors, self._tokenizer)
        # This is the textual representation learned by a model and used for downstream tasks.
        masked_lm_loss, embedded_anchors = self._forward_internal(anchors, output_dict)
        # If positives are supplied by DataLoader and we are training, compute a contrastive loss.
        if self.training:
            output_dict["loss"] = 0
            # TODO: We should raise a ValueError if no positives are provided but `self._loss` is
            # not `None`.
            if self._loss is not None:
                # Like the anchors, if we sampled multiple positives, we need to unpack them.
                positives = unpack_batch(positives)
                # Positives are represented by their mean embedding a la
                # https://arxiv.org/abs/1902.09229.
                _, embedded_positives = self._forward_internal(positives)
                # Average the multiple positives sampled for each anchor.
                # Shape: (num_anchors, embedding_dim)
                embedded_positive_chunks = []
                for chunk in torch.chunk(
                        embedded_positives, chunks=embedded_anchors.size(0), dim=0):
                    embedded_positive_chunks.append(torch.mean(chunk, dim=0))
                embedded_positives = torch.stack(embedded_positive_chunks)
                
                # If we are training on multiple GPUs using DistributedDataParallel, then a naive
                # application would result in 2 * (batch_size/n_gpus - 1) number of negatives per
                # GPU. To avoid this, we need to gather the anchors/positives from each replica on
                # every other replica in order to generate the correct number of negatives,
                # i.e. 2 * (batch_size - 1), before computing the contrastive loss.
                embedded_anchors, embedded_positives = all_gather_anchor_positive_pairs(
                    embedded_anchors, embedded_positives
                )
                # Partition the gathered pairs by their binary label before computing the loss.
                label_1_indices = torch.nonzero(label == 1).view(-1)
                label_0_indices = torch.nonzero(label == 0).view(-1)
                embedded_positives_1 = embedded_positives[label_1_indices]
                embedded_positives_0 = embedded_positives[label_0_indices]
                embedded_anchors_1 = embedded_anchors[label_1_indices]
                embedded_anchors_0 = embedded_anchors[label_0_indices]
                # Get embeddings into the format that the PyTorch Metric Learning library expects
                # before computing the loss (with an optional mining step). The loss is only
                # computed when both groups are non-empty.
                if embedded_positives_1.size(0) > 0 and embedded_positives_0.size(0) > 0:
                    embeddings, labels, parties = self._loss.get_embeddings_and_labels(
                        embedded_anchors_0, embedded_positives_0,
                        embedded_anchors_1, embedded_positives_1)
                    indices_tuple = self._miner(
                        embeddings, labels) if self._miner is not None else None
                    output_dict["loss"] += self._loss(
                        embeddings, labels, indices_tuple=indices_tuple, parties=parties)
                else:
                    # One of the groups is empty, so contribute a zero contrastive loss on the
                    # same device as the embeddings rather than a hard-coded GPU.
                    output_dict["loss"] += torch.zeros(1, device=embedded_anchors.device)
            # Loss may be derived from contrastive objective, MLM objective or both.
            if masked_lm_loss is not None:
                output_dict["loss"] += masked_lm_loss

        return output_dict
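
To make the label-based split in Example #2 concrete, the toy snippet below shows what the torch.nonzero(label == 1) indexing produces: the gathered anchor/positive embeddings are partitioned into two groups, which the loss then receives as parties. All tensors and sizes here are invented for illustration.

import torch

# Pretend we gathered 5 anchor/positive pairs with binary labels.
label = torch.tensor([1, 0, 1, 1, 0])
embedded_anchors = torch.randn(5, 8)
embedded_positives = torch.randn(5, 8)

# nonzero(...) returns indices of shape (n, 1), so flatten them to (n,).
label_1_indices = torch.nonzero(label == 1).view(-1)  # tensor([0, 2, 3])
label_0_indices = torch.nonzero(label == 0).view(-1)  # tensor([1, 4])

embedded_anchors_1 = embedded_anchors[label_1_indices]      # shape (3, 8)
embedded_anchors_0 = embedded_anchors[label_0_indices]      # shape (2, 8)
embedded_positives_1 = embedded_positives[label_1_indices]  # shape (3, 8)
embedded_positives_0 = embedded_positives[label_0_indices]  # shape (2, 8)

# If either group is empty, Example #2 skips the contrastive term and adds a zero loss instead.
assert embedded_anchors_1.size(0) > 0 and embedded_anchors_0.size(0) > 0
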
Example #3
    def forward(  # type: ignore
        self,
        anchors: TextFieldTensors,
        positives: TextFieldTensors = None,
        difficulty: LabelField = None,
    ) -> Dict[str, torch.Tensor]:
        """
        # Parameters

        anchors : TextFieldTensors
            From a `TextField`. The anchor spans.
        positives : TextFieldTensors, optional (default = None)
            From a `TextField`. The positive spans paired with each anchor, used to compute the
            contrastive loss during training.
        difficulty : LabelField, optional (default = None)
            A difficulty value for the batch, forwarded to `self._forward_internal` as
            `difficulty_step` (`-100` when no difficulty is provided).

        # Returns

        An output dictionary consisting of:

        embeddings : torch.FloatTensor
            A tensor of shape `(batch_size, self._seq2vec_encoder.get_output_dim())`, which is the
            representation for the given `anchors` output by the encoder. The encoder is composed of
            `self._text_field_embedder` and `self._seq2vec_encoder`, in that order.
        projections : torch.FloatTensor
            A tensor of shape `(batch_size, self._feedforward.get_output_dim())`, which is the
            non-linear projection of the learned representation for the given `anchors` output
            by the projection head. This field will only be included if `self._feedforward` is not
            `None`.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimized.
        """
        output_dict: Dict[str, torch.Tensor] = {}
        # If multiple anchors were sampled, we need to unpack them.
        anchors = unpack_batch(anchors)
        # print("anchor token len is ", len(anchors["tokens"]))
        # Mask anchor input ids and get labels required for MLM.
        if self.training and self._masked_language_modeling:
            anchors = mask_tokens(anchors, self._tokenizer)
        # This is the textual representation learned by a model and used for downstream tasks.
        masked_lm_loss, embedded_anchors = self._forward_internal(
            anchors, -1, output_dict=output_dict)
        # A per-batch curriculum/difficulty signal from the reader; -100 means none was provided.
        if difficulty is not None:
            difficulty_step = int(difficulty[0])
        else:
            difficulty_step = -100
        # If positives are supplied by DataLoader and we are training, compute a contrastive loss.
        if self.training:
            output_dict["loss"] = 0
            # TODO: We should raise a ValueError if no positives are provided but `self._loss` is
            # not `None`.
            if self._loss is not None:
                if len(self.augment) == 0:
                    # Like the anchors, if we sampled multiple positives, we need to unpack them.
                    positives = unpack_batch(positives)
                    # Positives are represented by their mean embedding a la
                    # https://arxiv.org/abs/1902.09229.
                    _, embedded_positives = self._forward_internal(
                        positives, -1, difficulty_step=difficulty_step)
                    # Average the multiple positives sampled for each anchor.
                    # Shape: (num_anchors, embedding_dim)
                    embedded_positive_chunks = []
                    for chunk in torch.chunk(embedded_positives,
                                             chunks=embedded_anchors.size(0),
                                             dim=0):
                        embedded_positive_chunks.append(torch.mean(chunk, dim=0))
                    embedded_positives = torch.stack(embedded_positive_chunks)
                    # If we are training on multiple GPUs using DistributedDataParallel, then a naive
                    # application would result in 2 * (batch_size/n_gpus - 1) number of negatives per
                    # GPU. To avoid this, we need to gather the anchors/positives from each replica on
                    # every other replica in order to generate the correct number of negatives,
                    # i.e. 2 * (batch_size - 1), before computing the contrastive loss.
                    embedded_anchors, embedded_positives = all_gather_anchor_positive_pairs(
                        embedded_anchors, embedded_positives)
                else:
                    # Generate positives on the fly by applying a randomly chosen augmentation to
                    # the anchors.
                    augment = np.random.choice(self.augment, 1)[0]
                    _, embedded_positives = self._forward_internal(
                        anchors, augment, difficulty_step=difficulty_step)
                    embedded_anchors, embedded_positives = all_gather_anchor_positive_pairs(
                        embedded_anchors, embedded_positives)
                # print("embedded_anchors", embedded_anchors)
                # print("embedded_positives",embedded_positives)
                # cos = nn.CosineSimilarity()
                # output = cos(embedded_anchors, embedded_positives)
                # print("cosine similarity is", output)
                embeddings, labels = self._loss.get_embeddings_and_labels(
                    embedded_anchors, embedded_positives)
                indices_tuple = self._miner(
                    embeddings, labels) if self._miner is not None else None
                output_dict["loss"] += self._loss(embeddings, labels,
                                                  indices_tuple)
                # output_dict["loss"] += (self._loss(embeddings, labels, indices_tuple))*1/2

                # embeddings2, labels2 = self._loss.get_embeddings_and_labels(
                #     embedded_anchors, embedded_positives2
                # )
                # indices_tuple2 = self._miner(embeddings2, labels2) if self._miner is not None else None
                # output_dict["loss"] += (self._loss(embeddings2, labels2, indices_tuple2))*1/2
                # print("contrastive loss is ", output_dict["loss"], self.iteration)
            # Loss may be derived from contrastive objective, MLM objective or both.
            if masked_lm_loss is not None:
                # print("mlm loss is ", masked_lm_loss)
                output_dict["loss"] += masked_lm_loss

        return output_dict
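
All three examples call all_gather_anchor_positive_pairs so that, under DistributedDataParallel, every replica sees the full 2 * (batch_size - 1) negatives. Its implementation is not shown here; the sketch below is one plausible way to write such a helper, assuming it uses torch.distributed.all_gather and swaps the local tensors back in so gradients still flow on each replica (plain all_gather does not propagate gradients, which is also why Example #1 rescales the loss by the world size when self._scale_fix is enabled).

import torch
import torch.distributed as dist


def all_gather_anchor_positive_pairs(anchors: torch.Tensor, positives: torch.Tensor):
    """Sketch (assumed implementation): gather anchor/positive pairs from all replicas.

    Returns tensors of shape (world_size * local_batch_size, embedding_dim).
    """
    # Outside of distributed training there is nothing to gather.
    if not (dist.is_available() and dist.is_initialized()):
        return anchors, positives

    world_size = dist.get_world_size()
    rank = dist.get_rank()

    gathered_anchors = [torch.zeros_like(anchors) for _ in range(world_size)]
    gathered_positives = [torch.zeros_like(positives) for _ in range(world_size)]
    dist.all_gather(gathered_anchors, anchors)
    dist.all_gather(gathered_positives, positives)

    # all_gather does not propagate gradients, so re-insert this replica's own
    # tensors to keep its part of the autograd graph intact.
    gathered_anchors[rank] = anchors
    gathered_positives[rank] = positives

    return torch.cat(gathered_anchors, dim=0), torch.cat(gathered_positives, dim=0)
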