Example #1
0
    def prepare(self, batch):
        """
        Collate one batch of ``(txt, field, tf)`` samples into tensors.

        This function is called when one batch is built.

        :param batch: iterable of ``(txt, field, tf)`` triples. ``txt`` and
            ``field`` are sequences that get converted to int64 tensors;
            ``tf`` is either ``None`` or a sequence converted to float32.
        :return: tuple ``(text, field, tf, sizes)``. ``tf`` is ``None`` when
            no sample carried one. ``sizes`` holds ``len(txt)`` of every
            sample, raised to ``self.min_input_size`` when it is smaller.
        """
        textual_input = []
        field_input = []
        tf_input = []
        lengths = []

        for txt, field, tf in batch:
            textual_input.append(txt)
            field_input.append(field)

            # NOTE(review): samples without tf are skipped, so tf_input can
            # end up shorter than the batch if only some samples carry tf.
            # Presumably tf is all-or-nothing per dataset -- confirm.
            if tf is not None:
                tf_input.append(tf)

            lengths.append(len(txt))

        sizes = [max(length, self.min_input_size) for length in lengths]

        # The fast path skips padding, so it is only valid when the RAW text
        # lengths are identical and already reach the minimum size. Comparing
        # the clamped sizes instead (as was done before) wrongly accepted
        # batches whose texts were all shorter than min_input_size, producing
        # ragged or too-narrow tensors.
        can_stack = (bool(lengths)
                     and lengths[0] >= self.min_input_size
                     and all(length == lengths[0] for length in lengths))

        if can_stack:
            tf_tensor = (torch.tensor(tf_input, dtype=torch.float32)
                         if tf_input else None)

            return (torch.tensor(textual_input, dtype=torch.int64),
                    torch.tensor(field_input, dtype=torch.int64), tf_tensor,
                    torch.tensor(sizes))

        # General path: delegate padding to padSequences (presumably pads each
        # row with the given value up to at least minSize -- defined outside
        # this block).
        tf_tensor = None
        if tf_input:
            tf_tensor = torch.from_numpy(
                padSequences(
                    tf_input, 0.0, dtype='float32',
                    minSize=self.min_input_size))

        return (torch.from_numpy(
            padSequences(textual_input,
                         self.padding_idx,
                         dtype="int64",
                         minSize=self.min_input_size)),
                torch.from_numpy(
                    padSequences(field_input,
                                 self.field_padding_idx,
                                 dtype="int64",
                                 minSize=self.min_input_size)), tf_tensor,
                torch.tensor(sizes))
Example #2
0
    def prepare(self, batch):
        """
        Collate an ``(anchor, candidate)`` pair of sequence lists.

        This function is called when one batch is built.

        :param batch: two-element iterable ``(anchor, candidate)``, each a
            collection of sequences.
        :return: tuple ``(anchor_padded, anchor_lengths, candidate_padded,
            candidate_lengths)``; the padded entries are int64 tensors built
            from ``padSequences`` using ``self.padding_idx``.
        """
        anchor, candidate = batch

        anchor_padded = torch.from_numpy(
            padSequences(anchor, self.padding_idx, dtype="int64"))
        anchor_lengths = torch.tensor(list(map(len, anchor)))

        candidate_padded = torch.from_numpy(
            padSequences(candidate, self.padding_idx, dtype="int64"))
        candidate_lengths = torch.tensor(list(map(len, candidate)))

        return (anchor_padded, anchor_lengths, candidate_padded,
                candidate_lengths)
Example #3
0
 def prepare(self, batch):
     """
     Pad a batch of sequences and wrap the result in a one-element tuple.

     This function is called when one batch is built.

     :param batch: collection of sequences handed to ``padSequences``.
     :return: ``(padded,)`` where ``padded`` is an int64 tensor padded with
         ``self.padding_idx``; ``self.min_input_size`` is forwarded as
         ``minSize``.
     """
     padded = padSequences(batch,
                           self.padding_idx,
                           dtype="int64",
                           minSize=self.min_input_size)
     return (torch.from_numpy(padded), )
Example #4
0
 def prepare(self, batch):
     """
     Pad a batch of float sequences and report their original lengths.

     This function is called when one batch is built.

     :param batch: collection of sequences handed to ``padSequences``.
     :return: ``(padded, lengths)`` where ``padded`` is a float32 tensor
         padded with ``0.0`` (``self.min_input_size`` is forwarded as
         ``minSize``) and ``lengths`` holds the unclamped ``len`` of every
         sequence.
     """
     padded = torch.from_numpy(
         padSequences(batch,
                      0.0,
                      dtype="float32",
                      minSize=self.min_input_size))
     # Raw lengths on purpose: unlike the padded width, these are not raised
     # to min_input_size.
     lengths = torch.tensor(list(map(len, batch)))
     return (padded, lengths)
Example #5
0
 def prepare(self, batch):
     """
     Pad a batch of sequences and return it with clamped lengths.

     This function is called when one batch is built.

     :param batch: collection of sequences handed to ``padSequences``.
     :return: ``(padded, None, sizes)`` where ``padded`` is an int64 tensor
         padded with ``self.padding_idx``, the middle slot is always
         ``None``, and ``sizes`` holds each sequence length raised to
         ``self.min_input_size`` when it is smaller.
     """
     padded = torch.from_numpy(
         padSequences(batch,
                      self.padding_idx,
                      dtype="int64",
                      minSize=self.min_input_size))
     # Lengths never drop below the minimum input size.
     sizes = torch.tensor(
         [max(len(seq), self.min_input_size) for seq in batch])
     return (padded, None, sizes)
Example #6
0
    def prepare(self, batch):
        """
        Pad a batch of sequences and build the matching input mask.

        This function is called when one batch is built.

        :param batch: collection of sequences handed to ``padSequences``.
        :return: ``(padded, mask, lengths)`` where ``padded`` is an int64
            tensor padded with ``self.padding_idx``, ``mask`` has one row per
            sequence with ones over its first ``length`` positions and zeros
            elsewhere, and ``lengths`` holds each sequence length raised to
            ``self.min_input_size`` when it is smaller.
        """
        seq_sizes = np.asarray(
            [max(len(seq), self.min_input_size) for seq in batch])

        width = seq_sizes.max()
        mask = np.zeros((len(batch), width))
        # Mark every column index that lies before the row's length.
        mask[np.arange(width) < seq_sizes[:, None]] = 1

        padded = torch.from_numpy(
            padSequences(batch,
                         self.padding_idx,
                         dtype="int64",
                         minSize=self.min_input_size))

        return (padded, torch.tensor(mask), torch.tensor(seq_sizes))