def encode(self, sequence: str) -> torch.Tensor: """ Encodes a 'sequence'. :param sequence: String 'sequence' to encode. :return: torch.Tensor with Encoding of the `sequence`. """ sequence = TextEncoder.encode(self, sequence) return self.tokenizer(sequence, return_tensors="pt")["input_ids"][0]
def encode(self, sequence: str) -> torch.Tensor: """ Encodes a 'sequence'. :param sequence: String 'sequence' to encode. Returns: - torch.Tensor: Encoding of the 'sequence'. """ sequence = TextEncoder.encode(self, sequence) return self.encode_func(sequence)
def encode(self, sequence: str) -> torch.Tensor: """Encodes a 'sequence'. :param sequence: String 'sequence' to encode. :return: torch.Tensor with Encoding of the `sequence`. """ sequence = TextEncoder.encode(self, sequence) return self.tokenizer(sequence, truncation=True, max_length=256)["input_ids"]
def encode(self, sequence: str) -> torch.Tensor: """ Encodes a 'sequence'. :param sequence: String 'sequence' to encode. Returns: - torch.Tensor: Encoding of the 'sequence'. """ sequence = TextEncoder.encode(self, sequence) vector = self.tokenizer.encode(sequence) return torch.tensor(vector)
def encode(self, sequence: str) -> torch.Tensor: """Encodes a 'sequence'. :param sequence: String 'sequence' to encode. Returns: - torch.Tensor: Encoding of the 'sequence'. """ sequence = TextEncoder.encode(self, sequence) return torch.tensor( self.tokenizer(sequence, truncation=False)["input_ids"])
def encode_trackpos(self, sequence: str) -> Tuple[torch.Tensor, torch.Tensor]:
    """Encodes a 'sequence' and keeps the alignments with the respective tags.

    :param sequence: String 'sequence' to encode.

    Returns:
        - torch.Tensor: Encoding of the 'sequence'.
        - torch.Tensor: Alignment indexes.
    """
    sequence = TextEncoder.encode(self, sequence)
    tag_index, vector = [], [self._bos_index]
    for token in sequence.split():
        # Position of the first subword piece of this token.
        tag_index.append(len(vector))
        vector = vector + self.tokenizer.encode(token, add_special_tokens=False)
    vector.append(self._eos_index)
    return torch.tensor(vector), torch.tensor(tag_index)
def encode_trackpos(self, sequence: str) -> Tuple[torch.Tensor, torch.Tensor]:
    """Encodes a 'sequence' and keeps the alignments with the respective tags.

    :param sequence: String 'sequence' to encode.

    Returns:
        - torch.Tensor: Encoding of the 'sequence'.
        - torch.Tensor: Alignment indexes.
    """
    sequence = TextEncoder.encode(self, sequence)
    tag_index, vector = [], [self._bos_index]
    tokens = sequence.split()
    # Prepend whitespace to every token except the first so the byte-level BPE
    # produces the Ġ<token> (leading-space) pieces it would produce for the
    # full sentence.
    tokens = [tokens[0]] + [" " + token for token in tokens[1:]]
    for token in tokens:
        # Position of the first subword piece of this token.
        tag_index.append(len(vector))
        # Drop the BOS/EOS added by encode_func before concatenating.
        vector = vector + self.encode_func(token)[1:-1].tolist()
    vector.append(self._eos_index)
    return torch.tensor(vector), torch.tensor(tag_index)
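# Illustration (not part of the encoders above): a minimal sketch of how the
# alignment indexes returned by encode_trackpos can be used. Every entry of
# tag_index points at the first subword piece of a word, so word-level
# representations can be gathered from subword-level model outputs. All
# tensors below are made up for illustration.
import torch

# Hypothetical output of encode_trackpos for a 4-word sentence: subword ids
# (BOS at position 0, EOS at the end) and, for every word, the position of
# its first subword piece inside `vector`.
vector = torch.tensor([0, 1534, 88, 2387, 2388, 642, 2])  # [seq_len]
tag_index = torch.tensor([1, 2, 3, 5])                     # one entry per word

# Pretend these are the encoder's subword-level hidden states.
hidden_states = torch.randn(vector.size(0), 8)             # [seq_len, hidden]

# Hidden state of the first subword of each word, e.g. the input to a
# word-level tag classifier.
word_states = hidden_states.index_select(0, tag_index)     # [n_words, hidden]
print(word_states.shape)  # torch.Size([4, 8])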