import numpy as np
import transformers


def regular_encode(texts: list,
                   tokenizer: transformers.AutoTokenizer,
                   maxlen: int = 512,
                   multi_class: bool = True):
    """
    Encode sentences for input to Transformer models.

    :param texts: list of strings to be encoded
    :param tokenizer: tokenizer for encoding
    :param maxlen: maximum encoded sequence length in tokens; when multi_class is False it is
        also used as the character cutoff for the custom pre-truncation
    :param multi_class: if True, the tokenizer's default truncation is applied. If False, the
        last 8 characters of each string are auxiliary input, and a custom truncation that
        preserves them is applied.
    :return: numpy array of token IDs, one row per input string
    """
    # TODO: Intersphinx link to transformers.AutoTokenizer is failing. What's wrong with my docs/source/conf.py?
    if not multi_class:
        # If a string exceeds maxlen characters, keep the first maxlen-8 characters and
        # re-append the 8-character auxiliary input from the end of the string.
        texts = [
            text[:maxlen - 8] + text[-8:] if len(text) > maxlen else text
            for text in texts
        ]

    enc_di = tokenizer.batch_encode_plus(
        texts,
        return_attention_mask=False,
        return_token_type_ids=False,
        padding='max_length',  # pad_to_max_length is deprecated in recent transformers
        max_length=maxlen,
        truncation=True)  # drop tokens beyond maxlen

    return np.array(enc_di['input_ids'])
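

if __name__ == '__main__':
    # Minimal usage sketch, not part of the module's API: assumes the 'bert-base-uncased'
    # tokenizer is available locally or downloadable via transformers.AutoTokenizer.
    tok = transformers.AutoTokenizer.from_pretrained('bert-base-uncased')

    ids = regular_encode(['Hello world', 'Another example sentence'], tok, maxlen=16)
    print(ids.shape)  # (2, 16): each row is padded/truncated to maxlen token IDs

    # With multi_class=False, strings longer than maxlen characters keep their last
    # 8 characters (the auxiliary input) through the character-level pre-truncation.
    aux_ids = regular_encode(['x' * 40 + 'AUXDATA!'], tok, maxlen=16, multi_class=False)
    print(aux_ids.shape)  # (1, 16)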