Example #1
    def __init__(self, vocab: Vocabulary):
        super().__init__()
        # Wrap the training-time Vocabulary in a TorchScript-compatible ScriptVocabulary.
        # The -1 arguments appear to be fallback indices used when a special token is absent.
        self.vocab = ScriptVocabulary(
            list(vocab),
            pad_idx=vocab.get_pad_index(-1),
            bos_idx=vocab.get_bos_index(-1),
            eos_idx=vocab.get_eos_index(-1),
            unk_idx=vocab.get_unk_index(-1),
        )
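The -1 arguments above appear to act as fallback indices returned when a special token is missing from the vocabulary. A minimal self-contained sketch of that pattern, using a hypothetical TinyVocab stand-in rather than PyText's actual Vocabulary class:

    # Hypothetical stand-in illustrating the fallback-index pattern; not PyText's API.
    class TinyVocab:
        def __init__(self, tokens, pad_token="__PAD__"):
            self.idx = {tok: i for i, tok in enumerate(tokens)}
            self.pad_token = pad_token

        def get_pad_index(self, fallback=-1):
            # Return the pad token's index, or the fallback when it was never added.
            return self.idx.get(self.pad_token, fallback)

    vocab = TinyVocab(["hello", "world"])   # no __PAD__ token present
    assert vocab.get_pad_index(-1) == -1    # the fallback index is returned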
Example #2
    def __init__(self, tokenizer: Tokenizer, vocab: Vocabulary, max_seq_len: int):
        super().__init__()
        self.tokenizer = tokenizer
        # TorchScript-compatible vocabulary: pad/unk indices are taken directly,
        # while bos/eos fall back to -1 when those special tokens are absent.
        self.vocab = ScriptVocabulary(
            list(vocab),
            pad_idx=vocab.get_pad_index(),
            bos_idx=vocab.get_bos_index(-1),
            eos_idx=vocab.get_eos_index(-1),
            unk_idx=vocab.get_unk_index(),
        )
        self.vocab_lookup = VocabLookup(self.vocab)
        self.max_seq_len = max_seq_len
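This wrapper pairs a tokenizer with a TorchScript vocabulary lookup and a maximum sequence length. The exact VocabLookup call signature is not shown here, so the sketch below only illustrates the intended tokenize, look up, truncate flow with hypothetical stand-ins (encode, token_to_id), not the PyText API:

    # Illustrative stand-ins only; PyText's Tokenizer/VocabLookup differ in detail.
    def encode(text, token_to_id, unk_id, max_seq_len):
        tokens = text.split()                                # stand-in for tokenizer.tokenize(text)
        ids = [token_to_id.get(t, unk_id) for t in tokens]   # stand-in for the vocabulary lookup
        return ids[:max_seq_len]                             # enforce max_seq_len

    ids = encode("hello brave new world", {"hello": 0, "world": 1}, unk_id=2, max_seq_len=3)
    assert ids == [0, 2, 2]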
Example #3
    def __init__(
        self,
        pretrained_embeddings_path: str,
        vocab: Vocabulary,
        embedding_dim: int,
        mlp_layer_dims: Optional[Sequence[int]] = None,
        lowercase_tokens: bool = False,
        skip_header: bool = True,
        delimiter: str = " ",
    ) -> None:
        super().__init__()
        pretrained_embedding = PretrainedEmbedding(
            pretrained_embeddings_path,
            lowercase_tokens=lowercase_tokens,
            skip_header=skip_header,
            delimiter=delimiter,
        )
        embeddings_weight = pretrained_embedding.initialize_embeddings_weights(
            vocab.idx,  # tensorizer.vocab.idx,
            vocab.unk_token,  # tensorizer.vocab.unk_token,
            embedding_dim,
            EmbedInitStrategy.RANDOM,
        )
        num_embeddings = len(vocab.idx)

        self.embedding = nn.Embedding(
            num_embeddings,
            embedding_dim,
            _weight=embeddings_weight,
            padding_idx=vocab.get_pad_index(),
        )

        # Initialize unk embedding with zeros
        # to guard the model against randomized decisions based on unknown words
        unk_token_idx = vocab.get_unk_index()
        if unk_token_idx >= 0:
            self.embedding.weight.data[unk_token_idx].fill_(0.0)

        # Create MLP layers
        if mlp_layer_dims is None:
            mlp_layer_dims = []

        self.mlp = nn.Sequential(
            *(
                nn.Sequential(nn.Linear(m, n), nn.ReLU())
                for m, n in zip([embedding_dim] + list(mlp_layer_dims), mlp_layer_dims)
            )
        )
        self.output_dim = mlp_layer_dims[-1] if mlp_layer_dims else embedding_dim
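The generator expression above pairs each layer's input width with its output width via zip([embedding_dim] + list(mlp_layer_dims), mlp_layer_dims), and output_dim falls back to embedding_dim when no MLP layers are configured. A quick check of that arithmetic with illustrative values:

    embedding_dim, mlp_layer_dims = 300, [128, 64]
    pairs = list(zip([embedding_dim] + list(mlp_layer_dims), mlp_layer_dims))
    assert pairs == [(300, 128), (128, 64)]   # Linear(300 -> 128), Linear(128 -> 64)
    output_dim = mlp_layer_dims[-1] if mlp_layer_dims else embedding_dim
    assert output_dim == 64                   # with mlp_layer_dims=[] this would be 300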