Example #1
import torch
from torchtext.vocab import Vectors


def load_glove_embeddings(sentences):
    """
    Converts each word of the sentences to the respective Glove embeddings.

    :param sentences
    return:
    """

    # Load the GloVe vectors saved locally.
    glove_vectors = Vectors('glove.6B.300d.txt', './pretrained_weights/')

    # Convert the input sentences to embeddings.
    final_sentences = []
    batch_size = len(sentences)
    max_len = max([len(sentence) for sentence in sentences])
    for sentence in sentences:
        sentence_with_embeddings = glove_vectors.get_vecs_by_tokens(sentence)

        # Pad shorter sentences with zero vectors up to max_len.
        if len(sentence_with_embeddings) < max_len:
            padding = torch.zeros(
                [max_len - len(sentence_with_embeddings), 300]).float()
            sentence_with_embeddings = torch.cat(
                [sentence_with_embeddings, padding], dim=0)

        final_sentences.append(sentence_with_embeddings)
    return torch.stack(final_sentences).view(batch_size, max_len, 300)
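
A minimal usage sketch (the tokenized sentences are made-up inputs; it assumes glove.6B.300d.txt has already been downloaded to ./pretrained_weights/ as the function expects):

batch = [["the", "cat", "sat"], ["hello", "world"]]
embedded = load_glove_embeddings(batch)
print(embedded.shape)  # torch.Size([2, 3, 300])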
Example #2
import numpy as np
import pandas as pd
from torchtext.vocab import Vectors


def tfidf_fasttext_pretrained_vectorize(conf: dict, preprocessed_text: pd.Series, name: str):
    # Average the pretrained fastText vectors of each document's tokens into
    # a single 300-dimensional sentence embedding.
    vectors = Vectors(name='data/06_models/crawl-300d-2M.vec', cache='cache')
    sent_emb = preprocessed_text.apply(
        lambda text:
            vectors.get_vecs_by_tokens(text.split()).mean(axis=0)
    )
    X_fasttext = np.stack(sent_emb.values, axis=0)
    return X_fasttext
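
A hedged usage sketch (the sample texts and empty config dict are placeholders; it assumes crawl-300d-2M.vec is present at the path hard-coded above):

texts = pd.Series(["the quick brown fox", "jumps over the lazy dog"])
X = tfidf_fasttext_pretrained_vectorize({}, texts, "fasttext_pretrained")
print(X.shape)  # (2, 300): one averaged fastText embedding per document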
Example #3
import os

from torchtext.vocab import Vectors


def load_pretrained_embeddings(path, vocab=None):
    """ Returns an object with the the pretrained vectors, loaded from the
        file at the specified path. The file format is the same as
        https://www.kaggle.com/danielwillgeorge/glove6b100dtxt
        You can also access the vectors at:
         https://www.dropbox.com/s/qxak38ybjom696y/glove.6B.100d.txt?dl=0
         (for efficiency (time and memory) - load only the vectors you need)
        The format of the vectors object is not specified as it will be used
        internaly in your code, so you can use the datastructure of your choice.

    Args:
        path (str): full path to the embeddings file
        vocab (list): a list of words to have embeddings for. Defaults to None.

    """
    vectors = Vectors(name=path, cache=os.getcwd())
    if vocab is not None:
        vectors = vectors.get_vecs_by_tokens(vocab, lower_case_backup=True)
    return vectors
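
A minimal usage sketch (the local file name and the two-word vocab are illustrative assumptions):

word_vectors = load_pretrained_embeddings("glove.6B.100d.txt", vocab=["hello", "world"])
print(word_vectors.shape)  # torch.Size([2, 100]): one row per requested word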
dataloader_dev: DataLoader = DataLoader(dataset_dev,
                                        batch_size=BATCH_SIZE_VALID_TEST)

if USE_GLOVE:
    # Load the GloVe embeddings.
    from torchtext.vocab import Vectors

    vectors = Vectors(GLOVE_PATH, cache="./")
    # Start from random embeddings, then overwrite the rows of words that
    # have a pretrained GloVe vector.
    pretrained_embeddings = torch.randn(len(vocab_words), vectors.dim)
    initialised = 0
    for i, w in enumerate(vocab_words.itos):
        if w in vectors.stoi:
            initialised += 1
            pretrained_embeddings[i] = vectors.get_vecs_by_tokens(w)

    # The padding token keeps an all-zero embedding.
    pretrained_embeddings[vocab_words[pad_token]] = torch.zeros(vectors.dim)
    hyperparameters.embedding_dim = vectors.dim
    hyperparameters.glove_embeddings = pretrained_embeddings
    hyperparameters.vocab_size_words = len(vocab_words)
    print("VECTOR DIM", vectors.dim)
    print("initialised embeddings {}".format(initialised))
    print("random initialised embeddings {} ".format(
        len(vocab_words) - initialised))

print(hyperparameters)
print(net_configuration)

model: SRL_final_MODEL = SRL_final_MODEL(