Python Indexer.add_word Examples

Programming Language: Python

Namespace/Package Name: Indexer

Class/Type: Indexer

Method/Function: add_word

Examples at hotexamples.com: 2

Python Indexer.add_word - 2 examples found. These are the top-rated real-world Python examples of Indexer.Indexer.add_word extracted from open-source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Indexer(30)

add_sentence(2)

add_word(2)

count_word(2)

__init__(2)

addToIndex(1)

dump(1)

tagcloud(1)

search(1)

indexer(1)

get_terms(1)

get_posting_list(1)

get_normalized_fequency(1)

extract_classes(1)

create_inverted_index(1)

document_frequency_normalized(1)

add(1)

count_word_in_text(1)

UserInterface(1)

compute_tdidf(1)

build_dictionary(1)

build_data_structure(1)

buidlindex(1)

add_sentences(1)

term_document_frequency(1)

Example #1

Show file

class StanfordTwitterEmbedding(AbstractEmbedding):
    """Embedding layer initialised from the GloVe Twitter 27B 200-d vectors.

    Builds an ``Indexer`` over the pretrained vocabulary and an
    ``nn.Embedding`` whose first rows are all-zero vectors reserved for the
    special tokens, followed by the pretrained vectors.
    """

    def __init__(self, device):
        """Load vocabulary and weights, then build the indexer and embedding.

        Args:
            device: Forwarded to ``AbstractEmbedding.__init__`` and used as
                the target of ``self.embedding.to(device)``.
        """
        super(StanfordTwitterEmbedding, self).__init__(device=device)
        self.path = Path(
            '../data/models/glove.twitter.27B/glove.twitter.27B.200d.txt')
        vocab_cache = self.path.parent / 'vocab.pkl'
        weights_cache = self.path.parent / 'weights.pkl'

        # Manual switch: set to True once to parse the raw text file and
        # (re)write the pickle caches; otherwise the caches are loaded.
        with_raw_file = False
        if with_raw_file:
            with self.path.open('r', encoding='utf-8-sig') as f:
                texts = f.readlines()
            # Parse lines in parallel, then transpose the (word, vector)
            # pairs into two parallel lists.
            vocab, weights = map(
                list,
                zip(*Parallel(n_jobs=10)
                    ([delayed(self.get_weights)(text) for text in texts])))
            with vocab_cache.open('wb') as f:
                pickle.dump(vocab, f)
            with weights_cache.open('wb') as f:
                pickle.dump(weights, f)
        else:
            # NOTE: pickle can execute arbitrary code on load; only load
            # cache files that were produced locally by the branch above.
            with vocab_cache.open('rb') as f:
                vocab = pickle.load(f)
            with weights_cache.open('rb') as f:
                weights = pickle.load(f)

        # '<\\s>' is the same four-character string the previous literal
        # '<\s>' evaluated to, without relying on an invalid escape sequence.
        # NOTE(review): possibly '</s>' was intended - confirm against Indexer.
        special_tokens = {
            '<s>': 0,
            '<unk>': 1,
            '<pad>': 2,
            '<\\s>': 3,
            '<mask>': 4,
        }
        # with_del_stopwords is read from self; presumably set by
        # AbstractEmbedding.__init__ - verify against the base class.
        self.indexer = Indexer(special_tokens=special_tokens,
                               with_del_stopwords=self.with_del_stopwords)
        for word in vocab:
            self.indexer.count_word(word)
            self.indexer.add_word(word)

        self.embedding_dim = len(weights[0])
        # One zero vector per special token, placed ahead of the pretrained
        # rows (assumes Indexer assigns vocabulary ids after the special
        # ids - TODO confirm).
        special_weights = [[0.0] * self.embedding_dim for _ in special_tokens]
        weights = torch.FloatTensor(special_weights + weights)
        self.embedding = nn.Embedding.from_pretrained(
            embeddings=weights, padding_idx=self.indexer.padding_index)
        self.embedding.to(device)

    def get_weights(self, text):
        """Split one line of the vector file into ``(word, vector)``.

        Args:
            text: A single space-separated line; the first field is the
                word and the remaining fields are parsed as floats.

        Returns:
            A tuple ``(word, list_of_floats)``.
        """
        content = text.split(' ')
        return content[0], list(map(float, content[1:]))

Example #2

Show file

class NtuaTwitterEmbedding(AbstractEmbedding):
    """Embedding layer initialised from the NTUA-SLP Twitter vector file.

    The file is read as text: the first line is a space-separated header
    whose second field is used as the embedding dimension, and every
    following line is a word followed by its vector components.
    """

    def __init__(self, device):
        """Parse the vector file and build the indexer and embedding.

        Args:
            device: Forwarded to ``AbstractEmbedding.__init__`` and used as
                the target of ``self.embedding.to(device)``.
        """
        super(NtuaTwitterEmbedding, self).__init__(device=device)
        self.path = Path('../data/models/ntua-slp-semeval2018/ntua_twitter_300.txt')
        with self.path.open('r', encoding='utf-8-sig') as f:
            texts = f.readlines()
        headers = texts[0].strip().split(' ')
        # Skip blank lines (e.g. a trailing newline at end of file); they
        # would otherwise yield an empty word with an empty vector and make
        # the weight matrix ragged.
        contents = [
            text.strip().split(' ') for text in texts[1:] if text.strip()
        ]
        vocab = [content[0] for content in contents]
        weights = [list(map(float, content[1:])) for content in contents]

        # '<\\s>' is the same four-character string the previous literal
        # '<\s>' evaluated to, without relying on an invalid escape sequence.
        # NOTE(review): possibly '</s>' was intended - confirm against Indexer.
        special_tokens = {
            '<s>': 0,
            '<unk>': 1,
            '<pad>': 2,
            '<\\s>': 3,
            '<mask>': 4,
        }
        # with_del_stopwords is read from self; presumably set by
        # AbstractEmbedding.__init__ - verify against the base class.
        self.indexer = Indexer(special_tokens=special_tokens,
                               with_del_stopwords=self.with_del_stopwords)
        for word in vocab:
            self.indexer.count_word(word)
            self.indexer.add_word(word)

        self.embedding_dim = int(headers[1])
        # One zero vector per special token, placed ahead of the pretrained
        # rows (assumes Indexer assigns vocabulary ids after the special
        # ids - TODO confirm).
        special_weights = [[0.0] * self.embedding_dim for _ in special_tokens]
        weights = torch.FloatTensor(special_weights + weights)
        self.embedding = nn.Embedding.from_pretrained(
            embeddings=weights, padding_idx=self.indexer.padding_index)
        self.embedding.to(device)