Esempi in Python per Vocabulary.get_word

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: Vocabulary

Classe/tipologia: Vocabulary

Metodo/funzione: get_word

Esempi su hotexamples.com: 3

Vocabulary.get_word in Python: 3 esempi trovati. Questi sono i migliori esempi reali in Python per Vocabulary.Vocabulary.get_word, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

Vocabulary(30)

add_token(5)

load(5)

add_word(5)

save(3)

get_vocab(3)

from_serializable(3)

get_word(3)

index(2)

build_from_token(2)

make_vocab_charts(2)

readPostProcessingVoc(2)

get_index(2)

getIndex(2)

fetch(2)

addSentence(1)

load_bigquery_vocab_from_indexed(1)

load_vocab_from_local(1)

load_word_from_data(1)

make_array_of_words_from_sentences(1)

prune(1)

restore_text(1)

add_sentence_pair(1)

save_dict(1)

loadIndexFile(1)

sentence2indices(1)

similar(1)

size(1)

sorted_tokens(1)

startSymbolWordID(1)

symbol(1)

text2ids(1)

to_index(1)

unknownWordID(1)

sentence2index(1)

incrementDF(1)

addSymbol(1)

from_serialiable(1)

add_words(1)

build_vocabulary(1)

checkIndex(1)

create(1)

create_from_text(1)

de_tokenize_data(1)

endSymbolWordID(1)

expand(1)

export_vocabulary(1)

addWord(1)

isATerm(1)

getCF(1)

Esempio n. 1

Mostra file

class Encoder_Decoder(nn.Module):
    """Two-LSTM (encoder + decoder) language model over a ``Vocabulary``.

    The vocabulary is loaded from ``vocab_file`` at construction time and
    determines both the embedding table size and the output layer size.
    """

    def __init__(self,
                 dim_emb,
                 dim_hid,
                 vocab_file='./data/preprocessed/vocab_file.vocab'):
        """Build the embedding, both LSTMs and the output projection.

        Args:
            dim_emb: Size of the word embeddings.
            dim_hid: Size of the LSTM hidden state.
            vocab_file: Path handed to ``Vocabulary.load``.
        """
        super(Encoder_Decoder, self).__init__()
        self.vocab = Vocabulary()
        self.vocab.load(vocab_file=vocab_file)
        self.dim_hid = dim_hid
        self.word_embeddings = nn.Embedding(len(self.vocab), dim_emb)
        self.en_lstm = nn.LSTM(dim_emb, dim_hid, batch_first=True)

        self.de_lstm = nn.LSTM(dim_emb, dim_hid, batch_first=True)
        # Fully connected layer projecting the LSTM hidden state (dim_hid)
        # onto one logit per vocabulary entry.
        self.hidden2linear = nn.Linear(dim_hid, len(self.vocab))

    def forward(self, sequence, state=None):
        """Run ``sequence`` through the encoder, then the decoder.

        Args:
            sequence: Tensor of token indices fed to the embedding layer.
            state: Optional ``(h, c)`` initial state for the encoder LSTM.

        Returns:
            ``(logits, (h, c))`` where ``logits`` is the decoder output
            projected to vocabulary size and ``(h, c)`` is the decoder's
            final state.
        """
        embedding = self.word_embeddings(sequence)
        # The per-step encoder outputs are not used: attention is not
        # implemented, only the final encoder state seeds the decoder.
        _, (h, c) = self.en_lstm(embedding, state)

        output, (h, c) = self.de_lstm(embedding, (h, c))

        output = self.hidden2linear(output)
        return output, (h, c)

    def generate(self, start=None, max_len=17):
        """Greedily decode a sentence starting from ``start``.

        Args:
            start: First word; when ``None`` a random entry of
                ``self.vocab.index2word`` is used.
            max_len: Maximum number of words (including ``start``).

        Returns:
            The decoded words joined by single spaces. Decoding stops after
            ``'<eos>'`` has been produced or ``max_len`` words are reached.
        """
        if start is None:
            start = random.choice(self.vocab.index2word)

        # Allocate the index tensor on the same device as the embedding
        # table. (The embedding attribute of this class is
        # ``word_embeddings``; there is no ``embed`` attribute.)
        idx = self.word_embeddings.weight.new_full(
            (1, 1), self.vocab.get_index(start), dtype=torch.long)
        decoded = [start]
        state = None
        unk = self.vocab.get_index('<unk>')
        # Pure inference: no autograd graph is needed while decoding.
        with torch.no_grad():
            while decoded[-1] != '<eos>' and len(decoded) < max_len:
                x, state = self.forward(idx, state)
                # Never emit the unknown-word token.
                x[:, :, unk] = -float('inf')

                idx = torch.argmax(x, dim=-1)

                word = self.vocab.get_word(idx.item())
                decoded.append(word)
        return ' '.join(decoded)

Esempio n. 2

Mostra file

File: test_1.py Progetto: Siqi-Z/dl_project

def main(args):
    """Caption the validation images with a trained model and record BLEU.

    Builds the vocabulary from the training captions, loads the trained
    encoder/decoder weights, generates one caption per batch (only the first
    sample of each batch is decoded), scores it with ``sentence_bleu`` and
    finally saves all scores plus their mean to ``test_results.npy``.

    Args:
        args: Namespace providing ``batch_size``, ``num_workers``,
            ``embed_size``, ``hidden_size``, ``num_layers``,
            ``bleu_weights`` and ``log_step``.
    """
    threshold = 20
    captions_dict = load_captions(train_dir)
    vocab = Vocabulary(captions_dict, threshold)
    vocab_size = vocab.index
    # Image preprocessing, normalization for the pretrained resnet
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    dataloader = DataLoader(val_dir, vocab, transform)
    imagenumbers, captiontotal, imagetotal = dataloader.gen_data()

    # Build data loader
    data_loader = get_loader(imagenumbers,
                             captiontotal,
                             imagetotal,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Build models
    # NOTE(review): only the encoder is switched to eval mode; confirm the
    # decoder has no dropout/batch-norm layers that would need it as well.
    encoder = EncoderCNN(args.embed_size).eval()
    decoder = DecoderRNN(args.embed_size, args.hidden_size, vocab_size,
                         args.num_layers)
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Load the trained model parameters
    encoder.load_state_dict(torch.load(encoder_path))
    decoder.load_state_dict(torch.load(decoder_path))

    total_step = len(data_loader)

    # List to store the BLEU scores
    bleu_scores = []

    for i, (images, captions, _lengths) in enumerate(data_loader):

        # Set mini-batch dataset
        images = images.to(device)

        # Generate a caption from the image; no gradients are needed for
        # evaluation.
        with torch.no_grad():
            feature = encoder(images)
            sampled_ids = decoder.sample(feature)
        sampled_ids = sampled_ids[0].cpu().numpy()

        # Convert word_ids to words
        sampled_caption = []
        for word_id in sampled_ids:
            word = vocab.get_word(word_id)
            sampled_caption.append(word)
            if word == '<end>':
                break
        sentence = ' '.join(sampled_caption)

        # NOTE(review): the hypothesis is passed as a joined string and the
        # reference as the raw ``captions`` batch; confirm this matches what
        # ``sentence_bleu`` expects (it is usually given token lists).
        score = sentence_bleu([captions], sentence, args.bleu_weights)
        bleu_scores.append(score)

        # Print log info
        if i % args.log_step == 0:
            print('Finish [{}/{}], Current BLEU Score: {:.4f}'.format(
                i, total_step, np.mean(bleu_scores)))
            print(sentence)
            print(captions)

    # ``[list_of_scores, mean]`` is a ragged sequence; recent NumPy versions
    # refuse to convert it implicitly, so build the two-element object array
    # explicitly. The stored layout (scores list, then mean) is unchanged.
    mean_bleu = np.mean(bleu_scores) if bleu_scores else float('nan')
    results = np.empty(2, dtype=object)
    results[0] = bleu_scores
    results[1] = mean_bleu
    np.save('test_results.npy', results)

Esempio n. 3

Mostra file

class LM(torch.nn.Module):
    """Two-layer LSTM language model over a ``Vocabulary``.

    The vocabulary is loaded from ``vocab_file`` at construction time and
    determines both the embedding table size and the output layer size.
    """

    def __init__(self,
                 dim_emb,
                 dim_hid,
                 vocab_file='./data/preprocessed/vocab_file.vocab'):
        """Build the embedding, the two stacked LSTMs and the output layer.

        Args:
            dim_emb: Size of the word embeddings.
            dim_hid: Size of the LSTM hidden state.
            vocab_file: Path handed to ``Vocabulary.load``.
        """
        super().__init__()

        self.vocab = Vocabulary()
        self.vocab.load(vocab_file=vocab_file)
        self.embed = torch.nn.Embedding(len(self.vocab), dim_emb)
        self.rnn1 = torch.nn.LSTM(dim_emb, dim_hid, batch_first=True)
        self.rnn2 = torch.nn.LSTM(dim_hid, dim_hid, batch_first=True)
        self.out = torch.nn.Linear(dim_hid, len(self.vocab))

    def forward(self, x, state1=None, state2=None):
        """Compute vocabulary logits for the token indices ``x``.

        Args:
            x: Tensor of token indices fed to the embedding layer.
            state1: Optional initial state for the first LSTM.
            state2: Optional initial state for the second LSTM.

        Returns:
            ``(logits, state1, state2)`` with the updated LSTM states.
        """
        out = self.embed(x)
        out, state1 = self.rnn1(out, state1)
        out, state2 = self.rnn2(out, state2)
        out = self.out(out)
        return out, state1, state2

    def generate(self, prefix, max_len=30):
        """Sample a sentence, forcing the words of ``prefix`` first.

        Decoding starts from ``'<bos>'``. While forced words remain they are
        emitted instead of sampling; afterwards each word is drawn from the
        softmax distribution with ``'<unk>'`` masked out. Decoding stops
        after ``'<eos>'`` or once ``max_len`` words have been produced.

        Args:
            prefix: Sequence of words to force. Words are taken from the
                END of the sequence first. The caller's object is not
                modified.
            max_len: Maximum number of words (including ``'<bos>'``).

        Returns:
            ``(text, cost)`` where ``text`` is the decoded words joined by
            spaces and ``cost`` is the sum of ``log2`` probabilities of the
            sampled words divided by the total number of decoded words.
        """
        cost = 0
        softmax = torch.nn.Softmax(dim=-1)
        start = '<bos>'

        # Work on a private copy so the caller's list is not emptied.
        # NOTE(review): forced words are consumed last-to-first, as in the
        # original ``prefix.pop()``; confirm callers pass a reversed prefix.
        pending = list(prefix)

        # Keep every index tensor on the model's own device instead of
        # relying on a module-level ``device`` global.
        weight = self.embed.weight
        idx = weight.new_full((1, 1),
                              self.vocab.get_index(start),
                              dtype=torch.long)
        decoded = [start]
        state1, state2 = None, None
        unk = self.vocab.get_index('<unk>')
        # Pure inference: no autograd graph is needed while decoding.
        with torch.no_grad():
            while decoded[-1] != '<eos>' and len(decoded) < max_len:
                x, state1, state2 = self.forward(idx, state1, state2)

                if pending:
                    word = pending.pop()
                    idx = self.vocab.get_index(word)
                else:
                    # Never sample the unknown-word token.
                    x[:, :, unk] = -float('inf')
                    probs = softmax(x).squeeze().to('cpu').detach().numpy()
                    accum = list(accumulate(probs))
                    # Inverse-CDF sampling. ``hi`` bounds the result to a
                    # valid index even if rounding makes the draw equal to
                    # the final cumulative weight.
                    idx = bisect(accum, random.random() * accum[-1],
                                 0, len(accum) - 1)
                    cost += np.log2(probs[idx])
                    word = self.vocab.get_word(idx)

                idx = torch.tensor(idx, device=weight.device).view(1, 1)
                decoded.append(word)
        cost /= len(decoded)
        return ' '.join(decoded), cost