コード例 #1
0
class RickAndMortyData(Dataset):
    """Sliding-window next-token dataset over a tokenized text.

    Each sample pairs ``seq_length`` consecutive token ids with the id of
    the token that immediately follows that window.

    Args:
        text: Raw text the samples are drawn from.
        seq_length: Number of tokens in each input window.
        vocab: Optional pre-built vocabulary. When ``None``, a new
            ``Vocabulary`` is created and populated from ``text``.
            The object must provide ``clean_text``, ``tokenize`` and
            item lookup by token (presumably returning integer ids,
            since the results are packed into long tensors).
    """

    def __init__(self, text, seq_length, vocab=None):
        self.seq_length = seq_length
        if vocab is None:
            # A fresh vocabulary is populated from the raw text, before
            # clean_text is applied below.
            vocab = Vocabulary()
            vocab.add_text(text)
        self.vocab = vocab
        self.text = self.vocab.clean_text(text)
        self.tokens = self.vocab.tokenize(self.text)

    def __len__(self):
        """Return the number of (window, next-token) samples.

        The count is clamped at zero: ``len()`` raises ``ValueError`` on a
        negative result, which is what an unclamped subtraction yields
        when there are fewer tokens than ``seq_length``.
        """
        return max(0, len(self.tokens) - self.seq_length)

    def __getitem__(self, idx):
        """Return the ``idx``-th sample as a pair of long tensors.

        Args:
            idx: Sample position; negative values count from the end.

        Returns:
            ``(x, y)`` where ``x`` holds the ids of ``seq_length``
            consecutive tokens starting at ``idx`` and ``y`` holds the
            single id of the token right after that window.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        if idx < 0:
            # Rebind instead of "+=" so a caller-owned index object is
            # never modified in place.
            idx = idx + size
        if not 0 <= idx < size:
            # Without this check a negative index would slice a short or
            # empty window and pair it with an unrelated target token.
            raise IndexError(
                f"index {idx} out of range for dataset of size {size}")
        stop = idx + self.seq_length
        window = [self.vocab[token] for token in self.tokens[idx:stop]]
        target = [self.vocab[self.tokens[stop]]]
        x = torch.tensor(window, dtype=torch.long)
        y = torch.tensor(target, dtype=torch.long)
        return x, y
コード例 #2
0
# Training and model hyperparameters.
batch_size = 128
lstm_size = 128
seq_length = 64
num_layers = 2
bidirectional = True
embeddings_size = 300
dropout = 0.5
learning_rate = 0.001

# Read the whole training corpus into memory.
with open(data_path, 'r') as f:
    text = f.read()

vocab = Vocabulary()

if args.vocab_path is None:
    # No saved vocabulary was supplied: build one from the corpus and
    # persist it for later runs.
    vocab.add_text(text)
    vocab.save('data/vocab.pkl')
else:
    # Load from the same option that was just tested. The previous code
    # read `args.load_vocab`, which did not match the attribute checked in
    # the condition above.
    # NOTE(review): the '.pkl' name used on save suggests pickle; if so,
    # only load vocabulary files from trusted sources -- confirm.
    vocab.load(args.vocab_path)

print(vocab)

# The model's vocabulary size is taken from the vocabulary built or loaded
# above; the remaining arguments forward the hyperparameters set earlier.
model = MortyFire(vocab_size=len(vocab),
                  lstm_size=lstm_size,
                  embed_size=embeddings_size,
                  seq_length=seq_length,
                  num_layers=num_layers,
                  dropout=dropout,
                  bidirectional=bidirectional,
                  train_on_gpu=train_on_gpu)