def _test(run):
    """Load a trained LAS checkpoint and write test-set predictions to disk.

    Args:
        run: identifier used to locate '../saved_models/<run>-model.pt' and
            to name the output file '../predictions-<run>.txt'.
    """
    U.set_random_seeds(11785)

    # Build the vocabulary from the training transcripts so output_size
    # matches what the checkpoint was trained with.
    lang = Lang()
    trans = U.tokenizeTranscripts('train')
    lang.init_lang(trans)
    output_size = lang.num_items
    batch_size = 1
    print("Starting .. ..")

    # Model hyper-parameters — these must match the training configuration
    # of the checkpoint being loaded, or load_state_dict() will fail.
    num_layers = 3
    hidden_size = 256
    input_size = 40
    key_size = 128
    value_size = 128
    bidirectional = True
    p = 3
    embedding_size = 128
    max_len = 496

    encoder = EncoderRNN(input_size, hidden_size, key_size, value_size,
                         num_layers, bidirectional, p)
    decoder = DecoderRNN(output_size, embedding_size, hidden_size, key_size,
                         value_size, num_layers, max_len)
    teacher_forcing_ratio = 1.0
    las = LAS(encoder, decoder, teacher_forcing_ratio)

    # map_location forces deserialization onto CPU storage; the model is
    # moved to GPU afterwards only if one is available.
    model = torch.load('../saved_models/' + run + '-model.pt',
                       map_location=lambda storage, loc: storage)
    las.load_state_dict(model.state_dict())
    if U.use_cuda():
        las = las.cuda()

    # Prediction
    test_dataset = SpeechDataset(lang, 'test')
    test_dataloader = SpeechDataLoader(test_dataset, batch_size=batch_size,
                                       shuffle=False)
    criterion = CrossEntropyLoss3D(reduce=False, ignore_index=C.PAD_TOKEN_IDX)
    predictor = Predictor(las, lang, criterion)

    # BUG FIX: the output file was opened but never closed, so buffered
    # predictions could be lost. A context manager guarantees flush/close
    # even if predict() raises.
    with open('../predictions-' + run + '.txt', 'w') as outFile:
        predictor.predict(test_dataloader, outFile)
def __init__(self, lang, mode):
    """Load speech features (and, outside test mode, transcripts) for a split.

    Args:
        lang: vocabulary object providing items2indices() for transcripts.
        mode: dataset split name ('train', 'dev', or 'test'); selects the
            feature file C.DATA_PATH + '<mode>.npy'.
    """
    self.mode = mode
    features_path = (C.DATA_PATH + "%s.npy") % (self.mode)
    self.feats = np.load(features_path)
    self.trans = []
    # BUG FIX: the original used `self.mode is not 'test'` — an identity
    # comparison against a str literal, which is implementation-dependent
    # and raises SyntaxWarning on modern CPython. Use value equality.
    if self.mode != 'test':
        trans_tokenized = U.tokenizeTranscripts(self.mode)
        for trans_items in trans_tokenized:
            trans_indices = lang.items2indices(trans_items)
            self.trans.append(trans_indices)
        self.trans = np.asarray(self.trans)
        # Sanity check: one transcript per utterance. Kept inside the
        # non-test branch — in test mode there are no transcripts to check.
        assert (self.feats.shape[0] == self.trans.shape[0])
    self.len = self.feats.shape[0]
    print("%s Dataset Loading Completed" % (self.mode))
# Evaluation script: builds the vocabulary, loads the dev split, and sets up
# the LAS model hyper-parameters. NOTE(review): this chunk is truncated —
# the script continues (encoder/decoder construction) past this view.
from data.language import Lang
from data.dataloader import SpeechDataLoader
from data.dataset import SpeechDataset
from models.encoder import EncoderRNN
from models.decoder import DecoderRNN
from models.las import LAS
from evaluator.evaluator import Evaluator
import torch.nn as nn

if __name__ == "__main__":
    batch_size = 32
    # Vocabulary is built from the *training* transcripts so that item
    # indices match those the model was trained with.
    # NOTE(review): `U` is not imported in the visible import block —
    # presumably a utils module imported elsewhere in the file; confirm.
    lang = Lang()
    trans = U.tokenizeTranscripts('train')
    lang.init_lang(trans)
    output_size = lang.num_items
    # Evaluate on the dev split.
    dev_dataset = SpeechDataset(lang, 'dev')
    dev_dataloader = SpeechDataLoader(dev_dataset, batch_size=batch_size)
    # Model hyper-parameters — must match the trained checkpoint.
    num_layers = 3
    hidden_size = 256
    input_size = 40
    key_size = 128
    value_size = 128
    bidirectional = True
    p = 3
assert (self.feats.shape[0] == self.trans.shape[0]) self.len = self.feats.shape[0] print("%s Dataset Loading Completed" % (self.mode)) def __getitem__(self, index): if self.mode is not 'test': trans = np.append([C.SOS_TOKEN_IDX], self.trans[index]) trans = np.append(trans, [C.EOS_TOKEN_IDX]) else: trans = np.asarray([C.SOS_TOKEN_IDX]) return (self.feats[index], trans) def __len__(self): return self.len if __name__ == "__main__": lang = Lang() trans = U.tokenizeTranscripts('dev') lang.init_lang(trans) dataset = SpeechDataset(lang, 'dev') assert (len(dataset) == 1139) print(dataset[0]) print(''.join(lang.indices2items(dataset[0][1])))