def data_loader():
    """Build DataLoaders for the SWBD labelled train/dev/test splits.

    The train split is loaded with the module-level ``min_count`` vocabulary
    threshold; dev and test are reloaded with ``min_count=1`` so no words are
    dropped from evaluation.

    Returns:
        tuple: ``(ntokens, train_ldr, dev_ldr, test_ldr)`` where ``ntokens``
        is the vocabulary size and the rest are DataLoaders from
        ``make_loader``.
    """
    datasets, word_to_i_map = load_swbd_labelled(rng, data_dir, min_count)
    ntokens = len(word_to_i_map)

    # Pick tensor constructors once instead of repeating the cuda check.
    use_cuda = torch.cuda.is_available()
    float_t = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
    long_t = torch.cuda.LongTensor if use_cuda else torch.LongTensor

    # Loading train set
    train_x, train_y = datasets[0]
    train_x = float_t(train_x)
    # BUG FIX: the original CPU fallback built train_y with FloatTensor;
    # class labels must be LongTensor (as the dev/test paths already do).
    train_y = long_t(train_y)
    train = TensorDataset(train_x, train_y)  # comment out if using get_batch2
    train_ldr = make_loader(train, batch_size)

    # Loading dev and test data (min_count=1: keep the full vocabulary).
    datasets, _ = load_swbd_labelled(rng, data_dir, 1)
    dev_x, dev_y = datasets[1]
    test_x, test_y = datasets[2]
    dev = TensorDataset(float_t(dev_x), long_t(dev_y))
    test = TensorDataset(float_t(test_x), long_t(test_y))
    dev_ldr = make_loader(dev, batch_size)
    test_ldr = make_loader(test, batch_size)

    return ntokens, train_ldr, dev_ldr, test_ldr
def data_loader():
    """Build DataLoaders plus word/one-hot lookup maps for SWBD labelled data.

    NOTE(review): this redefines ``data_loader`` and shadows the earlier
    definition in this file with a different return signature — confirm which
    one callers expect and consider renaming.

    The train split is loaded with the module-level ``min_count`` threshold;
    dev and test are reloaded with ``min_count=1`` so no words are dropped.

    Returns:
        tuple: ``(ntokens, train_x, train_y, train_ldr, dev_ldr, test_ldr,
        word_to_onehot_map, i_to_word_map)``.
    """
    datasets, word_to_i_map = load_swbd_labelled(rng, data_dir, min_count)
    ntokens = len(word_to_i_map)

    # Pick tensor constructors once instead of repeating the cuda check.
    use_cuda = torch.cuda.is_available()
    float_t = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
    long_t = torch.cuda.LongTensor if use_cuda else torch.LongTensor

    # Loading train set
    train_x, train_y = datasets[0]
    train_x = float_t(train_x)
    # BUG FIX: the original CPU fallback built train_y with FloatTensor;
    # class labels must be LongTensor (as the dev/test paths already do).
    train_y = long_t(train_y)
    train = TensorDataset(train_x, train_y)  # comment out if using get_batch2
    train_ldr = make_loader(train, batch_size)

    # Loading dev and test data (min_count=1: keep the full vocabulary).
    datasets, _ = load_swbd_labelled(rng, data_dir, 1)
    dev_x, dev_y = datasets[1]
    test_x, test_y = datasets[2]
    dev = TensorDataset(float_t(dev_x), long_t(dev_y))
    test = TensorDataset(float_t(test_x), long_t(test_y))
    dev_ldr = make_loader(dev, batch_size)
    test_ldr = make_loader(test, batch_size)

    # Words whose one-hot encoding uses a spelled-out alias rather than the
    # vocabulary form.  The original also special-cased "7-eleven", but it
    # mapped the word to itself, i.e. identical to the default path.
    onehot_aliases = {
        "okay_1": "okay",
        "401k’s": "fourohoneks",
    }
    word_to_onehot_map = {
        w: word2onehot(onehot_aliases.get(w, w)) for w in word_to_i_map
    }
    i_to_word_map = {i: w for w, i in word_to_i_map.items()}

    return (ntokens, train_x, train_y, train_ldr, dev_ldr, test_ldr,
            word_to_onehot_map, i_to_word_map)