Example 1
# load data, taken from Kalchbrenner matlab files
# we order the input according to length and pad all sentences to the maximum length
# at training time, however, we use the "length" array to shrink that matrix to the longest sentence within a batch
# in practice, this means that batches are padded with one or two zeros, or not padded at all
kalchbrenner_path = "./data/binarySentiment/"
train_x_indexes, train_y, train_lengths = dataUtils.read_and_sort_matlab_data(
    kalchbrenner_path + "train.txt", kalchbrenner_path + "train_lbl.txt")
dev_x_indexes, dev_y, dev_lengths = dataUtils.read_and_sort_matlab_data(
    kalchbrenner_path + "valid.txt", kalchbrenner_path + "valid_lbl.txt")
test_x_indexes, test_y, test_lengths = dataUtils.read_and_sort_matlab_data(
    kalchbrenner_path + "test.txt", kalchbrenner_path + "test_lbl.txt")

# train data
# integer division so the batch count is an int under Python 3 as well
n_train_batches = len(train_lengths) // hyperparas['batch_size']

# dev data
# for a correct evaluation, we pad extra rows until the row count is a multiple of the batch size
dev_x_indexes_extended = dataUtils.pad_to_batch_size(dev_x_indexes,
                                                     hyperparas['batch_size'])
dev_y_extended = dataUtils.pad_to_batch_size(dev_y, hyperparas['batch_size'])
n_dev_batches = dev_x_indexes_extended.shape[0] // hyperparas['batch_size']
n_dev_samples = len(dev_y)
dataUtils.extend_lenghts(dev_lengths, hyperparas['batch_size'])

# test data
test_x_indexes_extended = dataUtils.pad_to_batch_size(test_x_indexes,
                                                      hyperparas['batch_size'])
test_y_extended = dataUtils.pad_to_batch_size(test_y, hyperparas['batch_size'])
n_test_batches = test_x_indexes_extended.shape[0] // hyperparas['batch_size']
n_test_samples = len(test_y)
dataUtils.extend_lenghts(test_lengths, hyperparas['batch_size'])
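The dataUtils helpers used above are not reproduced on this page. A minimal sketch of what pad_to_batch_size and extend_lenghts might look like (the misspelling matches the identifier actually used in the calls above), assuming NumPy arrays for the data and plain Python lists for the lengths; the repository's real implementations may differ:

import numpy as np

def pad_to_batch_size(array, batch_size):
    # append zero rows (or zero labels, for 1-D input) so the first
    # dimension becomes an exact multiple of batch_size
    remainder = array.shape[0] % batch_size
    if remainder == 0:
        return array
    padding = np.zeros((batch_size - remainder,) + array.shape[1:],
                       dtype=array.dtype)
    return np.concatenate([array, padding], axis=0)

def extend_lenghts(length_list, batch_size):
    # extend the per-sentence length list in place so it stays aligned
    # with the padded matrix; dummy rows get length 1, not 0, so that
    # slicing a batch down to its longest sentence never yields width 0
    remainder = len(length_list) % batch_size
    if remainder != 0:
        length_list.extend([1] * (batch_size - remainder))

Note that extend_lenghts mutates its argument rather than returning a new list, which matches how it is called above (no return value is used).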

######################
# BUILD ACTUAL MODEL #
######################
Example 2
# load data: MultiNLI, converted to the Kalchbrenner DCNN format
# we order the input according to length and pad all sentences to the maximum length
# at training time, however, we use the "length" array to shrink that matrix to the longest sentence within a batch
# in practice, this means that batches are padded with one or two zeros, or not padded at all
data_path = "../multinli_0.9/DCNN_format/"
train_y, train_sents1, train_sents2, train_lens1, train_lens2 = dataUtils.read_and_sort(data_path + 'train.txt')
dev_y, dev_sents1, dev_sents2, dev_lens1, dev_lens2 = dataUtils.read_and_sort(data_path + 'dev.txt')
test_y, test_sents1, test_sents2, test_lens1, test_lens2 = dataUtils.read_and_sort(data_path + 'test.txt')

# train data
n_train_batches = len(train_lens1) // hyperparas['batch_size']


# dev data
# for a correct evaluation, we pad extra rows until the row count is a multiple of the batch size
dev_sents1_extended = dataUtils.pad_to_batch_size(dev_sents1, hyperparas['batch_size'])
dev_sents2_extended = dataUtils.pad_to_batch_size(dev_sents2, hyperparas['batch_size'])
dev_y_extended = dataUtils.pad_to_batch_size(dev_y, hyperparas['batch_size'])
n_dev_batches = dev_sents1_extended.shape[0] // hyperparas['batch_size']
n_dev_samples = len(dev_y)
dataUtils.extend_lenghts(dev_lens1, hyperparas['batch_size'])
dataUtils.extend_lenghts(dev_lens2, hyperparas['batch_size'])

# test data
test_sents1_extended = dataUtils.pad_to_batch_size(test_sents1, hyperparas['batch_size'])
test_sents2_extended = dataUtils.pad_to_batch_size(test_sents2, hyperparas['batch_size'])
test_y_extended = dataUtils.pad_to_batch_size(test_y, hyperparas['batch_size'])
n_test_batches = test_sents1_extended.shape[0] // hyperparas['batch_size']
n_test_samples = len(test_y)
dataUtils.extend_lenghts(test_lens1, hyperparas['batch_size'])
dataUtils.extend_lenghts(test_lens2, hyperparas['batch_size'])
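To make the shrinking described in the comments concrete, here is a hypothetical training-loop step (not from the original repository), assuming the sentence matrices are NumPy arrays sorted by length, so that slicing to the longest sentence in a batch removes most of the padding columns:

batch_size = hyperparas['batch_size']
for minibatch_index in range(n_train_batches):
    lo = minibatch_index * batch_size
    hi = lo + batch_size
    # the longest sentence in the batch decides how many columns we keep
    max1 = max(train_lens1[lo:hi])
    max2 = max(train_lens2[lo:hi])
    x1_batch = train_sents1[lo:hi, :max1]
    x2_batch = train_sents2[lo:hi, :max2]
    y_batch = train_y[lo:hi]
    # the forward/backward pass on the shrunken batch would go here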
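Evaluation has to account for the dummy rows added by pad_to_batch_size: a plausible scheme is to predict over every full batch and then keep only the first n_dev_samples predictions. In this sketch, predict_batch is a hypothetical stand-in for the compiled prediction function, which is defined later in the script:

import numpy as np

batch_size = hyperparas['batch_size']
predictions = []
for i in range(n_dev_batches):
    lo = i * batch_size
    hi = lo + batch_size
    # shrink the evaluation batches exactly as at training time
    max1 = max(dev_lens1[lo:hi])
    max2 = max(dev_lens2[lo:hi])
    predictions.append(predict_batch(dev_sents1_extended[lo:hi, :max1],
                                     dev_sents2_extended[lo:hi, :max2]))
predictions = np.concatenate(predictions)
# only the first n_dev_samples rows are real sentences; the rest is padding
dev_accuracy = np.mean(predictions[:n_dev_samples] == dev_y_extended[:n_dev_samples])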

######################
# BUILD ACTUAL MODEL #
######################