import os
import random

from TFLibraries.Sparse import SparseFiles  # import path assumed for this repo's reader

indices = []  # global pool of not-yet-used example indices for generate_batch

def generate_batch(size, data, labels, lengths):
  global indices
  # Refill the pool once fewer than `size` indices remain
  if len(indices) < size:
    indices.extend(range(data.shape[0]))
  # Random indices
  r = random.sample(indices, size)
  # List comprehension instead of filter(): in Python 3, filter() returns an
  # iterator, which would break len(indices) on the next call
  indices = [i for i in indices if i not in r]
  return data[r], labels[r], lengths[r]

## Read Training/Dev/Test data
os.chdir('/home/ybisk/GroundedLanguage')
print("Running from ", os.getcwd())
maxlength = 80
offset = 3
labelspace = 9
Sparse = SparseFiles(maxlength, offset, labelspace=labelspace, prediction=2)
train, train_lens, vocabsize = Sparse.read("JSONReader/data/2016-NAACL/SRD/Train.mat")
dev, dev_lens, _ = Sparse.read("JSONReader/data/2016-NAACL/SRD/Dev.mat")
test, test_lens, _ = Sparse.read("JSONReader/data/2016-NAACL/SRD/Test.mat")

## Create sparse arrays
training, training_labels = Sparse.matrix(train)
development, development_labels = Sparse.matrix(dev)
testing, testing_labels = Sparse.matrix(test)

## TODO:
##   MultiCellLSTM
batch_size = 128
hiddendim = 256
embeddingdim = 100
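## Usage sketch (illustrative, not from the original script): one epoch of
## minibatches. Assumes Sparse.matrix returns numpy arrays, so the fancy
## indexing inside generate_batch works, and that rows of `training` line up
## with `train_lens`.
for _ in range(training.shape[0] // batch_size):
  batch_x, batch_y, batch_lens = generate_batch(batch_size, training,
                                                training_labels, train_lens)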
## Per-task data, keyed by prediction id (0=source, 1=reference, 2=direction)
training = {}
training_labels = {}
training_lens = {}
development = {}
development_labels = {}
development_lens = {}
testing = {}
testing_labels = {}
testing_lens = {}

dataType = ["source", "reference", "direction"]
for prediction in [0, 1, 2]:
  ## Read Training/Dev/Test data
  labelspace = [20, 20, 9][prediction]  # source/reference pick among 20 blocks; direction among 9
  print("Read ", dataType[prediction])
  Sparse = SparseFiles(maxlength, offset, labelspace=labelspace, prediction=prediction)
  train, train_lens, vocabsize = Sparse.read("JSONReader/data/2016-Version2/SRD/Train.mat")
  dev, dev_lens, _ = Sparse.read("JSONReader/data/2016-Version2/SRD/Dev.mat")
  test, test_lens, _ = Sparse.read("JSONReader/data/2016-Version2/SRD/Test.mat")
  training_lens[prediction] = train_lens
  development_lens[prediction] = dev_lens
  testing_lens[prediction] = test_lens

  ## Create sparse arrays
  t, t_l = Sparse.matrix(train)
  training[prediction] = t
  training_labels[prediction] = t_l
  d, d_l = Sparse.matrix(dev)
  development[prediction] = d
  development_labels[prediction] = d_l
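## Sketch (illustrative, not in the original): pulling one task's arrays out
## of the dicts, here the "direction" task (prediction = 2), and drawing a
## batch with the four-argument generate_batch from the first listing:
x, y, lens = training[2], training_labels[2], training_lens[2]
batch_x, batch_y, batch_lens = generate_batch(batch_size, x, y, lens)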
import os
import random
import tensorflow as tf

from TFLibraries.Sparse import SparseFiles  # import path assumed, as above

indices = []

def generate_batch(size, data, labels):
  global indices
  if len(indices) < size:
    indices.extend(range(data.shape[0]))
  r = random.sample(indices, size)
  indices = [i for i in indices if i not in r]
  # Randomly reorder the data
  return data[r], labels[r]

## Read Training/Dev/Test data
os.chdir('/home/ybisk/GroundedLanguage')
print("Running from ", os.getcwd())
maxlength = 80
offset = 3
labelspace = 9
Sparse = SparseFiles(maxlength, offset, labelspace=labelspace, prediction=2)
train, _, vocabsize = Sparse.read("JSONReader/data/2016-NAACL/SRD/Train.mat")
dev, _, _ = Sparse.read("JSONReader/data/2016-NAACL/SRD/Dev.mat")
test, _, _ = Sparse.read("JSONReader/data/2016-NAACL/SRD/Test.mat")

## Create sparse arrays
training, training_labels = Sparse.matrix(train)
development, development_labels = Sparse.matrix(dev)
testing, testing_labels = Sparse.matrix(test)

batch_size = 128
hiddendim = 100
embeddingdim = 100
graph = tf.Graph()
onehot = True
# Flattened input width: one-hot tokens vs. learned embeddings
inputdim = maxlength * vocabsize if onehot else maxlength * embeddingdim
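## Sketch (assumes the TF 1.x API, consistent with tf.Graph() above; the
## placeholder names are illustrative): the flattened input of width
## inputdim = maxlength * vocabsize feeds one dense placeholder inside `graph`.
with graph.as_default():
  inputs = tf.placeholder(tf.float32, shape=[batch_size, inputdim])
  label_ids = tf.placeholder(tf.int64, shape=[batch_size])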
## Per-task data, keyed by prediction id (0=source, 1=reference, 2=direction)
training = {}
training_labels = {}
training_lens = {}
development = {}
development_labels = {}
development_lens = {}
testing = {}
testing_labels = {}
testing_lens = {}

dataType = ["source", "reference", "direction"]
for prediction in [0, 1, 2]:
  ## Read Training/Dev/Test data
  labelspace = [20, 20, 9][prediction]
  print("Read ", dataType[prediction])
  Sparse = SparseFiles(maxlength, offset, labelspace=labelspace,
                       prediction=prediction)
  train, train_lens, vocabsize = Sparse.read(
      "JSONReader/data/2016-Version2/SRD/Train.mat")
  dev, dev_lens, _ = Sparse.read("JSONReader/data/2016-Version2/SRD/Dev.mat")
  test, test_lens, _ = Sparse.read(
      "JSONReader/data/2016-Version2/SRD/Test.mat")
  training_lens[prediction] = train_lens
  development_lens[prediction] = dev_lens
  testing_lens[prediction] = test_lens

  ## Create sparse arrays
  t, t_l = Sparse.matrix(train)
  training[prediction] = t
  training_labels[prediction] = t_l
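## Sketch (illustrative): a joint loop can round-robin over the three tasks,
## drawing batches with the two-argument generate_batch from the previous
## listing. Note that generate_batch keeps one shared global index pool, so
## this relies on all three tasks having the same number of rows.
for prediction in [0, 1, 2]:
  bx, by = generate_batch(batch_size, training[prediction],
                          training_labels[prediction])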