def load_data(config):
    """Load the pickled corpus and build train/dev/test batches.

    The file at ``config.data_file`` must unpickle to a 5-tuple
    ``(train, dev, test, embeddings, vocab)``.

    Args:
        config: object providing ``data_file``, ``batch_size`` and
            ``epochs`` attributes.

    Returns:
        A 6-tuple ``(num_train, train_batches, dev_batches, test_batches,
        embeddings, vocab)`` where:

        * ``num_train`` is ``len(train)`` of the raw training split;
        * ``train_batches`` is whatever ``DataSet.get_batches`` returns for
          the sorted training set (``config.epochs`` passes, ``rand=True``);
          it is not materialised here;
        * ``dev_batches`` / ``test_batches`` are lists holding a single,
          non-randomised pass over the dev / test sets;
        * ``embeddings`` is returned exactly as loaded;
        * ``vocab`` is the loaded vocab inverted to map each value's
          ``.index`` attribute back to its key.
    """
    # NOTE(security): unpickling can execute arbitrary code -- only load
    # data files from a trusted source.
    # Pickle data must be read in binary mode, and the context manager
    # guarantees the handle is closed even if unpickling fails.
    with open(config.data_file, "rb") as data_file:
        train, dev, test, embeddings, vocab = cPickle.load(data_file)

    trainset, devset, testset = DataSet(train), DataSet(dev), DataSet(test)

    # Invert the vocabulary: entry.index -> original key.
    vocab = {entry.index: key for key, entry in vocab.items()}

    # The training set is sorted before batching; dev/test are left as-is.
    trainset.sort()
    train_batches = trainset.get_batches(
        config.batch_size, config.epochs, rand=True)

    # Materialise the evaluation batches so they can be iterated repeatedly.
    dev_batches = list(devset.get_batches(config.batch_size, 1, rand=False))
    test_batches = list(testset.get_batches(config.batch_size, 1, rand=False))

    return (len(train), train_batches, dev_batches, test_batches,
            embeddings, vocab)
def load_data(self, config, evaluate_split="test"):
    """Load one split of the pickled corpus as a list of evaluation batches.

    The file at ``config.data_file`` must unpickle to a 5-tuple whose first
    three items are the train, dev and test splits; the remaining two items
    are ignored here.

    Args:
        config: object providing ``data_file`` and ``batch_size`` attributes.
        evaluate_split: which split to batch -- ``"test"`` (default),
            ``"train"`` or ``"dev"``.

    Returns:
        A list with a single, non-randomised pass of batches over the
        requested split.

    Raises:
        ValueError: if ``evaluate_split`` is not one of the supported names.
    """
    valid_splits = ("train", "dev", "test")
    # Fail fast with a clear message rather than loading the whole file and
    # then crashing on ``None.get_batches``.
    if evaluate_split not in valid_splits:
        raise ValueError(
            "evaluate_split must be one of %r, got %r"
            % (valid_splits, evaluate_split))

    # NOTE(security): unpickling can execute arbitrary code -- only load
    # data files from a trusted source.
    # Pickle data must be read in binary mode, and the context manager
    # guarantees the handle is closed even if unpickling fails.
    with open(config.data_file, "rb") as data_file:
        train, dev, test, _, _ = cPickle.load(data_file)

    splits = {"train": train, "dev": dev, "test": test}
    eval_set = DataSet(splits[evaluate_split])

    # Materialise the batches so the caller gets a reusable list.
    return list(eval_set.get_batches(config.batch_size, 1, rand=False))