Exemple #1
0
def load_data(config):
    """Load the pickled corpus and prepare batches for every split.

    The pickle stored at ``config.data_file`` must unpack to the 5-tuple
    ``(train, dev, test, embeddings, vocab)``, where every value of
    ``vocab`` exposes an ``index`` attribute.

    Args:
        config: Object providing ``data_file``, ``batch_size`` and
            ``epochs`` attributes.

    Returns:
        A 6-tuple ``(num_train, train_batches, dev_batches, test_batches,
        embeddings, index_to_key)``:

        * ``num_train`` is ``len(train)`` of the raw training split.
        * ``train_batches`` is whatever ``DataSet.get_batches`` returns for
          ``config.epochs`` passes with ``rand=True`` (not materialised).
        * ``dev_batches`` / ``test_batches`` are lists built from a single
          non-random pass over the respective split.
        * ``embeddings`` is passed through unchanged from the pickle.
        * ``index_to_key`` is the inverted vocabulary, mapping each entry's
          ``index`` back to its key.
    """
    # NOTE: unpickling can execute arbitrary code; only point
    # ``config.data_file`` at files from a trusted source.
    # Binary mode is required for pickles on platforms that translate
    # newlines, and the context manager closes the handle deterministically.
    with open(config.data_file, "rb") as data_file:
        train, dev, test, embeddings, vocab = cPickle.load(data_file)

    trainset, devset, testset = DataSet(train), DataSet(dev), DataSet(test)

    # Invert the vocabulary: entry index -> key.
    index_to_key = {entry.index: key for key, entry in vocab.items()}

    # Only the training set is sorted before batching.
    trainset.sort()
    train_batches = trainset.get_batches(
        config.batch_size, config.epochs, rand=True
    )

    # Dev/test batches are materialised so they can be iterated repeatedly.
    dev_batches = list(devset.get_batches(config.batch_size, 1, rand=False))
    test_batches = list(testset.get_batches(config.batch_size, 1, rand=False))

    return (
        len(train),
        train_batches,
        dev_batches,
        test_batches,
        embeddings,
        index_to_key,
    )
Exemple #2
0
    def load_data(self, config, evaluate_split="test"):
        """Load one split of the pickled corpus as a list of batches.

        The pickle stored at ``config.data_file`` must unpack to a 5-tuple
        whose first three items are the train, dev and test splits; the
        remaining two items are ignored here.

        Args:
            config: Object providing ``data_file`` and ``batch_size``
                attributes.
            evaluate_split: Which split to load: ``"train"``, ``"dev"`` or
                ``"test"`` (the default).

        Returns:
            A list holding the batches produced by a single non-random pass
            of ``DataSet.get_batches`` over the selected split.

        Raises:
            ValueError: If ``evaluate_split`` is not one of the three
                supported names.
        """
        valid_splits = ("train", "dev", "test")
        # Fail fast with a clear message, before touching the data file,
        # rather than hitting an AttributeError on ``None`` further down.
        if evaluate_split not in valid_splits:
            raise ValueError(
                "evaluate_split must be one of %r, got %r"
                % (valid_splits, evaluate_split)
            )

        # NOTE: unpickling can execute arbitrary code; only point
        # ``config.data_file`` at files from a trusted source.
        # Binary mode is required for pickles on platforms that translate
        # newlines, and the context manager closes the handle.
        with open(config.data_file, "rb") as data_file:
            train, dev, test, _, _ = cPickle.load(data_file)

        raw_splits = {"train": train, "dev": dev, "test": test}
        # Only the requested split is wrapped in a DataSet.
        eval_set = DataSet(raw_splits[evaluate_split])

        return list(eval_set.get_batches(config.batch_size, 1, rand=False))