Beispiel #1
0
# With the "nll" loss the noise-sample count is forced to zero, overriding
# whatever value args carried before this point.
if args.loss_function == "nll":
    args.num_noise_samples = 0

# Report the argument values; this runs after the override above, so the
# adjusted num_noise_samples is what gets reported.
U.print_args(args)
# Hand the requested device and thread count to the Theano setup helper.
# NOTE(review): presumably this has to run before the dlm imports that follow
# pull in Theano -- confirm before reordering.
U.set_theano_device(args.device, args.threads)

import dlm.trainer
from dlm.io.mmapReader import MemMapReader
from dlm.models.mlp import MLP

#########################
## Loading datasets
#

# Training data is the only reader given a batch size and an
# instance-weights path; the other readers use the constructor defaults.
trainset = MemMapReader(
    args.trainset,
    batch_size=args.batchsize,
    instance_weights_path=args.instance_weights_path,
)
devset = MemMapReader(args.devset)
# The test set is optional: it stays None when no path was supplied.
testset = MemMapReader(args.testset) if args.testset else None

#########################
## Creating model
#

L.info('Building the model')
# Copy the dimensions reported by the training set onto args.
# NOTE(review): presumably read later by the model constructor; the consumer
# is outside this view.
args.vocab_size = trainset.get_vocab_size()
args.ngram_size = trainset.get_ngram_size()
args.num_classes = trainset.get_num_classes()
Beispiel #2
0
# With the "nll" loss the noise-sample count is forced to zero, overriding
# whatever value args carried before this point.
if args.loss_function == "nll":
	args.num_noise_samples = 0

# Report the argument values; this runs after the override above, so the
# adjusted num_noise_samples is what gets reported.
U.print_args(args)
# Hand the requested device and thread count to the Theano setup helper.
# NOTE(review): presumably this has to run before the dlm imports that follow
# pull in Theano -- confirm before reordering.
U.set_theano_device(args.device, args.threads)

import dlm.trainer
from dlm.io.mmapReader import MemMapReader
from dlm.models.mlp import MLP

#########################
## Loading datasets
#

# Training data is the only reader given a batch size and an
# instance-weights path; the other readers use the constructor defaults.
trainset = MemMapReader(
	args.trainset,
	batch_size=args.batchsize,
	instance_weights_path=args.instance_weights_path,
)
devset = MemMapReader(args.devset)
# The test set is optional: it stays None when no path was supplied.
testset = MemMapReader(args.testset) if args.testset else None


#########################
## Creating model
#

L.info('Building the model')
# Copy the dimensions reported by the training set onto args.
# NOTE(review): presumably read later by the model constructor; the consumer
# is outside this view.
args.vocab_size = trainset.get_vocab_size()
args.ngram_size = trainset.get_ngram_size()
args.num_classes = trainset.get_num_classes()
Beispiel #3
0
## Loading model
#

# Construct the MLP from the model file given on the command line.
# NOTE(review): presumably restores a previously trained model -- confirm
# against MLP's constructor.
classifier = MLP(model_path=args.model_path)

#########################
## Loading dataset
#

# Check the command-line combination before any dataset is opened: the format
# must be one of the three known names, and at least one output mode
# (-ppl, -nlp or -ulp) has to be requested.
U.xassert(args.format in ("mmap", "nbest", "text"), "Invalid file format given: " + args.format)
U.xassert(args.perplexity or args.nlp_path or args.ulp_path, "You should use one of (or more) -ppl, -nlp or -ulp")

if args.format != "mmap":
	# Text-based inputs ("nbest" or "text") need a vocabulary file.
	U.xassert(args.vocab_path, "Vocab file is required for non-mmap file formats")
	from dlm.io.textReader import TextReader
	is_nbest = args.format == "nbest"
	testset = TextReader(
		dataset_path=args.test_path,
		is_nbest=is_nbest,
		ngram_size=classifier.ngram_size,
		vocab_path=args.vocab_path,
	)
else:
	# An mmap input cannot be combined with either log-probability output path.
	U.xassert(args.nlp_path is None and args.ulp_path is None, "Cannot compute log-probabilities for an mmap file")
	from dlm.io.mmapReader import MemMapReader
	testset = MemMapReader(dataset_path=args.test_path, batch_size=500)

#########################
## Compiling theano function
#

# Build the evaluator from the test set and the loaded classifier.
# NOTE(review): `eval` must be a module bound outside this view (it is used
# for attribute access here), which shadows the builtin eval() in this script.
evaluator = eval.Evaluator(testset, classifier)

#########################