# With the "nll" loss the noise-sample count is forced to zero.
if args.loss_function == "nll":
    args.num_noise_samples = 0

U.print_args(args)
U.set_theano_device(args.device, args.threads)

# NOTE(review): these project imports sit after the device setup call,
# presumably on purpose -- keep them below U.set_theano_device().
import dlm.trainer
from dlm.io.mmapReader import MemMapReader
from dlm.models.mlp import MLP

#########################
## Loading datasets
#

trainset = MemMapReader(
    args.trainset,
    batch_size=args.batchsize,
    instance_weights_path=args.instance_weights_path,
)
devset = MemMapReader(args.devset)
# The test set is optional; it stays None when no path was supplied.
testset = MemMapReader(args.testset) if args.testset else None

#########################
## Creating model
#

L.info('Building the model')
# Copy the dataset-derived dimensions onto args.
args.vocab_size = trainset.get_vocab_size()
args.ngram_size = trainset.get_ngram_size()
args.num_classes = trainset.get_num_classes()
## Loading model
#

classifier = MLP(model_path=args.model_path)

#########################
## Loading dataset
#

# Validate the command-line arguments before touching any data.
U.xassert(
    args.format in ("mmap", "nbest", "text"),
    "Invalid file format given: " + args.format,
)
U.xassert(
    args.perplexity or args.nlp_path or args.ulp_path,
    "You should use one of (or more) -ppl, -nlp or -ulp",
)

if args.format == "mmap":
    # Log-probability outputs are rejected for mmap input.
    no_logprob_outputs = args.nlp_path is None and args.ulp_path is None
    U.xassert(no_logprob_outputs, "Cannot compute log-probabilities for an mmap file")
    from dlm.io.mmapReader import MemMapReader
    testset = MemMapReader(dataset_path=args.test_path, batch_size=500)
else:
    # Both remaining formats ("nbest" and "text") go through TextReader
    # and need a vocabulary file.
    U.xassert(args.vocab_path, "Vocab file is required for non-mmap file formats")
    from dlm.io.textReader import TextReader
    is_nbest = args.format == "nbest"
    testset = TextReader(
        dataset_path=args.test_path,
        is_nbest=is_nbest,
        ngram_size=classifier.ngram_size,
        vocab_path=args.vocab_path,
    )

#########################
## Compiling theano function
#

# NOTE(review): `eval` here must be a module bound earlier in the file
# (it exposes `Evaluator`), not the builtin -- confirm against the imports.
evaluator = eval.Evaluator(testset, classifier)

#########################