# Start-up report: run parameters, output location, seed and target devices.
logger.info("Initialization finished ...")
params_json = json.dumps(vars(opt))
logger.info("Parameters: " + str(params_json))
logger.info("Output path is: %s" % (exp_path))
logger.info("Random seed is set to: %d" % (opt.seed))

# One message per task; a negative device index selects the CPU message.
if opt.deviceIds[0] >= 0:
    slu_device_msg = "Use GPU with index %s as target slu device" % (opt.deviceIds[0])
else:
    slu_device_msg = "Use CPU as target slu torch device"
logger.info(slu_device_msg)

if opt.deviceIds[1] >= 0:
    nlg_device_msg = "Use GPU with index %s as target nlg device" % (opt.deviceIds[1])
else:
    nlg_device_msg = "Use CPU as target nlg torch device"
logger.info(nlg_device_msg)

##### Vocab and Dataset Reader #####
# One vocabulary per task, built from the same dataset name.
slu_vocab = Vocab(dataset=opt.dataset, task='slu')
nlg_vocab = Vocab(dataset=opt.dataset, task='nlg')
lm_vocab = Vocab(dataset=opt.dataset, task='lm')

# Evaluators are created for the slu and nlg tasks only.
slu_evaluator = Evaluator.get_evaluator_from_task(task='slu', vocab=slu_vocab)
nlg_evaluator = Evaluator.get_evaluator_from_task(task='nlg', vocab=nlg_vocab)

# NOTE(review): the original indentation was lost; the extent of this `if`
# body is inferred from data dependencies (labeled_dataset only exists in this
# branch, the test split is loaded unconditionally) -- confirm against history.
if not opt.testing:
    train_dataset = read_dataset(opt.dataset, choice='train')
    dev_dataset = read_dataset(opt.dataset, choice='valid')
    labeled_dataset, unlabeled_dataset = split_dataset(train_dataset, opt.labeled)
    split_sizes = (len(labeled_dataset), len(unlabeled_dataset), len(dev_dataset))
    logger.info("Labeled/Unlabeled train and dev dataset size is: %s/%s and %s" % split_sizes)
    # After the sizes are logged, the unlabeled pool is rebound to the
    # concatenation of the labeled and unlabeled parts.
    unlabeled_dataset = labeled_dataset + unlabeled_dataset

test_dataset = read_dataset(opt.dataset, choice='test')
test_size = len(test_dataset)
logger.info("Test dataset size is: %s" % (test_size))
set_random_seed(opt.seed) device = set_torch_device(opt.deviceId) logger.info("Initialization finished ...") logger.info("Parameters: " + str(json.dumps(vars(opt)))) logger.info("Output path is: %s" % (exp_path)) logger.info("Random seed is set to: %d" % (opt.seed)) logger.info( "Use GPU with index %s" % (opt.deviceId) if opt.deviceId >= 0 else "Use CPU as target torch device") ##### Vocab and Dataset Reader ##### vocab = Vocab(dataset=opt.dataset, task=task) logger.info("Vocab size for input utterance is: %s" % (len(vocab.word2id))) logger.info("Vocab size for output slot label is: %s" % (len(vocab.slot2id))) logger.info("Vocab size for output intent is: %s" % (len(vocab.int2id))) evaluator = Evaluator.get_evaluator_from_task(task=task, vocab=vocab) if not opt.testing: train_dataset, dev_dataset = read_dataset( opt.dataset, choice='train'), read_dataset(opt.dataset, choice='valid') train_dataset, _ = split_dataset(train_dataset, opt.labeled) logger.info("Train and dev dataset size is: %s and %s" % (len(train_dataset), len(dev_dataset))) test_dataset = read_dataset(opt.dataset, choice='test') logger.info("Test dataset size is: %s" % (len(test_dataset))) ##### Model Construction and Init ##### if not opt.testing: opt.vocab_size, opt.slot_num, opt.intent_num = len(vocab.word2id), len( vocab.slot2id), len(vocab.int2id) opt.pad_token_idxs = {