Esempio n. 1
0
# ----- Initialization logging -----
logger.info("Initialization finished ...")
# json.dumps already returns str (the original wrapped it in a redundant
# str()); lazy %-style logging args defer formatting until the record is
# actually emitted.
logger.info("Parameters: %s", json.dumps(vars(opt)))
logger.info("Output path is: %s", exp_path)
logger.info("Random seed is set to: %d", opt.seed)
# Report the target device per sub-task; a negative device index is the
# CPU sentinel (presumably mirrors torch device-id conventions).
if opt.deviceIds[0] >= 0:
    logger.info("Use GPU with index %s as target slu device", opt.deviceIds[0])
else:
    logger.info("Use CPU as target slu torch device")
if opt.deviceIds[1] >= 0:
    logger.info("Use GPU with index %s as target nlg device", opt.deviceIds[1])
else:
    logger.info("Use CPU as target nlg torch device")

##### Vocab and Dataset Reader #####
# One vocabulary per task (slu / nlg / lm) over the same dataset.
slu_vocab = Vocab(dataset=opt.dataset, task='slu')
nlg_vocab = Vocab(dataset=opt.dataset, task='nlg')
lm_vocab = Vocab(dataset=opt.dataset, task='lm')
slu_evaluator = Evaluator.get_evaluator_from_task(task='slu', vocab=slu_vocab)
nlg_evaluator = Evaluator.get_evaluator_from_task(task='nlg', vocab=nlg_vocab)

if not opt.testing:
    train_dataset = read_dataset(opt.dataset, choice='train')
    dev_dataset = read_dataset(opt.dataset, choice='valid')
    # Carve out opt.labeled examples as the labeled pool; the rest are
    # treated as unlabeled.
    labeled_dataset, unlabeled_dataset = split_dataset(train_dataset,
                                                       opt.labeled)
    logger.info(
        "Labeled/Unlabeled train and dev dataset size is: %s/%s and %s",
        len(labeled_dataset), len(unlabeled_dataset), len(dev_dataset))
    # NOTE(review): the "unlabeled" pool deliberately re-includes the labeled
    # examples — presumably so semi-supervised training sees all utterances.
    unlabeled_dataset = labeled_dataset + unlabeled_dataset
test_dataset = read_dataset(opt.dataset, choice='test')
logger.info("Test dataset size is: %s", len(test_dataset))
Esempio n. 2
0
# ----- Environment setup and initialization logging -----
set_random_seed(opt.seed)
device = set_torch_device(opt.deviceId)
logger.info("Initialization finished ...")
# json.dumps already returns str (the original wrapped it in a redundant
# str()); lazy %-style logging args defer formatting until emission.
logger.info("Parameters: %s", json.dumps(vars(opt)))
logger.info("Output path is: %s", exp_path)
logger.info("Random seed is set to: %d", opt.seed)
# A negative deviceId is the CPU sentinel (presumably mirrors torch
# device-id conventions).
if opt.deviceId >= 0:
    logger.info("Use GPU with index %s", opt.deviceId)
else:
    logger.info("Use CPU as target torch device")

##### Vocab and Dataset Reader #####
vocab = Vocab(dataset=opt.dataset, task=task)
logger.info("Vocab size for input utterance is: %s", len(vocab.word2id))
logger.info("Vocab size for output slot label is: %s", len(vocab.slot2id))
logger.info("Vocab size for output intent is: %s", len(vocab.int2id))
evaluator = Evaluator.get_evaluator_from_task(task=task, vocab=vocab)

if not opt.testing:
    train_dataset = read_dataset(opt.dataset, choice='train')
    dev_dataset = read_dataset(opt.dataset, choice='valid')
    # Keep only the labeled fraction for supervised training; the
    # unlabeled remainder is discarded in this script.
    train_dataset, _ = split_dataset(train_dataset, opt.labeled)
    logger.info("Train and dev dataset size is: %s and %s",
                len(train_dataset), len(dev_dataset))
test_dataset = read_dataset(opt.dataset, choice='test')
logger.info("Test dataset size is: %s", len(test_dataset))

##### Model Construction and Init #####
if not opt.testing:
    opt.vocab_size, opt.slot_num, opt.intent_num = len(vocab.word2id), len(
        vocab.slot2id), len(vocab.int2id)
    opt.pad_token_idxs = {