"WARNING: You have a CUDA device, so you should probably run with --deviceId [1|2|3]" ) else: torch.cuda.manual_seed(opt.random_seed) np.random.seed(opt.random_seed) dataroot = opt.dataroot tag_vocab_dir = dataroot + '/vocab.slot' class_vocab_dir = dataroot + '/vocab.intent' train_data_dir = dataroot + '/train' valid_data_dir = dataroot + '/valid' test_data_dir = dataroot + '/test' if not opt.testing: tag_to_idx, idx_to_tag = vocab_reader.read_vocab_file(tag_vocab_dir, bos_eos=opt.enc_dec) class_to_idx, idx_to_class = vocab_reader.read_vocab_file(class_vocab_dir, bos_eos=False) else: tag_to_idx, idx_to_tag = vocab_reader.read_vocab_file(opt.read_vocab + '.tag', bos_eos=False, no_pad=True, no_unk=True) class_to_idx, idx_to_class = vocab_reader.read_vocab_file(opt.read_vocab + '.class', bos_eos=False, no_pad=True, no_unk=True) logger.info("Vocab size: %s %s" % (len(tag_to_idx), len(class_to_idx)))
# Seed the CUDA and NumPy random number generators from opt.random_seed.
# NOTE(review): the wording of the message below suggests this check sits under
# a CUDA-availability guard that is not visible in this chunk -- confirm.
if opt.device.type == 'cuda':
    torch.cuda.manual_seed(opt.random_seed)
else:
    logger.info("WARNING: You have a CUDA device, so you should probably run with --deviceId [1|2|3]")
np.random.seed(opt.random_seed)

# Vocabulary files and data splits all live directly under the data root.
dataroot = opt.dataroot
tag_vocab_dir, class_vocab_dir = dataroot + '/vocab.slot', dataroot + '/vocab.intent'
train_data_dir, valid_data_dir = dataroot + '/train', dataroot + '/valid'
test_data_dir = dataroot + '/test'

# Both vocabularies are always loaded from the data root, in training and in
# testing mode alike; only the tag vocabulary's bos_eos flag follows opt.enc_dec.
tag_to_idx, idx_to_tag = vocab_reader.read_vocab_file(tag_vocab_dir, bos_eos=opt.enc_dec)
class_to_idx, idx_to_class = vocab_reader.read_vocab_file(class_vocab_dir, bos_eos=False)

# Record the output sizes on the options object and report them.
opt.out_label = len(tag_to_idx)
opt.out_int = len(class_to_idx)
opt.word_lowercase = False
logger.info("Vocab size: %s %s" % (opt.out_label, opt.out_int))

if not opt.testing:
    # Store the vocabularies in the experiment directory.
    vocab_reader.save_vocab(idx_to_tag, os.path.join(exp_path, opt.save_vocab + '.tag'))
    vocab_reader.save_vocab(idx_to_class, os.path.join(exp_path, opt.save_vocab + '.class'))

    # Read the training and validation splits with the vocabularies loaded above.
    train_feats, train_tags, train_class = data_reader.read_seqtag_data_with_class(
        train_data_dir,
        tag_to_idx,
        class_to_idx,
        multiClass=opt.multiClass,
        lowercase=opt.word_lowercase,
    )
    valid_feats, valid_tags, valid_class = data_reader.read_seqtag_data_with_class(
        valid_data_dir,
        tag_to_idx,
        class_to_idx,
        multiClass=opt.multiClass,
        keep_order=opt.testing,
        lowercase=opt.word_lowercase,
    )