# Load the vocabularies saved alongside the trained model.
word_vocab = Vocab(FLAGS.word_vec_path, fileformat='txt2')
print('word_vocab: {}'.format(word_vocab.word_vecs.shape))
char_vocab = None
POS_vocab = None
if FLAGS.with_char:
    char_vocab = Vocab(model_prefix + ".char_vocab", fileformat='txt2')
    print('char_vocab: {}'.format(char_vocab.word_vecs.shape))
if FLAGS.with_POS:
    POS_vocab = Vocab(model_prefix + ".POS_vocab", fileformat='txt2')
    print('POS_vocab: {}'.format(POS_vocab.word_vecs.shape))
# Feature vector size depends on the cache size used by the transition system.
FLAGS.feat_num = 72 + args.cache_size * 5
action_vocab = Vocab(model_prefix + ".action_vocab", fileformat='txt2')
print('action_vocab: {}'.format(action_vocab.word_vecs.shape))
feat_vocab = Vocab(model_prefix + ".feat_vocab", fileformat='txt2')
print('feat_vocab: {}'.format(feat_vocab.word_vecs.shape))

# Load the test set in the appropriate input format.
print('Loading test set.')
if use_dep:
    testset = NP2P_data_stream.read_Testset(in_path, decode=True, ulfdep=args.ulf)
elif FLAGS.infile_format == 'fof':
    testset = NP2P_data_stream.read_generation_datasets_from_fof(
        in_path, isLower=FLAGS.isLower, ulfdep=args.ulf)
else:
    testset = NP2P_data_stream.read_all_GenerationDatasets(
        in_path, isLower=FLAGS.isLower, ulfdep=args.ulf)
print('Number of samples: {}'.format(len(testset)))

# Build the decoding data stream; decoding processes one instance at a time.
print('Build DataStream ... ')
batch_size = 1
assert batch_size == 1
devDataStream = NP2P_data_stream.DataStream(
    testset, word_vocab=word_vocab, char_vocab=char_vocab, POS_vocab=POS_vocab,
    feat_vocab=feat_vocab, action_vocab=action_vocab, options=FLAGS,
    isShuffle=False, isLoop=False, isSort=True, batch_size=batch_size, decode=True)
print('Number of instances in testDataStream: {}'.format(devDataStream.get_num_instance()))
print('Number of batches in testDataStream: {}'.format(devDataStream.get_num_batch()))
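# --- Illustrative sketch only (not part of the original script) ---
# A minimal example of how the resulting DataStream might be consumed during
# decoding, assuming batch_size == 1. The get_batch(i) accessor and the
# decode_one_batch() helper are assumptions / placeholders for illustration;
# the actual repository builds its model graph, restores the trained
# checkpoint, and runs its own beam-search routine over each batch.
def decode_all(data_stream, decode_one_batch):
    """Apply a (hypothetical) per-batch decoder to every test batch."""
    results = []
    for batch_index in range(data_stream.get_num_batch()):
        cur_batch = data_stream.get_batch(batch_index)  # assumed accessor
        results.append(decode_one_batch(cur_batch))     # placeholder decoder
    return results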