Ejemplo n.º 1
0
    # Excerpt from the interior of a decoding routine: load the vocabularies,
    # read the test set, and wrap it in a single-instance DataStream.

    # The word vocabulary comes from FLAGS.word_vec_path; every other
    # vocabulary is read from a file named after model_prefix.
    word_vocab = Vocab(FLAGS.word_vec_path, fileformat='txt2')
    print('word_vocab: {}'.format(word_vocab.word_vecs.shape))
    # NOTE(review): char_vocab and POS_vocab are assigned only when their
    # flags are set, yet both are passed to DataStream below. Unless they are
    # initialised before this excerpt, a disabled flag raises NameError
    # (UnboundLocalError inside a function) at that call -- confirm.
    if FLAGS.with_char:
        char_vocab = Vocab(model_prefix + ".char_vocab", fileformat='txt2')
        print('char_vocab: {}'.format(char_vocab.word_vecs.shape))
    if FLAGS.with_POS:
        POS_vocab = Vocab(model_prefix + ".POS_vocab", fileformat='txt2')
        print('POS_vocab: {}'.format(POS_vocab.word_vecs.shape))
    action_vocab = Vocab(model_prefix + ".action_vocab", fileformat='txt2')
    print('action_vocab: {}'.format(action_vocab.word_vecs.shape))
    feat_vocab = Vocab(model_prefix + ".feat_vocab", fileformat='txt2')
    print('feat_vocab: {}'.format(feat_vocab.word_vecs.shape))

    # Choose the reader: use_dep takes precedence, then the 'fof' input
    # format, otherwise the generic reader. Only the latter two receive
    # the isLower option.
    print('Loading test set.')
    if use_dep:
        testset = NP2P_data_stream.read_Testset(in_path)
    elif FLAGS.infile_format == 'fof':
        testset = NP2P_data_stream.read_generation_datasets_from_fof(in_path, isLower=FLAGS.isLower)
    else:
        testset = NP2P_data_stream.read_all_GenerationDatasets(in_path, isLower=FLAGS.isLower)
    print('Number of samples: {}'.format(len(testset)))

    print('Build DataStream ... ')
    # The batch size is hard-coded to 1, so the assert below is trivially
    # true here. Presumably it records that the downstream decoding code
    # handles one instance per batch -- TODO confirm.
    batch_size=1
    assert batch_size == 1

    # Shuffling and looping are disabled, sorting is enabled, and the stream
    # is built with decode=True.
    devDataStream = NP2P_data_stream.DataStream(testset,
            word_vocab=word_vocab, char_vocab=char_vocab, POS_vocab=POS_vocab, feat_vocab=feat_vocab, action_vocab=action_vocab,
            options=FLAGS, isShuffle=False, isLoop=False, isSort=True, batch_size=batch_size, decode=True)
    # The variable is called devDataStream, but the log lines label it
    # "testDataStream"; both refer to the same stream.
    print('Number of instances in testDataStream: {}'.format(devDataStream.get_num_instance()))
    print('Number of batches in testDataStream: {}'.format(devDataStream.get_num_batch()))
Ejemplo n.º 2
0
    # Excerpt from the interior of a decoding routine: load the vocabularies
    # and read the test set before the DataStream is built.

    # The word vocabulary comes from FLAGS.word_vec_path; every other
    # vocabulary is read from a file named after model_prefix.
    word_vocab = Vocab(FLAGS.word_vec_path, fileformat='txt2')
    print('word_vocab: {}'.format(word_vocab.word_vecs.shape))
    # NOTE(review): char_vocab and POS_vocab are assigned only when their
    # flags are set. char_vocab is passed to the DataStream call that follows
    # this excerpt; unless it is initialised earlier, a disabled flag raises
    # NameError (UnboundLocalError inside a function) there -- confirm.
    if FLAGS.with_char:
        char_vocab = Vocab(model_prefix + ".char_vocab", fileformat='txt2')
        print('char_vocab: {}'.format(char_vocab.word_vecs.shape))
    if FLAGS.with_POS:
        POS_vocab = Vocab(model_prefix + ".POS_vocab", fileformat='txt2')
        print('POS_vocab: {}'.format(POS_vocab.word_vecs.shape))
    action_vocab = Vocab(model_prefix + ".action_vocab", fileformat='txt2')
    print('action_vocab: {}'.format(action_vocab.word_vecs.shape))
    feat_vocab = Vocab(model_prefix + ".feat_vocab", fileformat='txt2')
    print('feat_vocab: {}'.format(feat_vocab.word_vecs.shape))

    # Choose the reader: use_dep takes precedence, then the 'fof' input
    # format, otherwise the generic reader. Every reader is given
    # ulfdep=args.ulf; what that option changes is defined by the readers,
    # which are not visible here.
    print('Loading test set.')
    if use_dep:
        testset = NP2P_data_stream.read_Testset(in_path, ulfdep=args.ulf)
    elif FLAGS.infile_format == 'fof':
        testset = NP2P_data_stream.read_generation_datasets_from_fof(
            in_path, isLower=FLAGS.isLower, ulfdep=args.ulf)
    else:
        testset = NP2P_data_stream.read_all_GenerationDatasets(
            in_path, isLower=FLAGS.isLower, ulfdep=args.ulf)
    print('Number of samples: {}'.format(len(testset)))

    print('Build DataStream ... ')
    # The batch size is hard-coded to 1, so the assert below is trivially
    # true here. Presumably it records that the downstream decoding code
    # handles one instance per batch -- TODO confirm.
    batch_size = 1
    assert batch_size == 1

    devDataStream = NP2P_data_stream.DataStream(testset,
                                                word_vocab=word_vocab,
                                                char_vocab=char_vocab,
Ejemplo n.º 3
0
    # Excerpt from the interior of a decoding routine: word_vocab is created
    # before this excerpt; the remaining vocabularies and the test set are
    # loaded here.
    print('word_vocab: {}'.format(word_vocab.word_vecs.shape))
    # NOTE(review): char_vocab and POS_vocab are assigned only when their
    # flags are set. char_vocab is passed to the DataStream call that follows
    # this excerpt; unless it is initialised earlier, a disabled flag raises
    # NameError (UnboundLocalError inside a function) there -- confirm.
    if FLAGS.with_char:
        char_vocab = Vocab(model_prefix + ".char_vocab", fileformat='txt2')
        print('char_vocab: {}'.format(char_vocab.word_vecs.shape))
    if FLAGS.with_POS:
        POS_vocab = Vocab(model_prefix + ".POS_vocab", fileformat='txt2')
        print('POS_vocab: {}'.format(POS_vocab.word_vecs.shape))
    # Overwrites FLAGS.feat_num with a value derived from the cache size.
    # NOTE(review): the constants 72 and 5 are not explained in the visible
    # code -- presumably a fixed feature count plus a per-cache-slot count;
    # verify against the feature extractor.
    FLAGS.feat_num = 72 + args.cache_size * 5
    action_vocab = Vocab(model_prefix + ".action_vocab", fileformat='txt2')
    print('action_vocab: {}'.format(action_vocab.word_vecs.shape))
    feat_vocab = Vocab(model_prefix + ".feat_vocab", fileformat='txt2')
    print('feat_vocab: {}'.format(feat_vocab.word_vecs.shape))

    # Choose the reader: use_dep takes precedence (and reads with
    # decode=True), then the 'fof' input format, otherwise the generic
    # reader. Only the latter two receive the isLower option.
    print('Loading test set.')
    if use_dep:
        testset = NP2P_data_stream.read_Testset(in_path, decode=True)
    elif FLAGS.infile_format == 'fof':
        testset = NP2P_data_stream.read_generation_datasets_from_fof(
            in_path, isLower=FLAGS.isLower)
    else:
        testset = NP2P_data_stream.read_all_GenerationDatasets(
            in_path, isLower=FLAGS.isLower)
    print('Number of samples: {}'.format(len(testset)))

    print('Build DataStream ... ')
    batch_size = 1
    # This conditional is a no-op: batch_size is already 1 from the line
    # above, so the value is 1 for every mode and the assert always holds.
    if mode in ['beam_search', 'beam_evaluate']: batch_size = 1
    assert batch_size == 1
    devDataStream = NP2P_data_stream.DataStream(testset,
                                                word_vocab=word_vocab,
                                                char_vocab=char_vocab,
Ejemplo n.º 4
0
    # Excerpt from the interior of a decoding routine: load the optional and
    # model-specific vocabularies, then read the test set.

    # NOTE(review): char_vocab and POS_vocab are assigned only when their
    # flags are set. If later code uses them unconditionally and they are
    # not initialised before this excerpt, a disabled flag raises NameError
    # (UnboundLocalError inside a function) -- confirm.
    if FLAGS.with_char:
        char_vocab = Vocab(model_prefix + ".char_vocab", fileformat='txt2')
        print('char_vocab: {}'.format(char_vocab.word_vecs.shape))
    if FLAGS.with_POS:
        POS_vocab = Vocab(model_prefix + ".POS_vocab", fileformat='txt2')
        print('POS_vocab: {}'.format(POS_vocab.word_vecs.shape))
    # Overwrites FLAGS.feat_num with a value derived from the cache size.
    # NOTE(review): the constants 72 and 5 are not explained in the visible
    # code -- presumably a fixed feature count plus a per-cache-slot count;
    # verify against the feature extractor.
    FLAGS.feat_num = 72 + args.cache_size * 5
    action_vocab = Vocab(model_prefix + ".action_vocab", fileformat='txt2')
    print('action_vocab: {}'.format(action_vocab.word_vecs.shape))
    feat_vocab = Vocab(model_prefix + ".feat_vocab", fileformat='txt2')
    print('feat_vocab: {}'.format(feat_vocab.word_vecs.shape))

    # Choose the reader: use_dep takes precedence (and reads with
    # decode=True), then the 'fof' input format, otherwise the generic
    # reader. Every reader is given ulfdep=args.ulf; what that option
    # changes is defined by the readers, which are not visible here.
    print('Loading test set.')
    if use_dep:
        testset = NP2P_data_stream.read_Testset(in_path,
                                                decode=True,
                                                ulfdep=args.ulf)
    elif FLAGS.infile_format == 'fof':
        testset = NP2P_data_stream.read_generation_datasets_from_fof(
            in_path, isLower=FLAGS.isLower, ulfdep=args.ulf)
    else:
        testset = NP2P_data_stream.read_all_GenerationDatasets(
            in_path, isLower=FLAGS.isLower, ulfdep=args.ulf)
    print('Number of samples: {}'.format(len(testset)))

    print('Build DataStream ... ')
    batch_size = 1
    # This conditional is a no-op: batch_size is already 1 from the line
    # above, so the value is 1 for every mode and the assert always holds.
    if mode in ['beam_search', 'beam_evaluate']: batch_size = 1
    assert batch_size == 1
    devDataStream = NP2P_data_stream.DataStream(testset,
                                                word_vocab=word_vocab,