Code Example #1
File: main.py  Project: wuxiangli91/tf-lm
        print('Type a seed word or sentence:')
        seed = input()  # raw_input() under Python 2
        print('Start generating text for the seed "{0}"...'.format(seed))

        # write the seed to a temporary file and point the data reader at it
        with io.open('tmp', 'w') as out:
            out.write(u'{0}'.format(seed))

        config['predict_next'] = 'tmp'

        # pick the character- or word-level data reader, depending on the config
        if 'char' in config:
            data = lm_data.charSentenceDataRescore(config, eval_config, TRAIN,
                                                   VALID, TEST)
        else:
            data = lm_data.wordSentenceDataRescore(config, eval_config, TRAIN,
                                                   VALID, TEST)

        all_data, vocab_size, _ = data.get_data()

    # character-level training, either batched (across sentence boundaries) or sentence by sentence
    elif 'char' in config:
        print('Character-level data')
        if 'per_sentence' in config:
            print('Sentence per sentence')
            data = lm_data.charSentenceData(config, eval_config, TRAIN, VALID,
                                            TEST)
            all_data, vocab_size, total_length, seq_lengths = data.get_data()

            # set num_steps = total length of each (padded) sentence
            config['num_steps'] = total_length

            # write maximum sentence length to file
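
The excerpt of Example #1 ends at this comment. A minimal sketch of the omitted write (an assumption, modeled on the equivalent step in Example #2 below, which stores the value under config['save_path']):

            # assumption: persist the padded sentence length so a rescoring run
            # can restore config['num_steps'] without re-reading the training data
            with open('{0}max_length'.format(config['save_path']), 'w') as max_length_f:
                max_length_f.write(str(total_length))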
Code Example #2
File: main.py  Project: flovera1/tf-languagemodel
    # character-level training, in batches (across sentence boundaries)
    if 'char' in config:
        data = lm_data.charData(config, eval_config)
        all_data, vocab_size, _ = data.get_data()

    # word-level training on the sentence level (sentences are padded to the maximum sentence length)
    elif 'per_sentence' in config:

        if 'rescore' in config:
            # read the maximum sentence length saved when the model was trained
            with open('{0}max_length'.format(config['trained_model'])) as f:
                max_length = int(f.readline().strip())
            # set num_steps = total length of each (padded) sentence
            config['num_steps'] = max_length

            data = lm_data.wordSentenceDataRescore(config, eval_config)
            all_data, vocab_size, _ = data.get_data()

        else:
            data = lm_data.wordSentenceData(config, eval_config)
            all_data, vocab_size, total_length, seq_lengths = data.get_data()

            # set num_steps = total length of each (padded) sentence
            config['num_steps'] = total_length

            print('Write max length of sentence to {0}max_length'.format(
                config['save_path']))

            # write maximum sentence length to file
            max_length_f = open('{0}max_length'.format(config['save_path']),
                                'w')
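
The excerpt of Example #2 ends right after the file is opened. A minimal sketch of the likely continuation (an assumption; the remaining lines are not shown), writing the padded length held in total_length and closing the handle:

            # assumption: record the padded sentence length and close the handle,
            # mirroring the read in the 'rescore' branch above
            max_length_f.write(str(total_length))
            max_length_f.close()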