Python PTBDataReaderの例

プログラミング言語: Python

名前空間/パッケージ名: text_corrector_data_readers

クラス/型: PTBDataReader

hotexamples.comのコード掲載数: 4

Python PTBDataReader - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのtext_corrector_data_readers.PTBDataReaderの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

PTBDataReader(4)

よく使われるメソッド

PTBDataReader (4)

コード例 #1

ファイルを表示

ファイル: correct_text.py プロジェクト: floydhub/deep-text-corrector

def main(_):
    """Train, batch-decode, or interactively correct, as selected by FLAGS.

    ``FLAGS.decode_sentence`` corrects sentences read with ``raw_input()``
    until the user types "no" (case-insensitive); ``FLAGS.decode`` decodes
    the sentences read from ``FLAGS.test_path``; with neither flag set a
    model is trained and ``copy_train_data()`` is called afterwards.

    Args:
        _: Ignored.

    Raises:
        ValueError: If ``FLAGS.config`` or ``FLAGS.data_reader_type`` is not
            one of the names handled below.
    """
    # Resolve the configuration object from its name, rejecting unknown names
    # up front.
    config_name = FLAGS.config
    if config_name not in ("TestConfig", "DefaultMovieDialogConfig",
                           "DefaultPTBConfig"):
        raise ValueError("config argument not recognized; must be one of: "
                         "TestConfig, DefaultPTBConfig, "
                         "DefaultMovieDialogConfig")
    if config_name == "TestConfig":
        config = TestConfig()
    elif config_name == "DefaultMovieDialogConfig":
        config = DefaultMovieDialogConfig()
    else:
        config = DefaultPTBConfig()

    # Both decoding modes use the "model" directory under the input path;
    # training uses the one under the output path.  Create it if missing.
    decoding = FLAGS.decode or FLAGS.decode_sentence
    model_root = FLAGS.input_path if decoding else FLAGS.output_path
    model_path = os.path.join(model_root, "model")
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # The step budget comes from the command line rather than the config.
    config.max_steps = FLAGS.num_steps

    # Pick the reader class by name, then build it over the training data.
    reader_type = FLAGS.data_reader_type
    if reader_type == "MovieDialogReader":
        reader_cls = MovieDialogReader
    elif reader_type == "PTBDataReader":
        reader_cls = PTBDataReader
    else:
        raise ValueError("data_reader_type argument %s not recognized; must be "
                         "one of: MovieDialogReader, PTBDataReader" % reader_type)
    data_reader = reader_cls(config, FLAGS.train_path)

    if FLAGS.decode_sentence:
        # Interactive mode: keep correcting typed sentences until "no".
        with tf.Session() as session:
            model = create_model(session, True, model_path, config=config)
            prompt = "Enter a sentence you'd like to correct"
            while True:
                print(prompt)
                typed = raw_input()
                if typed.lower() == 'no':
                    break
                # The training tokens are re-read for every sentence.
                decode_sentence(
                    session, model=model, data_reader=data_reader,
                    sentence=typed,
                    corrective_tokens=data_reader.read_tokens(FLAGS.train_path))
                prompt = "Enter a sentence you'd like to correct or press NO"
        return

    if FLAGS.decode:
        # Batch mode: decode every sentence read from the test path.
        with tf.Session() as session:
            model = create_model(session, True, model_path, config=config)
            print("Loaded model. Beginning decoding.")
            test_tokens = data_reader.read_tokens(FLAGS.test_path)
            train_tokens = data_reader.read_tokens(FLAGS.train_path)
            decodings = decode(session, model=model, data_reader=data_reader,
                               data_to_decode=test_tokens,
                               corrective_tokens=train_tokens)
            # NOTE(review): this loop only iterates the decodings and flushes
            # stdout; it does not print the tokens itself.  Presumably
            # decode() does its own printing -- confirm.
            for _decoded in decodings:
                sys.stdout.flush()
        return

    print("Training model.")
    train(data_reader, FLAGS.train_path, FLAGS.val_path, model_path)
    copy_train_data()

コード例 #2

ファイルを表示

ファイル: correct_text.py プロジェクト: fsx950223/deep-text-corrector

def main(_):
    """Train a model, or decode the test sentences when ``FLAGS.decode`` is set.

    Args:
        _: Ignored.

    Raises:
        ValueError: If ``FLAGS.config`` or ``FLAGS.data_reader_type`` is not
            one of the names handled below.
    """
    # Resolve the configuration object from its name, rejecting unknown names
    # up front.
    config_name = FLAGS.config
    if config_name not in ("TestConfig", "DefaultMovieDialogConfig",
                           "DefaultPTBConfig"):
        raise ValueError("config argument not recognized; must be one of: "
                         "TestConfig, DefaultPTBConfig, "
                         "DefaultMovieDialogConfig")
    if config_name == "TestConfig":
        config = TestConfig()
    elif config_name == "DefaultMovieDialogConfig":
        config = DefaultMovieDialogConfig()
    else:
        config = DefaultPTBConfig()

    # Pick the reader class by name, then build it over the training data.
    reader_type = FLAGS.data_reader_type
    if reader_type == "MovieDialogReader":
        reader_cls = MovieDialogReader
    elif reader_type == "PTBDataReader":
        reader_cls = PTBDataReader
    else:
        raise ValueError("data_reader_type argument not recognized; must be "
                         "one of: MovieDialogReader, PTBDataReader")
    data_reader = reader_cls(config, FLAGS.train_path)

    if not FLAGS.decode:
        print("Training model.")
        train(data_reader, FLAGS.train_path, FLAGS.val_path, FLAGS.model_path)
        return

    # Decode every sentence read from the test path, one output line each.
    with tf.Session() as session:
        model = create_model(session, True, FLAGS.model_path, config=config)
        print("Loaded model. Beginning decoding.")
        test_tokens = data_reader.read_tokens(FLAGS.test_path)
        decodings = decode(session, model=model, data_reader=data_reader,
                           data_to_decode=test_tokens, verbose=False)
        for decoded_tokens in decodings:
            print(" ".join(decoded_tokens))
            # Flush per sentence so output appears as it is produced.
            sys.stdout.flush()

コード例 #3

ファイルを表示

ファイル: correct_text.py プロジェクト: thangduong/deep-text-corrector

def main(_):
    """Run the task selected by ``FLAGS.task``: decode, serve, or train.

    * ``"decode"``: decodes ``FLAGS.test_string`` when it is non-empty,
      otherwise the sentences read from ``FLAGS.test_path``, and prints the
      results.
    * ``"serve"``: starts an ``HTTPServer`` on port 8080 using
      ``HttpHandler`` and serves until interrupted.
    * anything else: trains a model.

    Args:
        _: Ignored.

    Raises:
        ValueError: If ``FLAGS.config`` or ``FLAGS.data_reader_type`` is not
            one of the names handled below.
    """
    # Determine which config we should use.
    if FLAGS.config == "TestConfig":
        config = TestConfig()
    elif FLAGS.config == "DefaultMovieDialogConfig":
        config = DefaultMovieDialogConfig()
    elif FLAGS.config == "DefaultPTBConfig":
        config = DefaultPTBConfig()
    elif FLAGS.config == "DefaultWikiConfig":
        config = DefaultWikiConfig()
    else:
        raise ValueError("config argument not recognized; must be one of: "
                         "TestConfig, DefaultPTBConfig, DefaultWikiConfig, "
                         "DefaultMovieDialogConfig")

    # Determine which kind of DataReader we want to use.  The single-file
    # readers take the flag paths unchanged; the Wiki reader replaces them
    # with a pair of files inside the given directories.
    train_path = FLAGS.train_path
    val_path = FLAGS.val_path
    if FLAGS.data_reader_type == "MovieDialogReader":
        data_reader = MovieDialogReader(config, FLAGS.train_path)
    elif FLAGS.data_reader_type == "PTBDataReader":
        data_reader = PTBDataReader(config, FLAGS.train_path)
    elif FLAGS.data_reader_type == "WikiDataReader":
        train_path = [
            os.path.join(FLAGS.train_path,
                         "wiki2017CleanChainLifetime.enz_train.txt"),
            os.path.join(FLAGS.train_path,
                         "wiki2017CleanChainLifetime.enu_train.txt")
        ]
        val_path = [
            os.path.join(FLAGS.val_path,
                         "wiki2017CleanChainLifetime.enz_val.txt"),
            os.path.join(FLAGS.val_path,
                         "wiki2017CleanChainLifetime.enu_val.txt")
        ]
        data_reader = WikiDataReader(config, train_path)
    else:
        raise ValueError(
            "data_reader_type argument not recognized; must be "
            "one of: MovieDialogReader, PTBDataReader, WikiDataReader")

    if FLAGS.task == "decode":
        print('creating session')
        # Decode test sentences.
        with tf.Session() as session:
            print("creating model")
            model = create_model(session,
                                 True,
                                 FLAGS.model_path,
                                 config=config)
            print("Loaded model. Beginning decoding.")
            if FLAGS.test_string != "":
                # A literal string on the command line takes precedence over
                # the test file.
                decodings = decode_sentence(session, model, data_reader,
                                            FLAGS.test_string)
            else:
                decodings = decode(session,
                                   model=model,
                                   data_reader=data_reader,
                                   data_to_decode=data_reader.read_tokens(
                                       FLAGS.test_path),
                                   verbose=True)
            # Write the decoded tokens to stdout: first the decodings object
            # as returned, then one space-joined line per decoding.
            print(decodings)
            for tokens in decodings:
                print(" ".join(tokens))
                sys.stdout.flush()
    elif FLAGS.task == "serve":
        print('creating session')
        with tf.Session() as session:
            print("creating model")
            model = create_model(session,
                                 True,
                                 FLAGS.model_path,
                                 config=config)
            # The handler class gets its model, reader and session through
            # class attributes.
            HttpHandler.model = model
            HttpHandler.data_reader = data_reader
            HttpHandler.session = session
            HttpHandler.model_name = FLAGS.model_path
            # Listens on all interfaces, port 8080.
            httpd = HTTPServer(("0.0.0.0", 8080), HttpHandler)
            try:
                print("Starting server...")
                httpd.serve_forever()
            except KeyboardInterrupt:
                # Ctrl-C is the expected way to stop the server.
                pass
            finally:
                # Release the listening socket even when serve_forever()
                # fails with something other than KeyboardInterrupt.
                httpd.server_close()
    else:
        print("Training model.")
        train(data_reader, train_path, val_path, FLAGS.model_path)

コード例 #4

ファイルを表示

def main(_):
    """Run the mode selected by the flags: correct, evaluate, decode or train.

    When none of ``FLAGS.correct``, ``FLAGS.evaluate`` and ``FLAGS.decode``
    is set, a model is trained and the reader's ``token_to_id`` mapping and
    the corrective tokens are pickled into ``FLAGS.model_path``.  In the
    other modes the pickled ``token_to_id`` mapping is loaded back and used
    to rebuild the data reader.

    Args:
        _: Ignored.

    Raises:
        ValueError: If ``FLAGS.config`` or ``FLAGS.data_reader_type`` is not
            one of the names handled below.
    """
    import pickle

    # Determine which config we should use.
    if FLAGS.config == "TestConfig":
        config = TestConfig()
    elif FLAGS.config == "DefaultMovieDialogConfig":
        config = DefaultMovieDialogConfig()
    elif FLAGS.config == "DefaultPTBConfig":
        config = DefaultPTBConfig()
    elif FLAGS.config == "DefaultFCEConfig":
        config = DefaultFCEConfig()
    else:
        raise ValueError("config argument not recognized; must be one of: "
                         "TestConfig, DefaultPTBConfig, DefaultFCEConfig, "
                         "DefaultMovieDialogConfig")

    # Training is the default when no inference flag is given.
    is_train = not (FLAGS.correct or FLAGS.evaluate or FLAGS.decode)

    # Determine which kind of DataReader we want to use.
    # NOTE(review): outside training the PTB and FCE readers built here are
    # replaced below; only MovieDialogReader skips the construction.  Confirm
    # whether the other two can skip it as well.
    if FLAGS.data_reader_type == "MovieDialogReader":
        data_reader = MovieDialogReader(config, FLAGS.train_path) if is_train else None
    elif FLAGS.data_reader_type == "PTBDataReader":
        data_reader = PTBDataReader(config, FLAGS.train_path)
    elif FLAGS.data_reader_type == "FCEReader":
        data_reader = FCEReader(config, FLAGS.train_path)
    else:
        # The message lists every accepted value, including FCEReader.
        raise ValueError("data_reader_type argument not recognized; must be "
                         "one of: MovieDialogReader, PTBDataReader, FCEReader")

    if not is_train:
        # Rebuild the reader from the vocabulary saved at training time.
        # SECURITY: pickle.load can execute arbitrary code; only point
        # FLAGS.model_path at trusted model directories.
        with open(os.path.join(FLAGS.model_path, "token_to_id.pickle"), "rb") as f:
            token_to_id = pickle.load(f)
        if FLAGS.data_reader_type == "MovieDialogReader":
            data_reader = MovieDialogReader(config, None, token_to_id, dropout_prob=0.25, replacement_prob=0.25, dataset_copies=1)
        elif FLAGS.data_reader_type == "PTBDataReader":
            data_reader = PTBDataReader(config, None, token_to_id, dropout_prob=0.25, replacement_prob=0.25, dataset_copies=1)
        elif FLAGS.data_reader_type == "FCEReader":
            data_reader = FCEReader(config, None, token_to_id, dropout_prob=0.25, replacement_prob=0.25, dataset_copies=1)
        # The corrective tokens are recomputed from the training data; the
        # pickled copy written during training is not read back here.
        corrective_tokens = get_corrective_tokens(data_reader, FLAGS.train_path)
    else:
        corrective_tokens = get_corrective_tokens(data_reader, FLAGS.train_path)
        sys.stdout.flush()
        # Persist what the non-training modes need next to the model.
        with open(os.path.join(FLAGS.model_path, "corrective_tokens.pickle"), "wb") as f:
            pickle.dump(corrective_tokens, f)
        with open(os.path.join(FLAGS.model_path, "token_to_id.pickle"), "wb") as f:
            pickle.dump(data_reader.token_to_id, f)

    sess_config = tf.ConfigProto(device_count={"CPU": config.cpu_num}, inter_op_parallelism_threads=0, intra_op_parallelism_threads=0)

    if FLAGS.correct:
        # Interactive mode: correct typed sentences until the user enters
        # "exit" (case-insensitive); empty input is ignored.
        with tf.Session(config=sess_config) as session:
            model = create_model(session, True, FLAGS.model_path, config=config)
            print("Loaded model. Beginning correcting.")
            while True:
                sentence = input("Input sentence or exit\n")
                if not sentence:
                    continue
                if sentence.lower() == 'exit':
                    break
                decode_sentence(session, model=model, data_reader=data_reader, sentence=sentence, corrective_tokens=corrective_tokens, verbose=True)
                sys.stdout.flush()
    elif FLAGS.evaluate:
        with tf.Session(config=sess_config) as session:
            model = create_model(session, True, FLAGS.model_path, config=config)
            print("Loaded model. Beginning evaluating.")
            errors = evaluate_accuracy(session, model=model, data_reader=data_reader, corrective_tokens=corrective_tokens, test_path=FLAGS.test_path)
            print(errors)
            sys.stdout.flush()
    elif FLAGS.decode:
        # Decode test sentences.
        with tf.Session(config=sess_config) as session:
            model = create_model(session, True, FLAGS.model_path, config=config)
            print("Loaded model. Beginning decoding.")
            decodings = decode(session, model=model, data_reader=data_reader,
                               data_to_decode=data_reader.read_tokens(
                                   FLAGS.test_path), corrective_tokens=corrective_tokens, verbose=False)
            # Write the decoded tokens to stdout.
            for tokens in decodings:
                print(" ".join(tokens))
                sys.stdout.flush()
    else:
        print("Training model.")
        sys.stdout.flush()
        train(data_reader, FLAGS.train_path, FLAGS.val_path, FLAGS.model_path)