def main(_):
    """Entry point: train, batch-decode, or interactively correct sentences.

    The mode is selected by flags: --decode_sentence starts an interactive
    correction loop, --decode decodes FLAGS.test_path to stdout, and
    otherwise the model is trained.
    """
    # Determine which config we should use.
    if FLAGS.config == "TestConfig":
        config = TestConfig()
    elif FLAGS.config == "DefaultMovieDialogConfig":
        config = DefaultMovieDialogConfig()
    elif FLAGS.config == "DefaultPTBConfig":
        config = DefaultPTBConfig()
    else:
        raise ValueError("config argument not recognized; must be one of: "
                         "TestConfig, DefaultPTBConfig, "
                         "DefaultMovieDialogConfig")

    # Set the model path: training writes under output_path, while decoding
    # loads an existing model from input_path.
    if not FLAGS.decode and not FLAGS.decode_sentence:
        model_path = os.path.join(FLAGS.output_path, "model")
    else:
        model_path = os.path.join(FLAGS.input_path, "model")
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # Set the max_steps.
    config.max_steps = FLAGS.num_steps

    # Determine which kind of DataReader we want to use.
    if FLAGS.data_reader_type == "MovieDialogReader":
        data_reader = MovieDialogReader(config, FLAGS.train_path)
    elif FLAGS.data_reader_type == "PTBDataReader":
        data_reader = PTBDataReader(config, FLAGS.train_path)
    else:
        raise ValueError("data_reader_type argument %s not recognized; must be "
                         "one of: MovieDialogReader, PTBDataReader"
                         % FLAGS.data_reader_type)

    if FLAGS.decode_sentence:
        # Correct the user's sentences interactively until they answer "no".
        with tf.Session() as session:
            model = create_model(session, True, model_path, config=config)
            print("Enter a sentence you'd like to correct")
            correct_new_sentence = raw_input()
            while correct_new_sentence.lower() != 'no':
                decode_sentence(session, model=model, data_reader=data_reader,
                                sentence=correct_new_sentence,
                                corrective_tokens=data_reader.read_tokens(
                                    FLAGS.train_path))
                print("Enter a sentence you'd like to correct or press NO")
                correct_new_sentence = raw_input()
    elif FLAGS.decode:
        # Decode test sentences.
        with tf.Session() as session:
            model = create_model(session, True, model_path, config=config)
            print("Loaded model. Beginning decoding.")
            decodings = decode(session, model=model, data_reader=data_reader,
                               data_to_decode=data_reader.read_tokens(
                                   FLAGS.test_path),
                               corrective_tokens=data_reader.read_tokens(
                                   FLAGS.train_path))
            # Write the decoded tokens to stdout.
            for tokens in decodings:
                # BUG FIX: the loop previously only flushed stdout and threw
                # the decoded tokens away; print each decoding.
                print(" ".join(tokens))
                sys.stdout.flush()
    else:
        print("Training model.")
        train(data_reader, FLAGS.train_path, FLAGS.val_path, model_path)
        copy_train_data()
def main(_):
    """Train the correction model, or decode FLAGS.test_path when --decode."""
    # Resolve the --config flag through a dispatch table.
    config_classes = {
        "TestConfig": TestConfig,
        "DefaultMovieDialogConfig": DefaultMovieDialogConfig,
        "DefaultPTBConfig": DefaultPTBConfig,
    }
    if FLAGS.config not in config_classes:
        raise ValueError("config argument not recognized; must be one of: "
                         "TestConfig, DefaultPTBConfig, "
                         "DefaultMovieDialogConfig")
    config = config_classes[FLAGS.config]()

    # Resolve the --data_reader_type flag the same way.
    reader_classes = {
        "MovieDialogReader": MovieDialogReader,
        "PTBDataReader": PTBDataReader,
    }
    if FLAGS.data_reader_type not in reader_classes:
        raise ValueError("data_reader_type argument not recognized; must be "
                         "one of: MovieDialogReader, PTBDataReader")
    data_reader = reader_classes[FLAGS.data_reader_type](config,
                                                         FLAGS.train_path)

    if not FLAGS.decode:
        print("Training model.")
        train(data_reader, FLAGS.train_path, FLAGS.val_path, FLAGS.model_path)
        return

    # Decode test sentences and write each decoding to stdout.
    with tf.Session() as session:
        model = create_model(session, True, FLAGS.model_path, config=config)
        print("Loaded model. Beginning decoding.")
        decodings = decode(
            session, model=model, data_reader=data_reader,
            data_to_decode=data_reader.read_tokens(FLAGS.test_path),
            verbose=False)
        for tokens in decodings:
            print(" ".join(tokens))
            sys.stdout.flush()
def main(_):
    """Entry point: train, batch-decode, or serve the correction model.

    FLAGS.task selects the mode: "decode" corrects FLAGS.test_string (or the
    whole of FLAGS.test_path when no test string is given), "serve" runs an
    HTTP server on port 8080, and any other value trains the model.
    """
    # Determine which config we should use.
    if FLAGS.config == "TestConfig":
        config = TestConfig()
    elif FLAGS.config == "DefaultMovieDialogConfig":
        config = DefaultMovieDialogConfig()
    elif FLAGS.config == "DefaultPTBConfig":
        config = DefaultPTBConfig()
    elif FLAGS.config == "DefaultWikiConfig":
        config = DefaultWikiConfig()
    else:
        raise ValueError("config argument not recognized; must be one of: "
                         "TestConfig, DefaultPTBConfig, DefaultWikiConfig, "
                         "DefaultMovieDialogConfig")

    # Determine which kind of DataReader we want to use.  The Wiki reader
    # takes a pair of files (presumably corrupted/clean — confirm against
    # WikiDataReader) for each of train and validation.
    if FLAGS.data_reader_type == "MovieDialogReader":
        data_reader = MovieDialogReader(config, FLAGS.train_path)
        train_path = FLAGS.train_path
        val_path = FLAGS.val_path
    elif FLAGS.data_reader_type == "PTBDataReader":
        data_reader = PTBDataReader(config, FLAGS.train_path)
        train_path = FLAGS.train_path
        val_path = FLAGS.val_path
    elif FLAGS.data_reader_type == "WikiDataReader":
        train_path = [
            os.path.join(FLAGS.train_path,
                         "wiki2017CleanChainLifetime.enz_train.txt"),
            os.path.join(FLAGS.train_path,
                         "wiki2017CleanChainLifetime.enu_train.txt")
        ]
        val_path = [
            os.path.join(FLAGS.val_path,
                         "wiki2017CleanChainLifetime.enz_val.txt"),
            os.path.join(FLAGS.val_path,
                         "wiki2017CleanChainLifetime.enu_val.txt")
        ]
        data_reader = WikiDataReader(config, train_path)
    else:
        raise ValueError(
            "data_reader_type argument not recognized; must be "
            "one of: MovieDialogReader, PTBDataReader, WikiDataReader")

    if FLAGS.task == "decode":
        print('creating session')
        # Decode test sentences.
        with tf.Session() as session:
            print("creating model")
            model = create_model(session, True, FLAGS.model_path,
                                 config=config)
            print("Loaded model. Beginning decoding.")
            if FLAGS.test_string != "":
                decodings = decode_sentence(session, model, data_reader,
                                            FLAGS.test_string)
            else:
                decodings = decode(session, model=model,
                                   data_reader=data_reader,
                                   data_to_decode=data_reader.read_tokens(
                                       FLAGS.test_path),
                                   verbose=True)
            # Write the decoded tokens to stdout.
            print(decodings)
            for tokens in decodings:
                print(" ".join(tokens))
                sys.stdout.flush()
    elif FLAGS.task == "serve":
        print('creating session')
        with tf.Session() as session:
            print("creating model")
            model = create_model(session, True, FLAGS.model_path,
                                 config=config)
            # Hand the loaded model to the request handler via class
            # attributes, then block serving requests until interrupted.
            HttpHandler.model = model
            HttpHandler.data_reader = data_reader
            HttpHandler.session = session
            HttpHandler.model_name = FLAGS.model_path
            httpd = HTTPServer(("0.0.0.0", 8080), HttpHandler)
            try:
                print("Starting server...")
                httpd.serve_forever()
            except KeyboardInterrupt:
                pass
            httpd.server_close()
    else:
        print("Training model.")
        train(data_reader, train_path, val_path, FLAGS.model_path)
def main(_):
    """Entry point: train the corrector, or correct/evaluate/decode with it.

    One mode runs per invocation: FLAGS.correct starts an interactive
    correction loop, FLAGS.evaluate measures accuracy on FLAGS.test_path,
    FLAGS.decode writes corrected test sentences to stdout, and otherwise
    the model is trained.  Training pickles the vocabulary and corrective
    tokens under FLAGS.model_path; the other modes reload the pickled
    vocabulary so token ids match the saved model.
    """
    import pickle  # hoisted from mid-function; used by both branches below

    # Determine which config we should use.
    if FLAGS.config == "TestConfig":
        config = TestConfig()
    elif FLAGS.config == "DefaultMovieDialogConfig":
        config = DefaultMovieDialogConfig()
    elif FLAGS.config == "DefaultPTBConfig":
        config = DefaultPTBConfig()
    elif FLAGS.config == "DefaultFCEConfig":
        config = DefaultFCEConfig()
    else:
        raise ValueError("config argument not recognized; must be one of: "
                         "TestConfig, DefaultPTBConfig, DefaultFCEConfig, "
                         "DefaultMovieDialogConfig")

    is_train = not (FLAGS.correct or FLAGS.evaluate or FLAGS.decode)

    # Determine which kind of DataReader we want to use.  When not training
    # the reader is rebuilt below from the pickled vocabulary, so defer
    # construction in every branch.  (Previously the PTB/FCE branches built a
    # reader eagerly and immediately discarded it, unlike MovieDialogReader.)
    if FLAGS.data_reader_type == "MovieDialogReader":
        data_reader = (MovieDialogReader(config, FLAGS.train_path)
                       if is_train else None)
    elif FLAGS.data_reader_type == "PTBDataReader":
        data_reader = (PTBDataReader(config, FLAGS.train_path)
                       if is_train else None)
    elif FLAGS.data_reader_type == "FCEReader":
        data_reader = (FCEReader(config, FLAGS.train_path)
                       if is_train else None)
    else:
        raise ValueError("data_reader_type argument not recognized; must be "
                         "one of: MovieDialogReader, PTBDataReader")

    if not is_train:
        # Rebuild the reader from the vocabulary pickled during training.
        with open(os.path.join(FLAGS.model_path, "token_to_id.pickle"),
                  "rb") as f:
            token_to_id = pickle.load(f)
        if FLAGS.data_reader_type == "MovieDialogReader":
            data_reader = MovieDialogReader(
                config, None, token_to_id, dropout_prob=0.25,
                replacement_prob=0.25, dataset_copies=1)
        elif FLAGS.data_reader_type == "PTBDataReader":
            data_reader = PTBDataReader(
                config, None, token_to_id, dropout_prob=0.25,
                replacement_prob=0.25, dataset_copies=1)
        elif FLAGS.data_reader_type == "FCEReader":
            data_reader = FCEReader(
                config, None, token_to_id, dropout_prob=0.25,
                replacement_prob=0.25, dataset_copies=1)
        corrective_tokens = get_corrective_tokens(data_reader,
                                                  FLAGS.train_path)
    else:
        corrective_tokens = get_corrective_tokens(data_reader,
                                                  FLAGS.train_path)
        sys.stdout.flush()
        # Persist the corrective tokens and vocabulary for later modes.
        with open(os.path.join(FLAGS.model_path,
                               "corrective_tokens.pickle"), "wb") as f:
            pickle.dump(corrective_tokens, f)
        with open(os.path.join(FLAGS.model_path, "token_to_id.pickle"),
                  "wb") as f:
            pickle.dump(data_reader.token_to_id, f)

    # CPU-only session; thread counts of 0 let TensorFlow choose.
    sess_config = tf.ConfigProto(device_count={"CPU": config.cpu_num},
                                 inter_op_parallelism_threads=0,
                                 intra_op_parallelism_threads=0)

    if FLAGS.correct:
        with tf.Session(config=sess_config) as session:
            model = create_model(session, True, FLAGS.model_path,
                                 config=config)
            print("Loaded model. Beginning correcting.")
            # Interactive loop: correct each entered sentence until "exit".
            while True:
                sentence = input("Input sentence or exit\n")
                if sentence:
                    if sentence.lower() == 'exit':
                        break
                    decoded = decode_sentence(
                        session, model=model, data_reader=data_reader,
                        sentence=sentence,
                        corrective_tokens=corrective_tokens, verbose=True)
                    sys.stdout.flush()
    elif FLAGS.evaluate:
        with tf.Session(config=sess_config) as session:
            model = create_model(session, True, FLAGS.model_path,
                                 config=config)
            print("Loaded model. Beginning evaluating.")
            errors = evaluate_accuracy(session, model=model,
                                       data_reader=data_reader,
                                       corrective_tokens=corrective_tokens,
                                       test_path=FLAGS.test_path)
            print(errors)
            sys.stdout.flush()
    elif FLAGS.decode:
        # Decode test sentences.
        with tf.Session(config=sess_config) as session:
            model = create_model(session, True, FLAGS.model_path,
                                 config=config)
            print("Loaded model. Beginning decoding.")
            decodings = decode(session, model=model, data_reader=data_reader,
                               data_to_decode=data_reader.read_tokens(
                                   FLAGS.test_path),
                               corrective_tokens=corrective_tokens,
                               verbose=False)
            # Write the decoded tokens to stdout.
            for tokens in decodings:
                print(" ".join(tokens))
                sys.stdout.flush()
    else:
        print("Training model.")
        sys.stdout.flush()
        train(data_reader, FLAGS.train_path, FLAGS.val_path, FLAGS.model_path)