def decode():
    with tf.Session(config=get_session_configs()) as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        vocab, rev_vocab = read_vocabulary_from_file(paths['vocab_path_context'])

        # Load vocabulary vectors
        vocab_vectors = load_pickle_file(paths['vocab_vectors_context'])

        # Load FastText model used for preprocessing
        print("Load existing FastText model...")
        fast_text_model = fasttext.load_model(paths['fast_text_model_context'], encoding='utf-8')

        # Decode from standard input.
        sys.stdout.write("Human: ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        sentence = preprocess_input(sentence, fast_text_model, vocab_vectors)
        context = ""
        while sentence:
            context_sentence = context + sentence
            output = decode_sentence(context_sentence, vocab, rev_vocab, model, sess)

            print("Ola: " + " ".join(output))
            print("Human: ", end="")

            context = sentence # or context = output
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            sentence = preprocess_input(sentence, fast_text_model, vocab_vectors)
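
# preprocess_input is referenced throughout but not included in this listing. A minimal
# sketch of what it plausibly does, assuming vocab_vectors maps in-vocabulary words to
# embedding vectors and the fastText model exposes word vectors via fast_text_model[word]
# (the names and the OOV-replacement strategy are assumptions, not this project's code):
import numpy as np

def preprocess_input_sketch(sentence, fast_text_model, vocab_vectors):
    """Lowercase the input and replace out-of-vocabulary words with their nearest
    in-vocabulary neighbour, measured by cosine similarity of fastText vectors."""
    def cosine(a, b):
        return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-8)

    cleaned = []
    for token in sentence.strip().lower().split():
        if token in vocab_vectors:
            cleaned.append(token)
        else:
            vector = np.array(fast_text_model[token])
            # Pick the in-vocabulary word whose embedding lies closest to the OOV word.
            best = max(vocab_vectors,
                       key=lambda word: cosine(vector, np.array(vocab_vectors[word])))
            cleaned.append(best)
    return " ".join(cleaned)
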
def decode():
    # Avoid allocating all of the GPU memory
    config = get_session_configs()

    with tf.Session(config=config) as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        vocab, rev_vocab = read_vocabulary_from_file(paths['vocab_path'])

        # Load vocabulary vectors
        vocab_vectors = load_pickle_file(paths['vocab_vectors'])

        # Load FastText model used for preprocessing
        print("Load existing FastText model...")
        fast_text_model = fasttext.load_model(paths['fast_text_model'], encoding='utf-8')

        # Decode from standard input.
        print("To reset states, type '*reset*'")
        sys.stdout.write("Human: ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        sentence = preprocess_input(sentence, fast_text_model, vocab_vectors)

        # Initial state
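        # With LSTM cells the state holds a cell vector and a hidden vector per layer,
        # which is assumed to be why the LSTM shape below has an extra dimension of 2.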
        if FLAGS.use_lstm:
            initial_state = np.zeros((num_layers, 2, model.batch_size, size))
        else:
            initial_state = np.zeros((num_layers, model.batch_size, size))
        states = initial_state

        while sentence:

            output, states = decode_stateful_sentence(sentence, vocab, rev_vocab, model, sess, states)
            output = " ".join(output)
            output = get_sliced_output(output, 1)
            print("Vinyals_Stateful: " + " ".join(output))
            print("Human: ", end="")
            sys.stdout.flush()
            sentence = sys.stdin.readline()

            if sentence.strip() == "*reset*":
                states = initial_state
                print("States were successfully reset.")
                print("Human: ", end="")
                sys.stdout.flush()
                sentence = sys.stdin.readline()

            sentence = preprocess_input(sentence, fast_text_model, vocab_vectors)
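
# get_session_configs is called in every function here but not defined in this listing.
# A minimal sketch, assuming the intent behind the "Avoid allocating all of the GPU memory"
# comments is the standard ConfigProto/allow_growth setting (TF 0.x/1.x API):
def get_session_configs_sketch():
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True  # claim GPU memory on demand instead of all at once
    return config
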
def decode():
    with tf.Session(config=get_session_configs()) as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        vocab, rev_vocab = read_vocabulary_from_file(paths['vocab_path'])

        # Load vocabulary vectors
        vocab_vectors = load_pickle_file(paths['vocab_vectors'])

        # Load FastText model used for preprocessing
        print("Load existing FastText model...")
        fast_text_model = fasttext.load_model(paths['fast_text_model'], encoding='utf-8')

        if FLAGS.open_subtitles:
            num_output_sentences = 1
        else:
            num_output_sentences = 2

        # Decode from standard input.
        sys.stdout.write("Human: ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        sentence = preprocess_input(sentence, fast_text_model, vocab_vectors)
        while sentence:
            output = decode_sentence(sentence, vocab, rev_vocab, model, sess)
            output = " ".join(output)
            output = get_sliced_output(output, num_output_sentences)
            print("Grid LSTM: " + output.strip())
            print("Human: ", end="")
            sys.stdout.flush()
            sentence = sys.stdin.readline()

            if FLAGS.context_full_turns:
                sentence = preprocess_input(output.strip() + " " + sentence.strip(), fast_text_model, vocab_vectors)
            else:
                sentence = preprocess_input(sentence, fast_text_model, vocab_vectors)
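
# get_sliced_output is used above to keep only the first n sentences of a decoded reply.
# A minimal sketch of that behaviour, assuming replies are plain text and sentences are
# delimited by ., ! or ? (the real implementation may split on an explicit EOS token instead):
import re

def get_sliced_output_sketch(output, num_output_sentences):
    sentences = re.split(r'(?<=[.!?])\s+', output.strip())
    return " ".join(sentences[:num_output_sentences])
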
def train():
    """Train a en->fr translation model using WMT data."""

    print("Checking for needed files")
    check_for_needed_files_and_create()
    train_path = paths['train_path']
    shuffle_file(train_path, train_path)

    print("Creating file queue")
    filename_queue = input_pipeline(root=paths['preprocess_root_files'], start_name=paths['train_file'])
    filename_queue_dev = input_pipeline(root=paths['preprocess_root_files'], start_name=paths['dev_file'])

    perplexity_log_path = os.path.join(FLAGS.train_dir, paths['perplexity_log'])

    if not os.path.exists(perplexity_log_path):
        with open(perplexity_log_path, 'w') as fileObject:
            fileObject.write("Learning_rate: %d \t Optimizer: %s \n" % (FLAGS.learning_rate, optimizer))
            fileObject.write("Step \tPerplexity \tBucket perplexity \n")

    # Avoid allocating all of the GPU memory
    config = get_session_configs()
    with tf.device(use_gpu):
        with tf.Session(config=config) as sess:
            # Create model.
            print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
            model = create_model(sess, False)

            # Stream data
            print("Setting up coordinator")
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            # This is for the training loop.
            train_set = [[] for _ in _buckets]
            dev_set = [[] for _ in _buckets]
            step_time, loss = 0.0, 0.0
            current_step = 0
            previous_losses = []
            read_line = 0
            reading_file_path = ""

            # Create log writer object
            print("Create log writer object")
            summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, graph=tf.get_default_graph())

            reader_train_data = tf.TextLineReader()  # skip_header_lines=int, number of lines to skip
            key, txt_row_train_data = reader_train_data.read(filename_queue)

            reader_dev_data = tf.TextLineReader()
            _, txt_row_dev_data = reader_dev_data.read(filename_queue_dev)

            lowest_perplexity = 20.0

            train_time = time.time()

            print("Starting training loop")
            try:
                while current_step < FLAGS.max_train_steps:  # not coord.should_stop():
                    if current_step % FLAGS.print_frequency == 0:
                        print("Step number: " + str(current_step))

                    read_line, reading_file_path = check_and_shuffle_file(key, sess, read_line, paths['train_path'])

                    # Get a batch
                    train_set, bucket_id = get_batch(txt_row_train_data, train_set, FLAGS.batch_size)
                    start_time = time.time()
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(train_set, bucket_id)

                    # Clean out trained bucket
                    train_set[bucket_id] = []

                    # Make a step
                    _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, False)

                    # Calculating variables
                    step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
                    loss += step_loss / FLAGS.steps_per_checkpoint
                    current_step += 1

                    # Once in a while, we save checkpoint, print statistics, and run evals.
                    if current_step % FLAGS.steps_per_checkpoint == 0:
                        check_time = time.time()
                        print(get_time(train_time, "to train"))
                        # Print statistics for the previous epoch.
                        dev_set, bucket_id = get_batch(txt_row_dev_data, dev_set, FLAGS.batch_size, ac_function=min)

                        perplexity = exp(float(loss)) if loss < 300 else float("inf")
                        print("global step %d learning rate %.4f step-time %.2f perplexity "
                              "%.2f" % (model.global_step.eval(), model.learning_rate.eval(), step_time, perplexity))

                        # Decrease learning rate if no improvement was seen over last 3 times.
                        if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                            sess.run(model.learning_rate_decay_op)
                        previous_losses.append(loss)

                        # Save checkpoint and zero timer and loss.
                        print("Save checkpoint")
                        checkpoint_path = os.path.join(FLAGS.train_dir, "Ola.ckpt")
                        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                        step_time, loss = 0.0, 0.0

                        # Adding perplexity to tensorboard
                        perplexity_summary = tf.Summary()
                        overall_value = perplexity_summary.value.add()
                        overall_value.tag = "perplexity_overall"
                        overall_value.simple_value = perplexity

                        # Run evals on development set and print their perplexity.
                        print("Run evaluation on development set")
                        bucket_perplexity = ""
                        for bucket_id in xrange(len(_buckets)):
                            if len(dev_set[bucket_id]) == 0:
                                print("  eval: empty bucket %d" % bucket_id)
                                continue
                            encoder_inputs, decoder_inputs, target_weights = model.get_batch(dev_set, bucket_id)

                            # Clean out used bucket
                            del dev_set[bucket_id][:FLAGS.batch_size]

                            _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True)
                            eval_ppx = exp(float(eval_loss)) if eval_loss < 300 else float("inf")
                            print("  eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))

                            bucket_perplexity += "\t" + str(eval_ppx)

                            # Adding bucket perplexity to tensorboard
                            bucket_value = perplexity_summary.value.add()
                            bucket_value.tag = "perplexity_bucket %d" % bucket_id
                            bucket_value.simple_value = eval_ppx
                        summary_writer.add_summary(perplexity_summary, model.global_step.eval())

                        with open(os.path.join(FLAGS.train_dir, paths['perplexity_log']), 'a') as fileObject:
                            fileObject.write(str(model.global_step.eval()) + " \t" + str(perplexity) + bucket_perplexity + "\n")
                        # Save model if checkpoint was the best one
                        if perplexity < lowest_perplexity:  # and current_step > 400000:
                            lowest_perplexity = perplexity
                            checkpoint_path = os.path.join(FLAGS.train_dir, "Ola_best_.ckpt")
                            model.saver.save(sess, checkpoint_path, global_step=model.global_step)

                        sys.stdout.flush()
                        print(get_time(check_time), "to do checkpoint")
                        train_time = time.time()
            except tf.errors.OutOfRangeError:
                print('Done training, epoch reached')
            finally:
                coord.request_stop()
            coord.join(threads)
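
# input_pipeline builds the filename queues consumed by tf.TextLineReader above. A minimal
# sketch, assuming it globs files under `root` whose names start with `start_name` and hands
# them to a queue runner via tf.train.string_input_producer (the exact matching rule is an
# assumption):
import glob

def input_pipeline_sketch(root, start_name, shuffle=True):
    files = sorted(glob.glob(os.path.join(root, start_name + "*")))
    return tf.train.string_input_producer(files, shuffle=shuffle)
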
def train():
    """Train a en->fr translation model using WMT data."""

    print("Checking for needed files")
    check_for_needed_files_and_create()

    print("Creating file queues")

    filename_queue = input_pipeline(root=paths['stateful_datafiles'], start_name="merged_train", shuffle=False)

    filename_queue_dev = input_pipeline(root=paths['stateful_datafiles'], start_name="merged_dev", shuffle=False)

    perplexity_log_path = os.path.join(FLAGS.train_dir, paths['perplexity_log'])

    if not os.path.exists(perplexity_log_path):
        with open(perplexity_log_path, 'w') as fileObject:
            fileObject.write(
                "Learning_rate: %d \t Optimizer: %s \t Lstm %s \n" % (FLAGS.learning_rate, optimizer, FLAGS.use_lstm))
            fileObject.write("Step \tPerplexity \tBucket perplexity \n")

    # Avoid allocating all of the GPU memory
    config = get_session_configs()
    with tf.device(use_gpu):
        with tf.Session(config=config) as sess:
            # Create model.
            print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
            model = create_model(sess, False)

            # Stream data
            print("Setting up coordinator")
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            # This is for the training loop.
            step_time, loss = 0.0, 0.0
            current_step = 0
            train_set = [[] for _ in range(batch_size)]
            dev_set = [[] for _ in range(batch_size)]
            previous_losses = []
            read_line = 0
            read_line_dev = 0
            reading_file_path = paths['merged_train_stateful_path_file1']
            reading_dev_file_path = paths['merged_dev_stateful_path']

            # Create log writer object
            print("Create log writer object")
            summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, graph=tf.get_default_graph())

            key, txt_row_train_data = tf.TextLineReader().read(filename_queue)

            key_dev, txt_row_dev_data = tf.TextLineReader().read(filename_queue_dev)

            lowest_perplexity = 20.0

            train_time = time.time()

            # Need an initial state for the encoder rnn
            if FLAGS.use_lstm:
                initial_state = np.zeros((num_layers, 2, batch_size, size))
            else:
                initial_state = np.zeros((num_layers, batch_size, size))
            state = initial_state
            dev_state = initial_state

            print("Starts training loop")

            try:
                while FLAGS.max_train_steps >= current_step:  # not coord.should_stop():
                    if current_step % FLAGS.print_frequency == 0:
                        print("Step number" + str(current_step))

                    # Get a batch
                    # Find empty holders in training set
                    empty_conversations = [index for index, conversation in enumerate(train_set) if conversation == []]
                    if empty_conversations:
                        init_key, init_line = sess.run([key, txt_row_train_data])
                        read_line, reading_file_path = check_and_shuffle_file(init_key, sess, read_line, reading_file_path, stateful=True)
                    train_set, batch_train_set, state = get_stateful_batch(txt_row_train_data, train_set, empty_conversations, init_line, state, size, FLAGS.use_lstm)
                    start_time = time.time()
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(batch_train_set)

                    # Make a step
                    _, step_loss, _, state = model.step(sess, encoder_inputs, decoder_inputs, target_weights, state, False)

                    # Calculating variables
                    step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
                    loss += step_loss / FLAGS.steps_per_checkpoint
                    current_step += 1

                    # Once in a while, we save checkpoint, print statistics, and run evals.
                    if current_step % FLAGS.steps_per_checkpoint == 0:

                        check_time = time.time()
                        print(get_time(train_time), "to train")

                        # Print statistics for the previous epoch.
                        empty_dev_conversations = [index for index, conversation in enumerate(dev_set)
                                                   if conversation == []]
                        if empty_dev_conversations:
                            init_key_dev, init_line_dev = sess.run([key_dev, txt_row_dev_data])
                            read_line_dev, reading_dev_file_path = check_and_shuffle_file(init_key_dev, sess, read_line_dev, reading_dev_file_path, stateful=True, dev=True)
                        dev_set, batch_dev_set, dev_state = get_stateful_batch(txt_row_dev_data, dev_set, empty_dev_conversations, init_line_dev, dev_state, size, FLAGS.use_lstm)

                        perplexity = exp(float(loss)) if loss < 300 else float("inf")
                        print("global step %d learning rate %.4f step-time %.2f perplexity "
                              "%.2f" % (model.global_step.eval(), model.learning_rate.eval(), step_time, perplexity))

                        # Decrease learning rate if no improvement was seen over last 3 times.
                        if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                            sess.run(model.learning_rate_decay_op)
                        previous_losses.append(loss)

                        # Save checkpoint and zero timer and loss.
                        print("Save checkpoint")
                        checkpoint_path = os.path.join(FLAGS.train_dir, "Vinyals.ckpt")
                        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                        step_time, loss = 0.0, 0.0

                        # Adding perplexity to tensorboard
                        perplexity_summary = tf.Summary()
                        overall_value = perplexity_summary.value.add()
                        overall_value.tag = "perplexity_overall"
                        overall_value.simple_value = perplexity

                        # Run evals on development set and print their perplexity.
                        print("Run evaluation on development set")
                        step_perplexity = ""
                         # Run eval on three steps

                        # 1
                        encoder_inputs, decoder_inputs, target_weights = model.get_batch(batch_dev_set)

                        _, eval_loss, _, dev_state = model.step(sess, encoder_inputs, decoder_inputs, target_weights, dev_state, True)
                        eval_ppx = exp(float(eval_loss)) if eval_loss < 300 else float("inf")
                        print("  eval: step %d perplexity %.2f" % (1.0, eval_ppx))

                        step_perplexity += "\t" + str(eval_ppx)

                        # Adding step perplexity to tensorboard
                        step_value = perplexity_summary.value.add()
                        step_value.tag = "perplexity_step %d" % 1.0
                        step_value.simple_value = eval_ppx

                        # 2
                        empty_dev_conversations = [index for index, conversation in enumerate(dev_set)
                                                   if conversation == []]
                        if empty_dev_conversations:
                            init_key_dev, init_line_dev = sess.run([key_dev, txt_row_dev_data])
                            read_line_dev, reading_dev_file_path = check_and_shuffle_file(init_key_dev, sess, read_line_dev, reading_dev_file_path, stateful=True, dev=True)
                        dev_set, batch_dev_set, dev_state = get_stateful_batch(txt_row_dev_data, dev_set, empty_dev_conversations, init_line_dev, dev_state, size, FLAGS.use_lstm)
                        encoder_inputs, decoder_inputs, target_weights = model.get_batch(batch_dev_set)

                        _, eval_loss, _, dev_state = model.step(sess, encoder_inputs, decoder_inputs, target_weights, dev_state, True)
                        eval_ppx = exp(float(eval_loss)) if eval_loss < 300 else float("inf")
                        print("  eval: step %d perplexity %.2f" % (2.0, eval_ppx))

                        step_perplexity += "\t" + str(eval_ppx)

                        # Adding step perplexity to tensorboard
                        step_value = perplexity_summary.value.add()
                        step_value.tag = "perplexity_step %d" % 2.0
                        step_value.simple_value = eval_ppx

                        # 3
                        empty_dev_conversations = [index for index, conversation in enumerate(dev_set)
                                                   if conversation == []]
                        if empty_dev_conversations:
                            init_key_dev, init_line_dev = sess.run([key_dev, txt_row_dev_data])
                            read_line_dev, reading_dev_file_path = check_and_shuffle_file(init_key_dev, sess,
                                                                                          read_line_dev,
                                                                                          reading_dev_file_path,
                                                                                          stateful=True, dev=True)
                        dev_set, batch_dev_set, dev_state = get_stateful_batch(txt_row_dev_data, dev_set,
                                                                               empty_dev_conversations, init_line_dev,
                                                                               dev_state, size, FLAGS.use_lstm)

                        encoder_inputs, decoder_inputs, target_weights = model.get_batch(batch_dev_set)

                        _, eval_loss, _, dev_state = model.step(sess, encoder_inputs, decoder_inputs, target_weights, dev_state, True)
                        eval_ppx = exp(float(eval_loss)) if eval_loss < 300 else float("inf")
                        print("  eval: step %d perplexity %.2f" % (3.0, eval_ppx))

                        step_perplexity += "\t" + str(eval_ppx)

                        # Adding step perplexity to tensorboard
                        step_value = perplexity_summary.value.add()
                        step_value.tag = "perplexity_step %d" % 3.0
                        step_value.simple_value = eval_ppx

                        summary_writer.add_summary(perplexity_summary, model.global_step.eval())

                        with open(os.path.join(FLAGS.train_dir, paths['perplexity_log']), 'a') as fileObject:
                            fileObject.write(str(model.global_step.eval()) + " \t" + str(perplexity) + step_perplexity + "\n")

                        # Save model if checkpoint was the best one
                        if perplexity < lowest_perplexity:
                            lowest_perplexity = perplexity
                            checkpoint_path = os.path.join(FLAGS.train_dir, "Vinyals_stateful_best_.ckpt")
                            model.saver.save(sess, checkpoint_path, global_step=model.global_step)

                        sys.stdout.flush()
                        get_time(check_time, "to do checkpoint")
                        train_time = time.time()
            except tf.errors.OutOfRangeError:
                print('Done training, epoch reached')
            finally:
                coord.request_stop()
            coord.join(threads)
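
# Scripts derived from the TensorFlow translate.py tutorial usually wire train() and decode()
# together behind a command-line flag. A hedged sketch of such an entry point; the actual flag
# name (here FLAGS.decode) and dispatch logic in this project are assumptions:
def main(_):
    if FLAGS.decode:
        decode()
    else:
        train()

if __name__ == "__main__":
    tf.app.run()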