Example #1
    def __init__(self):
        print('Applying Parameters:')
        for k, v in FLAGS.__dict__['__flags'].items():
            print('%s: %s' % (k, str(v)))
        print("Preparing data in %s" % FLAGS.data_dir)
        vocab_path = ''
        tag_vocab_path = ''
        label_vocab_path = ''
        date_set = data_utils.prepare_multi_task_data(FLAGS.data_dir,
                                                      FLAGS.in_vocab_size,
                                                      FLAGS.out_vocab_size)

        in_seq_train, out_seq_train, label_train = date_set[0]
        in_seq_dev, out_seq_dev, label_dev = date_set[1]
        in_seq_test, out_seq_test, label_test = date_set[2]
        vocab_path, tag_vocab_path, label_vocab_path = date_set[3]

        vocab, rev_vocab = data_utils.initialize_vocab(vocab_path)
        tag_vocab, rev_tag_vocab = data_utils.initialize_vocab(tag_vocab_path)
        label_vocab, rev_label_vocab = data_utils.initialize_vocab(
            label_vocab_path)

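        # A single TF session is shared by the training-mode and test-mode models;
        # create_model is assumed (defined elsewhere in this file) to build and return both.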
        self.sess = tf.Session()
        self.model, self.model_test = create_model(self.sess, len(vocab),
                                                   len(tag_vocab),
                                                   len(label_vocab))
Example #2
def main(_):
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    vocab_path = ''
    tag_vocab_path = ''
    label_vocab_path = ''
    date_set = data_utils.prepare_multi_task_data(FLAGS.data_dir,
                                                  FLAGS.in_vocab_size,
                                                  FLAGS.out_vocab_size)
    in_seq_train, out_seq_train, label_train = date_set[0]
    in_seq_dev, out_seq_dev, label_dev = date_set[1]
    in_seq_test, out_seq_test, label_test = date_set[2]
    vocab_path, tag_vocab_path, label_vocab_path = date_set[3]
Example #3
def train():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))
    print("Preparing trec data in %s" % FLAGS.data_dir)
    vocab_path = ''
    tag_vocab_path = ''
    label_vocab_path = ''

    (in_seq_train, out_seq_train, label_train,
     in_seq_dev, out_seq_dev, label_dev,
     in_seq_test, out_seq_test, label_test,
     vocab_path, tag_vocab_path, label_vocab_path) = data_utils.prepare_multi_task_data(
         FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)

    result_dir = FLAGS.train_dir + '/test_results'
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)

    current_taging_valid_out_file = result_dir + '/taging.valid.hyp.txt'
    current_taging_test_out_file = result_dir + '/taging.test.hyp.txt'
    label_valid_out_file = result_dir + '/label.valid.hyp.txt'
    label_test_out_file = result_dir + '/label.test.hyp.txt'

    vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocabulary(tag_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocabulary(
        label_vocab_path)
    # Build the language-model vocabulary: the LM targets are the inputs shifted by one,
    # so _EOS takes over the _BOS id (mirrored in rev_LM_vocab below).
    LM_vocab = vocab.copy()
    assert LM_vocab[data_utils._BOS] == data_utils.BOS_ID
    del LM_vocab[data_utils._BOS]
    LM_vocab[data_utils._EOS] = data_utils.BOS_ID
    rev_LM_vocab = [x for x in rev_vocab]
    rev_LM_vocab[data_utils.BOS_ID] = data_utils._EOS

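    # Cap this process at ~23% of GPU memory so several training jobs can share one GPU.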
    config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.23), )

    with tf.Session(config=config) as sess:
        # Create model.
        print("Max sequence length: %d ." % _buckets[0][0])
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.size))

        model, model_test = create_model(sess, len(vocab), len(tag_vocab),
                                         len(label_vocab), len(LM_vocab))
        print(
            "Creating model with source_vocab_size=%d, target_vocab_size=%d, and label_vocab_size=%d, and lm_vocab_size=%d."
            % (len(vocab), len(tag_vocab), len(label_vocab), len(LM_vocab)))

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." %
              FLAGS.max_train_data_size)
        dev_set = read_data(in_seq_dev, out_seq_dev, label_dev)
        test_set = read_data(in_seq_test, out_seq_test, label_test)
        train_set = read_data(in_seq_train, out_seq_train, label_train)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]

        train_total_size = float(sum(train_bucket_sizes))

        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        best_valid_score = 0
        best_test_score = 0

        if FLAGS.label_in_training == 'true_label':
            print("Use TRUE label during model training")
            train_with_true_label = True
        elif FLAGS.label_in_training == 'predicted_label':
            print("Use PREDICTED label during model training")
            train_with_true_label = False
        elif FLAGS.label_in_training == 'scheduled_sampling':
            print("Use Scheduled Sampling label during model training")

        while model.global_step.eval() < FLAGS.max_training_steps:
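            # Pick a bucket at random with probability proportional to its size;
            # train_buckets_scale holds the cumulative size distribution computed above.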
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, encoder_inputs_shiftByOne, tags, tag_weights, intent_weights, lm_weights, batch_sequence_length, labels = model.get_batch(
                train_set, bucket_id)

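            # Scheduled sampling: the chance of training on the model's own predicted
            # intent label (rather than the true label) grows linearly with global_step.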
            if FLAGS.label_in_training == 'scheduled_sampling':
                random_number_02 = np.random.random_sample()
                final_training_step = FLAGS.max_training_steps
                if random_number_02 < float(model.global_step.eval(
                )) / final_training_step:  # use predicted label in training
                    train_with_true_label = False
                else:
                    train_with_true_label = True

            _, step_loss, tagging_logits, classification_logits = model.joint_step(
                sess,
                encoder_inputs,
                encoder_inputs_shiftByOne,
                lm_weights,
                tags,
                tag_weights,
                labels,
                intent_weights,
                batch_sequence_length,
                bucket_id,
                False,
                train_with_true_label=train_with_true_label)
            step_time += (time.time() -
                          start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint

            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print(
                    "global step %d step-time %.2f. Training perplexity %.2f" %
                    (model.global_step.eval(), step_time, perplexity))
                sys.stdout.flush()

                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                def run_eval(data_set, mode):  # mode = "Valid", "Test"
                    # Run evals on the development/test set and print their accuracy.
                    word_list = list()
                    ref_tag_list = list()
                    hyp_tag_list = list()
                    ref_label_list = list()
                    hyp_label_list = list()
                    correct_count = 0
                    accuracy = 0.0
                    for bucket_id in xrange(
                            len(_buckets)):  # len(_buckets) = 1 here
                        eval_loss = 0.0
                        count = 0
                        total_word_count = 0
                        for i in xrange(len(data_set[bucket_id])):
                            count += 1
                            eval_encoder_inputs, eval_encoder_inputs_shiftByOne, eval_tags, eval_tag_weights, eval_intent_weights, eval_lm_weights, eval_sequence_length, eval_labels = model_test.get_one(
                                data_set, bucket_id, i)
                            eval_intent_weights = eval_tag_weights
                            tagging_logits = []
                            classification_logits = []
                            _, step_loss, tagging_logits, classification_logits = model_test.joint_step(
                                sess, eval_encoder_inputs,
                                eval_encoder_inputs_shiftByOne,
                                eval_lm_weights, eval_tags, eval_tag_weights,
                                eval_labels, eval_intent_weights,
                                eval_sequence_length, bucket_id, True)
                            eval_loss += step_loss * (eval_sequence_length[0])
                            total_word_count += eval_sequence_length[0]
                            hyp_label = None

                            # intent results
                            ref_label_list.append(
                                rev_label_vocab[eval_labels[0][0]])
                            hyp_label = np.argmax(classification_logits[0], 0)
                            hyp_label_list.append(rev_label_vocab[hyp_label])
                            if eval_labels[0] == hyp_label:
                                correct_count += 1

                            # tagging results
                            word_list.append([
                                rev_vocab[x[0]] for x in
                                eval_encoder_inputs[:eval_sequence_length[0]]
                            ])
                            ref_tag_list.append([
                                rev_tag_vocab[x[0]]
                                for x in eval_tags[:eval_sequence_length[0]]
                            ])
                            hyp_tag_list.append([
                                rev_tag_vocab[np.argmax(x)] for x in
                                tagging_logits[:eval_sequence_length[0]]
                            ])

                        eval_perplexity = math.exp(
                            float(eval_loss) / total_word_count)
                    print("  %s perplexity: %.2f" % (mode, eval_perplexity))
                    accuracy = float(correct_count) * 100 / count
                    print("  %s accuracy: %.2f %d/%d" %
                          (mode, accuracy, correct_count, count))

                    tagging_eval_result = dict()
                    if mode == 'Valid':
                        output_file = current_taging_valid_out_file
                    elif mode == 'Test':
                        output_file = current_taging_test_out_file
                    tagging_eval_result = conlleval(hyp_tag_list, ref_tag_list,
                                                    word_list, output_file)
                    print("  %s f1-score: %.2f" %
                          (mode, tagging_eval_result['f1']))
                    sys.stdout.flush()
                    return eval_perplexity, tagging_eval_result, hyp_label_list

                # run valid
                valid_perplexity, valid_tagging_result, valid_hyp_label_list = run_eval(
                    dev_set, 'Valid')
                # record best results

                if valid_tagging_result['f1'] > best_valid_score:
                    best_valid_score = valid_tagging_result['f1']
                    subprocess.call([
                        'mv', current_taging_valid_out_file,
                        current_taging_valid_out_file +
                        '.best_f1_%.2f' % best_valid_score
                    ])
                    with open(
                            '%s.best_f1_%.2f' %
                        (label_valid_out_file, best_valid_score), 'w') as f:
                        for i in range(len(valid_hyp_label_list)):
                            f.write(valid_hyp_label_list[i] + '\n')

                # run test after each validation for development purpose.
                test_perplexity, test_tagging_result, test_hyp_label_list = run_eval(
                    test_set, 'Test')
                # record best results
                if test_tagging_result['f1'] > best_test_score:
                    best_test_score = test_tagging_result['f1']
                    subprocess.call([
                        'mv', current_taging_test_out_file,
                        current_taging_test_out_file +
                        '.best_f1_%.2f' % best_test_score
                    ])
                    with open(
                            '%s.best_f1_%.2f' %
                        (label_test_out_file, best_test_score), 'w') as f:
                        for i in range(len(test_hyp_label_list)):
                            f.write(test_hyp_label_list[i] + '\n')
Example #4
def test():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    vocab_path = ''
    tag_vocab_path = ''
    label_vocab_path = ''
    (in_seq_train, out_seq_train, label_train,
     in_seq_dev, out_seq_dev, label_dev,
     in_seq_test, out_seq_test, label_test,
     vocab_path, tag_vocab_path, label_vocab_path) = data_utils.prepare_multi_task_data(
         FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)

    vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocabulary(tag_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocabulary(
        label_vocab_path)
    with tf.Session() as sess:
        # Create model.
        model, model_test = create_model(sess, len(vocab), len(tag_vocab),
                                         len(label_vocab))

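        # Tokenize one raw sentence, run a single evaluation step, and map the logits
        # back to an intent label and per-token slot tags.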
        def feed_sentence(sentence, vocab):
            data_set = [[]]
            token_ids = data_utils.prepare_one_data(sentence, vocab)
            slot_ids = [0 for i in range(len(token_ids))]
            data_set[0].append([token_ids, slot_ids, [0]])
            encoder_inputs, tags, tag_weights, sequence_length, labels = model_test.get_one(
                data_set, 0, 0)
            if task['joint'] == 1:
                _, step_loss, tagging_logits, classification_logits = model_test.joint_step(
                    sess, encoder_inputs, tags, tag_weights, labels,
                    sequence_length, 0, True)
            elif task['tagging'] == 1:
                _, step_loss, tagging_logits = model_test.tagging_step(
                    sess, encoder_inputs, tags, tag_weights, sequence_length,
                    0, True)
            elif task['intent'] == 1:
                _, step_loss, classification_logits = model_test.classification_step(
                    sess, encoder_inputs, labels, sequence_length, 0, True)
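            # Note: the post-processing below assumes the joint task, i.e. that both
            # classification_logits and tagging_logits were produced above.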
            classification = [
                np.argmax(classification_logit)
                for classification_logit in classification_logits
            ]
            tagging_logit = [
                np.argmax(tagging_logit) for tagging_logit in tagging_logits
            ]
            classification_word = [rev_label_vocab[c] for c in classification]
            tagging_word = [
                rev_tag_vocab[t] for t in tagging_logit[:sequence_length[0]]
            ]
            return classification_word, tagging_word

        sys.stdout.write('>')
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        while sentence:
            print(feed_sentence(sentence, vocab))
            sys.stdout.write('>')
            sys.stdout.flush()
            sentence = sys.stdin.readline()
Example #5
def train():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    vocab_path = ''
    tag_vocab_path = ''
    label_vocab_path = ''
    (in_seq_train, out_seq_train, label_train,
     in_seq_dev, out_seq_dev, label_dev,
     in_seq_test, out_seq_test, label_test,
     vocab_path, tag_vocab_path, label_vocab_path) = data_utils.prepare_multi_task_data(
         FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)

    result_dir = FLAGS.train_dir + '/test_results'
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)

    current_taging_valid_out_file = result_dir + '/tagging.valid.hyp.txt'
    current_taging_test_out_file = result_dir + '/tagging.test.hyp.txt'

    vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocabulary(tag_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocabulary(
        label_vocab_path)

    with tf.Session() as sess:
        # Create model.
        print("Max sequence length: %d." % _buckets[0][0])
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.size))

        model, model_test = create_model(sess, len(vocab), len(tag_vocab),
                                         len(label_vocab))
        print(
            "Creating model with source_vocab_size=%d, target_vocab_size=%d, and label_vocab_size=%d."
            % (len(vocab), len(tag_vocab), len(label_vocab)))

        # Read data into buckets and compute their sizes.
        print("Reading train/valid/test data (training set limit: %d)." %
              FLAGS.max_train_data_size)
        dev_set = read_data(in_seq_dev, out_seq_dev, label_dev)
        test_set = read_data(in_seq_test, out_seq_test, label_test)
        train_set = read_data(in_seq_train, out_seq_train, label_train)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0

        best_valid_score = 0
        best_test_score = 0
        while model.global_step.eval() < FLAGS.max_training_steps:
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, tags, tag_weights, batch_sequence_length, labels = model.get_batch(
                train_set, bucket_id)
            if task['joint'] == 1:
                _, step_loss, tagging_logits, classification_logits = model.joint_step(
                    sess, encoder_inputs, tags, tag_weights, labels,
                    batch_sequence_length, bucket_id, False)
            elif task['tagging'] == 1:
                _, step_loss, tagging_logits = model.tagging_step(
                    sess, encoder_inputs, tags, tag_weights,
                    batch_sequence_length, bucket_id, False)
            elif task['intent'] == 1:
                _, step_loss, classification_logits = model.classification_step(
                    sess, encoder_inputs, labels, batch_sequence_length,
                    bucket_id, False)

            step_time += (time.time() -
                          start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print(
                    "global step %d step-time %.2f. Training perplexity %.2f" %
                    (model.global_step.eval(), step_time, perplexity))
                sys.stdout.flush()
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                for bucket_id in range(len(_buckets)):
                    eval_loss = 0.0
                    encoder_inputs, tags, tag_weights, batch_sequence_length, labels = model_test.get_batch(
                        dev_set, bucket_id)
                    tagging_logits = []
                    classification_logits = []
                    if task['joint'] == 1:
                        _, step_loss, tagging_logits, classification_logits = model_test.joint_step(
                            sess, encoder_inputs, tags, tag_weights, labels,
                            batch_sequence_length, bucket_id, True)
                    elif task['tagging'] == 1:
                        _, step_loss, tagging_logits = model_test.tagging_step(
                            sess, encoder_inputs, tags, tag_weights,
                            batch_sequence_length, bucket_id, True)
                    elif task['intent'] == 1:
                        _, step_loss, classification_logits = model_test.classification_step(
                            sess, encoder_inputs, labels,
                            batch_sequence_length, bucket_id, True)
                    eval_ppx = math.exp(
                        step_loss) if step_loss < 300 else float('inf')
                    print("validation perplexity %.2f" % eval_ppx)
                sys.stdout.flush()
Example #6
def train():
    print('Applying Parameters:')
    for k, v in FLAGS.__flags.iteritems():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    vocab_path = ''
    tag_vocab_path = ''
    label_vocab_path = ''
    date_set = data_utils.prepare_multi_task_data(FLAGS.data_dir,
                                                  FLAGS.in_vocab_size,
                                                  FLAGS.out_vocab_size)
    in_seq_train, out_seq_train, label_train = date_set[0]
    in_seq_dev, out_seq_dev, label_dev = date_set[1]
    in_seq_test, out_seq_test, label_test = date_set[2]
    vocab_path, tag_vocab_path, label_vocab_path = date_set[3]

    result_dir = FLAGS.train_dir + '/test_results'
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)

    current_taging_valid_out_file = result_dir + '/tagging.valid.hyp.txt'
    current_taging_test_out_file = result_dir + '/tagging.test.hyp.txt'

    vocab, rev_vocab = data_utils.initialize_vocab(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocab(tag_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocab(
        label_vocab_path)

    config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.23),
        #device_count = {'gpu': 2}
    )

    with tf.Session(config=config) as sess:
        # Create model.
        print("Max sequence length: %d." % _buckets[0][0])
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.size))

        model, model_test = create_model(sess, len(vocab), len(tag_vocab),
                                         len(label_vocab))
        print ("Creating model with " +
               "source_vocab_size=%d, target_vocab_size=%d, label_vocab_size=%d." \
               % (len(vocab), len(tag_vocab), len(label_vocab)))

        # Read data into buckets and compute their sizes.
        print("Reading train/valid/test data (training set limit: %d)." %
              FLAGS.max_train_data_size)
        dev_set = read_data(in_seq_dev, out_seq_dev, label_dev)
        test_set = read_data(in_seq_test, out_seq_test, label_test)
        train_set = read_data(in_seq_train, out_seq_train, label_train)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0

        best_valid_score = 0
        best_test_score = 0
        while model.global_step.eval() < FLAGS.max_training_steps:
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            batch_data = model.get_batch(train_set, bucket_id)
            encoder_inputs, tags, tag_weights, batch_sequence_length, labels = batch_data
            if task['joint'] == 1:
                step_outputs = model.joint_step(sess, encoder_inputs, tags,
                                                tag_weights, labels,
                                                batch_sequence_length,
                                                bucket_id, False)
                _, step_loss, tagging_logits, class_logits = step_outputs
            elif task['tagging'] == 1:
                step_outputs = model.tagging_step(sess, encoder_inputs, tags,
                                                  tag_weights,
                                                  batch_sequence_length,
                                                  bucket_id, False)
                _, step_loss, tagging_logits = step_outputs
            elif task['intent'] == 1:
                step_outputs = model.classification_step(
                    sess, encoder_inputs, labels, batch_sequence_length,
                    bucket_id, False)
                _, step_loss, class_logits = step_outputs

            step_time += (time.time() -
                          start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print(
                    "global step %d step-time %.2f. Training perplexity %.2f" %
                    (model.global_step.eval(), step_time, perplexity))
                sys.stdout.flush()
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                def run_valid_test(data_set, mode):  # mode: Eval, Test
                    # Run evals on development/test set and print the accuracy.
                    word_list = list()
                    ref_tag_list = list()
                    hyp_tag_list = list()
                    ref_label_list = list()
                    hyp_label_list = list()
                    correct_count = 0
                    accuracy = 0.0
                    tagging_eval_result = dict()
                    for bucket_id in xrange(len(_buckets)):
                        eval_loss = 0.0
                        count = 0
                        for i in xrange(len(data_set[bucket_id])):
                            count += 1
                            sample = model_test.get_one(data_set, bucket_id, i)
                            encoder_inputs, tags, tag_weights, sequence_length, labels = sample
                            tagging_logits = []
                            class_logits = []
                            if task['joint'] == 1:
                                step_outputs = model_test.joint_step(
                                    sess, encoder_inputs, tags, tag_weights,
                                    labels, sequence_length, bucket_id, True)
                                _, step_loss, tagging_logits, class_logits = step_outputs
                            elif task['tagging'] == 1:
                                step_outputs = model_test.tagging_step(
                                    sess, encoder_inputs, tags, tag_weights,
                                    sequence_length, bucket_id, True)
                                _, step_loss, tagging_logits = step_outputs
                            elif task['intent'] == 1:
                                step_outputs = model_test.classification_step(
                                    sess, encoder_inputs, labels,
                                    sequence_length, bucket_id, True)
                                _, step_loss, class_logits = step_outputs
                            eval_loss += step_loss / len(data_set[bucket_id])
                            hyp_label = None
                            if task['intent'] == 1:
                                ref_label_list.append(
                                    rev_label_vocab[labels[0][0]])
                                hyp_label = np.argmax(class_logits[0], 0)
                                hyp_label_list.append(
                                    rev_label_vocab[hyp_label])
                                if labels[0] == hyp_label:
                                    correct_count += 1
                            if task['tagging'] == 1:
                                word_list.append([rev_vocab[x[0]] for x in \
                                                  encoder_inputs[:sequence_length[0]]])
                                ref_tag_list.append([rev_tag_vocab[x[0]] for x in \
                                                     tags[:sequence_length[0]]])
                                hyp_tag_list.append(
                                        [rev_tag_vocab[np.argmax(x)] for x in \
                                                       tagging_logits[:sequence_length[0]]])

                    accuracy = float(correct_count) * 100 / count
                    if task['intent'] == 1:
                        print("  %s accuracy: %.2f %d/%d" \
                              % (mode, accuracy, correct_count, count))
                        sys.stdout.flush()
                    if task['tagging'] == 1:
                        if mode == 'Eval':
                            taging_out_file = current_taging_valid_out_file
                        elif mode == 'Test':
                            taging_out_file = current_taging_test_out_file
                        tagging_eval_result = conlleval(
                            hyp_tag_list, ref_tag_list, word_list,
                            taging_out_file)
                        print("  %s f1-score: %.2f" %
                              (mode, tagging_eval_result['f1']))
                        sys.stdout.flush()
                    return accuracy, tagging_eval_result

                # valid
                valid_accuracy, valid_tagging_result = run_valid_test(
                    dev_set, 'Eval')
                if task['tagging'] == 1 \
                    and valid_tagging_result['f1'] > best_valid_score:
                    best_valid_score = valid_tagging_result['f1']
                    # save the best output file
                    subprocess.call(['mv',
                                     current_taging_valid_out_file,
                                     current_taging_valid_out_file + '.best_f1_%.2f' \
                                     % best_valid_score])
                # test, run test after each validation for development purpose.
                test_accuracy, test_tagging_result = run_valid_test(
                    test_set, 'Test')
                if task['tagging'] == 1 \
                    and test_tagging_result['f1'] > best_test_score:
                    best_test_score = test_tagging_result['f1']
                    # save the best output file
                    subprocess.call(['mv',
                                     current_taging_test_out_file,
                                     current_taging_test_out_file + '.best_f1_%.2f' \
                                     % best_test_score])
Example #7
def train():
    
  # See parameters.
  print ('Applying Parameters:')
  for k,v in FLAGS.__dict__['__flags'].items():
    print ('%s: %s' % (k, str(v)))
    
  # 4-3-1. Prepare indexing data and corresponding labels.
  print("Preparing data in %s" % FLAGS.data_dir)
  vocab_path = ''
  tag_vocab_path = ''
    
  # 4-3-1-1. String data --> token indices / build the word and label dictionaries.
  date_set = data_utils.prepare_multi_task_data(
    FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)
    
  # 4-3-1-2. Get path of each result.
  in_seq_train, out_seq_train = date_set[0]
  in_seq_test, out_seq_test = date_set[1]
  vocab_path, tag_vocab_path = date_set[2]
 
  # Where do we save the result?  
  result_dir = FLAGS.train_dir + '/test_results'
  if not os.path.isdir(result_dir):
      os.makedirs(result_dir)
  current_taging_valid_out_file = result_dir + '/tagging.valid.hyp.txt'
  current_taging_test_out_file = result_dir + '/tagging.test.hyp.txt'
    
  # 4-3-2. Get index dictionary and word list.   
  vocab, rev_vocab = data_utils.initialize_vocab(vocab_path)
  tag_vocab, rev_tag_vocab = data_utils.initialize_vocab(tag_vocab_path)
  tag_vocab_inv = dict()
    
    
  for string, i in tag_vocab.items():
        tag_vocab_inv[i] = string
  config = tf.ConfigProto(
      gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.23),
      #device_count = {'gpu': 2}
  )
    
  with tf.Session(config=config) as sess:
    print("Max sequence length: %d." % _buckets[0][0])
    print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
    
    # 4-3-3. Make train/test model.
    model, model_test = create_model(sess, 
                                     len(vocab), 
                                     len(tag_vocab)
                                     )
    print ("Creating model with " + 
           "source_vocab_size=%d, target_vocab_size=%d" \
           % (len(vocab), len(tag_vocab)))

    # Read data into buckets and compute their sizes.
    print ("Reading train/valid/test data (training set limit: %d)."
           % FLAGS.max_train_data_size)
    
    # 4-3-4. Load data using "# 4-1."
    test_set = read_data(in_seq_test, out_seq_test)
    train_set = read_data(in_seq_train, out_seq_train)
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))
    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in xrange(len(train_bucket_sizes))]
    
    # 4-3-5. Train Loop.
    step_time, loss = 0.0, 0.0
    current_step = 0
    best_valid_score = 0
    best_test_score = 0

    while model.global_step.eval() < FLAGS.max_training_steps:
      random_number_01 = np.random.random_sample()
      bucket_id = min([i for i in xrange(len(train_buckets_scale))
                       if train_buckets_scale[i] > random_number_01])
      start_time = time.time()
        
      # 4-3-5-1. get batch
      batch_data = model.get_batch(train_set, bucket_id)
      encoder_inputs,tags,tag_weights,batch_sequence_length = batch_data
      
      step_outputs = model.tagging_step(sess, 
                                        encoder_inputs,
                                        tags,
                                        tag_weights,
                                        batch_sequence_length, 
                                        bucket_id, 
                                        False)
      _, step_loss, tagging_logits = step_outputs
        
        
      step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
      loss += step_loss / FLAGS.steps_per_checkpoint
      current_step += 1

      # Once in a while, we save checkpoint, print statistics, and run evals.
      if current_step % FLAGS.steps_per_checkpoint == 0:
        perplexity = math.exp(loss) if loss < 300 else float('inf')
        print ("global step %d step-time %.2f. Training perplexity %.2f" 
            % (model.global_step.eval(), step_time, perplexity))
        sys.stdout.flush()
        
        # Save checkpoint and zero timer and loss.
        checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
        step_time, loss = 0.0, 0.0
        
           
    # Test
    count = 0
    word_list = list()
    ref_tag_list = list()
    hyp_tag_list = list()
    for bucket_id in xrange(len(_buckets)):
        for i in xrange(len(test_set[bucket_id])):
            count += 1
            sample = model_test.get_one(test_set, bucket_id, i)
            encoder_inputs, tags, tag_weights, sequence_length= sample
            step_outputs = model_test.tagging_step(sess,
                                                   encoder_inputs,
                                                   tags,
                                                   tag_weights,
                                                   sequence_length,
                                                   bucket_id,
                                                   True)
            _, step_loss, tagging_logits = step_outputs
            
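            # Reconstruct the input sentence from the encoder inputs, skipping padding
            # and unknown-word tokens.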
            lst = []
            string = ""
            for num in encoder_inputs:
                num = num[0]
                word = rev_vocab[num]
                if word == "_PAD" or word == "_UNK":
                    continue
                else:
                    lst.append(word)
                    string = string + word + " "
            string = string + " : "
            string2 = string
            
            # Top-scoring tag per position (argsort is ascending, so the last index is the argmax).
            for word in tagging_logits:
                word = word[0]
                sort_num = np.argsort(word)
                b = sort_num[-1]
                word = rev_tag_vocab[b]
                if word == "_PAD" or word == "_UNK":
                    continue
                else:
                    lst.append(word)
                    string = string + word + " "
            print(string)
            
            # Second-best tag per position, for comparison with the top prediction.
            for word in tagging_logits:
                word = word[0]
                sort_num = np.argsort(word)
                b = sort_num[-2]
                word = rev_tag_vocab[b]
                if word == "_PAD" or word == "_UNK":
                    continue
                else:
                    lst.append(word)
                    string2 = string2 + word + " "
            print(string2)
            print("\n")
Example #8
def test():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))
    print("\nPreparing data in %s" % FLAGS.data_dir)
    vocab_path = ''
    tag_vocab_path = ''
    label_vocab_path = ''
    date_set = data_utils.prepare_multi_task_data(FLAGS.data_dir,
                                                  FLAGS.in_vocab_size,
                                                  FLAGS.out_vocab_size)
    in_seq_test, out_seq_test, label_test = date_set[2]
    vocab_path, tag_vocab_path, label_vocab_path = date_set[3]

    vocab, rev_vocab = data_utils.initialize_vocab(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocab(tag_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocab(
        label_vocab_path)

    with tf.Session() as sess:
        # Create model.
        print("\nCreating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.size))

        model, model_test = create_model(sess, len(vocab), len(tag_vocab),
                                         len(label_vocab))
        print ("Created model with " +
               "source_vocab_size=%d, target_vocab_size=%d, label_vocab_size=%d." \
               % (len(vocab), len(tag_vocab), len(label_vocab)))

        # Read data into buckets and compute their sizes.
        print("\nReading test data")
        test_set = read_data(in_seq_test, out_seq_test, label_test)

        def run_valid_test(data_set, mode):  # mode: Eval, Test
            # Run evals on development/test set and print the accuracy.
            word_list = list()
            ref_tag_list = list()
            hyp_tag_list = list()
            ref_label_list = list()
            hyp_label_list = list()
            correct_count = 0
            accuracy = 0.0
            tagging_eval_result = dict()
            for bucket_id in xrange(len(_buckets)):
                eval_loss = 0.0
                count = 0
                for i in xrange(len(data_set[bucket_id])):
                    count += 1
                    sample = model_test.get_one(data_set, bucket_id, i)
                    encoder_inputs, tags, tag_weights, sequence_length, labels = sample
                    tagging_logits = []
                    class_logits = []
                    if task['joint'] == 1:
                        step_outputs = model_test.joint_step(
                            sess, encoder_inputs, tags, tag_weights, labels,
                            sequence_length, bucket_id, True)
                        _, step_loss, tagging_logits, class_logits = step_outputs
                        class_prob = _softmax(class_logits[0])
                    elif task['tagging'] == 1:
                        step_outputs = model_test.tagging_step(
                            sess, encoder_inputs, tags, tag_weights,
                            sequence_length, bucket_id, True)
                        _, step_loss, tagging_logits = step_outputs
                    elif task['intent'] == 1:
                        step_outputs = model_test.classification_step(
                            sess, encoder_inputs, labels, sequence_length,
                            bucket_id, True)
                        _, step_loss, class_logits = step_outputs
                    eval_loss += step_loss / len(data_set[bucket_id])
                    hyp_label = None
                    if task['intent'] == 1:
                        ref_label_list.append(rev_label_vocab[labels[0][0]])
                        hyp_label = np.argmax(class_logits[0], 0)
                        hyp_label_list.append(rev_label_vocab[hyp_label])
                        if labels[0] == hyp_label:
                            correct_count += 1
                    if task['tagging'] == 1:
                        word_list.append([rev_vocab[x[0]] for x in \
                                          encoder_inputs[:sequence_length[0]]])
                        ref_tag = [x[0] for x in tags[:sequence_length[0]]]
                        ref_tag_list.append([rev_tag_vocab[x[0]] for x in \
                                             tags[:sequence_length[0]]])
                        hyp_tag = [
                            np.argmax(x)
                            for x in tagging_logits[:sequence_length[0]]
                        ]
                        hyp_tag_list.append(
                                [rev_tag_vocab[np.argmax(x)] for x in \
                                               tagging_logits[:sequence_length[0]]])

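                    # Error analysis below assumes the joint task: class_prob, ref_tag and
                    # hyp_tag are only defined in the joint/tagging branches above.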
                    if labels[0] != hyp_label or ref_tag != hyp_tag:
                        error_type = []
                        if labels[0] != hyp_label:
                            error_type.append("Intent misclassification")
                        if ref_tag != hyp_tag:
                            error_type.append("Slot error")
                        print("\n" + ", ".join(error_type))
                        print("(intent) input: (%s) %s" %
                                (rev_label_vocab[labels[0][0]],
                                 " ".join([rev_vocab[x[0]] for x in \
                                           encoder_inputs[:sequence_length[0]]])))
                        print("true slots: %s" % " ".join(ref_tag_list[-1]))
                        print("pred slots: %s" % " ".join(hyp_tag_list[-1]))
                        pred_labels = np.argsort(class_prob)[-3:]
                        intent_preds = [
                            rev_label_vocab[l] for l in pred_labels
                        ]
                        print("Top 3 predicted intents:")
                        for idx in reversed(pred_labels):
                            print("%s (%.4f)" %
                                  (rev_label_vocab[idx], class_prob[idx]))

            accuracy = float(correct_count) * 100 / count
            if task['intent'] == 1:
                print("  %s accuracy: %.2f %d/%d" \
                      % (mode, accuracy, correct_count, count))
                sys.stdout.flush()
            '''
        if task['tagging'] == 1:
          tagging_eval_result = conlleval(hyp_tag_list, 
                                          ref_tag_list, 
                                          word_list, 
                                          None)
          print("  %s f1-score: %.2f" % (mode, tagging_eval_result['f1']))
          sys.stdout.flush()
        return accuracy, tagging_eval_result
        '''
            return accuracy, ref_label_list, hyp_label_list

        # test, run test after each validation for development purpose.
        #test_accuracy, test_tagging_result = run_valid_test(test_set, 'Test')
        test_accuracy, ref_label_list, hyp_label_list = run_valid_test(
            test_set, 'Test')

        # Compute confusion matrix
        cnf_matrix = confusion_matrix(ref_label_list,
                                      hyp_label_list,
                                      labels=rev_label_vocab)
        np.set_printoptions(precision=2)

        # Plot non-normalized confusion matrix
        plt.figure(figsize=(12, 10))
        plot_confusion_matrix(cnf_matrix,
                              classes=rev_label_vocab,
                              title='Confusion matrix, without normalization')

        # Plot normalized confusion matrix
        plt.figure(figsize=(12, 10))
        plot_confusion_matrix(cnf_matrix,
                              classes=rev_label_vocab,
                              normalize=True,
                              title='Normalized confusion matrix')

        plt.show()
Example #9
def train():
    tf.logging.info('Applying Parameters:')
    tf.logging.info("Preparing data in %s" % FLAGS.data_dir)
    nowTime = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    tf.logging.set_verbosity(tf.logging.INFO)
    handlers = [
        logging.FileHandler(os.path.join(FLAGS.log, nowTime + '.log')),
        logging.StreamHandler(sys.stdout)
    ]
    logging.getLogger('tensorflow').handlers = handlers

    date_set = data_utils.prepare_multi_task_data(FLAGS.data_dir)
    in_seq_train, out_seq_train, label_train = date_set[0]
    in_seq_dev, out_seq_dev, label_dev = date_set[1]
    in_seq_test, out_seq_test, label_test = date_set[2]
    vocab_path, tag_vocab_path, label_vocab_path = date_set[3]

    result_dir = FLAGS.train_dir + '/test_results'
    if not tf.gfile.IsDirectory(result_dir):
        tf.gfile.MakeDirs(result_dir)

    current_taging_valid_out_file = result_dir + '/tagging.valid.hyp.txt'
    current_taging_test_out_file = result_dir + '/tagging.test.hyp.txt'

    if not tf.gfile.Exists('data_bak/vocab.json') or not tf.gfile.Exists(
            'data_bak/rev_vocab.json'):
        vocab, rev_vocab = data_utils.initialize_vocab(vocab_path)
        with tf.gfile.GFile('data_bak/vocab.json',
                            'w') as vocab_file, tf.gfile.GFile(
                                'data_bak/rev_vocab.json',
                                'w') as rev_vocab_file:
            vocab_file.write(json.dumps(vocab, ensure_ascii=False, indent=4))
            rev_vocab_file.write(
                json.dumps(rev_vocab, ensure_ascii=False, indent=4))
    else:
        with tf.gfile.GFile('data_bak/vocab.json',
                            'r') as vocab_file, tf.gfile.GFile(
                                'data_bak/rev_vocab.json',
                                'r') as rev_vocab_file:
            vocab = json.load(vocab_file)
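            # 'seq' is assumed to be PyFunctional's entry point; this reloads the reverse
            # vocab and casts the JSON string keys back to ints (the same pattern is used
            # below for the tag and label vocabs).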
            rev_vocab = seq.json(rev_vocab_file).map(
                lambda x: (int(x[0]), x[1])).to_dict()

    if not tf.gfile.Exists('data_bak/tag_vocab.json') or not tf.gfile.Exists(
            'data_bak/rev_tag_vocab.json'):
        tag_vocab, rev_tag_vocab = data_utils.initialize_vocab(tag_vocab_path)
        with tf.gfile.GFile('data_bak/tag_vocab.json', 'w') as tag_vocab_file, \
                tf.gfile.GFile('data_bak/rev_tag_vocab.json', 'w') as rev_tag_vocab_file:
            tag_vocab_file.write(
                json.dumps(tag_vocab, ensure_ascii=False, indent=4))
            rev_tag_vocab_file.write(
                json.dumps(rev_tag_vocab, ensure_ascii=False, indent=4))
    else:
        with tf.gfile.GFile('data_bak/tag_vocab.json',
                            'r') as tag_vocab_file, tf.gfile.GFile(
                                'data_bak/rev_tag_vocab.json',
                                'r') as rev_tag_vocab_file:
            tag_vocab = json.load(tag_vocab_file)
            rev_tag_vocab = seq.json(rev_tag_vocab_file).map(
                lambda x: (int(x[0]), x[1])).to_dict()

    if not tf.gfile.Exists('data_bak/label_vocab.json') or not tf.gfile.Exists(
            'data_bak/rev_label_vocab.json'):
        label_vocab, rev_label_vocab = data_utils.initialize_vocab(
            label_vocab_path)
        with tf.gfile.GFile('data_bak/label_vocab.json', 'w') as label_vocab_file, \
                tf.gfile.GFile('data_bak/rev_label_vocab.json', 'w') as rev_label_vocab_file:
            label_vocab_file.write(
                json.dumps(label_vocab, ensure_ascii=False, indent=4))
            rev_label_vocab_file.write(
                json.dumps(rev_label_vocab, ensure_ascii=False, indent=4))
    else:
        with tf.gfile.GFile('data_bak/label_vocab.json',
                            'r') as label_vocab_file, tf.gfile.GFile(
                                'data_bak/rev_label_vocab.json',
                                'r') as rev_label_vocab_file:
            label_vocab = json.load(label_vocab_file)
            rev_label_vocab = seq.json(rev_label_vocab_file).map(
                lambda x: (int(x[0]), x[1])).to_dict()

    # Read data into buckets and compute their sizes.
    tf.logging.info("Reading train/valid/test data (training set limit: %d)." %
                    FLAGS.max_train_data_size)
    dev_set = read_data(in_seq_dev, out_seq_dev, label_dev)
    test_set = read_data(in_seq_test, out_seq_test, label_test)
    train_set = read_data(in_seq_train, out_seq_train, label_train)
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))

    config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.23),
        # device_count = {'gpu': 2}
    )

    with tf.Session(config=config) as sess:
        # Create model.
        tf.logging.info("Max sequence length: %d." % _buckets[0][0])
        tf.logging.info("Creating %d layers of %d units." %
                        (FLAGS.num_layers, FLAGS.size))

        model, model_test = create_model(sess, len(vocab), len(tag_vocab),
                                         len(label_vocab))
        tf.logging.info("Creating model with " +
              "source_vocab_size=%d, target_vocab_size=%d, label_vocab_size=%d." \
              % (len(vocab), len(tag_vocab), len(label_vocab)))

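        # Register TensorBoard summaries; best_dev_* / best_test_* are assumed to be
        # metric variables tracked on the model.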
        tf.summary.scalar('loss', model.loss)
        tf.summary.scalar('dev_accuracy', model.best_dev_accuracy)
        tf.summary.scalar('dev_f1', model.best_dev_f1)
        tf.summary.scalar('test_accuracy', model.best_test_accuracy)
        tf.summary.scalar('test_f1', model.best_test_f1)

        model.merged = tf.summary.merge_all()
        model.writer = tf.summary.FileWriter(
            os.path.join(FLAGS.tensorboard, nowTime))
        model.writer.add_graph(graph=sess.graph)

        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0

        no_improve_step = 0
        while model.global_step.eval() < FLAGS.max_training_steps:
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            batch_data = model.get_batch(train_set, bucket_id)
            encoder_inputs, tags, tag_weights, batch_sequence_length, labels = batch_data
            if task['joint'] == 1:
                step_outputs = model.joint_step(sess, encoder_inputs, tags,
                                                tag_weights, labels,
                                                batch_sequence_length,
                                                bucket_id, False)
                _, step_loss, tagging_logits, class_logits = step_outputs
            elif task['tagging'] == 1:
                step_outputs = model.tagging_step(sess, encoder_inputs, tags,
                                                  tag_weights,
                                                  batch_sequence_length,
                                                  bucket_id, False)
                _, step_loss, tagging_logits = step_outputs
            elif task['intent'] == 1:
                step_outputs = model.classification_step(
                    sess, encoder_inputs, labels, batch_sequence_length,
                    bucket_id, False)
                _, step_loss, class_logits = step_outputs

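            # Log the merged summaries; model.input_feed is assumed to hold the feed
            # dict cached by the step call above.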
            summary = sess.run(model.merged, model.input_feed)
            model.writer.add_summary(summary, model.global_step.eval())

            step_time += (time.time() -
                          start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                tf.logging.info(
                    "global step %d step-time %.2f. Training perplexity %.2f" %
                    (model.global_step.eval(), step_time, perplexity))
                sys.stdout.flush()
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                step_time, loss = 0.0, 0.0

                def run_valid_test(data_set, mode):  # mode: Eval, Test
                    # Run evals on development/test set and print the accuracy.
                    word_list = list()
                    ref_tag_list = list()
                    hyp_tag_list = list()
                    ref_label_list = list()
                    hyp_label_list = list()
                    correct_count = 0
                    accuracy = 0.0
                    tagging_eval_result = dict()
                    for bucket_id in xrange(len(_buckets)):
                        eval_loss = 0.0
                        count = 0
                        for i in xrange(len(data_set[bucket_id])):
                            count += 1
                            sample = model_test.get_one(data_set, bucket_id, i)
                            encoder_inputs, tags, tag_weights, sequence_length, labels = sample
                            tagging_logits = []
                            class_logits = []
                            if task['joint'] == 1:
                                step_outputs = model_test.joint_step(
                                    sess, encoder_inputs, tags, tag_weights,
                                    labels, sequence_length, bucket_id, True)
                                _, step_loss, tagging_logits, class_logits = step_outputs
                            elif task['tagging'] == 1:
                                step_outputs = model_test.tagging_step(
                                    sess, encoder_inputs, tags, tag_weights,
                                    sequence_length, bucket_id, True)
                                _, step_loss, tagging_logits = step_outputs
                            elif task['intent'] == 1:
                                step_outputs = model_test.classification_step(
                                    sess, encoder_inputs, labels,
                                    sequence_length, bucket_id, True)
                                _, step_loss, class_logits = step_outputs
                            eval_loss += step_loss / len(data_set[bucket_id])
                            hyp_label = None
                            if task['intent'] == 1:
                                ref_label_list.append(
                                    rev_label_vocab[labels[0][0]])
                                hyp_label = np.argmax(class_logits[0], 0)
                                hyp_label_list.append(
                                    rev_label_vocab[hyp_label])
                                if labels[0] == hyp_label:
                                    correct_count += 1
                            if task['tagging'] == 1:
                                word_list.append([rev_vocab[x[0]] for x in \
                                                  encoder_inputs[:sequence_length[0]]])
                                ref_tag_list.append([rev_tag_vocab[x[0]] for x in \
                                                     tags[:sequence_length[0]]])
                                hyp_tag_list.append(
                                    [rev_tag_vocab[np.argmax(x)] for x in \
                                     tagging_logits[:sequence_length[0]]])

                    accuracy = float(correct_count) * 100 / count
                    if task['intent'] == 1:
                        tf.logging.info("\t%s accuracy: %.2f %d/%d" \
                              % (mode, accuracy, correct_count, count))
                        sys.stdout.flush()
                    if task['tagging'] == 1:
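                        # conlleval (below) scores the hypothesis tags against the
                        # references, writes them to taging_out_file, and returns a
                        # dict of CoNLL-style metrics; only 'f1' is used here.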
                        if mode == 'Eval':
                            taging_out_file = current_taging_valid_out_file
                        elif mode == 'Test':
                            taging_out_file = current_taging_test_out_file
                        tagging_eval_result = conlleval(
                            hyp_tag_list, ref_tag_list, word_list,
                            taging_out_file)
                        tf.logging.info("\t%s f1-score: %.2f" %
                                        (mode, tagging_eval_result['f1']))
                        sys.stdout.flush()
                    return accuracy, tagging_eval_result

                # valid
                valid_accuracy, valid_tagging_result = run_valid_test(
                    dev_set, 'Eval')
                if task['tagging'] == 1 and task['intent'] == 0:
                    best_dev_f1 = model.best_dev_f1.eval()
                    if valid_tagging_result['f1'] > best_dev_f1:
                        tf.assign(model.best_dev_f1,
                                  valid_tagging_result['f1']).eval()
                        # save the best output file
                        subprocess.call(['mv',
                                         current_taging_valid_out_file,
                                         current_taging_valid_out_file + '.best_f1_%.2f' \
                                         % valid_tagging_result['f1']])
                        model.saver.save(sess,
                                         checkpoint_path,
                                         global_step=model.global_step)
                        no_improve_step = 0
                    else:
                        no_improve_step += 1

                if task['tagging'] == 1 and task['intent'] == 1:
                    best_dev_accuracy = model.best_dev_accuracy.eval()
                    best_dev_f1 = model.best_dev_f1.eval()
                    if valid_accuracy > best_dev_accuracy and valid_tagging_result[
                            'f1'] > best_dev_f1:
                        tf.assign(model.best_dev_accuracy,
                                  valid_accuracy).eval()
                        tf.assign(model.best_dev_f1,
                                  valid_tagging_result['f1']).eval()
                        subprocess.call(['mv',
                                         current_taging_valid_out_file,
                                         current_taging_valid_out_file + '.best_f1_%.2f' \
                                         % valid_tagging_result['f1']])
                        model.saver.save(sess,
                                         checkpoint_path,
                                         global_step=model.global_step)
                        no_improve_step = 0
                    else:
                        no_improve_step += 1

                # test, run test after each validation for development purpose.
                test_accuracy, test_tagging_result = run_valid_test(
                    test_set, 'Test')
                if task['tagging'] == 1 and task['intent'] == 0:
                    best_test_f1 = model.best_test_f1.eval()
                    if test_tagging_result['f1'] > best_test_f1:
                        tf.assign(model.best_test_f1,
                                  test_tagging_result['f1']).eval()
                        # save the best output file
                        subprocess.call(['mv',
                                         current_taging_test_out_file,
                                         current_taging_test_out_file + '.best_f1_%.2f' \
                                         % test_tagging_result['f1']])

                if task['tagging'] == 1 and task['intent'] == 1:
                    best_test_accuracy = model.best_test_accuracy.eval()
                    best_test_f1 = model.best_test_f1.eval()
                    if test_accuracy > best_test_accuracy and test_tagging_result[
                            'f1'] > best_test_f1:
                        tf.assign(model.best_test_accuracy,
                                  test_accuracy).eval()
                        tf.assign(model.best_test_f1,
                                  test_tagging_result['f1']).eval()
                        subprocess.call(['mv',
                                         current_taging_test_out_file,
                                         current_taging_test_out_file + '.best_f1_%.2f' \
                                         % test_tagging_result['f1']])

                if no_improve_step > FLAGS.no_improve_per_step:
                    tf.logging.info("continuous no improve per step " +
                                    str(FLAGS.no_improve_per_step) +
                                    ", auto stop...")
                    tf.logging.info("max accuracy is: " +
                                    str(model.best_dev_accuracy.eval()) +
                                    ", max f1 score is: " +
                                    str(model.best_dev_f1.eval()))
                    break
Ejemplo n.º 10
0
def train():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    sent_train, label_train, \
    sent_valid, label_valid, \
    sent_test, label_test, \
    sent_vocab_path, label_vocab_path = data_utils.prepare_multi_task_data(
        FLAGS.data_dir, FLAGS.sent_vocab_size)

    result_dir = FLAGS.data_dir + '/test_results'
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)

    current_valid_out_file = result_dir + '/valid_hyp'
    current_test_out_file = result_dir + '/test_hyp'

    sent_vocab, rev_sent_vocab = data_utils.initialize_vocabulary(
        sent_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocabulary(
        label_vocab_path)
    print(rev_label_vocab)

    sent_vocab_size = len(sent_vocab)
    label_vocab_size = len(label_vocab)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Create model.
        print("Max sequence length: %d." % FLAGS.max_sequence_length)
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.size))

        sess.run(tf.global_variables_initializer())

        model, model_test = create_model(sess, sent_vocab_size,
                                         label_vocab_size)
        print("Creating model with sent_vocab_size=%d,"
              "and label_vocab_size=%d." % (sent_vocab_size, label_vocab_size))

        # Read data into buckets and compute their sizes.
        print("Reading train/valid/test data (training set limit: %d)." %
              FLAGS.max_train_data_size)
        valid_set = data_utils.read_data(sent_valid, label_valid)
        test_set = data_utils.read_data(sent_test, label_test)
        train_set = data_utils.read_data(sent_train, label_train)

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0

        best_valid_score = 0
        best_test_score = 0

        while model.global_step.eval() < FLAGS.max_training_steps:
            # Get a batch and make a step.
            start_time = time.time()

            batch_inputs, batch_labels, batch_sequence_length = model.get_batch(
                train_set)
            # print(batch_inputs[0].shape)

            _, step_loss, logits = model.step(sess, batch_inputs, batch_labels,
                                              batch_sequence_length, False)

            step_time += (time.time() -
                          start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print(
                    "global step %d step-time %.2f. Training perplexity %.2f" %
                    (model.global_step.eval(), step_time, perplexity))
                sys.stdout.flush()
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                def write_eval_result(result_list, result_path):
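                    # tf.gfile.GFile goes through TensorFlow's file-system layer, so
                    # result_path may be a local path or a remote one (e.g. GCS).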
                    with tf.gfile.GFile(result_path, 'w') as f:
                        for i in range(len(result_list)):
                            f.write(result_list[i] + '\n')

                def run_valid_test(data_set, mode):  # mode: Eval, Test
                    # Run evals on development/test set and print the accuracy.
                    ref_label_list = list()
                    hyp_label_list = list()
                    label_correct_count = 0

                    # accuracy = 0.0

                    eval_loss = 0.0
                    count = 0
                    for i in range(len(data_set)):
                        count += 1
                        inputs, labels, sequence_length = model_test.get_one(
                            data_set, i)

                        _, _step_loss, logits = model_test.step(
                            sess, inputs, labels, sequence_length, True)
                        eval_loss += _step_loss / len(data_set)

                        ref_label = np.argmax(labels)
                        ref_label_list.append(rev_label_vocab[ref_label])
                        hyp_label = np.argmax(logits[0])
                        hyp_label_list.append(rev_label_vocab[hyp_label])

                        if ref_label == hyp_label:
                            label_correct_count += 1

                    label_accuracy = float(label_correct_count) * 100 / count

                    print("  %s label_accuracy: %.2f %d/%d" %
                          (mode, label_accuracy, label_correct_count, count))
                    sys.stdout.flush()
                    out_file = None
                    if mode == 'Valid':
                        out_file = current_valid_out_file
                    elif mode == 'Test':
                        out_file = current_test_out_file

                    write_eval_result(
                        hyp_label_list, out_file
                    )  # write prediction result to output file path

                    return label_accuracy, hyp_label_list

                # valid
                valid_label_accuracy, hyp_list = run_valid_test(
                    valid_set, 'Valid')
                if valid_label_accuracy > best_valid_score:
                    best_valid_score = valid_label_accuracy
                    # save the best output file
                    subprocess.call([
                        'mv', current_valid_out_file, current_valid_out_file +
                        '_best_acc_%.2f' % best_valid_score
                    ])
                # test, run test after each validation for development purpose.
                test_label_accuracy, hyp_list = run_valid_test(
                    test_set, 'Test')
                if test_label_accuracy > best_test_score:
                    best_test_score = test_label_accuracy
                    # save the best output file
                    subprocess.call([
                        'mv', current_test_out_file, current_test_out_file +
                        '_best_acc_%.2f' % best_test_score
                    ])
Ejemplo n.º 11
0
def train():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].iteritems():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    vocab_path = ''
    tag_vocab_path = ''
    label_vocab_path = ''
    in_seq_train, out_seq_train, label_train, in_seq_dev, out_seq_dev, label_dev, in_seq_test, out_seq_test, label_test, vocab_path, tag_vocab_path, label_vocab_path = data_utils.prepare_multi_task_data(
        FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)

    result_dir = FLAGS.train_dir + '/test_results'
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)

    current_taging_valid_out_file = result_dir + '/tagging.valid.hyp.txt'
    current_taging_test_out_file = result_dir + '/tagging.test.hyp.txt'

    vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocabulary(tag_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocabulary(
        label_vocab_path)

    with tf.Session() as sess:
        # Create model.
        print("Max sequence length: %d." % _buckets[0][0])
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.size))

        model, model_test = create_model(sess, len(vocab), len(tag_vocab),
                                         len(label_vocab))
        print(
            "Creating model with source_vocab_size=%d, target_vocab_size=%d, and label_vocab_size=%d."
            % (len(vocab), len(tag_vocab), len(label_vocab)))

        # Read data into buckets and compute their sizes.
        print("Reading train/valid/test data (training set limit: %d)." %
              FLAGS.max_train_data_size)
        dev_set = read_data(in_seq_dev, out_seq_dev, label_dev)
        test_set = read_data(in_seq_test, out_seq_test, label_test)
        train_set = read_data(in_seq_train, out_seq_train, label_train)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        def run_valid_test(data_set, mode):
            # mode: Eval, Test
            # Run evals on development/test set and print the accuracy.
            word_list = list()
            ref_tag_list = list()
            hyp_tag_list = list()
            ref_label_list = list()
            hyp_label_list = list()
            correct_count = 0
            accuracy = 0.0
            tagging_eval_result = dict()
            for bucket_id in xrange(len(_buckets)):
                eval_loss = 0.0
                count = 0
                for i in xrange(len(data_set[bucket_id])):
                    count += 1
                    encoder_inputs, tags, tag_weights, sequence_length, labels = model_test.get_one(
                        data_set, bucket_id, i)
                    tagging_logits = []
                    classification_logits = []
                    if task['joint'] == 1:
                        _, step_loss, tagging_logits, classification_logits = model_test.joint_step(
                            sess, encoder_inputs, tags, tag_weights, labels,
                            sequence_length, bucket_id, True)
                    elif task['tagging'] == 1:
                        _, step_loss, tagging_logits = model_test.tagging_step(
                            sess, encoder_inputs, tags, tag_weights,
                            sequence_length, bucket_id, True)
                    elif task['intent'] == 1:
                        _, step_loss, classification_logits = model_test.classification_step(
                            sess, encoder_inputs, labels, sequence_length,
                            bucket_id, True)
                    eval_loss += step_loss / len(data_set[bucket_id])
                    hyp_label = None
                    if task['intent'] == 1:
                        ref_label_list.append(rev_label_vocab[labels[0][0]])
                        hyp_label = np.argmax(classification_logits[0], 0)
                        hyp_label_list.append(rev_label_vocab[hyp_label])
                        if labels[0] == hyp_label:
                            correct_count += 1
                    if task['tagging'] == 1:
                        word_list.append([
                            rev_vocab[x[0]]
                            for x in encoder_inputs[:sequence_length[0]]
                        ])
                        ref_tag_list.append([
                            rev_tag_vocab[x[0]]
                            for x in tags[:sequence_length[0]]
                        ])
                        hyp_tag_list.append([
                            rev_tag_vocab[np.argmax(x)]
                            for x in tagging_logits[:sequence_length[0]]
                        ])

            accuracy = float(correct_count) * 100 / count
            if task['intent'] == 1:
                print("  %s accuracy: %.2f %d/%d" %
                      (mode, accuracy, correct_count, count))
                sys.stdout.flush()
            if task['tagging'] == 1:
                if mode == 'Eval':
                    taging_out_file = current_taging_valid_out_file
                elif mode == 'Test':
                    print("Test!!!")
                    taging_out_file = current_taging_test_out_file

                tagging_eval_result = conlleval(hyp_tag_list, ref_tag_list,
                                                word_list, taging_out_file)
                print("  %s f1-score: %.2f" %
                      (mode, tagging_eval_result['f1']))
                sys.stdout.flush()
            return accuracy, tagging_eval_result, hyp_label_list, hyp_tag_list

        # Run a single test-set evaluation (for development purposes).
        print("run test")
        test_accuracy, test_tagging_result, label, tag = run_valid_test(
            test_set, 'Test')

        with open(sys.argv[1], "w+") as f:
            for i in range(len(tag)):
                f.write(' '.join(tag[i]) + '\n')
Ejemplo n.º 12
0
def train():
  print ('Applying Parameters:')
  for k,v in FLAGS.__dict__['__flags'].iteritems():
    print ('%s: %s' % (k, str(v)))
  print("Preparing data in %s" % FLAGS.data_dir)
  vocab_path = ''
  tag_vocab_path = ''
  label_vocab_path = ''
  in_seq_train, out_seq_train, label_train, in_seq_dev, out_seq_dev, label_dev, in_seq_test, out_seq_test, label_test, vocab_path, tag_vocab_path, label_vocab_path = data_utils.prepare_multi_task_data(
    FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)     
     
  result_dir = FLAGS.train_dir + '/test_results'
  if not os.path.isdir(result_dir):
      os.makedirs(result_dir)

  current_taging_valid_out_file = result_dir + '/tagging.valid.hyp.txt'
  current_taging_test_out_file = result_dir + '/tagging.test.hyp.txt'

  vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)
  tag_vocab, rev_tag_vocab = data_utils.initialize_vocabulary(tag_vocab_path)
  label_vocab, rev_label_vocab = data_utils.initialize_vocabulary(label_vocab_path)
    
  with tf.Session() as sess:
    # Create model.
    print("Max sequence length: %d." % _buckets[0][0])
    print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
    
    model, model_test = create_model(sess, len(vocab), len(tag_vocab), len(label_vocab))
    print ("Creating model with source_vocab_size=%d, target_vocab_size=%d, and label_vocab_size=%d." % (len(vocab), len(tag_vocab), len(label_vocab)))

    # Read data into buckets and compute their sizes.
    print ("Reading train/valid/test data (training set limit: %d)."
           % FLAGS.max_train_data_size)
    dev_set = read_data(in_seq_dev, out_seq_dev, label_dev)
    test_set = read_data(in_seq_test, out_seq_test, label_test)
    train_set = read_data(in_seq_train, out_seq_train, label_train)
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))

    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in xrange(len(train_bucket_sizes))]

    # This is the training loop.
    step_time, loss = 0.0, 0.0
    current_step = 0

    best_valid_score = 0
    best_test_score = 0
    while model.global_step.eval() < FLAGS.max_training_steps:
      random_number_01 = np.random.random_sample()
      bucket_id = min([i for i in xrange(len(train_buckets_scale))
                       if train_buckets_scale[i] > random_number_01])

      # Get a batch and make a step.
      start_time = time.time()
      encoder_inputs, tags, tag_weights, batch_sequence_length, labels = model.get_batch(train_set, bucket_id)
      if task['joint'] == 1:
        _, step_loss, tagging_logits, classification_logits = model.joint_step(sess, encoder_inputs, tags, tag_weights, labels,
                                   batch_sequence_length, bucket_id, False)
      elif task['tagging'] == 1:
        _, step_loss, tagging_logits = model.tagging_step(sess, encoder_inputs, tags, tag_weights,
                                   batch_sequence_length, bucket_id, False)
      elif task['intent'] == 1:
        _, step_loss, classification_logits = model.classification_step(sess, encoder_inputs, labels,
                                   batch_sequence_length, bucket_id, False)                                   

      step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
      loss += step_loss / FLAGS.steps_per_checkpoint
      current_step += 1

      # Once in a while, we save checkpoint, print statistics, and run evals.
      if current_step % FLAGS.steps_per_checkpoint == 0:
        perplexity = math.exp(loss) if loss < 300 else float('inf')
        print ("global step %d step-time %.2f. Training perplexity %.2f" 
            % (model.global_step.eval(), step_time, perplexity))
        sys.stdout.flush()
        # Save checkpoint and zero timer and loss.
        checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
        step_time, loss = 0.0, 0.0 
        
        def run_valid_test(data_set, mode): # mode: Eval, Test
        # Run evals on development/test set and print the accuracy.
            word_list = list() 
            ref_tag_list = list() 
            hyp_tag_list = list()
            ref_label_list = list()
            hyp_label_list = list()
            correct_count = 0
            accuracy = 0.0
            tagging_eval_result = dict()
            for bucket_id in xrange(len(_buckets)):
              eval_loss = 0.0
              count = 0
              for i in xrange(len(data_set[bucket_id])):
                count += 1
                encoder_inputs, tags, tag_weights, sequence_length, labels = model_test.get_one(
                  data_set, bucket_id, i)
                tagging_logits = []
                classification_logits = []
                if task['joint'] == 1:
                  _, step_loss, tagging_logits, classification_logits = model_test.joint_step(sess, encoder_inputs, tags, tag_weights, labels,
                                             sequence_length, bucket_id, True)
                elif task['tagging'] == 1:
                  _, step_loss, tagging_logits = model_test.tagging_step(sess, encoder_inputs, tags, tag_weights,
                                             sequence_length, bucket_id, True)
                elif task['intent'] == 1:
                  _, step_loss, classification_logits = model_test.classification_step(sess, encoder_inputs, labels,
                                             sequence_length, bucket_id, True) 
                eval_loss += step_loss / len(data_set[bucket_id])
                hyp_label = None
                if task['intent'] == 1:
                  ref_label_list.append(rev_label_vocab[labels[0][0]])
                  hyp_label = np.argmax(classification_logits[0],0)
                  hyp_label_list.append(rev_label_vocab[hyp_label])
                  if labels[0] == hyp_label:
                    correct_count += 1
                if task['tagging'] == 1:
                  word_list.append([rev_vocab[x[0]] for x in encoder_inputs[:sequence_length[0]]])
                  ref_tag_list.append([rev_tag_vocab[x[0]] for x in tags[:sequence_length[0]]])
                  hyp_tag_list.append([rev_tag_vocab[np.argmax(x)] for x in tagging_logits[:sequence_length[0]]])

            accuracy = float(correct_count)*100/count
            if task['intent'] == 1:
              print("  %s accuracy: %.2f %d/%d" % (mode, accuracy, correct_count, count))
              sys.stdout.flush()
            if task['tagging'] == 1:
              if mode == 'Eval':
                  taging_out_file = current_taging_valid_out_file
              elif mode == 'Test':
                  taging_out_file = current_taging_test_out_file
              tagging_eval_result = conlleval(hyp_tag_list, ref_tag_list, word_list, taging_out_file)
              print("  %s f1-score: %.2f" % (mode, tagging_eval_result['f1']))
              sys.stdout.flush()
            return accuracy, tagging_eval_result
            
        # valid
        valid_accuracy, valid_tagging_result = run_valid_test(dev_set, 'Eval')        
        if task['tagging'] == 1 and valid_tagging_result['f1'] > best_valid_score:
          best_valid_score = valid_tagging_result['f1']
          # save the best output file
          subprocess.call(['mv', current_taging_valid_out_file, current_taging_valid_out_file + '.best_f1_%.2f' % best_valid_score])
        # test, run test after each validation for development purpose.
        test_accuracy, test_tagging_result = run_valid_test(test_set, 'Test')        
        if task['tagging'] == 1 and test_tagging_result['f1'] > best_test_score:
          best_test_score = test_tagging_result['f1']
          # save the best output file
          subprocess.call(['mv', current_taging_test_out_file, current_taging_test_out_file + '.best_f1_%.2f' % best_test_score])
Ejemplo n.º 13
0
def testing():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].iteritems():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    vocab_path = ''
    tag_vocab_path = ''
    label_vocab_path = ''
    in_seq_train, out_seq_train, label_train, in_seq_dev, out_seq_dev, label_dev, in_seq_test, out_seq_test, label_test, vocab_path, tag_vocab_path, label_vocab_path = data_utils.prepare_multi_task_data(
        FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)

    result_dir = FLAGS.train_dir + '/test_results'
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)

    current_taging_valid_out_file = result_dir + '/tagging.valid.hyp.txt'
    current_taging_test_out_file = result_dir + '/tagging.test.hyp.txt'

    global sess
    global vocab
    global rev_vocab
    global tag_vocab
    global rev_tag_vocab
    global label_vocab
    global rev_label_vocab
    vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocabulary(tag_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocabulary(
        label_vocab_path)

    sess = tf.Session()
    # Create model.
    print("Max sequence length: %d." % _buckets[0][0])
    print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
    global model_test
    model, model_test = create_model(sess, len(vocab), len(tag_vocab),
                                     len(label_vocab))
    print(
        "Creating model with source_vocab_size=%d, target_vocab_size=%d, and label_vocab_size=%d."
        % (len(vocab), len(tag_vocab), len(label_vocab)))

    # The model has been loaded.
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=3))
    #Service_OpenFace_pb2.add_openfaceServicer_to_server(Servicer_openface(), server)
    FoodBot_pb2.add_FoodBotRequestServicer_to_server(FoodbotRequest(), server)
    server.add_insecure_port('[::]:50055')
    server.start()
    print("GRCP Server is running. Press any key to stop it.")
    try:
        while True:
            # Incoming requests are handled by the registered servicer while the main thread sleeps here.
            time.sleep(24 * 60 * 60)
    except KeyboardInterrupt:
        server.stop(0)
Ejemplo n.º 14
0
def train():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))

    print("准备数据 %s" % FLAGS.data_dir)
    vocab_path = ''
    tag_vocab_path = ''
    label_vocab_path = ''
    date_set = data_utils.prepare_multi_task_data(
        FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)
    in_seq_train, out_seq_train, label_train = date_set[0]
    in_seq_dev, out_seq_dev, label_dev = date_set[1]
    in_seq_test, out_seq_test, label_test = date_set[2]
    vocab_path, tag_vocab_path, label_vocab_path = date_set[3]

    result_dir = FLAGS.train_dir + '/test_results'
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)

    current_tagging_valid_out_file = result_dir + '/tagging.valid.hyp.txt'
    current_tagging_test_out_file = result_dir + '/tagging.test.hyp.txt'

    vocab, rev_vocab = data_utils.initialize_vocab(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocab(tag_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocab(label_vocab_path)

    config = tf.ConfigProto(
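        # Cap this process at ~23% of the GPU's memory so other jobs can share the device.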
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.23),
        # device_count = {'gpu': 2}
    )

    with tf.Session(config=config) as sess:
        # Create model.
        print("Max sequence length: %d." % _buckets[0][0])
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))

        model, model_test = create_model(sess,
                                         len(vocab),
                                         len(tag_vocab),
                                         len(label_vocab))
        print("创建模型 " +
              "source_vocab_size=%d, target_vocab_size=%d, label_vocab_size=%d."
              % (len(vocab), len(tag_vocab), len(label_vocab)))

        # Read data into buckets and compute their sizes.
        print("Reading train/valid/test data (training set limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(in_seq_dev, out_seq_dev, label_dev)
        test_set = read_data(in_seq_test, out_seq_test, label_test)
        train_set = read_data(in_seq_train, out_seq_train, label_train)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0

        best_valid_score = 0
        best_test_score = 0
        while model.global_step.eval() < FLAGS.max_training_steps:
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step.
            start_time = time.time()
            batch_data = model.get_batch(train_set, bucket_id)
            encoder_inputs, tags, tag_weights, batch_sequence_length, labels = batch_data
            if task['joint'] == 1:
                step_outputs = model.joint_step(sess,
                                                encoder_inputs,
                                                tags,
                                                tag_weights,
                                                labels,
                                                batch_sequence_length,
                                                bucket_id,
                                                False)
                _, step_loss, tagging_logits, class_logits = step_outputs
            elif task['tagging'] == 1:
                step_outputs = model.tagging_step(sess,
                                                  encoder_inputs,
                                                  tags,
                                                  tag_weights,
                                                  batch_sequence_length,
                                                  bucket_id,
                                                  False)
                _, step_loss, tagging_logits = step_outputs
            elif task['intent'] == 1:
                step_outputs = model.classification_step(sess,
                                                         encoder_inputs,
                                                         labels,
                                                         batch_sequence_length,
                                                         bucket_id,
                                                         False)
                _, step_loss, class_logits = step_outputs

            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("全局步数 %d 每步时间 %.2fs 训练复杂度 %.2f"
                      % (model.global_step.eval(), step_time, perplexity))
                sys.stdout.flush()
                # Save checkpoint and zero the timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                # mode: Eval, Test
                def run_valid_test(data_set, mode):
                    # Run evals on the development/test set and print the accuracy.
                    word_list = list()
                    ref_tag_list = list()
                    hyp_tag_list = list()
                    ref_label_list = list()
                    hyp_label_list = list()
                    correct_count = 0
                    accuracy = 0.0
                    tagging_eval_result = dict()
                    eval_loss = 0.0
                    count = 0
                    for bucket_id in xrange(len(_buckets)):
                        for i in xrange(len(data_set[bucket_id])):
                            count += 1
                            sample = model_test.get_one(data_set, bucket_id, i)
                            encoder_inputs, tags, tag_weights, sequence_length, labels = sample
                            tagging_logits = []
                            class_logits = []

                            step_loss = None
                            if task['joint'] == 1:
                                step_outputs = model_test.joint_step(sess,
                                                                     encoder_inputs,
                                                                     tags,
                                                                     tag_weights,
                                                                     labels,
                                                                     sequence_length,
                                                                     bucket_id,
                                                                     True)
                                _, step_loss, tagging_logits, class_logits = step_outputs
                            elif task['tagging'] == 1:
                                step_outputs = model_test.tagging_step(sess,
                                                                       encoder_inputs,
                                                                       tags,
                                                                       tag_weights,
                                                                       sequence_length,
                                                                       bucket_id,
                                                                       True)
                                _, step_loss, tagging_logits = step_outputs
                            elif task['intent'] == 1:
                                step_outputs = model_test.classification_step(sess,
                                                                              encoder_inputs,
                                                                              labels,
                                                                              sequence_length,
                                                                              bucket_id,
                                                                              True)
                                _, step_loss, class_logits = step_outputs
                            eval_loss += step_loss / len(data_set[bucket_id])
                            hyp_label = None
                            if task['intent'] == 1:
                                ref_label_list.append(rev_label_vocab[labels[0][0]])
                                hyp_label = np.argmax(class_logits[0], 0)
                                hyp_label_list.append(rev_label_vocab[hyp_label])
                                if labels[0] == hyp_label:
                                    correct_count += 1
                            if task['tagging'] == 1:
                                word_list.append([rev_vocab[x[0]] for x in
                                                  encoder_inputs[:sequence_length[0]]])
                                ref_tag_list.append([rev_tag_vocab[x[0]] for x in
                                                     tags[:sequence_length[0]]])
                                hyp_tag_list.append(
                                    [rev_tag_vocab[np.argmax(x)] for x in
                                     tagging_logits[:sequence_length[0]]])

                    accuracy = float(correct_count) * 100 / count
                    if task['intent'] == 1:
                        print("  %s 准确性: %.2f%% %d/%d"
                              % (mode, accuracy, correct_count, count))
                        sys.stdout.flush()
                    if task['tagging'] == 1:
                        taging_out_file = None
                        if mode == 'Eval':
                            taging_out_file = current_tagging_valid_out_file
                        elif mode == 'Test':
                            taging_out_file = current_tagging_test_out_file
                        tagging_eval_result = conlleval(hyp_tag_list,
                                                        ref_tag_list,
                                                        word_list,
                                                        taging_out_file)
                        print("  %s f1-score: %.2f%%" % (mode, tagging_eval_result['f1']))
                        sys.stdout.flush()
                    return accuracy, tagging_eval_result

                # valid
                valid_accuracy, valid_tagging_result = run_valid_test(dev_set, 'Eval')
                if task['tagging'] == 1 \
                        and valid_tagging_result['f1'] > best_valid_score:
                    best_valid_score = valid_tagging_result['f1']
                    # save the best output file
                    subprocess.call(['mv',
                                     current_tagging_valid_out_file,
                                     current_tagging_valid_out_file + '.best_f1_%.2f'
                                     % best_valid_score])
                # test, run test after each validation for development purpose.
                test_accuracy, test_tagging_result = run_valid_test(test_set, 'Test')
                if task['tagging'] == 1 \
                        and test_tagging_result['f1'] > best_test_score:
                    best_test_score = test_tagging_result['f1']
                    # save the best output file
                    subprocess.call(['mv',
                                     current_tagging_test_out_file,
                                     current_tagging_test_out_file + '.best_f1_%.2f'
                                     % best_test_score])
Ejemplo n.º 15
0
def train():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    # sent_vocab_path = ''
    # s_attr_vocab_path = ''
    # s_loc_vocab_path = ''
    # s_name_vocab_path = ''
    # s_ope_vocab_path = ''
    # s_way_vocab_path = ''
    # intent_vocab_path = ''
    sent_train, slot_train, intent_train, \
    sent_valid, slot_valid, intent_valid, \
    sent_test, slot_test, intent_test, \
    sent_vocab_path, slot_vocab_path, intent_vocab_path = data_utils.prepare_multi_task_data(
        FLAGS.data_dir, FLAGS.sent_vocab_size)

    result_dir = './test_results'
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)

    current_valid_out_file = result_dir + '/valid_hyp'
    current_test_out_file = result_dir + '/test_hyp'
    current_train_out_file = result_dir + '/train_hyp'

    sent_vocab, rev_sent_vocab = data_utils.initialize_vocabulary(sent_vocab_path)
    s_attr_vocab, rev_s_attr_vocab = data_utils.initialize_vocabulary(slot_vocab_path[0])
    s_loc_vocab, rev_s_loc_vocab = data_utils.initialize_vocabulary(slot_vocab_path[1])
    s_name_vocab, rev_s_name_vocab = data_utils.initialize_vocabulary(slot_vocab_path[2])
    s_ope_vocab, rev_s_ope_vocab = data_utils.initialize_vocabulary(slot_vocab_path[3])
    intent_vocab, rev_intent_vocab = data_utils.initialize_vocabulary(intent_vocab_path)
    print(rev_intent_vocab)

    sent_vocab_size = len(sent_vocab)
    slot_vocab_size = [len(s_attr_vocab), len(s_loc_vocab), len(s_name_vocab), len(s_ope_vocab)]
    intent_vocab_size = len(intent_vocab)

    # print(sent_vocab_size, slot_vocab_size, intent_vocab_size)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Create model.
        print("Max sequence length: %d." % FLAGS.max_sequence_length)
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))

        sess.run(tf.global_variables_initializer())

        model, model_test = create_model(sess, sent_vocab_size, slot_vocab_size, intent_vocab_size)
        print("Creating model with sent_vocab_size=%d, s_attr_vocab_size=%d, "
              "s_loc_vocab_size=%d, s_name_vocab_size=%d, "
              "s_ope_vocab_size=%d, and intent_vocab_size=%d." % (sent_vocab_size, slot_vocab_size[0],
                                                                  slot_vocab_size[1], slot_vocab_size[2],
                                                                  slot_vocab_size[3], intent_vocab_size))

        # Read data into buckets and compute their sizes.
        print("Reading train/valid/test data (training set limit: %d)."
              % FLAGS.max_train_data_size)
        valid_set = data_utils.read_data(sent_valid, slot_valid, intent_valid)
        test_set = data_utils.read_data(sent_test, slot_test, intent_test)
        train_set = data_utils.read_data(sent_train, slot_train, intent_train)

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0

        best_valid_score = 0
        best_test_score = 0
        best_train_score = 0

        while model.global_step.eval() < FLAGS.max_training_steps:
            # Get a batch and make a step.
            start_time = time.time()

            batch_inputs, batch_s_attrs, batch_s_locs, batch_s_names, batch_s_opes, \
            batch_intents, batch_sequence_length = model.get_batch(train_set)
            # print(batch_inputs[0].shape)

            _, step_loss, logits = model.step(sess, batch_inputs, batch_s_attrs, batch_s_locs,
                                              batch_s_names, batch_s_opes, batch_intents,
                                              batch_sequence_length, False)
            # print(logits[-1])
            # print('s_attrs_logits', logits[0])
            # print(logits[0].shape)

            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d step-time %.2f. Training perplexity %.2f"
                      % (model.global_step.eval(), step_time, perplexity))
                sys.stdout.flush()
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0