Beispiel #1
0
    def run(cls, dev, test, labeled_slot, labeled_train, unlabeled_slot,
            unlabeled_train, steps, gpu_memory):
        """Train the slot-filling estimator and score it on the test set.

        Builds training, validation, test and unlabeled ``DataSet`` objects,
        fits a ``tf.contrib.learn.Estimator`` driven by
        ``SlotFilling.rnn_model_fn`` with a loss-based early-stopping
        validation monitor, then predicts on the test set and tallies the
        per-token outcome.

        Args:
            dev: Second ``DataSet`` argument for the validation set
                (presumably a data file path -- confirm against ``DataSet``).
            test: Second ``DataSet`` argument for the test set.
            labeled_slot: First ``DataSet`` argument shared by the training,
                validation and test sets.
            labeled_train: Second ``DataSet`` argument for the training set.
            unlabeled_slot: First ``DataSet`` argument for the unlabeled set.
            unlabeled_train: Second ``DataSet`` argument for the unlabeled
                set.
            steps: Forwarded to ``classifier.fit(steps=...)``.
            gpu_memory: Forwarded to ``RunConfig(gpu_memory_fraction=...)``.

        Returns:
            A dict with the keys ``'accuracy'`` (correct tokens divided by
            the summed test-set lengths, ``0.0`` when that sum is zero),
            ``'correct'``, ``'no_match'`` (wrong prediction whose slot is
            ``'o'``), ``'over_match'`` (wrong prediction where the expected
            slot is ``'o'``) and ``'mismatch'`` (every other wrong
            prediction).
        """
        training_set = DataSet(labeled_slot, labeled_train)
        validation_set = DataSet(labeled_slot, dev)
        test_set = DataSet(labeled_slot, test)
        unlabeled_set = DataSet(unlabeled_slot, unlabeled_train)

        print('# training_set (%d)' % training_set.size())
        print('# validation_set (%d)' % validation_set.size())
        print('# test_set (%d)' % test_set.size())
        print('# unlabeled_set (%d)' % unlabeled_set.size())

        classifier = tf.contrib.learn.Estimator(
            model_fn=SlotFilling.rnn_model_fn,
            params={
                'num_slot': training_set.num_classes(),
                'num_pos': unlabeled_set.num_classes(),
                'drop_out': DROP_OUT,
                'embedding_dimension': EMBEDDING_DIMENSION,
                'vocab_size': DataSet.vocab_size(),
                'unlabeled': unlabeled_set.size() > 0
            },
            config=tf.contrib.learn.RunConfig(
                gpu_memory_fraction=gpu_memory,
                save_checkpoints_secs=30,
            ),
            model_dir='./model')

        validation_metrics = {
            "accuracy":
            tf.contrib.learn.MetricSpec(
                metric_fn=tf.contrib.metrics.streaming_accuracy,
                prediction_key='predictions',
                weight_key='labeled_mask')
        }

        # NOTE(review): the last two input_fn arguments look like a batch
        # size and an epoch count -- confirm against SlotFilling.input_fn.
        monitor = tf.contrib.learn.monitors.ValidationMonitor(
            input_fn=lambda: SlotFilling.input_fn(
                validation_set, unlabeled_set, validation_set.size(), 1),
            eval_steps=1,
            every_n_steps=50,
            metrics=validation_metrics,
            early_stopping_metric="loss",
            early_stopping_metric_minimize=True,
            early_stopping_rounds=300)

        classifier.fit(input_fn=lambda: SlotFilling.input_fn(
            training_set, unlabeled_set, training_set.size(), 500),
                       monitors=[monitor],
                       steps=steps)

        predictions = classifier.predict(input_fn=lambda: SlotFilling.input_fn(
            test_set, unlabeled_set, test_set.size(), 1))

        slot_correct = 0
        slot_no_match = 0
        slot_mismatch = 0
        slot_over_match = 0

        # Fetch once instead of on every iteration of the scoring loop.
        labels = test_set.labels()
        lengths = test_set.lengths()

        for i, p in enumerate(predictions):
            # Trim both sequences to the i-th length before comparing.
            length = lengths[i]
            target = labels[i][:length]
            prediction = list(p['predictions'])[:length]
            for expected, actual in zip(target, prediction):
                actual = int(actual)
                # Compare by value: identity (`is`) on ints and strings only
                # works by accident of interpreter caching/interning.
                if expected == actual:
                    slot_correct += 1
                elif test_set.get_slot(actual) == 'o':
                    slot_no_match += 1
                elif test_set.get_slot(expected) == 'o':
                    slot_over_match += 1
                else:
                    slot_mismatch += 1

        total_tokens = sum(lengths)
        # float() keeps the ratio fractional under Python 2 integer division;
        # the guard avoids ZeroDivisionError on an empty test set.
        accuracy = float(slot_correct) / total_tokens if total_tokens else 0.0

        return {
            'accuracy': accuracy,
            'correct': slot_correct,
            'no_match': slot_no_match,
            'mismatch': slot_mismatch,
            'over_match': slot_over_match,
        }
Beispiel #2
0
    print('# Experiments (%d)' % len(experiments))
    print('# validation_set (%d)' % validation_set.size())
    print('# test_set (%d)' % test_set.size())

    pos_model = None
    if 'pos_model' in config:
        pos_set = DataSet('./data/atis.pos.slot', './data/atis.pos.train')
        print('# Pre-training')
        print('# POS training set (%d)' % pos_set.size())

        pos_model = PosTagging.run(training_set=pos_set,
                                   steps=config['pos_model'],
                                   gpu_memory=0.2,
                                   random_seed=RANDOM_SEED,
                                   vocab_size=DataSet.vocab_size(),
                                   drop_out=config['drop_out'],
                                   cell_size=CELL_SIZE,
                                   embedding_dimension=EMBEDDING_DIMENSION,
                                   learning_rate=LEARNING_RATE)

    ev = {
        'accuracy': [],
        'precision': [],
        'recall': [],
        'f-measure': [],
    }
    accuracies = []
    corrects = []
    no_matches = []
    mismatches = []