def evaluate_grammar(grammar=None,
                     executor=None,
                     examples=[],
                     examples_label=None,
                     metrics=standard_metrics(),
                     print_examples=True):
    evaluate_model(model=Model(grammar=grammar, executor=executor),
                   examples=examples,
                   metrics=metrics,
                   print_examples=print_examples)
Example #2
def evaluate_grammar(grammar=None,
                     executor=None,
                     examples=[],
                     examples_label=None,
                     metrics=standard_metrics(),
                     print_examples=True):
    return evaluate_model(model=Model(grammar=grammar, executor=executor),
                          examples=examples,
                          metrics=metrics,
                          print_examples=print_examples)
Example #3
    def evaluate(self, examples=[], examples_label=None, metrics=standard_metrics(), print_examples=False):
        metric_values = defaultdict(int)
        for example in examples:
            parses = self.parse_input(example.input)
            for metric in metrics:
                metric_value = metric.evaluate(example, parses)
                metric_values[metric.name()] += metric_value
            if print_examples:
                print_parses(example, parses, metrics=metrics)
        return metric_values
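The loop above only assumes that each metric exposes name() and evaluate(example, parses) returning a number that can be accumulated. A minimal, self-contained sketch of that interface; the Example and Parse stand-ins and the metric class below are made up for illustration, not the library's own classes:

from collections import namedtuple

# Stand-ins for illustration only; the real library defines its own Example and Parse.
Example = namedtuple('Example', ['input', 'semantics'])
Parse = namedtuple('Parse', ['semantics'])

class OracleSemanticsMetric:
    """Scores 1 if any parse recovers the target semantics, else 0."""
    def name(self):
        return 'oracle semantics accuracy'
    def evaluate(self, example, parses):
        return 1 if any(p.semantics == example.semantics for p in parses) else 0

ex = Example(input='two plus two', semantics=('+', 2, 2))
print(OracleSemanticsMetric().evaluate(ex, [Parse(semantics=('+', 2, 2))]))  # 1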
Example #4
    def evaluate(self,
                 executor=None,
                 examples=[],
                 examples_label=None,
                 metrics=standard_metrics(),
                 print_examples=False):
        return Model(grammar=self,
                     executor=executor).evaluate(examples=examples,
                                                 metrics=metrics,
                                                 print_examples=print_examples)
Example #5
def train_test(model=None,
               train_examples=[],
               test_examples=[],
               metrics=standard_metrics(),
               training_metric=SemanticsAccuracyMetric(),
               seed=None,
               print_examples=False):
    # print_grammar(model.grammar)
    # print
    print('%d training examples, %d test examples' %
          (len(train_examples), len(test_examples)))

    # 'Before' test
    model.weights = defaultdict(float)  # no weights
    evaluate_model(model=model,
                   examples=train_examples,
                   examples_label='train',
                   metrics=metrics,
                   print_examples=print_examples)
    evaluate_model(model=model,
                   examples=test_examples,
                   examples_label='test',
                   metrics=metrics,
                   print_examples=print_examples)

    # Train
    model = latent_sgd(model,
                       train_examples,
                       training_metric=training_metric,
                       seed=seed)

    # 'After' test
    evaluate_model(model=model,
                   examples=train_examples,
                   examples_label='train',
                   metrics=metrics,
                   print_examples=print_examples)
    evaluate_model(model=model,
                   examples=test_examples,
                   examples_label='test',
                   metrics=metrics,
                   print_examples=print_examples)
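A small, self-contained illustration of what resetting model.weights to defaultdict(float) does before the 'Before' test: every feature weight reads as 0.0, with no KeyError for unseen features, so the evaluation reflects an untrained model. The feature names below are hypothetical.

from collections import defaultdict

weights = defaultdict(float)                          # all weights default to 0.0
features = {'rule:$ROOT': 1.0, 'lexicon:two': 2.0}    # hypothetical feature counts
score = sum(weights[f] * count for f, count in features.items())
print(score)  # 0.0 -- unseen features contribute nothing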
def evaluate_model(model=None,
                   examples=[],
                   examples_label=None,
                   metrics=standard_metrics(),
                   print_examples=True):
    print('=' * 80)
    print('Evaluating on %d %sexamples\n' % (
        len(examples), examples_label + ' ' if examples_label else ''))
    print('-' * 80)
    metric_values = defaultdict(int)
    for example in examples:
        parses = model.parse_input(example.input)
        for metric in metrics:
            metric_value = metric.evaluate(example, parses)
            metric_values[metric.name()] += metric_value
        if print_examples:
            print_parses(example, parses, metrics=metrics)
    print('Over %d examples:' % len(examples))
    print()
    for metric in metrics:
        print('%-34s %.3f' % (metric.name(), 1.0 * metric_values[metric.name()] / len(examples)))
    print()
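evaluate_model accumulates per-example metric values and divides by len(examples) at the end, so an empty example list would raise ZeroDivisionError. A minimal sketch of that accumulate-then-average step, with a guard that is an addition for this sketch, not part of the original:

from collections import defaultdict

def average_metrics(metric_values, num_examples):
    # Guard against an empty example list (added for this sketch only).
    return {name: total / max(num_examples, 1)
            for name, total in metric_values.items()}

counts = defaultdict(int)
counts['semantics accuracy'] += 1   # e.g. one correct example out of two
print(average_metrics(counts, 2))   # {'semantics accuracy': 0.5}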
def train_test(model=None,
               train_examples=[],
               test_examples=[],
               metrics=standard_metrics(),
               training_metric=SemanticsAccuracyMetric(),
               seed=None,
               print_examples=False):
    # print_grammar(model.grammar)
    # print
    print('%d training examples, %d test examples' % (len(train_examples), len(test_examples)))

    # 'Before' test
    model.weights = defaultdict(float)  # no weights
    evaluate_model(model=model,
                   examples=train_examples,
                   examples_label='train',
                   metrics=metrics,
                   print_examples=print_examples)
    evaluate_model(model=model,
                   examples=test_examples,
                   examples_label='test',
                   metrics=metrics,
                   print_examples=print_examples)

    # Train
    model = latent_sgd(model, train_examples, training_metric=training_metric, seed=seed)

    # 'After' test
    evaluate_model(model=model,
                   examples=train_examples,
                   examples_label='train',
                   metrics=metrics,
                   print_examples=print_examples)
    evaluate_model(model=model,
                   examples=test_examples,
                   examples_label='test',
                   metrics=metrics,
                   print_examples=print_examples)
Example #8
def evaluate_model(model=None,
                   examples=[],
                   examples_label=None,
                   metrics=standard_metrics(),
                   print_examples=True):
    print('=' * 80)
    print('Evaluating on %d %sexamples\n' % (
        len(examples), examples_label + ' ' if examples_label else ''))
    print('-' * 80)
    metric_values = defaultdict(int)
    for example in examples:
        parses = model.parse_input(example.input)
        for metric in metrics:
            metric_value = metric.evaluate(example, parses)
            metric_values[metric.name()] += metric_value
        if print_examples:
            print_parses(example, parses, metrics=metrics)
    print('Over %d examples:' % len(examples))
    print()
    for metric in metrics:
        print('%-34s %.3f' % (metric.name(),
                              1.0 * metric_values[metric.name()] / len(examples)))
    print()
Example #9
    def train_test(self,
                   train_examples=[],
                   test_examples=[],
                   metrics=standard_metrics(),
                   training_metric=SemanticsAccuracyMetric(),
                   seed=None,
                   print_examples=False):

        # 'Before' test
        self.weights = defaultdict(float)  # no weights
        before_train_metrics = self.evaluate(
            examples=train_examples,
            examples_label='train',
            metrics=metrics,
            print_examples=print_examples)
        before_test_metrics = self.evaluate(
            examples=test_examples,
            examples_label='test',
            metrics=metrics,
            print_examples=print_examples)

        # Train
        model = self.train(train_examples, training_metric=training_metric, seed=seed)

        # 'After' test
        after_train_metrics = model.evaluate(
            examples=train_examples,
            examples_label='train',
            metrics=metrics,
            print_examples=print_examples)
        after_test_metrics = model.evaluate(
            examples=test_examples,
            examples_label='test',
            metrics=metrics,
            print_examples=print_examples)
        return before_train_metrics, before_test_metrics, after_train_metrics, after_test_metrics
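This variant returns the four metric dictionaries instead of only printing them, so a caller can compare before/after numbers directly. As in the evaluate() method of Example #3, the returned dictionaries presumably map metric names to accumulated values; the figures below are made up for illustration.

# Made-up values standing in for the dicts train_test would return.
before_test = {'semantics accuracy': 3}
after_test = {'semantics accuracy': 14}
num_test = 20

print('test semantics accuracy: %.3f -> %.3f' % (
    before_test['semantics accuracy'] / num_test,
    after_test['semantics accuracy'] / num_test))   # 0.150 -> 0.700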
Example #10
    def metrics(self):
        """Returns a list of Metrics which are appropriate for the domain."""
        return standard_metrics()
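A self-contained sketch of how a specific domain might override metrics() to add a domain-specific metric; CountParsesMetric and the domain classes here are hypothetical, and a real base class would return standard_metrics() from the library.

class CountParsesMetric:
    """Hypothetical metric: counts the parses produced for an example."""
    def name(self):
        return 'number of parses'
    def evaluate(self, example, parses):
        return len(parses)

class Domain:
    def metrics(self):
        return []   # stands in for standard_metrics()

class ToyArithmeticDomain(Domain):
    def metrics(self):
        # Extend the defaults with a domain-specific metric.
        return super().metrics() + [CountParsesMetric()]

print([m.name() for m in ToyArithmeticDomain().metrics()])  # ['number of parses']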