from collections import defaultdict

# NOTE: the module paths below are assumptions based on how the surrounding
# codebase appears to be organized; adjust them to wherever Model, the
# metrics, the printing helpers, and the trainer actually live.
from scoring import Model
from metrics import standard_metrics, SemanticsAccuracyMetric
from parsing import print_grammar, print_parses
from learning import latent_sgd


def evaluate_grammar(grammar=None,
                     executor=None,
                     examples=[],
                     examples_label=None,
                     metrics=standard_metrics(),
                     print_examples=True):
    """Wraps the grammar and executor in a Model and evaluates it."""
    return evaluate_model(model=Model(grammar=grammar, executor=executor),
                          examples=examples,
                          examples_label=examples_label,
                          metrics=metrics,
                          print_examples=print_examples)
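
# Usage sketch (hypothetical, not runnable as-is): `my_grammar`,
# `my_executor`, and `my_examples` stand in for a domain-specific grammar,
# executor, and list of Examples defined elsewhere in the codebase.
#
#   totals = evaluate_grammar(grammar=my_grammar,
#                             executor=my_executor,
#                             examples=my_examples,
#                             examples_label='dev')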
def evaluate(self,
             examples=[],
             examples_label=None,
             metrics=standard_metrics(),
             print_examples=False):
    """Parses each example and returns summed (unnormalized) metric values."""
    metric_values = defaultdict(int)
    for example in examples:
        parses = self.parse_input(example.input)
        for metric in metrics:
            metric_value = metric.evaluate(example, parses)
            metric_values[metric.name()] += metric_value
        if print_examples:
            print_parses(example, parses, metrics=metrics)
    return metric_values
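
# Usage sketch (hypothetical): Model.evaluate() returns raw per-metric sums,
# so normalization is left to the caller. `model` and `dev_examples` are
# assumed names, not defined in this module.
#
#   totals = model.evaluate(examples=dev_examples)
#   for name, total in totals.items():
#       print('%-34s %.3f' % (name, 1.0 * total / len(dev_examples)))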
def evaluate(self,
             executor=None,
             examples=[],
             examples_label=None,
             metrics=standard_metrics(),
             print_examples=False):
    """Wraps this grammar in a Model and evaluates it on the examples."""
    return Model(grammar=self, executor=executor).evaluate(
        examples=examples,
        examples_label=examples_label,
        metrics=metrics,
        print_examples=print_examples)
def train_test(model=None,
               train_examples=[],
               test_examples=[],
               metrics=standard_metrics(),
               training_metric=SemanticsAccuracyMetric(),
               seed=None,
               print_examples=False):
    # print_grammar(model.grammar)
    # print()
    print('%d training examples, %d test examples' % (
        len(train_examples), len(test_examples)))
    # 'Before' test.
    model.weights = defaultdict(float)  # no weights
    evaluate_model(model=model, examples=train_examples, examples_label='train',
                   metrics=metrics, print_examples=print_examples)
    evaluate_model(model=model, examples=test_examples, examples_label='test',
                   metrics=metrics, print_examples=print_examples)
    # Train.
    model = latent_sgd(model, train_examples,
                       training_metric=training_metric, seed=seed)
    # 'After' test.
    evaluate_model(model=model, examples=train_examples, examples_label='train',
                   metrics=metrics, print_examples=print_examples)
    evaluate_model(model=model, examples=test_examples, examples_label='test',
                   metrics=metrics, print_examples=print_examples)
def evaluate_model(model=None,
                   examples=[],
                   examples_label=None,
                   metrics=standard_metrics(),
                   print_examples=True):
    """Evaluates the model, prints per-metric averages, and returns raw sums."""
    print('=' * 80)
    print('Evaluating on %d %sexamples\n' % (
        len(examples), examples_label + ' ' if examples_label else ''))
    print('-' * 80)
    metric_values = defaultdict(int)
    for example in examples:
        parses = model.parse_input(example.input)
        for metric in metrics:
            metric_value = metric.evaluate(example, parses)
            metric_values[metric.name()] += metric_value
        if print_examples:
            print_parses(example, parses, metrics=metrics)
    print('Over %d examples:' % len(examples))
    print()
    for metric in metrics:
        print('%-34s %.3f' % (
            metric.name(), 1.0 * metric_values[metric.name()] / len(examples)))
    print()
    # Return the sums so callers such as evaluate_grammar() get a value back.
    return metric_values
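
# Usage sketch (hypothetical): evaluating an already-constructed Model on a
# held-out split. `model` and `test_examples` are assumed names.
#
#   test_metrics = evaluate_model(model=model,
#                                 examples=test_examples,
#                                 examples_label='test',
#                                 print_examples=False)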
def train_test(self,
               train_examples=[],
               test_examples=[],
               metrics=standard_metrics(),
               training_metric=SemanticsAccuracyMetric(),
               seed=None,
               print_examples=False):
    # 'Before' test.
    self.weights = defaultdict(float)  # no weights
    before_train_metrics = self.evaluate(
        examples=train_examples, examples_label='train',
        metrics=metrics, print_examples=print_examples)
    before_test_metrics = self.evaluate(
        examples=test_examples, examples_label='test',
        metrics=metrics, print_examples=print_examples)
    # Train.
    model = self.train(train_examples,
                       training_metric=training_metric, seed=seed)
    # 'After' test.
    after_train_metrics = model.evaluate(
        examples=train_examples, examples_label='train',
        metrics=metrics, print_examples=print_examples)
    after_test_metrics = model.evaluate(
        examples=test_examples, examples_label='test',
        metrics=metrics, print_examples=print_examples)
    return (before_train_metrics, before_test_metrics,
            after_train_metrics, after_test_metrics)
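
# Usage sketch (hypothetical): running the before/after experiment and
# comparing the returned metric dictionaries, which hold raw sums. `model`,
# `train_examples`, and `test_examples` come from the calling domain.
#
#   b_train, b_test, a_train, a_test = model.train_test(
#       train_examples=train_examples,
#       test_examples=test_examples,
#       seed=1)
#   for name in a_test:
#       print('%-34s before %.3f  after %.3f' % (
#           name, 1.0 * b_test[name] / len(test_examples),
#           1.0 * a_test[name] / len(test_examples)))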
def metrics(self):
    """Returns a list of Metrics which are appropriate for the domain."""
    return standard_metrics()