def evaluate(model, data, params, split): """Evaluates a pretrained model on a dataset. Inputs: model (ATISModel): Model class. data (ATISData): All of the data. params (namespace): Parameters for the model. """ filename = split if filename == 'dev': split = data.dev_data elif filename == 'train': split = data.train_data elif filename == 'test': split = data.test_data elif filename == 'valid': split = data.valid_data else: raise ValueError("Split not recognized: " + str(params.evaluate_split)) if params.use_predicted_queries: filename += "_use_predicted_queries" else: filename += "_use_gold_queries" full_name = os.path.join(params.logdir, filename) + params.results_note if params.interaction_level or params.use_predicted_queries: examples = data.get_all_interactions(split) if params.interaction_level: valid_eval_results = evaluate_interaction_sample( examples, model, name=full_name, metrics=FINAL_EVAL_METRICS, total_num=atis_data.num_utterances(split), database_username=params.database_username, database_password=params.database_password, database_timeout=params.database_timeout, use_predicted_queries=params.use_predicted_queries, max_generation_length=params.eval_maximum_sql_length, write_results=True, use_gpu=True, compute_metrics=params.compute_metrics)[0] else: valid_eval_results = evaluate_using_predicted_queries( examples, model, name=full_name, metrics=FINAL_EVAL_METRICS, total_num=atis_data.num_utterances(split), database_username=params.database_username, database_password=params.database_password, database_timeout=params.database_timeout)[0] else: examples = data.get_all_utterances(split) valid_eval_results = evaluate_utterance_sample( examples, model, name=full_name, gold_forcing=False, metrics=FINAL_EVAL_METRICS, total_num=atis_data.num_utterances(split), max_generation_length=params.eval_maximum_sql_length, database_username=params.database_username, database_password=params.database_password, database_timeout=params.database_timeout, write_results=True)[0] for name, value in valid_eval_results.items(): print("valid gold-passing " + name.name + ":\t" + "%.2f" % value) valid_token_accuracy = valid_eval_results[Metrics.TOKEN_ACCURACY] string_accuracy = valid_eval_results[Metrics.STRING_ACCURACY] print("token accuracy:\t" + str(valid_token_accuracy)) print("maximum string accuracy:\t" + str(string_accuracy))
def evaluate(model, data, params, last_save_file, split): """Evaluates a pretrained model on a dataset. Inputs: model (ATISModel): Model class. data (ATISData): All of the data. params (namespace): Parameters for the model. last_save_file (str): Location where the model save file is. """ if last_save_file: model.load(last_save_file) else: if not params.save_file: raise ValueError( "Must provide a save file name if not training first.") model.load(params.save_file) filename = split if filename == 'dev': split = data.dev_data elif filename == 'train': split = data.train_data elif filename == 'test': split = data.test_data elif filename == 'valid': split = data.valid_data else: raise ValueError("Split not recognized: " + str(params.evaluate_split)) if params.use_predicted_queries: filename += "_use_predicted_queries" else: filename += "_use_gold_queries" full_name = os.path.join(params.logdir, filename) + params.results_note if params.interaction_level or params.use_predicted_queries: examples = data.get_all_interactions(split) if params.interaction_level: evaluate_interaction_sample( examples, model, name=full_name, metrics=FINAL_EVAL_METRICS, total_num=atis_data.num_utterances(split), database_username=params.database_username, database_password=params.database_password, database_timeout=params.database_timeout, use_predicted_queries=params.use_predicted_queries, max_generation_length=params.eval_maximum_sql_length, write_results=True, use_gpu=True, compute_metrics=params.compute_metrics) else: evaluate_using_predicted_queries( examples, model, name=full_name, metrics=FINAL_EVAL_METRICS, total_num=atis_data.num_utterances(split), database_username=params.database_username, database_password=params.database_password, database_timeout=params.database_timeout) else: examples = data.get_all_utterances(split) evaluate_utterance_sample( examples, model, name=full_name, gold_forcing=False, metrics=FINAL_EVAL_METRICS, total_num=atis_data.num_utterances(split), max_generation_length=params.eval_maximum_sql_length, database_username=params.database_username, database_password=params.database_password, database_timeout=params.database_timeout, write_results=True)
def evaluate(model, data, params, last_save_file): if last_save_file: model.load(last_save_file) else: if not params.save_file: raise ValueError( "Must provide a save file name if not training first.") model.load(params.save_file) split = None if params.evaluate_split == 'dev': split = data.dev_data elif params.evaluate_split == 'train': split = data.train_data elif params.evaluate_split == 'test': split = data.test_data elif params.evaluate_split == 'valid': split = data.valid_data else: raise ValueError("Split not recognized: " + str(params.evaluate_split)) filename = params.evaluate_split if params.use_predicted_queries: filename += "predicted" else: filename += "gold" full_name = os.path.join(params.logdir, filename) + params.results_note if params.interaction_level or params.use_predicted_queries: examples = data.get_all_interactions(split) if params.interaction_level: results, _ = evaluate_interaction_sample( examples, model, name=full_name, metrics=FINAL_EVAL_METRICS, total_num=atis_data.num_utterances(split), database_username=params.database_username, database_password=params.database_password, database_timeout=params.database_timeout, use_predicted_queries=params.use_predicted_queries, max_generation_length=params.eval_maximum_sql_length, write_results=True, use_gpu=True) else: results, _ = evaluate_using_predicted_queries( params, examples, model, name=full_name, metrics=FINAL_EVAL_METRICS, total_num=atis_data.num_utterances(split), max_generation_length=params.eval_maximum_sql_length, database_username=params.database_username, database_password=params.database_password, database_timeout=params.database_timeout) else: examples = data.get_all_utterances(split) results, _ = evaluate_utterance_sample( examples, model, name=full_name, gold_forcing=False, metrics=FINAL_EVAL_METRICS, total_num=atis_data.num_utterances(split), max_generation_length=params.eval_maximum_sql_length, database_username=params.database_username, database_password=params.database_password, database_timeout=params.database_timeout, write_results=True)