def test_dataset_loading():
    """A single WebQuestions-style JSON record is parsed by get_dataset
    into an (utterance, [entity names]) pair."""
    raw_json = """
    [
        {
            "url": "http://www.freebase.com/view/en/justin_bieber",
            "targetValue": "(list (description \\"Jazmyn Bieber\\") (description \\"Jaxon Bieber\\"))",
            "utterance": "what is the name of justin bieber brother?"
        }
    ]
    """
    parsed = get_dataset(StringIO(raw_json))
    expected = [
        ('what is the name of justin bieber brother?',
         ['Jazmyn Bieber', 'Jaxon Bieber']),
    ]
    assert parsed == expected
def evaluate_cached_oracle():
    """Evaluate the cached-oracle system over the shuffled dataset.

    Loads the dataset from settings.DATASET_PATH, shuffles it with a
    fixed seed for reproducibility, restricts it to the queries the
    cached oracle actually has answers for, saves the target/predicted
    values to settings.RESULTS_PATH, and runs analyse().
    """
    random = Random(1)  # fixed seed: same shuffle on every run
    # Use a context manager so the dataset file is always closed
    # (the original leaked the handle).
    with open(settings.DATASET_PATH) as dataset_file:
        dataset = get_dataset(dataset_file)
    random.shuffle(dataset)
    system = CachedOracleSystem(dataset)
    # Only score items whose query is present in the oracle's cache;
    # item[0] is the query string (see test_dataset_loading).
    dataset = [item for item in dataset if item[0] in system.queries]
    logger.info("Testing on %d items", len(dataset))
    results = get_target_and_predicted_values(dataset, system)
    save(results, settings.RESULTS_PATH)
    analyse()
def evaluate_tensor():
    """Train a TensorSystem on a dataset prefix and evaluate it on a
    small held-out slice.

    Results go to settings.RESULTS_PATH; the system's connector cache is
    persisted afterwards, then analyse() is run.
    """
    random = Random(1)  # fixed seed: same shuffle on every run
    # Context manager so the dataset file is closed (original leaked it).
    with open(settings.DATASET_PATH) as dataset_file:
        dataset = get_dataset(dataset_file)
    random.shuffle(dataset)
    logger.info("Training")
    # NOTE(review): train set truncated to 100 items; the leftover
    # "#2500]" comment suggests 2500 was the intended full size — confirm.
    train_set = dataset[:100]
    system = TensorSystem(CachedOracleSystem)
    system.train(train_set)
    test_set = dataset[2500:2510]  # small disjoint evaluation slice
    logger.info("Testing on %d items", len(test_set))
    results = get_target_and_predicted_values(test_set, system)
    save(results, settings.RESULTS_PATH)
    system.connector.save_cache()
    analyse()
def evaluate_quickly():
    """Train an NNSystem on the first 2500 items and pickle the
    system's best outputs for a 50-item test slice, then print the
    analysis of those results.
    """
    output_path = 'system-best.json'
    random = Random(2)  # fixed seed: same shuffle on every run
    # Context manager so the dataset file is closed (original leaked it).
    with open(settings.DATASET_PATH) as dataset_file:
        dataset = get_dataset(dataset_file)
    random.shuffle(dataset)
    logger.info("Training")
    train_set = dataset[:2500]
    system = NNSystem(CachedOracleSystem)
    system.train(train_set)
    test_set = dataset[2500:2550]  # disjoint from the training prefix
    logger.info("Testing on %d items", len(test_set))
    results = get_system_best(test_set, system)
    # 'wb': pickle requires a binary-mode file (text mode breaks on
    # Python 3 and on Windows); the original used 'w'.
    with open(output_path, 'wb') as output_file:
        pickle.dump(results, output_file)
    system.connector.save_cache()
    # print() call form works on both Python 2 and 3; the original used
    # the Python-2-only print statement.
    print(analyse_system_best(output_path))
    # --- Tail of an enclosing generator whose `def` line is above this
    # chunk (presumably get_cache_oracle_data, called in __main__ below);
    # `dataset` and `oracle` come from that unseen scope — confirm. ---
    i = 0
    for query, target_entities in dataset:
        # Ask the oracle for its best results/expressions for this query.
        results, expressions = oracle.get_best_results_and_expressions(query)
        yield {
            'query': query,
            'results': results,
            'expressions': expressions,
            'target': target_entities,
        }
        i += 1
        logger.info("Completed: %d", i)
        # Checkpoint the oracle's connector cache every 10 items so a
        # crash loses at most 10 queries' worth of work.
        if i % 10 == 0:
            logger.info("Saving caches")
            oracle.connector.save_cache()
            logger.info("Saving complete")


def save_oracle_data(oracle_results):
    # Pickle each oracle result to the cache file as it is produced.
    # oracle_results may be a generator, so flushing after every dump
    # keeps the file usable if the run is interrupted.
    # NOTE(review): pickle to a text-mode ('w') file only works on
    # Python 2 / Unix — 'wb' would be required on Python 3. Confirm.
    with open(settings.ORACLE_CACHE_PATH, 'w') as cache_file:
        for result in oracle_results:
            pickle.dump(result, cache_file)
            cache_file.flush()


if __name__ == "__main__":
    # Build the oracle cache for the whole dataset and persist it.
    dataset_file = open(settings.DATASET_PATH)
    dataset = get_dataset(dataset_file)
    oracle_data = get_cache_oracle_data(dataset)
    save_oracle_data(oracle_data)