예제 #1
0
def test_dataset_loading():
    """get_dataset should turn a JSON record into an (utterance, descriptions) pair."""
    raw_json = """
        [
          {
            "url": "http://www.freebase.com/view/en/justin_bieber",
            "targetValue": "(list (description \\"Jazmyn Bieber\\") (description \\"Jaxon Bieber\\"))",
            "utterance": "what is the name of justin bieber brother?"
          }
        ]
    """

    expected = [('what is the name of justin bieber brother?',
                 ['Jazmyn Bieber', 'Jaxon Bieber'])]
    assert get_dataset(StringIO(raw_json)) == expected
예제 #2
0
def evaluate_cached_oracle():
    """Evaluate the CachedOracleSystem on every dataset item it has cached answers for.

    Loads and shuffles the dataset (fixed seed, so the shuffle is
    reproducible), builds the cached oracle, restricts evaluation to the
    queries present in the oracle's cache, then saves and analyses results.
    """
    random = Random(1)  # fixed seed => deterministic shuffle across runs

    # `with` ensures the dataset file is closed (was previously leaked).
    with open(settings.DATASET_PATH) as dataset_file:
        dataset = get_dataset(dataset_file)
    random.shuffle(dataset)

    system = CachedOracleSystem(dataset)

    # Only items whose query the cached oracle can answer are evaluable.
    dataset = [item for item in dataset if item[0] in system.queries]

    logger.info("Testing on %d items", len(dataset))
    results = get_target_and_predicted_values(dataset, system)
    save(results, settings.RESULTS_PATH)
    analyse()
예제 #3
0
def evaluate_tensor():
    """Train a TensorSystem on a slice of the dataset and evaluate it on a held-out slice.

    Uses a fixed random seed so the train/test split is reproducible;
    saves the predictions and the system's connector cache before analysis.
    """
    random = Random(1)  # fixed seed => deterministic shuffle across runs

    # `with` ensures the dataset file is closed (was previously leaked).
    with open(settings.DATASET_PATH) as dataset_file:
        dataset = get_dataset(dataset_file)
    random.shuffle(dataset)

    logger.info("Training")
    # NOTE(review): trains on only the first 100 items (a stale "#2500]"
    # remnant suggested 2500 was intended) yet tests on dataset[2500:2510].
    # Behavior preserved here -- confirm the small training slice is deliberate.
    train_set = dataset[:100]
    system = TensorSystem(CachedOracleSystem)
    system.train(train_set)

    test_set = dataset[2500:2510]
    logger.info("Testing on %d items", len(test_set))
    results = get_target_and_predicted_values(test_set, system)
    save(results, settings.RESULTS_PATH)
    system.connector.save_cache()
    analyse()
예제 #4
0
def evaluate_quickly():
    """Train an NNSystem on 2500 items, evaluate on the next 50, and print the analysis.

    Pickles the system-best results to ``system-best.json`` (NOTE(review):
    despite the .json extension the payload is a pickle, not JSON -- confirm
    downstream readers expect pickle), then saves the connector cache.
    """
    output_path = 'system-best.json'

    random = Random(2)  # fixed seed => deterministic shuffle across runs

    # `with` ensures the dataset file is closed (was previously leaked).
    with open(settings.DATASET_PATH) as dataset_file:
        dataset = get_dataset(dataset_file)
    random.shuffle(dataset)

    logger.info("Training")
    train_set = dataset[:2500]
    system = NNSystem(CachedOracleSystem)
    system.train(train_set)

    test_set = dataset[2500:2550]
    logger.info("Testing on %d items", len(test_set))
    results = get_system_best(test_set, system)
    # Pickle requires a binary-mode file ('wb'); 'w' only worked by accident
    # on POSIX under Python 2.
    with open(output_path, 'wb') as output_file:
        pickle.dump(results, output_file)
    system.connector.save_cache()
    print(analyse_system_best(output_path))
예제 #5
0
    i = 0
    for query, target_entities in dataset:
        results, expressions = oracle.get_best_results_and_expressions(query)
        yield {
            'query': query,
            'results': results,
            'expressions': expressions,
            'target': target_entities,
        }
        i += 1
        logger.info("Completed: %d", i)

        if i % 10 == 0:
            logger.info("Saving caches")
            oracle.connector.save_cache()
            logger.info("Saving complete")


def save_oracle_data(oracle_results):
    """Incrementally pickle each oracle result into the oracle cache file.

    The file is opened in binary mode ('wb'), as pickle requires, and is
    flushed after every record so an interrupted run loses at most the
    item currently being written.
    """
    with open(settings.ORACLE_CACHE_PATH, 'wb') as cache_file:
        for result in oracle_results:
            pickle.dump(result, cache_file)
            cache_file.flush()


if __name__ == "__main__":
    # Entry point: build the oracle cache for the whole dataset.
    # `with` ensures the dataset file is closed (was previously leaked).
    with open(settings.DATASET_PATH) as dataset_file:
        dataset = get_dataset(dataset_file)
    oracle_data = get_cache_oracle_data(dataset)
    save_oracle_data(oracle_data)
예제 #6
0
    i = 0
    for query, target_entities in dataset:
        results, expressions = oracle.get_best_results_and_expressions(query)
        yield {
                'query': query,
                'results': results,
                'expressions': expressions,
                'target': target_entities,
                }
        i += 1
        logger.info("Completed: %d", i)

        if i % 10 == 0:
            logger.info("Saving caches")
            oracle.connector.save_cache()
            logger.info("Saving complete")


def save_oracle_data(oracle_results):
    """Incrementally pickle each oracle result into the oracle cache file.

    The file is opened in binary mode ('wb'), as pickle requires, and is
    flushed after every record so an interrupted run loses at most the
    item currently being written.
    """
    with open(settings.ORACLE_CACHE_PATH, 'wb') as cache_file:
        for result in oracle_results:
            pickle.dump(result, cache_file)
            cache_file.flush()

if __name__ == "__main__":
    # Entry point: build the oracle cache for the whole dataset.
    # `with` ensures the dataset file is closed (was previously leaked).
    with open(settings.DATASET_PATH) as dataset_file:
        dataset = get_dataset(dataset_file)
    oracle_data = get_cache_oracle_data(dataset)
    save_oracle_data(oracle_data)