def learn_new_triples(predicate):
    """learns new triples and stores them in a file"""
    sc = get_sentence_classifier(predicate)
    if predicate in numeric_predicates:
        pattern = '<%s> <%s> "%s"^^<http://www.w3.org/2001/XMLSchema#int> .'
    else:
        pattern = '<%s> <%s> "%s"@pl .'
    entities = CandidatesSelector.get_candidates(predicate)
    entities = entities[:candidates_limit]
    if verbose:
        print '%s candidates identified' % len(entities)
    n = 1000
    entities_list = [
        entities[i * n:(i + 1) * n]
        for i in xrange(int(ceil(len(entities) / n)))
    ]
    out = open(results_path + 'triples-%s' % predicate, 'w')
    ve = ValueExtractor(predicate, sc.extractor_training_data)
    for entities in entities_list:
        extracted_sentences = sc.extract_sentences(entities)
        values = ve.extract_values(extracted_sentences)
        for e, v in values.iteritems():
            if v:
                print >> out, pattern % (
                    full_resource_name(e).encode('utf-8'),
                    full_predicate_name(predicate).encode('utf-8'), v)
# Example #2
# 0
def run_evaluation(predicate, sentence_limit=None):
    entities, true_values = get_test_data(predicate)
    sc = get_sentence_classifier(predicate, sentence_limit)
    true_values = dict((k, v) for k, v in true_values.iteritems() if k in entities)
    if verbose:
        print '%d entities were used in evaluation.' % len(entities)
    extracted_sentences = sc.extract_sentences(entities)
    ve = ValueExtractor(predicate, sc.extractor_training_data)
    values = ve.extract_values(extracted_sentences)
    print '%s results:' % predicate
    stats, fp, fn = ValueExtractorEvaluator.evaluate(true_values, values)
    table_format = '%30s %30s %20s %10s'
    print 'Error table:'
    print table_format % ('Subject:', 'Gold standard values:', 'Extracted value:', 'Error:')
    for entity, value in values.iteritems():
        if entity not in true_values:
            true_values[entity] = '-'
    for entity, true_value in true_values.iteritems():
        if entity in fp and entity in fn:
            err = 'FP/FN'
        elif entity in fp:
            err = 'FP'
        elif entity in fn:
            err = 'FN'
        else:
            err = ''
        print table_format % (entity[:30], ', '.join(true_value), values[entity] if entity in values else '-', err)
    print '\n\n'
    return stats
def learn_new_triples(predicate):
    """learns new triples and stores them in a file"""
    sc = get_sentence_classifier(predicate)
    if predicate in numeric_predicates:
        pattern = '<%s> <%s> "%s"^^<http://www.w3.org/2001/XMLSchema#int> .'
    else:
        pattern = '<%s> <%s> "%s"@pl .'
    entities = CandidatesSelector.get_candidates(predicate)
    entities = entities[: candidates_limit]
    if verbose:
        print '%s candidates identified' % len(entities)
    n = 1000
    entities_list = [
        entities[i*n : (i+1)*n] 
        for i in xrange(int(ceil(len(entities) / n)))
    ]
    out = open(results_path + 'triples-%s' % predicate, 'w')
    ve = ValueExtractor(predicate, sc.extractor_training_data)
    for entities in entities_list:
        extracted_sentences = sc.extract_sentences(entities)
        values = ve.extract_values(extracted_sentences)
        for e, v in values.iteritems():
            if v:
                print >>out, pattern % (
                    full_resource_name(e).encode('utf-8'), 
                    full_predicate_name(predicate).encode('utf-8'), 
                    v
                )