Esempio n. 1
0
def scoring(triples: List[Tuple[str, str, str]],
            n_candidates_threshold: int = 5) -> List[Tuple[str, str, str]]:
    """Links the predicate of each triple to a verified predicate URI

    For every triple, candidates for its predicate are ranked by their second element
    (presumably the mapping score) and the best-ranked ones are verified in order.
    The first candidate accepted by the verifier replaces the predicate.
    A triple with no accepted candidate is left out of the result.

    :param triples: A list of triples to be processed [(subject, predicate, object), ...]
           :ex: [["http://dbpedia.org/resource/Barack_Obama", "bear in", "http://dbpedia.org/resource/Hawaii"]]
    :param n_candidates_threshold: Upper bound on how many top-ranked candidates are verified per triple.
                                   Verification may be slow, so the shortlist is kept small
           :ex: 5
    :return: A list of (subject, linked predicate, object) tuples, one per triple that could be linked
    """

    verbose.info('Mapping predicates using scoring system', scoring)

    def link(triple):
        # Rank candidates from the highest to the lowest score
        ranked = sorted(_find_candidates(triple[1]),
                        key=lambda candidate: candidate[1],
                        reverse=True)

        # Only the head of the ranking is verified; the first agreeing candidate wins
        for candidate in ranked[:n_candidates_threshold]:
            predicate = candidate[0]
            if verify.agreement(triple, predicate):
                return [(triple[0], predicate, triple[2])]

        # No shortlisted candidate agreed with the triple
        return []

    linked_triples = []
    for triple in triples:
        linked_triples.extend(link(triple))

    return linked_triples
Esempio n. 2
0
def openie(text: str) -> List[Tuple[str, str, str]]:
    """Requests OpenIE triples for a text from the Java REST service

    The service address is read from the 'servers.java' environment entry and
    the text is sent as the 'text' query parameter of GET <address>/openie/triples.
    The decoded JSON body of the response is returned as is.

    :param text: A string to be extracted
           :ex: Barack Obama born in Hawaii
    :return: A list of triples
    """

    server = env.resolve('servers.java')
    address = server['address']
    verbose.info('Extracting triples using OpenIE at: ' + address, caller=openie)

    url = '%s/openie/triples' % address
    response = requests.get(url, params={'text': text})
    return response.json()
Esempio n. 3
0
def dbpedia(triple: Tuple[str, str, str], predicate: str) -> bool:
    """Checks a candidate predicate against the object of a triple using a SPARQL endpoint

    The query looks up the rdfs:range of the predicate and asks whether the triple's object
    is connected to that range by any property. Only the object (range) side is queried here;
    the subject of the triple is not used.

    :param triple: A triple (subject's entity, predicate, object's entity)
           :ex: ["http://dbpedia.org/resource/Barack_Obama", "bear in", "http://dbpedia.org/resource/Hawaii"]
    :param predicate: Entity of the predicate to be verified
           :ex: http://dbpedia.org/ontology/birthPlace
    :return: True if the query yields at least one non-empty binding,
             False otherwise or when the endpoint rejects the query as badly formed
    """

    # Build the endpoint URL; the port is optional in the configuration
    config = env.resolve('database.virtuoso')
    if 'port' in config:
        e = '%s:%d/sparql/' % (config['address'], config['port'])
    else:
        e = '%s/sparql/' % config['address']

    # NOTE: DBpedia is expected to be the primary ontology look-up service in this project,
    #       so its ontology prefix is stripped from the predicate to keep the log line short.
    #       Adjust the prefix below if the service endpoint is changed.
    short_predicate = predicate.replace('http://dbpedia.org/ontology/', '')
    verbose.info(
        "Verifying domain-range of a predicate '%s' with SPARQL server: %s" %
        (short_predicate, e), dbpedia)

    query = ('SELECT DISTINCT ?vr WHERE {{ '
             '<{0}> <http://www.w3.org/2000/01/rdf-schema#range> ?rp. '
             '<{1}> ?vr ?rp. }}'.format(predicate, triple[2]))

    sparql = SPARQLWrapper(e)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)

    try:
        bindings = sparql.query().convert()['results']['bindings']
    except QueryBadFormed as error:
        verbose.error(str(error), dbpedia)
        return False

    # Any non-empty ?vr binding means agreement; none means a violation is concluded
    return any(len(binding['vr']['value']) > 0 for binding in bindings)
Esempio n. 4
0
def triples(triples: List[Tuple[str, str, str]],
            entities: Dict[str, str]) -> List[Tuple[str, str, str]]:
    """Substitutes known entities for the subjects and objects of triples

    Subjects and objects are stripped of surrounding whitespace before the look-up.
    A term that is not a key of the dictionary is kept (in its stripped form).
    Predicates are passed through untouched.

    :param triples: A list of triples [(subject, predicate, object), ...]
           :ex: [["Barack Obama", "bear in", "Hawaii"]]
    :param entities: A dictionary of known entities
           :ex: {"Barack Obama": "http://dbpedia.org/resource/Barack_Obama", "Hawaii": "http://dbpedia.org/resource/Hawaii"}
    :return: A list of triples with subjects and objects replaced with their entities
    """
    # The 'triples' parameter shadows this function's own name,
    # so the function object is fetched from the module globals for the log call.
    verbose.info('Aggregating triples', globals()['triples'])

    def resolve(raw_term):
        term = raw_term.strip()
        return entities.get(term, term)

    return [(resolve(item[0]), item[1], resolve(item[2])) for item in triples]
Esempio n. 5
0
def dbpedia_spotlight(text: str, endpoint: str = None, confidence: float = .5) -> Dict[str, str]:
    """Maps entities from a text using a DBpedia Spotlight annotation endpoint

    The text is POSTed to the endpoint and each item of the 'Resources' list in the JSON response
    contributes one entry: its '@surfaceForm' mapped to its '@URI'.
    If the same surface form occurs more than once, the last occurrence wins.

    :param text: A string (to be mapped)
           :ex: Barack Obama born in Hawaii
    :param endpoint: Annotator endpoint. Falls back to the configured endpoint when empty or None
           :ex: http://model.dbpedia-spotlight.org/en/annotate
    :param confidence: Minimum threshold of confidence value of found entities.
                       Falls back to the configured confidence only when None,
                       so an explicit threshold of 0 is honoured
           :ex: 0.5
    :return: A dictionary of mapped entities (URI); empty when the annotator reports no resources
    """

    config = configs.resolve('knowledge.integration.map.entities.dbpedia')

    # 'confidence or ...' would discard a legitimate threshold of 0.0, so test for None explicitly
    if confidence is None:
        confidence = config['confidence']
    endpoint = endpoint or config['endpoint']

    verbose.info('Mapping entities with annotation endpoint: %s' % endpoint, dbpedia_spotlight)
    response = requests.post(endpoint, data={'text': text, 'confidence': str(confidence)}, headers={'Accept': 'application/json'})

    # The annotator leaves out the 'Resources' key when nothing was recognised in the text
    resources = response.json().get('Resources') or []

    return {item['@surfaceForm']: item['@URI'] for item in resources}
Esempio n. 6
0
    def ready(self):
        """Prepares external resources when the service runs in the 'running' state

        Does nothing unless the environment variable UWKGM_STATE equals 'running'.
        Otherwise it downloads the NLTK 'wordnet' corpus and checks that the configured
        MongoDB server answers a ping.

        :raises pymongo.errors.PyMongoError: if the MongoDB server cannot be reached
        """
        if os.environ.get('UWKGM_STATE') != 'running':
            return

        verbose.info('Initializing NLTK wordnet...', KnowledgeConfig)

        # SECURITY NOTE(review): this replaces the default HTTPS context factory for the whole
        # process with one that skips certificate verification, not only for the NLTK download.
        # Interpreters without ssl._create_unverified_context are deliberately left untouched.
        try:
            _create_unverified_https_context = ssl._create_unverified_context
        except AttributeError:
            pass
        else:
            ssl._create_default_https_context = _create_unverified_https_context

        nltk.download('wordnet')

        verbose.info('Testing MongoDB database connection...')
        client = pymongo.MongoClient('mongodb://%s:%d/' %
                                     (env.resolve('database.mongo.address'),
                                      env.resolve('database.mongo.port')))
        try:
            # Constructing MongoClient does not contact the server; a cheap command is
            # required to actually find out whether the server is reachable.
            client.admin.command('ping')
        finally:
            # The client is only needed for the check, so release its connections
            client.close()
        verbose.info('MongoDB connection test pass')