def get_entities_of_class(self, _class_uri):
    """
        Fetch the entities returned by the GET_ENTITIES_OF_CLASS query for the
        class passed as the function parameter _class_uri.

        A shorthand class name is converted to a full URI (with a warning)
        before the query is built.

        Return Type
            [ S,S,S,S...]   the 'entity' value of every result binding,
                            ASCII-encoded (non-ASCII characters are dropped).
            []              when the response could not be parsed.
    """
    # Check if the class URI is shorthand or a proper URI
    if not nlutils.has_url(_class_uri):
        warnings.warn(
            "The passed class %s is not a proper URI but is in shorthand. This is strongly discouraged." % _class_uri)
        _class_uri = nlutils.convert_shorthand_to_uri(_class_uri)

    # Preparing the SPARQL Query
    sparql = SPARQLWrapper(self.select_sparql_endpoint())
    sparql.setQuery(GET_ENTITIES_OF_CLASS % {'target_class': '<' + _class_uri + '>'})
    sparql.setReturnFormat(JSON)
    response = sparql.query().convert()

    # Bind the result up front: previously a malformed response printed a
    # traceback and then crashed with UnboundLocalError at the return.
    entity_list = []
    try:
        entity_list = [
            binding[u'entity'][u'value'].encode('ascii', 'ignore')
            for binding in response[u'results'][u'bindings']
        ]
    except Exception:
        # Best effort: report the problem and fall back to the empty list.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        # TODO: Find and handle exceptions appropriately
        traceback.print_exc()

    return entity_list
def get_type_of_resource(self, _resource_uri, _filter_dbpedia=False):
    """
        Function fetches the types of a given entity and can optionally keep
        only the ones from DBpedia.

        A shorthand resource name is converted to a full URI (with a warning)
        before the query is built.

        Return Type
            [ T,T,T,T...]   the 'type' value of every result binding,
                            ASCII-encoded. If _filter_dbpedia is set, only the
                            values starting with the DBpedia ontology/property
                            prefixes are kept.
            []              when the response could not be parsed.
    """
    # @TODO: Add basic caching setup.
    if not nlutils.has_url(_resource_uri):
        warnings.warn(
            "The passed resource %s is not a proper URI but probably a shorthand. This is strongly discouraged." % _resource_uri)
        _resource_uri = nlutils.convert_shorthand_to_uri(_resource_uri)

    response = self.shoot_custom_query(
        GET_TYPE_OF_RESOURCE % {'target_resource': '<' + _resource_uri + '>'})

    # Bind the result up front: previously a malformed response printed a
    # traceback and then crashed with UnboundLocalError further down.
    type_list = []
    try:
        type_list = [
            binding[u'type'][u'value'].encode('ascii', 'ignore')
            for binding in response[u'results'][u'bindings']
        ]
    except Exception:
        # Best effort: report the problem and fall back to the empty list.
        traceback.print_exc()

    if not _filter_dbpedia:
        return type_list

    # If we need only DBPedia's types. Both prefixes are exactly 28 characters
    # long, which is why a fixed-width slice is compared against them.
    dbpedia_prefixes = ('http://dbpedia.org/ontology/', 'http://dbpedia.org/property/')
    return [x for x in type_list if x[:28] in dbpedia_prefixes]
def get_properties_of_resource(self, _resource_uri, _with_connected_resource=False, right=True):
    """
        This function can fetch the properties connected to this '_resource', in the format - _resource -> R -> O

        _with_connected_resource
            if set, return the (R,O) pairs instead of just R.
        right
            if set (default), the GET_RIGHT_* query templates are used,
            otherwise the GET_LEFT_* ones.
            NOTE(review): presumably "left" means properties pointing *to* the
            resource (O -> R -> _resource) -- confirm against the templates.

        Return Type
            if _with_connected_resource == True, [ [R,O], [R,O], [R,O] ...]
            else [ R,R,R,R...]
            In both cases an empty list when the response could not be parsed.
    """
    # Check if the resource URI is shorthand or a proper URI
    if not nlutils.has_url(_resource_uri):
        warnings.warn(
            "The passed resource %s is not a proper URI but is in shorthand. This is strongly discouraged." % _resource_uri)
        _resource_uri = nlutils.convert_shorthand_to_uri(_resource_uri)

    # Pick the query template from the two flags. (The unused SPARQLWrapper
    # instance the old code built here is gone; the query is sent through
    # shoot_custom_query below.)
    query_templates = {
        (True, True): GET_RIGHT_PROPERTIES_OF_RESOURCE_WITH_OBJECTS,
        (True, False): GET_LEFT_PROPERTIES_OF_RESOURCE_WITH_OBJECTS,
        (False, True): GET_RIGHT_PROPERTIES_OF_RESOURCE,
        (False, False): GET_LEFT_PROPERTIES_OF_RESOURCE,
    }
    template = query_templates[(bool(_with_connected_resource), bool(right))]
    response = self.shoot_custom_query(
        template % {'target_resource': '<' + _resource_uri + '>'})

    # Bind the result up front: previously a malformed response printed a
    # traceback and then crashed with UnboundLocalError at the return.
    property_list = []
    try:
        bindings = response[u'results'][u'bindings']
        if _with_connected_resource:
            property_list = [
                [
                    x[u'property'][u'value'].encode('ascii', 'ignore'),
                    x[u'resource'][u'value'].encode('ascii', 'ignore'),
                ]
                for x in bindings
            ]
        else:
            property_list = [
                x[u'property'][u'value'].encode('ascii', 'ignore')
                for x in bindings
            ]
    except Exception:
        # Best effort: report the problem and fall back to the empty list.
        # TODO: Find and handle exceptions appropriately
        traceback.print_exc()

    return property_list
def get_properties_on_resource(self, _resource_uri):
    """
        Run the GET_PROPERTIES_ON_RESOURCE query, i.e. ask for the properties
        that point to this resource.
        Eg.
        Barack Obama -> Ex-President of -> _resource_uri would yield ex-president of as the relation

        A shorthand resource name is converted to a full URI (with a warning)
        before the query is built.

        NOTE(review): the query response is not parsed or returned, so this
        method currently always returns None -- looks unfinished, confirm.
        NOTE(review): unlike the sibling methods, the URI is not wrapped in
        '<' and '>' before being substituted -- verify against the template.
    """
    given_as_shorthand = not nlutils.has_url(_resource_uri)
    if given_as_shorthand:
        warnings.warn(
            "The passed resource %s is not a proper URI but is in shorthand. This is strongly discouraged." % _resource_uri)
        _resource_uri = nlutils.convert_shorthand_to_uri(_resource_uri)

    query = GET_PROPERTIES_ON_RESOURCE % {'target_resource': _resource_uri}
    self.shoot_custom_query(query)
def get_most_specific_class(self, _resource_uri):
    """
        Query to find the most specific DBPedia Ontology class given a URI.

        The DBpedia types of the resource are fetched, the GET_CLASS_PATH query
        is run for each of them, and the class whose query returns the most
        bindings is taken as the most specific one. On a tie the class that
        comes first in the type list wins.

        Returns the chosen class URI, or "http://www.w3.org/2002/07/owl#Thing"
        when no class could be measured (no DBpedia types, or every lookup
        failed).

        Limitation: works only with resources.
        @TODO: Extend this to work with ontology (not entities) too. Or properties.
    """
    if not nlutils.has_url(_resource_uri):
        warnings.warn(
            "The passed resource %s is not a proper URI but probably a shorthand. This is strongly discouraged." % _resource_uri)
        _resource_uri = nlutils.convert_shorthand_to_uri(_resource_uri)

    # Get the DBpedia classes of resource
    classes = self.get_type_of_resource(_resource_uri, _filter_dbpedia=True)

    # A list of (class_uri, path_length) tuples; the longest path wins below.
    length_array = []

    # For every class, find the length of path to owl:Thing.
    for class_uri in classes:
        target_class = '<' + class_uri + '>'
        try:
            response = self.shoot_custom_query(
                GET_CLASS_PATH % {'target_class': target_class})
            results = [
                x[u'type'][u'value'].encode('ascii', 'ignore')
                for x in response[u'results'][u'bindings']
            ]
        except Exception:
            # Skip this class. The old code carried on after a failure and then
            # either crashed on an unbound name (first class) or silently
            # recorded the previous class's path length (later classes).
            traceback.print_exc()
            continue

        # Count the number of returned classes and store it in the list.
        length_array.append((class_uri, len(results)))

    if length_array:
        return max(length_array, key=itemgetter(1))[0]

    # If there is no results from the filter type , return it as owl Thing
    return "http://www.w3.org/2002/07/owl#Thing"
def get_label(self, _resource_uri):
    """
        Return the label of a resource as produced by
        nlutils.get_label_via_parsing.

        A shorthand resource name is first converted to a full URI (no warning
        is issued here). The URI is then normalised so that it is wrapped in
        exactly one pair of angle brackets before being handed over.
    """
    uri = _resource_uri
    if not nlutils.has_url(uri):
        uri = nlutils.convert_shorthand_to_uri(uri)

    # Drop every angle bracket already present, then add a single clean pair.
    for bracket in ('<', '>'):
        uri = uri.replace(bracket, '')
    wrapped_uri = '<' + uri + '>'

    return nlutils.get_label_via_parsing(wrapped_uri)
def get_type_of_resource(self, _resource_uri, _filter_dbpedia=False):
    """
        Function fetches the types of a given entity and can optionally keep
        only the ones from DBpedia.

        A shorthand resource name is converted to a full URI (with a warning)
        before the query is built.

        Return Type
            [ T,T,T,T...]   the 'type' value of every result binding,
                            ASCII-encoded. If _filter_dbpedia is set, only the
                            values starting with the DBpedia ontology/property
                            prefixes are kept.
            []              when the response could not be parsed.

        NOTE(review): a method with this same name is defined earlier in this
        file; if both live in the same class, this later definition is the one
        that takes effect -- confirm and remove the duplicate.
    """
    if not nlutils.has_url(_resource_uri):
        warnings.warn(
            "The passed resource %s is not a proper URI but probably a shorthand. This is strongly discouraged." % _resource_uri)
        _resource_uri = nlutils.convert_shorthand_to_uri(_resource_uri)

    # Preparing the SPARQL Query
    sparql = SPARQLWrapper(self.select_sparql_endpoint())
    sparql.setQuery(GET_TYPE_OF_RESOURCE % {'target_resource': '<' + _resource_uri + '>'})
    sparql.setReturnFormat(JSON)
    response = sparql.query().convert()

    # Bind the result up front: previously a malformed response printed a
    # traceback and then crashed with UnboundLocalError further down.
    type_list = []
    try:
        type_list = [
            binding[u'type'][u'value'].encode('ascii', 'ignore')
            for binding in response[u'results'][u'bindings']
        ]
    except Exception:
        # Best effort: report the problem and fall back to the empty list.
        traceback.print_exc()

    if not _filter_dbpedia:
        return type_list

    # If we need only DBPedia's types. Both prefixes are exactly 28 characters
    # long, which is why a fixed-width slice is compared against them.
    dbpedia_prefixes = ('http://dbpedia.org/ontology/', 'http://dbpedia.org/property/')
    return [x for x in type_list if x[:28] in dbpedia_prefixes]