def get_ontologies(file_path, property_name):
    """Query an OWL file for subclasses of a fixed superclass.

    file_path -- path of the OWL file to parse
    property_name -- ignored as passed in; rebuilt and returned as the list
                     of URI fragments of the matching subclasses
    """
    g = Graph()
    g.parse(file_path)
    # NOTE: <http://xyz.owl#> is the placeholder superclass IRI from the
    # original; the owl/rdfs/xsd prefixes lacked IRIs and the triple pattern
    # was missing a space, which made the query a syntax error.
    x = """PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
           PREFIX owl: <http://www.w3.org/2002/07/owl#>
           PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
           PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
           SELECT * WHERE { ?object rdfs:subClassOf <http://xyz.owl#> }"""
    qres = g.query(x)
    property_name = []
    for sub in qres:
        # each result row is a tuple; take the first binding and keep the
        # fragment after the '#'
        property_name.append(str(sub[0]).split("#")[1])
    return property_name
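# Hedged usage sketch for get_ontologies: writes a throwaway OWL/RDF-XML file
# whose class is a subclass of the placeholder IRI the query hard-codes, then
# runs the helper (assumes rdflib's Graph import from the surrounding module).
import tempfile

_demo_owl = """<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
  <rdf:Description rdf:about="http://xyz.owl#Child">
    <rdfs:subClassOf rdf:resource="http://xyz.owl#"/>
  </rdf:Description>
</rdf:RDF>"""

with tempfile.NamedTemporaryFile("w", suffix=".owl", delete=False) as fh:
    fh.write(_demo_owl)
print(get_ontologies(fh.name, []))  # expected: ['Child']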
def parseN3Logic(self, src):
    store = N3RuleStore(additionalBuiltins=self.ruleStore.filters)
    Graph(store).parse(src, format='n3')
    store._finalize()
    assert len(store.rules), "There are no rules passed in."
    from FuXi.Horn.HornRules import Ruleset
    for rule in Ruleset(n3Rules=store.rules, nsMapping=self.nsMap):
        self.buildNetwork(iter(rule.formula.body), iter(rule.formula.head), rule)
        self.rules.add(rule)
    self.alphaNodes = [node for node in list(self.nodes.values())
                       if isinstance(node, AlphaNode)]
    self.alphaBuiltInNodes = [node for node in list(self.nodes.values())
                              if isinstance(node, BuiltInAlphaNode)]
def test_n32(self):
    # this test checks that prefixes are not generated for subjects/objects
    g = Graph()
    g.add(
        (
            URIRef("http://example1.com/foo"),
            URIRef("http://example2.com/bar"),
            URIRef("http://example3.com/baz"),
        )
    )
    n3 = g.serialize(format="n3")
    self.assertTrue(
        "<http://example1.com/foo> ns1:bar <http://example3.com/baz> .".encode(
            "latin-1"
        )
        in n3
    )
def read_file(self, path_to_file, format="xml"): """ parse the skos file and extract all available data """ rdf_graph = Graph() # bind the namespaces rdf_graph.bind("arches", ARCHES) try: rdf = rdf_graph.parse(source=path_to_file, format=format) self.path_to_file = str(path_to_file) except: raise Exception("Error occurred while parsing the file %s" % path_to_file) return rdf
def _testNegative(uri, manifest):
    if verbose:
        write(u"TESTING: %s" % uri)
    result = 0  # 1=failed, 0=passed
    inDoc = first(manifest.objects(uri, TEST["inputDocument"]))
    store = Graph()
    test = BNode()
    results.add((test, RESULT["test"], uri))
    results.add((test, RESULT["system"], system))
    try:
        if inDoc[-3:] == ".nt":
            format = "nt"
        else:
            format = "xml"
        store.parse(cached_file(inDoc), publicID=inDoc, format=format)
    except ParserError, pe:  # Python 2 except syntax, kept from the original snippet
        results.add((test, RDF.type, RESULT["PassingRun"]))
def attach_directory(self, dirname):
    """Attach to a directory containing RDF files and deliver data from there"""
    self.graph = Graph()
    self.basedir = dirname
    for dirpath, _, filenames in os.walk(dirname):
        for filename in filenames:
            if filename.endswith(".rdf"):
                # .rdf files are conventionally RDF/XML, not Turtle
                self.graph.parse(os.path.join(dirpath, filename), format='xml')
            elif filename.endswith(".n3"):
                self.graph.parse(os.path.join(dirpath, filename), format='n3')
    return len(self.graph)
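# Hedged usage sketch for attach_directory: since it is written as a method,
# bind it to a throwaway class; "data/" is a hypothetical directory holding
# .rdf and .n3 files.
#
#     class _DemoStore:
#         attach_directory = attach_directory
#
#     store = _DemoStore()
#     print("loaded %d triples" % store.attach_directory("data/"))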
def read_file(self, path_to_file, format='xml'):
    """
    Parse the SKOS file and extract all available data.
    """
    rdf_graph = Graph()
    # bind the namespaces
    rdf_graph.bind('arches', ARCHES)
    start = time()
    try:
        rdf = rdf_graph.parse(source=path_to_file, format=format)
        print 'time elapsed to parse rdf graph %s s' % (time() - start)
    except Exception:
        # a bare except would also trap KeyboardInterrupt/SystemExit
        raise Exception('Error occurred while parsing the file %s' % path_to_file)
    return rdf
def removeStuff(self):
    tarek = self.tarek
    michel = self.michel
    bob = self.bob
    likes = self.likes
    hates = self.hates
    pizza = self.pizza
    cheese = self.cheese
    c1 = self.c1
    graph = Graph(self.graph.store, c1)
    graph.remove((tarek, likes, pizza))
    graph.remove((tarek, likes, cheese))
    graph.remove((michel, likes, pizza))
    graph.remove((michel, likes, cheese))
    graph.remove((bob, likes, cheese))
    graph.remove((bob, hates, pizza))
    graph.remove((bob, hates, michel))  # gasp!
def _objsToRDF(self, base_uri, obj_dict):
    graph = Graph()
    nsman = NamespaceManager(graph)
    for obj_id, obj in obj_dict.items():  # iterate key/value pairs, not just keys
        logging.debug(
            "IndxWebHandler _objsToRDF obj_id: {0}, obj: {1}".format(obj_id, obj))
        if obj_id[0] != "@":
            obj_uriref = URIRef("{0}{1}".format(base_uri, obj_id))
            for pred in obj.keys():
                if pred[0] == "@":
                    continue
                # XXX TODO do something better
                if pred.startswith("http://") or pred.startswith("https://"):
                    pred_uriref = URIRef(pred)
                else:
                    pred_uriref = URIRef(base_uri + "property/" + pred)
                for val in obj[pred]:
                    if "@id" in val:
                        value = val["@id"]
                        # XXX TODO do something better
                        if value.startswith("http://") or value.startswith("https://"):
                            val_uriref = URIRef(value)
                        else:
                            val_uriref = URIRef(base_uri + "value/" + value)
                        graph.add((obj_uriref, pred_uriref, val_uriref))
                    elif "@value" in val:
                        value = val["@value"]
                        graph.add((obj_uriref, pred_uriref, Literal(value)))
                    else:
                        continue
    return graph
def __init__(self, configuration=None, identifier=None):
    """ProxyStore initialization.

    Creates an empty Graph and initializes the HTTP client. Uses the
    default for internal graph storage, i.e. IOMemory. The URIRef of the
    graph must be supplied either in the identifier or in the configuration
    parameter; it will be checked by open(). The cache file path can be
    given in the configuration dictionary (__init__ only). We still have to
    think about the memory cache.
    """
    LOG.debug("-- ProxyStore.init(configuration=%s, identifier=%s) --\n",
              configuration, identifier)
    self._identifier = identifier
    self._format = None
    self._etags = None
    self._req_headers = {}
    self.configuration = None
    configuration = self._configuration_extraction(configuration)
    self._graph = Graph()
    # Most important parameter: identifier and graph address.
    # If not given, we can not go further.
    if (identifier is not None) and len(identifier) > 0:
        if len(configuration) == 0:
            configuration = {PS_CONFIG_URI: identifier}
    # Show the network activity
    if PS_CONFIG_DEBUG_HTTP in configuration.keys():
        httplib2.debuglevel = 1
    # Use provided Http connection if any
    http_cx = configuration.get(PS_CONFIG_HTTP_CX)
    if http_cx is None:
        http_cx = httplib2.Http()
    else:
        assert isinstance(http_cx, httplib2.Http)
    self.httpserver = http_cx
    # Store will call open() if configuration is not None
    Store.__init__(self, configuration)
def extraction(url):
    if "exploreat.adaptcentre.ie" in url or 'prismstandard' in url:
        return ""
    g = Graph()
    g.parse(url)
    qres = g.query(
        """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        SELECT DISTINCT ?obj
        WHERE { <""" + url + """> rdfs:comment ?obj }
        """
    )
    comment = ''
    for res in qres:
        comment += str(res[0])
    return comment
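# Hedged usage sketch for extraction: dereferences a URI and collects its
# rdfs:comment values (network access required; the DBpedia URI is only
# illustrative):
#
#     print(extraction("http://dbpedia.org/resource/Dublin"))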
def __init__(self, graph=None):
    self.logger = logging.getLogger()
    self.logger.debug("Initiating Knowledge Graph")
    if graph is not None:
        if isinstance(graph, Graph):
            self.graph = graph
        elif isinstance(graph, str):
            self.graph = self._read([graph])
        elif isinstance(graph, list):
            self.graph = self._read(graph)
        else:
            raise TypeError(":: Wrong input type: {}; requires path to RDF"
                            " graph or rdflib.graph.Graph object".format(type(graph)))
    else:
        self.graph = Graph()
    self._property_distribution = Counter(self.graph.predicates())
    self.logger.debug("Knowledge Graph ({} facts) successfully imported".format(
        len(self.graph)))
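# Hedged usage sketch for the constructor above. The class name
# KnowledgeGraph is assumed from the log messages:
#
#     kg = KnowledgeGraph()                  # start with an empty graph
#     kg = KnowledgeGraph(Graph())           # wrap an existing rdflib Graph
#     kg = KnowledgeGraph("facts.nt")        # single path, handed to self._read
#     kg = KnowledgeGraph(["a.nt", "b.nt"])  # list of paths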
def testBaseExplicit(self):
    """
    Test that the n3 parser supports resolving relative URIs
    and that base will override
    """
    input = """
    @prefix : <http://example.com/> .
    # default base
    <foo> :name "Foo" .
    # change it
    @base <http://example.com/doc/> .
    <bar> :name "Bar" .
    """
    g = Graph()
    g.parse(data=input, publicID="http://blah.com/", format="n3")
    print(list(g))
    self.assertTrue((URIRef("http://blah.com/foo"), None, Literal("Foo")) in g)
    self.assertTrue((URIRef("http://example.com/doc/bar"), None, None) in g)
def __init__(self, *args, **kwargs):
    super(TestExamples, self).__init__(*args, **kwargs)
    namespaces_def = os.path.join(RELPATH, "terms", "templates", 'Namespaces.txt')
    fid = open(namespaces_def, "r")
    namespaces = fid.read()
    fid.close()
    self.term_examples = glob.glob(
        os.path.join(RELPATH, "terms", "examples", '*.txt'))
    self.example_files = example_filenames.union(self.term_examples)
    self.examples = dict()
    self.owl_files = dict()
    for example_file in self.example_files:
        ttl_file = os.path.join(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
            example_file)
        # ttl_file_url = get_turtle(provn_file)
        # ttl_file = provn_file.replace(".provn", ".ttl")
        # Read turtle
        self.examples[example_file] = Graph()
        if example_file in self.term_examples:
            fid = open(ttl_file, "r")
            ttl_txt = fid.read()
            fid.close()
            self.examples[example_file].parse(data=namespaces + ttl_txt,
                                              format='turtle')
        else:
            self.examples[example_file].parse(ttl_file, format='turtle')
        term_dir = os.path.join(os.path.dirname(ttl_file), os.pardir, 'terms')
        if not os.path.isdir(term_dir):
            term_dir = os.path.join(os.path.dirname(ttl_file), os.pardir,
                                    os.pardir, 'terms')
        # Retrieve owl file for minimal examples
        if not os.path.isdir(term_dir):
            term_dir = os.path.join(os.path.dirname(ttl_file), os.pardir,
                                    os.pardir, os.pardir, 'terms')
        owl_files = glob.glob(os.path.join(term_dir, '*.owl'))
        self.owl_files[example_file] = owl_files[0]
    self.owl_readers = dict()
def enhance(self, content, input=Format.TEXT, output=Format.JSON):
    """
    Enhance the content

    @type content: str
    @param content: target content
    @type input: C{FormatDef}
    @param input: input type
    @type output: C{FormatDef}
    @param output: output type
    @return: enhancements
    """
    analysis = self.status["analyses"][0]
    params = {self.param_in: input.name, self.param_out: output.name}
    resource = self._build_url(
        "/%s/%s/%s" % (self.path, analysis, self.enhance_path), params)
    logging.debug("Making request to %s" % resource)
    response = self._post(resource, content, input.mimetype, output.mimetype)
    if response.status_code != 200:
        logging.error("Enhance request returned %d: %s"
                      % (response.status_code, response.reason))
        return response.text
    else:
        content_type = from_mimetype(response.headers["Content-Type"])
        if content_type == Format.JSON or content_type == Format.REDLINKJSON:
            return json.loads(response.text)
        elif content_type == Format.XML or content_type == Format.REDLINKXML:
            # parseString, not parse: response.text is a string, not a file path
            return minidom.parseString(response.text)
        elif content_type.rdflibMapping:
            g = Graph()
            g.parse(data=response.text, format=content_type.rdflibMapping)
            return g
        else:
            logging.warning(
                "Handler not found for %s, so returning raw text response..."
                % content_type.mimetype)
            return response.text
def test_no_addN_on_exception(self):
    """
    Even if we've added triples so far, it may be that attempting to add the
    last batch is the cause of our exception, so we don't want to attempt again
    """
    g = Graph()
    trips = [(URIRef("a"), URIRef("b%d" % i), URIRef("c%d" % i))
             for i in range(12)]
    try:
        with BatchAddGraph(g, batch_size=10) as cut:
            for i, trip in enumerate(trips):
                cut.add(trip)
                if i == 11:
                    raise Exception("myexc")
    except Exception as e:
        if str(e) != "myexc":
            raise  # re-raise anything other than the expected exception
    self.assertEqual(10, len(g))
def testBase(self):
    """
    Test that the n3 parser supports base declarations
    This is issue #22
    """
    input = """
    @prefix : <http://example.com> .
    # default base
    <foo> :name "Foo" .
    # change it
    @base <http://example.com/doc/> .
    <bar> :name "Bar" .
    # and change it more - they are cumulative
    @base <doc2/> .
    <bing> :name "Bing" .
    """
    g = Graph()
    g.parse(data=input, format="n3")
def rdf_description(name, notation='xml'):
    """
    Function takes title of node, and rdf notation.
    """
    valid_formats = ["xml", "n3", "ntriples", "trix"]
    default_graph_uri = "http://gstudio.gnowledge.org/rdfstore"
    configString = "/var/tmp/rdfstore"
    # Get the Sleepycat plugin.
    store = plugin.get('Sleepycat', Store)('rdfstore')
    # Open previously created store, or create it if it doesn't exist yet
    graph = Graph(store="Sleepycat", identifier=URIRef(default_graph_uri))
    path = mkdtemp()
    rt = graph.open(path, create=False)
    if rt == NO_STORE:
        # There is no underlying Sleepycat infrastructure, create it
        graph.open(path, create=True)
    else:
        assert rt == VALID_STORE, "The underlying store is corrupt"
    # Now we'll add some triples to the graph & commit the changes
    rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')
    graph.bind("gstudio", "http://gnowledge.org/")
    exclusion_fields = [
        "id", "rght", "node_ptr_id", "image", "lft", "_state",
        "_altnames_cache", "_tags_cache", "nid_ptr_id", "_mptt_cached_fields"
    ]
    node = Objecttype.objects.get(title=name)
    node_dict = node.__dict__
    subject = str(node_dict['id'])
    for key in node_dict:
        if key not in exclusion_fields:
            predicate = str(key)
            pobject = str(node_dict[predicate])
            graph.add((rdflib[subject], rdflib[predicate], Literal(pobject)))
    graph.commit()
    print graph.serialize(format=notation)
    graph.close()
def parse(self, source, cType):
    # assume format = link-format
    g = Graph()
    self._linkFormatString = source
    self._graphs = self._linkFormatString.split(',')
    for self._graph in self._graphs:
        self._links = self._graph.split(';')
        self._subject = self._links[0]
        self._subject = self._subject.strip('<')
        self._subject = self._subject.strip('>')
        for self._link in self._links[1:]:
            self._attr, self._objs = self._link.split('=')
            self._objs = self._objs.strip('"')  # remove quotes from object string
            self._objs = self._objs.split(' ')
            for self._obj in self._objs:
                g.add((URIRef(self._subject),
                       self._attrToPred[self._attr],
                       Literal(self._obj)))
    return g
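# Hedged usage sketch for the link-format parser above (RFC 6690 CoRE Link
# Format). The enclosing class is not shown, so a stub supplies the
# _attrToPred mapping with made-up predicate URIs:
#
#     class _LinkParser:
#         _attrToPred = {"rt": URIRef("http://example.org/rt"),
#                        "if": URIRef("http://example.org/if")}
#         parse = parse
#
#     g = _LinkParser().parse('</sensors/temp>;rt="temperature-c";if="sensor"', None)
#     for triple in g:
#         print(triple)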
def getGraphFromText(text):
    text = text.encode('utf-8')
    text = base64.b64encode(text)
    comprenoData = {
        "Source": {
            "Extension": "TXT",
            "TxtEncoding": "utf-8",
            "Content": str(text, encoding='ascii')
        },
        "SourceLanguage": "en-US",
        "ProcessingParameters": {
            "ProcessingTimeout": 600000,
            "HtmlParsingMode": "None",
            "MaxSymbolsCount": 0
        },
        "Operations": {
            "EntitiesAndFactsExtraction": {
                "ModelName": "Extended",
            },
            "FiltrationParameters": {
                "Ontologies": [
                    "http://www.abbyy.com/ns/BasicEntity#",
                    "http://www.abbyy.com/ns/BasicFact#"
                ]
            }
        },
    }
    jsonRequest = json.dumps(comprenoData, skipkeys=True)
    newSession = requests.session()
    response = newSession.post(
        "http://infoextractorapitest.abbyy.com/api/tasks?async=false",
        data=jsonRequest,
        headers={'Content-type': 'application/json', 'Accept': 'text/plain'},
        auth=HTTPBasicAuth('ABBYY_Labs', 'ABBYY_Labs_password'))
    currentGraph = Graph()
    fakeFile = io.StringIO(response.text)
    # print(response.text)
    currentGraph.parse(fakeFile)  # no explicit format: relies on rdflib's RDF/XML default
    return currentGraph
def test_compute_qname(self):
    """Test sequential assignment of unknown prefixes"""
    g = Graph()
    self.assertEqual(
        g.compute_qname(URIRef("http://foo/bar/baz")),
        ("ns1", URIRef("http://foo/bar/"), "baz"),
    )
    self.assertEqual(
        g.compute_qname(URIRef("http://foo/bar#baz")),
        ("ns2", URIRef("http://foo/bar#"), "baz"),
    )
    # should skip to ns4 when ns3 is already assigned
    g.bind("ns3", URIRef("http://example.org/"))
    self.assertEqual(
        g.compute_qname(URIRef("http://blip/blop")),
        ("ns4", URIRef("http://blip/"), "blop"),
    )
    # should return empty qnames correctly
    self.assertEqual(
        g.compute_qname(URIRef("http://foo/bar/")),
        ("ns1", URIRef("http://foo/bar/"), ""),
    )
    # should compute qnames of URNs correctly as well
    self.assertEqual(
        g.compute_qname(URIRef("urn:ISSN:0167-6423")),
        ("ns5", URIRef("urn:ISSN:"), "0167-6423"),
    )
    self.assertEqual(
        g.compute_qname(URIRef("urn:ISSN:")),
        ("ns5", URIRef("urn:ISSN:"), ""),
    )
    # should compute qnames with parenthesis correctly
    self.assertEqual(
        g.compute_qname(URIRef("http://foo/bar/name_with_(parenthesis)")),
        ("ns1", URIRef("http://foo/bar/"), "name_with_(parenthesis)"),
    )
def build_rule_network(rulesets):
    """
    :param rulesets: iterable of n3 ruleset paths
    :returns: Initialized **in-memory** Rule network
    """
    # Build Rule Network
    rule_store, rule_graph, network = SetupRuleStore(None, None, True)
    network.inferredFacts = Graph()  # raptor_world
    for ruleset_path in rulesets:
        logging.debug("Loading rules from: %r" % ruleset_path)
        len_pre = len(network.rules)
        for rule in HornFromN3(ruleset_path):
            network.buildNetworkFromClause(rule)
        len_post = len(network.rules)
        logging.debug("  %r : %d rules (%d -> %d)"
                      % (ruleset_path, len_post - len_pre, len_pre, len_post))
    return network
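# Hedged usage sketch for build_rule_network (requires FuXi; the ruleset path
# and facts_graph are placeholders):
#
#     from FuXi.Rete.Util import generateTokenSet
#     network = build_rule_network(["rules/rdfs-rules.n3"])
#     network.feedFactsToAdd(generateTokenSet(facts_graph))
#     print(len(network.inferredFacts))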
def graph(self):
    g = Graph()
    entry = term.BNode()
    g.add((entry, RDF.type, self._type_uri()))
    g.add((entry, ns_slog.timestamp, term.Literal(self.date)))
    g.add((entry, ns_slog.subject, ns_person[self.subject]))
    if self.entry_type == 'UR':
        g.add((entry, ns_slog.url, term.Literal(self.data2)))
    elif self.entry_type == 'BA':
        g.add((entry, ns_slog.command, ns_command[self._escape(self.data1)]))
        g.add((entry, ns_slog.command_line, term.Literal(self.data2)))
    elif self.entry_type == 'PO':
        pass
    elif self.entry_type[0] == 'T' or self.entry_type[0] == 'I':
        g.add((entry, ns_slog.command_line, term.Literal(self.data2)))
    else:
        g.add((entry, ns_slog.num_lines, term.Literal(int(self.data1))))
        g.add((entry, ns_slog.command_line, term.Literal(self.data2)))
    return g
def get(self, query=None):
    # return a sub-graph consisting of the triples with predicates in the
    # attribute binding, filtered by the query
    g = Graph()
    self.graph = self.resources['parentObject'].resources[
        'parentObject'].resources['Description'].graph
    if query is None:
        for self._pred in self._predToAttr:
            for triple in self.graph.triples((None, self._pred, None)):
                g.add(triple)
    else:
        self._attr, self._obj = query.split('=')
        for (self._subject, p, o) in self.graph.triples(
                (None, self._attrToPred[self._attr], Literal(self._obj))):
            # return all links for all attributes in the binding that have
            # matching subjects
            for self._pred in self._predToAttr:
                for triple in self.graph.triples((self._subject, self._pred, None)):
                    g.add(triple)
    return g
def get_namespace(rdfxml):
    """Get ontology or vocabulary namespace from the RDF/XML.

    Look for either:
    * an owl:versionIRI property, for the ontologies
    * else an owl:Ontology (was void:Dataset before munging) type
      definition, for the vocabularies
    """
    from rdflib.graph import Graph
    from rdflib.namespace import RDF, OWL

    g = Graph()
    g.parse(data=rdfxml, format="application/rdf+xml")
    # Look for owl:versionIRI first (_assume_ only one!)
    for s, p, o in g.triples((None, OWL.versionIRI, None)):
        return str(o)
    # Look for owl:Ontology second (_assume_ only one!)
    for s, p, o in g.triples((None, RDF.type, OWL.Ontology)):
        return str(s)
    # Else, oops
    raise Exception("Failed to find namespace!")
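# Runnable sketch for get_namespace, using a minimal inline RDF/XML document
# that declares an owl:Ontology (no owl:versionIRI, so the second lookup fires):
_demo_rdfxml = """<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:owl="http://www.w3.org/2002/07/owl#">
  <owl:Ontology rdf:about="http://example.org/vocab#"/>
</rdf:RDF>"""
print(get_namespace(_demo_rdfxml))  # -> http://example.org/vocab#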
def enrich_and_concat(files, out_file):
    graph = Graph()
    for sourcefile in files:
        if sourcefile.endswith('.nt'):
            graph.load(sourcefile, format='nt')
        elif sourcefile.endswith('.ttl'):
            graph.load(sourcefile, format='turtle')
        else:
            graph.load(sourcefile)

    logger.debug("Skosify: Enriching relations")
    skosify.infer.skos_hierarchical(graph, True)
    skosify.infer.skos_related(graph)

    with open(out_file + '.tmp', 'wb+') as handle:
        graph.serialize(handle, format='turtle')
    os.rename(out_file + '.tmp', out_file)
    return len(graph)
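# Hedged usage sketch for enrich_and_concat (requires the skosify package;
# file names are placeholders):
#
#     count = enrich_and_concat(["concepts.ttl", "extra.nt"], "merged.ttl")
#     print("wrote {} triples".format(count))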
def from_n3(s, default=None, backend=None):
    """
    Creates the Identifier corresponding to the given n3 string.
    """
    if not s:
        return default
    if s.startswith('<'):
        return URIRef(s[1:-1])
    elif s.startswith('"'):
        # TODO: would a regex be faster?
        value, rest = s.rsplit('"', 1)
        value = value[1:]  # strip leading quote
        if rest.startswith("@"):
            if "^^" in rest:
                language, rest = rest.rsplit('^^', 1)
                language = language[1:]  # strip leading at sign
            else:
                language = rest[1:]  # strip leading at sign
                rest = ''
        else:
            language = None
        if rest.startswith("^^"):
            datatype = rest[3:-1]
        else:
            datatype = None
        value = value.replace('\\"', '"').replace('\\\\', '\\')
        # Hack: this should correctly handle strings with either native unicode
        # characters, or \u1234 unicode escapes.
        value = value.encode("raw-unicode-escape").decode("unicode-escape")
        return Literal(value, language, datatype)
    elif s.startswith('{'):
        identifier = from_n3(s[1:-1])
        return QuotedGraph(backend, identifier)
    elif s.startswith('['):
        identifier = from_n3(s[1:-1])
        return Graph(backend, identifier)
    elif s.startswith("_:"):
        return BNode(s[2:])
    else:
        return BNode(s)
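# Runnable sketch for from_n3 (URIRef, Literal and BNode as imported in the
# snippets above):
print(repr(from_n3('<http://example.com/a>')))  # a URIRef
print(repr(from_n3('"chat"@fr')))               # a Literal with lang='fr'
print(repr(from_n3('_:b0')))                    # a BNode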
def remove(self, triple, context):
    """Remove the set of triples matching the pattern from the store.

    :param triple: Triple (subject, predicate, object) to remove.
    :param context:
    :returns:
    """
    # pylint: disable-msg=W0222
    # Signature differs from overridden method
    LOG.debug("-- ProxyStore.remove(triple=%s, context=%s) --", triple, context)
    Store.remove(self, triple, context)
    if triple == (None, None, None):
        # the default implementation of Graph is not efficient at removing
        # everything, so better create a new empty one
        self._graph = Graph()
    else:
        self._graph.store.remove(triple)
def writeToManifestFile(manifestPath, namespaceDict, elementUriList, elementValueList):
    """
    Write to the manifest file.

    manifestPath     -- manifest file path
    namespaceDict    -- namespaces to bind on the graph
    elementUriList   -- element URIs to be written into the manifest file
    elementValueList -- element values to be written into the manifest file
    """
    # Create an empty RDF Graph
    rdfGraph = Graph()
    subject = BNode()
    rdfGraph = bindNamespaces(rdfGraph, namespaceDict)
    # Write to the RDF Graph
    rdfGraph.add((subject, RDF.type, oxdsGroupingUri))
    for index in range(len(elementUriList)):
        rdfGraph.add((subject, elementUriList[index], Literal(elementValueList[index])))
    # Serialise it to a manifest.rdf file
    saveToManifestFile(rdfGraph, manifestPath)
    return rdfGraph
def load_from_rdf(conn, filename):
    try:
        create_table(
            conn,
            '''CREATE TABLE IF NOT EXISTS Triple
               (s text NOT NULL, p text NOT NULL, o text NOT NULL);''')
        from rdflib.graph import Graph
        g = Graph()
        g.parse(filename, format="xml")
        for subject, predicate, obj in g:
            # iterating the graph already yields only contained triples, so
            # the original's membership re-check (and its unreachable error
            # branch) is unnecessary
            print(subject, predicate, obj)
            insert(conn, subject, predicate, obj)
    except sqlite3.OperationalError:
        print(COLOR_FAIL + "\nRDF file {0} failed !\n".format(filename) + COLOR_END)
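# Hedged usage sketch for load_from_rdf (create_table and insert are helpers
# assumed to exist elsewhere in this module; "data.rdf" is a placeholder
# RDF/XML file):
#
#     import sqlite3
#     conn = sqlite3.connect("triples.db")
#     load_from_rdf(conn, "data.rdf")
#     conn.commit()
#     conn.close()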