from itertools import groupby

def parse(oboFile, typedefs):
    """
    Parses an OBO (ontology file) creating a corresponding Ontology
    object composed of Term objects that relate to the terms found in
    the ontology file.
    """
    ontology = Ontology()

    ## Use groupby to group the lines of our file into chunks of text, some
    ## stanzas, some typedefs, and other metadata to be processed
    with open(oboFile) as f:
        for (key, group) in groupby(f, is_data):
            if key:
                header = group.next().rstrip('\n')
                if header.find('[Typedef]') != -1:
                    dataDict = get_data_as_dict(group)
                    ontology.add_typedef(dataDict['name'])
                elif header.find('[Term]') != -1:
                    dataDict = get_data_as_dict(group, typedefs)
                    ontology.add_term(build_term(dataDict))
                else:
                    # We are dealing with ontology metadata that should be
                    # captured in our ontology object.
                    ontology.metadata.append(header)
                    ontology.metadata.extend([x.strip() for x in group])
    return ontology
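# A minimal usage sketch for parse() above, assuming is_data(), get_data_as_dict(),
# build_term() and the Ontology class exist as referenced in the function; the
# file name and typedef list here are hypothetical.
typedefs = ['is_a', 'part_of']
ontology = parse('example.obo', typedefs)
for line in ontology.metadata:
    print line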
def decode_resource_path(self, path):
    result = None
    if path:
        decoded = Ontology(self.env, 'ns.medium.resource.url.decode')
        decoded['directory'], decoded['file name'] = os.path.split(path)
        if 'file name' in decoded and 'directory' in decoded:
            # Normalize the directory.
            # This will replace path fragments with canonic values
            decoded['directory'] = self.normalize(decoded['directory'])

            # Check if the directory resides in a volume
            for volume in self.volume.element.values():
                if os.path.commonprefix((volume.node['real'], decoded['directory'])) == volume.node['real']:
                    decoded['volume'] = volume.key

            # If a UMID was encoded in the name, infer the home id and media kind.
            # This will also trigger rule.medium.resource.filename.parse
            if 'umid' in decoded:
                umid = Umid.decode(decoded['umid'])
                if umid:
                    decoded['media kind'] = umid.media_kind
                    decoded['home id'] = umid.home_id

            # Make the elements of the decoded ontology kernel elements of the result
            result = decoded.project('ns.medium.resource.location')
            for k, v in decoded.iteritems():
                result[k] = v

            # Set the host and domain
            result['host'] = self.host
            result['domain'] = self.domain
    return result
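# A standalone sketch of the volume test used in decode_resource_path() above: a
# directory is assigned to a volume when the volume's real path is a prefix of it.
# Note that os.path.commonprefix() compares character by character, not by path
# component, so '/net/media-2' would also match '/net/media'. Paths are hypothetical.
import os.path

volume_real = '/net/media'
directory = '/net/media/movie/1080'
if os.path.commonprefix((volume_real, directory)) == volume_real:
    print 'directory resides in volume'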
def set(self):
    genealogy = Ontology(self.env, 'ns.service.genealogy', self.document['head']['genealogy'])
    genealogy.merge_all(self.ontology['genealogy'])
    self.document['head']['genealogy'] = genealogy.node

    # persist the document
    self.env.resolver.save(self.document)

    # refetch the document
    self.document = self.env.resolver.resolve(self.uri, self.ontology['query'])
def parse(self, query):
    for source in query["sources"]:
        try:
            document = json.load(source)
        except ValueError as e:
            self.log.warning(u"Failed to decode JSON document %s", query["remote url"])
            self.log.debug(u"Exception raised %s", unicode(e))
        else:
            if "process" in query["branch"]:
                action = getattr(self, query["branch"]["process"], None)
                if action is not None:
                    document = action(query, document)
                else:
                    self.log.warning(u"Ignoring unknown process function %s", query["branch"]["process"])

            if query["branch"]["query type"] == "lookup":
                entry = {
                    "branch": query["branch"],
                    "record": {
                        u"head": {u"genealogy": query["parameter"].project("ns.service.genealogy")},
                        u"body": {u"original": document},
                    },
                }

                if "namespace" in query["branch"]:
                    # Make a canonical node
                    entry["record"]["body"]["canonical"] = Ontology(self.env, entry["branch"]["namespace"])
                    entry["record"]["body"]["canonical"].decode_all(entry["record"]["body"]["original"], self.name)

                    # Copy indexed values from the canonical node to the genealogy
                    if "index" in entry["branch"]:
                        for index in entry["branch"]["index"]:
                            if index in entry["record"]["body"]["canonical"]:
                                entry["record"][u"head"][u"genealogy"][index] = entry["record"]["body"]["canonical"][index]

                # Append the entry to the query result
                query["entries"].append(entry)

            elif query["branch"]["query type"] == "search":
                for trigger in query["branch"]["resolve"]:
                    for element in document[query["branch"]["container"]]:
                        # Decode a reference
                        o = Ontology(self.env, trigger["namespace"])
                        o.decode_all(element, self.name)

                        # Make a URI and trigger a resolution
                        ref = o.project("ns.service.genealogy")
                        ref["language"]  # touch the key so a lazily resolved value, if any, is materialized before formatting
                        uri = trigger["format"].format(**ref)
                        self.log.debug(u"Trigger %s resolution", uri)
                        self.resolver.resolve(uri)
def _load_mediainfo(self):
    command = self.env.initialize_command("mediainfo", self.log)
    if command:
        command.extend([u"--Language=raw", u"--Output=XML", u"--Full", self.ontology["path"]])
        proc_mediainfo = Popen(command, stdout=PIPE, stderr=PIPE)
        proc_grep = Popen([u"grep", u"-v", u"Cover_Data"], stdin=proc_mediainfo.stdout, stdout=PIPE)
        raw_xml = proc_grep.communicate()[0]

        # parse the DOM
        element = ElementTree.fromstring(raw_xml)
        if element is not None:
            for node in element.findall(u"File/track"):
                if "type" in node.attrib:
                    mtype = self.env.enumeration["mediainfo stream type"].search(node.attrib["type"])
                    if mtype is not None:
                        if mtype.node["namespace"]:
                            # initialize an ontology with the correct namespace
                            o = Ontology(self.env, mtype.node["namespace"])

                            # iterate over the properties and populate the ontology
                            for item in list(node):
                                text = item.text

                                # decode base64 encoded element
                                if "dt" in item.attrib and item.attrib["dt"] == "binary.base64":
                                    text = base64.b64decode(text)
                                    text = unicode(text, "utf8")

                                # set the concept on the ontology
                                o.decode(item.tag, text)

                            # fix the video encoder settings on video tracks
                            if mtype.key == "video":
                                self._fix_mediainfo_encoder_settings(o)

                            # add the ontology to the stream stack
                            self._execution["crawl"]["stream"].append(o)

                        elif mtype.key == "menu":
                            menu = Menu(self.env)
                            for item in list(node):
                                menu.add(Chapter.from_raw(item.tag, item.text, Chapter.MEDIAINFO))
                            menu.normalize()
                            if menu.valid:
                                self._execution["crawl"]["menu"].append(menu)

            # Release resources held by the element, we no longer need it
            element.clear()
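# A reduced sketch of the two-process pipeline in _load_mediainfo() above: the XML
# emitted by mediainfo is piped through grep to drop the bulky Cover_Data lines
# before parsing. Assumes the mediainfo and grep binaries are on PATH; the file
# name is hypothetical.
from subprocess import Popen, PIPE

proc_mediainfo = Popen([u'mediainfo', u'--Language=raw', u'--Output=XML', u'--Full', u'example.mkv'],
                       stdout=PIPE, stderr=PIPE)
proc_grep = Popen([u'grep', u'-v', u'Cover_Data'], stdin=proc_mediainfo.stdout, stdout=PIPE)
raw_xml = proc_grep.communicate()[0]
print raw_xml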
def transform(self, template):
    # apply overrides on the pivot location from the template
    if 'override' in template:
        for k, v in template['override'].iteritems():
            self.location[k] = v

    # apply track rules from the template
    if 'track' in template:
        for rule in template['track']:
            for branch in rule['branch']:
                taken = False
                for stream in self.resource.stream:
                    if stream.match(branch):
                        taken = True
                        s = Ontology.clone(stream)
                        s['resource path digest'] = self.resource.location['path digest']
                        if 'override' in rule:
                            for k, v in rule['override'].iteritems():
                                s[k] = v
                        self.stream.append(s)
                        if rule['mode'] == 'choose':
                            break
                if taken and rule['mode'] == 'choose':
                    break
    return self.taken
def _transcode_ac3(self, task):
    product = task.produce(task.ontology)
    if product:
        taken = False
        for pivot in task.transform.pivot.values():
            for stream in pivot.stream:
                if not taken and stream['stream kind'] == 'audio':
                    taken = True

                    # Clone the hint ontology
                    product.hint = Ontology.clone(self.hint)

                    command = self.env.initialize_command('ffmpeg', self.log)
                    if command:
                        # make ffmpeg not check for overwrite, we already do this check
                        command.append(u'-y')

                        # set the number of processing threads
                        command.append(u'-threads')
                        command.append(unicode(self.env.system['threads']))

                        # set the input file
                        command.append(u'-i')
                        command.append(self.path)

                        for k, v in stream['ffmpeg parameters'].iteritems():
                            command.append(k)
                            if v is not None:
                                command.append(unicode(v))
            if taken:
                break

        if taken and self.env.check_path_available(product.path, task.ontology['overwrite']):
            command.append(product.path)
            message = u'Transcode {} --> {}'.format(self.path, product.path)
            self.env.execute(command, message, task.ontology['debug'], pipeout=True, pipeerr=False, log=self.log)
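# The ffmpeg invocation assembled in _transcode_ac3() above flattens to an argument
# list like this hypothetical one; the parameter mapping here stands in for the
# stream's 'ffmpeg parameters' ontology. -y, -threads and -i are real ffmpeg flags.
command = [u'ffmpeg', u'-y', u'-threads', u'4', u'-i', u'/in/example.mkv']
for k, v in {u'-acodec': u'ac3', u'-ab': u'640k'}.iteritems():
    command.append(k)
    if v is not None:
        command.append(unicode(v))
command.append(u'/out/example.ac3')
print u' '.join(command)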
def tag(self, task):
    update = Ontology(self.env, 'ns.medium.resource.meta.tag')
    meta = self.meta.project('ns.medium.resource.meta.tag')
    knowledge = Ontology(self.env, 'ns.medium.resource.meta.tag', self.knowledge['body'])
    genealogy = Ontology(self.env, 'ns.service.genealogy', self.knowledge['head']['genealogy'])
    knowledge.merge_all(genealogy)

    # Everything that is in meta but doesn't fit knowledge
    # should be replaced with the value in knowledge
    for i in meta.keys():
        if meta[i] != knowledge[i]:
            update[i] = knowledge[i]

    # Everything that is in knowledge but not in meta
    # should be set to the value in knowledge
    for i in knowledge.keys():
        if i not in meta:
            update[i] = knowledge[i]

    modify = []
    for k, v in update.iteritems():
        prototype = update.namespace.find(k)
        if prototype and prototype.node['subler']:
            modify.append(u'{{{}:{}}}'.format(prototype.node['subler'], v))
    print unicode(modify).encode('utf-8')
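# The update computed in tag() above is a one-directional diff; this plain-dict
# sketch (hypothetical values, .get() standing in for the Ontology's tolerant
# lookup) applies the same two rules: knowledge wins where it disagrees with
# meta, and fills in anything meta is missing.
meta = {'title': 'Old Title', 'year': 1999}
knowledge = {'title': 'New Title', 'genre': 'Drama', 'year': 1999}

update = {}
for i in meta.keys():
    if meta.get(i) != knowledge.get(i):
        update[i] = knowledge.get(i)
for i in knowledge.keys():
    if i not in meta:
        update[i] = knowledge[i]
print update  # title is replaced, genre is added, year is untouched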
def produce(self, override=None):
    # copy the location ontology
    p = Ontology.clone(self.location)

    # allow the location to recalculate those concepts
    del p['volume path']
    del p['file name']
    del p['directory']

    # explicitly set the volume and host from the task
    p['host'] = self.env.host
    p['volume'] = self.ontology['volume']

    # for copy, move and pack we try to set a profile from the source
    if self.ontology['action'] in set(('copy', 'move', 'pack')):
        if self.resource.meta['profile']:
            p['profile'] = self.resource.meta['profile']

    # for transcode we try to set the profile from the transform
    elif self.ontology['action'] == 'transcode':
        for pivot in self.transform.pivot.values():
            if 'profile' in pivot.location:
                p['profile'] = pivot.location['profile']

    # whatever happened, if a profile has been explicitly provided by the task
    # it will override anything we set implicitly
    if self.ontology['profile']:
        p['profile'] = self.ontology['profile']

    # if an override was given, set some concepts from it
    if override:
        for i in set((
            'kind',
            'language',
            'stream order',
            'resource path digest',
            'routing type',
        )):
            if i in override:
                p[i] = override[i]

    # try to produce a product
    product = self.resource.asset.locate_resource(p)
    if product:
        self.product.append(product)
    else:
        self.log.error(u'Could not determine destination path from:\n%s', self.env.encode_json(p))
    return product
def __init__(self):
    Ontology.__init__(self, rdflib.term.URIRef(u'http://www.loc.gov/mads/rdf/v1#'))

def __init__(self):
    Ontology.__init__(self, rdflib.term.URIRef(u'http://www.rddl.org/purposes#'))

def __init__(self):
    Ontology.__init__(self, Namespace(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#'))

def __init__(self):
    Ontology.__init__(self, rdflib.term.URIRef(u'http://www.w3.org/2002/12/cal/icalSpec#'))

def __init__(self):
    Ontology.__init__(self, rdflib.term.URIRef(u'http://id.loc.gov/vocabulary/identifiers/'))

def __init__(self):
    Ontology.__init__(self, Namespace(u'http://www.w3.org/2000/01/rdf-schema#'))

def __init__(self):
    Ontology.__init__(self, rdflib.term.URIRef(u'http://www.w3.org/2007/05/powder-s#'))

def __init__(self):
    Ontology.__init__(self, rdflib.term.URIRef(u'http://purl.org/dc/dcam/'))

def __init__(self):
    Ontology.__init__(self, rdflib.term.URIRef(u'http://www.w3.org/ns/adms#'))

def __init__(self):
    Ontology.__init__(self, rdflib.term.URIRef(u'http://webns.net/mvcb/'))

def __init__(self):
    Ontology.__init__(self, rdflib.term.URIRef(u'http://www.w3.org/2004/02/skos/core#'))
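# Each one-line __init__ above (and the similar ones further below) binds a
# subclass to a fixed rdflib namespace URI. A minimal sketch of the assumed
# pattern; the Ontology base class here is hypothetical, only rdflib.term.URIRef
# is the real library call:
import rdflib

class Ontology(object):
    def __init__(self, base):
        # remember the namespace root so terms can be derived from it
        self.base = base

class SKOS(Ontology):
    def __init__(self):
        Ontology.__init__(self, rdflib.term.URIRef(u'http://www.w3.org/2004/02/skos/core#'))

print SKOS().base  # http://www.w3.org/2004/02/skos/core#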
def parse(self, query):
    for source in query['sources']:
        try:
            document = json.load(source)
        except ValueError as e:
            self.log.warning(u'Failed to decode JSON document %s', query['remote url'])
            self.log.debug(u'Exception raised %s', unicode(e))
        else:
            if 'process' in query['branch']:
                # Preprocess the entry.
                # The method should return a document similar to a normal iTunes API response
                action = getattr(self, query['branch']['process'], None)
                if action is not None:
                    document = action(document)
                else:
                    self.log.warning(u'Ignoring unknown process function %s', query['branch']['process'])

            if not document['resultCount'] > 0:
                self.log.debug(u'No results found for query %s', query['remote url'])
            else:
                if query['branch']['query type'] == 'lookup':
                    for element in document['results']:
                        for product in query['branch']['produce']:
                            if satisfies(element, product['condition']):
                                entry = {
                                    'branch': product['branch'],
                                    'record': {
                                        u'head': {
                                            u'genealogy': Ontology(self.env, 'ns.service.genealogy'),
                                        },
                                        u'body': {u'original': element},
                                    },
                                }

                                # make a canonical node
                                entry['record']['body']['canonical'] = Ontology(self.env, entry['branch']['namespace'])
                                entry['record']['body']['canonical'].decode_all(entry['record']['body']['original'], self.name)

                                # Copy indexed values from the canonical node to the genealogy
                                if 'index' in entry['branch']:
                                    for index in entry['branch']['index']:
                                        if index in entry['record']['body']['canonical']:
                                            entry['record'][u'head'][u'genealogy'][index] = entry['record']['body']['canonical'][index]

                                # Only produce once for each element
                                query['entries'].append(entry)
                                break

                elif query['branch']['query type'] == 'search':
                    for trigger in query['branch']['resolve']:
                        for element in document['results']:
                            if satisfies(element, trigger['condition']):
                                # Decode concepts from the element and populate the ontology
                                o = Ontology(self.env, trigger['namespace'])
                                o.decode_all(element, self.name)

                                # Make a URI and trigger a resolution
                                ref = o.project('ns.service.genealogy')
                                ref['language']  # touch the key so a lazily resolved value, if any, is materialized before formatting
                                uri = trigger['format'].format(**ref)
                                self.log.debug(u'Trigger %s resolution', uri)
                                self.resolver.resolve(uri)
def __init__(self):
    Ontology.__init__(self, rdflib.term.URIRef(u'http://www.w3.org/1999/xhtml/vocab#'))

def __init__(self):
    Ontology.__init__(self, rdflib.term.URIRef(u'http://purl.org/vocab/changeset/schema#'))

def __init__(self):
    Ontology.__init__(self, rdflib.term.URIRef(u'http://rdfs.org/ns/void#'))

def __init__(self):
    Ontology.__init__(self, rdflib.term.URIRef(u'http://www.w3.org/2003/01/geo/wgs84_pos#'))
def all_url_test():
    l = classgenerator.Library()
    l2 = classgenerator.Library()
    l2.base_path = ["gentest", "lib", "ontologies"]
    #print ("test of getting global data")
    g = test_global_data_files.get_global_data()
    #print (g)

    predicates = Counter()
    subjects = Counter()
    predicate_types = Counter()
    predicate_types2 = Counter()
    objects = Counter()

    rebind(g)

    for x in g:
        p = x[1]
        s = x[0]
        o = x[2]
        predicates[p] += 1
        subjects[s] += 1
        objects[o] += 1

    print "predicates"
    seen = {}
    libs = {}
    for (p, v) in predicates.most_common(4230):
        if 'openlinksw.com' in p:
            continue
        if 'cc.rww.io' in p:
            continue
        p2 = g.namespace_manager.qname(p)
        (ns, term) = p2.split(':')
        m = g.namespace_manager.store.namespace(ns)

        # skip
        if str(m) == 'http://www.w3.org/1999/xhtml/vocab#':
            continue

        if ns not in seen:
            #print "NS", ns, m
            seen[ns] = 1
            if 'ns' in ns:
                print "g.namespace_manager.bind(\"{prefix}\",\"{url}\",True) ".format(prefix=ns, url=m)
                #pass

            path = l.get_module(ns, m)
            #print ns, m, path
            if path:
                importl = l.get_import_path(path)
                prefix = l.get_import_path(ns)
                #l.createpath(path)
                #l.create_module(path, prefix, url=m)
                #print "import {module} as {prefix}".format(module=importl, prefix=ns)

                replace = {
                    'http://purl.org/dc/dcam/': 'https://raw.githubusercontent.com/dcmi/vocabtool/master/build/dcam.rdf'
                }
                if str(m) in replace:
                    o = replace[str(m)]
                    #print "replacing", m, "with", o
                    m = o

                _format = 'guess'
                turtles = [
                    'http://www.w3.org/ns/ldp#',
                    'http://usefulinc.com/ns/doap#',
                ]
                if str(m) in turtles:
                    _format = 'turtle'

                xmls = [
                    'http://xmlns.com/foaf/0.1/',
                    'http://www.w3.org/ns/auth/cert#',
                    'http://www.w3.org/ns/auth/acl#',
                    'http://www.w3.org/2000/10/swap/pim/doc#',
                    'http://www.w3.org/2003/06/sw-vocab-status/ns#',
                ]
                if str(m) in xmls:
                    _format = 'xml'

                o = Ontology(url=m, prefix=prefix, _format=_format)
                o.set_path(path)
                #print "prefix", prefix, m
                libs[prefix] = o

    ## now revisit the graph and link it
    #pprint.pprint(libs)
    for p in libs:
        o = libs[p]
        prefix = o.prefix
        #print "Lib", p, o.path
        og = o.fetch(g.namespace_manager)
        rebind(og)
        od = o.extract_graph(og, l, libs)
        ours = od[0]
        others = od[2]
        prefixs = od[2]
        code = []
        importcode = []

        # create members
        used_prefixes = {}
        for x in ours:
            if 'http' in x:
                pass
            else:
                types = []
                # lets try and create a class
                attrs = ours[x]
                for y in attrs:
                    p = y[0]
                    s = y[1]
                    p1 = resolve(p)
                    s1 = resolve(s)
                    if p1 == 'rdf.type':
                        if s1 == 'owl.Restriction':
                            pass
                        else:
                            types.append(s1)
                            ## append to used types
                            #print "check prefix for import", s, s1
                            used_prefixes[s[0]] = 1
                    #print "\t", "pred", p1, s1

                if len(types) > 0:
                    caml = convert2(x)
                    short = convert(x)
                    if caml.startswith('Ub'):
                        pass
                    else:
                        classcode = ast.parse("class {_class}({base}):\n term=\"{name}\"\n".format(
                            _class=caml,
                            name=x,
                            base=",".join(types)))
                        used_prefixes[prefix] = 1
                        alias_code = ast.parse("{alias} = {_class}()\n".format(
                            prefix=prefix,
                            alias=short,
                            _class=caml))
                        code.append(classcode)
                        code.append(alias_code)

        ##### create prefixes
        for x in prefixs:
            m = prefixs[x]
            if x not in used_prefixes:
                continue  # remove unused prefixes
            if x == o.prefix:
                continue
            import_module = Module(body=[ImportFrom(
                module=m.module_name(),
                names=[alias(
                    name='ontology',
                    asname=x)],
                level=0)])
            #code = "from {module} import ontology as {alias}".format(module=m.module_name(), alias=x)
            # x = Import(names=[alias(
            #     name=m.module_name(),
            #     asname=None)])
            #print(astunparse.dump(ast.parse(code)))
            importcode.append(import_module)

        ###
        if True:
            npath = "gentest/" + o.path
            #path = l.get_module(ns, m)
            #print ns, m, path
            importl = l.get_import_path(npath)
            #prefix = l.get_import_path(ns)
            l.createpath(npath)
            print "npath", npath
            #print code
            l.create_module(npath, o.prefix, url=o.base, members=code, imports=importcode)
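# A minimal sketch of the ast-based code generation used in all_url_test() above:
# a class definition is parsed from text, compiled, and executed into the current
# namespace. The class name and term value are hypothetical.
import ast

classcode = ast.parse('class Person(object):\n    term = "foaf:Person"\n')
exec compile(classcode, '<generated>', 'exec')
print Person.term  # foaf:Person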
def __init__(self):
    Ontology.__init__(self, rdflib.term.URIRef(u'http://creativecommons.org/ns#'))

def __init__(self):
    Ontology.__init__(self, rdflib.term.URIRef(u'https://cc.rww.io/vocab#'))

def __init__(self):
    Ontology.__init__(self, rdflib.term.URIRef(u'http://www.w3.org/2003/06/sw-vocab-status/ns#'))

def __init__(self):
    Ontology.__init__(self, rdflib.term.URIRef(u'http://purl.org/dc/elements/1.1/'))