Example #1
0
def parse(oboFile, typedefs):
    """
    Parse an OBO (ontology) file into an Ontology object.

    The lines of the file are grouped with ``groupby(f, is_data)``. Every
    group flagged as data is dispatched on its first line (the header):

    * a header containing ``[Typedef]`` registers the stanza's ``name``
      as a typedef on the ontology,
    * a header containing ``[Term]`` is turned into a term with
      ``build_term`` and added to the ontology,
    * any other group is stored as ontology metadata.

    oboFile  -- path of the OBO file to read.
    typedefs -- handed to ``get_data_as_dict`` when a ``[Term]`` stanza
                is read.

    Returns the populated Ontology.
    """
    ontology = Ontology()

    ## Use groupby to group the lines of our file into chunks of text, some
    ## stanzas, some typedefs, and other metadata to be processed
    with open(oboFile) as f:
        for (key, group) in groupby(f, is_data):
            if not key:
                continue

            # The builtin next() works on Python 2.6+ and Python 3, unlike
            # the Python 2-only ``group.next()`` method.
            header = next(group).rstrip('\n')

            if '[Typedef]' in header:
                dataDict = get_data_as_dict(group)
                ontology.add_typedef(dataDict['name'])
            elif '[Term]' in header:
                dataDict = get_data_as_dict(group, typedefs)
                ontology.add_term(build_term(dataDict))
            else:
                # We are dealing with ontology metadata that should be
                # captured in our ontology object.
                ontology.metadata.append(header)
                ontology.metadata.extend([x.strip() for x in group])

    return ontology
Example #2
0
 def decode_resource_path(self, path):
     """
     Decode a file system path into a resource location ontology.

     Returns None when path is empty, or when the decoded ontology does
     not hold both the 'directory' and 'file name' concepts. Otherwise
     returns the projection of the decoded ontology on
     ns.medium.resource.location, carrying every decoded concept plus
     the 'host' and 'domain' of this object.
     """
     if not path:
         return None

     decoded = Ontology(self.env, 'ns.medium.resource.url.decode')
     directory, file_name = os.path.split(path)
     decoded['directory'] = directory
     decoded['file name'] = file_name
     if 'file name' not in decoded or 'directory' not in decoded:
         return None

     # Normalize the directory
     # This will replace path fragments with canonic values
     decoded['directory'] = self.normalize(decoded['directory'])

     # Check if the directory resides in a volume; the last matching
     # volume wins because the loop does not stop at the first match.
     # NOTE(review): os.path.commonprefix compares character by character,
     # so a volume at '/vol/a' also matches '/vol/ab' - confirm intended.
     for volume in self.volume.element.values():
         real = volume.node['real']
         if os.path.commonprefix((real, decoded['directory'])) == real:
             decoded['volume'] = volume.key

     # If a UMID was encoded in the name, infer the home id and media kind
     # This will also trigger rule.medium.resource.filename.parse
     if 'umid' in decoded:
         umid = Umid.decode(decoded['umid'])
         if umid:
             decoded['media kind'] = umid.media_kind
             decoded['home id'] = umid.home_id

     # Make the elements of the decoded ontology kernel elements of the result
     result = decoded.project('ns.medium.resource.location')
     for key, value in decoded.iteritems():
         result[key] = value

     # set the host and domain
     result['host'] = self.host
     result['domain'] = self.domain
     return result
Example #3
0
 def set(self):
     """
     Merge the genealogy of this object's ontology into the document
     head, save the document through the resolver and fetch it again.
     """
     head = self.document['head']
     genealogy = Ontology(self.env, 'ns.service.genealogy', head['genealogy'])
     genealogy.merge_all(self.ontology['genealogy'])
     head['genealogy'] = genealogy.node

     resolver = self.env.resolver

     # persist document
     resolver.save(self.document)

     # refetch the document
     self.document = self.env.resolver.resolve(self.uri, self.ontology['query'])
Example #4
0
    def parse(self, query):
        """
        Decode every JSON source of the query and process the document.

        A source that fails to decode is logged and skipped. When the
        branch names a "process" function that exists on this object, the
        document is replaced by that function's return value. A "lookup"
        query appends one entry to query["entires"]; a "search" query
        resolves one URI per element of the branch container for every
        trigger in the branch.
        """
        for source in query["sources"]:
            try:
                document = json.load(source)
            except ValueError as e:
                self.log.warning(u"Failed to decode JSON document %s", query["remote url"])
                self.log.debug(u"Exception raised %s", unicode(e))
                continue

            if "process" in query["branch"]:
                process_name = query["branch"]["process"]
                action = getattr(self, process_name, None)
                if action is None:
                    self.log.warning(u"Ignoring unknown process function %s", process_name)
                else:
                    document = action(query, document)

            query_type = query["branch"]["query type"]

            if query_type == "lookup":
                branch = query["branch"]
                genealogy = query["parameter"].project("ns.service.genealogy")
                body = {u"original": document}
                entry = {
                    "branch": branch,
                    "record": {
                        u"head": {u"genealogy": genealogy},
                        u"body": body,
                    },
                }

                if "namespace" in branch:
                    # make a canonical node
                    canonical = Ontology(self.env, branch["namespace"])
                    body["canonical"] = canonical
                    canonical.decode_all(document, self.name)

                    # Copy indexed values from the canonical node to the genealogy
                    if "index" in branch:
                        for index in branch["index"]:
                            if index in canonical:
                                genealogy[index] = canonical[index]

                # Append the entry to the query result
                query["entires"].append(entry)

            elif query_type == "search":
                for trigger in query["branch"]["resolve"]:
                    for element in document[query["branch"]["container"]]:
                        # Decode a reference
                        reference = Ontology(self.env, trigger["namespace"])
                        reference.decode_all(element, self.name)

                        # Make a URI and trigger a resolution
                        ref = reference.project("ns.service.genealogy")
                        # Bare lookup kept as in the original; presumably it
                        # forces the concept to be resolved before the
                        # ontology is unpacked below - TODO confirm.
                        ref["language"]
                        uri = trigger["format"].format(**ref)
                        self.log.debug(u"Trigger %s resolution", uri)
                        self.resolver.resolve(uri)
Example #5
0
    def _load_mediainfo(self):
        """
        Run mediainfo on the resource path and record its streams and menus.

        The XML report is piped through ``grep -v Cover_Data`` before it is
        parsed. For every ``File/track`` node whose type is found in the
        "mediainfo stream type" enumeration, either an Ontology in the
        matching namespace is appended to the crawl stream list, or, for a
        "menu" type without a namespace, a valid normalized Menu is
        appended to the crawl menu list.

        Does nothing when the mediainfo command cannot be initialized.
        """
        command = self.env.initialize_command("mediainfo", self.log)
        if command:
            command.extend([u"--Language=raw", u"--Output=XML", u"--Full", self.ontology["path"]])
            proc_mediainfo = Popen(command, stdout=PIPE, stderr=PIPE)
            proc_grep = Popen([u"grep", u"-v", u"Cover_Data"], stdin=proc_mediainfo.stdout, stdout=PIPE)

            # Drop our copy of the pipe so grep is its only reader and
            # mediainfo receives SIGPIPE if grep exits first.
            proc_mediainfo.stdout.close()
            raw_xml = proc_grep.communicate()[0]

            # Drain and close the stderr pipe, then reap mediainfo, so that
            # neither the file handles nor a zombie child are left behind.
            proc_mediainfo.stderr.read()
            proc_mediainfo.stderr.close()
            proc_mediainfo.wait()

            # parse the DOM
            element = ElementTree.fromstring(raw_xml)
            if element is not None:
                for node in element.findall(u"File/track"):
                    if "type" in node.attrib:
                        mtype = self.env.enumeration["mediainfo stream type"].search(node.attrib["type"])
                        if mtype is not None:
                            if mtype.node["namespace"]:
                                # initialize an ontology with the correct namespace
                                o = Ontology(self.env, mtype.node["namespace"])

                                # iterate over the properties and populate the ontology
                                for item in list(node):
                                    text = item.text

                                    # decode base64 encoded element
                                    if item.attrib.get("dt") == "binary.base64":
                                        text = base64.b64decode(text)
                                        text = unicode(text, "utf8")

                                    # set the concept on the ontology
                                    o.decode(item.tag, text)

                                # fix the video encoder settings on video tracks
                                if mtype.key == "video":
                                    self._fix_mediainfo_encoder_settings(o)

                                # add the ontology to the stream stack
                                self._execution["crawl"]["stream"].append(o)

                            elif mtype.key == "menu":
                                menu = Menu(self.env)
                                for item in list(node):
                                    menu.add(Chapter.from_raw(item.tag, item.text, Chapter.MEDIAINFO))
                                menu.normalize()
                                if menu.valid:
                                    self._execution["crawl"]["menu"].append(menu)

            # Release resources held by the element, we no longer need it
            element.clear()
Example #6
0
 def transform(self, template):
     """
     Apply a template to this pivot.

     Concepts under template['override'] are written to self.location.
     For every rule under template['track'], each resource stream that
     matches one of the rule's branches is cloned, stamped with the
     resource path digest and the rule overrides, and appended to
     self.stream. In 'choose' mode only the first matching stream of the
     first matching branch is taken.

     NOTE(review): the return value is the attribute self.taken, not the
     local match flag used inside the loops - confirm this is intended.
     """
     # apply overrides on the pivot location from the template
     if 'override' in template:
         for key, value in template['override'].iteritems():
             self.location[key] = value

     # apply track rules from the template
     if 'track' in template:
         for rule in template['track']:
             for branch in rule['branch']:
                 matched = False
                 for stream in self.resource.stream:
                     if not stream.match(branch):
                         continue

                     matched = True
                     clone = Ontology.clone(stream)
                     clone['resource path digest'] = self.resource.location['path digest']
                     if 'override' in rule:
                         for key, value in rule['override'].iteritems():
                             clone[key] = value
                     self.stream.append(clone)

                     if rule['mode'] == 'choose':
                         break

                 if matched and rule['mode'] == 'choose':
                     break

     return self.taken
Example #7
0
 def _transcode_ac3(self, task):
     """
     Transcode the first audio stream found in the task's pivots with ffmpeg.

     A product is requested from the task; when one is produced, the
     ffmpeg command line is assembled from the parameters stored on the
     first stream whose 'stream kind' is 'audio', and executed if the
     product path is available. Nothing is executed when no audio stream
     is found or when the ffmpeg command cannot be initialized.
     """
     product = task.produce(task.ontology)
     if product:
         taken = False

         # Stays None when no audio stream is found or ffmpeg is unavailable,
         # so the execution step below can be skipped safely.
         command = None

         for pivot in task.transform.pivot.values():
             for stream in pivot.stream:
                 if not taken and stream['stream kind'] == 'audio':
                     taken = True

                     # Clone the hint ontology
                     product.hint = Ontology.clone(self.hint)

                     command = self.env.initialize_command('ffmpeg', self.log)
                     if command:
                         # make ffmpeg not check for overwrite, we already do this check
                         command.append(u'-y')

                         # set the number of processing threads
                         command.append(u'-threads')
                         command.append(unicode(self.env.system['threads']))

                         # set the input file
                         command.append(u'-i')
                         command.append(self.path)

                         for k,v in stream['ffmpeg parameters'].iteritems():
                             command.append(k)
                             if v is not None: command.append(unicode(v))

             if taken: break

         # Previously a failed command initialization crashed on
         # command.append; require a usable command before going on.
         if taken and command and self.env.check_path_available(product.path, task.ontology['overwrite']):
             command.append(product.path)
             message = u'Transcode {} --> {}'.format(self.path, product.path)
             self.env.execute(command, message, task.ontology['debug'], pipeout=True, pipeerr=False, log=self.log)
Example #8
0
 def tag(self, task):
     """
     Print the tag modifications needed to bring the resource meta in
     line with the stored knowledge.

     The knowledge body, merged with its genealogy, is compared with the
     projection of self.meta on the tag namespace. Every concept that
     differs or is missing in meta is collected, and those whose
     prototype has a 'subler' value are printed as a UTF-8 encoded list
     of '{subler:value}' strings.
     """
     namespace = 'ns.medium.resource.meta.tag'
     update = Ontology(self.env, namespace)
     meta = self.meta.project(namespace)
     knowledge = Ontology(self.env, namespace, self.knowledge['body'])
     genealogy = Ontology(self.env, 'ns.service.genealogy', self.knowledge['head']['genealogy'])
     knowledge.merge_all(genealogy)

     # Everything that is in meta but doesn't fit knowledge
     # should be replaced with the value in knowledge
     for concept in meta.keys():
         if meta[concept] != knowledge[concept]:
             update[concept] = knowledge[concept]

     # Everything that is in knowledge but not in meta
     # should be set to the value in knowledge
     for concept in knowledge.keys():
         if concept not in meta:
             update[concept] = knowledge[concept]

     modify = []
     for key, value in update.iteritems():
         prototype = update.namespace.find(key)
         if not prototype:
             continue
         subler = prototype.node['subler']
         if subler:
             modify.append(u'{{{}:{}}}'.format(subler, value))

     # A single parenthesized argument prints exactly like the statement form
     print(unicode(modify).encode('utf-8'))
Example #9
0
 def produce(self, override=None):
     """
     Locate a product resource for this task.

     A clone of self.location is stripped of its path concepts, given
     the host and volume of the task and a profile, optionally patched
     with a fixed set of concepts from override, and handed to the
     asset's locate_resource. A located product is appended to
     self.product; otherwise an error is logged.

     Returns whatever locate_resource returned.
     """
     # copy the location ontology
     p = Ontology.clone(self.location)

     # allow the location to recalculate those concepts
     for concept in ('volume path', 'file name', 'directory'):
         del p[concept]

     # explicitly set the volume and host from the task
     p['host'] = self.env.host
     p['volume'] = self.ontology['volume']

     action = self.ontology['action']
     if action in set(('copy', 'move', 'pack')):
         # for copy, move and pack we try to set a profile from the source
         source_profile = self.resource.meta['profile']
         if source_profile:
             p['profile'] = source_profile

     elif action == 'transcode':
         # for transcode we try to set the profile from the transform
         for pivot in self.transform.pivot.values():
             if 'profile' in pivot.location:
                 p['profile'] = pivot.location['profile']

     # whatever happened, if a profile has been explicitly provided by the task
     # it will override anything we set implicitly
     explicit_profile = self.ontology['profile']
     if explicit_profile:
         p['profile'] = explicit_profile

     # if an override was given set some concepts from it
     if override:
         for concept in set((
             'kind',
             'language',
             'stream order',
             'resource path digest',
             'routing type'
         )):
             if concept in override:
                 p[concept] = override[concept]

     # try to produce a product
     product = self.resource.asset.locate_resource(p)
     if not product:
         self.log.error(u'Could not determine destination path from:\n%s', self.env.encode_json(p))
     else:
         self.product.append(product)

     return product
Example #10
0
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's URI."""
     base = rdflib.term.URIRef(u'http://www.loc.gov/mads/rdf/v1#')
     Ontology.__init__(self, base)
Example #11
0
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's URI."""
     base = rdflib.term.URIRef(u'http://www.rddl.org/purposes#')
     Ontology.__init__(self, base)
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's Namespace."""
     base = Namespace(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#')
     Ontology.__init__(self, base)
Example #13
0
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's URI."""
     base = rdflib.term.URIRef(u'http://www.w3.org/2002/12/cal/icalSpec#')
     Ontology.__init__(self, base)
Example #14
0
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's URI."""
     base = rdflib.term.URIRef(u'http://id.loc.gov/vocabulary/identifiers/')
     Ontology.__init__(self, base)
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's Namespace."""
     base = Namespace(u'http://www.w3.org/2000/01/rdf-schema#')
     Ontology.__init__(self, base)
Example #16
0
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's URI."""
     base = rdflib.term.URIRef(u'http://www.w3.org/2007/05/powder-s#')
     Ontology.__init__(self, base)
Example #17
0
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's URI."""
     base = rdflib.term.URIRef(u'http://purl.org/dc/dcam/')
     Ontology.__init__(self, base)
Example #18
0
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's URI."""
     base = rdflib.term.URIRef(u'http://www.w3.org/ns/adms#')
     Ontology.__init__(self, base)
Example #19
0
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's URI."""
     base = rdflib.term.URIRef(u'http://webns.net/mvcb/')
     Ontology.__init__(self, base)
Example #20
0
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's URI."""
     base = rdflib.term.URIRef(u'http://www.w3.org/2004/02/skos/core#')
     Ontology.__init__(self, base)
Example #21
0
 def parse(self, query):
     """
     Decode every JSON source of the query and process its results.

     A source that fails to decode is logged and skipped. When the branch
     names a 'process' function that exists on this object, the document
     is replaced by that function's return value. Documents whose
     'resultCount' is not positive are only logged. A 'lookup' query
     appends one entry to query['entires'] for each result, built from
     the first product whose condition the result satisfies; a 'search'
     query resolves one URI per result satisfying a trigger's condition.
     """
     for source in query['sources']:
         try:
             document = json.load(source)
         except ValueError as e:
             self.log.warning(u'Failed to decode JSON document %s', query['remote url'])
             self.log.debug(u'Exception raised %s', unicode(e))
             continue

         if 'process' in query['branch']:
             # Preprocessing the entry.
             # Method should return a document similar to normal itunes api calls
             process_name = query['branch']['process']
             action = getattr(self, process_name, None)
             if action is None:
                 self.log.warning(u'Ignoring unknown process function %s', process_name)
             else:
                 document = action(document)

         if not document['resultCount'] > 0:
             self.log.debug(u'No results found for query %s', query['remote url'])
             continue

         query_type = query['branch']['query type']

         if query_type == 'lookup':
             for element in document['results']:
                 for product in query['branch']['produce']:
                     if not satisfies(element, product['condition']):
                         continue

                     branch = product['branch']
                     genealogy = Ontology(self.env, 'ns.service.genealogy')

                     # make a canonical node
                     canonical = Ontology(self.env, branch['namespace'])
                     canonical.decode_all(element, self.name)

                     # Copy indexed values from the canonical node to the genealogy
                     if 'index' in branch:
                         for index in branch['index']:
                             if index in canonical:
                                 genealogy[index] = canonical[index]

                     # Only produce once for each element
                     query['entires'].append({
                         'branch': branch,
                         'record': {
                             u'head': {u'genealogy': genealogy},
                             u'body': {u'original': element, 'canonical': canonical},
                         },
                     })
                     break

         elif query_type == 'search':
             for trigger in query['branch']['resolve']:
                 for element in document['results']:
                     if not satisfies(element, trigger['condition']):
                         continue

                     # Decode concepts from the element and populate the ontology
                     reference = Ontology(self.env, trigger['namespace'])
                     reference.decode_all(element, self.name)

                     # Make a URI and trigger a resolution
                     ref = reference.project('ns.service.genealogy')
                     # Bare lookup kept as in the original; presumably it
                     # forces the concept to be resolved before the
                     # ontology is unpacked below - TODO confirm.
                     ref['language']
                     uri = trigger['format'].format(**ref)
                     self.log.debug(u'Trigger %s resolution', uri)
                     self.resolver.resolve(uri)
Example #22
0
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's URI."""
     base = rdflib.term.URIRef(u'http://www.w3.org/1999/xhtml/vocab#')
     Ontology.__init__(self, base)
Example #23
0
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's URI."""
     base = rdflib.term.URIRef(u'http://purl.org/vocab/changeset/schema#')
     Ontology.__init__(self, base)
Example #24
0
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's URI."""
     base = rdflib.term.URIRef(u'http://rdfs.org/ns/void#')
     Ontology.__init__(self, base)
Example #25
0
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's URI."""
     base = rdflib.term.URIRef(u'http://www.w3.org/2003/01/geo/wgs84_pos#')
     Ontology.__init__(self, base)
Example #26
0
def all_url_test():
    """
    Generate ontology modules from the predicates of the global test graph.

    Loads the graph returned by test_global_data_files.get_global_data(),
    counts its subjects, predicates and objects, and creates one Ontology
    for every predicate namespace prefix that classgenerator.Library can
    map to a module. Each of those ontologies is then fetched, and a
    module is written under "gentest/" holding a class and an alias
    instance for every extracted term that has at least one rdf.type
    other than owl.Restriction, plus the imports of the prefixes it uses.
    """
    l = classgenerator.Library()
    # NOTE(review): l2 is configured here but never used afterwards.
    l2 = classgenerator.Library()
    l2.base_path = ["gentest","lib","ontologies"]                    
    #print ("test of getting global data")
    g = test_global_data_files.get_global_data()
    #print (g)
    predicates = Counter()
    subjects = Counter()
    # NOTE(review): predicate_types and predicate_types2 are never filled or read.
    predicate_types = Counter()
    predicate_types2 = Counter()
    objects = Counter()
    rebind(g)
    # Count how often each subject, predicate and object occurs in the graph.
    for x in g:
        p = x[1]
        s = x[0]
        o = x[2]
        predicates[p] += 1
        subjects[s] += 1
        objects[o] += 1
    print "predicates"

    # seen: namespace prefixes already handled; libs: import prefix -> Ontology
    seen = {}
    libs = {}    
    # Only the 4230 most frequent predicates are considered.
    for (p,v) in  predicates.most_common(4230):
        if 'openlinksw.com' in p:
            continue
        if 'cc.rww.io' in p :
            continue
        
        p2 = g.namespace_manager.qname(p)

        # The unpacking raises ValueError unless the qname holds exactly one ':'.
        (ns,term) = p2.split(':')
        m = g.namespace_manager.store.namespace(ns)

        # skip
        if str(m) =='http://www.w3.org/1999/xhtml/vocab#':
            continue
    
        if ns not in seen  :
            #print "NS",ns, m
            
            seen[ns]=1
            # Prefixes containing 'ns' get a bind statement printed for them.
            if 'ns' in ns:
                print "g.namespace_manager.bind(\"{prefix}\",\"{url}\",True)  ".format(prefix=ns,url=m)
                #pass
            path = l.get_module(ns,m)
            #print ns, m, path
            if path:
                # NOTE(review): importl is assigned but not used in this loop.
                importl = l.get_import_path(path)
                prefix = l.get_import_path(ns)
            
                #l.createpath(path)
                #l.create_module(path,prefix,url=m)
                #print "import {module} as {prefix}".format(module=importl,prefix=ns)
                # Namespace URLs that are fetched from a different location.
                replace= {
                    'http://purl.org/dc/dcam/' : 'https://raw.githubusercontent.com/dcmi/vocabtool/master/build/dcam.rdf'
                }
                
                if str(m) in replace :
                    o = replace[str(m)]
                    #print "replacing " ,m,"with", o
                    m = o

                # The format defaults to 'guess' and is forced to 'turtle' or
                # 'xml' for the namespaces listed below.
                _format = 'guess'
                turtles = [
                    'http://www.w3.org/ns/ldp#',
                    'http://usefulinc.com/ns/doap#',
                    
                ]
                
                if str(m) in turtles  :
                    _format = 'turtle'
                xmls = [
                    'http://xmlns.com/foaf/0.1/',
                    'http://www.w3.org/ns/auth/cert#',
                    'http://www.w3.org/ns/auth/acl#',
                    'http://www.w3.org/2000/10/swap/pim/doc#',
                    'http://www.w3.org/2003/06/sw-vocab-status/ns#',
                ]
                
                if str(m) in xmls  :
                    _format = 'xml'
                o = Ontology(url=m,prefix=prefix,_format=_format)
                o.set_path(path)
                #print "prefix", prefix, m
                libs[prefix]=o
                
    ## now revisit the graph and link it
    #pprint.pprint(libs)
        
    for p in libs:

        o = libs[p]
        prefix = o.prefix
        
        #print "Lib", p, o.path
        og = o.fetch(g.namespace_manager)
        rebind(og)
        od = o.extract_graph(og,l, libs)

        ours = od[0]
        # NOTE(review): others and prefixs both read od[2] and others is never
        # used - confirm that od[1] was not intended for others.
        others = od[2]
        prefixs = od[2]
        # code collects the generated class and alias ASTs, importcode the imports
        code = []

        importcode = []

        
        # create members
        used_prefixes= {}
        for x in ours :
            if 'http' in x :
                pass
            else:
                types=[]
                
                # lets try and create a class
                attrs = ours[x]
                for y in attrs:
                    # Rebinds the outer loop variable p; o was already looked
                    # up from libs before this point.
                    p = y[0]
                    s = y[1]
                    p1 = resolve(p)
                    s1 = resolve(s)
                    if p1 == 'rdf.type' :
                        if s1 == 'owl.Restriction' :
                            pass
                        else:
                            types.append(s1)
                            ## append to used types
                            #print "check prefix for import",s,s1
                            
                            used_prefixes[s[0]] =1 
                        #print "\t","pred",p1,s1

                if len(types) > 0:
                    caml= convert2(x)
                    short= convert(x)
                    # Names whose converted form starts with 'Ub' are skipped.
                    if caml.startswith('Ub'):
                        pass
                    else:
                    
                        # The collected types become the base classes of the new class.
                        classcode = ast.parse("class {_class}({base}):\n    term=\"{name}\"\n".format(
                            _class=caml,
                            name=x,
                            base=",".join(types)))
                        used_prefixes[prefix]=1
                        # NOTE(review): prefix is passed to format() but the
                        # template has no {prefix} placeholder.
                        alias_code =  ast.parse("{alias} = {_class}()\n".format(
                            prefix=prefix,
                            alias=short,
                            _class=caml))

                        code.append(classcode)
                        code.append(alias_code)


        ##### create prefixes
        for x in prefixs:
            m = prefixs[x]

            if x not in used_prefixes:
                continue # remove unused prefixes
        
            # The ontology does not import itself.
            if x == o.prefix :
                continue
            
            # AST equivalent of: from <module> import ontology as <x>
            import_module = Module(body=[ImportFrom(
                module=m.module_name(),
                names=[alias(
                name='ontology',
                    asname=x)],
                level=0)])

            #code = "from {module} import ontology as {alias}".format(module=m.module_name(), alias=x)
            # x = Import(names=[alias(
            #     name=m.module_name(),
            #     asname=None)]),
            #print(astunparse.dump(ast.parse(code)))
            importcode.append(import_module)

        
        ###
        if True:
            npath= "gentest/" + o.path
            #path = l.get_module(ns,m)
            #print ns, m, path
            # NOTE(review): importl is assigned but not used afterwards.
            importl = l.get_import_path(npath)
            #prefix = l.get_import_path(ns)
            l.createpath(npath)
            print "npath",npath
            #print code
            l.create_module(npath,o.prefix,url=o.base,members=code,imports=importcode)
Example #27
0
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's URI."""
     base = rdflib.term.URIRef(u'http://creativecommons.org/ns#')
     Ontology.__init__(self, base)
Example #28
0
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's URI."""
     base = rdflib.term.URIRef(u'https://cc.rww.io/vocab#')
     Ontology.__init__(self, base)
Example #29
0
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's URI."""
     base = rdflib.term.URIRef(u'http://www.w3.org/2003/06/sw-vocab-status/ns#')
     Ontology.__init__(self, base)
Example #30
0
 def __init__(self):
     """Initialise the base Ontology with this vocabulary's URI."""
     base = rdflib.term.URIRef(u'http://purl.org/dc/elements/1.1/')
     Ontology.__init__(self, base)