Example #1
    def enterNewAnnotationMenu(self):
        '''
        Interactive input for a new annotation
        '''

        self.printNewAnnotationMenu()

        i = 1
        for year in self.yearsAnnotations:
            print '{}) {}'.format(i, year["year"])
            i += 1
        print

        year = raw_input('Table to annotate: ')
        cell = raw_input('Cell to annotate: ')
        author = raw_input('Author: ')
        corrected = raw_input('Corrected value (leave blank if none): ')
        flag = raw_input('Flag: ')

        graphURI = URIRef(self.yearsAnnotations[int(year) - 1]["uri"])
        d2sGraphURI = graphURI.replace("cedar-project.nl",
                                       "www.data2semantics.org")
        annoURI = URIRef(d2sGraphURI + '/NOORDBRABANT/' + cell)
        cellURI = annoURI.replace("annotations", "data")

        # Create the new annotation
        query = """
            PREFIX oa: <http://www.w3.org/ns/openannotation/core/>
            PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

            INSERT INTO GRAPH <""" + graphURI + """>
            {
                <""" + annoURI + """> a oa:Annotation;
                oa:annotated \"""" + str(
            datetime.datetime.now().strftime("%Y-%m-%d")) + """\"^^xsd:date;
                oa:annotator \"""" + author + """\";
                oa:generated \"""" + str(datetime.datetime.now().strftime(
                "%Y-%m-%d")) + """\"^^xsd:date;
                oa:generator <https://cedar-project.nl/tools/cedar-demo.py>;
                oa:hasBody [ rdf:value \"""" + corrected + ' ' + flag + """\" ];
                oa:hasTarget <""" + cellURI + """>;
                oa:modelVersion <http://www.openannotation.org/spec/core/20120509.html> .
            }
        """

        # query = "INSERT INTO GRAPH <http://cedar-project.nl/annotations/VT_1859_01_H1> {<http://a> rdf:type <http:b>}"

        print query

        self.sparql.setQuery(query)

        self.sparql.setReturnFormat(JSON)
        self.results = self.sparql.query().convert()
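For comparison, here is a minimal sketch that builds the same annotation triples with rdflib's graph API instead of the string-concatenated SPARQL above. The build_annotation_graph helper and its parameters are hypothetical; the OA namespace URI is the one declared in the query's PREFIX, and the resulting graph would still have to be serialized and pushed to the endpoint separately.

from datetime import date
from rdflib import BNode, Graph, Literal, Namespace, URIRef
from rdflib.namespace import RDF, XSD

OA = Namespace("http://www.w3.org/ns/openannotation/core/")

def build_annotation_graph(anno_uri, cell_uri, author, body_text):
    # Hypothetical helper mirroring the INSERT template above with rdflib calls.
    g = Graph()
    anno = URIRef(anno_uri)
    body = BNode()
    today = Literal(date.today().isoformat(), datatype=XSD.date)
    g.add((anno, RDF.type, OA.Annotation))
    g.add((anno, OA.annotated, today))
    g.add((anno, OA.annotator, Literal(author)))
    g.add((anno, OA.generated, today))
    g.add((anno, OA.generator, URIRef("https://cedar-project.nl/tools/cedar-demo.py")))
    g.add((anno, OA.hasBody, body))
    g.add((body, RDF.value, Literal(body_text)))
    g.add((anno, OA.hasTarget, URIRef(cell_uri)))
    g.add((anno, OA.modelVersion, URIRef("http://www.openannotation.org/spec/core/20120509.html")))
    return g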
Example #2
def changeUrls(graph, urlFrom, urlTo):
    for s, p, o in graph.triples((None, None, None)):
        if urlFrom in s:
            graph.remove((s, p, o))
            s = URIRef(s.replace(urlFrom, urlTo))
            graph.add((s, p, o))
        if urlFrom in p:
            graph.remove((s, p, o))
            p = URIRef(p.replace(urlFrom, urlTo))
            graph.add((s, p, o))
        if urlFrom in o:
            graph.remove((s, p, o))
            o = URIRef(o.replace(urlFrom, urlTo))
            graph.add((s, p, o))
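A minimal usage sketch of the rewrite pattern above, assuming an in-memory rdflib Graph and a hypothetical pair of example namespaces; the triples are snapshotted with list() first so the graph is not mutated while it is still being iterated.

from rdflib import Graph, Literal, URIRef

g = Graph()
g.add((URIRef("http://old.example.org/s"), URIRef("http://old.example.org/p"), Literal("x")))

# Snapshot the triples before mutating the graph.
for s, p, o in list(g.triples((None, None, None))):
    if "http://old.example.org/" in s:
        g.remove((s, p, o))
        s = URIRef(s.replace("http://old.example.org/", "http://new.example.org/"))
        g.add((s, p, o))

print(list(g))  # the subject now starts with http://new.example.org/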
Example #3
 def change_urls_in_result(self, graph):
     for s, p, o in graph.triples((None, None, None)):
         if self._remote_tests_location in s:
             graph.remove((s, p, o))
             s = URIRef(s.replace(self._remote_tests_location, self._tests_location))
             graph.add((s, p, o))
         if self._remote_tests_location in p:
             graph.remove((s, p, o))
             p = URIRef(p.replace(self._remote_tests_location, self._tests_location))
             graph.add((s, p, o))
         if self._remote_tests_location in o:
             graph.remove((s, p, o))
             o = URIRef(o.replace(self._remote_tests_location, self._tests_location))
             graph.add((s, p, o))
Example #4
 def get_gw_resource(rid):
     r_uri = URIRef(proxy.base + '/' + rid)
     if cache is not None:
         g, ttl = cache.create(gid=r_uri,
                               loader=proxy.load,
                               format='text/turtle')
         headers = {'Cache-Control': 'max-age={}'.format(ttl)}
     else:
         g, headers = proxy.load(r_uri, **request.args)
     return serialize(g, uri=URIRef(r_uri.replace('=', '%3D'))), headers
Example #5
File: api.py Project: fserena/rdf2rest
def get_service():
    g = new_graph()
    me = URIRef(url_for('get_service', _external=True))
    g.add((me, RDF.type, SERVICE_TYPE))
    for db_resource in service_graph.subjects(RDF.type, PARTITION.Root):
        db_resource = URIRef(
            url_for('get_resource', rid=db_resource.replace(URI_PREFIX, ""), _external=True))
        g.add((me, CONTAINMENT_LINK, db_resource))
    response = make_response(g.serialize(format='turtle'))
    response.headers['Content-Type'] = 'text/turtle'
    return response
Example #6
 def relativize(self, uri):
     base = URIRef(self.base)
     basedir = URIRef(self.base if base.endswith('/') else base.rsplit('/', 1)[0])
     if base is not None:
         if uri == base:
             uri = URIRef('')
         elif uri == basedir:
             uri = URIRef('.')
         elif uri.startswith(basedir + '/'):
             uri = URIRef(uri.replace(basedir + '/', "", 1))
     return uri
Example #7
def _formatNodeURIRef(uriref, anno_uri, body_uri):
    '''
        Rewrite a URIRef according to the node configuration
        * uriref:rdflib.URIRef 
        * anno_uri:String as hexadecimal 
        * body_uri:String as hexadecimal
    ''' 
    if isinstance(uriref, URIRef) and NODE_URI in uriref:
        uriref = URIRef(uriref.replace(NODE_URI,
                               getattr(settings,
                                       'NODE_URI', 
                                       NODE_URI) + '/'))

    if isinstance(uriref, URIRef) and CH_NODE in uriref:
        uriref = URIRef(uriref.replace(CH_NODE + ':', 
                                       getattr(settings, 'NODE_URI', NODE_URI) + '/'))        
    if isinstance(uriref, URIRef) and ANNO_URI in uriref:
        uriref = URIRef(uriref.replace(ANNO_URI, "resource/" + anno_uri))
    if isinstance(uriref, URIRef) and BODY_URI in uriref:
        uriref = URIRef(uriref.replace(BODY_URI, "resource/" + body_uri))    
    return uriref
Example #8
    def _update_namespace(self):
        # updating the namespace
        OLD = "file://" + os.path.join(os.path.dirname(__file__), 'data',
                                       WIT_FILENAME)
        NEW = "http://www.w3.org/2005/Incubator/webid/earl/%s#" % WIT

        for cid, _, source in self.g.triples((None, None, None)):
            if source:
                try:
                    context = self.g.get_context(cid)
                    for s, p, o in context:
                        context.remove((s, p, o))
                        if isinstance(s, URIRef) and OLD in s:
                            s = URIRef(s.replace(OLD, NEW))
                        if isinstance(p, URIRef) and OLD in p:
                            p = URIRef(p.replace(OLD, NEW))
                        if isinstance(o, URIRef) and OLD in o:
                            o = URIRef(o.replace(OLD, NEW))
                        context.add((s, p, o))
                except Exception as e:
                    # print e
                    raise e
Example #9
    def tableView_objectValueForTableColumn_row_(self, tableView, tableColumn, row):
        id = tableColumn.identifier()
        uri = self.resources[row]
        if id == "uri":
            base = self.context
            base = base.split("#", 1)[0]
            uri = URIRef(uri.replace(base, "", 1))  # relativize
            return uri
        elif id == "label":
            return self.redfoot.label(uri, "")
        elif id == "comment":
            return self.redfoot.comment(uri, "")
        else:
            return ""
Example #10
    def clean_actor_uris(self, uris) -> Graph or None:
        cleaned_graph = Graph()
        self.__sparql.setQuery(
            Queries.lmdb_actors_to_dbpedia_movie_uri_pairs())
        response = self.__sparql.query().convert()
        pairs = list(
            map(Stardog.__to_movie_clean_uri_pair,
                response["results"]["bindings"]))

        if len(pairs) == 0:
            return None

        for lmdb_resource, dbp_uri in pairs:
            lmdb_resource = URIRef(lmdb_resource)
            dbp_uri = URIRef(
                dbp_uri.replace('"', "").replace("'", "").replace("`", ""))
            cleaned_graph.add((lmdb_resource, namespace.OWL.sameAs, dbp_uri))
        cleaned_graph.serialize('Data/CleanedLMDBActorSameAs.ttl',
                                format='turtle')

        return cleaned_graph  # It is now safe to import the cleaned data using stardog.
Example #11
class Cluster:
    def __init__(self, model, uri):
        self.model = model
        self.uri = URIRef(uri)
        self.__prototype = None
        self.__type = None
        self.__members = []
        self.__forward = None
        self.__backward = None
        self.__targets = None
        self.__selected_targets = None
        self.__target_wiki = None
        self.__freebases = None
        self.__qids = Counter()
        self.__selected_qnodes = None
        self.__q_urls = {}
        self.__groundtruth = None
        self.__debug_info = None
        self.__all_labels = None

        if model.graph:
            self.__open_clause = 'GRAPH <%s> {' % self.model.graph
            self.__close_clause = '}'
        else:
            self.__open_clause = self.__close_clause = ''

    @property
    def href(self):
        res = self.uri.replace('http://www.isi.edu/gaia',
                               '/cluster').replace('http://www.columbia.edu',
                                                   '/cluster')
        res = res.replace('/entities/', '/entities/' + self.model.repo + '/')
        res = res.replace('/events/', '/events/' + self.model.repo + '/')
        if self.model.graph:
            res = res + '?g=' + self.model.graph
        return res

    @property
    def label(self):
        if self.uri in self.model.pickled and 'label' in self.model.pickled[
                self.uri]:
            return self.model.pickled[self.uri]['label']
        return self.prototype.label

    @property
    def all_labels(self):
        if not self.__all_labels:
            self.__all_labels = Counter()
            for m in self.members:
                for l, c in m.all_labels:
                    if l in self.__all_labels:
                        self.__all_labels[l] += c
                    else:
                        self.__all_labels[l] = c
        return self.__all_labels.most_common()

    @property
    def prototype(self):
        if not self.__prototype:
            self._init_cluster_prototype()
        return self.__prototype

    @property
    def type(self):
        if self.uri in self.model.pickled and 'type' in self.model.pickled[
                self.uri]:
            return self.model.pickled[self.uri]['type']
        if not self.__type:
            self._init_cluster_prototype()
        return self.__type

    @property
    def members(self):
        if not self.__members:
            self._init_cluster_members()
        return self.__members

    @property
    def targets(self):
        if self.__targets is None:
            self._init_cluster_members()
        return self.__targets.most_common()

    @property
    def selected_targets(self):
        if self.__selected_targets is None:
            self.__selected_targets = self.debug_info.selected_targets
        return self.__selected_targets

    def get_target_stats(self, target):
        return self.debug_info.target_statistics[target]

    @property
    def target_wiki(self):
        if self.__target_wiki is None:
            self._init_cluster_members()
        return self.__target_wiki

    @property
    def freebases(self):
        if self.__freebases is None:
            self._init_cluster_members()
        return self.__freebases.most_common()

    @property
    def targetsSize(self):
        return len(self.targets)

    @property
    def qids(self):
        if not self.__qids:
            self._init_qnodes()
        return self.__qids.most_common()

    @property
    def selected_qnodes(self):
        if not self.__selected_qnodes:
            self.__selected_qnodes = self.debug_info.selected_qnodes
        return self.__selected_qnodes

    def get_qnode_stats(self, qurl):
        if qurl in self.debug_info.qnode_statistics:
            return self.debug_info.qnode_statistics[qurl]
        else:
            return None

    @property
    def q_urls(self):
        if not self.__q_urls:
            self._init_qnodes()
        return self.__q_urls

    @property
    def size(self):
        if self.__members:
            return len(self.__members)
        return self._query_for_size()

    @property
    def forward(self):
        if self.__forward is None:
            self.__forward = set()
            self._init_forward_clusters()
        return self.__forward

    @property
    def backward(self):
        if self.__backward is None:
            self.__backward = set()
            self._init_backward_clusters()
        return self.__backward

    @property
    def neighbors(self):
        return self.forward | self.backward

    def neighborhood(self, hop=1):
        if hop == 1 and self.prototype.type != AIDA.Relation:
            hood = self.neighbors
            # for neighbor in [x for x in self.neighbors if x.subject.proto]
            for neighbor in self.neighbors:
                if neighbor.subject.prototype.type == AIDA.Relation:
                    hood |= neighbor.subject.neighbors
            return hood
        if hop <= 1:
            return self.neighbors
        hood = set()
        for neighbor in self.neighbors:
            hood |= neighbor.subject.neighborhood(hop - 1)
            hood |= neighbor.object.neighborhood(hop - 1)
        return hood

    @property
    def img(self):
        import os.path
        _, name = split_uri(self.uri)
        svgpath = 'static/img/' + name + '.svg'
        if os.path.isfile(svgpath):
            return name

        from graph import SuperEdgeBasedGraph
        graph = SuperEdgeBasedGraph(self.model, self.neighborhood(), self,
                                    self.uri)
        path = graph.dot()
        return graph.name

    @classmethod
    def ask(cls, sparql, graph, uri):
        if graph:
            open_clause = 'GRAPH <%s> {' % graph
            close_clause = '}'
        else:
            open_clause = close_clause = ''
        query = "ASK { %s ?cluster a aida:SameAsCluster %s}" % (open_clause,
                                                                close_clause)
        for ans in sparql.query(query, namespaces, {'cluster': URIRef(uri)}):
            return ans
        return False

    @property
    def groundtruth(self):
        if self.__groundtruth is None:
            self._init_groundtruth()
        return self.__groundtruth

    @property
    def has_debug(self):
        return debug.has_debug(self.model.repo, self.model.graph)

    @property
    def debug_info(self):
        if self.__debug_info is None:
            if debug.has_debug(self.model.repo, self.model.graph):
                self._init_debug_info()
            else:
                self.__debug_info = False
        return self.__debug_info

    def _init_cluster_prototype(self):
        query = """
SELECT ?prototype (MIN(?label) AS ?mlabel) ?type ?category
WHERE {
    %s
    ?cluster aida:prototype ?prototype .
    ?prototype a ?type .
    OPTIONAL { ?prototype aida:hasName ?label } .
    OPTIONAL { ?statement a rdf:Statement ;
               rdf:subject ?prototype ;
               rdf:predicate rdf:type ;
               rdf:object ?category ; }
    %s
}
GROUP BY ?prototype ?type ?category """ % (self.__open_clause,
                                           self.__close_clause)
        for prototype, label, type_, cate in self.model.sparql.query(
                query, namespaces, {'cluster': self.uri}):
            if not label and cate:
                _, label = split_uri(cate)
            self.__prototype = ClusterMember(self.model, prototype, label,
                                             type_)
            self.__type = cate

    def _init_cluster_members(self):
        self.__targets = Counter()
        self.__target_wiki = {}
        self.__freebases = Counter()
        query = """
SELECT ?member (MIN(?label) AS ?mlabel) ?type
WHERE {
    %s
    ?membership aida:cluster ?cluster ;
                aida:clusterMember ?member .
    MINUS {?cluster aida:prototype ?member}
    %s
    OPTIONAL { ?member aida:hasName ?label } .
    OPTIONAL {?statement a rdf:Statement ;
              rdf:subject ?member ;
              rdf:predicate rdf:type ;
              rdf:object ?type }.
     
}
GROUP BY ?member ?type """ % (self.__open_clause, self.__close_clause)
        for member, label, type_ in self.model.sparql.query(
                query, namespaces, {'cluster': self.uri}):
            m = ClusterMember(
                model=self.model,
                uri=str(member),
                label=label,
                type_=type_,
                debug_info=self.debug_info.members[str(member)]['raw_object'])
            self.__members.append(m)
            for target in m.targets.keys():
                self.__targets[target] += 1
            for freebase in m.freebases.keys():
                self.__freebases[freebase] += 1

        query = '''
SELECT ?qnode ?qnodeLabel 
WHERE 
{
    ?qnode wdt:P1566 ?target .
    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
} '''
        for target in self.__targets.keys():
            target_t = target[target.index(':') + 1:]
            for qnode, qnodeLabel in wikidata_sparql.query(
                    query, namespaces, {'target': Literal(target_t)}):
                url = str(qnode)
                qnode = url[url.rfind('/') + 1:]
                self.__target_wiki[target] = {}
                self.__target_wiki[target]['qnode'] = qnode
                self.__target_wiki[target]['url'] = url
                self.__target_wiki[target]['label'] = str(qnodeLabel)

    def _init_qnodes(self):
        for fbid, count in self.freebases:
            if ":NIL" not in fbid:
                fbid = '/' + fbid.replace('.', '/')
                query = """
                    SELECT ?qid ?label WHERE {
                      ?qid wdt:P646 ?freebase .  
                      ?qid rdfs:label ?label filter (lang(?label) = "en") .
                    }
                    LIMIT 1
                """
                for qid, label in wikidata_sparql.query(
                        query, namespaces, {'freebase': Literal(fbid)}):
                    qnodeURL = str(qid)
                    qid = qnodeURL.rsplit('/', 1)[1]
                    self.__qids[qid] = count
                    if qid not in self.__q_urls:
                        self.__q_urls[qid] = qnodeURL

    def _init_groundtruth(self):
        # query to find cluster of the missing member
        query = '''
            SELECT ?cluster 
            WHERE {
                %s
                ?membership aida:cluster ?cluster ;
                aida:clusterMember ?member .
                %s
            }
        ''' % (self.__open_clause, self.__close_clause)

        member_set = set([str(m.uri) for m in self.members])
        gt_set = set()
        for m in member_set:
            if self.model.graph:
                res = requests.get(groundtruth_url + '/' + self.model.repo +
                                   '?g=' + self.model.graph + '&e=' + m)
            else:
                res = requests.get(groundtruth_url + '/' + self.model.repo +
                                   '?e=' + m)
            if res.status_code == 404:
                self.__groundtruth = False
                return
            if len(res.json()) > 0:
                gt_set = set(res.json())
                break

        if len(gt_set) > 0:
            hit = member_set.intersection(gt_set)
            miss = member_set.difference(gt_set)
            missing = gt_set.difference(member_set)
            missing_dict = {}

            if missing:
                for m in missing:
                    for c, in self.model.sparql.query(query, namespaces,
                                                      {'member': URIRef(m)}):
                        missing_dict[m] = str(c).replace(
                            'http://www.isi.edu/gaia/entities/', '')

            self.__groundtruth = Groundtruth(gt_set, hit, miss, missing_dict)

        else:
            self.__groundtruth = False

    def _init_debug_info(self):
        info = debug.get_debug_for_cluster(self.model.repo, self.model.graph,
                                           str(self.uri))
        if info:
            self.__debug_info = DebugInfo(info)
        else:
            self.__debug_info = False

    def _init_forward_clusters(self):
        query = """
SELECT ?p ?o ?cnt
WHERE {
    %s
  ?s aida:prototype ?proto1 .
  ?o aida:prototype ?proto2 .
  ?se rdf:subject ?proto1 ;
      rdf:predicate ?p ;
      rdf:object ?proto2 ;
      aida:confidence/aida:confidenceValue ?conf .
  BIND(ROUND(1/(2*(1-?conf))) as ?cnt)
  %s
} """ % (self.__open_clause, self.__close_clause)
        for p, o, cnt in self.model.sparql.query(query, namespaces,
                                                 {'s': self.uri}):
            self.__forward.add(
                SuperEdge(self, Cluster(self.model, o), p,
                          int(float(str(cnt)))))

    def _init_backward_clusters(self):
        query = """
SELECT ?s ?p ?cnt
WHERE {
    %s
  ?s aida:prototype ?proto1 .
  ?o aida:prototype ?proto2 .
  ?se rdf:subject ?proto1 ;
      rdf:predicate ?p ;
      rdf:object ?proto2 ;
      aida:confidence/aida:confidenceValue ?conf .
  BIND(ROUND(1/(2*(1-?conf))) as ?cnt)
    %s
} """ % (self.__open_clause, self.__close_clause)
        for s, p, cnt in self.model.sparql.query(query, namespaces,
                                                 {'o': self.uri}):
            self.__backward.add(
                SuperEdge(Cluster(self.model, s), self, p,
                          int(float(str(cnt)))))

    def _query_for_size(self):
        if self.uri in self.model.pickled and 'size' in self.model.pickled[
                self.uri]:
            return self.model.pickled[self.uri]['size']
        query = """
SELECT (COUNT(?member) AS ?size)
WHERE {
    %s
    ?membership aida:cluster ?cluster ;
                aida:clusterMember ?member .
    MINUS {?cluster aida:prototype ?member}
    %s
}  """ % (self.__open_clause, self.__close_clause)
        for size, in self.model.sparql.query(query, namespaces,
                                             {'cluster': self.uri}):
            return int(size)
        return 0

    def __hash__(self):
        return self.uri.__hash__()

    def __eq__(self, other):
        return isinstance(other, Cluster) and str(self.uri) == str(other.uri)
Example #12
File: converter.py Project: wbap/bifd
def main(args):
    # load graph
    g = rdflib.Graph()
    g.parse(args.input, publicID=URI_TMP, format="xml")

    # Build a graph containing no triples from the header file, then add triples to it
    g2 = rdflib.Graph()
    g2.parse(args.header, publicID=URI_TMP, format="xml")

    # bifd.owl
    g3 = rdflib.Graph()
    g3.parse(args.bifd, publicID=URI_TMP, format="xml")

    convert_uris = load_dict(args.subject)
    convert_ps = load_dict(args.predicate)
    convert_ps["https://wba-initiative.org/bifd/label"] = str(RDFS.label)

    # 処理対象のクラスの抽出 このうちのs.tsvに記載のあるものしか最終出力に含めない
    query_class = g.query(
        """SELECT ?class
        WHERE {
        ?class rdf:type owl:Class.
        }
        """)

    keep_s = set()

    for c in query_class:
        keep_s.add(c[0])

    query_references = g.query(
        """SELECT ?uri ?p ?v 
        WHERE {
        ?uri rdf:type swivt:Subject.
        ?uri ?p ?v.
        filter (?p in (property:BibTex-3Ahas_doi, URI("https://wba-initiative.org/noprefix/URLhas"), rdfs:label))
        filter (strstarts(str(?uri), "http://183.181.89.140/mediawiki/index.php/Special:URIResolver/-2A"))
        } 
        """)

    references = {}
    references_val = {}
    references_s_o = {}

    for x in query_references:
        p = str(x[1])
        if x[0] not in references:
            references[x[0]] = [p]
        else:
            references[x[0]].append(p)
        references_val["{}\t{}".format(str(x[0]), str(x[1]))] = str(x[2])
    for k in references.keys():
        predicates = []
        for p in references[k]:
            predicates.append(p)
        if "http://183.181.89.140/mediawiki/index.php/Special:URIResolver/Property-3ABibTex-3Ahas_doi" in predicates:
            o = references_val["{}\t{}".format(str(k), "http://183.181.89.140/mediawiki/index.php/Special:URIResolver/Property-3ABibTex-3Ahas_doi")]
            references_s_o[str(k.split("/")[-1])] = o
            continue
        if "https://wba-initiative.org/noprefix/URLhas" in predicates:
            o = references_val["{}\t{}".format(str(k), "https://wba-initiative.org/noprefix/URLhas")]
            references_s_o[str(k.split("/")[-1])] = o
            continue
        if str(RDFS.label) in predicates:
            o = references_val["{}\t{}".format(str(k), str(RDFS.label))]
            references_s_o[str(k.split("/")[-1])] = o
            continue
        print("Error: no info for references provided.")
        exit(1)
    obo_id_dict = {}

    for s, p, o in g:
        if s not in keep_s:
            continue
        if str(s) in convert_uris.keys():
            s = URIRef(convert_uris[str(s)])
        if str(o) in convert_uris.keys():
            o = URIRef(convert_uris[str(o)])
        if str(p) in convert_ps.keys():
            p = URIRef(convert_ps[str(p)])
        if str(p) == "http://183.181.89.140/mediawiki/index.php/Special:URIResolver/Property-3AOBO_ID":
            obo_id_dict[str(s)] = str(o)
        if str(s) in convert_uris.values() and (p == RDFS.subClassOf or p == RDFS.label or str(p).startswith(BIFD_PREFIX) or o == OWL.Class):
            g2.add((s, p, o))

    for s, p, o in g:
        if s not in keep_s:
            continue
        if str(s) in convert_uris.keys():
            s = URIRef(convert_uris[str(s)])
        if str(p) == "http://183.181.89.140/mediawiki/index.php/Special:URIResolver/Property-3AOBO_ID":
            if str(s) in obo_id_dict.keys():
                reg = re.compile(r'^[a-zA-Z_][\w.-]*$')
                if reg.match(obo_id_dict[str(s)]): # check if it results in a valid uri
                    if str(s) in convert_uris.values():
                        g2.add((s, OWL.sameAs, URIRef("http://purl.obolibrary.org/obo/{}".format(obo_id_dict[str(s)]))))

    query_object_property = g3.query(
        """SELECT ?op
        WHERE {
        ?op rdf:type owl:ObjectProperty.
        }""")

    object_properties = set()
    for res in query_object_property:
        p = str(res[0]).strip("/")
        object_properties.add(p)
    for s, p, o in g2:
        if str(p) == 'https://wba-initiative.org/bifd/reference':
            k = o.replace("http://183.181.89.140/mediawiki/index.php/Special:URIResolver/", '')
            if k in references_s_o.keys():
                g2.add((s, p, Literal(references_s_o[k], datatype=XSD.string)))
            g2.remove((s, p, o))

        if str(p) == 'https://wba-initiative.org/bifd/taxon':
            g2.add((s, p, Literal("http://purl.obolibrary.org/obo/{}".format(obo_id_dict[str(o)]), datatype=XSD.string)))

        if str(p) in convert_ps.values() and p != RDFS.label and str(p) in object_properties:  # convert property restrictions
            if str(p) == "https://wba-initiative.org/bifd/transmitter" or str(p) == "https://wba-initiative.org/bifd/modType":
                continue
            g2.remove((s, p, o))
            blank_node = BNode()
            g2.add((s, RDFS.subClassOf, blank_node))
            g2.add((blank_node, RDF.type, OWL.Restriction))
            g2.add((blank_node, OWL.onProperty, p))
            g2.add((blank_node, OWL.someValuesFrom, o))

    for s, p, o in g2:
        if o.startswith("http://183.181.89.140/mediawiki/index.php/Special:URIResolver"):
            g2.remove((s, p, o))
            # Rewrite object URIs that are not conversion targets listed in s.tsv, using pattern-based replacement
            o = URIRef(o.replace("http://183.181.89.140/mediawiki/index.php/Special:URIResolver/Category-3ABIF-3A",
                             "https://wba-initiative.org/bifd/") \
                   .replace("http://183.181.89.140/mediawiki/index.php/Special:URIResolver/Category-3A",
                            "http://wba-initiative.org/wbra/")
            # Glutamate is handled as a special case
            .replace("http://183.181.89.140/mediawiki/index.php/Special:URIResolver/Glutamate", "https://wba-initiative.org/bifd/Glutamate"))
            g2.add((s, p, o))

    g2.serialize(args.output, publicID=URI_TMP, format="pretty-xml")
Example #13
 def relativize(self, uri):
     base = self.base
     if base is not None and uri.startswith(base):
         uri = URIRef(uri.replace(base, "", 1))
     return uri
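A short standalone sketch of the same relativization logic with hypothetical example URIs; since URIRef subclasses str, startswith and replace apply to it directly.

from rdflib import URIRef

def relativize(uri, base):
    # Strip the base prefix once, as in the method above (standalone variant).
    if base is not None and uri.startswith(base):
        uri = URIRef(uri.replace(base, "", 1))
    return uri

print(relativize(URIRef("http://example.org/data/item/1"), "http://example.org/data/"))
# -> item/1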
Example #14
def convert(teifile, namespace):
    #graph_uri = "http://contextus.net/resource/blue_velvet/"

    ns = Namespace(namespace)

    graph = ConjunctiveGraph()
    graph.load(teifile, format="rdfa")

    graph.bind("default", ns)

    to_update = ""

    for prefix, nsuri in graph.namespaces():
        #print("prefix: " + str(prefix) + " - " + str(nsuri))
        if nsuri in ns:
            to_update = nsuri

    for s, p, o in graph:
        #    		print s, p, o
        if to_update != "" and to_update in s:
            graph.remove((s, p, o))
            s = URIRef(s.replace(to_update, ns))
            graph.add((s, p, o))

    act = ""
    scene = ""
    line = ""
    char = 0
    loc = 0

    #timeline = ns['timeline/narrative']
    #graph.add((timeline, RDF.type, ome['Timeline']))

    tree = ET.parse(teifile)
    cast = dict()

    titleNode = tree.find('//title')

    castItems = tree.findall('/text/body/div1/castList//castItem')
    for castItem in castItems:
        actorNode = castItem.find('actor')
        roleNode = castItem.find('role')

        if roleNode != None:
            id = roleNode.get("{http://www.w3.org/XML/1998/namespace}id")

        #print("Found castItem!")

        actor = None
        role = None

        # Check to see if we already have an entry
        if (roleNode != None and roleNode.get("about")):

            charname = roleNode.get("about")

            if (charname.find(":") > -1):
                nmsp, nom = charname.split(":", 1)
                charcode = "character/" + str(char)
                charref = nmsp + ":" + charcode + "]"
                role = extractCURIEorURI(graph, charref, nom[0:-1])
                char += 1
                #print("1:" + charname + ": adding id " + id + " to " + role)
            else:
                role = extractCURIEorURI(graph, charname)
                #print("2:" + charname + ": adding id " + id + " to " + role)

            cast[id] = role
            graph.add((role, RDF.type, omb['Character']))
            #print(charname + ": adding id " + id + " to " + role)

        if (actorNode != None and actorNode.get("about")):
            actor = extractCURIEorURI(graph, actorNode.get("about"))
            graph.add((actor, RDF.type, omb['Being']))

        if actor != None and role != None:
            graph.add((actor, omb['portrays'], role))
            graph.add((role, omb['portrayed-by'], actor))

    eventCount = 1
    groupCount = 1
    prior_event = None

    actItems = tree.findall('/text/body/div1')
    ref = ""

    for actItem in actItems:

        if actItem.get("type") == "act":
            act = actItem.get("n")

        sceneItems = actItem.findall('div2')

        for sceneItem in sceneItems:

            #print("Found sceneItems!")

            if sceneItem.get("type") == "scene":
                scene = sceneItem.get("n")

            # Work out the location of this scene
            location = None
            stageItems = sceneItem.findall("stage")

            #internalnum = 1
            stagenum = 0
            speechnum = 1

            for stageItem in stageItems:
                if stageItem.get("type") == "location":
                    # The RDFa parser doesn't handle the type - so we can grab that here.

                    if stageItem.get("about") != None:
                        locname = stageItem.get("about")

                        # Adding location type/oml:space for location
                        if stageItem.get("typeof") and stageItem.get("about"):
                            type = extractCURIEorURI(graph,
                                                     stageItem.get("typeof"))
                            #print "1. Location: " + str(location) + " Type: " + str(type)
                        elif stageItem.get("about"):
                            #print "2. Location: " + str(locname)
                            type = extractCURIEorURI(graph, oml['Space'])

                        # Get location value and add rdfs:label is location is not using the TEI value
                        if (locname.find(":") > -1):
                            nmsp, nom = locname.split(":", 1)
                            loccode = "location/" + str(loc)
                            locref = nmsp + ":" + loccode + "]"
                            location = extractCURIEorURI(
                                graph, locref, nom[0:-1])
                            loc += 1
                            graph.add((
                                location,
                                rdflib.URIRef(
                                    'http://www.w3.org/2000/01/rdf-schema#label'
                                ), Literal(nom[0:-1])))
                        else:
                            location = extractCURIEorURI(
                                graph, stageItem.get("about"))

                        # Add location to graph
                        graph.add((location, RDF.type, type))
                    else:
                        location = ""

                    #print("Adding location type: " + type + " (" + location + ")")

            if cast:
                # Work out a list of all cast in a given section
                currentCast = list()
                speakers = list()

            # Iterate through elements within stageItem
            # Find speaker events and add to list of current cast for inclusion in social event
            # Find reference events and add to ongoing social event ?
            # Find stage events
            # If event is an entrance then
            # create social event for people talking before entrance
            # create travel event i.e. entrance
            # add new arrival to current cast list
            # If event is exit event then
            # create social event for people talking before exit
            # create travel event i.e. exit
            # if leavers are not named directly the calculate who is leaving
            # remove leavers from current cast list
            # If reach end of scene then create social event with current cast list

            #Also need to check if social event before exit has same composition as social event after exit since then they should be merged

            event = ns['event/' + str(eventCount)]
            group = ns['group/' + str(groupCount)]

            refersTo = list()
            #parent = None
            speakerNodes = list()
            speakerRef = list()

            xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(
                perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
            stagecount = 0
            stage_array = list()

            for node in sceneItem.getiterator():
                #print("Node: " + node.tag)
                """
				if node.tag == "lb":
					if node.get("ed") == "F1":
						line = node.get("n")	
						if titleNode != None:
							ref = titleNode.text + " " + str(act) + "." + str(scene) + "." + str(line)	
						else:
							ref = str(act) + "." + str(scene) + "." + str(line)
							
						#xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:"  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) + "#xpointer(//lb[@ed='F1' and @n='" + str(line)	 + "'])"
						xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:"  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
						#print("Ref: " + xpointer)
				"""

                if node.tag == "sp":
                    id = node.get("who")

                    if id and cast:
                        speakers.append(cast[id[1:]])
                        speakerNodes.append(node)

                        if perseusid == None:
                            speakerRef.append(ref)
                        else:
                            #speechRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(int(line) + 1) + "']/ancestor::sp)"
                            speechRef = xpointer + "#xpointer(//div2/sp[" + str(
                                speechnum) + "])"
                            speakerRef.append(speechRef)
                        #print("Line ref: " + ref)

                        if cast[id[1:]] not in currentCast:
                            currentCast.append(cast[id[1:]])

                    #internalnum = 1
                    speechnum += 1
                    stagecount = 0

                    previousl = 0

                    for subnode in node.getiterator():
                        if subnode.tag == "l":
                            previousl += 1

                        if subnode.tag == "stage":
                            #print ("Stagecount: " + str(stagecount) + " Previousl: " + str(previousl) + "\n")
                            stage_array.append(previousl)
                            stagecount += 1

                elif node.tag == "stage":

                    if stagecount > 0:
                        s_max = len(stage_array)
                        diff = s_max - stagecount

                        #if diff == 0:
                        #	stagenum += 1

                        entRef = xpointer + "#xpointer(//div2/sp[" + str(
                            speechnum - 1) + "]/l[" + str(
                                stage_array[diff]) + "]/stage)"
                        #internalnum += 1
                        stagecount -= 1
                    else:
                        stagenum += 1
                        entRef = xpointer + "#xpointer(//div2/stage[" + str(
                            stagenum) + "])"

                    if node.get("type") == "entrance":

                        # Add Social Events for all the people who spoke since the last break (if there were any)

                        update = list()
                        update = getSocial(graph, ns, speakers, speakerNodes,
                                           speakerRef, cast, currentCast,
                                           eventCount, event, prior_event,
                                           location)
                        eventCount = update[0]
                        prior_event = update[1]

                        event = ns['event/' + str(eventCount)]

                        speakers = list()
                        speakerNodes = list()
                        speakerRef = list()

                        # Add Travel Event

                        graph.add((event, RDF.type, omj['Travel']))

                        if perseusid == None:
                            graph.add((
                                event,
                                rdflib.URIRef(
                                    "http://www.w3.org/2000/01/rdf-schema#seeAlso"
                                ), Literal(ref)))
                        else:
                            #entRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "']/following-sibling::*[1]/self::stage)"
                            graph.add((
                                event,
                                rdflib.URIRef(
                                    "http://www.w3.org/2000/01/rdf-schema#seeAlso"
                                ), URIRef(entRef)))

                        #print("Entrance event. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))

                        #print("Found entrence event!")
                        if location:
                            graph.add((event, ome['to'], location))

                        involved = node.get("about")

                        if (len(involved) > 0 and involved[0] == "["
                                and involved[-1] == "]"):
                            involved = involved[1:-1]

                        chunks = involved.split()

                        chunk_count = len(chunks)

                        if chunk_count > 1:
                            #type = extractCURIEorURI(graph, "[omb:Group]")
                            #graph.add((group, RDF.type, type))
                            graph.add((group, RDF.type, omb['Group']))

                        event_label = ""
                        en = 1

                        for chunk in chunks:
                            striped = chunk.strip()

                            if (len(striped) > 0 and striped[0] == "["
                                    and striped[-1] == "]"):
                                striped = striped[1:-1]
                                currentCast.append(cast[striped])

                            if chunk_count > 1:
                                graph.add(
                                    (group, ome['contains'], cast[striped]))

                                if en == chunk_count:
                                    event_label = event_label[
                                        0:-2] + " and " + striped
                                    graph.add((
                                        event,
                                        rdflib.URIRef(
                                            'http://www.w3.org/2000/01/rdf-schema#label'
                                        ), Literal(event_label + " arrive")))
                                elif en < chunk_count:
                                    event_label += striped + ", "

                            else:
                                #print("Adding person as subject-entity to entry event "   + str(eventCount))
                                graph.add((
                                    event,
                                    rdflib.URIRef(
                                        'http://www.w3.org/2000/01/rdf-schema#label'
                                    ), Literal(striped + " arrives")))
                                graph.add((event, ome['has-subject-entity'],
                                           cast[striped]))

                            en += 1

                        if chunk_count > 1:
                            graph.add(
                                (event, ome['has-subject-entity'], group))
                            #print("Adding group as subject-entity to entry event "   + str(eventCount))
                            groupCount = groupCount + 1
                            group = ns['group/' + str(groupCount)]

                        if (prior_event):
                            graph.add((event, ome['follows'], prior_event))
                            graph.add((prior_event, ome['precedes'], event))

                        prior_event = event

                        eventCount = eventCount + 1
                        event = ns['event/' + str(eventCount)]

                    if node.get("type") == "exit":

                        # Add Social Events for all the people who spoke since the last break (if there were any)
                        update = list()
                        update = getSocial(graph, ns, speakers, speakerNodes,
                                           speakerRef, cast, currentCast,
                                           eventCount, event, prior_event,
                                           location)
                        eventCount = update[0]
                        prior_event = update[1]

                        event = ns['event/' + str(eventCount)]

                        speakers = list()
                        speakerNodes = list()
                        speakerRef = list()

                        # Add Travel Event

                        graph.add((event, RDF.type, omj['Travel']))

                        if perseusid == None:
                            graph.add((
                                event,
                                rdflib.URIRef(
                                    "http://www.w3.org/2000/01/rdf-schema#seeAlso"
                                ), Literal(ref)))
                        else:
                            #exitRef = xpointer
                            #graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(exitRef)))
                            graph.add((
                                event,
                                rdflib.URIRef(
                                    "http://www.w3.org/2000/01/rdf-schema#seeAlso"
                                ), URIRef(entRef)))

                        #print("Found entrence event!")
                        if location != None:
                            graph.add((event, ome['from'], location))

                        involved = node.get("about")

                        if involved.strip() == "" or "-all" in involved:
                            # Remove everyone

                            #print("Exit all. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))

                            #for peep in currentCast:
                            #	print(peep)

                            if len(currentCast) > 1:
                                #type = extractCURIEorURI(graph, "[omb:Group]")
                                #graph.add((group, RDF.type, type))
                                graph.add((group, RDF.type, omb['Group']))

                            event_label = ""
                            en = 1

                            for peep in currentCast:
                                short_ref = ""
                                for key, value in cast.iteritems():
                                    if peep == value:
                                        short_ref = key

                                if len(currentCast) > 1:
                                    graph.add((group, ome['contains'], peep))

                                    if en == len(currentCast):
                                        event_label = event_label[
                                            0:-2] + " and " + short_ref
                                        graph.add((
                                            event,
                                            rdflib.URIRef(
                                                'http://www.w3.org/2000/01/rdf-schema#label'
                                            ),
                                            Literal(event_label + " leave")))
                                    elif en < len(currentCast):
                                        event_label += short_ref + ", "

                                else:
                                    #print("Adding person as subject-entity to exuant event "   + str(eventCount))
                                    graph.add(
                                        (event, ome['has-subject-entity'],
                                         peep))
                                    graph.add((
                                        event,
                                        rdflib.URIRef(
                                            'http://www.w3.org/2000/01/rdf-schema#label'
                                        ), Literal(short_ref + " leaves")))

                                en += 1

                            if len(currentCast) > 1:
                                graph.add(
                                    (event, ome['has-subject-entity'], group))
                                #print("Adding group as subject-entity to exuant event "   + str(eventCount))
                                groupCount = groupCount + 1
                                group = ns['group/' + str(groupCount)]

                            currentCast = list()

                        elif "!" in involved:
                            #print("Exit except some. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))

                            #print("Event: " + involved);

                            if (len(involved) > 0 and involved[0] == "["
                                    and involved[-1] == "]"):
                                involved = involved[1:-1]

                            involved = involved.strip()

                            if (len(involved) > 0 and involved[0] == "!"
                                    and involved[1] == "("
                                    and involved[-1] == ")"):
                                involved = involved[2:-1]

                            #print("involved: " + involved)

                            striped = involved.strip()

                            c_ids = striped.split()

                            chunks = list()

                            for stay in c_ids:
                                #print("Staying: " + cast[stay])
                                chunks.append(cast[stay])

                            staying = list()
                            going = list()

                            for player in currentCast:
                                #print("Player: " + player)
                                if player in chunks:
                                    staying.append(player)
                                else:
                                    going.append(player)

                            going_count = len(going)

                            if going_count > 1:
                                #type = extractCURIEorURI(graph, "[omb:Group]")
                                #graph.add((group, RDF.type, type))
                                graph.add((group, RDF.type, omb['Group']))

                            event_label = ""
                            en = 1

                            for ghost in going:
                                #print("ghost: " + ghost)

                                short_ref = ""
                                for key, value in cast.iteritems():
                                    if ghost == value:
                                        short_ref = key

                                if ghost in currentCast:
                                    currentCast.remove(ghost)
                                    #print("Current cast count: "  + str(len(currentCast)))

                                if going_count > 1:
                                    graph.add((group, ome['contains'], ghost))

                                    if en == len(going):
                                        event_label = event_label[
                                            0:-2] + " and " + short_ref
                                        graph.add((
                                            event,
                                            rdflib.URIRef(
                                                'http://www.w3.org/2000/01/rdf-schema#label'
                                            ),
                                            Literal(event_label + " leave")))
                                    elif en < len(going):
                                        event_label += short_ref + ", "

                                else:
                                    #print("Adding person as subject-entity to exit event "   + str(eventCount))
                                    graph.add(
                                        (event, ome['has-subject-entity'],
                                         ghost))
                                    graph.add((
                                        event,
                                        rdflib.URIRef(
                                            'http://www.w3.org/2000/01/rdf-schema#label'
                                        ), Literal(short_ref + " leaves")))

                                en += 1

                            if going_count > 1:
                                graph.add(
                                    (event, ome['has-subject-entity'], group))
                                #print("Adding group as subject-entity to exit event "   + str(eventCount))
                                groupCount = groupCount + 1
                                group = ns['group/' + str(groupCount)]

                        else:
                            #print("Exit some. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))

                            if (len(involved) > 0 and involved[0] == "["
                                    and involved[-1] == "]"):
                                involved = involved[1:-1]

                            striped = involved.strip()
                            chunks = striped.split()

                            #print("striped: " + striped)

                            chunk_count = len(chunks)

                            if chunk_count > 1:
                                #type = extractCURIEorURI(graph, "[omb:Group]")
                                #graph.add((group, RDF.type, type))
                                graph.add((group, RDF.type, omb['Group']))

                            event_label = ""
                            en = 1

                            for chunk in chunks:
                                #print("chunk: " + chunk)

                                ghost = cast[chunk]

                                #print("ghost: " + ghost)

                                if ghost in currentCast:
                                    currentCast.remove(ghost)
                                    #print("Current cast count: "  + str(len(currentCast)))

                                if chunk_count > 1:
                                    graph.add((group, ome['contains'], ghost))

                                    # compare against the number leaving (chunk_count), not the shrinking currentCast
                                    if en == chunk_count:
                                        event_label = event_label[
                                            0:-2] + " and " + chunk
                                        graph.add((
                                            event,
                                            rdflib.URIRef(
                                                'http://www.w3.org/2000/01/rdf-schema#label'
                                            ),
                                            Literal(event_label + " leave")))
                                    elif en < chunk_count:
                                        event_label += chunk + ", "

                                else:
                                    #print("Adding person as subject-entity to exit event "   + str(eventCount))
                                    graph.add(
                                        (event, ome['has-subject-entity'],
                                         ghost))
                                    graph.add((
                                        event,
                                        rdflib.URIRef(
                                            'http://www.w3.org/2000/01/rdf-schema#label'
                                        ), Literal(chunk + " leaves")))

                                en += 1

                            if chunk_count > 1:
                                graph.add(
                                    (event, ome['has-subject-entity'], group))
                                #print("Adding group as subject-entity to exit event "   + str(eventCount))
                                groupCount = groupCount + 1
                                group = ns['group/' + str(groupCount)]

                        if (prior_event):
                            graph.add((event, ome['follows'], prior_event))
                            graph.add((prior_event, ome['precedes'], event))

                        prior_event = event

                        eventCount = eventCount + 1
                        event = ns['event/' + str(eventCount)]

                #elif node.tag == "rs":
                #	#print("Found rs node")
                #	if parent:
                #		#print("Parent type is " + parent.tag)
                #		if parent.tag == "p" or  parent.tag == "l":
                #			refersTo.append(node.get("about"))

                #parent = node

            # Add Social Events for all the people who spoke since the last break (if there were any)
            #print("Final section of scene, currentCast:" + str(len(currentCast)) + " sperkers: " + str(len(speakers)))
            update = list()
            update = getSocial(graph, ns, speakers, speakerNodes, speakerRef,
                               cast, currentCast, eventCount, event,
                               prior_event, location)
            eventCount = update[0]
            prior_event = update[1]

            event = ns['event/' + str(eventCount)]
            group = ns['group/' + str(groupCount)]

            speakers = list()
            speakerNodes = list()
            currentCast = list()
            speakerRef = list()

    print graph.serialize(format='xml')
Example #19
0
def createimage2graph(inputfile, entity_map, ontology, filesubject, lowerlevel, consolidatedGraph, triple_dir, generateEmbTriples):
    
    #filesubject is the publication URI, which has to be linked to the image components

    # g = Graph()
    # g.parse(ontology,format="n3")
    # len(g)
    imagetriples = []

    block_dict = {
        "Figure":"Figure",
        "conv": "ConvBlock",
        "deconv":"DeconvBlock",
        "dense":"DenseBlock",
        "flatten":"FlattenBlock",
        "dropout":"DropoutBlock",
        "pooling":"PoolingBlock",
        "unpooling":"UnpoolingBlock",
        "concat":"ConcatBlock",
        "rnn":"RnnBlock",
        "rnnseq": "RnnSeqBlock",
        "lstm":"LSTMBlock",
        "lstmseq":"LSTMSeqBlock",
        "norm":"NormBlock",
        "embed":"EmbedBlock",
        "activation":"ActivationBlock",
        "loss":"LossBlock",
        "output":"OutputBlock",
        "input":"InputBlock",
        "upsample":"UpsamplingBlock"
    }

    # Namespaces
    dcc_namespace = "https://github.com/deepcurator/DCC/"

    # Classes
    Figure = URIRef(dcc_namespace + "Figure")
    # ActivationBlock = URIRef(dcc_namespace + "ActivationBlock")
    # EmbedBlock = URIRef(dcc_namespace + "EmbedBlock")
    # NormBlock = URIRef(dcc_namespace + "NormBlock")
    # LSTMSeqBlock = URIRef(dcc_namespace + "LSTMSeqBlock")
    # LSTMBlock = URIRef(dcc_namespace + "LSTMBlock")
    # RNNSeqBlock = URIRef(dcc_namespace + "RNNSeqBlock")
    # RNNBlock = URIRef(dcc_namespace + "RNNBlock")
    # ConcatBlock = URIRef(dcc_namespace + "ConcatBlock")
    # UnpoolingBlock = URIRef(dcc_namespace + "UnpoolingBlock")
    # PoolingBlock = URIRef(dcc_namespace + "PoolingBlock")
    # DropoutBlock = URIRef(dcc_namespace + "DropoutBlock")
    # FlattenBlock = URIRef(dcc_namespace + "FlattenBlock")
    # DenseBlock = URIRef(dcc_namespace + "DenseBlock")
    # DeconvBlock = URIRef(dcc_namespace + "DeconvBlock")
    # ConvBlock = URIRef(dcc_namespace + "ConvBlock")
    # LossBlock = URIRef(dcc_namespace + "LossBlock")
    # Properties
    partOf = URIRef(dcc_namespace + "partOf")
    followedBy = URIRef(dcc_namespace + "followedBy")

    # Open the image2graph

    with open(inputfile,encoding="ISO-8859-1") as f:
        lines = f.readlines()
    lines = [x.strip() for x in lines]

    # Each line in the image2graph is a triple
    # Split the triple into s,p,o
    # Create the URIRefs for RDF based on the ontology
    # URIRefs require the namespace and the class term from ontology

    for line in lines:
        triple = line.split(" ")
        subject = triple[0]
        predicate = triple[1]
        obj = triple[2]

        filename = inputfile.split(os.path.sep)[-1]
        filename = filename.split('.txt')[0]     

        if (subject.startswith(":")):
            subject = subject[1:]
        if (obj.startswith(":")):
            obj = obj[1:]
        
        # print(line + "\n")
        if(predicate == "partOf"):
            ## Subject is a component
            ## Create a unique URI for that
            filename = inputfile.split('/')[-1]
            filename = filename.split('.txt')[0]
            
            # print(subject + "\tpart of\t" + obj[4:])
            imagetriples.append(subject.replace("\\", "/") + "\tpart of\t" + obj[4:].replace("\\", "/"))
            subject = URIRef(dcc_namespace + filename[4:].replace("\\", "/") + "_" + subject.replace("\\", "/"))
            obj = URIRef(dcc_namespace + obj[4:].replace("\\", "/"))
            # g.add((subject,partOf,obj))
            
            consolidatedGraph.add((subject,partOf,obj))
        elif(predicate == "hasCaption"):
            triplesubj = subject 
            subject = URIRef(dcc_namespace + subject)
            literaltext = Literal(obj)
            consolidatedGraph.add((subject,URIRef(dcc_namespace + "hasCaptionText"),literaltext))

        elif(predicate == "isA"):
            triplesubj = subject 
            subject = URIRef(dcc_namespace + subject)
            
            # if(obj in entity_map):
            #     print("found obj in entity map")
            #     print("Found " + obj + " in cso")
            #     csovalue = entity_map[obj]
            #     str_value = str(csovalue)
            #     print("CSO value is then " + str_value)

            
            # g.add((subject,RDF.type, URIRef(dcc_namespace + block_dict.get(obj))))
            if(obj == "Figure"):
                # print(filesubject + "\thas Figure\t" + obj)
                imagetriples.append(filesubject + "\thas Figure\t" + obj)
                consolidatedGraph.add((URIRef(dcc_namespace + filesubject),URIRef(dcc_namespace + "hasFigure"),subject))
            
            # print(triplesubj +"\tisA\t" + block_dict.get(obj))

            imagetriples.append(triplesubj +"\tisA\t" + block_dict.get(obj))
            consolidatedGraph.add((subject,RDF.type, URIRef(dcc_namespace + block_dict.get(obj))))
        elif(predicate == "isType"):

            filename = inputfile.split(os.path.sep)[-1]
            # print("FILENAME: " + filename)
            filename = filename.split('.txt')[0]
            # print(subject + "\tisA\t" + block_dict.get(obj))
            # print(obj)
            imagetriples.append(subject + "\tisA\t" + block_dict.get(obj))
            subject = URIRef(dcc_namespace + filename[4:] + "_" + subject)
            # print("Subject is " + subject)
            # g.add((subject, RDF.type, URIRef(dcc_namespace + block_dict.get(obj))))
            consolidatedGraph.add((subject, RDF.type, URIRef(dcc_namespace + block_dict.get(obj))))

            # Link CSO 
            if(obj in entity_map):
                # print("found obj in entity map")
                # print("Found " + obj + " in cso")
                csovalue = entity_map[obj]
                str_value = str(csovalue)
                # print("CSO value is then " + str_value)
                if("cso" in str_value):
                    consolidatedGraph.add((subject,URIRef(dcc_namespace + "hasCSOEquivalent"),csovalue))


    if generateEmbTriples:
        save_image_triple_file(imagetriples,filesubject,lowerlevel, triple_dir)
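
A minimal, hypothetical driver for the createimage2graph listing above, assuming the function and the rdflib names it uses (URIRef, Literal, RDF) are importable from the same module. The file name, the four-character "fig_" prefix, the empty entity_map, and POSIX-style paths are illustrative assumptions only; generateEmbTriples is left False so save_image_triple_file (defined elsewhere) is not needed.

import os
import tempfile

from rdflib import Graph

# A toy image2graph file: one whitespace-separated triple per line.
tmpdir = tempfile.mkdtemp()
inputfile = os.path.join(tmpdir, "fig_paper1_fig2.txt")    # assumed naming scheme
with open(inputfile, "w") as f:
    f.write(":conv_1 isType conv\n")                       # block typed via block_dict
    f.write(":conv_1 partOf fig_paper1_fig2\n")            # component linked to its figure

consolidated = Graph()
createimage2graph(
    inputfile,
    entity_map={},             # no CSO links in this toy run
    ontology=None,             # not read in this code path (the g.parse calls are commented out)
    filesubject="paper1",      # hypothetical publication id
    lowerlevel=False,
    consolidatedGraph=consolidated,
    triple_dir=tmpdir,
    generateEmbTriples=False,  # skip the save_image_triple_file step
)
print(consolidated.serialize(format="turtle"))
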
Example #20
0
    stmt = (subject2, URIRef("http://schema.org/geo"), subject)

    g.add(stmt)

    stmt = (subject2, URIRef("http://www.w3.org/2000/01/rdf-schema#label"),
            Literal(label))
    g.add(stmt)

    stmt = (subject2, URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"),
            URIRef(obj["@id"]))
    g.add(stmt)

    stmt = (subject2,
            URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), aaa)
    g.add(stmt)

    path = subject2.replace("https://w3id.org/hi", "../../docs") + ".json"
    print(path)
    dirname = os.path.dirname(path)
    os.makedirs(dirname, exist_ok=True)
    g.serialize(destination=path, format='json-ld')

    all += g

path = "data/all.rdf"
all.serialize(destination=path, format='pretty-xml')

with open('data/p.csv', 'w') as f:
    writer = csv.writer(f, lineterminator='\n')  # explicitly set the line terminator (\n)
    writer.writerows(rows)
Example #21
0
    def relativize(self, uri):
        base = self.base
        if base is not None and uri.startswith(base):
            uri = URIRef(uri.replace(base, "", 1))
        return uri
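
A small standalone sketch of what relativize() does: strip a known base IRI from the front of a URI exactly once. The _Holder class here is hypothetical, just enough to exercise the method; only rdflib.URIRef is taken from the listing.

from rdflib import URIRef

class _Holder(object):
    def __init__(self, base):
        self.base = base

    def relativize(self, uri):
        base = self.base
        if base is not None and uri.startswith(base):
            uri = URIRef(uri.replace(base, "", 1))
        return uri

h = _Holder("http://example.org/data/")
print(h.relativize(URIRef("http://example.org/data/item/42")))  # item/42
print(h.relativize(URIRef("http://other.org/x")))               # unchanged
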
Example #22
0
class Cluster:
    def __init__(self, uri):
        self.uri = URIRef(uri)
        self.__prototype = None
        self.__type = None
        self.__members = []
        self.__forward = None
        self.__backward = None
        self.__targets = Counter()
        self.__qnodes = Counter()
        self.__qnodesURL = {}

    @property
    def href(self):
        return self.uri.replace('http://www.isi.edu/gaia', '/cluster').replace('http://www.columbia.edu', '/cluster')

    @property
    def label(self):
        if self.uri in pickled and 'label' in pickled[self.uri]:
            return pickled[self.uri]['label']
        return self.prototype.label

    @property
    def prototype(self):
        if not self.__prototype:
            self._init_cluster_prototype()
        return self.__prototype

    @property
    def type(self):
        if self.uri in pickled and 'type' in pickled[self.uri]:
            return pickled[self.uri]['type']
        if not self.__type:
            self._init_cluster_prototype()
        return self.__type

    @property
    def members(self):
        if not self.__members:
            self._init_cluster_members()
        return self.__members

    @property
    def targets(self):
        if not self.__targets:
            self._init_cluster_members()
        return self.__targets.most_common()

    @property
    def targetsSize(self):
        return len(self.targets)

    @property
    def qnodes(self):
        if not self.__qnodes:
            self._init_qnodes()
        return self.__qnodes.most_common()

    @property
    def qnodesURL(self):
        if not self.__qnodesURL:
            self._init_qnodes()
        return self.__qnodesURL

    @property
    def size(self):
        if self.__members:
            return len(self.__members)
        return self._query_for_size()

    @property
    def forward(self):
        if self.__forward is None:
            self.__forward = set()
            self._init_forward_clusters()
        return self.__forward

    @property
    def backward(self):
        if self.__backward is None:
            self.__backward = set()
            self._init_backward_clusters()
        return self.__backward

    @property
    def neighbors(self):
        return self.forward | self.backward

    def neighborhood(self, hop=1):
        if hop == 1 and self.prototype.type != AIDA.Relation:
            hood = self.neighbors
            # for neighbor in [x for x in self.neighbors if x.subject.proto]
            for neighbor in self.neighbors:
                if neighbor.subject.prototype.type == AIDA.Relation:
                    hood |= neighbor.subject.neighbors
            return hood
        if hop <= 1:
            return self.neighbors
        hood = set()
        for neighbor in self.neighbors:
            hood |= neighbor.subject.neighborhood(hop-1)
            hood |= neighbor.object.neighborhood(hop-1)
        return hood

    @property
    def img(self):
        import os.path
        _, name = split_uri(self.uri)
        svgpath = 'static/img/' + name + '.svg'
        if os.path.isfile(svgpath):
            return name

        from graph import SuperEdgeBasedGraph
        graph = SuperEdgeBasedGraph(self.neighborhood(), self, self.uri)
        path = graph.dot()
        return graph.name

    @classmethod
    def ask(cls, uri):
        query = "ASK { ?cluster a aida:SameAsCluster }"
        for ans in sparql.query(query, namespaces, {'cluster': URIRef(uri)}):
            return ans
        return False

    def _init_cluster_prototype(self):
        query = """
SELECT ?prototype (MIN(?label) AS ?mlabel) ?type ?category
WHERE {
    ?cluster aida:prototype ?prototype .
    ?prototype a ?type .
    OPTIONAL { ?prototype aida:hasName ?label } .
    ?statement a rdf:Statement ;
               rdf:subject ?prototype ;
               rdf:predicate rdf:type ;
               rdf:object ?category ;
}
GROUP BY ?prototype ?type ?category """
        for prototype, label, type_, cate in sparql.query(query, namespaces, {'cluster': self.uri}):
            if not label:
                _, label = split_uri(cate)
            self.__prototype = ClusterMember(prototype, label, type_)
            self.__type = cate

    def _init_cluster_members(self):
        query = """
SELECT ?member (MIN(?label) AS ?mlabel) ?type ?target
WHERE {
  ?membership aida:cluster ?cluster ;
              aida:clusterMember ?member .
  OPTIONAL { ?member aida:hasName ?label } .
  OPTIONAL { ?member aida:link/aida:linkTarget ?target } .
  ?statement a rdf:Statement ;
             rdf:subject ?member ;
             rdf:predicate rdf:type ;
             rdf:object ?type .
}
GROUP BY ?member ?type ?target """
        for member, label, type_, target in sparql.query(query, namespaces, {'cluster': self.uri}):
            self.__members.append(ClusterMember(member, label, type_, target))
            if target:
                self.__targets[str(target)] += 1

    def _init_qnodes(self):
        for target, count in self.targets:
            if ":NIL" not in target:
                fbid = '/' + target[target.find(':')+1:].replace('.', '/')
                query = """
                    SELECT ?qid ?label WHERE {
                      ?qid wdt:P646 ?freebase .
                      ?qid rdfs:label ?label filter (lang(?label) = "en") .
                    }
                    LIMIT 1
                """
                for qid, label in wikidata_sparql.query(query, namespaces, {'freebase': Literal(fbid)}):
                    qnodeURL = str(qid)
                    qid = qnodeURL.rsplit('/', 1)[1]
                    self.__qnodes[qid] = count
                    if qid not in self.__qnodesURL:
                        self.__qnodesURL[qid] = qnodeURL

    def _init_forward_clusters(self):
        query = """
SELECT ?p ?o ?cnt
WHERE {
  ?s aida:prototype ?proto1 .
  ?o aida:prototype ?proto2 .
  ?se rdf:subject ?proto1 ;
      rdf:predicate ?p ;
      rdf:object ?proto2 ;
      aida:confidence/aida:confidenceValue ?conf .
  BIND(ROUND(1/(2*(1-?conf))) as ?cnt)
} """
        for p, o, cnt in sparql.query(query, namespaces, {'s': self.uri}):
            self.__forward.add(SuperEdge(self, Cluster(o), p, int(cnt)))

    def _init_backward_clusters(self):
        query = """
SELECT ?s ?p ?cnt
WHERE {
  ?s aida:prototype ?proto1 .
  ?o aida:prototype ?proto2 .
  ?se rdf:subject ?proto1 ;
      rdf:predicate ?p ;
      rdf:object ?proto2 ;
      aida:confidence/aida:confidenceValue ?conf .
  BIND(ROUND(1/(2*(1-?conf))) as ?cnt)
} """
        for s, p, cnt in sparql.query(query, namespaces, {'o': self.uri}):
            self.__backward.add(SuperEdge(Cluster(s), self, p, int(cnt)))

    def _query_for_size(self):
        if self.uri in pickled and 'size' in pickled[self.uri]:
            return pickled[self.uri]['size']
        query = """
SELECT (COUNT(?member) AS ?size)
WHERE {
    ?membership aida:cluster ?cluster ;
                aida:clusterMember ?member .
}  """
        for size, in sparql.query(query, namespaces, {'cluster': self.uri}):
            return int(size)
        return 0

    def __hash__(self):
        return self.uri.__hash__()

    def __eq__(self, other):
        return isinstance(other, Cluster) and str(self.uri) == str(other.uri)
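
The _init_forward_clusters and _init_backward_clusters queries above turn an aida:confidenceValue into an approximate super-edge count with ROUND(1/(2*(1-?conf))). A quick standalone check of that mapping (plain arithmetic, no SPARQL endpoint required):

# Confidence values are assumed to lie in [0, 1); higher confidence -> more edges.
for conf in (0.5, 0.75, 0.9, 0.95, 0.99):
    print(conf, round(1 / (2 * (1 - conf))))
# 0.5 -> 1, 0.75 -> 2, 0.9 -> 5, 0.95 -> 10, 0.99 -> 50
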
Example #23
0
def add_data(data, category):
    if category == 'Manufacturers':
        for man in data[category]:
            name = URIRef(ns + man['Name'])
            url = Literal(man['Site'])
            country = Literal(man['Country'])
            info = Literal(man['Description'])

            g.add((name, RDF.type, OWL.NamedIndividual))
            g.add((name, RDF.type, ns.Manufacturer))
            if (name, RDFS.comment, None) not in g:
                g.add((name, RDFS.comment, info))
            if (name, RDFS.seeAlso, None) not in g:
                g.add((name, RDFS.seeAlso, url))
            # if (name,  RDFS.comment, country) not in g:
            #     g.add((name, RDFS.comment, country))

    if category == 'Guitars':
        for key in data[category].keys():
            strings_num = 6
            if key == 'Acoustic guitars':
                cls = ns.AcousticGuitar
            elif key == 'Classical guitars':
                cls = ns.ClassicalGuitar
            elif key == 'Electric guitars':
                cls = ns.ElectricGuitar
            elif key == 'Bass guitars':
                cls = ns.BassGuitar
                strings_num = 4
            for guitar in data[category][key]:
                name = guitar['Name'].replace(' ', '_')
                for char in ['®', '#', ',', '`']:
                    name = name.replace(char, '')
                name = ns + URIRef(name)
                price = Literal(guitar['Price'])
                brand = Literal(guitar['Brand'])
                color = Literal(guitar['Color'])
                pickup = Literal(guitar['Pickup'])
                strings = Literal(guitar['Strings'])
                if 'Strings number' in guitar.keys():
                    strings_num = Literal(guitar['Strings number'])
                strings_num = Literal(strings_num)

                g.add((name, RDF.type, OWL.NamedIndividual))
                g.add((name, RDF.type, cls))
                g.add((name, ns.hasManufacturer, URIRef(ns + brand)))
                g.add((name, ns.hasColor, color))
                g.add((name, ns.hasPickup, pickup))
                if (name, ns.hasPrice, None) in g:
                    g.set((name, ns.hasPrice, price))
                else:
                    g.add((name, ns.hasPrice, price))
                g.add((name, ns.hasStrings, strings))
                g.add((name, ns.numStrings, strings_num))

    if category == 'Amplifiers' or category == 'Bass amplifiers':
        for key in data[category].keys():
            if key == 'Combo':
                cls = ns.ComboAmplifier
                speakers_num = Literal(1)
            elif key == 'Heads':
                cls = ns.Head
            elif key == 'Preamplifiers':
                cls = ns.Preamplifier

            for amp in data[category][key]:
                name = amp['Name'].replace(' ', '_')
                for char in ['®', '#', ',', '`']:
                    name = name.replace(char, '')
                name = ns + URIRef(name)
                price = Literal(amp['Price'])
                brand = Literal(amp['Brand'].replace(' ', '_'))
                type = amp['Type']
                if type == 'transistor':
                    type = ns.Digital
                elif type == 'tube':
                    type = ns.Tube
                elif type == 'hybrid':
                    type = ns.Hybrid
                if 'Power' in amp.keys():
                    power = Literal(amp['Power'])
                if 'Speakers' in amp.keys():
                    speakers_num = Literal(amp['Speakers'])
                if key == 'Combo':
                    g.add((name, ns.numSpeakers, speakers_num))
                g.add((name, RDF.type, OWL.NamedIndividual))
                g.add((name, RDF.type, cls))
                g.add((name, ns.hasManufacturer, URIRef(ns + brand)))
                g.add((name, RDF.type, type))
                if 'Power' in amp.keys():
                    g.add((name, ns.hasPower, power))
                if (name, ns.hasPrice, None) in g:
                    g.set((name, ns.hasPrice, price))
                else:
                    g.add((name, ns.hasPrice, price))
                if category == 'Bass amplifiers':
                    g.add((name, RDF.type, ns.BassAmplifier))

    if category == 'Pickups':
        for pickup in data[category]:
            name = pickup['Name'].replace(' ', '_')
            for char in ['®', '#', ',', '`']:
                name = name.replace(char, '')
            name = ns + URIRef(name)
            price = Literal(pickup['Price'])
            brand = Literal(pickup['Brand'].replace(' ', '_'))
            type = Literal(pickup['Type'])
            active = pickup['Active']
            use = pickup['Use']

            g.add((name, RDF.type, OWL.NamedIndividual))
            if active:
                g.add((name, RDF.type, ns.Active))
            else:
                g.add((name, RDF.type, ns.Passive))
            g.add((name, ns.hasManufacturer, URIRef(ns + brand)))
            g.add((name, ns.hasType, type))
            if use == 'electric':
                g.add((name, ns.isSuitableFor, ns.ElectricGuitar))
            elif use == 'bass':
                g.add((name, ns.isSuitableFor, ns.BassGuitar))
            if (name, ns.hasPrice, None) in g:
                g.set((name, ns.hasPrice, price))
            else:
                g.add((name, ns.hasPrice, price))

    if category == 'Strings':
        for strings in data[category]:
            name = strings['Name'].replace(' ', '_')
            for char in ['®', '#', ',', '`']:
                name = name.replace(char, '')
            name = ns + URIRef(name)
            price = Literal(strings['Price'])
            brand = Literal(strings['Brand'].replace(' ', '_'))
            material = Literal(strings['Material'])
            gauge = Literal(strings['Gauge'])
            use = strings['Use']
            number = Literal(strings['Number'])

            g.add((name, RDF.type, OWL.NamedIndividual))
            g.add((name, ns.hasManufacturer, URIRef(ns + brand)))
            g.add((name, ns.hasMaterial, material))
            g.add((name, ns.hasGauge, gauge))
            if use == 'electric':
                g.add((name, ns.isSuitableFor, ns.ElectricGuitar))
            elif use == 'acoustic':
                g.add((name, ns.isSuitableFor, ns.AcousticGuitar))
            elif use == 'classical':
                g.add((name, ns.isSuitableFor, ns.ClassicalGuitar))
            elif use == 'bass':
                g.add((name, ns.isSuitableFor, ns.BassGuitar))
            if (name, ns.hasPrice, None) in g:
                g.set((name, ns.hasPrice, price))
            else:
                g.add((name, ns.hasPrice, price))
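
The add_data listing above relies on a module-level graph g and namespace ns (plus rdflib's OWL/RDF/RDFS). A plausible, assumed setup and a one-record call might look like this; the namespace URI and the sample record are illustrative only, and add_data is assumed to live in the same module as g and ns.

from rdflib import Graph, Literal, Namespace, URIRef   # Literal/URIRef are what add_data itself uses
from rdflib.namespace import OWL, RDF, RDFS

ns = Namespace("http://example.org/guitar-shop#")  # assumed ontology namespace
g = Graph()
g.bind("gtr", ns)

sample = {
    "Manufacturers": [
        {"Name": "Fender", "Site": "https://www.fender.com",
         "Country": "USA", "Description": "Guitar manufacturer"},
    ]
}
add_data(sample, "Manufacturers")
print(g.serialize(format="turtle"))
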
Example #24
0

root = xmlschema_doc.getroot()


for complexType in root.findall(".//{http://www.w3.org/2001/XMLSchema}complexType"):
        print(complexType.attrib["name"])
        name = URIRef(complexType.attrib["name"])
        description = complexType.find(".//{http://www.w3.org/2001/XMLSchema}documentation")
#        print(description.__dict__)
#        print(description.text)
        # Only annotate the type when the xs:documentation node exists and has text
        if description is not None and description.text:
            ZinGraph.add((name, DCTERMS.description, Literal(description.text, lang="nl")))
            ZinGraph.add((name, RDFS.label, Literal(name.replace("CDT_", ""), lang="nl")))
        for element in complexType.findall(".//{http://www.w3.org/2001/XMLSchema}element"):
            element_name = URIRef(element.attrib["name"])
            ZinGraph.add((element_name, RDFS.label, Literal(element_name, lang="nl")))

            if "type" in element.attrib.keys():
                element_type = URIRef(element.attrib["type"].replace("iwlz:", "http://www.istandaarden.nl/iwlz/1_2/basisschema/schema/1_2/"))
                ZinGraph.add((element_name, RDF.type, element_type))


            ZinGraph.add((element_name, DCTERMS.isPartOf, name))

            #description2 = element.find(".//{http://www.w3.org/2001/XMLSchema}documentation")
            #print(description2.text)
        #for grandchild in child:
        #    print(grandchild.tag, grandchild.attrib)
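
The complexType loop above starts from a parsed XSD document (xmlschema_doc) and an rdflib graph (ZinGraph) that are created outside this excerpt. A minimal, assumed preamble could look like this; the schema file name is a guess based on the iwlz basisschema URI used in the loop.

import xml.etree.ElementTree as ET

from rdflib import Graph, Literal, URIRef
from rdflib.namespace import DCTERMS, RDF, RDFS

xmlschema_doc = ET.parse("basisschema.xsd")  # assumed local copy of the iWlz basisschema
ZinGraph = Graph()
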
def insert_data(data, category):
    if category == 'Manufacturers':
        for man in data[category]:
            name = URIRef(namespace + man['Name'])
            url = Literal(man['Website'])
            country = Literal(man['Country'])
            info = Literal(man['Description'])

            graph.add((name, RDF.type, OWL.NamedIndividual))
            graph.add((name, RDF.type, namespace.Manufacturers))
            if (name, RDFS.comment, None) not in graph:
                graph.add((name, RDFS.comment, info))
                graph.add((name, RDFS.comment, country))
            if (name, RDFS.seeAlso, None) not in graph:
                graph.add((name, RDFS.seeAlso, url))

    if category == 'Body':
        for key in data[category].keys():
            if key == 'Bumpers':
                cls = namespace.Bumpers
            elif key == 'Body_kits':
                cls = namespace.Body_kits
            elif key == 'Side_skirts':
                cls = namespace.Side_skirts
            elif key == 'Hoods':
                cls = namespace.Hoods
            elif key == 'Wings':
                cls = namespace.Wings

            #Iterating through keys and assign required vars
            for body in data[category][key]:
                name = body['Name'].replace(' ', '_')
                for char in ['®', '#', ',', '`']:
                    name = name.replace(char, '')
                name = namespace + URIRef(name)
                price = Literal(body['Price'])
                manufacturer = Literal(body['Manufacturer'])
                if 'CompatibleWithCar' in body.keys():
                    cwc = Literal(body['CompatibleWithCar'])
                if 'Material' in body.keys():
                    material = Literal(body['Material'])

                #Add found data to ontology
                graph.add((name, RDF.type, OWL.NamedIndividual))
                graph.add((name, RDF.type, cls))
                graph.add((name, namespace.hasManufacturer,
                           URIRef(namespace + manufacturer)))
                if 'CompatibleWithCar' in body.keys():
                    graph.add((name, namespace.compatibleWithCar, cwc))
                if 'Material' in body.keys():
                    graph.add((name, namespace.hasMaterial, material))
                if (name, namespace.hasPrice, None) in graph:
                    graph.set((name, namespace.hasPrice, price))
                else:
                    graph.add((name, namespace.hasPrice, price))

    if category == 'Brake_system':
        for key in data[category].keys():
            if key == 'Brake_disks':
                cls = namespace.Brake_disks
            elif key == 'Brake_pads':
                cls = namespace.Brake_pads
            elif key == 'Calipers':
                cls = namespace.Calipers

            for bs in data[category][key]:
                name = bs['Name'].replace(' ', '_')
                for char in ['®', '#', ',', '`']:
                    name = name.replace(char, '')
                name = namespace + URIRef(name)
                price = Literal(bs['Price'])
                manufacturer = Literal(bs['Manufacturer'].replace(' ', '_'))
                if 'Material' in bs.keys():
                    material = Literal(bs['Material'])
                if 'ForPublicRoad' in bs.keys():
                    fpr = Literal(bs['ForPublicRoad'])

                graph.add((name, RDF.type, OWL.NamedIndividual))
                graph.add((name, RDF.type, cls))
                graph.add((name, namespace.hasManufacturer,
                           URIRef(namespace + manufacturer)))
                if 'Material' in bs.keys():
                    graph.add((name, namespace.hasMaterial, material))
                if 'ForPublicRoad' in bs.keys():
                    graph.add((name, namespace.forPublicRoad, fpr))
                if (name, namespace.hasPrice, None) in graph:
                    graph.set((name, namespace.hasPrice, price))
                else:
                    graph.add((name, namespace.hasPrice, price))

    if category == 'Engine':
        for key in data[category].keys():
            if key == 'ECU':
                cls = namespace.ECU
            elif key == 'Exhaust_systems':
                cls = namespace.Exhaust_systems
            elif key == 'Fuel_systems':
                cls = namespace.Fuel_systems
            elif key == 'Intake_systems':
                cls = namespace.Intake_systems
            elif key == 'Stroker_kits':
                cls = namespace.Stroker_kits
            elif key == 'Turbochargers':
                cls = namespace.Turbochargers

            for engine in data[category][key]:
                name = engine['Name'].replace(' ', '_')
                for char in ['®', '#', ',', '`']:
                    name = name.replace(char, '')
                name = namespace + URIRef(name)
                price = Literal(engine['Price'])
                manufacturer = Literal(engine['Manufacturer'].replace(
                    ' ', '_'))
                if 'CompatibleWithEngine' in engine.keys():
                    cwe = Literal(engine['CompatibleWithEngine'])
                if 'CalculatedPotential' in engine.keys():
                    potential = Literal(engine['CalculatedPotential'])
                if 'Material' in engine.keys():
                    material = Literal(engine['Material'])

                graph.add((name, RDF.type, OWL.NamedIndividual))
                print(name, RDF.type, cls)
                graph.add((name, RDF.type, cls))
                print(name, namespace.hasManufacturer,
                      URIRef(namespace + manufacturer))
                graph.add((name, namespace.hasManufacturer,
                           URIRef(namespace + manufacturer)))
                if 'Material' in engine.keys():
                    graph.add((name, namespace.hasMaterial, material))
                if 'CompatibleWithEngine' in engine.keys():
                    graph.add((name, namespace.compatibleWithEngine, cwe))
                if 'CalculatedPotential' in engine.keys():
                    graph.add(
                        (name, namespace.hasCalculatedPotential, potential))
                if (name, namespace.hasPrice, None) in graph:
                    graph.set((name, namespace.hasPrice, price))
                else:
                    graph.add((name, namespace.hasPrice, price))

    if category == 'Interior':
        for key in data[category].keys():
            if key == 'Roll_cages':
                cls = namespace.Roll_cages
            elif key == 'Seats':
                cls = namespace.Seats
            elif key == 'Steering_wheels':
                cls = namespace.Steering_wheels

            for seat in data[category][key]:
                name = seat['Name'].replace(' ', '_')
                for char in ['®', '#', ',', '`']:
                    name = name.replace(char, '')
                name = namespace + URIRef(name)
                price = Literal(seat['Price'])
                manufacturer = Literal(seat['Manufacturer'].replace(' ', '_'))
                if 'CompatibleWithCar' in seat.keys():
                    cwc = Literal(seat['CompatibleWithCar'])
                if 'ForPublicRoad' in seat.keys():
                    fpr = Literal(seat['ForPublicRoad'])

                graph.add((name, RDF.type, OWL.NamedIndividual))
                graph.add((name, RDF.type, cls))
                graph.add((name, namespace.hasManufacturer,
                           URIRef(namespace + manufacturer)))
                if 'CompatibleWithCar' in seat.keys():
                    graph.add((name, namespace.compatibleWithCar, cwc))
                if 'ForPublicRoad' in seat.keys():
                    graph.add((name, namespace.forPublicRoad, fpr))
                if (name, namespace.hasPrice, None) in graph:
                    graph.set((name, namespace.hasPrice, price))
                else:
                    graph.add((name, namespace.hasPrice, price))

    if category == 'Suspension':
        for key in data[category].keys():
            if key == 'Anti_roll_bars':
                cls = namespace.Anti_roll_bars
            elif key == 'Body_stiffness':
                cls = namespace.Body_stiffness
            elif key == 'Springs_and_shock_absorbers':
                cls = namespace.Springs_and_shock_absorbers

            for spring in data[category][key]:
                name = spring['Name'].replace(' ', '_')
                for char in ['®', '#', ',', '`']:
                    name = name.replace(char, '')
                name = namespace + URIRef(name)
                price = Literal(spring['Price'])
                manufacturer = Literal(spring['Manufacturer'].replace(
                    ' ', '_'))

                graph.add((name, RDF.type, OWL.NamedIndividual))
                graph.add((name, RDF.type, cls))
                graph.add((name, namespace.hasManufacturer,
                           URIRef(namespace + manufacturer)))
                if (name, namespace.hasPrice, None) in graph:
                    graph.set((name, namespace.hasPrice, price))
                else:
                    graph.add((name, namespace.hasPrice, price))

    if category == 'Transmission':
        for key in data[category].keys():
            if key == 'Clutches':
                cls = namespace.Clutches
            elif key == 'Driveshafts':
                cls = namespace.Driveshafts

            for clutch in data[category][key]:
                name = clutch['Name'].replace(' ', '_')
                for char in ['®', '#', ',', '`']:
                    name = name.replace(char, '')
                name = namespace + URIRef(name)
                price = Literal(clutch['Price'])
                manufacturer = Literal(clutch['Manufacturer'].replace(
                    ' ', '_'))
                material = Literal(clutch['Material'])

                graph.add((name, RDF.type, OWL.NamedIndividual))
                graph.add((name, RDF.type, cls))
                graph.add((name, namespace.hasManufacturer,
                           URIRef(namespace + manufacturer)))
                graph.add((name, namespace.hasMaterial, material))
                if (name, namespace.hasPrice, None) in graph:
                    graph.set((name, namespace.hasPrice, price))
                else:
                    graph.add((name, namespace.hasPrice, price))

    if category == 'Wheels':
        for key in data[category].keys():
            if key == 'Rims':
                cls = namespace.Rims
            elif key == 'Tires':
                cls = namespace.Tires

            for wheel in data[category][key]:
                name = wheel['Name'].replace(' ', '_')
                for char in ['®', '#', ',', '`']:
                    name = name.replace(char, '')
                name = namespace + URIRef(name)
                price = Literal(wheel['Price'])
                manufacturer = Literal(wheel['Manufacturer'].replace(' ', '_'))
                if 'Type' in wheel.keys():
                    wheelType = Literal(wheel['Type'])
                if 'Size' in wheel.keys():
                    size = Literal(wheel['Size'])
                if 'MaxSpeed' in wheel.keys():
                    maxSpeed = Literal(wheel['MaxSpeed'])
                if 'ForPublicRoad' in wheel.keys():
                    fpr = Literal(wheel['ForPublicRoad'])

                graph.add((name, RDF.type, OWL.NamedIndividual))
                graph.add((name, RDF.type, cls))
                graph.add((name, namespace.hasManufacturer,
                           URIRef(namespace + manufacturer)))
                if 'Type' in wheel.keys():
                    graph.add((name, namespace.hasType, wheelType))
                if 'Size' in wheel.keys():
                    graph.add((name, namespace.hasSize, size))
                if 'MaxSpeed' in wheel.keys():
                    graph.add((name, namespace.hasMaxSpeed, maxSpeed))
                if 'ForPublicRoad' in wheel.keys():
                    graph.add((name, namespace.forPublicRoad, fpr))
                if (name, namespace.hasPrice, None) in graph:
                    graph.set((name, namespace.hasPrice, price))
                else:
                    graph.add((name, namespace.hasPrice, price))
Example #26
0
File: script.py Project: tkuhn/Metrics
class FairMetricData():
    def __init__(self, id):
        self.base = 'https://purl.org/fair-metrics/'
        self.id = URIRef(id)
        self.assertion = URIRef(id+'#assertion')

        # id = id.replace(self.base, '')  # HACK -- remove this line before merging commit
        self.g = ConjunctiveGraph()
        self.g.parse(id, format='trig')

    def getID(self):
        return self.id

    def getShortID(self):
        return self.id.replace(self.base, '')

    def getAuthors(self):
        authors = [o.toPython() for o in self.g.objects(subject=self.assertion, predicate=DCTERMS.author)]
        authors.sort()
        return ' \\\\ '.join(authors)

    def getTitle(self):
        return ', '.join([o.toPython() for o in self.g.objects(subject=self.assertion, predicate=RDFS.comment)])

    def getShortTitle(self):
        return ', '.join([o.toPython() for o in self.g.objects(subject=self.assertion, predicate=DCTERMS.title)])

    def getTopicDescription(self):
        descs = []
        for o in self.g.objects(subject=self.id, predicate=FOAF.primaryTopic):
            # o should be fair:A1.1
            for o2 in fairGraph.objects(subject=o, predicate=DCTERMS.description):
                descs.append(o2.toPython())
        return ' '.join(descs)

    def getTopicTitle(self):
        descs = []
        for o in self.g.objects(subject=self.id, predicate=FOAF.primaryTopic):
            # o should be fair:A1.1
            for o2 in fairGraph.objects(subject=o, predicate=DCTERMS.title):
                descs.append(o2.toPython())
        return ' '.join(descs)

    def getMeasuring(self):
        # return fm:measuring
        return self.getFMPropertyValue(FM.measuring)

    def getRationale(self):
        # return fm:rationale
        return self.getFMPropertyValue(FM.rationale)

    def getRequirements(self):
        # return fm:requirements
        return self.getFMPropertyValue(FM.requirements)

    def getProcedure(self):
        # return fm:procedure
        return self.getFMPropertyValue(FM.procedure)

    def getValidation(self):
        # return fm:validation
        return self.getFMPropertyValue(FM.validation)

    def getRelevance(self):
        # return fm:relevance
        return self.getFMPropertyValue(FM.relevance)

    def getExamples(self):
        # return fm:examples
        return self.getFMPropertyValue(FM.examples)

    def getComments(self):
        # return fm:comments
        return self.getFMPropertyValue(FM.comments)

    def getFMPropertyLabel(self, property):
        return ', '.join([ o.toPython() for o in fairTermGraph.objects(subject=FM[property], predicate=RDFS['label'])])

    def getFMPropertyValue(self, property):
        return ', '.join([o.toPython() for o in self.g.objects(subject=self.assertion, predicate=property)])
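
The FairMetricData class above also depends on module-level names that sit outside this excerpt (FM, fairGraph, fairTermGraph and the rdflib namespaces). A rough, assumed sketch of those definitions follows; the FM namespace URI in particular is a guess.

from rdflib import ConjunctiveGraph, Namespace, URIRef
from rdflib.namespace import DCTERMS, FOAF, RDFS

FM = Namespace('https://purl.org/fair-metrics/terms/')  # assumed fm: vocabulary namespace
fairGraph = ConjunctiveGraph()      # FAIR principles graph, populated elsewhere in script.py
fairTermGraph = ConjunctiveGraph()  # fm: vocabulary graph, populated elsewhere in script.py
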
Example #27
0
def convert(teifile, namespace):
	#graph_uri = "http://contextus.net/resource/blue_velvet/"
	
	ns = Namespace(namespace)

	graph = ConjunctiveGraph()
	graph.load(teifile, format="rdfa")
	
	graph.bind("default", ns)
	
	to_update = ""

	for prefix, nsuri in graph.namespaces(): 
		#print("prefix: " + str(prefix) + " - " + str(nsuri))
		if nsuri in ns:
			to_update = nsuri
			
	for s, p, o in graph:
#		print s, p, o
		if to_update != "" and to_update in s:
			graph.remove((s, p, o))
			s = URIRef(s.replace(to_update, ns))
			graph.add((s, p, o))
	
	act = ""
	scene = ""
	line = ""
	char = 0
	loc = 0
	
	
	#timeline = ns['timeline/narrative']
	#graph.add((timeline, RDF.type, ome['Timeline']))

	tree = ET.parse(teifile)
	cast = dict()
	
	titleNode = tree.find('//title')
	
	castItems = tree.findall('/text/body/div1/castList//castItem')
	for castItem in castItems:
		actorNode = castItem.find('actor')
		roleNode = castItem.find('role')

		if roleNode != None:
			id = roleNode.get("{http://www.w3.org/XML/1998/namespace}id")
		
		#print("Found castItem!")

		actor = None
		role = None

		# Check to see if we already have an entry
		if(roleNode != None and roleNode.get("about")):		

			charname = roleNode.get("about")
			
			if(charname.find(":") > -1):
				nmsp,nom = charname.split(":", 1)		
				charcode =  "character/" + str(char)
				charref = nmsp + ":" + charcode + "]"
				role = extractCURIEorURI(graph, charref,nom[0:-1])
				char += 1		
				#print("1:" + charname + ": adding id " + id + " to " + role)
			else:
				role = extractCURIEorURI(graph, charname)
				#print("2:" + charname + ": adding id " + id + " to " + role)

			cast[id] = role
			graph.add((role, RDF.type, omb['Character']))
			#print(charname + ": adding id " + id + " to " + role)
		
		if(actorNode != None and actorNode.get("about")):
			actor = extractCURIEorURI(graph, actorNode.get("about"))
			graph.add((actor, RDF.type, omb['Being']))

		if actor != None and role != None:
			graph.add((actor, omb['portrays'], role))
			graph.add((role, omb['portrayed-by'], actor))

	eventCount = 1
	groupCount = 1
	prior_event = None
	
	actItems = tree.findall('/text/body/div1')
	ref = ""
	
	for actItem in actItems:
	
		if actItem.get("type") == "act":
			act = actItem.get("n")
		
		sceneItems = actItem.findall('div2')
		
		for sceneItem in sceneItems:
			
			#print("Found sceneItems!")
			
			if sceneItem.get("type") == "scene":
				scene = sceneItem.get("n")		
			
			# Work out the location of this scene
			location = None
			stageItems = sceneItem.findall("stage")
			
			#internalnum = 1
			stagenum = 0
			speechnum = 1
			
			for stageItem in stageItems:
				if stageItem.get("type") == "location":
					# The RDFa parser doesn't handle the type - so we can grab that here.
					
					if stageItem.get("about") != None:
						locname = stageItem.get("about")
					
						# Adding location type/oml:space for location
						if stageItem.get("typeof") and stageItem.get("about"):
							type = extractCURIEorURI(graph, stageItem.get("typeof"))
							#print "1. Location: " + str(location) + " Type: " + str(type)
						elif stageItem.get("about"):	
							#print "2. Location: " + str(locname)											
							type = extractCURIEorURI(graph, oml['Space'])						
						
						
						# Get location value and add rdfs:label is location is not using the TEI value
						if(locname.find(":") > -1):
							nmsp,nom = locname.split(":", 1)		
							loccode =  "location/" + str(loc)
							locref = nmsp + ":" + loccode + "]"
							location = extractCURIEorURI(graph, locref, nom[0:-1])
							loc += 1
							graph.add((location, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(nom[0:-1])))
						else:
							location = extractCURIEorURI(graph, stageItem.get("about"))
						
						# Add location to graph
						graph.add((location, RDF.type, type))	
					else:
						location = ""
					
						
					#print("Adding location type: " + type + " (" + location + ")")
	
	
			if cast:
				# Work out a list of all cast in a given section
				currentCast = list()
				speakers = list()
			
	
			# Iterate through elements within stageItem
				# Find speaker events and add to list of current cast for inclusion in social event
				# Find reference events and add to ongoing social event ?
				# Find stage events
					# If event is an entrance then
						# create social event for people talking before entrance
						# create travel event i.e. entrance
						# add new arrival to current cast list
					# If event is exit event then
						# create social event for people talking before exit
						# create travel event i.e. exit
							# if leavers are not named directly the calculate who is leaving
						# remove leavers from current cast list
				# If reach end of scene then create social event with current cast list
				
				#Also need to check if social event before exit has same composition as social event after exit since then they should be merged
				
			event = ns['event/'+str(eventCount)]
			group = ns['group/'+str(groupCount)]	
			
			refersTo = list()
			#parent = None
			speakerNodes = list()
			speakerRef = list()
			
			xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:"  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
			stagecount = 0
			stage_array = list()
						
			for node in sceneItem.getiterator():
				#print("Node: " + node.tag)	
				
				
				"""
				if node.tag == "lb":
					if node.get("ed") == "F1":
						line = node.get("n")	
						if titleNode != None:
							ref = titleNode.text + " " + str(act) + "." + str(scene) + "." + str(line)	
						else:
							ref = str(act) + "." + str(scene) + "." + str(line)
							
						#xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:"  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) + "#xpointer(//lb[@ed='F1' and @n='" + str(line)	 + "'])"
						xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:"  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
						#print("Ref: " + xpointer)
				"""		
						
				if node.tag == "sp":
					id = node.get("who")
					
					if id and cast:
						speakers.append(cast[id[1:]])	
						speakerNodes.append(node)
						
						if perseusid == None:
							speakerRef.append(ref)
						else:
							#speechRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(int(line) + 1) + "']/ancestor::sp)"
							speechRef  = xpointer + "#xpointer(//div2/sp[" + str(speechnum) + "])";
							speakerRef.append(speechRef)
						#print("Line ref: " + ref)
						
						if cast[id[1:]] not in currentCast:
							currentCast.append(cast[id[1:]])
							
					#internalnum = 1
					speechnum += 1
					stagecount = 0
					
					
					previousl = 0
					
					for subnode in node.getiterator():
						if subnode.tag == "l":
							previousl += 1
						
						if subnode.tag == "stage":
							#print ("Stagecount: " + str(stagecount) + " Previousl: " + str(previousl) + "\n")
							stage_array.append(previousl)
							stagecount += 1
							
					
						
				elif node.tag == "stage":
					
					if stagecount > 0:
						s_max = len(stage_array)
						diff = s_max - stagecount
						
						#if diff == 0:
						#	stagenum += 1
					
						entRef = xpointer + "#xpointer(//div2/sp[" + str(speechnum - 1) + "]/l[" + str(stage_array[diff]) +"]/stage)";
						#internalnum += 1
						stagecount -= 1
					else:
						stagenum += 1
						entRef = xpointer + "#xpointer(//div2/stage[" + str(stagenum) +"])";				
					
					if node.get("type") == "entrance":		
					
						# Add Social Events for all the people who spoke since the last break (if there were any)
						
						update = list()
						update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
						eventCount = update[0]
						prior_event = update[1]
						
						event = ns['event/'+str(eventCount)]
						
						speakers = list()
						speakerNodes = list()
						speakerRef = list()
					
						# Add Travel Event
						
						graph.add((event, RDF.type, omj['Travel']))
						
						if perseusid == None:
							graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), Literal(ref)))
						else:
							#entRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "']/following-sibling::*[1]/self::stage)"
							graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(entRef)))
						
						#print("Entrance event. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))	
	
						#print("Found entrence event!")
						if location:
							graph.add((event, ome['to'], location))		
							
						involved = node.get("about")
						
						if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
							involved = involved[1:-1]
							
						chunks = involved.split()
						
						chunk_count = len(chunks)
						
						if chunk_count > 1:
							#type = extractCURIEorURI(graph, "[omb:Group]")
							#graph.add((group, RDF.type, type))
							graph.add((group, RDF.type, omb['Group']))
							
						event_label = ""	
						en = 1
						
						for chunk in chunks:
							striped = chunk.strip()
							
							if(len(striped) > 0 and striped[0] == "[" and striped[-1] == "]"):
								striped = striped[1:-1]
								currentCast.append(cast[striped])								
							
							if chunk_count > 1:
								graph.add((group, ome['contains'], cast[striped]))
								
								if en == chunk_count:
									event_label = event_label[0:-2] + " and " + striped
									graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " arrive")))
								elif en < chunk_count:
									event_label += striped + ", "									
									
							else:
								#print("Adding person as subject-entity to entry event "   + str(eventCount))
								graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(striped + " arrives")))
								graph.add((event, ome['has-subject-entity'], cast[striped]))
								
							en += 1
									
							
						if chunk_count > 1:
							graph.add((event, ome['has-subject-entity'], group))	
							#print("Adding group as subject-entity to entry event "   + str(eventCount))
							groupCount = groupCount + 1
							group = ns['group/'+str(groupCount)]	
		
						if(prior_event):
							graph.add((event, ome['follows'], prior_event))
							graph.add((prior_event, ome['precedes'], event))
		
						prior_event = event					
	
						eventCount = eventCount + 1
						event = ns['event/'+str(eventCount)]
									
					if node.get("type") == "exit":		
						
						# Add Social Events for all the people who spoke since the last break (if there were any)
						update = list()
						update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
						eventCount = update[0]
						prior_event = update[1]
						
						event = ns['event/'+str(eventCount)]
						
						speakers = list()
						speakerNodes = list()
						speakerRef = list()
						
						# Add Travel Event
					
						graph.add((event, RDF.type, omj['Travel']))		
						
						if perseusid == None:
							graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), Literal(ref)))
						else:
							#exitRef = xpointer
							#graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(exitRef)))
							graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(entRef)))
	
						#print("Found entrence event!")
						if location != None:
							graph.add((event, ome['from'], location))		
							
						involved = node.get("about")	
						
						if involved.strip() == "" or "-all" in involved:
							# Remove everyone
													
							#print("Exit all. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))	
							
							#for peep in currentCast:	
							#	print(peep)
							
							if len(currentCast) > 1:							
								#type = extractCURIEorURI(graph, "[omb:Group]")
								#graph.add((group, RDF.type, type))
								graph.add((group, RDF.type, omb['Group']))
															
							event_label = ""
							en = 1
							
							for peep in currentCast:	
								short_ref = ""
								for key, value in cast.iteritems():
									if peep == value:	
										short_ref = key
							
								if len(currentCast) > 1:
									graph.add((group, ome['contains'], peep))
									
									if en == len(currentCast):
										event_label = event_label[0:-2] + " and " + short_ref
										graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave")))	
									elif en < len(currentCast):
										event_label += short_ref + ", "
																	
								else:
									#print("Adding person as subject-entity to exuant event "   + str(eventCount))
									graph.add((event, ome['has-subject-entity'], peep))
									graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(short_ref + " leaves")))
									
								en += 1
	
							if len(currentCast) > 1:
								graph.add((event, ome['has-subject-entity'], group))	
								#print("Adding group as subject-entity to exuant event "   + str(eventCount))
								groupCount = groupCount + 1
								group = ns['group/'+str(groupCount)]	
							
							currentCast = list()
						
						elif "!" in involved:
							#print("Exit except some. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))	
							
							#print("Event: " + involved);
							
							if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
								involved = involved[1:-1]	
								
							involved = involved.strip()	
							
							if(len(involved) > 0 and involved[0] == "!" and involved[1] == "(" and involved[-1] == ")"):
								involved = involved[2:-1]	
							
							#print("involved: " + involved)
							
							striped = involved.strip()	
							
							c_ids = striped.split()
							
							chunks = list()
							
							for stay in c_ids:
								#print("Staying: " + cast[stay])
								chunks.append(cast[stay])							
							
							staying = list()
							going = list()
							
							for player in currentCast:
								#print("Player: " + player)							
								if player in chunks:
									staying.append(player)
								else:
									going.append(player)
									
							going_count = len(going)	
							
							if going_count > 1:
								#type = extractCURIEorURI(graph, "[omb:Group]")
								#graph.add((group, RDF.type, type))	
								graph.add((group, RDF.type, omb['Group']))
								

							event_label = ""
							en = 1
								
							for ghost in going:							
								#print("ghost: " + ghost)
								
								short_ref = ""
								for key, value in cast.iteritems():
									if ghost == value:	
										short_ref = key
										
										
								if ghost in currentCast:
									currentCast.remove(ghost)
									#print("Current cast count: "  + str(len(currentCast)))	
								
								if going_count > 1:
									graph.add((group, ome['contains'], ghost))
									
									if en == len(going):
										event_label = event_label[0:-2] + " and " + short_ref
										graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave")))	
									elif en < len(going):
										event_label += short_ref + ", "	
										
								else:
									#print("Adding person as subject-entity to exit event "   + str(eventCount))
									graph.add((event, ome['has-subject-entity'], ghost))
									graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(short_ref + " leaves")))
									
								en += 1
								
								
							if going_count > 1:
								graph.add((event, ome['has-subject-entity'], group))	
								#print("Adding group as subject-entity to exit event "   + str(eventCount))
								groupCount = groupCount + 1
								group = ns['group/'+str(groupCount)]	
		
										
						else:
							#print("Exit some. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))	
							
							if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
								involved = involved[1:-1]	
								
							striped = involved.strip()							
							chunks = striped.split()
							
							#print("striped: " + striped)
					
							chunk_count = len(chunks)
						
							if chunk_count > 1:
								#type = extractCURIEorURI(graph, "[omb:Group]")
								#graph.add((group, RDF.type, type))
								graph.add((group, RDF.type, omb['Group']))
								
								
							event_label = ""
							en = 1								
							
							for chunk in chunks:							
								#print("chunk: " + chunk)			
									
								ghost = cast[chunk]
								
								#print("ghost: " + ghost)
								
								if ghost in currentCast:
									currentCast.remove(ghost)
									#print("Current cast count: "  + str(len(currentCast)))	
								
								if chunk_count > 1:
									graph.add((group, ome['contains'], ghost))
									
									if en == chunk_count:
										event_label = event_label[0:-2] + " and " + chunk
										graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave")))	
									elif en < chunk_count:
										event_label += chunk + ", "										
									
								else:
									#print("Adding person as subject-entity to exit event "   + str(eventCount))
									graph.add((event, ome['has-subject-entity'], ghost))
									graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(chunk + " leaves")))
									
								en += 1	
								
							if chunk_count > 1:
								graph.add((event, ome['has-subject-entity'], group))	
								#print("Adding group as subject-entity to exit event "   + str(eventCount))
								groupCount = groupCount + 1
								group = ns['group/'+str(groupCount)]	
	
		
							
							
						if(prior_event):
							graph.add((event, ome['follows'], prior_event))
							graph.add((prior_event, ome['precedes'], event))
		
						prior_event = event					
	
						eventCount = eventCount + 1
						event = ns['event/'+str(eventCount)]
						
				#elif node.tag == "rs":	
				#	#print("Found rs node")
				#	if parent:
				#		#print("Parent type is " + parent.tag)
				#		if parent.tag == "p" or  parent.tag == "l":
				#			refersTo.append(node.get("about"))
							
				#parent = node
					
	
			# Add Social Events for all the people who spoke since the last break (if there were any)
			#print("Final section of scene, currentCast:" + str(len(currentCast)) + " sperkers: " + str(len(speakers)))
			update = list()
			update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
			eventCount = update[0]
			prior_event = update[1]
			
			event = ns['event/'+str(eventCount)]
			group = ns['group/'+str(groupCount)]
				
			speakers = list()
			speakerNodes = list()
			currentCast = list()
			speakerRef = list()
		
		
		
	print graph.serialize(format='xml')		
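
# A minimal, standalone sketch of the event-chaining pattern used above,
# assuming only rdflib; the namespace URIs below are illustrative placeholders
# rather than the ones used by the original script.
from rdflib import Graph, Namespace, RDF

ns = Namespace('http://example.org/play/')
ome = Namespace('http://example.org/ome/')
omb = Namespace('http://example.org/omb/')

sketch = Graph()
prior_event = None
for eventCount in range(1, 4):
    event = ns['event/' + str(eventCount)]
    sketch.add((event, RDF.type, ome['Travel']))
    if prior_event:
        # each new event follows the previous one, which in turn precedes it
        sketch.add((event, ome['follows'], prior_event))
        sketch.add((prior_event, ome['precedes'], event))
    prior_event = event

# characters leaving together are wrapped in a single group
group = ns['group/1']
sketch.add((group, RDF.type, omb['Group']))
sketch.add((group, ome['contains'], ns['character/1']))
sketch.add((prior_event, ome['has-subject-entity'], group))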
예제 #28
0
# Imports used by this snippet; `namespaces`, `wikidata_sparql`,
# `LTFSourceContext` and `Cluster` are project-specific helpers assumed to be
# defined elsewhere in the module this class was extracted from.
import json
from collections import Counter

from rdflib import Literal, URIRef
from rdflib.namespace import split_uri


class ClusterMember:
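    """A single member (entity or event) of an AIDA cluster.

    Labels, types, link targets, Freebase ids and Wikidata lookups are
    fetched lazily via SPARQL the first time the corresponding property is
    accessed.
    """
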
    def __init__(self, model, uri, label=None, type_=None, debug_info=None):
        self.model = model
        self.uri = URIRef(uri)
        self.__id = None
        self.__label = label
        self.__all_labels = None
        self.__type = type_
        self.__targets = None
        self.__freebases = None
        self.__qids = None
        self.__q_labels = None
        self.__q_aliases = None
        self.__q_urls = None
        self.__source = None
        self.__context_pos = []
        self.__context_extractor = None
        self.__cluster: Cluster = None
        self.__debug_info = debug_info

        if model.graph:
            self.__open_clause = 'GRAPH <%s> {' % self.model.graph
            self.__close_clause = '}'
        else:
            self.__open_clause = self.__close_clause = ''

    @property
    def id(self):
        if not self.__id:
            self.__id = self.uri.replace(
                'http://www.isi.edu/gaia/entities/',
                '').replace('http://www.columbia.edu/entities/', '')
        return self.__id

    @property
    def label(self):
        if not self.__label:
            self._init_member()
        return self.__label

    @property
    def all_labels(self):
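        """Return (label, count) pairs gathered from skos:prefLabel
        justifications and aida:hasName values, most frequent first."""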
        if not self.__all_labels:
            self.__all_labels = Counter()
            query = """
                SELECT ?label (COUNT(?label) AS ?n)
                WHERE {
                  ?member aida:justifiedBy/skos:prefLabel ?label .
                }
                GROUP BY ?label
                ORDER BY DESC(?n)
            """
            for label, n in self.model.sparql.query(query, namespaces,
                                                    {'member': self.uri}):
                if label:
                    label = " ".join(label.split())  # remove double spaces
                    self.__all_labels[label] = int(n)

            query = """
                SELECT ?label (COUNT(?label) AS ?n)
                    WHERE {
                      ?member aida:hasName ?label .
                    }
                    GROUP BY ?label
                    ORDER BY DESC(?n)
                """
            for label, n in self.model.sparql.query(query, namespaces,
                                                    {'member': self.uri}):
                if label:
                    label = " ".join(label.split())  # remove double spaces
                    if label in self.__all_labels:
                        self.__all_labels[label] += int(n)
                    else:
                        self.__all_labels[label] = int(n)

        return self.__all_labels.most_common()

    @property
    def type(self):
        if not self.__type:
            self._init_member()
        return self.__type

    @property
    def type_text(self):
        _, text = split_uri(self.type)
        return text

    @property
    def targets(self):
        if self.__targets is None:
            self._init_member()
        return self.__targets

    @property
    def freebases(self):
        if self.__freebases is None:
            self._init_member()
        return self.__freebases

    @property
    def qids(self):
        if self.__qids is None and self.freebases:
            self._init_qnode()
        return self.__qids

    @property
    def q_urls(self):
        if self.__qids is None and self.freebases:
            self._init_qnode()
        return self.__q_urls

    @property
    def q_labels(self):
        if self.__q_labels is None and self.freebases:
            self._init_qnode()
        return self.__q_labels

    @property
    def q_aliases(self):
        if self.__q_aliases is None and self.freebases:
            self._init_qnode()
        return self.__q_aliases

    def _init_qnode(self):
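        """Resolve each linked Freebase id to its Wikidata QID, URL, English
        label and aliases via the wdt:P646 (Freebase ID) property."""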
        self.__qids = {}  # qid to score
        self.__q_urls = {}
        self.__q_labels = {}
        self.__q_aliases = {}

        for fbid, score in self.freebases.items():
            if ":NIL" not in fbid:
                fbid = '/' + fbid[fbid.find(':') + 1:].replace('.', '/')
                query = """
                    SELECT ?qid ?label WHERE {
                      ?qid wdt:P646 ?freebase .
                      ?qid rdfs:label ?label filter (lang(?label) = "en") .
                    }
                    LIMIT 1
                """
                for q_url, label in wikidata_sparql.query(
                        query, namespaces, {'freebase': Literal(fbid)}):
                    qid = str(q_url).rsplit('/', 1)[1]
                    self.__qids[qid] = score
                    self.__q_urls[qid] = str(q_url)
                    self.__q_labels[qid] = str(label)

                query = """
                    SELECT ?qid ?alias WHERE {
                      ?qid wdt:P646 ?freebase .
                      ?qid skos:altLabel ?alias filter (lang(?alias) = "en") .
                    }
                """
                aliases = []
                qid = None
                for q_url, alias in wikidata_sparql.query(
                        query, namespaces, {'freebase': Literal(fbid)}):
                    qid = str(q_url).rsplit('/', 1)[1]
                    aliases.append(str(alias))
                if qid is not None:
                    self.__q_aliases[qid] = ', '.join(aliases)

    @property
    def context_extractor(self):
        if self.__context_extractor is None:
            self.__context_extractor = LTFSourceContext(self.source)
        return self.__context_extractor

    @property
    def roles(self):
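        """Yield (role, ClusterMember) pairs for this event's arguments, read
        from the reified rdf:Statement triples that have it as rdf:subject."""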
        query = """
        SELECT ?pred ?obj ?objtype (MIN(?objlbl) AS ?objlabel)
        WHERE {
            ?statement rdf:subject ?event ;
                       rdf:predicate ?pred ;
                       rdf:object ?obj .
            ?objstate rdf:subject ?obj ;
                      rdf:predicate rdf:type ;
                      rdf:object ?objtype .
            OPTIONAL { ?obj aida:hasName ?objlbl }
        }
        GROUP BY ?pred ?obj ?objtype
        """
        for pred, obj, obj_type, obj_lbl in self.model.sparql.query(
                query, namespaces, {'event': self.uri}):
            if not obj_lbl:
                _, obj_lbl = split_uri(obj_type)
            # _, pred = split_uri(pred)
            ind = pred.find('_')
            pred = pred[ind + 1:]
            yield pred, ClusterMember(self.model, obj, obj_lbl, obj_type)

    @property
    def events_by_role(self):
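        """Yield (role, ClusterMember) pairs for the events in which this
        member fills an argument role."""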
        query = """
      SELECT ?pred ?event ?event_type (MIN(?lbl) AS ?label)
      WHERE {
          ?event a aida:Event .
          ?statement rdf:subject ?event ;
                    rdf:predicate ?pred ;
                    rdf:object ?obj .
          ?event_state rdf:subject ?event ;
                    rdf:predicate rdf:type ;
                    rdf:object ?event_type .
          OPTIONAL { ?event aida:justifiedBy/skos:prefLabel ?lbl }
      }
      GROUP BY ?pred ?event ?event_type
      """
        for pred, event, event_type, event_lbl in self.model.sparql.query(
                query, namespaces, {'obj': self.uri}):
            if not event_lbl:
                _, event_lbl = split_uri(event_type)
            ind = pred.find('_')
            pred = pred[ind + 1:]
            yield pred, ClusterMember(self.model, event, event_lbl, event_type)

    @property
    def entity_relations(self):
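        """Yield (relation type, related object, label) tuples for every
        aida:Relation that links this member to another object."""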
        query = """
        SELECT ?relation ?pred2 ?obj2 ?relation_type (min(?lbl) as ?label)
        WHERE {
            ?relation a aida:Relation .
            ?s1 rdf:subject ?relation ;
                        rdf:predicate ?pred ;
                        rdf:object ?obj .
            ?s2 rdf:subject ?relation ;
                        rdf:predicate rdf:type ;
                        rdf:object ?relation_type .
            ?s3 rdf:subject ?relation ;
                        rdf:predicate ?pred2 ;
                        rdf:object ?obj2 .
            OPTIONAL {?obj2 aida:hasName ?lbl}
            filter(?s3 != ?s2 && ?s3 != ?s1)
        }
        GROUP BY ?relation ?pred2 ?obj2 ?relation_type
        """
        for relation, pred, obj, relation_type, label in self.model.sparql.query(
                query, namespaces, {'obj': self.uri}):
            _, relation_type = split_uri(relation_type)
            ind = pred.find('_')
            pred = pred[ind + 1:]
            yield relation_type, obj, label

    @property
    def cluster(self):
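        """The Cluster this member belongs to; memberships where the member is
        the cluster's own prototype are skipped."""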
        if self.__cluster is None:
            query = "SELECT ?cluster WHERE { %s ?membership aida:cluster ?cluster ; aida:clusterMember ?member . MINUS {?cluster aida:prototype ?member} %s}" % (
                self.__open_clause, self.__close_clause)
            for cluster, in self.model.sparql.query(query, namespaces,
                                                    {'member': self.uri}):
                self.__cluster = self.model.get_cluster(cluster)
        return self.__cluster

    def _init_member(self):
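        """Populate label, type, link targets and Freebase ids, from the
        supplied debug info when available, otherwise via SPARQL."""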
        query = """
SELECT ?label ?type
WHERE {
  OPTIONAL { ?member aida:hasName ?label }
  OPTIONAL { ?member aida:justifiedBy ?justification .
    ?justification skos:prefLabel ?label }
  ?statement rdf:subject ?member ;
             rdf:predicate rdf:type ;
             rdf:object ?type .
}
LIMIT 1 """
        for label, type_ in self.model.sparql.query(query, namespaces,
                                                    {'member': self.uri}):
            if not label:
                _, label = split_uri(type_)
            self.__label = label
            self.__type = type_

        self.__targets = {}
        if self.__debug_info:
            if self.__debug_info['targets']:
                for i in range(0, len(self.__debug_info['targets'])):
                    target = self.__debug_info['targets'][i]
                    score = self.__debug_info['target_scores'][i]
                    self.__targets[target] = score
        else:
            query = """
                SELECT ?target
                WHERE {
                  ?member aida:link/aida:linkTarget ?target 
                } """
            for target, in self.model.sparql.query(query, namespaces,
                                                   {'member': self.uri}):
                self.__targets[str(target)] = 0

        self.__freebases = {}
        if self.__debug_info:
            if self.__debug_info['fbid']:
                for i in range(0, len(self.__debug_info['fbid'])):
                    fbid = self.__debug_info['fbid'][i]
                    score = self.__debug_info['fbid_score_avg'][i]
                    self.__freebases[fbid] = score
        else:
            query = """
                SELECT DISTINCT ?fbid {
                   ?member aida:privateData [
                        aida:jsonContent ?fbid ;
                        aida:system <http://www.rpi.edu/EDL_Freebase>
                    ]
                }
            """

            for j_fbid, in self.model.sparql.query(query, namespaces,
                                                   {'member': self.uri}):
                fbids = json.loads(j_fbid).get('freebase_link', {}).keys()
                for fbid in fbids:
                    self.__freebases[fbid] = 0

    def _init_source(self):
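        """Fetch the member's source document id and all (start, end)
        justification offsets, ordered by start offset."""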
        query = """
SELECT DISTINCT ?source ?start ?end
WHERE {
  ?member aida:justifiedBy ?justification .
  ?justification aida:source ?source ;
                 aida:startOffset ?start ;
                 aida:endOffsetInclusive ?end .
}
ORDER BY ?start """
        for source, start, end in self.model.sparql.query(
                query, namespaces, {'member': self.uri}):
            self.__source = str(source)
            self.__context_pos.append((int(start), int(end)))

    @property
    def source(self):
        if not self.__source:
            self._init_source()
        return self.__source

    @property
    def mention(self):
        if self.context_extractor.doc_exists():
            for start, end in self.__context_pos:
                res = self.context_extractor.query_context(start, end)
                if not res:
                    continue
                yield res

    def __hash__(self):
        return self.uri.__hash__()
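

# A minimal usage sketch, assuming a `model` object that exposes `.sparql`,
# `.graph` and `.get_cluster()` as ClusterMember expects, and any entity or
# event URI present in the triple store.
def show_member(model, member_uri):
    member = ClusterMember(model, member_uri)
    print(member.id, member.type_text, member.label)
    for label, count in member.all_labels:
        print('  label: %s (%s)' % (label, count))
    for role, argument in member.roles:
        print('  %s -> %s' % (role, argument.label))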