# Load the AIFB RDF dump plus its train/test splits and convert the graph
# into the project's knowledge-graph representation.
from collections import defaultdict, Counter

# rdflib and pandas are used directly below, so import them explicitly
# instead of relying on whatever `from graph import *` happens to re-export.
import pandas as pd
import rdflib

from graph import *
from rdf2vec import RDF2VecTransformer

print(end='Loading data... ', flush=True)
g = rdflib.Graph()
g.parse('../data/aifb.n3', format='n3')
print('OK')

test_data = pd.read_csv('../data/AIFB_test.tsv', sep='\t')
train_data = pd.read_csv('../data/AIFB_train.tsv', sep='\t')

train_people = [rdflib.URIRef(x) for x in train_data['person']]
train_labels = train_data['label_affiliation']

test_people = [rdflib.URIRef(x) for x in test_data['person']]
test_labels = test_data['label_affiliation']

# Predicates handed to rdflib_to_kg as `label_predicates`.
# NOTE(review): presumably these leak the target label and are therefore
# excluded from the extracted graph -- confirm against rdflib_to_kg.
label_predicates = [
    rdflib.URIRef('http://swrc.ontoware.org/ontology#affiliation'),
    rdflib.URIRef('http://swrc.ontoware.org/ontology#employs'),
    rdflib.URIRef('http://swrc.ontoware.org/ontology#carriedOutBy'),
]

# Extract the train and test graphs
kg = rdflib_to_kg(g, label_predicates=label_predicates)
def rdfapi(request, action="read"):
    """Read, create or destroy the RDF relations of one resource.

    The subject comes from the ``s`` request parameter; a value that is not
    already a URI is looked up in ``surf.ns.LOCAL``.

    Actions:
      * ``read``    -- return ``{'rows': [...], 'count': 0}`` with one row per
        related node: direct relations (``supported_relations``, ``'d': True``)
        followed by inverse relations (labelled through
        ``reverse_relations``, ``'d': False``).
      * ``create``  -- add the triple ``(s, row['p'], row['o'])`` for every
        row posted in the ``rows`` parameter.
      * ``destroy`` -- remove the triple encoded as ``"s|p|o"`` in
        ``row['id']`` for every posted row.

    Returns an ``HttpResponse`` whose body is indented JSON. Unauthenticated
    users asking for anything but ``read`` get a 403 with a JSON error; an
    unknown ``action`` gets a 400 with a JSON error.
    """

    def respond(payload, status=200):
        body = simplejson.dumps(payload, sort_keys=True, indent=4)
        # Served as text/plain rather than application/json on purpose:
        # debug setting so the indented JSON is readable in a browser.
        return HttpResponse(body, mimetype="text/plain", status=status)

    def posted_rows():
        # A single posted object is accepted as a one-element list.
        rows = simplejson.loads(request.REQUEST['rows'])
        return rows if isinstance(rows, list) else [rows]

    # get base subject
    s = request.REQUEST['s']
    if not surf.util.is_uri(s):
        s = surf.ns.LOCAL[s]

    crdf = CignoRDF()
    _subject = crdf.CignoResources(s)

    if action != 'read' and not request.user.is_authenticated():
        # The JSON error used to be built and then thrown away in favour of
        # an unrelated plain-text message; send the JSON error itself.
        return respond(
            {
                "success": False,
                "errors": "You are not allowed to change relations for this resource.",
            },
            status=403,
        )

    def relation_row(predicate, node, direct):
        # Nodes that are still bare URIs are wrapped as resources first.
        node = crdf.CignoResources(node) if isinstance(node, URIRef) else node
        node.load()
        return {
            'id': '%s|%s|%s' % (s, predicate.subject, node.subject),
            's': s,
            'p': predicate.subject,
            'o': node.subject,
            'pl': lbyl(predicate.rdfs_label),
            'ol': lbyl(node.rdfs_label),
            'd': direct,
        }

    if action == 'read':
        # TODO inspect resource type
        # NOTE(review): 'count' is never updated and stays 0 -- confirm
        # whether clients rely on it.
        payload = {'rows': [], 'count': 0}

        # Direct relations: subject --uritype--> node.
        for uritype in supported_relations:
            for node in getattr(_subject, surf.util.rdf2attr(uritype, True)):
                predicate = crdf.Properties(uritype)
                payload['rows'].append(relation_row(predicate, node, True))

        # Inverse relations: node --uritype--> subject, reported under the
        # reverse predicate.
        for uritype in supported_relations:
            if URIRef(uritype) in _subject.rdf_inverse:
                predicate = crdf.Properties(reverse_relations[uritype])
                for node in _subject.rdf_inverse[URIRef(uritype)]:
                    payload['rows'].append(relation_row(predicate, node, False))

    elif action == 'create':
        for row in posted_rows():
            crdf.store.add_triple(rdflib.URIRef(s),
                                  rdflib.URIRef(row['p']),
                                  rdflib.URIRef(row['o']))
            crdf.store.save()
            # if external try to load rdf info
            # TODO: use a better test and test if already loaded
            # Checked per row, so an empty post no longer hits an unbound
            # `row` and every posted object is considered.
            if not row['o'].startswith(surf.ns.LOCAL):
                crdf.store.load_triples(source=row['o'])
        payload = {'success': True}

    elif action == 'destroy':
        for row in posted_rows():
            # get triple by id
            row_s, row_p, row_o = row['id'].split('|')
            crdf.store.remove_triple(rdflib.URIRef(row_s),
                                     rdflib.URIRef(row_p),
                                     rdflib.URIRef(row_o))
            crdf.store.save()
        payload = {'success': True}

    else:
        # Previously fell through to an unbound-variable error (HTTP 500).
        return respond(
            {"success": False, "errors": "Unsupported action: %s" % action},
            status=400,
        )

    return respond(payload)
def about_batterbox(self, gameCode, gameinfo, inn, score, batterbox, sbo,
                    pitcher, hitter, isaboutpitcher, isabouthitter,
                    isaboutrunner):
    """Build Korean commentary sentences for the current plate appearance.

    Args:
        gameCode: game identifier, appended to ``self.uri`` to form the
            game node.
        gameinfo, inn, score, sbo: forwarded to ``self.get_situation``,
            whose strings are prefixed to the per-game sentences.
        batterbox: forwarded to ``self.search_runner`` to find the runners.
        pitcher, hitter: player identifiers; passed to
            ``self.get_player_name`` and appended to ``self.uri``.
        isaboutpitcher: include sentences about the pitcher.
        isabouthitter: include sentences about the hitter.
        isaboutrunner: include sentences about runners on base.

    Returns:
        list of str: the generated sentences, in generation order.

    Raises:
        IndexError: if the ERA (pitcher) or AVG (hitter) query returns no
            row.
    """
    result_kinds = ('Strikeout', 'BaseOnBalls', 'Fly', 'OutInBase',
                    'SingleHit', 'Double', 'Triple', 'HomeRun')

    def fetch(query, bindings):
        # Materialise the result once; it is inspected several times.
        return list(self.rdf.query(query, initBindings=bindings))

    def tally(rows):
        # Number of rows whose result value contains each kind name.
        return {kind: sum(1 for row in rows if kind in row[0])
                for kind in result_kinds}

    def korean_result(row):
        # The result value looks like "...#<x>_<result>..."; translate the
        # second "_"-separated piece after the "#".
        return self.change_result_history_to_korean(
            row[0].split("#")[1].split("_")[1])

    def variants(prefixes, tails):
        # Every tail for the first prefix, then for the second, and so on.
        return [prefix + tail for prefix in prefixes for tail in tails]

    def with_situation(sentences):
        return list(map("".join, product(situation, sentences)))

    thisGame = rdflib.URIRef(self.uri + gameCode)
    pitcher_name = self.get_player_name(pitcher)
    pitcher = rdflib.URIRef(self.uri + pitcher)
    hitter_name = self.get_player_name(hitter)
    hitter = rdflib.URIRef(self.uri + hitter)
    situation = self.get_situation(gameinfo=gameinfo, inn=inn, score=score,
                                   sbo=sbo)

    annotation = []

    if isaboutpitcher:
        # Pitcher sentences: how many batters faced today, today's
        # strikeouts / walks / fly outs / ground outs / singles / doubles,
        # the previous plate-appearance result and the season ERA.
        era = fetch("SELECT ?o WHERE {?pitcher ?thisERA ?o}", {
            "pitcher": pitcher,
            "thisERA": self.thisERA,
        })[0][0]

        rows = fetch(
            "SELECT ?o WHERE {?s ?fromPitcher ?pitcher . "
            "?s ?inGame ?thisGame . ?s ?result ?o} order by ?s", {
                "fromPitcher": self.fromPitcher,
                "pitcher": pitcher,
                "inGame": self.inGame,
                "thisGame": thisGame,
                "result": self.result,
            })
        total_batterbox = len(rows) + 1
        counts = tally(rows)
        if rows:
            recent_result = korean_result(rows[-1])

        prefixes = ["투수", pitcher_name + " 투수", pitcher_name]
        annotation += with_situation(variants(prefixes, [
            " 오늘 경기 " + str(total_batterbox) + "번째 타석에서 공을 던집니다",
            " 오늘 경기 " + str(total_batterbox) + "번째 타자를 상대하고 있습니다",
            " 오늘 경기 " + str(counts['Strikeout']) + "개의 삼진을 잡아내고 있습니다",
            " 오늘 경기 " + str(counts['BaseOnBalls']) + "개의 포볼로 타자 출루 시켰습니다",
            " 오늘 경기 " + str(counts['Fly']) + "개의 플라이 아웃으로 타자 잡아냈습니다",
            " 오늘 경기 " + str(counts['OutInBase']) + "개의 땅볼 아웃으로 타자 잡아냈습니다",
            " 오늘 경기 " + str(counts['SingleHit']) + "개의 싱글 안타 허용하였습니다",
            " 오늘 경기 " + str(counts['Double']) + "개의 2루타 허용하였습니다",
        ]))

        # total_batterbox > 1 implies rows is non-empty, so recent_result
        # is bound here.
        if total_batterbox > 1:
            annotation += variants(prefixes, [
                " 지난 타석 " + str(recent_result) + "을 기록하였습니다",
            ])

        annotation += variants(prefixes, [
            " 이번 시즌 " + str(era) + "의 평균 자책점을 기록하고 있습니다",
        ])
        annotation += [
            pitcher_name + " 투수",
            pitcher_name + " 투수 어떤 공을 던질까요",
        ]

    if isabouthitter:
        # Hitter sentences: which plate appearance this is today, today's
        # hits / outs / strikeouts / walks / home runs, the previous
        # plate-appearance result and the season batting average.
        avg = fetch("SELECT ?o where {?batter ?thisAVG ?o}", {
            "batter": hitter,
            "thisAVG": self.thisAVG,
        })[0][0]

        rows = fetch(
            "SELECT ?o where {?s ?toHitter ?hitter . "
            "?s ?inGame ?thisGame . ?s ?result ?o } order by ?s", {
                "toHitter": self.toHitter,
                "hitter": hitter,
                "inGame": self.inGame,
                "thisGame": thisGame,
                "result": self.result,
            })
        total_batterbox = len(rows) + 1
        counts = tally(rows)
        strikeout = counts['Strikeout']
        baseonballs = counts['BaseOnBalls']
        fly = counts['Fly']
        outinbase = counts['OutInBase']
        hits = (counts['SingleHit'] + counts['Double'] + counts['Triple']
                + counts['HomeRun'])
        outs = fly + outinbase + strikeout
        if rows:
            recent_result = korean_result(rows[-1])

        prefixes = ["타자", hitter_name + " 타자", hitter_name]
        nth = " 오늘 " + str(total_batterbox) + "번째 타석에서 "

        about_game = variants(prefixes, [
            " 오늘 경기 " + str(total_batterbox) + "번째 타석입니다",
            " 오늘 경기 " + str(total_batterbox) + "번째 타석에 섰습니다",
        ])
        if total_batterbox > 1:
            about_game += variants(prefixes, [
                nth + str(hits) + "개의 안타 기록했습니다",
                " 오늘 " + str(counts['SingleHit']) + "개의 1루타 기록했습니다",
                " 오늘 " + str(counts['Double']) + "개의 2루타 기록했습니다",
                " 오늘 " + str(counts['Triple']) + "개의 3루타 기록했습니다",
                " 저번 타석 " + str(recent_result) + "을 기록하였습니다",
            ])
        if outs > 0:
            about_game += variants(prefixes, [
                nth + str(outs) + "개의 아웃 기록했습니다",
                " 오늘 " + str(outs) + "개의 아웃 기록했습니다",
                " 오늘 " + str(fly) + "개의 플라이 아웃 기록했습니다",
                " 오늘 " + str(outinbase) + "개의 땅볼 아웃 기록했습니다",
            ])
        if strikeout > 0:
            about_game += variants(prefixes, [
                nth + str(strikeout) + "개의 삼진 아웃 당했습니다",
                " 오늘 " + str(strikeout) + "개의 삼진 아웃 당헀습니다",
            ])
        if baseonballs > 0:
            about_game += variants(prefixes, [
                nth + str(baseonballs) + "개의 포볼로 출루 하였습니다",
                " 오늘 " + str(baseonballs) + "개의 포볼 기록합니다",
            ])
        if counts['HomeRun'] > 0:
            about_game += variants(prefixes, [" 오늘 홈런 기록하였습니다"])

        annotation += with_situation(about_game)
        annotation += variants(prefixes, [
            " 이번 시즌 " + str(avg) + "의 평균 타율을 기록하고 있습니다",
        ])
        annotation += [hitter_name + " 타자"]

    if isaboutpitcher and isabouthitter:
        # Head-to-head sentences for this pitcher against this hitter today.
        # Newest row first, because of "order by desc(?s)".
        rows = fetch(
            "SELECT ?o where {?s ?inGame ?thisGame . ?s ?toHitter ?hitter . "
            "?s ?fromPitcher ?pitcher . ?s ?result ?o} order by desc(?s)", {
                "inGame": self.inGame,
                "thisGame": thisGame,
                "toHitter": self.toHitter,
                "hitter": hitter,
                "fromPitcher": self.fromPitcher,
                "pitcher": pitcher,
                "result": self.result,
            })
        total_batterbox = len(rows) + 1
        counts = tally(rows)
        if rows:
            recent_result = korean_result(rows[0])
        hits = (counts['SingleHit'] + counts['Double'] + counts['Triple']
                + counts['HomeRun'])

        pitcher_vs = pitcher_name + " 투수 " + hitter_name + " 타자를 상대로 "
        hitter_vs = hitter_name + " 타자 " + pitcher_name + " 투수 상대로 "

        annotation += [
            hitter_name + " 타자 " + pitcher_name + " 투수를 상대로 오늘 "
            + str(hits) + "개의 안타 기록 하였습니다",
            pitcher_vs + "오늘 경기 " + str(hits) + "개의 안타를 허용 하였습니다",
            pitcher_vs + "오늘 경기 " + str(total_batterbox) + "번째 대결입니다",
            "투수와 타자 사이에 팽팽한 긴장감이 감지됩니다.",
        ]
        if counts['Strikeout'] > 0:
            annotation += [
                pitcher_vs + "오늘 경기 " + str(counts['Strikeout'])
                + "개의 스트라이크 아웃을 잡아냈습니다",
                hitter_vs + "오늘 경기 " + str(counts['Strikeout'])
                + "개의 스트라이크 아웃 당했습니다",
            ]
        if counts['Fly'] > 0:
            annotation += [
                pitcher_vs + "오늘 경기 " + str(counts['Fly'])
                + "개의 플라이 아웃을 잡아냈습니다",
                hitter_vs + "오늘 경기 " + str(counts['Fly'])
                + "개의 플라이 아웃 당했습니다",
            ]
        if counts['BaseOnBalls'] > 0:
            annotation += [
                pitcher_vs + "오늘 경기 " + str(counts['BaseOnBalls'])
                + "개의 포볼로 출루 시켰습니다",
                hitter_vs + "오늘 경기 " + str(counts['BaseOnBalls'])
                + "개의 포볼로 출루 하였습니다",
            ]
        if counts['HomeRun'] > 0:
            annotation += [
                pitcher_vs + "오늘 경기 홈런을 허용하였습니다",
                hitter_vs + "오늘 경기 홈런 기록하였습니다",
            ]
        if total_batterbox > 1:
            annotation += [
                pitcher_vs + "저번 타석 " + str(recent_result) + " 기록하였습니다",
                hitter_vs + "저번 타석 " + str(recent_result) + " 기록하였습니다",
            ]

    if isaboutrunner:
        # Runner sentences: who is on which base, plus the most recent
        # result of the hitter and of the pitcher with a runner on first.
        first_runner, second_runner, third_runner = self.search_runner(
            batterbox=batterbox)
        if first_runner:
            first_runner = self.get_player_name(first_runner.split("#")[-1])
        if second_runner:
            second_runner = self.get_player_name(second_runner.split("#")[-1])
        if third_runner:
            third_runner = self.get_player_name(third_runner.split("#")[-1])

        def scoring_position(runner, base):
            return [
                "득점권에 주자 나가 있습니다",
                str(runner) + ", 득점권에 주자로 있습니다",
                str(runner) + " 선수 " + base + "루에 있습니다",
                base + "루에는 " + str(runner) + "가 주자로 있습니다",
                base + "루에는 " + str(runner) + "가 있습니다",
            ]

        if first_runner or second_runner or third_runner:
            on_base = " ".join([
                i for i in [first_runner, second_runner, third_runner]
                if i is not None
            ])
            annotation += [
                on_base + " 주자로 나가있습니다",
                "주자에는 " + on_base + "가 있습니다",
            ]

            if first_runner:
                # NOTE(review): inGame/thisGame are bound but the two query
                # patterns below never mention ?inGame, so they are not
                # limited to this game -- confirm whether that is intended.
                rows = fetch(
                    "SELECT ?o where {?s ?toHitter ?hitter . ?s ?result ?o . "
                    "?s ?stayIn1stBase ?o1} order by ?s", {
                        "toHitter": self.toHitter,
                        "hitter": hitter,
                        "inGame": self.inGame,
                        "thisGame": thisGame,
                        "result": self.result,
                        "stayIn1stBase": self.stayIn1stBase,
                    })
                if rows:
                    recent_result = korean_result(rows[-1])
                    annotation += variants(
                        ["타자", hitter_name + " 타자"],
                        [" 1루 주자가 있는 타석에서 최근 " + str(recent_result)
                         + "을 기록하였습니다"])

                rows = fetch(
                    "SELECT ?o where {?s ?fromPitcher ?pitcher . "
                    "?s ?result ?o . ?s ?stayIn1stBase ?o1} order by ?s", {
                        "fromPitcher": self.fromPitcher,
                        "pitcher": pitcher,
                        "inGame": self.inGame,
                        "thisGame": thisGame,
                        "result": self.result,
                        "stayIn1stBase": self.stayIn1stBase,
                    })
                if rows:
                    recent_result = korean_result(rows[-1])
                    annotation += variants(
                        ["투수", pitcher_name + " 투수"],
                        [" 1루 주자가 있는 타석에서 최근 " + str(recent_result)
                         + "을 기록하였습니다"])

                annotation += [
                    "1루에는 " + str(first_runner) + "가 주자로 있습니다",
                    "1루에는 " + str(first_runner) + "가 있습니다",
                    str(first_runner) + " 선수 1루에 있습니다",
                ]

            if second_runner:
                annotation += scoring_position(second_runner, "2")
            if third_runner:
                annotation += scoring_position(third_runner, "3")

    return annotation
def readuris(f):
    """Return the N3 form of the URI found on each line of the file *f*.

    Args:
        f: path of a text file; each line is stripped of surrounding
            whitespace and wrapped in ``rdflib.URIRef``.

    Returns:
        list of str: ``URIRef(...).n3()`` for every line, in file order.
    """
    # `open` works on Python 2 and 3 (the `file` builtin is Python 2 only),
    # and the context manager closes the handle.
    with open(f) as handle:
        return [rdflib.URIRef(line.strip()).n3() for line in handle]
def get_exception_ids():
    """Return the SPDX license-exception identifiers published on spdx.org.

    Parses the RDFa of the exceptions index page and collects the value of
    every ``spdx:licenseId`` object found in it.

    Returns:
        list: the ``.value`` of each ``licenseId`` object.
    """
    graph = rdflib.Graph()
    # The second positional parameter of Graph.parse is `publicID`, not
    # `format`, so the format has to be given by keyword.
    graph.parse('http://spdx.org/licenses/exceptions-index.html',
                format='rdfa')
    license_id = rdflib.URIRef('http://spdx.org/rdf/terms#licenseId')
    # A list (rather than a lazy map) can be iterated more than once.
    return [obj.value for _, obj in graph.subject_objects(license_id)]
def delete_activity(name):
    """Delete an activity and its time-interval nodes from ``output1.ttl``.

    Looks up the activity ``tove2_prefix + name``, its time interval, the
    interval's beginning / end / duration nodes and each of their min / max
    values, removes every triple that has one of those nodes as subject,
    and writes the graph back to ``output1.ttl``.

    Args:
        name: local name of the activity, appended to ``tove2_prefix``.
    """
    g = Graph()
    g.parse('output1.ttl#', format="turtle")

    def lookup(subject, predicate):
        # A missing parent node means there is nothing to look up; never
        # wrap None in URIRef.
        if subject is None:
            return None
        return g.value(subject=rdflib.URIRef(subject), predicate=predicate)

    act_name = tove2_prefix + name
    print(act_name)
    act_timeInterval = lookup(act_name, hasTimeInterval)
    print(act_timeInterval)

    act_start = lookup(act_timeInterval, hasBeginning)
    act_end = lookup(act_timeInterval, hasEnd)
    act_dur = lookup(act_timeInterval, hasDuration)

    # Same removal order as before: duration, beginning, end (each followed
    # by its min and max), then the interval and finally the activity.
    doomed = []
    for node in (act_dur, act_start, act_end):
        doomed += [node, lookup(node, hasMin), lookup(node, hasMax)]
    doomed += [act_timeInterval, act_name]

    for node in doomed:
        # Skip missing nodes: a None subject would act as a wildcard.
        if node is not None:
            g.remove((rdflib.URIRef(node), None, None))

    # Write the result out before closing the graph.
    g.serialize(destination='output1.ttl', format='turtle')
    g.close()
def modify_labels(self):
    """Rewrite the labels of the city namespace entities in the graph.

    Helper method. Every ``skos:prefLabel`` / ``rdfs:label`` triple whose
    subject is in the city namespace is replaced by a fixed set of
    suffixed labels; all other triples are kept as they are. Two extra
    "Burro" labels are added, and the graph is then emptied and refilled.
    """
    namespace = self.namespace_registry.city
    pref_label = rdflib.SKOS.prefLabel
    rdfs_label = rdflib.RDFS.label

    rebuilt = []
    for s, p, o in self.graph:
        if s not in namespace or p not in (pref_label, rdfs_label):
            rebuilt.append((s, p, o))
            continue

        replacements = (
            # To test querying by label.
            (pref_label, f"{o}_T", "en"),
            # To test RDFS labels and special characters.
            (rdfs_label, f"{o}-$", "en"),
            # To test non-english languages.
            (rdfs_label, f"{o}_T_jp", "jp"),
            (pref_label, f"{o}_T_aa_SKOS", "aa"),
            # To test undefined languages (no language tag).
            (rdfs_label, f"{o}_T_unknown_lang", None),
            # To test labels that coincide in different languages.
            (rdfs_label, f"{o}_T_cosa", "es"),
            (rdfs_label, f"{o}_T_cosa", "it"),
        )
        rebuilt.extend(
            (s, predicate, rdflib.Literal(text, lang=lang))
            for predicate, text, lang in replacements
        )

    # Test different concepts with same label, and querying by language.
    for local_name, lang in (("City", "it"), ("Street", "es")):
        rebuilt.append((
            rdflib.URIRef(str(namespace._iri) + local_name),
            rdfs_label,
            rdflib.Literal("Burro", lang=lang),
        ))

    self.graph.remove((None, None, None))
    for triple in rebuilt:
        self.graph.add(triple)
def f2016KDDCupSelectedPapersHandler(graph, nss, f):
    """Add the KDD Cup selected papers in *f* to *graph*.

    Each line of *f* is tab-separated; the first four columns are read as
    paper id, title, year and conference id. For every line a paper node
    (label, id, title, year, type) and its conference node are added, both
    linked to the ``MAG_KDD_Subset`` node.

    Args:
        graph: graph receiving the triples through ``graph.add``.
        nss: mapping from namespace prefix to namespace URI string.
        f: iterable of lines.

    Returns:
        list of ``(paper id, conference id, year)`` tuples, one per line.
    """
    def uri(prefix, local_name):
        return rdflib.URIRef(nss[prefix] + local_name)

    entries = []
    for progress, line in enumerate(f, start=1):
        terms = line.strip().split('\t')
        ident = terms[0]
        title = capitalize(terms[1])
        year = terms[2]
        confID = terms[3]
        entries.append((ident, confID, year))

        # paper node plus label
        paper = uri('base', 'MAG_Paper_' + ident)
        graph.add((paper, uri('rdfs', 'label'),
                   rdflib.Literal(rawString(title), lang='en')))
        graph.add((paper, uri('dcterms', 'isPartOf'),
                   uri('base', 'MAG_KDD_Subset')))
        graph.add((uri('base', 'MAG_KDD_Subset'),
                   uri('dcterms', 'hasPart'), paper))

        # id node
        graph.add((paper, uri('base', 'MAG_hasID'),
                   rdflib.Literal(ident, datatype=uri('xsd', 'ID'))))

        # title
        graph.add((paper, uri('base', 'MAG_hasTitle'),
                   rdflib.Literal(rawString(title), lang='en')))

        # year
        graph.add((paper, uri('base', 'MAG_hasYearOfPublication'),
                   rdflib.Literal(year, datatype=uri('xsd', 'gYear'))))

        # type
        graph.add((paper, uri('rdf', 'type'), uri('base', 'MAG_Paper')))

        # conference node, linked to the subset in both directions
        conference = uri('base', 'MAG_Conference_' + confID)
        graph.add((conference, uri('dcterms', 'isPartOf'),
                   uri('base', 'MAG_KDD_Subset')))
        graph.add((uri('base', 'MAG_KDD_Subset'),
                   uri('dcterms', 'hasPart'), conference))

        if progress % 10000 == 0:
            sys.stdout.write('\r ' + str(progress) + ' lines read ')

    return entries
def fKDDConferenceInstancesHandler(graph, nss, f, conferenceIDs, years,
                                   paperIDs):
    """Add the conference instances in *f* that belong to target conferences.

    Each line of *f* is tab-separated: conference (organization) id,
    instance id, short name, name, then optional location, url and six
    optional dates (start, end, abstract, submission, notification, final).
    ``conferenceIDs``, ``years`` and ``paperIDs`` are parallel sequences
    linked by index.

    A line is skipped when its instance id contains a space, or when no
    entry of ``conferenceIDs`` equals its conference id while the instance
    year is after 2010 or unknown (-1).

    Args:
        graph: graph receiving the triples through ``graph.add``.
        nss: mapping from namespace prefix to namespace URI string.
        f: iterable of lines.
        conferenceIDs, years, paperIDs: see above.
    """
    def uri(prefix, local_name):
        return rdflib.URIRef(nss[prefix] + local_name)

    def typed(value, xsd_type):
        return rdflib.Literal(value, datatype=uri('xsd', xsd_type))

    def column(terms, index):
        # Optional column: None when absent or empty.
        if len(terms) > index and terms[index] != '':
            return terms[index]
        return None

    def date_column(terms, index):
        raw = column(terms, index)
        return None if raw is None else parser.parse(raw)

    progress = 0
    for line in f:
        terms = line.strip().split('\t')
        organizationId = terms[0]
        ident = terms[1]
        shortName = terms[2]
        name = terms[3]
        location = column(terms, 4)
        url = column(terms, 5)
        startdate = date_column(terms, 6)
        enddate = date_column(terms, 7)
        abstractdate = date_column(terms, 8)
        subdate = date_column(terms, 9)
        notdate = date_column(terms, 10)
        finaldate = date_column(terms, 11)

        progress += 1
        if progress % 10000 == 0:
            sys.stdout.write('\r ' + str(progress) + ' lines read ')

        if ' ' in ident:
            # dirty fix cause the specs dont hold
            continue

        # Year of the instance: first available of start, end, final date.
        paperyear = -1
        for moment in (startdate, enddate, finaldate):
            if moment is not None:
                paperyear = moment.year
                break

        # First index whose conference id matches, provided the year is
        # recent (> 2010) or unknown (< 0).
        year_accepted = int(paperyear) > 2010 or int(paperyear) < 0
        i = next(
            (k for k, cid in enumerate(conferenceIDs)
             if cid == organizationId and year_accepted),
            len(conferenceIDs))
        if i >= len(conferenceIDs):
            # not a target conference
            continue

        # instance node plus label
        root = uri('base', 'MAG_ConferenceInstance_' + ident)
        graph.add((root, uri('rdfs', 'label'),
                   rdflib.Literal(rawString(name), lang='en')))

        if int(years[i]) == int(paperyear):
            graph.add((uri('base', 'MAG_Paper_' + paperIDs[i]),
                       uri('base', 'MAG_isPresentedAt'), root))
            graph.add((root, uri('base', 'MAG_hasPresented'),
                       uri('base', 'MAG_Paper_' + paperIDs[i])))

        # type
        graph.add((root, uri('rdf', 'type'),
                   uri('base', 'MAG_ConferenceInstance')))

        # id node
        graph.add((root, uri('base', 'MAG_hasID'), typed(ident, 'ID')))

        # link to the organizing conference, both directions
        graph.add((uri('base', 'MAG_Conference_' + organizationId),
                   uri('base', 'MAG_hasOrganized'), root))
        graph.add((root, uri('base', 'MAG_isOrganizedBy'),
                   uri('base', 'MAG_Conference_' + organizationId)))

        # URL
        if url is not None and isURL(url):
            graph.add((root, uri('base', 'MAG_hasUrl'),
                       typed(url, 'anyURI')))

        # short name
        graph.add((root, uri('base', 'MAG_hasShortName'),
                   typed(rawString(shortName), 'string')))

        # location (plain literal)
        if location is not None and isLocation(location):
            graph.add((root, uri('base', 'MAG_hasLocation'),
                       rdflib.Literal(location)))

        # to facilitate easy queries
        if paperyear is not None and int(paperyear) > 0:
            graph.add((root, uri('base', 'MAG_hasYearOfOccurence'),
                       typed(int(paperyear), 'gYear')))

        # NOTE(review): the datatype local name is 'Date' (capital D);
        # XML Schema spells it 'date' -- confirm before changing, since it
        # alters the emitted data.
        dated_properties = (
            (startdate, 'MAG_hasStartDate'),
            (enddate, 'MAG_hasEndDate'),
            (abstractdate, 'MAG_hasAbstractDueOn'),
            (subdate, 'MAG_hasSubmissionDueOn'),
            (notdate, 'MAG_hasNotificationDueOn'),
            (finaldate, 'MAG_hasFinalDueOn'),
        )
        for moment, predicate in dated_properties:
            if moment is not None:
                graph.add((root, uri('base', predicate),
                           typed(moment.isoformat(), 'Date')))

        graph.add((root, uri('dcterms', 'isPartOf'),
                   uri('base', 'MAG_KDD_Subset')))
        graph.add((uri('base', 'MAG_KDD_Subset'),
                   uri('dcterms', 'hasPart'), root))
import rdflib as rdf
from ontology import Ontology
from functional_properties import buildDictTofindFunctionalProperties, listOFPropertiesByThr
from comparing import Comparing
import matplotlib.pyplot as plt
import pandas as pd

TYPE = rdf.URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")

if __name__ == '__main__':
    # Source and target ontologies to compare.
    source = Ontology("data/000/onto.owl")
    target = Ontology("data/001/onto.owl")
    subjList = source.uniqueSubjects()

    # some test about how deal with triples that come from class Ontology:
    s = source.rdfToDict()
    t = target.rdfToDict()

    # Property scores computed from the source ontology only.
    countPsource, DPsource = buildDictTofindFunctionalProperties(s)
    propScores = listOFPropertiesByThr(countPsource, DPsource)

    # Reference alignment: the first two columns of each row, as strings.
    refalignPath = 'data/refalign.tsv'
    ra = pd.read_csv(refalignPath, sep='\t')
    trueSimilars = [
        (str(row.values[0]), str(row.values[1]))
        for _, row in ra.iterrows()
    ]

    compare = Comparing(propScores, s, t)
def init(graph, nss):
    """Add the dataset-level nodes to *graph*.

    Creates the ``MAG_LD`` dataset node (type, homepage, description) and
    the ``MAG_KDD_Subset`` collection node (type, description), and links
    the two with ``dcterms:hasPart`` / ``dcterms:isPartOf``.

    Args:
        graph: graph receiving the triples through ``graph.add``.
        nss: mapping from namespace prefix to namespace URI string.
    """
    def uri(prefix, local_name):
        return rdflib.URIRef(nss[prefix] + local_name)

    def english(text):
        return rdflib.Literal(text, lang='en')

    # void dataset
    magld = uri('base', 'MAG_LD')
    graph.add((magld, uri('rdf', 'type'), uri('void', 'Dataset')))
    homepage = rdflib.Literal('http://mag.spider.d2s.labs.vu.nl/',
                              datatype=uri('xsd', 'anyURI'))
    graph.add((magld, uri('foaf', 'homepage'), homepage))
    graph.add((
        magld,
        uri('dcterms', 'description'),
        english("A enriched Semantic Web translation of the Microsoft Academic Graph."),
    ))

    # dct collection
    kddSubset = uri('base', 'MAG_KDD_Subset')
    graph.add((kddSubset, uri('rdf', 'type'), uri('dcterms', 'Collection')))
    graph.add((
        kddSubset,
        uri('dcterms', 'description'),
        english("A subset of the Microsoft Academic Graph as specified by the KDD for their 2016 KDD Cup."),
    ))

    # link dataset and subset in both directions
    graph.add((magld, uri('dcterms', 'hasPart'), kddSubset))
    graph.add((kddSubset, uri('dcterms', 'isPartOf'), magld))
def graph():
    """Return a new graph holding a single triple with an XMLLiteral object.

    NOTE(review): the literal text has mismatched tags (``<msg>`` closed by
    ``</hei>``); presumably deliberate for the test using it -- confirm.
    """
    subject = rdflib.URIRef('http://example.org/a')
    predicate = rdflib.URIRef('http://example.org/p')
    xml_value = rdflib.Literal('<msg>hei</hei>', datatype=RDF.XMLLiteral)

    g = rdflib.Graph()
    g.add((subject, predicate, xml_value))
    return g
def en_elections(self, single_row, reification_index):
    """
    English mapping rules for elections pages. It is used to add triples to the graph.

    Each header of ``single_row`` is matched against a fixed chain of rules.
    A matching rule chooses the predicate and builds the object for that
    cell; the subject is always the reified row. Mapped cells are added to
    ``self.graph``. When at least one cell of the row was mapped, the triple
    linking the page resource to the reified row is added too; otherwise
    ``self.reification_index`` is decremented.

    Counters updated on ``self``: ``cells_mapped``,
    ``triples_added_to_the_graph``, ``total_cell_mapped``,
    ``no_mapping_found_cells``, ``num_of_mapping_errors`` and the
    ``headers_not_mapped`` dictionary.

    :param single_row: (dict) dictionary containing data to map. {'header': [values]}
    :param reification_index: (int) index used to reification of the row concept
    :return: nothing
    """
    # row_subject, row_predicate, row_object are values used to make a triple
    # which represents reification for the row concept.
    #  - subject, as the resources analyzed are in fact wiki pages, is their
    #    dbpedia representation
    #    Eg. http://dbpedia.org/resource/United_States_presidential_election,_2012
    #  - predicate for the row concept is dbo:Election
    #    NOTE: it would be better to use a different concept to map the
    #    predicate (Electoral Result?) but due to lack of time I didn't get a
    #    feedback by Community over this idea.
    #  - object = reification for the row concept
    #    It uses the resourceName__reificationIndex format as you can see in
    #    CareerStation for Soccer Players.
    #    Please see http://dbpedia.org/page/Andrea_Pirlo and
    #    http://dbpedia.org/page/Andrea_Pirlo__1 to fully understand what is
    #    done here.
    row_subject = rdflib.URIRef(self.dbr + self.resource)  # Eg. resource =United_States_presidential_election,_2012
    row_predicate = self.dbo.Election  # Election http://dbpedia.org/ontology/Election
    # Reification eg USA_presidential_elections_1984__1 for the first row,
    # __2 for second one etc.
    row_object = rdflib.URIRef(self.dbr + self.resource + "__" + str(reification_index))

    # keeping track of how many cells are added to the graph, for this row
    self.cells_mapped = 0

    # Iterates over cell in the current row. cell equals to header
    for header in single_row:
        # values is a list containing data extracted
        values = single_row[header]
        try:
            # try to map data, but only if data for that cell exists :)
            if values[0] != "-":

                # set row as the reification of the row's concept
                row = row_object

                # set subject, predicate and object for the single cell
                cell_subject = None
                cell_predicate = None
                cell_object = None

                # NOTE: data are substantially mapped using the corresponding
                # header. In fact manipulation of values and rules to set
                # subject object and predicate of a cell strictly depending
                # on the value of the header associated with values. From
                # here you can see if blocks representing the actual mapping
                # rules.
                #
                # FUTURE DEVELOPMENT: It would be useful to use soft coded
                # mapping rules using standard methods to manipulate values
                # and algorithm to decide how to map each part.
                #
                # NOTE(review): `basestring` below exists only on Python 2;
                # on Python 3 the NameError would be caught by the handler at
                # the end of this loop body and counted as a mapping error --
                # confirm the target interpreter.

                # 1° RULE
                if 'Candidate' in header or 'candidate' in header :
                    # subject is the row concept
                    # Eg. http://it.dbpedia.org/resource/Elezioni_presidenziali_negli_Stati_Uniti_d'America_del_1940
                    cell_subject = row
                    # Predicate is http://dbpedia.org/property/candidate
                    cell_predicate = rdflib.URIRef(self.dbp.candidate)
                    # if the length of data list is 2
                    if len(values) == 2:
                        # object is the second value
                        cell_object = values[1]  # value: eg [u'New York (stato)', u'Franklin D. Roosevelt']
                    else:
                        # if not, object is the first between values list
                        cell_object = values[0]
                    # find if a comma is inside the object
                    comma_index = cell_object.find(",")
                    if comma_index >= 0:
                        # If so, replace it with everything comes before the comma
                        cell_object = cell_object[:comma_index]
                    # replace the spaces with underscores
                    cell_object = cell_object.replace(" ", "_")
                    # try to know if the value in object is a existing dbpedia resource
                    res_exists = self.utils.ask_if_resource_exists(self.dbr + cell_object)
                    if res_exists:
                        # If the resource already exists use the reference to that resource
                        cell_object = rdflib.URIRef(self.dbr + cell_object)
                    else:
                        # replace underscores with simple spaces
                        cell_object = cell_object.replace("_", " ")
                        # NOTE use lang= instead of datatype?
                        # use a Literal containing value as the object
                        cell_object = rdflib.Literal(cell_object, datatype=rdflib.namespace.XSD.string)

                # 2° RULE
                elif 'Candidati - Vicepresidente' in header:
                    # subject is the row concept
                    # Eg. http://it.dbpedia.org/resource/Elezioni_presidenziali_negli_Stati_Uniti_d'America_del_1940
                    cell_subject = row  # row
                    # Predicate is http://dbpedia.org/ontology/VicePresident
                    cell_predicate = rdflib.URIRef(self.dbo.VicePresident)
                    # choose which value has to be selected depending on mode and values lenght
                    if len(values) == 2 and self.mode == 'json':
                        cell_object = values[1]  # values eg [u'Iowa', u'Henry A. Wallace']
                    else:
                        cell_object = values[0]
                    # find if there is a comma inside the object
                    comma_index = cell_object.find(",")
                    if comma_index >= 0:
                        # if so, replace it with everything comes before the comma
                        cell_object = cell_object[:comma_index]
                    # try to know if the value in object is a existing dbpedia resource
                    res_exists = self.utils.ask_if_resource_exists(self.dbr + cell_object)
                    if res_exists:
                        # If the resource already exists use the reference to that resource
                        cell_object = rdflib.URIRef(self.dbr + cell_object)
                    else:
                        # replace underscores with simple spaces
                        cell_object = cell_object.replace("_", " ")
                        # NOTE use lang= instead of datatype?
                        # use a Literal containing value as the object
                        cell_object = rdflib.Literal(cell_object, datatype=rdflib.namespace.XSD.string)

                # 3° RULE
                elif header == 'Candidati - Partito' or 'Party' in header:
                    # subject is the row concept
                    # Eg. http://it.dbpedia.org/resource/Elezioni_presidenziali_negli_Stati_Uniti_d'America_del_1940
                    cell_subject = row  # row
                    # predicate is http://dbpedia.org/ontology/PoliticalParty
                    cell_predicate = rdflib.URIRef(self.dbo.PoliticalParty)
                    # object is values[0]
                    cell_object = values[0]  # values eg [u'Partito Democratico (Stati Uniti)']
                    # test out if object is a string or a unicode
                    basestr = isinstance(cell_object, basestring)
                    if basestr:
                        # if so, test if "Stati Uniti" is inside it
                        if "Stati Uniti" in cell_object or "Stati_Uniti" in cell_object:
                            # if so, add to the last part of the string "_d'America)"
                            cell_object = cell_object[:-1] + "_d'America)"
                    # try to know if the value in object is a existing dbpedia resource
                    res_exists = self.utils.ask_if_resource_exists(self.dbr + cell_object)
                    if res_exists:
                        # If the resource already exists use the reference to that resource
                        cell_object = rdflib.URIRef(self.dbr + cell_object)
                    else:
                        # replace underscores with simple spaces
                        cell_object = cell_object.replace("_", " ")
                        # NOTE use lang= instead of datatype?
                        # use a Literal containing value as the object
                        cell_object = rdflib.Literal(cell_object, datatype=rdflib.namespace.XSD.string)

                # 4° RULE
                elif 'Grandi elettori - #' in header :
                    # subject is the row concept
                    # Eg. http://it.dbpedia.org/resource/Elezioni_presidenziali_negli_Stati_Uniti_d'America_del_1940
                    cell_subject = row  # row
                    # predicate is http://dbpedia.org/property/electoralVote which stands for the
                    # number of Great Electors
                    cell_predicate = rdflib.URIRef(self.dbp.electoralVote)
                    # test if value is >= 0
                    if values[0] >= 0:
                        if self.is_int(values[0]):
                            # if so set object as int(value)
                            cell_object = int(values[0])  # values eg [449.0]
                            # finally use a Literal with a positiveInteger data type
                            cell_object = rdflib.Literal(cell_object, datatype=rdflib.namespace.XSD.positiveInteger)

                # 5° RULE
                elif 'Votes' in header or '#' in header:
                    # subject is the row concept
                    # Eg. http://it.dbpedia.org/resource/Elezioni_presidenziali_negli_Stati_Uniti_d'America_del_1940
                    cell_subject = row  # row
                    # predicate is http://dbpedia.org/property/popularVote which stands for the
                    # number of votes
                    cell_predicate = rdflib.URIRef(self.dbo.popularVote)
                    basestr = isinstance(values[0], basestring)
                    if basestr:
                        # delete spaces
                        if ' ' in values[0]:
                            values[0] = values[0].replace(' ', '')
                        # delete dots
                        if '.' in values[0]:
                            values[0] = values[0].replace('.', '')
                    # test if value can be casted to int type
                    if self.is_int(values[0]):
                        # cast it to int
                        values[0] = int(values[0])
                        # set object to a Literal with a positiveInteger data type
                        cell_object = rdflib.Literal(values[0], datatype=rdflib.namespace.XSD.positiveInteger)

                # 6° RULE
                elif header == 'Voti - %' or header == '?% voti' or header == '% voti' \
                        or header == 'Percentuale' or '%' in header or header == '?%' or header == 'Voti (%)' \
                        or header == 'Voto popolare - Percentuale':
                    # subject is the row concept
                    # Eg. http://it.dbpedia.org/resource/Elezioni_presidenziali_negli_Stati_Uniti_d'America_del_1940
                    cell_subject = row  # row
                    # predicate is http://dbpedia.org/property/pvPct which stands for popular vote, percentage
                    cell_predicate = rdflib.URIRef(self.dbp.pvPct)
                    # test if the value can be casted to a float
                    if self.is_float(values[0]):
                        values[0] = float(values[0])
                        # set object to a float Literal
                        cell_object = rdflib.Literal(values[0], datatype=rdflib.namespace.XSD.float)  # values
                    else:
                        # test if it is a string or a unicode
                        basestr = isinstance(values[0], basestring)
                        if basestr:
                            # Sometimes wiki Users use comma instead of dot desribing percentage, so we have
                            # to convert commas in dots.
                            if ',' in values[0]:
                                values[0] = values[0].replace(",", ".")
                            # set as percentage the last character of the string
                            percentage = values[0][-1:]
                            # test if this character is a '%'
                            percentage = re.match(r'%', percentage)
                            if percentage:
                                # if so, replace value with float(value_less_last_character)
                                values[0] = float(values[0][:-1])
                                # set object as a float Literal
                                cell_object = rdflib.Literal(values[0], datatype=rdflib.namespace.XSD.float)
                            else:
                                # set object as a float Literal
                                cell_object = rdflib.Literal(values[0], datatype=rdflib.namespace.XSD.float)

                # IF HEADER DOES NOT MATCH ANY RULE
                else:
                    # Reset sub, obj and predicate to None
                    cell_subject = None
                    cell_predicate = None
                    cell_object = None
                    # print out this condition to the console
                    print ("Something went wrong choosing mapping rules :'(( data: %s header: %s" % (values, header))
                    # increase the count of 'no mapping rule found' cells
                    self.no_mapping_found_cells += 1
                    # test if the header is already in headers_not_mapped
                    if header not in self.headers_not_mapped.keys():
                        # If not, add to the list of headers with no mapping rules defined the current header
                        self.headers_not_mapped[header] = values

                # if sub,pred,obj are set for this cell, add them to the graph
                if cell_predicate and cell_object and cell_subject:
                    # increase the count of cells mapped for this row
                    self.cells_mapped += 1
                    # Adding the triple to the graph using graph.add(sub, pred, obj)
                    self.graph.add((cell_subject, cell_predicate, cell_object))
                    # increase the amount of triples added to the graph
                    self.triples_added_to_the_graph += 1
                    # print in the console the triple added using print_triple(sub, pred, object)
                    self.print_triple(cell_subject, cell_predicate, cell_object)

        except Exception:
            # Best-effort mapping: one bad cell must not abort the whole row.
            # `Exception` (instead of a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            print("Error mapping %s ,associate with cell: %s" % (values, header))
            # Increase the number of mapping exceptions
            self.num_of_mapping_errors += 1

    # Finally if at least one cell is correctly mapped
    if self.cells_mapped > 0:
        # add only those rows with some mapped cells to the graph
        self.graph.add((row_subject, row_predicate, row_object))
        # add the row to the triples mapped
        self.triples_added_to_the_graph += 1
        # print triple added
        self.print_triple(row_subject, row_predicate, row_object)
        # added this row cells to the total number of cells maapped
        self.total_cell_mapped += self.cells_mapped
    else:
        # decrease the reification index as the row has not been added to the graph
        self.reification_index -= 1
# assert disjointness between top level types based on +fig1a+ figs7 sigh for dis in (peps, sigs, tuple(figs7.values())): for i, n in enumerate(dis[:-1]): for on in dis[i+1:]: n.disjointWith(on) #LPCbyPepties = Neuron(*LPCPep.pes) for n, p in Huang2017.items(): if isinstance(p, Phenotype) and not n.startswith('_'): # FIXME rdflib allows instances but tests type so OntId can go in, but won't ever match ident = OntId(p.p) if n in Genes.__dict__: o = rdflib.Literal(n) if not hasattr(p, '_label') else rdflib.Literal(p._label) lt = (rdflib.URIRef(ident), rdfs.label, o) Neuron.core_graph.add(lt) if ident.prefix != 'NCBIGene': Neuron.out_graph.add(lt) # FIXME maybe a helper graph? if ident.prefix == 'JAX': sct = (rdflib.URIRef(ident), rdfs.subClassOf, ilxtr.transgenicLine) Neuron.core_graph.add(sct) Neuron.out_graph.add(sct) elif ident.prefix == 'ilxtr':# or ident.prefix == 'NCBIGene': # FIXME NCBIGene temp fix ... if ident.suffix in ('LowerExpression', 'HigherExpression', 'to'): continue sct = (rdflib.URIRef(ident), rdfs.subClassOf, ilxtr.gene) Neuron.core_graph.add(sct) Neuron.out_graph.add(sct) else:
import logging
import rdflib
from rdflib.namespace import RDF, OWL, RDFS, FOAF
from rdflib import Literal, BNode, URIRef, Graph, Namespace
import DateTimeDescription
import ctime
import tove2

# Namespaces. Note that `time` below is the W3C Time ontology namespace,
# not the standard-library `time` module.
cot = rdflib.Namespace('http://ontology.eil.utoronto.ca/TOVE2/ctime.rdf#')
time = rdflib.Namespace('http://www.w3.org/2006/time#')
# (a second, identical assignment of `rdf` was removed here)
rdf = rdflib.Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')

# namespace for predicate
hasBeginning = rdflib.URIRef('http://www.w3.org/2006/time#hasBeginning')
hasEnd = rdflib.URIRef('http://www.w3.org/2006/time#hasEnd')
hasMin = rdflib.URIRef(
    'http://ontology.eil.utoronto.ca/TOVE2/ctime.owl#hasMin')
hasMax = rdflib.URIRef(
    'http://ontology.eil.utoronto.ca/TOVE2/ctime.owl#hasMax')
hasDepartment = rdflib.URIRef('http://www.semanticweb.org/luolixuan/ontologies/tove2/test#has_Department')
hasDuration = rdflib.URIRef('http://www.w3.org/2006/time#hasDuration')
hasTimeInterval = rdflib.URIRef('http://www.semanticweb.org/luolixuan/ontologies/tove2/test#has_TimeInterval')

# all the departments
Water = rdflib.URIRef('http://www.semanticweb.org/luolixuan/ontologies/tove2/test#Water')
Sewage = rdflib.URIRef('http://www.semanticweb.org/luolixuan/ontologies/tove2/test#Sewage')
Power = rdflib.URIRef('http://www.semanticweb.org/luolixuan/ontologies/tove2/test#Power')
Permits = rdflib.URIRef('http://www.semanticweb.org/luolixuan/ontologies/tove2/test#Permits')
Police = rdflib.URIRef('http://www.semanticweb.org/luolixuan/ontologies/tove2/test#Police')
def fFieldOfStudyHierarchyHandler(graph, nss, f):
    """Translate a field-of-study hierarchy file into SKOS triples.

    Each line of ``f`` is tab separated and is read as: child id, child
    level, parent id, parent level, confidence. The first character of
    both level columns is dropped before the rest is parsed as an int.

    For every line both concepts are put in the
    ``MAG_FieldOfStudyHierarchy`` concept scheme, level-0 concepts are
    registered as top concepts, and when the two levels differ by exactly
    one a narrower/broader pair plus two ``skos:note`` literals carrying
    the confidence are added.

    :param graph: graph object exposing ``add((s, p, o))``
    :param nss: mapping from prefix to namespace IRI string (base, skos, rdf)
    :param f: iterable of text lines
    """
    def skos(local_name):
        return rdflib.URIRef(nss['skos'] + local_name)

    def field(identifier):
        return rdflib.URIRef(nss['base'] + 'MAG_FieldOfStudy_' + identifier)

    scheme = rdflib.URIRef(nss['base'] + 'MAG_FieldOfStudyHierarchy')
    scheme_type = skos('ConceptScheme')
    graph.add((scheme, rdflib.URIRef(nss['rdf'] + 'type'), scheme_type))

    for lines_read, line in enumerate(f, start=1):
        columns = line.strip().split('\t')
        child_id = columns[0]
        child_level_text = columns[1][1:]
        parent_id = columns[2]
        parent_level_text = columns[3][1:]
        confidence = columns[4]

        child = field(child_id)
        parent = field(parent_id)

        # both concepts belong to the scheme
        graph.add((child, skos('inScheme'), scheme))
        graph.add((parent, skos('inScheme'), scheme))

        # level-0 concepts are the top concepts of the scheme
        child_level = int(child_level_text)
        if child_level == 0:
            graph.add((scheme, skos('hasTopConcept'), child))
        parent_level = int(parent_level_text)
        if parent_level == 0:
            graph.add((scheme, skos('hasTopConcept'), parent))

        # NOTE(review): per the SKOS reference, `A skos:narrower B` states
        # that B is the narrower concept. The triples below use the opposite
        # reading (the subject is the narrower/broader one), which is what
        # the note texts describe -- confirm this direction is intended.
        level_gap = child_level - parent_level
        if level_gap == 1:
            graph.add((child, skos('narrower'), parent))
            graph.add((parent, skos('broader'), child))
            graph.add((parent, skos('note'),
                       rdflib.Literal('Confidence of being broader than {} is {}'.format(child_id, confidence))))
            graph.add((child, skos('note'),
                       rdflib.Literal('Confidence of being narrower than {} is {}'.format(parent_id, confidence))))
        elif level_gap == -1:
            graph.add((child, skos('broader'), parent))
            graph.add((parent, skos('narrower'), child))
            graph.add((parent, skos('note'),
                       rdflib.Literal('Confidence of being narrower than {} is {}'.format(child_id, confidence))))
            graph.add((child, skos('note'),
                       rdflib.Literal('Confidence of being broader than {} is {}'.format(parent_id, confidence))))

        # progress indicator, rewritten in place every 10000 lines
        if lines_read % 10000 == 0:
            sys.stdout.write('\r ' + str(lines_read) + ' lines read ')
def add_activity(name, department,
                 start_min_year, start_min_month, start_min_day,
                 start_max_year, start_max_month, start_max_day,
                 end_min_year, end_min_month, end_min_day,
                 end_max_year, end_max_month, end_max_day,
                 duration_min_year, duration_min_month, duration_min_day,
                 duration_max_year, duration_max_month, duration_max_day):
    """Add an activity with its time interval to the schedule graph.

    The schedule is parsed from ``example.ttl#`` (Turtle). The activity, its
    department, a time interval and the interval's start, end and duration
    are added, and the result is written to ``output1.ttl``.

    Each ``*_min_*`` / ``*_max_*`` triple of arguments is passed to
    ``tove2.create_CDTInstant`` as a ``[year, month, day]`` list.

    :param name: activity name, appended to ``tove2_prefix`` to form its IRI
    :param department: node used as object of ``hasDepartment``
    :return: ``False`` (nothing written) if ``check_activity_name`` reports
        the name as already present, ``True`` after a successful write
    """
    bmin = [start_min_year, start_min_month, start_min_day]
    bmax = [start_max_year, start_max_month, start_max_day]
    emin = [end_min_year, end_min_month, end_min_day]
    emax = [end_max_year, end_max_month, end_max_day]
    durmin = [duration_min_year, duration_min_month, duration_min_day]
    durmax = [duration_max_year, duration_max_month, duration_max_day]

    g = Graph()
    try:
        g.parse('example.ttl#', format="turtle")

        # if there is alredy activity with same name in the schedule, just return false
        if check_activity_name(g, name):
            return False

        # add the activity
        act_name = tove2_prefix + name
        activity = rdflib.URIRef(act_name)
        g.add((activity, RDF.type, rdflib.URIRef(activity_prefix)))

        # add its department
        g.add((activity, hasDepartment, department))

        # add timeInterval
        act_timeInterval = rdflib.URIRef(act_name + '_timeInterval')
        g.add((act_timeInterval, RDF.type, time.CDateTimeInterval))
        g.add((activity, hasTimeInterval, act_timeInterval))

        # add start
        start = rdflib.URIRef(act_name + '_start')
        start_min_ins = rdflib.URIRef(act_name + '_start_min')
        start_max_ins = rdflib.URIRef(act_name + '_start_max')
        tove2.create_CDTInstant(g, start, bmin, bmax, start_min_ins, start_max_ins)
        g.add((act_timeInterval, hasBeginning, start))

        # add end
        end = rdflib.URIRef(act_name + '_end')
        end_min_ins = rdflib.URIRef(act_name + '_end_min')
        end_max_ins = rdflib.URIRef(act_name + '_end_max')
        tove2.create_CDTInstant(g, end, emin, emax, end_min_ins, end_max_ins)
        g.add((act_timeInterval, hasEnd, end))

        # add duration
        dur = rdflib.URIRef(act_name + '_duration')
        dur_min_ins = rdflib.URIRef(act_name + '_duration_min')
        dur_max_ins = rdflib.URIRef(act_name + '_duration_max')
        tove2.create_CDTInstant(g, dur, durmin, durmax, dur_min_ins, dur_max_ins)
        g.add((act_timeInterval, hasDuration, dur))

        # Write the result while the graph is still open; it used to be
        # closed first and serialized afterwards.
        g.serialize(destination='output1.ttl', format='turtle')
        return True
    finally:
        # Release the graph on every exit path, including the early return.
        g.close()
def fKDDPapersHandler(graph, nss, f, paperIDs, confIDs):
    """Add paper metadata from a tab-separated papers file to ``graph``.

    Columns read per line: 0 id, 1 title, 3 year, 4 date, 5 DOI,
    8 journal id, 9 conference id, 10 rank. Empty strings in columns 3-10
    are treated as missing.

    * A paper whose id is in ``paperIDs`` only gets date / DOI / rank /
      journal triples.
    * A paper not in ``paperIDs`` whose conference is in ``confIDs`` and
      whose year is later than 2010 gets a new node (label, title, year,
      type, id) followed by the same date / DOI / rank / journal triples.
    * Any other paper is only recorded in the returned paper -> conference
      mapping and otherwise skipped.

    :param graph: graph object exposing ``add((s, p, o))``
    :param nss: mapping from prefix to namespace IRI string
    :param f: iterable of text lines
    :param paperIDs: container of paper ids that already have a node
    :param confIDs: container of conference ids of interest
    :return: tuple ``(allPaperConfIDs, journalIDs)`` -- a dict mapping
        skipped paper ids to their conference id, and the set of journal
        ids encountered on processed papers
    """
    # paperIDs could also be derived from the graph:
    #   every MAG_hasID value of every node typed base:MAG_Paper
    allPaperConfIDs = dict()
    journalIDs = set()

    progress = 0
    for line in f:
        terms = line.strip().split('\t')

        ident = terms[0]
        title = terms[1]
        # terms[2]: alternative title (unused)
        year = terms[3] if terms[3] != '' else None
        date = getDate(terms[4]) if terms[4] != '' else None
        doi = terms[5] if terms[5] != '' else None
        # terms[6], terms[7]: venue / alternative venue (superseded by conference ID)
        journalId = terms[8] if terms[8] != '' else None
        conferenceId = terms[9] if terms[9] != '' else None
        rank = terms[10] if terms[10] != '' else None

        progress += 1
        if progress % 10000 == 0:
            sys.stdout.write('\r ' + str(progress) + ' lines read ')

        root = None
        if ident not in paperIDs:
            # A missing year used to crash in int(None); such papers now
            # take the "skip" branch like any other non-matching paper.
            if (conferenceId in confIDs
                    and year is not None
                    and int(year) > 2010):
                # add node plus label
                root = rdflib.URIRef(nss['base'] + 'MAG_Paper_' + ident)
                label = rdflib.Literal(rawString(title), lang='en')
                graph.add((root, rdflib.URIRef(nss['rdfs'] + 'label'), label))

                # title
                graph.add((root,
                           rdflib.URIRef(nss['base'] + 'MAG_hasTitle'),
                           rdflib.Literal(rawString(title), lang='en')))

                # year (known to be present because of the guard above)
                ynode = rdflib.Literal(year, datatype=rdflib.URIRef(nss['xsd'] + 'gYear'))
                graph.add((root, rdflib.URIRef(nss['base'] + 'MAG_hasYearOfPublication'), ynode))

                # type
                tnode = rdflib.URIRef(nss['base'] + 'MAG_Paper')
                graph.add((root, rdflib.URIRef(nss['rdf'] + 'type'), tnode))

                # id node
                idNode = rdflib.Literal(ident, datatype=rdflib.URIRef(nss['xsd'] + 'ID'))
                graph.add((root, rdflib.URIRef(nss['base'] + 'MAG_hasID'), idNode))

                if journalId is not None:
                    # journalIDs is a set: use add(), sets have no append()
                    journalIDs.add(journalId)
            else:
                # exclude KDD subset as we already know about their confs
                allPaperConfIDs[ident] = conferenceId
                continue

        # Papers already known to the caller have no fresh node yet;
        # address their existing node by IRI.
        if root is None:
            root = rdflib.URIRef(nss['base'] + 'MAG_Paper_' + ident)

        # date
        if date is not None:
            dnode = rdflib.Literal(date.isoformat(), datatype=rdflib.URIRef(nss['xsd'] + 'Date'))
            graph.add((root, rdflib.URIRef(nss['base'] + 'MAG_hasDateOfPublication'), dnode))

        # doi
        if doi is not None:
            doinode = rdflib.Literal(doi, datatype=rdflib.URIRef(nss['xsd'] + 'ID'))
            graph.add((root, rdflib.URIRef(nss['base'] + 'MAG_hasDoi'), doinode))

        # rank
        if rank is not None:
            ranknode = rdflib.Literal(rank, datatype=rdflib.URIRef(nss['xsd'] + 'positiveInteger'))
            graph.add((root, rdflib.URIRef(nss['base'] + 'MAG_hasRank'), ranknode))

        # journal
        if journalId is not None:
            journalIDs.add(journalId)
            jnode = rdflib.URIRef(nss['base'] + 'MAG_Journal_' + journalId)
            graph.add((root, rdflib.URIRef(nss['base'] + 'MAG_isPublishedIn'), jnode))
            graph.add((jnode, rdflib.URIRef(nss['base'] + 'MAG_hasPublished'), root))

    return (allPaperConfIDs, journalIDs)
def test_namespace_registry_from_iri(self):
    """Test getting namespaces from iri.

    Covers three situations: entities of the ``cuba`` and ``city``
    namespaces looked up directly, the same lookups after the ``city``
    namespace has been switched to reference-by-label, and IRIs that do
    not belong to any installed namespace.
    """
    self.installer.install("city")
    ns_iri = rdflib.URIRef("http://www.osp-core.com/city#")
    city_iri = ns_iri + "City"
    hasPart_iri = ns_iri + "hasPart"
    self.modify_labels()

    c = self.namespace_registry.from_iri(rdflib_cuba.Entity)
    self.assertIsInstance(c, OntologyClass)
    self.assertEqual(c.namespace.get_name(), "cuba")
    self.assertEqual(c.name, "Entity")
    r = self.namespace_registry.from_iri(rdflib_cuba.relationship)
    self.assertIsInstance(r, OntologyRelationship)
    self.assertEqual(r.namespace.get_name(), "cuba")
    self.assertEqual(r.name, "relationship")
    a = self.namespace_registry.from_iri(rdflib_cuba.attribute)
    self.assertIsInstance(a, OntologyAttribute)
    self.assertEqual(a.namespace.get_name(), "cuba")
    self.assertEqual(a.name, "attribute")
    c = self.namespace_registry.from_iri(city_iri)
    self.assertIsInstance(c, OntologyClass)
    self.assertEqual(c.namespace.get_name(), "city")
    self.assertEqual(c.name, "City")
    r = self.namespace_registry.from_iri(hasPart_iri)
    self.assertIsInstance(r, OntologyRelationship)
    self.assertEqual(r.namespace.get_name(), "city")
    self.assertEqual(r.name, "hasPart")

    import osp.core.namespaces
    # Swap the module-level registry for the one under test; the original
    # is put back in the `finally` clause below.
    old_ns_reg = osp.core.ontology.namespace_registry.namespace_registry
    try:
        osp.core.ontology.namespace_registry.namespace_registry = (
            self.namespace_registry)
        from_iri = self.namespace_registry.from_iri

        c = from_iri(rdflib_cuba.Entity)
        self.assertIsInstance(c, OntologyClass)
        self.assertEqual(c.namespace.get_name(), "cuba")
        self.assertEqual(c.name, "Entity")

        # Switch the city namespace to reference-by-label and drop the
        # cached lookups so the change takes effect.
        self.graph.add((ns_iri, rdflib_cuba._reference_by_label,
                        rdflib.Literal(True)))
        self.namespace_registry.from_iri.cache_clear()
        self.namespace_registry._get.cache_clear()
        c = from_iri(city_iri)
        self.assertIsInstance(c, OntologyClass)
        self.assertEqual(c.namespace.get_name(), "city")
        self.assertEqual(c.name, "City_T")
        r = from_iri(hasPart_iri)
        self.assertIsInstance(r, OntologyRelationship)
        self.assertEqual(r.namespace.get_name(), "city")
        self.assertEqual(r.name, "hasPart_T")

        # undefined namespace
        self.graph.add(
            (rdflib.URIRef("a/b#c"), rdflib.RDF.type, rdflib.OWL.Class))
        self.graph.add(
            (rdflib.URIRef("d/e/f"), rdflib.RDF.type, rdflib.OWL.Class))
        a = from_iri("a/b#c")
        b = from_iri("d/e/f")
        self.assertIsInstance(a, OntologyClass)
        self.assertEqual(a.namespace.get_name(), "a/b#")
        self.assertEqual(a.name, "c")
        self.assertIsInstance(b, OntologyClass)
        self.assertEqual(b.namespace.get_name(), "d/e/")
        self.assertEqual(b.name, "f")
    finally:
        # Restore the same attribute that was saved and patched above.
        # Assigning to `osp.core.ontology.namespace_registry` instead would
        # replace the module with the registry object and leave the patched
        # registry in place for later tests.
        osp.core.ontology.namespace_registry.namespace_registry = (
            old_ns_reg)
def ontid(self):
    """Return the ontology IRI built from ``self.id`` as an rdflib URIRef."""
    iri = f'https://sparc.olympiangods.org/sparc/ontologies/{self.id}'
    return rdflib.URIRef(iri)
def triples_gen(self, subject):
    """Yield RDF triples describing ``self._source`` with ``subject`` as root.

    For every ``field, value`` pair of ``self._source`` a converter is looked
    up as the attribute of the same name on ``self``. The converter is called
    once per value (tuples and lists are expanded) and returns a
    ``(predicate, object)`` pair; what is yielded then depends on the kind
    of object. An optional per-field generator found on ``self.extra`` is
    run after each value.

    Fields without a converter are ignored when listed in
    ``self.known_skipped`` and reported through ``self.addError`` otherwise.

    :param subject: an rdflib URIRef or BNode; an ``idlib.Stream`` is
        converted with ``asType`` and anything else is wrapped in URIRef
    """
    if not (isinstance(subject, rdflib.URIRef)
            or isinstance(subject, rdflib.BNode)):
        if isinstance(subject, idlib.Stream):
            subject = subject.asType(rdflib.URIRef)
        else:
            subject = rdflib.URIRef(subject)

    def protocol_stuff():
        # Emits type / label / synonym triples for the identifier currently
        # being processed. `_v` and `s` are rebound by the loop below before
        # each call; `v` is read from the enclosing scope as well.
        nonlocal _v
        nonlocal s

        d = _v.asDict()  # FIXME this is a silly way to do this, use Stream.triples_gen
        _o = (owl.Class if isinstance(v, OntTerm) else  # FIXME not really accurate
              owl.NamedIndividual)
        yield s, rdf.type, _o
        if 'label' in d:
            yield s, rdfs.label, rdflib.Literal(d['label'])
        if 'synonyms' in d:  # FIXME yield from o.synonyms(s)
            for syn in d['synonyms']:
                yield s, NIFRID.synonym, rdflib.Literal(syn)

    #maybe_not_normalized = self.message_passing_key in self._source  # TODO maybe not here?
    for field, value in self._source.items():
        #normalized = not (maybe_not_normalized and field in self._source)  # TODO
        #log.debug(f'{field}: {value}')
        if type(field) is object:
            continue  # the magic helper key for Pipeline

        # converter and optional extra generator are both looked up by field name
        convert = getattr(self, field, None)
        extra = getattr(self.extra, field, None)
        if convert is not None:
            # normalise to an iterable of values
            if isinstance(value, tuple) or isinstance(value, list):
                values = value
            else:
                values = value,

            for v in values:
                #log.debug(f'{field} {v} {convert}')
                if isinstance(v, oq.OntId):
                    _old_v = v
                    v = v.asInstrumented()

                try:
                    p, o = convert(v)
                except exc.NoTripleError as e:
                    # the converter declined to produce a triple for this value
                    continue

                #log.debug((o, v))
                # a / b: does the converted object / the raw value look like
                # an identifier that can be rendered as a URI?
                a = (isinstance(o, idlib.Stream) and hasattr(o, 'asUri') or
                     isinstance(o, OntTerm))
                b = (isinstance(v, idlib.Stream) and hasattr(v, 'asUri') or
                     isinstance(v, OntTerm))
                if (a or b):  # FIXME this thing is a mess ...
                    # prefer the converted object over the raw value
                    _v = o if a else v
                    s = _v.asUri(rdflib.URIRef)
                    yield subject, p, s
                    try:
                        yield from protocol_stuff()
                    except idlib.exc.ResolutionError:
                        # metadata for the identifier could not be resolved;
                        # the link triple above has already been emitted
                        pass

                elif isinstance(o, ProtcurExpression) or isinstance(
                        o, Quantity):
                    # expressions and quantities hang off a fresh blank node
                    s = rdflib.BNode()
                    yield subject, p, s
                    qt = sparc.Measurement
                    if isinstance(o, Range):
                        yield from o.asRdf(s, quantity_rdftype=qt)
                    elif isinstance(o, Quantity):
                        yield from o.asRdf(s, rdftype=qt)
                        # also emit the same quantity converted to base units
                        n = rdflib.BNode()
                        yield s, TEMP.asBaseUnits, n
                        yield from o.to_base_units().asRdf(n)
                    else:
                        log.warning(f'unhanded Expr type {o}')
                        yield from o.asRdf(s)
                else:
                    # plain object: emit as is
                    yield subject, p, o

                if extra is not None:
                    yield from extra(v)

        elif field in self.known_skipped:
            pass

        else:
            msg = f'Unhandled {self.__class__.__name__} field: {field}'
            if self.addError(msg,
                             pipeline_stage=self.__class__.__name__ +
                             '.export-error'):
                log.warning(msg)
def dsid(self):
    """Return ``self.uri_api`` wrapped in an rdflib URIRef."""
    api_uri = self.uri_api
    return rdflib.URIRef(api_uri)
from sklearn.exceptions import NotFittedError from pyrdf2vec.graphs import KG from pyrdf2vec.rdf2vec import RDF2VecTransformer np.random.seed(42) random.seed(42) KNOWLEDGE_GRAPH = KG( "samples/mutag/mutag.owl", label_predicates=set("http://dl-learner.org/carcinogenesis#isMutagenic"), ) TRAIN_DF = pd.read_csv("samples/mutag/train.tsv", sep="\t", header=0) ENTITIES = [rdflib.URIRef(x) for x in TRAIN_DF["bond"]] ENTITIES_SUBSET = ENTITIES[:5] WALKS: DefaultDict[rdflib.URIRef, rdflib.URIRef] = defaultdict(list) class TestRDF2VecTransformer: def test_fit(self): transformer = RDF2VecTransformer() with pytest.raises(ValueError): transformer.fit(KNOWLEDGE_GRAPH, ["does", "not", "exist"]) transformer.fit(KNOWLEDGE_GRAPH, ENTITIES_SUBSET) def test_fit_transform(self): np.testing.assert_array_equal( RDF2VecTransformer().fit_transform(KNOWLEDGE_GRAPH,
def id_(v):
    """Yield the type and label triples for the dataset node.

    The argument ``v`` is not used; the subject is built from ``dsid`` and
    the label from ``self.folder_name``, both taken from the enclosing scope.
    """
    node = rdflib.URIRef(dsid)
    for rdf_class in (owl.NamedIndividual, sparc.Resource):
        yield node, a, rdf_class
    # not all datasets have titles
    label = rdflib.Literal(self.folder_name)
    yield node, rdfs.label, label
def test_uriref_not_url(self):
    """Constructing a URIRef from a string that is not a URL must not raise."""
    not_a_url = "*****@*****.**"
    try:
        rdflib.URIRef(not_a_url)
    except Exception:
        self.fail("Doesn't actually fail...which is weird")
def subject_id(self, v, species=None):  # TODO species for human/animal
    """Return the IRI of subject ``v`` under this dataset.

    ``v`` is percent-encoded with no characters treated as safe and
    appended to ``self.dsid + '/subjects/'``. ``species`` is not used yet.
    """
    encoded = quote(v, safe=tuple())
    return rdflib.URIRef(self.dsid + '/subjects/' + encoded)
import rdflib
import tqdm

# Input tables (tab separated, no header row).
features = pd.read_csv('feature.txt', sep='\t', header=None, index_col=0)
cites = pd.read_csv('net.txt', sep='\t', header=None)
labels = pd.read_csv('label.txt', sep='\t', header=None, index_col=0)

g = rdflib.Graph()

HAS_LABEL = rdflib.URIRef('http://hasLabel')
HAS_WORD = rdflib.URIRef('http://hasWord')
CITES = rdflib.URIRef('http://cites')


def _paper(identifier):
    """Return the node for the paper with the given identifier."""
    return rdflib.URIRef('http://paper_' + str(identifier))


# One row per paper: a whitespace separated list of "word:weight" tokens.
for paper_id, feature_row in tqdm.tqdm(features.iterrows(), total=len(features)):
    tokens = [token.split(':') for token in feature_row.values[0].split()]

    # every weight is expected to be exactly 1.0
    weights = [float(token[1]) for token in tokens]
    assert all([weight == 1.0 for weight in weights])

    if paper_id in labels.index:
        label_name = str(labels.loc[paper_id][1])
        g.add((_paper(paper_id), HAS_LABEL,
               rdflib.URIRef('http://label_' + label_name)))

    for token in tokens:
        g.add((_paper(paper_id), HAS_WORD,
               rdflib.URIRef('http://word_' + token[0])))

# One row per citation: destination, source, and a third ignored column.
for _, cite_row in tqdm.tqdm(cites.iterrows(), total=len(cites)):
    cited, citing, _unused = map(str, cite_row.values)
    g.add((_paper(citing), CITES, _paper(cited)))
    # the inverse edge (cited paper -> http://hasCitation -> citing paper)
    # is deliberately not emitted
def sample_id(self, v, species=None):  # TODO species for human/animal
    """Return the IRI of sample ``v`` under this dataset.

    ``v`` is percent-encoded with no characters treated as safe and
    appended to ``self.dsid + '/samples/'``. ``species`` is not used yet.
    """
    encoded = quote(v, safe=tuple())
    return rdflib.URIRef(self.dsid + '/samples/' + encoded)
def _set_data_properties(self):
    """Create the ``thisERA`` and ``thisAVG`` property IRIs under ``self.uri``.

    Each attribute is set to ``self.uri`` with the attribute's own name
    appended, wrapped in an rdflib URIRef.
    """
    for local_name in ("thisERA", "thisAVG"):
        setattr(self, local_name, rdflib.URIRef(self.uri + local_name))
?person <http://dbpedia.org/ontology/birthPlace> ?place . ?place <http://www.w3.org/2000/01/rdf-schema#label> ?placename . OPTIONAL { ?place <http://www.w3.org/2003/01/geo/wgs84_pos#long> ?long } . OPTIONAL { ?place <http://www.w3.org/2003/01/geo/wgs84_pos#lat> ?lat }. } """ sparql_client.setReturnFormat(JSON) sparql_client.setQuery(query) q_results = sparql_client.query().convert() people = set() places = dict() for r in q_results["results"]["bindings"]: people.add(rdflib.URIRef(r["person"]["value"])) if "long" in r.keys() and "lat" in r.keys(): lat = r["lat"]["value"] long = r["long"]["value"] places[r["placename"]["value"]] = (lat, long) describe_query = "DESCRIBE " for p in people: describe_query += p.n3() + "\n" sparql_client.setReturnFormat(XML) sparql_client.setQuery(describe_query) g = sparql_client.query().convert() print(len(g), "triples in people around Claude Shannon") a = g.serialize(format='application/rdf+xml')