Ejemplo n.º 1
0
from collections import defaultdict, Counter

# Explicit imports: the original relied on ``from graph import *`` to pull
# ``rdflib`` and ``pd`` into scope, which silently breaks if graph.py defines
# __all__ or stops importing them itself.
import pandas as pd
import rdflib

from graph import *
from rdf2vec import RDF2VecTransformer


# Load the AIFB knowledge graph from its N3 serialization.
print(end='Loading data... ', flush=True)
g = rdflib.Graph()
g.parse('../data/aifb.n3', format='n3')
print('OK')

# Train/test splits: each row holds a person URI and its affiliation label.
test_data = pd.read_csv('../data/AIFB_test.tsv', sep='\t')
train_data = pd.read_csv('../data/AIFB_train.tsv', sep='\t')


train_people = [rdflib.URIRef(x) for x in train_data['person']]
train_labels = train_data['label_affiliation']

test_people = [rdflib.URIRef(x) for x in test_data['person']]
test_labels = test_data['label_affiliation']

# NOTE(review): presumably these predicates are stripped from the KG to avoid
# leaking the target label into the embeddings — confirm in rdflib_to_kg.
label_predicates = [
    rdflib.URIRef('http://swrc.ontoware.org/ontology#affiliation'),
    rdflib.URIRef('http://swrc.ontoware.org/ontology#employs'),
    rdflib.URIRef('http://swrc.ontoware.org/ontology#carriedOutBy')
]

# Extract the train and test graphs

kg = rdflib_to_kg(g, label_predicates=label_predicates)
Ejemplo n.º 2
0
def rdfapi(request, action="read"):
  """JSON API to read, create, or destroy RDF relations for a resource.

  ``request.REQUEST['s']`` names the base subject (a full URI or a local
  name resolved against ``surf.ns.LOCAL``); ``action`` selects the
  operation ('read', 'create' or 'destroy').  Returns an HttpResponse
  carrying a JSON document; write actions require an authenticated user.
  """
  # get base subject
  s = request.REQUEST['s']
  if not surf.util.is_uri(s):
    s = surf.ns.LOCAL[s]

  crdf = CignoRDF()
  _subject = crdf.CignoResources(s)

  if action != 'read' and not request.user.is_authenticated():
    # Fail fast: anonymous users may only read.  (The unused JSON payload
    # and the unreachable ``return response`` that followed were removed.)
    mimetype = "text/plain" # debug to see it indented in browser
    return HttpResponse("User not authorized to delete map", mimetype=mimetype, status=403)

  # Initialized up front so an unknown ``action`` yields an empty JSON
  # document instead of a NameError further down.
  payload = {}

  if action == 'read':
    # TODO inspect resource type
    payload = {'rows': [], 'count': 0}

    # Direct (outgoing) relations of the subject.
    for uritype in supported_relations:
      for node in getattr(_subject, surf.util.rdf2attr(uritype, True)):
        _predicate = crdf.Properties(uritype)
        node = crdf.CignoResources(node) if isinstance(node, URIRef) else node
        node.load()
        _object = node
        payload['rows'].append({'id': '%s|%s|%s' % (s, _predicate.subject, _object.subject),
                                's': s,
                                'p': _predicate.subject,
                                'o': _object.subject,
                                'pl': lbyl(_predicate.rdfs_label),
                                'ol': lbyl(_object.rdfs_label),
                                'd': True
                                })

    # Inverse (incoming) relations, reported under the reverse predicate
    # and flagged with 'd': False (not directly editable).
    for uritype in supported_relations:
      if URIRef(uritype) in _subject.rdf_inverse:
        _predicate = crdf.Properties(reverse_relations[uritype])
        for node in _subject.rdf_inverse[URIRef(uritype)]:
          node = crdf.CignoResources(node) if isinstance(node, URIRef) else node
          node.load()
          _object = node
          payload['rows'].append({'id': '%s|%s|%s' % (s, _predicate.subject, _object.subject),
                                  's': s,
                                  'p': _predicate.subject,
                                  'o': _object.subject,
                                  'pl': lbyl(_predicate.rdfs_label),
                                  'ol': lbyl(_object.rdfs_label),
                                  'd': False
                                  })

  elif action == 'create':
    rows = simplejson.loads(request.REQUEST['rows'])
    if not isinstance(rows, list): rows = [rows]
    for row in rows:
      crdf.store.add_triple(rdflib.URIRef(s), rdflib.URIRef(row['p']), rdflib.URIRef(row['o']))
    crdf.store.save()
    # If the (last) object is external, try to load its RDF description.
    # TODO: use a better test and test if already loaded
    # Guarded: the original raised NameError here when ``rows`` was empty.
    if rows and not rows[-1]['o'].startswith(surf.ns.LOCAL):
      crdf.store.load_triples(source = rows[-1]['o'])

    payload = {'success': True}

  elif action == 'destroy':
    rows = simplejson.loads(request.REQUEST['rows'])
    if not isinstance(rows, list): rows = [rows]
    for row in rows:
      # The row id encodes the triple as 's|p|o'.
      s, p, o = row['id'].split('|')
      crdf.store.remove_triple(rdflib.URIRef(s), rdflib.URIRef(p), rdflib.URIRef(o))
    crdf.store.save()
    payload = {'success': True}

  json_str = simplejson.dumps(payload, sort_keys=True, indent=4)
  mimetype = "text/plain" # debug to see it indented in browser
  response = HttpResponse(json_str, mimetype=mimetype)
  return response
Ejemplo n.º 3
0
    def about_batterbox(self, gameCode, gameinfo, inn, score, batterbox, sbo,
                        pitcher, hitter, isaboutpitcher, isabouthitter,
                        isaboutrunner):
        """Build Korean commentary sentences for one batter box.

        Queries the RDF graph (``self.rdf``) for the pitcher's and hitter's
        season stats and their at-bat history in the current game, then
        assembles natural-language annotation strings.  The three boolean
        flags select which groups of sentences are generated (pitcher-,
        hitter-, and runner-related).  Returns a list of strings.
        """
        # Batter-box identifier is the last dotted component of ``batterbox``.
        bbox = str(batterbox).split(".")[-1]
        batterbox_uri = rdflib.URIRef(self.uri + bbox)
        thisGame = rdflib.URIRef(self.uri + gameCode)
        pitcher_name = self.get_player_name(pitcher)
        pitcher = rdflib.URIRef(self.uri + pitcher)
        hitter_name = self.get_player_name(hitter)
        hitter = rdflib.URIRef(self.uri + hitter)

        # Situation phrases (inning/score/strike-ball-out context) are later
        # combined with each sentence via itertools.product.
        situation = self.get_situation(gameinfo=gameinfo,
                                       inn=inn,
                                       score=score,
                                       sbo=sbo)
        annotation = []
        if (isaboutpitcher):
            """
                이름 추가해서
                    상황 추가해서
                        0 팀 소속 0 투수
                        투수 오늘 경기 0 번째 타석에서 공을 던집니다.
                        투수 오늘 경기 0 번째 타자를 상대하고 있습니다.
                        투수 오늘 경기 0 개의 삼진을 잡아내고 있습니다.
                        투수 오늘 경기 0 개의 포볼로 타자 출루 시켰습니다.
                        투수 오늘 경기 0 개의 플라이 아웃으로 타자 잡아냈습니다.
                        투수 오늘 경기 0 개의 땅볼 아웃으로 타자 잡아냈습니다.
                        투수 오늘 경기 0 개의 싱글 안타 허용하였습니다.
                        투수 오늘 경기 0 개의 2루타 허용하였습니다.

                    투수 이번 시즌 0의 평균 자책점을 기록하고 있습니다.
                    투수 저번 타석 0을 기록하였습니다.

                투수 과연 어떤 공을 던질까요?
            """
            # Season earned-run average of the pitcher.
            query = "SELECT ?o WHERE {?pitcher ?thisERA ?o}"
            r = self.rdf.query(query,
                               initBindings={
                                   "pitcher": pitcher,
                                   "thisERA": self.thisERA
                               })
            era = list(r)[0][0]

            # All of this pitcher's batter-box results in the current game.
            query = "SELECT ?o WHERE {?s ?fromPitcher ?pitcher . ?s ?inGame ?thisGame . ?s ?result ?o} order by ?s"
            r = self.rdf.query(query,
                               initBindings={
                                   "fromPitcher": self.fromPitcher,
                                   "pitcher": pitcher,
                                   "inGame": self.inGame,
                                   "thisGame": thisGame,
                                   "result": self.result
                               })

            # NOTE(review): ``r`` is an rdflib query Result that is iterated
            # several times below (list(r) plus the comprehensions) — this
            # assumes the Result supports re-iteration after being
            # materialized; verify against the rdflib version in use.
            total_batterbox = len(list(r)) + 1
            strikeout = len([1 for i in r if 'Strikeout' in i[0]])
            baseonballs = len([1 for i in r if 'BaseOnBalls' in i[0]])
            fly = len([1 for i in r if 'Fly' in i[0]])
            outinbase = len([1 for i in r if 'OutInBase' in i[0]])
            singlehit = len([1 for i in r if 'SingleHit' in i[0]])
            double = len([1 for i in r if 'Double' in i[0]])

            # Most recent result, translated to Korean (only set when the
            # pitcher already faced at least one batter this game).
            if (r):
                recent_result = self.change_result_history_to_korean(
                    list(r)[-1][0].split("#")[1].split("_")[1])

            annotation_about_this_game = [
                "투수 오늘 경기 " + str(total_batterbox) + "번째 타석에서 공을 던집니다",
                "투수 오늘 경기 " + str(total_batterbox) + "번째 타자를 상대하고 있습니다",
                "투수 오늘 경기 " + str(strikeout) + "개의 삼진을 잡아내고 있습니다",
                "투수 오늘 경기 " + str(baseonballs) + "개의 포볼로 타자 출루 시켰습니다",
                "투수 오늘 경기 " + str(fly) + "개의 플라이 아웃으로 타자 잡아냈습니다",
                "투수 오늘 경기 " + str(outinbase) + "개의 땅볼 아웃으로 타자 잡아냈습니다",
                "투수 오늘 경기 " + str(singlehit) + "개의 싱글 안타 허용하였습니다",
                "투수 오늘 경기 " + str(double) + "개의 2루타 허용하였습니다",
                pitcher_name + " 투수 오늘 경기 " + str(total_batterbox) +
                "번째 타석에서 공을 던집니다",
                pitcher_name + " 투수 오늘 경기 " + str(total_batterbox) +
                "번째 타자를 상대하고 있습니다",
                pitcher_name + " 투수 오늘 경기 " + str(strikeout) +
                "개의 삼진을 잡아내고 있습니다",
                pitcher_name + " 투수 오늘 경기 " + str(baseonballs) +
                "개의 포볼로 타자 출루 시켰습니다",
                pitcher_name + " 투수 오늘 경기 " + str(fly) +
                "개의 플라이 아웃으로 타자 잡아냈습니다",
                pitcher_name + " 투수 오늘 경기 " + str(outinbase) +
                "개의 땅볼 아웃으로 타자 잡아냈습니다",
                pitcher_name + " 투수 오늘 경기 " + str(singlehit) +
                "개의 싱글 안타 허용하였습니다",
                pitcher_name + " 투수 오늘 경기 " + str(double) + "개의 2루타 허용하였습니다",
                pitcher_name + " 오늘 경기 " + str(total_batterbox) +
                "번째 타석에서 공을 던집니다",
                pitcher_name + " 오늘 경기 " + str(total_batterbox) +
                "번째 타자를 상대하고 있습니다",
                pitcher_name + " 오늘 경기 " + str(strikeout) + "개의 삼진을 잡아내고 있습니다",
                pitcher_name + " 오늘 경기 " + str(baseonballs) +
                "개의 포볼로 타자 출루 시켰습니다",
                pitcher_name + " 오늘 경기 " + str(fly) + "개의 플라이 아웃으로 타자 잡아냈습니다",
                pitcher_name + " 오늘 경기 " + str(outinbase) +
                "개의 땅볼 아웃으로 타자 잡아냈습니다",
                pitcher_name + " 오늘 경기 " + str(singlehit) + "개의 싱글 안타 허용하였습니다",
                pitcher_name + " 오늘 경기 " + str(double) + "개의 2루타 허용하였습니다",
            ]
            # Prefix every game sentence with every situation phrase.
            annotation = annotation + list(
                map("".join, product(situation, annotation_about_this_game)))

            if (total_batterbox > 1):
                annotation = annotation + [
                    "투수 지난 타석 " + str(recent_result) + "을 기록하였습니다",
                    pitcher_name + " 투수 지난 타석 " + str(recent_result) +
                    "을 기록하였습니다",
                    pitcher_name + " 지난 타석 " + str(recent_result) +
                    "을 기록하였습니다",
                ]

            annotation = annotation + [
                "투수 이번 시즌 " + str(era) + "의 평균 자책점을 기록하고 있습니다",
                pitcher_name + " 투수 이번 시즌 " + str(era) + "의 평균 자책점을 기록하고 있습니다",
                pitcher_name + " 이번 시즌 " + str(era) + "의 평균 자책점을 기록하고 있습니다",
                pitcher_name + " 투수",
                pitcher_name + " 투수 어떤 공을 던질까요",
            ]

        if (isabouthitter):
            """
            타자 오늘 0번째 타석 입니다
            타자 오늘 0번째 타석에서 섰습니다
                if 타석 > 1
                타자 오늘 0번째 타석에서 0개의 안타 기록했습니다
                타자 오늘 0개의 안타 기록합니다
                타자 저번 타석 0을 기록하였습니다
                    if 아웃 >= 1
                    타자 오늘 0번째 타석에서 0개의 0아웃 기록했습니다
    
    
            타자 이번 시즌 0의 평균 타율을 기록하고 있습니다
            타자 이번 타석 안타를 기록 할 수 있을까요
            """

            # Season batting average of the hitter.
            query = "SELECT ?o where {?batter ?thisAVG ?o}"
            r = self.rdf.query(query,
                               initBindings={
                                   "batter": hitter,
                                   "thisAVG": self.thisAVG
                               })
            avg = list(r)[0][0]

            # All of this hitter's batter-box results in the current game.
            query = "SELECT ?o where {?s ?toHitter ?hitter . ?s ?inGame ?thisGame . ?s ?result ?o } order by ?s"
            r = self.rdf.query(query,
                               initBindings={
                                   "toHitter": self.toHitter,
                                   "hitter": hitter,
                                   "inGame": self.inGame,
                                   "thisGame": thisGame,
                                   "result": self.result
                               })

            total_batterbox = len(list(r)) + 1
            strikeout = len([1 for i in r if 'Strikeout' in i[0]])
            baseonballs = len([1 for i in r if 'BaseOnBalls' in i[0]])
            fly = len([1 for i in r if 'Fly' in i[0]])
            outinbase = len([1 for i in r if 'OutInBase' in i[0]])
            singlehit = len([1 for i in r if 'SingleHit' in i[0]])
            double = len([1 for i in r if 'Double' in i[0]])
            triple = len([1 for i in r if 'Triple' in i[0]])
            homerun = len([1 for i in r if 'HomeRun' in i[0]])

            hits = int(singlehit) + int(double) + int(triple) + int(homerun)
            outs = int(fly) + int(outinbase) + int(strikeout)

            if (r):
                recent_result = self.change_result_history_to_korean(
                    list(r)[-1][0].split("#")[1].split("_")[1])

            annotation_about_this_game = [
                "타자 오늘 경기 " + str(total_batterbox) + "번째 타석입니다",
                "타자 오늘 경기 " + str(total_batterbox) + "번째 타석에 섰습니다",
                hitter_name + " 타자 오늘 경기 " + str(total_batterbox) + "번째 타석입니다",
                hitter_name + " 타자 오늘 경기 " + str(total_batterbox) +
                "번째 타석에 섰습니다",
                hitter_name + " 오늘 경기 " + str(total_batterbox) + "번째 타석입니다",
                hitter_name + " 오늘 경기 " + str(total_batterbox) + "번째 타석에 섰습니다",
            ]
            if (total_batterbox > 1):
                annotation_about_this_game = annotation_about_this_game + [
                    "타자 오늘 " + str(total_batterbox) + "번째 타석에서 " + str(hits) +
                    "개의 안타 기록했습니다",
                    "타자 오늘 " + str(singlehit) + "개의 1루타 기록했습니다",
                    "타자 오늘 " + str(double) + "개의 2루타 기록했습니다",
                    "타자 오늘 " + str(triple) + "개의 3루타 기록했습니다",
                    "타자 저번 타석 " + str(recent_result) + "을 기록하였습니다",
                    hitter_name + " 타자 오늘 " + str(total_batterbox) +
                    "번째 타석에서 " + str(hits) + "개의 안타 기록했습니다",
                    hitter_name + " 타자 오늘 " + str(singlehit) + "개의 1루타 기록했습니다",
                    hitter_name + " 타자 오늘 " + str(double) + "개의 2루타 기록했습니다",
                    hitter_name + " 타자 오늘 " + str(triple) + "개의 3루타 기록했습니다",
                    hitter_name + " 타자 저번 타석 " + str(recent_result) +
                    "을 기록하였습니다",
                    hitter_name + " 오늘 " + str(total_batterbox) + "번째 타석에서 " +
                    str(hits) + "개의 안타 기록했습니다",
                    hitter_name + " 오늘 " + str(singlehit) + "개의 1루타 기록했습니다",
                    hitter_name + " 오늘 " + str(double) + "개의 2루타 기록했습니다",
                    hitter_name + " 오늘 " + str(triple) + "개의 3루타 기록했습니다",
                    hitter_name + " 저번 타석 " + str(recent_result) + "을 기록하였습니다",
                ]
            if (outs > 0):
                annotation_about_this_game = annotation_about_this_game + [
                    "타자 오늘 " + str(total_batterbox) + "번째 타석에서 " + str(outs) +
                    "개의 아웃 기록했습니다",
                    "타자 오늘 " + str(outs) + "개의 아웃 기록했습니다",
                    "타자 오늘 " + str(fly) + "개의 플라이 아웃 기록했습니다",
                    "타자 오늘 " + str(outinbase) + "개의 땅볼 아웃 기록했습니다",
                    hitter_name + " 타자 오늘 " + str(total_batterbox) +
                    "번째 타석에서 " + str(outs) + "개의 아웃 기록했습니다",
                    hitter_name + " 타자 오늘 " + str(outs) + "개의 아웃 기록했습니다",
                    hitter_name + " 타자 오늘 " + str(fly) + "개의 플라이 아웃 기록했습니다",
                    hitter_name + " 타자 오늘 " + str(outinbase) +
                    "개의 땅볼 아웃 기록했습니다",
                    hitter_name + " 오늘 " + str(total_batterbox) + "번째 타석에서 " +
                    str(outs) + "개의 아웃 기록했습니다",
                    hitter_name + " 오늘 " + str(outs) + "개의 아웃 기록했습니다",
                    hitter_name + " 오늘 " + str(fly) + "개의 플라이 아웃 기록했습니다",
                    hitter_name + " 오늘 " + str(outinbase) + "개의 땅볼 아웃 기록했습니다",
                ]
            if (strikeout > 0):
                annotation_about_this_game = annotation_about_this_game + [
                    "타자 오늘 " + str(total_batterbox) + "번째 타석에서 " +
                    str(strikeout) + "개의 삼진 아웃 당했습니다",
                    "타자 오늘 " + str(strikeout) + "개의 삼진 아웃 당헀습니다",
                    hitter_name + " 타자 오늘 " + str(total_batterbox) +
                    "번째 타석에서 " + str(strikeout) + "개의 삼진 아웃 당했습니다",
                    hitter_name + " 타자 오늘 " + str(strikeout) +
                    "개의 삼진 아웃 당헀습니다",
                    hitter_name + " 오늘 " + str(total_batterbox) + "번째 타석에서 " +
                    str(strikeout) + "개의 삼진 아웃 당했습니다",
                    hitter_name + " 오늘 " + str(strikeout) + "개의 삼진 아웃 당헀습니다",
                ]
            if (baseonballs > 0):
                annotation_about_this_game = annotation_about_this_game + [
                    "타자 오늘 " + str(total_batterbox) + "번째 타석에서 " +
                    str(baseonballs) + "개의 포볼로 출루 하였습니다",
                    "타자 오늘 " + str(baseonballs) + "개의 포볼 기록합니다",
                    hitter_name + " 타자 오늘 " + str(total_batterbox) +
                    "번째 타석에서 " + str(baseonballs) + "개의 포볼로 출루 하였습니다",
                    hitter_name + " 타자 오늘 " + str(baseonballs) + "개의 포볼 기록합니다",
                    hitter_name + " 오늘 " + str(total_batterbox) + "번째 타석에서 " +
                    str(baseonballs) + "개의 포볼로 출루 하였습니다",
                    hitter_name + " 오늘 " + str(baseonballs) + "개의 포볼 기록합니다",
                ]
            if (homerun > 0):
                annotation_about_this_game = annotation_about_this_game + [
                    "타자 오늘 홈런 기록하였습니다",
                    hitter_name + " 타자 오늘 홈런 기록하였습니다",
                    hitter_name + " 오늘 홈런 기록하였습니다",
                ]

            # Prefix every game sentence with every situation phrase.
            annotation = annotation + list(
                map("".join, product(situation, annotation_about_this_game)))
            annotation = annotation + [
                "타자 이번 시즌 " + str(avg) + "의 평균 타율을 기록하고 있습니다",
                hitter_name + " 타자 이번 시즌 " + str(avg) + "의 평균 타율을 기록하고 있습니다",
                hitter_name + " 이번 시즌 " + str(avg) + "의 평균 타율을 기록하고 있습니다",
                hitter_name + " 타자",
            ]

        if (isaboutpitcher and isabouthitter):
            # Head-to-head history of this pitcher vs this hitter in the
            # current game (descending, so history[0] is the most recent).
            query = "SELECT ?o where {?s ?inGame ?thisGame . ?s ?toHitter ?hitter . ?s ?fromPitcher ?pitcher . ?s ?result ?o} order by desc(?s)"
            r = self.rdf.query(query,
                               initBindings={
                                   "inGame": self.inGame,
                                   "thisGame": thisGame,
                                   "toHitter": self.toHitter,
                                   "hitter": hitter,
                                   "fromPitcher": self.fromPitcher,
                                   "pitcher": pitcher,
                                   "result": self.result
                               })

            total_batterbox = len(list(r)) + 1
            strikeout = len([1 for i in r if 'Strikeout' in i[0]])
            baseonballs = len([1 for i in r if 'BaseOnBalls' in i[0]])
            fly = len([1 for i in r if 'Fly' in i[0]])
            outinbase = len([1 for i in r if 'OutInBase' in i[0]])
            singlehit = len([1 for i in r if 'SingleHit' in i[0]])
            double = len([1 for i in r if 'Double' in i[0]])
            triple = len([1 for i in r if 'Triple' in i[0]])
            homerun = len([1 for i in r if 'HomeRun' in i[0]])

            if (r):
                history = [
                    self.change_result_history_to_korean(
                        row[0].split("#")[1].split("_")[1]) for row in r
                ]
                recent_result = history[0]

            hits = int(singlehit) + int(double) + int(triple) + int(homerun)
            # NOTE(review): unlike the hitter section above, strikeouts are
            # not counted into ``outs`` here — confirm whether intentional.
            outs = int(fly) + int(outinbase)

            annotation = annotation + [
                hitter_name + " 타자 " + pitcher_name + " 투수를 상대로 오늘 " +
                str(hits) + "개의 안타 기록 하였습니다",
                pitcher_name + " 투수 " + hitter_name + " 타자를 상대로 오늘 경기 " +
                str(hits) + "개의 안타를 허용 하였습니다",
                pitcher_name + " 투수 " + hitter_name + " 타자를 상대로 오늘 경기 " +
                str(total_batterbox) + "번째 대결입니다",
                "투수와 타자 사이에 팽팽한 긴장감이 감지됩니다.",
            ]
            if (strikeout > 0):
                annotation = annotation + [
                    pitcher_name + " 투수 " + hitter_name + " 타자를 상대로 오늘 경기 " +
                    str(strikeout) + "개의 스트라이크 아웃을 잡아냈습니다",
                    hitter_name + " 타자 " + pitcher_name + " 투수 상대로 오늘 경기 " +
                    str(strikeout) + "개의 스트라이크 아웃 당했습니다",
                ]
            if (fly > 0):
                annotation = annotation + [
                    pitcher_name + " 투수 " + hitter_name + " 타자를 상대로 오늘 경기 " +
                    str(fly) + "개의 플라이 아웃을 잡아냈습니다",
                    hitter_name + " 타자 " + pitcher_name + " 투수 상대로 오늘 경기 " +
                    str(fly) + "개의 플라이 아웃 당했습니다",
                ]
            if (baseonballs > 0):
                annotation = annotation + [
                    pitcher_name + " 투수 " + hitter_name + " 타자를 상대로 오늘 경기 " +
                    str(baseonballs) + "개의 포볼로 출루 시켰습니다",
                    hitter_name + " 타자 " + pitcher_name + " 투수 상대로 오늘 경기 " +
                    str(baseonballs) + "개의 포볼로 출루 하였습니다",
                ]
            if (homerun > 0):
                annotation = annotation + [
                    pitcher_name + " 투수 " + hitter_name +
                    " 타자를 상대로 오늘 경기 홈런을 허용하였습니다",
                    hitter_name + " 타자 " + pitcher_name +
                    " 투수 상대로 오늘 경기 홈런 기록하였습니다",
                ]
            if (total_batterbox > 1):
                annotation = annotation + [
                    pitcher_name + " 투수 " + hitter_name + " 타자를 상대로 저번 타석 " +
                    str(recent_result) + " 기록하였습니다",
                    hitter_name + " 타자 " + pitcher_name + " 투수 상대로 저번 타석 " +
                    str(recent_result) + " 기록하였습니다",
                ]

        if (isaboutrunner):
            """
            각 루에 누가 있는지
                ex) 0루에 OOO(이)가 있습니다.
                ex) 0루에 OOO(이)가 주자로 있습니다.
                ex) OOO(이)가 나가있습니다.
                ex) 000(이)가 득점권에 있습니다.

            1루에 주자 있을 때
                ex) 타자 1루 주자 있었던 최근 타석 000를 기록하였습니다.
                ex) 오늘 1루 주자가 있는 타석에서 타자 000를 기록하였습니다.
                ex) 투수 1루 주자 있었던 최근 타석 000를 기록하였습니다.
            """
            # Resolve the runners on base to display names (None when empty).
            first_runner, second_runner, third_runner = self.search_runner(
                batterbox=batterbox)
            if (first_runner):
                first_runner = self.get_player_name(
                    first_runner.split("#")[-1])
            if (second_runner):
                second_runner = self.get_player_name(
                    second_runner.split("#")[-1])
            if (third_runner):
                third_runner = self.get_player_name(
                    third_runner.split("#")[-1])

            if (first_runner or second_runner or third_runner):
                annotation = annotation + [
                    " ".join([
                        i for i in [first_runner, second_runner, third_runner]
                        if i is not None
                    ]) + " 주자로 나가있습니다", "주자에는 " + " ".join([
                        i for i in [first_runner, second_runner, third_runner]
                        if i is not None
                    ]) + "가 있습니다"
                ]
            if (first_runner):
                # Hitter's history in at-bats with a runner on first base.
                query = "SELECT ?o where {?s ?toHitter ?hitter . ?s ?result ?o . ?s ?stayIn1stBase ?o1} order by ?s"
                r = self.rdf.query(query,
                                   initBindings={
                                       "toHitter": self.toHitter,
                                       "hitter": hitter,
                                       "inGame": self.inGame,
                                       "thisGame": thisGame,
                                       "result": self.result,
                                       "stayIn1stBase": self.stayIn1stBase
                                   })
                if (r):
                    recent_result = self.change_result_history_to_korean(
                        list(r)[-1][0].split("#")[1].split("_")[1])

                    annotation = annotation + [
                        "타자 1루 주자가 있는 타석에서 최근 " + str(recent_result) +
                        "을 기록하였습니다", hitter_name + " 타자 1루 주자가 있는 타석에서 최근 " +
                        str(recent_result) + "을 기록하였습니다"
                    ]

                # Pitcher's history in at-bats with a runner on first base.
                query = "SELECT ?o where {?s ?fromPitcher ?pitcher . ?s ?result ?o . ?s ?stayIn1stBase ?o1} order by ?s"
                r = self.rdf.query(query,
                                   initBindings={
                                       "fromPitcher": self.fromPitcher,
                                       "pitcher": pitcher,
                                       "inGame": self.inGame,
                                       "thisGame": thisGame,
                                       "result": self.result,
                                       "stayIn1stBase": self.stayIn1stBase
                                   })

                if (r):
                    recent_result = self.change_result_history_to_korean(
                        list(r)[-1][0].split("#")[1].split("_")[1])

                    annotation = annotation + [
                        "투수 1루 주자가 있는 타석에서 최근 " + str(recent_result) +
                        "을 기록하였습니다", pitcher_name + " 투수 1루 주자가 있는 타석에서 최근 " +
                        str(recent_result) + "을 기록하였습니다"
                    ]

                annotation = annotation + [
                    "1루에는 " + str(first_runner) + "가 주자로 있습니다",
                    "1루에는 " + str(first_runner) + "가 있습니다",
                    str(first_runner) + " 선수 1루에 있습니다",
                ]
            if (second_runner):
                annotation = annotation + [
                    "득점권에 주자 나가 있습니다",
                    str(second_runner) + ", 득점권에 주자로 있습니다",
                    str(second_runner) + " 선수 2루에 있습니다",
                    "2루에는 " + str(second_runner) + "가 주자로 있습니다",
                    "2루에는 " + str(second_runner) + "가 있습니다",
                ]
            if (third_runner):
                annotation = annotation + [
                    "득점권에 주자 나가 있습니다",
                    str(third_runner) + ", 득점권에 주자로 있습니다",
                    str(third_runner) + " 선수 3루에 있습니다",
                    "3루에는 " + str(third_runner) + "가 주자로 있습니다",
                    "3루에는 " + str(third_runner) + "가 있습니다",
                ]

        return annotation
Ejemplo n.º 4
0
def readuris(f):
    """Read a file of URIs (one per line) and return their N3 serializations.

    ``f`` is a path to a text file; each stripped line is wrapped in an
    ``rdflib.URIRef`` and rendered with ``.n3()``.
    """
    # ``open`` + context manager: the Python 2 ``file()`` builtin no longer
    # exists in Python 3, and this also guarantees the handle is closed.
    with open(f) as fh:
        return [rdflib.URIRef(line.strip()).n3() for line in fh]
Ejemplo n.º 5
0
def get_exception_ids():
    """Fetch the SPDX license-exception identifiers from spdx.org.

    Parses the RDFa exceptions index and returns the ``licenseId`` literal
    values as a list of strings.
    """
    graph = rdflib.Graph()
    graph.parse('http://spdx.org/licenses/exceptions-index.html', 'rdfa')
    ref = rdflib.URIRef('http://spdx.org/rdf/terms#licenseId')
    # Return a concrete list: on Python 3 the original ``map(...)`` was a
    # one-shot lazy iterator, surprising callers that iterate twice.
    return [obj.value for _, obj in graph.subject_objects(ref)]
Ejemplo n.º 6
0
def delete_activity(name):
    """Remove an activity and its time-interval subtree from output1.ttl.

    Loads the graph, resolves the activity's time interval and its
    beginning/end/duration nodes (each with min/max), removes every triple
    whose subject is one of those nodes, then serializes the graph back.
    """
    g = Graph()
    g.parse('output1.ttl#', format="turtle")

    act_name = tove2_prefix + name
    # Python 2 ``print x`` statements converted to print() calls so this
    # module parses under Python 3 as well.
    print(act_name)
    act_timeInterval = g.value(subject=rdflib.URIRef(act_name),predicate=hasTimeInterval)
    print(act_timeInterval)
    # NOTE(review): g.value returns None when a triple is missing, and
    # rdflib.URIRef(None) below would then raise — assumes the activity is
    # always fully populated; confirm with callers.
    act_start = g.value(subject=rdflib.URIRef(act_timeInterval), predicate=hasBeginning)
    act_start_min = g.value(subject=rdflib.URIRef(act_start), predicate=hasMin)
    act_start_max = g.value(subject=rdflib.URIRef(act_start), predicate=hasMax)
    act_end = g.value(subject=rdflib.URIRef(act_timeInterval), predicate=hasEnd)
    act_end_min = g.value(subject=rdflib.URIRef(act_end), predicate=hasMin)
    act_end_max = g.value(subject=rdflib.URIRef(act_end), predicate=hasMax)
    act_dur = g.value(subject=rdflib.URIRef(act_timeInterval), predicate=hasDuration)
    act_dur_min = g.value(subject=rdflib.URIRef(act_dur), predicate=hasMin)
    act_dur_max = g.value(subject=rdflib.URIRef(act_dur), predicate=hasMax)

    # Remove duration, its bounds, then start/end with their bounds, and
    # finally the interval and the activity node itself.
    g.remove((rdflib.URIRef(act_dur),None,None))
    g.remove((rdflib.URIRef(act_dur_min),None,None))
    g.remove((rdflib.URIRef(act_dur_max),None,None))

    g.remove((rdflib.URIRef(act_start),None,None))
    g.remove((rdflib.URIRef(act_start_min),None,None))
    g.remove((rdflib.URIRef(act_start_max),None,None))

    g.remove((rdflib.URIRef(act_end),None,None))
    g.remove((rdflib.URIRef(act_end_min),None,None))
    g.remove((rdflib.URIRef(act_end_max),None,None))

    g.remove((rdflib.URIRef(act_timeInterval),None,None))
    g.remove((rdflib.URIRef(act_name),None,None))

    g.close()
    g.serialize(destination='output1.ttl', format='turtle')
Ejemplo n.º 7
0
    def modify_labels(self):
        """Modify the labels in the graph. Append a T.

        Helper method.
        """
        city_ns = self.namespace_registry.city
        label_predicates = (rdflib.SKOS.prefLabel, rdflib.RDFS.label)
        rebuilt = []
        for subj, pred, obj in self.graph:
            # Non-label triples (and triples outside the city namespace)
            # pass through unchanged.
            if subj not in city_ns or pred not in label_predicates:
                rebuilt.append((subj, pred, obj))
                continue
            # To test querying by label.
            rebuilt.append((subj, rdflib.SKOS.prefLabel,
                            rdflib.Literal(f"{obj}_T", lang="en")))
            # To test RDFS labels and special characters.
            rebuilt.append((subj, rdflib.RDFS.label,
                            rdflib.Literal(f"{obj}-$", lang="en")))
            # To test non-english languages.
            rebuilt.append((subj, rdflib.RDFS.label,
                            rdflib.Literal(f"{obj}_T_jp", lang="jp")))
            rebuilt.append((subj, rdflib.SKOS.prefLabel,
                            rdflib.Literal(f"{obj}_T_aa_SKOS", lang="aa")))
            # To test undefined languages.
            rebuilt.append((subj, rdflib.RDFS.label,
                            rdflib.Literal(f"{obj}_T_unknown_lang")))
            # To test labels that coincide in different languages.
            for lang in ("es", "it"):
                rebuilt.append((subj, rdflib.RDFS.label,
                                rdflib.Literal(f"{obj}_T_cosa", lang=lang)))
        # Test different concepts with same label, and querying by language.
        rebuilt.append((rdflib.URIRef(str(city_ns._iri) + "City"),
                        rdflib.RDFS.label,
                        rdflib.Literal("Burro", lang="it")))
        rebuilt.append((rdflib.URIRef(str(city_ns._iri) + "Street"),
                        rdflib.RDFS.label,
                        rdflib.Literal("Burro", lang="es")))
        # Rebuild the graph from scratch with the collected triples.
        self.graph.remove((None, None, None))
        for triple in rebuilt:
            self.graph.add(triple)
Ejemplo n.º 8
0
def f2016KDDCupSelectedPapersHandler(graph, nss, f):
    """Import the 2016 KDD Cup selected-papers TSV stream into ``graph``.

    Each line of ``f`` must hold at least four tab-separated fields:
    paper id, title, year, conference id.  For every paper, a node is
    added with label, id, title, year of publication, and type, and both
    the paper and its conference node are linked to the
    ``MAG_KDD_Subset`` collection in both directions.

    :param graph: rdflib.Graph to add triples to (mutated in place).
    :param nss: dict mapping namespace prefixes to their base IRI strings.
    :param f: iterable of tab-separated lines (e.g. an open file handle).
    :return: list of (paper id, conference id, year) tuples, one per line.
    """
    entries = []

    # Loop-invariant nodes/predicates, hoisted out of the per-line loop
    # (the original rebuilt them on every iteration).
    kddSubset = rdflib.URIRef(nss['base'] + 'MAG_KDD_Subset')
    isPartOf = rdflib.URIRef(nss['dcterms'] + 'isPartOf')
    hasPart = rdflib.URIRef(nss['dcterms'] + 'hasPart')

    progress = 0
    for line in f:

        terms = line.strip().split('\t')

        ident = terms[0]
        title = capitalize(terms[1])
        year = terms[2]
        confID = terms[3]
        #confShortName = terms[4]

        entries.append((ident, confID, year))

        # paper node plus label
        root = rdflib.URIRef(nss['base'] + 'MAG_Paper_' + ident)
        label = rdflib.Literal(rawString(title), lang='en')
        graph.add((root, rdflib.URIRef(nss['rdfs'] + 'label'), label))

        # membership of the KDD subset, both directions
        graph.add((root, isPartOf, kddSubset))
        graph.add((kddSubset, hasPart, root))

        # id node
        idNode = rdflib.Literal(ident,
                                datatype=rdflib.URIRef(nss['xsd'] + 'ID'))
        graph.add((root, rdflib.URIRef(nss['base'] + 'MAG_hasID'), idNode))

        # title — reuse the label literal instead of building an equal one
        graph.add((root, rdflib.URIRef(nss['base'] + 'MAG_hasTitle'), label))

        # year
        ynode = rdflib.Literal(year,
                               datatype=rdflib.URIRef(nss['xsd'] + 'gYear'))
        graph.add(
            (root, rdflib.URIRef(nss['base'] + 'MAG_hasYearOfPublication'),
             ynode))

        # type
        tnode = rdflib.URIRef(nss['base'] + 'MAG_Paper')
        graph.add((root, rdflib.URIRef(nss['rdf'] + 'type'), tnode))

        # conference node; also a member of the KDD subset
        croot = rdflib.URIRef(nss['base'] + 'MAG_Conference_' + confID)
        #graph.add((root, rdflib.URIRef(nss['base'] +'MAG_isPresentedAt'), croot))
        #graph.add((croot, rdflib.URIRef(nss['base'] +'MAG_hasPresented'), root))

        graph.add((croot, isPartOf, kddSubset))
        graph.add((kddSubset, hasPart, croot))

        progress += 1
        if progress % 10000 == 0:
            sys.stdout.write('\r ' + str(progress) + ' lines read ')

    return entries
Ejemplo n.º 9
0
def fKDDConferenceInstancesHandler(graph, nss, f, conferenceIDs, years,
                                   paperIDs):
    #geoIndex = GeoIndex()
    """Import conference-instance rows from the TSV stream ``f`` into ``graph``.

    Only instances whose organizing conference id appears in
    ``conferenceIDs`` with a derived year after 2010 (or no derivable year)
    are kept.  ``conferenceIDs``, ``years`` and ``paperIDs`` are parallel
    lists linked by index: when ``years[i]`` equals the instance's derived
    year, paper ``paperIDs[i]`` is linked to the instance via
    MAG_isPresentedAt / MAG_hasPresented.  ``graph`` is mutated in place.

    Disabled derivation of the parallel lists, kept for reference:

    conferenceIDsPerYear = [(cid.value, y.value) \
                                 for pid in paperIDs \
                                 for paper, _, _ in graph.triples((None, rdflib.URIRef(nss['base'] + 'MAG_hasID'), rdflib.Literal(pid))) \
                                 for _, _, conference in graph.triples((paper, rdflib.URIRef(nss['base'] + 'MAG_presentedAt'), None)) \
                                 for _, _, cid in graph.triples((conference, rdflib.URIRef(nss['base'] + 'MAG_hasID'), None)) \
                                 for _, _, y in graph.triples((paper, rdflib.URIRef(nss['base'] + 'MAG_yearOfPublication'), None))]
    """
    # linked by indices
    #conferenceIDs, years = zip(*conferenceIDsPerYear)

    #kddConferenceInstances = set()

    progress = 0

    for line in f:

        terms = line.strip().split('\t')

        organizationId = terms[0]
        ident = terms[1]
        shortName = terms[2]
        name = terms[3]
        # Columns 4+ are optional: missing or empty fields become None.
        location = terms[4] if len(terms) > 4 and terms[4] != '' else None
        url = terms[5] if len(terms) > 5 and terms[5] != '' else None
        # Dates go through `parser.parse` — presumably dateutil's parser;
        # confirm against this file's imports.
        startdate = parser.parse(
            terms[6]) if len(terms) > 6 and terms[6] != '' else None
        enddate = parser.parse(
            terms[7]) if len(terms) > 7 and terms[7] != '' else None
        abstractdate = parser.parse(
            terms[8]) if len(terms) > 8 and terms[8] != '' else None
        subdate = parser.parse(
            terms[9]) if len(terms) > 9 and terms[9] != '' else None
        notdate = parser.parse(
            terms[10]) if len(terms) > 10 and terms[10] != '' else None
        finaldate = parser.parse(
            terms[11]) if len(terms) > 11 and terms[11] != '' else None

        progress += 1
        if progress % 10000 == 0:
            sys.stdout.write('\r ' + str(progress) + ' lines read ')

        if ' ' in ident:  # dirty fix cause the specs dont hold
            continue

        # Derive the instance's year from the first available date;
        # -1 marks "unknown".
        if startdate is not None:
            paperyear = startdate.year
        elif enddate is not None:
            paperyear = enddate.year
        elif finaldate is not None:
            paperyear = finaldate.year
        else:
            paperyear = -1

        # Linear scan for a matching target conference (year > 2010 or
        # unknown).  The index i found here is reused below to address the
        # parallel `years` / `paperIDs` lists.
        i = 0
        while i < len(conferenceIDs):
            if conferenceIDs[i] == organizationId and (int(paperyear) > 2010
                                                       or int(paperyear) < 0):
                break
            i += 1
        if i >= len(conferenceIDs):  # not a target conference
            continue

        # kddConferenceInstances.add(ident) # add kdd conf instances

        # instance node plus label
        root = rdflib.URIRef(nss['base'] + 'MAG_ConferenceInstance_' + ident)
        label = rdflib.Literal(rawString(name), lang='en')
        graph.add((root, rdflib.URIRef(nss['rdfs'] + 'label'), label))

        # link the matching paper and the instance in both directions
        if int(years[i]) == int(paperyear):
            graph.add((rdflib.URIRef(nss['base'] + 'MAG_Paper_' + paperIDs[i]),
                       rdflib.URIRef(nss['base'] + 'MAG_isPresentedAt'), root))
            graph.add(
                (root, rdflib.URIRef(nss['base'] + 'MAG_hasPresented'),
                 rdflib.URIRef(nss['base'] + 'MAG_Paper_' + paperIDs[i])))

        # type
        tnode = rdflib.URIRef(nss['base'] + 'MAG_ConferenceInstance')
        graph.add((root, rdflib.URIRef(nss['rdf'] + 'type'), tnode))

        # id node of affiliation
        idNode = rdflib.Literal(ident,
                                datatype=rdflib.URIRef(nss['xsd'] + 'ID'))
        graph.add((root, rdflib.URIRef(nss['base'] + 'MAG_hasID'), idNode))

        # organizing conference, linked in both directions
        graph.add((rdflib.URIRef(nss['base'] + 'MAG_Conference_' + organizationId), \
                   rdflib.URIRef(nss['base'] + 'MAG_hasOrganized'), \
                   root))
        graph.add((root, \
                   rdflib.URIRef(nss['base'] + 'MAG_isOrganizedBy'), \
                   rdflib.URIRef(nss['base'] + 'MAG_Conference_' + organizationId)))

        # URL
        if url is not None and isURL(url):
            node = rdflib.Literal(url,
                                  datatype=rdflib.URIRef(nss['xsd'] +
                                                         'anyURI'))
            graph.add((root, rdflib.URIRef(nss['base'] + 'MAG_hasUrl'), node))

        # short name
        node = rdflib.Literal(rawString(shortName),
                              datatype=rdflib.URIRef(nss['xsd'] + 'string'))
        graph.add(
            (root, rdflib.URIRef(nss['base'] + 'MAG_hasShortName'), node))

        # location
        #geoURI =geoIndex.resolve(location)
        #if geoURI is None:
        #    loc = rdflib.Literal(location)
        #else:
        #    loc = rdflib.URIRef(geoURI) # range should actually be geoThing (coordinates)
        if location is not None and isLocation(location):
            loc = rdflib.Literal(location)  # turned off for now
            graph.add(
                (root, rdflib.URIRef(nss['base'] + 'MAG_hasLocation'), loc))

        # to facilitate easy queries
        if paperyear is not None and int(paperyear) > 0:
            year = rdflib.Literal(int(paperyear),
                                  datatype=rdflib.URIRef(nss['xsd'] + 'gYear'))
            graph.add(
                (root, rdflib.URIRef(nss['base'] + 'MAG_hasYearOfOccurence'),
                 year))

        # NOTE(review): 'Date' (capital D) is not a standard XSD datatype —
        # XML Schema defines lowercase xsd:date / xsd:dateTime.  It is used
        # for every date literal below; confirm the capitalization is
        # intentional before changing serialized data.
        # Start date
        if startdate is not None:
            startdateLiteral = rdflib.Literal(
                startdate.isoformat(),
                datatype=rdflib.URIRef(nss['xsd'] + 'Date'))
            graph.add((root, rdflib.URIRef(nss['base'] + 'MAG_hasStartDate'),
                       startdateLiteral))

        # end date
        if enddate is not None:
            enddateLiteral = rdflib.Literal(enddate.isoformat(),
                                            datatype=rdflib.URIRef(nss['xsd'] +
                                                                   'Date'))
            graph.add((root, rdflib.URIRef(nss['base'] + 'MAG_hasEndDate'),
                       enddateLiteral))

        # abstract date
        if abstractdate is not None:
            abstractdateLiteral = rdflib.Literal(
                abstractdate.isoformat(),
                datatype=rdflib.URIRef(nss['xsd'] + 'Date'))
            graph.add(
                (root, rdflib.URIRef(nss['base'] + 'MAG_hasAbstractDueOn'),
                 abstractdateLiteral))

        # submission date
        if subdate is not None:
            subLiteral = rdflib.Literal(subdate.isoformat(),
                                        datatype=rdflib.URIRef(nss['xsd'] +
                                                               'Date'))
            graph.add(
                (root, rdflib.URIRef(nss['base'] + 'MAG_hasSubmissionDueOn'),
                 subLiteral))

        # notification date
        if notdate is not None:
            notLiteral = rdflib.Literal(notdate.isoformat(),
                                        datatype=rdflib.URIRef(nss['xsd'] +
                                                               'Date'))
            graph.add(
                (root, rdflib.URIRef(nss['base'] + 'MAG_hasNotificationDueOn'),
                 notLiteral))

        # final date
        if finaldate is not None:
            finalLiteral = rdflib.Literal(finaldate.isoformat(),
                                          datatype=rdflib.URIRef(nss['xsd'] +
                                                                 'Date'))
            graph.add((root, rdflib.URIRef(nss['base'] + 'MAG_hasFinalDueOn'),
                       finalLiteral))

        # membership of the KDD subset, both directions
        graph.add((root, rdflib.URIRef(nss['dcterms'] + 'isPartOf'),
                   rdflib.URIRef(nss['base'] + 'MAG_KDD_Subset')))
        graph.add((rdflib.URIRef(nss['base'] + 'MAG_KDD_Subset'),
                   rdflib.URIRef(nss['dcterms'] + 'hasPart'), root))
Ejemplo n.º 10
0
import rdflib as rdf
from ontology import Ontology
from functional_properties import buildDictTofindFunctionalProperties,listOFPropertiesByThr
from comparing import Comparing
import matplotlib.pyplot as plt
import pandas as pd

# The rdf:type predicate IRI, for asserting the class of a resource.
TYPE = rdf.URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")


if __name__ == '__main__':

    # Load the two ontologies to align and collect the source's subjects.
    source = Ontology("data/000/onto.owl")
    target = Ontology("data/001/onto.owl")
    subjList = source.uniqueSubjects()

    # Turn the triples coming from the Ontology class into dictionaries.
    s = source.rdfToDict()
    t = target.rdfToDict()
    countPsource, DPsource = buildDictTofindFunctionalProperties(s)
    propScores = listOFPropertiesByThr(countPsource, DPsource)

    # Reference alignment: each row pairs a source IRI with a target IRI.
    refalignPath = 'data/refalign.tsv'
    ra = pd.read_csv(refalignPath, sep='\t')
    trueSimilars = [(str(row.values[0]), str(row.values[1]))
                    for _, row in ra.iterrows()]
    compare = Comparing(propScores, s, t)
Ejemplo n.º 11
0
def init(graph, nss):
    """Add the VoID/DCTerms bootstrap triples for the MAG-LD dataset.

    Creates the ``MAG_LD`` void:Dataset node (type, homepage, description)
    and the ``MAG_KDD_Subset`` dcterms:Collection node (type, description),
    then links the two with dcterms:hasPart / dcterms:isPartOf.

    :param graph: rdflib.Graph to add triples to (mutated in place).
    :param nss: dict mapping namespace prefixes to their base IRI strings.
    """
    magld = rdflib.URIRef(nss['base'] + 'MAG_LD')
    graph.add((magld, rdflib.URIRef(nss['rdf'] + 'type'),
               rdflib.URIRef(nss['void'] + 'Dataset')))
    graph.add((magld,
               rdflib.URIRef(nss['foaf'] + 'homepage'),
               rdflib.Literal('http://mag.spider.d2s.labs.vu.nl/',
                              datatype=rdflib.URIRef(nss['xsd'] + 'anyURI'))))
    # Grammar fix in the published description: "A enriched" -> "An enriched".
    graph.add((magld, rdflib.URIRef(nss['dcterms'] + 'description'),
               rdflib.Literal("An enriched Semantic Web translation of the Microsoft Academic Graph.", lang='en')))

    kddSubset = rdflib.URIRef(nss['base'] + 'MAG_KDD_Subset')  # dct collection
    graph.add((kddSubset, rdflib.URIRef(nss['rdf'] + 'type'),
               rdflib.URIRef(nss['dcterms'] + 'Collection')))
    graph.add((kddSubset, rdflib.URIRef(nss['dcterms'] + 'description'),
               rdflib.Literal("A subset of the Microsoft Academic Graph as specified by the KDD for their 2016 KDD Cup.", lang='en')))

    # Link dataset and subset in both directions.
    graph.add((magld, rdflib.URIRef(nss['dcterms'] + 'hasPart'), kddSubset))
    graph.add((kddSubset, rdflib.URIRef(nss['dcterms'] + 'isPartOf'), magld))
Ejemplo n.º 12
0
def graph():
    """Build a one-triple test graph whose object is an rdf:XMLLiteral.

    NOTE(review): the literal's tags are mismatched (<msg> ... </hei>), so
    the value is not well-formed XML — presumably deliberate test data for
    the XMLLiteral datatype; confirm before "fixing" it.
    """
    result = rdflib.Graph()
    subject = rdflib.URIRef('http://example.org/a')
    predicate = rdflib.URIRef('http://example.org/p')
    value = rdflib.Literal('<msg>hei</hei>', datatype=RDF.XMLLiteral)
    result.add((subject, predicate, value))
    return result
Ejemplo n.º 13
0
    def _resource_or_literal(self, value):
        """Resolve *value* against DBpedia.

        Returns a URIRef when ``self.dbr + value`` is an existing DBpedia
        resource; otherwise restores underscores to spaces and returns the
        text as an xsd:string Literal.  This is the identical tail shared
        by the candidate, vice-president and party mapping rules below.
        """
        if self.utils.ask_if_resource_exists(self.dbr + value):
            return rdflib.URIRef(self.dbr + value)
        value = value.replace("_", " ")
        # NOTE use lang= instead of datatype?
        return rdflib.Literal(value, datatype=rdflib.namespace.XSD.string)

    def en_elections(self, single_row, reification_index):
        """
        English mapping rules for elections pages. It is used to add triples to the graph.

        The row concept is reified: the wiki page's dbpedia resource (e.g.
        http://dbpedia.org/resource/United_States_presidential_election,_2012)
        is linked via dbo:Election to a per-row resource named
        <resource>__<reification_index> (cf. Andrea_Pirlo / Andrea_Pirlo__1).
        Each cell of the row is then mapped by matching its header against
        the hard-coded rules below.

        :param reification_index: (int) index used to reification of the row concept
        :param single_row: (dict) dictionary containing data to map. {'header': [values]}
        :return: nothing
        """
        row_subject = rdflib.URIRef(self.dbr + self.resource)
        # NOTE: dbo:Election as row predicate is a known compromise — a
        # dedicated "electoral result" concept would fit better.
        row_predicate = self.dbo.Election
        row_object = rdflib.URIRef(self.dbr + self.resource + "__" + str(
            reification_index))  # eg resource__1 for the first row, __2 ...

        # cells added to the graph for this row
        self.cells_mapped = 0

        for header in single_row:
            # values is the list of data extracted for this cell
            values = single_row[header]
            try:
                # map only cells that actually carry data
                if values[0] != "-":

                    cell_subject = None
                    cell_predicate = None
                    cell_object = None

                    # Cells are mapped purely by header keyword.
                    # FUTURE DEVELOPMENT: replace these hard-coded if-blocks
                    # with soft-coded mapping rules.

                    # 1° RULE — candidate name -> dbp:candidate
                    if 'Candidate' in header or 'candidate' in header:
                        cell_subject = row_object
                        cell_predicate = rdflib.URIRef(self.dbp.candidate)

                        if len(values) == 2:
                            # eg [u'New York (stato)', u'Franklin D. Roosevelt']
                            cell_object = values[1]
                        else:
                            cell_object = values[0]
                            # keep only what precedes a comma, if any
                            comma_index = cell_object.find(",")
                            if comma_index >= 0:
                                cell_object = cell_object[:comma_index]

                        cell_object = cell_object.replace(" ", "_")
                        cell_object = self._resource_or_literal(cell_object)

                    # 2° RULE — vice president -> dbo:VicePresident
                    elif 'Candidati - Vicepresidente' in header:
                        cell_subject = row_object
                        cell_predicate = rdflib.URIRef(self.dbo.VicePresident)

                        # value selection depends on mode and list length
                        if len(values) == 2 and self.mode == 'json':
                            cell_object = values[1]  # eg [u'Iowa', u'Henry A. Wallace']
                        else:
                            cell_object = values[0]
                            comma_index = cell_object.find(",")
                            if comma_index >= 0:
                                cell_object = cell_object[:comma_index]

                        cell_object = self._resource_or_literal(cell_object)

                    # 3° RULE — political party -> dbo:PoliticalParty
                    elif header == 'Candidati - Partito' or 'Party' in header:
                        cell_subject = row_object
                        cell_predicate = rdflib.URIRef(self.dbo.PoliticalParty)
                        cell_object = values[0]  # eg [u'Partito Democratico (Stati Uniti)']

                        if isinstance(cell_object, basestring):
                            # expand the Italian short form of the US name
                            if "Stati Uniti" in cell_object or "Stati_Uniti" in cell_object:
                                cell_object = cell_object[:-1] + "_d'America)"

                        cell_object = self._resource_or_literal(cell_object)

                    # 4° RULE — number of great electors -> dbp:electoralVote
                    elif 'Grandi elettori - #' in header:
                        cell_subject = row_object
                        cell_predicate = rdflib.URIRef(self.dbp.electoralVote)

                        if values[0] >= 0:
                            if self.is_int(values[0]):
                                cell_object = int(values[0])  # eg [449.0]
                                cell_object = rdflib.Literal(
                                    cell_object,
                                    datatype=rdflib.namespace.XSD.positiveInteger)

                    # 5° RULE — popular vote count
                    elif 'Votes' in header or '#' in header:
                        cell_subject = row_object
                        cell_predicate = rdflib.URIRef(self.dbo.popularVote)

                        if isinstance(values[0], basestring):
                            # strip spaces and dots used as thousands separators
                            if ' ' in values[0]:
                                values[0] = values[0].replace(' ', '')
                            if '.' in values[0]:
                                values[0] = values[0].replace('.', '')

                        if self.is_int(values[0]):
                            values[0] = int(values[0])
                            cell_object = rdflib.Literal(
                                values[0],
                                datatype=rdflib.namespace.XSD.positiveInteger)

                    # 6° RULE — popular vote percentage -> dbp:pvPct
                    elif header == 'Voti - %' or header == '?% voti' or header == '% voti' \
                            or header == 'Percentuale' or '%' in header or header == '?%' or header == 'Voti (%)' \
                            or header == 'Voto popolare - Percentuale':
                        cell_subject = row_object
                        cell_predicate = rdflib.URIRef(self.dbp.pvPct)

                        if self.is_float(values[0]):
                            values[0] = float(values[0])
                            cell_object = rdflib.Literal(
                                values[0], datatype=rdflib.namespace.XSD.float)
                        else:
                            if isinstance(values[0], basestring):
                                # wiki users sometimes write 12,3 for 12.3
                                if ',' in values[0]:
                                    values[0] = values[0].replace(",", ".")
                                # drop a trailing '%' sign when present
                                percentage = re.match(r'%', values[0][-1:])
                                if percentage:
                                    values[0] = float(values[0][:-1])
                                cell_object = rdflib.Literal(
                                    values[0],
                                    datatype=rdflib.namespace.XSD.float)

                    # IF HEADER DOES NOT MATCH ANY RULE
                    else:
                        cell_subject = None
                        cell_predicate = None
                        cell_object = None

                        print ("Something went wrong choosing mapping rules :'((  data: %s header: %s"
                               % (values, header))

                        self.no_mapping_found_cells += 1
                        # remember the first values seen for this header
                        if header not in self.headers_not_mapped.keys():
                            self.headers_not_mapped[header] = values

                    # add the cell triple only when fully determined
                    if cell_predicate and cell_object and cell_subject:
                        self.cells_mapped += 1
                        self.graph.add((cell_subject, cell_predicate, cell_object))
                        self.triples_added_to_the_graph += 1
                        self.print_triple(cell_subject, cell_predicate, cell_object)

            except Exception:
                # narrowed from a bare except so KeyboardInterrupt/SystemExit
                # still propagate; any mapping failure is counted and skipped
                print("Error mapping %s   ,associate with cell: %s" % (values, header))
                self.num_of_mapping_errors += 1

        # add the row reification triple only if at least one cell mapped
        if self.cells_mapped > 0:
            self.graph.add((row_subject, row_predicate, row_object))
            self.triples_added_to_the_graph += 1
            self.print_triple(row_subject, row_predicate, row_object)
            self.total_cell_mapped += self.cells_mapped
        else:
            # roll back the reification index: this row was not added
            self.reification_index -= 1
Ejemplo n.º 14
0
        # assert disjointness between top level types based on +fig1a+ figs7 sigh
        for dis in (peps, sigs, tuple(figs7.values())):
            for i, n in enumerate(dis[:-1]):
                for on in dis[i+1:]:
                    n.disjointWith(on)
        #LPCbyPepties = Neuron(*LPCPep.pes)


for n, p in Huang2017.items():
    if isinstance(p, Phenotype) and not n.startswith('_'):
        # FIXME rdflib allows instances but tests type so OntId can go in, but won't ever match
        ident = OntId(p.p)
        if n in Genes.__dict__:
            o = rdflib.Literal(n) if not hasattr(p, '_label') else rdflib.Literal(p._label)
            lt = (rdflib.URIRef(ident), rdfs.label, o)
            Neuron.core_graph.add(lt)
            if ident.prefix != 'NCBIGene':
                Neuron.out_graph.add(lt)  # FIXME maybe a helper graph?

            if ident.prefix == 'JAX':
                sct = (rdflib.URIRef(ident), rdfs.subClassOf, ilxtr.transgenicLine)
                Neuron.core_graph.add(sct)
                Neuron.out_graph.add(sct)
            elif ident.prefix == 'ilxtr':# or ident.prefix == 'NCBIGene':  # FIXME NCBIGene temp fix ...
                if ident.suffix in ('LowerExpression', 'HigherExpression', 'to'):
                    continue
                sct = (rdflib.URIRef(ident), rdfs.subClassOf, ilxtr.gene)
                Neuron.core_graph.add(sct)
                Neuron.out_graph.add(sct)
        else:
Ejemplo n.º 15
0
import logging
import rdflib
from rdflib.namespace import RDF, OWL, RDFS, FOAF
from rdflib import Literal, BNode, URIRef, Graph, Namespace

import DateTimeDescription
import ctime
import tove2

# Namespaces.
# NOTE(review): the name `time` shadows the stdlib module name if that is
# ever imported in this file — rename with care.
cot = rdflib.Namespace('http://ontology.eil.utoronto.ca/TOVE2/ctime.rdf#')
time = rdflib.Namespace('http://www.w3.org/2006/time#')
# Fixed: this namespace was previously assigned twice with the same value.
rdf = rdflib.Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')

# Predicate IRIs.
# NOTE(review): hasMin/hasMax use ...ctime.owl# while `cot` above uses
# ...ctime.rdf# — confirm which ontology IRI is intended.
hasBeginning = rdflib.URIRef('http://www.w3.org/2006/time#hasBeginning')
hasEnd = rdflib.URIRef('http://www.w3.org/2006/time#hasEnd')
hasMin = rdflib.URIRef(
	'http://ontology.eil.utoronto.ca/TOVE2/ctime.owl#hasMin')
hasMax = rdflib.URIRef(
	'http://ontology.eil.utoronto.ca/TOVE2/ctime.owl#hasMax')
hasDepartment = rdflib.URIRef('http://www.semanticweb.org/luolixuan/ontologies/tove2/test#has_Department')
hasDuration = rdflib.URIRef('http://www.w3.org/2006/time#hasDuration')
hasTimeInterval = rdflib.URIRef('http://www.semanticweb.org/luolixuan/ontologies/tove2/test#has_TimeInterval')

# All the departments.
Water = rdflib.URIRef('http://www.semanticweb.org/luolixuan/ontologies/tove2/test#Water')
Sewage = rdflib.URIRef('http://www.semanticweb.org/luolixuan/ontologies/tove2/test#Sewage')
Power = rdflib.URIRef('http://www.semanticweb.org/luolixuan/ontologies/tove2/test#Power')
Permits = rdflib.URIRef('http://www.semanticweb.org/luolixuan/ontologies/tove2/test#Permits')
Police = rdflib.URIRef('http://www.semanticweb.org/luolixuan/ontologies/tove2/test#Police')
Ejemplo n.º 16
0
def fFieldOfStudyHierarchyHandler(graph, nss, f):
    """Load the MAG field-of-study hierarchy from ``f`` into ``graph`` as SKOS.

    Each tab-separated line of ``f`` links a child field to a parent field
    with a confidence value.  Both endpoints are placed in one ConceptScheme;
    level-0 fields become top concepts, and fields on adjacent levels are
    linked via skos:narrower / skos:broader with a confidence note.
    """
    base = nss['base']
    skos = nss['skos']

    def fos(ident):
        # URI of an individual field-of-study resource.
        return rdflib.URIRef(base + 'MAG_FieldOfStudy_' + ident)

    # Frequently used predicate URIs, built once instead of per triple.
    in_scheme = rdflib.URIRef(skos + 'inScheme')
    has_top = rdflib.URIRef(skos + 'hasTopConcept')
    narrower = rdflib.URIRef(skos + 'narrower')
    broader = rdflib.URIRef(skos + 'broader')
    note = rdflib.URIRef(skos + 'note')

    root = rdflib.URIRef(base + 'MAG_FieldOfStudyHierarchy')
    graph.add((root, rdflib.URIRef(nss['rdf'] + 'type'),
               rdflib.URIRef(skos + 'ConceptScheme')))

    for progress, line in enumerate(f, start=1):
        terms = line.strip().split('\t')
        child_id, parent_id, confidence = terms[0], terms[2], terms[4]

        child = fos(child_id)
        parent = fos(parent_id)
        graph.add((child, in_scheme, root))
        graph.add((parent, in_scheme, root))

        # Level fields look like 'L0', 'L1', ... — drop the leading letter.
        child_lvl = int(terms[1][1:])
        parent_lvl = int(terms[3][1:])

        if child_lvl == 0:
            graph.add((root, has_top, child))
        if parent_lvl == 0:
            graph.add((root, has_top, parent))

        if child_lvl - parent_lvl == 1:
            graph.add((child, narrower, parent))
            graph.add((parent, broader, child))
            graph.add((parent, note, rdflib.Literal(
                'Confidence of being broader than {} is {}'.format(
                    child_id, confidence))))
            graph.add((child, note, rdflib.Literal(
                'Confidence of being narrower than {} is {}'.format(
                    parent_id, confidence))))
        elif parent_lvl - child_lvl == 1:
            graph.add((child, broader, parent))
            graph.add((parent, narrower, child))
            graph.add((parent, note, rdflib.Literal(
                'Confidence of being narrower than {} is {}'.format(
                    child_id, confidence))))
            graph.add((child, note, rdflib.Literal(
                'Confidence of being broader than {} is {}'.format(
                    parent_id, confidence))))

        if progress % 10000 == 0:
            sys.stdout.write('\r ' + str(progress) + ' lines read ')
Ejemplo n.º 17
0
def add_activity(name,department,start_min_year,start_min_month,start_min_day,start_max_year,start_max_month,start_max_day,end_min_year,end_min_month,end_min_day,end_max_year,end_max_month,end_max_day,duration_min_year,duration_min_month,duration_min_day,duration_max_year,duration_max_month,duration_max_day):
    """Add a named activity with start/end/duration bounds to the schedule.

    Loads the schedule graph from ``example.ttl``, refuses duplicates by
    name, attaches the department and a CDateTimeInterval with min/max
    instants for start, end and duration, then writes the result to
    ``output1.ttl``.  Returns False if an activity with this name already
    exists, True on success.
    """
    # Group the y/m/d triples for each bound.
    bmin = [start_min_year,start_min_month,start_min_day]
    bmax = [start_max_year,start_max_month,start_max_day]
    emin = [end_min_year,end_min_month,end_min_day]
    emax = [end_max_year,end_max_month,end_max_day]
    durmin = [duration_min_year,duration_min_month,duration_min_day]
    durmax = [duration_max_year,duration_max_month,duration_max_day]


    g = Graph()
    test2 = rdflib.Namespace('example.ttl#')
    # NOTE(review): parsing the path 'example.ttl#' (with a trailing '#')
    # looks suspicious — confirm the intended file is 'example.ttl'.
    g.parse('example.ttl#', format="turtle")

    # if there is already an activity with the same name in the schedule, just return False
    if check_activity_name(g,name):
        return False

    # add the activity node itself
    act_name = tove2_prefix + name
    g.add((rdflib.URIRef(act_name),RDF.type,rdflib.URIRef(activity_prefix)))

    # add its department (``department`` is expected to already be a URIRef)
    g.add((rdflib.URIRef(act_name),hasDepartment, department))

    # add the time interval that owns start/end/duration
    act_timeInterval = rdflib.URIRef(act_name + '_timeInterval')
    g.add((act_timeInterval, RDF.type, time.CDateTimeInterval))
    g.add((rdflib.URIRef(act_name),hasTimeInterval,act_timeInterval))

    # add start instant with its min/max bounds
    start = rdflib.URIRef(act_name + '_start')
    start_min_ins = rdflib.URIRef(act_name + '_start_min')
    start_max_ins = rdflib.URIRef(act_name + '_start_max')
    tove2.create_CDTInstant(g, start, bmin, bmax, start_min_ins, start_max_ins)
    g.add((act_timeInterval, hasBeginning, start))

    # add end instant with its min/max bounds
    end = rdflib.URIRef(act_name + '_end')
    end_min_ins = rdflib.URIRef(act_name + '_end_min')
    end_max_ins = rdflib.URIRef(act_name + '_end_max')
    tove2.create_CDTInstant(g,end,emin,emax, end_min_ins, end_max_ins)
    g.add((act_timeInterval, hasEnd, end))

    # add duration
    # NOTE(review): the duration is built with create_CDTInstant like the
    # instants above — confirm a duration-specific constructor isn't intended.
    dur = rdflib.URIRef(act_name + '_duration')
    dur_min_ins = rdflib.URIRef(act_name + '_duration_min')
    dur_max_ins = rdflib.URIRef(act_name + '_duration_max')
    tove2.create_CDTInstant(g, dur, durmin, durmax, dur_min_ins, dur_max_ins)
    g.add((act_timeInterval, hasDuration, dur))

    # persist the updated schedule
    g.close()
    g.serialize(destination='output1.ttl', format='turtle')

    return True
Ejemplo n.º 18
0
def fKDDPapersHandler(graph, nss, f, paperIDs, confIDs):
    """Import MAG paper records from the TSV stream ``f`` into ``graph``.

    Papers whose id is already in ``paperIDs`` get their metadata (date, doi,
    rank, journal) extended.  Unseen papers are only created when they belong
    to a known conference (``confIDs``) and were published after 2010;
    otherwise their conference id is recorded for later processing.

    Returns ``(allPaperConfIDs, journalIDs)``: a dict mapping skipped paper
    ids to their conference id, and the set of journal ids encountered.
    """
    allPaperConfIDs = dict()
    journalIDs = set()
    progress = 0

    for line in f:
        terms = line.strip().split('\t')

        ident = terms[0]
        title = terms[1]
        # title_alt = terms[2] if terms[2] != '' else None
        year = terms[3] if terms[3] != '' else None
        date = getDate(terms[4]) if terms[4] != '' else None
        doi = terms[5] if terms[5] != '' else None
        # venue = terms[6] if terms[6] != '' else None   # superseded by conference ID
        # venue_alt = terms[7] if terms[7] != '' else None
        journalId = terms[8] if terms[8] != '' else None
        conferenceId = terms[9] if terms[9] != '' else None
        rank = terms[10] if terms[10] != '' else None

        progress += 1
        if progress % 10000 == 0:
            sys.stdout.write('\r ' + str(progress) + ' lines read ')

        root = None
        if ident not in paperIDs:
            # Guard ``year is not None`` first: int(None) would raise.
            if conferenceId in confIDs and year is not None and int(year) > 2010:
                # add node plus label
                root = rdflib.URIRef(nss['base'] + 'MAG_Paper_' + ident)
                label = rdflib.Literal(rawString(title), lang='en')
                graph.add((root, rdflib.URIRef(nss['rdfs'] + 'label'), label))

                # title
                graph.add((root, rdflib.URIRef(nss['base'] + 'MAG_hasTitle'),
                           rdflib.Literal(rawString(title), lang='en')))

                # year
                if year is not None:
                    ynode = rdflib.Literal(year,
                                           datatype=rdflib.URIRef(nss['xsd'] +
                                                                  'gYear'))
                    graph.add(
                        (root,
                         rdflib.URIRef(nss['base'] +
                                       'MAG_hasYearOfPublication'), ynode))

                # type
                tnode = rdflib.URIRef(nss['base'] + 'MAG_Paper')
                graph.add((root, rdflib.URIRef(nss['rdf'] + 'type'), tnode))

                # id node
                idNode = rdflib.Literal(ident,
                                        datatype=rdflib.URIRef(nss['xsd'] +
                                                               'ID'))
                graph.add(
                    (root, rdflib.URIRef(nss['base'] + 'MAG_hasID'), idNode))

                if journalId is not None:
                    # BUG FIX: journalIDs is a set — use add(), not append().
                    journalIDs.add(journalId)
            else:
                allPaperConfIDs[
                    ident] = conferenceId  # exclude KDD subset as we already know about their confs
                continue

        # add node plus label
        root = rdflib.URIRef(nss['base'] + 'MAG_Paper_' +
                             ident) if root is None else root

        # date
        if date is not None:
            dnode = rdflib.Literal(date.isoformat(),
                                   datatype=rdflib.URIRef(nss['xsd'] + 'Date'))
            graph.add(
                (root, rdflib.URIRef(nss['base'] + 'MAG_hasDateOfPublication'),
                 dnode))

        # doi
        if doi is not None:
            doinode = rdflib.Literal(doi,
                                     datatype=rdflib.URIRef(nss['xsd'] + 'ID'))
            graph.add(
                (root, rdflib.URIRef(nss['base'] + 'MAG_hasDoi'), doinode))

        # rank
        if rank is not None:
            ranknode = rdflib.Literal(
                rank, datatype=rdflib.URIRef(nss['xsd'] + 'positiveInteger'))
            graph.add(
                (root, rdflib.URIRef(nss['base'] + 'MAG_hasRank'), ranknode))

        # journal (bidirectional link between paper and journal)
        if journalId is not None:
            # BUG FIX: journalIDs is a set — use add(), not append().
            journalIDs.add(journalId)
            jnode = rdflib.URIRef(nss['base'] + 'MAG_Journal_' + journalId)
            graph.add((root, rdflib.URIRef(nss['base'] + 'MAG_isPublishedIn'),
                       jnode))
            graph.add(
                (jnode, rdflib.URIRef(nss['base'] + 'MAG_hasPublished'), root))

    return (allPaperConfIDs, journalIDs)
Ejemplo n.º 19
0
    def test_namespace_registry_from_iri(self):
        """Test getting namespaces from iri.

        Covers lookup of cuba/city entities by IRI, label-based reference
        after setting ``_reference_by_label``, and fallback namespaces for
        IRIs that belong to no installed namespace.
        """
        self.installer.install("city")
        ns_iri = rdflib.URIRef("http://www.osp-core.com/city#")
        city_iri = ns_iri + "City"
        hasPart_iri = ns_iri + "hasPart"
        self.modify_labels()

        # Direct lookups against the registry under test.
        c = self.namespace_registry.from_iri(rdflib_cuba.Entity)
        self.assertIsInstance(c, OntologyClass)
        self.assertEqual(c.namespace.get_name(), "cuba")
        self.assertEqual(c.name, "Entity")
        r = self.namespace_registry.from_iri(rdflib_cuba.relationship)
        self.assertIsInstance(r, OntologyRelationship)
        self.assertEqual(r.namespace.get_name(), "cuba")
        self.assertEqual(r.name, "relationship")
        a = self.namespace_registry.from_iri(rdflib_cuba.attribute)
        self.assertIsInstance(a, OntologyAttribute)
        self.assertEqual(a.namespace.get_name(), "cuba")
        self.assertEqual(a.name, "attribute")
        c = self.namespace_registry.from_iri(city_iri)
        self.assertIsInstance(c, OntologyClass)
        self.assertEqual(c.namespace.get_name(), "city")
        self.assertEqual(c.name, "City")
        r = self.namespace_registry.from_iri(hasPart_iri)
        self.assertIsInstance(r, OntologyRelationship)
        self.assertEqual(r.namespace.get_name(), "city")
        self.assertEqual(r.name, "hasPart")
        import osp.core.namespaces

        # Swap the global registry for ours, restoring it afterwards.
        old_ns_reg = osp.core.ontology.namespace_registry.namespace_registry
        try:
            osp.core.ontology.namespace_registry.namespace_registry = (
                self.namespace_registry)
            from_iri = self.namespace_registry.from_iri

            c = from_iri(rdflib_cuba.Entity)
            self.assertIsInstance(c, OntologyClass)
            self.assertEqual(c.namespace.get_name(), "cuba")
            self.assertEqual(c.name, "Entity")

            # Enable label-based references; caches must be cleared so the
            # new flag is picked up.
            self.graph.add((ns_iri, rdflib_cuba._reference_by_label,
                            rdflib.Literal(True)))
            self.namespace_registry.from_iri.cache_clear()
            self.namespace_registry._get.cache_clear()
            c = from_iri(city_iri)
            self.assertIsInstance(c, OntologyClass)
            self.assertEqual(c.namespace.get_name(), "city")
            self.assertEqual(c.name, "City_T")
            r = from_iri(hasPart_iri)
            self.assertIsInstance(r, OntologyRelationship)
            self.assertEqual(r.namespace.get_name(), "city")
            self.assertEqual(r.name, "hasPart_T")

            # undefined namespace
            self.graph.add(
                (rdflib.URIRef("a/b#c"), rdflib.RDF.type, rdflib.OWL.Class))
            self.graph.add(
                (rdflib.URIRef("d/e/f"), rdflib.RDF.type, rdflib.OWL.Class))
            a = from_iri("a/b#c")
            b = from_iri("d/e/f")
            self.assertIsInstance(a, OntologyClass)
            self.assertEqual(a.namespace.get_name(), "a/b#")
            self.assertEqual(a.name, "c")
            self.assertIsInstance(b, OntologyClass)
            self.assertEqual(b.namespace.get_name(), "d/e/")
            self.assertEqual(b.name, "f")
        finally:
            # BUG FIX: restore the same attribute that was saved/replaced
            # above; the original rebound the *module* name
            # ``osp.core.ontology.namespace_registry`` instead, leaving the
            # patched registry in place and shadowing the module.
            osp.core.ontology.namespace_registry.namespace_registry = (
                old_ns_reg)
Ejemplo n.º 20
0
 def ontid(self):
     """URIRef for this object's ontology page, derived from ``self.id``."""
     uri = f'https://sparc.olympiangods.org/sparc/ontologies/{self.id}'
     return rdflib.URIRef(uri)
Ejemplo n.º 21
0
    def triples_gen(self, subject):
        """Yield RDF triples describing ``self._source`` rooted at ``subject``.

        ``subject`` is first coerced to an rdflib term.  For every field in
        ``self._source`` that has a same-named converter method on ``self``,
        each value is mapped to a (predicate, object) pair and emitted;
        linked identifiers, quantities/ranges and plain values each take a
        different serialization path.  Unknown fields are reported through
        ``self.addError``.
        """
        # Coerce the subject into an rdflib node (URIRef or BNode).
        if not (isinstance(subject, rdflib.URIRef)
                or isinstance(subject, rdflib.BNode)):
            if isinstance(subject, idlib.Stream):
                subject = subject.asType(rdflib.URIRef)
            else:
                subject = rdflib.URIRef(subject)

        def protocol_stuff():
            # Emits type/label/synonym triples for the linked entity that the
            # enclosing loop has bound to ``_v`` and ``s`` at call time.
            nonlocal _v
            nonlocal s
            d = _v.asDict(
            )  # FIXME this is a silly way to do this, use Stream.triples_gen
            _o = (
                owl.Class
                if isinstance(v, OntTerm) else  # FIXME not really accurate
                owl.NamedIndividual)
            yield s, rdf.type, _o
            if 'label' in d:
                yield s, rdfs.label, rdflib.Literal(d['label'])
            if 'synonyms' in d:  # FIXME yield from o.synonyms(s)
                for syn in d['synonyms']:
                    yield s, NIFRID.synonym, rdflib.Literal(syn)

        #maybe_not_normalized = self.message_passing_key in self._source  # TODO maybe not here?
        for field, value in self._source.items():
            #normalized = not (maybe_not_normalized and field in self._source)  # TODO

            #log.debug(f'{field}: {value}')
            if type(field) is object:
                continue  # the magic helper key for Pipeline
            # Converter method named after the field turns a raw value into a
            # (predicate, object) pair; ``extra`` may add triples per value.
            convert = getattr(self, field, None)
            extra = getattr(self.extra, field, None)
            if convert is not None:
                # Normalize to a sequence so single values and lists share
                # one code path.
                if isinstance(value, tuple) or isinstance(value, list):
                    values = value
                else:
                    values = value,

                for v in values:
                    #log.debug(f'{field} {v} {convert}')
                    if isinstance(v, oq.OntId):
                        _old_v = v
                        v = v.asInstrumented()
                    try:
                        p, o = convert(v)
                    except exc.NoTripleError as e:
                        continue

                    #log.debug((o, v))
                    # Linked identifier on either side of the conversion:
                    # emit its URI and describe it via protocol_stuff().
                    a = (isinstance(o, idlib.Stream) and hasattr(o, 'asUri')
                         or isinstance(o, OntTerm))
                    b = (isinstance(v, idlib.Stream) and hasattr(v, 'asUri')
                         or isinstance(v, OntTerm))
                    if (a or b):
                        # FIXME this thing is a mess ...
                        _v = o if a else v
                        s = _v.asUri(rdflib.URIRef)
                        yield subject, p, s
                        try:
                            yield from protocol_stuff()
                        except idlib.exc.ResolutionError:
                            pass

                    elif isinstance(o, ProtcurExpression) or isinstance(
                            o, Quantity):
                        # Quantities and ranges serialize under a blank node;
                        # quantities also get a base-units restatement.
                        s = rdflib.BNode()
                        yield subject, p, s
                        qt = sparc.Measurement
                        if isinstance(o, Range):
                            yield from o.asRdf(s, quantity_rdftype=qt)
                        elif isinstance(o, Quantity):
                            yield from o.asRdf(s, rdftype=qt)
                            n = rdflib.BNode()
                            yield s, TEMP.asBaseUnits, n
                            yield from o.to_base_units().asRdf(n)
                        else:
                            log.warning(f'unhanded Expr type {o}')
                            yield from o.asRdf(s)
                    else:
                        # Plain literal/object: emit directly.
                        yield subject, p, o

                    if extra is not None:
                        yield from extra(v)

            elif field in self.known_skipped:
                pass

            else:
                msg = f'Unhandled {self.__class__.__name__} field: {field}'
                if self.addError(msg,
                                 pipeline_stage=self.__class__.__name__ +
                                 '.export-error'):
                    log.warning(msg)
Ejemplo n.º 22
0
 def dsid(self):
     """Dataset identifier: ``self.uri_api`` wrapped as an rdflib URIRef."""
     api_uri = self.uri_api
     return rdflib.URIRef(api_uri)
Ejemplo n.º 23
0
from sklearn.exceptions import NotFittedError

from pyrdf2vec.graphs import KG
from pyrdf2vec.rdf2vec import RDF2VecTransformer

# Fixed seeds so walk sampling and embeddings are reproducible across runs.
np.random.seed(42)
random.seed(42)

KNOWLEDGE_GRAPH = KG(
    "samples/mutag/mutag.owl",
    # BUG FIX: set("http://...") builds a set of individual *characters*;
    # the predicate IRI must be wrapped in a container literal so the set
    # holds the whole string.
    label_predicates={"http://dl-learner.org/carcinogenesis#isMutagenic"},
)

TRAIN_DF = pd.read_csv("samples/mutag/train.tsv", sep="\t", header=0)

ENTITIES = [rdflib.URIRef(x) for x in TRAIN_DF["bond"]]
ENTITIES_SUBSET = ENTITIES[:5]

# Maps each entity to the *list* of walks extracted for it; the original
# annotation claimed a single URIRef value, but defaultdict(list) accumulates.
WALKS: DefaultDict[rdflib.URIRef, list] = defaultdict(list)


class TestRDF2VecTransformer:
    def test_fit(self):
        transformer = RDF2VecTransformer()
        with pytest.raises(ValueError):
            transformer.fit(KNOWLEDGE_GRAPH, ["does", "not", "exist"])
        transformer.fit(KNOWLEDGE_GRAPH, ENTITIES_SUBSET)

    def test_fit_transform(self):
        np.testing.assert_array_equal(
            RDF2VecTransformer().fit_transform(KNOWLEDGE_GRAPH,
Ejemplo n.º 24
0
 def id_(v):
     """Yield identity triples (type + label) for the enclosing dataset.

     NOTE(review): relies on enclosing-scope names (``dsid``, ``a``, ``owl``,
     ``sparc``, ``rdfs``, ``self``); ``v`` is unused here — confirm callers
     expect the converter signature.
     """
     s = rdflib.URIRef(dsid)
     yield s, a, owl.NamedIndividual
     yield s, a, sparc.Resource
     yield s, rdfs.label, rdflib.Literal(
         self.folder_name)  # not all datasets have titles
Ejemplo n.º 25
0
 def test_uriref_not_url(self):
     """Constructing a URIRef from a non-URL string should not raise."""
     weird_value = "*****@*****.**"
     try:
         rdflib.URIRef(weird_value)
     except Exception:
         self.fail("Doesn't actually fail...which is weird")
Ejemplo n.º 26
0
 def subject_id(self, v, species=None):  # TODO species for human/animal
     """Return the dataset-scoped URI for subject identifier ``v``."""
     escaped = quote(v, safe=tuple())
     return rdflib.URIRef(self.dsid + '/subjects/' + escaped)
import rdflib
import tqdm

features = pd.read_csv('feature.txt', sep='\t', header=None, index_col=0)
cites = pd.read_csv('net.txt', sep='\t', header=None)
labels = pd.read_csv('label.txt', sep='\t', header=None, index_col=0)

g = rdflib.Graph()

# One node per paper: link it to its label (when known) and to each word
# occurring in its bag-of-words feature string ("word:weight" tokens).
for idx, row in tqdm.tqdm(features.iterrows(), total=len(features)):
    tokens = row.values[0].split()
    weights = [float(tok.split(':')[1]) for tok in tokens]
    assert all(w == 1.0 for w in weights)

    paper = rdflib.URIRef('http://paper_' + str(idx))

    if idx in labels.index:
        label = str(labels.loc[idx][1])
        g.add((paper,
               rdflib.URIRef('http://hasLabel'),
               rdflib.URIRef('http://label_' + label)))

    for word in (tok.split(':')[0] for tok in tokens):
        g.add((paper,
               rdflib.URIRef('http://hasWord'),
               rdflib.URIRef('http://word_' + word)))

# Citation edges: each net.txt row is (dest, src, _).
for _, row in tqdm.tqdm(cites.iterrows(), total=len(cites)):
    dest, src, _ = map(str, row.values)
    g.add((rdflib.URIRef('http://paper_' + src), rdflib.URIRef('http://cites'),
           rdflib.URIRef('http://paper_' + dest)))
    #g.add((rdflib.URIRef('http://paper_'+dest), rdflib.URIRef('http://hasCitation'), rdflib.URIRef('http://paper_'+src)))
Ejemplo n.º 28
0
 def sample_id(self, v, species=None):  # TODO species for human/animal
     """Return the dataset-scoped URI for sample identifier ``v``."""
     escaped = quote(v, safe=tuple())  # percent-encode everything, incl. spaces
     return rdflib.URIRef(self.dsid + '/samples/' + escaped)
Ejemplo n.º 29
0
 def _set_data_properties(self):
     """Create the ERA/AVG data-property URIRefs on this instance."""
     for attr in ('thisERA', 'thisAVG'):
         setattr(self, attr, rdflib.URIRef(self.uri + attr))
Ejemplo n.º 30
0
   ?person <http://dbpedia.org/ontology/birthPlace> ?place .
   ?place <http://www.w3.org/2000/01/rdf-schema#label> ?placename .
   OPTIONAL { ?place <http://www.w3.org/2003/01/geo/wgs84_pos#long> ?long } .
   OPTIONAL { ?place <http://www.w3.org/2003/01/geo/wgs84_pos#lat> ?lat }.
}

"""

# Run the SELECT query and collect bindings as JSON.
sparql_client.setReturnFormat(JSON)
sparql_client.setQuery(query)
q_results = sparql_client.query().convert()

# Collect every distinct person URI, plus coordinates for each named
# birthplace that has both a longitude and a latitude binding.
people = set()
places = dict()
for r in q_results["results"]["bindings"]:
    people.add(rdflib.URIRef(r["person"]["value"]))
    if "long" in r and "lat" in r:
        lat = r["lat"]["value"]
        long = r["long"]["value"]
        places[r["placename"]["value"]] = (lat, long)

# Build one DESCRIBE query covering all people.  join() instead of the
# original quadratic += concatenation loop; each URI on its own line.
describe_query = "DESCRIBE " + "".join(p.n3() + "\n" for p in people)

# DESCRIBE returns RDF, so switch the client to XML and parse into a Graph.
sparql_client.setReturnFormat(XML)
sparql_client.setQuery(describe_query)
g = sparql_client.query().convert()

print(len(g), "triples in people around Claude Shannon")
a = g.serialize(format='application/rdf+xml')