Beispiel #1
0
 def count(self, s, p, o, s_blank, o_l, o_blank, statement):
     """Update the per-property tallies for a single triple.

     All tallies are dictionaries keyed by the property ``p``:

     * ``self.histogram`` is incremented on every call.
     * ``self.distinct`` is incremented when
       ``dh.query_distinct_spo(s + p + o, 0)`` is falsy; the key is then
       registered with ``dh.set_distinct_spo``.
     * ``self.distinct_subject`` is incremented the first time the
       ``s + p`` key is recorded in ``self.distinct_seen``.
     * ``self.distinct_object`` is incremented the first time the
       ``p + o`` key is recorded in ``self.distinct_seen``.

     Args:
         s: Subject; concatenated with ``p`` and ``o`` to build keys.
         p: Property; also the key of every tally.
         o: Object; concatenated with ``s`` and ``p`` to build keys.
         s_blank: Unused by this counter.
         o_l: Unused by this counter.
         o_blank: Unused by this counter.
         statement: Unused by this counter.
     """
     # count all properties
     self.histogram[p] = self.histogram.get(p, 0) + 1

     # distinct
     spo = s + p + o
     if not dh.query_distinct_spo(spo, 0):
         dh.set_distinct_spo(spo, 0)
         self.distinct[p] = self.distinct.get(p, 0) + 1

     # per subject, then per object
     # NOTE(review): both key kinds share ``self.distinct_seen``, so an
     # ``s + p`` key that equals a ``p + o`` key counts as already seen.
     # Kept as in the original -- confirm this is acceptable.
     for pair, tally in ((s + p, self.distinct_subject),
                         (p + o, self.distinct_object)):
         if len(pair) > 16:
             # Keys longer than 16 characters are stored as their 16-byte
             # MD5 digest.  hashlib only accepts bytes on Python 3, so
             # text is encoded first; byte strings pass through untouched.
             raw = pair if isinstance(pair, bytes) else pair.encode('utf-8')
             key = hashlib.md5(raw).digest()
         else:
             key = pair
         # ``in`` replaces dict.has_key(), which no longer exists on Python 3
         if key not in self.distinct_seen:
             self.distinct_seen[key] = 1
             tally[p] = tally.get(p, 0) + 1
Beispiel #2
0
 def count(self, s, p, o, s_blank, o_l, o_blank, statement):
     """Tally class usage for one triple (mimics make-void).

     A triple counts as a class usage when its property is ``rdf:type``
     and ``statement.object.is_resource()`` is truthy.  For such a triple
     ``self.histogram[o]`` is incremented, and ``self.subject_distinct[o]``
     is incremented as well unless ``dh.query_distinct_spo(s + p + o, 1)``
     already reports the key; the key is then registered through
     ``dh.set_distinct_spo``.

     Args:
         s: Subject; part of the distinctness key.
         p: Property; compared against the ``rdf:type`` URI.
         o: Object; the class being tallied.
         s_blank: Unused by this counter.
         o_l: Unused by this counter.
         o_blank: Unused by this counter.
         statement: Only consulted (``statement.object.is_resource()``)
             when ``p`` is ``rdf:type``.
     """
     rdf_type = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'

     # every class usage goes into the histogram
     typed_resource = False
     if p == rdf_type:
         if statement.object.is_resource():
             typed_resource = True
             self.histogram[o] = 1 + self.histogram.get(o, 0)

     # the key is built for every triple, exactly like the histogram step
     # above it runs before any early exit
     triple_key = s + p + o
     if not typed_resource:
         return
     if dh.query_distinct_spo(triple_key, 1):
         return

     # first sighting of this triple: distinct per subject
     dh.set_distinct_spo(triple_key, 1)
     self.subject_distinct[o] = 1 + self.subject_distinct.get(o, 0)
Beispiel #3
0
 def count(self, s, p, o, s_blank, o_l, o_blank, statement):
     """Record one triple in the class-usage statistics (mimics make-void).

     Only triples whose property is ``rdf:type`` and whose
     ``statement.object.is_resource()`` is truthy are counted.  Such a
     triple increments ``self.histogram[o]``; it also increments
     ``self.subject_distinct[o]`` when ``dh.query_distinct_spo`` does not
     yet know the ``s + p + o`` key (second argument ``1``), after which
     the key is stored with ``dh.set_distinct_spo``.

     Args:
         s: Subject; part of the distinctness key.
         p: Property; compared against the ``rdf:type`` URI.
         o: Object; the class being tallied.
         s_blank: Unused by this counter.
         o_l: Unused by this counter.
         o_blank: Unused by this counter.
         statement: Only consulted (``statement.object.is_resource()``)
             when ``p`` is ``rdf:type``.
     """
     # count every class usage
     is_class_usage = (
         p == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
         and statement.object.is_resource()
     )
     if is_class_usage:
         usage_so_far = self.histogram.get(o, 0)
         self.histogram[o] = usage_so_far + 1

     # distinct per subject
     triple_key = s + p + o
     if is_class_usage:
         already_known = dh.query_distinct_spo(triple_key, 1)
         if not already_known:
             dh.set_distinct_spo(triple_key, 1)
             distinct_so_far = self.subject_distinct.get(o, 0)
             self.subject_distinct[o] = distinct_so_far + 1
Beispiel #4
0
 def count(self, s, p, o, s_blank, o_l, o_blank, statement):
     """Count how often explicitly declared classes are used.

     A triple ``s rdf:type rdfs:Class`` declares ``s`` as a class and
     gives it an entry in ``self.histogram``.  A triple ``x rdf:type o``
     whose object already has an entry increments ``self.histogram[o]``.
     Independently of the property, a triple whose object has an entry
     increments ``self.subject_distinct[o]`` when
     ``dh.query_distinct_spo(s + p + o, 2)`` is falsy; the key is then
     registered with ``dh.set_distinct_spo``.

     Args:
         s: Subject; the class name when the triple is a declaration.
         p: Property; compared against the ``rdf:type`` URI.
         o: Object; looked up in ``self.histogram``.
         s_blank: Unused by this counter.
         o_l: Unused by this counter.
         o_blank: Unused by this counter.
         statement: Unused by this counter.
     """
     rdf_type = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'

     # count all class definitions
     if p == rdf_type and o == 'http://www.w3.org/2000/01/rdf-schema#Class':
         # setdefault, not plain assignment: a class that is declared again
         # must keep the usage count it has accumulated so far.
         self.histogram.setdefault(s, 0)

     # ``in`` replaces dict.has_key(), which no longer exists on Python 3.
     # Incrementing an existing entry below cannot change membership, so
     # one lookup serves both checks.
     is_known_class = o in self.histogram

     # count usage of defined classes
     if p == rdf_type and is_known_class:
         self.histogram[o] += 1

     # distinct per subject
     # NOTE(review): this step is not restricted to rdf:type triples; any
     # property pointing at a declared class is counted -- confirm intended.
     spo = s + p + o
     if is_known_class and not dh.query_distinct_spo(spo, 2):
         dh.set_distinct_spo(spo, 2)
         self.subject_distinct[o] = self.subject_distinct.get(o, 0) + 1
Beispiel #5
0
 def count(self, s, p, o, s_blank, o_l, o_blank, statement):
     """Count how often explicitly declared classes are used.

     A triple ``s rdf:type rdfs:Class`` declares ``s`` as a class and
     gives it an entry in ``self.histogram``.  A triple ``x rdf:type o``
     whose object already has an entry increments ``self.histogram[o]``.
     Independently of the property, a triple whose object has an entry
     increments ``self.subject_distinct[o]`` when
     ``dh.query_distinct_spo(s + p + o, 2)`` is falsy; the key is then
     registered with ``dh.set_distinct_spo``.

     Args:
         s: Subject; the class name when the triple is a declaration.
         p: Property; compared against the ``rdf:type`` URI.
         o: Object; looked up in ``self.histogram``.
         s_blank: Unused by this counter.
         o_l: Unused by this counter.
         o_blank: Unused by this counter.
         statement: Unused by this counter.
     """
     rdf_type = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'

     # count all class definitions
     if p == rdf_type and o == 'http://www.w3.org/2000/01/rdf-schema#Class':
         # setdefault, not plain assignment: a class that is declared again
         # must keep the usage count it has accumulated so far.
         self.histogram.setdefault(s, 0)

     # ``in`` replaces dict.has_key(), which no longer exists on Python 3.
     # Incrementing an existing entry below cannot change membership, so
     # one lookup serves both checks.
     is_known_class = o in self.histogram

     # count usage of defined classes
     if p == rdf_type and is_known_class:
         self.histogram[o] += 1

     # distinct per subject
     # NOTE(review): this step is not restricted to rdf:type triples; any
     # property pointing at a declared class is counted -- confirm intended.
     spo = s + p + o
     if is_known_class and not dh.query_distinct_spo(spo, 2):
         dh.set_distinct_spo(spo, 2)
         self.subject_distinct[o] = self.subject_distinct.get(o, 0) + 1
Beispiel #6
0
    def count(self, s, p, o, s_blank, o_l, o_blank, statement):
        """Update the per-property tallies and value ranges for one triple.

        All tallies are dictionaries keyed by the property ``p``:

        * ``self.histogram`` is incremented on every call.
        * ``self.distinct`` is incremented when
          ``dh.query_distinct_spo(s + p + o, 0)`` is falsy; the key is then
          registered with ``dh.set_distinct_spo``.
        * ``self.distinct_subject`` is incremented the first time the
          ``s + p`` key is recorded in ``self.distinct_seen``.  When that
          happens and ``o_l`` is truthy, the literal is also folded into
          ``self.min_value[p]`` / ``self.max_value[p]``.
        * ``self.distinct_object`` is incremented the first time the
          ``p + o`` key is recorded in ``self.distinct_seen``.

        Args:
            s: Subject; concatenated with ``p`` and ``o`` to build keys.
            p: Property; also the key of every tally.
            o: Object; for literals, the text that is converted.
            s_blank: Unused by this counter.
            o_l: Truthy when the min/max bookkeeping should look at ``o``.
            o_blank: Unused by this counter.
            statement: Read as ``statement.object.literal[2]`` to pick the
                conversion; only touched when ``o_l`` is truthy.
        """
        # count all properties
        self.histogram[p] = self.histogram.get(p, 0) + 1

        # distinct
        spo = s + p + o
        if not dh.query_distinct_spo(spo, 0):
            dh.set_distinct_spo(spo, 0)
            self.distinct[p] = self.distinct.get(p, 0) + 1

        # per subject
        if self._first_sighting(s + p):
            self.distinct_subject[p] = self.distinct_subject.get(p, 0) + 1
            # NOTE(review): literals are only inspected on the first
            # sighting of an s+p pair, so later objects of the same pair
            # never reach min/max.  Kept as in the original -- confirm.
            if o_l:
                self._record_value_range(p, o, statement)

        # per object
        if self._first_sighting(p + o):
            self.distinct_object[p] = self.distinct_object.get(p, 0) + 1

    def _first_sighting(self, text):
        """Record ``text`` in ``self.distinct_seen``; return True if new."""
        if len(text) > 16:
            # Keys longer than 16 characters are stored as their 16-byte
            # MD5 digest.  hashlib only accepts bytes on Python 3, so text
            # is encoded first; byte strings pass through untouched.
            raw = text if isinstance(text, bytes) else text.encode('utf-8')
            key = hashlib.md5(raw).digest()
        else:
            key = text
        # ``in`` replaces dict.has_key(), which no longer exists on Python 3
        if key in self.distinct_seen:
            return False
        self.distinct_seen[key] = 1
        return True

    def _record_value_range(self, p, o, statement):
        """Fold literal ``o`` into ``self.min_value[p]``/``self.max_value[p]``."""
        # computed once instead of once per branch
        datatype = str(statement.object.literal[2])

        if datatype in (str(ns_xs.decimal), str(ns_xs.float), str(ns_xs.double)) or \
           p in ('http://www.w3.org/2003/01/geo/wgs84_pos#long',
                 'http://www.w3.org/2003/01/geo/wgs84_pos#lat',
                 'http://www.w3.org/2003/01/geo/wgs84_pos#alt'):
            convert = float
        elif datatype in (str(ns_xs.int), str(ns_xs.integer)):
            convert = int
        elif datatype in (str(ns_xs.dateTime), str(ns_xs.date)):
            # dates are kept and compared as given, without conversion
            convert = None
        else:
            # no range is tracked for any other datatype
            return

        if convert is None:
            value = o
        else:
            try:
                value = convert(o)
            except ValueError:
                # A malformed numeric literal must not abort the whole
                # run; it simply does not contribute to the range.
                return

        # NOTE(review): a property that mixes numeric and date literals
        # compares numbers with strings here, which raises TypeError on
        # Python 3 -- confirm whether such data can occur.
        if p in self.min_value:
            self.min_value[p] = min(self.min_value[p], value)
        else:
            self.min_value[p] = value

        if p in self.max_value:
            self.max_value[p] = max(self.max_value[p], value)
        else:
            self.max_value[p] = value