Exemple #1
0
 def defaultSQL(self):
     """
     Return the SQL statement that inserts rdf:type as a 'default' identifier.

     rdf:type is modeled explicitly (in the ABOX partition), so a row for it
     has to be inserted into the table named by str(self).
     """
     rdfTypeKey = normalizeValue(RDF.type, 'U')
     insertTemplate = 'INSERT into %s values (%s,"U","%s");'
     return insertTemplate % (self, rdfTypeKey, RDF.type)
Exemple #2
0
 def defaultStatements(self):
     """
     Return the list of SQL statements that insert the 'default' identifiers.

     rdf:type is modeled explicitly (in the ABOX partition), so it must be
     inserted as a 'default' identifier; that single INSERT is the only
     statement returned.
     """
     rdfTypeKey = normalizeValue(RDF.type, 'U', self.useSignedInts)
     insertRdfType = "INSERT INTO %s VALUES (%s, 'U', '%s');" % (
         self, rdfTypeKey, RDF.type)
     return [insertRdfType]
Exemple #3
0
def GarbageCollectionQUERY(idHash,valueHash,aBoxPart,binRelPart,litPart):
    """
    Build the SQL statements that garbage collect interned identifiers and values.

    The identifier query receives an ``EXCEPT ( ... UNION ... )`` clause that
    selects every identifier-bearing column of the three KB partitions; the
    value query left-joins the literal partition's object column against the
    value hash and keeps the rows where that column is NULL.  Each collection
    query is followed by a purge of the 'danglers' it found.  This must be
    performed after every removal of an assertion and so becomes a primary
    bottleneck.

    Parameters:
      idHash, valueHash -- hash tables; ``columns[0][0]`` is used as the key
          column name and ``str()`` of the object as the table name.
      aBoxPart, binRelPart, litPart -- KB partitions; ``str()`` of each is used
          as the table name.  In ``columnNames`` the first four entries are
          plain column names (falsy entries are skipped) and any later entry
          is a ``(name, sqlType, index)`` tuple.

    Returns the list of SQL strings in the order they are meant to be run.
    """
    dropDanglers = "drop table if exists danglingIds"
    idHashKeyName = idHash.columns[0][0]
    valueHashKeyName = valueHash.columns[0][0]

    # One SELECT per column that can reference an interned identifier.
    unionSelects = []
    for part in (aBoxPart, binRelPart, litPart):
        # enumerate gives the real position of each entry; a value lookup
        # with list.index() would report the first equal entry instead.
        for position, column in enumerate(part.columnNames):
            if position >= 4:
                # Extra columns are described by tuples; only BIGINT ones
                # are compared against the identifier hash.
                colName, sqlType, _indexName = column
                if not sqlType.lower().startswith('bigint'):
                    continue
            elif column and not (str(part).endswith('literalProperties')
                                 and column == 'object'):
                # The literal partition's object column is checked against
                # valueHash further down, not against idHash.
                colName = column
            else:
                continue
            unionSelects.append("SELECT %s FROM %s" % (colName, part))
    unionClause = ' EXCEPT ( ' + ' UNION '.join(unionSelects) + ' ) '

    idGCQuery = IDENTIFIER_GARBAGE_COLLECTION_SQL%(
        idHash,
        idHashKeyName,
        idHash,
        unionClause
    )
    idPurgeQuery = PURGE_KEY_SQL%(idHash,idHashKeyName,idHash,idHashKeyName)

    # Values are only referenced from the object column of the literal partition.
    litObjectColumn = "%s.%s" % (litPart, litPart.columnNames[OBJECT])
    valueJoin = "left join %s on (%s = %s.%s)" % (
        litPart, litObjectColumn, valueHash, valueHashKeyName)
    valueGCQuery = VALUE_GARBAGE_COLLECTION_SQL%(
        valueHash,
        valueHashKeyName,
        valueHash,
        valueJoin,
        litObjectColumn + " is NULL"
    )
    valuePurgeQuery = PURGE_KEY_SQL%(valueHash,valueHashKeyName,valueHash,valueHashKeyName)

    return [
        dropDanglers,
        idGCQuery,
        idPurgeQuery,
        dropDanglers,
        valueGCQuery,
        valuePurgeQuery,
    ]
Exemple #4
0
 def updateIdentifierQueue(self, termList):
     """
     Queue the normalized form of every term in termList.

     termList is an iterable of (term, termType) pairs.  Each term is stored
     in self.hashUpdateQueue under the integer key computed by
     normalizeValue for that term and term type.
     """
     for pair in termList:
         term, termType = pair
         hashKey = normalizeValue(term, termType, self.useSignedInts)
         normalized = self.normalizeTerm(term)
         self.hashUpdateQueue[hashKey] = normalized
def GarbageCollectionQUERY(idHash,valueHash,aBoxPart,binRelPart,litPart):
    """
    Build the SQL statements that garbage collect interned identifiers and values.

    The given KB partitions are left-joined against the identifier hash on
    every identifier-bearing column, and the literal partition's object column
    is left-joined against the value hash; rows where every joined column is
    NULL are the 'danglers', which are then purged.  This must be performed
    after every removal of an assertion and so becomes a primary bottleneck.

    Parameters:
      idHash, valueHash -- hash tables; ``columns[0][0]`` is used as the key
          column name and ``str()`` of the object as the table name.
      aBoxPart, binRelPart, litPart -- KB partitions; ``str()`` of each is used
          as the table name.  In ``columnNames`` the first four entries are
          plain column names (falsy entries are skipped) and any later entry
          is a ``(name, sqlType, index)`` tuple.

    Returns the list of SQL strings in the order they are meant to be run.
    """
    dropDanglers = "drop temporary table if exists danglingIds"
    rdfTypeInt = normalizeValue(RDF.type,'U')
    idHashKeyName = idHash.columns[0][0]
    valueHashKeyName = valueHash.columns[0][0]

    explicitJoins = []
    nullChecks = []
    for part in (aBoxPart, binRelPart, litPart):
        partJoinClauses = []
        # enumerate gives the real position of each entry; a value lookup
        # with list.index() would report the first equal entry instead.
        for position, column in enumerate(part.columnNames):
            if position >= 4:
                # Extra columns are described by tuples; only BIGINT ones
                # are compared against the identifier hash.
                colName, sqlType, _indexName = column
                if not sqlType.lower().startswith('bigint'):
                    continue
            elif column:
                colName = column
            else:
                continue
            qualified = "%s.%s" % (part, colName)
            partJoinClauses.append("%s = %s.%s" % (qualified, idHash, idHashKeyName))
            nullChecks.append(qualified + " is NULL")
        explicitJoins.append("left join %s on (%s)" % (part, ' or '.join(partJoinClauses)))

    idGCQuery = IDENTIFIER_GARBAGE_COLLECTION_SQL%(
        idHash,
        idHashKeyName,
        idHash,
        ' '.join(explicitJoins),
        " and ".join(nullChecks),
        idHash,
        idHashKeyName,
        rdfTypeInt
    )
    idPurgeQuery = PURGE_KEY_SQL%(idHash,idHash,idHashKeyName,idHash,idHashKeyName)

    # Values are only referenced from the object column of the literal partition.
    litObjectColumn = "%s.%s" % (litPart, litPart.columnNames[OBJECT])
    valueJoin = "left join %s on (%s = %s.%s)" % (
        litPart, litObjectColumn, valueHash, valueHashKeyName)
    valueGCQuery = VALUE_GARBAGE_COLLECTION_SQL%(
        valueHash,
        valueHashKeyName,
        valueHash,
        valueJoin,
        litObjectColumn + " is NULL"
    )
    valuePurgeQuery = PURGE_KEY_SQL%(valueHash,valueHash,valueHashKeyName,valueHash,valueHashKeyName)

    return [
        dropDanglers,
        idGCQuery,
        idPurgeQuery,
        dropDanglers,
        valueGCQuery,
        valuePurgeQuery,
    ]
 def defaultSQL(self):
     """
     Build the INSERT statement that registers rdf:type as a 'default' identifier.

     This is needed because rdf:type is modeled explicitly (in the ABOX
     partition).  str(self) supplies the table name in the statement.
     """
     values = (self, normalizeValue(RDF.type, 'U'), RDF.type)
     statement = 'INSERT into %s values (%s,"U","%s");' % values
     return statement
def _identifierColumnNames(columnNames):
    """Yield the names of the columns that are compared against the identifier hash."""
    # enumerate gives the real position of each entry; a value lookup with
    # list.index() would report the first equal entry instead.
    for position, column in enumerate(columnNames):
        if position < 4:
            # Leading entries are plain names; falsy entries are skipped.
            if column:
                yield column
            continue
        # Later entries are (name, sqlType, index) tuples; only BIGINT
        # columns take part in the identifier join.
        name, sqlType, _indexName = column
        if sqlType.lower()[:6] == 'bigint':
            yield name


def GarbageCollectionQUERY(idHash,valueHash,aBoxPart,binRelPart,litPart):
    """
    Performs garbage collection on interned identifiers and their references.

    Joins the given KB partitions against the identifiers and values and
    removes the 'danglers'.  This must be performed after every removal of an
    assertion and so becomes a primary bottleneck.

    Parameters:
      idHash, valueHash -- hash tables; ``columns[0][0]`` is used as the key
          column name and ``str()`` of the object as the table name.
      aBoxPart, binRelPart, litPart -- KB partitions; ``str()`` of each is used
          as the table name and ``columnNames`` lists its columns.

    Returns the list of SQL strings in the order they are meant to be run:
    drop the temporary table, collect and purge dangling identifiers, drop the
    temporary table again, collect and purge dangling values.
    """
    purgeQueries = ["drop temporary table if exists danglingIds"]
    rdfTypeInt = normalizeValue(RDF.type,'U')
    idHashKeyName = idHash.columns[0][0]
    valueHashKeyName = valueHash.columns[0][0]

    idJoinColumnCandidates = []
    explicitJoins = []
    for part in (aBoxPart, binRelPart, litPart):
        partColumns = ["%s.%s" % (part, name)
                       for name in _identifierColumnNames(part.columnNames)]
        idJoinColumnCandidates.extend(partColumns)
        onClause = ' or '.join(["%s = %s.%s" % (column, idHash, idHashKeyName)
                                for column in partColumns])
        explicitJoins.append("left join %s on (%s)" % (part, onClause))

    # An identifier dangles when no partition column matched it.
    intersectionClause = " and ".join([col + " is NULL" for col in idJoinColumnCandidates])
    purgeQueries.append(IDENTIFIER_GARBAGE_COLLECTION_SQL%(
        idHash,
        idHashKeyName,
        idHash,
        ' '.join(explicitJoins),
        intersectionClause,
        idHash,
        idHashKeyName,
        rdfTypeInt
    ))
    purgeQueries.append(PURGE_KEY_SQL%(idHash,idHash,idHashKeyName,idHash,idHashKeyName))

    # Values are only referenced from the object column of the literal partition.
    objectColumn = "%s.%s" % (litPart, litPart.columnNames[OBJECT])
    valueJoinClause = "%s = %s.%s" % (objectColumn, valueHash, valueHashKeyName)
    purgeQueries.append("drop temporary table if exists danglingIds")
    purgeQueries.append(VALUE_GARBAGE_COLLECTION_SQL%(
        valueHash,
        valueHashKeyName,
        valueHash,
        "left join %s on (%s)" % (litPart, valueJoinClause),
        objectColumn + " is NULL"
    ))
    purgeQueries.append(PURGE_KEY_SQL%(valueHash,valueHash,valueHashKeyName,valueHash,valueHashKeyName))
    return purgeQueries