def defaultSQL(self):
    """
    Return the INSERT statement that interns rdf:type as a 'default'
    identifier, since rdf:type is modeled explicitly (in the ABOX
    partition).
    """
    rdfTypeKey = normalizeValue(RDF.type, 'U')
    return 'INSERT into %s values (%s,"U","%s");' % (self, rdfTypeKey, RDF.type)
def defaultStatements(self):
    """
    Return the list of INSERT statements that intern rdf:type as a
    'default' identifier, since rdf:type is modeled explicitly (in the
    ABOX partition).
    """
    rdfTypeKey = normalizeValue(RDF.type, 'U', self.useSignedInts)
    statement = "INSERT INTO %s VALUES (%s, 'U', '%s');" % (self, rdfTypeKey, RDF.type)
    return [statement]
def GarbageCollectionQUERY(idHash, valueHash, aBoxPart, binRelPart, litPart):
    """
    Build the SQL statements that garbage-collect interned identifiers
    and literal values.

    The identifier GC takes a UNION of every identifier column across the
    given KB partitions (ABOX, binary-relation and literal-property
    tables) and removes hash entries not in that union (the EXCEPT
    clause is consumed by IDENTIFIER_GARBAGE_COLLECTION_SQL).  The value
    GC left-joins the literal partition's object column against the
    value hash and removes the 'danglers'.  This must be performed after
    every removal of an assertion and so becomes a primary bottleneck.

    :param idHash: identifier hash table (key column is columns[0][0])
    :param valueHash: literal-value hash table
    :param aBoxPart: ABOX (rdf:type) partition table
    :param binRelPart: binary-relation partition table
    :param litPart: literal-properties partition table
    :return: ordered list of SQL statement strings to execute

    Cleanup vs. the previous revision: the commented-out join-based GC
    query and the locals it used (rdfTypeInt, explicitJoins,
    idJoinClauses, the first intersectionClause) were dead code and have
    been removed; behavior is unchanged.
    """
    purgeQueries = ["drop table if exists danglingIds"]
    idHashKeyName = idHash.columns[0][0]
    valueHashKeyName = valueHash.columns[0][0]
    unionSelects = []
    for part in [aBoxPart, binRelPart, litPart]:
        partUnionColumns = []
        for colName in part.columnNames:
            # Entries at index >= 4 are (name, sqlType, index) triples;
            # only the bigint ones hold interned identifier keys.
            if part.columnNames.index(colName) >= 4:
                colName, sqlType, index = colName
                if sqlType.lower()[:6] == 'bigint':
                    partUnionColumns.append(colName)
            # The literal partition's object column holds literal values
            # (tracked by valueHash), not identifiers, so skip it here.
            elif colName and not (str(part).endswith('literalProperties')
                                  and colName == 'object'):
                partUnionColumns.append(colName)
        for col in partUnionColumns:
            unionSelects.append("SELECT %s FROM %s" % (col, part))
    unionClause = ' EXCEPT ( ' + ' UNION '.join(unionSelects) + ' ) '
    idGCQuery = IDENTIFIER_GARBAGE_COLLECTION_SQL % (
        idHash,
        idHashKeyName,
        idHash,
        unionClause
    )
    idPurgeQuery = PURGE_KEY_SQL % (idHash, idHashKeyName, idHash, idHashKeyName)
    purgeQueries.append(idGCQuery)
    purgeQueries.append(idPurgeQuery)
    # Value GC: a value-hash entry is dangling when no literal object
    # references it (the left join yields NULL).
    litJoinClause = "%s.%s = %s.%s" % (
        litPart, litPart.columnNames[OBJECT], valueHash, valueHashKeyName)
    litJoinColumn = "%s.%s" % (litPart, litPart.columnNames[OBJECT])
    intersectionClause = litJoinColumn + " is NULL"
    valueGCQuery = VALUE_GARBAGE_COLLECTION_SQL % (
        valueHash,
        valueHashKeyName,
        valueHash,
        "left join %s on (%s)" % (litPart, litJoinClause),
        intersectionClause
    )
    valuePurgeQuery = PURGE_KEY_SQL % (
        valueHash, valueHashKeyName, valueHash, valueHashKeyName)
    purgeQueries.append("drop table if exists danglingIds")
    purgeQueries.append(valueGCQuery)
    purgeQueries.append(valuePurgeQuery)
    return purgeQueries
def updateIdentifierQueue(self, termList):
    """
    Queue the given (term, termType) pairs for later insertion into the
    identifier hash; each normalized term is keyed by its interned
    integer hash value.
    """
    for term, termType in termList:
        hashKey = normalizeValue(term, termType, self.useSignedInts)
        self.hashUpdateQueue[hashKey] = self.normalizeTerm(term)
def GarbageCollectionQUERY(idHash,valueHash,aBoxPart,binRelPart,litPart):
    """
    Build the SQL statements that garbage-collect interned identifiers
    and literal values.

    Left-joins the given KB partitions (ABOX, binary-relation and
    literal-property tables) against the identifier and value hash
    tables and removes the 'danglers' — hash entries no longer
    referenced by any partition column.  This must be performed after
    every removal of an assertion and so becomes a primary bottleneck.

    Returns an ordered list of SQL statement strings to execute.
    """
    purgeQueries = ["drop temporary table if exists danglingIds"]
    # Hash of rdf:type, passed into the identifier GC template —
    # presumably so this 'default' identifier (see defaultSQL) is never
    # collected; confirm against IDENTIFIER_GARBAGE_COLLECTION_SQL.
    rdfTypeInt = normalizeValue(RDF.type,'U')
    idHashKeyName = idHash.columns[0][0]
    valueHashKeyName = valueHash.columns[0][0]
    idHashJoinees = [aBoxPart,binRelPart,litPart]
    idJoinClauses = []
    idJoinColumnCandidates = []
    explicitJoins = []
    for part in idHashJoinees:
        partJoinClauses = []
        for colName in part.columnNames:
            # Entries at index >= 4 are (name, sqlType, index) triples;
            # join only on the bigint ones (interned identifier keys).
            if part.columnNames.index(colName) >= 4:
                colName,sqlType,index = colName
                if sqlType.lower()[:6]=='bigint':
                    partJoinClauses.append("%s.%s = %s.%s"%(part,colName,idHash,idHashKeyName))
                    idJoinColumnCandidates.append("%s.%s"%(part,colName))
            elif colName:
                partJoinClauses.append("%s.%s = %s.%s"%(part,colName,idHash,idHashKeyName))
                idJoinColumnCandidates.append("%s.%s"%(part,colName))
        explicitJoins.append("left join %s on (%s)"%(part,' or '.join(partJoinClauses)))
        idJoinClauses.extend(partJoinClauses)
    # A hash entry is dangling when every left-joined partition column
    # came back NULL.
    intersectionClause = " and ".join([col + " is NULL" for col in idJoinColumnCandidates])
    idGCQuery = IDENTIFIER_GARBAGE_COLLECTION_SQL%(
        idHash,
        idHashKeyName,
        idHash,
        ' '.join(explicitJoins),
        intersectionClause,
        idHash,
        idHashKeyName,
        rdfTypeInt
    )
    idPurgeQuery = PURGE_KEY_SQL%(idHash,idHash,idHashKeyName,idHash,idHashKeyName)
    purgeQueries.append(idGCQuery)
    purgeQueries.append(idPurgeQuery)
    # Second pass: collect literal values no longer referenced by the
    # literal partition's object column.
    partJoinClauses = []
    idJoinColumnCandidates = []
    explicitJoins = []
    partJoinClauses.append("%s.%s = %s.%s"%(litPart,litPart.columnNames[OBJECT],valueHash,valueHashKeyName))
    idJoinColumnCandidates.append("%s.%s"%(litPart,litPart.columnNames[OBJECT]))
    intersectionClause = " and ".join([col + " is NULL" for col in idJoinColumnCandidates])
    valueGCQuery = VALUE_GARBAGE_COLLECTION_SQL%(
        valueHash,
        valueHashKeyName,
        valueHash,
        "left join %s on (%s)"%(litPart,' or '.join(partJoinClauses)),
        intersectionClause
    )
    valuePurgeQuery = PURGE_KEY_SQL%(valueHash,valueHash,valueHashKeyName,valueHash,valueHashKeyName)
    purgeQueries.append("drop temporary table if exists danglingIds")
    purgeQueries.append(valueGCQuery)
    purgeQueries.append(valuePurgeQuery)
    return purgeQueries
def defaultSQL(self):
    """
    Build the INSERT statement interning rdf:type as a 'default'
    identifier (rdf:type is modeled explicitly in the ABOX partition).
    """
    typeHash = normalizeValue(RDF.type, 'U')
    template = 'INSERT into %s values (%s,"U","%s");'
    return template % (self, typeHash, RDF.type)