def createOrUpdateBranches(self, importBranchesEncodedPayload: bytes) -> None:
    """ Convert Import Branch Tuples

    This method takes import branch tuples and converts them to the branch format
    used throughout the diagram plugin.

    (That's the packed JSON wrapped by an accessor class)

    """
    # Decode the importBranches payload
    importBranches: List[ImportBranchTuple] = (
        Payload().fromEncodedPayload(importBranchesEncodedPayload).tuples)

    # Validate the input importBranches
    _validateNewBranchIndexs(importBranches)

    # Do the import
    groupedBranches = _convertImportBranchTuples(importBranches)

    startTime = datetime.now(pytz.utc)

    dbSession = CeleryDbConn.getDbSession()

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    try:
        for (modelSetKey, modelSetId, coordSetId), branches in groupedBranches.items():
            _insertOrUpdateBranches(conn, modelSetKey, modelSetId, branches)

            newDisps, dispIdsToCompile = _convertBranchDisps(branches)

            # NO TRANSACTION
            # Bulk load the Disps
            _bulkInsertDisps(engine, newDisps)

            # Queue the compiler
            DispCompilerQueueController.queueDispIdsToCompileWithSession(
                dispIdsToCompile, conn)

            transaction.commit()
            dbSession.commit()

            logger.debug("Completed importing %s branches for coordSetId %s in %s",
                         len(branches), coordSetId,
                         (datetime.now(pytz.utc) - startTime))

    except Exception as e:
        dbSession.rollback()
        transaction.rollback()
        logger.debug("Retrying createOrUpdateBranches, %s", e)
        logger.exception(e)
        raise self.retry(exc=e, countdown=3)

    finally:
        dbSession.close()
        conn.close()
def qryChunkInWorker(self, offset, limit) -> List[LiveDbDisplayValueTuple]:
    """ Query Chunk

    This returns a chunk of LiveDB items from the database.

    :param self: A celery reference to this task
    :param offset: The offset of the chunk
    :param limit: The maximum number of items to return in this chunk
    :returns: List[LiveDbDisplayValueTuple] serialised in a payload json
    """
    table = LiveDbItem.__table__
    cols = [table.c.key, table.c.dataType, table.c.rawValue, table.c.displayValue]

    session = CeleryDbConn.getDbSession()
    try:
        result = session.execute(
            select(cols).order_by(table.c.id).offset(offset).limit(limit))

        return [
            LiveDbDisplayValueTuple(key=o.key,
                                    dataType=o.dataType,
                                    rawValue=o.rawValue,
                                    displayValue=o.displayValue)
            for o in result.fetchall()
        ]

    finally:
        session.close()
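# Hypothetical caller sketch (not part of the plugin): page through the LiveDB
# items in fixed-size chunks until an empty chunk comes back. It assumes the task
# is dispatched through Celery's standard delay()/get() API; the 2500 chunk size
# is an arbitrary choice for illustration.
def _exampleIterateLiveDbChunks():
    offset, limit = 0, 2500
    while True:
        chunk = qryChunkInWorker.delay(offset, limit).get()
        if not chunk:
            break
        logger.debug("Fetched %s LiveDB items at offset %s", len(chunk), offset)
        offset += limit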
def _buildIndex(chunkKeys) -> Dict[str, bytes]:
    session = CeleryDbConn.getDbSession()

    try:
        indexQry = (
            session.query(DocDbDocument.chunkKey, DocDbDocument.key,
                          DocDbDocument.documentJson)
                .filter(DocDbDocument.chunkKey.in_(chunkKeys))
                .order_by(DocDbDocument.key)
                .yield_per(1000)
                .all())

        # Create the ChunkKey -> {key -> documentJson, key -> documentJson, ...} map
        packagedJsonByObjIdByChunkKey = defaultdict(dict)

        for item in indexQry:
            packagedJsonByObjIdByChunkKey[item.chunkKey][item.key] = item.documentJson

        encPayloadByChunkKey = {}

        # Sort each bucket by the key
        for chunkKey, packedJsonByKey in packagedJsonByObjIdByChunkKey.items():
            tuples = json.dumps(packedJsonByKey, sort_keys=True)

            # Create the blob data for this index.
            # It will be searched with a binary sort (hence the sorted keys).
            encPayloadByChunkKey[chunkKey] = Payload(tuples=tuples).toEncodedPayload()

        return encPayloadByChunkKey

    finally:
        session.close()
def deleteTraceConfig(self, modelSetKey: str, traceConfigKeys: List[str]) -> None:
    startTime = datetime.now(pytz.utc)

    traceConfigTable = GraphDbTraceConfig.__table__

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:
        modelSetIdByKey = _loadModelSets()
        modelSetId = modelSetIdByKey[modelSetKey]

        conn.execute(
            traceConfigTable.delete(
                and_(traceConfigTable.c.key.in_(traceConfigKeys),
                     traceConfigTable.c.modelSetId == modelSetId))
        )

        transaction.commit()
        logger.info("Deleted %s trace configs in %s",
                    len(traceConfigKeys),
                    (datetime.now(pytz.utc) - startTime))

    except Exception as e:
        transaction.rollback()
        logger.debug("Retrying deleteTraceConfig, %s", e)
        raise self.retry(exc=e, countdown=3)

    finally:
        conn.close()
def _buildIndex(chunkKeys) -> Dict[str, bytes]:
    session = CeleryDbConn.getDbSession()

    try:
        indexQry = (
            session.query(BranchIndex.chunkKey, BranchIndex.key,
                          BranchIndex.packedJson)
                .filter(BranchIndex.chunkKey.in_(chunkKeys))
                .order_by(BranchIndex.key)
                .yield_per(1000)
                .all())

        # Create the ChunkKey -> {key -> [packedJson, packedJson, ...], ...} map
        packagedJsonsByObjKeyByChunkKey = defaultdict(lambda: defaultdict(list))

        for item in indexQry:
            packagedJsonsByObjKeyByChunkKey[item.chunkKey][item.key].append(
                item.packedJson)

        encPayloadByChunkKey = {}

        # Sort each bucket by the key
        for chunkKey, packedJsonsByKey in packagedJsonsByObjKeyByChunkKey.items():
            tuples = json.dumps(packedJsonsByKey, sort_keys=True)

            # Create the blob data for this index.
            # It will be searched with a binary sort (hence the sorted keys).
            encPayloadByChunkKey[chunkKey] = Payload(tuples=tuples).toEncodedPayload()

        return encPayloadByChunkKey

    finally:
        session.close()
def _loadCoordSet(modelSetKey, coordSetKey):
    ormSession = CeleryDbConn.getDbSession()
    try:
        coordSet = getOrCreateCoordSet(ormSession, modelSetKey, coordSetKey)
        ormSession.expunge_all()
        return coordSet

    finally:
        ormSession.close()
def _convertImportBranchTuples(
        importBranches: List[ImportBranchTuple]
) -> Dict[typing.Tuple[str, int, int], List[BranchTuple]]:
    """ Convert Import Branch Tuples

    This method takes import branch tuples and converts them to the branch format
    used throughout the diagram plugin.

    (That's the packed JSON wrapped by an accessor class)

    """

    # Get a map for the coordSetIds
    modelKeyCoordKeyTuples = [(b.modelSetKey, b.coordSetKey) for b in importBranches]

    coordSetIdByModelKeyCoordKeyTuple = getModelSetIdCoordSetId(modelKeyCoordKeyTuples)

    # Sort out the importBranches by coordSetKey
    branchByModelKeyByCoordKey = defaultdict(lambda: defaultdict(list))
    for importBranch in importBranches:
        branchByModelKeyByCoordKey[importBranch.modelSetKey][importBranch.coordSetKey] \
            .append(importBranch)

    # Define the converted importBranches
    convertedBranchesByCoordSetId: Dict[typing.Tuple[str, int, int], List[BranchTuple]] \
        = {}

    # Get the model set
    dbSession = CeleryDbConn.getDbSession()
    try:
        # Iterate through the importBranches and convert them
        for modelSetKey, item in branchByModelKeyByCoordKey.items():
            for coordSetKey, importBranches in item.items():
                modelSetId, coordSetId = coordSetIdByModelKeyCoordKeyTuple[
                    (modelSetKey, coordSetKey)]

                lookupHashConverter = LookupHashConverter(dbSession,
                                                          modelSetId, coordSetId)

                convertedBranches = []
                for importBranch in importBranches:
                    branch = BranchTuple.loadFromImportTuple(
                        importBranch, coordSetId,
                        lookupHashConverter=lookupHashConverter)
                    convertedBranches.append(branch)

                convertedBranchesByCoordSetId[(modelSetKey, modelSetId, coordSetId)] \
                    = convertedBranches

    finally:
        dbSession.close()

    return convertedBranchesByCoordSetId
def _makeModelSet(modelSetKey: str) -> int:
    # Get the model set
    dbSession = CeleryDbConn.getDbSession()
    try:
        newItem = GraphDbModelSet(key=modelSetKey, name=modelSetKey)
        dbSession.add(newItem)
        dbSession.commit()
        return newItem.id

    finally:
        dbSession.close()
def reindexSearchObject(conn, objectsToIndex: List[ObjectToIndexTuple]) -> None:
    """ Reindex Search Object

    :param conn: The database connection to use
    :param objectsToIndex: The objects to index
    :returns: None
    """

    logger.debug("Starting to index %s SearchIndex", len(objectsToIndex))

    searchIndexTable = SearchIndex.__table__
    queueTable = SearchIndexCompilerQueue.__table__

    startTime = datetime.now(pytz.utc)

    newSearchIndexes = []
    objectIds = []
    searchIndexChunksToQueue = set()

    for objectToIndex in objectsToIndex:
        newSearchIndexes.extend(_indexObject(objectToIndex))
        objectIds.append(objectToIndex.id)

    newIdGen = CeleryDbConn.prefetchDeclarativeIds(SearchIndex, len(newSearchIndexes))
    for newSearchIndex in newSearchIndexes:
        newSearchIndex.id = next(newIdGen)
        searchIndexChunksToQueue.add(newSearchIndex.chunkKey)

    results = conn.execute(
        select(columns=[searchIndexTable.c.chunkKey],
               whereclause=searchIndexTable.c.objectId.in_(objectIds)))

    for result in results:
        searchIndexChunksToQueue.add(result.chunkKey)

    if objectIds:
        conn.execute(
            searchIndexTable.delete(searchIndexTable.c.objectId.in_(objectIds)))

    if newSearchIndexes:
        logger.debug("Inserting %s SearchIndex", len(newSearchIndexes))
        inserts = [o.tupleToSqlaBulkInsertDict() for o in newSearchIndexes]
        conn.execute(searchIndexTable.insert(), inserts)

    if searchIndexChunksToQueue:
        conn.execute(queueTable.insert(),
                     [dict(chunkKey=k) for k in searchIndexChunksToQueue])

    logger.info("Inserted %s SearchIndex keywords in %s",
                len(newSearchIndexes),
                (datetime.now(pytz.utc) - startTime))
def _makeCoordSet(modelSetId: int, coordSetKey: str) -> int:
    # Make a coord set
    dbSession = CeleryDbConn.getDbSession()
    try:
        newItem = ModelCoordSet(modelSetId=modelSetId,
                                key=coordSetKey,
                                name=coordSetKey)
        dbSession.add(newItem)
        dbSession.commit()
        return newItem.id

    finally:
        dbSession.close()
def _loadModelSets() -> Dict[str, int]:
    # Get the model set
    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    try:
        results = list(
            conn.execute(
                select(columns=[_modelSetTable.c.id, _modelSetTable.c.key])))

        modelSetIdByKey = {o.key: o.id for o in results}
        del results

    finally:
        conn.close()

    return modelSetIdByKey
def _loadCoordSets(modelSetId: int) -> Dict[str, int]:
    # Get the coord sets for this model set
    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    try:
        results = list(
            conn.execute(
                select(columns=[_coordSetTable.c.id, _coordSetTable.c.key],
                       whereclause=_coordSetTable.c.modelSetId == modelSetId)))

        coordSetIdByKey = {o.key: o.id for o in results}
        del results

    finally:
        conn.close()

    return coordSetIdByKey
def _bulkLoadDispsTask(importGroupHash: str, disps: List):
    """ Bulk Load Disps Task

    1) Queue the grids that currently hold disps with this importGroupHash
    2) Drop all disps with a matching importGroupHash
    3) Bulk insert the new disps

    :param importGroupHash: The import group hash being replaced
    :param disps: An array of disp objects to import
    :return: None
    """

    dispTable = DispBase.__table__
    gridKeyIndexTable = GridKeyIndex.__table__
    gridQueueTable = GridKeyCompilerQueue.__table__

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    try:
        stmt = select([gridKeyIndexTable.c.coordSetId, gridKeyIndexTable.c.gridKey]) \
            .where(dispTable.c.importGroupHash == importGroupHash) \
            .select_from(join(gridKeyIndexTable, dispTable,
                              gridKeyIndexTable.c.dispId == dispTable.c.id)) \
            .distinct()

        ins = gridQueueTable.insert().from_select(['coordSetId', 'gridKey'], stmt)
        conn.execute(ins)

        conn.execute(dispTable.delete().where(
            dispTable.c.importGroupHash == importGroupHash))

        transaction.commit()

        _bulkInsertDisps(engine, disps)

    except Exception:
        transaction.rollback()
        raise

    finally:
        conn.close()
def updateValues(self, payloadEncodedArgs: bytes) -> None:
    """ Update Raw Values Task

    :param self: A celery reference to this task
    :param payloadEncodedArgs: The updates from the queue controller
    :returns: None
    """
    startTime = datetime.now(pytz.utc)

    argData = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    allModelUpdates: List[LiveDbRawValueQueue] = argData[0]
    queueItemIds = argData[1]

    # Group the data by model set
    updatesByModelSetId = defaultdict(list)
    for update in allModelUpdates:
        updatesByModelSetId[update.modelSetId].append(update)

    ormSession = CeleryDbConn.getDbSession()
    try:
        for modelSetId, modelUpdates in updatesByModelSetId.items():
            _updateValuesForModelSet(modelSetId, modelUpdates, ormSession)

        # ---------------
        # Delete the queue items
        dispQueueTable = LiveDbRawValueQueue.__table__
        ormSession.execute(
            dispQueueTable.delete(dispQueueTable.c.id.in_(queueItemIds))
        )
        ormSession.commit()

        # ---------------
        # Finally, log some statistics
        logger.info("Updated %s raw values in %s",
                    len(allModelUpdates),
                    (datetime.now(pytz.utc) - startTime))

    except Exception as e:
        logger.exception(e)
        raise self.retry(exc=e, countdown=2)

    finally:
        ormSession.close()
def _buildIndex(chunkKeys) -> Dict[str, bytes]:
    session = CeleryDbConn.getDbSession()

    try:
        indexQry = (
            session.query(ItemKeyIndex.chunkKey,
                          ItemKeyIndex.itemKey,
                          # ItemKeyIndex.itemType,
                          ItemKeyIndex.segmentKey)
                .filter(ItemKeyIndex.chunkKey.in_(chunkKeys))
                .order_by(ItemKeyIndex.itemKey, ItemKeyIndex.segmentKey)
                .yield_per(1000)
                .all())

        # Create the ChunkKey -> {itemKey -> [segmentKey, ...], ...} map
        packagedJsonByObjIdByChunkKey = defaultdict(lambda: defaultdict(list))

        for item in indexQry:
            packagedJsonByObjIdByChunkKey[item.chunkKey][item.itemKey].append(
                item.segmentKey)

        encPayloadByChunkKey = {}

        # Sort each bucket by the key
        for chunkKey, segmentKeysByItemKey in packagedJsonByObjIdByChunkKey.items():
            # Convert each list to a json string, this reduces the memory footprint
            # when searching the index.
            packedJsonByKey = {
                itemKey: json.dumps(segmentKeys)
                for itemKey, segmentKeys in segmentKeysByItemKey.items()
            }

            tuples = json.dumps(packedJsonByKey, sort_keys=True)

            # Create the blob data for this index.
            # It could/will be found by a binary sort
            encPayloadByChunkKey[chunkKey] = Payload(tuples=tuples).toEncodedPayload()

        return encPayloadByChunkKey

    finally:
        session.close()
def _insertOrUpdateObjects(newTraceConfigs: List[GraphDbTraceConfigTuple],
                           modelSetId: int) -> None:
    """ Insert or Update Objects

    1) Delete any existing trace configs with matching keys
    2) Insert the new trace configs

    """
    traceConfigTable = GraphDbTraceConfig.__table__

    startTime = datetime.now(pytz.utc)

    dbSession = CeleryDbConn.getDbSession()

    try:
        keysToDelete = {i.key for i in newTraceConfigs}

        dbSession.execute(
            traceConfigTable.delete(traceConfigTable.c.key.in_(keysToDelete))
        )

        # Create the DB ORM objects to insert
        for importTraceConfig in newTraceConfigs:
            dbSession.add(GraphDbTraceConfig().fromTuple(importTraceConfig, modelSetId))

        dbSession.commit()

        logger.info("Inserted %s trace configs in %s",
                    len(newTraceConfigs),
                    (datetime.now(pytz.utc) - startTime))

    except Exception:
        dbSession.rollback()
        raise

    finally:
        dbSession.close()
def deleteSegment(self, modelSetKey: str, importGroupHashes: List[str]) -> None:
    startTime = datetime.now(pytz.utc)

    segmentTable = GraphDbSegment.__table__
    queueTable = GraphDbCompilerQueue.__table__

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:
        modelSetIdByKey = _loadModelSets()
        modelSetId = modelSetIdByKey[modelSetKey]

        chunkKeys = conn.execute(
            select([segmentTable.c.modelSetId, segmentTable.c.chunkKey],
                   and_(segmentTable.c.importGroupHash.in_(importGroupHashes),
                        segmentTable.c.modelSetId == modelSetId))).fetchall()

        if chunkKeys:
            conn.execute(
                segmentTable.delete(
                    and_(segmentTable.c.importGroupHash.in_(importGroupHashes),
                         segmentTable.c.modelSetId == modelSetId)))

            conn.execute(queueTable.insert(), chunkKeys)

        deleteItemKeys(conn, modelSetId, importGroupHashes)

        transaction.commit()
        logger.info("Deleted %s, queued %s chunks in %s",
                    len(importGroupHashes), len(chunkKeys),
                    (datetime.now(pytz.utc) - startTime))

    except Exception as e:
        transaction.rollback()
        logger.debug("Retrying graphDb deleteSegment, %s", e)
        raise self.retry(exc=e, countdown=3)

    finally:
        conn.close()
def _updateCoordSetPosition(coordSet: ModelCoordSet, disps: List):
    """ Update CoordSet Position

    Initialise the coord set's default pan/zoom position from the first disp
    that has geometry, if the position isn't already set.

    :param coordSet: The coord set to update
    :param disps: An array of disp objects to import
    :return: None
    """

    if coordSet.initialPanX or coordSet.initialPanY or coordSet.initialZoom:
        return

    startTime = datetime.now(pytz.utc)

    ormSession = CeleryDbConn.getDbSession()
    try:
        # Initialise the ModelCoordSet initial position if it's not set
        for disp in disps:
            if not hasattr(disp, 'geomJson'):
                continue

            coords = json.loads(disp.geomJson)
            coordSet.initialPanX = coords[0]
            coordSet.initialPanY = coords[1]
            coordSet.initialZoom = 0.05
            ormSession.merge(coordSet)
            break

        ormSession.commit()

        logger.info("Updated coordset position in %s",
                    (datetime.now(pytz.utc) - startTime))

    finally:
        ormSession.close()
def compileBranchIndexChunk(self, payloadEncodedArgs: bytes) -> List[int]:
    """ Compile BranchIndex Index Task

    :param self: A bound parameter from celery
    :param payloadEncodedArgs: An encoded payload containing the queue tuples.
    :returns: A list of chunk keys that have been updated.
    """
    argData = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    queueItems = argData[0]
    queueItemIds: List[int] = argData[1]

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:
        queueItemsByModelSetId = defaultdict(list)

        for queueItem in queueItems:
            queueItemsByModelSetId[queueItem.modelSetId].append(queueItem)

        for modelSetId, modelSetQueueItems in queueItemsByModelSetId.items():
            _compileBranchIndexChunk(conn, transaction, modelSetId,
                                     modelSetQueueItems)

        queueTable = BranchIndexCompilerQueue.__table__

        transaction = conn.begin()
        conn.execute(queueTable.delete(queueTable.c.id.in_(queueItemIds)))
        transaction.commit()

    except Exception as e:
        transaction.rollback()
        logger.debug("RETRYING task - %s", e)
        raise self.retry(exc=e, countdown=10)

    finally:
        conn.close()

    return list(set([i.chunkKey for i in queueItems]))
def importLiveDbItems(self, modelSetKey: str,
                      newItems: List[ImportLiveDbItemTuple]) -> List[str]:
    """ Import LiveDB Items Task

    :param self: A celery reference to this task
    :param modelSetKey: The model set name
    :param newItems: The list of new items
    :returns: A list of the keys that were newly inserted.
    """
    startTime = datetime.now(pytz.utc)

    session = CeleryDbConn.getDbSession()
    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    liveDbTable = LiveDbItem.__table__

    try:
        liveDbModelSet = getOrCreateLiveDbModelSet(session, modelSetKey)

        # This will remove duplicates
        itemsByKey = {i.key: i for i in newItems}

        allKeys = list(itemsByKey)
        existingKeys = set()

        # Query for existing keys, in chunks of 1000
        chunkSize = 1000
        offset = 0
        while True:
            chunk = allKeys[offset:offset + chunkSize]
            if not chunk:
                break
            offset += chunkSize

            stmt = (select([liveDbTable.c.key])
                    .where(liveDbTable.c.modelSetId == liveDbModelSet.id)
                    .where(makeCoreValuesSubqueryCondition(
                        engine, liveDbTable.c.key, chunk))
                    )

            result = conn.execute(stmt)
            existingKeys.update([o[0] for o in result.fetchall()])

        inserts = []
        newKeys = []

        for newItem in itemsByKey.values():
            if newItem.key in existingKeys:
                continue

            inserts.append(dict(
                modelSetId=liveDbModelSet.id,
                key=newItem.key,
                dataType=newItem.dataType,
                rawValue=newItem.rawValue,
                displayValue=newItem.displayValue,
                importHash=newItem.importHash
            ))

            newKeys.append(newItem.key)

        if not inserts:
            return []

        conn.execute(LiveDbItem.__table__.insert(), inserts)

        transaction.commit()

        logger.info("Inserted %s LiveDbItems, %s already existed, in %s",
                    len(inserts), len(existingKeys),
                    (datetime.now(pytz.utc) - startTime))

        return newKeys

    except Exception as e:
        transaction.rollback()
        logger.debug("Task failed, but it will retry. %s", e)
        raise self.retry(exc=e, countdown=10)

    finally:
        conn.close()
        session.close()
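# The existing-key lookup above slices allKeys by hand so each query stays at
# 1000 keys. A small generic helper, not present in this module, that expresses
# the same slicing; shown only to make the chunking pattern explicit.
def _chunks(items: list, size: int = 1000):
    """Yield fixed-size slices of a list; keeps each key-list condition bounded."""
    for start in range(0, len(items), size):
        yield items[start:start + size]
# e.g. for chunk in _chunks(allKeys): query the keys in 'chunk'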
def compileGrids(self, payloadEncodedArgs: bytes) -> List[str]:
    """ Compile Grids Task

    :param self: A celery reference to this task
    :param payloadEncodedArgs: An encoded payload containing the queue tuples.
    :returns: A list of grid keys that have been updated.
    """
    argData = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    queueItems = argData[0]
    queueItemIds: List[int] = argData[1]

    gridKeys = list(set([i.gridKey for i in queueItems]))
    coordSetIdByGridKey = {i.gridKey: i.coordSetId for i in queueItems}

    queueTable = GridKeyCompilerQueue.__table__
    gridTable = GridKeyIndexCompiled.__table__

    startTime = datetime.now(pytz.utc)

    session = CeleryDbConn.getDbSession()
    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    try:
        logger.debug("Starting compile of %s queueItems in %s",
                     len(queueItems), (datetime.now(pytz.utc) - startTime))

        total = 0
        dispData = _qryDispData(session, gridKeys)

        conn.execute(gridTable.delete(gridTable.c.gridKey.in_(gridKeys)))

        transaction.commit()
        transaction = conn.begin()

        inserts = []
        for gridKey, dispJsonStr in dispData.items():
            m = hashlib.sha256()
            m.update(gridKey.encode())
            m.update(dispJsonStr.encode())
            gridTupleHash = b64encode(m.digest()).decode()

            gridTuple = GridTuple(
                gridKey=gridKey,
                dispJsonStr=dispJsonStr,
                lastUpdate=gridTupleHash
            )

            encodedGridTuple = Payload(tuples=[gridTuple]).toEncodedPayload()

            inserts.append(dict(coordSetId=coordSetIdByGridKey[gridKey],
                                gridKey=gridKey,
                                lastUpdate=gridTupleHash,
                                encodedGridTuple=encodedGridTuple))

        if inserts:
            conn.execute(gridTable.insert(), inserts)

        logger.debug("Compiled %s gridKeys, %s missing, in %s",
                     len(inserts), len(gridKeys) - len(inserts),
                     (datetime.now(pytz.utc) - startTime))

        total += len(inserts)

        conn.execute(queueTable.delete(queueTable.c.id.in_(queueItemIds)))

        transaction.commit()
        logger.info("Compiled and Committed %s GridKeyIndexCompileds in %s",
                    total, (datetime.now(pytz.utc) - startTime))

        return gridKeys

    except NotAllDispsCompiledException as e:
        logger.warning("Retrying, Not all disps for gridKey %s are compiled", gridKeys)
        raise self.retry(exc=e, countdown=1)

    except Exception as e:
        transaction.rollback()
        logger.debug("Compile of grids failed, retrying : %s", gridKeys)
        raise self.retry(exc=e, countdown=2)

    finally:
        conn.close()
        session.close()
def _importDisps(coordSet: ModelCoordSet, importDisps: List):
    """ Link Disps

    1) Use the AgentImportDispGridLookup to convert lookups from importHash to id
    2) set the coordSetId

    This is not done in a thread because the lookups cause issues

    """

    dispIdGen = CeleryDbConn.prefetchDeclarativeIds(DispBase, len(importDisps))

    dispIdsToCompile = []
    importDispLinks = []
    ormDisps = []

    ormSession = CeleryDbConn.getDbSession()
    try:
        lookupConverter = LookupHashConverter(ormSession,
                                              modelSetId=coordSet.modelSetId,
                                              coordSetId=coordSet.id)

        dispGroupPtrWithTargetHash: List[Tuple[DispGroupPointer, str]] = []
        dispGroupChildWithTargetHash: List[Tuple[DispBase, str]] = []

        # Preload any groups our pointers may point to.
        # Pre-import any DispGroup IDs we may need
        dispGroupTargetImportHashes = [
            o.targetDispGroupHash for o in importDisps
            if o.tupleType() == ImportDispGroupPtrTuple.tupleType()
        ]

        # This will store DispGroup and DispGroupPointer hashes
        groupIdByImportHash: Dict[str, int] = {
            o.importHash: o.id
            for o in ormSession.query(DispBase.importHash, DispBase.id)
                .filter(DispBase.importHash.in_(dispGroupTargetImportHashes))
                .filter(DispBase.coordSetId == coordSet.id)
        }

        del dispGroupTargetImportHashes

        # This is a set of DispGroup.id.
        # We use this to filter out disps that are part of a DispGroup,
        # they don't get compiled
        dispGroupIds = set()

        # Sort the DispGroups first, so they are created before any FK references them
        sortedImportDisps = sorted(importDisps,
                                   key=lambda o: IMPORT_SORT_ORDER[o.tupleType()])

        for importDisp in sortedImportDisps:
            # Convert the geometry into the internal array format
            _convertGeom(importDisp)

            # Create the storage tuple instance, and copy over the data.
            ormDisp = _convertImportTuple(importDisp)
            ormDisps.append(ormDisp)

            # Preallocate the IDs for performance on PostGreSQL
            ormDisp.id = next(dispIdGen)

            # Assign the coord set id.
            ormDisp.coordSetId = coordSet.id

            # If this is a dispGroup, index its ID
            if isinstance(ormDisp, DispGroup):
                dispGroupIds.add(ormDisp.id)
                groupIdByImportHash[ormDisp.importHash] = ormDisp.id

            # If this is a dispGroupPtr, index its targetHash so we can update it
            if isinstance(ormDisp, DispGroupPointer):
                groupIdByImportHash[ormDisp.importHash] = ormDisp.id

                if ormDisp.targetDispGroupName:
                    ormDisp.targetDispGroupName = '%s|%s' % (
                        coordSet.id, ormDisp.targetDispGroupName)

                # Not all DispGroupPointers have targets,
                # they can be orphaned instances
                if importDisp.targetDispGroupHash:
                    dispGroupPtrWithTargetHash.append(
                        (ormDisp, importDisp.targetDispGroupHash))

            # If this disp is a child of a dispGroup, index its parent hash
            # so we can link it below
            parentDispGroupHash = getattr(importDisp, "parentDispGroupHash", None)
            if parentDispGroupHash:
                dispGroupChildWithTargetHash.append((ormDisp, parentDispGroupHash))

            # Add some interim data to the import display link, so it can be created
            if hasattr(importDisp, "liveDbDispLinks"):
                for importDispLink in importDisp.liveDbDispLinks:
                    attrName = importDispLink.dispAttrName
                    importDispLink.internalRawValue = getattr(ormDisp, attrName)
                    importDispLink.internalDispId = ormDisp.id
                    importDispLinks.append(importDispLink)

            # Convert the values of the liveDb attributes
            lookupConverter.convertLookups(ormDisp)

            # Add the after translate value, this is the Display Value
            if hasattr(importDisp, "liveDbDispLinks"):
                for importDispLink in importDisp.liveDbDispLinks:
                    attrName = importDispLink.dispAttrName
                    importDispLink.internalDisplayValue = getattr(ormDisp, attrName)

            # Queue the Disp to be compiled into a grid.
            # Disps belonging to a DispGroup do not get compiled into grids.
            if ormDisp.groupId not in dispGroupIds:
                dispIdsToCompile.append(ormDisp.id)

        # Link the DispGroups
        # Create the links between the Disp and DispGroup
        for ormDisp, groupHash in dispGroupChildWithTargetHash:
            groupOrmObjId = groupIdByImportHash.get(groupHash)
            if groupOrmObjId is None:
                raise Exception("DispGroup with importHash %s doesn't exist"
                                % groupHash)

            ormDisp.groupId = groupOrmObjId

        # Link the DispGroupPtr to the DispGroup
        # This is only used when the dispGroupPtr points to a disp group
        for ormDisp, groupHash in dispGroupPtrWithTargetHash:
            groupOrmObjId = groupIdByImportHash.get(groupHash)
            if groupOrmObjId is None:
                raise Exception("DispGroup with importHash %s doesn't exist"
                                % groupHash)

            ormDisp.targetDispGroupId = groupOrmObjId

    finally:
        ormSession.close()

    return dispIdsToCompile, importDispLinks, ormDisps
def loadItemKeys(conn, newItemKeys: List[ItemKeyImportTuple],
                 modelSetId: int, modelSetKey: str) -> None:
    """ Load Item Keys

    1) Delete existing ItemKeyIndex rows with matching importGroupHashes
    2) Insert the new item keys and queue their chunks for compilation

    """

    itemKeyIndexTable = ItemKeyIndex.__table__
    queueTable = ItemKeyIndexCompilerQueue.__table__

    startTime = datetime.now(pytz.utc)

    importHashSet = set()

    chunkKeysForQueue: Set[Tuple[int, str]] = set()

    # Get the IDs that we need
    newIdGen = CeleryDbConn.prefetchDeclarativeIds(ItemKeyIndex, len(newItemKeys))

    # Create state arrays
    inserts = []

    # Work out which objects have been updated or need inserting
    for importItemKey in newItemKeys:
        importHashSet.add(importItemKey.importGroupHash)

        id_ = next(newIdGen)
        insertObject = ItemKeyIndex(
            id=id_,
            modelSetId=modelSetId,
            importGroupHash=importItemKey.importGroupHash,
            itemType=importItemKey.itemType,
            itemKey=importItemKey.itemKey,
            segmentKey=importItemKey.segmentKey,
            chunkKey=makeChunkKeyForItemKey(modelSetKey, importItemKey.itemKey))
        inserts.append(insertObject.tupleToSqlaBulkInsertDict())

        chunkKeysForQueue.add((modelSetId, insertObject.chunkKey))

    if importHashSet:
        conn.execute(
            itemKeyIndexTable.delete(
                itemKeyIndexTable.c.importGroupHash.in_(importHashSet)))

    # Insert the ItemKeyIndex Objects
    if inserts:
        conn.execute(itemKeyIndexTable.insert(), inserts)

    if chunkKeysForQueue:
        conn.execute(
            queueTable.insert(),
            [dict(modelSetId=m, chunkKey=c) for m, c in chunkKeysForQueue])

    logger.debug("Inserted %s ItemKeys queued %s chunks in %s",
                 len(inserts), len(chunkKeysForQueue),
                 (datetime.now(pytz.utc) - startTime))
def _insertToDb(dispIds, gridCompiledQueueItems, gridKeyIndexesByDispId,
                locationCompiledQueueItems, locationIndexByDispId, queueIds):
    """ Insert to DB

    This method provides the DB inserts and deletes after the data has been
    calculated.

    """
    startTime = datetime.now(pytz.utc)

    dispBaseTable = DispBase.__table__
    dispQueueTable = DispIndexerQueue.__table__

    gridKeyIndexTable = GridKeyIndex.__table__
    gridQueueTable = GridKeyCompilerQueue.__table__

    locationIndexTable = LocationIndex.__table__
    locationIndexCompilerQueueTable = LocationIndexCompilerQueue.__table__

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:
        lockedDispIds = conn.execute(
            select(whereclause=dispBaseTable.c.id.in_(dispIds),
                   columns=[dispBaseTable.c.id],
                   for_update=True))

        lockedDispIds = [o[0] for o in lockedDispIds]

        # Ensure that the Disps exist, otherwise we get an integrity error.
        gridKeyIndexes = []
        locationIndexes = []
        for dispId in lockedDispIds:
            gridKeyIndexes.extend(gridKeyIndexesByDispId[dispId])

            if dispId in locationIndexByDispId:
                locationIndexes.append(locationIndexByDispId[dispId])

        # Delete existing items in the location and grid index

        # grid index
        conn.execute(
            gridKeyIndexTable.delete(gridKeyIndexTable.c.dispId.in_(dispIds)))

        # location index
        conn.execute(
            locationIndexTable.delete(locationIndexTable.c.dispId.in_(dispIds)))

        # ---------------
        # Insert the Grid Key indexes
        if gridKeyIndexes:
            conn.execute(gridKeyIndexTable.insert(), gridKeyIndexes)

        # Directly insert into the Grid compiler queue.
        if gridCompiledQueueItems:
            conn.execute(gridQueueTable.insert(), [
                dict(coordSetId=i.coordSetId, gridKey=i.gridKey)
                for i in gridCompiledQueueItems
            ])

        # ---------------
        # Insert the Location indexes
        if locationIndexes:
            conn.execute(locationIndexTable.insert(), locationIndexes)

        # Directly insert into the Location compiler queue.
        if locationCompiledQueueItems:
            conn.execute(locationIndexCompilerQueueTable.insert(), [
                dict(modelSetId=i.modelSetId, indexBucket=i.indexBucket)
                for i in locationCompiledQueueItems
            ])

        # ---------------
        # Finally, delete the disp queue items
        conn.execute(dispQueueTable.delete(dispQueueTable.c.id.in_(queueIds)))

        transaction.commit()
        logger.debug("Committed %s GridKeyIndex in %s",
                     len(gridKeyIndexes), (datetime.now(pytz.utc) - startTime))

    except Exception:
        transaction.rollback()
        raise

    finally:
        conn.close()
def compileDisps(self, payloadEncodedArgs: bytes):
    """ Compile Disps

    This function takes a list of Disp IDs and compiles them.

    The processing is as follows (more or less):

    0) Load lookups

    ----

    1) DispGroupPointers, copy disps from group to pointer

    ----

    2) Load the Disps from the DB
    3) Apply the LiveDB values to the Disp attributes
    4) Scale the Disp geomJson to match the coord set scaling
    5) DispGroups, take Disps that are part of a disp group and load them into
       JSON in the DispGroup. (PreparedDisp?)
    6) Extract any new LocationIndex entries, if the Disp has a key
    7) Determine which grids this disp will live in, and create GridKeyIndex
       entries for those grid keys for this disp.
    8) Write the Disp JSON back to the disp

    ormSession.commit() here. This stores the following updates that have been
    made into the disp:
        * dispJson
        * locationJson
        * livedb attribute updates

    ----

    9) Write the calculated data to tables

    NOTE: Disps that belong to a DispGroup will not be queued for compile
    by ImportDispTask.

    """
    argData = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    dispIds = [o.dispId for o in argData[0]]
    queueItemIds: List[int] = argData[1]

    # ==========================
    # 0) Load the lookups
    ormSession = CeleryDbConn.getDbSession()
    try:
        # ---------------
        # Load Coord Sets
        coordSets = (ormSession.query(ModelCoordSet)
                     .options(subqueryload(ModelCoordSet.modelSet),
                              subqueryload(ModelCoordSet.gridSizes))
                     .all())

        # Map the coord sets by id
        coordSetById = {o.id: o for o in coordSets}

        # ---------------
        # Load Text Styles
        textStyleById = {
            ts.id: ts
            for ts in ormSession.query(DispTextStyle).all()
        }

        ormSession.expunge_all()

    except Exception as e:
        logger.exception(e)
        raise self.retry(exc=e, countdown=2)

    finally:
        ormSession.close()

    # ==========================
    # This method will create new disps that will be compiled later.
    try:
        # ---------------
        # 1) Clone the disps for the group instances
        dispIdsIncludingClones = _cloneDispsForDispGroupPointer(dispIds)

    except Exception as e:
        logger.exception(e)
        raise self.retry(exc=e, countdown=2)

    # ==========================
    # Run all the ORM Session update methods
    ormSession = CeleryDbConn.getDbSession()
    try:
        with ormSession.no_autoflush:
            # ---------------
            # 2) Load the Disps from the DB
            disps = _loadDisps(ormSession, dispIdsIncludingClones)

            # ---------------
            # 3) Apply the LiveDB Attribute updates
            _applyLiveDbAttributes(ormSession, disps, coordSetById)

            # ---------------
            # 4) Scale the Disp geomJson to match the coord set scaling
            preparedDisps = _scaleDisp(disps, coordSetById)

            # 5) DispGroups, take Disps that are part of a disp group and load
            # them into JSON in the DispGroup. (PreparedDisp?)
            _compileDispGroups(ormSession, preparedDisps)

            # ---------------
            # 6) Extract any new LocationIndex entries, if the Disp has a key
            locationCompiledQueueItems, locationIndexByDispId = _indexLocation(
                preparedDisps, coordSetById)

            # ---------------
            # 7) Determine which grids this disp will live in, and create
            # GridKeyIndex entries for those grid keys for this disp.
            gridCompiledQueueItems, gridKeyIndexesByDispId = _calculateGridKeys(
                preparedDisps, coordSetById, textStyleById)

            # ---------------
            # 8) Write the Disp JSON back to the disp
            _updateDispsJson(preparedDisps)

        # ---------------
        # Commit the updates
        startTime = datetime.now(pytz.utc)
        ormSession.commit()
        logger.debug("Committed %s disp objects in %s",
                     len(disps), (datetime.now(pytz.utc) - startTime))

    except Exception as e:
        ormSession.rollback()
        logger.exception(e)
        raise self.retry(exc=e, countdown=2)

    finally:
        ormSession.close()

    # ==========================
    # 9) Run the bulk DB delete/insert methods
    try:
        _insertToDb(dispIdsIncludingClones, gridCompiledQueueItems,
                    gridKeyIndexesByDispId, locationCompiledQueueItems,
                    locationIndexByDispId, queueItemIds)

    except Exception as e:
        logger.exception(e)
        raise self.retry(exc=e, countdown=2)

    logger.info("Compiled %s disp objects in %s",
                len(dispIds), (datetime.now(pytz.utc) - startTime))
def _cloneDispsForDispGroupPointer(dispIds: List[int]):
    """ Clone Disps for DispGroupPointer

    This method will clone "instances" of the disps in the disp groups for the
    DispGroupPointers.

    """
    startTime = datetime.now(pytz.utc)

    ormSession = CeleryDbConn.getDbSession()
    try:
        # -----
        # Load the disp group pointers
        qry = ormSession.query(DispGroupPointer) \
            .filter(DispGroupPointer.targetDispGroupId != None) \
            .filter(DispGroupPointer.id.in_(dispIds))

        dispGroupPointers: List[DispGroupPointer] = qry.all()

        # If there are no DispGroupPointers that need cloning, then return.
        if not dispGroupPointers:
            logger.debug("Cloning skipped,"
                         " there are no disp group ptrs with targets, in %s",
                         (datetime.now(pytz.utc) - startTime))
            return dispIds

        dispGroupPointerTargetIds = [o.targetDispGroupId for o in dispGroupPointers]

        del qry

        # -----
        # Delete any existing disps that are in these pointers
        ormSession.query(DispBase) \
            .filter(DispBase.groupId.in_([o.id for o in dispGroupPointers])) \
            .delete(synchronize_session=False)

        ormSession.commit()

        # -----
        # Query for the disp groups we'll need
        dispGroupChildsByGroupId = _queryDispsForGroup(ormSession,
                                                       dispGroupPointerTargetIds)

        # -----
        # Query for the disp group names
        dispBaseTable = DispBase.__table__
        dispGroupTable = DispGroup.__table__

        qry = ormSession.execute(
            select(columns=[dispBaseTable.c.id, dispBaseTable.c.coordSetId,
                            dispGroupTable.c.name],
                   whereclause=dispBaseTable.c.id.in_(dispGroupPointerTargetIds))
                .select_from(join(dispGroupTable, dispBaseTable,
                                  dispGroupTable.c.id == dispBaseTable.c.id)))

        dispGroupNameByGroupId = {
            o.id: '%s|%s' % (o.coordSetId, o.name)
            for o in qry.fetchall()
        }

        del qry

        # -----
        # Clone the child disps
        cloneDisps = []
        cloneLiveDbDispLinks = []
        for dispPtr in dispGroupPointers:
            if not dispPtr.targetDispGroupId:
                logger.debug("Pointer has no targetGroupId id=%s", dispPtr.id)
                continue

            dispGroupChilds = dispGroupChildsByGroupId.get(dispPtr.targetDispGroupId)

            if not dispGroupChilds:
                logger.warning("Pointer points to missing DispGroup,"
                               " id=%s, targetGroupId=%s",
                               dispPtr.id, dispPtr.targetDispGroupId)
                continue

            x, y = json.loads(dispPtr.geomJson)
            dispPtr.targetDispGroupName = \
                dispGroupNameByGroupId[dispPtr.targetDispGroupId]

            for templateDisp in dispGroupChilds:
                # Create the clone
                cloneDisp = templateDisp.tupleClone()
                cloneDisps.append(cloneDisp)

                cloneDisp.coordSetId = dispPtr.coordSetId

                # Offset the geometry
                geom = json.loads(cloneDisp.geomJson)
                geom = _scaleDispGeom(geom, 1, 1, x, y)
                cloneDisp.geomJson = json.dumps(geom)

                # Assign the clone to the DispGroupPointer
                cloneDisp.groupId = dispPtr.id

                for dispLink in templateDisp.liveDbLinks:
                    cloneDispLink = dispLink.tupleClone()
                    cloneLiveDbDispLinks.append(cloneDispLink)

                    cloneDispLink.id = None
                    cloneDispLink.disp = cloneDisp
                    cloneDispLink.coordSetId = dispPtr.coordSetId

        # -----
        # Preallocate the IDs for performance on PostGreSQL
        dispIdGen = CeleryDbConn.prefetchDeclarativeIds(DispBase, len(cloneDisps))
        for cloneDisp in cloneDisps:
            cloneDisp.id = next(dispIdGen)

        # Preallocate the IDs for performance on PostGreSQL
        dispLinkIdGen = CeleryDbConn.prefetchDeclarativeIds(
            LiveDbDispLink, len(cloneLiveDbDispLinks))
        for cloneDispLink in cloneLiveDbDispLinks:
            cloneDispLink.id = next(dispLinkIdGen)
            cloneDispLink.dispId = cloneDispLink.disp.id
            cloneDispLink.disp = None

        # -----
        # Create the new list of IDs to compile
        # Do this here, otherwise it will cause a DB refresh if it's after the commit.
        dispIdsIncludingClones = dispIds + [o.id for o in cloneDisps]

        ormSession.bulk_save_objects(cloneDisps, update_changed_only=False)
        ormSession.bulk_save_objects(cloneLiveDbDispLinks, update_changed_only=False)
        ormSession.commit()

        logger.debug("Cloned %s disp group objects in %s",
                     len(cloneDisps), (datetime.now(pytz.utc) - startTime))

    except Exception:
        ormSession.rollback()
        raise

    finally:
        ormSession.close()

    return dispIdsIncludingClones
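# The clone loop above offsets each child's geometry by the pointer's (x, y) via
# _scaleDispGeom(geom, 1, 1, x, y). That helper isn't shown in this section; the
# sketch below is an assumption about the offset-only case (scale factors of 1)
# on a flat [x0, y0, x1, y1, ...] coordinate list, for illustration only.
def _offsetDispGeomSketch(geom: list, dx: float, dy: float) -> list:
    """Shift a flat [x0, y0, x1, y1, ...] coordinate list by (dx, dy)."""
    return [v + (dx if i % 2 == 0 else dy) for i, v in enumerate(geom)]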
def compileSearchIndexChunk(self, payloadEncodedArgs: bytes) -> List[str]:
    """ Compile Search Index Task

    :param self: A celery reference to this task
    :param payloadEncodedArgs: An encoded payload containing the queue tuples.
    :returns: A list of chunk keys that have been updated.
    """
    argData = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    queueItems = argData[0]
    queueItemIds: List[int] = argData[1]

    chunkKeys = list(set([i.chunkKey for i in queueItems]))

    queueTable = SearchIndexCompilerQueue.__table__
    compiledTable = EncodedSearchIndexChunk.__table__
    lastUpdate = datetime.now(pytz.utc).isoformat()

    startTime = datetime.now(pytz.utc)

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:
        logger.debug("Starting compile of %s queueItems in %s",
                     len(queueItems), (datetime.now(pytz.utc) - startTime))

        # Get Model Sets
        total = 0

        existingHashes = _loadExistingHashes(conn, chunkKeys)
        encKwPayloadByChunkKey = _buildIndex(conn, chunkKeys)
        chunksToDelete = []

        inserts = []
        for chunkKey, searchIndexChunkEncodedPayload in encKwPayloadByChunkKey.items():
            m = hashlib.sha256()
            m.update(searchIndexChunkEncodedPayload)
            encodedHash = b64encode(m.digest()).decode()

            # Compare the hash, AND delete the chunk key
            if chunkKey in existingHashes:
                # At this point we could decide to do an update instead,
                # but inserts are quicker
                if encodedHash == existingHashes.pop(chunkKey):
                    continue

                chunksToDelete.append(chunkKey)

            inserts.append(
                dict(chunkKey=chunkKey,
                     encodedData=searchIndexChunkEncodedPayload,
                     encodedHash=encodedHash,
                     lastUpdate=lastUpdate))

        # Add any chunks that we need to delete that we don't have new data for, here
        chunksToDelete.extend(list(existingHashes))

        if chunksToDelete:
            # Delete the old chunks
            conn.execute(
                compiledTable.delete(compiledTable.c.chunkKey.in_(chunksToDelete)))

        if inserts:
            newIdGen = CeleryDbConn.prefetchDeclarativeIds(SearchIndex, len(inserts))
            for insert in inserts:
                insert["id"] = next(newIdGen)

        transaction.commit()
        transaction = conn.begin()

        if inserts:
            conn.execute(compiledTable.insert(), inserts)

        logger.debug("Compiled %s SearchIndexes, %s missing, in %s",
                     len(inserts), len(chunkKeys) - len(inserts),
                     (datetime.now(pytz.utc) - startTime))

        total += len(inserts)

        conn.execute(queueTable.delete(queueTable.c.id.in_(queueItemIds)))

        transaction.commit()
        logger.info("Compiled and Committed %s EncodedSearchIndexChunks in %s",
                    total, (datetime.now(pytz.utc) - startTime))

        return chunkKeys

    except Exception as e:
        transaction.rollback()
        # logger.warning(e)  # Just a warning, it will retry
        logger.exception(e)
        raise self.retry(exc=e, countdown=10)

    finally:
        conn.close()
def _insertOrUpdateObjects(newDocuments: List[ImportDocumentTuple],
                           modelSetId: int,
                           docTypeIdsByName: Dict[str, int]) -> None:
    """ Insert or Update Objects

    1) Find objects and update them
    2) Insert objects if they are missing

    """
    documentTable = DocDbDocument.__table__
    queueTable = DocDbCompilerQueue.__table__

    startTime = datetime.now(pytz.utc)

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    try:
        dontDeleteObjectIds = []
        objectIdByKey: Dict[str, int] = {}

        objectKeys = [o.key for o in newDocuments]
        chunkKeysForQueue: Set[Tuple[str, str]] = set()

        # Query existing objects
        results = list(
            conn.execute(
                select(columns=[documentTable.c.id, documentTable.c.key,
                                documentTable.c.chunkKey,
                                documentTable.c.documentJson],
                       whereclause=and_(documentTable.c.key.in_(objectKeys),
                                        documentTable.c.modelSetId == modelSetId))))

        foundObjectByKey = {o.key: o for o in results}
        del results

        # Get the IDs that we need
        newIdGen = CeleryDbConn.prefetchDeclarativeIds(
            DocDbDocument, len(newDocuments) - len(foundObjectByKey))

        # Create state arrays
        inserts = []
        updates = []
        processedKeys = set()

        # Work out which objects have been updated or need inserting
        for importDocument in newDocuments:
            if importDocument.key in processedKeys:
                raise Exception("Key %s exists in import data twice"
                                % importDocument.key)
            processedKeys.add(importDocument.key)

            existingObject = foundObjectByKey.get(importDocument.key)
            importDocumentTypeId = docTypeIdsByName[importDocument.documentTypeKey]

            packedJsonDict = {
                k: v
                for k, v in importDocument.document.items()
                if v is not None and v != ''  # 0 / false allowed
            }
            packedJsonDict['_dtid'] = importDocumentTypeId
            packedJsonDict['_msid'] = modelSetId
            documentJson = json.dumps(packedJsonDict, sort_keys=True)

            # Work out if we need to update the object type
            if existingObject:
                updates.append(
                    dict(b_id=existingObject.id,
                         b_typeId=importDocumentTypeId,
                         b_documentJson=documentJson))
                dontDeleteObjectIds.append(existingObject.id)

            else:
                id_ = next(newIdGen)
                existingObject = DocDbDocument(
                    id=id_,
                    modelSetId=modelSetId,
                    documentTypeId=importDocumentTypeId,
                    key=importDocument.key,
                    importGroupHash=importDocument.importGroupHash,
                    chunkKey=makeChunkKey(importDocument.modelSetKey,
                                          importDocument.key),
                    documentJson=documentJson)
                inserts.append(existingObject.tupleToSqlaBulkInsertDict())

            objectIdByKey[existingObject.key] = existingObject.id
            chunkKeysForQueue.add((modelSetId, existingObject.chunkKey))

        # Insert the DocDb Objects
        if inserts:
            conn.execute(documentTable.insert(), inserts)

        if updates:
            stmt = (documentTable.update()
                    .where(documentTable.c.id == bindparam('b_id'))
                    .values(documentTypeId=bindparam('b_typeId'),
                            documentJson=bindparam('b_documentJson')))
            conn.execute(stmt, updates)

        if chunkKeysForQueue:
            conn.execute(
                queueTable.insert(),
                [dict(modelSetId=m, chunkKey=c) for m, c in chunkKeysForQueue])

        if inserts or updates or chunkKeysForQueue:
            transaction.commit()
        else:
            transaction.rollback()

        logger.debug("Inserted %s updated %s queued %s chunks in %s",
                     len(inserts), len(updates), len(chunkKeysForQueue),
                     (datetime.now(pytz.utc) - startTime))

    except Exception:
        transaction.rollback()
        raise

    finally:
        conn.close()
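# The update branch above uses SQLAlchemy's bindparam so one UPDATE statement can be
# executed against the whole 'updates' list (executemany style). A standalone sketch
# of the same pattern, not part of this module; the throwaway in-memory SQLite table
# and column names are assumptions for illustration only.
def _exampleBulkUpdateWithBindparam():
    from sqlalchemy import (MetaData, Table, Column, Integer, String,
                            bindparam, create_engine)

    engine = create_engine("sqlite://")
    metadata = MetaData()
    docs = Table("docs", metadata,
                 Column("id", Integer, primary_key=True),
                 Column("body", String))
    metadata.create_all(engine)

    with engine.begin() as conn:
        conn.execute(docs.insert(), [dict(id=1, body="a"), dict(id=2, body="b")])

        # One prepared UPDATE, many parameter sets; the b_ prefix keeps the
        # bind parameter names distinct from the column names.
        stmt = (docs.update()
                .where(docs.c.id == bindparam("b_id"))
                .values(body=bindparam("b_body")))
        conn.execute(stmt, [dict(b_id=1, b_body="A"), dict(b_id=2, b_body="B")])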
def _compileBranchIndexChunk(
        conn, transaction, modelSetId: int,
        queueItems: List[BranchIndexCompilerQueue]) -> None:
    chunkKeys = list(set([i.chunkKey for i in queueItems]))

    compiledTable = BranchIndexEncodedChunk.__table__
    lastUpdate = datetime.now(pytz.utc).isoformat()

    startTime = datetime.now(pytz.utc)

    logger.debug("Starting compile of %s queueItems in %s",
                 len(queueItems), (datetime.now(pytz.utc) - startTime))

    # Get Model Sets
    total = 0

    existingHashes = _loadExistingHashes(conn, chunkKeys)
    encKwPayloadByChunkKey = _buildIndex(chunkKeys)
    chunksToDelete = []

    inserts = []
    for chunkKey, diagramIndexChunkEncodedPayload in encKwPayloadByChunkKey.items():
        m = hashlib.sha256()
        m.update(diagramIndexChunkEncodedPayload)
        encodedHash = b64encode(m.digest()).decode()

        # Compare the hash, AND delete the chunk key
        if chunkKey in existingHashes:
            # At this point we could decide to do an update instead,
            # but inserts are quicker
            if encodedHash == existingHashes.pop(chunkKey):
                continue

            chunksToDelete.append(chunkKey)

        inserts.append(
            dict(modelSetId=modelSetId,
                 chunkKey=chunkKey,
                 encodedData=diagramIndexChunkEncodedPayload,
                 encodedHash=encodedHash,
                 lastUpdate=lastUpdate))

    # Add any chunks that we need to delete that we don't have new data for, here
    chunksToDelete.extend(list(existingHashes))

    if chunksToDelete:
        # Delete the old chunks
        conn.execute(
            compiledTable.delete(compiledTable.c.chunkKey.in_(chunksToDelete)))

    if inserts:
        newIdGen = CeleryDbConn.prefetchDeclarativeIds(BranchIndex, len(inserts))
        for insert in inserts:
            insert["id"] = next(newIdGen)

    transaction.commit()
    transaction = conn.begin()

    if inserts:
        conn.execute(compiledTable.insert(), inserts)

    logger.debug("Compiled %s BranchIndexes, %s missing, in %s",
                 len(inserts), len(chunkKeys) - len(inserts),
                 (datetime.now(pytz.utc) - startTime))

    total += len(inserts)

    transaction.commit()
    logger.debug("Compiled and Committed %s EncodedBranchIndexChunks in %s",
                 total, (datetime.now(pytz.utc) - startTime))
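# The chunk compilers above share one pattern: hash the freshly built payload,
# skip chunks whose stored hash is unchanged, and delete-then-insert the rest.
# A small standalone helper (not part of this module) capturing just the comparison
# step; it reuses the hashlib / b64encode imports the compilers already rely on.
def _chunkNeedsRecompile(newPayload: bytes, storedHash) -> bool:
    """Return True when the encoded chunk payload differs from the stored hash."""
    encodedHash = b64encode(hashlib.sha256(newPayload).digest()).decode()
    return storedHash != encodedHash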
def _prepareLookups(newDocuments: List[ImportDocumentTuple],
                    modelSetId: int) -> Dict[str, int]:
    """ Check Or Insert Search Properties

    Make sure the search properties exist.

    """
    dbSession = CeleryDbConn.getDbSession()

    startTime = datetime.now(pytz.utc)

    try:
        docTypeNames = set()
        propertyNames = set()

        for o in newDocuments:
            o.document["key"] = o.key
            o.documentTypeKey = o.documentTypeKey.lower()
            docTypeNames.add(o.documentTypeKey)

            if o.document:
                propertyNames.update([s.lower() for s in o.document])

        # Prepare Properties
        dbProps = (dbSession.query(DocDbPropertyTuple)
                   .filter(DocDbPropertyTuple.modelSetId == modelSetId)
                   .all())

        propertyNames -= set([o.name for o in dbProps])

        if propertyNames:
            for newPropName in propertyNames:
                dbSession.add(
                    DocDbPropertyTuple(name=newPropName,
                                       title=newPropName,
                                       modelSetId=modelSetId))

            dbSession.commit()

        del dbProps
        del propertyNames

        # Prepare Object Types
        dbObjectTypes = (dbSession.query(DocDbDocumentTypeTuple)
                         .filter(DocDbDocumentTypeTuple.modelSetId == modelSetId)
                         .all())

        docTypeNames -= set([o.name for o in dbObjectTypes])

        if not docTypeNames:
            docTypeIdsByName = {o.name: o.id for o in dbObjectTypes}

        else:
            for newType in docTypeNames:
                dbSession.add(
                    DocDbDocumentTypeTuple(name=newType,
                                           title=newType,
                                           modelSetId=modelSetId))

            dbSession.commit()
            dbObjectTypes = dbSession.query(DocDbDocumentTypeTuple).all()
            docTypeIdsByName = {o.name: o.id for o in dbObjectTypes}

        logger.debug("Prepared lookups in %s", (datetime.now(pytz.utc) - startTime))

        return docTypeIdsByName

    except Exception:
        dbSession.rollback()
        raise

    finally:
        dbSession.close()