Example 1
def reindexSearchObject(conn,
                        objectsToIndex: List[ObjectToIndexTuple]) -> None:
    """ Reindex Search Object

    :param conn: A SQLAlchemy connection used to run the delete and insert statements.
    :param objectsToIndex: The objects to (re)index.
    :returns: None
    """

    logger.debug("Starting to index %s SearchIndex", len(objectsToIndex))

    searchIndexTable = SearchIndex.__table__
    queueTable = SearchIndexCompilerQueue.__table__

    startTime = datetime.now(pytz.utc)

    newSearchIndexes = []
    objectIds = []
    searchIndexChunksToQueue = set()

    for objectToIndex in objectsToIndex:
        newSearchIndexes.extend(_indexObject(objectToIndex))
        objectIds.append(objectToIndex.id)

    newIdGen = CeleryDbConn.prefetchDeclarativeIds(SearchIndex,
                                                   len(newSearchIndexes))
    for newSearchIndex in newSearchIndexes:
        newSearchIndex.id = next(newIdGen)
        searchIndexChunksToQueue.add(newSearchIndex.chunkKey)

    results = conn.execute(
        select(columns=[searchIndexTable.c.chunkKey],
               whereclause=searchIndexTable.c.objectId.in_(objectIds)))

    for result in results:
        searchIndexChunksToQueue.add(result.chunkKey)

    if objectIds:
        conn.execute(
            searchIndexTable.delete(
                searchIndexTable.c.objectId.in_(objectIds)))

    if newSearchIndexes:
        logger.debug("Inserting %s SearchIndex", len(newSearchIndexes))
        inserts = [o.tupleToSqlaBulkInsertDict() for o in newSearchIndexes]
        conn.execute(searchIndexTable.insert(), inserts)

    if searchIndexChunksToQueue:
        conn.execute(queueTable.insert(),
                     [dict(chunkKey=k) for k in searchIndexChunksToQueue])

    logger.info("Inserted %s SearchIndex keywords in %s",
                len(newSearchIndexes), (datetime.now(pytz.utc) - startTime))
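
A minimal usage sketch for the example above, assuming the same CeleryDbConn engine/connect/begin pattern used by the later examples in this listing; the wrapper name is hypothetical.

def reindexSearchObjectTask(objectsToIndex: List[ObjectToIndexTuple]) -> None:
    # Run reindexSearchObject inside its own transaction, mirroring the
    # connection handling of _insertOrUpdateObjects further below.
    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:
        reindexSearchObject(conn, objectsToIndex)
        transaction.commit()
    except Exception:
        transaction.rollback()
        raise
    finally:
        conn.close()
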
def _compileBranchIndexChunk(
        conn, transaction, modelSetId: int,
        queueItems: List[BranchIndexCompilerQueue]) -> None:
    chunkKeys = list(set([i.chunkKey for i in queueItems]))

    compiledTable = BranchIndexEncodedChunk.__table__
    lastUpdate = datetime.now(pytz.utc).isoformat()

    startTime = datetime.now(pytz.utc)

    logger.debug("Staring compile of %s queueItems in %s", len(queueItems),
                 (datetime.now(pytz.utc) - startTime))

    # Get Model Sets

    total = 0
    existingHashes = _loadExistingHashes(conn, chunkKeys)
    encKwPayloadByChunkKey = _buildIndex(chunkKeys)
    chunksToDelete = []

    inserts = []
    for chunkKey, diagramIndexChunkEncodedPayload in encKwPayloadByChunkKey.items(
    ):
        m = hashlib.sha256()
        m.update(diagramIndexChunkEncodedPayload)
        encodedHash = b64encode(m.digest()).decode()

        # Compare the hash, and pop the chunk key from the existing hashes
        if chunkKey in existingHashes:
            # At this point we could decide to do an update instead,
            # but inserts are quicker
            if encodedHash == existingHashes.pop(chunkKey):
                continue

        chunksToDelete.append(chunkKey)
        inserts.append(
            dict(modelSetId=modelSetId,
                 chunkKey=chunkKey,
                 encodedData=diagramIndexChunkEncodedPayload,
                 encodedHash=encodedHash,
                 lastUpdate=lastUpdate))

    # Add any chunks that need deleting but have no new data, here
    chunksToDelete.extend(list(existingHashes))

    if chunksToDelete:
        # Delete the old chunks
        conn.execute(
            compiledTable.delete(compiledTable.c.chunkKey.in_(chunksToDelete)))

    if inserts:
        newIdGen = CeleryDbConn.prefetchDeclarativeIds(BranchIndex,
                                                       len(inserts))
        for insert in inserts:
            insert["id"] = next(newIdGen)

    transaction.commit()
    transaction = conn.begin()

    if inserts:
        conn.execute(compiledTable.insert(), inserts)

    logger.debug("Compiled %s BranchIndexs, %s missing, in %s", len(inserts),
                 len(chunkKeys) - len(inserts),
                 (datetime.now(pytz.utc) - startTime))

    total += len(inserts)

    transaction.commit()
    logger.debug("Compiled and Committed %s EncodedBranchIndexChunks in %s",
                 total, (datetime.now(pytz.utc) - startTime))
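
The _loadExistingHashes helper is not shown in this listing; a minimal sketch, assuming the encoded-chunk table exposes the chunkKey and encodedHash columns used above.

def _loadExistingHashes(conn, chunkKeys: List[str]) -> Dict[str, str]:
    # Map chunkKey -> encodedHash for chunks that are already compiled, so the
    # caller can skip chunks whose newly encoded payload hash is unchanged.
    compiledTable = BranchIndexEncodedChunk.__table__

    results = conn.execute(
        select(columns=[compiledTable.c.chunkKey, compiledTable.c.encodedHash],
               whereclause=compiledTable.c.chunkKey.in_(chunkKeys)))

    return {row.chunkKey: row.encodedHash for row in results}
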
Example 3
def loadItemKeys(conn, newItemKeys: List[ItemKeyImportTuple], modelSetId: int,
                 modelSetKey: str) -> None:
    """ Insert or Update Objects

    1) Find objects and update them
    2) Insert object if the are missing

    """

    itemKeyIndexTable = ItemKeyIndex.__table__
    queueTable = ItemKeyIndexCompilerQueue.__table__

    startTime = datetime.now(pytz.utc)

    importHashSet = set()

    chunkKeysForQueue: Set[Tuple[int, str]] = set()

    # Get the IDs that we need
    newIdGen = CeleryDbConn.prefetchDeclarativeIds(ItemKeyIndex,
                                                   len(newItemKeys))

    # Create state arrays
    inserts = []

    # Work out which objects have been updated or need inserting
    for importItemKey in newItemKeys:
        importHashSet.add(importItemKey.importGroupHash)

        # Work out if we need to update the object type

        id_ = next(newIdGen)
        insertObject = ItemKeyIndex(
            id=id_,
            modelSetId=modelSetId,
            importGroupHash=importItemKey.importGroupHash,
            itemType=importItemKey.itemType,
            itemKey=importItemKey.itemKey,
            segmentKey=importItemKey.segmentKey,
            chunkKey=makeChunkKeyForItemKey(modelSetKey,
                                            importItemKey.itemKey))
        inserts.append(insertObject.tupleToSqlaBulkInsertDict())

        chunkKeysForQueue.add((modelSetId, insertObject.chunkKey))

    if importHashSet:
        conn.execute(
            itemKeyIndexTable.delete(
                itemKeyIndexTable.c.importGroupHash.in_(importHashSet)))

    # Insert the ItemKeyIndex Objects
    if inserts:
        conn.execute(itemKeyIndexTable.insert(), inserts)

    if chunkKeysForQueue:
        conn.execute(
            queueTable.insert(),
            [dict(modelSetId=m, chunkKey=c) for m, c in chunkKeysForQueue])

    logger.debug("Inserted %s ItemKeys queued %s chunks in %s", len(inserts),
                 len(chunkKeysForQueue), (datetime.now(pytz.utc) - startTime))
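
makeChunkKeyForItemKey is referenced above but not shown. A hypothetical sketch of a hash-bucket chunk key scheme follows; the bucket count, the key format, and even the use of a hash are assumptions, not the project's actual implementation.

ITEM_KEY_BUCKET_COUNT = 8192  # assumed bucket count

def makeChunkKeyForItemKey(modelSetKey: str, itemKey: str) -> str:
    # Bucket item keys by a stable hash so that related keys land in a
    # bounded number of chunks per model set.
    m = hashlib.sha256()
    m.update(('%s.%s' % (modelSetKey, itemKey)).encode())
    bucket = int.from_bytes(m.digest()[:4], 'big') % ITEM_KEY_BUCKET_COUNT
    return '%s.%s' % (modelSetKey, bucket)
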
def _cloneDispsForDispGroupPointer(dispIds: List[int]):
    """ Clone Disps for DispGroupPointer

    This method will clone "instances" of the disps in the disp groups for the
    DispGroupPointer.


    """
    startTime = datetime.now(pytz.utc)

    ormSession = CeleryDbConn.getDbSession()
    try:

        # -----
        # Load the disp group pointers
        qry = ormSession.query(DispGroupPointer) \
            .filter(DispGroupPointer.targetDispGroupId != None) \
            .filter(DispGroupPointer.id.in_(dispIds))

        dispGroupPointers: List[DispGroupPointer] = qry.all()

        # If there are no DispGroupPointers that need cloning, then return.
        if not dispGroupPointers:
            logger.debug(
                "Cloning skipped,"
                " there are no disp group ptrs with targets, in %s",
                (datetime.now(pytz.utc) - startTime))
            return dispIds

        dispGroupPointerTargetIds = [
            o.targetDispGroupId for o in dispGroupPointers
        ]

        del qry

        # -----
        # Delete any existing disps that are in these pointers
        ormSession.query(DispBase) \
            .filter(DispBase.groupId.in_([o.id for o in dispGroupPointers])) \
            .delete(synchronize_session=False)

        ormSession.commit()

        # -----
        # Query for the disp groups we'll need
        dispGroupChildsByGroupId = _queryDispsForGroup(
            ormSession, dispGroupPointerTargetIds)

        # -----
        # Query for the disp groups names
        dispBaseTable = DispBase.__table__
        dispGroupTable = DispGroup.__table__

        qry = ormSession.execute(
            select(columns=[
                dispBaseTable.c.id, dispBaseTable.c.coordSetId,
                dispGroupTable.c.name
            ],
                   whereclause=dispBaseTable.c.id.in_(
                       dispGroupPointerTargetIds)).select_from(
                           join(dispGroupTable, dispBaseTable,
                                dispGroupTable.c.id == dispBaseTable.c.id)))

        dispGroupNameByGroupId = {
            o.id: '%s|%s' % (o.coordSetId, o.name)
            for o in qry.fetchall()
        }

        del qry

        # -----
        # Clone the child disps
        cloneDisps = []
        cloneLiveDbDispLinks = []

        for dispPtr in dispGroupPointers:
            if not dispPtr.targetDispGroupId:
                logger.debug("Pointer has no targetGroupId id=%s", dispPtr.id)
                continue

            dispGroupChilds = dispGroupChildsByGroupId.get(
                dispPtr.targetDispGroupId)

            if not dispGroupChilds:
                logger.warning(
                    "Pointer points to missing DispGroup,"
                    " id=%s, targetGroupId=%s", dispPtr.id,
                    dispPtr.targetDispGroupId)
                continue

            x, y = json.loads(dispPtr.geomJson)
            dispPtr.targetDispGroupName = \
                dispGroupNameByGroupId[dispPtr.targetDispGroupId]

            for templateDisp in dispGroupChilds:
                # Create the clone
                cloneDisp = templateDisp.tupleClone()
                cloneDisps.append(cloneDisp)

                cloneDisp.coordSetId = dispPtr.coordSetId

                # Offset the geometry
                geom = json.loads(cloneDisp.geomJson)
                geom = _scaleDispGeom(geom, 1, 1, x, y)
                cloneDisp.geomJson = json.dumps(geom)

                # Assign the clone to the DispGroupPointer
                cloneDisp.groupId = dispPtr.id

                for dispLink in templateDisp.liveDbLinks:
                    cloneDispLink = dispLink.tupleClone()
                    cloneLiveDbDispLinks.append(cloneDispLink)

                    cloneDispLink.id = None
                    cloneDispLink.disp = cloneDisp
                    cloneDispLink.coordSetId = dispPtr.coordSetId

        # -----
        # Preallocate the IDs for performance on PostgreSQL
        dispIdGen = CeleryDbConn.prefetchDeclarativeIds(
            DispBase, len(cloneDisps))
        for cloneDisp in cloneDisps:
            cloneDisp.id = next(dispIdGen)

        # Preallocate the IDs for performance on PostgreSQL
        dispLinkIdGen = CeleryDbConn.prefetchDeclarativeIds(
            LiveDbDispLink, len(cloneLiveDbDispLinks))
        for cloneDispLink in cloneLiveDbDispLinks:
            cloneDispLink.id = next(dispLinkIdGen)
            cloneDispLink.dispId = cloneDispLink.disp.id
            cloneDispLink.disp = None

        # -----
        # Create the new list of IDs to compile
        # Do this here, otherwise it will cause a DB refresh if it's after the commit.
        dispIdsIncludingClones = dispIds + [o.id for o in cloneDisps]

        ormSession.bulk_save_objects(cloneDisps, update_changed_only=False)
        ormSession.bulk_save_objects(cloneLiveDbDispLinks,
                                     update_changed_only=False)

        ormSession.commit()

        logger.debug("Cloned %s disp group objects in %s", len(cloneDisps),
                     (datetime.now(pytz.utc) - startTime))

    except Exception:
        ormSession.rollback()
        raise

    finally:
        ormSession.close()

    return dispIdsIncludingClones
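
The _scaleDispGeom helper used above is not included in this listing. A minimal sketch, assuming geomJson decodes to a flat [x0, y0, x1, y1, ...] list and that the (geom, 1, 1, x, y) call above means "scale by 1 and translate by (x, y)".

def _scaleDispGeom(geom: List[float], sx: float, sy: float,
                   tx: float, ty: float) -> List[float]:
    # Scale then translate the alternating x/y values of a flat coordinate list.
    scaled = []
    for index, value in enumerate(geom):
        if index % 2 == 0:
            scaled.append(value * sx + tx)
        else:
            scaled.append(value * sy + ty)
    return scaled
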
Example 5
def _insertOrUpdateObjects(newDocuments: List[ImportDocumentTuple],
                           modelSetId: int,
                           docTypeIdsByName: Dict[str, int]) -> None:
    """ Insert or Update Objects

    1) Find objects and update them
    2) Insert objects if they are missing

    """

    documentTable = DocDbDocument.__table__
    queueTable = DocDbCompilerQueue.__table__

    startTime = datetime.now(pytz.utc)

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    try:
        dontDeleteObjectIds = []
        objectIdByKey: Dict[str, int] = {}

        objectKeys = [o.key for o in newDocuments]
        chunkKeysForQueue: Set[Tuple[str, str]] = set()

        # Query existing objects
        results = list(
            conn.execute(
                select(columns=[
                    documentTable.c.id, documentTable.c.key,
                    documentTable.c.chunkKey, documentTable.c.documentJson
                ],
                       whereclause=and_(
                           documentTable.c.key.in_(objectKeys),
                           documentTable.c.modelSetId == modelSetId))))

        foundObjectByKey = {o.key: o for o in results}
        del results

        # Get the IDs that we need
        newIdGen = CeleryDbConn.prefetchDeclarativeIds(
            DocDbDocument,
            len(newDocuments) - len(foundObjectByKey))

        # Create state arrays
        inserts = []
        updates = []
        processedKeys = set()

        # Work out which objects have been updated or need inserting
        for importDocument in newDocuments:
            if importDocument.key in processedKeys:
                raise Exception("Key %s exists in import data twice" %
                                importDocument.key)
            processedKeys.add(importDocument.key)

            existingObject = foundObjectByKey.get(importDocument.key)
            importDocumentTypeId = docTypeIdsByName[
                importDocument.documentTypeKey]

            packedJsonDict = {
                k: v
                for k, v in importDocument.document.items()
                if v is not None and v != ''
            }  # 0 / False are still allowed
            packedJsonDict['_dtid'] = importDocumentTypeId
            packedJsonDict['_msid'] = modelSetId
            documentJson = json.dumps(packedJsonDict, sort_keys=True)

            # Work out if we need to update the object type
            if existingObject:
                updates.append(
                    dict(b_id=existingObject.id,
                         b_typeId=importDocumentTypeId,
                         b_documentJson=documentJson))
                dontDeleteObjectIds.append(existingObject.id)

            else:
                id_ = next(newIdGen)
                existingObject = DocDbDocument(
                    id=id_,
                    modelSetId=modelSetId,
                    documentTypeId=importDocumentTypeId,
                    key=importDocument.key,
                    importGroupHash=importDocument.importGroupHash,
                    chunkKey=makeChunkKey(importDocument.modelSetKey,
                                          importDocument.key),
                    documentJson=documentJson)
                inserts.append(existingObject.tupleToSqlaBulkInsertDict())

            objectIdByKey[existingObject.key] = existingObject.id
            chunkKeysForQueue.add((modelSetId, existingObject.chunkKey))

        # Insert the DocDb Objects
        if inserts:
            conn.execute(documentTable.insert(), inserts)

        if updates:
            stmt = (documentTable.update().where(
                documentTable.c.id == bindparam('b_id')).values(
                    documentTypeId=bindparam('b_typeId'),
                    documentJson=bindparam('b_documentJson')))
            conn.execute(stmt, updates)

        if chunkKeysForQueue:
            conn.execute(
                queueTable.insert(),
                [dict(modelSetId=m, chunkKey=c) for m, c in chunkKeysForQueue])

        if inserts or updates or chunkKeysForQueue:
            transaction.commit()
        else:
            transaction.rollback()

        logger.debug("Inserted %s updated %s queued %s chunks in %s",
                     len(inserts), len(updates), len(chunkKeysForQueue),
                     (datetime.now(pytz.utc) - startTime))

    except Exception:
        transaction.rollback()
        raise

    finally:
        conn.close()
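
For reference, a sketch of reading back one of the packed document rows written above. Only the _dtid and _msid keys are known from the packing code; the helper name is hypothetical.

def _unpackDocumentJson(documentJson: str) -> Dict[str, int]:
    # Reverse of the packing above: the documentTypeId and modelSetId are
    # stored inline under the _dtid and _msid keys of the document JSON.
    packedJsonDict = json.loads(documentJson)
    return dict(documentTypeId=packedJsonDict.pop('_dtid'),
                modelSetId=packedJsonDict.pop('_msid'))
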
Example 6
def compileSearchIndexChunk(self, payloadEncodedArgs: bytes) -> List[str]:
    """ Compile Search Index Task

    :param self: A celery reference to this task
    :param payloadEncodedArgs: An encoded payload containing the queue tuples.
    :returns: A list of grid keys that have been updated.
    """
    argData = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    queueItems = argData[0]
    queueItemIds: List[int] = argData[1]

    chunkKeys = list(set([i.chunkKey for i in queueItems]))

    queueTable = SearchIndexCompilerQueue.__table__
    compiledTable = EncodedSearchIndexChunk.__table__
    lastUpdate = datetime.now(pytz.utc).isoformat()

    startTime = datetime.now(pytz.utc)

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:

        logger.debug("Staring compile of %s queueItems in %s", len(queueItems),
                     (datetime.now(pytz.utc) - startTime))

        # Get Model Sets

        total = 0
        existingHashes = _loadExistingHashes(conn, chunkKeys)
        encKwPayloadByChunkKey = _buildIndex(conn, chunkKeys)
        chunksToDelete = []

        inserts = []
        for chunkKey, searchIndexChunkEncodedPayload in encKwPayloadByChunkKey.items(
        ):
            m = hashlib.sha256()
            m.update(searchIndexChunkEncodedPayload)
            encodedHash = b64encode(m.digest()).decode()

            # Compare the hash, and pop the chunk key from the existing hashes
            if chunkKey in existingHashes:
                # At this point we could decide to do an update instead,
                # but inserts are quicker
                if encodedHash == existingHashes.pop(chunkKey):
                    continue

            chunksToDelete.append(chunkKey)
            inserts.append(
                dict(chunkKey=chunkKey,
                     encodedData=searchIndexChunkEncodedPayload,
                     encodedHash=encodedHash,
                     lastUpdate=lastUpdate))

        # Add any chunks that need deleting but have no new data, here
        chunksToDelete.extend(list(existingHashes))

        if chunksToDelete:
            # Delete the old chunks
            conn.execute(
                compiledTable.delete(
                    compiledTable.c.chunkKey.in_(chunksToDelete)))

        if inserts:
            newIdGen = CeleryDbConn.prefetchDeclarativeIds(
                SearchIndex, len(inserts))
            for insert in inserts:
                insert["id"] = next(newIdGen)

        transaction.commit()
        transaction = conn.begin()

        if inserts:
            conn.execute(compiledTable.insert(), inserts)

        logger.debug("Compiled %s SearchIndexes, %s missing, in %s",
                     len(inserts),
                     len(chunkKeys) - len(inserts),
                     (datetime.now(pytz.utc) - startTime))

        total += len(inserts)

        conn.execute(queueTable.delete(queueTable.c.id.in_(queueItemIds)))

        transaction.commit()
        logger.info("Compiled and Committed %s EncodedSearchIndexChunks in %s",
                    total, (datetime.now(pytz.utc) - startTime))

        return chunkKeys

    except Exception as e:
        transaction.rollback()
        # logger.warning(e)  # Just a warning, it will retry
        logger.exception(e)
        raise self.retry(exc=e, countdown=10)

    finally:
        conn.close()
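
compileSearchIndexChunk takes self and calls self.retry, so it runs as a bound Celery task. A hedged registration sketch follows, using a bare Celery app purely for illustration; the project's actual decorators and app object are not shown here.

from celery import Celery  # assumption: a plain Celery app for illustration

celeryApp = Celery('search_index_compiler')

@celeryApp.task(bind=True)
def compileSearchIndexChunkTask(self, payloadEncodedArgs: bytes) -> List[str]:
    # bind=True passes the task instance in as the first argument, which is
    # what makes self.retry(exc=e, countdown=10) available above.
    return compileSearchIndexChunk(self, payloadEncodedArgs)
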
def _importDisps(coordSet: ModelCoordSet, importDisps: List):
    """ Link Disps

    1) Use the AgentImportDispGridLookup to convert lookups from importHash
        to id
    2) Set the coordSetId

    This is not done in a thread because the lookups cause issues

    """

    dispIdGen = CeleryDbConn.prefetchDeclarativeIds(DispBase, len(importDisps))

    dispIdsToCompile = []
    importDispLinks = []
    ormDisps = []

    ormSession = CeleryDbConn.getDbSession()
    try:

        lookupConverter = LookupHashConverter(ormSession,
                                              modelSetId=coordSet.modelSetId,
                                              coordSetId=coordSet.id)

        dispGroupPtrWithTargetHash: List[Tuple[DispGroupPointer, str]] = []
        dispGroupChildWithTargetHash: List[Tuple[DispBase, str]] = []

        # Preload any groups our pointers may point to.

        # Pre-import any DispGroup IDs we may need
        dispGroupTargetImportHashes = [
            o.targetDispGroupHash for o in importDisps
            if o.tupleType() == ImportDispGroupPtrTuple.tupleType()
        ]

        # This will store DispGroup and DispGroupPointer hashes
        groupIdByImportHash: Dict[str, int] = {
            o.importHash: o.id
            for o in ormSession.query(DispBase.importHash, DispBase.id).filter(
                DispBase.importHash.in_(dispGroupTargetImportHashes)).filter(
                    DispBase.coordSetId == coordSet.id)
        }

        del dispGroupTargetImportHashes

        # This is a set of DispGroup.id values.
        # We use it to filter out disps that are part of a DispGroup,
        # as they don't get compiled
        dispGroupIds = set()

        # Sort the DispGroups first, so they are created before any FK references them
        sortedImportDisps = sorted(
            importDisps, key=lambda o: IMPORT_SORT_ORDER[o.tupleType()])

        for importDisp in sortedImportDisps:
            # Convert the geometry into the internal array format
            _convertGeom(importDisp)

            # Create the storage tuple instance, and copy over the data.
            ormDisp = _convertImportTuple(importDisp)
            ormDisps.append(ormDisp)

            # Preallocate the IDs for performance on PostgreSQL
            ormDisp.id = next(dispIdGen)

            # Assign the coord set id.
            ormDisp.coordSetId = coordSet.id

            # If this is a DispGroup, index its ID
            if isinstance(ormDisp, DispGroup):
                dispGroupIds.add(ormDisp.id)
                groupIdByImportHash[ormDisp.importHash] = ormDisp.id

            # If this is a dispGroupPtr, index its targetHash so we can update it
            if isinstance(ormDisp, DispGroupPointer):
                groupIdByImportHash[ormDisp.importHash] = ormDisp.id

                if ormDisp.targetDispGroupName:
                    ormDisp.targetDispGroupName = '%s|%s' % (
                        coordSet.id, ormDisp.targetDispGroupName)

                # Not all DispGroupPointers have targets,
                # they can be orphaned instances
                if importDisp.targetDispGroupHash:
                    dispGroupPtrWithTargetHash.append(
                        (ormDisp, importDisp.targetDispGroupHash))

            # If this disp is a child of a DispGroup, index its parent hash so we can link it
            parentDispGroupHash = getattr(importDisp, "parentDispGroupHash",
                                          None)
            if parentDispGroupHash:
                dispGroupChildWithTargetHash.append(
                    (ormDisp, parentDispGroupHash))

            # Add some interim data to the import display link, so it can be created
            if hasattr(importDisp, "liveDbDispLinks"):
                for importDispLink in importDisp.liveDbDispLinks:
                    attrName = importDispLink.dispAttrName
                    importDispLink.internalRawValue = getattr(
                        ormDisp, attrName)
                    importDispLink.internalDispId = ormDisp.id
                    importDispLinks.append(importDispLink)

            # Convert the values of the liveDb attributes
            lookupConverter.convertLookups(ormDisp)

            # Add the after translate value, this is the Display Value
            if hasattr(importDisp, "liveDbDispLinks"):
                for importDispLink in importDisp.liveDbDispLinks:
                    attrName = importDispLink.dispAttrName
                    importDispLink.internalDisplayValue = getattr(
                        ormDisp, attrName)

            # Queue the Disp to be compiled into a grid.
            # Disps belonging to a DispGroup do not get compiled into grids.
            if ormDisp.groupId not in dispGroupIds:
                dispIdsToCompile.append(ormDisp.id)

        # Link the DispGroups
        # Create the links between the Disp and DispGroup
        for ormDisp, groupHash in dispGroupChildWithTargetHash:
            groupOrmObjId = groupIdByImportHash.get(groupHash)
            if groupOrmObjId is None:
                raise Exception("DispGroup with importHash %s doesn't exist" %
                                groupHash)

            ormDisp.groupId = groupOrmObjId

        # Link the DispGroupPtr to the DispGroup
        # This is only used when the DispGroupPtr points to a disp group
        for ormDisp, groupHash in dispGroupPtrWithTargetHash:
            groupOrmObjId = groupIdByImportHash.get(groupHash)
            if groupOrmObjId is None:
                raise Exception("DispGroup with importHash %s doesn't exist" %
                                groupHash)

            ormDisp.targetDispGroupId = groupOrmObjId

    finally:
        ormSession.close()

    return dispIdsToCompile, importDispLinks, ormDisps
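
IMPORT_SORT_ORDER is referenced above but not listed. A minimal sketch of the idea, assuming DispGroup tuples must sort before the pointers and children that reference them; ImportDispGroupTuple and the rank values are assumptions.

from collections import defaultdict

IMPORT_SORT_ORDER = defaultdict(
    lambda: 2,                                 # everything else comes last
    {ImportDispGroupTuple.tupleType(): 0,      # groups first (assumed name)
     ImportDispGroupPtrTuple.tupleType(): 1})  # then pointers to them
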
def _insertOrUpdateObjects(newSegments: List[GraphDbImportSegmentTuple],
                           modelSetId: int, modelSetKey: str) -> None:
    """ Insert or Update Objects

    1) Find objects and update them
    2) Insert object if the are missing

    """

    segmentTable = GraphDbSegment.__table__
    queueTable = GraphDbCompilerQueue.__table__

    startTime = datetime.now(pytz.utc)
    importHashSet = set()

    chunkKeysForQueue: Set[Tuple[int, str]] = set()

    # Get the IDs that we need
    newIdGen = CeleryDbConn.prefetchDeclarativeIds(GraphDbSegment,
                                                   len(newSegments))

    # Create state arrays
    inserts = []

    newItemKeys = []

    # Work out which objects have been updated or need inserting
    for importSegment in newSegments:
        importHashSet.add(importSegment.importGroupHash)
        segmentJson = importSegment.packJson()

        id_ = next(newIdGen)
        existingObject = GraphDbSegment(
            id=id_,
            modelSetId=modelSetId,
            key=importSegment.key,
            importGroupHash=importSegment.importGroupHash,
            chunkKey=makeChunkKeyForSegmentKey(importSegment.modelSetKey,
                                               importSegment.key),
            segmentJson=segmentJson)
        inserts.append(existingObject.tupleToSqlaBulkInsertDict())

        chunkKeysForQueue.add((modelSetId, existingObject.chunkKey))

        for edge in importSegment.edges:
            newItemKeys.append(
                ItemKeyImportTuple(
                    importGroupHash=importSegment.importGroupHash,
                    itemKey=edge.key,
                    itemType=ItemKeyTuple.ITEM_TYPE_EDGE,
                    segmentKey=importSegment.key))

        for vertex in importSegment.vertexes:
            newItemKeys.append(
                ItemKeyImportTuple(
                    importGroupHash=importSegment.importGroupHash,
                    itemKey=vertex.key,
                    itemType=ItemKeyTuple.ITEM_TYPE_VERTEX,
                    segmentKey=importSegment.key))

    # TODO: If this fails, we could potentially delete by segment key.
    # But that seems a bit hackish; the agents should delete the old data first.
    # Or should they? That might leave a temporary gap in the network.

    # Delete old stuff
    if importHashSet:
        deleteSegment(modelSetKey=modelSetKey,
                      importGroupHashes=list(importHashSet))

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    try:
        # Insert the GraphDb Objects
        if inserts:
            conn.execute(segmentTable.insert(), inserts)

        if chunkKeysForQueue:
            conn.execute(
                queueTable.insert(),
                [dict(modelSetId=m, chunkKey=c) for m, c in chunkKeysForQueue])

        loadItemKeys(conn, newItemKeys, modelSetId, modelSetKey)

        if inserts or chunkKeysForQueue or newItemKeys:
            transaction.commit()
        else:
            transaction.rollback()

        logger.info("Inserted %s queued %s chunks in %s", len(inserts),
                    len(chunkKeysForQueue),
                    (datetime.now(pytz.utc) - startTime))

    except Exception:
        transaction.rollback()
        raise

    finally:
        conn.close()
def _insertOrUpdateBranches(conn,
                            modelSetKey: str,
                            modelSetId: int,
                            newBranches: List[BranchTuple]) -> None:
    """ Insert or Update Branches

    1) Delete existing branches
    2) Insert new branches
    3) Queue chunks for recompile

    """

    startTime = datetime.now(pytz.utc)

    branchIndexTable = BranchIndex.__table__
    queueTable = BranchIndexCompilerQueue.__table__

    importHashSet = set()

    chunkKeysForQueue: Set[Tuple[int, str]] = set()

    # Get the IDs that we need
    newIdGen = CeleryDbConn.prefetchDeclarativeIds(BranchIndex, len(newBranches))

    # Create state arrays
    inserts = []

    # Work out which objects have been updated or need inserting
    for newBranch in newBranches:
        importHashSet.add(newBranch.importGroupHash)

        # noinspection PyTypeChecker
        newBranch.id = next(newIdGen)
        branchJson = newBranch.packJson()

        existingObject = BranchIndex(
            id=newBranch.id,
            coordSetId=newBranch.coordSetId,
            key=newBranch.key,
            updatedDate=newBranch.updatedDate,
            createdDate=newBranch.createdDate,
            importHash=newBranch.importHash,
            importGroupHash=newBranch.importGroupHash,
            chunkKey=makeChunkKeyForBranchIndex(modelSetKey, newBranch.key),
            packedJson=branchJson
        )
        inserts.append(existingObject.tupleToSqlaBulkInsertDict())

        chunkKeysForQueue.add((modelSetId, existingObject.chunkKey))

    # 1) Delete existing branches
    if importHashSet:
        # Make note of the IDs being deleted
        # FIXME: branchIndexIdsBeingDeleted is currently unused
        branchIndexIdsBeingDeleted = [
            item.id for item in
            conn.execute(select(
                distinct=True,
                columns=[branchIndexTable.c.id],
                whereclause=branchIndexTable.c.importGroupHash.in_(importHashSet)
            ))
        ]

        conn.execute(
            branchIndexTable.delete(branchIndexTable.c.importGroupHash.in_(importHashSet))
        )

    # 2) Insert new branches
    if inserts:
        conn.execute(branchIndexTable.insert(), inserts)

    # 3) Queue chunks for recompile
    if chunkKeysForQueue:
        conn.execute(
            queueTable.insert(),
            [dict(modelSetId=m, chunkKey=c) for m, c in chunkKeysForQueue]
        )

    logger.debug("Inserted %s queued %s chunks in %s",
                 len(inserts), len(chunkKeysForQueue),
                 (datetime.now(pytz.utc) - startTime))
def _convertBranchDisps(
        newBranches: List[BranchTuple]) -> typing.Tuple[List, List]:
    """ Insert Disps for Branch

    1) Insert new Disps
    2) Queue disps for recompile

    """
    startTime = datetime.now(pytz.utc)
    # Create state arrays
    newDisps = []
    dispIdsToCompile = []

    # Convert the branch disps into database disps
    for newBranch in newBranches:

        branchDisps = _convertJsonDispsToTuples(newBranch)

        if not branchDisps:
            continue

        # Create the map from the UI temp ID to the DB ID
        oldDispIdMap = {}

        # Set the IDs of the new Disps
        newIdGen = CeleryDbConn.prefetchDeclarativeIds(DispBase,
                                                       len(branchDisps))
        for disp in branchDisps:
            oldDispId = disp.id
            disp.id = next(newIdGen)
            oldDispIdMap[oldDispId] = disp.id
            dispIdsToCompile.append(disp.id)

            newDisps.append(disp)

        # Update the group IDs
        for disp in branchDisps:
            if disp.groupId in oldDispIdMap:
                disp.groupId = oldDispIdMap[disp.groupId]

        # Recreate the branch disp JSON to match the structure of the DispBase
        # tables. To be clear, this converts the data one way and then converts
        # it back again; it ensures the data is consistent (which it should
        # already be if all was right), and it also sets the "hashId".

        # Create the map from the UI temp ID to the DB ID
        oldDispHashIdMap = {}
        newBranchDispItems = []

        newBranch.disps = []
        for disp in branchDisps:
            oldDispHashId = disp.hashId
            # This assigns the hashId to the jsonDict and disp
            newJsonDict = _packDispJson(disp, disp.tupleToSmallJsonDict())
            newBranch.disps.append(newJsonDict)

            oldDispHashIdMap[oldDispHashId] = disp.hashId
            newBranchDispItems.append((disp, newJsonDict))

        for disp, jsonDict in newBranchDispItems:
            if disp.replacesHashId in oldDispHashIdMap:
                disp.replacesHashId = oldDispHashIdMap.get(disp.replacesHashId)
                jsonDict['rid'] = disp.replacesHashId

            disp.dispJson = json.dumps(jsonDict)

            # AFTER the JSON has been dumped to the disp, convert the geometry
            # for storage in the branch: the branch does not store geom JSON as
            # a string, while the Disp tuple/table "geom" field does.
            if 'g' in jsonDict:
                jsonDict['g'] = json.loads(jsonDict['g'])

        del newBranchDispItems

    logger.debug("Converted %s disps for %s branches in %s", len(newDisps),
                 len(newBranches), (datetime.now(pytz.utc) - startTime))

    return newDisps, dispIdsToCompile
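
A hypothetical follow-on sketch: the converted disps are typically bulk-saved and the returned IDs queued for grid compilation, following the same ORM pattern as _cloneDispsForDispGroupPointer above. The function name here is an assumption.

def _storeConvertedBranchDisps(newBranches: List[BranchTuple]) -> List[int]:
    newDisps, dispIdsToCompile = _convertBranchDisps(newBranches)

    ormSession = CeleryDbConn.getDbSession()
    try:
        ormSession.bulk_save_objects(newDisps, update_changed_only=False)
        ormSession.commit()

    except Exception:
        ormSession.rollback()
        raise

    finally:
        ormSession.close()

    return dispIdsToCompile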