def createOrUpdateBranches(self, importBranchesEncodedPayload: bytes) -> None:
    """ Convert Import Branch Tuples

    This method takes import branch tuples, and converts them to
    branch format used throughout the diagram plugin.

    (Thats the packed JSON wrapped by an accessor class)

    """
    # Decode importBranches payload
    importBranches: List[ImportBranchTuple] = (
        Payload().fromEncodedPayload(importBranchesEncodedPayload).tuples)

    # Validate the input importBranches
    _validateNewBranchIndexs(importBranches)

    # Do the import
    groupedBranches = _convertImportBranchTuples(importBranches)

    startTime = datetime.now(pytz.utc)

    dbSession = CeleryDbConn.getDbSession()

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    try:
        for (modelSetKey, modelSetId,
             coordSetId), branches in groupedBranches.items():
            _insertOrUpdateBranches(conn, modelSetKey, modelSetId, branches)

            newDisps, dispIdsToCompile = _convertBranchDisps(branches)

            # NO TRANSACTION
            # Bulk load the Disps
            _bulkInsertDisps(engine, newDisps)

            # Queue the compiler
            DispCompilerQueueController.queueDispIdsToCompileWithSession(
                dispIdsToCompile, conn)

            transaction.commit()
            dbSession.commit()

            # Begin a new connection transaction for the next coord set,
            # otherwise the next commit() would fail on an inactive
            # transaction.
            transaction = conn.begin()

            logger.debug(
                "Completed importing %s branches for coordSetId %s in %s",
                len(branches), coordSetId,
                (datetime.now(pytz.utc) - startTime))

    except Exception as e:
        dbSession.rollback()
        transaction.rollback()
        logger.debug("Retrying createOrUpdateBranches, %s", e)
        logger.exception(e)
        raise self.retry(exc=e, countdown=3)

    finally:
        dbSession.close()
        conn.close()
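
# A minimal caller-side sketch for the task above: pack the ImportBranchTuple
# instances the same way createOrUpdateBranches unpacks them, then dispatch the
# task to a worker. Assumptions: Payload is the vortexpy Payload already
# imported by this module, and createOrUpdateBranches is registered as a bound
# celery task (bind=True); both are inferred, not shown in this listing.
def queueBranchImport(importBranches: List[ImportBranchTuple]):
    importBranchesEncodedPayload = Payload(tuples=importBranches).toEncodedPayload()
    # .delay() schedules the celery task asynchronously on a worker.
    return createOrUpdateBranches.delay(importBranchesEncodedPayload)
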
def deleteTraceConfig(self, modelSetKey: str, traceConfigKeys: List[str]) -> None:
    startTime = datetime.now(pytz.utc)

    traceConfigTable = GraphDbTraceConfig.__table__

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:
        modelSetIdByKey = _loadModelSets()
        modelSetId = modelSetIdByKey[modelSetKey]

        conn.execute(
            traceConfigTable.delete(and_(traceConfigTable.c.key.in_(traceConfigKeys),
                                         traceConfigTable.c.modelSetId == modelSetId))
        )

        transaction.commit()

        logger.info("Deleted %s trace configs in %s",
                     len(traceConfigKeys),
                     (datetime.now(pytz.utc) - startTime))

    except Exception as e:
        transaction.rollback()
        logger.debug("Retrying import graphDb objects, %s", e)
        raise self.retry(exc=e, countdown=3)


    finally:
        conn.close()
def _loadModelSets() -> Dict[str, int]:
    # Load the model set ids, keyed by model set key
    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    try:
        results = list(
            conn.execute(
                select(columns=[_modelSetTable.c.id, _modelSetTable.c.key])))
        modelSetIdByKey = {o.key: o.id for o in results}
        del results

    finally:
        conn.close()
    return modelSetIdByKey
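
# The select(columns=[...]) keyword form used above is the legacy SQLAlchemy
# (pre-1.4) calling style. A sketch of the same lookup in the 1.4+ positional
# style, assuming the same _modelSetTable Table object; the helper name is
# illustrative and not part of this module.
def _loadModelSetsModernStyle() -> Dict[str, int]:
    from sqlalchemy import select as sa_select

    engine = CeleryDbConn.getDbEngine()
    with engine.connect() as conn:
        rows = conn.execute(
            sa_select(_modelSetTable.c.id, _modelSetTable.c.key))
        return {row.key: row.id for row in rows}
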
def _loadCoordSets(modelSetId: int) -> Dict[str, int]:
    # Get the coord sets for this model set
    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    try:
        results = list(
            conn.execute(
                select(columns=[_coordSetTable.c.id, _coordSetTable.c.key],
                       whereclause=_coordSetTable.c.modelSetId == modelSetId)))
        coordSetIdByKey = {o.key: o.id for o in results}
        del results

    finally:
        conn.close()

    return coordSetIdByKey
def _bulkLoadDispsTask(importGroupHash: str, disps: List):
    """ Import Disps Links

    1) Drop all disps with matching importGroupHash

    2) set the  coordSetId

    :param importGroupHash:
    :param disps: An array of disp objects to import
    :return:
    """

    dispTable = DispBase.__table__
    gridKeyIndexTable = GridKeyIndex.__table__
    gridQueueTable = GridKeyCompilerQueue.__table__

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    try:

        stmt = select([gridKeyIndexTable.c.coordSetId,
                       gridKeyIndexTable.c.gridKey]) \
            .where(dispTable.c.importGroupHash == importGroupHash) \
            .select_from(join(gridKeyIndexTable, dispTable,
                              gridKeyIndexTable.c.dispId == dispTable.c.id)) \
            .distinct()

        ins = gridQueueTable.insert().from_select(['coordSetId', 'gridKey'],
                                                  stmt)
        conn.execute(ins)

        conn.execute(dispTable.delete().where(
            dispTable.c.importGroupHash == importGroupHash))

        transaction.commit()

        _bulkInsertDisps(engine, disps)

    except Exception:
        transaction.rollback()
        raise

    finally:
        conn.close()
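
# A self-contained sketch of the insert().from_select() pattern used above:
# the queue rows are derived directly from a SELECT without round-tripping the
# data through Python. Assumes SQLAlchemy 1.4+; the sqlite tables are purely
# illustrative.
def _insertFromSelectDemo() -> None:
    from sqlalchemy import (Column, Integer, MetaData, String, Table,
                            create_engine, select as sa_select)

    engine = create_engine("sqlite://")
    metadata = MetaData()
    src = Table("src", metadata,
                Column("coordSetId", Integer),
                Column("gridKey", String))
    queue = Table("queue", metadata,
                  Column("coordSetId", Integer),
                  Column("gridKey", String))
    metadata.create_all(engine)

    with engine.begin() as conn:
        conn.execute(src.insert(), [dict(coordSetId=1, gridKey="1.0x0"),
                                    dict(coordSetId=1, gridKey="1.0x1")])
        # Insert the distinct (coordSetId, gridKey) pairs straight from the SELECT.
        stmt = sa_select(src.c.coordSetId, src.c.gridKey).distinct()
        conn.execute(queue.insert().from_select(["coordSetId", "gridKey"], stmt))
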
def deleteSegment(self, modelSetKey: str,
                  importGroupHashes: List[str]) -> None:
    startTime = datetime.now(pytz.utc)

    segmentTable = GraphDbSegment.__table__
    queueTable = GraphDbCompilerQueue.__table__

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:

        modelSetIdByKey = _loadModelSets()
        modelSetId = modelSetIdByKey[modelSetKey]

        chunkKeys = conn.execute(
            select([segmentTable.c.modelSetId, segmentTable.c.chunkKey],
                   and_(segmentTable.c.importGroupHash.in_(importGroupHashes),
                        segmentTable.c.modelSetId == modelSetId))).fetchall()

        if chunkKeys:
            conn.execute(
                segmentTable.delete(
                    and_(segmentTable.c.importGroupHash.in_(importGroupHashes),
                         segmentTable.c.modelSetId == modelSetId)))

            conn.execute(queueTable.insert(), chunkKeys)

        deleteItemKeys(conn, modelSetId, importGroupHashes)

        transaction.commit()

        logger.info("Deleted %s, queued %s chunks in %s",
                    len(importGroupHashes), len(chunkKeys),
                    (datetime.now(pytz.utc) - startTime))

    except Exception as e:
        transaction.rollback()
        logger.debug("Retrying graphDb deleteSegment, %s", e)
        raise self.retry(exc=e, countdown=3)

    finally:
        conn.close()
def compileBranchIndexChunk(self, payloadEncodedArgs: bytes) -> List[int]:
    """ Compile BranchIndex Index Task

    :param self: A bound parameter from celery
    :param payloadEncodedArgs: An encoded payload containing the queue tuples.
    :returns: A list of chunk keys that have been updated.
    """
    argData = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    queueItems = argData[0]
    queueItemIds: List[int] = argData[1]

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:
        queueItemsByModelSetId = defaultdict(list)

        for queueItem in queueItems:
            queueItemsByModelSetId[queueItem.modelSetId].append(queueItem)

        for modelSetId, modelSetQueueItems in queueItemsByModelSetId.items():
            _compileBranchIndexChunk(conn, transaction, modelSetId,
                                     modelSetQueueItems)

        queueTable = BranchIndexCompilerQueue.__table__

        transaction = conn.begin()
        conn.execute(queueTable.delete(queueTable.c.id.in_(queueItemIds)))
        transaction.commit()

    except Exception as e:
        transaction.rollback()
        logger.debug("RETRYING task - %s", e)
        raise self.retry(exc=e, countdown=10)

    finally:
        conn.close()

    return list(set([i.chunkKey for i in queueItems]))
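
# A caller-side sketch of how payloadEncodedArgs is expected to be packed,
# inferred from the unpacking above (tuples[0] is the list of queue item
# tuples, tuples[1] is the list of their ids). The helper name is illustrative
# and not part of this module.
def _packCompilerQueueArgs(queueItems: List, queueItemIds: List[int]) -> bytes:
    return Payload(tuples=[queueItems, queueItemIds]).toEncodedPayload()
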
def _insertToDb(dispIds, gridCompiledQueueItems, gridKeyIndexesByDispId,
                locationCompiledQueueItems, locationIndexByDispId, queueIds):
    """ Insert to DB

    This method performs the DB inserts and deletes after the data has been calculated.

    """
    startTime = datetime.now(pytz.utc)

    dispBaseTable = DispBase.__table__
    dispQueueTable = DispIndexerQueue.__table__

    gridKeyIndexTable = GridKeyIndex.__table__
    gridQueueTable = GridKeyCompilerQueue.__table__

    locationIndexTable = LocationIndex.__table__
    locationIndexCompilerQueueTable = LocationIndexCompilerQueue.__table__

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:
        lockedDispIds = conn.execute(
            Select(whereclause=dispBaseTable.c.id.in_(dispIds),
                   columns=[dispBaseTable.c.id],
                   for_update=True))

        lockedDispIds = [o[0] for o in lockedDispIds]

        # Ensure that the Disps exist, otherwise we get an integrity error.
        gridKeyIndexes = []
        locationIndexes = []
        for dispId in lockedDispIds:
            gridKeyIndexes.extend(gridKeyIndexesByDispId[dispId])

            if dispId in locationIndexByDispId:
                locationIndexes.append(locationIndexByDispId[dispId])

        # Delete existing items in the location and grid index

        # grid index
        conn.execute(
            gridKeyIndexTable.delete(gridKeyIndexTable.c.dispId.in_(dispIds)))

        # location index
        conn.execute(
            locationIndexTable.delete(
                locationIndexTable.c.dispId.in_(dispIds)))

        # ---------------
        # Insert the Grid Key indexes
        if gridKeyIndexes:
            conn.execute(gridKeyIndexTable.insert(), gridKeyIndexes)

        # Directly insert into the Grid compiler queue.
        if gridCompiledQueueItems:
            conn.execute(gridQueueTable.insert(), [
                dict(coordSetId=i.coordSetId, gridKey=i.gridKey)
                for i in gridCompiledQueueItems
            ])

        # ---------------
        # Insert the Location indexes
        if locationIndexes:
            conn.execute(locationIndexTable.insert(), locationIndexes)

        # Directly insert into the Location compiler queue.
        if locationCompiledQueueItems:
            conn.execute(locationIndexCompilerQueueTable.insert(), [
                dict(modelSetId=i.modelSetId, indexBucket=i.indexBucket)
                for i in locationCompiledQueueItems
            ])

        # ---------------
        # Finally, delete the disp queue items

        conn.execute(dispQueueTable.delete(dispQueueTable.c.id.in_(queueIds)))

        transaction.commit()
        logger.debug("Committed %s GridKeyIndex in %s", len(gridKeyIndexes),
                     (datetime.now(pytz.utc) - startTime))

    except Exception:
        transaction.rollback()
        raise

    finally:
        conn.close()
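
# A standalone sketch of the row-locking step above using the non-legacy
# SQLAlchemy API: select(...).with_for_update() issues SELECT ... FOR UPDATE,
# so the returned disp ids stay locked for the rest of the transaction.
# Assumes SQLAlchemy 1.4+; names mirror _insertToDb, but the helper itself is
# illustrative.
def _lockDispRows(conn, dispIds: List[int]) -> List[int]:
    from sqlalchemy import select as sa_select

    dispBaseTable = DispBase.__table__
    stmt = (sa_select(dispBaseTable.c.id)
            .where(dispBaseTable.c.id.in_(dispIds))
            .with_for_update())
    return [row.id for row in conn.execute(stmt)]
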
def _insertOrUpdateObjects(newDocuments: List[ImportDocumentTuple],
                           modelSetId: int,
                           docTypeIdsByName: Dict[str, int]) -> None:
    """ Insert or Update Objects

    1) Find objects and update them
    2) Insert object if the are missing

    """

    documentTable = DocDbDocument.__table__
    queueTable = DocDbCompilerQueue.__table__

    startTime = datetime.now(pytz.utc)

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    try:
        dontDeleteObjectIds = []
        objectIdByKey: Dict[str, int] = {}

        objectKeys = [o.key for o in newDocuments]
        chunkKeysForQueue: Set[Tuple[int, str]] = set()

        # Query existing objects
        results = list(
            conn.execute(
                select(columns=[
                    documentTable.c.id, documentTable.c.key,
                    documentTable.c.chunkKey, documentTable.c.documentJson
                ],
                       whereclause=and_(
                           documentTable.c.key.in_(objectKeys),
                           documentTable.c.modelSetId == modelSetId))))

        foundObjectByKey = {o.key: o for o in results}
        del results

        # Get the IDs that we need
        newIdGen = CeleryDbConn.prefetchDeclarativeIds(
            DocDbDocument,
            len(newDocuments) - len(foundObjectByKey))

        # Create state arrays
        inserts = []
        updates = []
        processedKeys = set()

        # Work out which objects have been updated or need inserting
        for importDocument in newDocuments:
            if importDocument.key in processedKeys:
                raise Exception("Key %s exists in import data twice" %
                                importDocument.key)
            processedKeys.add(importDocument.key)

            existingObject = foundObjectByKey.get(importDocument.key)
            importDocumentTypeId = docTypeIdsByName[
                importDocument.documentTypeKey]

            packedJsonDict = {
                k: v
                for k, v in importDocument.document.items()
                if v is not None and v != ''
            }  # 0 / False are still allowed; None and empty strings are dropped
            packedJsonDict['_dtid'] = importDocumentTypeId
            packedJsonDict['_msid'] = modelSetId
            documentJson = json.dumps(packedJsonDict, sort_keys=True)

            # Work out if we need to update the object type
            if existingObject:
                updates.append(
                    dict(b_id=existingObject.id,
                         b_typeId=importDocumentTypeId,
                         b_documentJson=documentJson))
                dontDeleteObjectIds.append(existingObject.id)

            else:
                id_ = next(newIdGen)
                existingObject = DocDbDocument(
                    id=id_,
                    modelSetId=modelSetId,
                    documentTypeId=importDocumentTypeId,
                    key=importDocument.key,
                    importGroupHash=importDocument.importGroupHash,
                    chunkKey=makeChunkKey(importDocument.modelSetKey,
                                          importDocument.key),
                    documentJson=documentJson)
                inserts.append(existingObject.tupleToSqlaBulkInsertDict())

            objectIdByKey[existingObject.key] = existingObject.id
            chunkKeysForQueue.add((modelSetId, existingObject.chunkKey))

        # Insert the DocDb Objects
        if inserts:
            conn.execute(documentTable.insert(), inserts)

        if updates:
            stmt = (documentTable.update().where(
                documentTable.c.id == bindparam('b_id')).values(
                    documentTypeId=bindparam('b_typeId'),
                    documentJson=bindparam('b_documentJson')))
            conn.execute(stmt, updates)

        if chunkKeysForQueue:
            conn.execute(
                queueTable.insert(),
                [dict(modelSetId=m, chunkKey=c) for m, c in chunkKeysForQueue])

        if inserts or updates or chunkKeysForQueue:
            transaction.commit()
        else:
            transaction.rollback()

        logger.debug("Inserted %s updated %s queued %s chunks in %s",
                     len(inserts), len(updates), len(chunkKeysForQueue),
                     (datetime.now(pytz.utc) - startTime))

    except Exception:
        transaction.rollback()
        raise

    finally:
        conn.close()
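
# A self-contained sketch of the bindparam + executemany UPDATE pattern used
# above. The b_-prefixed parameter names avoid clashing with the real column
# names referenced in the WHERE/VALUES clauses. The sqlite table is purely
# illustrative.
def _bindparamUpdateDemo() -> None:
    from sqlalchemy import (Column, Integer, MetaData, String, Table,
                            bindparam, create_engine)

    engine = create_engine("sqlite://")
    metadata = MetaData()
    docs = Table("docs", metadata,
                 Column("id", Integer, primary_key=True),
                 Column("documentJson", String))
    metadata.create_all(engine)

    with engine.begin() as conn:
        conn.execute(docs.insert(), [dict(id=1, documentJson="{}"),
                                     dict(id=2, documentJson="{}")])

        stmt = (docs.update()
                .where(docs.c.id == bindparam("b_id"))
                .values(documentJson=bindparam("b_documentJson")))

        # Passing a list of dicts makes SQLAlchemy run this as an
        # executemany, one UPDATE per dict.
        conn.execute(stmt, [dict(b_id=1, b_documentJson='{"a": 1}'),
                            dict(b_id=2, b_documentJson='{"b": 2}')])
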
def importLiveDbItems(self, modelSetKey: str,
                      newItems: List[ImportLiveDbItemTuple]) -> List[str]:
    """ Compile Grids Task

    :param self: A celery reference to this task
    :param modelSetKey: The model set name
    :param newItems: The list of new items
    :returns: A list of grid keys that have been updated.
    """

    startTime = datetime.now(pytz.utc)

    session = CeleryDbConn.getDbSession()
    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    liveDbTable = LiveDbItem.__table__
    try:

        liveDbModelSet = getOrCreateLiveDbModelSet(session, modelSetKey)

        # This will remove duplicates
        itemsByKey = {i.key: i for i in newItems}

        allKeys = list(itemsByKey)
        existingKeys = set()

        # Query for existing keys, in chunks of 1000
        chunkSize = 1000
        offset = 0
        while True:
            chunk = allKeys[offset:offset + chunkSize]
            if not chunk:
                break
            offset += chunkSize
            stmt = (select([liveDbTable.c.key])
                    .where(liveDbTable.c.modelSetId == liveDbModelSet.id)
                    .where(makeCoreValuesSubqueryCondition(
                        engine, liveDbTable.c.key, chunk)))

            result = conn.execute(stmt)

            existingKeys.update([o[0] for o in result.fetchall()])

        inserts = []
        newKeys = []

        for newItem in itemsByKey.values():
            if newItem.key in existingKeys:
                continue

            inserts.append(dict(
                modelSetId=liveDbModelSet.id,
                key=newItem.key,
                dataType=newItem.dataType,
                rawValue=newItem.rawValue,
                displayValue=newItem.displayValue,
                importHash=newItem.importHash
            ))

            newKeys.append(newItem.key)

        if not inserts:
            return []

        conn.execute(LiveDbItem.__table__.insert(), inserts)

        transaction.commit()
        logger.info("Inserted %s LiveDbItems, %s already existed, in %s",
                    len(inserts), len(existingKeys), (datetime.now(pytz.utc) - startTime))

        return newKeys

    except Exception as e:
        transaction.rollback()
        logger.debug("Task failed, but it will retry. %s", e)
        raise self.retry(exc=e, countdown=10)

    finally:
        conn.close()
        session.close()
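
# A small generator sketch of the 1000-key chunking loop used above; the
# helper name is illustrative. The usage comment swaps the project-specific
# makeCoreValuesSubqueryCondition() for a plain IN clause to keep the sketch
# self-contained.
def _chunked(values: List, chunkSize: int = 1000):
    for offset in range(0, len(values), chunkSize):
        yield values[offset:offset + chunkSize]

# Usage sketch:
#   for chunk in _chunked(allKeys):
#       result = conn.execute(
#           select([liveDbTable.c.key])
#           .where(liveDbTable.c.modelSetId == liveDbModelSet.id)
#           .where(liveDbTable.c.key.in_(chunk)))
#       existingKeys.update(row[0] for row in result)
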
def compileSearchIndexChunk(self, payloadEncodedArgs: bytes) -> List[str]:
    """ Compile Search Index Task

    :param self: A celery reference to this task
    :param payloadEncodedArgs: An encoded payload containing the queue tuples.
    :returns: A list of chunk keys that have been updated.
    """
    argData = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    queueItems = argData[0]
    queueItemIds: List[int] = argData[1]

    chunkKeys = list(set([i.chunkKey for i in queueItems]))

    queueTable = SearchIndexCompilerQueue.__table__
    compiledTable = EncodedSearchIndexChunk.__table__
    lastUpdate = datetime.now(pytz.utc).isoformat()

    startTime = datetime.now(pytz.utc)

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:

        logger.debug("Staring compile of %s queueItems in %s", len(queueItems),
                     (datetime.now(pytz.utc) - startTime))

        total = 0
        existingHashes = _loadExistingHashes(conn, chunkKeys)
        encKwPayloadByChunkKey = _buildIndex(conn, chunkKeys)
        chunksToDelete = []

        inserts = []
        for chunkKey, searchIndexChunkEncodedPayload in encKwPayloadByChunkKey.items():
            m = hashlib.sha256()
            m.update(searchIndexChunkEncodedPayload)
            encodedHash = b64encode(m.digest()).decode()

            # Compare the hash against the existing chunk; pop() removes the
            # key from existingHashes so only truly orphaned chunks are swept
            # up below. If the hash matches, there is nothing to rewrite.
            if chunkKey in existingHashes:
                # At this point we could decide to do an update instead,
                # but inserts are quicker
                if encodedHash == existingHashes.pop(chunkKey):
                    continue

            chunksToDelete.append(chunkKey)
            inserts.append(
                dict(chunkKey=chunkKey,
                     encodedData=searchIndexChunkEncodedPayload,
                     encodedHash=encodedHash,
                     lastUpdate=lastUpdate))

        # Also delete any chunks that we no longer have data for
        chunksToDelete.extend(list(existingHashes))

        if chunksToDelete:
            # Delete the old chunks
            conn.execute(
                compiledTable.delete(
                    compiledTable.c.chunkKey.in_(chunksToDelete)))

        if inserts:
            newIdGen = CeleryDbConn.prefetchDeclarativeIds(
                SearchIndex, len(inserts))
            for insert in inserts:
                insert["id"] = next(newIdGen)

        transaction.commit()
        transaction = conn.begin()

        if inserts:
            conn.execute(compiledTable.insert(), inserts)

        logger.debug("Compiled %s SearchIndexes, %s missing, in %s",
                     len(inserts),
                     len(chunkKeys) - len(inserts),
                     (datetime.now(pytz.utc) - startTime))

        total += len(inserts)

        conn.execute(queueTable.delete(queueTable.c.id.in_(queueItemIds)))

        transaction.commit()
        logger.info("Compiled and Committed %s EncodedSearchIndexChunks in %s",
                    total, (datetime.now(pytz.utc) - startTime))

        return chunkKeys

    except Exception as e:
        transaction.rollback()
        # logger.warning(e)  # Just a warning, it will retry
        logger.exception(e)
        raise self.retry(exc=e, countdown=10)

    finally:
        conn.close()
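
# A standalone sketch of the "skip unchanged chunks" check above: hash each
# newly encoded chunk payload and only mark it for rewrite when the hash
# differs from the one already stored. Plain dict inputs; the name is
# illustrative, not part of this module.
def _chunksNeedingRewrite(newPayloadByChunkKey: Dict[str, bytes],
                          existingHashByChunkKey: Dict[str, str]) -> Dict[str, str]:
    import hashlib
    from base64 import b64encode

    changed = {}
    for chunkKey, encodedPayload in newPayloadByChunkKey.items():
        encodedHash = b64encode(hashlib.sha256(encodedPayload).digest()).decode()
        if existingHashByChunkKey.get(chunkKey) != encodedHash:
            changed[chunkKey] = encodedHash
    return changed
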
def updateBranches(self, modelSetId: int, branchEncodedPayload: bytes) -> None:
    """ Update Branch

    This method is called from the UI to update a single branch.
    It could be called from a server API as well.

    All the branches must be for the same model set.

    """
    # Decode BranchTuples payload
    updatedBranches: List[BranchTuple] = (
        Payload().fromEncodedPayload(branchEncodedPayload).tuples
    )

    startTime = datetime.now(pytz.utc)

    queueTable = BranchIndexCompilerQueue.__table__
    dispBaseTable = DispBase.__table__
    gridKeyIndexTable = GridKeyIndex.__table__

    gridKeyCompilerQueueTable = GridKeyCompilerQueue.__table__

    branchesByCoordSetId: Dict[int, List[BranchTuple]] = defaultdict(list)
    chunkKeys: Set[str] = set()

    newBranchesToInsert = []

    # Create a lookup of CoordSets by ID
    dbSession = CeleryDbConn.getDbSession()
    try:
        # Get the latest lookups
        modelSet = dbSession.query(ModelSet).filter(ModelSet.id == modelSetId).one()
        coordSetById = {i.id: i for i in dbSession.query(ModelCoordSet).all()}
        dbSession.expunge_all()

        # Update the branches
        # This will be a performance problem if lots of branches are updated,
        # however, on first writing this will just be used by the UI for updating
        # individual branches.
        for branch in updatedBranches:
            try:
                if str(branch.id).startswith("NEW_"):
                    branch.id = None

                if branch.id is None:
                    branchIndex = dbSession.query(BranchIndex) \
                        .filter(BranchIndex.coordSetId == branch.coordSetId) \
                        .filter(BranchIndex.key == branch.key) \
                        .one()
                else:
                    branchIndex = dbSession.query(BranchIndex) \
                        .filter(BranchIndex.id == branch.id) \
                        .one()
                branch.id = branchIndex.id
                branchIndex.packedJson = branch.packJson()
                branchIndex.updatedDate = branch.updatedDate

            except NoResultFound:
                newBranchesToInsert.append(branch)

            branchesByCoordSetId[branch.coordSetId].append(branch)

            chunkKeys.add(makeChunkKeyForBranchIndex(modelSet.key, branch.key))

        dbSession.commit()

    except Exception as e:
        dbSession.rollback()
        logger.debug("Retrying updateBranch, %s", e)
        logger.exception(e)
        raise self.retry(exc=e, countdown=3)

    finally:
        dbSession.close()

    dbSession = CeleryDbConn.getDbSession()

    try:
        if newBranchesToInsert:
            _insertOrUpdateBranches(dbSession, modelSet.key, modelSet.id,
                                    newBranchesToInsert)
            dbSession.commit()

        # Make an array of all branch IDs
        allBranchIds = []
        for branches in branchesByCoordSetId.values():
            allBranchIds.extend([b.id for b in branches])

        # Find all the existing grids affected by these branches.
        gridsToRecompile = dbSession.execute(
            select(distinct=True,
                   columns=[gridKeyIndexTable.c.gridKey, gridKeyIndexTable.c.coordSetId],
                   whereclause=dispBaseTable.c.branchId.in_(allBranchIds))
                .select_from(gridKeyIndexTable.join(dispBaseTable))
        ).fetchall()

        allNewDisps = []
        allDispIdsToCompile = []

        packedJsonUpdates = []
        # Recompile the BranchGridIndexes
        for coordSetId, branches in branchesByCoordSetId.items():
            coordSet = coordSetById[coordSetId]
            assert coordSet.modelSetId == modelSetId, "Branches not all from one model"

            newDisps, dispIdsToCompile = _convertBranchDisps(branches)
            allNewDisps.extend(newDisps)
            allDispIdsToCompile.extend(dispIdsToCompile)

            packedJsonUpdates.extend([
                dict(b_id=b.id, b_packedJson=b.packJson()) for b in branches
            ])

        dbSession.execute(
            dispBaseTable.delete(dispBaseTable.c.branchId.in_(allBranchIds))
        )

        dbSession.commit()

        # NO TRANSACTION
        # Bulk load the Disps
        _bulkInsertDisps(CeleryDbConn.getDbEngine(), allNewDisps)

        # Queue the compiler
        DispCompilerQueueController.queueDispIdsToCompileWithSession(
            allDispIdsToCompile, dbSession
        )

        # Update the JSON again back into the grid index.
        stmt = BranchIndex.__table__.update(). \
            where(BranchIndex.__table__.c.id == bindparam('b_id')) \
            .values(packedJson=bindparam('b_packedJson'))
        dbSession.execute(stmt, packedJsonUpdates)

        # 3) Queue chunks for recompile
        dbSession.execute(
            queueTable.insert(),
            [dict(modelSetId=modelSetId, chunkKey=c) for c in chunkKeys]
        )

        # 4) Queue the affected grid keys for recompile
        if gridsToRecompile:
            dbSession.execute(
                gridKeyCompilerQueueTable.insert(),
                [dict(coordSetId=item.coordSetId, gridKey=item.gridKey)
                 for item in gridsToRecompile]
            )

        dbSession.commit()

        logger.debug("Updated %s BranchIndexes queued %s chunks in %s",
                     len(updatedBranches), len(chunkKeys),
                     (datetime.now(pytz.utc) - startTime))

    except Exception as e:
        dbSession.rollback()
        logger.debug("Retrying updateBranch, %s", e)
        logger.exception(e)
        raise self.retry(exc=e, countdown=3)

    finally:
        dbSession.close()
def importDispLinks(coordSet: ModelCoordSet,
                    importGroupHash: str,
                    importDispLinks: List[ImportLiveDbDispLinkTuple]
                    ) -> List[ImportLiveDbItemTuple]:
    """ Import Disps Links

    1) Drop all disps with matching importGroupHash

    2) set the  coordSetId

    :param coordSet:
    :param importGroupHash:
    :param importDispLinks: An array of import LiveDB Disp Links to import
    :return:
    """
    dispLinkTable = LiveDbDispLink.__table__
    dispLinkIdIterator = prefetchDeclarativeIds(LiveDbDispLink, len(importDispLinks))

    startTime = datetime.now(pytz.utc)

    ormSession = CeleryDbConn.getDbSession()
    try:

        ormSession.execute(dispLinkTable
                           .delete()
                           .where(dispLinkTable.c.importGroupHash == importGroupHash))

        if not importDispLinks:
            return []

        liveDbItemsToImportByKey = {}

        dispLinkInserts = []

        for importDispLink in importDispLinks:
            dispLink = _convertImportDispLinkTuple(coordSet, importDispLink)
            dispLink.id = next(dispLinkIdIterator)

            liveDbItem = _makeImportLiveDbItem(
                importDispLink, liveDbItemsToImportByKey
            )

            dispLink.liveDbKey = liveDbItem.key
            dispLinkInserts.append(dispLink.tupleToSqlaBulkInsertDict())

        # if dispLinkInserts:
        #     ormSession.execute(LiveDbDispLink.__table__.insert(), dispLinkInserts)

        ormSession.commit()

        if dispLinkInserts:
            # This is committed via the raw connection below, outside the ORM session
            rawConn = CeleryDbConn.getDbEngine().raw_connection()
            pgCopyInsert(rawConn, LiveDbDispLink.__table__, dispLinkInserts)
            rawConn.commit()

        logger.info(
            "Inserted %s LiveDbDispLinks in %s",
            len(dispLinkInserts), (datetime.now(pytz.utc) - startTime)
        )

        return list(liveDbItemsToImportByKey.values())

    finally:
        ormSession.close()
def _insertOrUpdateObjects(newSegments: List[GraphDbImportSegmentTuple],
                           modelSetId: int, modelSetKey: str) -> None:
    """ Insert or Update Objects

    1) Find objects and update them
    2) Insert object if the are missing

    """

    segmentTable = GraphDbSegment.__table__
    queueTable = GraphDbCompilerQueue.__table__

    startTime = datetime.now(pytz.utc)
    importHashSet = set()

    chunkKeysForQueue: Set[Tuple[int, str]] = set()

    # Get the IDs that we need
    newIdGen = CeleryDbConn.prefetchDeclarativeIds(GraphDbSegment,
                                                   len(newSegments))

    # Create state arrays
    inserts = []

    newItemKeys = []

    # Work out which objects have been updated or need inserting
    for importSegment in newSegments:
        importHashSet.add(importSegment.importGroupHash)
        segmentJson = importSegment.packJson()

        id_ = next(newIdGen)
        existingObject = GraphDbSegment(
            id=id_,
            modelSetId=modelSetId,
            key=importSegment.key,
            importGroupHash=importSegment.importGroupHash,
            chunkKey=makeChunkKeyForSegmentKey(importSegment.modelSetKey,
                                               importSegment.key),
            segmentJson=segmentJson)
        inserts.append(existingObject.tupleToSqlaBulkInsertDict())

        chunkKeysForQueue.add((modelSetId, existingObject.chunkKey))

        for edge in importSegment.edges:
            newItemKeys.append(
                ItemKeyImportTuple(
                    importGroupHash=importSegment.importGroupHash,
                    itemKey=edge.key,
                    itemType=ItemKeyTuple.ITEM_TYPE_EDGE,
                    segmentKey=importSegment.key))

        for vertex in importSegment.vertexes:
            newItemKeys.append(
                ItemKeyImportTuple(
                    importGroupHash=importSegment.importGroupHash,
                    itemKey=vertex.key,
                    itemType=ItemKeyTuple.ITEM_TYPE_VERTEX,
                    segmentKey=importSegment.key))

    # TODO: If this fails, we could potentially delete by segment key.
    # But that seems a bit hackish; the agents should delete the old first.
    # Or should they? That might leave a temporary gap in the network.

    # Delete old stuff
    if importHashSet:
        deleteSegment(modelSetKey=modelSetKey,
                      importGroupHashes=list(importHashSet))

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    try:
        # Insert the GraphDb Objects
        if inserts:
            conn.execute(segmentTable.insert(), inserts)

        if chunkKeysForQueue:
            conn.execute(
                queueTable.insert(),
                [dict(modelSetId=m, chunkKey=c) for m, c in chunkKeysForQueue])

        loadItemKeys(conn, newItemKeys, modelSetId, modelSetKey)

        if inserts or chunkKeysForQueue or newItemKeys:
            transaction.commit()
        else:
            transaction.rollback()

        logger.info("Inserted %s queued %s chunks in %s", len(inserts),
                    len(chunkKeysForQueue),
                    (datetime.now(pytz.utc) - startTime))

    except Exception:
        transaction.rollback()
        raise

    finally:
        conn.close()
def compileGrids(self, payloadEncodedArgs: bytes) -> List[str]:
    """ Compile Grids Task

    :param self: A celery reference to this task
    :param payloadEncodedArgs: An encoded payload containing the queue tuples.
    :returns: A list of grid keys that have been updated.
    """
    argData = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    queueItems = argData[0]
    queueItemIds: List[int] = argData[1]

    gridKeys = list(set([i.gridKey for i in queueItems]))
    coordSetIdByGridKey = {i.gridKey: i.coordSetId for i in queueItems}

    queueTable = GridKeyCompilerQueue.__table__
    gridTable = GridKeyIndexCompiled.__table__

    startTime = datetime.now(pytz.utc)

    session = CeleryDbConn.getDbSession()
    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:

        logger.debug("Staring compile of %s queueItems in %s",
                     len(queueItems), (datetime.now(pytz.utc) - startTime))

        total = 0
        dispData = _qryDispData(session, gridKeys)

        conn.execute(gridTable.delete(gridTable.c.gridKey.in_(gridKeys)))

        transaction.commit()
        transaction = conn.begin()

        inserts = []
        for gridKey, dispJsonStr in dispData.items():
            m = hashlib.sha256()
            m.update(gridKey.encode())
            m.update(dispJsonStr.encode())
            gridTupleHash = b64encode(m.digest()).decode()

            gridTuple = GridTuple(
                gridKey=gridKey,
                dispJsonStr=dispJsonStr,
                lastUpdate=gridTupleHash
            )

            encodedGridTuple = Payload(tuples=[gridTuple]).toEncodedPayload()

            inserts.append(dict(coordSetId=coordSetIdByGridKey[gridKey],
                                gridKey=gridKey,
                                lastUpdate=gridTupleHash,
                                encodedGridTuple=encodedGridTuple))

        if inserts:
            conn.execute(gridTable.insert(), inserts)

        logger.debug("Compiled %s gridKeys, %s missing, in %s",
                     len(inserts),
                     len(gridKeys) - len(inserts), (datetime.now(pytz.utc) - startTime))

        total += len(inserts)

        conn.execute(queueTable.delete(queueTable.c.id.in_(queueItemIds)))

        transaction.commit()
        logger.info("Compiled and Committed %s GridKeyIndexCompileds in %s",
                    total, (datetime.now(pytz.utc) - startTime))

        return gridKeys

    except NotAllDispsCompiledException as e:
        logger.warning("Retrying, Not all disps for gridKey %s are compiled", gridKeys)
        raise self.retry(exc=e, countdown=1)

    except Exception as e:
        transaction.rollback()
        logger.debug("Compile of grids failed, retrying : %s", gridKeys)
        raise self.retry(exc=e, countdown=2)

    finally:
        conn.close()
        session.close()
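
# A standalone sketch of the content hash computed above: compileGrids stores
# a sha256 of the grid key plus its disp JSON as "lastUpdate", so the token
# changes exactly when the compiled grid content changes. The helper name is
# illustrative, not part of this module.
def _gridContentHash(gridKey: str, dispJsonStr: str) -> str:
    import hashlib
    from base64 import b64encode

    m = hashlib.sha256()
    m.update(gridKey.encode())
    m.update(dispJsonStr.encode())
    return b64encode(m.digest()).decode()
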
def compileLocationIndex(self, payloadEncodedArgs: bytes) -> List[str]:
    """ Compile Location Index Task

    :param self: A celery reference to this task
    :param payloadEncodedArgs: An encoded payload containing the queue tuples.
    :returns: A list of location index buckets that have been updated.
    """
    argData = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    queueItems = argData[0]
    queueItemIds: List[int] = argData[1]

    indexBuckets = list(set([i.indexBucket for i in queueItems]))
    modelSetIdByIndexBucket = {i.indexBucket: i.modelSetId for i in queueItems}

    queueTable = LocationIndexCompilerQueue.__table__
    compiledTable = LocationIndexCompiled.__table__
    lastUpdate = datetime.now(pytz.utc).isoformat()

    startTime = datetime.now(pytz.utc)

    session = CeleryDbConn.getDbSession()
    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:

        logger.debug("Staring compile of %s queueItems in %s",
                     len(queueItems), (datetime.now(pytz.utc) - startTime))

        # Get Model Sets

        modelSetIds = list(set(modelSetIdByIndexBucket.values()))
        modelSetQry = (
            session.query(ModelSet.key, ModelSet.id)
                .filter(ModelSet.id.in_(modelSetIds))
        )

        modelSetKeyByModelSetId = {o.id: o.key for o in modelSetQry}

        total = 0
        dispData = _buildIndex(session, indexBuckets)

        conn.execute(compiledTable.delete(
            makeCoreValuesSubqueryCondition(engine, compiledTable.c.indexBucket,
                                            indexBuckets)
        ))
        transaction.commit()
        transaction = conn.begin()

        inserts = []
        for indexBucket, jsonStr in dispData.items():
            modelSetId = modelSetIdByIndexBucket[indexBucket]
            modelSetKey = modelSetKeyByModelSetId[modelSetId]

            m = hashlib.sha256()
            m.update(modelSetKey.encode())
            m.update(jsonStr.encode())
            dataHash = b64encode(m.digest()).decode()

            locationIndexTuple = LocationIndexTuple(
                modelSetKey=modelSetKey,
                indexBucket=indexBucket,
                jsonStr=jsonStr,
                lastUpdate=dataHash

            )

            blobData = Payload(tuples=[locationIndexTuple]).toEncodedPayload()

            inserts.append(dict(modelSetId=modelSetId,
                                indexBucket=indexBucket,
                                lastUpdate=dataHash,
                                blobData=blobData))

        if inserts:
            conn.execute(compiledTable.insert(), inserts)

        logger.debug("Compiled %s LocationIndexes, %s missing, in %s",
                     len(inserts),
                     len(indexBuckets) - len(inserts),
                     (datetime.now(pytz.utc) - startTime))

        total += len(inserts)

        conn.execute(queueTable.delete(
            makeCoreValuesSubqueryCondition(engine, queueTable.c.id, queueItemIds)
        ))

        transaction.commit()
        logger.info("Compiled and Comitted %s LocationIndexCompileds in %s",
                    total, (datetime.now(pytz.utc) - startTime))

        return indexBuckets

    except Exception as e:
        transaction.rollback()
        # logger.warning(e)  # Just a warning, it will retry
        logger.exception(e)
        raise self.retry(exc=e, countdown=2)

    finally:
        conn.close()
        session.close()
def removeBranches(self, modelSetKey: str, coordSetKey: str, keys: List[str]) -> None:
    """ Remove Branches

    This worker task removes branches from the indexes.

    """

    startTime = datetime.now(pytz.utc)

    branchIndexTable = BranchIndex.__table__
    queueTable = BranchIndexCompilerQueue.__table__

    # Create a lookup of CoordSets by ID
    dbSession = CeleryDbConn.getDbSession()
    try:
        coordSet = dbSession.query(ModelCoordSet) \
            .filter(ModelCoordSet.modelSet.has(key=modelSetKey)) \
            .filter(ModelCoordSet.key == coordSetKey) \
            .one()

        dbSession.expunge_all()

    finally:
        dbSession.close()

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    try:
        items = conn.execute(select(
            distinct=True,
            columns=[branchIndexTable.c.id, branchIndexTable.c.chunkKey],
            whereclause=and_(branchIndexTable.c.key.in_(keys),
                             branchIndexTable.c.coordSetId == coordSet.id)
        )).fetchall()

        branchIndexIds = [i.id for i in items]
        chunkKeys = set([i.chunkKey for i in items])

        # 1) Delete the disps created by these branches
        _deleteBranchDisps(conn, branchIndexIds)

        # 2) Delete the existing branch indexes
        conn.execute(
            branchIndexTable.delete(branchIndexTable.c.id.in_(branchIndexIds))
        )

        # 3) Queue chunks for recompile
        conn.execute(
            queueTable.insert(),
            [dict(modelSetId=coordSet.modelSetId, chunkKey=c) for c in chunkKeys]
        )

        transaction.commit()
        logger.debug("Deleted %s BranchIndexes queued %s chunks in %s",
                     len(branchIndexIds), len(chunkKeys),
                     (datetime.now(pytz.utc) - startTime))

    except Exception as e:
        transaction.rollback()
        logger.debug("Retrying createOrUpdateBranches, %s", e)
        logger.exception(e)
        raise self.retry(exc=e, countdown=3)

    finally:
        conn.close()