Beispiel #1
0
    def testPurgeOldMetricDataRowsFewerCandidatesThanExpected(
            self, estimateNumRowsToDeleteMock, queryCandidateRowsMock,
            deleteRowsMock):
        # Scenario: the estimate mock reports two full delete batches, while
        # the candidate-query mock can supply only half that many rows.
        batchSize = metric_garbage_collector._MAX_DELETE_BATCH_SIZE
        estimate = batchSize * 2
        numAvailable = estimate // 2

        # Finite pool of (uid, rowid) candidates shared across all query calls
        availableRows = iter(
            [("ABCDEF", rowid) for rowid in xrange(numAvailable)])

        def fakeQueryCandidateRows(limit, **kwargs):
            # Hand out at most `limit` of the not-yet-consumed candidates
            return tuple(itertools.islice(availableRows, limit))

        def fakeDeleteRows(uidRowidPairs, **kwargs):
            # Report every requested row as deleted
            return len(uidRowidPairs)

        estimateNumRowsToDeleteMock.return_value = estimate
        queryCandidateRowsMock.side_effect = fakeQueryCandidateRows
        deleteRowsMock.side_effect = fakeDeleteRows

        # Execute
        numDeleted = metric_garbage_collector.purgeOldMetricDataRows(
            thresholdDays=90)

        # Verify: only the rows that were actually available got deleted
        self.assertEqual(numDeleted, numAvailable)

        self.assertEqual(estimateNumRowsToDeleteMock.call_count, 1)

        # Two candidate queries are expected, but only one delete call
        self.assertEqual(queryCandidateRowsMock.call_count, 2)
        self.assertEqual(deleteRowsMock.call_count, 1)
Beispiel #2
0
    def testPurgeOldMetricDataRowsDeletedLessThanExpected(
            self, estimateNumRowsToDeleteMock, queryCandidateRowsMock,
            deleteRowsMock):
        # Scenario: plenty of candidates are available (three full batches),
        # but one of the delete calls reports fewer rows deleted than a full
        # batch.
        batchSize = metric_garbage_collector._MAX_DELETE_BATCH_SIZE
        estimate = batchSize * 3

        # Pool of (uid, rowid) candidates shared across all query calls
        availableRows = iter(
            [("ABCDEF", rowid) for rowid in xrange(estimate)])

        def fakeQueryCandidateRows(limit, **kwargs):
            # Hand out at most `limit` of the not-yet-consumed candidates
            return tuple(itertools.islice(availableRows, limit))

        # Per-call results of the delete mock: full, half, then full batch
        deletedCounts = [batchSize, batchSize // 2, batchSize]

        estimateNumRowsToDeleteMock.return_value = estimate
        queryCandidateRowsMock.side_effect = fakeQueryCandidateRows
        deleteRowsMock.side_effect = iter(deletedCounts)

        # Execute
        numDeleted = metric_garbage_collector.purgeOldMetricDataRows(
            thresholdDays=90)

        # Verify: the reported total matches what the delete calls returned
        self.assertEqual(numDeleted, sum(deletedCounts))

        self.assertEqual(estimateNumRowsToDeleteMock.call_count, 1)

        # Four candidate queries and three delete calls are expected
        self.assertEqual(queryCandidateRowsMock.call_count, 4)
        self.assertEqual(deleteRowsMock.call_count, 3)
Beispiel #3
0
    def testPurgeOldMetricDataRowsStopAtEstimated(self,
                                                  estimateNumRowsToDeleteMock,
                                                  queryCandidateRowsMock,
                                                  deleteRowsMock):
        # Scenario: one more candidate exists than the estimate mock reported;
        # the extra candidate must be left unconsumed.
        batchSize = metric_garbage_collector._MAX_DELETE_BATCH_SIZE
        estimate = batchSize * 2
        numAvailable = estimate + 1

        # Pool of (uid, rowid) candidates shared across all query calls
        candidatesIter = iter(
            [("ABCDEF", rowid) for rowid in xrange(numAvailable)])

        def fakeQueryCandidateRows(limit, **kwargs):
            # Hand out at most `limit` of the not-yet-consumed candidates
            return tuple(itertools.islice(candidatesIter, limit))

        def fakeDeleteRows(uidRowidPairs, **kwargs):
            # Report every requested row as deleted
            return len(uidRowidPairs)

        estimateNumRowsToDeleteMock.return_value = estimate
        queryCandidateRowsMock.side_effect = fakeQueryCandidateRows
        deleteRowsMock.side_effect = fakeDeleteRows

        # Execute
        numDeleted = metric_garbage_collector.purgeOldMetricDataRows(
            thresholdDays=90)

        # Verify: exactly the estimated number of rows got deleted
        self.assertEqual(numDeleted, estimate)

        self.assertEqual(estimateNumRowsToDeleteMock.call_count, 1)

        self.assertEqual(queryCandidateRowsMock.call_count, 2)
        self.assertEqual(deleteRowsMock.call_count, 2)

        # Make sure it didn't try to retrieve candidates beyond estimated
        # number: the single surplus candidate is still in the pool
        leftoverCandidates = tuple(candidatesIter)
        self.assertEqual(len(leftoverCandidates), 1)
    def testPurgeOldMetricDataRowsDeletedLessThanExpected(
        self, estimateNumRowsToDeleteMock, queryCandidateRowsMock, deleteRowsMock
    ):
        # Scenario: three full batches of candidates are available, but the
        # second delete call reports only half a batch as deleted.
        maxBatch = metric_garbage_collector._MAX_DELETE_BATCH_SIZE
        estimate = maxBatch * 3

        # Pool of (uid, rowid) candidates consumed by successive query calls
        candidatePool = iter([("ABCDEF", rowid) for rowid in xrange(estimate)])

        def serveCandidates(limit, **kwargs):
            # Return up to `limit` candidates that have not been served yet
            return tuple(itertools.islice(candidatePool, limit))

        # Values returned by consecutive calls to the delete mock
        deletedCounts = [maxBatch, maxBatch // 2, maxBatch]

        estimateNumRowsToDeleteMock.return_value = estimate
        queryCandidateRowsMock.side_effect = serveCandidates
        deleteRowsMock.side_effect = iter(deletedCounts)

        # Execute
        numDeleted = metric_garbage_collector.purgeOldMetricDataRows(thresholdDays=90)

        # Verify: the total equals the sum of what each delete call reported
        self.assertEqual(numDeleted, sum(deletedCounts))

        self.assertEqual(estimateNumRowsToDeleteMock.call_count, 1)

        # Four candidate queries and three delete calls are expected
        self.assertEqual(queryCandidateRowsMock.call_count, 4)
        self.assertEqual(deleteRowsMock.call_count, 3)
    def testPurgeOldMetricDataRowsStopAtEstimated(
        self, estimateNumRowsToDeleteMock, queryCandidateRowsMock, deleteRowsMock
    ):
        # Scenario: the candidate pool holds one row more than the estimate
        # mock reports; that surplus row must remain unconsumed afterwards.
        maxBatch = metric_garbage_collector._MAX_DELETE_BATCH_SIZE
        estimate = maxBatch * 2
        poolSize = estimate + 1

        # Pool of (uid, rowid) candidates consumed by successive query calls
        candidatesIter = iter([("ABCDEF", rowid) for rowid in xrange(poolSize)])

        def serveCandidates(limit, **kwargs):
            # Return up to `limit` candidates that have not been served yet
            return tuple(itertools.islice(candidatesIter, limit))

        def deleteEverything(uidRowidPairs, **kwargs):
            # Pretend that each requested row was deleted
            return len(uidRowidPairs)

        estimateNumRowsToDeleteMock.return_value = estimate
        queryCandidateRowsMock.side_effect = serveCandidates
        deleteRowsMock.side_effect = deleteEverything

        # Execute
        numDeleted = metric_garbage_collector.purgeOldMetricDataRows(thresholdDays=90)

        # Verify: exactly the estimated number of rows got deleted
        self.assertEqual(numDeleted, estimate)

        self.assertEqual(estimateNumRowsToDeleteMock.call_count, 1)

        self.assertEqual(queryCandidateRowsMock.call_count, 2)
        self.assertEqual(deleteRowsMock.call_count, 2)

        # Make sure it didn't try to retrieve candidates beyond estimated number
        unconsumed = tuple(candidatesIter)
        self.assertEqual(len(unconsumed), 1)
    def testPurgeOldMetricDataRowsWithoutOldRecords(
        self, estimateNumRowsToDeleteMock, queryCandidateRowsMock, deleteRowsMock
    ):
        # Scenario: the estimate mock reports that nothing is old enough to
        # purge.
        estimateNumRowsToDeleteMock.return_value = 0

        # These should not be called in this test; an empty side-effect
        # sequence gives them no result to return if they are called anyway
        for unexpectedMock in (queryCandidateRowsMock, deleteRowsMock):
            unexpectedMock.side_effect = []

        # Execute
        purgedCount = metric_garbage_collector.purgeOldMetricDataRows(thresholdDays=90)

        # Verify: nothing deleted, and only the estimate was consulted
        self.assertEqual(purgedCount, 0)

        self.assertEqual(estimateNumRowsToDeleteMock.call_count, 1)

        self.assertEqual(queryCandidateRowsMock.call_count, 0)
        self.assertEqual(deleteRowsMock.call_count, 0)
Beispiel #7
0
    def testPurgeOldMetricDataRowsWithoutOldRecords(
            self, estimateNumRowsToDeleteMock, queryCandidateRowsMock,
            deleteRowsMock):
        # Scenario: the estimate mock says there are zero rows to delete.

        # These should not be called in this test, so they get an empty
        # side-effect sequence
        deleteRowsMock.side_effect = []
        queryCandidateRowsMock.side_effect = []

        estimateNumRowsToDeleteMock.return_value = 0

        # Execute
        numPurged = metric_garbage_collector.purgeOldMetricDataRows(
            thresholdDays=90)

        # Verify: zero rows reported as deleted
        self.assertEqual(numPurged, 0)

        # The estimate is consulted exactly once; candidates are never queried
        # and no delete is attempted
        self.assertEqual(estimateNumRowsToDeleteMock.call_count, 1)

        self.assertEqual(queryCandidateRowsMock.call_count, 0)
        self.assertEqual(deleteRowsMock.call_count, 0)
    def testPurgeOldMetricDataRowsFewerCandidatesThanExpected(
        self, estimateNumRowsToDeleteMock, queryCandidateRowsMock, deleteRowsMock
    ):
        # Scenario: the estimate mock promises two full batches, but the
        # candidate pool contains only half of the estimated rows.
        maxBatch = metric_garbage_collector._MAX_DELETE_BATCH_SIZE
        estimate = maxBatch * 2
        poolSize = estimate // 2

        # Pool of (uid, rowid) candidates consumed by successive query calls
        candidatePool = iter([("ABCDEF", rowid) for rowid in xrange(poolSize)])

        def serveCandidates(limit, **kwargs):
            # Return up to `limit` candidates that have not been served yet
            return tuple(itertools.islice(candidatePool, limit))

        def deleteEverything(uidRowidPairs, **kwargs):
            # Pretend that each requested row was deleted
            return len(uidRowidPairs)

        estimateNumRowsToDeleteMock.return_value = estimate
        queryCandidateRowsMock.side_effect = serveCandidates
        deleteRowsMock.side_effect = deleteEverything

        # Execute
        numDeleted = metric_garbage_collector.purgeOldMetricDataRows(thresholdDays=90)

        # Verify: the deleted total equals the size of the candidate pool
        self.assertEqual(numDeleted, poolSize)

        self.assertEqual(estimateNumRowsToDeleteMock.call_count, 1)

        # Two candidate queries are expected, but only one delete call
        self.assertEqual(queryCandidateRowsMock.call_count, 2)
        self.assertEqual(deleteRowsMock.call_count, 1)
    def testPurgeOldMetricData(self):
        # Integration scenario against a temporary repository: insert metric
        # data rows on both sides of the age threshold, purge, then check that
        # only the rows older than the threshold are gone.

        gcThresholdDays = 90

        # NOTE(review): microseconds are dropped presumably so the timestamps
        # compare equal after a database round trip - confirm
        now = datetime.utcnow().replace(microsecond=0)

        uid1 = uuid.uuid1().hex

        # Rows older than the threshold
        oldRows = [
            {
                "value": 1.0,
                "timestamp": now - timedelta(days=gcThresholdDays + 1),
            },
            {
                "value": 2.0,
                "timestamp": now - timedelta(days=gcThresholdDays + 2),
            },
        ]

        # Rows younger than the threshold
        youngRows = [
            {
                "value": 3.0,
                "timestamp": now,
            },
            {
                "value": 4.0,
                "timestamp": now - timedelta(days=gcThresholdDays - 1),
            },
            {
                "value": 5.0,
                "timestamp": now - timedelta(days=gcThresholdDays - 2),
            },
        ]

        allRows = oldRows + youngRows

        # Use a temporary database
        with repository_test_utils.HtmengineManagedTempRepository("metric_gc"):
            engine = htmengine.repository.engineFactory(
                config=htmengine.APP_CONFIG)

            # Add the dummy metric rows
            allData = [(row["value"], row["timestamp"]) for row in allRows]
            with engine.connect() as conn:  # pylint: disable=E1101
                htmengine.repository.addMetric(conn, uid=uid1)
                insertedObjects = htmengine.repository.addMetricData(
                    conn, metricId=uid1, data=allData)
                numInserted = len(insertedObjects)

            self.assertEqual(numInserted, len(allRows))

            # Execute
            numDeleted = metric_garbage_collector.purgeOldMetricDataRows(
                gcThresholdDays)

            # Verify
            self.assertEqual(numDeleted, len(oldRows))

            # Verify that only the old metric data rows got purged
            with engine.connect() as conn:  # pylint: disable=E1101
                remainingRows = htmengine.repository.getMetricData(
                    conn).fetchall()

            self.assertEqual(len(remainingRows), len(youngRows))

            expectedPairs = [(row["value"], row["timestamp"])
                             for row in youngRows]
            actualPairs = [
                (row.metric_value, row.timestamp)  # pylint: disable=E1101
                for row in remainingRows]
            self.assertItemsEqual(expectedPairs, actualPairs)
  def testPurgeOldMetricData(self):
    # Integration scenario against a temporary repository: store metric data
    # rows on both sides of the age threshold, run the purge, and check that
    # only the rows older than the threshold were removed.

    gcThresholdDays = 90

    # NOTE(review): microseconds are dropped presumably so the timestamps
    # compare equal after a database round trip - confirm
    now = datetime.utcnow().replace(microsecond=0)

    uid1 = uuid.uuid1().hex

    # Rows older than the threshold
    oldRows = [
      {"value": 1.0, "timestamp": now - timedelta(days=gcThresholdDays + 1)},
      {"value": 2.0, "timestamp": now - timedelta(days=gcThresholdDays + 2)},
    ]

    # Rows younger than the threshold
    youngRows = [
      {"value": 3.0, "timestamp": now},
      {"value": 4.0, "timestamp": now - timedelta(days=gcThresholdDays - 1)},
      {"value": 5.0, "timestamp": now - timedelta(days=gcThresholdDays - 2)},
    ]

    allRows = oldRows + youngRows

    # Use a temporary database
    with repository_test_utils.ManagedTempRepository("metric_gc"):
      engine = htmengine.repository.engineFactory(config=htmengine.APP_CONFIG)

      # Add the dummy metric rows
      allData = [(row["value"], row["timestamp"]) for row in allRows]
      with engine.connect() as conn:  # pylint: disable=E1101
        htmengine.repository.addMetric(conn, uid=uid1)
        insertedObjects = htmengine.repository.addMetricData(
          conn, metricId=uid1, data=allData)
        numInserted = len(insertedObjects)

      self.assertEqual(numInserted, len(allRows))

      # Execute
      numDeleted = metric_garbage_collector.purgeOldMetricDataRows(
        gcThresholdDays)

      # Verify
      self.assertEqual(numDeleted, len(oldRows))

      # Verify that only the old metric data rows got purged
      with engine.connect() as conn:  # pylint: disable=E1101
        remainingRows = htmengine.repository.getMetricData(conn).fetchall()

      self.assertEqual(len(remainingRows), len(youngRows))

      expectedPairs = [(row["value"], row["timestamp"]) for row in youngRows]
      actualPairs = [
        (row.metric_value, row.timestamp)  # pylint: disable=E1101
        for row in remainingRows]
      self.assertItemsEqual(expectedPairs, actualPairs)