def testPurgeOldMetricDataRowsFewerCandidatesThanExpected(
        self, estimateNumRowsToDeleteMock, queryCandidateRowsMock,
        deleteRowsMock):
    """Purge when fewer candidate rows exist than the estimate promised.

    The estimate is two full delete batches, but the candidate query can
    only ever supply half of that. The purge is expected to report exactly
    the rows that were available, after two candidate queries and a single
    delete call.
    """
    batchSize = metric_garbage_collector._MAX_DELETE_BATCH_SIZE
    estimatedCount = batchSize * 2
    availableCount = estimatedCount // 2

    estimateNumRowsToDeleteMock.return_value = estimatedCount

    # One shared iterator, so successive candidate queries drain the same
    # finite pool of (uid, rowid) pairs.
    candidatePool = iter(
        zip(["ABCDEF"] * availableCount, xrange(availableCount)))

    def fetchCandidates(limit, **_kwargs):
        # Hand out at most `limit` of the remaining candidates.
        return tuple(itertools.islice(candidatePool, limit))

    def countDeleted(uidRowidPairs, **_kwargs):
        # Pretend that every requested row was deleted.
        return len(uidRowidPairs)

    queryCandidateRowsMock.side_effect = fetchCandidates
    deleteRowsMock.side_effect = countDeleted

    # Execute
    deletedTotal = metric_garbage_collector.purgeOldMetricDataRows(
        thresholdDays=90)

    # Verify
    self.assertEqual(deletedTotal, availableCount)
    self.assertEqual(estimateNumRowsToDeleteMock.call_count, 1)
    # NOTE(review): presumably the second query comes back empty and ends
    # the purge loop - confirm against purgeOldMetricDataRows.
    self.assertEqual(queryCandidateRowsMock.call_count, 2)
    self.assertEqual(deleteRowsMock.call_count, 1)
def testPurgeOldMetricDataRowsDeletedLessThanExpected(
        self, estimateNumRowsToDeleteMock, queryCandidateRowsMock,
        deleteRowsMock):
    """Purge when the delete calls remove fewer rows than were requested.

    The estimate is three full batches and that many candidates exist, but
    the second delete call reports only half a batch. The purge is expected
    to return the sum of the reported counts, after four candidate queries
    and three delete calls.
    """
    batchSize = metric_garbage_collector._MAX_DELETE_BATCH_SIZE
    estimatedCount = batchSize * 3

    estimateNumRowsToDeleteMock.return_value = estimatedCount

    # One shared iterator, so successive candidate queries drain the same
    # finite pool of (uid, rowid) pairs.
    candidatePool = iter(
        zip(["ABCDEF"] * estimatedCount, xrange(estimatedCount)))

    def fetchCandidates(limit, **_kwargs):
        # Hand out at most `limit` of the remaining candidates.
        return tuple(itertools.islice(candidatePool, limit))

    queryCandidateRowsMock.side_effect = fetchCandidates

    # Counts reported by successive delete calls; the middle one falls short.
    deletedCounts = [batchSize, batchSize // 2, batchSize]
    deleteRowsMock.side_effect = iter(deletedCounts)

    # Execute
    deletedTotal = metric_garbage_collector.purgeOldMetricDataRows(
        thresholdDays=90)

    # Verify
    self.assertEqual(deletedTotal, sum(deletedCounts))
    self.assertEqual(estimateNumRowsToDeleteMock.call_count, 1)
    # NOTE(review): presumably the fourth query finds the pool exhausted
    # and ends the purge loop - confirm against purgeOldMetricDataRows.
    self.assertEqual(queryCandidateRowsMock.call_count, 4)
    self.assertEqual(deleteRowsMock.call_count, 3)
def testPurgeOldMetricDataRowsStopAtEstimated(
        self, estimateNumRowsToDeleteMock, queryCandidateRowsMock,
        deleteRowsMock):
    """Purge must stop once the estimated number of rows has been deleted.

    The estimate is two full batches while one extra candidate row is
    available. The purge is expected to delete exactly the estimate, using
    two candidate queries and two delete calls, and to leave the extra
    candidate unconsumed.
    """
    batchSize = metric_garbage_collector._MAX_DELETE_BATCH_SIZE
    estimatedCount = batchSize * 2
    availableCount = estimatedCount + 1

    estimateNumRowsToDeleteMock.return_value = estimatedCount

    # One shared iterator, so whatever the purge does not fetch is still
    # there to be counted afterwards.
    candidatePool = iter(
        zip(["ABCDEF"] * availableCount, xrange(availableCount)))

    def fetchCandidates(limit, **_kwargs):
        # Hand out at most `limit` of the remaining candidates.
        return tuple(itertools.islice(candidatePool, limit))

    def countDeleted(uidRowidPairs, **_kwargs):
        # Pretend that every requested row was deleted.
        return len(uidRowidPairs)

    queryCandidateRowsMock.side_effect = fetchCandidates
    deleteRowsMock.side_effect = countDeleted

    # Execute
    deletedTotal = metric_garbage_collector.purgeOldMetricDataRows(
        thresholdDays=90)

    # Verify
    self.assertEqual(deletedTotal, estimatedCount)
    self.assertEqual(estimateNumRowsToDeleteMock.call_count, 1)
    self.assertEqual(queryCandidateRowsMock.call_count, 2)
    self.assertEqual(deleteRowsMock.call_count, 2)

    # Make sure it didn't try to retrieve candidates beyond estimated number
    self.assertEqual(len(tuple(candidatePool)), 1)
def testPurgeOldMetricDataRowsDeletedLessThanExpected(
    self, estimateNumRowsToDeleteMock, queryCandidateRowsMock, deleteRowsMock
):
    """Check the total when one delete call removes less than a full batch.

    Setup: the estimate equals three batches and the candidate query can
    supply that many rows; the delete mock reports a full batch, then half
    a batch, then a full batch.

    Expected: the returned total equals the sum of the reported counts,
    with one estimate call, four candidate queries and three delete calls.
    """
    maxBatch = metric_garbage_collector._MAX_DELETE_BATCH_SIZE
    numEstimated = maxBatch * 3
    estimateNumRowsToDeleteMock.return_value = numEstimated

    # Lazily produced (uid, rowid) pairs shared by all candidate queries.
    remainingCandidates = (
        ("ABCDEF", rowid) for rowid in xrange(numEstimated)
    )
    queryCandidateRowsMock.side_effect = lambda limit, **kwargs: tuple(
        itertools.islice(remainingCandidates, limit)
    )

    # What each successive delete call claims to have removed.
    deletedCounts = [maxBatch, maxBatch // 2, maxBatch]
    deleteRowsMock.side_effect = iter(deletedCounts)

    # Execute
    numPurged = metric_garbage_collector.purgeOldMetricDataRows(
        thresholdDays=90
    )

    # Verify
    self.assertEqual(numPurged, sum(deletedCounts))
    self.assertEqual(estimateNumRowsToDeleteMock.call_count, 1)
    self.assertEqual(queryCandidateRowsMock.call_count, 4)
    self.assertEqual(deleteRowsMock.call_count, 3)
def testPurgeOldMetricDataRowsStopAtEstimated(
    self, estimateNumRowsToDeleteMock, queryCandidateRowsMock, deleteRowsMock
):
    """Check that the purge does not go past the estimated row count.

    Setup: the estimate equals two batches, the candidate query can supply
    one row more than that, and the delete mock reports every requested
    row as deleted.

    Expected: the returned total equals the estimate, with one estimate
    call, two candidate queries and two delete calls; exactly one candidate
    is left unfetched.
    """
    numEstimated = metric_garbage_collector._MAX_DELETE_BATCH_SIZE * 2
    estimateNumRowsToDeleteMock.return_value = numEstimated

    # Lazily produced (uid, rowid) pairs shared by all candidate queries;
    # one more pair than the estimate.
    remainingCandidates = (
        ("ABCDEF", rowid) for rowid in xrange(numEstimated + 1)
    )
    queryCandidateRowsMock.side_effect = lambda limit, **kwargs: tuple(
        itertools.islice(remainingCandidates, limit)
    )
    deleteRowsMock.side_effect = lambda uidRowidPairs, **kwargs: len(
        uidRowidPairs
    )

    # Execute
    numPurged = metric_garbage_collector.purgeOldMetricDataRows(
        thresholdDays=90
    )

    # Verify
    self.assertEqual(numPurged, numEstimated)
    self.assertEqual(estimateNumRowsToDeleteMock.call_count, 1)
    self.assertEqual(queryCandidateRowsMock.call_count, 2)
    self.assertEqual(deleteRowsMock.call_count, 2)

    # Make sure it didn't try to retrieve candidates beyond estimated number
    leftovers = tuple(remainingCandidates)
    self.assertEqual(len(leftovers), 1)
def testPurgeOldMetricDataRowsWithoutOldRecords(
    self, estimateNumRowsToDeleteMock, queryCandidateRowsMock, deleteRowsMock
):
    """Check that a zero estimate short-circuits the purge.

    Expected: the purge returns 0 after a single estimate call and never
    touches the candidate-query or delete mocks.
    """
    # These should not be called in this test; with an empty side_effect
    # sequence there is nothing for a call to return.
    deleteRowsMock.side_effect = []
    queryCandidateRowsMock.side_effect = []

    estimateNumRowsToDeleteMock.return_value = 0

    # Execute
    numPurged = metric_garbage_collector.purgeOldMetricDataRows(
        thresholdDays=90
    )

    # Verify
    self.assertEqual(numPurged, 0)
    self.assertEqual(estimateNumRowsToDeleteMock.call_count, 1)
    self.assertEqual(queryCandidateRowsMock.call_count, 0)
    self.assertEqual(deleteRowsMock.call_count, 0)
def testPurgeOldMetricDataRowsWithoutOldRecords(
        self, estimateNumRowsToDeleteMock, queryCandidateRowsMock,
        deleteRowsMock):
    """Purge when the estimate says there is nothing to delete.

    With an estimate of zero the purge is expected to return 0 after one
    estimate call, without querying for candidates or deleting anything.
    """
    estimateNumRowsToDeleteMock.return_value = 0

    # These should not be called in this test
    for idleMock in (queryCandidateRowsMock, deleteRowsMock):
        idleMock.side_effect = []

    # Execute
    deletedTotal = metric_garbage_collector.purgeOldMetricDataRows(
        thresholdDays=90)

    # Verify
    self.assertEqual(deletedTotal, 0)
    self.assertEqual(estimateNumRowsToDeleteMock.call_count, 1)
    self.assertEqual(queryCandidateRowsMock.call_count, 0)
    self.assertEqual(deleteRowsMock.call_count, 0)
def testPurgeOldMetricDataRowsFewerCandidatesThanExpected(
    self, estimateNumRowsToDeleteMock, queryCandidateRowsMock, deleteRowsMock
):
    """Check the total when the candidate query runs dry before the estimate.

    Setup: the estimate equals two batches, the candidate query can supply
    only half of that, and the delete mock reports every requested row as
    deleted.

    Expected: the returned total equals the number of rows that were
    available, with one estimate call, two candidate queries and one delete
    call.
    """
    numEstimated = metric_garbage_collector._MAX_DELETE_BATCH_SIZE * 2
    numAvailable = numEstimated // 2
    estimateNumRowsToDeleteMock.return_value = numEstimated

    # Lazily produced (uid, rowid) pairs shared by all candidate queries.
    remainingCandidates = (
        ("ABCDEF", rowid) for rowid in xrange(numAvailable)
    )
    queryCandidateRowsMock.side_effect = lambda limit, **kwargs: tuple(
        itertools.islice(remainingCandidates, limit)
    )
    deleteRowsMock.side_effect = lambda uidRowidPairs, **kwargs: len(
        uidRowidPairs
    )

    # Execute
    numPurged = metric_garbage_collector.purgeOldMetricDataRows(
        thresholdDays=90
    )

    # Verify
    self.assertEqual(numPurged, numAvailable)
    self.assertEqual(estimateNumRowsToDeleteMock.call_count, 1)
    self.assertEqual(queryCandidateRowsMock.call_count, 2)
    self.assertEqual(deleteRowsMock.call_count, 1)
def testPurgeOldMetricData(self):
    """Purge against a temporary repository and check which rows survive.

    Two rows older than the threshold and three rows within it are stored
    for one metric. After the purge, the returned count must equal the
    number of old rows and only the young rows may remain.
    """
    gcThresholdDays = 90

    # Microseconds are dropped from the reference time.
    # NOTE(review): presumably so timestamps survive the database
    # round-trip unchanged - confirm.
    now = datetime.utcnow().replace(microsecond=0)

    uid1 = uuid.uuid1().hex

    # Rows past the threshold: expected to be purged.
    oldRows = [
        {"value": 1.0,
         "timestamp": now - timedelta(days=gcThresholdDays + 1)},
        {"value": 2.0,
         "timestamp": now - timedelta(days=gcThresholdDays + 2)},
    ]

    # Rows inside the threshold: expected to survive.
    youngRows = [
        {"value": 3.0,
         "timestamp": now},
        {"value": 4.0,
         "timestamp": now - timedelta(days=gcThresholdDays - 1)},
        {"value": 5.0,
         "timestamp": now - timedelta(days=gcThresholdDays - 2)},
    ]

    allRows = oldRows + youngRows
    expectedRemaining = [
        (row["value"], row["timestamp"]) for row in youngRows]

    # Use a temporary database
    with repository_test_utils.HtmengineManagedTempRepository("metric_gc"):
        engine = htmengine.repository.engineFactory(
            config=htmengine.APP_CONFIG)

        # Add the dummy metric rows
        allData = [(row["value"], row["timestamp"]) for row in allRows]
        with engine.connect() as conn:  # pylint: disable=E1101
            htmengine.repository.addMetric(conn, uid=uid1)
            insertedObjects = htmengine.repository.addMetricData(
                conn, metricId=uid1, data=allData)

        numInserted = len(insertedObjects)
        self.assertEqual(numInserted, len(allRows))

        # Execute
        numDeleted = metric_garbage_collector.purgeOldMetricDataRows(
            gcThresholdDays)

        # Verify
        self.assertEqual(numDeleted, len(oldRows))

        # Verify that only the old metric data rows got purged
        with engine.connect() as conn:  # pylint: disable=E1101
            remainingRows = htmengine.repository.getMetricData(
                conn).fetchall()

        self.assertEqual(len(remainingRows), len(youngRows))

        actualRemaining = [
            (row.metric_value, row.timestamp)
            for row in remainingRows]  # pylint: disable=E1101
        self.assertItemsEqual(expectedRemaining, actualRemaining)
def testPurgeOldMetricData(self):
    """End-to-end purge check using a temporary repository.

    Stores five rows for a single metric - two older than the threshold,
    three within it - runs the purge, and verifies that the reported count
    matches the old rows and that exactly the young rows are left.
    """
    gcThresholdDays = 90
    uid1 = uuid.uuid1().hex

    # Reference time with microseconds dropped.
    # NOTE(review): presumably to match the database's timestamp
    # precision - confirm.
    now = datetime.utcnow().replace(microsecond=0)

    def asPairs(rows):
        # Project row dicts to (value, timestamp) tuples.
        return [(row["value"], row["timestamp"]) for row in rows]

    # Past the threshold: should be purged.
    oldRows = [
        {"value": 1.0, "timestamp": now - timedelta(days=gcThresholdDays + 1)},
        {"value": 2.0, "timestamp": now - timedelta(days=gcThresholdDays + 2)},
    ]

    # Within the threshold: should be kept.
    youngRows = [
        {"value": 3.0, "timestamp": now},
        {"value": 4.0, "timestamp": now - timedelta(days=gcThresholdDays - 1)},
        {"value": 5.0, "timestamp": now - timedelta(days=gcThresholdDays - 2)},
    ]

    allRows = oldRows + youngRows

    # Use a temporary database
    with repository_test_utils.ManagedTempRepository("metric_gc"):
        engine = htmengine.repository.engineFactory(config=htmengine.APP_CONFIG)

        # Add the dummy metric rows
        allData = asPairs(allRows)
        with engine.connect() as conn:  # pylint: disable=E1101
            htmengine.repository.addMetric(conn, uid=uid1)
            insertedObjects = htmengine.repository.addMetricData(
                conn, metricId=uid1, data=allData
            )

        numInserted = len(insertedObjects)
        self.assertEqual(numInserted, len(allRows))

        # Execute
        numDeleted = metric_garbage_collector.purgeOldMetricDataRows(
            gcThresholdDays
        )

        # Verify
        self.assertEqual(numDeleted, len(oldRows))

        # Verify that only the old metric data rows got purged
        with engine.connect() as conn:  # pylint: disable=E1101
            remainingRows = htmengine.repository.getMetricData(conn).fetchall()

        self.assertEqual(len(remainingRows), len(youngRows))

        survivors = [
            (row.metric_value, row.timestamp) for row in remainingRows
        ]  # pylint: disable=E1101
        self.assertItemsEqual(asPairs(youngRows), survivors)