def test_degradation_linear(self):
    """
    As the input grows, the number of "load" calls (disk IO) should grow
    linearly with the input size, not faster.
    """
    buffer_size = 10
    small_input = 2 * buffer_size
    large_input = 10 * buffer_size

    counters.set_counter('item_buffer', settings.MAX_CHUNKS)

    def loads_per_item(input_size):
        # One full group-by pass; report disk loads per input item.
        counters.reset_counter('load')
        self.validate_group_by(xrange(input_size), buffer_size=buffer_size)
        return 1.0 * counters.get_counter('load') / input_size

    ratio_small = loads_per_item(small_input)
    ratio_large = loads_per_item(large_input)

    self.assertLess(small_input, large_input, 'test different input size')
    # NOTE(review): this asserts loads-per-item does not DECREASE with input
    # size (plausible: with a fixed buffer, small inputs are partly served
    # from memory). Confirm the direction — a superlinear blow-up in loads
    # would also satisfy this inequality.
    self.assertLessEqual(
        ratio_small, ratio_large,
        '#load is linear to input size %s %s' % (ratio_small, ratio_large))
from uuid import uuid1

from michael.settings import MAX_CHUNK_ITEM_BUFFER, REMOVE_TEMP_FILES
from michael.settings import TEMP_DIRECTORY
from michael import counters

# Sentinel distinct from every user-supplied value (None may be a valid item).
NOTHING = object()


def make_temp_filename():
    """Return a unique temporary file path under TEMP_DIRECTORY.

    Potentially we could just use os.tmpnam / os.tmpfile, but nowadays
    that function emits a security warning, so a uuid1-based name is
    used instead.
    """
    # BUG FIX: uuid1 was called here but never imported anywhere in the
    # module's import block, so the first call raised NameError.
    return "%s/%s" % (TEMP_DIRECTORY, str(uuid1()))


# Module-level side effect: initialize the semaphore-style counter that
# caps how many chunk items may be buffered in memory at once.
counters.set_counter("item_buffer", MAX_CHUNK_ITEM_BUFFER)


class FileBackedIterable(object):
    """
    This is a Python iterator.
    Store an iterable in a file, and provide the ability to iterate it by
    reading one item at a time from the file. This way the original
    iterable can be disposed from memory.

    Function next() returns the data and advances the location.
    Function peek() returns the data and may store a reference to it in
    memory. The number of referenced data items is limited by the
    semaphore count settings.MAX_CHUNK_ITEM_BUFFER.
    """