Example #1
0
class OrderedCachedQueue(Queue.Queue, QueueSpeedMeasurement):
    """
    This queue implements all the features explained in CachedQueue (see
    cached_queue.py) plus it will order the items in the queue as they are
    inserted.

    The queue is ordered by a unique identifier that is returned by the object
    being added. If the object is None, then it is added to the end of the
    queue.

    The goal of this ordered queue is to impose an order in which URLs and
    forms identified by the w3af framework are processed by the plugins. Since
    plugins are run in threads, the order in which new URLs are added to the
    queue is "completely random" and depends on HTTP response times, CPU-load,
    memory swapping, etc.
    """

    # Thirty-two 'f' characters sort after any real MD5 hex digest, which is
    # how the None / POISON_PILL items are forced to the end of the queue
    LAST_MD5_HASH = 'f' * 32

    def __init__(self, maxsize=0, name='Unknown'):
        """
        :param maxsize: Maximum number of items to keep in memory; any excess
                        is written to a DiskDict
        :param name: Name used in debug messages and the DiskDict table prefix
        """
        self.name = name
        self.max_in_memory = maxsize
        self.processed_tasks = 0

        QueueSpeedMeasurement.__init__(self)

        # Placeholders only: the real containers are created in _init(),
        # which is invoked by Queue.Queue.__init__() below
        self.queue_order = None
        self.hash_to_uuid = None
        self.memory = None
        self.disk = None

        # We want to send zero to the maxsize of the Queue implementation
        # here because we can write an infinite number of items. But keep
        # in mind that we don't really use the queue storage in any way
        Queue.Queue.__init__(self, maxsize=0)

    def get_name(self):
        """
        :return: The name this queue was created with
        """
        return self.name

    def get_processed_tasks(self):
        """
        :return: The number of items that have been read via _get()
        """
        return self.processed_tasks

    def next_item_saved_to_memory(self):
        """
        :return: True when the next put() will store its item in memory
                 instead of writing it to disk
        """
        return len(self.memory) < self.max_in_memory

    def _init(self, maxsize):
        """
        Initialize the dicts and pointer
        :param maxsize: The max size for the queue
        """
        self.queue_order = list()
        self.hash_to_uuid = dict()
        self.memory = dict()
        self.disk = DiskDict(table_prefix='%sCachedQueue' % self.name)

    def _qsize(self, _len=len):
        # The queue size is the number of items in memory plus on disk
        return _len(self.memory) + _len(self.disk)

    def _get_class_name(self, obj):
        # Best-effort class name, used only in debug messages. The bare
        # except guarantees that logging can never break put()
        try:
            return obj.__class__.__name__
        except:
            return type(obj)

    def _get_hash(self, item):
        """
        :param item: The item being queued; expected to provide get_hash()
                     unless it is None or the POISON_PILL
        :return: The hash used to keep queue_order sorted
        """
        if item is None or item == POISON_PILL:
            # Return ffff...ffff which is the latest (in alphanumeric order)
            # hash that exists in MD5. This forces the None item to be placed
            # at the end of the queue.
            #
            # Warning! If FuzzableRequest.get_hash() ever changes its
            # implementation this will stop working as expected!
            return self.LAST_MD5_HASH

        return item.get_hash()

    def _put(self, item):
        """
        Put a new item in the queue
        """
        #
        #   This is very useful information for finding bottlenecks in the
        #   framework / strategy
        #
        if len(self.memory) == self.max_in_memory:
            #
            #   If you see many messages like this in the scan log, then you
            #   might want to experiment with a larger maxsize for this queue
            #
            msg = ('OrderedCachedQueue.put() will write a %r item to the %s'
                   ' DiskDict. This uses more CPU and disk IO than storing'
                   ' in memory but will avoid high memory usage issues. The'
                   ' current %s DiskDict size is %s.')
            args = (self._get_class_name(item), self.get_name(),
                    self.get_name(), len(self.disk))
            om.out.debug(msg % args)

        #
        #   Get the item hash to store it in the queue order list, and insert
        #   it using bisect.insort() that will keep the order at a low cost
        #
        item_hash = self._get_hash(item)
        bisect.insort(self.queue_order, item_hash)

        #
        #   Keep an in-memory dict that allows us to find the fuzzable requests
        #   in the other dictionaries
        #
        # NOTE(review): uuid4 strings are random, so the bisect.insort() on
        # unique_id_list below does not preserve FIFO order between items
        # that share the same hash — confirm any order is acceptable there
        unique_id = str(uuid.uuid4())

        unique_id_list = self.hash_to_uuid.setdefault(item_hash, [])
        bisect.insort(unique_id_list, unique_id)

        #
        #   And now we just save the item to memory (if there is space) or
        #   disk (if it doesn't fit on memory)
        #
        if len(self.memory) < self.max_in_memory:
            self.memory[unique_id] = item
        else:
            self.disk[unique_id] = item

        self._item_added_to_queue()

    def _get(self):
        """
        Get an item from the queue
        """
        # Take the smallest hash (first in sorted order) and one of the
        # unique ids registered for it
        item_hash = self.queue_order.pop(0)
        unique_id_list = self.hash_to_uuid.pop(item_hash)
        unique_id = unique_id_list.pop(0)

        if unique_id_list:
            #
            # There are still items in this unique_id_list, this is most likely
            # because two items with the same hash were added to the queue, and
            # only one of those has been read.
            #
            # Need to add the other item(s) to the list again
            #
            bisect.insort(self.queue_order, item_hash)
            self.hash_to_uuid[item_hash] = unique_id_list

        # The item lives either in the in-memory cache or in the DiskDict
        try:
            item = self.memory.pop(unique_id)
        except KeyError:
            item = self.disk.pop(unique_id)

            if len(self.disk):
                #
                #   If you see many messages like this in the scan log, then you
                #   might want to experiment with a larger maxsize for this queue
                #
                msg = ('OrderedCachedQueue.get() from %s DiskDict was used to'
                       ' read an item from disk. The current %s DiskDict'
                       ' size is %s.')
                args = (self.get_name(), self.get_name(), len(self.disk))
                om.out.debug(msg % args)

        self._item_left_queue()
        self.processed_tasks += 1
        return item

    def join(self):
        """
        Blocks until all items in the Queue have been read and processed.

        The count of unfinished tasks goes up whenever an item is added to the
        queue. The count goes down whenever a consumer thread calls task_done()
        to indicate the item was retrieved and all work on it is complete.

        When the count of unfinished tasks drops to zero, join() unblocks.
        """
        msg = 'Called join on %s with %s unfinished tasks'
        args = (self.name, self.unfinished_tasks)
        om.out.debug(msg % args)

        self.all_tasks_done.acquire()
        try:
            while self.unfinished_tasks:
                # Wake up every 5 seconds so progress can be logged
                result = self.all_tasks_done.wait(timeout=5)

                # NOTE(review): in Python 2, Condition.wait() returns None
                # whether or not the timeout expired, so this message is
                # logged after every wait — confirm that is the intent
                if result is None:
                    msg = 'Still have %s unfinished tasks in %s join()'
                    args = (self.unfinished_tasks, self.name)
                    om.out.debug(msg % args)
        finally:
            self.all_tasks_done.release()
Example #2
0
class CachedQueue(Queue.Queue, QueueSpeedMeasurement):
    """
    The framework uses the producer / consumer design pattern extensively.
    In order to avoid high memory usage in the queues connecting the different
    parts of the framework we defined a max size.

    When a queue max size is reached, one or more threads will block. This
    line is printed during a real scan:

        Thread blocked 5.76617312431 seconds waiting for Queue.put() to have space
        in the Grep queue. The queue's maxsize is 20.

    In the case of the Grep consumer / producer the problem with a block is increased
    by the fact that HTTP responses won't reach other parts of the framework
    until the queue has space.

    Increasing the queue size would increase memory usage.

    Using an on-disk queue would increase CPU (serialization) and disk IO.

    The CacheQueue is a mix of in-memory and on-disk queue. The first N items
    are stored in memory, when more items are put() we just write them to
    disk.

    The CacheQueue object implements these methods from QueueSpeedMeasurement:
        * get_input_rpm
        * get_output_rpm

    Which allows users to understand how fast a queue is moving.
    """
    def __init__(self, maxsize=0, name='Unknown'):
        """
        :param maxsize: Maximum number of items to keep in memory; any excess
                        is written to a DiskDict
        :param name: Name used in debug messages and the DiskDict table prefix
        """
        self.name = name
        self.max_in_memory = maxsize

        QueueSpeedMeasurement.__init__(self)

        # We want to send zero to the maxsize of the Queue implementation
        # here because we can write an infinite number of items
        Queue.Queue.__init__(self, maxsize=0)

    def get_name(self):
        """
        :return: The name this queue was created with
        """
        return self.name

    def next_item_saved_to_memory(self):
        """
        :return: True when the next put() will store its item in memory
                 instead of writing it to disk
        """
        return len(self.memory) < self.max_in_memory

    def _init(self, maxsize):
        """
        Initialize the dicts and pointer
        :param maxsize: The max size for the queue
        """
        self.memory = dict()
        self.disk = DiskDict(table_prefix='%sCachedQueue' % self.name)
        self.get_pointer = 0
        self.put_pointer = 0

    def _qsize(self, _len=len):
        # `_len` instead of `len` to avoid shadowing the builtin, matching
        # OrderedCachedQueue._qsize. The queue size is the number of items
        # in memory plus on disk
        return _len(self.memory) + _len(self.disk)

    def _get_class_name(self, obj):
        # Best-effort class name, used only in debug messages. Catch
        # Exception rather than a bare except so KeyboardInterrupt and
        # SystemExit are not swallowed here
        try:
            return obj.__class__.__name__
        except Exception:
            return type(obj)

    def _put(self, item):
        """
        Put a new item in the queue
        """
        #
        #   This is very useful information for finding bottlenecks in the
        #   framework / strategy
        #
        if len(self.memory) == self.max_in_memory:
            #
            #   If you see many messages like this in the scan log, then you
            #   might want to experiment with a larger maxsize for this queue
            #
            msg = ('CachedQueue.put() will write a %r item to the %s DiskDict.'
                   ' This uses more CPU and disk IO than storing in memory'
                   ' but will avoid high memory usage issues. The current'
                   ' %s DiskDict size is %s.')
            args = (self._get_class_name(item), self.get_name(),
                    self.get_name(), len(self.disk))
            om.out.debug(msg % args)

        #
        #   And now we just save the item to memory (if there is space) or
        #   disk (if it doesn't fit on memory)
        #
        if len(self.memory) < self.max_in_memory:
            self.memory[self.put_pointer] = item
        else:
            self.disk[self.put_pointer] = item

        self.put_pointer += 1
        self._item_added_to_queue()

    def _get(self):
        """
        Get an item from the queue
        """
        try:
            item = self.memory.pop(self.get_pointer)
        except KeyError:
            item = self.disk.pop(self.get_pointer)

            if len(self.disk):
                #
                #   If you see many messages like this in the scan log, then you
                #   might want to experiment with a larger maxsize for this queue
                #
                msg = ('CachedQueue.get() from %s DiskDict was used to read an'
                       ' item from disk. The current %s DiskDict size is %s.')
                args = (self.get_name(), self.get_name(), len(self.disk))
                om.out.debug(msg % args)

        self._item_left_queue()
        self.get_pointer += 1
        return item
Example #3
0
class CachedQueue(Queue.Queue, QueueSpeedMeasurement):
    """
    A producer / consumer queue that keeps the first N items in memory and
    transparently overflows any additional items to a DiskDict.

    Rationale: a bounded in-memory queue makes producer threads block once
    it fills up, an unbounded one risks high memory usage, and a pure
    on-disk queue pays CPU (serialization) and disk IO on every operation.
    This hybrid keeps the hot path in memory and only pays the disk price
    once the in-memory cache is full.

    From QueueSpeedMeasurement this class inherits:
        * get_input_rpm
        * get_output_rpm

    Which allow users to understand how fast the queue is moving.
    """
    def __init__(self, maxsize=0, name='Unknown'):
        """
        :param maxsize: How many items to keep in memory before overflowing
                        to disk
        :param name: Used in log messages and as the DiskDict table prefix
        """
        self.max_in_memory = maxsize
        self.name = name

        QueueSpeedMeasurement.__init__(self)

        # The parent Queue gets maxsize=0 (unbounded) on purpose: this class
        # stores the items itself, the parent only provides the locking and
        # the task accounting
        Queue.Queue.__init__(self, maxsize=0)

    def get_name(self):
        """
        :return: The queue name set at construction time
        """
        return self.name

    def next_item_saved_to_memory(self):
        """
        :return: True when a put() performed right now would keep the item
                 in the in-memory cache
        """
        return self.max_in_memory > len(self.memory)

    def _init(self, maxsize):
        """
        Create the storage backends and the FIFO pointers. Invoked by
        Queue.Queue.__init__().

        :param maxsize: The max size for the queue
        """
        self.get_pointer = 0
        self.put_pointer = 0
        self.memory = dict()
        self.disk = DiskDict(table_prefix='%sCachedQueue' % self.name)

    def _qsize(self, len=len):
        # Items may live in either backend, so count both
        in_memory_count = len(self.memory)
        on_disk_count = len(self.disk)
        return in_memory_count + on_disk_count

    def _get_class_name(self, obj):
        # Best-effort name used for debug messages only; never raises
        try:
            return obj.__class__.__name__
        except:
            return type(obj)

    def _put(self, item):
        """
        Store `item` in memory while there is room, on disk afterwards.
        """
        cached_item_count = len(self.memory)

        if cached_item_count == self.max_in_memory:
            # About to overflow to disk. Seeing this message often in the
            # scan log suggests a larger maxsize for this queue would help
            msg = ('CachedQueue.put() will write a %r item to the %s DiskDict.'
                   ' This uses more CPU and disk IO than storing in memory'
                   ' but will avoid high memory usage issues. The current'
                   ' %s DiskDict size is %s.')
            args = (self._get_class_name(item),
                    self.get_name(),
                    self.get_name(),
                    len(self.disk))
            om.out.debug(msg % args)

        # Memory first; disk only once the cache is at capacity
        if cached_item_count < self.max_in_memory:
            self.memory[self.put_pointer] = item
        else:
            self.disk[self.put_pointer] = item

        self.put_pointer += 1
        self._item_added_to_queue()

    def _get(self):
        """
        Remove and return the oldest item, trying the in-memory cache first
        and falling back to the DiskDict.
        """
        pointer = self.get_pointer

        try:
            item = self.memory.pop(pointer)
        except KeyError:
            item = self.disk.pop(pointer)

            if len(self.disk):
                # Reads are hitting the disk; a larger maxsize for this
                # queue would reduce CPU and disk IO
                msg = ('CachedQueue.get() from %s DiskDict was used to read an'
                       ' item from disk. The current %s DiskDict size is %s.')
                args = (self.get_name(), self.get_name(), len(self.disk))
                om.out.debug(msg % args)

        self._item_left_queue()
        self.get_pointer += 1
        return item
Example #4
0
class CachedDiskDict(object):
    """
    This data structure keeps the `max_in_memory` most frequently accessed
    keys in memory and stores the rest on disk.

    It is ideal for situations where a DiskDict is frequently accessed,
    fast read / writes are required, and items can take considerable amounts
    of memory.
    """

    # Sentinel used by get(). The previous implementation used the int
    # literal -456 together with an `is` comparison: equal int constants
    # that live in different code objects are generally *different* objects
    # in CPython, so `default is not -456` was True even when no default
    # was passed and get() returned -456 instead of raising KeyError
    _MISSING = object()

    def __init__(self, max_in_memory=50, table_prefix=None):
        """
        :param max_in_memory: The max number of items to keep in memory
        :param table_prefix: Optional prefix for the DiskDict table name
        """
        assert max_in_memory > 0, 'In-memory items must be > 0'

        table_prefix = self._get_table_prefix(table_prefix)

        self._max_in_memory = max_in_memory
        self._disk_dict = DiskDict(table_prefix=table_prefix)
        self._in_memory = dict()
        self._access_count = Counter()

    def cleanup(self):
        # Remove the on-disk storage used by the DiskDict
        self._disk_dict.cleanup()

    def _get_table_prefix(self, table_prefix):
        """
        :param table_prefix: The caller-provided prefix, or None
        :return: A unique DiskDict table prefix (random suffix appended)
        """
        if table_prefix is None:
            table_prefix = 'cached_disk_dict_%s' % rand_alpha(16)
        else:
            args = (table_prefix, rand_alpha(16))
            table_prefix = 'cached_disk_dict_%s_%s' % args

        return table_prefix

    def get(self, key, default=_MISSING):
        """
        dict.get()-like accessor.

        :param key: The key to look up
        :param default: Value to return when `key` is missing; when omitted
                        a KeyError is raised instead
        :return: The stored value, or `default`
        """
        try:
            return self[key]
        except KeyError:
            if default is self._MISSING:
                # Re-raise the original KeyError, which carries the key
                raise
            return default

    def __getitem__(self, key):
        """
        :return: The value for `key`, increasing its access count
        :raise KeyError: When the key is in neither memory nor disk
        """
        try:
            value = self._in_memory[key]
        except KeyError:
            # This will raise KeyError if key is not found, and that is OK
            # because we don't need to increase the access count when the
            # key doesn't exist
            value = self._disk_dict[key]

        self._increase_access_count(key)
        return value

    def _get_keys_for_memory(self):
        """
        :return: The names of the keys that should be kept in memory.
                 For example, if `max_in_memory` is set to 2 and:

                    _in_memory: {1: None, 2: None}
                    _access_count: {1: 10, 2: 20, 3: 5}
                    _disk_dict: {3: None}

                Then the method will return [1, 2].
        """
        return [k for k, v in self._access_count.most_common(self._max_in_memory)]

    def _increase_access_count(self, key):
        """
        Count one more access for `key` and re-balance the memory / disk
        storage so the most frequently accessed keys stay in memory.
        """
        self._access_count.update([key])

        keys_for_memory = self._get_keys_for_memory()

        self._move_key_to_disk_if_needed(keys_for_memory)
        self._move_key_to_memory_if_needed(key, keys_for_memory)

    def _move_key_to_disk_if_needed(self, keys_for_memory):
        """
        Move at most one in-memory key that no longer deserves its memory
        slot to disk.

        :param keys_for_memory: The keys that should be in memory
        :return: The name of the key that was moved to disk, or None if
                 all the keys are still in memory.
        """
        for key in self._in_memory:

            if key in keys_for_memory:
                continue

            try:
                value = self._in_memory.pop(key)
            except KeyError:
                # Another thread removed the key first
                return
            else:
                self._disk_dict[key] = value
                # Return right away: we just mutated the dict being
                # iterated, continuing the loop would be unsafe
                return key

    def _move_key_to_memory_if_needed(self, key, keys_for_memory):
        """
        Move `key` from disk to memory when its access count earned it a
        memory slot.

        :param key: The key that was last accessed
        :param keys_for_memory: The keys that should be in memory
        :return: The name of the key that was moved to memory, or None if
                 all the keys are still on disk.
        """
        # The key is already in memory, nothing to do here
        if key in self._in_memory:
            return

        # The key must not be in memory, nothing to do here
        if key not in keys_for_memory:
            return

        try:
            value = self._disk_dict.pop(key)
        except KeyError:
            # Another thread removed the key first
            return
        else:
            self._in_memory[key] = value
            return key

    def __setitem__(self, key, value):
        # Overwrite in place when already cached, fill free memory slots
        # first, and only then overflow to disk
        if key in self._in_memory:
            self._in_memory[key] = value

        elif len(self._in_memory) < self._max_in_memory:
            self._in_memory[key] = value

        else:
            self._disk_dict[key] = value

        self._increase_access_count(key)

    def __delitem__(self, key):
        try:
            del self._in_memory[key]
        except KeyError:
            # This will raise KeyError if key is not found, and that is OK
            # because we don't need to increase the access count when the
            # key doesn't exist
            del self._disk_dict[key]

        try:
            del self._access_count[key]
        except KeyError:
            # Another thread removed this key
            pass

    def __contains__(self, key):
        """
        :return: True when the key is in memory or on disk; membership
                 checks count as accesses
        """
        if key in self._in_memory:
            self._increase_access_count(key)
            return True

        if key in self._disk_dict:
            self._increase_access_count(key)
            return True

        return False

    def __iter__(self):
        """
        Decided not to increase the access count when iterating through the
        items. In most cases the iteration will be performed on all items,
        thus increasing the access count +1 for each, which will leave all
        access counts +1, forcing no movements between memory and disk.
        """
        for key in self._in_memory:
            yield key

        for key in self._disk_dict:
            yield key

    def iteritems(self):
        """
        Yield (key, value) pairs without changing access counts.
        """
        for key, value in self._in_memory.iteritems():
            yield key, value

        for key, value in self._disk_dict.iteritems():
            yield key, value
class OrderedCachedQueue(Queue.Queue, QueueSpeedMeasurement):
    """
    This queue implements all the features explained in CachedQueue (see
    cached_queue.py) plus it will order the items in the queue as they are
    inserted.

    The queue is ordered by a unique identifier that is returned by the object
    being added. If the object is None, then it is added to the end of the
    queue.

    The goal of this ordered queue is to impose an order in which URLs and
    forms identified by the w3af framework are processed by the plugins. Since
    plugins are run in threads, the order in which new URLs are added to the
    queue is "completely random" and depends on HTTP response times, CPU-load,
    memory swapping, etc.
    """

    # Thirty-two 'f' characters sort after any real MD5 hex digest, which is
    # how the None / POISON_PILL items are forced to the end of the queue
    LAST_MD5_HASH = 'f' * 32

    def __init__(self, maxsize=0, name='Unknown'):
        """
        :param maxsize: Maximum number of items to keep in memory; any excess
                        is written to a DiskDict
        :param name: Name used in debug messages and the DiskDict table prefix
        """
        self.name = name
        self.max_in_memory = maxsize
        self.processed_tasks = 0

        QueueSpeedMeasurement.__init__(self)

        # Placeholders only: the real containers are created in _init(),
        # which is invoked by Queue.Queue.__init__() below
        self.queue_order = None
        self.hash_to_uuid = None
        self.memory = None
        self.disk = None

        # We want to send zero to the maxsize of the Queue implementation
        # here because we can write an infinite number of items. But keep
        # in mind that we don't really use the queue storage in any way
        Queue.Queue.__init__(self, maxsize=0)

    def get_name(self):
        """
        :return: The name this queue was created with
        """
        return self.name

    def get_processed_tasks(self):
        """
        :return: The number of items that have been read via _get()
        """
        return self.processed_tasks

    def next_item_saved_to_memory(self):
        """
        :return: True when the next put() will store its item in memory
                 instead of writing it to disk
        """
        return len(self.memory) < self.max_in_memory

    def _init(self, maxsize):
        """
        Initialize the dicts and pointer
        :param maxsize: The max size for the queue
        """
        self.queue_order = list()
        self.hash_to_uuid = dict()
        self.memory = dict()
        self.disk = DiskDict(table_prefix='%sCachedQueue' % self.name)

    def _qsize(self, _len=len):
        # The queue size is the number of items in memory plus on disk
        return _len(self.memory) + _len(self.disk)

    def _get_class_name(self, obj):
        # Best-effort class name, used only in debug messages. The bare
        # except guarantees that logging can never break put()
        try:
            return obj.__class__.__name__
        except:
            return type(obj)

    def _get_hash(self, item):
        """
        :param item: The item being queued; expected to provide get_hash()
                     unless it is None or the POISON_PILL
        :return: The hash used to keep queue_order sorted
        """
        if item is None or item == POISON_PILL:
            # Return ffff...ffff which is the latest (in alphanumeric order)
            # hash that exists in MD5. This forces the None item to be placed
            # at the end of the queue.
            #
            # Warning! If FuzzableRequest.get_hash() ever changes its
            # implementation this will stop working as expected!
            return self.LAST_MD5_HASH

        return item.get_hash()

    def _put(self, item):
        """
        Put a new item in the queue
        """
        #
        #   This is very useful information for finding bottlenecks in the
        #   framework / strategy
        #
        if len(self.memory) == self.max_in_memory:
            #
            #   If you see many messages like this in the scan log, then you
            #   might want to experiment with a larger maxsize for this queue
            #
            msg = ('OrderedCachedQueue.put() will write a %r item to the %s'
                   ' DiskDict. This uses more CPU and disk IO than storing'
                   ' in memory but will avoid high memory usage issues. The'
                   ' current %s DiskDict size is %s.')
            args = (self._get_class_name(item),
                    self.get_name(),
                    self.get_name(),
                    len(self.disk))
            om.out.debug(msg % args)

        #
        #   Get the item hash to store it in the queue order list, and insert
        #   it using bisect.insort() that will keep the order at a low cost
        #
        item_hash = self._get_hash(item)
        bisect.insort(self.queue_order, item_hash)

        #
        #   Keep an in-memory dict that allows us to find the fuzzable requests
        #   in the other dictionaries
        #
        # NOTE(review): uuid4 strings are random, so the bisect.insort() on
        # unique_id_list below does not preserve FIFO order between items
        # that share the same hash — confirm any order is acceptable there
        unique_id = str(uuid.uuid4())

        unique_id_list = self.hash_to_uuid.setdefault(item_hash, [])
        bisect.insort(unique_id_list, unique_id)

        #
        #   And now we just save the item to memory (if there is space) or
        #   disk (if it doesn't fit on memory)
        #
        if len(self.memory) < self.max_in_memory:
            self.memory[unique_id] = item
        else:
            self.disk[unique_id] = item

        self._item_added_to_queue()

    def _get(self):
        """
        Get an item from the queue
        """
        # Take the smallest hash (first in sorted order) and one of the
        # unique ids registered for it
        item_hash = self.queue_order.pop(0)
        unique_id_list = self.hash_to_uuid.pop(item_hash)
        unique_id = unique_id_list.pop(0)

        if unique_id_list:
            #
            # There are still items in this unique_id_list, this is most likely
            # because two items with the same hash were added to the queue, and
            # only one of those has been read.
            #
            # Need to add the other item(s) to the list again
            #
            bisect.insort(self.queue_order, item_hash)
            self.hash_to_uuid[item_hash] = unique_id_list

        # The item lives either in the in-memory cache or in the DiskDict
        try:
            item = self.memory.pop(unique_id)
        except KeyError:
            item = self.disk.pop(unique_id)

            if len(self.disk):
                #
                #   If you see many messages like this in the scan log, then you
                #   might want to experiment with a larger maxsize for this queue
                #
                msg = ('OrderedCachedQueue.get() from %s DiskDict was used to'
                       ' read an item from disk. The current %s DiskDict'
                       ' size is %s.')
                args = (self.get_name(), self.get_name(), len(self.disk))
                om.out.debug(msg % args)

        self._item_left_queue()
        self.processed_tasks += 1
        return item
Example #6
0
class CachedDiskDict(object):
    """
    This data structure keeps the `max_in_memory` most frequently accessed
    keys in memory and stores the rest on disk.

    It is ideal for situations where a DiskDict is frequently accessed,
    fast read / writes are required, and items can take considerable amounts
    of memory.
    """
    def __init__(self, max_in_memory=50, table_prefix=None):
        """
        :param max_in_memory: The max number of items to keep in memory
        :param table_prefix: Optional prefix for the DiskDict table name
        """
        assert max_in_memory > 0, 'In-memory items must be > 0'

        table_prefix = self._get_table_prefix(table_prefix)

        self._max_in_memory = max_in_memory
        self._disk_dict = DiskDict(table_prefix=table_prefix)
        self._in_memory = dict()
        # Plain dict of key -> access count (see _increase_access_count)
        self._access_count = dict()

    def cleanup(self):
        # Remove the on-disk storage used by the DiskDict
        self._disk_dict.cleanup()

    def _get_table_prefix(self, table_prefix):
        """
        :param table_prefix: The caller-provided prefix, or None
        :return: A unique DiskDict table prefix (random suffix appended)
        """
        if table_prefix is None:
            table_prefix = 'cached_disk_dict_%s' % rand_alpha(16)
        else:
            args = (table_prefix, rand_alpha(16))
            table_prefix = 'cached_disk_dict_%s_%s' % args

        return table_prefix

    def get(self, key, default=-456):
        """
        dict.get()-like accessor: return self[key], or `default` when the
        key is missing and a default was given.

        NOTE(review): -456 is used as a sentinel and compared with `is`.
        Equal int constants from different code objects are generally
        different objects in CPython, so `default is not -456` can be True
        even when no default was passed — an object() sentinel would be
        safer. Confirm before relying on the KeyError branch.
        """
        try:
            return self[key]
        except KeyError:
            if default is not -456:
                return default

        raise KeyError()

    def __getitem__(self, key):
        try:
            value = self._in_memory[key]
        except KeyError:
            # This will raise KeyError if k is not found, and that is OK
            # because we don't need to increase the access count when the
            # key doesn't exist
            value = self._disk_dict[key]

        self._increase_access_count(key)
        return value

    def _get_keys_for_memory(self):
        """
        :return: Generate the names of the keys that should be kept in memory.
                 For example, if `max_in_memory` is set to 2 and:

                    _in_memory: {1: None, 2: None}
                    _access_count: {1: 10, 2: 20, 3: 5}
                    _disk_dict: {3: None}

                Then the method will generate [1, 2].
        """
        # Python 2 idiom: items() returns a list and sort() accepts a cmp
        # function. sort_by_value is defined elsewhere; presumably it orders
        # by descending access count — TODO confirm
        items = self._access_count.items()
        items.sort(sort_by_value)

        # Number of keys to yield (not an iterator, despite the name)
        iterator = min(self._max_in_memory, len(items))

        for i in xrange(iterator):
            yield items[i][0]

    def _belongs_in_memory(self, key):
        """
        :param key: A key
        :return: True if the key should be stored in memory
        """
        # Note: this consumes _get_keys_for_memory(), which sorts the whole
        # access-count dict on every call
        if key in self._get_keys_for_memory():
            return True

        return False

    def _increase_access_count(self, key):
        """
        Count one more access for `key` and re-balance the memory / disk
        storage so the most frequently accessed keys stay in memory.
        """
        access_count = self._access_count.get(key, 0)
        access_count += 1
        self._access_count[key] = access_count

        self._move_key_to_disk_if_needed(key)
        self._move_key_to_memory_if_needed(key)

    def _move_key_to_disk_if_needed(self, key):
        """
        Analyzes the current access count for the last accessed key and
        checks if any if the keys in memory should be moved to disk.

        :param key: The key that was last accessed
        :return: The name of the key that was moved to disk, or None if
                 all the keys are still in memory.
        """
        # NOTE(review): the loop variable shadows the `key` parameter, so
        # the argument is never actually used here — confirm intended
        for key in self._in_memory.keys():
            if not self._belongs_in_memory(key):
                try:
                    value = self._in_memory[key]
                except KeyError:
                    return None
                else:
                    self._disk_dict[key] = value
                    self._in_memory.pop(key, None)
                    return key

    def _move_key_to_memory_if_needed(self, key):
        """
        Analyzes the current access count for the last accessed key and
        checks if any if the keys in disk should be moved to memory.

        :param key: The key that was last accessed
        :return: The name of the key that was moved to memory, or None if
                 all the keys are still on disk.
        """
        key_belongs_in_memory = self._belongs_in_memory(key)

        if not key_belongs_in_memory:
            return None

        try:
            value = self._disk_dict[key]
        except KeyError:
            # The key is not on disk (most likely already in memory)
            return None
        else:
            self._in_memory[key] = value
            self._disk_dict.pop(key, None)
            return key

    def __setitem__(self, key, value):
        # Fill free memory slots first, then overflow to disk
        if len(self._in_memory) < self._max_in_memory:
            self._in_memory[key] = value
        else:
            self._disk_dict[key] = value

        self._increase_access_count(key)

    def __delitem__(self, key):
        try:
            del self._in_memory[key]
        except KeyError:
            # This will raise KeyError if k is not found, and that is OK
            # because we don't need to increase the access count when the
            # key doesn't exist
            del self._disk_dict[key]

        try:
            del self._access_count[key]
        except KeyError:
            # Another thread removed this key
            pass

    def __contains__(self, key):
        # Membership checks count as accesses and may re-balance storage
        if key in self._in_memory:
            self._increase_access_count(key)
            return True

        if key in self._disk_dict:
            self._increase_access_count(key)
            return True

        return False

    def __iter__(self):
        """
        Decided not to increase the access count when iterating through the
        items. In most cases the iteration will be performed on all items,
        thus increasing the access count +1 for each, which will leave all
        access counts +1, forcing no movements between memory and disk.
        """
        for key in self._in_memory:
            yield key

        for key in self._disk_dict:
            yield key