def test_ventilator_stop(self):
    """Stopping the ventilator leaves only the already-queued items readable.

    Reads ``max_ventilation_queue_size`` results, waits until the gap between
    ventilated and processed items reaches ``max_ventilation_queue_size``,
    stops the ventilator, then expects exactly that many more results
    followed by ``EmptyResultError``.
    """
    size = 100
    max_ventilation_queue_size = 10

    pool = DummyPool()
    ventilator = ConcurrentVentilator(ventilate_fn=pool.ventilate,
                                      items_to_ventilate=[{'item': i} for i in range(size)],
                                      max_ventilation_queue_size=max_ventilation_queue_size)
    pool.start(IdentityWorker, ventilator=ventilator)

    # Always stop and join the pool, even when an assertion below fails.
    try:
        for _ in range(max_ventilation_queue_size):
            pool.get_results()

        # Stop the ventilator queue after some time, so there should only be 10 items left on it
        while ventilator._ventilated_items_count - ventilator._processed_items_count < max_ventilation_queue_size:
            time.sleep(.1)

        ventilator.stop()

        for _ in range(max_ventilation_queue_size):
            pool.get_results()

        with self.assertRaises(EmptyResultError):
            pool.get_results()
    finally:
        pool.stop()
        pool.join()
def test_reset_ventilator(self):
    """Resetting ventilator after all items were ventilated will make it re-ventilate the same items"""
    items_count = 100
    for pool in [DummyPool(), ThreadPool(10)]:
        ventilator = ConcurrentVentilator(ventilate_fn=pool.ventilate,
                                          items_to_ventilate=[{'item': i} for i in range(items_count)],
                                          iterations=1)
        pool.start(IdentityWorker, ventilator=ventilator)

        # Always stop and join the pool, even when an assertion below fails.
        try:
            # Readout all ventilated items
            for _ in range(items_count):
                pool.get_results()

            # Should fail reading the next, as all items were read by now
            with self.assertRaises(EmptyResultError):
                pool.get_results()

            # Resetting, hence will be read out the items all over again
            ventilator.reset()

            for _ in range(items_count):
                pool.get_results()

            with self.assertRaises(EmptyResultError):
                pool.get_results()
        finally:
            pool.stop()
            pool.join()
def test_reset_in_the_middle_of_ventilation(self):
    """Can not reset ventilator in the middle of ventilation"""
    for pool in [DummyPool(), ThreadPool(10)]:
        ventilator = ConcurrentVentilator(ventilate_fn=pool.ventilate,
                                          items_to_ventilate=[{'item': i} for i in range(100)],
                                          iterations=None)
        pool.start(IdentityWorker, ventilator=ventilator)

        # The ventilator is created with iterations=None, so make sure the
        # pool is stopped and joined even if the assertion below fails.
        try:
            # Resetting is supported only when the ventilator has finished
            with self.assertRaises(NotImplementedError):
                ventilator.reset()
        finally:
            pool.stop()
            pool.join()
def _create_ventilator(self, row_group_indexes, shuffle_options, num_epochs, worker_predicate):
    """Build a ``ConcurrentVentilator`` over every (piece, drop-partition) pair.

    One work item is produced for each index in ``row_group_indexes`` combined
    with each partition number in
    ``range(shuffle_options.shuffle_row_drop_partitions)``.

    :param row_group_indexes: iterable of piece indexes to ventilate.
    :param shuffle_options: object providing ``shuffle_row_drop_partitions``
        and ``shuffle_row_groups``.
    :param num_epochs: forwarded to the ventilator as ``iterations``.
    :param worker_predicate: stored unchanged in every work item.
    :return: a ``ConcurrentVentilator`` bound to ``self._workers_pool.ventilate``.
    """
    partitions_total = shuffle_options.shuffle_row_drop_partitions
    work_items = [
        {'piece_index': index,
         'worker_predicate': worker_predicate,
         'shuffle_row_drop_partition': (partition, partitions_total)}
        for index in row_group_indexes
        for partition in range(partitions_total)
    ]
    return ConcurrentVentilator(self._workers_pool.ventilate,
                                work_items,
                                iterations=num_epochs,
                                randomize_item_order=shuffle_options.shuffle_row_groups)
def _create_ventilator(self, blocklet_indexes, shuffle_blocklets, shuffle_row_drop_partitions,
                       num_epochs, worker_predicate, max_ventilation_queue_size):
    """Build a ``ConcurrentVentilator`` over every (blocklet, drop-partition) pair.

    One work item is produced for each index in ``blocklet_indexes`` combined
    with each partition number in ``range(shuffle_row_drop_partitions)``.

    :param blocklet_indexes: iterable of piece indexes to ventilate.
    :param shuffle_blocklets: forwarded to the ventilator as ``randomize_item_order``.
    :param shuffle_row_drop_partitions: number of drop partitions per piece.
    :param num_epochs: forwarded to the ventilator as ``iterations``.
    :param worker_predicate: stored unchanged in every work item.
    :param max_ventilation_queue_size: forwarded to the ventilator unchanged.
    :return: a ``ConcurrentVentilator`` bound to ``self._workers_pool.ventilate``.
    """
    work_items = [
        {'piece_index': index,
         'worker_predicate': worker_predicate,
         'shuffle_row_drop_partition': (partition, shuffle_row_drop_partitions)}
        for index in blocklet_indexes
        for partition in range(shuffle_row_drop_partitions)
    ]
    return ConcurrentVentilator(self._workers_pool.ventilate,
                                work_items,
                                iterations=num_epochs,
                                max_ventilation_queue_size=max_ventilation_queue_size,
                                randomize_item_order=shuffle_blocklets)