def test_primed():
    """
    Check that ``primed`` starts a generator eagerly and keeps all of its items.
    """
    global is_primed
    is_primed = False

    def flagging_counter():
        # Flip the module-level flag as soon as the generator body starts running
        global is_primed
        is_primed = True
        yield from range(10)

    lazy_items = flagging_counter()

    # Merely creating the generator must not run any of its body
    assert is_primed is False

    eager_items = primed(lazy_items)
    assert is_primed is True
    assert list(eager_items) == list(range(10))

    # Priming and advancing an exhausted iterator must raise StopIteration
    with pytest.raises(StopIteration):
        next(primed(eager_items))
def setup_pbars(self, cursor):
    """
    Sets up progress bars

    Creates three tqdm bars on the instance, all sharing the same ``total``:
    ``get_pbar`` (which wraps ``cursor``), ``process_pbar`` and
    ``update_pbar``.

    Args:
        cursor: the items to iterate over. A generator is primed first and
            the total is then read from ``self.builder.total`` if the
            builder has that attribute. For anything else the total comes
            from ``len(cursor)`` or ``cursor.count()`` when available;
            otherwise it stays ``None``.
    """
    total = None

    if isinstance(cursor, types.GeneratorType):
        try:
            cursor = primed(cursor)
        except StopIteration:
            # Nothing to prime (empty generator): keep the cursor as-is with
            # an unknown total, the same way multi() and serial() handle it.
            pass
        else:
            # Only read the builder's total once priming has succeeded
            if hasattr(self.builder, "total"):
                total = self.builder.total
    elif hasattr(cursor, "__len__"):
        total = len(cursor)
    elif hasattr(cursor, "count"):
        total = cursor.count()

    self.get_pbar = tqdm(cursor, desc="Get Items", total=total)
    self.process_pbar = tqdm(desc="Processing Item", total=total)
    self.update_pbar = tqdm(desc="Updating Targets", total=total)
async def multi(builder, num_workers):
    """
    Runs a builder, processing its items in a pool of worker processes

    The builder is connected, its items are fetched and pushed through
    ``builder.process_item`` on a ProcessPoolExecutor, and the non-None
    results are handed to ``builder.update_targets`` in groups of
    ``builder.chunk_size``. ``builder.finalize`` is called at the end.

    Args:
        builder: the builder to run
        num_workers: number of workers given to the ProcessPoolExecutor
    """
    builder.connect()
    cursor = builder.get_items()
    executor = ProcessPoolExecutor(num_workers)

    try:
        # Gets the total number of items to process by priming
        # the cursor
        total = None
        if isinstance(cursor, GeneratorType):
            try:
                cursor = primed(cursor)
                if hasattr(builder, "total"):
                    total = builder.total
            except StopIteration:
                # Nothing to prime: leave the cursor alone and the total unknown
                pass
        elif hasattr(cursor, "__len__"):
            total = len(cursor)
        elif hasattr(cursor, "count"):
            total = cursor.count()

        # Shared by every log record emitted below
        builder_name = builder.__class__.__name__
        source_names = [source.name for source in builder.sources]
        target_names = [target.name for target in builder.targets]

        logger.info(
            f"Starting multiprocessing: {builder_name}",
            extra={
                "maggma": {
                    "event": "BUILD_STARTED",
                    "total": total,
                    "builder": builder_name,
                    "sources": source_names,
                    "targets": target_names,
                }
            },
        )

        back_pressured_get = BackPressure(
            iterator=tqdm(cursor, desc="Get", total=total), n=builder.chunk_size
        )

        processed_items = atqdm(
            async_iterator=AsyncUnorderedMap(
                func=builder.process_item,
                async_iterator=back_pressured_get,
                executor=executor,
            ),
            total=total,
            desc="Process Items",
        )

        back_pressure_relief = back_pressured_get.release(processed_items)

        update_items = tqdm(total=total, desc="Update Targets")
        try:
            async for chunk in grouper(back_pressure_relief, n=builder.chunk_size):
                # Report the real batch size; it need not equal chunk_size
                logger.info(
                    f"Processed batch of {len(chunk)} items",
                    extra={
                        "maggma": {
                            "event": "UPDATE",
                            "items": len(chunk),
                            "builder": builder_name,
                            "sources": source_names,
                            "targets": target_names,
                        }
                    },
                )
                # Separate name so the async iterator above is not shadowed
                valid_items = [item for item in chunk if item is not None]
                builder.update_targets(valid_items)
                update_items.update(len(valid_items))
        finally:
            # Close the bar even when processing or updating fails
            update_items.close()
    finally:
        # Always release the worker processes, including on error
        executor.shutdown()

    logger.info(
        f"Ended multiprocessing: {builder_name}",
        extra={
            "maggma": {
                "event": "BUILD_ENDED",
                "builder": builder_name,
                "sources": source_names,
                "targets": target_names,
            }
        },
    )

    builder.finalize()
def serial(builder: Builder):
    """
    Runs the builders using a single process

    The builder is connected, its items are fetched and processed one
    group of ``builder.chunk_size`` at a time with ``builder.process_item``,
    and the non-None results of each group are passed to
    ``builder.update_targets``. ``builder.finalize`` is called at the end.

    Args:
        builder: the builder to run
    """
    logger = logging.getLogger("SerialProcessor")

    builder.connect()

    cursor = builder.get_items()

    # Work out the total for the progress bar; generators are primed first
    total = None
    if isinstance(cursor, GeneratorType):
        try:
            cursor = primed(cursor)
            if hasattr(builder, "total"):
                total = builder.total
        except StopIteration:
            # Nothing to prime: leave the cursor alone and the total unknown
            pass

    elif hasattr(cursor, "__len__"):
        total = len(cursor)  # type: ignore
    elif hasattr(cursor, "count"):
        total = cursor.count()  # type: ignore

    builder_name = builder.__class__.__name__

    logger.info(
        f"Starting serial processing: {builder_name}",
        extra={
            "maggma": {
                "event": "BUILD_STARTED",
                "total": total,
                "builder": builder_name,
                "sources": [source.name for source in builder.sources],
                "targets": [target.name for target in builder.targets],
            }
        },
    )

    for chunk in grouper(tqdm(cursor, total=total), builder.chunk_size):
        # Report the real batch size; it need not equal chunk_size
        logger.info(
            f"Processing batch of {len(chunk)} items",
            extra={
                "maggma": {
                    "event": "UPDATE",
                    "items": len(chunk),
                    "builder": builder_name,
                }
            },
        )
        processed_chunk = [builder.process_item(item) for item in chunk]
        # Items for which process_item returned None are not sent to the targets
        processed_items = [item for item in processed_chunk if item is not None]
        builder.update_targets(processed_items)

    logger.info(
        f"Ended serial processing: {builder_name}",
        extra={"maggma": {"event": "BUILD_ENDED", "builder": builder_name}},
    )

    builder.finalize()