import math
from functools import partial
from multiprocessing import Pool

from tqdm import tqdm

# `helpers`, `get_items` and the `Items` alias are module-local dependencies.


def get_items_with_pool(
    source_key: str, count: int, start_index: int = 0, workers: int = 4
) -> Items:
    """Concurrently reads items from API using Pool

    Args:
        source_key: a job or collection key, e.g. '112358/13/21'
        count: a number of items to retrieve
        start_index: an index to read from
        workers: the number of separate processes to fetch data in

    Returns:
        A list of items
    """
    active_connections_limit = 10
    # Never spawn more processes than the API allows concurrent connections.
    processes_count = min(max(helpers.cpus_count(), workers), active_connections_limit)
    batch_size = math.ceil(count / processes_count)
    items = []
    with Pool(processes_count) as p:
        # starmap expects an iterable of argument tuples; zip() wraps each
        # start index into a 1-tuple.
        results = p.starmap(
            partial(get_items, source_key, batch_size, p_bar=tqdm),
            zip(range(start_index, start_index + count, batch_size)),
        )
    for items_batch in results:
        items.extend(items_batch)
    return items
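To make the batching arithmetic concrete, here is a small worked example; the input numbers are hypothetical, chosen only for illustration:

import math

count, workers, cpus = 1000, 4, 8  # hypothetical inputs
active_connections_limit = 10
processes_count = min(max(cpus, workers), active_connections_limit)  # -> 8
batch_size = math.ceil(count / processes_count)                      # -> 125
print(list(range(0, count, batch_size)))
# [0, 125, 250, 375, 500, 625, 750, 875] -- one start index per process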
def test_cpus_count_no_affinity(mocker):
    mocker.patch.object(
        h.os, "sched_getaffinity", create=True, side_effect=AttributeError()
    )
    mocker.patch.object(h.os, "cpu_count", create=True, return_value=2)
    assert h.cpus_count() == 2
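The tests here and below pin down the behavior of the helper they mock: prefer the CPU-affinity set when the platform exposes it, and fall back to the total CPU count otherwise. A minimal sketch of such a helper, assuming it lives in the module imported as `h`:

import os
from typing import Optional

def cpus_count() -> Optional[int]:
    try:
        # Linux: the set of CPUs this process is allowed to run on.
        return len(os.sched_getaffinity(0))
    except AttributeError:
        # macOS/Windows lack sched_getaffinity; use the total count instead.
        return os.cpu_count()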
import numpy as np


def get_items_with_pool(
    source_key: str, count: int, start_index: int, workers: int = 4
) -> np.ndarray:
    """Concurrently reads items from API using Pool

    Args:
        source_key: a job or collection key, e.g. '112358/13/21'
        count: a number of items to retrieve
        start_index: an index to read from
        workers: the number of separate processes to fetch data in

    Returns:
        A numpy array of items
    """
    active_connections_limit = 10
    processes_count = min(max(helpers.cpus_count(), workers), active_connections_limit)
    batch_size = math.ceil(count / processes_count)
    start_idxs = range(start_index, start_index + count, batch_size)
    # Each worker gets both a numeric start index and a '<source_key>/<index>'
    # start pointer.
    start = [f"{source_key}/{i}" for i in start_idxs]
    with Pool(processes_count) as p:
        results = p.starmap(
            partial(get_items, source_key, batch_size, p_bar=tqdm),
            zip(start_idxs, start),
        )
    # Batches come back as arrays, so merging is a single concatenate instead
    # of repeated list.extend() calls.
    return np.concatenate(results)
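A hypothetical call, using the key format from the docstring (the key and count are illustrative, not real):

items = get_items_with_pool("112358/13/21", count=5000, start_index=0)
print(len(items))  # 5000 items in one flat array, batches already merged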
def test_cpus_count(mocker):
    mocker.patch.object(
        h.os, "sched_getaffinity", create=True, return_value={0, 1, 2, 3}
    )
    assert h.cpus_count() == 4