def populate_cubes(queue_arn, start, stop, step):
    """Populate the given SQS queue with the target cubes to downsample

    Args:
        queue_arn (str): The target SQS queue URL
        start (XYZ): Starting corner of the coordinate frame
        stop (XYZ): Far corner of the coordinate frame
        step (XYZ): The size of each volume to downsample

    Return:
        int: The number of cubes enqueued
    """
    # evenly chunk cubes into POOL_SIZE lists
    count = num_cubes(start, stop, step)
    enqueue_size = ceildiv(count, POOL_SIZE)
    args = ((queue_arn, cubes)
            for cubes in chunk(make_cubes(start, stop, step), enqueue_size))

    log.debug("Enqueueing {} cubes in chunks of {} using {} processes".format(count, enqueue_size, POOL_SIZE))

    start = datetime.now()
    with Pool(POOL_SIZE) as pool:
        pool.starmap(enqueue_cubes, args)
    stop = datetime.now()

    log.info("Enqueued {} cubes in {}".format(count, stop - start))

    return count
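# NOTE: populate_cubes() relies on helpers defined elsewhere in the module
# (ceildiv, chunk, make_cubes, enqueue_cubes). As a rough illustration of the
# chunking math only, here is a minimal sketch of what ceildiv and chunk could
# look like for plain integers and iterables; the real implementations
# (notably ceildiv over XYZ vectors) live in the actual module and may differ.
from itertools import islice

def ceildiv_example(a, b):
    """Illustrative only: integer division that rounds up."""
    return -(-a // b)

def chunk_example(iterable, size):
    """Illustrative only: yield lists of at most `size` items without materializing the input."""
    it = iter(iterable)
    while True:
        block = list(islice(it, size))
        if not block:
            return
        yield block

# e.g. 10 cubes split across POOL_SIZE = 4 worker processes -> chunks of 3: [3, 3, 3, 1]
assert ceildiv_example(10, 4) == 3
assert [len(c) for c in chunk_example(range(10), 3)] == [3, 3, 3, 1]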
def num_cubes(start, stop, step):
    """Calculate the number of volumes to be downsampled

    Used so all of the results from make_cubes() don't have to be pulled
    into memory.

    Args:
        start (XYZ): Starting corner of the coordinate frame
        stop (XYZ): Far corner of the coordinate frame
        step (XYZ): The size of each volume to downsample

    Return:
        int: The number of volumes in the frame
    """
    extents = ceildiv(stop - start, step)
    return int(extents.x * extents.y * extents.z)
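# A concrete check of the num_cubes() arithmetic with assumed values: a
# cube-aligned frame from (0, 0, 0) to (4, 4, 2) with an anisotropic step of
# (2, 2, 1) has extents (2, 2, 2), i.e. 8 volumes to downsample. The sketch
# below mirrors the same math on plain tuples instead of XYZ.
from math import ceil

start, stop, step = (0, 0, 0), (4, 4, 2), (2, 2, 1)   # hypothetical, in cube units
extents = [ceil((hi - lo) / s) for lo, hi, s in zip(start, stop, step)]
print(extents)                                # [2, 2, 2]
print(extents[0] * extents[1] * extents[2])   # 8 volumes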
def test_downsample_channel_aniso(self):
    args1 = self.get_args(type='anisotropic')
    args2 = self.rh.downsample_channel(args1)  # warning, will mutate args1 === args2

    expected = [call('downsample-dlq-1234'),
                call('downsample-cubes-1234')]
    self.assertEqual(self.mock_create_queue.mock_calls, expected)

    expected = call(SQS_URL + 'downsample-cubes-1234',
                    md.XYZ(0, 0, 0),
                    md.XYZ(2, 2, 2),
                    md.XYZ(2, 2, 1))
    self.assertEqual(self.mock_populate_cubes.mock_calls, [expected])

    args = {
        'bucket_size': self.rh.BUCKET_SIZE,
        'args': self.get_args(type='anisotropic')['msg'],  # Need the original arguments
        'step': md.XYZ(2, 2, 1),
        'dim': md.XYZ(512, 512, 16),
        'use_iso_flag': False,
        'dlq_arn': SQS_URL + 'downsample-dlq-1234',
        'cubes_arn': SQS_URL + 'downsample-cubes-1234'
    }
    expected = call(ceildiv(self.mock_populate_cubes.return_value, self.rh.BUCKET_SIZE) + self.rh.EXTRA_LAMBDAS,
                    args1['msg']['downsample_volume_lambda'],
                    json.dumps(args).encode('UTF8'),
                    SQS_URL + 'downsample-dlq-1234',
                    SQS_URL + 'downsample-cubes-1234',
                    DOWNSAMPLE_QUEUE_URL,
                    RECEIPT_HANDLE)
    self.assertEqual(self.mock_launch_lambdas.mock_calls, [expected])

    self.assertEqual(args2['msg']['x_stop'], 512)
    self.assertEqual(args2['msg']['y_stop'], 512)
    self.assertEqual(args2['msg']['z_stop'], 32)
    self.assertNotIn('iso_x_start', args2['msg'])
    self.assertEqual(args2['msg']['resolution'], 1)
    self.assertTrue(args2['msg']['res_lt_max'])

    expected = [call(SQS_URL + 'downsample-dlq-1234'),
                call(SQS_URL + 'downsample-cubes-1234')]
    self.assertEqual(self.mock_del_queue.mock_calls, expected)
def downsample_channel(args):
    """
    Slice the given channel into chunks of 2x2x2 or 2x2x1 cubes that are then
    sent to the downsample_volume lambda for downsampling into a 1x1x1 cube at
    resolution + 1.

    Makes use of the bossutils.multidimensional library for simplified vector
    math.

    Args:
        args {
            downsample_volume_sfn (ARN)

            collection_id (int)
            experiment_id (int)
            channel_id (int)
            annotation_channel (bool)
            data_type (str) 'uint8' | 'uint16' | 'uint64'

            s3_bucket (URL)
            s3_index (URL)
            id_index (URL)

            x_start (int)
            y_start (int)
            z_start (int)

            x_stop (int)
            y_stop (int)
            z_stop (int)

            resolution (int) The resolution to downsample. Creates resolution + 1
            resolution_max (int) The maximum resolution to generate
            res_lt_max (bool) = args['resolution'] < (args['resolution_max'] - 1)

            annotation_index_max (int) The maximum resolution to index annotation channel cubes at
                                       When annotation_index_max = N, indices will exist for res 0 - (N - 1)

            type (str) 'isotropic' | 'anisotropic'
            iso_resolution (int) if resolution >= iso_resolution && type == 'anisotropic' downsample both
        }
    """
    #log.debug("Downsampling resolution " + str(args['resolution']))

    resolution = args['resolution']

    dim = XYZ(*CUBOIDSIZE[resolution])
    #log.debug("Cube dimensions: {}".format(dim))

    def frame(key):
        return XYZ(args[key.format('x')], args[key.format('y')], args[key.format('z')])

    # Figure out variables for isotropic, anisotropic, or isotropic and anisotropic
    # downsampling. If both are happening, fanout one and then the other in series.
    configs = []
    if args['type'] == 'isotropic':
        configs.append({
            'name': 'isotropic',
            'step': XYZ(2, 2, 2),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })
    else:
        configs.append({
            'name': 'anisotropic',
            'step': XYZ(2, 2, 1),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })

        if resolution >= args['iso_resolution']: # DP TODO: Figure out how to launch aniso iso version with mutating arguments
            configs.append({
                'name': 'isotropic',
                'step': XYZ(2, 2, 2),
                'iso_flag': True,
                'frame_start_key': 'iso_{}_start',
                'frame_stop_key': 'iso_{}_stop',
            })

    for config in configs:
        frame_start = frame(config['frame_start_key'])
        frame_stop = frame(config['frame_stop_key'])
        step = config['step']
        use_iso_flag = config['iso_flag'] # If the resulting cube should be marked with the ISO flag
        index_annotations = args['resolution'] < (args['annotation_index_max'] - 1)

        # Round to the furthest full cube from the center of the data
        cubes_start = frame_start // dim
        cubes_stop = ceildiv(frame_stop, dim)

        log.debug('Downsampling {} resolution {}'.format(config['name'], resolution))
        log.debug("Frame corner: {}".format(frame_start))
        log.debug("Frame extent: {}".format(frame_stop))
        log.debug("Cubes corner: {}".format(cubes_start))
        log.debug("Cubes extent: {}".format(cubes_stop))
        log.debug("Downsample step: {}".format(step))
        log.debug("Indexing Annotations: {}".format(index_annotations))

        # Call the downsample_volume lambda to process the data
        fanout(aws.get_session(),
               args['downsample_volume_sfn'],
               make_args(args, cubes_start, cubes_stop, step, dim, use_iso_flag, index_annotations),
               max_concurrent=MAX_NUM_PROCESSES,
               rampup_delay=RAMPUP_DELAY,
               rampup_backoff=RAMPUP_BACKOFF,
               poll_delay=POLL_DELAY,
               status_delay=STATUS_DELAY)

        # Resize the coordinate frame extents as the data shrinks
        # DP NOTE: doesn't currently work correctly with non-zero frame starts
        def resize(var, size):
            start = config['frame_start_key'].format(var)
            stop = config['frame_stop_key'].format(var)
            args[start] //= size
            args[stop] = ceildiv(args[stop], size)
        resize('x', step.x)
        resize('y', step.y)
        resize('z', step.z)

        # if next iteration will split into aniso and iso downsampling, copy the coordinate frame
        if args['type'] != 'isotropic' and (resolution + 1) == args['iso_resolution']:
            def copy(var):
                args['iso_{}_start'.format(var)] = args['{}_start'.format(var)]
                args['iso_{}_stop'.format(var)] = args['{}_stop'.format(var)]
            copy('x')
            copy('y')
            copy('z')

    # Advance the loop and recalculate the conditional
    # Using max - 1 because resolution_max should not be a valid resolution
    # and res < res_max will end with res = res_max - 1, which generates res_max resolution
    args['resolution'] = resolution + 1
    args['res_lt_max'] = args['resolution'] < (args['resolution_max'] - 1)
    return args
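# To see what the resize() helper does to the coordinate frame between
# resolutions, here is the same floor/ceiling arithmetic on plain integers for
# a hypothetical 1024x1024x32 frame and an anisotropic step of (2, 2, 1); the
# x/y extents halve while z is untouched (matching the expectations in
# test_downsample_channel_aniso).
from math import ceil

frame = {'x_start': 0, 'x_stop': 1024,
         'y_start': 0, 'y_stop': 1024,
         'z_start': 0, 'z_stop': 32}

def resize_example(var, size):
    # Mirrors resize(): floor-divide the start, round the stop up
    frame['{}_start'.format(var)] //= size
    frame['{}_stop'.format(var)] = ceil(frame['{}_stop'.format(var)] / size)

resize_example('x', 2)
resize_example('y', 2)
resize_example('z', 1)
print(frame)   # {'x_start': 0, 'x_stop': 512, 'y_start': 0, 'y_stop': 512, 'z_start': 0, 'z_stop': 32}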
def launch_lambdas(total_count, lambda_arn, lambda_args, dlq_arn, cubes_arn,
                   downsample_queue_url, receipt_handle):
    """Launch lambdas to process all of the target cubes to downsample

    Launches an initial set of lambdas and monitors the cubes SQS queue to
    understand the current status. If the count in the queue doesn't change for
    UNCHANGING_LAUNCH cycles then it will calculate how many more lambdas to
    launch and launch them. If the queue count doesn't change after launching
    more lambdas an exception will eventually be raised so the activity does
    not hang forever.

    Args:
        total_count (int): The initial number of lambdas to launch
        lambda_arn (str): Name or ARN of the lambda function to invoke
        lambda_args (str): The lambda payload to pass when invoking
        dlq_arn (str): ARN of the SQS DLQ to monitor for error messages
        cubes_arn (str): ARN of the input cubes SQS queue to monitor for
                         completion of the downsample
        downsample_queue_url (str): URL of downsample job queue
        receipt_handle (str): Handle of message from downsample queue
    """
    per_lambda = ceildiv(total_count, POOL_SIZE)
    d, m = divmod(total_count, per_lambda)
    counts = [per_lambda] * d
    if m > 0:
        counts += [m]

    assert sum(counts) == total_count, "Didn't calculate counts per lambda correctly"

    log.debug("Launching {} lambdas in chunks of {} using {} processes".format(total_count, per_lambda, POOL_SIZE))

    args = ((count, lambda_arn, lambda_args, dlq_arn)
            for count in counts)

    start = datetime.now()
    with Pool(POOL_SIZE) as pool:
        pool.starmap(invoke_lambdas, args)
    stop = datetime.now()

    log.info("Launched {} lambdas in {}".format(total_count, stop - start))

    # Finished launching lambdas, need to wait for all to finish
    log.info("Finished launching lambdas")

    polling_start = datetime.now()
    previous_count = 0
    count_count = 1
    zero_count = 0
    while True:
        if check_queue(dlq_arn) > 0:
            raise FailedLambdaError()

        count = check_queue(cubes_arn)
        log.debug("Status polling - count {}".format(count))
        log.debug("Throttling count {}".format(lambda_throttle_count(lambda_arn)))

        if count == previous_count:
            count_count += 1
            if count_count == UNCHANGING_MAX:
                raise ResolutionHierarchyError("Status polling stuck at {} items for {}".format(count, datetime.now() - polling_start))

            if count_count == UNCHANGING_THROTTLE:
                # If the throttle count is increasing -> Sleep
                # If the throttle count is decreasing
                #     If the cubes queue count has changed -> Continue regular polling
                #     If the cubes queue count has not changed -> Sleep
                # If the throttle count is zero -> Continue regular polling
                #
                # This means that this loop will block until throttle has stopped / cubes
                # in the queue have been processed.
                #
                # If throttling stops and no cubes have been processed the UNCHANGING_MAX
                # threshold is the last guard so the activity doesn't hang
                prev_throttle = 0
                while True:
                    throttle = lambda_throttle_count(lambda_arn)
                    if throttle < prev_throttle and check_queue(cubes_arn) != count:
                        # If the throttle count is decreasing and the queue count has
                        # changed continue the regular polling cycle
                        break
                    if throttle == 0:
                        # No throttling happening
                        break
                    if throttle > 0:
                        # Don't update count if there was an error getting the current count
                        prev_throttle = throttle

                    # Tell SQS we're still alive
                    update_visibility_timeout(downsample_queue_url, receipt_handle)

                    time.sleep(MAX_LAMBDA_TIME.seconds)

                    if check_queue(dlq_arn) > 0:
                        raise FailedLambdaError()

            if count_count == UNCHANGING_LAUNCH:
                # We have noticed that the last few messages are spread across multiple AWS queue servers and
                # a single lambda requesting 10 messages will only get messages from a single queue server. So we
                # pad the number of lambdas by EXTRA_LAMBDAS to avoid extra looping cycles.
                needed = ceildiv(count, BUCKET_SIZE)
                if needed > 0:
                    log.debug("Launching {} more lambdas".format(needed))

                    start = datetime.now()
                    invoke_lambdas(needed + EXTRA_LAMBDAS, lambda_arn, lambda_args, dlq_arn)
                    stop = datetime.now()

                    log.debug("Launched {} lambdas with {} extra in {}".format(needed, EXTRA_LAMBDAS, stop - start))
        else:
            previous_count = count
            count_count = 1

        if count == 0:
            zero_count += 1
            if zero_count == ZERO_COUNT:
                log.info("Finished polling for lambda completion")
                break
            else:
                log.info("Zero cubes left, waiting to make sure lambda finishes")
        else:
            zero_count = 0

        # Tell SQS we're still alive
        update_visibility_timeout(downsample_queue_url, receipt_handle)

        time.sleep(MAX_LAMBDA_TIME.seconds)
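# The divmod-based split at the top of launch_lambdas() partitions total_count
# across at most POOL_SIZE worker processes. A quick illustration with an
# assumed POOL_SIZE of 4:
def split_counts_example(total_count, pool_size=4):
    per_lambda = -(-total_count // pool_size)   # ceildiv
    d, m = divmod(total_count, per_lambda)
    counts = [per_lambda] * d
    if m > 0:
        counts += [m]
    return counts

print(split_counts_example(10))   # [3, 3, 3, 1] -> sums back to 10
print(split_counts_example(8))    # [2, 2, 2, 2]
print(split_counts_example(3))    # [1, 1, 1] (fewer chunks than POOL_SIZE)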
def downsample_channel(args):
    """
    Slice the given channel into chunks of 2x2x2 or 2x2x1 cubes that are then
    sent to the downsample_volume lambda for downsampling into a 1x1x1 cube at
    resolution + 1.

    Makes use of the bossutils.multidimensional library for simplified vector
    math. Generators are used as much as possible (instead of lists) so that
    large lists of data are not actualized and kept in memory.

    Args:
        args {
            msg { (this holds the contents of the msg from the downsample queue)
                downsample_volume_lambda (ARN | lambda name)

                collection_id (int)
                experiment_id (int)
                channel_id (int)
                annotation_channel (bool)
                data_type (str) 'uint8' | 'uint16' | 'uint64'

                s3_bucket (URL)
                s3_index (URL)

                x_start (int)
                y_start (int)
                z_start (int)

                x_stop (int)
                y_stop (int)
                z_stop (int)

                resolution (int) The resolution to downsample. Creates resolution + 1
                resolution_max (int) The maximum resolution to generate
                res_lt_max (bool) = args['msg']['resolution'] < (args['msg']['resolution_max'] - 1)

                type (str) 'isotropic' | 'anisotropic'
                iso_resolution (int) if resolution >= iso_resolution && type == 'anisotropic' downsample both

                aws_region (str) AWS region to run in such as us-east-1
            }
            job_receipt_handle (str) Used by downstream state to delete the downsample job from queue
            queue_url (str) URL of downsample queue; downstream state deletes from this queue
            sfn_arn (str) <arn of the downsample step fcn>
            db_host (str) Host of MySQL database.
        }

    Returns:
        (dict): An updated argument dictionary containing the shrunk frame,
                resolution, res_lt_max values, and lookup_key
    """

    # TODO: load downsample_volume_lambda from boss config

    #log.debug("Downsampling resolution " + str(args['msg']['resolution']))

    resolution = args['msg']['resolution']

    dim = XYZ(*CUBOIDSIZE[resolution])
    #log.debug("Cube dimensions: {}".format(dim))

    def frame(key):
        return XYZ(args['msg'][key.format('x')],
                   args['msg'][key.format('y')],
                   args['msg'][key.format('z')])

    # Figure out variables for isotropic, anisotropic, or isotropic and anisotropic
    # downsampling. If both are happening, fanout one and then the other in series.
    configs = []
    if args['msg']['type'] == 'isotropic':
        configs.append({
            'name': 'isotropic',
            'step': XYZ(2, 2, 2),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })
    else:
        configs.append({
            'name': 'anisotropic',
            'step': XYZ(2, 2, 1),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })

        # if this iteration will split into aniso and iso downsampling, copy the coordinate frame
        if resolution == args['msg']['iso_resolution']:
            def copy(var):
                args['msg']['iso_{}_start'.format(var)] = args['msg']['{}_start'.format(var)]
                args['msg']['iso_{}_stop'.format(var)] = args['msg']['{}_stop'.format(var)]
            copy('x')
            copy('y')
            copy('z')

        if resolution >= args['msg']['iso_resolution']: # DP TODO: Figure out how to launch aniso iso version with mutating arguments
            configs.append({
                'name': 'isotropic',
                'step': XYZ(2, 2, 2),
                'iso_flag': True,
                'frame_start_key': 'iso_{}_start',
                'frame_stop_key': 'iso_{}_stop',
            })

    for config in configs:
        # Different ID and queue for each resolution, as it takes 60 seconds to delete a queue
        # Different ID and queue for each iso/aniso downsample in case a DLQ message is received
        # for the previous config
        downsample_id = str(random.random())[2:]  # remove the '0.' part of the number

        dlq_arn = create_queue('downsample-dlq-' + downsample_id)
        cubes_arn = create_queue('downsample-cubes-' + downsample_id)

        try:
            frame_start = frame(config['frame_start_key'])
            frame_stop = frame(config['frame_stop_key'])
            step = config['step']
            use_iso_flag = config['iso_flag'] # If the resulting cube should be marked with the ISO flag

            # Round to the furthest full cube from the center of the data
            cubes_start = frame_start // dim
            cubes_stop = ceildiv(frame_stop, dim)

            # For a non-zero start, make sure start cube aligns with a zero start downsample
            # so that the data aligns and there are no shifts with the new downsampled data
            mod = cubes_start % step
            if mod.x != 0:
                cubes_start = XYZ(cubes_start.x - 1, cubes_start.y, cubes_start.z)
            if mod.y != 0:
                cubes_start = XYZ(cubes_start.x, cubes_start.y - 1, cubes_start.z)
            if mod.z != 0:
                cubes_start = XYZ(cubes_start.x, cubes_start.y, cubes_start.z - 1)

            log.debug('Downsampling {} resolution {}'.format(config['name'], resolution))
            log.debug("Frame corner: {}".format(frame_start))
            log.debug("Frame extent: {}".format(frame_stop))
            log.debug("Cubes corner: {}".format(cubes_start))
            log.debug("Cubes extent: {}".format(cubes_stop))
            log.debug("Downsample step: {}".format(step))

            log.debug("Populating input cube")
            cube_count = populate_cubes(cubes_arn, cubes_start, cubes_stop, step)

            log.debug("Invoking downsample lambdas")
            lambda_count = ceildiv(cube_count, BUCKET_SIZE) + EXTRA_LAMBDAS
            lambda_args = {
                'bucket_size': BUCKET_SIZE,
                'args': args['msg'],
                'step': step,
                'dim': dim,
                'use_iso_flag': use_iso_flag,
                'dlq_arn': dlq_arn,
                'cubes_arn': cubes_arn,
            }

            launch_lambdas(lambda_count,
                           args['msg']['downsample_volume_lambda'],
                           json.dumps(lambda_args).encode('UTF8'),
                           dlq_arn,
                           cubes_arn,
                           args['queue_url'],
                           args['job_receipt_handle'])

            # Resize the coordinate frame extents as the data shrinks
            # DP NOTE: doesn't currently work correctly with non-zero frame starts
            def resize(var, size):
                start = config['frame_start_key'].format(var)
                stop = config['frame_stop_key'].format(var)
                args['msg'][start] //= size
                args['msg'][stop] = ceildiv(args['msg'][stop], size)
            resize('x', step.x)
            resize('y', step.y)
            resize('z', step.z)
        finally:
            delete_queue(dlq_arn)
            delete_queue(cubes_arn)

    # Advance the loop and recalculate the conditional
    # Using max - 1 because resolution_max should not be a valid resolution
    # and res < res_max will end with res = res_max - 1, which generates res_max resolution
    args['msg']['resolution'] = resolution + 1
    args['msg']['res_lt_max'] = args['msg']['resolution'] < (args['msg']['resolution_max'] - 1)

    # Move this up one level for use by states that follow.
    args['lookup_key'] = args['msg']['lookup_key']

    return args
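# The modulo adjustment above pulls cubes_start back to a step boundary so a
# frame that starts mid-volume downsamples the same 2x2x1 (or 2x2x2) groups of
# cubes as a zero-based frame would. A small sketch of the same logic using a
# namedtuple stand-in for bossutils.multidimensional.XYZ and assumed values:
from collections import namedtuple

XYZExample = namedtuple('XYZExample', ['x', 'y', 'z'])

cubes_start = XYZExample(5, 3, 7)     # hypothetical first cube of the frame
step = XYZExample(2, 2, 1)            # anisotropic downsample step

mod = XYZExample(cubes_start.x % step.x, cubes_start.y % step.y, cubes_start.z % step.z)
if mod.x != 0:
    cubes_start = XYZExample(cubes_start.x - 1, cubes_start.y, cubes_start.z)
if mod.y != 0:
    cubes_start = XYZExample(cubes_start.x, cubes_start.y - 1, cubes_start.z)
if mod.z != 0:
    cubes_start = XYZExample(cubes_start.x, cubes_start.y, cubes_start.z - 1)

print(cubes_start)   # XYZExample(x=4, y=2, z=7) -- z untouched because step.z == 1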
def downsample_channel(args):
    """
    Slice the given channel into chunks of 2x2x2 or 2x2x1 cubes that are then
    sent to the downsample_volume lambda for downsampling into a 1x1x1 cube at
    resolution + 1.

    Makes use of the bossutils.multidimensional library for simplified vector
    math. Generators are used as much as possible (instead of lists) so that
    large lists of data are not actualized and kept in memory.

    Args:
        args {
            downsample_volume_lambda (ARN | lambda name)

            collection_id (int)
            experiment_id (int)
            channel_id (int)
            annotation_channel (bool)
            data_type (str) 'uint8' | 'uint16' | 'uint64'

            s3_bucket (URL)
            s3_index (URL)

            x_start (int)
            y_start (int)
            z_start (int)

            x_stop (int)
            y_stop (int)
            z_stop (int)

            resolution (int) The resolution to downsample. Creates resolution + 1
            resolution_max (int) The maximum resolution to generate
            res_lt_max (bool) = args['resolution'] < (args['resolution_max'] - 1)

            type (str) 'isotropic' | 'anisotropic'
            iso_resolution (int) if resolution >= iso_resolution && type == 'anisotropic' downsample both

            aws_region (str) AWS region to run in such as us-east-1
        }

    Return:
        dict: An updated argument dictionary containing the shrunk frame,
              resolution, and res_lt_max values
    """

    # TODO: load downsample_volume_lambda from boss config

    #log.debug("Downsampling resolution " + str(args['resolution']))

    resolution = args['resolution']

    dim = XYZ(*CUBOIDSIZE[resolution])
    #log.debug("Cube dimensions: {}".format(dim))

    def frame(key):
        return XYZ(args[key.format('x')], args[key.format('y')], args[key.format('z')])

    # Figure out variables for isotropic, anisotropic, or isotropic and anisotropic
    # downsampling. If both are happening, fanout one and then the other in series.
    configs = []
    if args['type'] == 'isotropic':
        configs.append({
            'name': 'isotropic',
            'step': XYZ(2, 2, 2),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })
    else:
        configs.append({
            'name': 'anisotropic',
            'step': XYZ(2, 2, 1),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })

        # if this iteration will split into aniso and iso downsampling, copy the coordinate frame
        if resolution == args['iso_resolution']:
            def copy(var):
                args['iso_{}_start'.format(var)] = args['{}_start'.format(var)]
                args['iso_{}_stop'.format(var)] = args['{}_stop'.format(var)]
            copy('x')
            copy('y')
            copy('z')

        if resolution >= args['iso_resolution']: # DP TODO: Figure out how to launch aniso iso version with mutating arguments
            configs.append({
                'name': 'isotropic',
                'step': XYZ(2, 2, 2),
                'iso_flag': True,
                'frame_start_key': 'iso_{}_start',
                'frame_stop_key': 'iso_{}_stop',
            })

    for config in configs:
        # Different ID and queue for each resolution, as it takes 60 seconds to delete a queue
        # Different ID and queue for each iso/aniso downsample in case a DLQ message is received
        # for the previous config
        downsample_id = str(random.random())[2:]  # remove the '0.' part of the number

        dlq_arn = create_queue('downsample-dlq-' + downsample_id)
        cubes_arn = create_queue('downsample-cubes-' + downsample_id)

        try:
            frame_start = frame(config['frame_start_key'])
            frame_stop = frame(config['frame_stop_key'])
            step = config['step']
            use_iso_flag = config['iso_flag'] # If the resulting cube should be marked with the ISO flag

            # Round to the furthest full cube from the center of the data
            cubes_start = frame_start // dim
            cubes_stop = ceildiv(frame_stop, dim)

            # For a non-zero start, make sure start cube aligns with a zero start downsample
            # so that the data aligns and there are no shifts with the new downsampled data
            mod = cubes_start % step
            if mod.x != 0:
                cubes_start = XYZ(cubes_start.x - 1, cubes_start.y, cubes_start.z)
            if mod.y != 0:
                cubes_start = XYZ(cubes_start.x, cubes_start.y - 1, cubes_start.z)
            if mod.z != 0:
                cubes_start = XYZ(cubes_start.x, cubes_start.y, cubes_start.z - 1)

            log.debug('Downsampling {} resolution {}'.format(config['name'], resolution))
            log.debug("Frame corner: {}".format(frame_start))
            log.debug("Frame extent: {}".format(frame_stop))
            log.debug("Cubes corner: {}".format(cubes_start))
            log.debug("Cubes extent: {}".format(cubes_stop))
            log.debug("Downsample step: {}".format(step))

            log.debug("Populating input cube")
            cube_count = populate_cubes(cubes_arn, cubes_start, cubes_stop, step)

            log.debug("Invoking downsample lambdas")
            lambda_count = ceildiv(cube_count, BUCKET_SIZE) + EXTRA_LAMBDAS
            lambda_args = {
                'bucket_size': BUCKET_SIZE,
                'args': args,
                'step': step,
                'dim': dim,
                'use_iso_flag': use_iso_flag,
                'dlq_arn': dlq_arn,
                'cubes_arn': cubes_arn,
            }

            launch_lambdas(lambda_count,
                           args['downsample_volume_lambda'],
                           json.dumps(lambda_args).encode('UTF8'),
                           dlq_arn,
                           cubes_arn)

            # Resize the coordinate frame extents as the data shrinks
            # DP NOTE: doesn't currently work correctly with non-zero frame starts
            def resize(var, size):
                start = config['frame_start_key'].format(var)
                stop = config['frame_stop_key'].format(var)
                args[start] //= size
                args[stop] = ceildiv(args[stop], size)
            resize('x', step.x)
            resize('y', step.y)
            resize('z', step.z)
        finally:
            delete_queue(dlq_arn)
            delete_queue(cubes_arn)

    # Advance the loop and recalculate the conditional
    # Using max - 1 because resolution_max should not be a valid resolution
    # and res < res_max will end with res = res_max - 1, which generates res_max resolution
    args['resolution'] = resolution + 1
    args['res_lt_max'] = args['resolution'] < (args['resolution_max'] - 1)

    return args
def launch_lambdas(total_count, lambda_arn, lambda_args, dlq_arn, cubes_arn):
    """Launch lambdas to process all of the target cubes to downsample

    Launches an initial set of lambdas and monitors the cubes SQS queue to
    understand the current status. If the count in the queue doesn't change for
    UNCHANGING_LAUNCH cycles then it will calculate how many more lambdas to
    launch and launch them. If the queue count doesn't change after launching
    more lambdas an exception will eventually be raised so the activity does
    not hang forever.

    Args:
        total_count (int): The initial number of lambdas to launch
        lambda_arn (str): Name or ARN of the lambda function to invoke
        lambda_args (str): The lambda payload to pass when invoking
        dlq_arn (str): ARN of the SQS DLQ to monitor for error messages
        cubes_arn (str): ARN of the input cubes SQS queue to monitor for
                         completion of the downsample
    """
    per_lambda = ceildiv(total_count, POOL_SIZE)
    d, m = divmod(total_count, per_lambda)
    counts = [per_lambda] * d
    if m > 0:
        counts += [m]

    assert sum(counts) == total_count, "Didn't calculate counts per lambda correctly"

    log.debug("Launching {} lambdas in chunks of {} using {} processes".format(total_count, per_lambda, POOL_SIZE))

    args = ((count, lambda_arn, lambda_args, dlq_arn)
            for count in counts)

    start = datetime.now()
    with Pool(POOL_SIZE) as pool:
        pool.starmap(invoke_lambdas, args)
    stop = datetime.now()

    log.info("Launched {} lambdas in {}".format(total_count, stop - start))

    # Finished launching lambdas, need to wait for all to finish
    log.info("Finished launching lambdas")

    polling_start = datetime.now()
    previous_count = 0
    count_count = 1
    zero_count = 0
    while True:
        if check_queue(dlq_arn) > 0:
            raise FailedLambdaError()

        count = check_queue(cubes_arn)
        log.debug("Status polling - count {}".format(count))
        log.debug("Throttling count {}".format(lambda_throttle_count(lambda_arn)))

        if count == previous_count:
            count_count += 1
            if count_count == UNCHANGING_MAX:
                raise ResolutionHierarchyError("Status polling stuck at {} items for {}".format(count, datetime.now() - polling_start))

            if count_count == UNCHANGING_THROTTLE:
                # If the throttle count is increasing -> Sleep
                # If the throttle count is decreasing
                #     If the cubes queue count has changed -> Continue regular polling
                #     If the cubes queue count has not changed -> Sleep
                # If the throttle count is zero -> Continue regular polling
                #
                # This means that this loop will block until throttle has stopped / cubes
                # in the queue have been processed.
                #
                # If throttling stops and no cubes have been processed the UNCHANGING_MAX
                # threshold is the last guard so the activity doesn't hang
                prev_throttle = 0
                while True:
                    throttle = lambda_throttle_count(lambda_arn)
                    if throttle < prev_throttle and check_queue(cubes_arn) != count:
                        # If the throttle count is decreasing and the queue count has
                        # changed continue the regular polling cycle
                        break
                    if throttle == 0:
                        # No throttling happening
                        break
                    if throttle > 0:
                        # Don't update count if there was an error getting the current count
                        prev_throttle = throttle

                    time.sleep(MAX_LAMBDA_TIME.seconds)

                    if check_queue(dlq_arn) > 0:
                        raise FailedLambdaError()

            if count_count == UNCHANGING_LAUNCH:
                # We have noticed that the last few messages are spread across multiple AWS queue servers and
                # a single lambda requesting 10 messages will only get messages from a single queue server. So we
                # pad the number of lambdas by EXTRA_LAMBDAS to avoid extra looping cycles.
                needed = ceildiv(count, BUCKET_SIZE)
                if needed > 0:
                    log.debug("Launching {} more lambdas".format(needed))

                    start = datetime.now()
                    invoke_lambdas(needed + EXTRA_LAMBDAS, lambda_arn, lambda_args, dlq_arn)
                    stop = datetime.now()

                    log.debug("Launched {} lambdas with {} extra in {}".format(needed, EXTRA_LAMBDAS, stop - start))
        else:
            previous_count = count
            count_count = 1

        if count == 0:
            zero_count += 1
            if zero_count == ZERO_COUNT:
                log.info("Finished polling for lambda completion")
                break
            else:
                log.info("Zero cubes left, waiting to make sure lambda finishes")
        else:
            zero_count = 0

        time.sleep(MAX_LAMBDA_TIME.seconds)