def populate_cubes(queue_arn, start, stop, step):
    """Populate the given SQS queue with the target cubes to downsample

    Args:
        queue_arn (str): The target SQS queue URL
        start (XYZ): Starting corner of the coordinate frame
        stop (XYZ): Far corner of the coordinate frame
        step (XYZ): The size of each volume to downsample

    Return:
        int: The number of cubes enqueued
    """
    # evenly chunk cubes into POOL_SIZE lists
    count = num_cubes(start, stop, step)
    enqueue_size = ceildiv(count, POOL_SIZE)
    args = ((queue_arn, cubes)
            for cubes in chunk(make_cubes(start, stop, step), enqueue_size))

    log.debug("Enqueueing {} cubes in chunks of {} using {} processes".format(count, enqueue_size, POOL_SIZE))

    start = datetime.now()
    with Pool(POOL_SIZE) as pool:
        pool.starmap(enqueue_cubes, args)
    stop = datetime.now()

    log.info("Enqueued {} cubes in {}".format(count, stop - start))

    return count
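# NOTE: populate_cubes() relies on helpers defined elsewhere in the module
# (ceildiv, chunk, make_cubes, enqueue_cubes). As a rough illustration of the
# chunking math only, here is a minimal sketch of what ceildiv and chunk could
# look like for plain integers and iterables; the real implementations
# (notably ceildiv over XYZ vectors) live in the actual module and may differ.
from itertools import islice

def ceildiv_example(a, b):
    """Illustrative only: integer division that rounds up."""
    return -(-a // b)

def chunk_example(iterable, size):
    """Illustrative only: yield lists of at most `size` items without materializing the input."""
    it = iter(iterable)
    while True:
        block = list(islice(it, size))
        if not block:
            return
        yield block

# e.g. 10 cubes split across POOL_SIZE = 4 worker processes -> chunks of 3: [3, 3, 3, 1]
assert ceildiv_example(10, 4) == 3
assert [len(c) for c in chunk_example(range(10), 3)] == [3, 3, 3, 1]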
def num_cubes(start, stop, step):
    """Calculate the number of volumes to be downsampled

    Used so all of the results from make_cubes() don't have to be pulled
    into memory.

    Args:
        start (XYZ): Starting corner of the coordinate frame
        stop (XYZ): Far corner of the coordinate frame
        step (XYZ): The size of each volume to downsample

    Return:
        int: The number of volumes in the frame
    """
    extents = ceildiv(stop - start, step)
    return int(extents.x * extents.y * extents.z)
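# A concrete check of the num_cubes() arithmetic with assumed values: a
# cube-aligned frame from (0, 0, 0) to (4, 4, 2) with an anisotropic step of
# (2, 2, 1) has extents (2, 2, 2), i.e. 8 volumes to downsample. The sketch
# below mirrors the same math on plain tuples instead of XYZ.
from math import ceil

start, stop, step = (0, 0, 0), (4, 4, 2), (2, 2, 1)   # hypothetical, in cube units
extents = [ceil((hi - lo) / s) for lo, hi, s in zip(start, stop, step)]
print(extents)                                # [2, 2, 2]
print(extents[0] * extents[1] * extents[2])   # 8 volumes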
def test_downsample_channel_aniso(self):
    args1 = self.get_args(type='anisotropic')
    args2 = self.rh.downsample_channel(args1)  # warning, will mutate args1 === args2

    expected = [call('downsample-dlq-1234'),
                call('downsample-cubes-1234')]
    self.assertEqual(self.mock_create_queue.mock_calls, expected)

    expected = call(SQS_URL + 'downsample-cubes-1234',
                    md.XYZ(0, 0, 0),
                    md.XYZ(2, 2, 2),
                    md.XYZ(2, 2, 1))
    self.assertEqual(self.mock_populate_cubes.mock_calls, [expected])

    args = {
        'bucket_size': self.rh.BUCKET_SIZE,
        'args': self.get_args(type='anisotropic')['msg'],  # Need the original arguments
        'step': md.XYZ(2, 2, 1),
        'dim': md.XYZ(512, 512, 16),
        'use_iso_flag': False,
        'dlq_arn': SQS_URL + 'downsample-dlq-1234',
        'cubes_arn': SQS_URL + 'downsample-cubes-1234'
    }
    expected = call(ceildiv(self.mock_populate_cubes.return_value, self.rh.BUCKET_SIZE) + self.rh.EXTRA_LAMBDAS,
                    args1['msg']['downsample_volume_lambda'],
                    json.dumps(args).encode('UTF8'),
                    SQS_URL + 'downsample-dlq-1234',
                    SQS_URL + 'downsample-cubes-1234',
                    DOWNSAMPLE_QUEUE_URL,
                    RECEIPT_HANDLE)
    self.assertEqual(self.mock_launch_lambdas.mock_calls, [expected])

    self.assertEqual(args2['msg']['x_stop'], 512)
    self.assertEqual(args2['msg']['y_stop'], 512)
    self.assertEqual(args2['msg']['z_stop'], 32)
    self.assertNotIn('iso_x_start', args2['msg'])
    self.assertEqual(args2['msg']['resolution'], 1)
    self.assertTrue(args2['msg']['res_lt_max'])

    expected = [call(SQS_URL + 'downsample-dlq-1234'),
                call(SQS_URL + 'downsample-cubes-1234')]
    self.assertEqual(self.mock_del_queue.mock_calls, expected)
def downsample_channel(args):
    """
    Slice the given channel into chunks of 2x2x2 or 2x2x1 cubes that are then
    sent to the downsample_volume lambda for downsampling into a 1x1x1 cube at
    resolution + 1.

    Makes use of the bossutils.multidimensional library for simplified vector
    math.

    Args:
        args {
            downsample_volume_sfn (ARN)

            collection_id (int)
            experiment_id (int)
            channel_id (int)
            annotation_channel (bool)
            data_type (str) 'uint8' | 'uint16' | 'uint64'

            s3_bucket (URL)
            s3_index (URL)
            id_index (URL)

            x_start (int)
            y_start (int)
            z_start (int)

            x_stop (int)
            y_stop (int)
            z_stop (int)

            resolution (int) The resolution to downsample. Creates resolution + 1
            resolution_max (int) The maximum resolution to generate
            res_lt_max (bool) = args['resolution'] < (args['resolution_max'] - 1)

            annotation_index_max (int) The maximum resolution to index annotation channel cubes at
                                       When annotation_index_max = N, indices will exist for res 0 - (N - 1)

            type (str) 'isotropic' | 'anisotropic'
            iso_resolution (int) if resolution >= iso_resolution && type == 'anisotropic' downsample both
        }
    """
    #log.debug("Downsampling resolution " + str(args['resolution']))

    resolution = args['resolution']

    dim = XYZ(*CUBOIDSIZE[resolution])
    #log.debug("Cube dimensions: {}".format(dim))

    def frame(key):
        return XYZ(args[key.format('x')], args[key.format('y')], args[key.format('z')])

    # Figure out variables for isotropic, anisotropic, or isotropic and anisotropic
    # downsampling. If both are happening, fanout one and then the other in series.
    configs = []
    if args['type'] == 'isotropic':
        configs.append({
            'name': 'isotropic',
            'step': XYZ(2, 2, 2),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })
    else:
        configs.append({
            'name': 'anisotropic',
            'step': XYZ(2, 2, 1),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })

        if resolution >= args['iso_resolution']: # DP TODO: Figure out how to launch aniso iso version with mutating arguments
            configs.append({
                'name': 'isotropic',
                'step': XYZ(2, 2, 2),
                'iso_flag': True,
                'frame_start_key': 'iso_{}_start',
                'frame_stop_key': 'iso_{}_stop',
            })

    for config in configs:
        frame_start = frame(config['frame_start_key'])
        frame_stop = frame(config['frame_stop_key'])
        step = config['step']
        use_iso_flag = config['iso_flag'] # If the resulting cube should be marked with the ISO flag
        index_annotations = args['resolution'] < (args['annotation_index_max'] - 1)

        # Round to the furthest full cube from the center of the data
        cubes_start = frame_start // dim
        cubes_stop = ceildiv(frame_stop, dim)

        log.debug('Downsampling {} resolution {}'.format(config['name'], resolution))
        log.debug("Frame corner: {}".format(frame_start))
        log.debug("Frame extent: {}".format(frame_stop))
        log.debug("Cubes corner: {}".format(cubes_start))
        log.debug("Cubes extent: {}".format(cubes_stop))
        log.debug("Downsample step: {}".format(step))
        log.debug("Indexing Annotations: {}".format(index_annotations))

        # Call the downsample_volume lambda to process the data
        fanout(aws.get_session(),
               args['downsample_volume_sfn'],
               make_args(args, cubes_start, cubes_stop, step, dim, use_iso_flag, index_annotations),
               max_concurrent=MAX_NUM_PROCESSES,
               rampup_delay=RAMPUP_DELAY,
               rampup_backoff=RAMPUP_BACKOFF,
               poll_delay=POLL_DELAY,
               status_delay=STATUS_DELAY)

        # Resize the coordinate frame extents as the data shrinks
        # DP NOTE: doesn't currently work correctly with non-zero frame starts
        def resize(var, size):
            start = config['frame_start_key'].format(var)
            stop = config['frame_stop_key'].format(var)
            args[start] //= size
            args[stop] = ceildiv(args[stop], size)
        resize('x', step.x)
        resize('y', step.y)
        resize('z', step.z)

        # if next iteration will split into aniso and iso downsampling, copy the coordinate frame
        if args['type'] != 'isotropic' and (resolution + 1) == args['iso_resolution']:
            def copy(var):
                args['iso_{}_start'.format(var)] = args['{}_start'.format(var)]
                args['iso_{}_stop'.format(var)] = args['{}_stop'.format(var)]
            copy('x')
            copy('y')
            copy('z')

    # Advance the loop and recalculate the conditional
    # Using max - 1 because resolution_max should not be a valid resolution
    # and res < res_max will end with res = res_max - 1, which generates res_max resolution
    args['resolution'] = resolution + 1
    args['res_lt_max'] = args['resolution'] < (args['resolution_max'] - 1)
    return args
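# To see what the resize() helper does to the coordinate frame between
# resolutions, here is the same floor/ceiling arithmetic on plain integers for
# a hypothetical 1024x1024x32 frame and an anisotropic step of (2, 2, 1); the
# x/y extents halve while z is untouched (matching the expectations in
# test_downsample_channel_aniso).
from math import ceil

frame = {'x_start': 0, 'x_stop': 1024,
         'y_start': 0, 'y_stop': 1024,
         'z_start': 0, 'z_stop': 32}

def resize_example(var, size):
    # Mirrors resize(): floor-divide the start, round the stop up
    frame['{}_start'.format(var)] //= size
    frame['{}_stop'.format(var)] = ceil(frame['{}_stop'.format(var)] / size)

resize_example('x', 2)
resize_example('y', 2)
resize_example('z', 1)
print(frame)   # {'x_start': 0, 'x_stop': 512, 'y_start': 0, 'y_stop': 512, 'z_start': 0, 'z_stop': 32}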
def launch_lambdas(total_count, lambda_arn, lambda_args, dlq_arn, cubes_arn,
                   downsample_queue_url, receipt_handle):
    """Launch lambdas to process all of the target cubes to downsample

    Launches an initial set of lambdas and monitors the cubes SQS queue to
    understand the current status. If the count in the queue doesn't change for
    UNCHANGING_LAUNCH cycles then it will calculate how many more lambdas to
    launch and launch them. If the queue count doesn't change after launching
    more lambdas an exception will eventually be raised so the activity does
    not hang forever.

    Args:
        total_count (int): The initial number of lambdas to launch
        lambda_arn (str): Name or ARN of the lambda function to invoke
        lambda_args (str): The lambda payload to pass when invoking
        dlq_arn (str): ARN of the SQS DLQ to monitor for error messages
        cubes_arn (str): ARN of the input cubes SQS queue to monitor for
                         completion of the downsample
        downsample_queue_url (str): URL of downsample job queue
        receipt_handle (str): Handle of message from downsample queue
    """
    per_lambda = ceildiv(total_count, POOL_SIZE)
    d, m = divmod(total_count, per_lambda)
    counts = [per_lambda] * d
    if m > 0:
        counts += [m]

    assert sum(counts) == total_count, "Didn't calculate counts per lambda correctly"

    log.debug("Launching {} lambdas in chunks of {} using {} processes".format(total_count, per_lambda, POOL_SIZE))

    args = ((count, lambda_arn, lambda_args, dlq_arn)
            for count in counts)

    start = datetime.now()
    with Pool(POOL_SIZE) as pool:
        pool.starmap(invoke_lambdas, args)
    stop = datetime.now()

    log.info("Launched {} lambdas in {}".format(total_count, stop - start))

    # Finished launching lambdas, need to wait for all to finish
    log.info("Finished launching lambdas")

    polling_start = datetime.now()
    previous_count = 0
    count_count = 1
    zero_count = 0
    while True:
        if check_queue(dlq_arn) > 0:
            raise FailedLambdaError()

        count = check_queue(cubes_arn)
        log.debug("Status polling - count {}".format(count))
        log.debug("Throttling count {}".format(lambda_throttle_count(lambda_arn)))

        if count == previous_count:
            count_count += 1
            if count_count == UNCHANGING_MAX:
                raise ResolutionHierarchyError("Status polling stuck at {} items for {}".format(count, datetime.now() - polling_start))

            if count_count == UNCHANGING_THROTTLE:
                # If the throttle count is increasing -> Sleep
                # If the throttle count is decreasing
                #     If the cubes queue count has changed -> Continue regular polling
                #     If the cubes queue count has not changed -> Sleep
                # If the throttle count is zero -> Continue regular polling
                #
                # This means that this loop will block until throttle has stopped / cubes
                # in the queue have been processed.
                #
                # If throttling stops and no cubes have been processed the UNCHANGING_MAX
                # threshold is the last guard so the activity doesn't hang
                prev_throttle = 0
                while True:
                    throttle = lambda_throttle_count(lambda_arn)
                    if throttle < prev_throttle and check_queue(cubes_arn) != count:
                        # If the throttle count is decreasing and the queue count has
                        # changed continue the regular polling cycle
                        break
                    if throttle == 0:
                        # No throttling happening
                        break
                    if throttle > 0:
                        # Don't update count if there was an error getting the current count
                        prev_throttle = throttle

                    # Tell SQS we're still alive
                    update_visibility_timeout(downsample_queue_url, receipt_handle)

                    time.sleep(MAX_LAMBDA_TIME.seconds)

                    if check_queue(dlq_arn) > 0:
                        raise FailedLambdaError()

            if count_count == UNCHANGING_LAUNCH:
                # We have noticed that the last few messages are spread across multiple AWS queue servers and
                # a single lambda requesting 10 messages will only get messages from a single queue server. So we
                # pad the number of lambdas by EXTRA_LAMBDAS to avoid extra looping cycles.
                needed = ceildiv(count, BUCKET_SIZE)
                if needed > 0:
                    log.debug("Launching {} more lambdas".format(needed))

                    start = datetime.now()
                    invoke_lambdas(needed + EXTRA_LAMBDAS, lambda_arn, lambda_args, dlq_arn)
                    stop = datetime.now()

                    log.debug("Launched {} lambdas with {} extra in {}".format(needed, EXTRA_LAMBDAS, stop - start))
        else:
            previous_count = count
            count_count = 1

        if count == 0:
            zero_count += 1
            if zero_count == ZERO_COUNT:
                log.info("Finished polling for lambda completion")
                break
            else:
                log.info("Zero cubes left, waiting to make sure lambda finishes")
        else:
            zero_count = 0

        # Tell SQS we're still alive
        update_visibility_timeout(downsample_queue_url, receipt_handle)

        time.sleep(MAX_LAMBDA_TIME.seconds)
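# The divmod-based split at the top of launch_lambdas() partitions total_count
# across at most POOL_SIZE worker processes. A quick illustration with an
# assumed POOL_SIZE of 4:
def split_counts_example(total_count, pool_size=4):
    per_lambda = -(-total_count // pool_size)   # ceildiv
    d, m = divmod(total_count, per_lambda)
    counts = [per_lambda] * d
    if m > 0:
        counts += [m]
    return counts

print(split_counts_example(10))   # [3, 3, 3, 1] -> sums back to 10
print(split_counts_example(8))    # [2, 2, 2, 2]
print(split_counts_example(3))    # [1, 1, 1] (fewer chunks than POOL_SIZE)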
def downsample_channel(args):
    """
    Slice the given channel into chunks of 2x2x2 or 2x2x1 cubes that are then
    sent to the downsample_volume lambda for downsampling into a 1x1x1 cube at
    resolution + 1.

    Makes use of the bossutils.multidimensional library for simplified vector
    math. Generators are used as much as possible (instead of lists) so that
    large lists of data are not actualized and kept in memory.

    Args:
        args {
            msg { (this holds the contents of the msg from the downsample queue)
                downsample_volume_lambda (ARN | lambda name)

                collection_id (int)
                experiment_id (int)
                channel_id (int)
                annotation_channel (bool)
                data_type (str) 'uint8' | 'uint16' | 'uint64'

                s3_bucket (URL)
                s3_index (URL)

                x_start (int)
                y_start (int)
                z_start (int)

                x_stop (int)
                y_stop (int)
                z_stop (int)

                resolution (int) The resolution to downsample. Creates resolution + 1
                resolution_max (int) The maximum resolution to generate
                res_lt_max (bool) = args['msg']['resolution'] < (args['msg']['resolution_max'] - 1)

                type (str) 'isotropic' | 'anisotropic'
                iso_resolution (int) if resolution >= iso_resolution && type == 'anisotropic' downsample both

                aws_region (str) AWS region to run in such as us-east-1
            }
            job_receipt_handle (str) Used by downstream state to delete the downsample job from queue
            queue_url (str) URL of downsample queue; downstream state deletes from this queue
            sfn_arn (str) <arn of the downsample step fcn>
            db_host (str) Host of MySQL database.
        }

    Returns:
        (dict): An updated argument dictionary containing the shrunk frame,
                resolution, res_lt_max values, and lookup_key
    """

    # TODO: load downsample_volume_lambda from boss config

    #log.debug("Downsampling resolution " + str(args['msg']['resolution']))

    resolution = args['msg']['resolution']

    dim = XYZ(*CUBOIDSIZE[resolution])
    #log.debug("Cube dimensions: {}".format(dim))

    def frame(key):
        return XYZ(args['msg'][key.format('x')],
                   args['msg'][key.format('y')],
                   args['msg'][key.format('z')])

    # Figure out variables for isotropic, anisotropic, or isotropic and anisotropic
    # downsampling. If both are happening, fanout one and then the other in series.
    configs = []
    if args['msg']['type'] == 'isotropic':
        configs.append({
            'name': 'isotropic',
            'step': XYZ(2, 2, 2),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })
    else:
        configs.append({
            'name': 'anisotropic',
            'step': XYZ(2, 2, 1),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })

        # if this iteration will split into aniso and iso downsampling, copy the coordinate frame
        if resolution == args['msg']['iso_resolution']:
            def copy(var):
                args['msg']['iso_{}_start'.format(var)] = args['msg']['{}_start'.format(var)]
                args['msg']['iso_{}_stop'.format(var)] = args['msg']['{}_stop'.format(var)]
            copy('x')
            copy('y')
            copy('z')

        if resolution >= args['msg']['iso_resolution']: # DP TODO: Figure out how to launch aniso iso version with mutating arguments
            configs.append({
                'name': 'isotropic',
                'step': XYZ(2, 2, 2),
                'iso_flag': True,
                'frame_start_key': 'iso_{}_start',
                'frame_stop_key': 'iso_{}_stop',
            })

    for config in configs:
        # Different ID and queue for each resolution, as it takes 60 seconds to delete a queue
        # Different ID and queue for each iso/aniso downsample in case a DLQ message is received
        # for the previous config
        downsample_id = str(random.random())[2:]  # remove the '0.' part of the number

        dlq_arn = create_queue('downsample-dlq-' + downsample_id)
        cubes_arn = create_queue('downsample-cubes-' + downsample_id)

        try:
            frame_start = frame(config['frame_start_key'])
            frame_stop = frame(config['frame_stop_key'])
            step = config['step']
            use_iso_flag = config['iso_flag'] # If the resulting cube should be marked with the ISO flag

            # Round to the furthest full cube from the center of the data
            cubes_start = frame_start // dim
            cubes_stop = ceildiv(frame_stop, dim)

            # For a non-zero start, make sure start cube aligns with a zero start downsample
            # so that the data aligns and there are no shifts with the new downsampled data
            mod = cubes_start % step
            if mod.x != 0:
                cubes_start = XYZ(cubes_start.x - 1, cubes_start.y, cubes_start.z)
            if mod.y != 0:
                cubes_start = XYZ(cubes_start.x, cubes_start.y - 1, cubes_start.z)
            if mod.z != 0:
                cubes_start = XYZ(cubes_start.x, cubes_start.y, cubes_start.z - 1)

            log.debug('Downsampling {} resolution {}'.format(config['name'], resolution))
            log.debug("Frame corner: {}".format(frame_start))
            log.debug("Frame extent: {}".format(frame_stop))
            log.debug("Cubes corner: {}".format(cubes_start))
            log.debug("Cubes extent: {}".format(cubes_stop))
            log.debug("Downsample step: {}".format(step))

            log.debug("Populating input cube")
            cube_count = populate_cubes(cubes_arn, cubes_start, cubes_stop, step)

            log.debug("Invoking downsample lambdas")
            lambda_count = ceildiv(cube_count, BUCKET_SIZE) + EXTRA_LAMBDAS
            lambda_args = {
                'bucket_size': BUCKET_SIZE,
                'args': args['msg'],
                'step': step,
                'dim': dim,
                'use_iso_flag': use_iso_flag,
                'dlq_arn': dlq_arn,
                'cubes_arn': cubes_arn,
            }

            launch_lambdas(lambda_count,
                           args['msg']['downsample_volume_lambda'],
                           json.dumps(lambda_args).encode('UTF8'),
                           dlq_arn,
                           cubes_arn,
                           args['queue_url'],
                           args['job_receipt_handle'])

            # Resize the coordinate frame extents as the data shrinks
            # DP NOTE: doesn't currently work correctly with non-zero frame starts
            def resize(var, size):
                start = config['frame_start_key'].format(var)
                stop = config['frame_stop_key'].format(var)
                args['msg'][start] //= size
                args['msg'][stop] = ceildiv(args['msg'][stop], size)
            resize('x', step.x)
            resize('y', step.y)
            resize('z', step.z)
        finally:
            delete_queue(dlq_arn)
            delete_queue(cubes_arn)

    # Advance the loop and recalculate the conditional
    # Using max - 1 because resolution_max should not be a valid resolution
    # and res < res_max will end with res = res_max - 1, which generates res_max resolution
    args['msg']['resolution'] = resolution + 1
    args['msg']['res_lt_max'] = args['msg']['resolution'] < (args['msg']['resolution_max'] - 1)

    # Move this up one level for use by states that follow.
    args['lookup_key'] = args['msg']['lookup_key']

    return args
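# The modulo adjustment above pulls cubes_start back to a step boundary so a
# frame that starts mid-volume downsamples the same 2x2x1 (or 2x2x2) groups of
# cubes as a zero-based frame would. A small sketch of the same logic using a
# namedtuple stand-in for bossutils.multidimensional.XYZ and assumed values:
from collections import namedtuple

XYZExample = namedtuple('XYZExample', ['x', 'y', 'z'])

cubes_start = XYZExample(5, 3, 7)     # hypothetical first cube of the frame
step = XYZExample(2, 2, 1)            # anisotropic downsample step

mod = XYZExample(cubes_start.x % step.x, cubes_start.y % step.y, cubes_start.z % step.z)
if mod.x != 0:
    cubes_start = XYZExample(cubes_start.x - 1, cubes_start.y, cubes_start.z)
if mod.y != 0:
    cubes_start = XYZExample(cubes_start.x, cubes_start.y - 1, cubes_start.z)
if mod.z != 0:
    cubes_start = XYZExample(cubes_start.x, cubes_start.y, cubes_start.z - 1)

print(cubes_start)   # XYZExample(x=4, y=2, z=7) -- z untouched because step.z == 1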
def downsample_channel(args):
    """
    Slice the given channel into chunks of 2x2x2 or 2x2x1 cubes that are then
    sent to the downsample_volume lambda for downsampling into a 1x1x1 cube at
    resolution + 1.

    Makes use of the bossutils.multidimensional library for simplified vector
    math. Generators are used as much as possible (instead of lists) so that
    large lists of data are not actualized and kept in memory.

    Args:
        args {
            downsample_volume_lambda (ARN | lambda name)

            collection_id (int)
            experiment_id (int)
            channel_id (int)
            annotation_channel (bool)
            data_type (str) 'uint8' | 'uint16' | 'uint64'

            s3_bucket (URL)
            s3_index (URL)

            x_start (int)
            y_start (int)
            z_start (int)

            x_stop (int)
            y_stop (int)
            z_stop (int)

            resolution (int) The resolution to downsample. Creates resolution + 1
            resolution_max (int) The maximum resolution to generate
            res_lt_max (bool) = args['resolution'] < (args['resolution_max'] - 1)

            type (str) 'isotropic' | 'anisotropic'
            iso_resolution (int) if resolution >= iso_resolution && type == 'anisotropic' downsample both

            aws_region (str) AWS region to run in such as us-east-1
        }

    Return:
        dict: An updated argument dictionary containing the shrunk frame,
              resolution, and res_lt_max values
    """

    # TODO: load downsample_volume_lambda from boss config

    #log.debug("Downsampling resolution " + str(args['resolution']))

    resolution = args['resolution']

    dim = XYZ(*CUBOIDSIZE[resolution])
    #log.debug("Cube dimensions: {}".format(dim))

    def frame(key):
        return XYZ(args[key.format('x')], args[key.format('y')], args[key.format('z')])

    # Figure out variables for isotropic, anisotropic, or isotropic and anisotropic
    # downsampling. If both are happening, fanout one and then the other in series.
    configs = []
    if args['type'] == 'isotropic':
        configs.append({
            'name': 'isotropic',
            'step': XYZ(2, 2, 2),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })
    else:
        configs.append({
            'name': 'anisotropic',
            'step': XYZ(2, 2, 1),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })

        # if this iteration will split into aniso and iso downsampling, copy the coordinate frame
        if resolution == args['iso_resolution']:
            def copy(var):
                args['iso_{}_start'.format(var)] = args['{}_start'.format(var)]
                args['iso_{}_stop'.format(var)] = args['{}_stop'.format(var)]
            copy('x')
            copy('y')
            copy('z')

        if resolution >= args['iso_resolution']: # DP TODO: Figure out how to launch aniso iso version with mutating arguments
            configs.append({
                'name': 'isotropic',
                'step': XYZ(2, 2, 2),
                'iso_flag': True,
                'frame_start_key': 'iso_{}_start',
                'frame_stop_key': 'iso_{}_stop',
            })

    for config in configs:
        # Different ID and queue for each resolution, as it takes 60 seconds to delete a queue
        # Different ID and queue for each iso/aniso downsample in case a DLQ message is received
        # for the previous config
        downsample_id = str(random.random())[2:]  # remove the '0.' part of the number

        dlq_arn = create_queue('downsample-dlq-' + downsample_id)
        cubes_arn = create_queue('downsample-cubes-' + downsample_id)

        try:
            frame_start = frame(config['frame_start_key'])
            frame_stop = frame(config['frame_stop_key'])
            step = config['step']
            use_iso_flag = config['iso_flag'] # If the resulting cube should be marked with the ISO flag

            # Round to the furthest full cube from the center of the data
            cubes_start = frame_start // dim
            cubes_stop = ceildiv(frame_stop, dim)

            # For a non-zero start, make sure start cube aligns with a zero start downsample
            # so that the data aligns and there are no shifts with the new downsampled data
            mod = cubes_start % step
            if mod.x != 0:
                cubes_start = XYZ(cubes_start.x - 1, cubes_start.y, cubes_start.z)
            if mod.y != 0:
                cubes_start = XYZ(cubes_start.x, cubes_start.y - 1, cubes_start.z)
            if mod.z != 0:
                cubes_start = XYZ(cubes_start.x, cubes_start.y, cubes_start.z - 1)

            log.debug('Downsampling {} resolution {}'.format(config['name'], resolution))
            log.debug("Frame corner: {}".format(frame_start))
            log.debug("Frame extent: {}".format(frame_stop))
            log.debug("Cubes corner: {}".format(cubes_start))
            log.debug("Cubes extent: {}".format(cubes_stop))
            log.debug("Downsample step: {}".format(step))

            log.debug("Populating input cube")
            cube_count = populate_cubes(cubes_arn, cubes_start, cubes_stop, step)

            log.debug("Invoking downsample lambdas")
            lambda_count = ceildiv(cube_count, BUCKET_SIZE) + EXTRA_LAMBDAS
            lambda_args = {
                'bucket_size': BUCKET_SIZE,
                'args': args,
                'step': step,
                'dim': dim,
                'use_iso_flag': use_iso_flag,
                'dlq_arn': dlq_arn,
                'cubes_arn': cubes_arn,
            }

            launch_lambdas(lambda_count,
                           args['downsample_volume_lambda'],
                           json.dumps(lambda_args).encode('UTF8'),
                           dlq_arn,
                           cubes_arn)

            # Resize the coordinate frame extents as the data shrinks
            # DP NOTE: doesn't currently work correctly with non-zero frame starts
            def resize(var, size):
                start = config['frame_start_key'].format(var)
                stop = config['frame_stop_key'].format(var)
                args[start] //= size
                args[stop] = ceildiv(args[stop], size)
            resize('x', step.x)
            resize('y', step.y)
            resize('z', step.z)
        finally:
            delete_queue(dlq_arn)
            delete_queue(cubes_arn)

    # Advance the loop and recalculate the conditional
    # Using max - 1 because resolution_max should not be a valid resolution
    # and res < res_max will end with res = res_max - 1, which generates res_max resolution
    args['resolution'] = resolution + 1
    args['res_lt_max'] = args['resolution'] < (args['resolution_max'] - 1)

    return args
def launch_lambdas(total_count, lambda_arn, lambda_args, dlq_arn, cubes_arn):
    """Launch lambdas to process all of the target cubes to downsample

    Launches an initial set of lambdas and monitors the cubes SQS queue to
    understand the current status. If the count in the queue doesn't change for
    UNCHANGING_LAUNCH cycles then it will calculate how many more lambdas to
    launch and launch them. If the queue count doesn't change after launching
    more lambdas an exception will eventually be raised so the activity does
    not hang forever.

    Args:
        total_count (int): The initial number of lambdas to launch
        lambda_arn (str): Name or ARN of the lambda function to invoke
        lambda_args (str): The lambda payload to pass when invoking
        dlq_arn (str): ARN of the SQS DLQ to monitor for error messages
        cubes_arn (str): ARN of the input cubes SQS queue to monitor for
                         completion of the downsample
    """
    per_lambda = ceildiv(total_count, POOL_SIZE)
    d, m = divmod(total_count, per_lambda)
    counts = [per_lambda] * d
    if m > 0:
        counts += [m]

    assert sum(counts) == total_count, "Didn't calculate counts per lambda correctly"

    log.debug("Launching {} lambdas in chunks of {} using {} processes".format(total_count, per_lambda, POOL_SIZE))

    args = ((count, lambda_arn, lambda_args, dlq_arn)
            for count in counts)

    start = datetime.now()
    with Pool(POOL_SIZE) as pool:
        pool.starmap(invoke_lambdas, args)
    stop = datetime.now()

    log.info("Launched {} lambdas in {}".format(total_count, stop - start))

    # Finished launching lambdas, need to wait for all to finish
    log.info("Finished launching lambdas")

    polling_start = datetime.now()
    previous_count = 0
    count_count = 1
    zero_count = 0
    while True:
        if check_queue(dlq_arn) > 0:
            raise FailedLambdaError()

        count = check_queue(cubes_arn)
        log.debug("Status polling - count {}".format(count))
        log.debug("Throttling count {}".format(lambda_throttle_count(lambda_arn)))

        if count == previous_count:
            count_count += 1
            if count_count == UNCHANGING_MAX:
                raise ResolutionHierarchyError("Status polling stuck at {} items for {}".format(count, datetime.now() - polling_start))

            if count_count == UNCHANGING_THROTTLE:
                # If the throttle count is increasing -> Sleep
                # If the throttle count is decreasing
                #     If the cubes queue count has changed -> Continue regular polling
                #     If the cubes queue count has not changed -> Sleep
                # If the throttle count is zero -> Continue regular polling
                #
                # This means that this loop will block until throttle has stopped / cubes
                # in the queue have been processed.
                #
                # If throttling stops and no cubes have been processed the UNCHANGING_MAX
                # threshold is the last guard so the activity doesn't hang
                prev_throttle = 0
                while True:
                    throttle = lambda_throttle_count(lambda_arn)
                    if throttle < prev_throttle and check_queue(cubes_arn) != count:
                        # If the throttle count is decreasing and the queue count has
                        # changed continue the regular polling cycle
                        break
                    if throttle == 0:
                        # No throttling happening
                        break
                    if throttle > 0:
                        # Don't update count if there was an error getting the current count
                        prev_throttle = throttle

                    time.sleep(MAX_LAMBDA_TIME.seconds)

                    if check_queue(dlq_arn) > 0:
                        raise FailedLambdaError()

            if count_count == UNCHANGING_LAUNCH:
                # We have noticed that the last few messages are spread across multiple AWS queue servers and
                # a single lambda requesting 10 messages will only get messages from a single queue server. So we
                # pad the number of lambdas by EXTRA_LAMBDAS to avoid extra looping cycles.
                needed = ceildiv(count, BUCKET_SIZE)
                if needed > 0:
                    log.debug("Launching {} more lambdas".format(needed))

                    start = datetime.now()
                    invoke_lambdas(needed + EXTRA_LAMBDAS, lambda_arn, lambda_args, dlq_arn)
                    stop = datetime.now()

                    log.debug("Launched {} lambdas with {} extra in {}".format(needed, EXTRA_LAMBDAS, stop - start))
        else:
            previous_count = count
            count_count = 1

        if count == 0:
            zero_count += 1
            if zero_count == ZERO_COUNT:
                log.info("Finished polling for lambda completion")
                break
            else:
                log.info("Zero cubes left, waiting to make sure lambda finishes")
        else:
            zero_count = 0

        time.sleep(MAX_LAMBDA_TIME.seconds)