Example #1
    def delete_tiles(self, ingest_job):
        """
        Delete all remaining tiles from the tile index database and tile bucket
        Args:
            ingest_job: Ingest job model

        Returns:
            None
        Raises:
            BossError : For exceptions that happen while deleting the tiles and index

        """
        try:
            # Get all the chunks for a job
            tiledb = BossTileIndexDB(ingest_job.collection + '&' +
                                     ingest_job.experiment)
            tilebucket = TileBucket(ingest_job.collection + '&' +
                                    ingest_job.experiment)
            chunks = list(tiledb.getTaskItems(ingest_job.id))

            for chunk in chunks:
                chunk_key = chunk['chunk_key']
                # delete each tile in the chunk
                for key in chunk['tile_uploaded_map']:
                    tilebucket.deleteObject(key)
                tiledb.deleteCuboid(chunk_key, ingest_job.id)

        except Exception as e:
            raise BossError(
                "Exception while deleteing tiles for the ingest job {}. {}".
                format(ingest_job.id, e), ErrorCodes.BOSS_SYSTEM_ERROR)
Example #2
 def setup_class(self):
     """Setup class parameters"""
     # create the tile index table. skip if it exists
     try:
         TileIndexDB.createTable(endpoint_url=settings.DYNAMO_ENDPOINT)
         CuboidIndexDB.createTable(endpoint_url=settings.DYNAMO_ENDPOINT)
     except Exception as e:
         pass
     self.tileindex_db = TileIndexDB(nd_proj.project_name,
                                     endpoint_url=settings.DYNAMO_ENDPOINT)
     self.tile_bucket = TileBucket(nd_proj.project_name,
                                   endpoint_url=settings.S3_ENDPOINT)
     [self.x_tile, self.y_tile, self.z_tile] = [0, 0, 0]
     supercuboid_key = 'testing'
     message_id = '123456'
     receipt_handle = 'testing123456'
     message = serializer.encodeDeleteMessage(supercuboid_key, message_id,
                                              receipt_handle)
     # insert message in the cleanup queue
     CleanupQueue.createQueue(nd_proj, endpoint_url=settings.SQS_ENDPOINT)
     self.cleanup_queue = CleanupQueue(nd_proj,
                                       endpoint_url=settings.SQS_ENDPOINT)
     self.cleanup_queue.sendMessage(message)
     # upload tiles for the supercuboid
     for z_index in range(self.z_tile, settings.SUPER_CUBOID_SIZE[2], 1):
         tile_handle = cStringIO.StringIO()
         self.tile_bucket.putObject(tile_handle, nd_proj.channel_name,
                                    nd_proj.resolution, self.x_tile,
                                    self.y_tile, z_index, message_id,
                                    receipt_handle)
Example #3
    def delete_tiles(self, ingest_job):
        """
        Delete all remaining tiles from the tile index database and tile bucket

        5/24/2018 - This code depends on a GSI for the tile index.  The GSI was
        removed because its key didn't shard well.  Cleanup will now be handled
        by TTL policies applied to the tile bucket and the tile index.  This
        method will be removed once that code is merged.

        Args:
            ingest_job: Ingest job model

        Returns:
            None
        Raises:
            BossError : For exceptions that happen while deleting the tiles and index

        """
        try:
            # Get all the chunks for a job
            tiledb = BossTileIndexDB(ingest_job.collection + '&' + ingest_job.experiment)
            tilebucket = TileBucket(ingest_job.collection + '&' + ingest_job.experiment)
            chunks = list(tiledb.getTaskItems(ingest_job.id))

            for chunk in chunks:
                # delete each tile in the chunk
                for key in chunk['tile_uploaded_map']:
                    tilebucket.deleteObject(key)
                tiledb.deleteCuboid(chunk['chunk_key'], ingest_job.id)

        except Exception as e:
            raise BossError("Exception while deleteing tiles for the ingest job {}. {}".format(ingest_job.id, e),
                            ErrorCodes.BOSS_SYSTEM_ERROR)
Example #4
    def teardown_class(self):
        """Teardown class parameters"""

        # cleanup tilebucket
        for z_index in range(self.z_tile, settings.SUPER_CUBOID_SIZE[2], 1):
            tile_key = self.tile_bucket.encodeObjectKey(
                nd_proj.channel_name,
                nd_proj.resolution,
                self.x_tile,
                self.y_tile,
                z_index,
            )
            self.tile_bucket.deleteObject(tile_key)

        morton_index = XYZMorton(self.tiles)
        supercuboid_key = self.cuboid_bucket.generateSupercuboidKey(
            nd_proj.channel_name, nd_proj.resolution, morton_index)
        self.cuboid_bucket.deleteObject(supercuboid_key)
        # delete created entities
        TileIndexDB.deleteTable(endpoint_url="http://localhost:8000")
        CuboidIndexDB.deleteTable(endpoint_url="http://localhost:8000")
        IngestQueue.deleteQueue(nd_proj, endpoint_url="http://localhost:4568")
        CleanupQueue.deleteQueue(nd_proj, endpoint_url="http://localhost:4568")
        TileBucket.deleteBucket(endpoint_url="http://localhost:4567")
        try:
            CuboidBucket.deleteBucket(endpoint_url="http://localhost:4567")
        except Exception as e:
            pass
Example #5
 def setup_class(cls):
   """Setup Parameters"""
   if 'S3_ENDPOINT' in dir(settings):
     cls.endpoint_url = settings.S3_ENDPOINT
   else:
     cls.endpoint_url = None
   TileBucket.createBucket(endpoint_url=cls.endpoint_url)
   cls.tile_bucket = TileBucket(nd_proj.project_name, endpoint_url=cls.endpoint_url)
Example #6
  def teardown_class(cls):
    """Teardown Parameters"""

    # Ensure bucket empty before deleting.
    for objs in cls.tile_bucket.getAllObjects():
      cls.tile_bucket.deleteObject(objs.key)

    TileBucket.deleteBucket(endpoint_url=cls.endpoint_url)
Example #7
    def setup_class(self):
        """Setup class parameters"""

        # create the tile index table. skip if it exists
        try:
            TileIndexDB.createTable(endpoint_url="http://localhost:8000")
            CuboidIndexDB.createTable(endpoint_url="http://localhost:8000")
        except Exception as e:
            pass
        self.tileindex_db = TileIndexDB(nd_proj.project_name,
                                        endpoint_url="http://localhost:8000")

        # create the tile bucket
        TileBucket.createBucket(endpoint_url="http://localhost:4567")
        self.tile_bucket = TileBucket(nd_proj.project_name,
                                      endpoint_url="http://localhost:4567")
        self.tiles = [self.x_tile, self.y_tile, self.z_tile] = [0, 0, 0]

        message_id = "testing"
        receipt_handle = "123456"
        # insert SUPER_CUBOID_SIZE tiles in the bucket
        for z_index in range(self.z_tile, settings.SUPER_CUBOID_SIZE[2], 1):
            tile_handle = cStringIO.StringIO()
            self.tile_bucket.putObject(
                tile_handle,
                nd_proj.channel_name,
                nd_proj.resolution,
                self.x_tile,
                self.y_tile,
                z_index,
                message_id,
                receipt_handle,
            )

        # creating the cuboid bucket
        CuboidBucket.createBucket(endpoint_url="http://localhost:4567")
        self.cuboid_bucket = CuboidBucket(nd_proj.project_name,
                                          endpoint_url="http://localhost:4567")

        # create the ingest queue
        IngestQueue.createQueue(nd_proj, endpoint_url="http://localhost:4568")
        self.ingest_queue = IngestQueue(nd_proj,
                                        endpoint_url="http://localhost:4568")

        # send message to the ingest queue
        morton_index = XYZMorton(self.tiles)
        supercuboid_key = self.cuboid_bucket.generateSupercuboidKey(
            nd_proj.channel_name, nd_proj.resolution, morton_index)
        response = self.ingest_queue.sendMessage(supercuboid_key)

        # create the cleanup queue
        CleanupQueue.createQueue(nd_proj, endpoint_url="http://localhost:4568")
Example #8
    def generate_ingest_credentials(self, ingest_job):
        """
        Create new ingest credentials for a job
        Args:
            ingest_job: Ingest job model
        Returns:
            None
        Raises:
            (ValueError): On bad ingest_type

        """
        # Generate credentials for the ingest_job
        upload_queue = self.get_ingest_job_upload_queue(ingest_job)
        ingest_creds = IngestCredentials()
        if ingest_job.ingest_type == IngestJob.TILE_INGEST:
            bucket_name = TileBucket.getBucketName()
        elif ingest_job.ingest_type == IngestJob.VOLUMETRIC_INGEST:
            bucket_name = INGEST_BUCKET
        else:
            raise ValueError('Unknown ingest_type: {}'.format(
                ingest_job.ingest_type))
        policy = BossUtil.generate_ingest_policy(
            ingest_job.id,
            upload_queue,
            bucket_name,
            ingest_type=ingest_job.ingest_type)
        ingest_creds.generate_credentials(ingest_job.id, policy.arn)
Example #9
    def test_create_ingest_policy_volumetric(self, boss_util_fixtures):
        self._setup(boss_util_fixtures)
        policy = BossUtil.generate_ingest_policy(
            self.job_id,
            self.upload_queue,
            self.tile_index_queue,
            self.tile_bucket.bucket.name,
            ingest_type=VOLUMETRIC_INGEST,
        )
        from ndingest.ndbucket.tilebucket import TileBucket

        try:
            assert settings.IAM_POLICY_PATH == policy.path
            assert policy.default_version is not None
            statements = policy.default_version.document["Statement"]
            assert 2 == len(statements)
            for stmt in statements:
                if stmt["Sid"] == "ClientUploadQueuePolicy":
                    for perm in [
                            "sqs:ReceiveMessage",
                            "sqs:GetQueueAttributes",
                            "sqs:DeleteMessage",
                    ]:
                        assert perm in stmt["Action"]
                    assert 3 == len(stmt["Action"])
                    assert self.upload_queue.arn == stmt["Resource"]
                elif stmt["Sid"] == "ClientTileBucketPolicy":
                    assert "s3:PutObject" in stmt["Action"]
                    assert len(stmt["Action"]) == 1
                    assert (TileBucket.buildArn(
                        self.tile_bucket.bucket.name) == stmt["Resource"])
        finally:
            policy.delete()
Example #10
    def get_tile_bucket(self):
        """

        Returns:

        """
        return TileBucket.getBucketName()
Example #11
def test_createPolicy_with_folder(tile_bucket):
    """Test policy creation with a folder"""

    from ndingest.ndbucket.tilebucket import TileBucket

    statements = [{
        "Sid": "WriteAccess",
        "Effect": "Allow",
        "Action": ["s3:PutObject"]
    }]

    expName = "ndingest_test_tile_bucket_policy"
    folder = "some/folder"

    actual = tile_bucket.createPolicy(statements, expName, folder)

    try:
        assert expName == actual.policy_name
        assert settings.IAM_POLICY_PATH == actual.path
        assert actual.default_version is not None

        # Test that the statements' resource set to this bucket and folder.
        statements = actual.default_version.document["Statement"]
        bucket_name = TileBucket.getBucketName()
        arn = "arn:aws:s3:::{}/{}/*".format(bucket_name, folder)
        for stmt in statements:
            assert stmt["Resource"] == arn
    finally:
        actual.delete()
Example #12
  def test_createPolicy_with_folder(self):
    """Test policy creation with a folder"""

    statements = [{
      'Sid': 'WriteAccess',
      'Effect': 'Allow',
      'Action': ['s3:PutObject']
    }]

    expName = 'ndingest_test_tile_bucket_policy'
    folder = 'some/folder'

    actual = self.tile_bucket.createPolicy(statements, expName, folder)

    try:
        assert(expName == actual.policy_name)
        assert(settings.IAM_POLICY_PATH == actual.path)
        assert(actual.default_version is not None)

        # Test that the statements' resource set to this bucket and folder.
        statements = actual.default_version.document['Statement']
        bucket_name = TileBucket.getBucketName()
        arn = 'arn:aws:s3:::{}/{}/*'.format(bucket_name, folder)
        for stmt in statements:
            assert(stmt['Resource'] == arn)
    finally:
        actual.delete()
Example #13
def test_buildArn_with_folder_no_slashes():
    """Test buildArn with a folder."""

    from ndingest.ndbucket.tilebucket import TileBucket

    expected = "arn:aws:s3:::my_bucket/some/folder/*"
    actual = TileBucket.buildArn("my_bucket", "some/folder")
    assert expected == actual
Example #14
def test_buildArn_no_folder():
    """Test buildArn with folder's default value."""

    from ndingest.ndbucket.tilebucket import TileBucket

    expected = "arn:aws:s3:::my_bucket/*"
    actual = TileBucket.buildArn("my_bucket")
    assert expected == actual
Example #15
 def setup_class(self):
     """Setup class parameters"""
     # create the tile index table. skip if it exists
     try:
         TileIndexDB.createTable(endpoint_url=settings.DYNAMO_ENDPOINT)
     except Exception as e:
         pass
     self.tileindex_db = TileIndexDB(nd_proj.project_name,
                                     endpoint_url=settings.DYNAMO_ENDPOINT)
     # create the ingest queue
     IngestQueue.createQueue(nd_proj, endpoint_url=settings.SQS_ENDPOINT)
     # create the upload queue
     UploadQueue.createQueue(nd_proj, endpoint_url=settings.SQS_ENDPOINT)
     self.upload_queue = UploadQueue(nd_proj,
                                     endpoint_url=settings.SQS_ENDPOINT)
     tile_bucket = TileBucket(nd_proj.project_name,
                              endpoint_url=settings.S3_ENDPOINT)
     [self.x_tile, self.y_tile, self.z_tile] = [0, 0, 0]
     message = serializer.encodeUploadMessage(
         nd_proj.project_name,
         nd_proj.channel_name,
         nd_proj.resolution,
         self.x_tile,
         self.y_tile,
         self.z_tile,
     )
     # insert message in the upload queue
     self.upload_queue.sendMessage(message)
     # receive message and upload object
     for (
             message_id,
             receipt_handle,
             message_body,
     ) in self.upload_queue.receiveMessage():
         tile_handle = cStringIO.StringIO()
         tile_bucket.putObject(
             tile_handle,
             nd_proj.channel_name,
             nd_proj.resolution,
             self.x_tile,
             self.y_tile,
             self.z_tile,
             message_id,
             receipt_handle,
         )
Example #16
    def get_tile_bucket(self):
        """
        Get the name of the ingest tile bucket

        Returns:
            Str: Name of the Tile bucket

        """
        return TileBucket.getBucketName()
Example #17
    def setup_ingest(self, creator, config_data):
        """
        Setup the ingest job. This is the primary method for the ingest manager.
        It creates the ingest job and the queues required for the ingest. It also uploads the messages for the ingest.

        Args:
            creator: The validated user from the request to create the ingest job
            config_data : Config data to create the ingest job

        Returns:
            IngestJob : data model containing the ingest job

        Raises:
            BossError : For all exceptions that happen

        """
        # Validate config data and schema

        self.owner = creator
        try:
            valid_schema = self.validate_config_file(config_data)
            valid_prop = self.validate_properties()
            if valid_schema is True and valid_prop is True:
                # create the django model for the job
                self.job = self.create_ingest_job()

                # create the additional resources needed for the ingest
                # initialize the ndingest project for use with the library
                proj_class = BossIngestProj.load()
                self.nd_proj = proj_class(self.collection.name,
                                          self.experiment.name,
                                          self.channel.name, self.resolution,
                                          self.job.id)

                # Create the upload queue
                upload_queue = self.create_upload_queue()
                self.job.upload_queue = upload_queue.url

                # Create the ingest queue
                ingest_queue = self.create_ingest_queue()
                self.job.ingest_queue = ingest_queue.url

                self.generate_upload_tasks()
                tile_bucket = TileBucket(self.job.collection + '&' +
                                         self.job.experiment)

                self.create_ingest_credentials(upload_queue, tile_bucket)

            # TODO create channel if needed

        except BossError as err:
            raise BossError(err.message, err.error_code)
        except Exception as e:
            raise BossError(
                "Unable to create the upload and ingest queue.{}".format(e),
                ErrorCodes.BOSS_SYSTEM_ERROR)
        return self.job
Example #18
    def get_tile_bucket(self):
        """
        Get the name of the ingest tile bucket

        Returns:
            Str: Name of the Tile bucket

        """
        return TileBucket.getBucketName()
Example #19
def test_buildArn_with_folder_with_slashes():
    """Test buildArn with folder with slashes at beginning and end."""

    # Import here so S3 is properly mocked.
    from ndingest.ndbucket.tilebucket import TileBucket

    expected = "arn:aws:s3:::my_bucket/some/folder/*"
    actual = TileBucket.buildArn("my_bucket", "/some/folder/")
    assert expected == actual
Example #20
    def setup_ingest(self, creator, config_data):
        """

        Args:


        Returns:

        """
        # Validate config data and schema

        self.owner = creator
        try:
            valid_schema = self.validate_config_file(config_data)
            valid_prop = self.validate_properties()
            if valid_schema is True and valid_prop is True:
                # create the django model for the job
                self.job = self.create_ingest_job()

                # create the additional resources needed for the ingest
                # initialize the ndingest project for use with the library
                proj_class = BossIngestProj.load()
                self.nd_proj = proj_class(self.collection.name,
                                          self.experiment.name,
                                          self.channel_layer.name,
                                          self.resolution, self.job.id)

                # Create the upload queue
                upload_queue = self.create_upload_queue()
                self.job.upload_queue = upload_queue.url

                # Create the ingest queue
                ingest_queue = self.create_ingest_queue()
                self.job.ingest_queue = ingest_queue.url

                self.generate_upload_tasks()
                tile_bucket = TileBucket(self.job.collection + '&' +
                                         self.job.experiment)

                self.create_ingest_credentials(upload_queue, tile_bucket)

                # Update status
                self.job.status = 1
                self.job.save()

            # TODO create channel if needed

        except BossError as err:
            raise BossError(err.message, err.error_code)
        except Exception as e:
            raise BossError(
                "Unable to create the upload and ingest queue.{}".format(e),
                ErrorCodes.BOSS_SYSTEM_ERROR)
        return self.job
Example #21
    def setUpClass(cls):
        # Silence warnings about open boto3 sessions.
        warnings.filterwarnings('ignore')

        cls.job_id = 123
        cls.nd_proj = BossIngestProj('testCol', 'kasthuri11', 'image', 0,
                                     cls.job_id)

        TileBucket.createBucket()
        cls.tile_bucket = TileBucket(cls.nd_proj.project_name)

        warnings.simplefilter('ignore')

        #with open('/Users/manavpj1/repos/boss/django/bossingest/test/boss_tile_index.json') as fp:
        #    schema = json.load(fp)

        #BossTileIndexDB.createTable(schema, endpoint_url=settings.DYNAMO_TEST_ENDPOINT)

        cls.tileindex_db = BossTileIndexDB(
            cls.nd_proj.project_name,
            endpoint_url=settings.DYNAMO_TEST_ENDPOINT)
Example #22
    def generate_ingest_credentials(self, ingest_job):
        """
        Create new ingest credentials for a job
        Args:
            upload_queue : Upload queue for the job
            tile_bucket : Name of the tile bucket for the job
        Returns:
            None

        """
        # Generate credentials for the ingest_job
        # Create the credentials for the job
        tile_bucket = TileBucket(ingest_job.collection + '&' +
                                 ingest_job.experiment)
        upload_queue = self.get_ingest_job_upload_queue(ingest_job)
        ingest_creds = IngestCredentials()
        policy = BossUtil.generate_ingest_policy(ingest_job.id, upload_queue,
                                                 tile_bucket)
        ingest_creds.generate_credentials(ingest_job.id, policy.arn)
Example #23
    def generate_ingest_credentials(self, ingest_job):
        """
        Create new ingest credentials for a job
        Args:
            ingest_job: Ingest job model
        Returns:
            None
        Raises:
            (ValueError): On bad ingest_type

        """
        # Generate credentials for the ingest_job
        upload_queue = self.get_ingest_job_upload_queue(ingest_job)
        tile_index_queue = None
        ingest_creds = IngestCredentials()
        if ingest_job.ingest_type == IngestJob.TILE_INGEST:
            bucket_name = TileBucket.getBucketName()
            tile_index_queue = self.get_ingest_job_tile_index_queue(ingest_job)
        elif ingest_job.ingest_type == IngestJob.VOLUMETRIC_INGEST:
            bucket_name = INGEST_BUCKET
        else:
            raise ValueError('Unknown ingest_type: {}'.format(ingest_job.ingest_type))
        policy = BossUtil.generate_ingest_policy(ingest_job.id, upload_queue,
                                                 tile_index_queue, bucket_name,
                                                 ingest_type=ingest_job.ingest_type)
        ingest_creds.generate_credentials(ingest_job.id, policy.arn)
Example #24
def handler(event, context):
    # Load settings
    SETTINGS = BossSettings.load()

    # Used as a guard against trying to delete the SQS message when lambda is
    # triggered by SQS.
    sqs_triggered = 'Records' in event and len(event['Records']) > 0

    if sqs_triggered:
        # Lambda invoked by an SQS trigger.
        msg_data = json.loads(event['Records'][0]['body'])
        # Load the project info from the chunk key you are processing
        chunk_key = msg_data['chunk_key']
        proj_info = BossIngestProj.fromSupercuboidKey(chunk_key)
        proj_info.job_id = msg_data['ingest_job']
    else:
        # Standard async invoke of this lambda.

        # Load the project info from the chunk key you are processing
        proj_info = BossIngestProj.fromSupercuboidKey(event["chunk_key"])
        proj_info.job_id = event["ingest_job"]

        # Get message from SQS ingest queue, try for ~2 seconds
        rx_cnt = 0
        msg_data = None
        msg_id = None
        msg_rx_handle = None
        while rx_cnt < 6:
            ingest_queue = IngestQueue(proj_info)
            msg = [x for x in ingest_queue.receiveMessage()]
            if msg:
                msg = msg[0]
                print("MESSAGE: {}".format(msg))
                print(len(msg))
                msg_id = msg[0]
                msg_rx_handle = msg[1]
                msg_data = json.loads(msg[2])
                print("MESSAGE DATA: {}".format(msg_data))
                break
            else:
                rx_cnt += 1
                print("No message found. Try {} of 6".format(rx_cnt))
                time.sleep(1)

        if not msg_id:
            # No tiles ready to ingest.
            print("No ingest message available")
            return

        # Get the chunk key of the tiles to ingest.
        chunk_key = msg_data['chunk_key']


    tile_error_queue = TileErrorQueue(proj_info)

    print("Ingesting Chunk {}".format(chunk_key))
    tiles_in_chunk = int(chunk_key.split('&')[1])

    # Setup SPDB instance
    sp = SpatialDB(msg_data['parameters']["KVIO_SETTINGS"],
                   msg_data['parameters']["STATEIO_CONFIG"],
                   msg_data['parameters']["OBJECTIO_CONFIG"])

    # Get tile list from Tile Index Table
    tile_index_db = BossTileIndexDB(proj_info.project_name)
    # tile_index_result (dict): keys are S3 object keys of the tiles comprising the chunk.
    tile_index_result = tile_index_db.getCuboid(msg_data["chunk_key"], int(msg_data["ingest_job"]))
    if tile_index_result is None:
        # If chunk_key is gone, another lambda uploaded the cuboids and deleted the chunk_key afterwards.
        if not sqs_triggered:
            # Remove message so it's not redelivered.
            ingest_queue.deleteMessage(msg_id, msg_rx_handle)

        print("Aborting due to chunk key missing from tile index table")
        return

    # Sort the tile keys
    print("Tile Keys: {}".format(tile_index_result["tile_uploaded_map"]))
    tile_key_list = [x.rsplit("&", 2) for x in tile_index_result["tile_uploaded_map"].keys()]
    if len(tile_key_list) < tiles_in_chunk:
        print("Not a full set of 16 tiles. Assuming it has handled already, tiles: {}".format(len(tile_key_list)))
        if not sqs_triggered:
            ingest_queue.deleteMessage(msg_id, msg_rx_handle)
        return
    tile_key_list = sorted(tile_key_list, key=lambda x: int(x[1]))
    tile_key_list = ["&".join(x) for x in tile_key_list]
    print("Sorted Tile Keys: {}".format(tile_key_list))

    # Augment the resource JSON data (pruned due to S3 metadata size limits) so it will instantiate properly
    resource_dict = msg_data['parameters']['resource']
    _, exp_name, ch_name = resource_dict["boss_key"].split("&")

    resource_dict["channel"]["name"] = ch_name
    resource_dict["channel"]["description"] = ""
    resource_dict["channel"]["sources"] = []
    resource_dict["channel"]["related"] = []
    resource_dict["channel"]["default_time_sample"] = 0
    resource_dict["channel"]["downsample_status"] = "NOT_DOWNSAMPLED"

    resource_dict["experiment"]["name"] = exp_name
    resource_dict["experiment"]["description"] = ""
    resource_dict["experiment"]["num_time_samples"] = 1
    resource_dict["experiment"]["time_step"] = None
    resource_dict["experiment"]["time_step_unit"] = None

    resource_dict["coord_frame"]["name"] = "cf"
    resource_dict["coord_frame"]["name"] = ""
    resource_dict["coord_frame"]["x_start"] = 0
    resource_dict["coord_frame"]["x_stop"] = 100000
    resource_dict["coord_frame"]["y_start"] = 0
    resource_dict["coord_frame"]["y_stop"] = 100000
    resource_dict["coord_frame"]["z_start"] = 0
    resource_dict["coord_frame"]["z_stop"] = 100000
    resource_dict["coord_frame"]["voxel_unit"] = "nanometers"

    # Setup the resource
    resource = BossResourceBasic()
    resource.from_dict(resource_dict)
    dtype = resource.get_numpy_data_type()

    # read all tiles from bucket into a slab
    tile_bucket = TileBucket(proj_info.project_name)
    data = []
    num_z_slices = 0
    for tile_key in tile_key_list:
        try:
            image_data, message_id, receipt_handle, metadata = tile_bucket.getObjectByKey(tile_key)
        except KeyError:
            print('Key: {} not found in tile bucket, assuming redelivered SQS message and aborting.'.format(
                tile_key))
            if not sqs_triggered:
                # Remove message so it's not redelivered.
                ingest_queue.deleteMessage(msg_id, msg_rx_handle)
            print("Aborting due to missing tile in bucket")
            return

        image_bytes = BytesIO(image_data)
        image_size = image_bytes.getbuffer().nbytes

        # Get tile size from metadata; needed to shape a black tile if the actual tile is corrupt.
        if 'x_size' in metadata:
            tile_size_x = metadata['x_size']
        else:
            print('MetadataMissing: x_size not in tile metadata:  using 1024.')
            tile_size_x = 1024

        if 'y_size' in metadata:
            tile_size_y = metadata['y_size']
        else:
            print('MetadataMissing: y_size not in tile metadata:  using 1024.')
            tile_size_y = 1024

        if image_size == 0:
            print('TileError: Zero length tile, using black instead: {}'.format(tile_key))
            error_msg = 'Zero length tile'
            enqueue_tile_error(tile_error_queue, tile_key, chunk_key, error_msg)
            tile_img = np.zeros((tile_size_x, tile_size_y), dtype=dtype)
        else:
            try:
                tile_img = np.asarray(Image.open(image_bytes), dtype=dtype)
            except TypeError as te:
                print('TileError: Incomplete tile, using black instead (tile_size_in_bytes, tile_key): {}, {}'
                      .format(image_size, tile_key))
                error_msg = 'Incomplete tile'
                enqueue_tile_error(tile_error_queue, tile_key, chunk_key, error_msg)
                tile_img = np.zeros((tile_size_x, tile_size_y), dtype=dtype)
            except OSError as oe:
                print('TileError: OSError, using black instead (tile_size_in_bytes, tile_key): {}, {} ErrorMessage: {}'
                      .format(image_size, tile_key, oe))
                error_msg = 'OSError: {}'.format(oe)
                enqueue_tile_error(tile_error_queue, tile_key, chunk_key, error_msg)
                tile_img = np.zeros((tile_size_x, tile_size_y), dtype=dtype)

        data.append(tile_img)
        num_z_slices += 1


    # Make 3D array of image data. It should be in XYZ at this point
    chunk_data = np.array(data)
    del data
    tile_dims = chunk_data.shape

    # Break into Cube instances
    print("Tile Dims: {}".format(tile_dims))
    print("Num Z Slices: {}".format(num_z_slices))
    num_x_cuboids = int(math.ceil(tile_dims[2] / CUBOIDSIZE[proj_info.resolution][0]))
    num_y_cuboids = int(math.ceil(tile_dims[1] / CUBOIDSIZE[proj_info.resolution][1]))

    print("Num X Cuboids: {}".format(num_x_cuboids))
    print("Num Y Cuboids: {}".format(num_y_cuboids))

    chunk_key_parts = BossUtil.decode_chunk_key(chunk_key)
    t_index = chunk_key_parts['t_index']
    for x_idx in range(0, num_x_cuboids):
        for y_idx in range(0, num_y_cuboids):
            # TODO: check time series support
            cube = Cube.create_cube(resource, CUBOIDSIZE[proj_info.resolution])
            cube.zeros()

            # Compute Morton ID
            # TODO: verify Morton indices correct!
            print(chunk_key_parts)
            morton_x_ind = x_idx + (chunk_key_parts["x_index"] * num_x_cuboids)
            morton_y_ind = y_idx + (chunk_key_parts["y_index"] * num_y_cuboids)
            print("Morton X: {}".format(morton_x_ind))
            print("Morton Y: {}".format(morton_y_ind))
            morton_index = XYZMorton([morton_x_ind, morton_y_ind, int(chunk_key_parts['z_index'])])

            # Insert sub-region from chunk_data into cuboid
            x_start = x_idx * CUBOIDSIZE[proj_info.resolution][0]
            x_end = x_start + CUBOIDSIZE[proj_info.resolution][0]
            x_end = min(x_end, tile_dims[2])
            y_start = y_idx * CUBOIDSIZE[proj_info.resolution][1]
            y_end = y_start + CUBOIDSIZE[proj_info.resolution][1]
            y_end = min(y_end, tile_dims[1])
            z_end = CUBOIDSIZE[proj_info.resolution][2]
            # TODO: get sub-array w/o making a copy.
            print("Yrange: {}".format(y_end - y_start))
            print("Xrange: {}".format(x_end - x_start))
            print("X start: {}".format(x_start))
            print("X stop: {}".format(x_end))
            cube.data[0, 0:num_z_slices, 0:(y_end - y_start), 0:(x_end - x_start)] = \
                chunk_data[0:num_z_slices, y_start:y_end, x_start:x_end]

            # Create object key
            object_key = sp.objectio.generate_object_key(resource, proj_info.resolution, t_index, morton_index)
            print("Object Key: {}".format(object_key))

            # Put object in S3
            sp.objectio.put_objects([object_key], [cube.to_blosc()])

            # Add object to index
            sp.objectio.add_cuboid_to_index(object_key, ingest_job=int(msg_data["ingest_job"]))

            # Update id indices if this is an annotation channel
            # We no longer index during ingest.
            #if resource.data['channel']['type'] == 'annotation':
            #   try:
            #       sp.objectio.update_id_indices(
            #           resource, proj_info.resolution, [object_key], [cube.data])
            #   except SpdbError as ex:
            #       sns_client = boto3.client('sns')
            #       topic_arn = msg_data['parameters']["OBJECTIO_CONFIG"]["prod_mailing_list"]
            #       msg = 'During ingest:\n{}\nCollection: {}\nExperiment: {}\n Channel: {}\n'.format(
            #           ex.message,
            #           resource.data['collection']['name'],
            #           resource.data['experiment']['name'],
            #           resource.data['channel']['name'])
            #       sns_client.publish(
            #           TopicArn=topic_arn,
            #           Subject='Object services misuse',
            #           Message=msg)

    lambda_client = boto3.client('lambda', region_name=SETTINGS.REGION_NAME)

    names = AWSNames.create_from_lambda_name(context.function_name)

    delete_tiles_data = {
        'tile_key_list': tile_key_list,
        'region': SETTINGS.REGION_NAME,
        'bucket': tile_bucket.bucket.name
    }

    # Delete tiles from tile bucket.
    lambda_client.invoke(
        FunctionName=names.delete_tile_objs_lambda,
        InvocationType='Event',
        Payload=json.dumps(delete_tiles_data).encode()
    )       

    delete_tile_entry_data = {
        'tile_index': tile_index_db.table.name,
        'region': SETTINGS.REGION_NAME,
        'chunk_key': chunk_key,
        'task_id': msg_data['ingest_job']
    }

    # Delete entry from tile index.
    lambda_client.invoke(
        FunctionName=names.delete_tile_index_entry_lambda,
        InvocationType='Event',
        Payload=json.dumps(delete_tile_entry_data).encode()
    )       

    if not sqs_triggered:
        # Delete message since it was processed successfully
        ingest_queue.deleteMessage(msg_id, msg_rx_handle)
Example #25
class Test_IngestLambda:
    def setup_class(self):
        """Setup class parameters"""

        # create the tile index table. skip if it exists
        try:
            TileIndexDB.createTable(endpoint_url="http://localhost:8000")
            CuboidIndexDB.createTable(endpoint_url="http://localhost:8000")
        except Exception as e:
            pass
        self.tileindex_db = TileIndexDB(nd_proj.project_name,
                                        endpoint_url="http://localhost:8000")

        # create the tile bucket
        TileBucket.createBucket(endpoint_url="http://localhost:4567")
        self.tile_bucket = TileBucket(nd_proj.project_name,
                                      endpoint_url="http://localhost:4567")
        self.tiles = [self.x_tile, self.y_tile, self.z_tile] = [0, 0, 0]

        message_id = "testing"
        receipt_handle = "123456"
        # insert SUPER_CUBOID_SIZE tiles in the bucket
        for z_index in range(self.z_tile, settings.SUPER_CUBOID_SIZE[2], 1):
            tile_handle = cStringIO.StringIO()
            self.tile_bucket.putObject(
                tile_handle,
                nd_proj.channel_name,
                nd_proj.resolution,
                self.x_tile,
                self.y_tile,
                z_index,
                message_id,
                receipt_handle,
            )

        # creating the cuboid bucket
        CuboidBucket.createBucket(endpoint_url="http://localhost:4567")
        self.cuboid_bucket = CuboidBucket(nd_proj.project_name,
                                          endpoint_url="http://localhost:4567")

        # create the ingest queue
        IngestQueue.createQueue(nd_proj, endpoint_url="http://localhost:4568")
        self.ingest_queue = IngestQueue(nd_proj,
                                        endpoint_url="http://localhost:4568")

        # send message to the ingest queue
        morton_index = XYZMorton(self.tiles)
        supercuboid_key = self.cuboid_bucket.generateSupercuboidKey(
            nd_proj.channel_name, nd_proj.resolution, morton_index)
        response = self.ingest_queue.sendMessage(supercuboid_key)

        # create the cleanup queue
        CleanupQueue.createQueue(nd_proj, endpoint_url="http://localhost:4568")

    def teardown_class(self):
        """Teardown class parameters"""

        # cleanup tilebucket
        for z_index in range(self.z_tile, settings.SUPER_CUBOID_SIZE[2], 1):
            tile_key = self.tile_bucket.encodeObjectKey(
                nd_proj.channel_name,
                nd_proj.resolution,
                self.x_tile,
                self.y_tile,
                z_index,
            )
            self.tile_bucket.deleteObject(tile_key)

        morton_index = XYZMorton(self.tiles)
        supercuboid_key = self.cuboid_bucket.generateSupercuboidKey(
            nd_proj.channel_name, nd_proj.resolution, morton_index)
        self.cuboid_bucket.deleteObject(supercuboid_key)
        # delete created entities
        TileIndexDB.deleteTable(endpoint_url="http://localhost:8000")
        CuboidIndexDB.deleteTable(endpoint_url="http://localhost:8000")
        IngestQueue.deleteQueue(nd_proj, endpoint_url="http://localhost:4568")
        CleanupQueue.deleteQueue(nd_proj, endpoint_url="http://localhost:4568")
        TileBucket.deleteBucket(endpoint_url="http://localhost:4567")
        try:
            CuboidBucket.deleteBucket(endpoint_url="http://localhost:4567")
        except Exception as e:
            pass

    def test_Uploadevent(self):
        """Testing the event"""
        # creating an emulambda function
        func = emulambda.import_lambda("ingestlambda.lambda_handler")
        # creating an emulambda event
        event = emulambda.parse_event(
            open("../ndlambda/functions/ingest/ingest_event.json").read())
        # calling the emulambda function to invoke a lambda
        emulambda.invoke_lambda(func, event, None, 0, None)

        # testing if the supercuboid was inserted in the bucket
        morton_index = XYZMorton(self.tiles)
        cuboid = self.cuboid_bucket.getObject(nd_proj.channel_name,
                                              nd_proj.resolution, morton_index)

        # testing if the message was removed from the ingest queue
        for message in self.ingest_queue.receiveMessage():
            # KL TODO write the message id into the JSON event file directly
            print(message)
Example #26
  def test_buildArn_no_folder(self):
    """Test buildArn with folder's default value."""

    expected = 'arn:aws:s3:::my_bucket/*'
    actual = TileBucket.buildArn('my_bucket')
    assert(expected == actual)
Example #27
  def test_buildArn_with_folder_no_slashes(self):
    """Test buildArn with a folder."""

    expected = 'arn:aws:s3:::my_bucket/some/folder/*'
    actual = TileBucket.buildArn('my_bucket', 'some/folder')
    assert(expected == actual)
Example #28
  def test_buildArn_with_folder_with_slashes(self):
    """Test buildArn with folder with slashes at beginning and end."""

    expected = 'arn:aws:s3:::my_bucket/some/folder/*'
    actual = TileBucket.buildArn('my_bucket', '/some/folder/')
    assert(expected == actual)
Example #29
    def generate_ingest_policy(job_id,
                               upload_queue,
                               tile_bucket,
                               region_name=settings.REGION_NAME,
                               endpoint_url=None,
                               description=''):
        """Generate the combined IAM policy.
       
        Policy allows receiving messages from the queue and writing to the tile bucket.

        Args:
            job_id (int): Id of ingest job.
            upload_queue (UploadQueue):
            tile_bucket (TileBucket):
            region_name (optional[string]): AWS region.
            endpoint_url (string|None): Alternative URL boto3 should use for testing instead of connecting to AWS.

        Returns:
            (iam.Policy)
        """
        iam = boto3.resource(
            'iam',
            region_name=region_name,
            endpoint_url=endpoint_url,
            aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY)

        if not settings.TEST_MODE:
            policy_name = INGEST_POLICY_NAME.format(settings.DOMAIN, job_id)
        else:
            if BossUtil.test_policy_id == -1:
                BossUtil.test_policy_id = random.randint(0, 999)
            policy_name = TEST_INGEST_POLICY_NAME.format(
                settings.DOMAIN, BossUtil.test_policy_id, job_id)

        policy = {
            "Version": "2012-10-17",
            "Id": policy_name,
            "Statement": [{
                "Sid": "ClientQueuePolicy",
                "Effect": "Allow",
                "Action": ["sqs:ReceiveMessage", "sqs:GetQueueAttributes"],
                "Resource": upload_queue.arn
            }, {
                "Sid": "ClientTileBucketPolicy",
                "Effect": "Allow",
                "Action": ["s3:PutObject"],
                "Resource": TileBucket.buildArn(tile_bucket.bucket.name)
            }]
        }

        return iam.create_policy(PolicyName=policy['Id'],
                                 PolicyDocument=json.dumps(policy),
                                 Path=settings.IAM_POLICY_PATH,
                                 Description=description)
Example #30
# Parse input args passed as a JSON string from the lambda loader
json_event = sys.argv[1]
event = json.loads(json_event)
print(event)

# extract bucket name and tile key from the event
bucket = event['Records'][0]['s3']['bucket']['name']
tile_key = urllib.parse.unquote_plus(
    event['Records'][0]['s3']['object']['key'])
print("Bucket: {}".format(bucket))
print("Tile key: {}".format(tile_key))

# fetch metadata from the s3 object
proj_info = BossIngestProj.fromTileKey(tile_key)
tile_bucket = TileBucket(proj_info.project_name)
message_id, receipt_handle, metadata = tile_bucket.getMetadata(tile_key)
print("Metadata: {}".format(metadata))

# Currently this is what is sent from the client for the "metadata"
#  metadata = {'chunk_key': 'chunk_key',
#              'ingest_job': self.ingest_job_id,
#              'parameters': {"upload_queue": XX
#                             "ingest_queue": XX,
#                             "ingest_lambda":XX,
#                             "KVIO_SETTINGS": XX,
#                             "STATEIO_CONFIG": XX,
#                             "OBJECTIO_CONFIG": XX
#                             },
#              'tile_size_x': "{}".format(self.config.config_data["ingest_job"]["tile_size"]["x"]),
#              'tile_size_y': "{}".format(self.config.config_data["ingest_job"]["tile_size"]["y"])
#              }
Example #31
class Test_UploadLambda:
    def setup_class(self):
        """Setup class parameters"""
        # create the tile index table. skip if it exists
        try:
            TileIndexDB.createTable(endpoint_url=settings.DYNAMO_ENDPOINT)
            CuboidIndexDB.createTable(endpoint_url=settings.DYNAMO_ENDPOINT)
        except Exception as e:
            pass
        self.tileindex_db = TileIndexDB(nd_proj.project_name,
                                        endpoint_url=settings.DYNAMO_ENDPOINT)
        self.tile_bucket = TileBucket(nd_proj.project_name,
                                      endpoint_url=settings.S3_ENDPOINT)
        [self.x_tile, self.y_tile, self.z_tile] = [0, 0, 0]
        supercuboid_key = 'testing'
        message_id = '123456'
        receipt_handle = 'testing123456'
        message = serializer.encodeDeleteMessage(supercuboid_key, message_id,
                                                 receipt_handle)
        # insert message in the cleanup queue
        CleanupQueue.createQueue(nd_proj, endpoint_url=settings.SQS_ENDPOINT)
        self.cleanup_queue = CleanupQueue(nd_proj,
                                          endpoint_url=settings.SQS_ENDPOINT)
        self.cleanup_queue.sendMessage(message)
        # upload tiles for the supercuboid
        for z_index in range(self.z_tile, settings.SUPER_CUBOID_SIZE[2], 1):
            tile_handle = cStringIO.StringIO()
            self.tile_bucket.putObject(tile_handle, nd_proj.channel_name,
                                       nd_proj.resolution, self.x_tile,
                                       self.y_tile, z_index, message_id,
                                       receipt_handle)

    def teardown_class(self):
        """Teardown class parameters"""
        TileIndexDB.deleteTable(endpoint_url=settings.DYNAMO_ENDPOINT)
        CuboidIndexDB.deleteTable(endpoint_url=settings.DYNAMO_ENDPOINT)
        CleanupQueue.deleteQueue(nd_proj, endpoint_url=settings.SQS_ENDPOINT)

    def test_Uploadevent(self):
        """Testing the event"""
        # creating an emulambda function
        func = emulambda.import_lambda('cleanuplambda.lambda_handler')
        # creating an emulambda event
        event = emulambda.parse_event(
            open('../ndlambda/functions/cleanup/cleanup_event.json').read())
        # calling the emulambda function to invoke a lambda
        emulambda.invoke_lambda(func, event, None, 0, None)

        # test if there are any tiles leftover in tile bucket
        for z_index in range(self.z_tile, settings.SUPER_CUBOID_SIZE[2], 1):
            tile = self.tile_bucket.getObject(nd_proj.channel_name,
                                              nd_proj.resolution, self.x_tile,
                                              self.y_tile, z_index)
            assert (tile is None)

        # check if there are any entries left in the tileindex table
        supercuboid_key = self.tileindex_db.generatePrimaryKey(
            nd_proj.channel_name, nd_proj.resolution, self.x_tile, self.y_tile,
            self.z_tile)
        item = self.tileindex_db.getItem(supercuboid_key)
        assert (item is None)

        # testing if the message was deleted from the cleanup queue or not
        for message in self.cleanup_queue.receiveMessage():
            # KL TODO write the message id into the JSON event file directly
            print(message)
Example #32
def tile_bucket(s3, iam):
    from ndingest.ndbucket.tilebucket import TileBucket

    TileBucket.createBucket()
    yield TileBucket(nd_proj.project_name)
    TileBucket.deleteBucket()
Example #33
    def test_upload_tile_index_table(self):
        """"""
        ingest_mgmr = IngestManager()
        ingest_mgmr.validate_config_file(self.example_config_data)
        ingest_mgmr.validate_properties()
        ingest_mgmr.owner = self.user.pk
        ingest_job = ingest_mgmr.create_ingest_job()
        assert (ingest_job.id is not None)

        # Get the chunks in this job
        # Get the project information
        bosskey = ingest_job.collection + '&' + ingest_job.experiment + '&' + ingest_job.channel_layer
        lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key
        [col_id, exp_id, ch_id] = lookup_key.split('&')
        project_info = [col_id, exp_id, ch_id]
        proj_name = ingest_job.collection + '&' + ingest_job.experiment
        tile_index_db = BossTileIndexDB(proj_name)
        tilebucket = TileBucket(str(col_id) + '&' + str(exp_id))

        for time_step in range(ingest_job.t_start, ingest_job.t_stop, 1):
            # For each time step, compute the chunks and tile keys

            for z in range(ingest_job.z_start, ingest_job.z_stop, 16):
                for y in range(ingest_job.y_start, ingest_job.y_stop,
                               ingest_job.tile_size_y):
                    for x in range(ingest_job.x_start, ingest_job.x_stop,
                                   ingest_job.tile_size_x):

                        # compute the chunk indices
                        chunk_x = int(x / ingest_job.tile_size_x)
                        chunk_y = int(y / ingest_job.tile_size_y)
                        chunk_z = int(z / 16)

                        # Compute the number of tiles in the chunk
                        if ingest_job.z_stop - z >= 16:
                            num_of_tiles = 16
                        else:
                            num_of_tiles = ingest_job.z_stop - z

                        # Generate the chunk key
                        chunk_key = (BossBackend(
                            ingest_mgmr.config)).encode_chunk_key(
                                num_of_tiles, project_info,
                                ingest_job.resolution, chunk_x, chunk_y,
                                chunk_z, time_step)
                        # Upload the chunk to the tile index db
                        tile_index_db.createCuboidEntry(
                            chunk_key, ingest_job.id)
                        key_map = {}
                        for tile in range(0, num_of_tiles):
                            # get the object key and upload it
                            #tile_key = tilebucket.encodeObjectKey(ch_id, ingest_job.resolution,
                            #                              chunk_x, chunk_y, tile, time_step)
                            tile_key = 'fakekey' + str(tile)
                            tile_index_db.markTileAsUploaded(
                                chunk_key, tile_key)

                        # for each chunk key, delete entries from the tile_bucket

        # Check if data has been uploaded
        chunks = list(tile_index_db.getTaskItems(ingest_job.id))
        assert (len(chunks) != 0)

        ingest_mgmr.delete_tiles(ingest_job)
        chunks = list(tile_index_db.getTaskItems(ingest_job.id))
        assert (len(chunks) == 0)