class S3Cuboid(object):

  def __init__(self, token_name, host_name=HOST_NAME):
    # configuring the logger based on the dataset we are uploading
    self.logger = logging.getLogger(token_name)
    self.logger.setLevel(logging.INFO)
    fh = logging.FileHandler('{}_upload.log'.format(token_name))
    self.logger.addHandler(fh)

    self.info_interface = InfoInterface(host_name, token_name)
    self.project_name = self.info_interface.project_name
    self.cuboidindex_db = CuboidIndexDB(self.project_name)
    self.cuboid_bucket = CuboidBucket(self.project_name)


  def upload(self, file_name, channel_name, resolution, x_index, y_index, z_index, dimensions=(1, 64, 512, 512), time_index=0, neariso=False):
    """Upload a 4D supercuboid directly to DynamoDB and S3"""
    cuboid_data = np.fromfile(file_name, dtype=self.info_interface.get_channel_datatype(channel_name))
    cuboid_data = cuboid_data.reshape(dimensions)
    super_zidx = XYZMorton([x_index, y_index, z_index])
    self.logger.info("Inserting cube {},{},{}".format(x_index, y_index, z_index))
    self.cuboidindex_db.putItem(channel_name, resolution, x_index, y_index, z_index, time_index, neariso=neariso)
    self.cuboid_bucket.putObject(channel_name, resolution, super_zidx, time_index, blosc.pack_array(cuboid_data), neariso=neariso)
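
# Usage sketch for S3Cuboid (illustrative only; the token, channel, and file
# names below are hypothetical, and HOST_NAME plus the project imports are
# assumed to be provided by the surrounding module).
if __name__ == '__main__':
  s3_cuboid = S3Cuboid('my_token')
  # upload one 1x64x512x512 raw supercuboid at supercuboid index (0, 0, 0), resolution 0
  s3_cuboid.upload('supercuboid_0_0_0.bin', 'my_channel', 0, 0, 0, 0)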
Example #2
class AwsInterface:
    def __init__(self, token_name, host_name=HOST_NAME):
        """Create the bucket and intialize values"""

        # configuring the logger based on the dataset we are uploading
        self.logger = logging.getLogger(token_name)
        self.logger.setLevel(logging.INFO)
        fh = logging.FileHandler('{}.log'.format(token_name))
        self.logger.addHandler(fh)
        # setting up the project metadata
        self.info_interface = InfoInterface(host_name, token_name)
        # creating the resource interface to the remote server
        # self.resource_interface = ResourceInterface(self.info_interface.dataset_name, self.info_interface.project_name, host_name, logger=self.logger)
        # self.proj = self.resource_interface.getProject()
        # create the s3 I/O and index objects
        self.cuboidindex_db = CuboidIndexDB(self.info_interface.project_name)
        self.cuboid_bucket = CuboidBucket(self.info_interface.project_name)

    # def setupNewProject(self):
    # """Setup a new project if it does not exist"""

    # self.resource_interface.createDataset()
    # self.resource_interface.createProject()
    # self.resource_interface.createToken()

    def uploadExistingProject(self,
                              channel_name,
                              resolution,
                              start_values,
                              neariso=False):
        """Upload an existing project to S3"""

        # NOTE: this path depends on setupNewProject, self.resource_interface,
        # self.proj, and self.s3_io, which are commented out in __init__ above
        self.setupNewProject()
        db = SpatialDB(self.proj)
        # checking for channels
        if channel_name is None:
            channel_list = None
        else:
            channel_list = [channel_name]

        # iterating over channels in a project
        for ch in self.proj.projectChannels(channel_list):

            # creating the channel resource
            self.resource_interface.createChannel(ch.channel_name)
            # ingest 1 or more resolutions based on user input
            if resolution is None:
                start_res = self.proj.datasetcfg.scalinglevels
                stop_res = ch.resolution - 1
            else:
                start_res = resolution
                stop_res = resolution - 1

            # iterating over resolution
            for cur_res in range(start_res, stop_res, -1):

                # get the source database sizes
                [image_size, time_range] = self.proj.datasetcfg.dataset_dim(cur_res)
                [xcubedim, ycubedim, zcubedim] = cubedim = self.proj.datasetcfg.get_cubedim(cur_res)
                offset = self.proj.datasetcfg.get_offset(cur_res)
                [xsupercubedim, ysupercubedim, zsupercubedim] = supercubedim = self.proj.datasetcfg.get_supercubedim(cur_res)
                # set the limits for iteration on the number of cubes in each dimension
                xlimit = (image_size[0] - 1) / xsupercubedim + 1
                ylimit = (image_size[1] - 1) / ysupercubedim + 1
                zlimit = (image_size[2] - 1) / zsupercubedim + 1
                # [xlimit, ylimit, zlimit] = limit = self.proj.datasetcfg.get_supercube_limit(cur_res)
                # (div is assumed to come from the operator module: integer division)
                [x_start, y_start, z_start] = map(div, start_values, supercubedim)
                for z in range(z_start, zlimit, 1):
                    for y in range(y_start, ylimit, 1):
                        for x in range(x_start, xlimit, 1):

                            try:
                                # cutout the data at the current resolution
                                data = db.cutout(ch, [
                                    x * xsupercubedim, y * ysupercubedim,
                                    z * zsupercubedim
                                ], [
                                    xsupercubedim, ysupercubedim, zsupercubedim
                                ], cur_res).data
                                # generate the morton index
                                morton_index = XYZMorton([x, y, z])

                                self.logger.info("[{},{},{}] at res {}".format(
                                    x * xsupercubedim, y * ysupercubedim,
                                    z * zsupercubedim, cur_res))
                                # updating the index
                                # self.cuboidindex_db.putItem(ch.channel_name, cur_res, x, y, z, ch.time_range[0])
                                # inserting the cube
                                self.s3_io.putCube(ch,
                                                   ch.time_stamp[0],
                                                   morton_index,
                                                   cur_res,
                                                   blosc.pack_array(data),
                                                   neariso=neariso)

                            except Exception as e:
                                # checkpoint the ingest
                                self.logger.error(e)
                                self.checkpoint_ingest(ch.channel_name,
                                                       cur_res, x, y, z, e)
                                raise e

    def uploadNewProject(self, config_file, start_values, neariso=False):
        """Upload a new project"""

        # loading the config file and associated params and processors
        config = Configuration()
        config.load(json.loads(open(config_file, 'rt').read()))
        config.load_plugins()
        path_processor = config.path_processor_class
        path_processor.setup(config.get_path_processor_params())
        tile_processor = config.tile_processor_class
        tile_processor.setup(config.get_tile_processor_params())
        tile_params = config.get_tile_processor_params()
        path_params = config.get_path_processor_params()

        # creating the channel object from resource service
        channel_name = config.config_data['database']['channel']
        channel_datatype = self.info_interface.get_channel_datatype(
            channel_name)
        cur_res = tile_params['ingest_job']['resolution']

        # loading all the parameters for image-sizes, tile-sizes, and iteration limits
        [xsupercubedim, ysupercubedim,
         zsupercubedim] = supercubedim = SUPER_CUBOID_SIZE
        [x_start, x_end] = tile_params['ingest_job']['extent']['x']
        [y_start, y_end] = tile_params['ingest_job']['extent']['y']
        [z_start, z_end] = tile_params['ingest_job']['extent']['z']
        [t_start, t_end] = tile_params['ingest_job']['extent']['t']
        x_tilesz = tile_params['ingest_job']['tile_size']['x']
        y_tilesz = tile_params['ingest_job']['tile_size']['y']
        z_tilesz = tile_params['ingest_job']['tile_size']['z']
        t_tilesz = tile_params['ingest_job']['tile_size']['t']
        x_limit = (x_end - 1) / (x_tilesz) + 1
        y_limit = (y_end - 1) / (y_tilesz) + 1
        z_limit = (z_end - 1) / (z_tilesz) + 1
        t_limit = (t_end - 1) / (t_tilesz) + 1

        if start_values != [0, 0, 0]:
            [x_start, y_start, z_start] = map(div, start_values,
                                              [x_tilesz, y_tilesz, z_tilesz])
        # iterate over t,z,y,x to ingest the data
        for t in range(t_start, t_limit, 1):
            for z in range(z_start, z_limit, zsupercubedim):
                for y in range(y_start, y_limit, 1):
                    for x in range(x_start, x_limit, 1):

                        data = np.zeros([zsupercubedim, y_tilesz, x_tilesz],
                                        dtype=ND_dtypetonp[channel_datatype])
                        for b in range(0, zsupercubedim, 1):
                            if z + b > z_end - 1:
                                break
                            # generate file name
                            file_name = path_processor.process(x, y, z + b, t)
                            # read the file, handle the exception if the file is missing
                            try:
                                tile_handle = tile_processor.process(
                                    file_name, x, y, z + b, t)
                                tile_handle.seek(0)
                                data[b, :, :] = np.asarray(
                                    Image.open(tile_handle))
                            except IOError as e:
                                pass
                                # print "missing file", file_name
                        # iterate over the tile if it is larger than the supercuboid size
                        for y_index in range(0, y_tilesz / ysupercubedim):
                            for x_index in range(0, x_tilesz / xsupercubedim):
                                # calculate the morton index
                                insert_data = data[:,
                                                   y_index * ysupercubedim:(y_index + 1) * ysupercubedim,
                                                   x_index * xsupercubedim:(x_index + 1) * xsupercubedim]
                                if np.any(insert_data):
                                    morton_index = XYZMorton([
                                        x_index + (x * x_tilesz / xsupercubedim),
                                        y_index + (y * y_tilesz / ysupercubedim),
                                        z / zsupercubedim
                                    ])
                                    [s3_x, s3_y, s3_z] = MortonXYZ(morton_index)
                                    print("Morton Index {}".format(morton_index))
                                    self.logger.info("[{},{},{}]".format(
                                        (x_index + x) * x_tilesz,
                                        (y_index + y) * y_tilesz, z))
                                    self.cuboidindex_db.putItem(
                                        channel_name,
                                        cur_res,
                                        s3_x,
                                        s3_y,
                                        s3_z,
                                        t,
                                        neariso=neariso)
                                    self.cuboid_bucket.putObject(
                                        channel_name,
                                        cur_res,
                                        morton_index,
                                        t,
                                        blosc.pack_array(insert_data),
                                        neariso=neariso)
                                    # self.s3_io.putCube(ch, t, morton_index, cur_res, blosc.pack_array(insert_data), update=False, neariso=False)

    def checkpoint_ingest(self, channel_name, resolution, x, y, z, e, time=0):
        """Checkpoint the progress to file"""

        with closing(open('checkpoint_ingest.csv', 'wb')) as csv_file:
            field_names = [
                'project_name', 'channel_name', 'resolution', 'x', 'y', 'z',
                'time', 'exception'
            ]
            csv_writer = csv.DictWriter(csv_file,
                                        delimiter=',',
                                        fieldnames=field_names)
            csv_writer.writeheader()
            csv_writer.writerow({
                'project_name': self.proj.project_name,
                'channel_name': channel_name,
                'resolution': resolution,
                'x': x,
                'y': y,
                'z': z,
                'time': time,
                'exception': e.message
            })

    def load_checkpoint(self):
        """Load from a checkpoint file"""
        raise NotImplementedError
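
# Usage sketch for AwsInterface (illustrative only; the token name and config
# path are hypothetical, and the config is assumed to be an ingest-style JSON
# file understood by Configuration in uploadNewProject above).
if __name__ == '__main__':
    aws_interface = AwsInterface('my_token')
    # start from the dataset origin; pass non-zero start_values to resume an ingest
    aws_interface.uploadNewProject('ingest_config.json', [0, 0, 0], neariso=False)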
Example #3
class Test_CuboidIndexDB():
    def setup_class(self):
        """Setup parameters"""
        try:
            CuboidIndexDB.createTable(endpoint_url=settings.DYNAMO_ENDPOINT)
        except Exception:
            # the table may already exist
            pass
        self.cuboid_index = CuboidIndexDB(
            nd_proj.project_name, endpoint_url=settings.DYNAMO_ENDPOINT)

    def teardown_class(self):
        """Teardown parameters"""
        CuboidIndexDB.deleteTable(endpoint_url=settings.DYNAMO_ENDPOINT)

    def test_putItem(self):
        """Test data insertion"""

        # inserting two values for task 0, zvalues 0-1
        x_value = 0
        y_value = 0
        for z_value in range(0, 2, 1):
            self.cuboid_index.putItem(nd_proj.channel_name, nd_proj.resolution,
                                      x_value, y_value, z_value)

        # checking if the items were inserted
        for z_value in range(0, 2, 1):
            item_value = self.cuboid_index.getItem(nd_proj.channel_name,
                                                   nd_proj.resolution, x_value,
                                                   y_value, z_value)
            assert (item_value['project_name'] == nd_proj.project_name)
            assert (
                item_value['channel_resolution_taskid'] == '{}&{}&{}'.format(
                    nd_proj.channel_name, nd_proj.resolution, 0))

        # inserting one value for task 1, zvalue 0
        for z_value in range(0, 1, 1):
            self.cuboid_index.putItem(nd_proj.channel_name,
                                      nd_proj.resolution,
                                      x_value,
                                      y_value,
                                      z_value,
                                      task_id=1)

        # checking if the items were updated
        for z_value in range(0, 1, 1):
            item_value = self.cuboid_index.getItem(nd_proj.channel_name,
                                                   nd_proj.resolution, x_value,
                                                   y_value, z_value)
            assert (item_value['project_name'] == nd_proj.project_name)
            assert (
                item_value['channel_resolution_taskid'] == '{}&{}&{}'.format(
                    nd_proj.channel_name, nd_proj.resolution, 1))

    def test_queryProjectItems(self):
        """Test the query over SI"""

        # inserting two values for task 0, zvalues 0-1
        x_value = 0
        y_value = 0
        for z_value in range(0, 2, 1):
            self.cuboid_index.putItem(nd_proj.channel_name, nd_proj.resolution,
                                      x_value, y_value, z_value)

        for item in self.cuboid_index.queryProjectItems():
            assert (item['project_name'] == nd_proj.project_name)

        for item in self.cuboid_index.queryChannelItems(nd_proj2.channel_name):
            assert (item['channel_resolution_taskid'] == '{}&{}&{}'.format(
                nd_proj2.channel_name, nd_proj.resolution, 0))

        for item in self.cuboid_index.queryTaskItems(nd_proj.channel_name,
                                                     nd_proj.resolution, 1):
            assert (item['channel_resolution_taskid'] == '{}&{}&{}'.format(
                nd_proj2.channel_name, nd_proj.resolution, 0))

    def test_deleteXYZ(self):
        """Test item deletion"""

        x_value = 0
        y_value = 0
        for z_value in range(0, 2, 1):
            value = self.cuboid_index.deleteXYZ(nd_proj.channel_name,
                                                nd_proj.resolution, x_value,
                                                y_value, z_value)
            item = self.cuboid_index.getItem(nd_proj.channel_name,
                                             nd_proj.resolution, x_value,
                                             y_value, z_value)
            assert item is None
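
# The assertions above compare against a composite sort key built from the
# channel name, resolution, and task id joined by '&'.  A self-contained sketch
# of that key format (the helper name here is illustrative, not part of
# CuboidIndexDB):
def channel_resolution_taskid_key(channel_name, resolution, task_id=0):
    """Build the composite key in the form used by the assertions above."""
    return '{}&{}&{}'.format(channel_name, resolution, task_id)

assert channel_resolution_taskid_key('my_channel', 0, 1) == 'my_channel&0&1'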