def test_get_cubes(self):
    """Test that cuboids written to the cache are returned intact by get_cubes.

    Three random arrays are blosc-packed, stored under generated cached-cuboid
    keys, read back with ``get_cubes`` and compared element-wise against the
    data that was written.
    """
    rkv = RedisKVIO(self.config_data)

    # Clean up data
    self.cache_client.flushdb()

    # Three random cuboids, packed the same way the cache stores them.
    data = [
        blosc.pack_array(np.random.randint(50, size=[10, 15, 5]))
        for _ in range(3)
    ]

    # Add items
    morton_id = [112, 125, 516]
    keys = rkv.generate_cached_cuboid_keys(self.resource, 2, [0], morton_id)
    rkv.put_cubes(keys, data)

    # Get cubes. Materialize the result first so that len() is valid even if
    # get_cubes hands back a lazy iterable (the sibling constructor test
    # materializes it for the same reason).
    cubes = list(rkv.get_cubes(keys))
    assert len(cubes) == 3

    for m, c, d in zip(morton_id, cubes, data):
        # Each entry is indexed positionally: c[0] is compared with the morton
        # id, c[1] with 0, and c[2] is the packed payload.
        assert c[0] == m
        assert c[1] == 0
        data_retrieved = blosc.unpack_array(c[2])
        np.testing.assert_array_equal(data_retrieved, blosc.unpack_array(d))
def DecompressArray_with_order(array, start, num, maximum, read_index_list=None):
    """Decompress up to ``num`` rows starting at row ``start``.

    ``array`` is indexed by block number and every element is unpacked with
    ``blosc.unpack_array``. Block boundaries are computed from
    ``param.bloscBlockSize``.

    Args:
        array: indexable collection of blosc-packed blocks.
        start: index of the first row to return.
        num: number of rows requested; clipped so that ``start + num`` does
            not exceed ``maximum``.
        maximum: total number of rows available.
        read_index_list: optional mapping from logical block number to the
            position in ``array`` to read. When given it is applied to every
            block, including the first one.

    Returns:
        tuple ``(rows, num, endFlag)``: the concatenated rows, the (possibly
        clipped) row count, and 1 when the request reached ``maximum``,
        otherwise 0.
    """
    endFlag = 0
    if start + num >= maximum:
        num = maximum - start
        endFlag = 1

    block_size = param.bloscBlockSize
    leftEnd = start % block_size
    # Floor division keeps the block numbers exact integers on both Python 2
    # and Python 3 (no float round trip).
    first_block = start // block_size
    last_block = (start + num - 1) // block_size

    # The first block is always read; following blocks only when the request
    # spans more than one block.
    logical_blocks = [first_block]
    logical_blocks.extend(range(first_block + 1, last_block + 1))

    # Previously the first block bypassed read_index_list while the remaining
    # blocks went through it; translate all of them consistently.
    if read_index_list is None:
        physical_blocks = logical_blocks
    else:
        physical_blocks = [read_index_list[i] for i in logical_blocks]

    rt = [blosc.unpack_array(array[i]) for i in physical_blocks]
    nprt = np.concatenate(rt)

    # Trim only when the request does not line up with whole blocks.
    if leftEnd != 0 or num % block_size != 0:
        nprt = nprt[leftEnd:(leftEnd + num)]
    return nprt, num, endFlag
def receive_data(socket):
    """Receive the pack and unpack it.

    Calls ``socket.recv()`` and inspects the number of parts received:

    * 1 part  -> returns the array unpacked from the first part.
    * 4 parts -> returns a tuple of the unpacked array followed by the three
      remaining parts, unchanged.
    * any other length -> returns ``None``.
    """
    data = socket.recv()
    n_parts = len(data)
    if n_parts not in (1, 4):
        # Unexpected layout: the original fell off the end of the function
        # here; keep returning None but make it explicit.
        return None

    array = blosc.unpack_array(data[0])
    if n_parts == 1:
        return array
    # tuple() keeps the concatenation valid when recv() hands back a list
    # rather than a tuple.
    return (array,) + tuple(data[1:])
def mergeCubes(data1, data2):
    """Merge two blosc-packed cubes and return the merged cube, blosc-packed.

    Both inputs are unpacked, handed to ``ndlib.overwriteMerge_ctype`` and the
    first array is packed again as the result.
    """
    import ndlib

    base_cube = blosc.unpack_array(data1)
    overlay_cube = blosc.unpack_array(data2)

    # The return value of the ctypes routine is not used; the result is read
    # back from base_cube (presumably updated in place - confirm in ndlib).
    ndlib.overwriteMerge_ctype(base_cube, overlay_cube)

    return blosc.pack_array(base_cube)
def __getitem__(self, idx):
    """
    Load one item: unpack its blosc-compressed arrays and apply the
    phase-specific processing.

    :param idx: id of the items; wrapped with ``idx % self.num_img``
    :return: tuple ``(sample, filename)``; ``sample`` is the processed data, return as type of dic
    """
    # Seeded from the wall clock truncated to whole seconds, so calls made
    # within the same second start from the same seed.
    random_state = np.random.RandomState(int(time.time()))
    # Uses the un-wrapped idx, hence computed before the modulo below.
    rand_label_id = random_state.randint(0, 1000) + idx
    idx = idx % self.num_img
    filename = self.name_list[idx]
    zipnp_list = self.img_list[idx]
    spacing = self.spacing_list[idx]
    original_spacing = self.original_spacing_list[idx]
    original_sz = self.original_sz_list[idx]
    if self.has_label:
        # Expects exactly two packed arrays here: image then label.
        img_np, label_np = [
            blosc.unpack_array(item) for item in zipnp_list
        ]
    else:
        img_np = blosc.unpack_array(zipnp_list[0])
    img_path = self.path_list[idx]
    if self.phase == "train":
        # NOTE(review): label_np is only bound when self.has_label is true;
        # training without labels would fail on the next line.
        sample = {
            'image': [img_np],
            'label': label_np
        }  # here the list is for multi-modality , each mode is an elem in list
        sample = self.apply_transform(sample, self.corr_transform_pool[idx],
                                      rand_label_id)
    else:
        if not self.has_label:
            sample = {'image': [img_np]}
        else:
            sample = {'image': [img_np], 'label': label_np}
        if not self.use_whole_img_as_input:
            sample = self.corr_partition_pool[idx](sample)
        else:
            # NOTE(review): the stack is applied twice; the second call
            # re-stacks the already stacked array. The nesting of these two
            # lines was reconstructed - confirm against the original layout.
            sample['image'] = np.stack(sample['image'], 0)
            sample['image'] = np.stack(sample['image'], 0)
    sample['img_path'] = img_path
    if self.transform:
        sample['image'] = self.transform(sample['image'])
        if self.has_label:
            sample['label'] = self.transform(sample['label'])
    # Copies, so the caller cannot modify the stored per-item metadata.
    sample['spacing'] = spacing.copy()
    sample['original_sz'] = original_sz.copy()
    sample['original_spacing'] = original_spacing.copy()
    return sample, filename
def recv(self, data, quit_ev):
    """Receive one blosc-packed array from the socket into ``data``.

    Returns ``False`` without touching the socket when ``quit_ev`` is set.
    Otherwise one message is received, unpacked and copied into ``data`` via
    slice assignment, and ``True`` is returned.
    """
    keep_running = not quit_ev.is_set()
    if keep_running:
        frame = self.socket.recv(copy=False)
        # bytes() turns the non-copying frame into a plain byte string before
        # it is unpacked.
        data[:] = blosc.unpack_array(bytes(frame))
    return keep_running
def breakCubes(self, timestamp, super_zidx, resolution, super_cube):
    """Break a blosc-packed supercuboid into blosc-packed cuboids.

    Args:
        timestamp: value repeated once per produced cuboid in the second
            element of the result.
        super_zidx: morton index of the supercuboid.
        resolution: resolution passed to ``datasetcfg.get_cubedim``.
        super_cube: blosc-packed array; indexed with four axes, the last
            three being sliced as z, y, x.

    Returns:
        tuple ``(zidx_list, timestamps, cube_list)`` with one entry per
        cuboid: its morton index, ``timestamp`` and its blosc-packed data.
    """
    super_cube = blosc.unpack_array(super_cube)

    # SuperCube Size
    supercube_size = self.db.datasetcfg.supercube_size
    [xnumcubes, ynumcubes, znumcubes] = supercube_size
    # Cube dimensions
    cubedim = self.db.datasetcfg.get_cubedim(resolution)

    # Cuboid-grid coordinate of the first cuboid inside this supercuboid.
    # Built as a real list (not map()) so it can be reused on every loop
    # iteration under Python 3 as well as Python 2.
    [super_x, super_y, super_z] = MortonXYZ(super_zidx)
    start = [
        coord * count
        for coord, count in zip([super_x, super_y, super_z], supercube_size)
    ]

    # Empty lists for zindx and cube data
    zidx_list = []
    cube_list = []
    for z in range(znumcubes):
        for y in range(ynumcubes):
            for x in range(xnumcubes):
                offset = [x, y, z]
                zidx = XYZMorton([s + o for s, o in zip(start, offset)])

                # Parameters in the cube slab
                index = [dim * o for dim, o in zip(cubedim, offset)]
                end = [i + dim for i, dim in zip(index, cubedim)]

                cube_data = super_cube[:, index[2]:end[2], index[1]:end[1],
                                       index[0]:end[0]]
                zidx_list.append(zidx)
                cube_list.append(blosc.pack_array(cube_data))

    return zidx_list, [timestamp] * len(zidx_list), cube_list
def transform_demos(demos):
    '''
    Convert packed demonstrations into per-step tuples.

    Each input demo is indexed as (mission, blosc-packed image array,
    directions, actions). The result is a list with one entry per demo; every
    entry is a list of (obs, action, done) tuples, where obs is a dict with
    the keys 'image', 'direction' and 'mission' and done is True only for the
    final step.
    '''
    new_demos = []
    for demo in demos:
        mission = demo[0]
        packed_images = demo[1]
        directions = demo[2]
        actions = demo[3]

        all_images = blosc.unpack_array(packed_images)
        n_observations = all_images.shape[0]
        assert len(directions) == len(
            actions) == n_observations, "error transforming demos"

        final_step = n_observations - 1
        new_demos.append([
            (
                {
                    'image': all_images[step],
                    'direction': directions[step],
                    'mission': mission
                },
                actions[step],
                step == final_step,
            )
            for step in range(n_observations)
        ])
    return new_demos
def __init__(self, demos):
    """Index a list of demos into numeric sequences.

    Every demo is a 4-tuple (mission, blosc-packed observations, directions,
    actions). Vocabularies are built with ``make_vocab`` over all missions,
    directions and actions, then each demo is converted to int64 index arrays
    and its observations are unpacked and stored channels-first.
    """
    self.obs = []
    self.obs_lens = []
    self.langs = []
    self.lang_lens = []
    self.dirs = []
    self.acts = []

    # Column view of the demos; the packed observations are not needed for
    # the vocabularies.
    missions, _, directions, actions = zip(*demos)
    self.lang_w2i = make_vocab(missions, str_format=True)
    self.dirs_w2i = make_vocab(directions)
    self.acts_w2i = make_vocab(actions)

    for mission, packed, dir_seq, act_seq in demos:
        # Mission tokens wrapped in the ids 1 and 2 (presumably start/end
        # markers - confirm against make_vocab).
        lang_ids = [1] + [self.lang_w2i[tok] for tok in mission.split(" ")] + [2]
        dir_ids = [self.dirs_w2i[str(d)] for d in dir_seq]
        act_ids = [self.acts_w2i[str(a)] for a in act_seq]

        lang_ids = np.array(lang_ids, dtype=np.int64)
        dir_ids = np.array(dir_ids, dtype=np.int64)
        act_ids = np.array(act_ids, dtype=np.int64)

        # Transpose - channels first
        frames = np.transpose(blosc.unpack_array(packed), (0, 3, 1, 2))

        self.obs.append(frames)
        self.obs_lens.append(frames.shape[0])
        self.langs.append(lang_ids)
        self.lang_lens.append(len(lang_ids))
        self.dirs.append(dir_ids)
        self.acts.append(act_ids)
def _get_cutout_blosc_no_chunking(self, token, channel, resolution,
                                  x_start, x_stop, y_start, y_stop,
                                  z_start, z_stop, neariso=False):
    """Download one blosc cutout in a single request and unpack it.

    Args:
        token, channel, resolution: path components of the cutout URL.
        x_start, x_stop, y_start, y_stop, z_start, z_stop: cutout bounds,
            inserted into the URL as ``start,stop`` pairs.
        neariso: when true, ``neariso/`` is appended to the URL.

    Returns:
        The first element along axis 0 of the unpacked array.

    Raises:
        IOError: if the server does not answer with HTTP 200.
    """
    url = self.url() + "{}/{}/blosc/{}/{},{}/{},{}/{},{}/".format(
        token, channel, resolution,
        x_start, x_stop,
        y_start, y_stop,
        z_start, z_stop
    )

    if neariso:
        url += "neariso/"

    req = requests.get(url)
    # Compare by value: ``is not 200`` tested object identity, which only
    # worked by accident of CPython's small-integer caching.
    if req.status_code != 200:
        raise IOError("Bad server response for {}: {}: {}".format(
            url,
            req.status_code,
            req.text))

    # This will need modification for >3D blocks
    return blosc.unpack_array(req.content)[0]
def test_time_diff(self):
    """Post two time ranges of constant data and check the diff endpoint.

    Each range is posted and read back through the NPZ helpers, then the
    ``diff`` URL is requested over the combined time range and its first
    entry is compared with the float32 difference of the posted volumes.
    """
    posted = []
    for args in ((3000, 3100, 2000, 2100, 10, 11, 2, 3),
                 (3000, 3100, 2000, 2100, 10, 11, 3, 4)):
        p.args = args
        volume = np.ones([2, 1, 1, 100, 100], dtype=np.uint8) * random.randint(0, 255)
        response = postNPZ(p, volume, time=True)
        assert (response.status_code == 200)
        # check that the return matches
        assert (np.array_equal(getNPZ(p, time=True), volume))
        posted.append(volume)
    image_data1, image_data2 = posted

    p.args = (3000, 3100, 2000, 2100, 10, 11, 2, 4)
    url = "https://{}/sd/{}/{}/diff/{}/{},{}/{},{}/{},{}/{},{}/".format(
        SITE_HOST, p.token, p.channels[0], p.resolution, *p.args)
    voxarray = blosc.unpack_array(getURL(url).content)

    expected = np.subtract(np.float32(image_data1), np.float32(image_data2))
    assert (np.array_equal(expected[0, :], voxarray[0, :]))
def supercube_compatibility(self, super_cube):
    """Return the blosc-packed cube with a leading axis added if it was 3-D.

    The input is unpacked; a 3-D array is reshaped to ``(1,) + shape`` and
    any other array is left as is. The result is packed again either way.
    """
    cube = blosc.unpack_array(super_cube)
    if cube.ndim == 3:
        cube = cube.reshape((1,) + cube.shape)
    return blosc.pack_array(cube)
def supercube_compatibility(self, super_cube):
    """Re-pack a blosc-packed cube, promoting 3-D data to 4-D.

    A 3-D array gets a leading axis of length one (``(1,) + shape``); arrays
    of any other rank are re-packed unchanged.
    """
    unpacked = blosc.unpack_array(super_cube)
    needs_leading_axis = len(unpacked.shape) == 3
    if not needs_leading_axis:
        return blosc.pack_array(unpacked)
    promoted_shape = (1, ) + unpacked.shape
    return blosc.pack_array(unpacked.reshape(promoted_shape))
def decompress(msg):
    """Unpack a blosc-packed message into an array.

    Asserts that ``msg`` is a ``str`` on Python 2 or ``bytes`` on Python 3
    before unpacking.
    """
    # Python 2.x implementation uses str, Python 3.x uses bytes.
    expected_type = str if sys.version_info[0] < 3 else bytes
    assert isinstance(msg, expected_type)
    return blosc.unpack_array(msg)
def test_channel_uint64_cuboid_unaligned_offset_time_offset_blosc_numpy(self):
    """ Test uint64 data, not cuboid aligned, offset, time samples, blosc interface

    Posts a random uint64 matrix to the cutout view, reads the same cutout
    back and checks that it is unchanged.

    Test Requires >=2GB of memory!
    """
    test_mat = np.random.randint(1, 2**50, (3, 17, 300, 500)).astype(np.uint64)
    bb = blosc.pack_array(test_mat)

    cutout_url = ('/' + version +
                  '/cutout/col1/exp1/layer1/0/100:600/450:750/20:37/200:203')
    view_kwargs = {
        'collection': 'col1',
        'experiment': 'exp1',
        'dataset': 'layer1',
        'resolution': '0',
        'x_range': '100:600',
        'y_range': '450:750',
        'z_range': '20:37',
    }

    factory = APIRequestFactory()

    # Post the data as an authenticated user
    post_request = factory.post(cutout_url, bb,
                                content_type='application/blosc-python')
    force_authenticate(post_request, user=self.user)
    response = Cutout.as_view()(post_request, **view_kwargs)
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # Read back the data that was just posted
    get_request = factory.get(cutout_url,
                              HTTP_ACCEPT='application/blosc-python')
    force_authenticate(get_request, user=self.user)
    response = Cutout.as_view()(get_request, **view_kwargs).render()
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Decompress and test for data equality (what you put in is what you got back!)
    data_mat = blosc.unpack_array(response.content)
    np.testing.assert_array_equal(data_mat, test_mat)
def np_rebuild(blocks, axis, dtype):
    """Rebuild one array from a list of blosc-packed blocks.

    Every block is unpacked. A single block is used as is; several blocks are
    concatenated along ``axis``. When ``dtype`` is truthy the result is passed
    through ``np.require`` with that dtype.
    """
    unpacked = [blosc.unpack_array(packed) for packed in blocks]
    array = unpacked[0] if len(unpacked) == 1 else np.concatenate(unpacked, axis=axis)
    if not dtype:
        return array
    return np.require(array, dtype)
def test_channel_uint16_cuboid_aligned_offset_no_time_blosc_numpy(self):
    """ Test uint16 data, cuboid aligned, offset, no time samples, blosc interface

    Posts a random uint16 matrix to the cutout view, reads the same cutout
    back and checks that it is unchanged.
    """
    test_mat = np.random.randint(1, 2**16 - 1, (16, 128, 128)).astype(np.uint16)
    bb = blosc.pack_array(test_mat)

    cutout_url = ('/' + version +
                  '/cutout/col1/exp1/channel2/0/128:256/256:384/16:32/')
    view_kwargs = {
        'collection': 'col1',
        'experiment': 'exp1',
        'channel': 'channel2',
        'resolution': '0',
        'x_range': '128:256',
        'y_range': '256:384',
        'z_range': '16:32',
        't_range': None,
    }

    factory = APIRequestFactory()

    # Post the data as an authenticated user
    post_request = factory.post(cutout_url, bb,
                                content_type='application/blosc-python')
    force_authenticate(post_request, user=self.user)
    response = Cutout.as_view()(post_request, **view_kwargs)
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # Read back the data that was just posted
    get_request = factory.get(cutout_url,
                              HTTP_ACCEPT='application/blosc-python')
    force_authenticate(get_request, user=self.user)
    response = Cutout.as_view()(get_request, **view_kwargs).render()
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Decompress and test for data equality (what you put in is what you got back!)
    data_mat = blosc.unpack_array(response.content)
    np.testing.assert_array_equal(data_mat, test_mat)
def fromBlosc(self, pandz):
    """Load the cube from a blosc-packed blob.

    Stores the unpacked array in ``self.data`` and its three dimensions in
    ``self.zdim``, ``self.ydim`` and ``self.xdim``. Any failure is logged and
    re-raised unchanged. On success the cube is marked as not new.
    """
    try:
        self.data = blosc.unpack_array(pandz[:])
        zdim, ydim, xdim = self.data.shape
        self.zdim = zdim
        self.ydim = ydim
        self.xdim = xdim
    except:
        logger.error ("Failed to decompress database cube. Data integrity concern.")
        raise
    else:
        self._newcube = False
def fromBlosc(self, compressed_data):
    """Load the cube from a blosc-packed blob.

    Stores the unpacked array in ``self.data`` and marks the cube as not new.

    Raises:
        SpatialDBError: if the blob cannot be unpacked. The failure is also
            logged.
    """
    try:
        self.data = blosc.unpack_array(compressed_data[:])
    except Exception:
        # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit are no longer reported as a data-integrity problem.
        logger.error("Failed to decompress database cube. Data integrity concern.")
        raise SpatialDBError("Failed to decompress database cube. Data integrity concern.")

    self._newcube = False
def fromBlosc(self, pandz):
    """Populate this cube from a blosc-packed blob.

    ``self.data`` receives the unpacked array and ``self.zdim``,
    ``self.ydim``, ``self.xdim`` its shape. Errors are logged and re-raised
    as they are; on success ``self._newcube`` is cleared.
    """
    try:
        self.data = blosc.unpack_array(pandz[:])
        shape_z, shape_y, shape_x = self.data.shape
        self.zdim, self.ydim, self.xdim = shape_z, shape_y, shape_x
    except:
        logger.error(
            "Failed to decompress database cube. Data integrity concern.")
        raise
    else:
        self._newcube = False
def getSuperCubes(self, ch, res, super_listofidxs):
    """Yield ``(super_zidx, array)`` for every supercuboid found in S3.

    For each index the object is fetched from the generated bucket and key,
    its body is read and unpacked. Indexes whose request raises a botocore
    ``ClientError`` are skipped.
    """
    for super_zidx in super_listofidxs:
        try:
            s3_object = self.client.get_object(
                Bucket=generateS3BucketName(),
                Key=generateS3Key(self.project_name, ch.getChannelName(),
                                  res, super_zidx))
            packed_cube = s3_object.get('Body').read()
            yield (super_zidx, blosc.unpack_array(packed_cube))
        except botocore.exceptions.ClientError as err:
            error_code = err.response['Error']['Code']
            # NOTE(review): 'NoSuchKey' and 'NoSuchBucket' are recognised
            # explicitly, but every other error code also falls through to
            # the next index without being reported.
            if error_code in ('NoSuchKey', 'NoSuchBucket'):
                continue
def getIndex(self, ch, entityid, resolution, update=False):
    """Retrieve the index for the annotation with id.

    Returns an empty list when the key-value store has nothing for the id.
    Otherwise the stored string is decoded with ``np.load`` when ``self.NPZ``
    is set, and with ``blosc.unpack_array`` when it is not.
    """
    idxstr = self.kvio.getIndex(ch, entityid, resolution, update)
    if not idxstr:
        return []
    if not self.NPZ:
        return blosc.unpack_array(idxstr)
    return np.load(cStringIO.StringIO(idxstr))
def getIndex(self, ch, entityid, resolution, update=False):
    """Look up and decode the stored index of an annotation id.

    The raw value comes from ``self.kvio.getIndex``. A falsy value yields
    ``[]``; otherwise it is decoded as NPZ (``np.load``) or blosc depending
    on ``self.NPZ``.
    """
    raw_index = self.kvio.getIndex(ch, entityid, resolution, update)
    if not raw_index:
        return []
    if self.NPZ:
        buffer = cStringIO.StringIO(raw_index)
        return np.load(buffer)
    return blosc.unpack_array(raw_index)
def transform_merge_demos(demos):
    '''
    Merge consecutive pairs of demonstrations into single, longer demos.

    Each input demo is indexed as (mission, blosc-packed image array,
    directions, actions). Demos are taken two at a time (a trailing unpaired
    demo is ignored); their missions are joined with a randomly chosen
    conjunction and their images, directions and actions are concatenated.

    Returns a list with one entry per pair; every entry is a list of
    (obs, action, done) tuples.
    '''
    conjs = [' and ', ' then, ', ' after you ']
    new_demos = []
    for first_pos in range(0, len(demos) - 1, 2):
        demo_1 = demos[first_pos]
        demo_2 = demos[first_pos + 1]

        conj = conjs[np.random.randint(0, 3)]
        # ' after you ' states the second demo's mission first.
        if conj == ' after you ':
            leading, trailing = demo_2, demo_1
        else:
            leading, trailing = demo_1, demo_2
        mission = leading[0] + conj + trailing[0]

        directions = demo_1[2] + demo_2[2]
        actions = demo_1[3] + demo_2[3]
        all_images = np.concatenate(
            (blosc.unpack_array(demo_1[1]), blosc.unpack_array(demo_2[1])),
            axis=0)

        n_observations = all_images.shape[0]
        assert len(directions) == len(
            actions) == n_observations, "error transforming demos"

        final_step = n_observations - 1
        new_demo = []
        for step in range(n_observations):
            obs = {
                'image': all_images[step],
                'direction': directions[step],
                'mission': mission,
                'submissions': [demo_1[0], demo_2[0]]
            }
            # Action 6 is replaced by action 0 in the merged demo.
            raw_action = actions[step]
            action = 0 if raw_action == 6 else raw_action
            new_demo.append((obs, action, step == final_step))
        new_demos.append(new_demo)
    return new_demos
def load_blosc_training_images(blosc_filename):
    """ Lumber Labs data format.

    Reads a pickled file, takes its first element as the blosc-packed image
    array, unpacks it and reshapes it in place to ``(-1, 27, 19)``.

    TODO: Remove me when this gets integrated into the Deep Learning
    Tutorials, just to avoid confusion.
    """
    import blosc

    # NOTE: pickle.load executes whatever the file contains; only use this on
    # trusted files.
    with open(blosc_filename, "rb") as source:
        pickled_content = pickle.load(source)

    images_array = blosc.unpack_array(pickled_content[0])
    images_array.shape = (-1, 27, 19)
    return images_array
def receive_payload(self, payload):
    """Parse one payload of the form ``data<sep>id<sep>timestamp``.

    A payload older than 0.5 seconds is dropped (returns ``False``) as long
    as fewer than 10 frames have been dropped so far. Otherwise, when
    ``self.mode == 0``, the id, the unpacked data (converted to float by the
    ``* 1.0``) and the timestamp are stored on the instance; ``True`` is
    returned.

    NOTE(review): the source of this method had lost its indentation; the
    three assignments are taken to belong to the ``mode == 0`` branch -
    confirm.
    """
    # Split from the right: the packed data is arbitrary binary and may
    # itself contain the separator, whereas the id and timestamp fields at
    # the end cannot. A plain split() raised ValueError in that case.
    data, frame_id, timestamp = payload.rsplit(self.separator, 2)
    sent_at = float(timestamp)

    if time.time() - sent_at > 0.5 and self.n_dropped_frames < 10:
        self.n_dropped_frames += 1
        return False

    if self.mode == 0:
        self.current_id = int(frame_id)
        self.current_data = blosc.unpack_array(data) * 1.0
        self.current_timestamp = sent_at
    return True
def test_param_constructor(self):
    """Re-run the put/get round trip using the parameter based constructor.

    RedisKVIO is built from an explicit config dict, three packed random
    cuboids are stored and read back, and the results are compared with what
    was written.
    """
    config = {
        "cache_host": self.config["aws"]["cache"],
        "cache_db": 1,
        "read_timeout": 86400
    }
    rkv = RedisKVIO(config)

    # Clean up data
    self.cache_client.flushdb()

    arrays = [np.random.randint(50, size=[10, 15, 5]) for _ in range(3)]
    data = [blosc.pack_array(arr) for arr in arrays]

    # Add items
    morton_id = [112, 125, 516]
    keys = rkv.generate_cached_cuboid_keys(self.resource, 2, [0], morton_id)
    rkv.put_cubes(keys, data)

    # Get cubes, materialized so their number can be checked
    cube = list(rkv.get_cubes(keys))
    assert len(cube) == 3

    for expected_id, entry, packed in zip(morton_id, cube, data):
        assert entry[0] == expected_id
        assert entry[1] == 0
        data_retrieved = blosc.unpack_array(entry[2])
        np.testing.assert_array_equal(data_retrieved, blosc.unpack_array(packed))
def getBlosc (p, time=False):
    """Get data from the blosc endpoint. Returns a numpy array.

    Args:
        p: request parameters; ``token``, ``channels``, ``resolution`` and
            ``args`` are read from it.
        time: when true, the URL template with an additional ``{},{}`` range
            at the end is used.

    Returns:
        The array unpacked from the response body.
    """
    # Build the url to get the blosc object
    if time:
        url = 'http://{}/blaze/{}/{}/blosc/{}/{},{}/{},{}/{},{}/{},{}/'.format ( SITE_HOST, p.token, ','.join(p.channels), p.resolution, *p.args )
    elif p.channels is not None:
        url = 'http://{}/blaze/{}/{}/blosc/{}/{},{}/{},{}/{},{}/'.format ( SITE_HOST, p.token, ','.join(p.channels), p.resolution, *p.args )
    else:
        url = 'http://{}/blaze/{}/blosc/{}/{},{}/{},{}/{},{}/'.format ( SITE_HOST, p.token, p.resolution, *p.args )

    # Get the image back; close the response explicitly instead of leaving
    # the connection to the garbage collector.
    response = urllib2.urlopen (url)
    try:
        raw_data = response.read()
    finally:
        response.close()

    return blosc.unpack_array(raw_data)
def fromBlosc(self, compressed_data):
    """Load the cube from a blosc-packed blob.

    Stores the unpacked array in ``self.data`` and marks the cube as not new.

    Raises:
        SpatialDBError: if the blob cannot be unpacked. The failure is also
            logged.
    """
    try:
        self.data = blosc.unpack_array(compressed_data[:])
    except Exception:
        # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit are no longer reported as a data-integrity problem.
        logger.error(
            "Failed to decompress database cube. Data integrity concern.")
        raise SpatialDBError(
            "Failed to decompress database cube. Data integrity concern.")

    self._newcube = False
def _load_blosc_component_sync(self, ix, component, ext):
    """ Load component stored in blosc format.

    Parameters
    ----------
    ix : str
        index of element in batch.
    component : str
        name of component.
    ext : str
        extension of file stored on hard drive; ``'blk'`` (blosc-packed
        array, optionally post-processed by a pickled decoder stored next to
        it) or ``'pkl'`` (pickled object).

    Raises
    ------
    OSError
        if the component file does not exist.
    ValueError
        if ``ext`` is neither ``'blk'`` nor ``'pkl'``.
    """
    item_dir = self.index.get_fullpath(ix)
    component_path = os.path.join(item_dir, component, 'data' + '.' + ext)
    if not os.path.exists(component_path):
        raise OSError("File with component " +
                      "{} doesn't exist.".format(component))

    with open(component_path, mode='rb') as file:
        byted = file.read()

    # NOTE: pickle.loads runs arbitrary code from the file contents; these
    # files must come from trusted storage.
    if ext == 'blk':
        decoder_path = os.path.join(item_dir, component, 'data.decoder')
        decoder = None
        if os.path.exists(decoder_path):
            with open(decoder_path, mode='rb') as file:
                decoder = pickle.loads(file.read())
        data = blosc.unpack_array(byted)
        if decoder is not None:
            data = decoder(data)
    elif ext == 'pkl':
        data = pickle.loads(byted)
    else:
        # Previously an unknown extension surfaced as an UnboundLocalError
        # on ``data`` further down.
        raise ValueError("Unsupported component extension: {}".format(ext))

    # These components are stored whole on the instance rather than written
    # into a slot of the component array.
    if component in ('spacing_beg', 'origin_beg', 'series'):
        setattr(self, component, data)
        return

    component_pos = self.get_pos(None, component, ix)
    getattr(self, component)[component_pos] = data
def get_block(s3key, s3_resource, bucketname):
    """Fetch one blosc-packed object from S3 and return it unpacked.

    ``s3_resource`` is a boto3 s3 resource. Returns ``None`` when a
    ``ClientError`` is raised while fetching or unpacking.
    """
    try:
        body = s3_resource.Object(bucketname, s3key).get()["Body"]
        # unpack the blosc encoded data
        return blosc.unpack_array(body.read(), encoding="latin1")
    except ClientError:
        return None
def breakCubes(key, blosc_data):
    """Break the cubes into smaller chunks.

    Args:
        key: ``'_'``-separated string with exactly ten fields:
            token, channel name, resolution, x1, x2, y1, y2, z1, z2 and a
            time stamp. The seven fields in the middle are parsed as ints.
        blosc_data: blosc-packed array, or ``None``.

    Returns:
        ``None`` when ``blosc_data`` is ``None``; otherwise a list of
        ``(storage key, blosc-packed cuboid)`` tuples, one per ``CUBE_DIM``
        sized cuboid touched by the data. Every cuboid is packed with a
        leading axis of length one.
    """
    key_array = key.split('_')
    # The unpack enforces the ten-field layout.
    [token, channel_name, res, x1, x2, y1, y2, z1, z2, time_stamp] = key_array
    [res, x1, x2, y1, y2, z1, z2] = [int(i) for i in key_array[2:-1]]

    if blosc_data is None:
        return

    voxarray = blosc.unpack_array(blosc_data)
    br = BlazeRedis(token, channel_name, res)

    ds = Dataset(token)
    # NOTE(review): the next three lookups are not used below; they are kept
    # in case callers rely on them failing for an unknown channel/resolution.
    ch = ds.getChannelObj(channel_name)
    [zimagesz, yimagesz, ximagesz] = ds.imagesz[res]
    [xoffset, yoffset, zoffset] = ds.offset[res]
    [xcubedim, ycubedim, zcubedim] = cubedim = CUBE_DIM

    # Calculating the corner and dimension. The shape is reversed and its
    # last entry (the array's first axis) is dropped.
    corner = [x1, y1, z1]
    dim = voxarray.shape[::-1][:-1]

    # Round to the nearest largest cube in all dimensions. Floor division
    # and list comprehensions behave the same on Python 2 and Python 3,
    # unlike map()/div and ``/``.
    start = [c // d for c, d in zip(corner, cubedim)]
    [xstart, ystart, zstart] = start

    znumcubes = (corner[2] + dim[2] + zcubedim - 1) // zcubedim - zstart
    ynumcubes = (corner[1] + dim[1] + ycubedim - 1) // ycubedim - ystart
    xnumcubes = (corner[0] + dim[0] + xcubedim - 1) // xcubedim - xstart
    numcubes = [xnumcubes, ynumcubes, znumcubes]

    # Zero-filled buffer covering whole cuboids, with the data placed at its
    # offset inside the first cuboid.
    offset = [c % d for c, d in zip(corner, cubedim)]
    buffer_shape = [n * d for n, d in zip(numcubes, cubedim)][::-1]
    data_buffer = np.zeros(buffer_shape, dtype=voxarray.dtype)
    end = [o + d for o, d in zip(offset, dim)]
    data_buffer[offset[2]:end[2], offset[1]:end[1], offset[0]:end[0]] = voxarray

    cube_list = []
    for z in range(znumcubes):
        for y in range(ynumcubes):
            for x in range(xnumcubes):
                position = [x, y, z]
                zidx = XYZMorton([s + p for s, p in zip(start, position)])

                # Parameters in the cube slab
                index = [d * p for d, p in zip(cubedim, position)]
                end = [i + d for i, d in zip(index, cubedim)]

                cube_data = data_buffer[index[2]:end[2], index[1]:end[1],
                                        index[0]:end[0]]
                cube_list.append(
                    (br.generateSIKey(zidx),
                     blosc.pack_array(cube_data.reshape((1,) + cube_data.shape))))

    return cube_list
def getBlosc(p, time=False):
    """Get data from the blosc endpoint. Returns a numpy array.

    Args:
        p: request parameters; ``token``, ``channels``, ``resolution`` and
            ``args`` are read from it.
        time: when true, the URL template with an additional ``{},{}`` range
            at the end is used.

    Returns:
        The array unpacked from the response body.
    """
    # Build the url to get the blosc object
    if time:
        url = 'http://{}/ca/{}/{}/blosc/{}/{},{}/{},{}/{},{}/{},{}/'.format(
            SITE_HOST, p.token, ','.join(p.channels), p.resolution, *p.args)
    elif p.channels is not None:
        url = 'http://{}/ca/{}/{}/blosc/{}/{},{}/{},{}/{},{}/'.format(
            SITE_HOST, p.token, ','.join(p.channels), p.resolution, *p.args)
    else:
        url = 'http://{}/ca/{}/blosc/{}/{},{}/{},{}/{},{}/'.format(
            SITE_HOST, p.token, p.resolution, *p.args)

    # Get the image back; close the response explicitly instead of leaving
    # the connection to the garbage collector.
    f = urllib2.urlopen(url)
    try:
        raw_data = f.read()
    finally:
        f.close()

    return blosc.unpack_array(raw_data)
def to_array(data):
    """
    Import a blosc array into a numpy array.

    Arguments:
        data: A blosc packed numpy array

    Returns:
        A numpy array with data from a blosc compressed array

    Raises:
        ValueError: if unpacking fails for any reason; the message carries
            the text of the underlying error.
    """
    try:
        return blosc.unpack_array(data)
    except Exception as e:
        raise ValueError("Could not load numpy data. {}".format(e))
def __convert_to_standard_label_map(self, label_map, interested_label_list):
    """Unpack a label map and renumber its labels by position in a list.

    Args:
        label_map: blosc-packed label array.
        interested_label_list: list of label ids; a label found in it is
            replaced by its index in the list.

    Returns:
        The unpacked label array. When the set of labels present equals the
        set of interested labels the array is returned unchanged. Otherwise
        every label is replaced by its index in ``interested_label_list``,
        and labels missing from the list become 0 (with a printed warning).
    """
    label_map = blosc.unpack_array(label_map)
    cur_label_list = list(np.unique(label_map))  # unique func orders the elements
    if set(cur_label_list) == set(interested_label_list):
        return label_map

    # Match against a snapshot of the original labels. Matching against the
    # array while it is being rewritten lets a voxel that was already
    # remapped to value k be remapped again when label k is processed later.
    original_labels = label_map.copy()
    for l_id in cur_label_list:
        if l_id in interested_label_list:
            st_index = interested_label_list.index(l_id)
        else:
            # assume background label is 0
            st_index = 0
            print("warning label: {} is not in interested label index, and would be convert to 0".format(l_id))
        label_map[original_labels == l_id] = st_index
    return label_map
def parse(self, stream, media_type=None, parser_context=None):
    """Method to decompress bytes from a POST that contains blosc compressed numpy ndarray

    Only should be used if data sent was compressed using blosc.pack_array()

    Problems are reported by *returning* a BossParserError rather than
    raising one.

    :param stream: Request stream
    stream type: django.core.handlers.wsgi.WSGIRequest
    :param media_type: not used by this method
    :param parser_context: mapping whose 'request' entry is wrapped in a BossRequest
    :return: the unpacked array, or a BossParserError
    """
    try:
        req = BossRequest(parser_context['request'])
    except BossError as err:
        return BossParserError(err.args[0], err.args[1], err.args[2])

    # Convert to Resource
    resource = spdb.project.BossResourceDjango(req)

    # Get bit depth
    try:
        bit_depth = resource.get_bit_depth()
    except ValueError:
        unsupported_msg = "Unsupported data type provided to parser: {}".format(
            resource.get_data_type())
        return BossParserError(unsupported_msg, ErrorCodes.TYPE_ERROR)

    # Make sure cutout request is under 1GB UNCOMPRESSED
    voxel_count = (req.get_x_span() * req.get_y_span() * req.get_z_span()
                   * len(req.get_time()))
    total_bytes = voxel_count * bit_depth / 8
    if total_bytes > settings.CUTOUT_MAX_SIZE:
        return BossParserError(
            "Cutout request is over 1GB when uncompressed. Reduce cutout dimensions.",
            ErrorCodes.REQUEST_TOO_LARGE)

    # Decompress and return
    try:
        unpacked = blosc.unpack_array(stream.read())
    except EOFError:
        return BossParserError(
            "Failed to unpack data. Verify the datatype of your POSTed data and "
            "xyz dimensions used in the POST URL.",
            ErrorCodes.DATA_DIMENSION_MISMATCH)
    return unpacked
def decompress_array(array,
                     blosc_start_index,
                     first_blosc_block_data_index,
                     no_of_data_rows_to_retrieve,
                     no_of_blosc_blocks,
                     read_index_list=None):
    """
    Unpack blosc blocks starting at ``blosc_start_index`` until enough rows
    have been collected, and report where the next call should resume.

    Parameters:
        array: indexable collection of blosc-packed blocks.
        blosc_start_index: block number to start reading from.
        first_blosc_block_data_index: row offset inside the first block; when
            greater than 0 only the remainder of that block is returned.
        no_of_data_rows_to_retrieve: number of rows wanted.
        no_of_blosc_blocks: upper bound (exclusive) of the block numbers.
        read_index_list: optional mapping from block number to the position
            in ``array`` that is actually read.

    Return: data_rows, next_first_blosc_block_data_index and next_blosc_start_index
    Both "next" values are -1 when there is nothing left to read, and
    data_rows is None when no rows were read at all.
    Note: blosc_start_index, next_first_blosc_block_data_index and next_blosc_start_index is inclusive.
    """
    data_rows = []
    no_of_data_rows = 0
    for i in range(blosc_start_index, no_of_blosc_blocks):
        new_data_rows = blosc.unpack_array(
            array[i if read_index_list is None else read_index_list[i]])
        data_rows.append(new_data_rows)
        no_of_data_rows += len(new_data_rows)
        # Resuming in the middle of a block: return just the rest of that
        # block and continue with the next block from its first row.
        # NOTE(review): unlike the return below, i + 1 is not replaced by -1
        # here when it equals no_of_blosc_blocks - confirm callers handle it.
        if i == blosc_start_index and first_blosc_block_data_index > 0:
            return np.concatenate(
                data_rows[:])[first_blosc_block_data_index:], 0, i + 1
        if no_of_data_rows >= no_of_data_rows_to_retrieve:
            # Rows read beyond the requested amount stay in block i for the
            # next call.
            extra_no_of_data_rows = no_of_data_rows % no_of_data_rows_to_retrieve
            next_blosc_start_index = i + 1 if extra_no_of_data_rows == 0 else i
            next_first_blosc_block_data_index = (0 if extra_no_of_data_rows == 0 else
                                                 (len(new_data_rows) - extra_no_of_data_rows))
            return (np.concatenate(
                data_rows[:])[0:no_of_data_rows_to_retrieve],
                next_first_blosc_block_data_index
                if next_blosc_start_index < no_of_blosc_blocks else -1,
                next_blosc_start_index
                if next_blosc_start_index < no_of_blosc_blocks else -1)
    # Ran out of blocks before collecting the requested number of rows.
    if no_of_data_rows <= 0:
        return None, -1, -1
    return np.concatenate(data_rows[:]), -1, -1
def getBlosc (p, time=False, neariso=False, direct=False):
    """Get data using blosc. Returns the unpacked numpy array.

    The URL template is chosen from ``time`` and ``p.channels``; the
    ``NEARISO`` and ``DIRECT`` suffixes are appended when the matching flag
    is set. The response body is unpacked with ``blosc.unpack_array``.
    """
    # Build the url to get the blosc object
    if time:
        template = 'https://{}/sd/{}/{}/blosc/{}/{},{}/{},{}/{},{}/{},{}/'
        url = template.format ( SITE_HOST, p.token, ','.join(p.channels), p.resolution, *p.args )
    elif p.channels is not None:
        template = 'https://{}/sd/{}/{}/blosc/{}/{},{}/{},{}/{},{}/'
        url = template.format ( SITE_HOST, p.token, ','.join(p.channels), p.resolution, *p.args )
    else:
        template = 'https://{}/sd/{}/blosc/{}/{},{}/{},{}/{},{}/'
        url = template.format ( SITE_HOST, p.token, p.resolution, *p.args )

    if neariso:
        url += NEARISO
    if direct:
        url += DIRECT

    # Get the image back
    return blosc.unpack_array(getURL(url).content)
def test_channel_uint64_cuboid_unaligned_offset_time_offset_blosc_numpy(self):
    """ Test uint64 data, not cuboid aligned, offset, time samples, blosc interface

    Posts a random uint64 matrix to the cutout view, reads the same cutout
    back and checks that it is unchanged.

    Test Requires >=2GB of memory!
    """
    test_mat = np.random.randint(1, 256, (3, 17, 300, 500)).astype(np.uint64)
    bb = blosc.pack_array(test_mat)

    cutout_url = ('/' + version +
                  '/cutout/col1/exp1/layer1/0/100:600/450:750/20:37/200:203')
    view_kwargs = {
        'collection': 'col1',
        'experiment': 'exp1',
        'channel': 'layer1',
        'resolution': '0',
        'x_range': '100:600',
        'y_range': '450:750',
        'z_range': '20:37',
        't_range': '200:203',
    }

    factory = APIRequestFactory()

    # Post the data as an authenticated user
    post_request = factory.post(cutout_url, bb,
                                content_type='application/blosc-python')
    force_authenticate(post_request, user=self.user)
    response = Cutout.as_view()(post_request, **view_kwargs)
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # Read back the data that was just posted
    get_request = factory.get(cutout_url,
                              HTTP_ACCEPT='application/blosc-python')
    force_authenticate(get_request, user=self.user)
    response = Cutout.as_view()(get_request, **view_kwargs).render()
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Decompress and test for data equality (what you put in is what you got back!)
    data_mat = blosc.unpack_array(response.content)
    np.testing.assert_array_equal(data_mat, test_mat)
def cutout_get(
        self, resource, resolution, x_range, y_range, z_range, time_range,
        url_prefix, auth, session, send_opts):
    """Download a cutout from the Boss data store.

    Args:
        resource (ndio.ndresource.resource.Resource): Resource compatible with cutout operations.
        resolution (int): 0 indicates native resolution.
        x_range (string): x range such as '10:20' which means x>=10 and x<20.
        y_range (string): y range such as '10:20' which means y>=10 and y<20.
        z_range (string): z range such as '10:20' which means z>=10 and z<20.
        time_range (string): None or time range such as 30:40 which means t>=30 and t<40.
        url_prefix (string): Protocol + host such as https://api.theboss.io
        auth (string): Token to send in the request header.
        session (requests.Session): HTTP session to use for request.
        send_opts (dictionary): Additional arguments to pass to session.send().

    Returns:
        (numpy.array): The array unpacked from the blosc response body.

    Raises:
        requests.HTTPError: if the response status is not 200.
    """
    blosc_media_type = 'application/blosc-python'
    req = self.get_cutout_request(
        resource, 'GET', blosc_media_type,
        url_prefix, auth, resolution, x_range, y_range, z_range, time_range)
    prep = session.prepare_request(req)
    # Hack in Accept header for now.
    prep.headers['Accept'] = blosc_media_type
    resp = session.send(prep, **send_opts)

    if resp.status_code != 200:
        msg = ('Get cutout failed on {}, got HTTP response: ({}) - {}'.format(
            resource.name, resp.status_code, resp.text))
        raise HTTPError(msg, request = req, response = resp)

    return blosc.unpack_array(resp.content)
def getBlosc(p, time=False, neariso=False, direct=False):
    """Get data using blosc. Returns the unpacked numpy array.

    The URL layout depends on ``time`` and on whether ``p.channels`` is set;
    ``NEARISO`` and ``DIRECT`` are appended when requested. The body of the
    response is unpacked with ``blosc.unpack_array``.
    """
    # Build the url to get the blosc object
    if time:
        url_parts = (SITE_HOST, p.token, ','.join(p.channels), p.resolution)
        url = 'https://{}/sd/{}/{}/blosc/{}/{},{}/{},{}/{},{}/{},{}/'.format(
            *(url_parts + tuple(p.args)))
    elif p.channels is not None:
        url_parts = (SITE_HOST, p.token, ','.join(p.channels), p.resolution)
        url = 'https://{}/sd/{}/{}/blosc/{}/{},{}/{},{}/{},{}/'.format(
            *(url_parts + tuple(p.args)))
    else:
        url_parts = (SITE_HOST, p.token, p.resolution)
        url = 'https://{}/sd/{}/blosc/{}/{},{}/{},{}/{},{}/'.format(
            *(url_parts + tuple(p.args)))

    for enabled, suffix_name in ((neariso, 'NEARISO'), (direct, 'DIRECT')):
        if enabled:
            # Looked up only when needed, as the suffix constants are only
            # touched when their flag is set.
            url = url + (NEARISO if suffix_name == 'NEARISO' else DIRECT)

    # Get the image back
    resp = getURL(url)
    return blosc.unpack_array(resp.content)
def test_channel_uint16_cuboid_aligned_offset_no_time_blosc_numpy(self):
    """ Test uint16 data, cuboid aligned, offset, no time samples, blosc interface

    Round trip: a random uint16 matrix is POSTed to the cutout view and the
    same cutout is fetched again and compared with the original.
    """
    test_mat = np.random.randint(1, 2**16-1, (16, 128, 128)).astype(np.uint16)
    bb = blosc.pack_array(test_mat)

    path = '/' + version + '/cutout/col1/exp1/channel2/0/128:256/256:384/16:32/'
    blosc_type = 'application/blosc-python'
    route = dict(collection='col1', experiment='exp1', channel='channel2',
                 resolution='0', x_range='128:256', y_range='256:384',
                 z_range='16:32', t_range=None)

    # Create request
    factory = APIRequestFactory()
    request = factory.post(path, bb, content_type=blosc_type)
    # log in user
    force_authenticate(request, user=self.user)
    # Make request
    response = Cutout.as_view()(request, **route)
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # Create Request to get data you posted
    request = factory.get(path, HTTP_ACCEPT=blosc_type)
    # log in user
    force_authenticate(request, user=self.user)
    # Make request
    response = Cutout.as_view()(request, **route).render()
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Decompress
    data_mat = blosc.unpack_array(response.content)

    # Test for data equality (what you put in is what you got back!)
    np.testing.assert_array_equal(data_mat, test_mat)
def get_tiledata(self, zoom, row, col):
    """Read one tile from the ``tiles`` table and return its data.

    For data type "xray" the stored value is returned, blosc-unpacked when
    ``self.compression`` is set. For data type "image/TIFF" the stored bytes
    are opened as an image and returned as a numpy array.
    """
    with self.db_connection as db_connection:
        cursor = db_connection.cursor()
        data_type = self.data_type
        is_xray = data_type == "xray"

        # Compressed xray tiles are read back as str.
        if self.compression and is_xray:
            db_connection.text_factory = str

        cursor.execute(""" SELECT tile_data from tiles WHERE zoom_level=? AND tile_row=? AND tile_column=?; """, (str(zoom), str(row), str(col)))

        if is_xray:
            if self.compression:
                data = blosc.unpack_array(cursor.fetchone()[0])
            else:
                data = cursor.fetchone()[0]
        elif data_type == "image/TIFF":
            tile_bytes = cursor.fetchone()[0]
            data = np.array(Image.open(ioBuffer(tile_bytes)))
        return data
def _get_cutout_blosc_no_chunking(self, token, channel, resolution, x_start, x_stop, y_start, y_stop, z_start, z_stop, neariso=False):
    """Download a cutout from the ``blosc`` endpoint in a single request.

    The URL is ``self.url()`` followed by
    ``token/channel/blosc/resolution/x_start,x_stop/y_start,y_stop/z_start,z_stop/``,
    with ``neariso/`` appended when ``neariso`` is truthy.

    :param token: first path component after the base URL
    :param channel: second path component
    :param resolution: path component following ``blosc``
    :param x_start: start of the x range
    :param x_stop: end of the x range
    :param y_start: start of the y range
    :param y_stop: end of the y range
    :param z_start: start of the z range
    :param z_stop: end of the z range
    :param neariso: append ``neariso/`` to the URL when truthy
    :return: element ``[0]`` of the array unpacked from the response body
    :raises IOError: if the response status code is not 200
    """
    url = self.url() + "{}/{}/blosc/{}/{},{}/{},{}/{},{}/".format(
        token, channel, resolution,
        x_start, x_stop,
        y_start, y_stop,
        z_start, z_stop
    )
    if neariso:
        url += "neariso/"

    req = requests.get(url)
    # Compare by value: identity comparison against an int literal only
    # works by accident of small-integer caching.
    if req.status_code != 200:
        raise IOError("Bad server response for {}: {}: {}".format(
            url, req.status_code, req.text))

    # TODO: 4D - 3D array
    return blosc.unpack_array(req.content)[0]
def test_unpack_array_with_unicode_characters_from_py27(self):
    """Unpack the ``self.PY_27_INPUT`` fixture with ``encoding='bytes'``.

    The fixture is presumably an array packed under Python 2.7 (per its
    name); the result must equal the expected array of non-ASCII characters.
    """
    import numpy as np

    expected = np.array(['å', 'ç', 'ø', 'π', '˚'])
    unpacked = blosc.unpack_array(self.PY_27_INPUT, encoding='bytes')
    np.testing.assert_array_equal(expected, unpacked)
def test_unpack_array_with_unicode_characters(self):
    """Round-trip an array of non-ASCII characters through pack/unpack.

    The array is packed with ``blosc.pack_array`` and unpacked with
    ``encoding='UTF-8'``; the result must equal the input.
    """
    import numpy as np

    original = np.array(['å', 'ç', 'ø', 'π', '˚'])
    round_tripped = blosc.unpack_array(blosc.pack_array(original),
                                       encoding='UTF-8')
    np.testing.assert_array_equal(original, round_tripped)
hdf5_ss_values.append(sys.getsizeof(tmpfile.read())) tmpfile.seek(0) start = time.time() fh5in = h5py.File(tmpfile.name, driver='core', backing_store=True) test = np.array(fh5in['TEST']['CUTOUT']) hdf5_ds_values.append(time.time()-start) fh5in.close() tmpfile.close() # BLOSC start = time.time() test = blosc.pack_array(data) blosc_ts_values.append(time.time()-start) blosc_ss_values.append(sys.getsizeof(test)) start = time.time() test = blosc.unpack_array(test) blosc_ds_values.append(time.time()-start) # opening a pdf file pp = PdfPages('time_serialization.pdf') # Time Serlization Graph # plot values plt.figure(figsize=(10,10)) plt.plot(x_axis_values, numpy_ts_values, color='green', marker='o') plt.plot(x_axis_values, hdf5_ts_values, color='blue', marker='^') plt.plot(x_axis_values, blosc_ts_values, color='red', marker='s') # configure x-axis
def _uncompress_blosc(self, index, data):
    """Unpack ``data`` with ``blosc.unpack_array`` and pair it with ``index``.

    :param index: passed through unchanged as the first tuple element
    :param data: payload handed to ``blosc.unpack_array``
    :return: ``(index, unpacked_array)``
    """
    return index, blosc.unpack_array(data)
#!/usr/bin/env python
"""Time repeated blosc pack_array/unpack_array round trips on a float array.

Each repeat packs and unpacks the same array inside a StopWatch and prints
the measured wall-clock and CPU time for that round trip.
"""
from useful.bench import StopWatch
import numpy as np
import argparse
import blosc
import sys

parser = argparse.ArgumentParser(description='Run experiments')
parser.add_argument('-r', '--repeat', '--repeats', type=int, default=100,
                    help="number of repeats")
args = parser.parse_args()

# linspace needs an integer sample count; the float literal 3e6 is rejected
# by current NumPy releases.
a = np.linspace(0, 100, int(3e6))
# tobytes() replaces the removed ndarray.tostring() alias (same bytes).
bytes_array = a.tobytes()

for x in range(args.repeat):
    with StopWatch() as t:
        packed = blosc.pack_array(a)
        blosc.unpack_array(packed)
        del packed
    # Field 1 is the CPU time; the original format string printed field 0
    # (the wall-clock time) twice.
    print("real: {0:.2f} cpu: {1:.2f}".format(t.time, t.cpu))
hdf5_ds_values = [] blosc_ds_values = [] numpy_ss_values = [] hdf5_ss_values = [] blosc_ss_values = [] x_axis_values = [] print "-----SERIALIZATION TIME-----" for i in range(0, ITERATIONS, 1): CUBE_VALUE = int(BASE_SIZE*math.pow(2,i)) x_axis_values.append(CUBE_VALUE) print "SIZE:{}".format(CUBE_VALUE) cutout_args = (OFFSET[0], OFFSET[0]+CUBE_VALUE, OFFSET[1], OFFSET[1]+CUBE_VALUE, OFFSET[2], OFFSET[2]+Z_SIZE) data = blosc.unpack_array(getURL(generateURLBlosc(HOST, TOKEN, CHANNELS, RESOLUTION, cutout_args))) # data = np.asarray(range(CUBE_VALUE*CUBE_VALUE*Z_SIZE), dtype=np.uint32).reshape(CUBE_VALUE,CUBE_VALUE,Z_SIZE) # NUMPY start = time.time() fileobj = cStringIO.StringIO() np.save(fileobj, data) test = zlib.compress(fileobj.getvalue()) numpy_ts_values.append(time.time()-start) # numpy_ss_values.append(sys.getsizeof(data)/sys.getsizeof(test)) numpy_ss_values.append(sys.getsizeof(test)) start = time.time() test = np.load ( cStringIO.StringIO ( zlib.decompress ( test ) ) ) numpy_ds_values.append(time.time()-start)
#in_ = np.random.random_integers(0, 100, N) # random distribution print(" ", in_) tic = time.time() out_ = np.copy(in_) toc = time.time() print(" Time for copying array with np.copy(): %.3f s" % (toc-tic,)) print() for cname in blosc.compressor_list(): print("Using *** %s *** compressor::" % cname) ctic = time.time() c = blosc.pack_array(in_, clevel=clevel, shuffle=True, cname=cname) ctoc = time.time() dtic = time.time() out = blosc.unpack_array(c) dtoc = time.time() assert((in_ == out).all()) print(" Time for pack_array/unpack_array: %.3f/%.3f s." % \ (ctoc-ctic, dtoc-dtic), end='') print("\tCompr ratio: %.2f" % (in_.size*in_.dtype.itemsize*1. / len(c))) ctic = time.time() c = blosc.compress_ptr(in_.__array_interface__['data'][0], in_.size, in_.dtype.itemsize, clevel=clevel, shuffle=True, cname=cname) ctoc = time.time() out = np.empty(in_.size, dtype=in_.dtype) dtic = time.time() blosc.decompress_ptr(c, out.__array_interface__['data'][0]) dtoc = time.time()
# ensure only one request of a cube at a time try: self.ds.db.fetchlock(cuboid_url) except Exception, e: logger.warning("Already fetching {}. Returning.".format(cuboid_url)) return # try block to ensure that we call fetchrelease try: if not self.ds.getS3Backend(): try: # Get cube in question # cube_data = test.getCutout(cuboidurl) # get the cutout data cube_data = blosc.unpack_array(getURL(cuboid_url).read()) except urllib2.URLError, e: # release the fetch lock self.ds.db.fetchrelease(cuboid_url) logger.error("Could not fetch the cuboid {}".format(cuboid_url)) raise NDTILECACHEError("Could not fetch the cuboid {}".format(cuboid_url)) # properties [ximagesize, yimagesize, zimagesize] = self.ds.imagesz[res] (xdim, ydim, zdim) = self.ds.cubedim[res] (xsuperdim, ysuperdim, zsuperdim) = self.ds.supercubedim[res] [xoffset, yoffset, zoffset] = self.ds.offset[res] scale = self.ds.scale[res][self.slice_type] if xmax == ximagesize or ymax == yimagesize or zmax == zimagesize:
def parse(self, stream, media_type=None, parser_context=None):
    """Decompress a POST body holding a blosc-compressed numpy ndarray.

    Only should be used if data sent was compressed using blosc.pack_array()

    Failures are reported by *returning* a ``BossParserError`` instance, not
    by raising. On the validation failure paths the request stream is drained
    with ``self.consume_request`` before returning.

    :param stream: Request stream
    stream type: django.core.handlers.wsgi.WSGIRequest
    :param media_type: unused by this method
    :param parser_context: mapping providing ``'kwargs'`` (the cutout URL
        arguments) and ``'request'``
    :return: ``(req, resource, parsed_data)`` on success, otherwise a
        ``BossParserError``
    """
    # Build the BossRequest from the URL arguments; any failure here
    # (including missing keys) becomes a returned parser error.
    try:
        url_kwargs = parser_context['kwargs']
        request_args = {
            "service": "cutout",
            "collection_name": url_kwargs['collection'],
            "experiment_name": url_kwargs['experiment'],
            "channel_name": url_kwargs['channel'],
            "resolution": url_kwargs['resolution'],
            "x_args": url_kwargs['x_range'],
            "y_args": url_kwargs['y_range'],
            "z_args": url_kwargs['z_range'],
            "time_args": url_kwargs['t_range'] if 't_range' in url_kwargs else None,
        }
        req = BossRequest(parser_context['request'], request_args)
    except BossError as err:
        self.consume_request(stream)
        return BossParserError(err.message, err.error_code)
    except Exception as err:
        self.consume_request(stream)
        return BossParserError(str(err), ErrorCodes.UNHANDLED_EXCEPTION)

    # Convert to Resource
    resource = spdb.project.BossResourceDjango(req)

    # Get bit depth
    try:
        bit_depth = resource.get_bit_depth()
    except ValueError:
        self.consume_request(stream)
        unsupported_msg = "Unsupported data type provided to parser: {}".format(resource.get_data_type())
        return BossParserError(unsupported_msg, ErrorCodes.TYPE_ERROR)

    # Make sure cutout request is under 500MB UNCOMPRESSED
    if is_too_large(req, bit_depth):
        self.consume_request(stream)
        return BossParserError("Cutout request is over 500MB when uncompressed. Reduce cutout dimensions.",
                               ErrorCodes.REQUEST_TOO_LARGE)

    # Decompress and return.
    # NOTE(review): python-blosc documents pack_array/unpack_array as
    # pickle-based, and the bytes read here come from the client's POST
    # body - confirm this is acceptable for the callers of this endpoint.
    try:
        parsed_data = blosc.unpack_array(stream.read())
    except MemoryError:
        return BossParserError("Ran out of memory decompressing data.",
                               ErrorCodes.BOSS_SYSTEM_ERROR)
    except EOFError:
        return BossParserError("Failed to unpack data. Verify the datatype of your POSTed data and "
                               "xyz dimensions used in the POST URL.",
                               ErrorCodes.DATA_DIMENSION_MISMATCH)
    else:
        return req, resource, parsed_data