def test_attrs(self):
    """ Test .attrs property """
    attrs = {'bla': 3, 'blu': 'asasdfsa'}
    with File(self.filename, 'w') as f:
        dst = f.create_dataset(name='/testgrp/dataset', shape=(30, 30))
        for key, value in attrs.items():
            dst.attrs[key] = value

    with File(self.filename, 'r') as f:
        dst = f['/testgrp/dataset']
        self.assertIn('bla', dst.attrs)
        self.assertEqual(dst.attrs['bla'], attrs['bla'])
        for key in dst.attrs:
            self.assertIn(key, attrs)

    # same test with a group
    with File(self.filename, 'a') as f:
        grp = f['/testgrp']
        grp.attrs['bla'] = 3
        dst = grp.create_dataset(name='dataset2', shape=(30, 30))
        self.assertIn('bla', grp.attrs)
        self.assertEqual(['bla'], grp.attrs.keys())
        self.assertEqual(grp.attrs['bla'], 3)
def test_resize(self):
    """ Test Dataset.resize() method """
    with File(self.filename, 'a') as f:
        start_size = (10, 20)
        f.create_dataset(name='resizable', shape=start_size,
                         maxshape=(50, 20))
        dset = f['resizable']
        new_size = (40, 20)
        dset.resize(new_size)
        self.assertEqual(dset.shape, new_size)
def test_items(self):
    """ Test items() method """
    # create some groups and datasets
    with File(self.filename, 'a') as f:
        f.create_group('/a/b/g1')
        f.create_group('/a/b/g2')
        f.create_group('/a/b/g3')
        f.create_dataset(name='a/b/g1/dst1', shape=(30, 30))
        f.create_dataset(name='/a/b/g1/dst2', shape=(30, 30))
        f.create_dataset(name='/a/b/g2/dst1', shape=(30, 30))

        for key, val in f.items():
            print(key, val)
def setUp(self):
    tmpdir = tempfile.gettempdir()
    self.filename = os.path.join(tmpdir, 'test_attrs.h5')
    with File(self.filename, 'w') as f:
        print("created {0}.".format(self.filename))
        f.create_dataset(name='/bla', shape=(30, 30))
def test_parallel(self):
    """ Test parallel read/write access """
    tmpdir = tempfile.gettempdir()
    NO_WORKERS = 40
    filename = os.path.join(tmpdir, 'paralleltest827348723.h5')
    f = File(filename, 'w')

    # create some datasets (to test reading)
    for i in range(NO_WORKERS):
        f.create_dataset(name='/testgrp/dataset{}'.format(i),
                         data=np.random.random(self.shape)
                         .astype(np.float32))

    def worker_read(i, hdf5file):
        """ reading worker """
        time.sleep(random.random())
        print("worker {0} is reading...".format(i))
        data = hdf5file['/testgrp/dataset{}'.format(i)][:]
        print("worker {0} is done reading.".format(i))
        self.assertEqual(data.shape, self.shape)

    def worker_write(i, hdf5file):
        """ writing worker """
        # do some reading
        # print(hdf5file.keys())
        # do some writing
        time.sleep(random.random())
        data = np.empty((4, self.shape[0], self.shape[1]), dtype=np.int32)
        data[:] = i * 100
        # modify existing dataset
        dst = hdf5file['/testgrp/dataset{}'.format(i)]
        print("worker {0} is writing...".format(i))
        dst[0:50, ] = i
        print("worker {0} done writing.".format(i))

    jobs = []
    writers = []
    print("")
    for i in range(NO_WORKERS):
        if i % 4 == 0:
            p = multiprocessing.Process(target=worker_write, args=(i, f))
            writers.append(i)
        else:
            p = multiprocessing.Process(target=worker_read, args=(i, f))
        jobs.append(p)
        p.start()
        # p.join()

    # wait until all processes have terminated
    while True:
        time.sleep(0.3)
        all_terminated = not any(job.is_alive() for job in jobs)
        if all_terminated:
            break

    # then test if data was written correctly
    print("Testing if data was written correctly...")
    for i in writers:
        dst = f['/testgrp/dataset{}'.format(i)]
        self.assertTrue(np.all(dst[0:50, ] == i))
def handle_request(msg):
    """
    Process a hurray message

    :param msg: Message dictionary with 'cmd' and 'args' keys
    :return: Msgpacked response as bytes
    """
    cmd = msg.get(CMD_KW_CMD, None)
    args = msg.get(CMD_KW_ARGS, {})

    app_log.debug('Process "%s" (%s)', cmd,
                  ', '.join(['%s=%s' % (k, v) for k, v in args.items()]))

    status = OK
    data = None

    if cmd in DATABASE_COMMANDS:
        # Database related commands

        # Database name has to be defined
        if CMD_KW_DB not in args:
            return response(MISSING_ARGUMENT)

        db = args[CMD_KW_DB]
        if len(db) < 1:
            return response(INVALID_ARGUMENT)

        if cmd == CMD_CREATE_DATABASE:
            if db_exists(db):
                status = FILE_EXISTS
            else:
                File(db_path(db), 'w-')
                status = CREATED
        elif cmd == CMD_CONNECT_DATABASE:
            if not db_exists(db):
                status = FILE_NOT_FOUND

    elif cmd in NODE_COMMANDS:
        # Node related commands

        # Database name and path have to be defined
        if CMD_KW_DB not in args or CMD_KW_PATH not in args:
            return response(MISSING_ARGUMENT)

        db_name = args.get(CMD_KW_DB)

        # check if database exists
        if not db_exists(db_name):
            return response(FILE_NOT_FOUND)

        db = File(db_path(db_name), "r+")

        path = args[CMD_KW_PATH]
        if len(path) < 1:
            return response(INVALID_ARGUMENT)

        if cmd == CMD_CREATE_GROUP:
            if path in db:
                status = GROUP_EXISTS
            else:
                db.create_group(path)
        elif cmd == CMD_CREATE_DATASET:
            if path in db:
                status = DATASET_EXISTS
            else:
                if CMD_KW_DATA not in msg:
                    return response(MISSING_DATA)
                db.create_dataset(name=path, data=msg[CMD_KW_DATA])
        else:
            # Commands for existing nodes
            if path not in db:
                return response(NODE_NOT_FOUND)

            if cmd == CMD_GET_NODE:
                node = db[path]
                if isinstance(node, Group):
                    data = {
                        RESPONSE_NODE_TYPE: NODE_TYPE_GROUP
                    }
                elif isinstance(node, Dataset):
                    data = {
                        RESPONSE_NODE_TYPE: NODE_TYPE_DATASET,
                        RESPONSE_NODE_SHAPE: node.shape,
                        RESPONSE_NODE_DTYPE: str(node.dtype)
                    }
            elif cmd == CMD_SLICE_DATASET:
                if CMD_KW_KEY not in args:
                    return response(MISSING_ARGUMENT)
                try:
                    data = {
                        RESPONSE_DATA: db[path][args[CMD_KW_KEY]]
                    }
                except ValueError as ve:
                    status = VALUE_ERROR
                    app_log.debug('Invalid slice: %s', ve)
            elif cmd == CMD_BROADCAST_DATASET:
                if CMD_KW_DATA not in msg:
                    return response(MISSING_DATA)
                if CMD_KW_KEY not in args:
                    return response(MISSING_ARGUMENT)
                try:
                    db[path][args[CMD_KW_KEY]] = msg[CMD_KW_DATA]
                except ValueError as ve:
                    status = VALUE_ERROR
                    app_log.debug('Invalid slice: %s', ve)
                except TypeError as te:
                    status = TYPE_ERROR
                    app_log.debug('Invalid broadcast: %s', te)
            elif cmd == CMD_ATTRIBUTES_SET:
                if CMD_KW_KEY not in args:
                    return response(MISSING_ARGUMENT)
                key = args[CMD_KW_KEY]
                if len(key) < 1:
                    return response(INVALID_ARGUMENT)
                if CMD_KW_DATA in msg:
                    db[path].attrs[key] = msg[CMD_KW_DATA]
                else:
                    return response(MISSING_DATA)
            elif cmd == CMD_ATTRIBUTES_GET:
                if CMD_KW_KEY not in args:
                    return response(MISSING_ARGUMENT)
                try:
                    data = {
                        RESPONSE_DATA: db[path].attrs[args[CMD_KW_KEY]]
                    }
                except KeyError as ke:
                    status = KEY_ERROR
                    app_log.debug('Invalid key: %s', ke)
            elif cmd == CMD_ATTRIBUTES_CONTAINS:
                if CMD_KW_KEY not in args:
                    return response(MISSING_ARGUMENT)
                data = {
                    RESPONSE_ATTRS_CONTAINS: args[CMD_KW_KEY] in db[path].attrs
                }
            elif cmd == CMD_ATTRIBUTES_KEYS:
                data = {
                    RESPONSE_ATTRS_KEYS: db[path].attrs.keys()
                }
    else:
        status = UNKNOWN_COMMAND

    return response(status, data)
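# Minimal usage sketch, not part of the original module: it assumes the
# CMD_KW_* / CMD_* constants defined above and only illustrates the message
# shape handle_request() expects (a dict with 'cmd' and 'args' keys); the
# returned value is the msgpack-encoded response bytes built by response().
if __name__ == '__main__':
    example_msg = {
        CMD_KW_CMD: CMD_CREATE_DATABASE,
        CMD_KW_ARGS: {CMD_KW_DB: 'example.h5'},  # hypothetical database name
    }
    reply = handle_request(example_msg)
    print(len(reply), "bytes in response")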