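# NOTE: The imports below are implied by the code in this module (Template,
# six, posixpath, and json are all used). `StringStore` is assumed to be a
# small helper that accumulates generated source text; the fallback sketch
# here only mirrors the interface actually used in this module (append() and
# dump()) and is an assumption, not the package's real implementation.
import json
import posixpath as pp
from string import Template

import six


class StringStore(object):
    """Minimal accumulator for generated source code (assumed interface)."""

    def __init__(self):
        self._parts = []

    def append(self, s):
        # Collect one chunk of generated source code.
        self._parts.append(s)

    def dump(self):
        # Return everything collected so far as a single string.
        return ''.join(self._parts)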


class IdlCode(object):
    """Produce IDL source code (as a .pro file) that creates a template
    HDF5 file."""

    def __init__(self, tinfo, fname, ext='h5'):
        """Initialize an IdlCode instance.

        :arg dict tinfo: Template content information.
        :arg str fname: Name of the file to use when generating the source
            code.
        :arg str ext: HDF5 file extension. **Without the dot!**
        """
        if len(fname) == 0:
            raise ValueError('Missing file name.')
        if 'root' not in tinfo:
            raise KeyError('"root" key not found.')

        # Helper variables...
        self._d = tinfo
        self._root = self._d['root']
        self._fname = fname + '.' + ext
        self._dset_path = dict()  # map dataset ID to HDF5 paths
        self._dimlist = []  # dataset IDs that have dimscales attached

        # Variable for the generated source code...
        self._c = StringStore()

    def _get_hard_links(self, gid, collection):
        links = list()
        for l in self._d['groups'][gid].get('links', []):
            if l['class'] == 'H5L_TYPE_HARD' and l['collection'] == collection:
                links.append(l)
        return links

    def _order_groups(self):
        pid = [{'id': self._root, 'path': '/'}]

        def tree_walker(ginfo):
            glinks = self._get_hard_links(ginfo['id'], 'groups')
            chld_grps = list()
            for gl in glinks:
                chld_grps.append({'id': gl['id'],
                                  'path': pp.join(ginfo['path'], gl['title'])})
            pid.extend(chld_grps)
            for cg in chld_grps:
                tree_walker(cg)

        tree_walker(pid[0])
        return pid

    def _create_file(self):
        """Code to create an HDF5 file."""
        tmplt = Template(
            "; Create the HDF5 file. It will overwrite any file with same "
            "name.\n"
            "fname = '$filename';\n"
            "fid = H5F_CREATE(fname)\n"
        )
        vars = {'filename': self._fname}
        self._c.append(tmplt.substitute(vars))

    def _close_file(self):
        """Code to close an HDF5 file."""
        tmplt = (
            "\n\n"
            "; Close the HDF5 file.\n"
            "H5F_CLOSE, fid\n"
            "; Template file is ready!\n"
        )
        self._c.append(tmplt)

    def _dims2str(self, dims):
        """Stringify dimension list with support for the unlimited size.

        :arg list dims: Dimension size list.
        """
        dim_str = []
        for d in dims:
            if d == 'H5S_UNLIMITED':
                # Unlimited dimension...
                dim_str.append("-1")
            else:
                dim_str.append('{:d}ULL'.format(d))
        return '[{}]'.format(', '.join(dim_str))

    def _dspace(self, shape):
        """Generate dataspace code.

        :arg dict shape: HDF5/JSON shape information.
        """
        if shape['class'] == 'H5S_SCALAR':
            return "sid = H5S_CREATE_SCALAR()\n"
        elif shape['class'] == 'H5S_SIMPLE':
            rank = len(shape['dims'])
            if rank == 1:
                tmplt = Template(
                    "sid = H5S_CREATE_SIMPLE($dims, MAX_DIMENSIONS=$maxdims)\n"
                )
            else:
                tmplt = Template(
                    "sid = H5S_CREATE_SIMPLE(REVERSE($dims), "
                    "MAX_DIMENSIONS=REVERSE($maxdims))\n"
                )
            vars = {'dims': self._dims2str(shape['dims']),
                    'maxdims': self._dims2str(shape.get('maxdims', []))}
            return tmplt.substitute(vars)
        else:
            raise NotImplementedError('%s: Not supported' % shape['class'])

    def _dtype(self, t, var='tid'):
        """Generate datatype code.

        :arg dict t: HDF5/JSON datatype information.
        :arg str var: Default name of the datatype variable.
        """
        tcls = t['class']
        if tcls == 'H5T_COMPOUND':
            tmplt = ''

            # Go over each compound field...
            field_cnt = 0
            field_tid_fmt = 'tid%d'
            for f in t['fields']:
                field_cnt += 1
                field_tid = field_tid_fmt % field_cnt
                tmplt += self._dtype(f['type'], var=field_tid)

            # Create the compound datatype...
            array = [field_tid_fmt % i for i in range(1, field_cnt + 1)]
            array = ', '.join(array)
            names = [f['name'].encode('ascii') for f in t['fields']]
            tmplt += "%s = H5T_COMPOUND_CREATE([%s], %s)\n" % (var, array,
                                                               names)

            # Close field datatypes...
            for i in range(1, field_cnt + 1):
                tmplt += "H5T_CLOSE, tid%d\n" % i

            return tmplt
        elif tcls == 'H5T_VLEN':
            return "%s = H5T_VLEN_CREATE(%s)\n" \
                % (var, self._atomic_dtype(t['base']))
        elif tcls == 'H5T_ARRAY':
            tmplt = Template(
                "${base}"
                "$var = H5T_ARRAY_CREATE(base_tid, $dims)\n"
                "H5T_CLOSE, base_tid\n"
            )
            if len(t['dims']) > 1:
                dims = 'REVERSE(%s)' % t['dims']
            else:
                dims = t['dims']
            return tmplt.substitute(
                {'base': self._dtype(t['base'], var='base_tid'),
                 'dims': dims,
                 'var': var})
        else:
            return var + " = " + self._atomic_dtype(t) + "\n"

    def _atomic_dtype(self, t, var='tid'):
        """Handle HDF5 atomic datatypes.

        :arg dict t: HDF5/JSON datatype information.
        :arg str var: Default name of the datatype variable.
        """
        tcls = t['class']
        if tcls == 'H5T_STRING':
            if isinstance(t['length'], six.string_types):
                raise NotImplementedError('Variable length string datatype '
                                          'not supported yet.')
            else:
                tmplt = Template(
                    "H5T_IDL_CREATE(STRING('a', FORMAT='(A${n})'))"
                )
                return tmplt.substitute({'n': t['length']})
        elif tcls in ('H5T_FLOAT', 'H5T_INTEGER'):
            type_map = {'H5T_STD_U8': 'BYTE',
                        'H5T_STD_U16': 'UINT',
                        'H5T_STD_U32': 'ULONG',
                        'H5T_STD_U64': 'ULONG64',
                        'H5T_STD_I16': 'FIX',
                        'H5T_STD_I32': 'LONG',
                        'H5T_STD_I64': 'LONG64',
                        'H5T_IEEE_F32': 'FLOAT',
                        'H5T_IEEE_F64': 'DOUBLE'}
            base = t['base'][:-2]
            try:
                return "H5T_IDL_CREATE(%s(0))" % type_map[base]
            except KeyError:
                raise NotImplementedError('IDL does not support datatype: %s'
                                          % t['base'])
        elif tcls == 'H5T_REFERENCE':
            if t['base'] == 'H5T_STD_REF_OBJ':
                region = ''
            else:
                region = '/REGION'
            return "H5T_REFERENCE_CREATE(%s)" % region
        else:
            raise NotImplementedError('%s: Datatype class not supported yet'
                                      % t['class'])

    def _create_attr(self, attr, locid, dimscale=False):
        """Generate code for one attribute of the ``locid`` object.

        :arg dict attr: Attribute information.
        :arg str locid: Attribute's parent variable name.
        :arg bool dimscale: Indicates whether the attribute's parent is a
            dimension scale.
        """
        dataspace = self._dspace(attr['shape'])
        datatype = self._dtype(attr['type'])
        tmplt = Template(
            "\n; Attribute: $name\n"
            "${datatype}"
            "${dataspace}"
            "aid = H5A_CREATE($locid, '$name', tid, sid)\n"
            "${value}"
            "H5T_CLOSE, tid\n"
            "H5S_CLOSE, sid\n"
            "H5A_CLOSE, aid\n"
        )
        if dimscale and attr['name'] in ('REFERENCE_LIST', 'DIMENSION_LIST'):
            val = ("; This is a dimension scale attribute. Its value is "
                   "written later in the code.\n")
        else:
            if attr['type']['class'] == 'H5T_STRING':
                def prep_vals(vals):
                    values = list()
                    for v in vals:
                        temp = v.encode('ascii')
                        temp = temp.replace("'", "''")
                        if '\n' in temp:
                            # Replace "\n" with "STRING(10B)"
                            temp = "'+STRING(10B)+'".join(temp.split('\n'))
                        values.append(temp)
                    return values

                if attr['shape']['class'] == 'H5S_SCALAR':
                    value = "%s" % prep_vals([attr['value']])[0]
                else:
                    if len(attr['shape']['dims']) > 1:
                        raise NotImplementedError(
                            'Rank > 1 for string data not supported')
                    value = prep_vals(attr['value'])

                if attr['type']['length'] == 'H5T_VARIABLE':
                    val_str = \
                        '[' + ', '.join(["'%s'" % v for v in value]) + ']'
                    value = "H5T_VLEN_TO_STR(%s)" % val_str
                else:
                    # Left-justified, fixed-length string format...
                    # fmt = '{{:<{0:d}.{0:d}}}'.format(attr['type']['length'])
                    if attr['shape']['class'] == 'H5S_SCALAR':
                        # value = "'%s'" % fmt.format(value)
                        value = "'%s'" % value
                    else:
                        # for i in xrange(len(value)):
                        #     value[i] = fmt.format(value[i])
                        value = ('[' + ', '.join(["'%s'" % v for v in value])
                                 + "]")
            else:
                value = attr['value']
            val = Template(
                "H5A_WRITE, aid, $value\n"
            ).substitute({'value': value})

        vars = {'locid': locid,
                'name': attr['name'],
                'datatype': datatype,
                'dataspace': dataspace,
                'value': val}
        self._c.append(tmplt.substitute(vars))

    def _is_dimscale(self, attrs):
        """Check if the dataset is a dimension scale.

        :arg list attrs: All dataset's attributes.
        """
        # Check if REFERENCE_LIST attribute is present...
        ref_list = any(a['name'] == 'REFERENCE_LIST' and
                       a['type']['class'] == 'H5T_COMPOUND' for a in attrs)

        # Check if CLASS attribute is present...
        cls = any(a['name'] == 'CLASS' and a['value'] == 'DIMENSION_SCALE'
                  for a in attrs)

        if ref_list and cls:
            return True
        else:
            return False

    def _is_dimlist(self, attrs):
        """Check if the dataset has dimension scales attached.

        :arg list attrs: All dataset's attributes.
        """
        # Check if DIMENSION_LIST attribute is present...
        dim_list = any(a['name'] == 'DIMENSION_LIST' and
                       a['type']['class'] == 'H5T_VLEN' for a in attrs)
        return True if dim_list else False

    def _is_dimscale_related(self, attrs):
        """Check if the attributes of a dataset indicate it is related to
        dimension scales.

        :arg list attrs: All dataset's attributes.
        """
        dimscale = self._is_dimscale(attrs)
        dimlist = self._is_dimlist(attrs)
        if dimscale or dimlist:
            return True
        else:
            return False

    def _create_attrs(self, attrs, locid, dimscale=False):
        """Generate code for all the attributes of the ``locid`` object.

        :arg dict attrs: HDF5/JSON information about the attributes of the
            parent ``locid`` object.
        :arg str locid: IDL variable name of the attributes' parent object.
        :arg bool dimscale: Boolean indicating whether the attributes belong
            to a dimension scale.
        """
        for a in attrs:
            self._create_attr(a, locid, dimscale=dimscale)

    def _create_dset(self, id, name, ds, locid):
        """Generate code for one dataset of the ``locid`` group.

        :arg str id: Dataset's identifier.
        :arg str name: Dataset's name.
        :arg dict ds: Dataset information.
        :arg str locid: Variable name of the dataset's parent group.
        """
        dataspace = self._dspace(ds['shape'])
        datatype = self._dtype(ds['type'])
        tmplt = Template(
            "\n"
            "; Dataset: $name\n"
            "${datatype}"
            "${dataspace}"
            "dsid = H5D_CREATE($locid, '$name', tid, sid${layout}${filter})\n"
            "H5S_CLOSE, sid\n"
            "H5T_CLOSE, tid\n"
        )

        # Layout...
        layout = ds.get('creationProperties', {}).get('layout', {})
        lyt = ''
        if layout.get('class', 'H5D_CONTIGUOUS') == 'H5D_CONTIGUOUS':
            pass
        elif layout['class'] == 'H5D_COMPACT':
            pass
        elif layout['class'] == 'H5D_CHUNKED':
            if len(layout['dims']) == 1:
                chunks = '$chunks'
            else:
                chunks = 'REVERSE($chunks)'
            lyt = Template(
                ", CHUNK_DIMENSIONS=" + chunks
            ).substitute({'chunks': self._dims2str(layout['dims'])})
        else:
            raise ValueError('%s: Invalid layout class' % layout['class'])

        # Filters...
        filters = ds.get('creationProperties', {}).get('filters', [])
        fltr = ''
        for f in filters:
            if f['class'] == 'H5Z_FILTER_DEFLATE':
                fltr += ", GZIP=%s" % f['level']
            elif f['class'] == 'H5Z_FILTER_SHUFFLE':
                fltr += ", /SHUFFLE"
            else:
                raise NotImplementedError('%s: Filter not supported yet'
                                          % f['class'])

        vars = {'locid': locid,
                'name': name,
                'datatype': datatype,
                'dataspace': dataspace,
                'layout': lyt,
                'filter': fltr}
        self._c.append(tmplt.substitute(vars))

        attrs = ds.get('attributes', [])
        dimscale = self._is_dimscale_related(attrs)
        if self._is_dimlist(attrs):
            self._dimlist.append(id)
        self._create_attrs(attrs, 'dsid', dimscale=dimscale)
        self._c.append("H5D_CLOSE, dsid\n")

    def _create_dsets(self, dsets, locid, path):
        """Generate code for all the datasets of the ``locid`` object.

        :arg dict dsets: HDF5/JSON information about the datasets of the
            parent ``locid`` object.
        :arg str locid: IDL variable name of the datasets' parent object.
        :arg str path: HDF5 path of the datasets' parent object.
        """
        for d in dsets:
            # Record the full HDF5 path to the dataset...
            self._dset_path[d['id']] = pp.join(path, d['title'])

            # Generate dataset code...
            self._create_dset(d['id'], d['title'],
                              self._d['datasets'][d['id']], locid)

    def _create_group(self, g):
        """Code for all group content.

        :arg dict g: Group id and full name.
        """
        grpid = g['id']
        path = g['path']
        if grpid == self._root:
            locid = 'fid'
        else:
            tmplt = Template(
                "\n\n"
                ";\n"
                "; Group: $path\n"
                ";\n"
                "gid = H5G_CREATE(fid, '$path')\n"
            )
            vars = {'path': path}
            self._c.append(tmplt.substitute(vars))
            locid = 'gid'

        self._create_attrs(self._d['groups'][grpid].get('attributes', []),
                           locid)
        self._create_dsets(self._get_hard_links(grpid, 'datasets'), locid,
                           path)

        if grpid != self._root:
            self._c.append("H5G_CLOSE, gid\n")

    def _dimscales(self):
        """Generate code connecting dimension scales and their datasets."""
        # Set for storing dimension scale IDs...
        dscales = set()
        self._c.append(
            "\n\n"
            ";\n"
            "; Datasets and their dimension scales\n"
            ";\n"
        )
        for dset_id in self._dimlist:
            dset = self._d['datasets'][dset_id]

            # Get DIMENSION_LIST attribute value...
            dims = next(a['value'] for a in dset['attributes']
                        if a['name'] == 'DIMENSION_LIST')

            tmp = Template(
                "\n; Dataset with dimension scales: $name\n"
                "dset_id = H5D_OPEN(fid, '$name')\n"
                "aid = H5A_OPEN_NAME(dset_id, 'DIMENSION_LIST')\n"
                "dims = REPLICATE({IDL_H5_VLEN}, $n)\n"
            ).substitute({'name': self._dset_path[dset_id],
                          'n': len(dims)})
            self._c.append(tmp)

            # Iterate over dataset's dimension scales...
            for index, dimscales in enumerate(dims):
                self._c.append("ref = INDGEN(%d)\n" % len(dimscales))
                for n, ds in enumerate(dimscales):
                    ds_id = pp.basename(ds)
                    dscales.add(ds_id)
                    self._c.append("ref[%d] = H5R_CREATE(fid, '%s')\n"
                                   % (n, self._dset_path[ds_id]))
                self._c.append("dims[%d].pdata = PTR_NEW(ref);\n" % index)

            self._c.append(
                "H5A_WRITE, aid, dims\n"
                "H5A_CLOSE, aid\n"
                "H5D_CLOSE, dset_id\n"
            )

        for dset_id in dscales:
            tmp = Template(
                "\n; Dimension scale: $name\n"
                "dset_id = H5D_OPEN(fid, '$name')\n"
                "aid = H5A_OPEN_NAME(dset_id, 'REFERENCE_LIST')\n"
            ).substitute({'name': self._dset_path[dset_id]})
            self._c.append(tmp)

            dset = self._d['datasets'][dset_id]

            # Get REFERENCE_LIST attribute value and fields...
            refs, fields = next((a['value'], a['type']['fields'])
                                for a in dset['attributes']
                                if a['name'] == 'REFERENCE_LIST')

            # Field names...
            f_names = [f['name'] for f in fields]
            self._c.append("ref = REPLICATE({%s:1, %s:1}, %d)\n"
                           % tuple(f_names + [len(refs)]))
            for n, r in enumerate(refs):
                ds_id = pp.basename(r[0])
                tmp = Template(
                    "ref[$n].${d} = H5R_CREATE(fid, '$path')\n"
                    "ref[$n].${i} = $index;\n"
                ).substitute({'n': n,
                              'path': self._dset_path[ds_id],
                              'd': f_names[0],
                              'i': f_names[1],
                              'index': r[1]})
                self._c.append(tmp)

            self._c.append(
                "H5A_WRITE, aid, ref\n"
                "H5A_CLOSE, aid\n"
                "H5D_CLOSE, dset_id\n"
            )

    def get_code(self):
        """Generate IDL source code."""
        self._create_file()

        # Order groups by hierarchy...
        groups = self._order_groups()
        for g in groups:
            self._create_group(g)

        if self._dimlist:
            # Handle dimension scales...
            self._dimscales()

        self._close_file()
        return self._c.dump()
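
# Hypothetical usage sketch for IdlCode (`tinfo` and the file names are
# illustrative only, not part of this module): feed it HDF5/JSON template
# content and write the generated IDL program to a .pro file.
#
#     with open('template.json') as f:
#         tinfo = json.load(f)          # must contain a 'root' key
#     idl = IdlCode(tinfo, 'example')   # the IDL code will create example.h5
#     with open('make_example.pro', 'w') as f:
#         f.write(idl.get_code())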


class MCode(object):
    """Generate MATLAB source code (as an .m file) that produces a template
    HDF5 file."""

    def __init__(self, tinfo, fname, ext='h5'):
        """Initialize an MCode instance.

        :arg dict tinfo: Template content information.
        :arg str fname: Name of the file to use when generating the source
            code.
        :arg str ext: HDF5 file extension. **Without the dot!**
        """
        if len(fname) == 0:
            raise ValueError('Missing file name.')
        if 'root' not in tinfo:
            raise KeyError('"root" key not found.')

        # Helper variables...
        self._d = tinfo
        self._root = self._d['root']
        self._fname = fname + '.' + ext
        self._dset_path = dict()  # map dataset ID to HDF5 paths
        self._dimlist = []  # dataset IDs that have dimscales attached

        # Variable for the generated source code...
        self._m = StringStore()

    def _get_hard_links(self, gid, collection):
        links = list()
        for l in self._d['groups'][gid].get('links', []):
            if l['class'] == 'H5L_TYPE_HARD' and l['collection'] == collection:
                links.append(l)
        return links

    def _order_groups(self):
        pid = [{'id': self._root, 'path': '/'}]

        def tree_walker(ginfo):
            glinks = self._get_hard_links(ginfo['id'], 'groups')
            chld_grps = list()
            for gl in glinks:
                chld_grps.append({'id': gl['id'],
                                  'path': pp.join(ginfo['path'], gl['title'])})
            pid.extend(chld_grps)
            for cg in chld_grps:
                tree_walker(cg)

        tree_walker(pid[0])
        return pid

    def matlab_dtype(self, h5type):
        """Find the appropriate MATLAB numerical data type for an HDF5
        predefined datatype.

        :arg str h5type: HDF5 predefined datatype.
        :return: MATLAB data type.
        :rtype: str
        """
        conv_map = {
            'H5T_STD_I8': 'int8',
            'H5T_STD_U8': 'uint8',
            'H5T_STD_I16': 'int16',
            'H5T_STD_U16': 'uint16',
            'H5T_STD_I32': 'int32',
            'H5T_STD_U32': 'uint32',
            'H5T_STD_I64': 'int64',
            'H5T_STD_U64': 'uint64',
            'H5T_IEEE_F32': 'single',
            'H5T_IEEE_F64': 'double'
        }
        try:
            return conv_map[h5type[:-2]]
        except KeyError:
            raise ValueError('%s: Invalid predefined datatype' % h5type)

    def _create_file(self):
        """Code to create an HDF5 file."""
        tmplt = Template(
            "% Create the HDF5 file. It will overwrite any file with same "
            "name.\n"
            "fname = '$filename';\n"
            "fcpl = H5P.create('H5P_FILE_CREATE');\n"
            "fapl = H5P.create('H5P_FILE_ACCESS');\n"
            "fid = H5F.create(fname, 'H5F_ACC_TRUNC', fcpl, fapl);\n"
        )
        vars = {'filename': self._fname}
        self._m.append(tmplt.substitute(vars))

    def _close_file(self):
        """Code to close an HDF5 file."""
        tmplt = (
            "\n\n"
            "% Close the HDF5 file.\n"
            "H5F.close(fid);\n"
            "% Template file is ready!\n"
        )
        self._m.append(tmplt)

    def _dims2str(self, dims):
        """Stringify dimension list with support for the unlimited size.

        :arg list dims: Dimension size list.
        """
        dim_str = []
        for d in dims:
            if d == 'H5S_UNLIMITED':
                # Unlimited dimension...
                dim_str.append("H5ML.get_constant_value('H5S_UNLIMITED')")
            else:
                dim_str.append('{:d}'.format(d))
        return '[{}]'.format(', '.join(dim_str))

    def _dspace(self, shape):
        """Generate dataspace code.

        :arg dict shape: HDF5/JSON shape information.
        """
        if shape['class'] == 'H5S_SCALAR':
            return "sid = H5S.create('H5S_SCALAR');\n"
        elif shape['class'] == 'H5S_SIMPLE':
            nelems_limit = 2**48 - 1
            nelems = 1
            for d in shape['dims']:
                if d != 'H5S_UNLIMITED':
                    nelems *= d
            if nelems > nelems_limit:
                raise ValueError(
                    'Number of elements too large (max. 2^48 - 1): '
                    '{:d}'.format(nelems))
            rank = len(shape['dims'])
            if rank == 1:
                tmplt = Template(
                    "sid = H5S.create_simple($rank, $dims, $maxdims);\n"
                )
            else:
                tmplt = Template(
                    "sid = H5S.create_simple($rank, fliplr($dims), "
                    "fliplr($maxdims));\n"
                )
            vars = {'dims': self._dims2str(shape['dims']),
                    'maxdims': self._dims2str(shape.get('maxdims', [])),
                    'rank': rank}
            return tmplt.substitute(vars)
        elif shape['class'] == 'H5S_NULL':
            return "sid = H5S.create('H5S_NULL');\n"
        else:
            raise NotImplementedError('%s: Not supported' % shape['class'])

    def _dtype(self, t, var='tid'):
        """Generate datatype code.

        :arg dict t: HDF5/JSON datatype information.
        :arg str var: Default name of the datatype variable.
        """
        tcls = t['class']
        if tcls == 'H5T_COMPOUND':
            tmplt = ''

            # Go over each compound field...
            field_cnt = 0
            for f in t['fields']:
                field_cnt += 1
                dt = "field_tid(%d)" % field_cnt
                tmplt += self._dtype(f['type'], var=dt)
                tmplt += "field_size(%d) = H5T.get_size(%s);\n" % (field_cnt,
                                                                   dt)

            # Compute field byte offsets...
            num_fields = len(t['fields'])
            tmplt += (
                "field_offset = [0 cumsum(field_size(1:%d))];\n"
            ) % (num_fields - 1)

            # Create the compound datatype...
            tmplt += "tid = H5T.create('H5T_COMPOUND', sum(field_size));\n"
            for n in range(num_fields):
                tmplt += ("H5T.insert(tid, '%s', field_offset(%d), "
                          "field_tid(%d));\n") % (t['fields'][n]['name'],
                                                  n + 1, n + 1)

            # Close field datatypes...
            for n in range(num_fields):
                tmplt += "H5T.close(field_tid(%d));\n" % (n + 1)

            return tmplt
        elif tcls == 'H5T_VLEN':
            if t['base']['class'] == 'H5T_STRING':
                base_type = 'H5T_C_S1'
                if t['base']['length'] not in (1, 'H5T_VARIABLE'):
                    raise NotImplementedError(
                        'MATLAB only allows vlen strings of variable or '
                        'fixed length of 1.')
            elif t['base']['class'] == 'H5T_REFERENCE':
                raise NotImplementedError(
                    'MATLAB does not support H5T_REFERENCE for vlen datatype')
            else:
                base_type = t['base']['base']
            return "tid = H5T.vlen_create('%s');\n" % base_type
        elif tcls == 'H5T_ARRAY':
            tmplt = Template(
                "${base}"
                "$var = H5T.array_create(base_tid, fliplr($dims));\n"
                "H5T.close(base_tid);\n"
            )
            return tmplt.substitute(
                {'base': self._dtype(t['base'], var='base_tid'),
                 'dims': t['dims'],
                 'var': var})
        else:
            return var + " = " + self._atomic_dtype(t, var=var)

    def _atomic_dtype(self, t, var='tid'):
        """Handle HDF5 atomic datatypes.

        :arg dict t: HDF5/JSON datatype information.
        :arg str var: Default name of the datatype variable.
        """
        tcls = t['class']
        if tcls == 'H5T_STRING':
            tmplt = Template(
                "H5T.copy('H5T_C_S1');\n"
                "H5T.set_size($var, $length);\n"
                "H5T.set_strpad($var,'$strpad');\n"
                "H5T.set_cset($var, H5ML.get_constant_value('$cset'));\n"
            )
            if isinstance(t['length'], six.string_types):
                length = "'%s'" % t['length']
            else:
                length = t['length']
            return tmplt.substitute({'length': length,
                                     'strpad': t['strPad'],
                                     'cset': t['charSet'],
                                     'var': var})
        elif tcls in ('H5T_FLOAT', 'H5T_INTEGER'):
            return "H5T.copy('%s');\n" % t['base']
        elif tcls == 'H5T_REFERENCE':
            return "H5T.copy('%s');\n" % t['base']
        else:
            raise NotImplementedError('%s: Datatype not supported'
                                      % t['class'])

    def _create_attr(self, attr, locid, dimscale=False):
        """Generate code for one attribute of the ``locid`` object.

        :arg dict attr: Attribute information.
        :arg str locid: Attribute's parent variable name.
        :arg bool dimscale: Indicates attribute's parent is a dimension
            scale.
""" if dimscale and attr['name'] in ('REFERENCE_LIST', 'DIMENSION_LIST', 'NAME', 'CLASS'): return dataspace = self._dspace(attr['shape']) datatype = self._dtype(attr['type']) tmplt = Template( "\n% Attribute: $name\n" "${datatype}" "${dataspace}" "acpl = H5P.create('H5P_ATTRIBUTE_CREATE');\n" "aid = H5A.create($lid, '$name', tid, sid, acpl, 'H5P_DEFAULT');\n" "${value}" "H5T.close(tid);\n" "H5S.close(sid);\n" "H5A.close(aid);\n" ) if attr['type']['class'] == 'H5T_STRING': def prep_vals(vals): values = list() with_sprintf = False for v in vals: temp = v.encode('ascii') if "'" in temp: temp = temp.replace("'", "''") with_sprintf = True if '%' in temp: temp = temp.replace('%', '%%') with_sprintf = True if '\n' in temp: temp = temp.replace('\n', '\\n') with_sprintf = True if with_sprintf: values.append("sprintf('{}')".format(temp)) else: values.append("'{}'".format(temp)) return values if attr['shape']['class'] == 'H5S_SCALAR': value = prep_vals([attr['value']])[0] else: if len(attr['shape']['dims']) > 1: raise NotImplementedError( 'Rank > 1 for string data not supported') value = prep_vals(attr['value']) if attr['type']['length'] == 'H5T_VARIABLE': val_str = ('{' + ', '.join(value) + '}') else: # Left-justified, fixed-length string format... fmt = "'%-{0:d}.{0:d}s'".format(attr['type']['length']) if attr['shape']['class'] == 'H5S_SCALAR': val_str = "sprintf({}, {})".format(fmt, value) else: for i in range(len(value)): value[i] = "sprintf({}, {})".format(fmt, value[i]) val_str = ( '[' + '; '.join(value) + "]'") else: val_str = attr['value'] val = Template( "H5A.write(aid, 'H5ML_DEFAULT', $value);\n" ).substitute({'value': val_str}) vars = {'lid': locid, 'name': attr['name'], 'datatype': datatype, 'dataspace': dataspace, 'value': val} self._m.append(tmplt.substitute(vars)) def _is_dimscale(self, attrs): """Check if the dataset is a dimension scale. :arg list attrs: All dataset's attributes. """ # Check if REFERENCE_LIST attribute is present... ref_list = any(a['name'] == 'REFERENCE_LIST' and a['type']['class'] == 'H5T_COMPOUND' for a in attrs) # Check if CLASS attribute is present... cls_ = any(a['name'] == 'CLASS'and a['value'] == 'DIMENSION_SCALE' for a in attrs) if ref_list and cls_: return True else: return False def _dimscale(self, attrs, locid): """Generate the code that sets a dimension scale. :arg list attrs: All dataset's attributes. :arg str locid: MATLAB variable name of the attributes' parent object. """ # Check if NAME attribute is present... for a in attrs: if a['name'] == 'NAME'and a['type']['class'] == 'H5T_STRING': scale_name = "'%s'" % a['value'] break else: scale_name = '[]' return "H5DS.set_scale(%s, %s);\n" % (locid, scale_name) def _is_dimlist(self, attrs): """Check if the dataset has dimension scales attached. :arg list attrs: All dataset's attributes. """ # Check if DIMENSION_LIST attribute is present... dim_list = any(a['name'] == 'DIMENSION_LIST' and a['type']['class'] == 'H5T_VLEN' for a in attrs) return True if dim_list else False def _is_dimscale_related(self, attrs): """Check if the attributes of a dataset indicate it is related to dimension scales. :arg list attrs: All dataset's attributes. """ dimscale = self._is_dimscale(attrs) dimlist = self._is_dimlist(attrs) if dimscale or dimlist: return True else: return False def _create_attrs(self, attrs, locid, dimscale=False): """Generate code for all the attributes of the ``locid`` object. :arg dict attrs: HDF5/JSON information about the attributes of the parent ``locid`` object. 
        :arg str locid: MATLAB variable name of the attributes' parent
            object.
        :arg bool dimscale: Indicates whether the attributes belong to a
            dimension scale.
        """
        for a in attrs:
            self._create_attr(a, locid, dimscale=dimscale)

    def _create_dset(self, id, name, ds, locid):
        """Generate code for one dataset of the ``locid`` group.

        :arg str id: Dataset's identifier.
        :arg str name: Dataset's name.
        :arg dict ds: Dataset information.
        :arg str locid: Variable name of the dataset's parent group.
        """
        try:
            dataspace = self._dspace(ds['shape'])
        except ValueError as e:
            raise ValueError('{}: {}'.format(ds.get('alias', [name])[0],
                                             str(e)))
        datatype = self._dtype(ds['type'])
        tmplt = Template(
            "\n"
            "% Dataset: $name\n"
            "${datatype}"
            "${dataspace}"
            "${dcpl}"
            "dsid = H5D.create($locid, '$name', tid, sid, '$plist', dcpl, "
            "'$plist');\n"
            "H5S.close(sid);\n"
            "H5T.close(tid);\n"
            "${dimscale}"
        )

        # Dataset creation property list...
        dcpl = "dcpl = H5P.create('H5P_DATASET_CREATE');\n"

        # Layout...
        layout = ds.get('creationProperties', {}).get('layout', {})
        if layout.get('class', 'H5D_CONTIGUOUS') == 'H5D_CONTIGUOUS':
            pass
        elif layout['class'] == 'H5D_COMPACT':
            dcpl = ("dcpl = H5P.create('H5P_DATASET_CREATE');\n"
                    "H5P.set_layout(dcpl, 'H5D_COMPACT');\n")
        elif layout['class'] == 'H5D_CHUNKED':
            if len(layout['dims']) == 1:
                chunks = layout['dims']
            else:
                chunks = 'fliplr(%s)' % layout['dims']
            dcpl = Template(
                "dcpl = H5P.create('H5P_DATASET_CREATE');\n"
                "H5P.set_layout(dcpl, H5ML.get_constant_value('H5D_CHUNKED'));"
                "\n"
                "H5P.set_chunk(dcpl, $chunks);\n"
            ).substitute({'chunks': chunks})
        else:
            raise ValueError('%s: Invalid layout class' % layout['class'])

        # Filters...
        filters = ds.get('creationProperties', {}).get('filters', [])
        for f in filters:
            if f['class'] == 'H5Z_FILTER_DEFLATE':
                dcpl += "H5P.set_deflate(dcpl, %s);\n" % f['level']
            elif f['class'] == 'H5Z_FILTER_FLETCHER32':
                dcpl += "H5P.set_fletcher32(dcpl);\n"
            elif f['class'] == 'H5Z_FILTER_SHUFFLE':
                dcpl += "H5P.set_shuffle(dcpl);\n"
            elif f['class'] == 'H5Z_FILTER_SCALEOFFSET':
                dcpl += ("H5P.set_scaleoffset(dcpl, '%s', %d);\n"
                         % (f['scaleType'], f['scaleOffset']))
            elif f['class'] == 'H5Z_FILTER_NBIT':
                dcpl += "H5P.set_nbit(dcpl);\n"
            else:
                raise NotImplementedError('%s: Filter not supported yet'
                                          % f['class'])

        # Fill value...
        fv = ds.get('creationProperties', {}).get('fillValue', None)
        if fv:
            if type(fv) is list:
                raise NotImplementedError(
                    'Non-scalar fill value not supported yet')
            else:
                # Use dataset's datatype for fill value...
                if ds['type']['class'] == 'H5T_STRING':
                    fv = "'%s'" % fv
                else:
                    fv = str(fv)
                    # Remove an "L" suffix if present...
                    if fv[-1] == 'L':
                        fv = fv[:-1]
                    fv = '{}({})'.format(self.matlab_dtype(ds['type']['base']),
                                         fv)
                dcpl += "H5P.set_fill_value(dcpl, tid, %s);\n" % fv

        attrs = ds.get('attributes', [])
        is_dimscale = self._is_dimscale(attrs)
        if is_dimscale:
            dimscale = self._dimscale(attrs, 'dsid')
        else:
            dimscale = str()

        vars = {'locid': locid,
                'name': name,
                'datatype': datatype,
                'dataspace': dataspace,
                'plist': 'H5P_DEFAULT',
                'dcpl': dcpl,
                'dimscale': dimscale}
        self._m.append(tmplt.substitute(vars))

        is_dimlist = self._is_dimlist(attrs)
        if is_dimlist:
            self._dimlist.append(id)
        self._create_attrs(attrs, 'dsid',
                           dimscale=(is_dimscale or is_dimlist))
        self._m.append("H5D.close(dsid);\n")

    def _create_dsets(self, dsets, locid, path):
        """Generate code for all the datasets of the ``locid`` object.

        :arg dict dsets: HDF5/JSON information about the datasets of the
            parent ``locid`` object.
        :arg str locid: MATLAB variable name of the datasets' parent object.
        :arg str path: HDF5 path of the datasets' parent object.
        """
        for d in dsets:
            # Record the full HDF5 path to the dataset...
            self._dset_path[d['id']] = pp.join(path, d['title'])

            # Generate dataset code...
            self._create_dset(d['id'], d['title'],
                              self._d['datasets'][d['id']], locid)

    def _create_group(self, g):
        """Code for all group content.

        :arg dict g: Group id and full name.
        """
        grpid = g['id']
        path = g['path']
        if grpid == self._root:
            locid = 'fid'
        else:
            tmplt = Template(
                "\n\n"
                "%\n"
                "% Group: $path\n"
                "%\n"
                "gid = H5G.create(fid, '$path', '$plist', '$plist', '$plist');"
                "\n"
            )
            vars = {'path': path, 'plist': 'H5P_DEFAULT'}
            self._m.append(tmplt.substitute(vars))
            locid = 'gid'

        self._create_attrs(self._d['groups'][grpid].get('attributes', []),
                           locid)
        self._create_dsets(self._get_hard_links(grpid, 'datasets'), locid,
                           path)

        if grpid != self._root:
            self._m.append("H5G.close(gid);\n")

    def _dimensions(self):
        """Generate code connecting dimension scales and their datasets."""
        # List for storing dimension scale IDs...
        dscales = list()
        self._m.append(
            "\n\n"
            "%\n"
            "% Datasets and their dimension scales\n"
            "%\n"
        )
        for dset_id in self._dimlist:
            tmp = Template(
                "\n% Dataset with dimension scales: $name\n"
                "dset_id = H5D.open(fid, '$name', 'H5P_DEFAULT');\n"
            ).substitute({'name': self._dset_path[dset_id]})
            self._m.append(tmp)

            dset = self._d['datasets'][dset_id]

            # Get DIMENSION_LIST attribute value...
            dims = next(a['value'] for a in dset['attributes']
                        if a['name'] == 'DIMENSION_LIST')

            # Iterate over dataset's dimension scales in reversed order
            # (because of using fliplr() when defining dataset's shape)...
            for index, dimscales in enumerate(reversed(dims)):
                for n, ds in enumerate(dimscales):
                    ds_id = pp.basename(ds)
                    dscales.append(ds_id)
                    tmp = Template(
                        "\n% Dimension scale: $name\n"
                        "dscl_id = H5D.open(fid, '$name', 'H5P_DEFAULT');\n"
                        "H5DS.attach_scale(dset_id, dscl_id, $idx);\n"
                        "H5D.close(dscl_id);\n"
                    ).substitute({'name': self._dset_path[ds_id],
                                  'idx': index})
                    self._m.append(tmp)

            self._m.append(
                "\nH5D.close(dset_id);\n"
            )

    def get_code(self):
        """Generate MATLAB source code."""
        self._create_file()

        # Order groups by hierarchy...
        groups = self._order_groups()
        for g in groups:
            self._create_group(g)

        if self._dimlist:
            # Handle dimension scales...
            self._dimensions()

        self._close_file()
        return self._m.dump()
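
# Hypothetical usage sketch for MCode (illustrative names only): the same
# HDF5/JSON input drives the MATLAB generator; the resulting .m script uses
# MATLAB's low-level H5* interface to build the template file.
#
#     mcode = MCode(tinfo, 'example')   # the MATLAB code creates example.h5
#     with open('make_example.m', 'w') as f:
#         f.write(mcode.get_code())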


class PyCode(object):
    """Produce Python code that generates an HDF5 file based on given JSON
    input."""

    def __init__(self, h5json, fname, ext='h5'):
        """Initialize a PyCode instance.

        :arg dict h5json: HDF5/JSON content.
        :arg str fname: Name of the HDF5 file the generated code will
            create.
        :arg str ext: Generated HDF5 file's extension. **Without the dot!**
        """
        if len(fname) == 0:
            raise ValueError('Missing file name.')
        if 'root' not in h5json:
            raise KeyError('"root" key not found.')

        self._h5j = h5json
        self._dimensions = list()
        self._dimscales = dict()
        self._fname = '{}.{}'.format(fname, ext)
        self._file_var = 'f'
        self._p = StringStore()

    def get_code(self):
        """Generate Python code for supplied HDF5/JSON."""
        root_uuid = self._h5j["root"]
        self._p.append(
            "import h5py\n"
            "import numpy as np\n\n"
            "# creating file: {1}\n"
            "{0} = h5py.File('{1}', 'w')\n\n"
            .format(self._file_var, self._fname)
        )
        group_json = self._h5j["groups"]
        root_json = group_json[root_uuid]
        self.doAttributes(root_json, '/', self._file_var)
        self.doLinks(self._h5j, root_json, 0)
        self.doDimensions(self._h5j, self._dimensions, self._dimscales,
                          self._file_var)
        return self._p.dump()

    def getNumpyTypename(self, hdf5TypeName, typeClass=None):
        predefined_int_types = {
            'H5T_STD_I8': 'i1',
            'H5T_STD_U8': 'u1',
            'H5T_STD_I16': 'i2',
            'H5T_STD_U16': 'u2',
            'H5T_STD_I32': 'i4',
            'H5T_STD_U32': 'u4',
            'H5T_STD_I64': 'i8',
            'H5T_STD_U64': 'u8'
        }
        predefined_float_types = {
            'H5T_IEEE_F32': 'f4',
            'H5T_IEEE_F64': 'f8'
        }
        if len(hdf5TypeName) < 3:
            raise TypeError("%s: invalid type" % hdf5TypeName)
        endian = '<'  # default endian
        key = hdf5TypeName
        if hdf5TypeName.endswith('LE'):
            key = hdf5TypeName[:-2]
        elif hdf5TypeName.endswith('BE'):
            key = hdf5TypeName[:-2]
            endian = '>'
        if key in predefined_int_types and (typeClass is None or
                                            typeClass == 'H5T_INTEGER'):
            return endian + predefined_int_types[key]
        if key in predefined_float_types and (typeClass is None or
                                              typeClass == 'H5T_FLOAT'):
            return endian + predefined_float_types[key]
        raise TypeError("%s: invalid type" % hdf5TypeName)

    def getBaseDataType(self, typeItem):
        code = "dt = "
        if type(typeItem) == str or type(typeItem) == unicode:
            # should be one of the predefined types
            dtName = self.getNumpyTypename(typeItem)
            code += "np.dtype('{}')\n".format(dtName)
            return code
        if type(typeItem) != dict:
            raise TypeError("{}: invalid type".format(typeItem))
        code += 'np.dtype({})\n'.format(self._dtype(typeItem))
        return code

    def _dtype(self, typeItem, compound=False):
        """Helper function for generating numpy.dtype() code.

        :arg dict typeItem: HDF5/JSON datatype description.
        :arg bool compound: Flag indicating the datatype is part of a
            compound datatype.
""" typeClass = typeItem['class'] shape = '' if 'dims' in typeItem: shp_key = 'dims' else: shp_key = 'shape' if shp_key in typeItem: dims = None if type(typeItem[shp_key]) == int: dims = (typeItem[shp_key],) # make into a tuple elif type(typeItem[shp_key]) not in (list, tuple): raise TypeError("expected list or integer for %s" % shp_key) else: dims = typeItem[shp_key] shape = str(tuple(dims)) code = '' if typeClass == 'H5T_INTEGER': if 'base' not in typeItem: raise KeyError("'base' not provided") baseType = self.getNumpyTypename(typeItem['base'], typeClass='H5T_INTEGER') code += "'{}{}'".format(shape, baseType) elif typeClass == 'H5T_FLOAT': if 'base' not in typeItem: raise KeyError("'base' not provided") baseType = self.getNumpyTypename(typeItem['base'], typeClass='H5T_FLOAT') code += "'{}{}'".format(shape, baseType) elif typeClass == 'H5T_STRING': if 'length' not in typeItem: raise KeyError("'length' not provided") if 'charSet' not in typeItem: raise KeyError("'charSet' not provided") if typeItem['length'] == 'H5T_VARIABLE': if shape: raise TypeError( "ArrayType is not supported for variable len types") if typeItem['charSet'] == 'H5T_CSET_ASCII': code += "h5py.special_dtype(vlen=str)" elif typeItem['charSet'] == 'H5T_CSET_UTF8': code += "h5py.special_dtype(vlen=unicode)" else: raise TypeError("unexpected 'charSet' value") else: # fixed size ascii string nStrSize = typeItem['length'] if type(nStrSize) != int: raise TypeError("expecting integer value for 'length'") code += "'{}S{}'".format(shape, nStrSize) elif typeClass == 'H5T_VLEN': if shape: raise TypeError( "ArrayType is not supported for variable len types") if 'base' not in typeItem: raise KeyError("'base' not provided") vlenBaseType = typeItem['base'] baseType = self.getNumpyTypename(vlenBaseType['base'], typeClass=vlenBaseType['class']) code += "h5py.special_dtype(vlen=np.dtype('" + baseType + "'))" elif typeClass == 'H5T_OPAQUE': if shape: raise TypeError( "Opaque Type is not supported for variable len types") if 'size' not in typeItem: raise KeyError("'size' not provided") nSize = int(typeItem['size']) if nSize <= 0: raise TypeError("'size' must be non-negative") code += "'V{}'".format(nSize) elif typeClass == 'H5T_ARRAY': if not shape: raise KeyError("'shape' must be provided for array types") if 'base' not in typeItem: raise KeyError("'base' not provided") baseType = self._dtype(typeItem['base']) if type(baseType) not in (str, unicode): raise TypeError( "Array type is only supported for predefined base types") # should be one of the predefined types code += "{1}, {0}".format(shape, baseType) if not compound: code = "({})".format(code) elif typeClass == 'H5T_COMPOUND': if 'fields' not in typeItem: raise KeyError("'fields' must be provided for compound types") if type(typeItem['fields']) is not list: raise TypeError("compound 'fields' value must be a list") dt_arg = list() for fld in typeItem['fields']: dt_arg.append( "('{}', {})".format( fld['name'], self._dtype(fld['type'], compound=True))) code = '[' + ', '.join(dt_arg) + ']' else: raise TypeError("%s: Invalid type class" % typeClass) return code def valueToString(self, attr_json): value = attr_json["value"] return json.dumps(value) def doAttribute(self, attr_json, parent_var): if attr_json['type']['class'] == 'H5T_STRING': self._p.append(parent_var + ".attrs['" + attr_json['name'] + "'] = " + self.valueToString(attr_json) + '\n') else: dt = 'np.dtype({})'.format(self._dtype(attr_json["type"])) shape_json = attr_json["shape"] if shape_json['class'] == 'H5S_SIMPLE': shape = 
            elif shape_json['class'] == 'H5S_SCALAR':
                shape = ', ()'
            else:
                raise NotImplementedError('{}: Dataspace not supported yet'
                                          .format(shape_json['class']))
            self._p.append("{}.attrs.create('{}', {}{}, dtype={})\n"
                           .format(parent_var, attr_json['name'],
                                   self.valueToString(attr_json), shape, dt))

    def getObjectName(self, obj_json, obj_title):
        name = obj_title
        if "alias" in obj_json:
            alias = obj_json["alias"]
            try:
                name = alias[0]
            except (TypeError, IndexError):
                name = alias
        return name

    def group_var_name(self, level, next=False):
        """Determine the name of the group variable based on its HDF5 tree
        depth.

        :arg int level: HDF5 tree depth level (root = 0).
        :arg bool next: Provide group variable for the next level, i.e.
            subgroup.
        """
        grp_fmt = 'grp_{:d}'
        if next:
            return grp_fmt.format(level + 1)
        if level == 0:
            pvar = self._file_var
        else:
            pvar = grp_fmt.format(level)
        return pvar

    def doAttributes(self, obj_json, obj_name, parent_var, is_dimscale=False,
                     is_dimension=False):
        if len(obj_json.get('attributes', [])) == 0:
            return
        attrs_json = obj_json["attributes"]
        first_time = True
        for attr_json in attrs_json:
            if is_dimscale and attr_json['name'] in ('CLASS',
                                                     'REFERENCE_LIST',
                                                     'NAME'):
                continue
            if is_dimension and attr_json['name'] == 'DIMENSION_LIST':
                continue
            if first_time:
                self._p.append("# Creating attributes for {}\n"
                               .format(obj_name))
                first_time = False
            self.doAttribute(attr_json, parent_var)

    def doGroup(self, h5json, group_id, group_name, level):
        parent_var = self.group_var_name(level)
        groups = h5json["groups"]
        group_json = groups[group_id]
        group_path = self.getObjectName(group_json, group_name)
        self._p.append("\n\n# Group: {}\n".format(group_path))
        group_var = self.group_var_name(level, next=True)
        self._p.append("{0} = {1}.create_group('{2}')\n"
                       .format(group_var, parent_var, group_name))
        self.doAttributes(group_json, group_path, group_var)
        self.doLinks(h5json, group_json, level + 1)

    def _dims2str(self, dims, kwd=''):
        """Convert a list of integers to a string representing a dimension
        tuple.

        :arg list dims: A dimension list.
        :arg str kwd: Optional keyword string.
        """
        dims = [str(d) if d != 'H5S_UNLIMITED' else 'None' for d in dims]
        if len(dims) == 1:
            # Produce correct tuple when dim rank is 1...
            dims.append('')
        if kwd:
            kwd += '='
        return ', {}({})'.format(kwd, ','.join(dims))

    def doDataset(self, h5json, dset_id, dset_name, parent_var):
        datasets = h5json["datasets"]
        dset_json = datasets[dset_id]
        dset_path = dset_json.get('alias', [dset_name])[0]
        self._p.append("\n# Dataset: {}\n".format(dset_path))
        dset_var = 'dset'
        try:
            dtLine = self.getBaseDataType(dset_json["type"])  # "dt = ..."
            self._p.append(dtLine)

            shape = ''
            maxshape = ''
            chunks = ''
            flt = ''
            cp = dset_json.get('creationProperties', {})
            if "shape" in dset_json:
                shape_json = dset_json["shape"]
                if shape_json["class"] == "H5S_SIMPLE":
                    shape = self._dims2str(shape_json["dims"])
                    if 'maxdims' in shape_json:
                        maxshape = self._dims2str(shape_json['maxdims'],
                                                  kwd='maxshape')
                    layout = cp.get('layout', {}).get('class',
                                                      'H5D_CONTIGUOUS')
                    if layout == 'H5D_CHUNKED':
                        chunks = self._dims2str(cp['layout']['dims'],
                                                kwd='chunks')
                    filters = cp.get('filters', [])
                    for f in filters:
                        if f['class'] == 'H5Z_FILTER_DEFLATE':
                            flt += (
                                ", compression='gzip', "
                                "compression_opts={:d}").format(f['level'])
                        elif f['class'] == 'H5Z_FILTER_FLETCHER32':
                            flt += ', fletcher32=True'
                        elif f['class'] == 'H5Z_FILTER_SHUFFLE':
                            flt += ', shuffle=True'
                        elif f['class'] == 'H5Z_FILTER_SCALEOFFSET':
                            flt += (', scaleoffset={:d}'
                                    .format(f['scaleOffset']))
                        else:
                            raise NotImplementedError(
                                '{}: Filter not supported yet'
                                .format(f['class']))
                elif shape_json['class'] == 'H5S_SCALAR':
                    shape = ', ()'

            if 'fillValue' in cp:
                if dset_json['type']['class'] == 'H5T_STRING':
                    fv = ", fillvalue='{}'".format(cp['fillValue'])
                else:
                    fv = ', fillvalue={}'.format(cp['fillValue'])
            else:
                fv = ''

            code_line = ("{} = {}.create_dataset('{}'{}{}{}{}{}"
                         ", dtype=dt)\n").format(dset_var, parent_var,
                                                 dset_name, shape, maxshape,
                                                 chunks, fv, flt)
            self._p.append(code_line)
            self._p.append("# initialize dataset values here\n")

            dscale = self._is_dimscale(dset_json.get('attributes', []))
            if dscale:
                # Find out dimension scale's name, if available...
                for a in dset_json.get('attributes', []):
                    if a['name'] == 'NAME':
                        ds_name = a['value']
                        break
                else:
                    ds_name = None
                self._dimscales.update({dset_id: ds_name})

            dim = self._is_dimlist(dset_json.get('attributes', []))
            if dim:
                self._dimensions.append(dset_id)

            self.doAttributes(dset_json, dset_path, dset_var,
                              is_dimscale=dscale, is_dimension=dim)
        except Exception as e:
            raise type(e)('{}: {}'.format(dset_path, str(e)))

    def doLink(self, h5json, link_json, level):
        parent_var = self.group_var_name(level)
        if link_json["class"] == "H5L_TYPE_EXTERNAL":
            self._p.append("{0}['{1}'] = h5py.ExternalLink('{2}', '{3}')\n"
                           .format(parent_var, link_json["title"],
                                   link_json["file"], link_json["h5path"]))
        elif link_json["class"] == "H5L_TYPE_SOFT":
            self._p.append("{0}['{1}'] = h5py.SoftLink('{2}')\n"
                           .format(parent_var, link_json["title"],
                                   link_json["h5path"]))
        elif link_json["class"] == "H5L_TYPE_HARD":
            if link_json["collection"] == "groups":
                self.doGroup(h5json, link_json["id"], link_json["title"],
                             level)
            elif link_json["collection"] == "datasets":
                self.doDataset(h5json, link_json["id"], link_json["title"],
                               parent_var)
            elif link_json["collection"] == "datatypes":
                raise NotImplementedError(
                    'committed datatypes not supported yet')
            else:
                raise Exception(
                    "unexpected collection name: " + link_json["collection"])
        elif link_json["class"] == "H5L_TYPE_UDLINK":
            self._p.append("# ignoring user defined link: '{0}'\n"
                           .format(link_json["title"]))
        else:
            raise Exception("unexpected link type: " + link_json["class"])

    def doLinks(self, h5json, group_json, level):
        links = group_json.get("links", [])
        for link in links:
            self.doLink(h5json, link, level)

    def _is_dimscale(self, attrs):
        """Check if the dataset is a dimension scale.

        :arg list attrs: All dataset's attributes.
        """
        # Check if REFERENCE_LIST attribute is present...
        ref_list = any(a['name'] == 'REFERENCE_LIST' and
                       a['type']['class'] == 'H5T_COMPOUND' for a in attrs)

        # Check if CLASS attribute is present...
        cls_ = any(a['name'] == 'CLASS' and a['value'] == 'DIMENSION_SCALE'
                   for a in attrs)

        if ref_list and cls_:
            return True
        else:
            return False

    def _is_dimlist(self, attrs):
        """Check if the dataset has dimension scales attached.

        :arg list attrs: All dataset's attributes.
        """
        # Check if DIMENSION_LIST attribute is present...
        dim_list = any(a['name'] == 'DIMENSION_LIST' and
                       a['type']['class'] == 'H5T_VLEN' for a in attrs)
        return True if dim_list else False

    def _dset_paths(self, h5json):
        grps = [{'id': h5json['root'], 'path': '/'}]
        dsets = {}

        def get_hard_links(grpjson, collection):
            links = list()
            for l in grpjson.get('links', []):
                if (l['class'] == 'H5L_TYPE_HARD' and
                        l['collection'] == collection):
                    links.append(l)
            return links

        def tree_walker(ginfo):
            dlinks = get_hard_links(h5json['groups'][ginfo['id']], 'datasets')
            for dl in dlinks:
                dsets.update({dl['id']: pp.join(ginfo['path'], dl['title'])})
            glinks = get_hard_links(h5json['groups'][ginfo['id']], 'groups')
            chld_grps = list()
            for gl in glinks:
                chld_grps.append({'id': gl['id'],
                                  'path': pp.join(ginfo['path'],
                                                  gl['title'])})
            grps.extend(chld_grps)
            for cg in chld_grps:
                tree_walker(cg)

        tree_walker(grps[0])
        return dsets

    def doDimensions(self, h5json, dimensions, dimscales, parent_var):
        if len(dimensions) == 0:
            return

        # Generate HDF5 paths for all datasets...
        dset_path = self._dset_paths(h5json)

        self._p.append('\n\n'
                       '#\n'
                       '# Adding dimensions\n'
                       '#\n'
                       '\n')

        self._p.append('# Creating dimension scales\n')
        for dsid, name in dimscales.iteritems():
            if dimscales[dsid]:
                name = ", '{}'".format(dimscales[dsid])
            else:
                name = ''
            self._p.append("h5py.h5ds.set_scale({}['{}'].id{})\n"
                           .format(parent_var, dset_path[dsid], name))

        for dsid in dimensions:
            dsid_path = dset_path[dsid]
            self._p.append("\n# Attaching dimension scales to dataset: {}\n"
                           .format(dsid_path))
            for attr in h5json['datasets'][dsid].get('attributes', []):
                if attr['name'] == 'DIMENSION_LIST':
                    dim_list = attr['value']
                    break
            else:
                raise ValueError('%s: DIMENSION_LIST attribute not found'
                                 % dsid_path)
            for idx, ds in enumerate(dim_list):
                for d in ds:
                    did = pp.split(d)[-1]
                    did_path = dset_path[did]
                    self._p.append(
                        "{}['{}'].dims[{:d}].attach_scale({}['{}'])\n"
                        .format(parent_var, dsid_path, idx, parent_var,
                                did_path))
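

if __name__ == '__main__':
    # Minimal smoke-test sketch (hypothetical input, not part of the
    # original module): a template containing just a root group. Real input
    # would come from an HDF5/JSON document; the key names below follow the
    # structure the classes above expect ('root' plus a 'groups' mapping).
    sample = {'root': 'root-uuid', 'groups': {'root-uuid': {}}}

    # Each generator emits source code in its target language that, when
    # run, creates the file example.h5.
    print(PyCode(sample, 'example').get_code())
    print(MCode(sample, 'example').get_code())
    print(IdlCode(sample, 'example').get_code())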