def _binary_data_bytecount(niml): '''helper function that returns how many bytes a NIML binary data element should have''' niform = niml['ni_form'] if not 'binary' in niform: raise ValueError('Illegal niform %s' % niform) tps = niml['vec_typ'] onetype = types.findonetype(tps) if onetype is None: debug('NIML', 'Not unique type: %r', tps) return None # numeric, either int or float ncols = niml['vec_num'] nrows = niml['vec_len'] tp = types.code2numpy_type(onetype) bytes_per_elem = types.numpy_type2bytecount(tp) if bytes_per_elem is None: raise ValueError("Type not supported: %r" % onetype) nb = ncols * nrows * bytes_per_elem debug('NIML', 'Number of bytes for %s: %d x %d with %d bytes / element', (niform, ncols, nrows, bytes_per_elem)) return nb
def _datastring2rawniml(s, niml):
    '''Converts data with uniform type to raw NIML

    Parameters
    ----------
    s: bytes or bytearray
        Data body of a single NIML element (text, base64 or raw binary).
    niml: dict
        Raw NIML element. 'vec_typ', 'vec_num' and 'vec_len' are read,
        as well as the optional 'ni_form'.

    Returns
    -------
    data:
        - the result of _mixedtypes_datastring2rawniml if 'vec_typ' has
          no single type, or consists of more than one string column;
        - the escape-decoded text for a single string column;
        - otherwise a numpy array with shape (vec_len, vec_num).

    Raises
    ------
    ValueError
        If text data does not contain vec_len * vec_num values, or if
        'ni_form' is neither text, base64 nor binary.
    '''
    debug('NIML', 'Raw string to NIML: %d characters', len(s))

    tps = niml['vec_typ']

    onetype = types.findonetype(tps)

    if onetype is None or ([onetype] == types.str2codes('string') and
                           len(tps) > 1):
        return _mixedtypes_datastring2rawniml(s, niml)

    if [onetype] == types.str2codes('string'):
        # single string
        return decode_escape(s.decode())  # do not string2rawniml

    # numeric, either int or float
    ncols = niml['vec_num']
    nrows = niml['vec_len']
    tp = types.code2numpy_type(onetype)

    niform = niml.get('ni_form', None)

    if not niform or niform == 'text':
        data = np.zeros((nrows, ncols), dtype=tp)  # allocate space for data
        convertor = types.code2python_convertor(onetype)  # string to type convertor

        vals = s.split(None)  # split by whitespace separator

        if len(vals) != ncols * nrows:
            raise ValueError("unexpected number of elements")

        # values are stored row by row
        for i, val in enumerate(vals):
            data[i // ncols, i % ncols] = convertor(val)

    else:
        dtype = np.dtype(tp)
        # NOTE(review): the dtype returned here (presumably carrying the
        # byte order announced in ni_form) is not used for decoding
        # below - the plain type 'tp' is. Confirm whether data declared
        # with a non-native byte order is meant to be byte-swapped.
        dtype = types.byteorder_from_niform(niform, dtype)

        if 'base64' in niform:
            debug('NIML', 'base64, %d chars: %s',
                  (len(s), _partial_string(s, 0)))

            s = base64.b64decode(s)
        elif 'binary' not in niform:
            raise ValueError('Illegal niform %s' % niform)

        # np.fromstring in binary mode is deprecated; np.frombuffer is the
        # replacement. The copy keeps the result writable and independent
        # of the input buffer, as it was with np.fromstring.
        data_1d = np.frombuffer(s, dtype=tp).copy()

        debug('NIML',
              'data vector has %d elements, reshape to %d x %d = %d',
              (np.size(data_1d), nrows, ncols, nrows * ncols))

        data = np.reshape(data_1d, (nrows, ncols))

    return data
def _datastring2rawniml(s, niml):
    '''Converts data with uniform type to raw NIML

    Parameters
    ----------
    s: bytes or bytearray
        Data body of a single NIML element (text, base64 or raw binary).
    niml: dict
        Raw NIML element. 'vec_typ', 'vec_num' and 'vec_len' are read,
        as well as the optional 'ni_form'.

    Returns
    -------
    data:
        - the result of _mixedtypes_datastring2rawniml if 'vec_typ' has
          no single type, or consists of more than one string column;
        - the escape-decoded text for a single string column;
        - otherwise a numpy array with shape (vec_len, vec_num).

    Raises
    ------
    ValueError
        If text data does not contain vec_len * vec_num values, or if
        'ni_form' is neither text, base64 nor binary.
    '''
    debug('NIML', 'Raw string to NIML: %d characters', len(s))

    tps = niml['vec_typ']

    onetype = types.findonetype(tps)

    if onetype is None or ([onetype] == types.str2codes('string') and
                           len(tps) > 1):
        return _mixedtypes_datastring2rawniml(s, niml)

    if [onetype] == types.str2codes('string'):
        # single string
        return decode_escape(s.decode())  # do not string2rawniml

    # numeric, either int or float
    ncols = niml['vec_num']
    nrows = niml['vec_len']
    tp = types.code2numpy_type(onetype)

    niform = niml.get('ni_form', None)

    if not niform or niform == 'text':
        data = np.zeros((nrows, ncols), dtype=tp)  # allocate space for data
        convertor = types.code2python_convertor(onetype)  # string to type convertor

        vals = s.split(None)  # split by whitespace separator

        if len(vals) != ncols * nrows:
            raise ValueError("unexpected number of elements")

        # values are stored row by row
        for i, val in enumerate(vals):
            data[i // ncols, i % ncols] = convertor(val)

    else:
        dtype = np.dtype(tp)
        # NOTE(review): the dtype returned here (presumably carrying the
        # byte order announced in ni_form) is not used for decoding
        # below - the plain type 'tp' is. Confirm whether data declared
        # with a non-native byte order is meant to be byte-swapped.
        dtype = types.byteorder_from_niform(niform, dtype)

        if 'base64' in niform:
            debug('NIML', 'base64, %d chars: %s',
                  (len(s), _partial_string(s, 0)))

            s = base64.b64decode(s)
        elif 'binary' not in niform:
            raise ValueError('Illegal niform %s' % niform)

        # np.fromstring in binary mode is deprecated; np.frombuffer is the
        # replacement. The copy keeps the result writable and independent
        # of the input buffer, as it was with np.fromstring.
        data_1d = np.frombuffer(s, dtype=tp).copy()

        debug('NIML',
              'data vector has %d elements, reshape to %d x %d = %d',
              (np.size(data_1d), nrows, ncols, nrows * ncols))

        data = np.reshape(data_1d, (nrows, ncols))

    return data
def string2rawniml(s, i=None):
    '''Parses a NIML string to a raw NIML tree-like structure

    Parameters
    ----------
    s: bytes or bytearray
        string to be converted
    i: int
        Starting position in the string.
        By default None is used, which means that the entire string is
        converted.

    Returns
    -------
    r: the NIML result.
        If input parameter i is None then a list of dictionaries, one
        per parsed NIML element, is returned. Group elements store
        their children (parsed recursively) under the key 'nodes',
        other elements store their content under the key 'data'.
        If i is an integer, then a tuple j, d is returned with j the
        new starting position and d the list with the elements parsed
        so far.

    Raises
    ------
    ValueError
        If no element header or end marker is found where one is
        expected, if the body of a text element cannot be parsed, if
        the size of binary data cannot be determined, or if binary data
        is not immediately followed by the element's end marker.
    '''

    # return new starting position?
    return_pos = i is not None
    if not return_pos:
        i = 0

    debug('NIML', 'Parsing at %d, total length %d', (i, len(s)))
    # start parsing from header
    #
    # the tricky part is that binary data can contain characters that also
    # indicate the end of a data segment, so 'typical' parsing with start
    # and end markers cannot be done. Instead the header of each part is
    # read first, then the number of elements is computed based on the
    # header information, and the required number of bytes is converted.
    # From then on the remainder of the string is parsed as above.

    # raw literals: '\W', '\w' and '\s' are not valid string escapes
    headerpat = br'\W*<(?P<name>\w+)\W(?P<header>.*?)>'
    endpat = br'\W*</\w+>\s*'

    nimls = []  # here all found parts are stored

    # Keep on reading new parts
    while True:
        # ignore any xml tags
        if s.startswith(b'<?xml', i):
            i = s.index(b'>', i) + 1

        # try to read a name and header part
        m = re.match(headerpat, s[i:], _RE_FLAGS)

        if m is None:
            # no header - was it the end of a section?
            m = re.match(endpat, s[i:], _RE_FLAGS)

            if m is None:
                if len(s[i:].strip()) == 0:
                    if return_pos:
                        return i, nimls
                    else:
                        return nimls
                else:
                    raise ValueError(
                        "No match towards end of header end: [%s] " %
                        _partial_string(s, i))

            else:
                # for NIFTI extensions there can be some null bytes left
                # so get rid of them here
                remaining = s[i + m.end():].replace(chr(0).encode(),
                                                    b'').strip()

                if len(remaining) > 0:
                    # there is more stuff to parse
                    i += m.end()
                    continue

                # entire file was parsed - we are done
                debug('NIML', 'Completed parsing, length %d (%d elements)',
                      (len(s), len(nimls)))
                if return_pos:
                    return i, nimls
                else:
                    return nimls

        else:
            # get values from header
            d = m.groupdict()
            name, header = d['name'], d['header']

            # update current position
            i += m.end()

            # parse the keys and values in the header
            # (i was just advanced, so it is the header end position)
            debug('NIML', 'Parsing header %s, header end position %d',
                  (name, i))
            niml = _parse_keyvalues(header)

            debug('NIML', 'Found keys %s.', (", ".join(niml.keys())))
            # set the name of this element
            niml['name'] = name.decode()

            if niml.get('ni_form', None) == 'ni_group':
                # it's a group. Parse the group using recursion
                debug("NIML", "Starting a group %s >>>", niml['name'])
                i, niml['nodes'] = string2rawniml(s, i)
                debug("NIML", "<<< ending a group %s", niml['name'])
            else:
                # it's a normal element with data
                debug('NIML',
                      'Parsing element %s from position %d, total '
                      'length %d', (niml['name'], i, len(s)))

                # set a few data elements
                datatypes = niml['ni_type']
                niml['vec_typ'] = types.str2codes(datatypes)
                niml['vec_len'] = int(niml['ni_dimen'])
                niml['vec_num'] = len(niml['vec_typ'])

                debug('NIML', 'Element of type %s' % niml['vec_typ'])

                # data can be in string form, binary or base64.
                is_string = niml['ni_type'] == 'String' or \
                            'ni_form' not in niml
                if is_string:
                    # string form is handled separately. It's easy to parse
                    # because it cannot contain any end markers in the data

                    debug("NIML", "Parsing string body for %s", name)

                    vec_typ = niml['vec_typ']
                    is_mixed_data = len(set(vec_typ)) > 1
                    is_multiple_string_data = len(vec_typ) > 1 and \
                        types._one_str2code('String') == \
                        types.findonetype(vec_typ)

                    if is_mixed_data or is_multiple_string_data:
                        debug("NIML",
                              "Data is mixed type (string=%s)" %
                              is_multiple_string_data)
                        # non-greedy: stop at the first end marker
                        strpat = (r'\s*(?P<data>.*?)\s*</%s>' %
                                  (name.decode())).encode()
                        m = re.match(strpat, s[i:], _RE_FLAGS)
                        is_string_data = is_multiple_string_data
                    else:
                        # If the data type is string, it is surrounded by quotes
                        # Otherwise (numeric data) there are no quotes
                        is_string_data = niml['ni_type'] == 'String'
                        quote = '"' if is_string_data else ''

                        # construct the regular pattern for this string
                        strpat = (r'\s*%s(?P<data>[^"]*)[^"]*%s\s*</%s>' %
                                  (quote, quote, name.decode())).encode()
                        m = re.match(strpat, s[i:], _RE_FLAGS)

                    if m is None:
                        # something went wrong
                        raise ValueError("Could not parse string data from "
                                         "pos %d: %s" %
                                         (i, _partial_string(s, i)))

                    # parse successful - get the parsed data
                    data = m.groupdict()['data']

                    # convert data to raw NIML
                    data = _datastring2rawniml(data, niml)

                    # if string data, replace escape characters
                    if is_multiple_string_data or is_string_data:
                        data = decode_escape(data)

                    # store data
                    niml['data'] = data

                    # update position
                    i += m.end()

                    debug('NIML', 'Completed %s, now at %d', (name, i))

                else:
                    # see how many bytes (characters) to read

                    # convert this part of the string
                    if 'base64' in niml['ni_form']:
                        # base 64 has no '<' character - so we should be fine
                        endpos = s.index(b'<', i + 1)
                        datastring = s[i:endpos]
                        nbytes = len(datastring)
                    else:
                        # hardcode binary data - see how many bytes we need
                        nbytes = _binary_data_bytecount(niml)
                        if nbytes is None:
                            # without a byte count the binary data cannot
                            # be delimited; fail with a clear message
                            # instead of a TypeError on 'i + nbytes'
                            raise ValueError(
                                "Cannot determine the number of bytes of "
                                "binary data for element %s" % niml['name'])
                        debug('NIML',
                              'Raw data with %d bytes - total length '
                              '%d, starting at %d', (nbytes, len(s), i))
                        datastring = s[i:(i + nbytes)]

                    niml['data'] = _datastring2rawniml(datastring, niml)

                    # update position
                    i += nbytes

                    # ensure that immediately after this segment there is an
                    # end-part marker. Bytes are compared directly, because
                    # whatever follows the data may not be decodable text
                    endstr = '</%s>' % name.decode()
                    endmarker = endstr.encode()
                    if s[i:(i + len(endmarker))] != endmarker:
                        raise ValueError("Not found expected end string %s"
                                         " (found %s...)" %
                                         (endstr, _partial_string(s, i)))
                    i += len(endmarker)

            debug('NIML',
                  "Adding element '%s' with keys %r" %
                  (niml['name'], niml.keys()))
            nimls.append(niml)

    # we should never end up here.
    raise ValueError("this should never happen")
def string2rawniml(s, i=None):
    '''Parses a NIML string to a raw NIML tree-like structure

    Parameters
    ----------
    s: bytes or bytearray
        string to be converted
    i: int
        Starting position in the string.
        By default None is used, which means that the entire string is
        converted.

    Returns
    -------
    r: the NIML result.
        If input parameter i is None then a list of dictionaries, one
        per parsed NIML element, is returned. Group elements store
        their children (parsed recursively) under the key 'nodes',
        other elements store their content under the key 'data'.
        If i is an integer, then a tuple j, d is returned with j the
        new starting position and d the list with the elements parsed
        so far.

    Raises
    ------
    ValueError
        If no element header or end marker is found where one is
        expected, if the body of a text element cannot be parsed, if
        the size of binary data cannot be determined, or if binary data
        is not immediately followed by the element's end marker.
    '''

    # return new starting position?
    return_pos = i is not None
    if not return_pos:
        i = 0

    debug('NIML', 'Parsing at %d, total length %d', (i, len(s)))
    # start parsing from header
    #
    # the tricky part is that binary data can contain characters that also
    # indicate the end of a data segment, so 'typical' parsing with start
    # and end markers cannot be done. Instead the header of each part is
    # read first, then the number of elements is computed based on the
    # header information, and the required number of bytes is converted.
    # From then on the remainder of the string is parsed as above.

    # raw literals: '\W', '\w' and '\s' are not valid string escapes
    headerpat = br'\W*<(?P<name>\w+)\W(?P<header>.*?)>'
    endpat = br'\W*</\w+>\s*'

    nimls = []  # here all found parts are stored

    # Keep on reading new parts
    while True:
        # ignore any xml tags
        if s.startswith(b'<?xml', i):
            i = s.index(b'>', i) + 1

        # try to read a name and header part
        m = re.match(headerpat, s[i:], _RE_FLAGS)

        if m is None:
            # no header - was it the end of a section?
            m = re.match(endpat, s[i:], _RE_FLAGS)

            if m is None:
                if len(s[i:].strip()) == 0:
                    if return_pos:
                        return i, nimls
                    else:
                        return nimls
                else:
                    raise ValueError(
                        "No match towards end of header end: [%s] " %
                        _partial_string(s, i))

            else:
                # for NIFTI extensions there can be some null bytes left
                # so get rid of them here
                remaining = s[i + m.end():].replace(chr(0).encode(),
                                                    b'').strip()

                if len(remaining) > 0:
                    # there is more stuff to parse
                    i += m.end()
                    continue

                # entire file was parsed - we are done
                debug('NIML', 'Completed parsing, length %d (%d elements)',
                      (len(s), len(nimls)))
                if return_pos:
                    return i, nimls
                else:
                    return nimls

        else:
            # get values from header
            d = m.groupdict()
            name, header = d['name'], d['header']

            # update current position
            i += m.end()

            # parse the keys and values in the header
            # (i was just advanced, so it is the header end position)
            debug('NIML', 'Parsing header %s, header end position %d',
                  (name, i))
            niml = _parse_keyvalues(header)

            debug('NIML', 'Found keys %s.', (", ".join(niml.keys())))
            # set the name of this element
            niml['name'] = name.decode()

            if niml.get('ni_form', None) == 'ni_group':
                # it's a group. Parse the group using recursion
                debug("NIML", "Starting a group %s >>>", niml['name'])
                i, niml['nodes'] = string2rawniml(s, i)
                debug("NIML", "<<< ending a group %s", niml['name'])
            else:
                # it's a normal element with data
                debug('NIML',
                      'Parsing element %s from position %d, total '
                      'length %d', (niml['name'], i, len(s)))

                # set a few data elements
                datatypes = niml['ni_type']
                niml['vec_typ'] = types.str2codes(datatypes)
                niml['vec_len'] = int(niml['ni_dimen'])
                niml['vec_num'] = len(niml['vec_typ'])

                debug('NIML', 'Element of type %s' % niml['vec_typ'])

                # data can be in string form, binary or base64.
                is_string = niml['ni_type'] == 'String' or \
                            'ni_form' not in niml
                if is_string:
                    # string form is handled separately. It's easy to parse
                    # because it cannot contain any end markers in the data

                    debug("NIML", "Parsing string body for %s", name)

                    vec_typ = niml['vec_typ']
                    is_mixed_data = len(set(vec_typ)) > 1
                    is_multiple_string_data = len(vec_typ) > 1 and \
                        types._one_str2code('String') == \
                        types.findonetype(vec_typ)

                    if is_mixed_data or is_multiple_string_data:
                        debug("NIML",
                              "Data is mixed type (string=%s)" %
                              is_multiple_string_data)
                        # non-greedy: stop at the first end marker
                        strpat = (r'\s*(?P<data>.*?)\s*</%s>' %
                                  (name.decode())).encode()
                        m = re.match(strpat, s[i:], _RE_FLAGS)
                        is_string_data = is_multiple_string_data
                    else:
                        # If the data type is string, it is surrounded by quotes
                        # Otherwise (numeric data) there are no quotes
                        is_string_data = niml['ni_type'] == 'String'
                        quote = '"' if is_string_data else ''

                        # construct the regular pattern for this string
                        strpat = (r'\s*%s(?P<data>[^"]*)[^"]*%s\s*</%s>' %
                                  (quote, quote, name.decode())).encode()
                        m = re.match(strpat, s[i:], _RE_FLAGS)

                    if m is None:
                        # something went wrong
                        raise ValueError("Could not parse string data from "
                                         "pos %d: %s" %
                                         (i, _partial_string(s, i)))

                    # parse successful - get the parsed data
                    data = m.groupdict()['data']

                    # convert data to raw NIML
                    data = _datastring2rawniml(data, niml)

                    # if string data, replace escape characters
                    if is_multiple_string_data or is_string_data:
                        data = decode_escape(data)

                    # store data
                    niml['data'] = data

                    # update position
                    i += m.end()

                    debug('NIML', 'Completed %s, now at %d', (name, i))

                else:
                    # see how many bytes (characters) to read

                    # convert this part of the string
                    if 'base64' in niml['ni_form']:
                        # base 64 has no '<' character - so we should be fine
                        endpos = s.index(b'<', i + 1)
                        datastring = s[i:endpos]
                        nbytes = len(datastring)
                    else:
                        # hardcode binary data - see how many bytes we need
                        nbytes = _binary_data_bytecount(niml)
                        if nbytes is None:
                            # without a byte count the binary data cannot
                            # be delimited; fail with a clear message
                            # instead of a TypeError on 'i + nbytes'
                            raise ValueError(
                                "Cannot determine the number of bytes of "
                                "binary data for element %s" % niml['name'])
                        debug('NIML',
                              'Raw data with %d bytes - total length '
                              '%d, starting at %d', (nbytes, len(s), i))
                        datastring = s[i:(i + nbytes)]

                    niml['data'] = _datastring2rawniml(datastring, niml)

                    # update position
                    i += nbytes

                    # ensure that immediately after this segment there is an
                    # end-part marker. Bytes are compared directly, because
                    # whatever follows the data may not be decodable text
                    endstr = '</%s>' % name.decode()
                    endmarker = endstr.encode()
                    if s[i:(i + len(endmarker))] != endmarker:
                        raise ValueError("Not found expected end string %s"
                                         " (found %s...)" %
                                         (endstr, _partial_string(s, i)))
                    i += len(endmarker)

            debug('NIML',
                  "Adding element '%s' with keys %r" %
                  (niml['name'], niml.keys()))
            nimls.append(niml)

    # we should never end up here.
    raise ValueError("this should never happen")