Ejemplo n.º 1
0
def _datastring2rawniml(s, niml):
    '''Converts data with uniform type to raw NIML

    Parameters
    ----------
    s: bytes-like
        Data part of a single NIML element: whitespace-separated text,
        base64-encoded data or raw binary data, depending on the element.
    niml: dict
        Header information of the element. The keys 'vec_typ', 'vec_num'
        and 'vec_len' are read; 'ni_form' is optional.

    Returns
    -------
    data
        For numeric data a numpy array of shape (vec_len, vec_num).
        For a single string the result of decode_escape. For mixed types
        (or multiple string columns) the result of
        _mixedtypes_datastring2rawniml.

    Raises
    ------
    ValueError
        If text data does not contain vec_len * vec_num values, or if
        'ni_form' is neither text, base64 nor binary.
    '''
    debug('NIML', 'Raw string to NIML: %d characters', len(s))

    tps = niml['vec_typ']

    onetype = types.findonetype(tps)
    is_string = onetype is not None and \
                    [onetype] == types.str2codes('string')

    if onetype is None or (is_string and len(tps) > 1):
        # no single common type, or more than one string column
        return _mixedtypes_datastring2rawniml(s, niml)

    if is_string:
        # single string
        return decode_escape(s.decode())  # do not string2rawniml

    # numeric, either int or float
    ncols = niml['vec_num']
    nrows = niml['vec_len']
    tp = types.code2numpy_type(onetype)

    niform = niml.get('ni_form', None)

    if not niform or niform == 'text':
        data = np.zeros((nrows, ncols), dtype=tp)  # allocate space for data
        convertor = types.code2python_convertor(
            onetype)  # string to type convertor

        vals = s.split(None)  # split by whitespace separator
        if len(vals) != ncols * nrows:
            raise ValueError("unexpected number of elements")

        # values are stored row by row
        for i, val in enumerate(vals):
            row, col = divmod(i, ncols)
            data[row, col] = convertor(val)

    else:
        dtype = np.dtype(tp)
        # NOTE(review): the dtype returned here is not used below (the
        # buffer is read with tp) - confirm whether that is intended
        dtype = types.byteorder_from_niform(niform, dtype)

        if 'base64' in niform:
            debug('NIML', 'base64, %d chars: %s',
                  (len(s), _partial_string(s, 0)))

            s = base64.b64decode(s)
        elif 'binary' not in niform:
            raise ValueError('Illegal niform %s' % niform)

        # The binary mode of np.fromstring is deprecated and rejected by
        # recent numpy versions. np.frombuffer reads the same bytes; the
        # copy gives a writable array that owns its data, as before.
        data_1d = np.frombuffer(s, dtype=tp).copy()

        debug('NIML', 'data vector has %d elements, reshape to %d x %d = %d',
              (np.size(data_1d), nrows, ncols, nrows * ncols))

        data = np.reshape(data_1d, (nrows, ncols))

    return data
Ejemplo n.º 2
0
def _datastring2rawniml(s, niml):
    '''Converts data with uniform type to raw NIML

    Parameters
    ----------
    s: bytes-like
        Body of one NIML element. It is interpreted as text, base64 or
        raw binary data according to niml['ni_form'].
    niml: dict
        Element header with at least the keys 'vec_typ', 'vec_num' and
        'vec_len'. If 'ni_form' is absent the data is treated as text.

    Returns
    -------
    data
        A (vec_len, vec_num) numpy array for numeric data; the value
        returned by decode_escape for a single string; otherwise the
        value returned by _mixedtypes_datastring2rawniml.

    Raises
    ------
    ValueError
        When the number of text values differs from vec_len * vec_num,
        or when 'ni_form' is not text, base64 or binary.
    '''
    debug('NIML', 'Raw string to NIML: %d characters', len(s))

    tps = niml['vec_typ']

    onetype = types.findonetype(tps)
    single_string_type = onetype is not None and \
                            [onetype] == types.str2codes('string')

    # data without one common type, and multi-column string data, are
    # handled by the mixed-type parser
    if onetype is None or (single_string_type and len(tps) > 1):
        return _mixedtypes_datastring2rawniml(s, niml)

    if single_string_type:
        # single string
        return decode_escape(s.decode()) # do not string2rawniml

    # numeric, either int or float
    ncols = niml['vec_num']
    nrows = niml['vec_len']
    tp = types.code2numpy_type(onetype)

    niform = niml.get('ni_form', None)

    if not niform or niform == 'text':
        data = np.zeros((nrows, ncols), dtype=tp) # allocate space for data
        convertor = types.code2python_convertor(onetype) # string to type convertor

        vals = s.split(None) # split by whitespace separator
        if len(vals) != ncols * nrows:
            raise ValueError("unexpected number of elements")

        # fill the array in row-major order
        for i, val in enumerate(vals):
            data[i // ncols, i % ncols] = convertor(val)

        return data

    dtype = np.dtype(tp)
    # NOTE(review): this result is never used afterwards; the data is
    # read using tp - confirm that ignoring the byte order is intended
    dtype = types.byteorder_from_niform(niform, dtype)

    if 'base64' in niform:
        debug('NIML', 'base64, %d chars: %s',
                        (len(s), _partial_string(s, 0)))

        s = base64.b64decode(s)
    elif 'binary' not in niform:
        raise ValueError('Illegal niform %s' % niform)

    # np.fromstring in binary mode is deprecated (and an error in recent
    # numpy versions); np.frombuffer followed by a copy yields the same
    # values in a writable array
    data_1d = np.frombuffer(s, dtype=tp).copy()

    debug('NIML', 'data vector has %d elements, reshape to %d x %d = %d',
                    (np.size(data_1d), nrows, ncols, nrows * ncols))

    return np.reshape(data_1d, (nrows, ncols))
Ejemplo n.º 3
0
def string2rawniml(s, i=None):
    '''Parses a NIML string to a raw NIML tree-like structure

    Parameters
    ----------
    s: bytes or bytearray
        NIML data to be converted.
    i: int or None
        Starting position in s.
        By default None is used, which means that parsing starts at
        position 0 and only the parsed elements are returned.

    Returns
    -------
    r: list or tuple
        If input parameter i is None then a list with one dictionary per
        parsed NIML element is returned. If i is an integer, then a tuple
        j, d is returned with j the position in s where parsing stopped
        and d the list with the elements parsed so far.

    Raises
    ------
    ValueError
        If neither a header nor a final closing tag is found at the
        current position, if string data cannot be parsed, or if binary
        or base64 data is not followed by the closing tag of its element.
    '''

    # return new starting position?
    return_pos = i is not None
    if not return_pos:
        i = 0

    debug('NIML', 'Parsing at %d, total length %d', (i, len(s)))
    # start parsing from header
    #
    # the tricky part is that binary data can contain characters that also
    # indicate the end of a data segment, so 'typical' parsing with start
    # and end markers cannot be done. Instead the header of each part is
    # read first, then the number of elements is computed based on the
    # header information, and the required number of bytes is converted.
    # From then on the remainder of the string is parsed as above.

    # raw literals, so that \W, \w and \s reach the re module unchanged
    # without being invalid escape sequences in the Python source
    headerpat = br'\W*<(?P<name>\w+)\W(?P<header>.*?)>'
    endpat = br'\W*</\w+>\s*'

    nimls = []  # here all found parts are stored

    # Keep on reading new parts; the loop is only left through return
    # or raise
    while True:
        # ignore any xml tags
        if s.startswith(b'<?xml', i):
            i = s.index(b'>', i) + 1

        # try to read a name and header part
        m = re.match(headerpat, s[i:], _RE_FLAGS)

        if m is None:
            # no header - was it the end of a section?
            m = re.match(endpat, s[i:], _RE_FLAGS)

            if m is not None:
                # for NIFTI extensions there can be some null bytes left
                # so get rid of them here
                remaining = s[i + m.end():].replace(b'\x00', b'').strip()

                if len(remaining) == 0:
                    # entire file was parsed - we are done
                    debug('NIML', 'Completed parsing, length %d (%d elements)',
                          (len(s), len(nimls)))
                    if return_pos:
                        return i, nimls
                    else:
                        return nimls

            # not good - not at the end of the file
            raise ValueError("Unexpected end: [%s] " % _partial_string(s, i))

        else:
            # get values from header
            d = m.groupdict()
            name, header = d['name'], d['header']

            # update current position
            i += m.end()

            # parse the keys and values in the header
            # (i already points just past the header at this point)
            debug('NIML', 'Parsing header %s, header end position %d',
                  (name, i))
            niml = _parse_keyvalues(header)

            debug('NIML', 'Found keys %s.', (", ".join(niml.keys())))
            # set the name of this element
            niml['name'] = name.decode()

            if niml.get('ni_form', None) == 'ni_group':
                # it's a group. Parse the group using recursion
                debug("NIML", "Starting a group %s >>>", niml['name'])
                i, niml['nodes'] = string2rawniml(s, i)
                debug("NIML", "<<< ending a group %s", niml['name'])
            else:
                # it's a normal element with data
                debug(
                    'NIML', 'Parsing element %s from position %d, total '
                    'length %d', (niml['name'], i, len(s)))

                # set a few data elements
                datatypes = niml['ni_type']
                niml['vec_typ'] = types.str2codes(datatypes)
                niml['vec_len'] = int(niml['ni_dimen'])
                niml['vec_num'] = len(niml['vec_typ'])

                # data can be in string form, binary or base64.
                is_string = niml['ni_type'] == 'String' or \
                                'ni_form' not in niml
                if is_string:
                    # string form is handled separately. It's easy to parse
                    # because it cannot contain any end markers in the data

                    debug("NIML", "Parsing string body for %s", name)

                    is_string_data = niml['ni_type'] == 'String'

                    # If the data type is string, it is surrounded by quotes
                    # Otherwise (numeric data) there are no quotes
                    quote = '"' if is_string_data else ''

                    # construct the regular pattern for this string
                    strpat = (r'\s*%s(?P<data>[^"]*)[^"]*%s\s*</%s>' %
                              (quote, quote, name.decode())).encode()
                    m = re.match(strpat, s[i:])
                    if m is None:
                        # something went wrong
                        raise ValueError("Could not parse string data from "
                                         "pos %d: %s" %
                                         (i, _partial_string(s, i)))

                    # parse successful - get the parsed data
                    data = m.groupdict()['data']

                    # convert data to raw NIML
                    data = _datastring2rawniml(data, niml)

                    # if string data, replace escape characters
                    if is_string_data:
                        data = decode_escape(data)

                    # store data
                    niml['data'] = data

                    # update position
                    i += m.end()

                    debug('NIML', 'Completed %s, now at %d', (name, i))

                else:
                    # see how many bytes (characters) to read

                    # convert this part of the string
                    if 'base64' in niml['ni_form']:
                        # base 64 has no '<' character - so we should be fine
                        endpos = s.index(b'<', i + 1)
                        datastring = s[i:endpos]
                        nbytes = len(datastring)
                    else:
                        # hardcode binary data - see how many bytes we need
                        nbytes = _binary_data_bytecount(niml)
                        debug(
                            'NIML', 'Raw data with %d bytes - total length '
                            '%d, starting at %d', (nbytes, len(s), i))
                        datastring = s[i:(i + nbytes)]

                    niml['data'] = _datastring2rawniml(datastring, niml)

                    # update position
                    i += nbytes

                    # ensure that immediately after this segment there is an
                    # end-part marker. The comparison is done on bytes:
                    # what follows may be arbitrary binary data, and
                    # decoding it could fail before the check is made
                    endstr = '</%s>' % name.decode()
                    if s[i:(i + len(endstr))] != endstr.encode():
                        raise ValueError("Not found expected end string %s"
                                         "  (found %s...)" %
                                         (endstr, _partial_string(s, i)))
                    i += len(endstr)

            debug(
                'NIML', "Adding element '%s' with keys %r" %
                (niml['name'], niml.keys()))
            nimls.append(niml)
Ejemplo n.º 4
0
def string2rawniml(s, i=None):
    '''Parses a NIML string to a raw NIML tree-like structure

    Parameters
    ----------
    s: bytes or bytearray
        NIML data to be converted.
    i: int or None
        Starting position in s.
        By default None is used, which means that parsing starts at
        position 0 and only the parsed elements are returned.

    Returns
    -------
    r: list or tuple
        If input parameter i is None then a list with one dictionary per
        parsed NIML element is returned. If i is an integer, then a tuple
        j, d is returned with j the position in s where parsing stopped
        and d the list with the elements parsed so far.

    Raises
    ------
    ValueError
        If non-whitespace content is found that is neither a header nor
        a closing tag, if string data cannot be parsed, or if binary or
        base64 data is not followed by the closing tag of its element.
    '''

    # return new starting position?
    return_pos = i is not None
    if not return_pos:
        i = 0

    debug('NIML', 'Parsing at %d, total length %d', (i, len(s)))
    # start parsing from header
    #
    # the tricky part is that binary data can contain characters that also
    # indicate the end of a data segment, so 'typical' parsing with start
    # and end markers cannot be done. Instead the header of each part is
    # read first, then the number of elements is computed based on the
    # header information, and the required number of bytes is converted.
    # From then on the remainder of the string is parsed as above.

    # raw literals, so that \W, \w and \s reach the re module unchanged
    # without being invalid escape sequences in the Python source
    headerpat = br'\W*<(?P<name>\w+)\W(?P<header>.*?)>'
    endpat = br'\W*</\w+>\s*'

    nimls = [] # here all found parts are stored

    # Keep on reading new parts; the loop is only left through return
    # or raise
    while True:
        # ignore any xml tags
        if s.startswith(b'<?xml', i):
            i = s.index(b'>', i) + 1

        # try to read a name and header part
        m = re.match(headerpat, s[i:], _RE_FLAGS)

        if m is None:
            # no header - was it the end of a section?
            m = re.match(endpat, s[i:], _RE_FLAGS)

            if m is None:
                # neither a header nor a closing tag: only acceptable if
                # nothing but whitespace is left
                if len(s[i:].strip()) == 0:
                    if return_pos:
                        return i, nimls
                    else:
                        return nimls
                else:
                    raise ValueError(
                        "No match towards end of header end: [%s] " %
                        _partial_string(s, i))

            else:
                # for NIFTI extensions there can be some null bytes left
                # so get rid of them here
                remaining = s[i + m.end():].replace(b'\x00', b'').strip()

                if len(remaining) > 0:
                    # there is more stuff to parse: skip the closing tag
                    # and keep going
                    i += m.end()
                    continue

                # entire file was parsed - we are done
                debug('NIML', 'Completed parsing, length %d (%d elements)',
                      (len(s), len(nimls)))
                if return_pos:
                    return i, nimls
                else:
                    return nimls

        else:
            # get values from header
            d = m.groupdict()
            name, header = d['name'], d['header']

            # update current position
            i += m.end()

            # parse the keys and values in the header
            # (i already points just past the header at this point)
            debug('NIML', 'Parsing header %s, header end position %d',
                                                (name, i))
            niml = _parse_keyvalues(header)

            debug('NIML', 'Found keys %s.', (", ".join(niml.keys())))
            # set the name of this element
            niml['name'] = name.decode()

            if niml.get('ni_form', None) == 'ni_group':
                # it's a group. Parse the group using recursion
                debug("NIML", "Starting a group %s >>>" , niml['name'])
                i, niml['nodes'] = string2rawniml(s, i)
                debug("NIML", "<<< ending a group %s", niml['name'])
            else:
                # it's a normal element with data
                debug('NIML', 'Parsing element %s from position %d, total '
                                    'length %d', (niml['name'], i, len(s)))

                # set a few data elements
                datatypes = niml['ni_type']
                niml['vec_typ'] = types.str2codes(datatypes)
                niml['vec_len'] = int(niml['ni_dimen'])
                niml['vec_num'] = len(niml['vec_typ'])

                debug('NIML', 'Element of type %s' % niml['vec_typ'])

                # data can be in string form, binary or base64.
                is_string = niml['ni_type'] == 'String' or \
                                'ni_form' not in niml
                if is_string:
                    # string form is handled separately. It's easy to parse
                    # because it cannot contain any end markers in the data

                    debug("NIML", "Parsing string body for %s", name)

                    is_string_data = niml['ni_type'] == 'String'
                    is_mixed_data = len(set(niml['vec_typ'])) > 1

                    if is_mixed_data:
                        debug("NIML", "Data is mixed type")
                        # everything up to the closing tag of this element
                        strpat = (r'\s*(?P<data>.*)\s*</%s>' %
                                                (name.decode())).encode()
                    else:
                        # If the data type is string, it is surrounded by quotes
                        # Otherwise (numeric data) there are no quotes
                        quote = '"' if is_string_data else ''

                        # construct the regular pattern for this string
                        strpat = (r'\s*%s(?P<data>[^"]*)[^"]*%s\s*</%s>' %
                                    (quote, quote, name.decode())).encode()

                    m = re.match(strpat, s[i:], _RE_FLAGS)

                    if m is None:
                        # something went wrong
                        raise ValueError("Could not parse string data from "
                                         "pos %d: %s" %
                                                (i, _partial_string(s, i)))

                    # parse successful - get the parsed data
                    data = m.groupdict()['data']

                    # convert data to raw NIML
                    data = _datastring2rawniml(data, niml)

                    # if string data, replace escape characters
                    if is_string_data:
                        data = decode_escape(data)

                    # store data
                    niml['data'] = data

                    # update position
                    i += m.end()

                    debug('NIML', 'Completed %s, now at %d', (name, i))

                else:
                    # see how many bytes (characters) to read

                    # convert this part of the string
                    if 'base64' in niml['ni_form']:
                        # base 64 has no '<' character - so we should be fine
                        endpos = s.index(b'<', i + 1)
                        datastring = s[i:endpos]
                        nbytes = len(datastring)
                    else:
                        # hardcode binary data - see how many bytes we need
                        nbytes = _binary_data_bytecount(niml)
                        debug('NIML', 'Raw data with %d bytes - total length '
                                    '%d, starting at %d', (nbytes, len(s), i))
                        datastring = s[i:(i + nbytes)]

                    niml['data'] = _datastring2rawniml(datastring, niml)

                    # update position
                    i += nbytes

                    # ensure that immediately after this segment there is an
                    # end-part marker. The comparison is done on bytes:
                    # what follows may be arbitrary binary data, and
                    # decoding it could fail before the check is made
                    endstr = '</%s>' % name.decode()
                    if s[i:(i + len(endstr))] != endstr.encode():
                        raise ValueError("Not found expected end string %s"
                                         "  (found %s...)" %
                                            (endstr, _partial_string(s, i)))
                    i += len(endstr)

            debug('NIML', "Adding element '%s' with keys %r" % (niml['name'], niml.keys()))
            nimls.append(niml)