Beispiel #1
0
    def __init__(self, file, header_fields=None):
        """Set up the instance for a file and a header specification.

        Parameters
        ----------
        file : file-like or str
            Stream or filename from which to read the data. A stream
            may already be open in ``'rb'`` mode.
        header_fields : sequence of dicts, optional
            Per-row definition of the header fields. Each dict holds
            key-value pairs for these keys:

            - ``'name'`` : Label for the field.
            - ``'offset'`` : Start of the field in bytes.
            - ``'size'`` : Size of the field in bytes.
            - ``'dtype'`` : Data type in Numpy- or Numpy-readable format.
            - ``'dshape'`` (optional) : Shape to which the array of
              values is reshaped.
            - ``'description'`` (optional) : Human-readable description
              of the field.

            If ``None`` (default), the fields are generated from the
            MRC2014 specification table, see `print_mrc2014_spec`.
        """
        # Fall back to the MRC2014 layout only when the caller gave nothing;
        # an explicitly passed (even empty) sequence is forwarded unchanged.
        fields = (
            header_fields_from_table(
                spec_table=MRC_2014_SPEC_TABLE,
                keys=MRC_SPEC_KEYS,
                dtype_map=MRC_DTYPE_TO_NPY_DTYPE)
            if header_fields is None
            else header_fields
        )

        super().__init__(file, fields)
Beispiel #2
0
    def __init__(self, file, header_fields=None):
        """Set up the instance for a file and a header specification.

        Parameters
        ----------
        file : file-like or str
            Stream or filename from which to read the data. A stream
            may already be open in ``'rb'`` mode.
        header_fields : sequence of dicts, optional
            Per-row definition of the header fields. Each dict holds
            key-value pairs for these keys:

            - ``'name'`` : Label for the field.
            - ``'offset'`` : Start of the field in bytes.
            - ``'size'`` : Size of the field in bytes.
            - ``'dtype'`` : Data type in Numpy- or Numpy-readable format.
            - ``'dshape'`` (optional) : Shape to which the array of
              values is reshaped.
            - ``'description'`` (optional) : Human-readable description
              of the field.

            If ``None`` (default), the fields are generated from the
            MRC2014 specification table, see `print_mrc2014_spec`.
        """
        # Fall back to the MRC2014 layout only when the caller gave nothing;
        # an explicitly passed (even empty) sequence is forwarded unchanged.
        fields = (
            header_fields_from_table(
                spec_table=MRC_2014_SPEC_TABLE,
                keys=MRC_SPEC_KEYS,
                dtype_map=MRC_DTYPE_TO_NPY_DTYPE)
            if header_fields is None
            else header_fields
        )

        super().__init__(file, fields)
Beispiel #3
0
def mrc_header_from_params(shape, dtype, kind, **kwargs):
    """Create a minimal MRC2014 header from the given parameters.

    Parameters
    ----------
    shape : 3-sequence of ints
        3D shape of the stored data. The values are used as
        ``'nx', 'ny', 'nz'`` header entries, in this order. Note that
        this is different from the actual data storage shape for
        non-trivial ``axis_order``.
    dtype : {'int8', 'int16', 'float32', 'uint16'}
        Data type specifier as understood by `numpy.dtype`. It is
        translated to a ``'mode'`` header entry. See `this page
        <http://www.ccpem.ac.uk/mrc_format/mrc2014.php>`_ for valid
        modes.
    kind : {'volume', 'projections'}
        Interpretation of the 3D data, either as single 3D volume or as
        a stack of 2D projections. The value is used for the ``'ispg'``
        header entry (1 for ``'volume'``, 0 for ``'projections'``).
    extent : 3-sequence of floats, optional
        Size of the 3D volume in meters. The values are used for
        the ``'cella'`` header entry.
        Default: ``shape``, resulting in ``(1, 1, 1)`` unit cells
    axis_order : permutation of ``(0, 1, 2)`` optional
        Order of the data axes as they should appear in the stored file.
        The values (plus 1) are used for the ``'mapc', 'mapr', 'maps'``
        header entries.
        Default: ``(0, 1, 2)``
    dmin, dmax : float, optional
        Minimum and maximum values of the data, used for header entries
        ``'dmin'`` and ``'dmax'``, resp.
        Default: 1.0, 0.0. These values indicate according to [Che+2015]_
        that the values are considered as undetermined.
    dmean, rms : float, optional
        Mean and variance of the data, used for header entries ``'dmean'``
        and ``'rms'``, resp.
        Default: ``min(dmin, dmax) - 1, -1.0``. These values indicate
        according to [Che+2015]_ that the values are considered as
        undetermined.
    mrc_version : 2-tuple of int, optional
        Version identifier for the MRC file, used for the ``'nversion'``
        header entry, stored as ``10 * mrc_version[0] + mrc_version[1]``.
        Default: ``(2014, 0)``
    text_labels : sequence of strings, optional
        At most 10 strings with at most 80 ASCII characters each, used
        for the ``'nlabl'`` and ``'label'`` header entries.
        Default: ``[]``

    Returns
    -------
    header : `OrderedDict`
        Header stored in an ordered dictionary, where each entry has the
        following form::

            'name': {'value': value_as_array,
                     'offset': offset_in_bytes}

        All ``'value'``'s are `numpy.ndarray`'s with at least one
        dimension.

    Raises
    ------
    ValueError
        If ``kind`` is not understood, ``axis_order`` is not a
        permutation of ``(0, 1, 2)``, ``mrc_version`` does not have
        length 2, more than 10 labels are given, or a label is longer
        than 80 characters (or cannot be encoded as ASCII).

    References
    ----------
    [Che+2015] Cheng, A et al. *MRC2014: Extensions to the MRC format header
    for electron cryo-microscopy and tomography*. Journal of Structural
    Biology, 192 (2015), pp 146--150.
    """
    def int32_entry(value):
        """Return ``value`` as an int32 array of shape ``(1,)``."""
        return np.array(value, dtype='int32').reshape([1])

    def float32_entry(value):
        """Return ``value`` as a float32 array of shape ``(1,)``."""
        return np.array(value, dtype='float32').reshape([1])

    def char_entry(raw):
        """Return the bytes ``raw`` as a writable array of single chars."""
        # `frombuffer` gives a read-only view onto the bytes object, hence
        # the copy. It replaces the removed binary mode of `np.fromstring`.
        return np.frombuffer(raw, dtype='S1').copy()

    # Positional args
    shape = [int(n) for n in shape]
    kind, kind_in = str(kind).lower(), kind
    if kind not in ('volume', 'projections'):
        raise ValueError("`kind '{}' not understood".format(kind_in))

    # Keyword args
    extent = kwargs.pop('extent', shape)
    axis_order = kwargs.pop('axis_order', (0, 1, 2))
    if tuple(axis_order) not in permutations((0, 1, 2)):
        raise ValueError('`axis_order` must be a permutation of (0, 1, 2), '
                         'got {}'.format(axis_order))
    dmin = kwargs.pop('dmin', 1.0)
    dmax = kwargs.pop('dmax', 0.0)
    dmean = kwargs.pop('dmean', min(dmin, dmax) - 1.0)
    rms = kwargs.pop('rms', -1.0)
    mrc_version = kwargs.pop('mrc_version', (2014, 0))
    if len(mrc_version) != 2:
        raise ValueError('`mrc_version` must be a sequence of length 2, got '
                         '{}'.format(mrc_version))

    # Text labels: fill each label up with whitespace to 80 characters.
    # The remaining (unused) labels stay zero-filled.
    text_labels_in = kwargs.pop('text_labels', [])
    num_labels = len(text_labels_in)
    if num_labels > 10:
        raise ValueError('expexted maximum of 10 labels, got {} labels'
                         ''.format(num_labels))
    text_labels = [str(label).ljust(80) for label in text_labels_in]
    if any(len(label) > 80 for label in text_labels):
        raise ValueError('labels cannot have more than 80 characters each')

    # Convert to header-friendly form. The keys of `values` must match
    # exactly the header field names of the specification, since they
    # are looked up by name below.
    nx, ny, nz = [int32_entry(n) for n in shape]
    mapc, mapr, maps = [int32_entry(m) + 1 for m in axis_order]

    label = np.zeros((10, 80), dtype='S1')  # ensure correct size
    for i, label_i in enumerate(text_labels):
        # One byte per character is needed to fill the 80-char row, so
        # non-ASCII labels are rejected here (UnicodeEncodeError is a
        # subclass of ValueError).
        label[i] = char_entry(label_i.encode('ascii'))

    values = {
        'nx': nx,
        'ny': ny,
        'nz': nz,
        'mode': int32_entry(NPY_DTYPE_TO_MRC_MODE[np.dtype(dtype)]),
        # Copies, so that in-place changes to the grid-size entries do
        # not silently alter 'nx', 'ny', 'nz' (and vice versa).
        'mx': nx.copy(),
        'my': ny.copy(),
        'mz': nz.copy(),
        'cella': np.array(extent).reshape([3]).astype('float32'),
        'mapc': mapc,
        'mapr': mapr,
        'maps': maps,
        'dmin': float32_entry(dmin),
        'dmax': float32_entry(dmax),
        'dmean': float32_entry(dmean),
        'rms': float32_entry(rms),
        'ispg': int32_entry(1 if kind == 'volume' else 0),
        'nsymbt': np.array([0], dtype='int32'),
        'exttype': char_entry(b'    '),
        'nversion': int32_entry(10 * mrc_version[0] + mrc_version[1]),
        'origin': np.zeros(3, dtype='int32'),
        'map': char_entry(b'MAP '),
        # TODO: no idea how to properly choose the machine stamp
        'machst': char_entry(b'DD  '),
        'nlabl': int32_entry(num_labels),
        'label': label,
    }

    # Make the header
    # We use again the specification to set the values
    header_fields = header_fields_from_table(
        MRC_2014_SPEC_TABLE, MRC_SPEC_KEYS, MRC_DTYPE_TO_NPY_DTYPE)

    header = OrderedDict()
    for field in header_fields:
        name = field['name']
        # A KeyError here means the specification contains a field for
        # which no value is defined above.
        header[name] = {'offset': field['offset'], 'value': values[name]}

    return header
Beispiel #4
0
    def read_extended_header(self, groupby='field', force_type=''):
        """Read the extended header according to `extended_header_type`.

        Currently, only the FEI extended header format is supported.
        See `print_fei_ext_header_spec` or `this homepage`_ for the format
        specification.

        The extended header usually has one header section per
        image (slice), in case of the FEI header 128 bytes each, with
        a maximum of 1024 sections.

        Parameters
        ----------
        groupby : {'field', 'section'}, optional
            How to group the values in the extended header sections.

            ``'field'`` : make an array per section field, e.g.::

                'defocus': [dval1, dval2, ..., dvalN],
                'exp_time': [tval1, tval2, ..., tvalN],
                ...

            ``'section'`` : make a dictionary for each section, e.g.::

                {'defocus': dval1, 'exp_time': tval1},
                {'defocus': dval2, 'exp_time': tval2},
                ...

        force_type : string, optional
            If given, this value overrides the `extended_header_type`
            from `header`.

            Currently supported: ``'FEI1'``

        Returns
        -------
        ext_header: `OrderedDict` or tuple
            For ``groupby == 'field'``, a dictionary with the field names
            as keys, like in the example.
            For ``groupby == 'section'``, a tuple of dictionaries as
            shown above.
            The returned data structures store no offsets, in contrast
            to the regular header.

        See Also
        --------

        References
        ----------
        .. _this homepage:
           http://www.2dx.unibas.ch/documentation/mrc-software/fei-\
extended-mrc-format-not-used-by-2dx
        """
        ext_header_type = str(force_type).upper() or self.extended_header_type
        if ext_header_type != 'FEI1':
            raise ValueError("extended header type '{}' not supported"
                             "".format(self.extended_header_type))

        groupby, groupby_in = str(groupby).lower(), groupby

        ext_header_len = int(self.header['nsymbt']['value'])
        if ext_header_len % MRC_FEI_SECTION_SIZE:
            raise ValueError('extended header length {} from header is '
                             'not divisible by extended header section size '
                             '{}'.format(ext_header_len, MRC_FEI_SECTION_SIZE))

        num_sections = ext_header_len // MRC_FEI_SECTION_SIZE
        if num_sections > MRC_FEI_MAX_SECTIONS:
            raise ValueError('calculated number of sections ({}) exceeds '
                             'maximum number of sections ({})'
                             ''.format(num_sections, MRC_FEI_MAX_SECTIONS))

        section_fields = header_fields_from_table(
            MRC_FEI_EXT_HEADER_SECTION, keys=MRC_SPEC_KEYS,
            dtype_map=MRC_DTYPE_TO_NPY_DTYPE)

        if groupby == 'field':
            # Make a list for each field and append the values for that
            # field. Then create an array from that list and store it
            # under the field name.
            ext_header = OrderedDict()
            for field in section_fields:
                value_list = []
                field_offset = field['offset']
                field_dtype = field['dtype']
                field_dshape = field['dshape']
                for section in range(num_sections):
                    # Get the bytestring from the right position in the file,
                    # unpack it and append the value to the list.
                    section_start = section * MRC_FEI_SECTION_SIZE
                    self.file.seek(section_start + field_offset)
                    num_items = int(np.prod(field_dshape))
                    size_bytes = num_items * field_dtype.itemsize
                    packed_value = self.file.read(size_bytes)
                    fmt = '{}{}'.format(num_items, field_dtype.char)
                    value_list.append(struct.unpack(fmt, packed_value))

                ext_header[field['name']] = np.array(value_list,
                                                     dtype=field_dtype)
            return ext_header

        elif groupby == 'section':
            # Loop though the sections and append all values from that
            # section to a list. Return it as a tuple.
            ext_header = []
            for section in range(num_sections):
                entry = {}
                section_start = section * MRC_FEI_SECTION_SIZE
                for field in section_fields:
                    # Get the bytestring from the right position in the file,
                    # unpack it and store the value as array in the dict.
                    self.file.seek(section_start + field['offset'])
                    num_items = int(np.prod(field['dshape']))
                    size_bytes = num_items * field['dtype'].itemsize
                    packed_value = self.file.read(size_bytes)
                    fmt = '{}{}'.format(num_items, field['dtype'].char)
                    value = struct.unpack(fmt, packed_value)
                    # Make each entry a 1-element 1D array as usual
                    entry[field['name']] = np.array(
                        value, dtype=field['dtype']).reshape(field['dshape'])

                ext_header.append(entry)
            return tuple(ext_header)

        else:
            raise ValueError("`groupby` '{}' not understood"
                             "".format(groupby_in))
Beispiel #5
0
def mrc_header_from_params(shape, dtype, kind, **kwargs):
    """Create a minimal MRC2014 header from the given parameters.

    Parameters
    ----------
    shape : 3-sequence of ints
        3D shape of the stored data. The values are used as
        ``'nx', 'ny', 'nz'`` header entries, in this order. Note that
        this is different from the actual data storage shape for
        non-trivial ``axis_order``.
    dtype : {'int8', 'int16', 'float32', 'uint16'}
        Data type specifier as understood by `numpy.dtype`. It is
        translated to a ``'mode'`` header entry. See `this page
        <http://www.ccpem.ac.uk/mrc_format/mrc2014.php>`_ for valid
        modes.
    kind : {'volume', 'projections'}
        Interpretation of the 3D data, either as single 3D volume or as
        a stack of 2D projections. The value is used for the ``'ispg'``
        header entry (1 for ``'volume'``, 0 for ``'projections'``).
    extent : 3-sequence of floats, optional
        Size of the 3D volume in meters. The values are used for
        the ``'cella'`` header entry.
        Default: ``shape``, resulting in ``(1, 1, 1)`` unit cells
    axis_order : permutation of ``(0, 1, 2)`` optional
        Order of the data axes as they should appear in the stored file.
        The values (plus 1) are used for the ``'mapc', 'mapr', 'maps'``
        header entries.
        Default: ``(0, 1, 2)``
    dmin, dmax : float, optional
        Minimum and maximum values of the data, used for header entries
        ``'dmin'`` and ``'dmax'``, resp.
        Default: 1.0, 0.0. These values indicate according to [Che+2015]_
        that the values are considered as undetermined.
    dmean, rms : float, optional
        Mean and variance of the data, used for header entries ``'dmean'``
        and ``'rms'``, resp.
        Default: ``min(dmin, dmax) - 1, -1.0``. These values indicate
        according to [Che+2015]_ that the values are considered as
        undetermined.
    mrc_version : 2-tuple of int, optional
        Version identifier for the MRC file, used for the ``'nversion'``
        header entry, stored as ``10 * mrc_version[0] + mrc_version[1]``.
        Default: ``(2014, 0)``
    text_labels : sequence of strings, optional
        At most 10 strings with at most 80 ASCII characters each, used
        for the ``'nlabl'`` and ``'label'`` header entries.
        Default: ``[]``

    Returns
    -------
    header : `OrderedDict`
        Header stored in an ordered dictionary, where each entry has the
        following form::

            'name': {'value': value_as_array,
                     'offset': offset_in_bytes}

        All ``'value'``'s are `numpy.ndarray`'s with at least one
        dimension.

    Raises
    ------
    ValueError
        If ``kind`` is not understood, ``axis_order`` is not a
        permutation of ``(0, 1, 2)``, ``mrc_version`` does not have
        length 2, more than 10 labels are given, or a label is longer
        than 80 characters (or cannot be encoded as ASCII).

    References
    ----------
    [Che+2015] Cheng, A et al. *MRC2014: Extensions to the MRC format header
    for electron cryo-microscopy and tomography*. Journal of Structural
    Biology, 192 (2015), pp 146--150.
    """
    def int32_entry(value):
        """Return ``value`` as an int32 array of shape ``(1,)``."""
        return np.array(value, dtype='int32').reshape([1])

    def float32_entry(value):
        """Return ``value`` as a float32 array of shape ``(1,)``."""
        return np.array(value, dtype='float32').reshape([1])

    def char_entry(raw):
        """Return the bytes ``raw`` as a writable array of single chars."""
        # `frombuffer` gives a read-only view onto the bytes object, hence
        # the copy. It replaces the removed binary mode of `np.fromstring`.
        return np.frombuffer(raw, dtype='S1').copy()

    # Positional args
    shape = [int(n) for n in shape]
    kind, kind_in = str(kind).lower(), kind
    if kind not in ('volume', 'projections'):
        raise ValueError("`kind '{}' not understood".format(kind_in))

    # Keyword args
    extent = kwargs.pop('extent', shape)
    axis_order = kwargs.pop('axis_order', (0, 1, 2))
    if tuple(axis_order) not in permutations((0, 1, 2)):
        raise ValueError('`axis_order` must be a permutation of (0, 1, 2), '
                         'got {}'.format(axis_order))
    dmin = kwargs.pop('dmin', 1.0)
    dmax = kwargs.pop('dmax', 0.0)
    dmean = kwargs.pop('dmean', min(dmin, dmax) - 1.0)
    rms = kwargs.pop('rms', -1.0)
    mrc_version = kwargs.pop('mrc_version', (2014, 0))
    if len(mrc_version) != 2:
        raise ValueError('`mrc_version` must be a sequence of length 2, got '
                         '{}'.format(mrc_version))

    # Text labels: fill each label up with whitespace to 80 characters.
    # The remaining (unused) labels stay zero-filled.
    text_labels_in = kwargs.pop('text_labels', [])
    num_labels = len(text_labels_in)
    if num_labels > 10:
        raise ValueError('expexted maximum of 10 labels, got {} labels'
                         ''.format(num_labels))
    text_labels = [str(label).ljust(80) for label in text_labels_in]
    if any(len(label) > 80 for label in text_labels):
        raise ValueError('labels cannot have more than 80 characters each')

    # Convert to header-friendly form. The keys of `values` must match
    # exactly the header field names of the specification, since they
    # are looked up by name below.
    nx, ny, nz = [int32_entry(n) for n in shape]
    mapc, mapr, maps = [int32_entry(m) + 1 for m in axis_order]

    label = np.zeros((10, 80), dtype='S1')  # ensure correct size
    for i, label_i in enumerate(text_labels):
        # One byte per character is needed to fill the 80-char row, so
        # non-ASCII labels are rejected here (UnicodeEncodeError is a
        # subclass of ValueError).
        label[i] = char_entry(label_i.encode('ascii'))

    values = {
        'nx': nx,
        'ny': ny,
        'nz': nz,
        'mode': int32_entry(NPY_DTYPE_TO_MRC_MODE[np.dtype(dtype)]),
        # Copies, so that in-place changes to the grid-size entries do
        # not silently alter 'nx', 'ny', 'nz' (and vice versa).
        'mx': nx.copy(),
        'my': ny.copy(),
        'mz': nz.copy(),
        'cella': np.array(extent).reshape([3]).astype('float32'),
        'mapc': mapc,
        'mapr': mapr,
        'maps': maps,
        'dmin': float32_entry(dmin),
        'dmax': float32_entry(dmax),
        'dmean': float32_entry(dmean),
        'rms': float32_entry(rms),
        'ispg': int32_entry(1 if kind == 'volume' else 0),
        'nsymbt': np.array([0], dtype='int32'),
        'exttype': char_entry(b'    '),
        'nversion': int32_entry(10 * mrc_version[0] + mrc_version[1]),
        'origin': np.zeros(3, dtype='int32'),
        'map': char_entry(b'MAP '),
        # TODO: no idea how to properly choose the machine stamp
        'machst': char_entry(b'DD  '),
        'nlabl': int32_entry(num_labels),
        'label': label,
    }

    # Make the header
    # We use again the specification to set the values
    header_fields = header_fields_from_table(
        MRC_2014_SPEC_TABLE, MRC_SPEC_KEYS, MRC_DTYPE_TO_NPY_DTYPE)

    header = OrderedDict()
    for field in header_fields:
        name = field['name']
        # A KeyError here means the specification contains a field for
        # which no value is defined above.
        header[name] = {'offset': field['offset'], 'value': values[name]}

    return header
Beispiel #6
0
    def read_extended_header(self, groupby='field', force_type=''):
        """Read the extended header according to `extended_header_type`.

        Currently, only the FEI extended header format is supported.
        See `print_fei_ext_header_spec` or `this homepage`_ for the format
        specification.

        The extended header usually has one header section per
        image (slice), in case of the FEI header 128 bytes each, with
        a maximum of 1024 sections.

        Parameters
        ----------
        groupby : {'field', 'section'}, optional
            How to group the values in the extended header sections.

            ``'field'`` : make an array per section field, e.g.::

                'defocus': [dval1, dval2, ..., dvalN],
                'exp_time': [tval1, tval2, ..., tvalN],
                ...

            ``'section'`` : make a dictionary for each section, e.g.::

                {'defocus': dval1, 'exp_time': tval1},
                {'defocus': dval2, 'exp_time': tval2},
                ...

        force_type : string, optional
            If given, this value overrides the `extended_header_type`
            from `header`.

            Currently supported: ``'FEI1'``

        Returns
        -------
        ext_header: `OrderedDict` or tuple
            For ``groupby == 'field'``, a dictionary with the field names
            as keys, like in the example.
            For ``groupby == 'section'``, a tuple of dictionaries as
            shown above.
            The returned data structures store no offsets, in contrast
            to the regular header.

        See Also
        --------

        References
        ----------
        .. _this homepage:
           http://www.2dx.unibas.ch/documentation/mrc-software/fei-\
extended-mrc-format-not-used-by-2dx
        """
        ext_header_type = str(force_type).upper() or self.extended_header_type
        if ext_header_type != 'FEI1':
            raise ValueError("extended header type '{}' not supported"
                             "".format(self.extended_header_type))

        groupby, groupby_in = str(groupby).lower(), groupby

        ext_header_len = int(self.header['nsymbt']['value'])
        if ext_header_len % MRC_FEI_SECTION_SIZE:
            raise ValueError('extended header length {} from header is '
                             'not divisible by extended header section size '
                             '{}'.format(ext_header_len, MRC_FEI_SECTION_SIZE))

        num_sections = ext_header_len // MRC_FEI_SECTION_SIZE
        if num_sections > MRC_FEI_MAX_SECTIONS:
            raise ValueError('calculated number of sections ({}) exceeds '
                             'maximum number of sections ({})'
                             ''.format(num_sections, MRC_FEI_MAX_SECTIONS))

        section_fields = header_fields_from_table(
            MRC_FEI_EXT_HEADER_SECTION,
            keys=MRC_SPEC_KEYS,
            dtype_map=MRC_DTYPE_TO_NPY_DTYPE)

        if groupby == 'field':
            # Make a list for each field and append the values for that
            # field. Then create an array from that list and store it
            # under the field name.
            ext_header = OrderedDict()
            for field in section_fields:
                value_list = []
                field_offset = field['offset']
                field_dtype = field['dtype']
                field_dshape = field['dshape']
                for section in range(num_sections):
                    # Get the bytestring from the right position in the file,
                    # unpack it and append the value to the list.
                    section_start = section * MRC_FEI_SECTION_SIZE
                    self.file.seek(section_start + field_offset)
                    num_items = int(np.prod(field_dshape))
                    size_bytes = num_items * field_dtype.itemsize
                    packed_value = self.file.read(size_bytes)
                    fmt = '{}{}'.format(num_items, field_dtype.char)
                    value_list.append(struct.unpack(fmt, packed_value))

                ext_header[field['name']] = np.array(value_list,
                                                     dtype=field_dtype)
            return ext_header

        elif groupby == 'section':
            # Loop though the sections and append all values from that
            # section to a list. Return it as a tuple.
            ext_header = []
            for section in range(num_sections):
                entry = {}
                section_start = section * MRC_FEI_SECTION_SIZE
                for field in section_fields:
                    # Get the bytestring from the right position in the file,
                    # unpack it and store the value as array in the dict.
                    self.file.seek(section_start + field['offset'])
                    num_items = int(np.prod(field['dshape']))
                    size_bytes = num_items * field['dtype'].itemsize
                    packed_value = self.file.read(size_bytes)
                    fmt = '{}{}'.format(num_items, field['dtype'].char)
                    value = struct.unpack(fmt, packed_value)
                    # Make each entry a 1-element 1D array as usual
                    entry[field['name']] = np.array(
                        value, dtype=field['dtype']).reshape(field['dshape'])

                ext_header.append(entry)
            return tuple(ext_header)

        else:
            raise ValueError("`groupby` '{}' not understood"
                             "".format(groupby_in))