Example #1
def test_get_signal_chunks_small_dataset():
    # Whole dataset fits in one chunk
    shape = (10, 10, 2, 2)
    chunks = get_signal_chunks(shape=shape,
                               dtype=np.int32,
                               signal_axes=(2, 3),
                               target_size=1e6)
    # The chunks must be smaller than or equal to the corresponding sizes
    assert chunks == shape
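
Taken together, examples #1-#3 suggest the contract of get_signal_chunks: signal axes are kept whole while navigation axes are subdivided until a chunk fits within target_size bytes. A minimal sketch of that behaviour, under that assumption (a hypothetical re-implementation, not HyperSpy's actual code):

import numpy as np

def get_signal_chunks_sketch(shape, dtype, signal_axes=(), target_size=1e6):
    # Hypothetical re-implementation for illustration only.
    typesize = np.dtype(dtype).itemsize
    chunks = list(shape)
    # Navigation axes are all axes not listed as signal axes.
    nav_axes = [i for i in range(len(shape)) if i not in signal_axes]
    # Halve the largest navigation axis until the chunk fits the budget.
    while np.prod(chunks) * typesize >= target_size and nav_axes:
        largest = max(nav_axes, key=lambda i: chunks[i])
        if chunks[largest] == 1:
            break  # signal axes alone exceed the budget; keep them whole
        chunks[largest] = int(np.ceil(chunks[largest] / 2))
    return tuple(chunks)

# Reproduces example #1: the whole dataset fits in one chunk.
assert get_signal_chunks_sketch((10, 10, 2, 2), np.int32, (2, 3)) == (10, 10, 2, 2)
# Reproduces example #2 below: one signal exceeds the budget, so the
# navigation axes collapse to 1 while the signal axes stay whole.
assert get_signal_chunks_sketch((10, 1000, 5, 1000), np.int32,
                                (1, 3)) == (1, 1000, 1, 1000)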
Example #2
def test_get_signal_chunks_big_signal():
    # One signal exceeds the target size
    shape = (10, 1000, 5, 1000)
    chunks = get_signal_chunks(shape=shape,
                               dtype=np.int32,
                               signal_axes=(1, 3),
                               target_size=1e6)
    # The chunks must be smaller than or equal to the corresponding sizes
    assert chunks == (1, 1000, 1, 1000)
Example #3
def test_get_signal_chunks(target_size):
    shape = (2, 150, 3, 200, 1, 600, 1)
    chunks = get_signal_chunks(shape=shape,
                               dtype=np.int64,
                               signal_axes=(2, 3),
                               target_size=target_size)
    assert (np.prod(chunks) * 8 < target_size)
    # The chunks must be smaller than or equal to the corresponding sizes
    assert (np.array(chunks) <= np.array(shape)).all()
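
Examples #3 and #7 receive target_size as a test argument, which suggests a pytest parametrize decorator was cropped from the listing. A plausible reconstruction using the sketch defined after example #1 (the parameter values are assumptions):

import numpy as np
import pytest

@pytest.mark.parametrize("target_size", [1e5, 1e6, 1e7])  # hypothetical values
def test_get_signal_chunks_param(target_size):
    chunks = get_signal_chunks_sketch((2, 150, 3, 200, 1, 600, 1),
                                      np.int64, signal_axes=(2, 3),
                                      target_size=target_size)
    # Chunk byte size must stay below the budget (int64 = 8 bytes).
    assert np.prod(chunks) * 8 < target_size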
Example #4
File: nexus.py Project: pc494/hyperspy
def _extract_hdf_dataset(group, dataset, lazy=False):
    """Import data from hdf path.

    Parameters
    ----------
    group : hdf group
        group from which to load the dataset
    dataset : str
        path to the dataset within the group
    lazy    : bool {default:True}
        If true use lazy opening, if false read into memory

    Returns
    -------
    dask or numpy array

    """
    data = group[dataset]
    if lazy:
        if "chunks" in data.attrs.keys():
            chunks = data.attrs["chunks"]
        else:
            chunks = get_signal_chunks(data.shape, data.dtype)
        data_lazy = da.from_array(data, chunks=chunks)
    else:
        data_lazy = np.array(data)

    nav_list = []
    for i in range(data.ndim):
        nav_list.append({
            'size': data.shape[i],
            'index_in_array': i,
            'scale': 1,
            'offset': 0.0,
            'units': '',
            'navigate': True,
        })

    dictionary = {
        'data': data_lazy,
        'metadata': {},
        'original_metadata': {},
        'axes': nav_list
    }

    return dictionary
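
A usage sketch for _extract_hdf_dataset, assuming the function above is in scope and the file contains a dataset at 'entry/data' (the file name and paths are illustrative):

import h5py

# Hypothetical file layout: /entry/data holds the raw dataset.
with h5py.File("example.nxs", "r") as f:
    signal_dict = _extract_hdf_dataset(f["entry"], "data", lazy=True)
    print(signal_dict["data"].chunks)  # dask chunking chosen above
    print(len(signal_dict["axes"]))    # one axis dict per dimension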
Example #5
File: nexus.py Project: pc494/hyperspy
def _parse_from_file(value, lazy=False):
    """To convert values from the hdf file to compatible formats.

    When reading string arrays we convert or keep string arrays as
    byte_strings (some io_plugins only supports byte-strings arrays so this
    ensures inter-compatibility across io_plugins)
    Arrays of length 1 - return the single value stored.
    Large datasets are returned as dask arrays if lazy=True.

    Parameters
    ----------
    value : input read from hdf file (array,list,tuple,string,int,float)
    lazy  : bool  {default: False}
        The lazy flag is only applied to values of size >=2

    Returns
    -------
    str,int, float, ndarray dask Array
        parsed value.

    """
    toreturn = value
    if isinstance(value, h5py.Dataset):
        if value.size < 2:
            toreturn = value[...].item()
        else:
            if lazy:
                if value.chunks:
                    toreturn = da.from_array(value, value.chunks)
                else:
                    chunks = get_signal_chunks(value.shape, value.dtype)
                    toreturn = da.from_array(value, chunks)
            else:
                toreturn = np.array(value)

    if isinstance(toreturn, np.ndarray) and value.shape == (1, ):
        toreturn = toreturn[0]
    if isinstance(toreturn, bytes):
        toreturn = _byte_to_string(toreturn)
    if isinstance(toreturn, (np.ndarray)) and toreturn.dtype.char == "U":
        toreturn = toreturn.astype("S")
    return toreturn
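
A usage sketch for _parse_from_file, exercising the scalar and lazy branches against a throwaway in-memory h5py file (names are illustrative):

import h5py
import numpy as np

# In-memory HDF5 file (never written to disk).
with h5py.File("demo.h5", "w", driver="core", backing_store=False) as f:
    f["scalar"] = np.array([42])
    f["big"] = np.arange(1000).reshape(10, 100)

    assert _parse_from_file(f["scalar"]) == 42        # size < 2 -> plain value
    lazy_arr = _parse_from_file(f["big"], lazy=True)  # size >= 2 -> dask array
    print(lazy_arr.chunks)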
Example #6
File: nexus.py Project: pc494/hyperspy
def _nexus_dataset_to_signal(group, nexus_dataset_path, lazy=False):
    """Load an NXdata set as a hyperspy signal.

    Parameters
    ----------
    group : hdf group
        Group containing the NXdata set.
    nexus_dataset_path : str
        Path to the NXdata set in the group.
    lazy : bool, default False
        Lazy loading of data.

    Returns
    -------
    dict
        A signal dictionary which can be used to instantiate a signal.

    """
    detector_index = 0
    interpretation = None
    dataentry = group[nexus_dataset_path]
    if "signal" in dataentry.attrs.keys():
        if _is_int(dataentry.attrs["signal"]):
            data_key = "data"
        else:
            data_key = dataentry.attrs["signal"]
    else:
        _logger.info("No signal attribute associated with NXdata; will "
                     "try assuming the signal name is 'data'")
        if "data" not in dataentry.keys():
            raise ValueError("Signal attribute not found in NXdata and "
                             "attempt to find a default 'data' key failed")
        else:
            data_key = "data"

    if "interpretation" in dataentry.attrs.keys():
        interpretation = _parse_from_file(dataentry.attrs["interpretation"])

    data = dataentry[data_key]
    nav_list = []
    # indices of the axes that are described by named axis datasets
    axis_index_list = []
    if "axes" in dataentry.attrs.keys():
        axes_key = dataentry.attrs["axes"]
        axes_list = ["."] * data.ndim
        if isinstance(axes_key, np.ndarray):
            for i, num in enumerate(axes_key):
                axes_list[i] = _parse_from_file(num)
        else:
            axes_list[0] = _parse_from_file(axes_key)

        named_axes = list(range(len(axes_list)))
        for i, ax in enumerate(axes_list):
            if ax != ".":
                index_name = ax + "_indices"
                if index_name in dataentry.attrs:
                    ind_in_array = int(dataentry.attrs[index_name])
                else:
                    ind_in_array = i
                axis_index_list.append(ind_in_array)
                if "units" in dataentry[ax].attrs:
                    units = _parse_from_file(dataentry[ax].attrs["units"])
                else:
                    units = ""

                navigation = True
                named_axes.remove(ind_in_array)

                if _is_numeric_data(dataentry[ax]):
                    if dataentry[ax].size > 1:
                        if _is_linear_axis(dataentry[ax]):
                            nav_list.append({
                                'size': data.shape[ind_in_array],
                                'index_in_array': ind_in_array,
                                'name': ax,
                                'scale': abs(dataentry[ax][1]
                                             - dataentry[ax][0]),
                                'offset': min(dataentry[ax][0],
                                              dataentry[ax][-1]),
                                'units': units,
                                'navigate': navigation
                            })
                        else:
                            nav_list.append({
                                'size': data.shape[ind_in_array],
                                'index_in_array': ind_in_array,
                                'name': ax,
                                'scale': 1,
                                'offset': 0,
                                'navigate': navigation
                            })
                    else:
                        nav_list.append({
                            'size': 1,
                            'index_in_array': ind_in_array,
                            'name': ax,
                            'scale': 1,
                            'offset': dataentry[ax][0],
                            'units': units,
                            'navigate': True
                        })
            else:
                if len(data.shape) == len(axes_list):
                    nav_list.append({
                        'size': data.shape[named_axes[detector_index]],
                        'index_in_array': named_axes[detector_index],
                        'scale': 1,
                        'offset': 0.0,
                        'units': '',
                        'navigate': False
                    })
                    detector_index = detector_index + 1

    if lazy:
        if "chunks" in data.attrs.keys():
            chunks = data.attrs["chunks"]
        else:
            chunks = get_signal_chunks(data.shape, data.dtype)
        data_lazy = da.from_array(data, chunks=chunks)
    else:
        data_lazy = np.array(data)

    if not nav_list:
        for i in range(data.ndim):
            nav_list.append({
                'size': data_lazy.shape[i],
                'index_in_array': i,
                'scale': 1,
                'offset': 0.0,
                'units': '',
                'navigate': True
            })
    title = _text_split(nexus_dataset_path, '/')[-1]
    metadata = {'General': {'title': title}}

    # If interpretation is set, reset the navigation axes,
    # assuming the last dimensions are the signal.
    if interpretation:
        for x in nav_list:
            x["navigate"] = True
        if interpretation == "spectrum":
            nav_list[-1]["navigate"] = False
        elif interpretation == "image":
            nav_list[-1]["navigate"] = False
            nav_list[-2]["navigate"] = False

    dictionary = {'data': data_lazy, 'axes': nav_list, 'metadata': metadata}
    return dictionary
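
A usage sketch for _nexus_dataset_to_signal, assuming the NXdata group sits at 'entry/data' inside the file (the path is illustrative):

import h5py

# Hypothetical NeXus file; the NXdata path depends on the file's layout.
with h5py.File("experiment.nxs", "r") as f:
    signal_dict = _nexus_dataset_to_signal(f, "entry/data", lazy=False)
    print(signal_dict["metadata"]["General"]["title"])    # "data"
    print([ax["navigate"] for ax in signal_dict["axes"]])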
Example #7
def test_get_signal_chunks(target_size):
    chunks = get_signal_chunks(shape=[15, 15, 256, 256],
                               dtype=np.int64,
                               signal_axes=(2, 3),
                               target_size=target_size)
    assert (np.prod(chunks) * 8 < target_size)
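
The chunk tuples these tests validate are meant to be passed straight to dask, as the nexus.py and h5ebsd.py examples do. A minimal end-to-end sketch (the array contents and chunk tuple are illustrative):

import dask.array as da
import numpy as np

data = np.zeros((15, 15, 256, 256), dtype=np.int64)
# A chunk tuple of the kind get_signal_chunks returns for this shape:
# whole signal axes, navigation axes split to respect the byte budget.
chunks = (1, 1, 256, 256)
lazy = da.from_array(data, chunks=chunks)
print(lazy.chunksize)     # (1, 1, 256, 256)
print(lazy.npartitions)   # 225 chunks of ~0.5 MB each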
Example #8
File: h5ebsd.py Project: swang29/kikuchipy
def h5ebsd2signaldict(scan_group, manufacturer, version, lazy=False):
    """Return a dictionary with signal, metadata and original metadata
    from an h5ebsd scan.

    Parameters
    ----------
    scan_group : h5py.Group
        HDF group of scan.
    manufacturer : {'KikuchiPy', 'EDAX', 'Bruker Nano'}
        Manufacturer of file.
    version : str
        Version of manufacturer software.
    lazy : bool, optional
        Read the data lazily (default is False).

    Returns
    -------
    scan : dict
        Dictionary with patterns, metadata and original metadata.
    """

    md, omd, scan_size = h5ebsdheader2dicts(scan_group, manufacturer, version,
                                            lazy)
    md.set_item('Signal.signal_type', 'EBSD')
    md.set_item('Signal.record_by', 'image')

    scan = {'metadata': md.as_dictionary(),
            'original_metadata': omd.as_dictionary(), 'attributes': {}}

    # Get data group
    man_pats = manufacturer_pattern_names()
    for man, pats in man_pats.items():
        if manufacturer.lower() == man.lower():
            data = scan_group['EBSD/Data/' + pats]

    # Get data from group
    if lazy:
        chunks = data.chunks
        if chunks is None:
            chunks = get_signal_chunks(data.shape, data.dtype, [1, 2])
        data = da.from_array(data, chunks=chunks)
        scan['attributes']['_lazy'] = True
    else:
        data = np.asanyarray(data)

    sx, sy = scan_size.sx, scan_size.sy
    nx, ny = scan_size.nx, scan_size.ny
    try:
        data = data.reshape((ny, nx, sy, sx)).squeeze()
    except ValueError:
        warnings.warn("Pattern size ({} x {}) and scan size ({} x {}) larger "
                      "than file size. Will attempt to load by zero padding "
                      "incomplete frames.".format(sx, sy, nx, ny))
        # Data is stored pattern by pattern
        pw = [(0, ny * nx * sy * sx - data.size)]
        if lazy:
            data = da.pad(data, pw, mode='constant')
        else:
            data = np.pad(data, pw, mode='constant')
        data = data.reshape((ny, nx, sy, sx))
    scan['data'] = data

    units = np.repeat(u'\u03BC'+'m', 4)
    names = ['y', 'x', 'dy', 'dx']
    scales = np.ones(4)

    # Calibrate scan dimension and detector dimension
    step_x, step_y = scan_size.step_x, scan_size.step_y
    scales[0] = scales[0] * step_x
    scales[1] = scales[1] * step_y
    detector_pixel_size = scan_size.delta
    scales[2] = scales[2] * detector_pixel_size
    scales[3] = scales[3] * detector_pixel_size

    # Create axis objects for each axis
    axes = [{'size': data.shape[i], 'index_in_array': i, 'name': names[i],
            'scale': scales[i], 'offset': 0.0, 'units': units[i]}
            for i in range(data.ndim)]
    scan['axes'] = axes

    return scan
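
A usage sketch for h5ebsd2signaldict, assuming a KikuchiPy-flavoured h5ebsd file whose scan group is named 'Scan 1' (file and group names are illustrative):

import h5py

# Hypothetical h5ebsd file with one scan group.
with h5py.File("patterns.h5", "r") as f:
    scan = h5ebsd2signaldict(f["Scan 1"], manufacturer="KikuchiPy",
                             version="0.1", lazy=True)
    print(scan["data"].shape)  # (ny, nx, sy, sx) after reshape
    print(scan["metadata"]["Signal"]["signal_type"])  # "EBSD"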