Example #1
    def loadFileList(self, filelist, selection, scanlist=None):
        """
        loadFileList(self, filelist, selection, scanlist=None)
        filelist is the list of file names belonging to the stack
        selection is a dictionary with the keys x, y, m.
        x        is the path to the x data (the channels) in the spectrum,
                 without the first level "directory".
        y        is the path to the 1D data (the counts) in the spectrum,
                 without the first level "directory"
        m        is the path to the normalizing data (I0 or whatever)
                 without the first level "directory".
        scanlist is the list of first level "directories" containing the 1D data
                 Example: The actual path has the form:
                 /whatever1/whatever2/counts
                 That means scanlist = ["/whatever1"]
                 and selection['y'] = "/whatever2/counts"
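
        A minimal usage sketch based on the example above (the file names
        and the stack instance are hypothetical):
            stack.loadFileList(["map_0001.h5", "map_0002.h5"],
                               {"x": [], "m": [],
                                "y": "/whatever2/counts"},
                               scanlist=["/whatever1"])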
        """
        _logger.info("filelist = %s", filelist)
        _logger.info("selection = %s", selection)
        _logger.info("scanlist = %s", scanlist)
        # all the files in the same source
        hdfStack = NexusDataSource.NexusDataSource(filelist)

        # if there is more than one file, it is assumed all the files have
        # the same structure.
        tmpHdf = hdfStack._sourceObjectList[0]
        entryNames = []
        for key in tmpHdf["/"].keys():
            try:
                if isinstance(tmpHdf["/" + key], h5py.Group):
                    entryNames.append(key)
            except KeyError:
                _logger.info("Broken link with key? <%s>" % key)

        # build the selection in terms of HDF paths
        # for the time being
        xSelectionList = selection.get('x', None)
        if xSelectionList == []:
            xSelectionList = None
        if xSelectionList is not None:
            if not isinstance(xSelectionList, list):
                xSelectionList = [xSelectionList]
            xSelection = xSelectionList[0] if len(xSelectionList) else None
        else:
            xSelection = None
        # only one y is taken
        ySelection = selection['y']
        if isinstance(ySelection, list):
            ySelectionList = list(ySelection)
            ySelection = ySelection[0]
        else:
            ySelectionList = [ySelection]

        # monitor selection
        mSelection = selection.get('m', None)
        if mSelection in [None, []]:
            mSelection = None
        else:
            if not isinstance(mSelection, list):
                mSelection = [mSelection]
            mSelection = mSelection[0] if len(mSelection) else None

        USE_JUST_KEYS = False
        # deal with the pathological case where the scanlist corresponds
        # to a selected top level dataset
        if len(entryNames) == 0:
            if scanlist is not None:
                if (ySelection in scanlist) or \
                   (xSelection in scanlist) or \
                   (mSelection in scanlist):
                    scanlist = None
                    USE_JUST_KEYS = True
            else:
                USE_JUST_KEYS = True
        elif len(entryNames) == 1:
            # deal with the SOLEIL case of one entry but with different name
            # in different files
            USE_JUST_KEYS = True
        elif scanlist in [None, []]:
            USE_JUST_KEYS = True
        if USE_JUST_KEYS:
            # if the scanlist is None, it is assumed we are interested in all
            # the scans containing the selection, not that all the scans
            # contain the selection.
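            # Entries retained below are stored as "1.N" keys, N being the
            # 1-based position of the entry in the file (the "number.order"
            # convention parsed when an explicit scanlist is given).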
            scanlist = []
            if 0:
                JUST_KEYS = False
                #expect same entry names in the files
                #Unfortunately this does not work for SOLEIL
                for entry in entryNames:
                    path = "/" + entry + ySelection
                    dirname = posixpath.dirname(path)
                    base = posixpath.basename(path)
                    try:
                        file_entry = tmpHdf[dirname]
                        if base in file_entry.keys():
                            scanlist.append(entry)
                    except:
                        pass
            else:
                JUST_KEYS = True
                #expect same structure in the files even if the
                #names are different (SOLEIL ...)
                if len(entryNames):
                    i = 0
                    for entry in entryNames:
                        i += 1
                        path = "/" + entry + ySelection
                        dirname = posixpath.dirname(path)
                        base = posixpath.basename(path)
                        try:
                            file_entry = tmpHdf[dirname]
                            if hasattr(file_entry, "keys"):
                                if base in file_entry.keys():
                                    # this is the case of a selection inside a group
                                    scanlist.append("1.%d" % i)
                        except KeyError:
                            _logger.warning("%s not in file, ignoring.",
                                            dirname)
                    if not len(scanlist):
                        if not ySelection.startswith("/"):
                            path = "/" + ySelection
                        else:
                            path = ySelection
                        dirname = posixpath.dirname(path)
                        base = posixpath.basename(path)
                        try:
                            if dirname in tmpHdf["/"]:
                                # this is the case of a dataset at top level
                                # or of having been given the complete path
                                if base in tmpHdf[dirname]:
                                    JUST_KEYS = False
                                    scanlist.append("")
                            elif base in file_entry.keys():
                                JUST_KEYS = False
                                scanlist.append("")
                        except:
                            #it will crash later on
                            pass
                else:
                    JUST_KEYS = False
                    scanlist.append("")
        else:
            try:
                number, order = [int(x) for x in scanlist[0].split(".")]
                JUST_KEYS = True
            except:
                JUST_KEYS = False
            if not JUST_KEYS:
                for scan in scanlist:
                    if scan.startswith("/"):
                        t = scan[1:]
                    else:
                        t = scan
                    if t not in entryNames:
                        raise ValueError("Entry %s not in file" % scan)

        nFiles = len(filelist)
        nScans = len(scanlist)
        if JUST_KEYS:
            if not nScans:
                raise IOError("No entry contains the required data")

        _logger.debug("Retained number of files = %d", nFiles)
        _logger.debug("Retained number of scans = %d", nScans)

        # Now decide the number of MCA spectra ...
        # I assume all the scans contain the same number of MCA spectra
        if JUST_KEYS:
            path = "/" + entryNames[int(scanlist[0].split(".")[-1]) -
                                    1] + ySelection
            if mSelection is not None:
                mpath = "/" + entryNames[int(scanlist[0].split(".")[-1]) -
                                         1] + mSelection
            if xSelectionList is not None:
                xpathList = []
                for xSelection in xSelectionList:
                    xpath = "/" + entryNames[int(scanlist[0].split(".")[-1]) -
                                             1] + xSelection
                    xpathList.append(xpath)
        else:
            path = scanlist[0] + ySelection
            if mSelection is not None:
                mpath = scanlist[0] + mSelection
            if xSelectionList is not None:
                xpathList = []
                for xSelection in xSelectionList:
                    xpath = scanlist[0] + xSelection
                    xpathList.append(xpath)

        yDataset = tmpHdf[path]
        if (self.__dtype is None) or (mSelection is not None):
            self.__dtype = yDataset.dtype
            if self.__dtype in [numpy.int16, numpy.uint16]:
                self.__dtype = numpy.float32
            elif self.__dtype in [numpy.int32, numpy.uint32]:
                if mSelection:
                    self.__dtype = numpy.float32
                else:
                    self.__dtype = numpy.float64
            elif self.__dtype not in [
                    numpy.float16, numpy.float32, numpy.float64
            ]:
                # Some datasets from CLS (origin APS?) arrive with a data
                # format such as ">u2" and are not detected as integer types
                _logger.debug("Not basic dataset type %s", self.__dtype)
                if ("%s" % self.__dtype).endswith("2"):
                    self.__dtype = numpy.float32
                else:
                    if mSelection:
                        self.__dtype = numpy.float32
                    else:
                        self.__dtype = numpy.float64
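        # In short: 16-bit integers are promoted to float32, 32-bit integers
        # to float64 (or float32 when a monitor will divide them), and
        # unrecognized 2-byte formats are read as float32.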

        # figure out the shape of the stack
        shape = yDataset.shape
        mcaIndex = selection.get('index', len(shape) - 1)
        if mcaIndex == -1:
            mcaIndex = len(shape) - 1
        _logger.debug("mcaIndex = %d", mcaIndex)
        considerAsImages = False
        dim0, dim1, mcaDim = self.getDimensions(nFiles,
                                                nScans,
                                                shape,
                                                index=mcaIndex)
        try:
            if self.__dtype in [numpy.float32, numpy.int32]:
                bytefactor = 4
            elif self.__dtype in [numpy.int16, numpy.uint16]:
                bytefactor = 2
            elif self.__dtype in [numpy.int8, numpy.uint8]:
                bytefactor = 1
            else:
                bytefactor = 8

            neededMegaBytes = nFiles * dim0 * dim1 * (mcaDim * bytefactor /
                                                      (1024 * 1024.))
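            # e.g. a single file holding a 100 x 100 map of 2048-channel
            # float32 spectra needs 100 * 100 * 2048 * 4 bytes ~ 78 MBytes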
            _logger.info("Using %d bytes per item" % bytefactor)
            _logger.info("Needed %d Megabytes" % neededMegaBytes)
            physicalMemory = None
            if hasattr(PhysicalMemory, "getAvailablePhysicalMemoryOrNone"):
                physicalMemory = PhysicalMemory.getAvailablePhysicalMemoryOrNone(
                )
            if not physicalMemory:
                physicalMemory = PhysicalMemory.getPhysicalMemoryOrNone()
            else:
                _logger.info("Available physical memory %.1f GBytes" % \
                             (physicalMemory/(1024*1024*1024.)))
            if physicalMemory is None:
                # 6 Gigabytes of available memory
                # should be a good compromise in 2018
                physicalMemory = 6000
                _logger.info("Assumed physical memory %.1f MBytes" %
                             physicalMemory)
            else:
                physicalMemory /= (1024 * 1024.)
            _logger.info("Using physical memory %.1f GBytes" %
                         (physicalMemory / 1024))
            if (neededMegaBytes > (0.95*physicalMemory))\
               and (nFiles == 1) and (len(shape) == 3):
                if self.__dtype0 is None:
                    if (bytefactor == 8) and (neededMegaBytes <
                                              (2 * physicalMemory)):
                        # try reading as float32
                        print("Forcing the use of float32 data")
                        self.__dtype = numpy.float32
                    else:
                        raise MemoryError("Force dynamic loading")
                else:
                    raise MemoryError("Force dynamic loading")
            if (mcaIndex == 0) and (nFiles == 1) and (nScans == 1):
                #keep the original arrangement but in memory
                self.data = numpy.zeros(yDataset.shape, self.__dtype)
                considerAsImages = True
            else:
                # force arrangement as spectra
                self.data = numpy.zeros((dim0, dim1, mcaDim), self.__dtype)
            DONE = False
        except (MemoryError, ValueError):
            # some versions report ValueError instead of MemoryError
            if (nFiles == 1) and (len(shape) == 3):
                _logger.warning("Attempting dynamic loading")
                if mSelection is not None:
                    _logger.warning("Ignoring monitor")
                self.data = yDataset
                if mSelection is not None:
                    mdtype = tmpHdf[mpath].dtype
                    if mdtype not in [numpy.float64, numpy.float32]:
                        mdtype = numpy.float64
                    mDataset = numpy.asarray(tmpHdf[mpath], dtype=mdtype)
                    self.monitor = [mDataset]
                if xSelectionList is not None:
                    if len(xpathList) == 1:
                        xpath = xpathList[0]
                        xDataset = tmpHdf[xpath][()]
                        self.x = [xDataset]
                if h5py.version.version < '2.0':
                    #prevent automatic closing keeping a reference
                    #to the open file
                    self._fileReference = hdfStack
                DONE = True
            else:
                # what to do if the number of dimensions is only 2?
                raise

        # get the positioners information associated to the path
        positioners = {}
        try:
            positionersGroup = NexusTools.getPositionersGroup(tmpHdf, path)
            for motorName, motorValues in positionersGroup.items():
                positioners[motorName] = motorValues[()]
        except:
            positionersGroup = None
            positioners = {}

        # get the mca information associated to the path
        mcaObjectPaths = NexusTools.getMcaObjectPaths(tmpHdf, path)
        _time = None
        _calibration = None
        _channels = None
        if considerAsImages:
            self._pathHasRelevantInfo = False
        else:
            numberOfRelevantInfoKeys = 0
            for objectPath in mcaObjectPaths:
                if objectPath not in ["counts", "target"]:
                    numberOfRelevantInfoKeys += 1
            if numberOfRelevantInfoKeys:  # not just "counts" or "target"
                self._pathHasRelevantInfo = True
                if "live_time" in mcaObjectPaths:
                    if DONE:
                        # hopefully it will fit into memory
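                        # paths of the form "filename::datasetpath" refer
                        # to a dataset in an external HDF5 file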
                        if mcaObjectPaths["live_time"] in tmpHdf:
                            _time = tmpHdf[mcaObjectPaths["live_time"]][()]
                        elif "::" in mcaObjectPaths["live_time"]:
                            tmpFileName, tmpDatasetPath = \
                                        mcaObjectPaths["live_time"].split("::")
                            with h5py.File(tmpFileName, "r") as tmpH5:
                                _time = tmpH5[tmpDatasetPath][()]
                        else:
                            del mcaObjectPaths["live_time"]
                    else:
                        # we need as many live times as MCA spectra
                        _time = numpy.zeros( \
                                    (self.data.shape[0] * self.data.shape[1]),
                                    dtype=numpy.float64)
                elif "elapsed_time" in mcaObjectPaths:
                    if DONE:
                        # hopefully it will fit into memory
                        if mcaObjectPaths["elapsed_time"] in tmpHdf:
                            _time = \
                                tmpHdf[mcaObjectPaths["elapsed_time"]][()]
                        elif "::" in mcaObjectPaths["elapsed_time"]:
                            tmpFileName, tmpDatasetPath = \
                                    mcaObjectPaths["elapsed_time"].split("::")
                            with h5py.File(tmpFileName, "r") as tmpH5:
                                _time = tmpH5[tmpDatasetPath][()]
                        else:
                            del mcaObjectPaths["elapsed_time"]
                    else:
                        # we need as many elapsed times as MCA spectra
                        _time = numpy.zeros(
                            (self.data.shape[0] * self.data.shape[1]),
                            numpy.float32)
                if "calibration" in mcaObjectPaths:
                    if mcaObjectPaths["calibration"] in tmpHdf:
                        _calibration = \
                                tmpHdf[mcaObjectPaths["calibration"]][()]
                    elif "::" in mcaObjectPaths["calibration"]:
                        tmpFileName, tmpDatasetPath = \
                                    mcaObjectPaths["calibration"].split("::")
                        with h5py.File(tmpFileName, "r") as tmpH5:
                            _calibration = tmpH5[tmpDatasetPath][()]
                    else:
                        del mcaObjectPaths["calibration"]
                if "channels" in mcaObjectPaths:
                    if mcaObjectPaths["channels"] in tmpHdf:
                        _channels = \
                                tmpHdf[mcaObjectPaths["channels"]][()]
                    elif "::" in mcaObjectPaths["channels"]:
                        tmpFileName, tmpDatasetPath = \
                                    mcaObjectPaths["channels"].split("::")
                        with h5py.File(tmpFileName, "r") as tmpH5:
                            _channels = tmpH5[tmpDatasetPath][()]
                    else:
                        del mcaObjectPaths["channels"]
            else:
                self._pathHasRelevantInfo = False

        if (not DONE) and (not considerAsImages):
            _logger.info("Data in memory as spectra")
            self.info["McaIndex"] = 2
            n = 0

            if dim0 == 1:
                self.onBegin(dim1)
            else:
                self.onBegin(dim0)
            self.incrProgressBar = 0
            for hdf in hdfStack._sourceObjectList:
                entryNames = list(hdf["/"].keys())
                goodEntryNames = []
                for entry in entryNames:
                    tmpPath = "/" + entry
                    try:
                        if hasattr(hdf[tmpPath], "keys"):
                            goodEntryNames.append(entry)
                    except KeyError:
                        _logger.info("Broken link with key? <%s>" % tmpPath)

                for scan in scanlist:
                    IN_MEMORY = None
                    nStart = n
                    for ySelection in ySelectionList:
                        n = nStart
                        if JUST_KEYS:
                            entryName = goodEntryNames[
                                int(scan.split(".")[-1]) - 1]
                            path = entryName + ySelection
                            if mSelection is not None:
                                mpath = entryName + mSelection
                                mdtype = hdf[mpath].dtype
                                if mdtype not in [
                                        numpy.float64, numpy.float32
                                ]:
                                    mdtype = numpy.float64
                                mDataset = numpy.asarray(hdf[mpath],
                                                         dtype=mdtype)
                            if xSelectionList is not None:
                                xDatasetList = []
                                for xSelection in xSelectionList:
                                    xpath = entryName + xSelection
                                    xDataset = hdf[xpath][()]
                                    xDatasetList.append(xDataset)
                        else:
                            path = scan + ySelection
                            if mSelection is not None:
                                mpath = scan + mSelection
                                mdtype = hdf[mpath].dtype
                                if mdtype not in [
                                        numpy.float64, numpy.float32
                                ]:
                                    mdtype = numpy.float64
                                mDataset = numpy.asarray(hdf[mpath],
                                                         dtype=mdtype)
                            if xSelectionList is not None:
                                xDatasetList = []
                                for xSelection in xSelectionList:
                                    xpath = scan + xSelection
                                    xDataset = hdf[xpath][()]
                                    xDatasetList.append(xDataset)
                        try:
                            yDataset = hdf[path]
                            tmpShape = yDataset.shape
                            totalBytes = numpy.ones((1, ),
                                                    yDataset.dtype).itemsize
                            for nItems in tmpShape:
                                totalBytes *= nItems
                            # should one be conservative or just try?
                            if (totalBytes /
                                (1024. * 1024.)) > (0.4 * physicalMemory):
                                _logger.info(
                                    "Force dynamic loading of spectra")
                                #read from disk
                                IN_MEMORY = False
                            else:
                                #read the data into memory
                                _logger.info(
                                    "Attempt to load whole map into memory")
                                yDataset = hdf[path][()]
                                IN_MEMORY = True
                        except (MemoryError, ValueError):
                            _logger.info("Dynamic loading of spectra")
                            yDataset = hdf[path]
                            IN_MEMORY = False
                        nMcaInYDataset = 1
                        for dim in yDataset.shape:
                            nMcaInYDataset *= dim
                        nMcaInYDataset = int(nMcaInYDataset / mcaDim)
                        timeData = None
                        if _time is not None:
                            if "live_time" in mcaObjectPaths:
                                # it is assumed that all have the same structure!!!
                                timePath = NexusTools.getMcaObjectPaths(
                                    hdf, path)["live_time"]
                            elif "elapsed_time" in mcaObjectPaths:
                                timePath = NexusTools.getMcaObjectPaths(
                                    hdf, path)["elapsed_time"]
                            if timePath in hdf:
                                timeData = hdf[timePath][()]
                            elif "::" in timePath:
                                externalFile, externalPath = timePath.split(
                                    "::")
                                with h5py.File(externalFile, "r") as timeHdf:
                                    timeData = timeHdf[externalPath][()]
                        if mcaIndex != 0:
                            if IN_MEMORY:
                                yDataset.shape = -1, mcaDim
                            if mSelection is not None:
                                case = -1
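                                # case 0: one monitor value per spectrum
                                # case 1: one monitor value per channel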
                                nMonitorData = 1
                                for v in mDataset.shape:
                                    nMonitorData *= v
                                if nMonitorData == nMcaInYDataset:
                                    mDataset.shape = nMcaInYDataset
                                    case = 0
                                elif nMonitorData == (nMcaInYDataset * mcaDim):
                                    case = 1
                                    mDataset.shape = nMcaInYDataset, mcaDim
                                if case == -1:
                                    raise ValueError(\
                                        "I do not know how to handle this monitor data")
                            if timeData is not None:
                                case = -1
                                nTimeData = 1
                                for v in timeData.shape:
                                    nTimeData *= v
                                if nTimeData == nMcaInYDataset:
                                    timeData.shape = nMcaInYDataset
                                    case = 0
                                    _time[nStart:nStart +
                                          nMcaInYDataset] += timeData
                                if case == -1:
                                    _logger.warning(
                                        "I do not know how to handle this time data"
                                    )
                                    _logger.warning(
                                        "Ignoring time information")
                                    _time = None
                            if (len(yDataset.shape) == 3) and\
                               (dim1 == yDataset.shape[1]):
                                mca = 0
                                deltaI = int(yDataset.shape[1] / dim1)
                                for ii in range(yDataset.shape[0]):
                                    i = int(n / dim1)
                                    yData = yDataset[ii:(ii + 1)]
                                    yData.shape = -1, mcaDim
                                    if mSelection is not None:
                                        if case == 0:
                                            mData = numpy.outer(
                                                mDataset[mca:(mca + dim1)],
                                                numpy.ones((mcaDim)))
                                            self.data[i, :, :] += yData / mData
                                        elif case == 1:
                                            mData = mDataset[mca:(mca + dim1), :]
                                            mData.shape = -1, mcaDim
                                            self.data[i, :, :] += yData / mData
                                    else:
                                        self.data[i:(i + deltaI), :] += yData
                                    n += yDataset.shape[1]
                                    mca += dim1
                            else:
                                for mca in range(nMcaInYDataset):
                                    i = int(n / dim1)
                                    j = n % dim1
                                    if len(yDataset.shape) == 3:
                                        ii = int(mca / yDataset.shape[1])
                                        jj = mca % yDataset.shape[1]
                                        yData = yDataset[ii, jj]
                                    elif len(yDataset.shape) == 2:
                                        yData = yDataset[mca, :]
                                    elif len(yDataset.shape) == 1:
                                        yData = yDataset
                                    if mSelection is not None:
                                        if case == 0:
                                            self.data[i, j, :] += \
                                                yData / mDataset[mca]
                                        elif case == 1:
                                            self.data[i, j, :] += \
                                                yData / mDataset[mca, :]
                                    else:
                                        self.data[i, j, :] += yData
                                    n += 1
Example #2
    def loadFileList(self, filelist, fileindex=0):
        if isinstance(filelist, str):
            filelist = [filelist]
        self.__keyList = []
        self.sourceName = filelist
        self.__indexedStack = True
        self.sourceType = SOURCE_TYPE
        self.info = {}
        self.nbFiles = len(filelist)
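        # The resulting stack layout depends on fileindex:
        #   fileindex == 2 (image stack): data shaped (nFiles, rows, cols)
        #   fileindex == 1:               data shaped (rows, nFiles, cols)
        #   otherwise:                    data shaped (nFiles, rows, cols)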

        #read first edf file
        #get information
        tempEdf = EdfFileDataSource.EdfFileDataSource(filelist[0])
        keylist = tempEdf.getSourceInfo()['KeyList']
        nImages = len(keylist)
        dataObject = tempEdf.getDataObject(keylist[0])
        self.info.update(dataObject.info)
        if len(dataObject.data.shape) == 3:
            #this is already a stack
            self.data = dataObject.data
            self.__nFiles = 1
            self.__nImagesPerFile = nImages
            shape = self.data.shape
            for i in range(len(shape)):
                key = 'Dim_%d' % (i + 1, )
                self.info[key] = shape[i]
            self.info["SourceType"] = SOURCE_TYPE
            self.info["SourceName"] = filelist[0]
            self.info["Size"] = 1
            self.info["NumberOfFiles"] = 1
            self.info["FileIndex"] = fileindex
            return
        arrRet = dataObject.data
        if self.__dtype is None:
            self.__dtype = arrRet.dtype

        self.onBegin(self.nbFiles)
        singleImageShape = arrRet.shape
        actualImageStack = False
        if (fileindex == 2) or (self.__imageStack):
            self.__imageStack = True
            if len(singleImageShape) == 1:
                #single line
                #be ready for specfile stack?
                self.onEnd()
                raise IOError("Not implemented yet")
                self.data = numpy.zeros(
                    (arrRet.shape[0], nImages, self.nbFiles), self.__dtype)
                self.incrProgressBar = 0
                for tempEdfFileName in filelist:
                    tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                    for i in range(nImages):
                        pieceOfStack = tempEdf.GetData(i)
                        self.data[:, i, self.incrProgressBar] = pieceOfStack[:]
                    self.incrProgressBar += 1
                    self.onProgress(self.incrProgressBar)
                self.onEnd()
            else:
                if nImages > 1:
                    #this is not the common case
                    #should I try to convert it to a standard one
                    #using a 3D matrix or keep as 4D matrix?
                    if self.nbFiles > 1:
                        raise IOError(\
                            "Multiple files with multiple images not implemented yet")
                    self.data = numpy.zeros((arrRet.shape[0], arrRet.shape[1],
                                             nImages * self.nbFiles),
                                            self.__dtype)
                    self.incrProgressBar = 0
                    for tempEdfFileName in filelist:
                        tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                        for i in range(nImages):
                            pieceOfStack = tempEdf.GetData(i)
                            self.data[:,:,
                                      nImages*self.incrProgressBar+i] = \
                                                      pieceOfStack[:,:]
                        self.incrProgressBar += 1
                else:
                    #this is the common case
                    try:
                        # calculate needed bytes
                        if self.__dtype == numpy.float64:
                            bytefactor = 8
                        else:
                            bytefactor = 4
                        needed_ = self.nbFiles * \
                                   arrRet.shape[0] *\
                                   arrRet.shape[1] * bytefactor
                        physicalMemory = PhysicalMemory.getPhysicalMemoryOrNone(
                        )
                        if physicalMemory is not None:
                            # spare 5% of memory
                            if physicalMemory < (1.05 * needed_):
                                raise MemoryError(
                                    "Not enough physical memory available")
                        if self.__imageStack:
                            self.data = numpy.zeros(
                                (self.nbFiles, arrRet.shape[0],
                                 arrRet.shape[1]), self.__dtype)
                            self.incrProgressBar = 0
                            for tempEdfFileName in filelist:
                                tempEdf = EdfFile.EdfFile(
                                    tempEdfFileName, 'rb')
                                pieceOfStack = tempEdf.GetData(0)
                                self.data[self.incrProgressBar] = pieceOfStack
                                self.incrProgressBar += 1
                                self.onProgress(self.incrProgressBar)
                            actualImageStack = True
                        else:
                            self.data = numpy.zeros(
                                (arrRet.shape[0], arrRet.shape[1],
                                 self.nbFiles), self.__dtype)
                            self.incrProgressBar = 0
                            for tempEdfFileName in filelist:
                                tempEdf = EdfFile.EdfFile(
                                    tempEdfFileName, 'rb')
                                pieceOfStack = tempEdf.GetData(0)
                                self.data[:, :,
                                          self.incrProgressBar] = pieceOfStack
                                self.incrProgressBar += 1
                                self.onProgress(self.incrProgressBar)
                    except (MemoryError, ValueError):
                        hdf5done = False
                        if HDF5 and (('PyMcaQt' in sys.modules) or\
                           ('PyMca.PyMcaQt' in sys.modules)):
                            from PyMca5 import PyMcaQt as qt
                            from PyMca5 import ArraySave
                            msg = qt.QMessageBox.information(
                                None, "Memory error\n",
                                "Do you want to convert your data to HDF5?\n",
                                qt.QMessageBox.Yes, qt.QMessageBox.No)
                            if msg != qt.QMessageBox.No:
                                hdf5file = qt.QFileDialog.getSaveFileName(
                                    None, "Please select output file name",
                                    os.path.dirname(filelist[0]),
                                    "HDF5 files *.h5")
                                if not len(hdf5file):
                                    raise IOError("Invalid output file")
                                hdf5file = qt.safe_str(hdf5file)
                                if not hdf5file.endswith(".h5"):
                                    hdf5file += ".h5"
                                hdf, self.data = ArraySave.getHDF5FileInstanceAndBuffer(
                                    hdf5file, (self.nbFiles, arrRet.shape[0],
                                               arrRet.shape[1]))
                                self.incrProgressBar = 0
                                for tempEdfFileName in filelist:
                                    tempEdf = EdfFile.EdfFile(
                                        tempEdfFileName, 'rb')
                                    pieceOfStack = tempEdf.GetData(0)
                                    self.data[self.incrProgressBar, :, :] = \
                                        pieceOfStack[:, :]
                                    hdf.flush()
                                    self.incrProgressBar += 1
                                    self.onProgress(self.incrProgressBar)
                                hdf5done = True
                        if not hdf5done:
                            for i in range(3):
                                print("\7")
                            samplingStep = None
                            i = 2
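                            # coarsen the sampling (2x2, 3x3, ...) until the
                            # reduced stack fits in memory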
                            while samplingStep is None:
                                print(
                                    "**************************************************"
                                )
                                print(" Memory error! Attempting %dx%d"
                                      " sampling reduction " % (i, i))
                                print(
                                    "**************************************************"
                                )
                                s1, s2 = arrRet[::i, ::i].shape
                                try:
                                    self.data = numpy.zeros(
                                        (s1, s2, self.nbFiles), self.__dtype)
                                    samplingStep = i
                                except:
                                    i += 1
                            self.incrProgressBar = 0
                            for tempEdfFileName in filelist:
                                tempEdf = EdfFile.EdfFile(
                                    tempEdfFileName, 'rb')
                                pieceOfStack = tempEdf.GetData(0)
                                self.data[:, :, self.incrProgressBar] = \
                                    pieceOfStack[::samplingStep, ::samplingStep]
                                self.incrProgressBar += 1
                                self.onProgress(self.incrProgressBar)
                self.onEnd()
        else:
            self.__imageStack = False
            if len(singleImageShape) == 1:
                #single line
                #be ready for specfile stack?
                raise IOError("Not implemented yet")
                self.data = numpy.zeros(
                    (self.nbFiles, arrRet.shape[0], nImages), self.__dtype)
                self.incrProgressBar = 0
                for tempEdfFileName in filelist:
                    tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                    for i in range(nImages):
                        pieceOfStack = tempEdf.GetData(i)
                        self.data[self.incrProgressBar, :, i] = pieceOfStack[:]
                    self.incrProgressBar += 1
                    self.onProgress(self.incrProgressBar)
                self.onEnd()
            else:
                if nImages > 1:
                    #this is not the common case
                    #should I try to convert it to a standard one
                    #using a 3D matrix or keep as 4D matrix?
                    if self.nbFiles > 1:
                        if (arrRet.shape[0] > 1) and\
                           (arrRet.shape[1] > 1):
                            raise IOError(\
                                "Multiple files with multiple images not implemented yet")
                        elif arrRet.shape[0] == 1:
                            self.data = numpy.zeros(
                                (self.nbFiles, arrRet.shape[0] * nImages,
                                 arrRet.shape[1]), self.__dtype)
                            self.incrProgressBar = 0
                            for tempEdfFileName in filelist:
                                tempEdf = EdfFile.EdfFile(
                                    tempEdfFileName, 'rb')
                                for i in range(nImages):
                                    pieceOfStack = tempEdf.GetData(i)
                                    self.data[self.incrProgressBar, i,:] = \
                                                              pieceOfStack[:,:]
                                self.incrProgressBar += 1
                                self.onProgress(self.incrProgressBar)
                        elif arrRet.shape[1] == 1:
                            self.data = numpy.zeros(
                                (self.nbFiles, arrRet.shape[1] * nImages,
                                 arrRet.shape[0]), self.__dtype)
                            self.incrProgressBar = 0
                            for tempEdfFileName in filelist:
                                tempEdf = EdfFile.EdfFile(
                                    tempEdfFileName, 'rb')
                                for i in range(nImages):
                                    pieceOfStack = tempEdf.GetData(i)
                                    self.data[self.incrProgressBar, i,:] = \
                                                            pieceOfStack[:,:]
                                self.incrProgressBar += 1
                                self.onProgress(self.incrProgressBar)
                    else:
                        self.data = numpy.zeros(
                            (nImages * self.nbFiles, arrRet.shape[0],
                             arrRet.shape[1]), self.__dtype)
                        self.incrProgressBar = 0
                        for tempEdfFileName in filelist:
                            tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                            for i in range(nImages):
                                pieceOfStack = tempEdf.GetData(i)
                                self.data[nImages * self.incrProgressBar +
                                          i, :, :] = pieceOfStack[:, :]
                            self.incrProgressBar += 1
                            self.onProgress(self.incrProgressBar)
                    self.onEnd()
                else:
                    if fileindex == 1:
                        try:
                            self.data = numpy.zeros(
                                (arrRet.shape[0], self.nbFiles,
                                 arrRet.shape[1]), self.__dtype)
                        except:
                            try:
                                self.data = numpy.zeros(
                                    (arrRet.shape[0], self.nbFiles,
                                     arrRet.shape[1]), numpy.float32)
                            except:
                                self.data = numpy.zeros(
                                    (arrRet.shape[0], self.nbFiles,
                                     arrRet.shape[1]), numpy.int16)
                    else:
                        try:
                            # calculate needed bytes
                            if self.__dtype == numpy.float64:
                                bytefactor = 8
                            else:
                                bytefactor = 4
                            needed_ = self.nbFiles * \
                                       arrRet.shape[0] *\
                                       arrRet.shape[1] * bytefactor
                            physicalMemory = PhysicalMemory.getPhysicalMemoryOrNone(
                            )
                            if physicalMemory is not None:
                                # spare 5% of memory
                                if physicalMemory < (1.05 * needed_):
                                    raise MemoryError(
                                        "Not enough physical memory available")
                            self.data = numpy.zeros(
                                (self.nbFiles, arrRet.shape[0],
                                 arrRet.shape[1]), self.__dtype)
                        except:
                            try:
                                needed_ = self.nbFiles * \
                                           arrRet.shape[0] *\
                                           arrRet.shape[1] * 4
                                physicalMemory = PhysicalMemory.getPhysicalMemoryOrNone(
                                )
                                if physicalMemory is not None:
                                    # spare 5% of memory
                                    if physicalMemory < (1.05 * needed_):
                                        raise MemoryError(
                                            "Not enough physical memory available"
                                        )
                                self.data = numpy.zeros(
                                    (self.nbFiles, arrRet.shape[0],
                                     arrRet.shape[1]), numpy.float32)
                            except (MemoryError, ValueError):
                                text = "Memory Error: Attempt subsampling or convert to HDF5"
                                if HDF5 and (('PyMcaQt' in sys.modules) or\
                                   ('PyMca.PyMcaQt' in sys.modules)):
                                    from PyMca5 import PyMcaQt as qt
                                    from PyMca5 import ArraySave
                                    msg = qt.QMessageBox.information(
                                        None, "Memory error\n",
                                        "Do you want to convert your data to HDF5?\n",
                                        qt.QMessageBox.Yes, qt.QMessageBox.No)
                                    if msg == qt.QMessageBox.No:
                                        raise MemoryError(text)
                                    hdf5file = qt.QFileDialog.getSaveFileName(
                                        None, "Please select output file name",
                                        os.path.dirname(filelist[0]),
                                        "HDF5 files *.h5")
                                    if not len(hdf5file):
                                        raise IOError(\
                                            "Invalid output file")
                                    hdf5file = qt.safe_str(hdf5file)
                                    if not hdf5file.endswith(".h5"):
                                        hdf5file += ".h5"
                                    hdf, self.data = ArraySave.getHDF5FileInstanceAndBuffer(
                                        hdf5file,
                                        (self.nbFiles, arrRet.shape[0],
                                         arrRet.shape[1]))
                                else:
                                    raise MemoryError("Memory Error")
                    self.incrProgressBar = 0
                    if fileindex == 1:
                        for tempEdfFileName in filelist:
                            tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                            pieceOfStack = tempEdf.GetData(0)
                            self.data[:, self.incrProgressBar, :] = \
                                pieceOfStack[:, :]
                            self.incrProgressBar += 1
                            self.onProgress(self.incrProgressBar)
                    else:
                        # test for ID24 map
                        ID24 = False
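                        # ID24 maps come with companion files derived from
                        # the "_sample_" file name: "_samplebk_" (background),
                        # "_I0start_" / "_I0end_" (incoming intensity,
                        # linearly interpolated between them) and
                        # "_positioners_" (motor positions); images are then
                        # converted via -log((data - background) / I0)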
                        if "_sample_" in filelist[0]:
                            bckFile = filelist[0].replace(
                                "_sample_", "_samplebk_")
                            if os.path.exists(bckFile):
                                bckData = EdfFile.EdfFile(bckFile).GetData(0)
                            else:
                                bckData = 0
                            i0StartFile = filelist[0].replace(
                                "_sample_", "_I0start_")
                            if os.path.exists(i0StartFile):
                                ID24 = True
                                id24idx = 0
                                i0Start = EdfFile.EdfFile(
                                    i0StartFile,
                                    'rb').GetData(0).astype(numpy.float64)
                                i0Start -= bckData
                                i0EndFile = filelist[0].replace(
                                    "_sample_", "_I0end_")
                                i0Slope = 0.0
                                if os.path.exists(i0EndFile):
                                    i0End = EdfFile.EdfFile(
                                        i0EndFile, 'rb').GetData(0) - bckData
                                    i0Slope = (i0End - i0Start) / len(filelist)
                            positionersFile = filelist[0].replace(
                                "_sample_", "_positioners_")
                            if os.path.exists(positionersFile):
                                positionersEdf = EdfFile.EdfFile(
                                    positionersFile, 'rb')
                                self.info["positioners"] = {}
                                for i in range(positionersEdf.GetNumImages()):
                                    motorName = positionersEdf.GetHeader(
                                        i).get("Title", "Motor_%02d" % i)
                                    motorValue = positionersEdf.GetData(i)
                                    self.info["positioners"][
                                        motorName] = motorValue
                        for tempEdfFileName in filelist:
                            tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                            if ID24:
                                pieceOfStack = -numpy.log(
                                    (tempEdf.GetData(0) - bckData) /
                                    (i0Start[0, :] + id24idx * i0Slope))
                                pieceOfStack[~numpy.isfinite(pieceOfStack)] = 1
                                id24idx += 1
                            else:
                                pieceOfStack = tempEdf.GetData(0)
                            try:
                                self.data[self.incrProgressBar, :, :] = \
                                    pieceOfStack[:, :]
                            except:
                                if pieceOfStack.shape[1] != arrRet.shape[1]:
                                    print(" ERROR on file %s" %
                                          tempEdfFileName)
                                    print(" DIM 1 error. Assuming missing"
                                          " data were at the end!")
                                if pieceOfStack.shape[0] != arrRet.shape[0]:
                                    print(" ERROR on file %s" %
                                          tempEdfFileName)
                                    print(" DIM 0 error. Assuming missing"
                                          " data were at the end!")
                                self.data[self.incrProgressBar,\
                                         :pieceOfStack.shape[0],\
                                         :pieceOfStack.shape[1]] = pieceOfStack[:,:]
                            self.incrProgressBar += 1
                            self.onProgress(self.incrProgressBar)
                    self.onEnd()
        self.__nFiles = self.incrProgressBar
        self.__nImagesPerFile = nImages
        shape = self.data.shape
        for i in range(len(shape)):
            key = 'Dim_%d' % (i + 1, )
            self.info[key] = shape[i]
        if not isinstance(self.data, numpy.ndarray):
            hdf.flush()
            self.info["SourceType"] = "HDF5Stack1D"
            if self.__imageStack:
                self.info["McaIndex"] = 0
                self.info["FileIndex"] = 1
            else:
                self.info["McaIndex"] = 2
                self.info["FileIndex"] = 0
            self.info["SourceName"] = [hdf5file]
            self.info["NumberOfFiles"] = 1
            self.info["Size"] = 1
        elif actualImageStack:
            self.info["SourceType"] = SOURCE_TYPE
            self.info["McaIndex"] = 0
            self.info["FileIndex"] = 1
            self.info["SourceName"] = self.sourceName
            self.info["NumberOfFiles"] = self.__nFiles * 1
            self.info["Size"] = self.__nFiles * self.__nImagesPerFile
        else:
            self.info["SourceType"] = SOURCE_TYPE
            self.info["FileIndex"] = fileindex
            self.info["SourceName"] = self.sourceName
            self.info["NumberOfFiles"] = self.__nFiles * 1
            self.info["Size"] = self.__nFiles * self.__nImagesPerFile

        # try to use positioners to compute the scales (ID24 specific)
        xPositionerName = None
        yPositionerName = None
        if "positioners" in self.info and len(self.info["positioners"]) == 2:
            for k, v in self.info["positioners"].items():
                if isinstance(v, numpy.ndarray) and v.ndim == 2:
                    deltaDim1 = v[:, 1:] - v[:, :-1]
                    deltaDim0 = v[1:, :] - v[:-1, :]
                    if numpy.any(deltaDim1) and not numpy.any(deltaDim0):
                        # positioner varying only along dim1
                        xPositionerName = k
                        # should we check that all delta values are equal?
                        deltaX = numpy.mean(deltaDim1)
                        originX = v[0, 0]
                    elif numpy.any(deltaDim0) and not numpy.any(deltaDim1):
                        # positioner varying only along dim0
                        yPositionerName = k
                        deltaY = numpy.mean(deltaDim0)
                        originY = v[0, 0]
            if xPositionerName is not None and yPositionerName is not None:
                self.info["xScale"] = (originX, deltaX)
                self.info["yScale"] = (originY, deltaY)
Example #3
    def loadFileList(self, filelist, selection, scanlist=None):
        """
        loadFileList(self, filelist, selection, scanlist=None)
        filelist is the list of file names belonging to the stack
        selection is a dictionary with the keys x, y, m.
        x        is the path to the x data (the channels) in the spectrum,
                 without the first level "directory". It is unused (for now).
        y        is the path to the 1D data (the counts) in the spectrum,
                 without the first level "directory"
        m        is the path to the normalizing data (I0 or whatever)
                 without the first level "directory".
        scanlist is the list of first level "directories" containing the 1D data
                 Example: The actual path has the form:
                 /whatever1/whatever2/counts
                 That means scanlist = ["/whatever1"]
                 and selection['y'] = "/whatever2/counts"
        """
        if DEBUG:
            print("filelist = ", filelist)
            print("selection = ", selection)
            print("scanlist = ", scanlist)
        # all the files in the same source
        hdfStack = NexusDataSource.NexusDataSource(filelist)

        #if there is more than one file, it is assumed all the files have
        #the same structure.
        tmpHdf = hdfStack._sourceObjectList[0]
        entryNames = []
        for key in tmpHdf["/"].keys():
            if isinstance(tmpHdf["/"+key], h5py.Group):
                entryNames.append(key)

        # build the selection in terms of HDF paths
        # for the time being, only the first item of the x selection is used
        
        xSelection = selection['x']
        if xSelection is not None:
            if not isinstance(xSelection, list):
                xSelection = [xSelection]
            xSelection = xSelection[0] if len(xSelection) else None
        # only one y is taken
        ySelection = selection['y']
        if type(ySelection) == type([]):
            ySelection = ySelection[0]

        # monitor selection
        mSelection = selection['m']
        if mSelection not in [None, []]:
            if type(mSelection) != type([]):
                mSelection = [mSelection]            
        if type(mSelection) == type([]):
            if len(mSelection):
                mSelection = mSelection[0]
            else:
                mSelection = None
        else:
            mSelection = None
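        # at this point xSelection, ySelection and mSelection are either None
        # or a single HDF5 path string; e.g. "m": [] and "m": None both
        # normalize to mSelection = None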

        USE_JUST_KEYS = False
        # deal with the pathological case where the scanlist corresponds
        # to a selected top level dataset
        if len(entryNames) == 0:
            if scanlist is not None:
                if len(scanlist) == 1:
                    if scanlist[0] == ySelection:
                        scanlist = None
                        USE_JUST_KEYS = True
        elif len(entryNames) == 1:
            # deal with the SOLEIL case of one entry but with different name
            # in different files
            USE_JUST_KEYS = True
        elif scanlist in [None, []]:
            USE_JUST_KEYS = True
        if USE_JUST_KEYS:
            # if the scanlist is None, it is assumed we are interested in all
            # the scans containing the selection, not that all the scans
            # contain the selection.
            scanlist = []
            if 0:
                JUST_KEYS = False
                #expect same entry names in the files
                #Unfortunately this does not work for SOLEIL
                for entry in entryNames:
                    path = "/"+entry + ySelection
                    dirname = posixpath.dirname(path)
                    base = posixpath.basename(path)
                    try:
                        if base in tmpHdf[dirname].keys():                        
                            scanlist.append(entry)
                    except:
                        pass
            else:
                JUST_KEYS = True
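                # entries are then addressed by position through synthetic
                # keys "1.1", "1.2", ...; later code recovers the entry index
                # with int(scan.split(".")[-1]) - 1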
                #expect same structure in the files even if the
                #names are different (SOLEIL ...)
                if len(entryNames):
                    i = 0
                    for entry in entryNames:
                        path = "/"+entry + ySelection
                        dirname = posixpath.dirname(path)
                        base = posixpath.basename(path)
                        if hasattr(tmpHdf[dirname], "keys"):
                            i += 1
                            if base in tmpHdf[dirname].keys():
                                scanlist.append("1.%d" % i)
                    if not len(scanlist):
                        path = "/" + ySelection
                        dirname = posixpath.dirname(path)
                        base = posixpath.basename(path)
                        try:
                            if base in tmpHdf[dirname].keys():
                                JUST_KEYS = False
                                scanlist.append("")
                        except:
                            #it will crash later on
                            pass                        
                else:
                    JUST_KEYS = False
                    scanlist.append("")
        else:
            try:
                number, order = [int(x) for x in scanlist[0].split(".")]
                JUST_KEYS = True
            except:
                JUST_KEYS = False
            if not JUST_KEYS:
                for scan in scanlist:
                    if scan.startswith("/"):
                        t = scan[1:]
                    else:
                        t = scan
                    if t not in entryNames:
                        raise ValueError("Entry %s not in file" % scan)
        
        nFiles = len(filelist)
        nScans = len(scanlist)
        if JUST_KEYS:
            if not nScans:
                raise IOError("No entry contains the required data")

        if DEBUG:
            print("Retained number of files = %d" % nFiles)
            print("Retained number of scans = %d" % nScans)

        # Now we have to decide the number of mca ...
        # I assume all the scans contain the same number of mca
        if JUST_KEYS:
            path = "/" + entryNames[int(scanlist[0].split(".")[-1])-1] + ySelection
            if mSelection is not None:
                mpath = "/" + entryNames[int(scanlist[0].split(".")[-1])-1] + mSelection
            if xSelection is not None:
                xpath = "/" + entryNames[int(scanlist[0].split(".")[-1])-1] + xSelection
        else:
            path = scanlist[0] + ySelection
            if mSelection is not None:
                mpath = scanlist[0] + mSelection
            if xSelection is not None:
                xpath = scanlist[0] + xSelection
        
        yDataset = tmpHdf[path]

        if self.__dtype is None:
            self.__dtype = yDataset.dtype
            if self.__dtype in [numpy.int16, numpy.uint16]:
                self.__dtype = numpy.float32
            elif self.__dtype in [numpy.int32, numpy.uint32]:
                self.__dtype = numpy.float64
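        # presumably widened so that the stack is stored in floating point:
        # accumulated or monitor-normalized values would not fit the original
        # integer range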

        #figure out the shape of the stack
        shape = yDataset.shape
        mcaIndex = selection.get('index', len(shape)-1)
        if mcaIndex == -1:
            mcaIndex = len(shape) - 1
        if DEBUG:
            print("mcaIndex = %d" % mcaIndex)
        considerAsImages = False
        dim0, dim1, mcaDim = self.getDimensions(nFiles, nScans, shape,
                                                index=mcaIndex)
        try:
            if self.__dtype in [numpy.float32, numpy.int32]:
                bytefactor = 4
            elif self.__dtype in [numpy.int16, numpy.uint16]:
                bytefactor = 2
            elif self.__dtype in [numpy.int8, numpy.uint8]:
                bytefactor = 1
            else:
                bytefactor = 8

            neededMegaBytes = nFiles * dim0 * dim1 * (mcaDim * bytefactor/(1024*1024.))
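            # worked example with hypothetical sizes: one file holding a
            # 100 x 100 map of 2048-channel float32 spectra needs
            #     1 * 100 * 100 * (2048 * 4 / (1024 * 1024.)) ~ 78 MegaBytes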
            physicalMemory = PhysicalMemory.getPhysicalMemoryOrNone()
            if physicalMemory is None:
                # 5 Gigabytes should be a good compromise
                physicalMemory = 6000
            else:
                physicalMemory /= (1024*1024.)
            if (neededMegaBytes > (0.95*physicalMemory))\
               and (nFiles == 1) and (len(shape) == 3):
                if self.__dtype0 is None:
                    if (bytefactor == 8) and (neededMegaBytes < (2*physicalMemory)):
                        #try reading as float32
                        self.__dtype = numpy.float32
                    else:
                        raise MemoryError("Force dynamic loading")
                else:
                    raise MemoryError("Force dynamic loading")
            if (mcaIndex == 0) and ( nFiles == 1) and (nScans == 1):
                #keep the original arrangement but in memory
                self.data = numpy.zeros(yDataset.shape, self.__dtype)
                considerAsImages = True
            else:
                # force arrangement as spectra
                self.data = numpy.zeros((dim0, dim1, mcaDim), self.__dtype)
            DONE = False
        except (MemoryError, ValueError):
            #some versions report ValueError instead of MemoryError
            if (nFiles == 1) and (len(shape) == 3):
                print("Attempting dynamic loading")
                self.data = yDataset
                if mSelection is not None:
                    mDataset = tmpHdf[mpath].value
                    self.monitor = [mDataset]
                if xSelection is not None:
                    xDataset = tmpHdf[xpath].value
                    self.x = [xDataset]
                if h5py.version.version < '2.0':
                    #prevent automatic closing keeping a reference
                    #to the open file
                    self._fileReference = hdfStack
                DONE = True
            else:
                #what to do if the number of dimensions is only 2?
                raise
        
        if (not DONE) and (not considerAsImages):
            self.info["McaIndex"] = 2
            n = 0

            if dim0 == 1:
                self.onBegin(dim1)
            else:
                self.onBegin(dim0)
            self.incrProgressBar=0
            for hdf in hdfStack._sourceObjectList:
                entryNames = list(hdf["/"].keys())
                goodEntryNames = []
                for entry in entryNames:
                    tmpPath = "/" + entry
                    if hasattr(hdf[tmpPath], "keys"):
                        goodEntryNames.append(entry)
                for scan in scanlist:
                    if JUST_KEYS:
                        entryName = goodEntryNames[int(scan.split(".")[-1])-1]
                        path = entryName + ySelection
                        if mSelection is not None:
                            mpath = entryName + mSelection
                            mDataset = hdf[mpath].value
                        if xSelection is not None:
                            xpath = entryName + xSelection
                            xDataset = hdf[xpath].value
                    else:
                        path = scan + ySelection
                        if mSelection is not None:
                            mpath = scan + mSelection
                            mDataset = hdf[mpath].value
                        if xSelection is not None:
                            xpath = scan + xSelection
                            xDataset = hdf[xpath].value
                    try:
                        yDataset = hdf[path]
                        tmpShape = yDataset.shape
                        totalBytes = numpy.ones((1,), yDataset.dtype).itemsize
                        for nItems in tmpShape:
                            totalBytes *= nItems
                        if (totalBytes/(1024.*1024.)) > 500:
                            #read from disk
                            IN_MEMORY = False
                        else:
                            #read the data into memory
                            yDataset = hdf[path].value 
                            IN_MEMORY = True
                    except (MemoryError, ValueError):
                        yDataset = hdf[path]
                        IN_MEMORY = False
                    nMcaInYDataset = 1
                    for dim in yDataset.shape:
                        nMcaInYDataset *= dim
                    nMcaInYDataset = int(nMcaInYDataset/mcaDim)
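                    # e.g. a (10, 20, 2048) y dataset with mcaDim = 2048
                    # contains 10 * 20 = 200 individual spectra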
                    if mcaIndex != 0:
                        if IN_MEMORY:
                            yDataset.shape = -1, mcaDim
                        if mSelection is not None:
                            case = -1
                            nMonitorData = 1
                            for  v in mDataset.shape:
                                nMonitorData *= v
                            if nMonitorData == nMcaInYDataset:
                                mDataset.shape = nMcaInYDataset
                                case = 0
                            elif nMonitorData == (nMcaInYDataset * mcaDim):
                                case = 1
                                mDataset.shape = nMcaInYDataset, mcaDim
                            if case == -1:
                                raise ValueError(\
                                    "I do not know how to handle this monitor data")
                        if (len(yDataset.shape) == 3) and\
                           (dim1 == yDataset.shape[1]):
                            mca = 0
                            deltaI = int(yDataset.shape[1]/dim1)
                            for ii in range(yDataset.shape[0]):
                                i = int(n/dim1)
                                yData = yDataset[ii:(ii+1)]
                                yData.shape = -1, mcaDim
                                if mSelection is not None:
                                    if case == 0:
                                        mData = numpy.outer(mDataset[mca:(mca+dim1)],
                                                            numpy.ones((mcaDim)))
                                        self.data[i, :, :] = yData/mData
                                    elif case == 1:
                                        mData = mDataset[mca:(mca+dim1), :]
                                        mData.shape = -1, mcaDim
                                        self.data[i, :, :]  = yData/mData
                                else:
                                    self.data[i:(i+deltaI), :] = yData
                                n += yDataset.shape[1]
                                mca += dim1
                        else:
                            for mca in range(nMcaInYDataset):
                                i = int(n/dim1)
                                j = n % dim1
                                if len(yDataset.shape) == 3:
                                    ii = int(mca/yDataset.shape[1])
                                    jj = mca % yDataset.shape[1]
                                    yData = yDataset[ii, jj]
                                elif len(yDataset.shape) == 2:
                                    yData = yDataset[mca,:]
                                elif len(yDataset.shape) == 1:
                                    yData = yDataset
                                if mSelection is not None:
                                    if case == 0:
                                        self.data[i, j, :] = yData/mDataset[mca]
                                    elif case == 1:
                                        self.data[i, j, :]  = yData/mDataset[mca, :]
                                else:
                                    self.data[i, j, :] = yData
                                n += 1
                    else:
                        if mSelection is not None:
                            case = -1
                            nMonitorData = 1
                            for  v in mDataset.shape:
                                nMonitorData *= v
                            if nMonitorData == yDataset.shape[0]:
                                case = 3
                                mDataset.shape = yDataset.shape[0]
                            elif nMonitorData == nMcaInYDataset:
                                mDataset.shape = nMcaInYDataset
                                case = 0
                            #elif nMonitorData == (yDataset.shape[1] * yDataset.shape[2]):
                            #    case = 1
                            #    mDataset.shape = yDataset.shape[1], yDataset.shape[2]
                            if case == -1:
                                raise ValueError(\
                                    "I do not know how to handle this monitor data")
                        if IN_MEMORY:
                            yDataset.shape = mcaDim, -1
                        if len(yDataset.shape) != 3:
                            for mca in range(nMcaInYDataset):
                                i = int(n/dim1)
                                j = n % dim1
                                if len(yDataset.shape) == 3:
                                    ii = int(mca/yDataset.shape[2])
                                    jj = mca % yDataset.shape[2]
                                    yData = yDataset[:, ii, jj]
                                elif len(yDataset.shape) == 2:
                                    yData = yDataset[:, mca]
                                elif len(yDataset.shape) == 1:
                                    yData = yDataset[:]                            
                                if mSelection is not None:
                                    if case == 0:
                                        self.data[i, j, :] = yData/mDataset[mca]
                                    elif case == 1:
                                        self.data[i, j, :]  = yData/mDataset[:, mca]
                                    elif case == 3:
                                        self.data[i, j, :]  = yData/mDataset
                                else:
                                    self.data[i, j, :] = yData
                                n += 1
                        else:
                            #stack of images to be read as MCA
                            for nImage in range(yDataset.shape[0]):
                                tmp = yDataset[nImage:(nImage+1)]
                                if len(tmp.shape) == 3:
                                    i = int(n/dim1)
                                    j = n % dim1
                                    if 0:
                                        #this loop is extremely SLOW!!!(and useless)
                                        for ii in range(tmp.shape[1]):
                                            for jj in range(tmp.shape[2]):
                                                self.data[i+ii, j+jj, nImage] = tmp[0, ii, jj]
                                    else:
                                        self.data[i:i+tmp.shape[1],
                                                  j:j+tmp.shape[2], nImage] = tmp[0]
                            if mSelection is not None:
                                for mca in range(yDataset.shape[0]):
                                    i = int(n/dim1)
                                    j = n % dim1
                                    yData = self.data[i, j, :]
                                    if case == 0:
                                        self.data[i, j, :] = yData/mDataset[mca]
                                    elif case == 1:
                                        self.data[i, j, :]  = yData/mDataset[:, mca]
                                    n += 1
                            else:
                                n += tmp.shape[1] * tmp.shape[2]
                    if dim0 == 1:
                        self.onProgress(j)
                if dim0 != 1:
                    self.onProgress(i)
            self.onEnd()
        elif not DONE:
            # data into memory but as images
            self.info["McaIndex"] = mcaIndex            
            for hdf in hdfStack._sourceObjectList:
                entryNames = list(hdf["/"].keys())
                for scan in scanlist:
                    if JUST_KEYS:
                        entryName = entryNames[int(scan.split(".")[-1])-1]
                        path = entryName + ySelection
                        if mSelection is not None:
                            mpath = entryName + mSelection
                            mDataset = hdf[mpath].value
                        if xSelection is not None:
                            xpath = entryName + xSelection
                            xDataset = hdf[xpath].value
                    else:
                        path = scan + ySelection
                        if mSelection is not None:
                            mpath = scan + mSelection
                            mDataset = hdf[mpath].value
                        if xSelection is not None:
                            xpath = scan + xSelection
                            xDataset = hdf[xpath].value
                    if mSelection is not None:
                        nMonitorData = mDataset.size
                        case = -1
                        yDatasetShape = yDataset.shape 
                        if nMonitorData == yDatasetShape[0]:
                            #as many monitor data as images
                            mDataset.shape = yDatasetShape[0]
                            case = 0
                        elif nMonitorData == (yDatasetShape[1] * yDatasetShape[2]):
                            #as many monitorData as pixels
                            case = 1
                            mDataset.shape = yDatasetShape[1], yDatasetShape[2]
                        if case == -1:
                            raise ValueError(\
                                "I do not know how to handle this monitor data")
                        if case == 0:
                            for i in range(yDatasetShape[0]):
                                self.data[i] = yDataset[i] / mDataset[i]
                        elif case == 1:
                            for i in range(yDataset.shape[0]):
                                self.data[i] = yDataset[i] / mDataset
                    else:
                        for i in range(yDataset.shape[0]):
                            self.data[i:i+1] = yDataset[i:i+1]
        else:
            self.info["McaIndex"] = mcaIndex


        self.info["SourceType"] = SOURCE_TYPE
        self.info["SourceName"] = filelist
        self.info["Size"]       = 1
        self.info["NumberOfFiles"] = 1
        if mcaIndex == 0:
            self.info["FileIndex"] = 1
        else:
            self.info["FileIndex"] = 0
        self.info['McaCalib'] = [ 0.0, 1.0, 0.0]
        self.info['Channel0'] = 0
        shape = self.data.shape
        for i in range(len(shape)):
            key = 'Dim_%d' % (i+1,)
            self.info[key] = shape[i]
        if xSelection is not None:
            if xDataset.size == shape[self.info['McaIndex']]:
                self.x = [xDataset.reshape(-1)]
            else:
                print("Ignoring xSelection")
Example No. 4
    def loadFileList(self, filelist, selection, scanlist=None):
        """
        loadFileList(self, filelist, y, scanlist=None, monitor=None, x=None)
        filelist is the list of file names belonging to the stack
        selection is a dictionary with the keys x, y, m.
        x        is the path to the x data (the channels) in the spectrum,
                 without the first level "directory". It is unused (for now).
        y        is the path to the 1D data (the counts) in the spectrum,
                 without the first level "directory"
        m        is the path to the normalizing data (I0 or whatever)
                 without the first level "directory".
        scanlist is the list of first level "directories" containing the 1D data
                 Example: The actual path has the form:
                 /whatever1/whatever2/counts
                 That means scanlist = ["/whatever1"]
                 and selection['y'] = "/whatever2/counts"
        """
        _logger.debug("filelist = %s", filelist)
        _logger.debug("selection = %s", selection)
        _logger.debug("scanlist = %s", scanlist)
        # all the files in the same source
        hdfStack = NexusDataSource.NexusDataSource(filelist)

        # if there is more than one file, it is assumed all the files have
        # the same structure.
        tmpHdf = hdfStack._sourceObjectList[0]
        entryNames = []
        for key in tmpHdf["/"].keys():
            if isinstance(tmpHdf["/"+key], h5py.Group):
                entryNames.append(key)

        # build the selection in terms of HDF terms
        # for the time being, only the first item in the x selection is used
        xSelection = selection.get('x', None)
        if xSelection is not None:
            if type(xSelection) != type([]):
                xSelection = [xSelection]
        if type(xSelection) == type([]):
            if len(xSelection):
                xSelection = xSelection[0]
            else:
                xSelection = None
        else:
            xSelection = None
        # only one y is taken
        ySelection = selection['y']
        if type(ySelection) == type([]):
            ySelectionList = list(ySelection)
            ySelection = ySelection[0]
        else:
            ySelectionList = [ySelection]
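        # the full list is kept on purpose: every y selection will be read
        # and accumulated (+=) into the same stack further below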

        # monitor selection
        mSelection = selection.get('m', None)
        if mSelection not in [None, []]:
            if type(mSelection) != type([]):
                mSelection = [mSelection]
        if type(mSelection) == type([]):
            if len(mSelection):
                mSelection = mSelection[0]
            else:
                mSelection = None
        else:
            mSelection = None

        USE_JUST_KEYS = False
        # deal with the pathological case where the scanlist corresponds
        # to a selected top level dataset
        if len(entryNames) == 0:
            if scanlist is not None:
                if (ySelection in scanlist) or \
                   (xSelection in scanlist) or \
                   (mSelection in scanlist):
                    scanlist = None
                    USE_JUST_KEYS = True
            else:
                USE_JUST_KEYS = True
        elif len(entryNames) == 1:
            # deal with the SOLEIL case of one entry but with different name
            # in different files
            USE_JUST_KEYS = True
        elif scanlist in [None, []]:
            USE_JUST_KEYS = True
        if USE_JUST_KEYS:
            # if the scanlist is None, it is assumed we are interested in all
            # the scans containing the selection, not that all the scans
            # contain the selection.
            scanlist = []
            if 0:
                JUST_KEYS = False
                #expect same entry names in the files
                #Unfortunately this does not work for SOLEIL
                for entry in entryNames:
                    path = "/" + entry + ySelection
                    dirname = posixpath.dirname(path)
                    base = posixpath.basename(path)
                    try:
                        file_entry = tmpHdf[dirname]
                        if base in file_entry.keys():
                            scanlist.append(entry)
                    except:
                        pass
            else:
                JUST_KEYS = True
                #expect same structure in the files even if the
                #names are different (SOLEIL ...)
                if len(entryNames):
                    i = 0
                    for entry in entryNames:
                        i += 1
                        path = "/" + entry + ySelection
                        dirname = posixpath.dirname(path)
                        base = posixpath.basename(path)
                        try:
                            file_entry = tmpHdf[dirname]
                            if hasattr(file_entry, "keys"):
                                if base in file_entry.keys():
                                    # this is the case of a selection inside a group
                                    scanlist.append("1.%d" % i)
                        except KeyError:
                            _logger.warning("%s not in file, ignoring.", dirname)
                    if not len(scanlist):
                        if not ySelection.startswith("/"):
                            path = "/" + ySelection
                        else:
                            path = ySelection
                        dirname = posixpath.dirname(path)
                        base = posixpath.basename(path)
                        try:
                            if dirname in tmpHdf["/"]:
                                # this is the case of a dataset at top level
                                # or having given the complete path
                                if base in tmpHdf[dirname]:
                                    JUST_KEYS = False
                                    scanlist.append("")
                            elif base in file_entry.keys():
                                JUST_KEYS = False
                                scanlist.append("")
                        except:
                            #it will crash later on
                            pass
                else:
                    JUST_KEYS = False
                    scanlist.append("")
        else:
            try:
                number, order = [int(x) for x in scanlist[0].split(".")]
                JUST_KEYS = True
            except:
                JUST_KEYS = False
            if not JUST_KEYS:
                for scan in scanlist:
                    if scan.startswith("/"):
                        t = scan[1:]
                    else:
                        t = scan
                    if t not in entryNames:
                        raise ValueError("Entry %s not in file" % scan)

        nFiles = len(filelist)
        nScans = len(scanlist)
        if JUST_KEYS:
            if not nScans:
                raise IOError("No entry contains the required data")

        _logger.debug("Retained number of files = %d", nFiles)
        _logger.debug("Retained number of scans = %d", nScans)

        # Now we have to decide the number of mca ...
        # I assume all the scans contain the same number of mca
        if JUST_KEYS:
            path = "/" + entryNames[int(scanlist[0].split(".")[-1])-1] + ySelection
            if mSelection is not None:
                mpath = "/" + entryNames[int(scanlist[0].split(".")[-1])-1] + mSelection
            if xSelection is not None:
                xpath = "/" + entryNames[int(scanlist[0].split(".")[-1])-1] + xSelection
        else:
            path = scanlist[0] + ySelection
            if mSelection is not None:
                mpath = scanlist[0] + mSelection
            if xSelection is not None:
                xpath = scanlist[0] + xSelection

        yDataset = tmpHdf[path]
        if (self.__dtype is None) or (mSelection is not None):
            self.__dtype = yDataset.dtype
            if self.__dtype in [numpy.int16, numpy.uint16]:
                self.__dtype = numpy.float32
            elif self.__dtype in [numpy.int32, numpy.uint32]:
                if mSelection:
                    self.__dtype = numpy.float32
                else:
                    self.__dtype = numpy.float64
            elif self.__dtype not in [numpy.float16, numpy.float32,
                                      numpy.float64]:
                # Some datasets from CLS (origin APS?) arrive with a data
                # format equal to ">u2" and are not recognized as integer types
                _logger.debug("Not basic dataset type %s", self.__dtype)
                if ("%s" % self.__dtype).endswith("2"):
                    self.__dtype = numpy.float32
                else:
                    if mSelection:
                        self.__dtype = numpy.float32
                    else:
                        self.__dtype = numpy.float64

        # figure out the shape of the stack
        shape = yDataset.shape
        mcaIndex = selection.get('index', len(shape)-1)
        if mcaIndex == -1:
            mcaIndex = len(shape) - 1
        _logger.debug("mcaIndex = %d", mcaIndex)
        considerAsImages = False
        dim0, dim1, mcaDim = self.getDimensions(nFiles, nScans, shape,
                                                index=mcaIndex)
        try:
            if self.__dtype in [numpy.float32, numpy.int32]:
                bytefactor = 4
            elif self.__dtype in [numpy.int16, numpy.uint16]:
                bytefactor = 2
            elif self.__dtype in [numpy.int8, numpy.uint8]:
                bytefactor = 1
            else:
                bytefactor = 8

            neededMegaBytes = nFiles * dim0 * dim1 * (mcaDim * bytefactor/(1024*1024.))
            _logger.info("Using %d bytes per item" % bytefactor)
            _logger.info("Needed %d Megabytes" % neededMegaBytes)
            physicalMemory = None
            if hasattr(PhysicalMemory, "getAvailablePhysicalMemoryOrNone"):
                physicalMemory = PhysicalMemory.getAvailablePhysicalMemoryOrNone()
            if not physicalMemory:
                physicalMemory = PhysicalMemory.getPhysicalMemoryOrNone()
            else:
                _logger.info("Available physical memory %.1f GBytes" % \
                             (physicalMemory/(1024*1024*1024.)))
            if physicalMemory is None:
                # 6 Gigabytes of available memory
                # should be a good compromise in 2018
                physicalMemory = 6000
                _logger.info("Assumed physical memory %.1f MBytes" % physicalMemory)
            else:
                physicalMemory /= (1024*1024.)
            _logger.info("Using physical memory %.1f GBytes" % (physicalMemory/1024))
            if (neededMegaBytes > (0.95*physicalMemory))\
               and (nFiles == 1) and (len(shape) == 3):
                if self.__dtype0 is None:
                    if (bytefactor == 8) and (neededMegaBytes < (2*physicalMemory)):
                        # try reading as float32
                        print("Forcing the use of float32 data")
                        self.__dtype = numpy.float32
                    else:
                        raise MemoryError("Force dynamic loading")
                else:
                    raise MemoryError("Force dynamic loading")
            if (mcaIndex == 0) and ( nFiles == 1) and (nScans == 1):
                #keep the original arrangement but in memory
                self.data = numpy.zeros(yDataset.shape, self.__dtype)
                considerAsImages = True
            else:
                # force arrangement as spectra
                self.data = numpy.zeros((dim0, dim1, mcaDim), self.__dtype)
            DONE = False
        except (MemoryError, ValueError):
            # some versions report ValueError instead of MemoryError
            if (nFiles == 1) and (len(shape) == 3):
                _logger.warning("Attempting dynamic loading")
                if mSelection is not None:
                    _logger.warning("Ignoring monitor")
                self.data = yDataset
                if mSelection is not None:
                    mdtype = tmpHdf[mpath].dtype
                    if mdtype not in [numpy.float64, numpy.float32]:
                        mdtype = numpy.float64
                    mDataset = numpy.asarray(tmpHdf[mpath], dtype=mdtype)
                    self.monitor = [mDataset]
                if xSelection is not None:
                    xDataset = tmpHdf[xpath][()]
                    self.x = [xDataset]
                if h5py.version.version < '2.0':
                    #prevent automatic closing keeping a reference
                    #to the open file
                    self._fileReference = hdfStack
                DONE = True
            else:
                # what to do if the number of dimensions is only 2?
                raise

        # get the mca information associated to the path
        mcaObjectPaths = NexusTools.getMcaObjectPaths(tmpHdf, path)
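        # any of these paths may reference an external file through the
        # "filename::/dataset/path" convention handled below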
        _time = None
        _calibration = None
        _channels = None
        if considerAsImages:
            self._pathHasRelevantInfo = False
        else:
            if len(list(mcaObjectPaths.keys())) > 1: # not just "counts"
                self._pathHasRelevantInfo = True
                if "live_time" in mcaObjectPaths:
                    if DONE:
                        # hopefully it will fit into memory
                        if mcaObjectPaths["live_time"] in tmpHdf:
                            _time = tmpHdf[mcaObjectPaths["live_time"]][()]
                        elif "::" in mcaObjectPaths["live_time"]:
                            tmpFileName, tmpDatasetPath = \
                                        mcaObjectPaths["live_time"].split("::")
                            with h5py.File(tmpFileName, "r") as tmpH5:
                                _time = tmpH5[tmpDatasetPath][()]
                        else:
                            del mcaObjectPaths["live_time"]
                    else:
                        # we have to have as many live times as MCA spectra
                        _time = numpy.zeros( \
                                    (self.data.shape[0] * self.data.shape[1]),
                                    dtype=numpy.float64)
                elif "elapsed_time" in mcaObjectPaths:
                    if DONE:
                        # hopefully it will fit into memory
                        if mcaObjectPaths["elapsed_time"] in tmpHdf:
                            _time = \
                                tmpHdf[mcaObjectPaths["elapsed_time"]][()]
                        elif "::" in mcaObjectPaths["elapsed_time"]:
                            tmpFileName, tmpDatasetPath = \
                                    mcaObjectPaths["elapsed_time"].split("::")
                            with h5py.File(tmpFileName, "r") as tmpH5:
                                _time = tmpH5[tmpDatasetPath][()]
                        else:
                            del mcaObjectPaths["elapsed_time"]
                    else:
                        # we have to have as many elapsed times as MCA spectra
                        _time = numpy.zeros((self.data.shape[0] * self.data.shape[1]),
                                                numpy.float32)
                if "calibration" in mcaObjectPaths:
                    if mcaObjectPaths["calibration"] in tmpHdf:
                        _calibration = \
                                tmpHdf[mcaObjectPaths["calibration"]][()]
                    elif "::" in mcaObjectPaths["calibration"]:
                        tmpFileName, tmpDatasetPath = \
                                    mcaObjectPaths["calibration"].split("::")
                        with h5py.File(tmpFileName, "r") as tmpH5:
                            _calibration = tmpH5[tmpDatasetPath][()]
                    else:
                        del mcaObjectPaths["calibration"]
                if "channels" in mcaObjectPaths:
                    if mcaObjectPaths["channels"] in tmpHdf:
                        _channels = \
                                tmpHdf[mcaObjectPaths["channels"]][()]
                    elif "::" in mcaObjectPaths["channels"]:
                        tmpFileName, tmpDatasetPath = \
                                    mcaObjectPaths["channels"].split("::")
                        with h5py.File(tmpFileName, "r") as tmpH5:
                            _channels = tmpH5[tmpDatasetPath][()]
                    else:
                        del mcaObjectPaths["channels"]
            else:
                self._pathHasRelevantInfo = False

        if (not DONE) and (not considerAsImages):
            _logger.info("Data in memory as spectra")
            self.info["McaIndex"] = 2
            n = 0

            if dim0 == 1:
                self.onBegin(dim1)
            else:
                self.onBegin(dim0)
            self.incrProgressBar=0
            for hdf in hdfStack._sourceObjectList:
                entryNames = list(hdf["/"].keys())
                goodEntryNames = []
                for entry in entryNames:
                    tmpPath = "/" + entry
                    if hasattr(hdf[tmpPath], "keys"):
                        goodEntryNames.append(entry)
                for scan in scanlist:
                    IN_MEMORY = None
                    nStart = n
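                    # remember the flat spectrum index: each y selection below
                    # restarts from it so its spectra accumulate into the same
                    # pixels of self.data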
                    for ySelection in ySelectionList:
                        n = nStart
                        if JUST_KEYS:
                            entryName = goodEntryNames[int(scan.split(".")[-1])-1]
                            path = entryName + ySelection
                            if mSelection is not None:
                                mpath = entryName + mSelection
                                mdtype = hdf[mpath].dtype
                                if mdtype not in [numpy.float64, numpy.float32]:
                                    mdtype = numpy.float64
                                mDataset = numpy.asarray(hdf[mpath], dtype=mdtype)
                            if xSelection is not None:
                                xpath = entryName + xSelection
                                xDataset = hdf[xpath][()]
                        else:
                            path = scan + ySelection
                            if mSelection is not None:
                                mpath = scan + mSelection
                                mdtype = hdf[mpath].dtype
                                if mdtype not in [numpy.float64, numpy.float32]:
                                    mdtype = numpy.float64
                                mDataset = numpy.asarray(hdf[mpath], dtype=mdtype)
                            if xSelection is not None:
                                xpath = scan + xSelection
                                xDataset = hdf[xpath][()]
                        try:
                            yDataset = hdf[path]
                            tmpShape = yDataset.shape
                            totalBytes = numpy.ones((1,), yDataset.dtype).itemsize
                            for nItems in tmpShape:
                                totalBytes *= nItems
                            # should one be conservative or just try?
                            if (totalBytes/(1024.*1024.)) > (0.4 * physicalMemory):
                                _logger.info("Force dynamic loading of spectra")
                                #read from disk
                                IN_MEMORY = False
                            else:
                                #read the data into memory
                                _logger.info("Attempt to load whole map into memory")
                                yDataset = hdf[path][()]
                                IN_MEMORY = True
                        except (MemoryError, ValueError):
                            _logger.info("Dynamic loading of spectra")
                            yDataset = hdf[path]
                            IN_MEMORY = False
                        nMcaInYDataset = 1
                        for dim in yDataset.shape:
                            nMcaInYDataset *= dim
                        nMcaInYDataset = int(nMcaInYDataset/mcaDim)
                        timeData = None
                        if _time is not None:
                            if "live_time" in mcaObjectPaths:
                                # it is assumed that all have the same structure!!!
                                timePath = NexusTools.getMcaObjectPaths(hdf, path)["live_time"]
                            elif "elapsed_time" in mcaObjectPaths:
                                timePath = NexusTools.getMcaObjectPaths(hdf,
                                                                        path)["elapsed_time"]
                            if timePath in hdf:
                                timeData = hdf[timePath][()]
                            elif "::" in timePath:
                                externalFile, externalPath = timePath.split("::")
                                with h5py.File(externalFile, "r") as timeHdf:
                                    timeData = timeHdf[externalPath][()]
                        if mcaIndex != 0:
                            if IN_MEMORY:
                                yDataset.shape = -1, mcaDim
                            if mSelection is not None:
                                case = -1
                                nMonitorData = 1
                                for v in mDataset.shape:
                                    nMonitorData *= v
                                if nMonitorData == nMcaInYDataset:
                                    mDataset.shape = nMcaInYDataset
                                    case = 0
                                elif nMonitorData == (nMcaInYDataset * mcaDim):
                                    case = 1
                                    mDataset.shape = nMcaInYDataset, mcaDim
                                if case == -1:
                                    raise ValueError(\
                                        "I do not know how to handle this monitor data")
                            if timeData is not None:
                                case = -1
                                nTimeData = 1
                                for v in timeData.shape:
                                    nTimeData *= v
                                if nTimeData == nMcaInYDataset:
                                    timeData.shape = nMcaInYDataset
                                    case = 0
                                    _time[nStart: nStart + nMcaInYDataset] += timeData
                                if case == -1:
                                    _logger.warning("I do not know how to handle this time data")
                                    _logger.warning("Ignoring time information")
                                    _time = None
                            if (len(yDataset.shape) == 3) and\
                               (dim1 == yDataset.shape[1]):
                                mca = 0
                                deltaI = int(yDataset.shape[1]/dim1)
                                for ii in range(yDataset.shape[0]):
                                    i = int(n/dim1)
                                    yData = yDataset[ii:(ii+1)]
                                    yData.shape = -1, mcaDim
                                    if mSelection is not None:
                                        if case == 0:
                                            mData = numpy.outer(mDataset[mca:(mca+dim1)],
                                                                numpy.ones((mcaDim)))
                                            self.data[i, :, :] += yData / mData
                                        elif case == 1:
                                            mData = mDataset[mca:(mca+dim1), :]
                                            mData.shape = -1, mcaDim
                                            self.data[i, :, :]  += yData / mData
                                    else:
                                        self.data[i:(i+deltaI), :] += yData
                                    n += yDataset.shape[1]
                                    mca += dim1
                            else:
                                for mca in range(nMcaInYDataset):
                                    i = int(n/dim1)
                                    j = n % dim1
                                    if len(yDataset.shape) == 3:
                                        ii = int(mca/yDataset.shape[1])
                                        jj = mca % yDataset.shape[1]
                                        yData = yDataset[ii, jj]
                                    elif len(yDataset.shape) == 2:
                                        yData = yDataset[mca,:]
                                    elif len(yDataset.shape) == 1:
                                        yData = yDataset
                                    if mSelection is not None:
                                        if case == 0:
                                            self.data[i, j, :] += yData / mDataset[mca]
                                        elif case == 1:
                                            self.data[i, j, :] += yData / mDataset[mca, :]
                                    else:
                                        self.data[i, j, :] += yData
                                    n += 1
                        else:
                            if mSelection is not None:
                                case = -1
                                nMonitorData = 1
                                for v in mDataset.shape:
                                    nMonitorData *= v
                                if nMonitorData == yDataset.shape[0]:
                                    case = 3
                                    mDataset.shape = yDataset.shape[0]
                                elif nMonitorData == nMcaInYDataset:
                                    mDataset.shape = nMcaInYDataset
                                    case = 0
                                #elif nMonitorData == (yDataset.shape[1] * yDataset.shape[2]):
                                #    case = 1
                                #    mDataset.shape = yDataset.shape[1], yDataset.shape[2]
                                if case == -1:
                                    raise ValueError(\
                                        "I do not know how to handle this monitor data")
                            if IN_MEMORY:
                                yDataset.shape = mcaDim, -1
                            if len(yDataset.shape) != 3:
                                for mca in range(nMcaInYDataset):
                                    i = int(n/dim1)
                                    j = n % dim1
                                    if len(yDataset.shape) == 3:
                                        ii = int(mca/yDataset.shape[2])
                                        jj = mca % yDataset.shape[2]
                                        yData = yDataset[:, ii, jj]
                                    elif len(yDataset.shape) == 2:
                                        yData = yDataset[:, mca]
                                    elif len(yDataset.shape) == 1:
                                        yData = yDataset[:]
                                    if mSelection is not None:
                                        if case == 0:
                                            self.data[i, j, :] += yData / mDataset[mca]
                                        elif case == 1:
                                            self.data[i, j, :] += yData / mDataset[:, mca]
                                        elif case == 3:
                                            self.data[i, j, :] += yData / mDataset
                                    else:
                                        self.data[i, j, :] += yData
                                    n += 1
                            else:
                                #stack of images to be read as MCA
                                for nImage in range(yDataset.shape[0]):
                                    tmp = yDataset[nImage:(nImage+1)]
                                    if len(tmp.shape) == 3:
                                        i = int(n/dim1)
                                        j = n % dim1
                                        if 0:
                                            #this loop is extremely SLOW!!!(and useless)
                                            for ii in range(tmp.shape[1]):
                                                for jj in range(tmp.shape[2]):
                                                    self.data[i+ii, j+jj, nImage] += tmp[0, ii, jj]
                                        else:
                                            self.data[i:i+tmp.shape[1],
                                                      j:j+tmp.shape[2], nImage] += tmp[0]
                                if mSelection is not None:
                                    for mca in range(yDataset.shape[0]):
                                        i = int(n/dim1)
                                        j = n % dim1
                                        yData = self.data[i, j, :]
                                        if case == 0:
                                            self.data[i, j, :] += yData / mDataset[mca]
                                        elif case == 1:
                                            self.data[i, j, :]  += yData / mDataset[:, mca]
                                        n += 1
                                else:
                                    n += tmp.shape[1] * tmp.shape[2]
                        yDataset = None
                        if dim0 == 1:
                            self.onProgress(j)
                if dim0 != 1:
                    self.onProgress(i)
            self.onEnd()
        elif not DONE:
            # data into memory but as images
            self.info["McaIndex"] = mcaIndex
            for hdf in hdfStack._sourceObjectList:
                entryNames = list(hdf["/"].keys())
                for scan in scanlist:
                    for ySelection in ySelectionList:
                        if JUST_KEYS:
                            entryName = entryNames[int(scan.split(".")[-1])-1]
                            path = entryName + ySelection
                            if mSelection is not None:
                                mpath = entryName + mSelection
                                mdtype = hdf[mpath].dtype
                                if mdtype not in [numpy.float64, numpy.float32]:
                                    mdtype = numpy.float64
                                mDataset = numpy.asarray(hdf[mpath], dtype=mdtype)
                            if xSelection is not None:
                                xpath = entryName + xSelection
                                xDataset = hdf[xpath][()]
                        else:
                            path = scan + ySelection
                            if mSelection is not None:
                                mpath = scan + mSelection
                                mdtype = hdf[mpath].dtype
                                if mdtype not in [numpy.float64, numpy.float32]:
                                    mdtype = numpy.float64
                                mDataset = numpy.asarray(hdf[mpath], dtype=mdtype)
                            if xSelection is not None:
                                xpath = scan + xSelection
                                xDataset = hdf[xpath][()]
                        if mSelection is not None:
                            nMonitorData = mDataset.size
                            case = -1
                            yDatasetShape = yDataset.shape
                            if nMonitorData == yDatasetShape[0]:
                                #as many monitor data as images
                                mDataset.shape = yDatasetShape[0]
                                case = 0
                            elif nMonitorData == (yDatasetShape[1] * yDatasetShape[2]):
                                #as many monitorData as pixels
                                case = 1
                                mDataset.shape = yDatasetShape[1], yDatasetShape[2]
                            if case == -1:
                                raise ValueError(\
                                    "I do not know how to handle this monitor data")
                            if case == 0:
                                for i in range(yDatasetShape[0]):
                                    self.data[i] += yDataset[i][()] / mDataset[i]
                            elif case == 1:
                                for i in range(yDataset.shape[0]):
                                    self.data[i] += yDataset[i] / mDataset
                        else:
                            for i in range(yDataset.shape[0]):
                                self.data[i:i+1] += yDataset[i:i+1]
        else:
            self.info["McaIndex"] = mcaIndex
            if _time is not None:
                nRequiredValues = 1
                for i in range(len(self.data.shape)):
                    if i != mcaIndex:
                        nRequiredValues *= self.data.shape[i]
                if _time.size != nRequiredValues:
                    _logger.warning("I do not know how to interpret the time information")
                    _logger.warning("Ignoring time information")
                    _time = None
                else:
                    _time.shape = -1

        self.info["SourceType"] = SOURCE_TYPE
        self.info["SourceName"] = filelist
        self.info["Size"]       = 1
        self.info["NumberOfFiles"] = 1
        if mcaIndex == 0:
            self.info["FileIndex"] = 1
        else:
            self.info["FileIndex"] = 0
        if _calibration is not None:
            self.info['McaCalib'] = _calibration
        else:
            self.info['McaCalib'] = [ 0.0, 1.0, 0.0]
        shape = self.data.shape
        for i in range(len(shape)):
            key = 'Dim_%d' % (i+1,)
            self.info[key] = shape[i]
        self.info['Channel0'] = 0
        if xSelection is not None:
            if xDataset.size == shape[self.info['McaIndex']]:
                self.x = [xDataset.reshape(-1)]
            else:
                _logger.warning("Ignoring xSelection")
        elif _channels is not None:
            _channels.shape = -1
            self.x = [_channels]
        if _time is not None:
            self.info["McaLiveTime"] = _time
Example No. 5
    def loadFileList(self, filelist, selection, scanlist=None):
        """
        loadFileList(self, filelist, y, scanlist=None, monitor=None, x=None)
        filelist is the list of file names belonging to the stack
        selection is a dictionary with the keys x, y, m.
        x        is the path to the x data (the channels) in the spectrum,
                 without the first level "directory". It is unused (for now).
        y        is the path to the 1D data (the counts) in the spectrum,
                 without the first level "directory"
        m        is the path to the normalizing data (I0 or whatever)
                 without the first level "directory".
        scanlist is the list of first level "directories" containing the 1D data
                 Example: The actual path has the form:
                 /whatever1/whatever2/counts
                 That means scanlist = ["/whatever1"]
                 and selection['y'] = "/whatever2/counts"
        """
        if DEBUG:
            print("filelist = ", filelist)
            print("selection = ", selection)
            print("scanlist = ", scanlist)
        # all the files in the same source
        hdfStack = NexusDataSource.NexusDataSource(filelist)

        #if there is more than one file, it is assumed all the files have
        #the same structure.
        tmpHdf = hdfStack._sourceObjectList[0]
        entryNames = []
        for key in tmpHdf["/"].keys():
            if isinstance(tmpHdf["/" + key], h5py.Group):
                entryNames.append(key)

        # build the selection in terms of HDF5 paths
        # for the time being, only the first item of the x selection is used

        xSelection = selection['x']
        if xSelection is not None:
            if type(xSelection) != type([]):
                xSelection = [xSelection]
        if type(xSelection) == type([]):
            if len(xSelection):
                xSelection = xSelection[0]
            else:
                xSelection = None
        else:
            xSelection = None
        # only one y is taken
        ySelection = selection['y']
        if type(ySelection) == type([]):
            ySelection = ySelection[0]

        # monitor selection
        mSelection = selection['m']
        if mSelection not in [None, []]:
            if type(mSelection) != type([]):
                mSelection = [mSelection]
        if type(mSelection) == type([]):
            if len(mSelection):
                mSelection = mSelection[0]
            else:
                mSelection = None
        else:
            mSelection = None
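        # at this point xSelection, ySelection and mSelection are each either
        # a single HDF5 path (string) or None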

        USE_JUST_KEYS = False
        # deal with the pathological case where the scanlist corresponds
        # to a selected top level dataset
        if len(entryNames) == 0:
            if scanlist is not None:
                if len(scanlist) == 1:
                    if scanlist[0] == ySelection:
                        scanlist = None
                        USE_JUST_KEYS = True
        elif len(entryNames) == 1:
            # deal with the SOLEIL case of one entry but with different name
            # in different files
            USE_JUST_KEYS = True
        elif scanlist in [None, []]:
            USE_JUST_KEYS = True
        if USE_JUST_KEYS:
            #if the scanlist is None, it is assumed we are interested in all
            #the scans containing the selection, not that all the scans
            #contain the selection.
            scanlist = []
            if 0:
                JUST_KEYS = False
                #expect same entry names in the files
                #Unfortunately this does not work for SOLEIL
                for entry in entryNames:
                    path = "/" + entry + ySelection
                    dirname = posixpath.dirname(path)
                    base = posixpath.basename(path)
                    try:
                        if base in tmpHdf[dirname].keys():
                            scanlist.append(entry)
                    except KeyError:
                        pass
            else:
                JUST_KEYS = True
                #expect same structure in the files even if the
                #names are different (SOLEIL ...)
                if len(entryNames):
                    i = 0
                    for entry in entryNames:
                        path = "/" + entry + ySelection
                        dirname = posixpath.dirname(path)
                        base = posixpath.basename(path)
                        if hasattr(tmpHdf[dirname], "keys"):
                            i += 1
                            if base in tmpHdf[dirname].keys():
                                scanlist.append("1.%d" % i)
                    if not len(scanlist):
                        path = "/" + ySelection
                        dirname = posixpath.dirname(path)
                        base = posixpath.basename(path)
                        try:
                            if base in tmpHdf[dirname].keys():
                                JUST_KEYS = False
                                scanlist.append("")
                        except KeyError:
                            #it will crash later on
                            pass
                else:
                    JUST_KEYS = False
                    scanlist.append("")
        else:
            try:
                number, order = [int(x) for x in scanlist[0].split(".")]
                JUST_KEYS = True
            except:
                JUST_KEYS = False
            if not JUST_KEYS:
                for scan in scanlist:
                    if scan.startswith("/"):
                        t = scan[1:]
                    else:
                        t = scan
                    if t not in entryNames:
                        raise ValueError("Entry %s not in file" % scan)

        nFiles = len(filelist)
        nScans = len(scanlist)
        if JUST_KEYS:
            if not nScans:
                raise IOError("No entry contains the required data")

        if DEBUG:
            print("Retained number of files = %d" % nFiles)
            print("Retained number of scans = %d" % nScans)

        #Now we have to decide the number of MCA spectra ...
        #I assume all the scans contain the same number of MCA spectra
        if JUST_KEYS:
            path = "/" + entryNames[int(scanlist[0].split(".")[-1]) -
                                    1] + ySelection
            if mSelection is not None:
                mpath = "/" + entryNames[int(scanlist[0].split(".")[-1]) -
                                         1] + mSelection
            if xSelection is not None:
                xpath = "/" + entryNames[int(scanlist[0].split(".")[-1]) -
                                         1] + xSelection
        else:
            path = scanlist[0] + ySelection
            if mSelection is not None:
                mpath = scanlist[0] + mSelection
            if xSelection is not None:
                xpath = scanlist[0] + xSelection

        yDataset = tmpHdf[path]

        if self.__dtype is None:
            self.__dtype = yDataset.dtype
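            # integer counts are promoted to floating point so that the
            # accumulation and monitor normalization below do not overflow
            # or truncate (rationale assumed from the promotions that follow)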
            if self.__dtype in [numpy.int16, numpy.uint16]:
                self.__dtype = numpy.float32
            elif self.__dtype in [numpy.int32, numpy.uint32]:
                self.__dtype = numpy.float64

        #figure out the shape of the stack
        shape = yDataset.shape
        mcaIndex = selection.get('index', len(shape) - 1)
        if mcaIndex == -1:
            mcaIndex = len(shape) - 1
        if DEBUG:
            print("mcaIndex = %d" % mcaIndex)
        considerAsImages = False
        dim0, dim1, mcaDim = self.getDimensions(nFiles,
                                                nScans,
                                                shape,
                                                index=mcaIndex)
        try:
            if self.__dtype in [numpy.float32, numpy.int32]:
                bytefactor = 4
            elif self.__dtype in [numpy.int16, numpy.uint16]:
                bytefactor = 2
            elif self.__dtype in [numpy.int8, numpy.uint8]:
                bytefactor = 1
            else:
                bytefactor = 8

            neededMegaBytes = nFiles * dim0 * dim1 * (mcaDim * bytefactor /
                                                      (1024 * 1024.))
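            # e.g. a single (100 x 100 x 2048) float32 stack amounts to
            # 100 * 100 * 2048 * 4 / 1024**2 ~ 78 megabytes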
            physicalMemory = PhysicalMemory.getPhysicalMemoryOrNone()
            if physicalMemory is None:
                # assume about 6 gigabytes (6000 MB) as a good compromise
                physicalMemory = 6000
            else:
                physicalMemory /= (1024 * 1024.)
            if (neededMegaBytes > (0.95*physicalMemory))\
               and (nFiles == 1) and (len(shape) == 3):
                if self.__dtype0 is None:
                    if (bytefactor == 8) and (neededMegaBytes <
                                              (2 * physicalMemory)):
                        #try reading as float32
                        self.__dtype = numpy.float32
                    else:
                        raise MemoryError("Force dynamic loading")
                else:
                    raise MemoryError("Force dynamic loading")
            if (mcaIndex == 0) and (nFiles == 1) and (nScans == 1):
                #keep the original arrangement but in memory
                self.data = numpy.zeros(yDataset.shape, self.__dtype)
                considerAsImages = True
            else:
                # force arrangement as spectra
                self.data = numpy.zeros((dim0, dim1, mcaDim), self.__dtype)
            DONE = False
        except (MemoryError, ValueError):
            #some versions report ValueError instead of MemoryError
            if (nFiles == 1) and (len(shape) == 3):
                print("Attempting dynamic loading")
                self.data = yDataset
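                # self.data is now the h5py dataset itself: the stack stays
                # on disk and is sliced on demand instead of being copied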
                if mSelection is not None:
                    mDataset = tmpHdf[mpath].value
                    self.monitor = [mDataset]
                if xSelection is not None:
                    xDataset = tmpHdf[xpath].value
                    self.x = [xDataset]
                if h5py.version.version < '2.0':
                    #prevent automatic closing keeping a reference
                    #to the open file
                    self._fileReference = hdfStack
                DONE = True
            else:
                #what to do if the number of dimensions is only 2?
                raise

        if (not DONE) and (not considerAsImages):
            self.info["McaIndex"] = 2
            n = 0

            if dim0 == 1:
                self.onBegin(dim1)
            else:
                self.onBegin(dim0)
            self.incrProgressBar = 0
            for hdf in hdfStack._sourceObjectList:
                entryNames = list(hdf["/"].keys())
                goodEntryNames = []
                for entry in entryNames:
                    tmpPath = "/" + entry
                    if hasattr(hdf[tmpPath], "keys"):
                        goodEntryNames.append(entry)
                for scan in scanlist:
                    if JUST_KEYS:
                        entryName = goodEntryNames[int(scan.split(".")[-1]) -
                                                   1]
                        path = entryName + ySelection
                        if mSelection is not None:
                            mpath = entryName + mSelection
                            mDataset = hdf[mpath].value
                        if xSelection is not None:
                            xpath = entryName + xSelection
                            xDataset = hdf[xpath].value
                    else:
                        path = scan + ySelection
                        if mSelection is not None:
                            mpath = scan + mSelection
                            mDataset = hdf[mpath].value
                        if xSelection is not None:
                            xpath = scan + xSelection
                            xDataset = hdf[xpath].value
                    try:
                        yDataset = hdf[path]
                        tmpShape = yDataset.shape
                        totalBytes = numpy.ones((1, ), yDataset.dtype).itemsize
                        for nItems in tmpShape:
                            totalBytes *= nItems
                        if (totalBytes / (1024. * 1024.)) > 500:
                            #read from disk
                            IN_MEMORY = False
                        else:
                            #read the data into memory
                            yDataset = hdf[path].value
                            IN_MEMORY = True
                    except (MemoryError, ValueError):
                        yDataset = hdf[path]
                        IN_MEMORY = False
                    nMcaInYDataset = 1
                    for dim in yDataset.shape:
                        nMcaInYDataset *= dim
                    nMcaInYDataset = int(nMcaInYDataset / mcaDim)
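                    # total elements / spectrum length = number of spectra,
                    # e.g. a (10, 20, 2048) block with mcaDim == 2048 holds 200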
                    if mcaIndex != 0:
                        if IN_MEMORY:
                            yDataset.shape = -1, mcaDim
                        if mSelection is not None:
                            case = -1
                            nMonitorData = 1
                            for v in mDataset.shape:
                                nMonitorData *= v
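                            # case 0: one monitor value per spectrum;
                            # case 1: one monitor value per spectrum channel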
                            if nMonitorData == nMcaInYDataset:
                                mDataset.shape = nMcaInYDataset
                                case = 0
                            elif nMonitorData == (nMcaInYDataset * mcaDim):
                                case = 1
                                mDataset.shape = nMcaInYDataset, mcaDim
                            if case == -1:
                                raise ValueError(\
                                    "I do not know how to handle this monitor data")
                        if (len(yDataset.shape) == 3) and\
                           (dim1 == yDataset.shape[1]):
                            mca = 0
                            deltaI = int(yDataset.shape[1] / dim1)
                            for ii in range(yDataset.shape[0]):
                                i = int(n / dim1)
                                yData = yDataset[ii:(ii + 1)]
                                yData.shape = -1, mcaDim
                                if mSelection is not None:
                                    if case == 0:
                                        mData = numpy.outer(
                                            mDataset[mca:(mca + dim1)],
                                            numpy.ones((mcaDim)))
                                        self.data[i, :, :] = yData / mData
                                    elif case == 1:
                                        mData = mDataset[mca:(mca + dim1), :]
                                        mData.shape = -1, mcaDim
                                        self.data[i, :, :] = yData / mData
                                else:
                                    self.data[i:(i + deltaI), :] = yData
                                n += yDataset.shape[1]
                                mca += dim1
                        else:
                            for mca in range(nMcaInYDataset):
                                i = int(n / dim1)
                                j = n % dim1
                                if len(yDataset.shape) == 3:
                                    ii = int(mca / yDataset.shape[1])
                                    jj = mca % yDataset.shape[1]
                                    yData = yDataset[ii, jj]
                                elif len(yDataset.shape) == 2:
                                    yData = yDataset[mca, :]
                                elif len(yDataset.shape) == 1:
                                    yData = yDataset
                                if mSelection is not None:
                                    if case == 0:
                                        self.data[i,
                                                  j, :] = yData / mDataset[mca]
                                    elif case == 1:
                                        self.data[
                                            i, j, :] = yData / mDataset[mca, :]
                                else:
                                    self.data[i, j, :] = yData
                                n += 1
                    else:
                        if mSelection is not None:
                            case = -1
                            nMonitorData = 1
                            for v in mDataset.shape:
                                nMonitorData *= v
                            if nMonitorData == yDataset.shape[0]:
                                case = 3
                                mDataset.shape = yDataset.shape[0]
                            elif nMonitorData == nMcaInYDataset:
                                mDataset.shape = nMcaInYDataset
                                case = 0
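
A minimal usage sketch for this variant, assuming a hypothetical owner class that exposes the method; the file and dataset names are illustrative, not from the source:

    stack = HDF5Stack()  # hypothetical class providing loadFileList
    stack.loadFileList(["map_0001.h5", "map_0002.h5"],
                       {'x': None, 'y': "/data/counts", 'm': "/data/I0"})
    print(stack.data.shape, stack.info["McaIndex"])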
Example #6
    def loadFileList(self, filelist, fileindex=0):
        if type(filelist) == type(''):
            filelist = [filelist]
        self.__keyList = []
        self.sourceName = filelist
        self.__indexedStack = True
        self.sourceType = SOURCE_TYPE
        self.info = {}
        self.nbFiles=len(filelist)

        #read first edf file
        #get information
        tempEdf=EdfFileDataSource.EdfFileDataSource(filelist[0])
        keylist = tempEdf.getSourceInfo()['KeyList']
        nImages = len(keylist)
        dataObject = tempEdf.getDataObject(keylist[0])
        self.info.update(dataObject.info)
        if len(dataObject.data.shape) == 3:
            #this is already a stack
            self.data = dataObject.data
            self.__nFiles         = 1
            self.__nImagesPerFile = nImages
            shape = self.data.shape
            for i in range(len(shape)):
                key = 'Dim_%d' % (i+1,)
                self.info[key] = shape[i]
            self.info["SourceType"] = SOURCE_TYPE
            self.info["SourceName"] = filelist[0]
            self.info["Size"]       = 1
            self.info["NumberOfFiles"] = 1
            self.info["FileIndex"] = fileindex
            return
        arrRet = dataObject.data
        if self.__dtype is None:
            self.__dtype = arrRet.dtype

        self.onBegin(self.nbFiles)
        singleImageShape = arrRet.shape
        actualImageStack = False
        if (fileindex == 2) or (self.__imageStack):
            self.__imageStack = True
            if len(singleImageShape) == 1:
                #single line
                #be ready for specfile stack?
                self.onEnd()
                raise IOError("Not implemented yet")
                self.data = numpy.zeros((arrRet.shape[0],
                                           nImages,
                                           self.nbFiles),
                                           self.__dtype)
                self.incrProgressBar=0
                for tempEdfFileName in filelist:
                    tempEdf=EdfFile.EdfFile(tempEdfFileName, 'rb')
                    for i in range(nImages):
                        pieceOfStack=tempEdf.GetData(i)
                        self.data[:,i, self.incrProgressBar] = pieceOfStack[:]
                    self.incrProgressBar += 1
                    self.onProgress(self.incrProgressBar)
                self.onEnd()
            else:
                if nImages > 1:
                    #this is not the common case
                    #should I try to convert it to a standard one
                    #using a 3D matrix or keep as 4D matrix?
                    if self.nbFiles > 1:
                        raise IOError(\
                            "Multiple files with multiple images not implemented yet")
                    self.data = numpy.zeros((arrRet.shape[0],
                                               arrRet.shape[1],
                                               nImages * self.nbFiles),
                                               self.__dtype)
                    self.incrProgressBar=0
                    for tempEdfFileName in filelist:
                        tempEdf=EdfFile.EdfFile(tempEdfFileName, 'rb')
                        for i in range(nImages):
                            pieceOfStack=tempEdf.GetData(i)
                            self.data[:,:,
                                      nImages*self.incrProgressBar+i] = \
                                                      pieceOfStack[:,:]
                        self.incrProgressBar += 1
                else:
                    #this is the common case
                    try:
                        # estimate the needed amount of memory in bytes
                        if self.__dtype == numpy.float64:
                            bytefactor = 8
                        else:
                            bytefactor = 4
                        needed_ = self.nbFiles * \
                                   arrRet.shape[0] *\
                                   arrRet.shape[1] * bytefactor
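                        # needed_ is in bytes, e.g. 1000 frames of 2048 x 2048
                        # float32 data -> 1000 * 2048 * 2048 * 4 ~ 16.8 GB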
                        physicalMemory = PhysicalMemory.getPhysicalMemoryOrNone()
                        if physicalMemory is not None:
                            # spare 5% of memory
                            if physicalMemory < (1.05 * needed_):
                                raise MemoryError("Not enough physical memory available")
                        if self.__imageStack:
                            self.data = numpy.zeros((self.nbFiles,
                                                     arrRet.shape[0],
                                                     arrRet.shape[1]),
                                                     self.__dtype)
                            self.incrProgressBar=0
                            for tempEdfFileName in filelist:
                                tempEdf=EdfFile.EdfFile(tempEdfFileName, 'rb')
                                pieceOfStack=tempEdf.GetData(0)    
                                self.data[self.incrProgressBar] = pieceOfStack
                                self.incrProgressBar += 1
                                self.onProgress(self.incrProgressBar)
                            actualImageStack = True
                        else:
                            self.data = numpy.zeros((arrRet.shape[0],
                                                     arrRet.shape[1],
                                                     self.nbFiles),
                                                     self.__dtype)
                            self.incrProgressBar=0
                            for tempEdfFileName in filelist:
                                tempEdf=EdfFile.EdfFile(tempEdfFileName, 'rb')
                                pieceOfStack=tempEdf.GetData(0)    
                                self.data[:,:, self.incrProgressBar] = pieceOfStack
                                self.incrProgressBar += 1
                                self.onProgress(self.incrProgressBar)
                    except (MemoryError, ValueError):
                        hdf5done = False
                        if HDF5 and (('PyMcaQt' in sys.modules) or\
                           ('PyMca.PyMcaQt' in sys.modules)):
                            from PyMca5 import PyMcaQt as qt
                            from PyMca5 import ArraySave
                            msg=qt.QMessageBox.information( None,
                              "Memory error\n",
                              "Do you want to convert your data to HDF5?\n",
                              qt.QMessageBox.Yes,qt.QMessageBox.No)
                            if msg != qt.QMessageBox.No:
                                hdf5file = qt.QFileDialog.getSaveFileName(None,
                                            "Please select output file name",
                                            os.path.dirname(filelist[0]),
                                            "HDF5 files *.h5")
                                if not len(hdf5file):
                                    raise IOError("Invalid output file")
                                hdf5file = qt.safe_str(hdf5file)
                                if not hdf5file.endswith(".h5"):
                                    hdf5file += ".h5"
                                hdf, self.data =  ArraySave.getHDF5FileInstanceAndBuffer(hdf5file,
                                              (self.nbFiles,
                                               arrRet.shape[0],
                                               arrRet.shape[1]))
                                self.incrProgressBar=0
                                for tempEdfFileName in filelist:
                                    tempEdf=EdfFile.EdfFile(tempEdfFileName, 'rb')
                                    pieceOfStack=tempEdf.GetData(0)
                                    self.data[self.incrProgressBar,:,:] = pieceOfStack[:,:]
                                    hdf.flush()
                                    self.incrProgressBar += 1
                                    self.onProgress(self.incrProgressBar)
                                hdf5done = True
                        if not hdf5done:
                            for i in range(3):
                                print("\7")
                            samplingStep = None
                            i = 2
                            while samplingStep is None:
                                print("**************************************************")
                                print(" Memory error!, attempting %dx%d sampling reduction ") % (i,i)
                                print("**************************************************")
                                s1, s2 = arrRet[::i, ::i].shape
                                try:
                                    self.data = numpy.zeros((s1, s2,
                                                         self.nbFiles),
                                                         self.__dtype)
                                    samplingStep = i
                                except (MemoryError, ValueError):
                                    i += 1
                            self.incrProgressBar=0
                            for tempEdfFileName in filelist:
                                tempEdf=EdfFile.EdfFile(tempEdfFileName, 'rb')
                                pieceOfStack=tempEdf.GetData(0)
                                self.data[:,:, self.incrProgressBar] = pieceOfStack[
                                                            ::samplingStep,::samplingStep]
                                self.incrProgressBar += 1
                                self.onProgress(self.incrProgressBar)
                self.onEnd()
        else:
            self.__imageStack = False
            if len(singleImageShape) == 1:
                #single line
                #be ready for specfile stack?
                raise IOError("Not implemented yet")
                self.data = numpy.zeros((self.nbFiles,
                                           arrRet.shape[0],
                                           nImages),
                                           self.__dtype)
                self.incrProgressBar=0
                for tempEdfFileName in filelist:
                    tempEdf=EdfFile.EdfFile(tempEdfFileName, 'rb')
                    for i in range(nImages):
                        pieceOfStack=tempEdf.GetData(i)
                        self.data[self.incrProgressBar, :,i] = pieceOfStack[:]
                    self.incrProgressBar += 1
                    self.onProgress(self.incrProgressBar)
                self.onEnd()
            else:
                if nImages > 1:
                    #this is not the common case
                    #should I try to convert it to a standard one
                    #using a 3D matrix or keep as 4D matrix?
                    if self.nbFiles > 1:
                        if (arrRet.shape[0] > 1) and\
                           (arrRet.shape[1] > 1):
                                raise IOError(\
                                    "Multiple files with multiple images not implemented yet")
                        elif arrRet.shape[0] == 1:
                            self.data = numpy.zeros((self.nbFiles,
                                               arrRet.shape[0] * nImages,
                                               arrRet.shape[1]),
                                               self.__dtype)
                            self.incrProgressBar=0
                            for tempEdfFileName in filelist:
                                tempEdf=EdfFile.EdfFile(tempEdfFileName, 'rb')
                                for i in range(nImages):
                                    pieceOfStack=tempEdf.GetData(i)
                                    self.data[self.incrProgressBar, i,:] = \
                                                              pieceOfStack[:,:]
                                self.incrProgressBar += 1
                                self.onProgress(self.incrProgressBar)
                        elif arrRet.shape[1] == 1:
                            self.data = numpy.zeros((self.nbFiles,
                                               arrRet.shape[1] * nImages,
                                               arrRet.shape[0]),
                                               self.__dtype)
                            self.incrProgressBar=0
                            for tempEdfFileName in filelist:
                                tempEdf=EdfFile.EdfFile(tempEdfFileName, 'rb')
                                for i in range(nImages):
                                    pieceOfStack=tempEdf.GetData(i)
                                    self.data[self.incrProgressBar, i,:] = \
                                                            pieceOfStack[:,:]
                                self.incrProgressBar += 1
                                self.onProgress(self.incrProgressBar)
                    else:
                        self.data = numpy.zeros((nImages * self.nbFiles,
                                               arrRet.shape[0],
                                               arrRet.shape[1]),
                                               self.__dtype)
                        self.incrProgressBar=0
                        for tempEdfFileName in filelist:
                            tempEdf=EdfFile.EdfFile(tempEdfFileName, 'rb')
                            for i in range(nImages):
                                pieceOfStack=tempEdf.GetData(i)
                                self.data[nImages*self.incrProgressBar+i,
                                          :,:] = pieceOfStack[:,:]
                            self.incrProgressBar += 1
                            self.onProgress(self.incrProgressBar)
                    self.onEnd()
                else:
                    if fileindex == 1:
                        try:
                            self.data = numpy.zeros((arrRet.shape[0],
                                                    self.nbFiles,
                                                   arrRet.shape[1]),
                                                   self.__dtype)
                        except (MemoryError, ValueError):
                            try:
                                self.data = numpy.zeros((arrRet.shape[0],
                                                    self.nbFiles,
                                                   arrRet.shape[1]),
                                                   numpy.float32)
                            except (MemoryError, ValueError):
                                self.data = numpy.zeros((arrRet.shape[0],
                                                    self.nbFiles,
                                                   arrRet.shape[1]),
                                                   numpy.int16)
                    else:
                        try:
                            # estimate the needed amount of memory in bytes
                            if self.__dtype == numpy.float64:
                                bytefactor = 8
                            else:
                                bytefactor = 4
                            needed_ = self.nbFiles * \
                                       arrRet.shape[0] *\
                                       arrRet.shape[1] * bytefactor
                            physicalMemory = PhysicalMemory.getPhysicalMemoryOrNone()
                            if physicalMemory is not None:
                                # spare 5% of memory
                                if physicalMemory < (1.05 * needed_):
                                    raise MemoryError("Not enough physical memory available")
                            self.data = numpy.zeros((self.nbFiles,
                                                   arrRet.shape[0],
                                                   arrRet.shape[1]),
                                                   self.__dtype)
                        except (MemoryError, ValueError):
                            try:
                                needed_ = self.nbFiles * \
                                           arrRet.shape[0] *\
                                           arrRet.shape[1] * 4
                                physicalMemory = PhysicalMemory.getPhysicalMemoryOrNone()
                                if physicalMemory is not None:
                                    # spare 5 % of memory
                                    if physicalMemory < (1.05 * needed_):
                                        raise MemoryError("Not enough physical memory available")
                                self.data = numpy.zeros((self.nbFiles,
                                                   arrRet.shape[0],
                                                   arrRet.shape[1]),
                                                   numpy.float32)
                            except (MemoryError, ValueError):
                                text = "Memory Error: Attempt subsampling or convert to HDF5"
                                if HDF5 and (('PyMcaQt' in sys.modules) or\
                                   ('PyMca.PyMcaQt' in sys.modules)):
                                    from PyMca5 import PyMcaQt as qt
                                    from PyMca5 import ArraySave
                                    msg=qt.QMessageBox.information( None,
                                      "Memory error\n",
                                      "Do you want to convert your data to HDF5?\n",
                                      qt.QMessageBox.Yes,qt.QMessageBox.No)
                                    if msg == qt.QMessageBox.No:
                                        raise MemoryError(text)
                                    hdf5file = qt.QFileDialog.getSaveFileName(None,
                                                "Please select output file name",
                                                os.path.dirname(filelist[0]),
                                                "HDF5 files *.h5")
                                    if not len(hdf5file):
                                        raise IOError(\
                                            "Invalid output file")
                                    hdf5file = qt.safe_str(hdf5file)
                                    if not hdf5file.endswith(".h5"):
                                        hdf5file += ".h5"
                                    hdf, self.data =  ArraySave.getHDF5FileInstanceAndBuffer(hdf5file,
                                                  (self.nbFiles,
                                                   arrRet.shape[0],
                                                   arrRet.shape[1]))               
                                else:    
                                    raise MemoryError("Memory Error")
                    self.incrProgressBar=0
                    if fileindex == 1:
                        for tempEdfFileName in filelist:
                            tempEdf=EdfFile.EdfFile(tempEdfFileName, 'rb')
                            pieceOfStack=tempEdf.GetData(0)    
                            self.data[:,self.incrProgressBar,:] = pieceOfStack[:,:]
                            self.incrProgressBar += 1
                            self.onProgress(self.incrProgressBar)
                    else:
                        # test for ID24 map
                        ID24 = False
                        if "_sample_" in filelist[0]:
                            i0StartFile = filelist[0].replace("_sample_", "_I0start_")
                            if os.path.exists(i0StartFile):
                                ID24 = True
                                id24idx = 0
                                i0Start = EdfFile.EdfFile(i0StartFile, 'rb').GetData(0).astype(numpy.float64)
                                i0EndFile = filelist[0].replace("_sample_", "_I0end_")
                                i0Slope = 0.0
                                if os.path.exists(i0EndFile):
                                    i0End = EdfFile.EdfFile(i0EndFile, 'rb').GetData(0)
                                    i0Slope = (i0End-i0Start)/len(filelist)
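                                    # I0 is then interpolated linearly over
                                    # the scan: I0(k) ~ i0Start + k * i0Slope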
                        for tempEdfFileName in filelist:
                            tempEdf=EdfFile.EdfFile(tempEdfFileName, 'rb')
                            if ID24:
                                pieceOfStack = -numpy.log(tempEdf.GetData(0) / (i0Start[0, :] + id24idx * i0Slope))
                                pieceOfStack[~numpy.isfinite(pieceOfStack)] = 1
                                id24idx += 1
                            else:
                                pieceOfStack=tempEdf.GetData(0)
                            try:
                                self.data[self.incrProgressBar, :,:] = pieceOfStack[:,:]
                            except ValueError:
                                if pieceOfStack.shape[1] != arrRet.shape[1]:
                                    print(" ERROR on file %s" % tempEdfFileName)
                                    print(" DIM 1 error. Assuming missing data were at the end!")
                                if pieceOfStack.shape[0] != arrRet.shape[0]:
                                    print(" ERROR on file %s" % tempEdfFileName)
                                    print(" DIM 0 error. Assuming missing data were at the end!")
                                self.data[self.incrProgressBar,\
                                         :pieceOfStack.shape[0],\
                                         :pieceOfStack.shape[1]] = pieceOfStack[:,:]                                
                            self.incrProgressBar += 1
                            self.onProgress(self.incrProgressBar)
                    self.onEnd()
        self.__nFiles         = self.incrProgressBar
        self.__nImagesPerFile = nImages
        shape = self.data.shape
        for i in range(len(shape)):
            key = 'Dim_%d' % (i+1,)
            self.info[key] = shape[i]
        if not isinstance(self.data, numpy.ndarray):
            hdf.flush()
            self.info["SourceType"] = "HDF5Stack1D"
            if self.__imageStack:
                self.info["McaIndex"] = 0
                self.info["FileIndex"] = 1
            else:
                self.info["McaIndex"] = 2
                self.info["FileIndex"] = 0
            self.info["SourceName"] = [hdf5file]
            self.info["NumberOfFiles"] = 1
            self.info["Size"]       = 1
        elif actualImageStack:
            self.info["SourceType"] = SOURCE_TYPE
            self.info["McaIndex"] = 0
            self.info["FileIndex"] = 1
            self.info["SourceName"] = self.sourceName
            self.info["NumberOfFiles"] = self.__nFiles * 1
            self.info["Size"] = self.__nFiles * self.__nImagesPerFile
        else:
            self.info["SourceType"] = SOURCE_TYPE
            self.info["FileIndex"] = fileindex
            self.info["SourceName"] = self.sourceName
            self.info["NumberOfFiles"] = self.__nFiles * 1
            self.info["Size"] = self.__nFiles * self.__nImagesPerFile