Example #1
0
    def _readbinary_byte_offset(self, inStream):
        """
        Read in a binary part of an x-CBF_BYTE_OFFSET compressed image 
        
        @param inStream: the binary image (without any CIF decorators)
        @type inStream: python string.
        @return: a linear numpy array without shape and dtype set
        @rtype: numpy array
        """

        starter = "\x0c\x1a\x04\xd5"
        startPos = inStream.find(starter) + 4
        data = inStream[startPos:startPos + int(self.header["X-Binary-Size"])]
        try:
            import byte_offset
        except ImportError:
            logging.warning(
                "Error in byte_offset part: Falling back to Numpy implementation"
            )
            myData = cbfimage.analyseNumpy(data, size=self.dim1 * self.dim2)
        else:
            myData = byte_offset.analyseCython(data,
                                               size=self.dim1 * self.dim2)

        assert len(myData) == self.dim1 * self.dim2
        return myData
Example #2
0
    def getData(self):
        """
        Unpack a binary blob according to the specification given in the header

        When self._data is already set it is used as-is.  Otherwise
        self.size bytes are read from self.file at offset self.start and
        decompressed according to the COMPRESSION header entry, if any.

        @return: dataset as numpy.ndarray (a bare return, i.e. None, is used
                 when the file is already closed)
        """
        data = None
        if self._data is not None:
            data = self._data
        elif self.file is None:
            # No cached array and no file to read from: data stays None
            data = self._data
        else:
            if self._bytecode is None:
                # Take the element type from the header, else default to uint16
                if "DATATYPE" in self.capsHeader:
                    self._bytecode = DATA_TYPES[self.header[self.capsHeader["DATATYPE"]]]
                else:
                    self._bytecode = numpy.uint16
            # Work on a reversed copy so that self.dims itself is left untouched
            dims = self.dims[:]
            dims.reverse()
            # The seek + read pair is done while holding the file's lock
            with self.file.lock:
                if self.file.closed:
                    logger.error("file: %s from %s is closed. Cannot read data." % (self.file, self.file.filename))
                    return
                else:
                    self.file.seek(self.start)
                    fileData = self.file.read(self.size)

            if ("COMPRESSION" in self.capsHeader):
                compression = self.header[self.capsHeader["COMPRESSION"]].upper()
                # Expected decoded size: self.bpp multiplied by every dimension
                uncompressed_size = self.bpp
                for i in dims:
                    uncompressed_size *= i
                # The scheme is matched by substring; "GZIP" and "BZ" are
                # tested before the generic "Z" so that they are not shadowed.
                if "OFFSET" in compression :
                    try:
                        import byte_offset#IGNORE:F0401
                    # NOTE(review): `except X, name` is Python 2-only syntax.
                    # NOTE(review): this path only logs; rawData is not
                    # assigned when the import fails.
                    except ImportError, error:
                        logger.error("Unimplemented compression scheme:  %s (%s)" % (compression, error))
                    else:
                        myData = byte_offset.analyseCython(fileData, size=uncompressed_size)
                        rawData = myData.astype(self._bytecode).tostring()
                        self.size = uncompressed_size
                elif compression == "NONE":
                    rawData = fileData
                elif "GZIP" in compression:
                    rawData = decGzip(fileData)
                    self.size = uncompressed_size
                elif "BZ" in compression :
                    rawData = decBzip2(fileData)
                    self.size = uncompressed_size
                elif "Z" in compression :
                    rawData = decZlib(fileData)
                    self.size = uncompressed_size
                else:
                    # Unknown scheme: the bytes read from the file are used unchanged
                    logger.warning("Unknown compression scheme %s" % compression)
                    rawData = fileData

            # NOTE(review): this excerpt stops at the bare `else:` below; the
            # branch for headers without a COMPRESSION entry is not visible.
            else:
Example #3
0
    def getData(self):
        """
        Unpack a binary blob according to the specification given in the header

        When self._data is already set it is used as-is.  Otherwise
        self.size bytes are read from self.file at offset self.start and
        decompressed according to the COMPRESSION header entry, if any.

        @return: dataset as numpy.ndarray (a bare return, i.e. None, is used
                 when the file is already closed)
        """
        data = None
        if self._data is not None:
            data = self._data
        elif self.file is None:
            # No cached array and no file to read from: data stays None
            data = self._data
        else:
            if self._bytecode is None:
                # Take the element type from the header, else default to uint16
                if "DATATYPE" in self.capsHeader:
                    self._bytecode = DATA_TYPES[self.header[self.capsHeader["DATATYPE"]]]
                else:
                    self._bytecode = numpy.uint16
            # Work on a reversed copy so that self.dims itself is left untouched
            dims = self.dims[:]
            dims.reverse()
            # The seek + read pair is done while holding the file's lock
            with self.file.lock:
                if self.file.closed:
                    logger.error("file: %s from %s is closed. Cannot read data." % (self.file, self.file.filename))
                    return
                else:
                    self.file.seek(self.start)
                    fileData = self.file.read(self.size)

            if ("COMPRESSION" in self.capsHeader):
                compression = self.header[self.capsHeader["COMPRESSION"]].upper()
                # Expected decoded size: self.bpp multiplied by every dimension
                uncompressed_size = self.bpp
                for i in dims:
                    uncompressed_size *= i
                # The scheme is matched by substring; "GZIP" and "BZ" are
                # tested before the generic "Z" so that they are not shadowed.
                if "OFFSET" in compression :
                    try:
                        import byte_offset#IGNORE:F0401
                    # NOTE(review): `except X, name` is Python 2-only syntax.
                    # NOTE(review): this path only logs; rawData is not
                    # assigned when the import fails.
                    except ImportError, error:
                        logger.error("Unimplemented compression scheme:  %s (%s)" % (compression, error))
                    else:
                        myData = byte_offset.analyseCython(fileData, size=uncompressed_size)
                        rawData = myData.astype(self._bytecode).tostring()
                        self.size = uncompressed_size
                elif compression == "NONE":
                    rawData = fileData
                elif "GZIP" in compression:
                    rawData = decGzip(fileData)
                    self.size = uncompressed_size
                elif "BZ" in compression :
                    rawData = decBzip2(fileData)
                    self.size = uncompressed_size
                elif "Z" in compression :
                    rawData = decZlib(fileData)
                    self.size = uncompressed_size
                else:
                    # Unknown scheme: the bytes read from the file are used unchanged
                    logger.warning("Unknown compression scheme %s" % compression)
                    rawData = fileData

            # NOTE(review): this excerpt stops at the bare `else:` below; the
            # branch for headers without a COMPRESSION entry is not visible.
            else:
Example #4
0
    def _readbinary_byte_offset(self, inStream):
        """
        Read in a binary part of an x-CBF_BYTE_OFFSET compressed image

        The payload starts 4 positions after the first occurrence of STARTER
        and its length is taken from the "X-Binary-Size" header entry.
        Decompression is delegated to the byte_offset module when it can be
        imported and to decByteOffet_numpy otherwise.

        @param inStream: the binary image (without any CIF decorators)
        @type inStream: python string.
        @return: a linear numpy array without shape and dtype set
        @rtype: numpy array
        @raise ValueError: if STARTER is not present in inStream
        @raise AssertionError: if the decoder does not return dim1*dim2 items
        """
        markerPos = inStream.find(STARTER)
        if markerPos < 0:
            # find() returns -1 when the marker is absent; adding 4 to that
            # would silently start decoding at offset 3.
            raise ValueError("CBF binary start marker not found in stream")
        startPos = markerPos + 4
        data = inStream[startPos:startPos + int(self.header["X-Binary-Size"])]

        expected = self.dim1 * self.dim2
        try:
            import byte_offset
        except ImportError:
            logger.warning("Error in byte_offset part: Falling back to Numpy implementation")
            myData = decByteOffet_numpy(data, size=expected)
        else:
            myData = byte_offset.analyseCython(data, size=expected)

        # Explicit raise instead of `assert` so the check survives `python -O`
        # while callers still see the same exception type.
        if len(myData) != expected:
            raise AssertionError("decoded %s items, expected %s" %
                                 (len(myData), expected))
        return myData
Example #5
0
    def getData(self):
        """
        Unpack a binary blob according to the specification given in the header

        The decoded array is cached in self.data.  Once decoding succeeds,
        self.rawData is released and self.bytecode is set to the numpy scalar
        type of the result.  For every recognised compression scheme other
        than "NONE", self.size is updated to the uncompressed size.

        @return: dataset as numpy.ndarray, or the current self.data (possibly
                 None) when there is no raw data left to decode
        @raise ImportError: if the block is byte-offset compressed and the
                            byte_offset module cannot be imported
        @raise IOError: if a gzip block cannot be decompressed by any of the
                        fallback strategies
        """
        if self.data is not None:
            return self.data
        if self.rawData is None:
            return self.data

        if self.bytecode is None:
            if "DATATYPE" in self.capsHeader:
                self.bytecode = DATA_TYPES[self.header[
                    self.capsHeader["DATATYPE"]]]
            else:
                self.bytecode = np.uint16
        # The numpy shape is self.dims in reverse order.
        shape = tuple(self.dims[::-1])

        if "COMPRESSION" in self.capsHeader:
            compression = self.header[self.capsHeader["COMPRESSION"]].upper()
            uncompressed_size = self.bpp
            for dim in shape:
                uncompressed_size *= dim
            # The scheme is matched by substring, so "GZIP" and "BZ" have to
            # be tested before the generic "Z".
            if "OFFSET" in compression:
                try:
                    import byte_offset
                except ImportError:
                    logging.error("Unimplemented compression scheme:  %s" %
                                  compression)
                    # Nothing can be decoded without the module; re-raise
                    # instead of failing later on an unbound rawData.
                    raise
                myData = byte_offset.analyseCython(self.rawData,
                                                   size=uncompressed_size)
                rawData = myData.astype(self.bytecode).tobytes()
                self.size = uncompressed_size
            elif compression == "NONE":
                rawData = self.rawData
            elif "GZIP" in compression:
                import io  # local import: BytesIO works on Python 2 and 3
                try:
                    rawData = gzip.GzipFile(
                        fileobj=io.BytesIO(self.rawData)).read()
                except IOError:
                    logging.warning(
                        "Encounter the python-gzip bug with trailing garbage, trying subprocess gzip"
                    )
                    try:
                        # This is an ugly hack against a bug in Python gzip
                        import subprocess
                        sub = subprocess.Popen(["gzip", "-d", "-f"],
                                               stdout=subprocess.PIPE,
                                               stdin=subprocess.PIPE,
                                               stderr=subprocess.PIPE)
                        rawData, err = sub.communicate(input=self.rawData)
                        logging.debug(
                            "Gzip subprocess ended with %s err= %s; I got %s bytes back"
                            % (sub.wait(), err, len(rawData)))
                    except Exception:
                        logging.warning(
                            "Unable to use the subprocess gzip. is gzip available? "
                        )
                        # Last resort: drop 1..512 trailing bytes until the
                        # stream decompresses.
                        for i in range(1, 513):
                            try:
                                rawData = gzip.GzipFile(fileobj=io.BytesIO(
                                    self.rawData[:-i])).read()
                            except IOError:
                                logging.debug(
                                    "trying with %s bytes less, doesn't work" %
                                    i)
                            else:
                                break
                        else:
                            logging.error(
                                "I am totally unable to read this gzipped compressed data block, giving up"
                            )
                            raise IOError(
                                "unable to decompress the gzip data block")
                self.size = uncompressed_size
            elif "BZ" in compression:
                rawData = bz2.decompress(self.rawData)
                self.size = uncompressed_size
            elif "Z" in compression:
                rawData = zlib.decompress(self.rawData)
                self.size = uncompressed_size
            else:
                logging.warning("Unknown compression scheme %s" % compression)
                rawData = self.rawData
        else:
            rawData = self.rawData

        expected = self.size
        obtained = len(rawData)
        if expected > obtained:
            logging.error("Data stream is incomplete: %s < expected %s bytes" %
                          (obtained, expected))
            # Pad with a byte string: a text "\x00" cannot be added to bytes.
            rawData += b"\x00" * (expected - obtained)
        elif expected < obtained:
            logging.info(
                "Data stream contains trailing junk : %s > expected %s bytes" %
                (obtained, expected))
            rawData = rawData[:expected]

        # frombuffer() gives a read-only view of rawData; byteswap() and
        # copy() both return a fresh, writable array.
        flat = np.frombuffer(rawData, self.bytecode)
        if self.swap_needed():
            flat = flat.byteswap()
        else:
            flat = flat.copy()
        data = flat.reshape(shape)
        self.data = data
        self.rawData = None  # no need to keep garbage in memory
        self.bytecode = data.dtype.type
        return data
Example #6
0
    def _unpack(self):
        """
        Unpack a binary blob according to the specification given in the header

        :return: dataset as numpy.ndarray
        """
        data = None
        if self._data is not None:
            data = self._data
        elif self.file is None:
            data = self._data
        else:
            if self._dtype is None:
                assert(False)
            shape = self.shape
            with self.file.lock:
                if self.file.closed:
                    logger.error("file: %s from %s is closed. Cannot read data." % (self.file, self.file.filename))
                    return
                else:
                    self.file.seek(self.start)
                    try:
                        fileData = self.file.read(self.size)
                    except Exception as e:
                        if isinstance(self.file, fabioutils.GzipFile):
                            if compression_module.is_incomplete_gz_block_exception(e):
                                return numpy.zeros(shape)
                        raise e

            if self._data_compression is not None:
                compression = self._data_compression
                uncompressed_size = self._dtype.itemsize
                for i in shape:
                    uncompressed_size *= i
                if "OFFSET" in compression:
                    try:
                        import byte_offset  # IGNORE:F0401
                    except ImportError as error:
                        logger.error("Unimplemented compression scheme:  %s (%s)" % (compression, error))
                    else:
                        myData = byte_offset.analyseCython(fileData, size=uncompressed_size)
                        rawData = myData.astype(self._dtype).tostring()
                        self.size = uncompressed_size
                elif compression == "NONE":
                    rawData = fileData
                elif "GZIP" in compression:
                    rawData = decGzip(fileData)
                    self.size = uncompressed_size
                elif "BZ" in compression:
                    rawData = decBzip2(fileData)
                    self.size = uncompressed_size
                elif "Z" in compression:
                    rawData = decZlib(fileData)
                    self.size = uncompressed_size
                else:
                    logger.warning("Unknown compression scheme %s" % compression)
                    rawData = fileData

            else:
                rawData = fileData

            expected = self.size
            obtained = len(rawData)
            if expected > obtained:
                logger.error("Data stream is incomplete: %s < expected %s bytes" % (obtained, expected))
                rawData += "\x00".encode("ascii") * (expected - obtained)
            elif expected < len(rawData):
                logger.info("Data stream contains trailing junk : %s > expected %s bytes" % (obtained, expected))
                rawData = rawData[:expected]
            data = numpy.frombuffer(rawData, self._dtype).copy().reshape(shape)
            if self.swap_needed():
                data.byteswap(True)
            self._data = data
            self._dtype = None
        return data
Example #7
0
    def getData(self):
        """
        Unpack a binary blob according to the specification given in the header

        self.size bytes are read from self.file at offset self.start (while
        holding self.file.lock), decompressed according to
        self._data_compression and viewed as an array of self._bytecode whose
        shape is self.dims reversed.  The result is cached in self._data and
        self._bytecode is set to the numpy scalar type of the result.

        :return: dataset as numpy.ndarray; the cached self._data when it is
                 set or when there is no file; None when the file is closed;
                 an array of zeros when reading a fabioutils.GzipFile fails
                 with an incomplete gz block
        :raises AssertionError: if data must be read but self._bytecode is None
        :raises ImportError: if the block is byte-offset compressed and the
                             byte_offset module cannot be imported
        """
        if self._data is not None or self.file is None:
            return self._data

        if self._bytecode is None:
            # Explicit raise: a bare `assert` would be stripped by `python -O`.
            raise AssertionError(
                "_bytecode is not set, cannot decode the data block")
        # The numpy shape is self.dims in reverse order.
        shape = tuple(self.dims[::-1])
        with self.file.lock:
            if self.file.closed:
                logger.error(
                    "file: %s from %s is closed. Cannot read data." %
                    (self.file, self.file.filename))
                return None
            self.file.seek(self.start)
            try:
                fileData = self.file.read(self.size)
            except Exception as e:
                if (isinstance(self.file, fabioutils.GzipFile) and
                        compression_module.is_incomplete_gz_block_exception(e)):
                    return numpy.zeros(shape)
                raise

        compression = self._data_compression
        if compression is None:
            rawData = fileData
        else:
            uncompressed_size = self.bpp
            for dim in shape:
                uncompressed_size *= dim
            # The scheme is matched by substring, so "GZIP" and "BZ" have to
            # be tested before the generic "Z".
            if "OFFSET" in compression:
                try:
                    import byte_offset  # IGNORE:F0401
                except ImportError as error:
                    logger.error(
                        "Unimplemented compression scheme:  %s (%s)" %
                        (compression, error))
                    # Nothing can be decoded without the module; re-raise
                    # instead of failing later on an unbound rawData.
                    raise
                myData = byte_offset.analyseCython(
                    fileData, size=uncompressed_size)
                rawData = myData.astype(self._bytecode).tobytes()
                self.size = uncompressed_size
            elif compression == "NONE":
                rawData = fileData
            elif "GZIP" in compression:
                rawData = decGzip(fileData)
                self.size = uncompressed_size
            elif "BZ" in compression:
                rawData = decBzip2(fileData)
                self.size = uncompressed_size
            elif "Z" in compression:
                rawData = decZlib(fileData)
                self.size = uncompressed_size
            else:
                logger.warning("Unknown compression scheme %s" %
                               compression)
                rawData = fileData

        expected = self.size
        obtained = len(rawData)
        if expected > obtained:
            logger.error(
                "Data stream is incomplete: %s < expected %s bytes" %
                (obtained, expected))
            rawData += b"\x00" * (expected - obtained)
        elif expected < obtained:
            logger.info(
                "Data stream contains trailing junk : %s > expected %s bytes"
                % (obtained, expected))
            rawData = rawData[:expected]
        # copy() makes the array writable so it can be byte-swapped in place.
        data = numpy.frombuffer(rawData, self._bytecode).copy().reshape(shape)
        if self.swap_needed():
            data.byteswap(True)
        self._data = data
        self._bytecode = data.dtype.type
        return data
Example #8
0
    def getData(self):
        """
        Unpack a binary blob according to the specification given in the header

        self.size bytes are read from self.file at offset self.start (while
        holding self.file.lock), decompressed according to the COMPRESSION
        header entry (if any) and viewed as an array of self._bytecode whose
        shape is self.dims reversed.  The result is cached in self._data and
        self._bytecode is set to the numpy scalar type of the result.

        @return: dataset as numpy.ndarray; the cached self._data when it is
                 set or when there is no file; None when the file is closed
        @raise ImportError: if the block is byte-offset compressed and the
                            byte_offset module cannot be imported
        """
        if self._data is not None or self.file is None:
            return self._data

        if self._bytecode is None:
            # Take the element type from the header, else default to uint16
            if "DATATYPE" in self.capsHeader:
                self._bytecode = DATA_TYPES[self.header[self.capsHeader["DATATYPE"]]]
            else:
                self._bytecode = numpy.uint16
        # The numpy shape is self.dims in reverse order.
        shape = tuple(self.dims[::-1])
        with self.file.lock:
            if self.file.closed:
                logger.error("file: %s from %s is closed. Cannot read data." % (self.file, self.file.filename))
                return None
            self.file.seek(self.start)
            fileData = self.file.read(self.size)

        if "COMPRESSION" in self.capsHeader:
            compression = self.header[self.capsHeader["COMPRESSION"]].upper()
            uncompressed_size = self.bpp
            for dim in shape:
                uncompressed_size *= dim
            # The scheme is matched by substring, so "GZIP" and "BZ" have to
            # be tested before the generic "Z".
            if "OFFSET" in compression:
                try:
                    import byte_offset  # IGNORE:F0401
                except ImportError as error:
                    logger.error("Unimplemented compression scheme:  %s (%s)" % (compression, error))
                    # Nothing can be decoded without the module; re-raise
                    # instead of failing later on an unbound rawData.
                    raise
                myData = byte_offset.analyseCython(fileData, size=uncompressed_size)
                rawData = myData.astype(self._bytecode).tobytes()
                self.size = uncompressed_size
            elif compression == "NONE":
                rawData = fileData
            elif "GZIP" in compression:
                rawData = decGzip(fileData)
                self.size = uncompressed_size
            elif "BZ" in compression:
                rawData = decBzip2(fileData)
                self.size = uncompressed_size
            elif "Z" in compression:
                rawData = decZlib(fileData)
                self.size = uncompressed_size
            else:
                logger.warning("Unknown compression scheme %s" % compression)
                rawData = fileData
        else:
            rawData = fileData

        expected = self.size
        obtained = len(rawData)
        if expected > obtained:
            logger.error("Data stream is incomplete: %s < expected %s bytes" % (obtained, expected))
            # Pad with a byte string: a text "\x00" cannot be added to bytes.
            rawData += b"\x00" * (expected - obtained)
        elif expected < obtained:
            logger.info("Data stream contains trailing junk : %s > expected %s bytes" % (obtained, expected))
            rawData = rawData[:expected]
        # frombuffer() replaces the deprecated binary fromstring(); copy()
        # makes the array writable so it can be byte-swapped in place.
        data = numpy.frombuffer(rawData, self._bytecode).copy().reshape(shape)
        if self.swap_needed():
            data.byteswap(True)
        self._data = data
        self._bytecode = data.dtype.type
        return data
Example #9
0
    def getData(self):
        """
        Unpack a binary blob according to the specification given in the header

        self.size bytes are read from self.file at offset self.start (while
        holding self.file.lock), decompressed according to the COMPRESSION
        header entry (if any) and viewed as an array of self._bytecode whose
        shape is self.dims reversed.  The result is cached in self._data and
        self._bytecode is set to the numpy scalar type of the result.

        @return: dataset as numpy.ndarray; the cached self._data when it is
                 set or when there is no file; None when the file is closed
        @raise ImportError: if the block is byte-offset compressed and the
                            byte_offset module cannot be imported
        """
        if self._data is not None or self.file is None:
            return self._data

        if self._bytecode is None:
            # Take the element type from the header, else default to uint16
            if "DATATYPE" in self.capsHeader:
                self._bytecode = DATA_TYPES[self.header[
                    self.capsHeader["DATATYPE"]]]
            else:
                self._bytecode = numpy.uint16
        # The numpy shape is self.dims in reverse order.
        shape = tuple(self.dims[::-1])
        with self.file.lock:
            if self.file.closed:
                logger.error(
                    "file: %s from %s is closed. Cannot read data." %
                    (self.file, self.file.filename))
                return None
            self.file.seek(self.start)
            fileData = self.file.read(self.size)

        if "COMPRESSION" in self.capsHeader:
            compression = self.header[
                self.capsHeader["COMPRESSION"]].upper()
            uncompressed_size = self.bpp
            for dim in shape:
                uncompressed_size *= dim
            # The scheme is matched by substring, so "GZIP" and "BZ" have to
            # be tested before the generic "Z".
            if "OFFSET" in compression:
                try:
                    import byte_offset  # IGNORE:F0401
                except ImportError as error:
                    logger.error(
                        "Unimplemented compression scheme:  %s (%s)" %
                        (compression, error))
                    # Nothing can be decoded without the module; re-raise
                    # instead of failing later on an unbound rawData.
                    raise
                myData = byte_offset.analyseCython(
                    fileData, size=uncompressed_size)
                rawData = myData.astype(self._bytecode).tobytes()
                self.size = uncompressed_size
            elif compression == "NONE":
                rawData = fileData
            elif "GZIP" in compression:
                rawData = decGzip(fileData)
                self.size = uncompressed_size
            elif "BZ" in compression:
                rawData = decBzip2(fileData)
                self.size = uncompressed_size
            elif "Z" in compression:
                rawData = decZlib(fileData)
                self.size = uncompressed_size
            else:
                logger.warning("Unknown compression scheme %s" %
                               compression)
                rawData = fileData
        else:
            rawData = fileData

        expected = self.size
        obtained = len(rawData)
        if expected > obtained:
            logger.error(
                "Data stream is incomplete: %s < expected %s bytes" %
                (obtained, expected))
            # Pad with a byte string: a text "\x00" cannot be added to bytes.
            rawData += b"\x00" * (expected - obtained)
        elif expected < obtained:
            logger.info(
                "Data stream contains trailing junk : %s > expected %s bytes"
                % (obtained, expected))
            rawData = rawData[:expected]
        # frombuffer() replaces the deprecated binary fromstring(); copy()
        # makes the array writable so it can be byte-swapped in place.
        data = numpy.frombuffer(rawData,
                                self._bytecode).copy().reshape(shape)
        if self.swap_needed():
            data.byteswap(True)
        self._data = data
        self._bytecode = data.dtype.type
        return data
Example #10
0
    def getData(self):
        """
        Unpack a binary blob according to the specification given in the header

        The decoded array is cached in self.data.  Once decoding succeeds,
        self.rawData is released and self.bytecode is set to the numpy scalar
        type of the result.  For every recognised compression scheme other
        than "NONE", self.size is updated to the uncompressed size.

        @return: dataset as numpy.ndarray, or the current self.data (possibly
                 None) when there is no raw data left to decode
        @raise ImportError: if the block is byte-offset compressed and the
                            byte_offset module cannot be imported
        @raise IOError: if a gzip block cannot be decompressed by any of the
                        fallback strategies
        """
        if self.data is not None:
            return self.data
        if self.rawData is None:
            return self.data

        if self.bytecode is None:
            if "DATATYPE" in self.capsHeader:
                self.bytecode = DATA_TYPES[self.header[self.capsHeader["DATATYPE"]]]
            else:
                self.bytecode = np.uint16
        # The numpy shape is self.dims in reverse order.
        shape = tuple(self.dims[::-1])

        if "COMPRESSION" in self.capsHeader:
            compression = self.header[self.capsHeader["COMPRESSION"]].upper()
            uncompressed_size = self.bpp
            for dim in shape:
                uncompressed_size *= dim
            # The scheme is matched by substring, so "GZIP" and "BZ" have to
            # be tested before the generic "Z".
            if "OFFSET" in compression:
                try:
                    import byte_offset
                except ImportError:
                    logging.error("Unimplemented compression scheme:  %s" % compression)
                    # Nothing can be decoded without the module; re-raise
                    # instead of failing later on an unbound rawData.
                    raise
                myData = byte_offset.analyseCython(self.rawData, size=uncompressed_size)
                rawData = myData.astype(self.bytecode).tobytes()
                self.size = uncompressed_size
            elif compression == "NONE":
                rawData = self.rawData
            elif "GZIP" in compression:
                import io  # local import: BytesIO works on Python 2 and 3
                try:
                    rawData = gzip.GzipFile(fileobj=io.BytesIO(self.rawData)).read()
                except IOError:
                    logging.warning("Encounter the python-gzip bug with trailing garbage, trying subprocess gzip")
                    try:
                        # This is an ugly hack against a bug in Python gzip
                        import subprocess
                        sub = subprocess.Popen(["gzip", "-d", "-f"], stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
                        rawData, err = sub.communicate(input=self.rawData)
                        logging.debug("Gzip subprocess ended with %s err= %s; I got %s bytes back" % (sub.wait(), err, len(rawData)))
                    except Exception:
                        logging.warning("Unable to use the subprocess gzip. is gzip available? ")
                        # Last resort: drop 1..512 trailing bytes until the
                        # stream decompresses.
                        for i in range(1, 513):
                            try:
                                rawData = gzip.GzipFile(fileobj=io.BytesIO(self.rawData[:-i])).read()
                            except IOError:
                                logging.debug("trying with %s bytes less, doesn't work" % i)
                            else:
                                break
                        else:
                            logging.error("I am totally unable to read this gzipped compressed data block, giving up")
                            raise IOError("unable to decompress the gzip data block")
                self.size = uncompressed_size
            elif "BZ" in compression:
                rawData = bz2.decompress(self.rawData)
                self.size = uncompressed_size
            elif "Z" in compression:
                rawData = zlib.decompress(self.rawData)
                self.size = uncompressed_size
            else:
                logging.warning("Unknown compression scheme %s" % compression)
                rawData = self.rawData
        else:
            rawData = self.rawData

        expected = self.size
        obtained = len(rawData)
        if expected > obtained:
            logging.error("Data stream is incomplete: %s < expected %s bytes" % (obtained, expected))
            # Pad with a byte string: a text "\x00" cannot be added to bytes.
            rawData += b"\x00" * (expected - obtained)
        elif expected < obtained:
            logging.info("Data stream contains trailing junk : %s > expected %s bytes" % (obtained, expected))
            rawData = rawData[:expected]

        # frombuffer() gives a read-only view of rawData; byteswap() and
        # copy() both return a fresh, writable array.
        flat = np.frombuffer(rawData, self.bytecode)
        if self.swap_needed():
            flat = flat.byteswap()
        else:
            flat = flat.copy()
        data = flat.reshape(shape)
        self.data = data
        self.rawData = None  # no need to keep garbage in memory
        self.bytecode = data.dtype.type
        return data