Exemple #1
0
    def __init__(self,
                 output_filename,
                 mz_dtype=np.float64,
                 intensity_dtype=np.float32,
                 mode="auto",
                 mz_compression=NoCompression(),
                 intensity_compression=NoCompression(),
                 polarity=None):
        """
        Open the ".imzML" / ".ibd" output pair and initialise the writer state.

        :param output_filename:
            path whose extension (if any) is removed to form the base name;
            ".imzML" and ".ibd" are appended to that base name
        :param mz_dtype:
            stored as ``self.mz_dtype``
        :param intensity_dtype:
            stored as ``self.intensity_dtype``
        :param mode:
            stored as ``self.mode``; it is not validated here
        :param mz_compression:
            stored as ``self.mz_compression``
        :param intensity_compression:
            stored as ``self.intensity_compression``
        :param polarity:
            forwarded unchanged to ``self._setPolarity``
        :raises:
            any exception raised while opening the files or finishing the
            set-up is re-raised after the opened file handles are closed

        NOTE: the ``NoCompression()`` defaults are created once, when the
        function is defined, so every writer relying on the defaults shares
        those instances.
        """

        self.mz_dtype = mz_dtype
        self.intensity_dtype = intensity_dtype
        self.mode = mode
        self.mz_compression = mz_compression
        self.intensity_compression = intensity_compression
        self.run_id = os.path.splitext(output_filename)[0]
        self.filename = self.run_id + ".imzML"
        self.ibd_filename = self.run_id + ".ibd"

        self.xml = open(self.filename, 'w')
        self.ibd = None
        try:
            self.ibd = open(self.ibd_filename, 'wb+')
            self.sha1 = hashlib.sha1()
            self.uuid = uuid.uuid4()

            # The binary file starts with the 16 raw bytes of the UUID.
            self._write_ibd(self.uuid.bytes)

            self.wheezy_engine = Engine(
                loader=DictLoader({'imzml': IMZML_TEMPLATE}),
                extensions=[CoreExtension()])
            self.imzml_template = self.wheezy_engine.get_template('imzml')
            self.spectra = []
            self.first_mz = None
            # mz_hash -> list of mz_data (disk location)
            self.hashes = defaultdict(list)
            # mz_array (as tuple) -> mz_data (disk location)
            self.lru_cache = _MaxlenDict(maxlen=10)
            self._setPolarity(polarity)
        except BaseException:
            # Construction failed: nobody will ever get a reference to this
            # object, so close the handles here instead of leaking them.
            if self.ibd is not None:
                self.ibd.close()
            self.xml.close()
            raise
Exemple #2
0
 def test_compress_float():
     """Round-trip random floats through NoCompression and expect no change."""
     n_values = 100
     original = np.random.rand(n_values)
     codec = NoCompression()

     payload = codec.compress(original.tobytes())
     raw = codec.decompress(payload)

     # No dtype is passed, so np.frombuffer uses its default (float).
     restored = np.frombuffer(raw, count=n_values)
     assert_array_equal(original, restored)
Exemple #3
0
    def __init__(self,
                 output_filename,
                 mz_dtype=np.float64,
                 intensity_dtype=np.float32,
                 mode="auto",
                 mz_compression=NoCompression(),
                 intensity_compression=NoCompression()):
        """
        Create an imzML file set.

        :param output_filename:
            is used to make the base name by removing the extension (if any).
            Two files will be made by adding ".ibd" and ".imzML" to the base
            name
        :param intensity_dtype:
            The numpy data type to use for saving intensity values
        :param mz_dtype:
            The numpy data type to use for saving mz array values
        :param mode:
            "continuous" mode will save the first mz array only
            "processed" mode saves every mz array separately
            "auto" mode writes only mz arrays that have not already been written
        :param intensity_compression:
            How to compress the intensity data before saving
            must be an instance of NoCompression or ZlibCompression
        :param mz_compression:
            How to compress the mz array data before saving
            must be an instance of NoCompression or ZlibCompression
        :raises:
            any exception raised while opening the files or finishing the
            set-up is re-raised after the opened file handles are closed
        :return:
            None

        NOTE: the ``NoCompression()`` defaults are created once, when the
        function is defined, so every writer relying on the defaults shares
        those instances.
        """

        self.mz_dtype = mz_dtype
        self.intensity_dtype = intensity_dtype
        self.mode = mode
        self.mz_compression = mz_compression
        self.intensity_compression = intensity_compression
        self.run_id = os.path.splitext(output_filename)[0]
        self.filename = self.run_id + ".imzML"
        self.ibd_filename = self.run_id + ".ibd"

        self.xml = open(self.filename, 'w')
        self.ibd = None
        try:
            self.ibd = open(self.ibd_filename, 'wb+')
            self.sha1 = hashlib.sha1()
            self.uuid = uuid.uuid4()

            # The binary file starts with 16 bytes derived from the UUID.
            # NOTE(review): bytes_le stores the first three UUID fields in
            # little-endian order, unlike uuid.bytes -- confirm this is the
            # byte order that readers of the .ibd header expect.
            self._write_ibd(self.uuid.bytes_le)

            self.wheezy_engine = Engine(
                loader=DictLoader({'imzml': IMZML_TEMPLATE}),
                extensions=[CoreExtension()])
            self.imzml_template = self.wheezy_engine.get_template('imzml')

            self.spectra = []
            self.first_mz = None
            # mz_hash -> list of mz_data (disk location)
            self.hashes = defaultdict(list)
            # mz_array (as tuple) -> mz_data (disk location)
            self.lru_cache = MaxlenDict(maxlen=10)
        except BaseException:
            # Construction failed: nobody will ever get a reference to this
            # object, so close the handles here instead of leaking them.
            if self.ibd is not None:
                self.ibd.close()
            self.xml.close()
            raise
Exemple #4
0
 def test_compress_int():
     """Round-trip random int32 values through NoCompression unchanged."""
     n_values = 100
     int_type = np.int32
     original = np.random.randint(0, 1000, n_values, dtype=int_type)
     codec = NoCompression()

     payload = codec.compress(original.tobytes())
     raw = codec.decompress(payload)

     # Decode with the same dtype the bytes were produced from.
     restored = np.frombuffer(raw, count=n_values, dtype=int_type)
     assert_array_equal(original, restored)
Exemple #5
0
def get_compression(compression, **kwargs):
    """Retrieve appropriate compression type.

    :param compression:
        one of:
        ``None`` or the string "none" (any letter case) -> new NoCompression()
        a NoCompression / ZlibCompression instance -> returned unchanged
        the string "zlib" (any letter case) -> ZlibCompression(**kwargs)
    :param kwargs:
        forwarded to ZlibCompression only; ignored in every other case
    :return:
        a NoCompression or ZlibCompression instance
    :raises ValueError:
        if ``compression`` is none of the supported values
    """
    if compression is None:
        return NoCompression()
    if isinstance(compression, (NoCompression, ZlibCompression)):
        return compression
    if isinstance(compression, str):
        # Compare case-insensitively for both names so that "none" is not
        # treated differently from "None".
        name = compression.lower()
        if name == "none":
            return NoCompression()
        if name == "zlib":
            return ZlibCompression(**kwargs)
    # Fail loudly instead of implicitly returning None, which would only
    # surface later as an unrelated error at the first compress() call.
    raise ValueError(
        "Unsupported compression %r; expected None, 'None', 'zlib' or a "
        "NoCompression/ZlibCompression instance" % (compression,))
Exemple #6
0
 def _encode_and_write(self,
                       data,
                       dtype=np.float32,
                       compression=NoCompression()):
     """
     Convert ``data`` to ``dtype``, compress its bytes and write them out.

     :param data:
         array-like values; converted with ``np.asarray(data, dtype=dtype)``
     :param dtype:
         numpy data type the values are converted to before serialising
     :param compression:
         object whose ``compress`` method is applied to the raw bytes
     :return:
         tuple of (position of ``self.ibd`` before the write, length of the
         first axis of the converted array, result of ``self._write_ibd``)
     """
     array = np.asarray(data, dtype=dtype)
     # Remember where this chunk starts before anything is written.
     start = self.ibd.tell()
     payload = compression.compress(array.tobytes())
     n_items = array.shape[0]
     written = self._write_ibd(payload)
     return start, n_items, written
Exemple #7
0
class TestImzMLWriter:
    """Tests that write imzML file sets with ImzMLWriter."""

    @staticmethod
    def test_writer_single_pixel(get_temp_path):
        """Write one spectrum in processed mode and read it back."""
        mzs = np.linspace(100, 1000, 20)
        intensities = np.random.rand(mzs.shape[0])
        position = [1, 1, 1]
        path = os.path.join(get_temp_path, "test.imzML")

        with ImzMLWriter(path, mode="processed") as writer:
            writer.add_spectrum(mzs, intensities, coords=position)

        with ImzMLParser(path) as reader:
            read_mzs, read_intensities = reader.get_spectrum(0)
            assert_array_almost_equal(read_mzs, mzs, 4)
            assert_array_almost_equal(read_intensities, intensities, 4)
            assert reader.n_pixels == 1

    @staticmethod
    @pytest.mark.parametrize("data_mode", ("processed", "continuous", "auto"))
    def test_writer_image(get_temp_path, data_mode):
        """Write a 3x3 grid of spectra and compare every pixel on read-back."""
        mzs = np.linspace(100, 1000, 20)
        # Grid positions (x, y, 1) for x, y in 1..3, y varying fastest.
        grid = [[x, y, 1] for x in (1, 2, 3) for y in (1, 2, 3)]
        all_intensities = np.random.rand(len(grid), mzs.shape[0])
        path = os.path.join(get_temp_path, "test.imzML")

        with ImzMLWriter(path, mode=data_mode) as writer:
            for intensities, position in zip(all_intensities, grid):
                writer.add_spectrum(mzs, intensities, coords=position)

        with ImzMLParser(path) as reader:
            for index, (read_mzs, read_intensities) in enumerate(reader):
                assert_array_almost_equal(read_mzs, mzs, 4)
                assert_array_almost_equal(
                    read_intensities, all_intensities[index], 4)
                assert reader.n_pixels == len(grid)

    @staticmethod
    @pytest.mark.parametrize(
        "compression",
        (NoCompression(), ZlibCompression(), None, "None", "zlib"))
    def test_writer_with_compression(get_temp_path, compression):
        """Write one spectrum with each accepted form of compression argument.

        Only checks that writing completes; nothing is read back.
        """
        mzs = np.linspace(100, 1000, 20)
        intensities = np.random.rand(mzs.shape[0])
        position = [1, 1, 1]
        path = os.path.join(get_temp_path, "test.imzML")

        writer_context = ImzMLWriter(
            path,
            mode="processed",
            mz_compression=compression,
            intensity_compression=compression,
        )
        with writer_context as writer:
            writer.add_spectrum(mzs, intensities, coords=position)

    @staticmethod
    @pytest.mark.parametrize("round_digits", (None, 4))
    def test_writer_zlib_compression_round(get_temp_path, round_digits):
        """Write one spectrum using ZlibCompression built with round_digits.

        Only checks that writing completes; nothing is read back.
        """
        mzs = np.linspace(100, 1000, 20)
        intensities = np.random.rand(mzs.shape[0])
        position = [1, 1, 1]
        path = os.path.join(get_temp_path, "test.imzML")

        # The same compressor instance is used for both arrays.
        zlib_codec = ZlibCompression(round_digits)
        writer_context = ImzMLWriter(
            path,
            mode="processed",
            mz_compression=zlib_codec,
            intensity_compression=zlib_codec,
        )
        with writer_context as writer:
            writer.add_spectrum(mzs, intensities, coords=position)