def __init__(
    self,
    output_filename,
    mz_dtype=np.float64,
    intensity_dtype=np.float32,
    mode="auto",
    mz_compression=NoCompression(),
    intensity_compression=NoCompression(),
    polarity=None,
):
    """
    Create an imzML file set and initialise the writer state.

    :param output_filename:
        used to build the base name by stripping the extension (if any);
        the files "<base>.imzML" and "<base>.ibd" are opened for writing
    :param mz_dtype:
        stored as ``self.mz_dtype`` (presumably the numpy dtype used for m/z arrays)
    :param intensity_dtype:
        stored as ``self.intensity_dtype`` (presumably the numpy dtype used for intensities)
    :param mode:
        stored as ``self.mode``; not validated here
    :param mz_compression:
        compressor object stored as ``self.mz_compression``
    :param intensity_compression:
        compressor object stored as ``self.intensity_compression``
    :param polarity:
        forwarded unchanged to ``self._setPolarity``
    :raises OSError: if either output file cannot be opened
    """
    self.mz_dtype = mz_dtype
    self.intensity_dtype = intensity_dtype
    self.mode = mode
    self.mz_compression = mz_compression
    self.intensity_compression = intensity_compression

    self.run_id = os.path.splitext(output_filename)[0]
    self.filename = self.run_id + ".imzML"
    self.ibd_filename = self.run_id + ".ibd"

    # NOTE(review): text mode uses the platform default encoding; confirm it
    # matches the encoding declared by IMZML_TEMPLATE.
    self.xml = open(self.filename, 'w')
    try:
        self.ibd = open(self.ibd_filename, 'wb+')
    except OSError:
        # Do not leak the already-open XML handle when the binary file
        # cannot be created.
        self.xml.close()
        raise

    self.sha1 = hashlib.sha1()
    self.uuid = uuid.uuid4()
    # First payload handed to the freshly truncated .ibd file is the UUID.
    self._write_ibd(self.uuid.bytes)

    self.wheezy_engine = Engine(
        loader=DictLoader({'imzml': IMZML_TEMPLATE}),
        extensions=[CoreExtension()],
    )
    self.imzml_template = self.wheezy_engine.get_template('imzml')

    self.spectra = []
    self.first_mz = None
    self.hashes = defaultdict(list)  # mz_hash -> list of mz_data (disk location)
    self.lru_cache = _MaxlenDict(maxlen=10)  # mz_array (as tuple) -> mz_data (disk location)
    self._setPolarity(polarity)
def test_compress_float():
    """Random floats survive a NoCompression compress/decompress round trip."""
    n_values = 100
    original = np.random.rand(n_values)
    codec = NoCompression()

    payload = codec.compress(original.tobytes())
    restored_bytes = codec.decompress(payload)

    # np.frombuffer falls back to its default dtype here, as in the data above.
    restored = np.frombuffer(restored_bytes, count=n_values)
    assert_array_equal(original, restored)
def __init__(
    self,
    output_filename,
    mz_dtype=np.float64,
    intensity_dtype=np.float32,
    mode="auto",
    mz_compression=NoCompression(),
    intensity_compression=NoCompression(),
):
    """
    Create an imzML file set.

    :param output_filename:
        is used to make the base name by removing the extension (if any).
        two files will be made by adding ".ibd" and ".imzML" to the base name
    :param intensity_dtype:
        The numpy data type to use for saving intensity values
    :param mz_dtype:
        The numpy data type to use for saving mz array values
    :param mode:
        "continuous" mode will save the first mz array only
        "processed" mode saves every mz array separately
        "auto" mode writes only mz arrays that have not already been written
    :param intensity_compression:
        How to compress the intensity data before saving
        must be an instance of NoCompression or ZlibCompression
    :param mz_compression:
        How to compress the mz array data before saving
        must be an instance of NoCompression or ZlibCompression
    :raises OSError: if either output file cannot be opened
    :return:
        None
    """
    self.mz_dtype = mz_dtype
    self.intensity_dtype = intensity_dtype
    self.mode = mode
    self.mz_compression = mz_compression
    self.intensity_compression = intensity_compression

    self.run_id = os.path.splitext(output_filename)[0]
    self.filename = self.run_id + ".imzML"
    self.ibd_filename = self.run_id + ".ibd"

    # NOTE(review): text mode uses the platform default encoding; confirm it
    # matches the encoding declared by IMZML_TEMPLATE.
    self.xml = open(self.filename, 'w')
    try:
        self.ibd = open(self.ibd_filename, 'wb+')
    except OSError:
        # Do not leak the already-open XML handle when the binary file
        # cannot be created.
        self.xml.close()
        raise

    self.sha1 = hashlib.sha1()
    self.uuid = uuid.uuid4()
    # First payload handed to the freshly truncated .ibd file is the UUID
    # (little-endian field layout, via ``bytes_le``).
    self._write_ibd(self.uuid.bytes_le)

    self.wheezy_engine = Engine(
        loader=DictLoader({'imzml': IMZML_TEMPLATE}),
        extensions=[CoreExtension()],
    )
    self.imzml_template = self.wheezy_engine.get_template('imzml')

    self.spectra = []
    self.first_mz = None
    self.hashes = defaultdict(list)  # mz_hash -> list of mz_data (disk location)
    self.lru_cache = MaxlenDict(maxlen=10)  # mz_array (as tuple) -> mz_data (disk location)
def test_compress_int():
    """Random int32 values survive a NoCompression compress/decompress round trip."""
    n_values = 100
    original = np.random.randint(0, 1000, n_values, dtype=np.int32)
    codec = NoCompression()

    payload = codec.compress(original.tobytes())
    restored_bytes = codec.decompress(payload)

    # Decode with the same dtype that produced the raw bytes.
    restored = np.frombuffer(restored_bytes, count=n_values, dtype=np.int32)
    assert_array_equal(original, restored)
def get_compression(compression, **kwargs):
    """Retrieve appropriate compression type.

    :param compression:
        ``None``, an existing ``NoCompression``/``ZlibCompression`` instance,
        or one of the names ``"none"``/``"zlib"`` (matched case-insensitively)
    :param kwargs:
        forwarded to ``ZlibCompression`` when it is built from the name "zlib";
        ignored otherwise
    :return:
        a ``NoCompression`` or ``ZlibCompression`` instance; an instance passed
        in is returned unchanged
    :raises ValueError:
        if ``compression`` is not one of the supported specifications
    """
    if compression is None:
        return NoCompression()

    if isinstance(compression, str):
        # Both names are matched case-insensitively so "None"/"none" behave
        # the same way "zlib"/"ZLIB" already did.
        name = compression.lower()
        if name == "none":
            return NoCompression()
        if name == "zlib":
            return ZlibCompression(**kwargs)
    elif isinstance(compression, (NoCompression, ZlibCompression)):
        return compression

    # Fail here instead of returning None, which would only surface later as
    # an AttributeError when the caller tries to compress data.
    raise ValueError("Unsupported compression: {!r}".format(compression))
def _encode_and_write(self, data, dtype=np.float32, compression=NoCompression()):
    """Convert ``data`` to ``dtype``, compress its raw bytes and pass them to ``_write_ibd``.

    :param data: array-like values to store
    :param dtype: numpy dtype the values are cast to before serialisation
    :param compression: object whose ``compress`` method is applied to the raw bytes
    :return:
        tuple of (position of ``self.ibd`` before writing, length of the
        first axis of the converted array, return value of ``self._write_ibd``
        -- presumably the number of bytes written; confirm against ``_write_ibd``)
    """
    array = np.asarray(data, dtype=dtype)
    start = self.ibd.tell()
    payload = compression.compress(array.tobytes())
    # Read the length before writing, matching the original evaluation order.
    n_items = array.shape[0]
    written = self._write_ibd(payload)
    return start, n_items, written
class TestImzMLWriter:
    """Write spectra with ``ImzMLWriter`` and, where checked, read them back."""

    @staticmethod
    def test_writer_single_pixel(get_temp_path):
        """One processed-mode spectrum is written and read back to 4 decimals."""
        expected_mz = np.linspace(100, 1000, 20)
        expected_intensity = np.random.rand(expected_mz.shape[0])
        pixel = [1, 1, 1]
        target = os.path.join(get_temp_path, "test.imzML")

        with ImzMLWriter(target, mode="processed") as writer:
            writer.add_spectrum(expected_mz, expected_intensity, coords=pixel)

        with ImzMLParser(target) as reader:
            read_mz, read_intensity = reader.get_spectrum(0)
            assert_array_almost_equal(read_mz, expected_mz, 4)
            assert_array_almost_equal(read_intensity, expected_intensity, 4)
            assert reader.n_pixels == 1

    @staticmethod
    @pytest.mark.parametrize("data_mode", ("processed", "continuous", "auto"))
    def test_writer_image(get_temp_path, data_mode):
        """A 3x3 grid sharing one m/z axis round-trips in every writer mode."""
        shared_mz = np.linspace(100, 1000, 20)
        # Same pixel order as spelling the grid out: x outer, y inner, z fixed at 1.
        pixels = [[x, y, 1] for x in (1, 2, 3) for y in (1, 2, 3)]
        intensities = np.random.rand(len(pixels), shared_mz.shape[0])
        target = os.path.join(get_temp_path, "test.imzML")

        with ImzMLWriter(target, mode=data_mode) as writer:
            for row, pixel in zip(intensities, pixels):
                writer.add_spectrum(shared_mz, row, coords=pixel)

        with ImzMLParser(target) as reader:
            for index, (read_mz, read_intensity) in enumerate(reader):
                assert_array_almost_equal(read_mz, shared_mz, 4)
                assert_array_almost_equal(read_intensity, intensities[index], 4)
            assert reader.n_pixels == len(pixels)

    @staticmethod
    @pytest.mark.parametrize(
        "compression", (NoCompression(), ZlibCompression(), None, "None", "zlib"))
    def test_writer_with_compression(get_temp_path, compression):
        """Writing succeeds for each accepted compression specification (no read-back)."""
        mz_values = np.linspace(100, 1000, 20)
        intensity_values = np.random.rand(mz_values.shape[0])
        pixel = [1, 1, 1]
        target = os.path.join(get_temp_path, "test.imzML")

        writer_options = dict(
            mode="processed",
            mz_compression=compression,
            intensity_compression=compression,
        )
        with ImzMLWriter(target, **writer_options) as writer:
            writer.add_spectrum(mz_values, intensity_values, coords=pixel)

    @staticmethod
    @pytest.mark.parametrize("round_digits", (None, 4))
    def test_writer_zlib_compression_round(get_temp_path, round_digits):
        """Writing succeeds with ZlibCompression built from ``round_digits`` (no read-back)."""
        mz_values = np.linspace(100, 1000, 20)
        intensity_values = np.random.rand(mz_values.shape[0])
        pixel = [1, 1, 1]
        target = os.path.join(get_temp_path, "test.imzML")
        codec = ZlibCompression(round_digits)

        writer_options = dict(
            mode="processed",
            mz_compression=codec,
            intensity_compression=codec,
        )
        with ImzMLWriter(target, **writer_options) as writer:
            writer.add_spectrum(mz_values, intensity_values, coords=pixel)