def recal(imzml_out_fn, imzml, fit, m=3):
    """Write a recalibrated copy of an imzML dataset.

    Every recalibration parameter is laid out as an image, filtered with
    ``medfilt2d`` using kernel size ``m``, and the filtered per-pixel
    parameters are passed to ``recalibrate_spectrum`` for each spectrum
    before it is written out.

    :param imzml_out_fn: path handed to ``ImzMLWriter``
    :param imzml: dataset object exposing ``coordinates``, ``imzmldict`` and
        ``getspectrum``
    :param fit: recalibration parameters, indexed as
        ``fit[spectrum_index][parameter_index]``
    :param m: kernel size forwarded to ``medfilt2d``
    """
    pixel_coords = imzml.coordinates
    n_spectra = len(pixel_coords)

    # Spatial smoothing of the recalibration parameters, one plane each.
    planes = []
    for param_ix in range(len(fit[0])):
        # Pixels that hold no spectrum keep the dataset-wide mean value.
        background = np.mean([fit[k][param_ix] for k in range(n_spectra)])
        plane = background + np.zeros(
            (imzml.imzmldict["max count of pixels y"],
             imzml.imzmldict["max count of pixels x"]))
        # Coordinates are 1-based, array indices are 0-based.
        for k, (x, y, _z) in enumerate(pixel_coords):
            plane[y - 1, x - 1] = fit[k][param_ix]
        planes.append(medfilt2d(plane, m))
    smoothed = np.dstack(planes)

    # Recalibrate each spectrum with its pixel's parameters and write it.
    with ImzMLWriter(imzml_out_fn) as imzml_out:
        for k, coords in enumerate(pixel_coords):
            if k % 500 == 0:
                logging.debug(k / float(n_spectra))
            mzs, intensities = imzml.getspectrum(index=k)
            pixel_params = smoothed[coords[1] - 1, coords[0] - 1, :]
            recalibrated = recalibrate_spectrum(np.asarray(mzs), pixel_params)
            imzml_out.addSpectrum(recalibrated, intensities, coords)
def MSIFilter(self, coi, alpha):
    """Filter imzML file for complex of interest.

    Runs ``filterIntens`` on the loaded intensities and m/z values, then
    for every filtered spectrum calls ``kendrickMass`` followed by
    ``glycanProb``, and finally writes the filtered m/z values with the
    resulting intensities to a new imzML file. Wall-clock timings of each
    stage are printed.

    :param coi: complex of interest; ``"N-Glycan"`` triggers
        ``glycanFilter`` first
    :param alpha: forwarded to ``glycanProb``
    """
    if coi == "N-Glycan":
        self.glycanFilter()

    started = time.time()
    self.filterIntens(self.intensity_list, self.mzlist)
    elapsed = time.time() - started
    print("Removal of 0 values: " + str(elapsed))

    self.glycan_intens = []
    for spec_ix, spec_intens in enumerate(self.filtered_intens):
        started = time.time()
        # kendrickMass refreshes self.KM / self.KMD used just below.
        self.kendrickMass(self.filtered_mzs[spec_ix])
        elapsed = time.time() - started
        print("KMD Algorithm Time: " + str(elapsed))

        started = time.time()
        kept = self.glycanProb(self.KM, self.KMD, alpha, spec_intens)
        elapsed = time.time() - started
        print("Prob Time: " + str(elapsed))
        self.glycan_intens.append(kept)

    # The output name carries a random numeric suffix.
    outname = "Filtered_mz_" + str(np.random.randint(100000))
    with ImzMLWriter(outname) as w:
        for spec_ix, spec_mzs in enumerate(self.filtered_mzs):
            w.addSpectrum(spec_mzs,
                          self.glycan_intens[spec_ix],
                          self.spectrum.coordinates[spec_ix])
    print("File Written to : " + outname)
def write_corrected_msi(msi, output_file, tolerance, database_exactmass, step, dalim):
    """Write a copy of an MSI whose m/z axes are corrected pixel by pixel.

    A pixel is written only when every stage succeeds: more than 30
    selected peaks, more than 10 database hits, a hit selection summing to
    more than 10, and a truthy error model. All other pixels are skipped.

    :param msi: path of the input imzML, read with ``ImzMLParser``
    :param output_file: path handed to ``ImzMLWriter``
    :param tolerance: forwarded to the hit generation, hit selection and
        model fitting helpers
    :param database_exactmass: reference masses for ``hits_generation``
    :param step: forwarded to ``hits_selection`` and ``create_lm``
    :param dalim: forwarded as ``da_limit``
    """
    with ImzMLWriter(output_file) as w:
        p = ImzMLParser(msi, parse_lib='ElementTree')
        for idx, (x, y, z) in enumerate(p.coordinates):
            ms_mzs, ms_intensities = p.getspectrum(idx)
            peaks_mz = ms_mzs[peak_selection(ms_intensities)]
            if len(peaks_mz) <= 30:
                continue

            hit_exp, hit_errors = hits_generation(
                peaks_mz, database_exactmass, tolerance)
            if len(hit_errors) <= 10:
                continue

            roi = hits_selection(hit_errors, step, tolerance, da_limit=dalim)
            if not np.sum(roi) > 10:
                continue

            mz_error_model = create_lm(hit_exp, hit_errors,
                                       tolerance=tolerance,
                                       da_limit=dalim,
                                       step=step)
            if not mz_error_model:
                continue

            w.addSpectrum(correct_mz_lm(ms_mzs, mz_error_model),
                          ms_intensities, (x, y, z))
def export_imzml(self, imzml_fn=None):
    """Export all mzML line scans of this dataset into one imzML file.

    :param imzml_fn: string containing path to write imzml file to, if not
        supplied the default (same name, same directory as mzml) will be used
    """
    imzml_fn = self.parse_imzml_fn(imzml_fn)
    with ImzMLWriter(imzml_fn) as imzml:
        for mzml_fn in self._mzml_fns:
            print("exporting file: {}".format(mzml_fn))
            stage = self._config["stage_parameters"]
            # Row index: the text after the last underscore, minus its
            # last five characters (presumably the ".mzML" extension).
            y_ix = int(mzml_fn.rsplit("_", 1)[-1][:-5])
            y_coord = y_ix * float(stage["y_spacing"])
            for spectrum in pymzml.run.Reader(mzml_fn):
                # Stage position of this scan from its scan time and the
                # configured stage velocity.
                x_scan = spectrum.scan_time * 60. * float(stage["x_velocity"])
                x_ix, x_coord = find_nearest(self._x_coords, x_scan)
                peaks = np.asarray(spectrum.peaks('centroided'))
                physical_position = [
                    {'name': 'xCoord', 'value': str(x_coord)},
                    {'name': 'yCoord', 'value': str(y_coord)},
                ]
                imzml.addSpectrum(mzs=peaks[:, 0],
                                  intensities=peaks[:, 1],
                                  coords=(x_ix, y_ix, 1),
                                  userParams=physical_position)
def test_writer_image(get_temp_path, data_mode):
    """Round-trip a 3 x 3 image through the writer and the parser."""
    mz_x = np.linspace(100, 1000, 20)
    # Every (x, y) position of a 3 x 3 grid, z fixed at 1.
    coordinates = [[x, y, 1] for x in range(1, 4) for y in range(1, 4)]
    mz_ys = np.random.rand(len(coordinates), mz_x.size)
    output_filename = os.path.join(get_temp_path, "test.imzML")

    with ImzMLWriter(output_filename, mode=data_mode) as imzml:
        for pixel_coords, mz_y in zip(coordinates, mz_ys):
            imzml.add_spectrum(mz_x, mz_y, coords=pixel_coords)

    with ImzMLParser(output_filename) as parser:
        for px, spectrum in enumerate(parser):
            read_x, read_y = spectrum
            assert_array_almost_equal(read_x, mz_x, 4)
            assert_array_almost_equal(read_y, mz_ys[px], 4)
        assert parser.n_pixels == len(coordinates)
def imzml(input_filename, output_filename, smoothMethod="nosmooth", centroid=False):
    """Convert an HDF5 spot file into an imzML file.

    Spot coordinates are shifted to start at zero, divided by the mean
    spacing found along each axis and rounded to integer pixel indices.
    Every spot's ``InitialMeasurement`` intensities are then written
    against the global mass axis, optionally smoothed and centroided.

    :param input_filename: path of the HDF5 file (opened read-only)
    :param output_filename: path handed to ``ImzMLWriter``
    :param smoothMethod: forwarded to ``smooth_spectrum``; smoothing is
        skipped only when this is ``[]``
    :param centroid: when true, each spectrum is reduced with
        ``pyMS.centroid_detection.gradient`` before writing
    """
    import h5py
    import numpy as np

    spectraGroup = 'InitialMeasurement'

    # The context manager closes the HDF5 file even if conversion fails.
    with h5py.File(input_filename, 'r') as h5:
        spots = h5['Spots']
        # Mass axis shared by all profile spectra. It must stay untouched
        # between spots, so per-spectrum results get their own name below.
        global_mzs = np.asarray(
            h5['/SamplePositions/GlobalMassAxis/']['SamplePositions'])

        # Some hard-coding to deal with different file versions.
        file_version = h5['Version'][0]
        if file_version > 5:
            coords = h5['Registrations']['0']['Coordinates']
        else:
            coords = h5['Coordinates']

        # Physical positions -> integer pixel indices.
        coords = np.asarray(coords).T.round(5)
        coords -= np.amin(coords, axis=0)
        step = np.array(
            [np.mean(np.diff(np.unique(coords[:, i]))) for i in range(3)])
        step[np.isnan(step)] = 1  # axis with a single distinct value
        coords /= np.reshape(step, (3, ))
        coords = coords.round().astype(int)
        ncol, nrow, _ = np.amax(coords, axis=0) + 1

        g = h5['Spots/0/' + spectraGroup + '/']
        mz_dtype = g['SamplePositions/SamplePositions'][:].dtype
        int_dtype = g['Intensities'][:].dtype
        print('dim: {} x {}'.format(nrow, ncol))

        n_total = len(spots.keys())
        # Spot groups are named by integers; visit them in numeric order.
        keys = map(str, sorted(map(int, spots.keys())))

        with ImzMLWriter(output_filename,
                         mz_dtype=mz_dtype,
                         intensity_dtype=int_dtype) as writer:
            done = 0
            for key, pos in zip(keys, coords):
                spec_mzs = global_mzs
                intensities = np.asarray(spots[key][spectraGroup]['Intensities'])
                if smoothMethod != []:
                    intensities = smooth_spectrum(spec_mzs, intensities,
                                                  smoothMethod)
                if centroid:
                    from pyMS import centroid_detection
                    spec_mzs, intensities, _ = centroid_detection.gradient(
                        spec_mzs, intensities, max_output=-1, weighted_bins=3)

                pos = (nrow - 1 - pos[1], pos[0], pos[2])
                writer.addSpectrum(spec_mzs, intensities, pos)

                done += 1
                if done % 1000 == 0:
                    print("[%s] progress: %.1f%%" %
                          (input_filename, float(done) * 100.0 / n_total))
    print("finished!")
def _imzml_writer_process(output_path, queue):
    """Write queued batches of spectra to ``output_path`` until told to stop.

    Each job taken from ``queue`` is an iterable of ``(mzs, ints, coord)``
    triples. A ``None`` job is the stop signal. Every job, including the
    stop signal, is marked done as soon as it is taken from the queue,
    i.e. before its spectra are written.
    """
    with ImzMLWriter(output_path) as writer:
        while True:
            job = queue.get()
            queue.task_done()
            if job is None:
                return
            for spectrum in job:
                mzs, ints, coord = spectrum
                writer.addSpectrum(mzs, ints, coord)
def do_recalibration(input_filename, output_filename, p):
    """Write a copy of a dataset with every centroid m/z shifted by ``p``.

    Each m/z value ``m`` becomes ``m - 1e-6 * m * p(m)``; ``p`` is
    presumably an error model returning ppm (verify against the caller).
    Coordinates are written with their first two columns swapped.

    :param input_filename: path loaded through ``inMemoryIMS``
    :param output_filename: path handed to ``ImzMLWriter``
    :param p: callable evaluated once per m/z value
    """
    dataset = inMemoryIMS(input_filename)
    with ImzMLWriter(output_filename) as file_out:
        for spec_ix, xyz in enumerate(dataset.coords):
            centroids = dataset.get_spectrum(spec_ix).get_spectrum(
                source='centroids')
            mzs, intensities = centroids[0], centroids[1]
            shifted = [mz - (1e-6) * mz * p(mz) for mz in mzs]
            file_out.addSpectrum(shifted, intensities, xyz[[1, 0, 2]])
class MyImzmlExportthread(QtCore.QThread):
    """Worker thread that exports per-peak intensities to an imzML file.

    For every spectrum of the input dataset one value per ``meta_info``
    record is computed with ``PeakIntensitySum`` and written against the
    fixed list of m/z values taken from the records.
    """

    # Progress signal (percent, message). Percent runs from 10 to 100;
    # -1 reports a failure and the message then carries the error text.
    trigger = QtCore.pyqtSignal(int, str)

    def __init__(self, a, b, c):
        """Store the inputs and open the output writer.

        :param a: input dataset exposing ``coordinates`` and ``getspectrum``
        :param b: output path handed to ``ImzMLWriter``
        :param c: sequence of records; item 0 of each record is the m/z
            written out, items 1 and 2 are passed to ``PeakIntensitySum``
        """
        super().__init__()
        self.input = a
        self.output = ImzMLWriter(b)
        self.meta_info = c

    def run(self):
        """Export all spectra, reporting progress through ``trigger``."""
        try:
            print('grterw')
            self.trigger.emit(10, '')
            coords = self.input.coordinates
            n_spectra = len(coords)
            mzs = np.array([float(rec[0]) for rec in self.meta_info])
            print('retrerewrw')
            # The numeric arguments of each record never change, so they
            # are converted once instead of once per spectrum.
            peak_args = [(float(rec[1]), float(rec[2]))
                         for rec in self.meta_info]

            # A dataset with a single spectrum would otherwise divide by
            # zero when scaling progress into the 20..100 range.
            span = max(n_spectra - 1, 1)
            last_reported = 0

            # Leaving the block finalises the file on success, as the
            # explicit close() did, and lets the writer clean up on error.
            with self.output:
                for spec_ix in range(n_spectra):
                    percent = int((spec_ix / span) * 80) + 20
                    if percent - last_reported >= 1:
                        self.trigger.emit(percent, '')
                        last_reported = percent

                    spectrum = self.input.getspectrum(spec_ix)
                    intensity = np.array([
                        PeakIntensitySum(spectrum, first, second)
                        for first, second in peak_args
                    ])
                    self.output.addSpectrum(mzs, intensity, coords[spec_ix])
                self.trigger.emit(100, '')
        except Exception as e:
            # Thread boundary: report the failure to the GUI, do not raise.
            self.trigger.emit(-1, 'Running error, info: ' + str(e))
def make_test_imzml(ds_config):
    """Yield paths of a temporary mock imzML / ibd file pair.

    The files live in a temporary directory that is removed once the
    generator is resumed or closed.

    :param ds_config: forwarded to ``make_mock_spectrum``
    """
    mzs, ints = make_mock_spectrum(ds_config)
    with TemporaryDirectory() as tmpdir:
        imzml_path = f'{tmpdir}/test.imzML'
        with ImzMLWriter(imzml_path, polarity='positive') as writer:
            for x, y in MOCK_COORDS:
                # Scale intensity differently per spectrum, as chaos and
                # spatial metrics return 0 on completely uniform images.
                scaled_ints = ints * x * y
                writer.addSpectrum(mzs, scaled_ints, (x, y, 1))

        ibd_path = f'{tmpdir}/test.ibd'
        yield imzml_path, ibd_path
def convert(h5file, outpath, filename):
    """Convert a pandas HDF table of spectra into an imzML file.

    Each row becomes one spectrum: the column labels are the m/z values
    and the row values the intensities. The first two index levels give
    the pixel position.

    :param h5file: path read with ``pd.read_hdf``
    :param outpath: directory of the output file
    :param filename: name of the output file inside ``outpath``
    :raises ValueError: if the first two index levels are not named
        ``grid_x`` and ``grid_y``
    """
    data = pd.read_hdf(h5file)
    index_names = data.index.names
    if not (index_names[0] == "grid_x" and index_names[1] == "grid_y"):
        raise ValueError("Wrong index in h5, contact programmer!")

    target = join(outpath, filename)
    with ImzMLWriter(target) as writer:
        for index_key, row in data.iterrows():
            # The index has exactly three levels; the third is not used.
            gx, gy, _ = index_key
            writer.addSpectrum(np.array(row.index), row.values, (gx, gy))
def test_writer_with_compression(get_temp_path, compression):
    """Write one processed-mode spectrum with both arrays compressed."""
    mz_x = np.linspace(100, 1000, 20)
    mz_y = np.random.rand(mz_x.shape[0])
    output_filename = os.path.join(get_temp_path, "test.imzML")

    # The same compression setting is applied to m/z and intensity data.
    writer_options = {
        "mode": "processed",
        "mz_compression": compression,
        "intensity_compression": compression,
    }
    with ImzMLWriter(output_filename, **writer_options) as imzml:
        imzml.add_spectrum(mz_x, mz_y, coords=[1, 1, 1])
def test_writer_single_pixel(get_temp_path):
    """Round-trip a dataset consisting of exactly one spectrum."""
    mz_x = np.linspace(100, 1000, 20)
    mz_y = np.random.rand(mz_x.shape[0])
    output_filename = os.path.join(get_temp_path, "test.imzML")

    with ImzMLWriter(output_filename, mode="processed") as imzml:
        imzml.add_spectrum(mz_x, mz_y, coords=[1, 1, 1])

    with ImzMLParser(output_filename) as parser:
        read_back = parser.get_spectrum(0)
        read_x, read_y = read_back
        for actual, expected in ((read_x, mz_x), (read_y, mz_y)):
            assert_array_almost_equal(actual, expected, 4)
        assert parser.n_pixels == 1
def write_imzml(self, imzml_fn):
    """Write the MS1 spectra of ``self.mzml_fn`` to an imzML file.

    A spectrum is positioned by looking up its id in the ``microscan``
    column of ``self.df``. The ``TIC`` entry and spectra without a
    matching row are skipped. A dot is printed per written spectrum and
    the completed fraction every 1000 spectra.

    :param imzml_fn: path handed to ``ImzMLWriter``
    """
    reader = pymzml.run.Reader(self.mzml_fn)
    with ImzMLWriter(imzml_fn, mode="processed") as writer:
        for spec_no, spec in enumerate(reader):
            scan_id = spec["id"]
            if scan_id == 'TIC':
                continue

            position = self.df.loc[self.df['microscan'] == scan_id]
            if position.empty:
                continue

            if spec["ms level"] == 1:
                print('.', end='')
                mzs, intensities = spec.mz, spec.i
                # First matching row wins if the id occurs more than once.
                x = position['x'].values[0]
                y = position['y'].values[0]
                writer.addSpectrum(mzs, intensities, coords=(x, y, 1))

            if spec_no > 0 and spec_no % 1000 == 0:
                fraction = float(spec_no) / reader.info['spectrum_count']
                print("{:3.2f}".format(fraction), end="")
def convert(sqlite_fn, imzml_fn):
    """Convert the ``Spectra`` table of a sqlite peak file into imzML.

    Within each spectrum only peaks whose ``fwhm / mz**2`` exceeds the
    threshold returned by ``estimateThreshold`` are written.

    :param sqlite_fn: path of the sqlite database to read
    :param imzml_fn: path handed to ``ImzMLWriter``
    """
    with ImzMLWriter(imzml_fn) as w:
        peaks = sqlite3.connect(sqlite_fn)
        try:
            peaks.row_factory = sqlite3.Row
            c = peaks.cursor()
            count = int(
                c.execute("select count(*) from Spectra").fetchone()[0])
            threshold = estimateThreshold(c)

            # Rows are wrapped one at a time, so the whole table is never
            # held in memory at once.
            rows = c.execute("select * from Spectra")
            for i, row in enumerate(rows, 1):
                sp = Spectrum(row)
                real_peaks = sp.fwhms / sp.mzs**2 > threshold
                w.addSpectrum(sp.mzs[real_peaks],
                              sp.intensities[real_peaks],
                              (sp.x, sp.y))
                if i % 1000 == 0:
                    print("{}% complete".format(float(i) / count * 100.0))
        finally:
            # Release the database handle on success and on failure.
            peaks.close()
    print("done")
def make_lithops_imzml_reader(
    storage: Storage,
    mz_precision='f',
    polarity='positive',
    ds_config=TEST_DS_CONFIG,
):
    """Create an ImzML file, upload it into storage, and return an imzml_reader for it.

    :param storage: storage client providing ``put_cloudobject``
    :param mz_precision: ``'f'`` for 32-bit or ``'d'`` for 64-bit m/z values
    :param polarity: forwarded to ``ImzMLWriter``
    :param ds_config: accepted for the callers' benefit; not used here
    :raises KeyError: if ``mz_precision`` is neither ``'f'`` nor ``'d'``
    """
    mz_dtype = {'f': np.float32, 'd': np.float64}[mz_precision]

    with TemporaryDirectory() as tmpdir:
        imzml_path = f'{tmpdir}/test.imzML'
        ibd_path = f'{tmpdir}/test.ibd'
        with ImzMLWriter(imzml_path, polarity=polarity, mz_dtype=mz_dtype) as writer:
            for coords, (mzs, ints) in zip(MOCK_COORDINATES, MOCK_SPECTRA):
                writer.addSpectrum(mzs, ints, coords)

        # Read both files through context managers so the handles are
        # closed before the temporary directory is removed.
        with open(imzml_path, 'rb') as imzml_file:
            imzml_content = imzml_file.read()
        with open(ibd_path, 'rb') as ibd_file:
            ibd_content = ibd_file.read()

    imzml_cobj = storage.put_cloudobject(imzml_content)
    ibd_cobj = storage.put_cloudobject(ibd_content)
    return LithopsImzMLReader(storage, imzml_cobj, ibd_cobj)
def centroid_imzml(input_filename,
                   output_filename,
                   step=[],
                   apodization=False,
                   w_size=10,
                   min_intensity=1e-5,
                   region_name="",
                   prevent_duplicate_pixels=False):
    """Write the spectra of an ``slFile`` dataset to imzML, centroided.

    Spot coordinates are shifted to start at zero, divided by ``step`` and
    rounded to integer pixel indices. The y index is flipped before
    writing.

    :param input_filename: path opened with ``slFile``
    :param output_filename: path handed to ``ImzMLWriter``
    :param step: pixel spacing per axis (three values); when empty it is
        estimated as the median spacing of the spots in ``spotlist``. The
        default is never mutated.
    :param apodization: when true, ``pyMSpec.smoothing.apodization`` is
        applied before centroiding
    :param w_size: accepted for interface compatibility; not used here
    :type min_intensity: float
    :param min_intensity: forwarded to ``gradient``
    :param region_name: forwarded to ``slFile``
    :param prevent_duplicate_pixels: when true, only the first spectrum
        of every distinct pixel position is written
    """
    from pyimzml.ImzMLWriter import ImzMLWriter
    from pyMSpec.centroid_detection import gradient

    sl = slFile(input_filename, region_name=region_name)
    mz_dtype = sl.Mzs.dtype
    int_dtype = sl.get_spectrum(0)[1].dtype

    # Convert coords to index -> kinda hacky
    coords = np.asarray(sl.coords.copy()).T.round(5)
    coords -= np.amin(coords, axis=0)
    # `step == []` misbehaves for array-like input, so test for emptiness.
    if step is None or len(step) == 0:
        step = np.array([
            np.median(np.diff(np.unique(coords[sl.spotlist, i])))
            for i in range(3)
        ])
        step[np.isnan(step)] = 1  # axis with a single distinct value
    print('estimated pixel size: {} x {}'.format(step[0], step[1]))
    coords = coords / np.reshape(step, (3, ))
    coords = coords.round().astype(int)
    ncol, nrow, _ = np.amax(coords, axis=0) + 1
    print('new image size: {} x {}'.format(nrow, ncol))

    if prevent_duplicate_pixels:
        # View each row as one opaque value so np.unique works per row.
        b = np.ascontiguousarray(coords).view(
            np.dtype((np.void, coords.dtype.itemsize * coords.shape[1])))
        _, coord_idx = np.unique(b, return_index=True)
        print("{} {}".format(np.shape(sl.spotlist), np.shape(coord_idx)))
        print("original number of spectra: {}".format(len(coords)))
        # A set gives O(1) membership tests in the write loop below.
        first_occurrences = set(np.asarray(coord_idx).tolist())
        n_total = len(first_occurrences)
    else:
        first_occurrences = None
        n_total = len(coords)
    print('spectra to write: {}'.format(n_total))

    with ImzMLWriter(output_filename,
                     mz_dtype=mz_dtype,
                     intensity_dtype=int_dtype) as imzml:
        done = 0
        for key in sl.spotlist:
            if first_occurrences is not None and key not in first_occurrences:
                continue  # skip duplicate pixels
            mzs, intensities = sl.get_spectrum(key)
            if apodization:
                from pyMSpec import smoothing
                # todo - add to processing list in imzml
                mzs, intensities = smoothing.apodization(mzs, intensities)
            mzs_c, intensities_c, _ = gradient(mzs,
                                               intensities,
                                               min_intensity=min_intensity)
            pos = coords[key]
            pos = (pos[0], nrow - 1 - pos[1], pos[2])
            imzml.addSpectrum(mzs_c, intensities_c, pos)
            done += 1
            if done % 1000 == 0:
                print("[%s] progress: %.1f%%" %
                      (input_filename, float(done) * 100.0 / n_total))
    print("finished!")
def centroid_imzml(input_filename,
                   output_filename,
                   step=[],
                   apodization=False,
                   w_size=10,
                   min_intensity=1e-5,
                   prevent_duplicate_pixels=False):
    """Write a centroided copy of an imzML file.

    :type input_filename: string - source file path (must be .imzml)
    :type output_filename: string - output file path (must be .imzml)
    :type step: tuple - grid spacing of pixels (if empty the script will
        try to guess it); the default is never mutated
    :type apodization: boolean - whether to try to remove FT wiggle
        artefacts
    :type w_size: window size (m/z bins) for apodization
    :type min_intensity: float - minimum intensity of peaks to return
        during centroiding
    :type prevent_duplicate_pixels: bool - if True only the first spectrum
        is returned for pixels with the same coordinates
    """
    from pyimzml.ImzMLParser import ImzMLParser
    from pyimzml.ImzMLWriter import ImzMLWriter
    from pyMSpec.centroid_detection import gradient

    imzml_in = ImzMLParser(input_filename)
    precisionDict = {
        'f': ("32-bit float", np.float32),
        'd': ("64-bit float", np.float64),
        'i': ("32-bit integer", np.int32),
        'l': ("64-bit integer", np.int64)
    }
    mz_dtype = precisionDict[imzml_in.mzPrecision][1]
    int_dtype = precisionDict[imzml_in.intensityPrecision][1]

    # Convert coords to index -> kinda hacky
    coords = np.asarray(imzml_in.coordinates).round(5)
    if coords.shape[1] == 2:
        # Pad (x, y) to (x, y, 0) once, so all later code can rely on
        # three columns; an ndarray row cannot be extended with append().
        z_column = np.zeros((coords.shape[0], 1), dtype=coords.dtype)
        coords = np.hstack([coords, z_column])
    coords -= np.amin(coords, axis=0)

    # `step == []` misbehaves for array-like input, so test for emptiness.
    if step is None or len(step) == 0:
        step = np.array([
            np.median(np.diff(np.unique(coords[:, i])))
            for i in range(coords.shape[1])
        ])
        step[np.isnan(step)] = 1  # axis with a single distinct value
    else:
        step = np.asarray(step, dtype=float)
        if step.size == 2:
            step = np.append(step, 1.0)  # no spacing given for z
    print('estimated pixel size: {} x {}'.format(step[0], step[1]))
    coords = coords / np.reshape(step, (3, ))
    coords = coords.round().astype(int)
    ncol, nrow, _ = np.amax(coords, axis=0) + 1
    print('new image size: {} x {}'.format(nrow, ncol))

    if prevent_duplicate_pixels:
        # View each row as one opaque value so np.unique works per row.
        b = np.ascontiguousarray(coords).view(
            np.dtype((np.void, coords.dtype.itemsize * coords.shape[1])))
        _, coord_idx = np.unique(b, return_index=True)
        print("{} {}".format(np.shape(imzml_in.coordinates),
                             np.shape(coord_idx)))
        print("original number of spectra: {}".format(len(coords)))
        # A set gives O(1) membership tests in the write loop below.
        first_occurrences = set(np.asarray(coord_idx).tolist())
        n_total = len(first_occurrences)
    else:
        first_occurrences = None
        n_total = len(coords)
    print('spectra to write: {}'.format(n_total))

    with ImzMLWriter(output_filename,
                     mz_dtype=mz_dtype,
                     intensity_dtype=int_dtype) as imzml_out:
        done = 0
        for key in range(len(coords)):
            print(key)
            if first_occurrences is not None and key not in first_occurrences:
                continue  # skip duplicate pixels
            mzs, intensities = imzml_in.getspectrum(key)
            if apodization:
                from pyMSpec import smoothing
                # todo - add to processing list in imzml
                mzs, intensities = smoothing.apodization(
                    mzs, intensities, {'w_size': w_size})
            mzs_c, intensities_c, _ = gradient(mzs,
                                               intensities,
                                               min_intensity=min_intensity)
            pos = coords[key]
            pos = (pos[0], nrow - 1 - pos[1], pos[2])
            imzml_out.addSpectrum(mzs_c, intensities_c, pos)
            done += 1
            if done % 1000 == 0:
                print("[%s] progress: %.1f%%" %
                      (input_filename, float(done) * 100.0 / n_total))
    print("finished!")
def __init__(self, a, b, c):
    """Keep the inputs and open the output writer.

    :param a: stored as ``self.input``
    :param b: passed to ``ImzMLWriter``; the writer is stored as
        ``self.output``
    :param c: stored as ``self.meta_info``
    """
    super().__init__()
    self.input, self.meta_info = a, c
    self.output = ImzMLWriter(b)