Beispiel #1
0
def recal(imzml_out_fn, imzml, fit, m=3):
    """Write a recalibrated copy of an imzML dataset.

    Every recalibration parameter in ``fit`` is laid out as an image over the
    pixel grid, median-filtered with kernel size ``m`` and then handed,
    pixel by pixel, to ``recalibrate_spectrum`` together with the m/z axis of
    the spectrum at that pixel.

    :param imzml_out_fn: argument passed to ``ImzMLWriter`` for the output
    :param imzml: parsed input dataset; must provide ``coordinates``,
        ``imzmldict`` and ``getspectrum``
    :param fit: recalibration parameters indexed as
        ``fit[spectrum_index][parameter_index]``
    :param m: kernel size forwarded to ``medfilt2d``
    """
    # Spatial smoothing of the recalibration parameters, one image per
    # parameter.
    smoothed_layers = []
    for param_ix in range(len(fit[0])):
        per_spectrum = [
            fit[spec_ix][param_ix]
            for spec_ix in range(len(imzml.coordinates))
        ]
        # Grid positions without a spectrum keep the mean parameter value.
        background = np.mean(per_spectrum)
        layer = background + np.zeros(
            (imzml.imzmldict["max count of pixels y"],
             imzml.imzmldict["max count of pixels x"]))
        # Coordinates are 1-based (x, y, z); the image is indexed [row, col].
        for spec_ix, (x, y, _z) in enumerate(imzml.coordinates):
            layer[y - 1, x - 1] = per_spectrum[spec_ix]
        smoothed_layers.append(medfilt2d(layer, m))
    param_cube = np.dstack(smoothed_layers)

    # Recalibrate each spectrum with the smoothed parameters of its pixel.
    with ImzMLWriter(imzml_out_fn) as writer:
        for spec_ix, coords in enumerate(imzml.coordinates):
            if spec_ix % 500 == 0:
                logging.debug(spec_ix / float(len(imzml.coordinates)))
            raw_mzs, intensities = imzml.getspectrum(index=spec_ix)
            mz_axis = np.asarray(raw_mzs)
            pixel_params = param_cube[coords[1] - 1, coords[0] - 1, :]
            writer.addSpectrum(recalibrate_spectrum(mz_axis, pixel_params),
                               intensities, coords)
Beispiel #2
0
    def MSIFilter(self, coi, alpha):
        """Filter imzML file for complex of interest.

        Only ``coi == "N-Glycan"`` is handled; for any other value the method
        returns immediately without touching the instance. For N-glycans it
        runs the glycan filter, the intensity filter and, per spectrum, the
        Kendrick-mass step followed by ``glycanProb``; the surviving
        intensities are written to a new imzML file whose name ends in a
        random number. Wall-clock timings of each step are printed.

        :param coi: name of the complex of interest
        :param alpha: forwarded unchanged to ``glycanProb``
        """
        if coi != "N-Glycan":
            return

        self.glycanFilter()

        started = time.time()
        self.filterIntens(self.intensity_list, self.mzlist)
        finished = time.time()
        print("Removal of 0 values: " + str(finished - started))

        self.glycan_intens = []
        for spec_ix in range(len(self.filtered_intens)):
            started = time.time()
            self.kendrickMass(self.filtered_mzs[spec_ix])
            finished = time.time()
            print("KMD Algorithm Time: " + str(finished - started))

            # kendrickMass is expected to have refreshed self.KM / self.KMD
            # for this spectrum -- TODO confirm against its implementation.
            started = time.time()
            kept_intens = self.glycanProb(self.KM, self.KMD, alpha,
                                          self.filtered_intens[spec_ix])
            finished = time.time()
            print("Prob Time: " + str(finished - started))
            self.glycan_intens.append(kept_intens)

        outname = "Filtered_mz_" + str(np.random.randint(100000))
        with ImzMLWriter(outname) as writer:
            for spec_ix in range(len(self.filtered_mzs)):
                writer.addSpectrum(self.filtered_mzs[spec_ix],
                                   self.glycan_intens[spec_ix],
                                   self.spectrum.coordinates[spec_ix])
        print("File Written to : " + outname)
def write_corrected_msi(msi, output_file, tolerance, database_exactmass, step,
                        dalim):
    """Write a copy of an imzML dataset with model-corrected m/z values.

    A pixel is written only when every stage succeeds: more than 30 selected
    peaks, more than 10 database hits, a hit selection summing to more than
    10, and a truthy error model from ``create_lm``. Pixels failing any stage
    are left out of the output file.

    :param msi: input path passed to ``ImzMLParser``
    :param output_file: output argument passed to ``ImzMLWriter``
    :param tolerance: forwarded to ``hits_generation``, ``hits_selection``
        and ``create_lm``
    :param database_exactmass: reference masses for ``hits_generation``
    :param step: forwarded to ``hits_selection`` and ``create_lm``
    :param dalim: forwarded as ``da_limit``
    """
    with ImzMLWriter(output_file) as writer:
        parser = ImzMLParser(msi, parse_lib='ElementTree')
        # Visit each pixel of the MSI in file order.
        for pixel_ix, (x, y, z) in enumerate(parser.coordinates):
            mzs, intensities = parser.getspectrum(pixel_ix)
            peak_mzs = mzs[peak_selection(intensities)]
            if len(peak_mzs) <= 30:
                continue

            hit_exp, hit_errors = hits_generation(peak_mzs,
                                                  database_exactmass,
                                                  tolerance)
            if len(hit_errors) <= 10:
                continue

            roi = hits_selection(hit_errors,
                                 step,
                                 tolerance,
                                 da_limit=dalim)
            if not np.sum(roi) > 10:
                continue

            error_model = create_lm(hit_exp,
                                    hit_errors,
                                    tolerance=tolerance,
                                    da_limit=dalim,
                                    step=step)
            if not error_model:
                continue

            writer.addSpectrum(correct_mz_lm(mzs, error_model), intensities,
                               (x, y, z))
Beispiel #4
0
    def export_imzml(self, imzml_fn=None):
        """
        Write all mzML files of this object into a single imzML file.

        Each mzML file becomes one image row; each spectrum is placed at the
        x index returned by ``find_nearest`` for its scan position. The
        physical x/y positions are attached as ``xCoord`` / ``yCoord`` user
        parameters.

        :param imzml_fn:
        string containing path to write imzml file to,
        if not supplied the value is resolved by ``parse_imzml_fn``
        """
        target_fn = self.parse_imzml_fn(imzml_fn)

        with ImzMLWriter(target_fn) as writer:
            for mzml_fn in self._mzml_fns:
                print("exporting file: {}".format(mzml_fn))
                # Row number = text after the last "_" minus its final five
                # characters (presumably the ".mzML" extension -- confirm).
                row_ix = int(mzml_fn.split("_")[-1][:-5])
                y_coord = row_ix * float(
                    self._config["stage_parameters"]["y_spacing"])
                for spectrum in pymzml.run.Reader(mzml_fn):
                    # scan_time * 60 * velocity gives the stage position;
                    # presumably minutes -> seconds -- TODO confirm units.
                    x_scan = spectrum.scan_time * 60. * float(
                        self._config["stage_parameters"]["x_velocity"])
                    x_ix, x_coord = find_nearest(self._x_coords, x_scan)
                    peaks = np.asarray(spectrum.peaks('centroided'))
                    position_params = [
                        {'name': 'xCoord', 'value': str(x_coord)},
                        {'name': 'yCoord', 'value': str(y_coord)},
                    ]
                    writer.addSpectrum(mzs=peaks[:, 0],
                                       intensities=peaks[:, 1],
                                       coords=(x_ix, row_ix, 1),
                                       userParams=position_params)
Beispiel #5
0
    def test_writer_image(get_temp_path, data_mode):
        """Test adding image to the dataset.

        Writes a 3 x 3 image of random spectra sharing one m/z axis, reads it
        back and compares every spectrum to 4 decimals.
        """
        mz_x = np.linspace(100, 1000, 20)
        coordinates = [[x, y, 1] for x in (1, 2, 3) for y in (1, 2, 3)]
        mz_ys = np.random.rand(len(coordinates), mz_x.shape[0])

        output_filename = os.path.join(get_temp_path, "test.imzML")
        with ImzMLWriter(output_filename, mode=data_mode) as imzml:
            for mz_y, _coordinates in zip(mz_ys, coordinates):
                imzml.add_spectrum(mz_x, mz_y, coords=_coordinates)

        with ImzMLParser(output_filename) as parser:
            # Checked before iterating: with the assertion inside the loop an
            # empty parser would let the test pass without checking anything.
            assert parser.n_pixels == len(coordinates)
            n_read = 0
            for px, (_mz_x, _mz_y) in enumerate(parser):
                assert_array_almost_equal(_mz_x, mz_x, 4)
                assert_array_almost_equal(_mz_y, mz_ys[px], 4)
                n_read += 1
            # Every written spectrum must actually have been compared.
            assert n_read == len(coordinates)
Beispiel #6
0
def imzml(input_filename,
          output_filename,
          smoothMethod="nosmooth",
          centroid=False):
    """Convert an HDF5 spot file into an imzML file.

    Spot coordinates are shifted to start at zero and divided by the mean
    spacing per axis to obtain integer pixel indices. Each spot's
    ``InitialMeasurement`` intensities are passed through ``smooth_spectrum``
    and, optionally, centroided before being written.

    :param input_filename: path of the HDF5 file (opened read-only, must exist)
    :param output_filename: output argument passed to ``ImzMLWriter``
    :param smoothMethod: forwarded to ``smooth_spectrum``
    :param centroid: when true, run ``pyMS.centroid_detection.gradient`` on
        every spectrum and write the centroids instead of the profile
    """
    import h5py
    import numpy as np

    spectraGroup = 'InitialMeasurement'
    # Read-only; the context manager makes sure the file is closed again.
    with h5py.File(input_filename, 'r') as h5:
        spots = h5['Spots']
        # Shared m/z axis of all spots; used for smoothing / peak detection.
        global_mzs = np.asarray(
            h5['/SamplePositions/GlobalMassAxis/']['SamplePositions'])
        # Some hard-coding to deal with different file versions.
        file_version = h5['Version'][0]
        if file_version > 5:
            coords = h5['Registrations']['0']['Coordinates']
        else:
            coords = h5['Coordinates']

        coords = np.asarray(coords).T.round(5)
        coords -= np.amin(coords, axis=0)
        step = np.array(
            [np.mean(np.diff(np.unique(coords[:, i]))) for i in range(3)])
        # An axis with a single unique value has no spacing (NaN): use 1.
        step[np.isnan(step)] = 1
        # Out-of-place division so integer-typed coordinates work as well.
        coords = coords / np.reshape(step, (3, ))
        coords = coords.round().astype(int)
        ncol, nrow, _ = np.amax(coords, axis=0) + 1
        first_spot = h5['Spots/0/' + spectraGroup + '/']
        mz_dtype = first_spot['SamplePositions/SamplePositions'][:].dtype
        int_dtype = first_spot['Intensities'][:].dtype
        print('dim: {} x {}'.format(nrow, ncol))
        n_total = len(spots.keys())
        # Spot names are numeric strings; visit them in numeric order.
        keys = map(str, sorted(map(int, h5['Spots'].keys())))
        if centroid:
            from pyMS import centroid_detection
        ### write spectra
        with ImzMLWriter(output_filename,
                         mz_dtype=mz_dtype,
                         intensity_dtype=int_dtype) as writer:
            for done, (key, pos) in enumerate(zip(keys, coords), start=1):
                spot = spots[key]
                # Start every spectrum from the shared axis. Previously the
                # centroided m/z values of one spot replaced the axis used
                # for all following spots.
                mzs = global_mzs
                intensities = np.asarray(spot[spectraGroup]['Intensities'])
                # NOTE(review): a string smoothMethod never equals [], so
                # smooth_spectrum runs for the default "nosmooth" too.
                if smoothMethod != []:
                    intensities = smooth_spectrum(mzs, intensities,
                                                  smoothMethod)
                if centroid:
                    mzs, intensities, _ = centroid_detection.gradient(
                        mzs, intensities, max_output=-1, weighted_bins=3)
                # Swap the first two axes and flip the second one.
                pos = (nrow - 1 - pos[1], pos[0], pos[2])
                writer.addSpectrum(mzs, intensities, pos)
                if done % 1000 == 0:
                    print("[%s] progress: %.1f%%" %
                          (input_filename, float(done) * 100.0 / n_total))
    print("finished!")
Beispiel #7
0
def _imzml_writer_process(output_path, queue):
    """Drain batches of spectra from ``queue`` into one imzML file.

    Each queue item is either an iterable of ``(mzs, ints, coord)`` triples,
    which are written in order, or ``None``, which ends the loop and lets the
    writer close.

    :param output_path: output argument passed to ``ImzMLWriter``
    :param queue: object providing ``get()`` and ``task_done()``
    """
    with ImzMLWriter(output_path) as writer:
        while True:
            batch = queue.get()
            # NOTE(review): the item is acknowledged before it is written, so
            # a join() on the queue can return while spectra are still being
            # added -- confirm this is intended.
            queue.task_done()
            if batch is None:
                return
            for mzs, ints, coord in batch:
                writer.addSpectrum(mzs, ints, coord)
Beispiel #8
0
def do_recalibration(input_filename, output_filename, p):
    """Write a copy of a dataset with every centroid m/z shifted by ``p``.

    Each m/z value ``m`` becomes ``m - 1e-6 * m * p(m)``, i.e. ``p(m)`` is
    applied as a relative shift scaled by 1e-6.

    :param input_filename: argument passed to ``inMemoryIMS``
    :param output_filename: output argument passed to ``ImzMLWriter``
    :param p: callable evaluated once per m/z value
    """
    dataset = inMemoryIMS(input_filename)
    with ImzMLWriter(output_filename) as writer:
        for pixel_ix in range(len(dataset.coords)):
            centroids = dataset.get_spectrum(pixel_ix).get_spectrum(
                source='centroids')
            shifted_mzs = []
            for mz in centroids[0]:
                shifted_mzs.append(mz - (1e-6) * mz * p(mz))
            # The first two coordinate columns are swapped for the writer.
            pixel_coords = dataset.coords[pixel_ix, [1, 0, 2]]
            writer.addSpectrum(shifted_mzs, centroids[1], pixel_coords)
class MyImzmlExportthread(QtCore.QThread):
    """Background thread that exports summed peak intensities to imzML.

    For every spectrum of the input, one intensity per ``meta_info`` row is
    computed with ``PeakIntensitySum`` and written next to the m/z values
    taken from the first column of ``meta_info``.
    """

    # Emitted as (progress, message): 10..100 while running, and -1 together
    # with an error text when the export fails.
    trigger = QtCore.pyqtSignal(int, str)

    def __init__(self, a, b, c):
        """Store the input and peak rows and open the writer.

        :param a: input dataset; must provide ``coordinates`` and
            ``getspectrum``
        :param b: output argument passed to ``ImzMLWriter``
        :param c: indexable rows; column 0 is used as m/z, columns 1 and 2
            are passed to ``PeakIntensitySum``
        """
        super().__init__()
        self.input = a
        self.output = ImzMLWriter(b)
        self.meta_info = c

    def run(self):
        """Export all spectra, reporting progress through ``trigger``."""
        try:
            print('grterw')
            self.trigger.emit(10, '')
            coordinates = self.input.coordinates
            n_spectra = len(coordinates)
            meta_count = len(self.meta_info)
            mzs = np.array(
                [float(self.meta_info[row][0]) for row in range(meta_count)])
            print('retrerewrw')
            # With a single spectrum ``n_spectra - 1`` is zero; clamp the
            # divisor so the progress calculation cannot divide by zero.
            progress_span = max(n_spectra - 1, 1)
            last_progress = 0
            for spec_ix in range(n_spectra):
                # Map the spectrum index onto the 20..100 progress range.
                progress = int((spec_ix / progress_span) * 80) + 20
                if progress - last_progress >= 1:
                    self.trigger.emit(progress, '')
                    last_progress = progress
                spectrum = self.input.getspectrum(spec_ix)
                intensity = np.array([
                    PeakIntensitySum(spectrum,
                                     float(self.meta_info[row][1]),
                                     float(self.meta_info[row][2]))
                    for row in range(meta_count)
                ])
                self.output.addSpectrum(mzs, intensity, coordinates[spec_ix])
            self.trigger.emit(100, '')
            self.output.close()
        except Exception as e:
            # NOTE(review): the writer is left open on failure -- confirm
            # whether it should be closed or discarded here.
            m = 'Running error, info: ' + str(e)
            self.trigger.emit(-1, m)
Beispiel #10
0
def make_test_imzml(ds_config):
    """Yield the paths of a mock imzML / ibd file pair in a temp directory.

    The files live inside a ``TemporaryDirectory`` that stays open while the
    generator is suspended at its ``yield``.

    :param ds_config: forwarded to ``make_mock_spectrum``
    """
    mzs, ints = make_mock_spectrum(ds_config)
    with TemporaryDirectory() as tmpdir:
        imzml_path = f'{tmpdir}/test.imzML'
        with ImzMLWriter(imzml_path, polarity='positive') as writer:
            for x, y in MOCK_COORDS:
                # Intensities are scaled per pixel because chaos and spatial
                # metrics return 0 on completely uniform images.
                scaled_ints = ints * x * y
                writer.addSpectrum(mzs, scaled_ints, (x, y, 1))

        ibd_path = f'{tmpdir}/test.ibd'
        yield imzml_path, ibd_path
Beispiel #11
0
def convert(h5file, outpath, filename):
    """Convert a pandas HDF table of spectra into an imzML file.

    Each row is one spectrum: its column labels are the m/z values and its
    cell values the intensities. The first two index levels must be named
    ``grid_x`` and ``grid_y`` and give the pixel position.

    :param h5file: argument passed to ``pd.read_hdf``
    :param outpath: directory of the output file
    :param filename: name of the output file inside ``outpath``
    :raises ValueError: if the first two index levels are not
        ``grid_x`` / ``grid_y``
    """
    data = pd.read_hdf(h5file)
    index_names = data.index.names
    if not (index_names[0] == "grid_x" and index_names[1] == "grid_y"):
        raise ValueError("Wrong index in h5, contact programmer!")

    target = join(outpath, filename)
    with ImzMLWriter(target) as writer:
        for index_key, row in data.iterrows():
            # Exactly three index levels are unpacked; the third is ignored.
            grid_x, grid_y, _ = index_key
            writer.addSpectrum(np.array(row.index), row.values,
                               (grid_x, grid_y))
Beispiel #12
0
    def test_writer_with_compression(get_temp_path, compression):
        """Write one processed-mode spectrum with both arrays compressed.

        The test has no assertions; it passes when writing does not raise.
        """
        mz_x = np.linspace(100, 1000, 20)
        mz_y = np.random.rand(mz_x.shape[0])
        pixel = [1, 1, 1]

        writer_options = {
            "mode": "processed",
            "mz_compression": compression,
            "intensity_compression": compression,
        }
        target = os.path.join(get_temp_path, "test.imzML")
        with ImzMLWriter(target, **writer_options) as writer:
            writer.add_spectrum(mz_x, mz_y, coords=pixel)
Beispiel #13
0
    def test_writer_single_pixel(get_temp_path):
        """Round-trip a single processed-mode spectrum through the writer."""
        mz_x = np.linspace(100, 1000, 20)
        mz_y = np.random.rand(mz_x.shape[0])
        pixel = [1, 1, 1]
        target = os.path.join(get_temp_path, "test.imzML")

        with ImzMLWriter(target, mode="processed") as writer:
            writer.add_spectrum(mz_x, mz_y, coords=pixel)

        with ImzMLParser(target) as parser:
            read_mz, read_int = parser.get_spectrum(0)
            # Compare to 4 decimals: m/z axis first, then intensities.
            for actual, expected in ((read_mz, mz_x), (read_int, mz_y)):
                assert_array_almost_equal(actual, expected, 4)
            assert parser.n_pixels == 1
Beispiel #14
0
 def write_imzml(self, imzml_fn):
     """Write the MS1 spectra of ``self.mzml_fn`` to a processed-mode imzML.

     A spectrum is placed at the ``x`` / ``y`` of the first ``self.df`` row
     whose ``microscan`` equals the spectrum id. The ``TIC`` entry and
     spectra without a matching row are skipped. A dot is printed per
     written spectrum and a progress fraction every 1000 spectra.

     :param imzml_fn: output argument passed to ``ImzMLWriter``
     """
     reader = pymzml.run.Reader(self.mzml_fn)
     with ImzMLWriter(imzml_fn, mode="processed") as writer:
         for scan_no, spec in enumerate(reader):
             scan_id = spec["id"]
             if scan_id == 'TIC':
                 continue
             match = self.df.loc[self.df['microscan'] == scan_id]
             if match.empty:
                 continue
             if spec["ms level"] == 1:
                 print('.', end='')
                 mzs, intensities = spec.mz, spec.i
                 position = (match['x'].values[0], match['y'].values[0], 1)
                 writer.addSpectrum(mzs, intensities, coords=position)
             # Only reached for spectra that were not skipped above.
             if scan_no > 0 and scan_no % 1000 == 0:
                 fraction = float(scan_no) / reader.info['spectrum_count']
                 print("{:3.2f}".format(fraction), end="")
Beispiel #15
0
def convert(sqlite_fn, imzml_fn):
    """Convert the ``Spectra`` table of an SQLite file into an imzML file.

    For every spectrum only the peaks with ``fwhm / mz**2`` above the
    threshold returned by ``estimateThreshold`` are written.

    :param sqlite_fn: path of the SQLite database
    :param imzml_fn: output argument passed to ``ImzMLWriter``
    """
    with ImzMLWriter(imzml_fn) as w:
        peaks = sqlite3.connect(sqlite_fn)
        try:
            peaks.row_factory = sqlite3.Row
            c = peaks.cursor()
            count = int(
                c.execute("select count(*) from Spectra").fetchone()[0])
            threshold = estimateThreshold(c)
            rows = c.execute("select * from Spectra")
            # Generator instead of map(): on Python 2 map() would build every
            # Spectrum in memory before the first one is written.
            spectra = (Spectrum(row) for row in rows)
            for i, sp in enumerate(spectra, start=1):
                real_peaks = sp.fwhms / sp.mzs**2 > threshold
                mzs = sp.mzs[real_peaks]
                intensities = sp.intensities[real_peaks]
                w.addSpectrum(mzs, intensities, (sp.x, sp.y))
                if i % 1000 == 0:
                    print("{}% complete".format(float(i) / count * 100.0))
        finally:
            # The connection used to stay open after the conversion.
            peaks.close()
        print("done")
def make_lithops_imzml_reader(
    storage: Storage,
    mz_precision='f',
    polarity='positive',
    ds_config=TEST_DS_CONFIG,
):
    """Create an ImzML file, upload it into storage, and return an imzml_reader for it

    The mock spectra are written to a temporary directory, both resulting
    files are read back as bytes and stored as cloud objects.

    :param storage: object providing ``put_cloudobject``
    :param mz_precision: ``'f'`` for float32 or ``'d'`` for float64 m/z values
    :param polarity: forwarded to ``ImzMLWriter``
    :param ds_config: not used by this function; kept for its callers
    :raises KeyError: if ``mz_precision`` is neither ``'f'`` nor ``'d'``
    """
    mz_dtype = {'f': np.float32, 'd': np.float64}[mz_precision]
    with TemporaryDirectory() as tmpdir:
        with ImzMLWriter(f'{tmpdir}/test.imzML', polarity=polarity, mz_dtype=mz_dtype) as writer:
            for coords, (mzs, ints) in zip(MOCK_COORDINATES, MOCK_SPECTRA):
                writer.addSpectrum(mzs, ints, coords)

        # Read through context managers so both handles are closed before
        # the temporary directory is removed.
        with open(f'{tmpdir}/test.imzML', 'rb') as imzml_file:
            imzml_content = imzml_file.read()
        with open(f'{tmpdir}/test.ibd', 'rb') as ibd_file:
            ibd_content = ibd_file.read()

    imzml_cobj = storage.put_cloudobject(imzml_content)
    ibd_cobj = storage.put_cloudobject(ibd_content)
    return LithopsImzMLReader(storage, imzml_cobj, ibd_cobj)
Beispiel #17
0
def centroid_imzml(input_filename,
                   output_filename,
                   step=None,
                   apodization=False,
                   w_size=10,
                   min_intensity=1e-5,
                   region_name="",
                   prevent_duplicate_pixels=False):
    # write a file to imzml format (centroided)
    """
    Centroid every spectrum of an ``slFile`` dataset and write it as imzML.

    Coordinates are shifted to start at zero and divided by ``step`` to get
    integer pixel indices; the second axis is flipped on output.

    :param input_filename: argument passed to ``slFile``
    :param output_filename: output argument passed to ``ImzMLWriter``
    :param step: three pixel spacings; ``None`` or an empty sequence means
        "estimate from the median coordinate differences"
    :param apodization: when true, run ``smoothing.apodization`` first
    :param w_size: not used by this function
    :type min_intensity: float
    :param region_name: forwarded to ``slFile``
    :param prevent_duplicate_pixels: when true, only the spectrum at the
        index reported by ``np.unique`` is written for each coordinate
    """
    from pyimzml.ImzMLWriter import ImzMLWriter
    from pyMSpec.centroid_detection import gradient
    sl = slFile(input_filename, region_name=region_name)
    mz_dtype = sl.Mzs.dtype
    int_dtype = sl.get_spectrum(0)[1].dtype
    # Convert coords to index -> kinda hacky
    coords = np.asarray(sl.coords.copy()).T.round(5)
    coords -= np.amin(coords, axis=0)
    # ``None`` replaces the former mutable default ``[]``; an explicitly
    # passed empty sequence still requests estimation.
    if step is None or len(step) == 0:  # have a guess
        step = np.array([
            np.median(np.diff(np.unique(coords[sl.spotlist, i])))
            for i in range(3)
        ])
        # An axis with a single unique value has no spacing (NaN): use 1.
        step[np.isnan(step)] = 1
    print('estimated pixel size: {} x {}'.format(step[0], step[1]))
    coords = coords / np.reshape(step, (3, )).T
    coords = coords.round().astype(int)
    ncol, nrow, _ = np.amax(coords, axis=0) + 1
    print('new image size: {} x {}'.format(nrow, ncol))
    if prevent_duplicate_pixels:
        # View each coordinate row as one opaque value so np.unique works
        # on whole rows.
        b = np.ascontiguousarray(coords).view(
            np.dtype((np.void, coords.dtype.itemsize * coords.shape[1])))
        _, coord_idx = np.unique(b, return_index=True)
        print('{} {}'.format(np.shape(sl.spotlist), np.shape(coord_idx)))

        print("original number of spectra: {}".format(len(coords)))
        # A set gives O(1) membership tests in the write loop.
        keep = set(coord_idx.tolist())
        n_total = len(keep)
    else:
        keep = None
        n_total = len(coords)
    print('spectra to write: {}'.format(n_total))
    if apodization:
        from pyMSpec import smoothing
    with ImzMLWriter(output_filename,
                     mz_dtype=mz_dtype,
                     intensity_dtype=int_dtype) as imzml:
        done = 0
        for key in sl.spotlist:
            # Skip duplicate pixels; the membership test is only evaluated
            # when de-duplication was requested.
            if keep is not None and key not in keep:
                continue
            mzs, intensities = sl.get_spectrum(key)
            if apodization:
                # todo - add to processing list in imzml
                mzs, intensities = smoothing.apodization(mzs, intensities)
            mzs_c, intensities_c, _ = gradient(mzs,
                                               intensities,
                                               min_intensity=min_intensity)
            pos = coords[key]
            pos = (pos[0], nrow - 1 - pos[1], pos[2])
            imzml.addSpectrum(mzs_c, intensities_c, pos)
            done += 1
            if done % 1000 == 0:
                print("[%s] progress: %.1f%%" %
                      (input_filename, float(done) * 100.0 / n_total))
        print("finished!")
Beispiel #18
0
def centroid_imzml(input_filename,
                   output_filename,
                   step=None,
                   apodization=False,
                   w_size=10,
                   min_intensity=1e-5,
                   prevent_duplicate_pixels=False):

    # write a file to imzml format (centroided)
    """
    Centroid every spectrum of an imzML file and write a new imzML file.

    :type input_filename string - source file path (must be .imzml)
    :type output_filename string - output file path (must be .imzml)
    :type step tuple grid spacing of pixels (if None or empty the script will try and guess it)
    :type apodization boolean whether to try and remove FT wiglet artefacts
    :type w_size window side (m/z bins) for apodization
    :type min_intensity: float minimum intensity peaks to return during centroiding
    :type prevent_duplicate_pixels bool if True will only return the first spectrum for pixels with the same coordinates
    """
    from pyimzml.ImzMLParser import ImzMLParser
    from pyimzml.ImzMLWriter import ImzMLWriter
    from pyMSpec.centroid_detection import gradient

    imzml_in = ImzMLParser(input_filename)
    precisionDict = {
        'f': ("32-bit float", np.float32),
        'd': ("64-bit float", np.float64),
        'i': ("32-bit integer", np.int32),
        'l': ("64-bit integer", np.int64)
    }
    mz_dtype = precisionDict[imzml_in.mzPrecision][1]
    int_dtype = precisionDict[imzml_in.intensityPrecision][1]
    # Convert coords to index -> kinda hacky
    coords = np.asarray(imzml_in.coordinates).round(5)
    # Two-column coordinates get a zero third column up front. The former
    # per-pixel ``pos.append(0)`` could never work on a numpy row, and the
    # three-way unpacking below needs three columns anyway.
    if coords.ndim == 2 and coords.shape[1] == 2:
        coords = np.hstack(
            [coords, np.zeros((coords.shape[0], 1), dtype=coords.dtype)])
    coords -= np.amin(coords, axis=0)
    # ``None`` replaces the former mutable default ``[]``; an explicitly
    # passed empty sequence still requests estimation.
    if step is None or len(step) == 0:  # have a guess
        step = np.array([
            np.median(np.diff(np.unique(coords[:, i])))
            for i in range(coords.shape[1])
        ])
        # An axis with a single unique value has no spacing (NaN): use 1.
        step[np.isnan(step)] = 1
    print('estimated pixel size: {} x {}'.format(step[0], step[1]))
    coords = coords / np.reshape(step, (3, )).T
    coords = coords.round().astype(int)
    ncol, nrow, _ = np.amax(coords, axis=0) + 1
    print('new image size: {} x {}'.format(nrow, ncol))
    if prevent_duplicate_pixels:
        # View each coordinate row as one opaque value so np.unique works
        # on whole rows.
        b = np.ascontiguousarray(coords).view(
            np.dtype((np.void, coords.dtype.itemsize * coords.shape[1])))
        _, coord_idx = np.unique(b, return_index=True)
        print('{} {}'.format(np.shape(imzml_in.coordinates),
                             np.shape(coord_idx)))

        print("original number of spectra: {}".format(len(coords)))
        # A set gives O(1) membership tests in the write loop.
        keep = set(coord_idx.tolist())
        n_total = len(keep)
    else:
        keep = None
        n_total = len(coords)
    print('spectra to write: {}'.format(n_total))
    if apodization:
        from pyMSpec import smoothing
    with ImzMLWriter(output_filename,
                     mz_dtype=mz_dtype,
                     intensity_dtype=int_dtype) as imzml_out:
        done = 0
        for key in range(np.shape(imzml_in.coordinates)[0]):
            print(key)
            # Skip duplicate pixels; the membership test is only evaluated
            # when de-duplication was requested.
            if keep is not None and key not in keep:
                continue
            mzs, intensities = imzml_in.getspectrum(key)
            if apodization:
                # todo - add to processing list in imzml
                mzs, intensities = smoothing.apodization(
                    mzs, intensities, {'w_size': w_size})
            mzs_c, intensities_c, _ = gradient(mzs,
                                               intensities,
                                               min_intensity=min_intensity)
            pos = coords[key]
            pos = (pos[0], nrow - 1 - pos[1], pos[2])
            imzml_out.addSpectrum(mzs_c, intensities_c, pos)
            done += 1
            if done % 1000 == 0:
                print("[%s] progress: %.1f%%" %
                      (input_filename, float(done) * 100.0 / n_total))
        print("finished!")
 def __init__(self, a, b, c):
     """Store the constructor arguments and create the output writer.

     :param a: stored unchanged as ``self.input``
     :param b: passed to ``ImzMLWriter`` (presumably the output path --
         confirm against the caller)
     :param c: stored unchanged as ``self.meta_info``
     """
     super().__init__()
     self.input = a
     # The writer object is created here, at construction time.
     self.output = ImzMLWriter(b)
     self.meta_info = c