Example #1
0
 def empty_datacube(self):
     """Return a fresh ion_datacube pre-populated with this dataset's geometry.

     The cube carries the cached coordinates, pixel indices and row/column
     counts computed at load time, but no intensity data.
     """
     cube = ion_datacube()
     # reuse the geometry precomputed at load time instead of recalculating
     cube.coords = self.coords
     cube.nRows = self.cube_n_row
     cube.nColumns = self.cube_n_col
     cube.pixel_indices = self.cube_pixel_indices
     return cube
Example #2
0
    def load_file(self,
                  filename,
                  min_mz=0,
                  max_mz=np.inf,
                  min_int=0,
                  index_range=None):
        """Parse a centroided hdf5 spectral file and cache its data in memory.

        Args:
            filename: path to the hdf5 file (must exist; opened read-only).
            min_mz, max_mz: keep only centroids with min_mz < mz < max_mz.
            min_int: keep only centroids with intensity > min_int.
            index_range: optional explicit list of spectrum indices; when
                empty/None every key under /spectral_data is loaded.

        Raises:
            TypeError: if a spectrum's mz and intensity arrays differ in length.
        """
        # fix: the original chained assignment stored splitext's (root, ext)
        # tuple in file_type and the bare extension in self.filename
        self.file_dir, self.filename = os.path.split(filename)
        self.filename, self.file_type = os.path.splitext(self.filename)
        self.hdf = h5py.File(filename, 'r')  # Readonly, file must exist
        if not index_range:
            # list() so the result survives len()/max() and repeated iteration
            self.index_list = list(map(int, self.hdf['/spectral_data'].keys()))
        else:
            self.index_list = index_range
        self.max_index = max(self.index_list)
        self.coords = self.get_coords()
        # precompute pixel indices for use in get_ion_image
        cube = ion_datacube()
        cube.add_coords(self.coords)
        self.cube_pixel_indices = cube.pixel_indices
        self.cube_n_row, self.cube_n_col = cube.nRows, cube.nColumns
        # load data into memory
        self.mz_list = []
        self.count_list = []
        self.idx_list = []
        for ii in self.index_list:
            # load spectrum, keep values gt0 (shouldn't be here anyway)
            this_spectrum = self.get_spectrum(ii)
            mzs, counts = this_spectrum.get_spectrum(source='centroids')
            if len(mzs) != len(counts):
                raise TypeError(
                    'length of mzs ({}) not equal to counts ({})'.format(
                        len(mzs), len(counts)))
            # Enforce data limits
            valid = np.where((mzs > min_mz) & (mzs < max_mz)
                             & (counts > min_int))
            counts = counts[valid]
            mzs = mzs[valid]

            # append ever-growing lists (should probably be preallocated or piped to disk and re-loaded)
            self.mz_list.append(mzs)
            self.count_list.append(counts)
            self.idx_list.append(np.ones(len(mzs), dtype=int) * ii)

        print('loaded spectra')
        self.mz_list = np.concatenate(self.mz_list)
        self.count_list = np.concatenate(self.count_list)
        self.idx_list = np.concatenate(self.idx_list)
        # sort by mz for fast image formation
        mz_order = np.argsort(self.mz_list)
        self.mz_list = self.mz_list[mz_order]
        self.count_list = self.count_list[mz_order]
        self.idx_list = self.idx_list[mz_order]
        self.mz_min = self.mz_list[0]
        self.mz_max = self.mz_list[-1]
        self.histogram_mz_axis = {}
        print('file loaded')
Example #3
0
 def get_summary_image(self, summary_func='tic'):
     """Return an ion_datacube holding a per-pixel summary image.

     Args:
         summary_func: 'tic' (total ion count) or 'mic' (max ion count);
             names a precomputed per-spectrum attribute on self.

     Raises:
         KeyError: if summary_func is not 'tic' or 'mic'.
     """
     if summary_func not in ('tic', 'mic'):
         # fix: original message was garbled ("not in 'tic' mic'")
         raise KeyError("requested type not in ['tic', 'mic']")
     data_out = ion_datacube()
     # add precomputed pixel indices
     data_out.coords = self.coords
     data_out.pixel_indices = self.cube_pixel_indices
     data_out.nRows = self.cube_n_row
     data_out.nColumns = self.cube_n_col
     data_out.add_xic(np.asarray(getattr(self, summary_func))[self.index_list], [0], [0])
     return data_out
Example #4
0
    def load_file(self, filename, min_mz=0, max_mz=np.inf, min_int=0, index_range=None, cache_spectra=True):
        """Load a centroided hdf5 dataset; optionally cache spectra in memory.

        Args:
            filename: path to the hdf5 file (opened read-only).
            min_mz, max_mz: keep only centroids with min_mz < mz < max_mz.
            min_int: keep only centroids with intensity > min_int.
            index_range: optional explicit spectrum indices (all when empty/None).
            cache_spectra: when True, load and mz-sort all spectra in memory.

        Raises:
            TypeError: if a spectrum's mz and intensity arrays differ in length.
        """
        # fix: the original chained assignment put splitext's (root, ext)
        # tuple into file_type and the bare extension into self.filename
        self.file_dir, self.filename = os.path.split(filename)
        self.filename, self.file_type = os.path.splitext(self.filename)
        self.hdf = h5py.File(filename, 'r')  # Readonly, file must exist
        if not index_range:
            # list() so the result survives len()/max() and repeated iteration
            self.index_list = list(map(int, self.hdf['/spectral_data'].keys()))
        else:
            self.index_list = index_range
        self.max_index = max(self.index_list)
        self.coords = self.get_coords()
        # precompute pixel indices for use in get_ion_image
        cube = ion_datacube()
        cube.add_coords(self.coords)
        self.cube_pixel_indices = cube.pixel_indices
        self.cube_n_row, self.cube_n_col = cube.nRows, cube.nColumns
        self.histogram_mz_axis = {}

        if cache_spectra:
            # load data into memory
            self.mz_list = []
            self.count_list = []
            self.idx_list = []
            for ii in self.index_list:
                # load spectrum, keep values gt0 (shouldn't be here anyway)
                this_spectrum = self.get_spectrum(ii)
                mzs, counts = this_spectrum.get_spectrum(source='centroids')
                if len(mzs) != len(counts):
                    raise TypeError('length of mzs ({}) not equal to counts ({})'.format(len(mzs), len(counts)))
                # Enforce data limits
                valid = np.where((mzs > min_mz) & (mzs < max_mz) & (counts > min_int))
                counts = counts[valid]
                mzs = mzs[valid]

                # append ever-growing lists (should probably be preallocated or piped to disk and re-loaded)
                self.mz_list.append(mzs)
                self.count_list.append(counts)
                self.idx_list.append(np.ones(len(mzs), dtype=int) * ii)

            print('loaded spectra')
            self.mz_list = np.concatenate(self.mz_list)
            self.count_list = np.concatenate(self.count_list)
            self.idx_list = np.concatenate(self.idx_list)
            # sort by mz for fast image formation
            mz_order = np.argsort(self.mz_list)
            self.mz_list = self.mz_list[mz_order]
            self.count_list = self.count_list[mz_order]
            self.idx_list = self.idx_list[mz_order]
            self.mz_min = self.mz_list[0]
            self.mz_max = self.mz_list[-1]
            # split binary searches into two stages for better locality
            self.window_size = 1024
            self.mz_sublist = self.mz_list[::self.window_size].copy()
        print('file loaded')
    def load_file(self, filename, min_mz=0, max_mz=np.inf, min_int=0, index_range=[]):
        """Load an .hdf5 or imzML dataset, mass-sorted via the reorder module.

        The hdf5 path filters centroids by min_mz/max_mz/min_int; the imzML
        path does not. NOTE(review): index_range=[] is a mutable default
        argument; it is not mutated here but is an anti-pattern worth fixing.
        """
        # choose the backing parser from the file extension
        if filename.endswith('.hdf5'):
            self.hdf = h5py.File(filename, 'r')
            # NOTE(review): on Python 3 `map` returns an iterator, so len(keys)
            # below would fail — this code assumes Python 2 semantics
            keys = index_range or map(int, self.hdf['/spectral_data'].keys())
        else:
            self.imzml = ImzMLParser.ImzMLParser(filename)
            keys = index_range or range(len(self.imzml.coordinates))

        # coordinate rows are filled in lazily by the iterators below
        self.coords = np.zeros((len(keys), 3))

        def spectra_iter_hdf5(keys):
            # yields (index, mzs, counts) with the mz/intensity limits applied
            for i in keys:
                tmp_str = "/spectral_data/" + str(i)
                mzs = self.hdf[tmp_str + '/centroid_mzs/'][()]
                counts = self.hdf[tmp_str + '/centroid_intensities/'][()]
                # NOTE(review): coords is indexed by spectrum key i — assumes
                # keys are contiguous 0..len(keys)-1; a sparse index_range
                # would index out of bounds. Confirm with callers.
                self.coords[i, :] = self.hdf[tmp_str + '/coordinates']
                valid = np.where((mzs > min_mz) & (mzs < max_mz) & (counts > min_int))
                counts = counts[valid]
                mzs = mzs[valid]
                yield (i, mzs, counts)

        def spectra_iter_imzml(keys):
            # yields (index, mzs, counts); no limit filtering on this path
            for i in keys:
                coords = self.imzml.coordinates[i]
                mzs, counts = map(np.array, self.imzml.getspectrum(i))
                if len(coords) == 2:
                    # pad 2-D coordinates with z = 0
                    coords = (coords[0], coords[1], 0)
                self.coords[i, :] = coords
                yield (i, mzs, counts)

        sp_iter = spectra_iter_hdf5 if filename.endswith('.hdf5') else spectra_iter_imzml

        # project helper: returns parallel arrays sorted globally by mass
        import reorder
        data = reorder.sortDatasetByMass(sp_iter(keys))
        self.index_list, self.mz_list, self.count_list, self.idx_list = data
        self.max_index = max(self.index_list)

        # precompute pixel geometry for get_ion_image
        cube = ion_datacube()
        cube.add_coords(self.coords)
        self.cube_pixel_indices = cube.pixel_indices
        self.cube_n_row, self.cube_n_col = cube.nRows, cube.nColumns

        self.mz_min = self.mz_list[0]
        self.mz_max = self.mz_list[-1]
        self.histogram_mz_axis = {}

        # split binary searches into two stages for better locality
        self.window_size = 1024
        self.mz_sublist = self.mz_list[::self.window_size].copy()
def get_datacube(reader, mzs, ppm):
    """Build an ion_datacube of m/z images pulled from *reader*.

    Pixel indices are derived from the first image (positions whose
    flattened value is non-negative); each subsequent image is flattened,
    masked to those pixels and clipped at zero before being appended as
    an xic channel.
    """
    result = ion_datacube()
    result.nRows = reader.height
    result.nColumns = reader.width
    result.pixel_indices = None
    result.xic = []

    for target_mz in mzs:
        flat = reader.get_mz_image(target_mz, ppm).ravel()
        if result.pixel_indices is None:
            # lazily fix the pixel mask from the first image
            result.pixel_indices = np.where(flat >= 0)[0]
        channel = flat[result.pixel_indices]
        channel[channel < 0] = 0.0
        result.xic.append(channel)
    return result
Example #7
0
    def get_ion_image(self,
                      mz_list,
                      tol_list,
                      tol_type='mz',
                      return_method='sum'):
        """Sum spectrum intensities inside each mz +/- tol window.

        Args:
            mz_list: window centres.
            tol_list: per-window tolerances (ppm when tol_type == 'ppm',
                otherwise absolute m/z widths).
            tol_type: 'mz' or 'ppm'.
            return_method: only 'sum' is implemented (todo: max/median).

        Returns:
            ion_datacube with one xic column per requested window.
        """
        # fix: tol_type was compared against the undefined name `ppm`,
        # raising NameError whenever this method ran with tol_type set
        if tol_type == 'ppm':
            tol_list = [tol * mz / 1e6 for mz, tol in zip(mz_list, tol_list)]

        mz_arr = np.asarray(mz_list, dtype=float)
        tol_arr = np.asarray(tol_list, dtype=float)
        mz_list_upper = mz_arr + tol_arr
        mz_list_lower = mz_arr - tol_arr
        # sum intensities
        # todo - implement alternative return_method (e.g. max, median sum)
        xic_array = np.zeros((len(self.index_list), len(mz_list)))
        if self.consistent_mz:
            print('consistent')
            this_spectrum = self.get_spectrum(0)
            # spectra share one mz axis: precalculate the window masks once
            mz_index = np.zeros((len(this_spectrum.mzs), len(mz_list)),
                                dtype=bool)
            for mm in range(len(mz_list)):
                mz_index[:, mm] = ((this_spectrum.mzs < mz_list_upper[mm]) &
                                   (this_spectrum.mzs > mz_list_lower[mm]))
            for ii in self.index_list:
                this_spectrum = self.get_spectrum(ii)
                for mm in range(len(mz_list)):
                    xic_array[ii, mm] = sum(
                        this_spectrum.intensities[mz_index[:, mm]])
        else:
            print('inconsistent')
            for ii in self.index_list:
                this_spectrum = self.get_spectrum(ii)
                for mm in range(len(mz_list)):
                    mz_index = ((this_spectrum.mzs < mz_list_upper[mm]) &
                                (this_spectrum.mzs > mz_list_lower[mm]))
                    xic_array[ii, mm] = sum(
                        this_spectrum.intensities[mz_index])
        data_out = ion_datacube()
        data_out.add_coords(self.coords)
        data_out.add_xic(xic_array, mz_list, tol_list)
        return data_out
Example #8
0
 def get_ion_image(self, mzs, tols, tol_type='ppm'):
     """Bin counts per spectrum for each mz +/- tol window via bisection.

     Args:
         mzs: array of window centres.
         tols: tolerances; converted from ppm to absolute m/z when
             tol_type == 'ppm'.
     """
     # fix: tol_type was ignored and tols were always treated as ppm;
     # leftover debug prints of the window bounds are also removed
     if tol_type == 'ppm':
         tols = tols * mzs / 1e6  # to m/z
     data_out = ion_datacube()
     data_out.add_coords(self.coords)
     for mz, tol in zip(mzs, tols):
         mz_upper = mz + tol
         mz_lower = mz - tol
         idx_left = bisect.bisect_left(self.mz_list, mz_lower)
         idx_right = bisect.bisect_right(self.mz_list, mz_upper)
         # pad with sentinel entries so bincount always spans every index
         count_vect = np.concatenate(
             (np.asarray([0]), self.count_list[idx_left:idx_right],
              np.asarray([0])))
         idx_vect = np.concatenate(
             (np.asarray([0]), self.idx_list[idx_left:idx_right],
              np.asarray([max(self.index_list)])))
         # bin vectors
         ion_vect = np.bincount(idx_vect, count_vect)
         data_out.add_xic(ion_vect, [mz], [tol])
     return data_out
Example #9
0
    def get_ion_image(self, mzs, tols, tol_type='ppm'):
        """Form one ion image per (mz, tol) pair from the cached sorted data.

        Counts falling inside each mz window are accumulated per spectrum
        index with bincount; pixel geometry is copied from the precomputed
        cube rather than re-derived.
        """
        if tol_type == 'ppm':
            tols = tols * mzs / 1e6  # convert ppm tolerances to m/z widths
        out = ion_datacube()
        # reuse the geometry cached at load time
        out.coords = self.coords
        out.pixel_indices = self.cube_pixel_indices
        out.nRows = self.cube_n_row
        out.nColumns = self.cube_n_col

        lows = np.searchsorted(self.mz_list, mzs - tols, 'left')
        highs = np.searchsorted(self.mz_list, mzs + tols, 'right')
        for mz, tol, lo, hi in zip(mzs, tols, lows, highs):
            # accumulate counts per spectrum index inside the window
            per_pixel = np.bincount(self.idx_list[lo:hi],
                                    self.count_list[lo:hi],
                                    minlength=self.max_index + 1)
            out.add_xic(per_pixel, [mz], [tol])
        return out
Example #10
0
    def get_ion_image(self, mzs, tols, tol_type='ppm'):
        """Generate an ion image for each requested m/z window.

        The cached mz/count/index arrays are sorted by m/z, so each window
        is located with two binary searches and then binned per pixel index.
        """
        if tol_type == 'ppm':
            tols = tols * mzs / 1e6  # to m/z
        cube = ion_datacube()
        # copy the geometry precomputed by load_file
        cube.coords = self.coords
        cube.pixel_indices = self.cube_pixel_indices
        cube.nRows = self.cube_n_row
        cube.nColumns = self.cube_n_col

        lower_bounds = mzs - tols
        upper_bounds = mzs + tols
        left = np.searchsorted(self.mz_list, lower_bounds, 'left')
        right = np.searchsorted(self.mz_list, upper_bounds, 'right')
        for mz, tol, lo, hi in zip(mzs, tols, left, right):
            pixels_in_window = self.idx_list[lo:hi]
            counts_in_window = self.count_list[lo:hi]
            # one histogram bin per spectrum index
            image = np.bincount(pixels_in_window,
                                counts_in_window,
                                minlength=self.max_index + 1)
            cube.add_xic(image, [mz], [tol])
        return cube
Example #11
0
 def get_ion_image(self, mzs, tols, tol_type='ppm'):
     """Bin counts per spectrum for each mz +/- tol window via bisection.

     Args:
         mzs: array of window centres.
         tols: tolerances; converted from ppm to absolute m/z when
             tol_type == 'ppm'.
     """
     # fix: tol_type was ignored and tols were always treated as ppm;
     # leftover debug prints of the window bounds are also removed
     if tol_type == 'ppm':
         tols = tols * mzs / 1e6  # to m/z
     data_out = ion_datacube()
     data_out.add_coords(self.coords)
     for mz, tol in zip(mzs, tols):
         mz_upper = mz + tol
         mz_lower = mz - tol
         idx_left = bisect.bisect_left(self.mz_list, mz_lower)
         idx_right = bisect.bisect_right(self.mz_list, mz_upper)
         # pad with sentinel entries so bincount always spans every index
         count_vect = np.concatenate(
             (np.asarray([0]), self.count_list[idx_left:idx_right],
              np.asarray([0])))
         idx_vect = np.concatenate(
             (np.asarray([0]), self.idx_list[idx_left:idx_right],
              np.asarray([max(self.index_list)])))
         # bin vectors
         ion_vect = np.bincount(idx_vect, count_vect)
         data_out.add_xic(ion_vect, [mz], [tol])
     return data_out
Example #12
0
    def get_ion_image(self, mz_list, tol_list, tol_type='mz', return_method='sum'):
        """Sum spectrum intensities inside each mz +/- tol window.

        Args:
            mz_list: window centres.
            tol_list: per-window tolerances (ppm when tol_type == 'ppm').
            tol_type: 'mz' for absolute widths, 'ppm' for relative.
            return_method: only 'sum' is implemented (todo: max/median).
        """
        # fix: tol_type was compared to the undefined name `ppm` (NameError);
        # compare against the string instead. Mixed tab indentation removed.
        if tol_type == 'ppm':
            tol_list = [tol * mz / 1e6 for mz, tol in zip(mz_list, tol_list)]

        mz_arr = np.asarray(mz_list, dtype=float)
        tol_arr = np.asarray(tol_list, dtype=float)
        mz_list_upper = mz_arr + tol_arr
        mz_list_lower = mz_arr - tol_arr
        # sum intensities
        # todo - implement alternative return_method (e.g. max, median sum)
        xic_array = np.zeros((len(self.index_list), len(mz_list)))
        if self.consistent_mz:
            print('consistent')
            this_spectrum = self.get_spectrum(0)
            # spectra share one mz axis: precalculate the window masks once
            mz_index = np.zeros((len(this_spectrum.mzs), len(mz_list)), dtype=bool)
            for mm in range(len(mz_list)):
                mz_index[:, mm] = (this_spectrum.mzs < mz_list_upper[mm]) & (this_spectrum.mzs > mz_list_lower[mm])
            for ii in self.index_list:
                this_spectrum = self.get_spectrum(ii)
                for mm in range(len(mz_list)):
                    xic_array[ii, mm] = sum(this_spectrum.intensities[mz_index[:, mm]])
        else:
            print('inconsistent')
            for ii in self.index_list:
                this_spectrum = self.get_spectrum(ii)
                for mm in range(len(mz_list)):
                    mz_index = (this_spectrum.mzs < mz_list_upper[mm]) & (this_spectrum.mzs > mz_list_lower[mm])
                    xic_array[ii, mm] = sum(this_spectrum.intensities[mz_index])
        data_out = ion_datacube()
        data_out.add_coords(self.coords)
        data_out.add_xic(xic_array, mz_list, tol_list)
        return data_out
Example #13
0
 def _calculate_dimensions(self):
     """Derive the image grid size and pixel indices from self.coords."""
     geometry = ion_datacube()
     geometry.add_coords(self.coords)
     # cache the raster geometry on self for later image formation
     self.pixel_indices = geometry.pixel_indices
     self.nrows = int(geometry.nRows)
     self.ncols = int(geometry.nColumns)
Example #14
0
 def _calculate_dimensions(self):
     """Compute nrows/ncols and pixel indices for the coordinate grid."""
     dc = ion_datacube()
     dc.add_coords(self.coords)
     self.nrows, self.ncols = int(dc.nRows), int(dc.nColumns)
     self.pixel_indices = dc.pixel_indices
Example #15
0
 def load_file(self, filename, min_mz=0, max_mz=np.inf, min_int=0, index_range=None, cache_spectra=True, do_summary=True, norm=None):
     """Load an .hdf5 or .imzml dataset; optionally cache spectra and summaries.

     Args:
         filename: path to the data file; parser chosen by extension.
         min_mz, max_mz: keep only centroids with min_mz < mz < max_mz.
         min_int: keep only centroids with intensity > min_int.
         index_range: explicit spectrum indices (all when empty/None).
         cache_spectra: cache mz-sorted spectrum arrays in memory.
         do_summary: record per-spectrum tic (total) and mic (max) counts.
         norm: normalisation spec stored on self ([] when None).

     Raises:
         TypeError: unknown file extension, or mismatched mz/count lengths.
     """
     # parse file to get required parameters
     # can use thin hdf5 wrapper for getting data from file
     self.file_dir, self.filename = os.path.split(filename)
     self.filename, self.file_type = os.path.splitext(self.filename)
     self.file_type = self.file_type.lower()
     # avoid the shared mutable-default pitfall while keeping [] semantics
     self.norm = [] if norm is None else norm
     if self.file_type == '.hdf5':
         import h5py
         self.hdf = h5py.File(filename, 'r')  # Readonly, file must exist
         if not index_range:
             self.index_list = list(map(int, self.hdf['/spectral_data'].keys()))
         else:
             self.index_list = index_range
     elif self.file_type == '.imzml':
         from pyimzml.ImzMLParser import ImzMLParser
         self.imzml = ImzMLParser(filename)
         self.index_list = list(range(len(self.imzml.coordinates)))
     else:
         raise TypeError('File type not recognised: {}'.format(self.file_type))
     self.max_index = max(self.index_list)
     self.coords = self.get_coords()
     step_size = self.get_step_size()
     cube = ion_datacube(step_size=step_size)
     cube.add_coords(self.coords)
     self.cube_pixel_indices = cube.pixel_indices
     self.cube_n_row, self.cube_n_col = cube.nRows, cube.nColumns
     self.histogram_mz_axis = {}
     # np.inf replaces the 9999999999999. magic sentinel
     self.mz_min = np.inf
     self.mz_max = 0.
     if cache_spectra or do_summary:
         # load data into memory
         self.mz_list = []
         self.count_list = []
         self.idx_list = []
         if do_summary:
             # NOTE(review): tic/mic are indexed by spectrum key ii below —
             # assumes index_list is contiguous 0..n-1; confirm with callers
             self.mic = np.zeros((len(self.index_list), 1))
             self.tic = np.zeros((len(self.index_list), 1))
         for ii in self.index_list:
             # load spectrum, keep values gt0 (shouldn't be here anyway)
             this_spectrum = self.get_spectrum(ii)
             mzs, counts = this_spectrum.get_spectrum(source='centroids')
             if len(mzs) != len(counts):
                 raise TypeError('length of mzs ({}) not equal to counts ({})'.format(len(mzs), len(counts)))
             # Enforce data limits
             valid = np.where((mzs > min_mz) & (mzs < max_mz) & (counts > min_int))
             counts = counts[valid]
             mzs = mzs[valid]
             # fix: guard empty spectra — mzs[0] / max(counts) used to raise
             # IndexError/ValueError when filtering removed every centroid
             if len(mzs) > 0:
                 # record min/max
                 if mzs[0] < self.mz_min:
                     self.mz_min = mzs[0]
                 if mzs[-1] > self.mz_max:
                     self.mz_max = mzs[-1]
             # append ever-growing lists (should probably be preallocated or piped to disk and re-loaded)
             if cache_spectra:
                 self.mz_list.append(mzs)
                 self.count_list.append(counts)
                 self.idx_list.append(np.ones(len(mzs), dtype=int) * ii)
             # record summary values
             if do_summary:
                 self.tic[ii] = np.sum(counts)
                 self.mic[ii] = np.max(counts) if len(counts) > 0 else 0.
         print('loaded spectra')
         if cache_spectra:
             self.mz_list = np.concatenate(self.mz_list)
             self.count_list = np.concatenate(self.count_list)
             self.idx_list = np.concatenate(self.idx_list)
             # sort by mz for fast image formation
             mz_order = np.argsort(self.mz_list)
             self.mz_list = self.mz_list[mz_order]
             self.count_list = self.count_list[mz_order]
             self.idx_list = self.idx_list[mz_order]
             # split binary searches into two stages for better locality
             self.window_size = 1024
             self.mz_sublist = self.mz_list[::self.window_size].copy()
     print('file loaded')
Example #16
0
    def get_ion_image(self, mzs, tols, tol_type='ppm'):
        """Form ion images using a two-stage (sublist then window) binary search.

        Args:
            mzs: scalar or array of target m/z centres (scalars are wrapped).
            tols: tolerance(s); interpreted as ppm when tol_type == 'ppm',
                otherwise as absolute m/z widths.

        Returns:
            ion_datacube with one xic per requested m/z, using the pixel
            geometry precomputed by load_file.
        """
        # fix: removed the dead inner functions search_sort/search_bisect —
        # never called, and both referenced the undefined name blank_dataout
        # (NameError if ever invoked); also dropped the unused mz_vect local
        data_out = ion_datacube()
        # add precomputed pixel indices
        data_out.coords = self.coords
        data_out.pixel_indices = self.cube_pixel_indices
        data_out.nRows = self.cube_n_row
        data_out.nColumns = self.cube_n_col

        if not isinstance(mzs, (np.ndarray, list)):
            mzs = np.asarray([mzs, ])
        if tol_type == 'ppm':
            tols = tols * mzs / 1e6  # to m/z

        # Fast search for insertion point of mz in self.mz_list:
        # first stage narrows to a window using the strided sublist
        idx_left = np.searchsorted(self.mz_sublist, mzs - tols, 'left')
        idx_right = np.searchsorted(self.mz_sublist, mzs + tols, 'right')
        for mz, tol, il, ir in zip(mzs, tols, idx_left, idx_right):
            lo = max(il - 1, 0) * self.window_size
            hi = ir * self.window_size
            # second stage is a binary search within the window
            il = lo + np.searchsorted(self.mz_list[lo:hi], mz - tol, 'left')
            ir = lo + np.searchsorted(self.mz_list[lo:hi], mz + tol, 'right')
            # slice list for code clarity
            idx_vect = self.idx_list[il:ir]
            count_vect = self.count_list[il:ir]
            # accumulate counts per spectrum index
            ion_vect = np.bincount(idx_vect, weights=count_vect,
                                   minlength=self.max_index + 1)
            data_out.add_xic(ion_vect, [mz], [tol])
        return data_out