def __init__(self, mass_list, intensity_list):
    """
    @summary: Set up the MassSpectrum from paired mass/intensity lists

    @param mass_list: List of binned masses
    @type mass_list: ListType
    @param intensity_list: List of binned intensities
    @type intensity_list: ListType

    @author: Andrew Isaac
    @author: Qiao Wang
    @author: Vladimir Likic
    """

    # Only the first element of each list is type-checked.
    masses_ok = is_list(mass_list) and is_number(mass_list[0])
    if not masses_ok:
        error("'mass_list' must be a list of numbers")

    intensities_ok = is_list(intensity_list) and \
        is_number(intensity_list[0])
    if not intensities_ok:
        error("'intensity_list' must be a list of numbers")

    if len(mass_list) != len(intensity_list):
        error("'mass_list' is not the same size as 'intensity_list'")

    # TODO: should these be public, or accessed through methods???
    self.mass_list = mass_list
    self.mass_spec = intensity_list
def __init__(self, mass_list, intensity_list):
    """
    @summary: Set up the MassSpectrum from paired mass/intensity lists

    @param mass_list: List of binned masses
    @type mass_list: ListType
    @param intensity_list: List of binned intensities
    @type intensity_list: ListType

    @author: Andrew Isaac
    @author: Qiao Wang
    @author: Vladimir Likic
    """

    # Only the first element of each list is type-checked.
    masses_ok = is_list(mass_list) and is_number(mass_list[0])
    if not masses_ok:
        error("'mass_list' must be a list of numbers")

    intensities_ok = is_list(intensity_list) and \
        is_number(intensity_list[0])
    if not intensities_ok:
        error("'intensity_list' must be a list of numbers")

    if len(mass_list) != len(intensity_list):
        error("'mass_list' is not the same size as 'intensity_list'")

    # TODO: should these be public, or accessed through methods???
    self.mass_list = mass_list
    self.mass_spec = intensity_list
def __init__(self, mass_list, intensity_list):
    """
    @summary: Set up the Scan data and cache its mass range

    @param mass_list: mass values
    @type mass_list: ListType
    @param intensity_list: intensity values
    @type intensity_list: ListType

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    # Only the first element of each list is type-checked.
    masses_ok = is_list(mass_list) and is_number(mass_list[0])
    if not masses_ok:
        error("'mass_list' must be a list of numbers")

    intensities_ok = is_list(intensity_list) and \
        is_number(intensity_list[0])
    if not intensities_ok:
        error("'intensity_list' must be a list of numbers")

    self.__mass_list = mass_list
    self.__intensity_list = intensity_list

    # smallest and largest mass of this scan
    self.__min_mass = min(mass_list)
    self.__max_mass = max(mass_list)
def __init__(self, mass_list, intensity_list):
    """
    @summary: Set up the Scan data and cache its mass range

    @param mass_list: mass values
    @type mass_list: ListType
    @param intensity_list: intensity values
    @type intensity_list: ListType

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    # Only the first element of each list is type-checked.
    masses_ok = is_list(mass_list) and is_number(mass_list[0])
    if not masses_ok:
        error("'mass_list' must be a list of numbers")

    intensities_ok = is_list(intensity_list) and \
        is_number(intensity_list[0])
    if not intensities_ok:
        error("'intensity_list' must be a list of numbers")

    self.__mass_list = mass_list
    self.__intensity_list = intensity_list

    # smallest and largest mass of this scan
    self.__min_mass = min(mass_list)
    self.__max_mass = max(mass_list)
def vector_by_step(vstart,vstop,vstep):
    """
    @summary: Generates a list of values starting at 'vstart' and
        repeatedly adding 'vstep' while the value is below 'vstop'

    @param vstart: Initial value
    @type vstart: A number
    @param vstop: Upper limit (exclusive)
    @type vstop: A number
    @param vstep: Step; must be positive whenever vstart < vstop
    @type vstep: A number

    @return: A list generated; empty when vstart >= vstop
    @rtype: ListType

    @author: Vladimir Likic
    """

    if not is_number(vstart) or not is_number(vstop) or not is_number(vstep):
        error("parameters start, stop, step must be numbers")

    # A zero or negative step can never reach 'vstop', so the loop below
    # would append forever. Reject it up front instead of hanging.
    # (When vstart >= vstop the loop never runs, so any step is harmless.)
    if vstart < vstop and vstep <= 0:
        error("parameter step must be positive when start < stop")

    v = []

    p = vstart
    while p < vstop:
        v.append(p)
        p = p + vstep

    return v
def __init__(self, rt=0.0, ms=None, minutes=False):
    """
    @summary: Initialise the peak from a retention time and, optionally,
        either a mass spectrum or a single ion mass

    @param rt: Retention time
    @type rt: FloatType
    @param ms: A ion mass, or spectra of maximising ions
    @type ms: FloatType, pyms.GCSM.Class.MassSpectrum
    @param minutes: Retention time units flag. If True, retention time
        is in minutes; if False retention time is in seconds
    @type minutes: BooleanType
    """

    if not is_number(rt):
        error("'rt' must be a number")

    if ms is not None and \
        not isinstance(ms, MassSpectrum) and \
        not is_number(ms):
        error("'ms' must be a Float or a MassSpectrum object")

    # retention time is stored in seconds
    if minutes:
        rt = rt*60.0

    self.__minutes = minutes

    # basic peak attributes
    self.__rt = float(rt)

    # Defaults, so that these attributes exist even when no 'ms' is
    # given; previously they were only created inside the branches
    # below and a Peak built without 'ms' lacked them entirely.
    self.__mass_spectrum = None
    self.__ic_mass = None
    self.mass_spec = None

    # these two attributes are required for
    # setting the peak mass spectrum
    if ms is not None:
        if isinstance(ms, MassSpectrum):
            # mass spectrum
            self.__mass_spectrum = ms
            self.make_UID()

            # TEST: to test if this speeds things up
            self.mass_spec = ms.mass_spec
            self.ms = ms
        else:
            # single ion chromatogram properties
            self.__ic_mass = ms
            self.make_UID()

    self.__pt_bounds = None
    self.__area = None
    self.__ion_areas = {}

    # TEST: to test if this speeds things up
    self.rt = self.__rt
def crop_mass(self, mass_min, mass_max):
    """
    @summary: Crops the peak mass spectrum to the masses lying in
        [mass_min, mass_max] (both ends inclusive)

    @param mass_min: Minimum mass value
    @type mass_min: IntType or FloatType
    @param mass_max: Maximum mass value
    @type mass_max: IntType or FloatType

    @return: none
    @rtype: NoneType

    @author: Andrew Isaac
    """

    if not is_number(mass_min) or not is_number(mass_max):
        error("'mass_min' and 'mass_max' must be numbers")
    if mass_min >= mass_max:
        error("'mass_min' must be less than 'mass_max'")

    mass_list = self.__mass_spectrum.mass_list
    smallest = min(mass_list)
    largest = max(mass_list)

    # '%.3f' rather than '%d' so that non-integer masses are not
    # truncated in the message
    if mass_min < smallest:
        error("'mass_min' is less than the smallest mass: %.3f" % smallest)
    if mass_max > largest:
        error("'mass_max' is greater than the largest mass: %.3f" % largest)

    # collect the masses inside the window and their intensities
    new_mass_list = []
    new_mass_spec = []
    mass_spec = self.__mass_spectrum.mass_spec
    for ii, mass in enumerate(mass_list):
        if mass >= mass_min and mass <= mass_max:
            new_mass_list.append(mass)
            new_mass_spec.append(mass_spec[ii])

    # check before touching the spectrum, so that a failed crop does
    # not leave the peak holding an empty spectrum
    if len(new_mass_list) == 0:
        error("mass spectrum is now empty")

    self.__mass_spectrum.mass_list = new_mass_list
    self.__mass_spectrum.mass_spec = new_mass_spec

    if len(new_mass_list) < 10:
        # parenthesised form prints the same text on Python 2 and 3
        print(" WARNING: peak mass spectrum contains < 10 points")

    # update UID
    self.make_UID()

    # TEST: to test if this speeds things up
    self.mass_spec = self.__mass_spectrum.mass_spec
def export_leco_csv(self, file_name):
    """
    @summary: Exports data in LECO CSV format

    The header row is "Scan","Time" followed by each mass as a quoted
    integer. Every following row is the scan index, the scan time and
    one intensity per mass, the floats written as '%#.6e'. Rows end
    with CR/LF.

    @param file_name: File name
    @type file_name: StringType

    @return: none
    @rtype: NoneType

    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    if not is_str(file_name):
        error("'file_name' is not a string")

    mass_list = self.__mass_list
    time_list = self.__time_list
    vals = self.__intensity_matrix

    fp = open_for_writing(file_name)

    # try/finally so the file is closed even when a bad datum aborts
    # the export part-way through
    try:
        # Format is text header with:
        # "Scan","Time",...
        # and the rest is "TIC" or m/z as text, i.e. "50","51"...
        # The following lines are:
        # scan_number,time,value,value,...
        # scan_number is an int, rest seem to be fixed format floats.
        # The format is 0.000000e+000

        # write header
        fp.write("\"Scan\",\"Time\"")
        for mass in mass_list:
            if is_number(mass):
                fp.write(",\"%d\"" % int(mass))
            else:
                error("mass list datum not a number")
        fp.write("\r\n")  # windows CR/LF

        # write lines
        for ii, scan_time in enumerate(time_list):
            fp.write("%s,%#.6e" % (ii, scan_time))
            for datum in vals[ii]:
                if is_number(datum):
                    fp.write(",%#.6e" % (datum))
                else:
                    error("datum not a number")
            fp.write("\r\n")
    finally:
        close_for_writing(fp)
def build_intensity_matrix(
        data: GCMS_data,
        bin_interval: float = 1,
        bin_left: float = 0.5,
        bin_right: float = 0.5,
        min_mass: Optional[float] = None,
        ) -> IntensityMatrix:
    """
    Sets the full intensity matrix with flexible bins.

    The first bin is centered around ``min_mass``,
    and subsequent bins are offset by ``bin_interval``.

    :param data: Raw GCMS data
    :param bin_interval: interval between bin centres.
    :param bin_left: left bin boundary offset.
    :param bin_right: right bin boundary offset.
    :param min_mass: Minimum mass to bin (default minimum mass from data)

    :return: Binned IntensityMatrix object

    :raises TypeError: if ``data`` is not a GCMS_data object, or if
        ``bin_left``, ``bin_right`` or a supplied ``min_mass`` is not a number.
    :raises ValueError: if ``bin_interval`` is not positive, or if either
        end of the mass range is ``None``.

    :authors: Qiao Wang, Andrew Isaac, Vladimir Likic
    """

    # this package
    from pyms.GCMS.Class import GCMS_data

    if not isinstance(data, GCMS_data):
        raise TypeError("'data' must be a GCMS_data object")

    if bin_interval <= 0:
        raise ValueError("The bin interval must be larger than zero.")

    if not is_number(bin_left):
        raise TypeError("'bin_left' must be a number.")

    if not is_number(bin_right):
        raise TypeError("'bin_right' must be a number.")

    # Lower end of the range: the caller's value when given, else the data's.
    if min_mass is not None:
        if not is_number(min_mass):
            raise TypeError("'min_mass' must be a number.")
        lowest = min_mass
    else:
        lowest = data.min_mass

    highest = data.max_mass

    if highest is None:
        raise ValueError("'max_mass' cannot be None")
    if lowest is None:
        raise ValueError("'min_mass' cannot be None")

    return _fill_bins(data, lowest, highest, bin_interval, bin_left, bin_right)
def export_leco_csv(self, file_name):
    """
    @summary: Exports data in LECO CSV format

    The header row is "Scan","Time" followed by each mass as a quoted
    integer. Every following row is the scan index, the scan time and
    one intensity per mass, the floats written as '%#.6e'. Rows end
    with CR/LF.

    @param file_name: File name
    @type file_name: StringType

    @return: none
    @rtype: NoneType

    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    if not is_str(file_name):
        error("'file_name' is not a string")

    mass_list = self.__mass_list
    time_list = self.__time_list
    vals = self.__intensity_matrix

    fp = open_for_writing(file_name)

    # try/finally so the file is closed even when a bad datum aborts
    # the export part-way through
    try:
        # Format is text header with:
        # "Scan","Time",...
        # and the rest is "TIC" or m/z as text, i.e. "50","51"...
        # The following lines are:
        # scan_number,time,value,value,...
        # scan_number is an int, rest seem to be fixed format floats.
        # The format is 0.000000e+000

        # write header
        fp.write("\"Scan\",\"Time\"")
        for mass in mass_list:
            if is_number(mass):
                fp.write(",\"%d\"" % int(mass))
            else:
                error("mass list datum not a number")
        fp.write("\r\n")  # windows CR/LF

        # write lines
        for ii, scan_time in enumerate(time_list):
            fp.write("%s,%#.6e" % (ii, scan_time))
            for datum in vals[ii]:
                if is_number(datum):
                    fp.write(",%#.6e" % (datum))
                else:
                    error("datum not a number")
            fp.write("\r\n")
    finally:
        close_for_writing(fp)
def export_leco_csv(self, file_name: PathLike):
    """
    Exports data in LECO CSV format.

    The header row is ``"Scan","Time"`` followed by each mass as a quoted
    integer. Every following row is the scan index, the scan time and one
    intensity per mass, the floats written as ``#.6e``. Rows end with CR/LF.

    :param file_name: The name of the output file.

    :raises TypeError: if ``file_name`` is not path-like, or if a mass or
        an intensity is not a number.

    :authors: Andrew Isaac, Vladimir Likic, Dominic Davis-Foster (pathlib support)
    """

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    file_name = prepare_filepath(file_name, mkdirs=False)

    if not file_name.parent.is_dir():
        file_name.parent.mkdir(parents=True)

    mass_list = self._mass_list
    time_list = self._time_list
    vals = self._intensity_array

    # ``with`` closes the file even when a bad datum raises part-way through.
    # ``newline=''`` disables newline translation, so the explicit "\r\n"
    # below is written as-is instead of becoming "\r\r\n" on Windows.
    with file_name.open('w', encoding="UTF-8", newline='') as fp:
        # Format is text header with:
        # "Scan","Time",...
        # and the rest is "TIC" or m/z as text, i.e. "50","51"...
        # The following lines are:
        # scan_number,time,value,value,...
        # scan_number is an int, rest seem to be fixed format floats.
        # The format is 0.000000e+000

        # write header
        fp.write('"Scan","Time"')
        for mass in mass_list:
            if not is_number(mass):
                raise TypeError("mass list datum not a number")
            fp.write(f',"{int(mass):d}"')
        fp.write("\r\n")  # windows CR/LF

        # write lines
        for ii, time_ in enumerate(time_list):
            fp.write(f"{ii},{time_:#.6e}")
            for datum in vals[ii]:
                if not is_number(datum):
                    raise TypeError("datum not a number")
                fp.write(f",{datum:#.6e}")
            fp.write("\r\n")
def crop_mass(self, mass_min: float, mass_max: float):
    """
    Crops mass spectrum to the masses lying in ``[mass_min, mass_max]``
    (both ends inclusive) and refreshes the peak UID.

    :param mass_min: Minimum mass value.
    :param mass_max: Maximum mass value.

    :raises ValueError: if the mass spectrum is unset, if ``mass_min`` is
        not less than ``mass_max``, if either bound lies outside the
        spectrum's mass range, or if no mass falls inside the window.
        The spectrum is left untouched in every one of these cases.
    :raises TypeError: if either bound is not a number.

    :author: Andrew Isaac
    """

    if not self._mass_spectrum:
        raise ValueError("Mass spectrum is unset.")

    if not is_number(mass_min) or not is_number(mass_max):
        raise TypeError("'mass_min' and 'mass_max' must be numbers")
    if mass_min >= mass_max:
        raise ValueError("'mass_min' must be less than 'mass_max'")

    mass_list = self._mass_spectrum.mass_list
    smallest, largest = min(mass_list), max(mass_list)

    if mass_min < smallest:
        raise ValueError(
                f"'mass_min' is less than the smallest mass: {smallest}")
    if mass_max > largest:
        raise ValueError(
                f"'mass_max' is greater than the largest mass: {largest}"
                )

    # collect the masses inside the window and their intensities
    new_mass_list = []
    new_mass_spec = []
    mass_spec = self._mass_spectrum.mass_spec
    for ii, mass in enumerate(mass_list):
        if mass_min <= mass <= mass_max:
            new_mass_list.append(mass)
            new_mass_spec.append(mass_spec[ii])

    # Check before mutating: raising after the assignment would leave the
    # peak with an empty spectrum and a stale UID for any caller that
    # catches the exception.
    if not new_mass_list:
        raise ValueError("mass spectrum is now empty")

    self._mass_spectrum.mass_list = new_mass_list
    self._mass_spectrum.mass_spec = new_mass_spec

    if len(new_mass_list) < 10:
        warn("peak mass spectrum contains < 10 points", Warning)

    # update UID
    self.make_UID()
def crop_mass(self, mass_min, mass_max):
    """
    @summary: Crops the mass axis to the masses lying in
        [mass_min, mass_max] (both ends inclusive)

    @param mass_min: Minimum mass value
    @type mass_min: IntType or FloatType
    @param mass_max: Maximum mass value
    @type mass_max: IntType or FloatType

    @return: none
    @rtype: NoneType

    @author: Andrew Isaac
    """

    if not is_number(mass_min) or not is_number(mass_max):
        error("'mass_min' and 'mass_max' must be numbers")
    if mass_min >= mass_max:
        error("'mass_min' must be less than 'mass_max'")
    if mass_min < self.__min_mass:
        error("'mass_min' is less than the smallest mass: %.3f"
            % self.__min_mass)
    if mass_max > self.__max_mass:
        error("'mass_max' is greater than the largest mass: %.3f"
            % self.__max_mass)

    # (column index, mass) pairs for the masses that survive the crop
    kept = [(col, mass) for col, mass in enumerate(self.__mass_list)
            if mass >= mass_min and mass <= mass_max]
    ii_list = [col for col, _ in kept]
    new_mass_list = [mass for _, mass in kept]

    # rebuild every spectrum from the surviving columns only
    im = self.__intensity_matrix
    for spec_jj, spec in enumerate(im):
        im[spec_jj] = [spec[col] for col in ii_list]

    self.__mass_list = new_mass_list
    self.__min_mass = min(new_mass_list)
    self.__max_mass = max(new_mass_list)
def crop_mass(self, mass_min, mass_max):
    """
    @summary: Crops the mass axis to the masses lying in
        [mass_min, mass_max] (both ends inclusive)

    @param mass_min: Minimum mass value
    @type mass_min: IntType or FloatType
    @param mass_max: Maximum mass value
    @type mass_max: IntType or FloatType

    @return: none
    @rtype: NoneType

    @author: Andrew Isaac
    """

    if not is_number(mass_min) or not is_number(mass_max):
        error("'mass_min' and 'mass_max' must be numbers")
    if mass_min >= mass_max:
        error("'mass_min' must be less than 'mass_max'")
    if mass_min < self.__min_mass:
        error("'mass_min' is less than the smallest mass: %.3f"
            % self.__min_mass)
    if mass_max > self.__max_mass:
        error("'mass_max' is greater than the largest mass: %.3f"
            % self.__max_mass)

    # (column index, mass) pairs for the masses that survive the crop
    kept = [(col, mass) for col, mass in enumerate(self.__mass_list)
            if mass >= mass_min and mass <= mass_max]
    ii_list = [col for col, _ in kept]
    new_mass_list = [mass for _, mass in kept]

    # rebuild every spectrum from the surviving columns only
    im = self.__intensity_matrix
    for spec_jj, spec in enumerate(im):
        im[spec_jj] = [spec[col] for col in ii_list]

    self.__mass_list = new_mass_list
    self.__min_mass = min(new_mass_list)
    self.__max_mass = max(new_mass_list)
def test_mass(tic, im):
    """
    Check the ``mass`` attribute on the ``tic`` fixture and on the first
    ion chromatogram taken from the ``im`` fixture.
    """

    # Reading ``mass`` on the ``tic`` fixture is expected to emit a warning.
    with pytest.warns(Warning):
        tic.mass

    first_ic = im.get_ic_at_index(0)
    assert is_number(first_ic.mass)
    assert first_ic.mass == 50.2516
def null_mass(self, mass: float):
    """
    Ignore given mass in spectra.

    The intensity of the mass in the spectrum that lies closest to ``mass``
    is set to zero (the first such mass on a tie), and the peak UID is
    then refreshed.

    :param mass: Mass value to remove

    :raises ValueError: if the mass spectrum is unset.
    :raises TypeError: if ``mass`` is not a number.
    :raises IndexError: if ``mass`` lies outside the spectrum's mass range.

    :author: Andrew Isaac
    """

    if not self._mass_spectrum:
        raise ValueError("Mass spectrum is unset.")

    if not is_number(mass):
        raise TypeError("'mass' must be a number")

    mass_list = self._mass_spectrum.mass_list
    smallest, largest = min(mass_list), max(mass_list)

    if mass < smallest or mass > largest:
        # One formatted string: passing the pieces as separate arguments
        # made the exception message render as a tuple.
        raise IndexError(
                f"'mass' not in mass range: {smallest} to {largest}")

    # Index of the nearest mass. Searching on the distance itself avoids
    # seeding the search with ``max(mass_list)``, which is a mass rather
    # than a distance.
    ix = min(range(len(mass_list)), key=lambda ii: abs(mass_list[ii] - mass))

    self._mass_spectrum.mass_spec[ix] = 0

    # update UID
    self.make_UID()
def rel_threshold(pl, percent=2):
    """
    @summary: Zero every ion intensity that falls below the given
        percentage of the largest intensity in its mass spectrum.
        Works on a deep copy; the input peaks are not modified.

    @param pl: A list of Peak objects
    @type pl: ListType
    @param percent: Threshold for relative percentage of intensity
        (Default 2%)
    @type percent: FloatType

    @return: A new list of Peak objects with threshold ions
    @rtype: ListType

    @author: Andrew Isaac
    """

    if not is_number(percent) or percent <= 0:
        error("'percent' must be a number > 0")

    thresholded = []
    for peak in copy.deepcopy(pl):
        spectrum = peak.get_mass_spectrum()
        intensities = spectrum.mass_spec

        # assume max(intensities) big so /100 1st
        cutoff = (max(intensities) / 100.0) * float(percent)

        for pos, value in enumerate(intensities):
            if value < cutoff:
                intensities[pos] = 0

        spectrum.mass_spec = intensities
        peak.set_mass_spectrum(spectrum)
        thresholded.append(peak)

    return thresholded
def __init__(
        self,
        intensity_list: Union[Sequence[float], numpy.ndarray],
        time_list: Sequence[float],
        mass: Optional[float] = None,
        ):
    """
    Store the intensities, times and optional mass, and cache the time
    step and the retention time range.

    :param intensity_list: Intensity values; converted to a numpy array
        unless it already is one.
    :param time_list: Times, one per intensity; stored as a list.
    :param mass: A number, or ``None``.

    :raises TypeError: if either sequence is not a sequence of numbers, or
        if ``mass`` is neither a number nor ``None``.
    :raises ValueError: if the two sequences differ in length.
    """

    if not is_sequence_of(intensity_list, _number_types):
        raise TypeError("'intensity_list' must be a Sequence of numbers")

    if not is_sequence_of(time_list, _number_types):
        raise TypeError("'time_list' must be a Sequence of numbers")

    if len(intensity_list) != len(time_list):
        raise ValueError(
                "'intensity_list' and 'time_list' differ in length")

    if not (mass is None or is_number(mass)):
        raise TypeError("'mass' must be a number or None")

    if isinstance(intensity_list, numpy.ndarray):
        self._intensity_array = intensity_list
    else:
        self._intensity_array = numpy.array(intensity_list)

    self._time_list = list(time_list)
    self._mass: Optional[float] = mass

    # called only once ``_time_list`` is in place
    self._time_step = self._calc_time_step()

    self._min_rt = min(time_list)
    self._max_rt = max(time_list)
def get_index_at_time(self, time):
    """
    @summary: Returns the nearest index corresponding to the given time
        (the first such index on a tie)

    @param time: Time in seconds
    @type time: FloatType

    @return: Nearest index corresponding to given time
    @rtype: IntType

    @author: Lewis Lee
    @author: Tim Erwin
    @author: Vladimir Likic
    """

    if not is_number(time):
        error("'time' must be a number")

    time_list = self.__time_list

    if time < min(time_list) or time > max(time_list):
        error("time %.2f is out of bounds (min: %.2f, max: %.2f)" %
              (time, self.__min_rt, self.__max_rt))

    # Track the smallest difference seen so far, starting from "none".
    # Seeding the search with max(time_list) returned None whenever no
    # difference was smaller than that value (e.g. time_list == [0.0]).
    ix_match = None
    time_diff_min = None

    for ix, scan_time in enumerate(time_list):
        time_diff = abs(time - scan_time)

        if time_diff_min is None or time_diff < time_diff_min:
            ix_match = ix
            time_diff_min = time_diff

    return ix_match
def __init__(self, ia, time_list, mass=None):
    """
    @summary: Store the intensities, times and mass, and cache the
        time step and the retention time range

    @param ia: Ion chromatogram intensity values
    @type ia: numpy.array
    @param time_list: A list of ion chromatogram retention times
    @type time_list: ListType
    @param mass: Mass of ion chromatogram (Null if TIC)
    @type mass: IntType

    @author: Lewis Lee
    @author: Vladimir Likic
    """

    if not isinstance(ia, numpy.ndarray):
        error("'ia' must be a numpy array")

    # Only the first element of the time list is type-checked.
    times_ok = is_list(time_list) and is_number(time_list[0])
    if not times_ok:
        error("'time_list' must be a list of numbers")

    if len(ia) != len(time_list):
        error("Intensity array and time list differ in length")

    self.__ia = ia
    self.__time_list = time_list
    self.__mass = mass
    self.__time_step = self.__calc_time_step(time_list)

    # retention time range
    self.__min_rt = min(time_list)
    self.__max_rt = max(time_list)
def reduce_mass_spectra(self, n_intensities: int = 5):
    """
    Reduces the mass spectra by retaining the top `n_intensities`,
    discarding all other intensities.

    Every scan in the intensity array is replaced by a list of the same
    length in which only the ``n_intensities`` largest values are kept and
    every other position is ``0.0``.

    :param n_intensities: The number of top intensities to keep

    :raises TypeError: if ``n_intensities`` is not a number.

    :author: Vladimir Likic
    """  # noqa: D400

    if not is_number(n_intensities):
        raise TypeError("'n_intensities' must be a number")

    # loop over all mass spectral scans
    for ii, intensity_list in enumerate(self._intensity_array):

        n = len(intensity_list)

        # indices of the top N intensities; the sort is stable, so equal
        # intensities keep their original relative order
        by_intensity = sorted(
                range(n),
                key=lambda i: intensity_list[i],
                reverse=True,
                )

        # A set makes the membership test below O(1); testing against the
        # list made each scan cost O(n * n_intensities).
        top_indices = set(by_intensity[:n_intensities])

        # The new scan is built completely before it is assigned, so
        # ``intensity_list`` is never read after the row has been replaced.
        self._intensity_array[ii] = [
                intensity_list[jj] if jj in top_indices else 0.0
                for jj in range(n)
                ]
def get_ic_at_mass(self, mass: Optional[float] = None) -> IonChromatogram:
    """
    Returns the ion chromatogram for the nearest binned mass to the specified mass.

    If no mass value is given, the function returns the total ion chromatogram.

    :param mass: Mass value of an ion chromatogram

    :return: Ion chromatogram for given mass

    :raises TypeError: if ``mass`` is given but is not a number.
    :raises IndexError: if ``mass`` lies outside the stored mass range; the
        range is printed before the exception is raised.

    :authors: Andrew Isaac, Vladimir Likic
    """

    if mass is None:
        return self.tic

    if not is_number(mass):
        raise TypeError("'mass' must be a number")

    out_of_range = mass < self._min_mass or mass > self._max_mass
    if out_of_range:
        print("min mass: ", self._min_mass, "max mass:", self._max_mass)
        raise IndexError("mass is out of range")

    return self.get_ic_at_index(self.get_index_of_mass(mass))
def __init__(self, time_list, scan_list):
    """
    @summary: Set up the GC-MS data from scan times and Scan objects

    @param time_list: List of scan retention times
    @type time_list: ListType
    @param scan_list: List of Scan objects
    @type scan_list: ListType

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    # Only the first element of each list is type-checked.
    times_ok = is_list(time_list) and is_number(time_list[0])
    if not times_ok:
        error("'time_list' must be a list of numbers")

    scans_ok = is_list(scan_list) and isinstance(scan_list[0], Scan)
    if not scans_ok:
        error("'scan_list' must be a list of Scan objects")

    self.__set_time(time_list)
    self.__scan_list = scan_list

    # both helpers run only after the scan list has been stored
    self.__set_min_max_mass()
    self.__calc_tic()
def __init__(self, time_list, scan_list):
    """
    @summary: Set up the GC-MS data from scan times and Scan objects

    @param time_list: List of scan retention times
    @type time_list: ListType
    @param scan_list: List of Scan objects
    @type scan_list: ListType

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    # Only the first element of each list is type-checked.
    times_ok = is_list(time_list) and is_number(time_list[0])
    if not times_ok:
        error("'time_list' must be a list of numbers")

    scans_ok = is_list(scan_list) and isinstance(scan_list[0], Scan)
    if not scans_ok:
        error("'scan_list' must be a list of Scan objects")

    self.__set_time(time_list)
    self.__scan_list = scan_list

    # both helpers run only after the scan list has been stored
    self.__set_min_max_mass()
    self.__calc_tic()
def rel_threshold(pl, percent=2):
    """
    @summary: Zero every ion intensity that falls below the given
        percentage of the largest intensity in its mass spectrum.
        Works on a deep copy; the input peaks are not modified.

    @param pl: A list of Peak objects
    @type pl: ListType
    @param percent: Threshold for relative percentage of intensity
        (Default 2%)
    @type percent: FloatType

    @return: A new list of Peak objects with threshold ions
    @rtype: ListType

    @author: Andrew Isaac
    """

    if not is_number(percent) or percent <= 0:
        error("'percent' must be a number > 0")

    thresholded = []
    for peak in copy.deepcopy(pl):
        spectrum = peak.get_mass_spectrum()
        intensities = spectrum.mass_spec

        # assume max(intensities) big so /100 1st
        cutoff = (max(intensities)/100.0)*float(percent)

        for pos, value in enumerate(intensities):
            if value < cutoff:
                intensities[pos] = 0

        spectrum.mass_spec = intensities
        peak.set_mass_spectrum(spectrum)
        thresholded.append(peak)

    return thresholded
def __init__(self, ia, time_list, mass=None):
    """
    @summary: Store the intensities, times and mass, and cache the
        time step and the retention time range

    @param ia: Ion chromatogram intensity values
    @type ia: numpy.array
    @param time_list: A list of ion chromatogram retention times
    @type time_list: ListType
    @param mass: Mass of ion chromatogram (Null if TIC)
    @type mass: IntType

    @author: Lewis Lee
    @author: Vladimir Likic
    """

    if not isinstance(ia, numpy.ndarray):
        error("'ia' must be a numpy array")

    # Only the first element of the time list is type-checked.
    times_ok = is_list(time_list) and is_number(time_list[0])
    if not times_ok:
        error("'time_list' must be a list of numbers")

    if len(ia) != len(time_list):
        error("Intensity array and time list differ in length")

    self.__ia = ia
    self.__time_list = time_list
    self.__mass = mass
    self.__time_step = self.__calc_time_step(time_list)

    # retention time range
    self.__min_rt = min(time_list)
    self.__max_rt = max(time_list)
def test_get_int_of_ion(peak):
    """
    Check ``get_int_of_ion`` on the ``peak`` fixture: two known values, the
    numeric type of the result, and ``IndexError`` for ions 1 and 1000000.
    """

    assert peak.get_int_of_ion(100) == 3888.0
    assert peak.get_int_of_ion(200) == 0.0
    assert is_number(peak.get_int_of_ion(100))

    # one ion below and one above the values used above; both must fail
    for bad_ion in (1, 1000000):
        with pytest.raises(IndexError):
            peak.get_int_of_ion(bad_ion)
def half_area(
        ia: List,
        max_bound: int = 0,
        tol: float = 0.5,
        ) -> Tuple[float, float, float]:
    """
    Find bound of peak by summing intensities until change in sum is less than
    ``tol`` percent of the current area.

    :param ia: List of intensities from Peak apex for a given mass.
    :param max_bound: Optional value to limit size of detected bound.
        Values below 1 mean "no limit".
    :param tol: Percentage tolerance of added area to current area.

    :return: Half peak area, boundary offset, shared (True if shared ion).

    :raises TypeError: if ``ia`` is not a list whose first element is a
        number, if ``max_bound`` is not an ``int``, or if ``tol`` is not a
        ``float``.

    :authors: Andrew Isaac, Dominic Davis-Foster (type assertions)
    """  # noqa: D400

    if not isinstance(ia, list) or not is_number(ia[0]):
        raise TypeError("'ia' must be a list of numbers")
    if not isinstance(max_bound, int):
        raise TypeError("'max_bound' must be an integer")
    if not isinstance(tol, float):
        raise TypeError("'tol' must be a float")

    # halve and convert from percent
    fraction = tol / 200.0

    # Default number of points to sum new area across, for smoothing
    wide = 3

    # How far to the right the scan may go.
    limit = len(ia) if max_bound < 1 else min(max_bound + 1, len(ia))

    # Start at the apex and keep moving right, comparing the running area
    # with the mean of the next 'wide' points (the "edge"), until:
    #   i) tolerance reached
    #   ii) edge area starts increasing
    #   iii) bound reached
    area = ia[0]
    edge = float(sum(ia[0:wide])) / wide
    previous_edge = 2 * edge  # bigger than expected edge
    position = 1

    while area * fraction < edge < previous_edge and position < limit:
        previous_edge = edge
        area += ia[position]
        # slicing past the end of the list is safe
        edge = float(sum(ia[position:position + wide])) / wide
        position += 1

    # an edge that did not fall marks the ion as shared
    shared = edge >= previous_edge

    return area, position - 1, shared
def test_ion_area(peak):
    """
    Check ``get_ion_area`` / ``set_ion_area`` on a deep copy of the ``peak``
    fixture, including the ``TypeError`` raised for unsuitable arguments.
    """

    scratch = copy.deepcopy(peak)

    # nothing has been stored for ion 1 yet
    assert scratch.get_ion_area(1) is None

    scratch.set_ion_area(1, 1234)
    scratch.set_ion_area(2, 1234.56)

    assert is_number(scratch.get_ion_area(1))
    assert is_number(scratch.get_ion_area(2))
    assert scratch.get_ion_area(1) == 1234

    # Errors
    bad_ions = (test_dict, *test_sequences, test_float, test_string)
    for obj in bad_ions:
        with pytest.raises(TypeError):
            scratch.set_ion_area(obj, test_int)

    bad_areas = (test_dict, *test_sequences, test_string)
    for obj in bad_areas:
        with pytest.raises(TypeError):
            scratch.set_ion_area(1, obj)
def build_intensity_matrix_i(
        data: GCMS_data,
        bin_left: float = 0.3,
        bin_right: float = 0.7,
        ) -> IntensityMatrix:
    """
    Sets the full intensity matrix with integer bins.

    :param data: Raw GCMS data
    :param bin_left: left bin boundary offset.
    :param bin_right: right bin boundary offset. Its absolute value is used.

    :return: Binned IntensityMatrix object

    :raises TypeError: if ``data`` is not a GCMS_data object, or if either
        offset is not a number.
    :raises ValueError: if either end of the data's mass range is ``None``.

    :authors: Qiao Wang, Andrew Isaac, Vladimir Likic
    """

    # this package
    from pyms.GCMS.Class import GCMS_data

    if not isinstance(data, GCMS_data):
        raise TypeError("'data' must be a GCMS_data object")

    if not is_number(bin_left):
        raise TypeError("'bin_left' must be a number.")

    if not is_number(bin_right):
        raise TypeError("'bin_right' must be a number.")

    lowest = data.min_mass
    highest = data.max_mass

    if highest is None:
        raise ValueError("'max_mass' cannot be None")
    if lowest is None:
        raise ValueError("'min_mass' cannot be None")

    # Calculate integer min mass based on right boundary
    right_offset = abs(bin_right)
    first_bin = int(lowest + 1 - right_offset)

    return _fill_bins(data, first_bin, highest, 1, bin_left, right_offset)
def ic_mass(self, value: float):
    """
    Sets the mass for a single ion chromatogram peak and regenerates the UID.

    Only ``_ic_mass`` is assigned here; no mass spectrum attribute is changed
    by this method itself.

    :param value: The mass of the ion chromatogram that the peak is from

    :raises TypeError: if ``value`` is not a number.
    """

    value_ok = is_number(value)
    if not value_ok:
        raise TypeError("'Peak.ic_mass' must be a number")

    self._ic_mass = value

    # the UID is rebuilt after the mass has changed
    self.make_UID()
def ion_areas(self, value: Dict):
    """
    Sets the ``ion: ion area`` pairs dictionary.

    Only the first key of the dictionary is checked to be a number. An empty
    dictionary is accepted and clears the stored areas.

    :param value: The dictionary of ion:ion_area pairs

    :raises TypeError: if ``value`` is not a dictionary, or if its first key
        is not a number.
    """

    if not isinstance(value, dict):
        raise TypeError(
                "'Peak.ion_areas' must be a dictionary of ion:ion_area pairs")

    # An empty dict has no key to inspect; indexing its key list used to
    # raise IndexError instead of accepting it.
    if value and not is_number(next(iter(value))):
        raise TypeError(
                "'Peak.ion_areas' must be a dictionary of ion:ion_area pairs")

    self._ion_areas = value
def crop_mass(self, mass_min: float, mass_max: float):
    """
    Crops mass spectrum.

    Only the masses lying in ``[mass_min, mass_max]`` (both ends inclusive)
    and their intensity columns are kept; the stored minimum and maximum
    mass are updated to match.

    :param mass_min: Minimum mass value
    :param mass_max: Maximum mass value

    :raises TypeError: if either bound is not a number.
    :raises ValueError: if ``mass_min`` is not less than ``mass_max``, if
        either bound lies outside the stored mass range, or if no mass falls
        inside the window. The object is left untouched in all these cases.

    :author: Andrew Isaac
    """

    if not is_number(mass_min) or not is_number(mass_max):
        raise TypeError("'mass_min' and 'mass_max' must be numbers")
    if mass_min >= mass_max:
        raise ValueError("'mass_min' must be less than 'mass_max'")
    if mass_min < self._min_mass:
        raise ValueError(f"'mass_min' is less than the smallest mass: {self._min_mass:.3f}")
    if mass_max > self._max_mass:
        raise ValueError(f"'mass_max' is greater than the largest mass: {self._max_mass:.3f}")

    # (column index, mass) pairs for the masses that survive the crop
    kept = [
            (ii, mass)
            for ii, mass in enumerate(self._mass_list)
            if mass_min <= mass <= mass_max
            ]

    # Fail before anything is modified. Previously the intensity array and
    # mass list were overwritten first and ``min()`` then raised an
    # unhelpful error on the empty list.
    if not kept:
        raise ValueError(
                f"no masses lie between 'mass_min' ({mass_min}) and 'mass_max' ({mass_max})"
                )

    ii_list = [ii for ii, _ in kept]
    new_mass_list = [mass for _, mass in kept]

    # update intensity matrix, keeping only the surviving columns
    im = self._intensity_array.tolist()
    self._intensity_array = numpy.array([[spec[ii] for ii in ii_list] for spec in im])

    self._mass_list = new_mass_list
    self._min_mass = min(new_mass_list)
    self._max_mass = max(new_mass_list)
def build_intensity_matrix(data, bin_interval=1, bin_left=0.5, bin_right=0.5):
    """
    @summary: Sets the full intensity matrix with flexible bins,
        spanning the minimum to the maximum mass reported by 'data'

    @param data: Raw GCMS data
    @type data: pyms.GCMS.Class.GCMS_data
    @param bin_interval: interval between bin centres (default 1)
    @type bin_interval: IntType or FloatType
    @param bin_left: left bin boundary offset (default 0.5)
    @type bin_left: FloatType
    @param bin_right: right bin boundary offset (default 0.5)
    @type bin_right: FloatType

    @return: Binned IntensityMatrix object
    @rtype: pyms.GCMS.Class.IntensityMatrix

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    if not isinstance(data, GCMS_data):
        error("data must be an GCMS_data object")

    if bin_interval <= 0:
        error("The bin interval must be larger than zero.")

    # both boundary offsets get the same numeric check, left one first
    offset_checks = (
        (bin_left, "'bin_left' must be a number."),
        (bin_right, "'bin_right' must be a number."),
    )
    for offset, message in offset_checks:
        if not is_number(offset):
            error(message)

    return __fill_bins(data, data.get_min_mass(), data.get_max_mass(),
        bin_interval, bin_left, bin_right)
def build_intensity_matrix(data, bin_interval=1, bin_left=0.5, bin_right=0.5):
    """
    @summary: Sets the full intensity matrix with flexible bins,
        spanning the minimum to the maximum mass reported by 'data'

    @param data: Raw GCMS data
    @type data: pyms.GCMS.Class.GCMS_data
    @param bin_interval: interval between bin centres (default 1)
    @type bin_interval: IntType or FloatType
    @param bin_left: left bin boundary offset (default 0.5)
    @type bin_left: FloatType
    @param bin_right: right bin boundary offset (default 0.5)
    @type bin_right: FloatType

    @return: Binned IntensityMatrix object
    @rtype: pyms.GCMS.Class.IntensityMatrix

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    if not isinstance(data, GCMS_data):
        error("data must be an GCMS_data object")

    if bin_interval <= 0:
        error("The bin interval must be larger than zero.")

    # both boundary offsets get the same numeric check, left one first
    offset_checks = (
        (bin_left, "'bin_left' must be a number."),
        (bin_right, "'bin_right' must be a number."),
    )
    for offset, message in offset_checks:
        if not is_number(offset):
            error(message)

    return __fill_bins(data, data.get_min_mass(), data.get_max_mass(),
        bin_interval, bin_left, bin_right)
def vector_by_step(start: float, stop: float, step: float) -> List[float]:
    """
    Generates a list by using start, stop, and step values.

    The list begins at ``start`` and grows by repeatedly adding ``step``
    while the value is below ``stop``; ``stop`` itself is never included.
    An empty list is returned when ``start >= stop``.

    :param start: Initial value
    :param stop: Max value (exclusive)
    :param step: Step; must be positive whenever ``start < stop``

    :raises TypeError: if any argument is not a number.
    :raises ValueError: if ``step`` is not positive while ``start < stop``.

    :author: Vladimir Likic
    """  # noqa: D400

    if not is_number(start) or not is_number(stop) or not is_number(step):
        raise TypeError(
                "parameters 'start', 'stop', and 'step' must be numbers")

    # A zero or negative step can never reach 'stop', so the loop below would
    # append forever. (When start >= stop the loop never runs, so any step is
    # harmless and is still accepted.)
    if start < stop and step <= 0:
        raise ValueError("'step' must be positive when 'start' is less than 'stop'")

    v = []

    p = start
    while p < stop:
        v.append(p)
        p = p + step

    return v
def set_area(self, area):
    """
    @summary: Stores the area under the peak

    @param area: The peak area; must be a number greater than zero
    @type area: FloatType

    @author: Andrew Isaac
    """

    rejected = (not is_number(area)) or area <= 0
    if rejected:
        error("'area' must be a positive number")

    self.__area = area
def __init__(
        self,
        rt: Union[int, float] = 0.0,
        mass: Optional[float] = None,
        minutes: bool = False,
        outlier: bool = False,
        ):
    """
    Store the ion mass, then hand the remaining arguments to the parent
    class initialiser.

    :param rt: Forwarded unchanged to the parent initialiser.
    :param mass: The ion mass, or ``None``.
    :param minutes: Forwarded unchanged to the parent initialiser.
    :param outlier: Forwarded unchanged to the parent initialiser.

    :raises TypeError: if ``mass`` is neither ``None`` nor a number.
    """

    # Compare against None explicitly: a truthiness test let falsy
    # non-numbers such as "" or [] slip past the check. The message also
    # now names the actual parameter ('mass', not 'ms').
    if mass is not None and not is_number(mass):
        raise TypeError("'mass' must be a number")

    self._ic_mass = mass

    super().__init__(rt, minutes, outlier)
def build_intensity_matrix_i(data, bin_left=0.3, bin_right=0.7):
    """
    @summary: Sets the full intensity matrix with integer bins
        (bin interval fixed at 1)

    @param data: Raw GCMS data
    @type data: pyms.GCMS.Class.GCMS_data
    @param bin_left: left bin boundary offset (default 0.3)
    @type bin_left: FloatType
    @param bin_right: right bin boundary offset (default 0.7);
        its absolute value is used
    @type bin_right: FloatType

    @return: Binned IntensityMatrix object
    @rtype: pyms.GCMS.Class.IntensityMatrix

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    if not isinstance(data, GCMS_data):
        error("data must be an GCMS_data object")

    # both boundary offsets get the same numeric check, left one first
    offset_checks = (
        (bin_left, "'bin_left' must be a number."),
        (bin_right, "'bin_right' must be a number."),
    )
    for offset, message in offset_checks:
        if not is_number(offset):
            error(message)

    lowest = data.get_min_mass()
    highest = data.get_max_mass()

    # Calculate integer min mass based on right boundary
    right_offset = abs(bin_right)
    first_bin = int(lowest+1-right_offset)

    return __fill_bins(data, first_bin, highest, 1, bin_left, right_offset)
def set_area(self, area):

    """
    @summary: Records the area under the peak

    @param area: The peak area; a number strictly greater than zero
    @type area: FloatType

    @author: Andrew Isaac
    """

    # a single flag covers both failure modes: wrong type or
    # a value that is zero or negative
    rejected = (not is_number(area)) or area <= 0
    if rejected:
        error("'area' must be a positive number")

    self.__area = area
def get_maxima_indices(ion_intensities, points=3):

    """
    @summary: Locate local maxima in a single ion's intensities.

    A window of 'points' scans (forced to an odd size) slides over the
    list. The window centre is reported when it is strictly greater
    than everything on both sides. For a plateau (a rise followed by a
    flat stretch and then a fall) the midpoint between the rising edge
    and the falling edge is reported.

    @param ion_intensities: A list of intensities for a single ion
    @type ion_intensities: ListType
    @param points: Peak if maxima over 'points' number of scans
    @type points: IntType

    @return: A list of scan indices
    @rtype: ListType

    @author: Andrew Isaac
    """

    if not is_list(ion_intensities) or not is_number(ion_intensities[0]):
        error("'ion_intensities' must be a List of numbers")

    points = int(points)
    half = int(points/2)
    window = 2*half+1  # odd-sized window: 'half' scans either side

    maxima = []
    rising_edge = -1  # scan index of the latest rise-into-plateau, or -1

    for start in range(len(ion_intensities)-window+1):
        centre_ix = start+half
        centre_val = ion_intensities[centre_ix]
        left_max = max(ion_intensities[start:centre_ix])
        right_max = max(ion_intensities[centre_ix+1:start+window])

        if centre_val > left_max and centre_val > right_max:
            # strict maximum at the window centre
            maxima.append(centre_ix)
            rising_edge = -1  # forget any earlier rising edge
        elif centre_val > left_max and centre_val == right_max:
            # rise into a flat stretch: possibly the left edge of a peak
            rising_edge = centre_ix
        elif centre_val == left_max and centre_val > right_max:
            # fall out of a flat stretch
            if rising_edge > -1:
                # plateau peak: report the midpoint of the two edges
                maxima.append(int((rising_edge+centre_ix)/2))
            rising_edge = -1

    return maxima
def null_mass(self, mass):

    """
    @summary: Zero the intensities of the given (closest) mass in
        every spectrum of the matrix

    The column is chosen with get_index_of_mass() and set to 0 in
    each row of the intensity matrix.

    @param mass: Mass value to remove
    @type mass: IntType or FloatType

    @author: Andrew Isaac
    """

    if not is_number(mass):
        error("'mass' must be numbers")

    lowest, highest = self.__min_mass, self.__max_mass
    if mass < lowest or mass > highest:
        error("'mass' not in mass range: %.3f to %.3f" % (lowest, highest))

    column = self.get_index_of_mass(mass)

    # blank that mass column in each scan's row
    for row in self.__intensity_matrix:
        row[column] = 0
def set_ic_mass(self, mz):

    """
    @summary: Records the mass of the single ion chromatogram the
        peak comes from, and discards any stored mass spectrum

    @param mz: The mass of the ion chromatogram that the peak is from
    @type mz: FloatType

    @return: none
    @rtype: NoneType
    """

    if not is_number(mz):
        error("'mz' must be a number")

    # store the ion mass and drop the mass spectrum in one step
    self.__ic_mass, self.__mass_spectrum = mz, None

    # called after the spectrum is cleared (presumably so the UID
    # reflects the new state -- confirm against make_UID)
    self.make_UID()

    # TEST: public alias kept in step with the cleared spectrum, to
    # test if this speeds things up
    self.mass_spec = None
def null_mass(self, mass):

    """
    @summary: Ignore given mass in spectra

    The intensity of the mass-spectrum entry whose mass is closest to
    'mass' is set to 0. make_UID() is then called and the public
    'mass_spec' alias is re-pointed at the spectrum's intensity list.

    @param mass: Mass value to remove; must lie within the range of
        masses of the peak's mass spectrum
    @type mass: IntType or FloatType

    @author: Andrew Isaac
    """

    if self.__mass_spectrum is None:
        error("mass spectrum not set for this peak")

    if not is_number(mass):
        error("'mass' must be numbers")

    mass_list = self.__mass_spectrum.mass_list
    lowest = min(mass_list)
    highest = max(mass_list)
    if mass < lowest or mass > highest:
        # error() is given one pre-formatted string, as at every other
        # call site
        error("'mass' not in mass range: %.3f to %.3f" % (lowest, highest))

    # index of the closest mass; on a tie the earliest index wins
    ix = min(range(len(mass_list)),
        key=lambda ii: abs(mass_list[ii] - mass))

    self.__mass_spectrum.mass_spec[ix] = 0

    # update UID
    self.make_UID()

    # TEST: to test if this speeds things up
    self.mass_spec = self.__mass_spectrum.mass_spec
def get_index_at_time(self, time):

    """
    @summary: Returns the nearest index corresponding to the given
        time

    The stored time list is scanned linearly; on a tie the earliest
    index is returned.

    @param time: Time in seconds; must lie between the stored minimum
        and maximum retention times
    @type time: FloatType

    @return: Nearest index corresponding to given time
    @rtype: IntType

    @author: Lewis Lee
    @author: Tim Erwin
    @author: Milica Ng
    @author: Vladimir Likic
    """

    if not is_number(time):
        error("'time' must be a number")

    if time < self.__min_rt or time > self.__max_rt:
        error("time %.2f is out of bounds (min: %.2f, max: %.2f)" %
              (time, self.__min_rt, self.__max_rt))

    time_list = self.__time_list

    # Start from +infinity so the first candidate is always accepted.
    # Seeding with the maximum retention time made the search return
    # None whenever that maximum was <= 0 (e.g. a single scan at 0.0).
    time_diff_min = float("inf")
    ix_match = None

    for ix, scan_time in enumerate(time_list):

        time_diff = math.fabs(time - scan_time)

        if time_diff < time_diff_min:
            ix_match = ix
            time_diff_min = time_diff

    return ix_match
def __fill_bins(data, min_mass, max_mass, bin_interval, bin_left, bin_right):

    """
    @summary: Accumulates the intensity values of every scan into bins

    Each mass of each scan is mapped directly to a bin index and its
    intensity is added to that bin.

    @param data: Raw GCMS data
    @type data: pyms.GCMS.Class.GCMS_data
    @param min_mass: minimum mass value
    @type min_mass: IntType or FloatType
    @param max_mass: maximum mass value
    @type max_mass: IntType or FloatType
    @param bin_interval: interval between bin centres
    @type bin_interval: IntType or FloatType
    @param bin_left: left bin boundary offset
    @type bin_left: FloatType
    @param bin_right: right bin boundary offset
    @type bin_right: FloatType

    @return: Binned IntensityMatrix object
    @rtype: pyms.GCMS.Class.IntensityMatrix

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Moshe Olshansky
    @author: Vladimir Likic
    """

    if not isinstance(data, GCMS_data):
        error("data must be an GCMS_data object")
    if not is_number(min_mass):
        error("'min_mass' must be a number")
    if not is_number(max_mass):
        error("'max_mass' must be a number")
    if not is_number(bin_interval):
        error("'bin_interval' must be a number")
    if not is_number(bin_left):
        error("'bin_left' must be a number.")
    if not is_number(bin_right):
        error("'bin_right' must be a number.")

    # left + right offsets must add up to one bin interval
    # (within a relative tolerance of 1e-6)
    if not (abs(bin_left+bin_right-bin_interval) < 1.0e-6*bin_interval):
        error("there should be no gaps or overlap.")

    bin_left = abs(bin_left)
    bin_right = abs(bin_right)

    # keep only the fractional part of the left offset so that the
    # shift applied before truncating to an int is below 1
    left_frac = bin_left - int(bin_left)

    # how many bins are needed to reach max_mass
    num_bins = int(float(max_mass+left_frac-min_mass)/bin_interval)+1

    # bin centres, starting at min_mass
    mass_list = [min_mass + k * bin_interval for k in xrange(num_bins)]

    # Scan data is read through the 'mass_list' / 'intensity_list'
    # attributes (aliases rather than deep copies), since nothing here
    # modifies the arrays.
    # - after Luke Hodkinson, 18/05/2010
    intensity_matrix = []
    for scan in data.scan_list:
        binned = [0.0] * num_bins
        masses = scan.mass_list
        intensities = scan.intensity_list
        for ii, mass in enumerate(masses):
            bin_ix = int((mass + left_frac - min_mass)/bin_interval)
            binned[bin_ix] += intensities[ii]
        intensity_matrix.append(binned)

    return IntensityMatrix(data.get_time_list(), mass_list, intensity_matrix)
def __fill_bins_old(data, min_mass, max_mass, bin_interval, bin_left,
        bin_right):

    """
    @summary: Accumulates the intensity values of every scan into bins
        by testing each mass against each bin's boundaries

    For every bin, all masses of the scan are checked; a mass belongs
    to the bin when it is >= centre - bin_left and < centre + bin_right.

    @param data: Raw GCMS data
    @type data: pyms.GCMS.Class.GCMS_data
    @param min_mass: minimum mass value
    @type min_mass: IntType or FloatType
    @param max_mass: maximum mass value
    @type max_mass: IntType or FloatType
    @param bin_interval: interval between bin centres
    @type bin_interval: IntType or FloatType
    @param bin_left: left bin boundary offset
    @type bin_left: FloatType
    @param bin_right: right bin boundary offset
    @type bin_right: FloatType

    @return: Binned IntensityMatrix object
    @rtype: pyms.GCMS.Class.IntensityMatrix

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    if not isinstance(data, GCMS_data):
        error("data must be an GCMS_data object")
    if not is_number(min_mass):
        error("'min_mass' must be a number")
    if not is_number(max_mass):
        error("'max_mass' must be a number")
    if not is_number(bin_interval):
        error("'bin_interval' must be a number")
    if not is_number(bin_left):
        error("'bin_left' must be a number.")
    if not is_number(bin_right):
        error("'bin_right' must be a number.")

    bin_left = abs(bin_left)
    bin_right = abs(bin_right)

    # fractional part of the left offset (keeps the shift below 1)
    left_frac = bin_left - int(bin_left)

    # how many bins are needed to reach max_mass
    num_bins = int(float(max_mass+left_frac-min_mass)/bin_interval)+1

    # bin centres, starting at min_mass
    mass_list = [min_mass + k * bin_interval for k in xrange(num_bins)]

    intensity_matrix = []
    for scan in data.get_scan_list():
        binned = [0.0] * num_bins
        masses = scan.get_mass_list()
        intensities = scan.get_intensity_list()
        for mm, centre in enumerate(mass_list):
            # half-open interval [lower, upper) covered by this bin
            lower = centre-bin_left
            upper = centre+bin_right
            for ii in xrange(len(scan)):
                if lower <= masses[ii] < upper:
                    binned[mm] += intensities[ii]
        intensity_matrix.append(binned)

    return IntensityMatrix(data.get_time_list(), mass_list, intensity_matrix)
def save_data(file_name, data, format_str="%.6f", prepend="", sep=" ",
        compressed=False):

    """
    @summary: Saves a list of numbers or a list of lists of numbers
        to a file with specific formatting

    A list of numbers is written one number per line. A list of lists
    is written one inner list per line, with the numbers separated by
    'sep'. An empty list produces a file with no lines. All data is
    validated before the output file is opened.

    @param file_name: Name of a file
    @type: StringType
    @param data: A list of numbers, or a list of lists
    @type: ListType
    @param format_str: A format string for individual entries
    @type: StringType
    @param prepend: A string, printed before each row
    @type: StringType
    @param sep: A string, printed between the numbers of a row
    @type: StringType
    @param compressed: A boolean. If True, the output will be gzipped
    @type: BooleanType

    @return: none
    @rtype: NoneType

    @author: Vladimir Likic
    """

    # local import: needed only for the optional gzip step
    import subprocess

    if not is_str(file_name):
        error("'file_name' is not a string")

    if not is_list(data):
        error("'data' is not a list")

    if not is_str(prepend):
        error("'prepend' is not a string")

    if not is_str(sep):
        error("'sep' is not a string")

    # Decide whether data is a vector or a matrix, and validate every
    # entry, before the output file is touched. The 'data and' guard
    # keeps an empty list from raising IndexError on data[0].
    if data and is_number(data[0]):
        for item in data:
            if not is_number(item):
                error("not all elements of the list are numbers")
        data_is_matrix = False
    else:
        for item in data:
            if not is_list(item):
                error("not all elements of the list are lists")
        for row in data:
            for datum in row:
                if not is_number(datum):
                    error("datum not a number")
        data_is_matrix = True

    fp = open_for_writing(file_name)

    # try/finally so the file is closed even if a write or a format
    # operation fails part-way through
    try:
        for row in data:
            fp.write(prepend)
            if data_is_matrix:
                fp.write(sep.join([format_str % datum for datum in row]))
            else:
                fp.write(format_str % row)
            fp.write("\n")
    finally:
        close_for_writing(fp)

    if compressed:
        # An argument list (no shell) keeps file names containing spaces
        # or shell metacharacters intact.
        try:
            status = subprocess.call(["gzip", file_name])
        except OSError:
            # gzip could not be started at all
            status = -1
        if status != 0:
            error("gzip compress failed")
def __init__(self, time_list, mass_list, intensity_matrix):

    """
    @summary: Initialize the IntensityMatrix data

    The three arguments are stored by reference (no copies are made).
    If mpi4py can be imported, the communicator, rank information and
    this rank's share of the row and column ranges are also stored on
    the instance; otherwise those attributes are not set.

    @param time_list: Retention time values
    @type time_list: ListType
    @param mass_list: Binned mass values
    @type mass_list: ListType
    @param intensity_matrix: Binned intensity values per scan
    @type intensity_matrix: ListType

    @author: Andrew Isaac
    """

    # sanity check
    if not is_list(time_list) or not is_number(time_list[0]):
        error("'time_list' must be a list of numbers")
    if not is_list(mass_list) or not is_number(mass_list[0]):
        error("'mass_list' must be a list of numbers")
    if not is_list(intensity_matrix) or \
       not is_list(intensity_matrix[0]) or \
       not is_number(intensity_matrix[0][0]):
        error("'intensity_matrix' must be a list, of a list, of numbers")
    if not len(time_list) == len(intensity_matrix):
        error("'time_list' is not the same length as 'intensity_matrix'")
    if not len(mass_list) == len(intensity_matrix[0]):
        error("'mass_list' is not the same size as 'intensity_matrix'"
            " width")

    self.__time_list = time_list
    self.__mass_list = mass_list
    self.__intensity_matrix = intensity_matrix

    self.__min_mass = min(mass_list)
    self.__max_mass = max(mass_list)

    # Direct access for speed (DANGEROUS)
    self.intensity_matrix = self.__intensity_matrix

    # Try to include parallelism. Only a failed import means "run in
    # serial"; any other exception is allowed to propagate instead of
    # being silently discarded.
    try:
        from mpi4py import MPI
    except ImportError:
        # If we can't import mpi4py then continue in serial.
        pass
    else:
        comm = MPI.COMM_WORLD
        num_ranks = comm.Get_size()
        rank = comm.Get_rank()
        M, N = len(intensity_matrix), len(intensity_matrix[0])
        # Floor division keeps the range bounds integral on Python 3 as
        # well as Python 2.
        lrr = (rank*M//num_ranks, (rank + 1)*M//num_ranks)
        lcr = (rank*N//num_ranks, (rank + 1)*N//num_ranks)
        m, n = (lrr[1] - lrr[0], lcr[1] - lcr[0])
        self.comm = comm
        self.num_ranks = num_ranks
        self.rank = rank
        self.M = M
        self.N = N
        self.local_row_range = lrr
        self.local_col_range = lcr
        self.m = m
        self.n = n