def from_mz_int_pairs(cls, mz_int_pairs):
    """
    Construct a MassSpectrum from a list of (m/z, intensity) tuples.

    Only the first pair is inspected during validation; the remaining
    pairs are checked implicitly when they are unpacked and converted
    to ``float``.

    :param mz_int_pairs: The (m/z, intensity) pairs to build the spectrum from.
    :type mz_int_pairs: list of tuple

    :return: The result of calling ``cls`` with the list of masses and
        the list of intensities.

    :raises TypeError: If ``mz_int_pairs`` or its first element is not a sequence.
    :raises ValueError: If the first element does not have exactly two items.
    """

    err_msg = "`mz_int_pairs` must be a list of (m/z, intensity) tuples."

    if not is_sequence(mz_int_pairs):
        raise TypeError(err_msg)

    first_pair = mz_int_pairs[0]

    if not is_sequence(first_pair):
        raise TypeError(err_msg)

    if len(first_pair) != 2:
        raise ValueError(err_msg)

    # Convert pair by pair (mass first, then intensity) so that a bad
    # value is reported at the first offending pair.
    converted = [(float(mz), float(abundance)) for mz, abundance in mz_int_pairs]

    mass_list = [pair[0] for pair in converted]
    intensity_list = [pair[1] for pair in converted]

    return cls(mass_list, intensity_list)
def rmsd(list1: Union[Sequence, numpy.ndarray], list2: Union[Sequence, numpy.ndarray]) -> float:
    """
    Calculates the root mean square deviation (RMSD) between two data sets.

    The number of points compared is ``len(list1)``; ``list2`` is indexed
    at the same positions.

    :param list1: First data set
    :type list1: list, tuple, or numpy.core.ndarray
    :param list2: Second data set
    :type list2: list, tuple, or numpy.core.ndarray

    :return: RMSD value
    :rtype: float

    :raises TypeError: If either argument is not a sequence.

    :author: Qiao Wang
    :author: Andrew Isaac
    :author: Vladimir Likic
    """

    if not is_sequence(list1):
        raise TypeError("'list1' must be a Sequence")

    if not is_sequence(list2):
        raise TypeError("'list2' must be a Sequence")

    # Accumulate with a plain left-to-right loop so the floating point
    # result does not depend on the summation strategy of a builtin.
    squared_error = 0.0

    for position, first_value in enumerate(list1):
        difference = first_value - list2[position]
        squared_error = squared_error + difference**2

    mean_squared_error = squared_error / len(list1)

    return math.sqrt(mean_squared_error)
def exprl2alignment(expr_list: List[Experiment]) -> List[Alignment]:
    """
    Converts experiments into alignments.

    Each item is checked and converted in turn, so a non-Experiment item
    is only reported after the items before it have been converted.

    :param expr_list: The list of experiments to be converted into alignment objects
    :type expr_list: list of :class:`pyms.Experiment.Experiment`

    :return: A list of alignment objects for the experiments
    :rtype: list of :class:`pyms.DPA.Alignment.Alignment`

    :raises TypeError: If ``expr_list`` is not a sequence, or if one of its
        items is not an ``Experiment``.

    :author: Vladimir Likic
    """

    if not is_sequence(expr_list):
        raise TypeError("'expr_list' must be a Sequence")

    def _as_alignment(experiment):
        # Validate a single item and wrap it in an Alignment.
        if isinstance(experiment, Experiment):
            return Alignment(experiment)
        raise TypeError("list items must be 'Experiment' instances")

    return [_as_alignment(experiment) for experiment in expr_list]
def __init__(
        self,
        time_list: Sequence[float],
        mass_list: Sequence[float],
        intensity_array: Union[Sequence[Sequence[float]], numpy.ndarray],
        ):
    """
    Validate and store the retention times, masses and intensity values.

    :param time_list: Sequence of numbers; must have as many entries as
        ``intensity_array`` has rows.
    :param mass_list: Sequence of numbers; must have as many entries as
        the first row of ``intensity_array``.
    :param intensity_array: Sequence of sequences of numbers. Converted
        to a numpy array if it is not one already.

    :raises TypeError: If an argument has the wrong type.
    :raises ValueError: If the lengths of the arguments do not agree.
    """

    # Type checks; only the first row of the intensity array is inspected.
    if not is_sequence_of(time_list, _number_types):
        raise TypeError("'time_list' must be a Sequence of numbers")

    if not is_sequence_of(mass_list, _number_types):
        raise TypeError("'mass_list' must be a Sequence of numbers")

    if not (is_sequence(intensity_array) and is_sequence_of(intensity_array[0], _number_types)):
        raise TypeError("'intensity_array' must be a Sequence, of Sequences, of numbers")

    if not isinstance(intensity_array, numpy.ndarray):
        intensity_array = numpy.array(intensity_array)

    # Shape checks: one time per row, one mass per column.
    if len(time_list) != len(intensity_array):
        raise ValueError("'time_list' is not the same length as 'intensity_array'")

    if len(mass_list) != len(intensity_array[0]):
        raise ValueError("'mass_list' is not the same size as 'intensity_array'")

    # Times and masses are stored as list copies of the arguments.
    self._time_list = list(time_list)
    self._min_rt = min(time_list)
    self._max_rt = max(time_list)

    self._mass_list = list(mass_list)
    self._min_mass = min(mass_list)
    self._max_mass = max(mass_list)

    self._intensity_array = intensity_array
def __init__(self, ia, time_list, mass=None):
    """
    Validate and store the data for an ion chromatogram.

    :param ia: Ion chromatogram intensity values
    :type ia: numpy.array
    :param time_list: A list of ion chromatogram retention times
    :type time_list: list
    :param mass: Mass of ion chromatogram (``None`` if TIC)
    :type mass: int or float

    :raises TypeError: If ``ia`` is not a numpy array, ``time_list`` is not
        a sequence of numbers, or ``mass`` is neither ``None`` nor a number.
    :raises ValueError: If ``ia`` and ``time_list`` differ in length.

    :author: Lewis Lee, Vladimir Likic
    """

    if not isinstance(ia, numpy.ndarray):
        raise TypeError("'ia' must be a numpy array")

    if not is_sequence(time_list) or not all(isinstance(time, Number) for time in time_list):
        raise TypeError("'time_list' must be a list of numbers")

    if len(ia) != len(time_list):
        raise ValueError("Intensity array and time list differ in length")

    # Compare against None explicitly: a truthiness test would let falsy
    # non-numeric values such as '' or [] slip through unvalidated.
    if mass is not None and not isinstance(mass, Number):
        raise TypeError("'mass' must be a number")

    self._intensity_array = ia
    self._time_list = time_list
    self._mass = mass

    # Must run after the attributes above have been assigned.
    self._time_step = self.__calc_time_step()

    self._min_rt = min(time_list)
    self._max_rt = max(time_list)
def median_bounds(im: BaseIntensityMatrix, peak: Peak, shared: bool = True) -> Tuple[float, float]:
    """
    Calculates the median of the left and right bounds found for each apexing peak mass.

    :param im: The originating IntensityMatrix object.
    :param peak: The peak whose bounds are to be estimated.
    :param shared: Include shared ions shared with neighbouring peak.

    :return: Median left and right boundary offset in points.
        ``0.0`` is returned for a side for which no bounds were collected.

    :raises TypeError: If an argument has the wrong type.

    :authors: Andrew Isaac, Dominic Davis-Foster
    """

    if not isinstance(im, BaseIntensityMatrix):
        raise TypeError("'im' must be an IntensityMatrix object")

    if not isinstance(peak, Peak):
        raise TypeError("'peak' must be a Peak object")

    if not isinstance(shared, bool):
        raise TypeError("'shared' must be a boolean")

    intensity_rows = im.intensity_array
    spectrum = peak.mass_spectrum
    retention_time = peak.rt
    apex = im.get_index_at_time(retention_time)

    # Prefer the apex index stored with the peak when it lies within
    # one point of the index derived from the retention time.
    if is_sequence(peak.bounds):
        stored_bounds = cast(Sequence, peak.bounds)
        if apex - 1 < stored_bounds[1] < apex + 1:
            apex = stored_bounds[1]

    # Indices of the peak masses with non-zero intensity
    active_masses = [idx for idx in range(len(spectrum.mass_list)) if spectrum.mass_spec[idx] > 0]

    left_offsets = []
    right_offsets = []

    for mass_idx in active_masses:
        # Ion chromatogram for this mass, one value per scan
        chromatogram = [row[mass_idx] for row in intensity_rows]

        _area, left, right, left_is_shared, right_is_shared = ion_area(chromatogram, apex)

        if shared or not left_is_shared:
            left_offsets.append(left)

        if shared or not right_is_shared:
            right_offsets.append(right)

    # Either list may be empty (no non-zero masses, or every bound
    # skipped because ``shared`` is False); fall back to 0.0 then.
    left_median = median(left_offsets) if left_offsets else 0.0
    right_median = median(right_offsets) if right_offsets else 0.0

    return left_median, right_median
def load_peaks(file_name: Union[str, pathlib.Path]) -> List[Peak]:
    """
    Loads the peak_list stored with 'store_peaks'.

    :param file_name: File name of peak list
    :type file_name: str or os.PathLike

    :return: The list of Peak objects
    :rtype: :class:`list` of :class:`pyms.Peak.Class.Peak`

    :raises TypeError: If ``file_name`` is not a string or PathLike object.
    :raises IOError: If the unpickled object is not a sequence, is empty,
        or its first element is not a ``Peak``.

    :author: Andrew Isaac
    :author: Dominic Davis-Foster (pathlib support)
    """

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    file_name = prepare_filepath(file_name, mkdirs=False)

    # NOTE(security): unpickling can execute arbitrary code; only load
    # files that come from a trusted source.
    # The context manager closes the file even if unpickling fails.
    with file_name.open("rb") as fp:
        peak_list = pickle.load(fp)

    if not is_sequence(peak_list):
        raise IOError("The selected file is not a List")

    # Only the first element is inspected.
    if not len(peak_list) > 0 or not isinstance(peak_list[0], Peak):
        raise IOError("The selected file is not a list of Peak objects")

    return peak_list
def MAD(v: Union[Sequence, numpy.ndarray]) -> float:
    """
    Median absolute deviation.

    Computed as the median of the absolute deviations of the values
    from their median, divided by 0.6745.

    :param v: List of values to calculate the median absolute deviation of
    :type v: list, tuple, or numpy.core.ndarray

    :return: median absolute deviation
    :rtype: float

    :raises TypeError: If ``v`` is not a sequence.

    :author: Vladimir Likic
    """

    if not is_sequence(v):
        raise TypeError("'v' must be a Sequence")

    centre = median(v)
    absolute_deviations = [math.fabs(value - centre) for value in v]

    return median(absolute_deviations) / 0.6745
def __init__(self, time_list, mass_list, intensity_array):
    """
    Initialize the IntensityMatrix data.

    If ``mpi4py`` can be imported, the MPI communicator, the rank
    information and the row/column ranges assigned to this rank are
    stored as well; otherwise the object is set up for serial use.

    :param time_list: Sequence of numbers; must have as many entries as
        ``intensity_array`` has rows.
    :param mass_list: Sequence of numbers; must have as many entries as
        the first row of ``intensity_array``.
    :param intensity_array: Sequence of sequences of numbers. Converted
        to a numpy array if it is not one already.

    :raises TypeError: If an argument has the wrong type.
    :raises ValueError: If the lengths of the arguments do not agree.
    """

    # sanity check
    if not is_sequence_of(time_list, Number):
        raise TypeError("'time_list' must be a Sequence of Numbers")

    if not is_sequence_of(mass_list, Number):
        raise TypeError("'mass_list' must be a Sequence of Numbers")

    if not is_sequence(intensity_array) or not is_sequence_of(intensity_array[0], Number):
        raise TypeError("'intensity_array' must be a Sequence, of Sequences, of Numbers")

    if not isinstance(intensity_array, numpy.ndarray):
        intensity_array = numpy.array(intensity_array)

    if not len(time_list) == len(intensity_array):
        raise ValueError("'time_list' is not the same length as 'intensity_array'")

    if not len(mass_list) == len(intensity_array[0]):
        raise ValueError("'mass_list' is not the same size as 'intensity_array'")

    self._time_list = time_list
    self._mass_list = mass_list
    self._intensity_array = intensity_array

    self._min_rt = min(time_list)
    self._max_rt = max(time_list)

    self._min_mass = min(mass_list)
    self._max_mass = max(mass_list)

    # Try to include parallelism.
    try:
        from mpi4py import MPI
    except ImportError:
        # If we can't import mpi4py then continue in serial.
        # ImportError also covers an installed mpi4py that fails to load.
        pass
    else:
        comm = MPI.COMM_WORLD
        num_ranks = comm.Get_size()
        rank = comm.Get_rank()

        M, N = len(intensity_array), len(intensity_array[0])

        # Floor division keeps the range limits integral; true division
        # would produce floats under Python 3.
        lrr = (rank * M // num_ranks, (rank + 1) * M // num_ranks)
        lcr = (rank * N // num_ranks, (rank + 1) * N // num_ranks)

        # Number of rows and columns assigned to this rank
        m, n = (lrr[1] - lrr[0], lcr[1] - lcr[0])

        self.comm = comm
        self.num_ranks = num_ranks
        self.rank = rank

        self.M = M
        self.N = N

        self.local_row_range = lrr
        self.local_col_range = lcr

        self.m = m
        self.n = n
def sele_rt_range(self, rt_range: Sequence[str]):
    """
    Discards all peaks which have the retention time outside the specified range.

    The stored peak list is replaced by the result of
    ``sele_peaks_by_rt``.

    :param rt_range: Min, max retention time given as a sequence ``[rt_min, rt_max]``.

    :raises TypeError: If ``rt_range`` is not a sequence.
    """

    if is_sequence(rt_range):
        self._peak_list = sele_peaks_by_rt(self._peak_list, rt_range)
    else:
        raise TypeError("'rt_range' must be a Sequence")
def sele_rt_range(self, rt_range):
    """
    Discards all peaks which have the retention time outside the specified range.

    The stored peak list is replaced by the result of
    ``sele_peaks_by_rt``.

    :param rt_range: Min, max retention time given as a list [rt_min, rt_max]
    :type rt_range: ~collections.abc.Sequence

    :raises TypeError: If ``rt_range`` is not a sequence.
    """

    range_is_valid = is_sequence(rt_range)

    if not range_is_valid:
        raise TypeError("'rt_range' must be a Sequence")

    self._peak_list = sele_peaks_by_rt(self._peak_list, rt_range)
def intensity_array(self, ia: Union[Sequence, numpy.ndarray]):
    """
    Sets the value for the intensity array.

    A value that is not already a numpy array is converted to one
    before it is stored.

    :param ia: An array of new intensity values

    :raises TypeError: If ``ia`` is not a sequence.

    :author: Vladimir Likic
    """

    if not is_sequence(ia):
        raise TypeError("'intensity_array' must be a Sequence")

    self._intensity_array = ia if isinstance(ia, numpy.ndarray) else numpy.array(ia)
def sele_peaks_by_rt(peaks: Union[Sequence, numpy.ndarray], rt_range: Sequence[str]) -> List[Peak]:
    """
    Selects peaks from a retention time range.

    A peak is selected when its retention time lies strictly between
    the lower and upper limits.

    :param peaks: A list of peak objects
    :type peaks: list or tuple or numpy.ndarray
    :param rt_range: A list of two time strings, specifying lower and
        upper retention times
    :type rt_range: ~collections.abc.Sequence[str]

    :return: A list of peak objects
    :rtype: :class:`list` of :class:`pyms.Peak.Class.Peak`

    :raises TypeError: If ``peaks`` is not a sequence of Peak objects,
        ``rt_range`` is not a sequence, or its elements are not strings.
    :raises ValueError: If ``rt_range`` does not have exactly two elements,
        or the lower limit is not less than the upper limit.
    """

    if not is_peak_list(peaks):
        raise TypeError("'peaks' must be a Sequence of Peak objects")

    if not is_sequence(rt_range):
        raise TypeError("'rt_range' must be a Sequence")

    if len(rt_range) != 2:
        raise ValueError("'rt_range' must have exactly two elements")

    if not isinstance(rt_range[0], str) or not isinstance(rt_range[1], str):
        raise TypeError("lower/upper retention time limits must be strings")

    rt_lo = time_str_secs(rt_range[0])
    rt_hi = time_str_secs(rt_range[1])

    if rt_lo >= rt_hi:
        raise ValueError("lower retention time limit must be less than upper")

    # Both limits are exclusive.
    return [peak for peak in peaks if rt_lo < peak.rt < rt_hi]
def bounds(self, value: Sequence[int]):
    """
    Sets peak boundaries in points.

    The boundaries are stored as a tuple.

    :param value: A 3-element tuple containing the left, apex, and right
        peak boundaries in points. Left and right are offsets.

    :raises TypeError: If ``value`` is not a sequence, or one of its
        elements is not an integer.
    :raises ValueError: If ``value`` does not have exactly 3 elements.
    """

    if not is_sequence(value):
        raise TypeError("'Peak.bounds' must be a Sequence")

    if len(value) != 3:
        raise ValueError("'Peak.bounds' must have exactly 3 elements")

    # Position of the first element that is not an integer, if any
    index = next(
            (position for position, item in enumerate(value) if not isinstance(item, int)),
            None,
            )

    if index is not None:
        raise TypeError(f"'Peak.bounds' element #{index} must be an integer")

    as_tuple = tuple(value[:3])
    self._pt_bounds = cast(Tuple[int, int, int], as_tuple)
def bounds(self, value):
    """
    Sets peak boundaries in points.

    The given object is stored as-is (no copy is made).

    :param value: A list containing left, apex, and right peak
        boundaries in points, left and right are offsets
    :type value: list

    :raises TypeError: If ``value`` is not a sequence, or one of its
        elements is not an integer.
    :raises ValueError: If ``value`` does not have exactly 3 elements.
    """

    if not is_sequence(value):
        raise TypeError("'Peak.bounds' must be a list")

    if len(value) != 3:
        raise ValueError("'Peak.bounds' must have exactly 3 elements")

    # Position of the first element that is not an integer, if any
    index = next(
            (position for position, item in enumerate(value) if not isinstance(item, int)),
            None,
            )

    if index is not None:
        raise TypeError(f"'Peak.bounds' element #{index} must be an integer")

    self._pt_bounds = value