コード例 #1
0
def test_read_expr_list(filtered_peak_list, datadir, expr_filename):
	"""
	Exercise ``read_expr_list`` with a valid list file and with invalid input.

	The first loaded experiment must carry the expected code and peak list;
	non-path arguments must raise :exc:`TypeError` and unusable files
	must raise :exc:`IOError` (or :exc:`UnicodeDecodeError` for the
	file that is not an experiment).
	"""

	list_file = datadir / "read_expr_list.txt"
	loaded = read_expr_list(list_file)
	assert isinstance(loaded, list)
	assert is_sequence_of(loaded, Experiment)

	# Only the first experiment is inspected in detail.
	first = loaded[0]
	assert isinstance(first.expr_code, str)
	assert first.expr_code == "ELEY_1_SUBTRACT"

	assert isinstance(first.peak_list, list)
	assert is_sequence_of(first.peak_list, Peak)
	assert first.peak_list == filtered_peak_list

	# Selecting a retention time range must not raise.
	first.sele_rt_range(["6.5m", "21m"])

	# Errors: anything that is not a path is rejected.
	for bad_input in (*test_numbers, test_dict, *test_lists):
		with pytest.raises(TypeError):
			read_expr_list(bad_input)

	# Errors: paths that do not lead to a usable experiment list.
	unusable_files = (
		("non-existent.expr", IOError),
		("not-an-experiment.expr", (IOError, UnicodeDecodeError)),
		("__init__.py", IOError),
	)
	for filename, expected in unusable_files:
		with pytest.raises(expected):
			read_expr_list(filename)
コード例 #2
0
def test_read_expr_list(filtered_peak_list, pyms_datadir, expr_filename,
                        tmp_pathplus):
    """
    Exercise ``read_expr_list`` with a generated list file and with invalid input.

    The list file is written to ``tmp_pathplus`` and names ``expr_filename``
    five times. Non-path arguments must raise :exc:`TypeError`; missing
    files must raise :exc:`FileNotFoundError` with a matching message.
    """
    list_file = tmp_pathplus / "read_expr_list.txt"
    list_file.write_lines([str(expr_filename)] * 5)

    loaded = read_expr_list(tmp_pathplus / "read_expr_list.txt")
    assert isinstance(loaded, list)
    assert is_sequence_of(loaded, Experiment)

    # Only the first experiment is inspected in detail.
    first = loaded[0]
    assert isinstance(first.expr_code, str)
    assert first.expr_code == "ELEY_1_SUBTRACT"

    assert isinstance(first.peak_list, list)
    assert is_sequence_of(first.peak_list, Peak)
    assert first.peak_list == filtered_peak_list

    # Selecting a retention time range must not raise.
    first.sele_rt_range(["6.5m", "21m"])

    # Errors: anything that is not a path is rejected.
    for bad_input in (*test_numbers, test_dict, *test_lists):
        with pytest.raises(TypeError):
            read_expr_list(bad_input)  # type: ignore

    # Errors: each of these relative paths is expected to be missing.
    missing_files = (
        ("non-existent.expr",
         "No such file or directory: .*non-existent.expr.*"),
        ("not-an-experiment.expr",
         "No such file or directory: 'not-an-experiment.expr'"),
        ("__init__.py",
         "No such file or directory: .*__init__.py.*"),
    )
    for filename, pattern in missing_files:
        with pytest.raises(FileNotFoundError, match=pattern):
            read_expr_list(filename)
コード例 #3
0
    def __init__(self,
                 intensity_list: Union[Sequence[float], numpy.ndarray],
                 time_list: Sequence[float],
                 mass: Optional[float] = None):
        """
        Validate the arguments and store the intensity and time data.

        :param intensity_list: Intensity values. Stored as a
            :class:`numpy.ndarray` (converted if necessary).
        :param time_list: Time values, one per intensity. Stored as a new list.
        :param mass: An optional mass value, or :py:obj:`None`.

        :raises TypeError: If either sequence is not a sequence of numbers,
            or ``mass`` is neither a number nor :py:obj:`None`.
        :raises ValueError: If the two sequences differ in length.
        """
        numeric_sequences = (
            (intensity_list, "'intensity_list' must be a Sequence of numbers"),
            (time_list, "'time_list' must be a Sequence of numbers"),
        )
        for values, complaint in numeric_sequences:
            if not is_sequence_of(values, _number_types):
                raise TypeError(complaint)

        if not len(intensity_list) == len(time_list):
            raise ValueError(
                "'intensity_list' and 'time_list' differ in length")

        if not (mass is None or is_number(mass)):
            raise TypeError("'mass' must be a number or None")

        # Keep an existing array as-is; wrap anything else.
        self._intensity_array = (
            intensity_list if isinstance(intensity_list, numpy.ndarray)
            else numpy.array(intensity_list)
        )
        self._time_list = list(time_list)
        self._mass: Optional[float] = mass
        self._time_step = self._calc_time_step()
        self._min_rt, self._max_rt = min(time_list), max(time_list)
コード例 #4
0
	def __init__(
			self,
			time_list: Sequence[float],
			mass_list: Sequence[float],
			intensity_array: Union[Sequence[Sequence[float]], numpy.ndarray],
			):
		"""
		Validate the arguments and store the times, masses and intensities.

		:param time_list: Time values; must be as long as ``intensity_array``.
		:param mass_list: Mass values; must be as long as the first row of ``intensity_array``.
		:param intensity_array: A sequence of sequences of numbers.
			Stored as a :class:`numpy.ndarray` (converted if necessary).

		:raises TypeError: If an argument is not of the required kind.
		:raises ValueError: If the lengths do not agree with ``intensity_array``.
		"""
		# sanity check
		flat_sequences = (
				(time_list, "'time_list' must be a Sequence of numbers"),
				(mass_list, "'mass_list' must be a Sequence of numbers"),
				)
		for values, complaint in flat_sequences:
			if not is_sequence_of(values, _number_types):
				raise TypeError(complaint)

		# Only the first row is inspected for element type.
		if not (is_sequence(intensity_array) and is_sequence_of(intensity_array[0], _number_types)):
			raise TypeError("'intensity_array' must be a Sequence, of Sequences, of numbers")

		intensity_array = (
				intensity_array if isinstance(intensity_array, numpy.ndarray)
				else numpy.array(intensity_array)
				)

		if len(time_list) != len(intensity_array):
			raise ValueError("'time_list' is not the same length as 'intensity_array'")

		if len(mass_list) != len(intensity_array[0]):
			raise ValueError("'mass_list' is not the same size as 'intensity_array'")

		self._time_list = list(time_list)
		self._mass_list = list(mass_list)

		self._intensity_array = intensity_array

		self._min_rt, self._max_rt = min(time_list), max(time_list)
		self._min_mass, self._max_mass = min(mass_list), max(mass_list)
コード例 #5
0
	def __init__(self, time_list, mass_list, intensity_array):
		"""
		Initialize the IntensityMatrix data.

		:param time_list: Time values; one per row of ``intensity_array``.
		:param mass_list: Mass values; one per column of ``intensity_array``.
		:param intensity_array: A sequence of sequences of numbers.
			Converted to a :class:`numpy.ndarray` if it is not one already.

		:raises TypeError: If an argument is not of the required kind.
		:raises ValueError: If ``time_list`` or ``mass_list`` does not match
			the dimensions of ``intensity_array``.

		If ``mpi4py`` can be imported, the attributes ``comm``, ``num_ranks``,
		``rank``, ``M``, ``N``, ``local_row_range``, ``local_col_range``,
		``m`` and ``n`` are also set; otherwise none of them exist.
		"""

		# sanity check
		if not is_sequence_of(time_list, Number):
			raise TypeError("'time_list' must be a Sequence of Numbers")

		if not is_sequence_of(mass_list, Number):
			raise TypeError("'mass_list' must be a Sequence of Numbers")

		# Only the first row is inspected for element type.
		if not is_sequence(intensity_array) or not is_sequence_of(intensity_array[0], Number):
			raise TypeError("'intensity_array' must be a Sequence, of Sequences, of Numbers")

		if not isinstance(intensity_array, numpy.ndarray):
			intensity_array = numpy.array(intensity_array)

		if not len(time_list) == len(intensity_array):
			raise ValueError("'time_list' is not the same length as 'intensity_array'")

		if not len(mass_list) == len(intensity_array[0]):
			raise ValueError("'mass_list' is not the same size as 'intensity_array'")

		self._time_list = time_list
		self._mass_list = mass_list

		self._intensity_array = intensity_array

		self._min_rt = min(time_list)
		self._max_rt = max(time_list)

		self._min_mass = min(mass_list)
		self._max_mass = max(mass_list)

		# Try to include parallelism. Only the import itself is guarded, so
		# genuine MPI errors raised afterwards are not hidden.
		try:
			from mpi4py import MPI
		except ImportError:
			# If we can't import mpi4py then continue in serial. ImportError
			# covers both a missing package (ModuleNotFoundError) and an
			# installed package that fails to load.
			pass
		else:
			comm = MPI.COMM_WORLD
			num_ranks = comm.Get_size()
			rank = comm.Get_rank()
			M, N = len(intensity_array), len(intensity_array[0])

			# Floor division keeps the bounds and sizes integral; true
			# division would produce floats under Python 3.
			lrr = (rank * M // num_ranks, (rank + 1) * M // num_ranks)
			lcr = (rank * N // num_ranks, (rank + 1) * N // num_ranks)
			m, n = (lrr[1] - lrr[0], lcr[1] - lcr[0])

			self.comm = comm
			self.num_ranks = num_ranks
			self.rank = rank
			self.M = M
			self.N = N
			self.local_row_range = lrr
			self.local_col_range = lcr
			self.m = m
			self.n = n
コード例 #6
0
ファイル: Class.py プロジェクト: PyMassSpec/PyMassSpec
    def __init__(self, time_list: Sequence[float], scan_list: Sequence[Scan]):
        """
        Validate and store the times and scans, then compute derived values.

        :param time_list: A sequence of numbers. Stored as a new list.
        :param scan_list: A sequence of ``Scan`` objects. Stored as a new list.

        :raises TypeError: If either argument is not of the required kind.
        """
        if not is_sequence_of(time_list, _number_types):
            raise TypeError("'time_list' must be a Sequence of numbers")
        elif not is_sequence_of(scan_list, Scan):
            raise TypeError("'scan_list' must be a Sequence of Scan objects")

        self._time_list, self._scan_list = list(time_list), list(scan_list)

        # Derived values are computed in this fixed order.
        for prepare in (self._set_time, self._set_min_max_mass, self._calc_tic):
            prepare()
コード例 #7
0
def test_load_expr(filtered_peak_list, pyms_datadir, expr_filename):
    """
    Exercise ``load_expr`` with a valid experiment file and with invalid input.

    Non-path arguments must raise :exc:`TypeError`, a missing file must
    raise :exc:`FileNotFoundError` and a file that is not an experiment
    must raise :exc:`TypeError` with a matching message.
    """
    loaded = load_expr(expr_filename)
    assert isinstance(loaded, Experiment)

    assert isinstance(loaded.expr_code, str)
    assert loaded.expr_code == "ELEY_1_SUBTRACT"

    assert isinstance(loaded.peak_list, list)
    assert is_sequence_of(loaded.peak_list, Peak)

    assert loaded.peak_list == filtered_peak_list

    # Selecting a retention time range must not raise.
    loaded.sele_rt_range(["6.5m", "21m"])

    # Errors: anything that is not a path is rejected.
    for bad_input in (*test_numbers, test_dict, *test_lists):
        with pytest.raises(TypeError):
            load_expr(bad_input)  # type: ignore

    # Errors: files in the data directory that cannot be loaded.
    failing_loads = (
        ("non-existent.expr", FileNotFoundError,
         "No such file or directory: .*non-existent.expr.*"),
        ("not-an-experiment.expr", TypeError,
         "The loaded file is not an experiment file"),
    )
    for filename, expected, pattern in failing_loads:
        with pytest.raises(expected, match=pattern):
            load_expr(pyms_datadir / filename)
コード例 #8
0
    def __init__(self, time_list, scan_list):
        """
        Initialize the GC-MS data.

        :param time_list: A sequence of numbers. Stored as given (not copied).
        :param scan_list: A sequence of ``Scan`` objects. Stored as given (not copied).

        :raises TypeError: If either argument is not of the required kind.
        """

        requirements = (
            (time_list, Number, "'time_list' must be a Sequence of numbers"),
            (scan_list, Scan, "'scan_list' must be a Sequence of Scan objects"),
        )
        for values, element_type, complaint in requirements:
            if not is_sequence_of(values, element_type):
                raise TypeError(complaint)

        self._time_list, self._scan_list = time_list, scan_list

        # Derived values are computed in this fixed order.
        self.__set_time()
        self.__set_min_max_mass()
        self.__calc_tic()
コード例 #9
0
ファイル: Function.py プロジェクト: PyMassSpec/PyMassSpec
def is_peak_list(peaks: Any) -> bool:
	"""
	Returns whether ``peaks`` is a valid peak list.

	The check is delegated to ``is_sequence_of(peaks, Peak)``.

	:param peaks: The object to test.

	:return: The result of ``is_sequence_of(peaks, Peak)``.

	:author: Dominic Davis-Foster
	"""

	return is_sequence_of(peaks, Peak)
コード例 #10
0
def get_maxima_indices(ion_intensities: Union[Sequence, numpy.ndarray],
                       points: int = 3) -> List[int]:
    """
    Find local maxima.

    A window of ``points`` values slides over ``ion_intensities``. An even
    ``points`` is widened by one so the window has a single centre. The
    centre index is reported when its value is strictly greater than every
    value on both sides of the window. A plateau that is entered by a rise
    and left by a fall is reported once, at the midpoint of the plateau.

    :param ion_intensities: A list of intensities for a single ion
    :type ion_intensities: ~collections.abc.Sequence or numpy.ndarray
    :param points: Number of scans over which to consider a maxima to be a peak. Default ``3``
    :type points: int, optional

    :return: A list of scan indices
    :rtype: list

    :raises TypeError: If ``ion_intensities`` is not a sequence of numbers
        or ``points`` is not an integer.

    :author: Andrew Isaac, Dominic Davis-Foster (type assertions)
    """

    if not is_sequence_of(ion_intensities, Number):
        raise TypeError("'ion_intensities' must be a List of Numbers")

    if not isinstance(points, int):
        raise TypeError("'points' must be an integer")

    peak_point = []
    edge = -1  # index where the current plateau started, or -1 if none
    half = int(points / 2)
    points = 2 * half + 1  # ensure odd number of points

    for index in range(len(ion_intensities) - points + 1):
        centre_index = index + half
        left_max = max(ion_intensities[index:centre_index])
        mid = ion_intensities[centre_index]
        right_max = max(ion_intensities[centre_index + 1:index + points])

        # The three cases are mutually exclusive.
        if mid > left_max and mid > right_max:
            # max in middle
            peak_point.append(centre_index)
            edge = -1  # ignore previous rising edge
        elif mid > left_max and mid == right_max:
            # flat from rise (left of peak?)
            edge = centre_index  # ignore previous rising edge, update latest
        elif mid == left_max and mid > right_max:
            # fall from flat; only a plateau that began with a rise counts
            if edge > -1:
                peak_point.append(int((edge + centre_index) / 2))  # mid point
            edge = -1

    return peak_point
コード例 #11
0
def is_peak_list(peaks: List) -> bool:
    """
    Returns True if 'peaks' is a valid peak list, False otherwise.

    The check is delegated to ``is_sequence_of(peaks, Peak)``.

    :param peaks: The object to test; expected to be a list of peak objects
    :type peaks: list

    :return: The result of ``is_sequence_of(peaks, Peak)``
    :rtype: bool

    :author: Dominic Davis-Foster
    """

    return is_sequence_of(peaks, Peak)
コード例 #12
0
    def __init__(self, alignments: List[Alignment], D: float, gap: float):
        """
        Validate and store the arguments, then build the derived structures.

        After the arguments are stored, ``_sim_matrix``, ``_dist_matrix``
        and ``_guide_tree`` are called in that order.

        :param alignments: A sequence of ``Alignment`` objects.
        :param D: Must be a :class:`float`.
        :param gap: Must be a :class:`float`.

        :raises TypeError: If any argument is not of the required type.
        """
        if not is_sequence_of(alignments, Alignment):
            raise TypeError("'alignments' must be a Sequence of Alignment objects")

        # Both numeric parameters must be floats (ints are rejected).
        float_checks = (
            (D, "'D' must be a float"),
            (gap, "'gap' must be a float"),
        )
        for value, complaint in float_checks:
            if not isinstance(value, float):
                raise TypeError(complaint)

        self.alignments, self.D, self.gap = alignments, D, gap

        self._sim_matrix()
        self._dist_matrix()
        self._guide_tree()
コード例 #13
0
	def __init__(self, alignments, D, gap):
		"""
		Models pairwise alignment of alignments.

		After the arguments are stored, ``_sim_matrix``, ``_dist_matrix``
		and ``_guide_tree`` are called in that order.

		:param alignments: A sequence of ``Alignment`` objects.
		:param D: Must be a :class:`float`.
		:param gap: Must be a :class:`float`.

		:raises TypeError: If any argument is not of the required type.
		"""

		alignments_ok = is_sequence_of(alignments, Alignment)
		if not alignments_ok:
			raise TypeError("'alignments' must be a Sequence of Alignment objects")

		# Both numeric parameters must be floats (ints are rejected).
		if not isinstance(D, float):
			raise TypeError("'D' must be a float")
		elif not isinstance(gap, float):
			raise TypeError("'gap' must be a float")

		self.alignments, self.D, self.gap = alignments, D, gap

		for build in (self._sim_matrix, self._dist_matrix, self._guide_tree):
			build()
コード例 #14
0
def test_load_expr(filtered_peak_list, datadir, expr_filename):
	"""
	Exercise ``load_expr`` with a valid experiment file and with invalid input.

	Non-path arguments must raise :exc:`TypeError`; files that cannot be
	loaded as an experiment must raise :exc:`IOError`.
	"""

	loaded = load_expr(expr_filename)
	assert isinstance(loaded, Experiment)

	assert isinstance(loaded.expr_code, str)
	assert loaded.expr_code == "ELEY_1_SUBTRACT"

	assert isinstance(loaded.peak_list, list)
	assert is_sequence_of(loaded.peak_list, Peak)

	assert loaded.peak_list == filtered_peak_list

	# Selecting a retention time range must not raise.
	loaded.sele_rt_range(["6.5m", "21m"])

	# Errors: anything that is not a path is rejected.
	for bad_input in (*test_numbers, test_dict, *test_lists):
		with pytest.raises(TypeError):
			load_expr(bad_input)

	# Errors: files in the data directory that cannot be loaded.
	for filename in ("non-existent.expr", "not-an-experiment.expr"):
		with pytest.raises(IOError):
			load_expr(datadir / filename)
コード例 #15
0
def test_peak_list(expr, filtered_peak_list):
    """
    Check that ``expr.peak_list`` is a list equal to the ``filtered_peak_list`` fixture.
    """
    # NOTE(review): the element-type check runs on the fixture rather than on
    # expr.peak_list; the equality assertion below ties the two together.
    assert isinstance(expr.peak_list, list)
    assert is_sequence_of(filtered_peak_list, Peak)
    assert expr.peak_list == filtered_peak_list
コード例 #16
0
    def write_common_ion_csv(self,
                             area_file_name: Union[str, pathlib.Path],
                             top_ion_list: List,
                             minutes: bool = True):
        """
        Writes the common-ion peak areas of the alignment to a CSV file.

        The header row is ``"UID","RTavg","Quant Ion"`` followed by one quoted
        column per entry of ``self.expr_code``. Each following row describes
        one aligned peak position: the UID of the composite peak, the average
        retention time, the ion from ``top_ion_list`` and one area per peak
        list in ``self.peakpos``. ``NA`` is written where the peak is missing
        or where ``get_ion_area`` returned :py:obj:`None`.

        :param area_file_name: The name for the areas alignment file
        :type area_file_name: str or os.PathLike
        :param top_ion_list: A list of the highest intensity common ion along the aligned peaks
        :type top_ion_list: ~collections.abc.Sequence
        :param minutes: An optional indicator whether to save retention times
            in minutes. If False, retention time will be saved in seconds
        :type minutes: bool, optional

        :raises TypeError: If ``area_file_name`` is not a path, or
            ``top_ion_list`` is not a sequence of numbers.

        :author: Woon Wai Keen
        :author: Andrew Isaac
        :author: Sean O'Callaghan
        :author: Vladimir Likic
        :author: Dominic Davis-Foster (pathlib support)
        """

        if not is_path(area_file_name):
            raise TypeError(
                "'area_file_name' must be a string or a PathLike object")

        if not is_sequence_of(top_ion_list, Number):
            raise TypeError("'top_ion_list' must be a Sequence of Numbers")

        area_file_name = prepare_filepath(area_file_name)

        with area_file_name.open("w") as fp:

            # create and write the header
            header = ['"UID"', '"RTavg"', '"Quant Ion"']
            header.extend(f'"{item}"' for item in self.expr_code)
            fp.write(",".join(header) + "\n")

            rtsums = []
            rtcounts = []

            # The following two arrays will become list of lists
            # such that:
            # areas = [  [align1_peak1, align2_peak1, .....,alignn_peak1]
            #            [align1_peak2, ................................]
            #              .............................................
            #            [align1_peakm,....................,alignn_peakm]  ]
            areas: List[List] = []
            new_peak_lists: List[List[Peak]] = []

            for peak_list in self.peakpos:
                for index, peak in enumerate(peak_list):
                    # on the first iteration, populate the lists
                    if len(areas) < len(peak_list):
                        areas.append([])
                        new_peak_lists.append([])
                        rtsums.append(0)
                        rtcounts.append(0)

                    if peak is None:
                        areas[index].append(None)
                        continue

                    rt = peak.rt

                    # get the area of the common ion for the peak;
                    # None shows that while the peak was aligned,
                    # the common ion was not present
                    area = peak.get_ion_area(top_ion_list[index])

                    areas[index].append(area)
                    new_peak_lists[index].append(peak)

                    # accumulate for the average retention time
                    rtsums[index] += rt
                    rtcounts[index] += 1

            # now build the rows for the file
            rows = []
            for index, area_list in enumerate(areas):
                compo_peak = composite_peak(new_peak_lists[index])

                rt_avg = rtsums[index] / rtcounts[index]
                # Retention times are summed in seconds; convert only on request.
                rt_out = rt_avg / 60 if minutes else rt_avg

                fields = [
                    f'"{compo_peak.UID}"',
                    f"{rt_out:.3f}",
                    f"{top_ion_list[index]:f}",
                ]
                fields.extend(
                    "NA" if area is None else f"{area:.4f}"
                    for area in area_list
                )
                rows.append(",".join(fields))

            # now write the file
            for row in rows:
                fp.write(row + "\n")
コード例 #17
0
def get_maxima_indices(ion_intensities: Union[Sequence, numpy.ndarray],
                       points: int = 3) -> List[int]:
    """
    Returns the scan indices for the apexes of the ion.

    A window of ``points`` values slides over ``ion_intensities``. An even
    ``points`` is widened by one so the window has a single centre. The
    centre index is reported when its value is strictly greater than every
    value on both sides of the window. A plateau that is entered by a rise
    and left by a fall is reported once, at the midpoint of the plateau.

    :param ion_intensities: A list of intensities for a single ion.
    :param points: Number of scans over which to consider a maxima to be a peak.

    :raises TypeError: If ``ion_intensities`` is not a sequence of numbers
        or ``points`` is not an integer.

    :author: Andrew Isaac, Dominic Davis-Foster (type assertions)

    **Example:**

    .. code-block:: python

        >>> # A trivial set of data with two clear peaks
        >>> data = [1, 2, 3, 4, 5, 4, 3, 2, 1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1]
        >>> get_maxima_indices(data)
        [4, 13]
        >>> # Wider window (more points)
        >>> get_maxima_indices(data, points=10)
        [13]

    """

    if not is_sequence_of(ion_intensities, _number_types):
        raise TypeError("'ion_intensities' must be a sequence of numbers")

    if not isinstance(points, int):
        raise TypeError("'points' must be an integer")

    half = int(int(points) / 2)
    window = 2 * half + 1  # always an odd number of points
    maxima = []
    plateau_start = -1  # index where the current plateau began, or -1

    for start in range(len(ion_intensities) - window + 1):
        centre = start + half
        left = ion_intensities[start:centre]
        mid = ion_intensities[centre]
        right = ion_intensities[centre + 1:start + window]

        left_max = max(left)
        rising = mid > left_max
        if not rising and not mid == left_max:
            # the centre is below its left side: neither apex nor plateau
            continue

        right_max = max(right)
        if rising:
            if mid > right_max:
                # the max value is in the middle
                maxima.append(centre)
                plateau_start = -1  # ignore previous rising edge
            elif mid == right_max:
                # start of plateau following rise (left of peak?)
                plateau_start = centre
        elif mid > right_max:
            # start of fall from plateau
            if plateau_start > -1:
                maxima.append(int((plateau_start + centre) / 2))  # mid point
            plateau_start = -1

    return maxima