コード例 #1
0
def test_from_jcamp():
    """
    Check that ``MassSpectrum.from_jcamp`` can read JCAMP-DX files from the NIST WebBook.

    Each spectrum is downloaded once and cached as ``nist_jdx_files/<CAS>.jdx``
    (relative to the current working directory); later runs reuse the cached file.
    """

    nist_data_dir = pathlib.Path("nist_jdx_files")
    nist_data_dir.mkdir(parents=True, exist_ok=True)

    # Compounds from nist
    for cas in [
            "122-39-4",
            "71-43-2",
            "85-98-3",
            "107-10-8",
            "50-37-3",
            "57-13-6",
            "77-92-9",
            "118-96-7",
    ]:
        print(f"Testing CAS {cas}")
        jcamp_file = nist_data_dir / f"{cas}.jdx"

        if not jcamp_file.exists():
            r = requests.get(
                f"https://webbook.nist.gov/cgi/cbook.cgi?JCAMP=C{cas.replace('-', '')}&Index=0&Type=Mass",
                timeout=30,  # do not hang the test run if the server never answers
            )
            # Fail here instead of caching an HTTP error page as a ".jdx" file,
            # which would make every later run fail while parsing it.
            r.raise_for_status()
            jcamp_file.write_bytes(r.content)

        MassSpectrum.from_jcamp(jcamp_file)
コード例 #2
0
ファイル: Display.py プロジェクト: PyMassSpec/PyMassSpec
	def onclick(self, event):
		"""
		Print the highest intensity m/z channels of the peak that was clicked on.

		The channels printed are those whose indices are returned by ``self.get_5_largest``.
		If a button other than the left one is clicked, a new plot of the mass spectrum is displayed.

		:param event: a mouse click by the user
		"""

		intensity_list = []
		mass_list = []

		# ``event.xdata`` is ``None`` when the click lands outside the axes;
		# comparing ``None`` with a float would raise a TypeError.
		if event.xdata is not None:
			for peak in self.__peak_list:
				# The click must be within 0.01% of the peak's retention time.
				# If several peaks match, the last one in the list wins.
				if 0.9999 * peak.rt < event.xdata < 1.0001 * peak.rt:
					intensity_list = peak.mass_spectrum.mass_spec
					mass_list = peak.mass_spectrum.mass_list

		if len(intensity_list) == 0:
			# The selected point is not close enough to any peak
			print("No Peak at this point")
			return

		print("mass\t intensity")
		# Iterate over the indices actually returned rather than a hard-coded count,
		# so a shorter result cannot cause an IndexError.
		for idx in self.get_5_largest(intensity_list):
			print(mass_list[idx], '\t', intensity_list[idx])

		# A button other than left was pressed on a peak: plot its mass spectrum
		if event.button != 1:
			self.plot_mass_spec(MassSpectrum(mass_list, intensity_list))
コード例 #3
0
ファイル: peak.py プロジェクト: domdfcoding/GunShotMatch
    def ms_list(self, value):
        type_err_msg = f"""

	'ms_list' must be a list, tuple or class:`pandas.Series` of class`pyms.Spectrum.MassSpectrum` objects.
	Alternatively, the sequence may consist of dictionary objects that can be converted to MassSpectrum objects.
	"""
        if not isinstance(value, (list, tuple, pandas.Series)) or not all(
                isinstance(ms, (MassSpectrum, dict)) or ms is None
                for ms in value):
            raise TypeError(
                f"Invalid Type: {type(value)} of {type(value[0])}!{type_err_msg}"
            )

        ms_list = []

        for ms in value:

            if isinstance(ms, MassSpectrum):
                ms_list.append(ms)
            elif isinstance(ms, dict):
                if "intensity_list" in ms and "mass_list" in ms:
                    ms_list.append(MassSpectrum(**ms))
            elif ms is None:
                ms_list.append(None)
            else:
                raise TypeError(
                    f"Unrecognised Type: {type(value)} of {type(ms)}!{type_err_msg}"
                )

        self._ms_list = ms_list

        self._calculate_spectra()
コード例 #4
0
ファイル: Display.py プロジェクト: PyMassSpec/PyMassSpec
def invert_mass_spec(mass_spec: MassSpectrum, inplace: bool = False) -> MassSpectrum:
	"""
	Negate every intensity of a mass spectrum, e.g. for display in a head2tail plot.

	:param mass_spec: The mass spectrum to invert.
	:param inplace: If true, the intensities of ``mass_spec`` itself are replaced.
		Otherwise (the default) ``mass_spec`` is left untouched and a new
		:class:`~pyms.Spectrum.MassSpectrum` object is created.

	:return: The inverted mass spectrum.
	"""

	negated = [-intensity for intensity in mass_spec.intensity_list]

	if not inplace:
		return MassSpectrum(mass_spec.mass_list, negated)

	mass_spec.intensity_list = negated
	return mass_spec
コード例 #5
0
def test_equality(im, ms):
    """
    A spectrum equals a copy built from its own data and differs from everything else.
    """

    assert ms != im.get_ms_at_index(1234)
    assert ms == MassSpectrum(ms.mass_list, ms.mass_spec)

    # Objects that are not mass spectra must never compare equal
    for other in (
            test_list_ints,
            test_list_strs,
            test_tuple,
            test_string,
            test_int,
            test_float,
    ):
        assert ms != other
コード例 #6
0
def mass_spec_from_mona(mona_ms_string: str) -> MassSpectrum:
    """
    Create a :class:`pyms.Spectrum.MassSpectrum` object from the MoNA string representation of a spectrum.

    :param mona_ms_string: The spectrum as pairs separated by single spaces,
        with the two values of each pair separated by a colon.
        Presumably each pair is ``m/z:intensity`` -- confirm against the MoNA data.
    """

    mz_int_pairs = []

    for pair in mona_ms_string.split(' '):
        mz_int_pairs.append(pair.split(':'))

    return MassSpectrum.from_mz_int_pairs(mz_int_pairs)
コード例 #7
0
ファイル: peak.py プロジェクト: domdfcoding/GunShotMatch
    def _calculate_spectra(self):
        """
		Calculate Combined and Averaged spectra
		"""

        mass_lists = []
        intensity_lists = []

        for spec in self._ms_list:

            if spec:
                # print(spec.mass_list)
                mass_lists.append(spec.mass_list)
                intensity_lists.append(spec.intensity_list)
            else:
                # print()
                pass

        if all_equal(mass_lists):
            mass_list = mass_lists[0]
            # print(intensity_lists)
            combined_intensity_list = list(
                sum(map(numpy.array, intensity_lists)))
            self._combined_mass_spectrum = MassSpectrum(
                mass_list=mass_list, intensity_list=combined_intensity_list)

            # averaged_intensity_list = [intensity / len(mass_lists) for intensity in combined_intensity_list]

            averaged_intensity_list = []
            avg_intensity_array = numpy.array(intensity_lists)
            for column in avg_intensity_array.T:
                if sum(column) == 0 or numpy.count_nonzero(column) == 0:
                    averaged_intensity_list.append(0)
                else:
                    averaged_intensity_list.append(
                        sum(column) / numpy.count_nonzero(column))

            self._averaged_mass_spectrum = MassSpectrum(
                mass_list=mass_list, intensity_list=averaged_intensity_list)

        else:
            warnings.warn("Mass Ranges Differ. Unable to process")
            self._combined_mass_spectrum = None
            self._averaged_mass_spectrum = None
コード例 #8
0
def quick_search():
    """
    Request handler: search for the submitted mass spectrum and return the hits as JSON.

    The number of hits comes from the ``n_hits`` query argument (default 5).
    The request body is a JSON string holding the ``MassSpectrum`` keyword arguments.
    If ``search`` is ``None`` the result of ``status()`` is returned instead.
    """

    print("Searching Spectrum (Quick Search)")

    if search is None:
        return status()

    hit_count = flask.request.args.get('n_hits', default=5, type=int)

    spectrum_kwargs = json.loads(flask.request.get_json())
    query_spectrum = MassSpectrum(**spectrum_kwargs)

    hits = search.spectrum_search(query_spectrum, hit_count)

    return json.dumps(hits, cls=PyNISTEncoder)
コード例 #9
0
def test_from_mz_int_pairs():
	"""
	``MassSpectrum.from_mz_int_pairs`` builds a spectrum from (m/z, intensity) pairs and rejects malformed input.
	"""

	# Diphenylamine
	mz_int_pairs = [
			(27, 138), (28, 210), (32, 59), (37, 70), (38, 273),
			(39, 895), (40, 141), (41, 82), (50, 710), (51, 2151),
			(52, 434), (53, 49), (57, 41), (59, 121), (61, 73),
			(62, 229), (63, 703), (64, 490), (65, 1106), (66, 932),
			(67, 68), (70, 159), (71, 266), (72, 297), (73, 44),
			(74, 263), (75, 233), (76, 330), (77, 1636), (78, 294),
			(84, 1732), (87, 70), (88, 86), (89, 311), (90, 155),
			(91, 219), (92, 160), (93, 107), (101, 65), (102, 111),
			(103, 99), (104, 188), (113, 107), (114, 120), (115, 686),
			(116, 150), (117, 91), (126, 46), (127, 137), (128, 201),
			(129, 73), (130, 69), (139, 447), (140, 364), (141, 584),
			(142, 279), (143, 182), (152, 37), (153, 60), (154, 286),
			(166, 718), (167, 3770), (168, 6825), (169, 9999), (170, 1210),
			(171, 85),
			]

	ms = MassSpectrum.from_mz_int_pairs(mz_int_pairs)

	assert isinstance(ms, MassSpectrum)
	assert len(ms) == len(mz_int_pairs)

	# Spot checks of individual values
	assert ms.intensity_list[6] == 141
	assert ms.intensity_list[30] == 1732
	assert ms.mass_list[-1] == 171
	assert ms.mass_list[2] == 32

	# Input that is not a sequence of sequences
	wrong_type_inputs = [
			test_string,
			test_int,
			test_list_strs,
			test_dict,
			test_list_ints,
			test_tuple,
			["abc", "123"],
			]

	for bad_input in wrong_type_inputs:
		with pytest.raises(TypeError):
			MassSpectrum.from_mz_int_pairs(bad_input)

	# Input whose items are not valid pairs
	wrong_value_inputs = [
			[(1, 2, 3)],
			([1, 2, 3], ),
			[(1, )],
			([1], ),
			[("abc", "123")],
			]

	for bad_input in wrong_value_inputs:
		with pytest.raises(ValueError):
			MassSpectrum.from_mz_int_pairs(bad_input)
コード例 #10
0
ファイル: test_Peak.py プロジェクト: PyMassSpec/PyMassSpec
def test_mass_spectrum(peak, im_i):
    """
    Exercise the ``mass_spectrum`` property of ``Peak``: its value, default, setter and type checking.
    """

    expected_ms = im_i.get_ms_at_index(im_i.get_index_at_time(31.17 * 60.0))

    assert isinstance(peak.mass_spectrum, MassSpectrum)
    assert peak.mass_spectrum == expected_ms

    # A peak created without a spectrum starts with an empty (falsy) one,
    # which can then be replaced
    first_blank = Peak(test_float)
    assert first_blank.mass_spectrum == MassSpectrum([], [])
    assert not first_blank.mass_spectrum
    first_blank.mass_spectrum = expected_ms
    assert first_blank.mass_spectrum == expected_ms

    second_blank = Peak(test_float)
    assert second_blank.mass_spectrum == MassSpectrum([], [])
    assert not second_blank.mass_spectrum

    second_blank.mass_spectrum = expected_ms
    assert isinstance(second_blank.mass_spectrum, MassSpectrum)
    assert isinstance(second_blank.mass_spectrum.mass_spec, list)

    # Anything that is not a MassSpectrum must be rejected by the setter
    for invalid in [test_string, *test_numbers, test_dict, *test_lists]:
        with pytest.raises(TypeError):
            second_blank.mass_spectrum = invalid
コード例 #11
0
	def get_ms_at_index(self, ix: int) -> MassSpectrum:
		"""
		Returns a mass spectrum for a given scan index.

		:param ix: The index of the scan.

		:return: A mass spectrum built from this object's ``mass_list``
			and the scan returned by ``get_scan_at_index(ix)``.

		:raises TypeError: If ``ix`` is not an integer.

		:author: Andrew Isaac
		"""

		if not isinstance(ix, int):
			raise TypeError("'ix' must be an integer")

		scan = self.get_scan_at_index(ix)

		return MassSpectrum(self.mass_list, scan)
コード例 #12
0
ファイル: Class.py プロジェクト: PyMassSpec/PyMassSpec
    def __init__(
        self,
        rt: Union[int, float] = 0.0,
        ms: Union[float, MassSpectrum, None] = None,
        minutes: bool = False,
        outlier: bool = False,
    ):
        """
        Store the mass spectrum, then hand the remaining arguments to the parent initialiser.

        :param rt: Forwarded to the parent class.
        :param ms: The mass spectrum of the peak. ``None`` (the default) gives an empty
            ``MassSpectrum``.
        :param minutes: Forwarded to the parent class.
        :param outlier: Forwarded to the parent class.

        :raises TypeError: If ``ms`` is neither ``None`` nor a ``MassSpectrum``.
        """

        if ms is not None and not isinstance(ms, MassSpectrum):
            raise TypeError("'ms' must be a MassSpectrum object")

        self._mass_spectrum = MassSpectrum([], []) if ms is None else ms

        super().__init__(rt, minutes, outlier)
コード例 #13
0
def spectrum_search_with_ref_data():
    """
    Request handler: run a full search for the submitted mass spectrum.

    The response is a JSON list of ``(hit, reference data)`` pairs, one per hit.
    The number of hits comes from the ``n_hits`` query argument (default 5).
    If ``search`` is ``None`` the result of ``status()`` is returned instead.
    """

    print("Searching Spectrum with Ref Data")

    if search is None:
        return status()

    hit_count = flask.request.args.get('n_hits', default=5, type=int)

    query_spectrum = MassSpectrum(**json.loads(flask.request.get_json()))

    # Pair every hit with the reference data stored at its location
    hits_with_ref_data = [
        (hit, search.get_reference_data(hit.spec_loc))
        for hit in search.full_spectrum_search(query_spectrum, hit_count)
    ]

    return json.dumps(hits_with_ref_data, cls=PyNISTEncoder)
コード例 #14
0
def BillerBiemann(im: IntensityMatrix,
                  points: int = 3,
                  scans: int = 1) -> List[Peak]:
    """
    Deconvolution based on the algorithm of Biller and Biemann (1974)

    A peak is created for every row of the maxima matrix whose values sum to more than zero.
    Each peak takes its retention time from the matching entry of ``im.time_list``.

    :param im: An :class:`~pyms.IntensityMatrix.IntensityMatrix` object
    :type im: ~pyms.IntensityMatrix.IntensityMatrix
    :param points: Number of scans over which to consider a maxima to be a peak. Default ``3``
    :type points: int, optional
    :param scans: Number of scans to combine peaks from to compensate for spectra skewing. Default ``1``
    :type scans: int, optional

    :return: List of detected peaks
    :rtype: List[:class:`pyms.Peak.Class.Peak`]

    :raises TypeError: If any argument has the wrong type.

    :authors: Andrew Isaac, Dominic Davis-Foster (type assertions)
    """

    if not isinstance(im, IntensityMatrix):
        raise TypeError("'im' must be an IntensityMatrix object")

    if not isinstance(points, int):
        raise TypeError("'points' must be an integer")

    if not isinstance(scans, int):
        raise TypeError("'scans' must be an integer")

    retention_times = im.time_list
    masses = im.mass_list
    detected_peaks = []
    maxima = get_maxima_matrix(im, points, scans)

    for scan_idx, intensities in enumerate(maxima):
        if sum(intensities) > 0:
            spectrum = MassSpectrum(masses, intensities)
            new_peak = Peak(retention_times[scan_idx], spectrum)
            new_peak.bounds = [0, scan_idx, 0]  # store IM index for convenience
            detected_peaks.append(new_peak)

    return detected_peaks
コード例 #15
0
ファイル: reference_data.py プロジェクト: domdfcoding/pynist
    def __init__(
        self,
        name: str = '',
        cas: Union[str, int] = "---",
        nist_no: Union[int, str] = 0,
        id: Union[str, int] = '',  # noqa: A002  # pylint: disable=redefined-builtin
        mw: Union[float, str] = 0.0,
        formula: str = '',
        contributor: str = '',
        mass_spec: Optional[MassSpectrum] = None,
        synonyms: Optional[Sequence[str]] = None,
        exact_mass: Optional[Any] = None,
    ) -> None:
        """
        Store the reference data for a compound, coercing each value to its storage type.

        :param name: Passed to ``NISTBase.__init__``.
        :param cas: Passed to ``NISTBase.__init__``.
        :param nist_no: Stored as an :class:`int`.
        :param id: Stored as a :class:`str`.
        :param mw: Stored as an :class:`int`.
        :param formula: Stored as a :class:`str`.
        :param contributor: Stored as a :class:`str`.
        :param mass_spec: A ``MassSpectrum`` (a shallow copy is stored), a dictionary of
            ``MassSpectrum`` keyword arguments, or ``None``.
        :param synonyms: Each entry is stored as a :class:`str`. ``None`` gives an empty list.
        :param exact_mass: Stored as a :class:`float`. When falsy, ``float(mw)`` is stored instead.
        """

        NISTBase.__init__(self, name, cas)

        self._formula: str = str(formula)
        self._contributor: str = str(contributor)

        self._nist_no: int = int(nist_no)
        self._id: str = str(id)

        self._mw: int = int(mw)

        # Fall back to the molecular weight when no exact mass was supplied
        self._exact_mass = float(exact_mass) if exact_mass else float(mw)

        if isinstance(mass_spec, dict):
            self._mass_spec = MassSpectrum(**mass_spec)
        elif mass_spec is None:
            self._mass_spec = None
        else:
            self._mass_spec = copy.copy(mass_spec)

        self._synonyms = [] if synonyms is None else [str(entry) for entry in synonyms]
コード例 #16
0
def BillerBiemann(im: BaseIntensityMatrix,
                  points: int = 3,
                  scans: int = 1) -> List[Peak]:
    """
    Deconvolution based on the algorithm of Biller and Biemann (1974).

    A peak is created for every row of the maxima matrix whose values sum to more than zero.
    Each peak takes its retention time from the matching entry of ``im.time_list``.

    :param im: The intensity matrix to find peaks in.
    :param points: Number of scans over which to consider a maxima to be a peak.
    :param scans: Number of scans to combine peaks from to compensate for spectra skewing.

    :return: List of detected peaks

    :raises TypeError: If any argument has the wrong type.

    :authors: Andrew Isaac, Dominic Davis-Foster (type assertions)
    """

    if not isinstance(im, BaseIntensityMatrix):
        raise TypeError("'im' must be an IntensityMatrix object")

    if not isinstance(points, int):
        raise TypeError("'points' must be an integer")

    if not isinstance(scans, int):
        raise TypeError("'scans' must be an integer")

    retention_times = im.time_list
    masses = im.mass_list
    detected_peaks = []

    for scan_idx, intensities in enumerate(get_maxima_matrix(im, points, scans)):
        # Rows without any maxima do not produce a peak
        if not sum(intensities) > 0:
            continue

        new_peak = Peak(retention_times[scan_idx], MassSpectrum(masses, intensities))
        new_peak.bounds = (0, scan_idx, 0)  # store IM index for convenience
        # TODO: can the bounds be determined from the intensity matrix?
        detected_peaks.append(new_peak)

    return detected_peaks
コード例 #17
0
ファイル: project.py プロジェクト: domdfcoding/GunShotMatch
    def load_alignment_data(self):
        """
        Load the retention time, peak area and mass spectrum alignment tables from the project archive.

        Nothing is loaded unless ``self.alignment_performed`` is truthy.
        The columns of the retention time and area tables are put into the order
        of ``self.experiment_name_list``.
        """

        if not self.alignment_performed:
            return

        from pyms.Spectrum import MassSpectrum

        self.rt_alignment = pandas.read_json(
            get_file_from_archive(self.filename.Path, 'alignment_rt.json'))

        # Columns must be in the same order as the experiment name list
        if self.rt_alignment.columns.tolist() != self.experiment_name_list:
            self.rt_alignment = self.rt_alignment[self.experiment_name_list]

        self.area_alignment = pandas.read_json(
            get_file_from_archive(self.filename.Path, 'alignment_area.json'))

        # Columns must be in the same order as the experiment name list
        if self.area_alignment.columns.tolist() != self.experiment_name_list:
            self.area_alignment = self.area_alignment[self.experiment_name_list]

        raw_ms_alignment = json.load(
            get_file_from_archive(self.filename.Path, 'alignment_ms.json'))

        ordered_ms_alignment = {}

        for expr, peaks in raw_ms_alignment.items():
            spectra = []

            # The peaks are keyed by their position as a string ("0", "1", ...);
            # walk them in numeric order
            for position in range(len(peaks)):
                raw_spectrum = peaks[str(position)]

                if raw_spectrum:
                    spectra.append(MassSpectrum.from_dict(raw_spectrum))
                else:
                    # Falsy entries (presumably missing peaks) are kept unchanged
                    spectra.append(raw_spectrum)

            ordered_ms_alignment[expr] = spectra

        self.ms_alignment = pandas.DataFrame(data=ordered_ms_alignment)
コード例 #18
0
ファイル: reference_data.py プロジェクト: domdfcoding/pynist
    def from_pynist(cls, pynist_dict: Dict[str, Any]) -> "ReferenceData":
        """
        Create a :class:`ReferenceData` object from the raw data returned by the C extension.

        :param pynist_dict: Mapping with the keys ``name_chars``, ``cas``, ``formula``,
            ``contributor``, ``nist_no``, ``id``, ``mw``, ``mass_list``, ``intensity_list``
            and ``synonyms_chars``.
        """

        # The name and the synonyms are decoded with ``parse_name_chars``
        init_kwargs = {
            "name": parse_name_chars(pynist_dict["name_chars"]),
            "cas": pynist_dict["cas"],
            "formula": pynist_dict["formula"],
            "contributor": pynist_dict["contributor"],
            "nist_no": pynist_dict["nist_no"],
            "id": pynist_dict["id"],
            "mw": pynist_dict["mw"],
            "mass_spec": MassSpectrum(
                pynist_dict["mass_list"],
                pynist_dict["intensity_list"],
            ),
            "synonyms": [
                parse_name_chars(raw_synonym)
                for raw_synonym in pynist_dict["synonyms_chars"]
            ],
        }

        return cls(**init_kwargs)
コード例 #19
0
ファイル: reference_data.py プロジェクト: domdfcoding/pynist
    def from_jcamp(cls,
                   file_name: PathLike,
                   ignore_warnings: bool = True) -> "ReferenceData":
        """
        Create a ReferenceData object from a JCAMP-DX file.

        The header tags are read here; the spectrum itself is read by
        ``MassSpectrum.from_jcamp`` from the same file.

        :param file_name: Path of the file to read.
        :param ignore_warnings: Whether warnings about unrecognised tags
            (:class:`JcampTagWarning`) should be suppressed.

        :raises KeyError: If one of the headers ``TITLE``, ``CAS REGISTRY NO``,
            ``$NIST MASS SPEC NO``, ``ORIGIN``, ``MOLFORM`` or ``MW`` is missing from the file.

        :authors: Qiao Wang, Andrew Isaac, Vladimir Likic, David Kainer, Dominic Davis-Foster
        """

        with warnings.catch_warnings():

            if ignore_warnings:
                warnings.simplefilter("ignore", JcampTagWarning)

            file_name = PathPlus(file_name)

            # No "Reading JCAMP file" message is printed here because one is
            # also printed when the MassSpectrum is created
            lines_list = file_name.read_lines()
            last_tag = None

            # Dictionary containing header information
            header_info: Dict[str, Any] = {}

            for line in lines_list:

                if not line.strip():
                    continue

                if not line.startswith("##"):
                    # Continuation of the previous header's value. The stored value may
                    # already have been converted to a number, so rebuild it as text
                    # instead of using ``+=`` (which would raise a TypeError).
                    if last_tag in header_info:
                        header_info[last_tag] = f"{header_info[last_tag]}{line}"
                    continue

                # key word or information.
                # ``partition`` copes with tag lines that have no '=' (empty value)
                tag, _, value = line.partition('=')
                current_tag = last_tag = tag.lstrip('#').upper()
                value = value.strip()

                if current_tag.startswith("END"):
                    break

                elif current_tag in xydata_tags:
                    # The spectrum data is read separately, see below
                    continue

                elif current_tag in header_info_fields:
                    if value.isdigit():
                        header_info[current_tag] = int(value)
                    elif is_float(value):
                        header_info[current_tag] = float(value)
                    else:
                        header_info[current_tag] = value
                else:
                    warnings.warn(current_tag, JcampTagWarning)

            return cls(
                name=header_info["TITLE"],
                cas=header_info["CAS REGISTRY NO"],
                nist_no=header_info["$NIST MASS SPEC NO"],
                contributor=header_info["ORIGIN"],
                formula=header_info["MOLFORM"],
                mw=header_info["MW"],
                mass_spec=MassSpectrum.from_jcamp(file_name),
            )
コード例 #20
0
def test_invert_mass_spec(
        advanced_data_regression: AdvancedDataRegressionFixture):
    """
    ``invert_mass_spec`` returns an inverted copy by default and modifies the spectrum itself with ``inplace=True``.
    """

    # Diphenylamine
    mz_int_pairs = [
        (27, 138), (28, 210), (32, 59), (37, 70), (38, 273),
        (39, 895), (40, 141), (41, 82), (50, 710), (51, 2151),
        (52, 434), (53, 49), (57, 41), (59, 121), (61, 73),
        (62, 229), (63, 703), (64, 490), (65, 1106), (66, 932),
        (67, 68), (70, 159), (71, 266), (72, 297), (73, 44),
        (74, 263), (75, 233), (76, 330), (77, 1636), (78, 294),
        (84, 1732), (87, 70), (88, 86), (89, 311), (90, 155),
        (91, 219), (92, 160), (93, 107), (101, 65), (102, 111),
        (103, 99), (104, 188), (113, 107), (114, 120), (115, 686),
        (116, 150), (117, 91), (126, 46), (127, 137), (128, 201),
        (129, 73), (130, 69), (139, 447), (140, 364), (141, 584),
        (142, 279), (143, 182), (152, 37), (153, 60), (154, 286),
        (166, 718), (167, 3770), (168, 6825), (169, 9999), (170, 1210),
        (171, 85),
    ]

    ms = MassSpectrum.from_mz_int_pairs(mz_int_pairs)

    # Default behaviour: a new, different spectrum is returned
    inverted_copy = invert_mass_spec(ms)
    assert inverted_copy is not ms
    assert inverted_copy != ms
    copy_snapshot = {
        "mass_list": inverted_copy.mass_list,
        "mass_spec": inverted_copy.mass_spec,
    }
    advanced_data_regression.check(copy_snapshot)

    # In place: the same object comes back, now equal to the inverted copy
    inverted_original = invert_mass_spec(ms, inplace=True)
    assert inverted_original is ms
    assert inverted_original is not inverted_copy
    assert inverted_original == inverted_copy
    inplace_snapshot = {
        "mass_list": inverted_original.mass_list,
        "mass_spec": inverted_original.mass_spec,
    }
    advanced_data_regression.check(inplace_snapshot)
コード例 #21
0
def test_errors(ms, obj, expects):
    """
    Constructing a ``MassSpectrum`` with ``obj`` in either position must raise ``expects``.
    """

    # ``obj`` in place of the mass list
    with pytest.raises(expects):
        valid_intensities = ms.intensity_list
        MassSpectrum(obj, valid_intensities)

    # ``obj`` in place of the intensity list
    with pytest.raises(expects):
        valid_masses = ms.mass_list
        MassSpectrum(valid_masses, obj)
コード例 #22
0
def composite_peak(peak_list: List, ignore_outliers: bool = False) -> Peak:
    """
    Create a peak that consists of a composite spectrum from all spectra in the list of peaks.

    Every contributing spectrum is rescaled so that its largest intensity is 100,
    then the spectra are averaged. The retention time of the composite peak is
    the mean retention time of the contributing peaks.

    :param peak_list: A list of peak objects
    :type peak_list: list
    :param ignore_outliers: Whether peaks whose retention time is flagged by
        ``median_outliers`` should be left out of the composite. Outliers are only
        looked for when the list holds more than 3 peaks. Flagged peaks are marked
        in place through their ``is_outlier`` attribute.
    :type ignore_outliers: bool, optional

    :return: The composite peak, or ``None`` if no peak contributed.
    :rtype: pyms.Peak.Class.Peak

    :raises TypeError: If ``peak_list`` is not a list of Peak objects.

    :author: Andrew Isaac
    :author: Dominic Davis-Foster (type assertions)
    """

    if not is_peak_list(peak_list):
        raise TypeError("'peak_list' must be a list of Peak objects")

    first = True
    count = 0
    avg_rt = 0

    # DK: first mark peaks in the list that are outliers by RT, but only if there are more than 3 peaks in the list
    if ignore_outliers and len(peak_list) > 3:
        rts = [peak.rt for peak in peak_list]

        for peak, flagged in zip(peak_list, median_outliers(rts)):
            if flagged:
                # Must be the same attribute that is read back below; the previous
                # spelling (``isoutlier``) meant no peak was ever excluded.
                peak.is_outlier = True

    # DK: the average RT and average mass spec for the compound peak is now calculated from peaks that are NOT outliers.
    # This should improve the ability to order peaks and figure out badly aligned entries
    for peak in peak_list:
        if peak is None:
            continue

        if ignore_outliers and peak.is_outlier:
            continue

        ms = peak.mass_spectrum
        spec = numpy.array(ms.mass_spec, dtype='d')

        if first:
            # The first contributing peak fixes the mass list and spectrum length
            avg_spec = numpy.zeros(len(ms.mass_spec), dtype='d')
            mass_list = ms.mass_list
            first = False

        # scale all intensities to [0,100]
        max_spec = max(spec) / 100.0
        if max_spec > 0:
            spec = spec / max_spec
        else:
            spec = spec * 0

        avg_rt += peak.rt
        avg_spec += spec
        count += 1

    if count > 0:
        avg_rt = avg_rt / count
        avg_spec = avg_spec / count
        new_ms = MassSpectrum(mass_list, avg_spec)

        return Peak(avg_rt, new_ms)
    else:
        return None
コード例 #23
0
def fill_peaks(data, peak_list: List, D: float, minutes: bool = False) -> List[Peak]:
    """
    Gets the best matching Retention Time and spectra from 'data' for each peak
    in the peak list.

    For every peak, the scans of ``data`` within a retention time window around the
    peak are scored by the cosine similarity between the scan and the peak's spectrum,
    multiplied by a Gaussian weight on the retention time difference.
    The best scoring scan becomes the new peak.

    :param data: A data IntensityMatrix that has the same mass range as the
        peaks in the peak list
    :type data: pyms.IntensityMatrix.IntensityMatrix
    :param peak_list: A list of peak objects
    :type peak_list: list
    :param D: Peak width standard deviation in seconds.
        Determines search window width.
    :type D: float
    :param minutes: Passed to the ``Peak`` constructor of every new peak
        (documented upstream as "return retention time as minutes")
    :type minutes: bool, optional

    :return: List of Peak Objects, one per peak in ``peak_list``
    :rtype: list of :class:`pyms.Peak.Class.Peak`

    :raises TypeError: If ``peak_list`` is not a list of Peak objects, or ``D`` is not a float
        (integers are rejected too).

    :author: Andrew Isaac
    :author: Dominic Davis-Foster (type assertions)
    """

    if not is_peak_list(peak_list):
        raise TypeError("'peak_list' must be a list of Peak objects")

    if not isinstance(D, float):
        raise TypeError("'D' must be a float")

    # Test for best match in range where RT weight is greater than _TOL
    # (``cutoff`` is the RT distance at which a Gaussian of width D falls to _TOL)
    _TOL = 0.001
    cutoff = D * math.sqrt(-2.0 * math.log(_TOL))

    # Penalise for neighboring peaks
    # reweight so RT weight at nearest peak is _PEN
    _PEN = 0.5

    datamat = data.intensity_array
    mass_list = data.mass_list
    datatimes = data.time_list
    minrt = min(datatimes)
    maxrt = max(datatimes)
    # Retention times of the left and right neighbours of the current peak.
    # NOTE(review): these carry over between iterations. The first peak sees
    # ``rtl == 0``, and the last peak keeps the ``rtr`` assigned on the previous
    # iteration (its own RT), so its ``Dclose`` falls back to ``D`` below.
    # Confirm that this is intended.
    rtl = 0
    rtr = 0
    new_peak_list = []
    for ii in range(len(peak_list)):
        spec = peak_list[ii].mass_spectrum.mass_spec
        spec = numpy.array(spec, dtype='d')
        rt = peak_list[ii].rt
        sum_spec_squared = numpy.sum(spec**2, axis=0)

        # get neighbour RT's
        if ii > 0:
            rtl = peak_list[ii - 1].rt
        if ii < len(peak_list) - 1:
            rtr = peak_list[ii + 1].rt
        # adjust weighting for neighbours:
        # Dclose is the width at which the RT weight drops to _PEN at the nearest neighbour
        rtclose = min(abs(rt - rtl), abs(rt - rtr))
        Dclose = rtclose / math.sqrt(-2.0 * math.log(_PEN))

        # Never use a width larger than D; a zero width falls back to D
        if Dclose > 0:
            Dclose = min(D, Dclose)
        else:
            Dclose = D

        # Get bounds of the search window, clamped to the time range of the data
        rtlow = rt - cutoff
        if rtlow < minrt:
            rtlow = minrt
        lowii = data.get_index_at_time(rtlow)

        rtup = rt + cutoff
        if rtup > maxrt:
            rtup = maxrt
        upii = data.get_index_at_time(rtup)

        # Get sub matrix of scans in bounds (inclusive of the upper index)
        submat = datamat[lowii:upii + 1]
        submat = numpy.array(submat, dtype='d')
        subrts = datatimes[lowii:upii + 1]
        subrts = numpy.array(subrts, dtype='d')

        sum_summat_squared = numpy.sum(submat**2, axis=1)

        # transpose spec (as matrix) for dot product
        spec = numpy.transpose([spec])
        # dot product on rows

        toparr = numpy.dot(submat, spec)
        botarr = numpy.sqrt(sum_spec_squared * sum_summat_squared)

        # convert back to 1-D array
        toparr = toparr.ravel()

        # scaled dot product of each scan (cosine similarity with the peak's spectrum)
        # NOTE(review): ``botarr`` is zero for an all-zero scan or spectrum,
        # which makes this a division by zero -- confirm how that should be handled
        cosarr = toparr / botarr

        # RT weight of each scan
        rtimearr = numpy.exp(-((subrts - rt) / float(Dclose))**2 / 2.0)

        # weighted scores
        scorearr = cosarr * rtimearr

        # index of best score
        best_ii = scorearr.argmax()

        # Add new peak
        bestrt = subrts[best_ii]
        bestspec = submat[best_ii].tolist()
        ms = MassSpectrum(mass_list, bestspec)
        new_peak_list.append(Peak(bestrt, ms, minutes))

    return new_peak_list
コード例 #24
0
ファイル: test.py プロジェクト: domdfcoding/pynist
    (154, 286),
    (166, 718),
    (167, 3770),
    (168, 6825),
    (169, 9999),
    (170, 1210),
    (171, 85),
]

# Split the (m/z, intensity) pairs into the two parallel lists MassSpectrum expects
mass_list = [mass for mass, _ in mz_int_pairs]
intensity_list = [intensity for _, intensity in mz_int_pairs]

mass_spec = MassSpectrum(mass_list, intensity_list)

start_time = datetime.datetime.now()
print("Performing Full Search")

hit_list = search.full_search_with_ref_data(mass_spec)

# Show every hit together with its reference data and reference spectrum
for hit_no, (hit, ref_data) in enumerate(hit_list):
    print(f"Hit {hit_no}")
    print(hit)
    print(ref_data)
    print(ref_data.mass_spec)
    print()

    # Disabled cross-check against a separate reference data lookup:
    # reference_data = search.get_reference_data(hit.spec_loc)
    # print(reference_data.mass_spec == ref_data.mass_spec)