def main(cluster_file, consensus_file):
    """
    Plot each cluster spectrum mirrored against the cluster consensus.

    Only the first spectrum in `consensus_file` is used. Its title is passed
    as both identifier and peptide sequence for the consensus spectrum and
    for every spectrum read from `cluster_file`.

    Parameters
    ----------
    cluster_file : str
        MGF file containing the spectra of a single cluster.
    consensus_file : str
        MGF file whose first spectrum is the consensus spectrum.

    Raises
    ------
    ValueError
        If `consensus_file` contains no spectra.
    """
    cons_spec = None
    with mgf.read(consensus_file) as reader:
        for spectrum_dict in reader:
            params = spectrum_dict['params']
            peptide_seq = params['title']
            cons_spec = sus.MsmsSpectrum(
                peptide_seq,
                precursor_mz=params['pepmass'][0],
                precursor_charge=params['charge'][0],
                mz=spectrum_dict['m/z array'],
                intensity=spectrum_dict['intensity array'],
                retention_time=float(params['rtinseconds']),
                peptide=peptide_seq)
            # Only the first spectrum is the consensus.
            break
    if cons_spec is None:
        raise ValueError(
            f'No consensus spectrum found in {consensus_file}')

    # Fragment matching tolerance used while processing cluster spectra.
    fragment_tol_mass = 10
    fragment_tol_mode = 'ppm'

    with mgf.read(cluster_file) as reader:
        for spectrum_dict in reader:
            params = spectrum_dict['params']
            spectrum = sus.MsmsSpectrum(
                peptide_seq,
                precursor_mz=params['pepmass'][0],
                precursor_charge=params['charge'][0],
                mz=spectrum_dict['m/z array'],
                intensity=spectrum_dict['intensity array'],
                retention_time=float(params['rtinseconds']),
                peptide=peptide_seq)
            # Process the MS/MS spectrum.
            spectrum = (
                spectrum
                .set_mz_range(min_mz=100, max_mz=1400)
                .remove_precursor_peak(fragment_tol_mass, fragment_tol_mode)
                .filter_intensity(min_intensity=0.05, max_num_peaks=50)
                .scale_intensity('root')
                .annotate_peptide_fragments(fragment_tol_mass,
                                            fragment_tol_mode,
                                            ion_types='aby'))
            # Plot the cluster spectrum mirrored against the consensus.
            # The previous code referenced an undefined `tspec` here while
            # the consensus spectrum built above was never used.
            fig, ax = plt.subplots(figsize=(12, 6))
            sup.mirror(spectrum, cons_spec, ax=ax)
            plt.show()
            plt.close()
def plot_spectrum(identifier, precursor_mz, precursor_charge, mz, intensity,
                  retention_time, peptide):
    """
    Show an observed spectrum mirrored against its theoretical fragments.

    The observed spectrum is restricted to m/z 100-1400, has its precursor
    peak removed, is intensity-filtered and root-scaled, and is annotated
    with a, b and y ions using a 0.5 Da tolerance. The theoretical spectrum
    contains one unit-intensity peak for every fragment returned by
    `sus._get_theoretical_peptide_fragments`.

    Parameters
    ----------
    identifier
        Identifier given to both spectra.
    precursor_mz
        Precursor m/z given to both spectra.
    precursor_charge
        Precursor charge given to both spectra.
    mz
        Observed peak m/z values.
    intensity
        Observed peak intensities.
    retention_time
        Retention time given to both spectra.
    peptide
        Peptide sequence used for annotation and theoretical fragments.
    """
    tol_mass, tol_mode = .5, 'Da'

    # Build and process the observed spectrum one step at a time.
    observed = sus.MsmsSpectrum(identifier, precursor_mz=precursor_mz,
                                precursor_charge=precursor_charge, mz=mz,
                                intensity=intensity,
                                retention_time=retention_time,
                                peptide=peptide)
    observed = observed.set_mz_range(min_mz=100, max_mz=1400)
    observed = observed.remove_precursor_peak(tol_mass, tol_mode)
    observed = observed.filter_intensity(min_intensity=0.05,
                                         max_num_peaks=50)
    observed = observed.scale_intensity('root')
    observed = observed.annotate_peptide_fragments(tol_mass, tol_mode,
                                                   ion_types='aby')

    # Theoretical counterpart: every fragment gets the same intensity.
    fragments = sus._get_theoretical_peptide_fragments(peptide)
    theoretical_mz = [fragment.calc_mz for fragment in fragments]
    theoretical_intensity = [1.0] * len(theoretical_mz)
    theoretical = sus.MsmsSpectrum(identifier, precursor_mz=precursor_mz,
                                   precursor_charge=precursor_charge,
                                   mz=theoretical_mz,
                                   intensity=theoretical_intensity,
                                   retention_time=retention_time,
                                   peptide=peptide)

    # Draw the mirror plot and release the figure afterwards.
    _, axes = plt.subplots(figsize=(12, 6))
    sup.mirror(observed, theoretical, ax=axes)
    plt.show()
    plt.close()
def test_init_modification_str():
    """A modification keyed by an arbitrary string raises ValueError."""
    peak_count = 150
    mz_values = np.random.uniform(low=100, high=1400, size=peak_count)
    intensities = np.random.lognormal(mean=0, sigma=1, size=peak_count)
    invalid_modifications = {'bla': 42}
    with pytest.raises(ValueError):
        spectrum.MsmsSpectrum('test_spectrum', 500, 2, mz_values,
                              intensities, peptide='PEPTIDER',
                              modifications=invalid_modifications)
def test_init_peptide():
    """Constructing a spectrum with peptide 'BJOUXZ' raises ValueError."""
    peak_count = 150
    mz_values = np.random.uniform(low=100, high=1400, size=peak_count)
    intensities = np.random.lognormal(mean=0, sigma=1, size=peak_count)
    invalid_peptide = 'BJOUXZ'
    with pytest.raises(ValueError):
        spectrum.MsmsSpectrum('test_spectrum', 500, 2, mz_values,
                              intensities, peptide=invalid_peptide)
def get_cluster_spectra(mgf_filename: str) -> Dict[str, sus.MsmsSpectrum]:
    """
    Read all spectra from the given MGF file corresponding to a single
    cluster.

    Each spectrum title is expected to have the form ``cluster;usi``.

    Parameters
    ----------
    mgf_filename : str
        The file name of the MGF file to be read.

    Returns
    -------
    Dict[str, sus.MsmsSpectrum]
        A dictionary with as keys the USIs and as values the corresponding
        spectra. Each spectrum carries its cluster label in a `cluster`
        attribute.

    Raises
    ------
    ValueError
        If a title does not split into exactly two ';'-separated parts, or
        if the same USI occurs more than once.
    """
    spectra = {}
    # Use the reader as a context manager so the underlying file is closed
    # even when parsing fails midway.
    with mgf.read(mgf_filename) as reader:
        for spectrum_dict in reader:
            params = spectrum_dict['params']
            # TODO: Make sure the USI doesn't contain a peptide
            #  identification.
            cluster, usi = params['title'].split(';')
            if usi in spectra:
                raise ValueError(f'Non-unique USI: {usi}')
            spectrum = sus.MsmsSpectrum(
                usi,
                params['pepmass'][0],
                params['charge'][0],
                spectrum_dict['m/z array'],
                spectrum_dict['intensity array'],
                retention_time=params['rtinseconds'])
            spectrum.cluster = cluster
            spectra[usi] = spectrum
    return spectra
def fraction_of_by(peptide_seq, precursor_mz, precursor_charge, mz,
                   intensity):
    """
    Compute the fraction of intensity carried by annotated b/y peaks.

    The spectrum is restricted to m/z 100-1400, its precursor peak is
    removed, and peaks are annotated with b and y ions using a 50 ppm
    tolerance. The result is the summed intensity of annotated peaks
    divided by the summed intensity of all remaining peaks.

    Parameters
    ----------
    peptide_seq
        Peptide sequence; also used as the spectrum identifier.
    precursor_mz
        Precursor m/z of the spectrum.
    precursor_charge
        Precursor charge of the spectrum.
    mz
        Peak m/z values.
    intensity
        Peak intensities.

    Returns
    -------
    float
        The annotated intensity fraction, or 0.0 when the peptide sequence
        is invalid or no intensity remains after processing.
    """
    if not parser.fast_valid(peptide_seq):
        print("Invalid peptide sequence encountered", file=sys.stderr)
        return 0.0

    # The spectrum used to be bound to `spec` while the processing chain
    # below read the unbound name `spectrum`; use one name throughout.
    spectrum = sus.MsmsSpectrum(peptide_seq, precursor_mz=precursor_mz,
                                precursor_charge=precursor_charge, mz=mz,
                                intensity=intensity, peptide=peptide_seq)
    fragment_tol_mass = 50
    fragment_tol_mode = 'ppm'
    spectrum = (
        spectrum
        .set_mz_range(min_mz=100, max_mz=1400)
        .remove_precursor_peak(fragment_tol_mass, fragment_tol_mode)
        .annotate_peptide_fragments(fragment_tol_mass, fragment_tol_mode,
                                    ion_types='by'))

    current, by_current = 0., 0.
    for peak_intensity, peak_annotation in zip(spectrum.intensity,
                                               spectrum.annotation):
        current += peak_intensity
        # Identity check: annotation objects may define their own equality.
        if peak_annotation is not None:
            by_current += peak_intensity

    return by_current / current if current > 0. else 0.0
def _parse_spectrum(spectrum_dict: Dict) -> sus.MsmsSpectrum:
    """
    Convert a Pyteomics spectrum dict into an `MsmsSpectrum`.

    The dict is expected to use the nested ``scanList`` / ``precursorList``
    layout (presumably as produced when reading mzML; confirm with caller).

    Parameters
    ----------
    spectrum_dict : Dict
        The Pyteomics spectrum dict to be parsed.

    Returns
    -------
    MsmsSpectrum
        The parsed spectrum.

    Raises
    ------
    ValueError
        If the selected ion has neither a 'charge state' nor a
        'possible charge state' entry.
    """
    identifier = spectrum_dict['id']
    peak_mz = spectrum_dict['m/z array']
    peak_intensity = spectrum_dict['intensity array']
    # Only the first scan and the first selected ion are considered.
    rt = spectrum_dict['scanList']['scan'][0]['scan start time']
    first_precursor = spectrum_dict['precursorList']['precursor'][0]
    selected_ion = first_precursor['selectedIonList']['selectedIon'][0]
    selected_mz = selected_ion['selected ion m/z']

    # Prefer the definite charge state over a merely possible one.
    for charge_key in ('charge state', 'possible charge state'):
        if charge_key in selected_ion:
            charge = int(selected_ion[charge_key])
            break
    else:
        raise ValueError('Unknown precursor charge')

    return sus.MsmsSpectrum(identifier, selected_mz, charge, peak_mz,
                            peak_intensity, None, rt)
def _parse_ms2lda(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
    """
    Retrieve the spectrum referenced by an MS2LDA USI.

    Parameters
    ----------
    usi : str
        The USI to resolve. Only the "accession" index flag is supported.

    Returns
    -------
    Tuple[sus.MsmsSpectrum, str]
        The spectrum (with precursor charge 0) and a link to the MS2LDA
        document page.

    Raises
    ------
    UsiError
        With code 400 for a malformed task or unsupported index flag, and
        with code 404 when the server answers with an HTTP error or the
        response contains an "error" entry.
    """
    usi_match = _match_usi(usi)
    task_match = ms2lda_task_pattern.match(usi_match.group(2))
    if task_match is None:
        raise UsiError("Incorrectly formatted MS2LDA task", 400)
    experiment_id = task_match.group(1)
    if usi_match.group(3).lower() != "accession":
        raise UsiError("Currently supported MS2LDA index flags: accession",
                       400)
    document_id = usi_match.group(4)

    lookup_url = (f"{MS2LDA_SERVER}get_doc/?experiment_id={experiment_id}"
                  f"&document_id={document_id}")
    # Only the request itself can produce the HTTP error handled here.
    try:
        response = requests.get(lookup_url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        raise UsiError("Unknown MS2LDA USI", 404)

    payload = json.loads(response.text)
    if "error" in payload:
        raise UsiError(f'MS2LDA error: {payload["error"]}', 404)

    # Peaks arrive as (m/z, intensity) pairs.
    mz, intensity = zip(*payload["peaks"])
    source_link = f"http://ms2lda.org/basicviz/show_doc/{document_id}/"
    parsed = sus.MsmsSpectrum(usi, float(payload["precursor_mz"]), 0, mz,
                              intensity)
    return parsed, source_link
def _parse_spectrum(spectrum_dict: Dict) -> sus.MsmsSpectrum:
    """
    Convert a Pyteomics spectrum dict into an `MsmsSpectrum`.

    The dict is expected to carry its metadata under ``'params'`` with the
    ``title``, ``rtinseconds``, ``pepmass`` and ``charge`` entries
    (presumably as produced when reading MGF; confirm with caller).

    Parameters
    ----------
    spectrum_dict : Dict
        The Pyteomics spectrum dict to be parsed.

    Returns
    -------
    MsmsSpectrum
        The parsed spectrum.

    Raises
    ------
    ValueError
        If the params contain no 'charge' entry.
    """
    params = spectrum_dict['params']
    title = params['title']
    peak_mz = spectrum_dict['m/z array']
    peak_intensity = spectrum_dict['intensity array']
    rt = float(params['rtinseconds'])
    # Only the first pepmass / charge value is used.
    pepmass = float(params['pepmass'][0])
    if 'charge' not in params:
        raise ValueError('Unknown precursor charge')
    charge = int(params['charge'][0])
    return sus.MsmsSpectrum(title, pepmass, charge, peak_mz, peak_intensity,
                            None, rt)
def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
    """
    Retrieve the spectrum referenced by a MassBank USI.

    Parameters
    ----------
    usi : str
        The USI to resolve. Only the "accession" index flag is supported.

    Returns
    -------
    Tuple[sus.MsmsSpectrum, str]
        The spectrum (with precursor charge 0) and a link to the MassBank
        record page. The precursor m/z is 0 when the record metadata has no
        "precursor m/z" entry.

    Raises
    ------
    UsiError
        With code 400 for an unsupported index flag, and with code 404 when
        the server answers with an HTTP error.
    """
    usi_match = _match_usi(usi)
    if usi_match.group(3).lower() != "accession":
        raise UsiError("Currently supported MassBank index flags: accession",
                       400)
    accession = usi_match.group(4)

    # Only the request itself can produce the HTTP error handled here.
    try:
        response = requests.get(f"{MASSBANK_SERVER}{accession}",
                                timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        raise UsiError("Unknown MassBank USI", 404)
    record = response.json()

    # The peak list is whitespace-separated "mz:intensity" tokens.
    mz, intensity = [], []
    for mz_text, intensity_text in (token.split(":")
                                    for token in record["spectrum"].split()):
        mz.append(float(mz_text))
        intensity.append(float(intensity_text))

    # Take the first "precursor m/z" metadata entry, defaulting to 0.
    precursor_mz = next(
        (float(entry["value"]) for entry in record["metaData"]
         if entry["name"] == "precursor m/z"),
        0)

    source_link = (f"https://massbank.eu/MassBank/"
                   f"RecordDisplay.jsp?id={accession}")
    parsed = sus.MsmsSpectrum(usi, precursor_mz, 0, mz, intensity)
    return parsed, source_link
def _parse_spectrum(spectrum_dict: Dict) -> sus.MsmsSpectrum:
    """
    Convert a Pyteomics spectrum dict into an `MsmsSpectrum`.

    The dict is expected to carry ``retentionTime`` and a ``precursorMz``
    list (presumably as produced when reading mzXML; confirm with caller).

    Parameters
    ----------
    spectrum_dict : Dict
        The Pyteomics spectrum dict to be parsed.

    Returns
    -------
    MsmsSpectrum
        The parsed spectrum.

    Raises
    ------
    ValueError
        If the first precursor entry has no 'precursorCharge'.
    """
    identifier = spectrum_dict['id']
    peak_mz = spectrum_dict['m/z array']
    peak_intensity = spectrum_dict['intensity array']
    rt = spectrum_dict['retentionTime']
    # Only the first precursor entry is considered.
    selected_mz = spectrum_dict['precursorMz'][0]['precursorMz']
    if 'precursorCharge' not in spectrum_dict['precursorMz'][0]:
        raise ValueError('Unknown precursor charge')
    charge = spectrum_dict['precursorMz'][0]['precursorCharge']
    return sus.MsmsSpectrum(identifier, selected_mz, charge, peak_mz,
                            peak_intensity, None, rt)
def test_init_mz_sorted():
    """Peaks of a new spectrum are in non-decreasing m/z order."""
    peak_count = 150
    mz_values = np.random.uniform(low=100, high=1400, size=peak_count)
    intensities = np.random.lognormal(mean=0, sigma=1, size=peak_count)
    spec = spectrum.MsmsSpectrum('test_spectrum', 500, 2, mz_values,
                                 intensities)
    # Compare every peak with its successor.
    assert all(lower <= upper
               for lower, upper in zip(spec.mz[:-1], spec.mz[1:]))
def test_scale_intensity_rank_more_peaks():
    """Rank scaling with max_rank below the peak count raises ValueError."""
    peak_count = 150
    mz_values = np.random.uniform(low=100, high=1400, size=peak_count)
    intensities = np.random.lognormal(mean=0, sigma=1, size=peak_count)
    spec = spectrum.MsmsSpectrum('test_spectrum', 500, 2, mz_values,
                                 intensities)
    too_small_rank = peak_count - 50
    with pytest.raises(ValueError):
        spec.scale_intensity(scaling='rank', max_rank=too_small_rank)
def test_annotate_peptide_fragments():
    """
    Every theoretical fragment planted in a spectrum gets annotated.

    For several peptides, the theoretical fragment m/z values are jittered
    by less than the fragment tolerance, written over the first peaks of a
    random spectrum, and the number of non-zero annotations is compared to
    the number of fragments.
    """
    tol_mass, tol_mode = 0.02, 'Da'
    peptide_sequences = (
        'SYELPDGQVITIGNER',
        'MFLSFPTTK',
        'DLYANTVLSGGTTMYPGIADR',
        'YLYEIAR',
        'VAPEEHPVLLTEAPLNPK',
    )
    for sequence in peptide_sequences:
        theoretical = spectrum._get_theoretical_peptide_fragments(sequence)
        planted_mz = np.asarray([frag.calc_mz for frag in theoretical])
        # Jitter stays within 90% of the tolerance on either side.
        planted_mz += np.random.uniform(low=-0.9 * tol_mass,
                                        high=0.9 * tol_mass,
                                        size=len(planted_mz))
        peak_count = 150
        mz_values = np.random.uniform(low=100, high=1400, size=peak_count)
        mz_values[:len(planted_mz)] = planted_mz
        intensities = np.random.lognormal(mean=0, sigma=1, size=peak_count)
        precursor_charge = 2
        precursor_mz = mass.calculate_mass(sequence=sequence,
                                           charge=precursor_charge)
        spec = spectrum.MsmsSpectrum('test_spectrum', precursor_mz,
                                     precursor_charge, mz_values,
                                     intensities, peptide=sequence)
        spec.annotate_peptide_fragments(tol_mass, tol_mode)
        assert np.count_nonzero(spec.annotation) == len(planted_mz)
def _parse_gnps_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
    """
    Retrieve the spectrum referenced by a GNPS task USI.

    Parameters
    ----------
    usi : str
        The USI to resolve. Only the "scan" index flag is supported.

    Returns
    -------
    Tuple[sus.MsmsSpectrum, str]
        The spectrum and a link to the GNPS task status page. Precursor m/z
        and charge fall back to 0 when the response does not provide them.

    Raises
    ------
    UsiError
        With code 400 for a malformed task or unsupported index flag, and
        with code 404 when the server answers with an HTTP error or the
        response body is not valid JSON.
    """
    usi_match = _match_usi(usi)
    task_match = gnps_task_pattern.match(usi_match.group(2))
    if task_match is None:
        raise UsiError("Incorrectly formatted GNPS task", 400)
    task, filename = task_match.group(1), task_match.group(2)
    if usi_match.group(3).lower() != "scan":
        raise UsiError("Currently supported GNPS TASK index flags: scan",
                       400)
    scan = usi_match.group(4)

    request_url = (f"https://gnps.ucsd.edu/ProteoSAFe/DownloadResultFile?"
                   f"task={task}&invoke=annotatedSpectrumImageText&block=0"
                   f"&file=FILE->{filename}&scan={scan}&peptide=*..*&"
                   f"force=false&_=1561457932129&format=JSON")
    # Only the request and the JSON decoding can raise the handled errors.
    try:
        response = requests.get(request_url, timeout=timeout)
        response.raise_for_status()
        payload = response.json()
    except (requests.exceptions.HTTPError, json.decoder.JSONDecodeError):
        raise UsiError("Unknown GNPS task USI", 404)

    # Peaks arrive as (m/z, intensity) pairs.
    mz, intensity = zip(*payload["peaks"])
    source_link = (f"https://gnps.ucsd.edu/ProteoSAFe/status.jsp?"
                   f"task={task}")
    if "precursor" not in payload:
        precursor_mz, charge = 0, 0
    else:
        precursor_mz = float(payload["precursor"].get("mz", 0))
        charge = int(payload["precursor"].get("charge", 0))
    parsed = sus.MsmsSpectrum(usi, precursor_mz, charge, mz, intensity)
    return parsed, source_link
def _read_spectra_mgf(filename: str) -> Iterable[sus.MsmsSpectrum]:
    """
    Read MS/MS spectra from an MGF file.

    Each yielded spectrum gets a `filename` attribute (the 'filename' param
    if present, otherwise the MGF file's base name without extension), and
    `scan` / `cluster` attributes when the corresponding params exist.

    Parameters
    ----------
    filename : str
        The MGF file name.

    Returns
    -------
    Iterable[sus.MsmsSpectrum]
        An iterable of spectra in the given MGF file.
    """
    # Loop-invariant fallback for spectra without a 'filename' param.
    default_filename = os.path.splitext(os.path.basename(filename))[0]
    # Use the reader as a context manager so the file is closed once the
    # generator is exhausted or closed.
    with pyteomics.mgf.read(filename) as reader:
        for spectrum_dict in tqdm.tqdm(reader, desc='Spectra read',
                                       unit='spectra'):
            params = spectrum_dict['params']
            spectrum = sus.MsmsSpectrum(
                params['title'],
                params['pepmass'][0],
                params['charge'][0],
                spectrum_dict['m/z array'],
                spectrum_dict['intensity array'],
                None,
                params.get('rtinseconds'))
            spectrum.filename = params.get('filename', default_filename)
            if 'scan' in params:
                spectrum.scan = params['scan']
            if 'cluster' in params:
                spectrum.cluster = params['cluster']
            yield spectrum
def plot_prediction(
    self,
    peptide,
    modifications,
    charge,
    prediction=None,
    ax=None,
    filename=None,
):
    """
    Plot MS²PIP-predicted spectrum with spectrum_utils.

    Parameters
    ----------
    peptide: string
        Unmodified peptide sequence. Only canonical amino acids are allowed,
        and peptide sequence should be of length [3, 100].
    modifications: string
        MS²PIP style-formatted modification string (e.g.
        `0|Acetyl|5|Oxidation`). See MS²PIP README.md for more info.
    charge: int
        Peptide precursor charge.
    prediction: tuple or None (default: None)
        Tuple with `ms2pip.single_prediction.SinglePrediction.predict()`
        output. When not given, `self.predict` is called.
    ax: matplotlib.axes.Axes or None (default: None)
        Figure ax to plot onto. When None, the current axes are used.
    filename: str or None (default: None)
        Filename to save plot to. File extension defines the format. Figure
        will not be saved if None.
    """
    if not prediction:
        prediction = self.predict(peptide, modifications, charge)
    mz, intensity, annotation = prediction

    identifier = f"{peptide}/{charge}/{modifications}"
    precursor_mz = self.mod_info.calc_precursor_mz(
        peptide, modifications, charge
    )
    mod_dict = self._modifications_to_dict(modifications)
    sus_annotation = self._get_sus_annotation(mz, annotation)

    spectrum = sus.MsmsSpectrum(
        identifier,
        precursor_mz,
        charge,
        mz,
        intensity,
        annotation=sus_annotation,
        retention_time=None,
        peptide=peptide,
        modifications=mod_dict,
    )

    if ax is None:
        ax = plt.gca()
    sup.spectrum(spectrum, ax=ax)
    ax.set_title("MS²PIP prediction for " + identifier)

    if filename:
        # Save the figure that owns `ax`; `plt.savefig` would save whatever
        # figure is current, which differs when the caller passed in axes
        # from another figure.
        ax.get_figure().savefig(filename)
def test_scale_intensity_rank():
    """Rank scaling yields the intensities 1..N for N peaks."""
    peak_count = 150
    mz_values = np.random.uniform(low=100, high=1400, size=peak_count)
    intensities = np.random.lognormal(mean=0, sigma=1, size=peak_count)
    spec = spectrum.MsmsSpectrum('test_spectrum', 500, 2, mz_values,
                                 intensities)
    spec.scale_intensity(scaling='rank')
    expected_ranks = np.arange(1, peak_count + 1)
    observed_ranks = np.sort(spec.intensity)
    np.testing.assert_allclose(observed_ranks, expected_ranks)
def test_mz_annotation_len():
    """Passing fewer annotations than peaks raises ValueError."""
    peak_count = 150
    mz_values = np.random.uniform(low=100, high=1400, size=peak_count)
    intensities = np.random.lognormal(mean=0, sigma=1, size=peak_count)
    # Only 100 annotations for 150 peaks.
    short_annotation = [str(value) for value in mz_values[:100]]
    with pytest.raises(ValueError):
        spectrum.MsmsSpectrum('test_spectrum', 500, 2, mz_values,
                              intensities, short_annotation)
def test_round_no_merge():
    """Rounding to 0 decimals maps each peak to its nearest integer m/z."""
    peak_count = 150
    integer_mz = np.arange(1, peak_count + 1)
    # Offsets stay within half a unit, so no two peaks share an integer.
    offsets = np.random.uniform(low=-0.49, high=0.5, size=peak_count)
    mz_values = integer_mz + offsets
    intensities = np.random.exponential(scale=1, size=peak_count)
    spec = spectrum.MsmsSpectrum('test_spectrum', 500, 2, mz_values,
                                 intensities)
    spec.round(0)
    for expected_mz, rounded_mz in enumerate(spec.mz, start=1):
        assert expected_mz == pytest.approx(rounded_mz)
def _parse_motifdb(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
    """
    Retrieve the motif referenced by a MOTIFDB USI as a spectrum.

    Parameters
    ----------
    usi : str
        The USI to resolve, e.g. ``mzspec:MOTIFDB:motif:motif_id``. The
        fourth ':'-separated token is used as the motif id.

    Returns
    -------
    Tuple[sus.MsmsSpectrum, str]
        The spectrum (precursor m/z 0, charge 1) and a link to the motif
        page.
    """
    # E.g. mzspec:MOTIFDB:motif:motif_id.
    motif_id = usi.split(':')[3]
    # NOTE(review): this request has no timeout and the HTTP status is not
    # checked before decoding the body; confirm whether that is intended.
    response = requests.get(f'{MOTIFDB_SERVER}get_motif/{motif_id}')
    peaks = json.loads(response.text)
    # Peaks arrive as (m/z, intensity) pairs.
    mz, intensity = zip(*peaks)
    source_link = f'http://ms2lda.org/motifdb/motif/{motif_id}/'
    parsed = sus.MsmsSpectrum(usi, 0, 1, mz, intensity)
    return parsed, source_link
def _dict_to_spectrum(spectrum_dict: Dict):
    """
    Build an `MsmsSpectrum` from a spectrum dict with a 'params' section.

    The title becomes the identifier, the first 'pepmass' and 'charge'
    values become the precursor m/z and charge, and 'rtinseconds' is passed
    as the retention time.

    Parameters
    ----------
    spectrum_dict : Dict
        Dict with 'params', 'm/z array' and 'intensity array' entries.

    Returns
    -------
    sus.MsmsSpectrum
        The constructed spectrum.
    """
    params = spectrum_dict['params']
    identifier = params['title']
    precursor_mz = params['pepmass'][0]
    precursor_charge = params['charge'][0]
    peak_mz = spectrum_dict['m/z array']
    peak_intensity = spectrum_dict['intensity array']
    return sus.MsmsSpectrum(identifier, precursor_mz, precursor_charge,
                            peak_mz, peak_intensity,
                            retention_time=params['rtinseconds'])
def test_init_modification():
    """Positional and terminal modifications are all kept on the spectrum."""
    peak_count = 150
    mz_values = np.random.uniform(low=100, high=1400, size=peak_count)
    intensities = np.random.lognormal(mean=0, sigma=1, size=peak_count)
    # Two positional keys plus both terminal keys.
    mods = {0: +1, 4: -56, 'N-term': +16, 'C-term': -42}
    spec = spectrum.MsmsSpectrum('test_spectrum', 500, 2, mz_values,
                                 intensities, peptide='PEPTIDER',
                                 modifications=mods)
    assert len(spec.modifications) == 4
def test_init_annotation_order():
    """Annotations stay paired with their peak after construction."""
    peak_count = 150
    mz_values = np.random.uniform(low=100, high=1400, size=peak_count)
    intensities = np.random.lognormal(mean=0, sigma=1, size=peak_count)
    # Label each peak with the text of its own m/z so the pairing can be
    # checked afterwards.
    labels = [str(value) for value in mz_values]
    spec = spectrum.MsmsSpectrum('test_spectrum', 500, 2, mz_values,
                                 intensities, labels)
    for peak_mz, label in zip(spec.mz, spec.annotation):
        assert peak_mz == pytest.approx(float(label))
def test_filter_intensity_keep_all():
    """filter_intensity() without arguments keeps every peak."""
    peak_count = 150
    mz_values = np.random.uniform(low=100, high=1400, size=peak_count)
    intensities = np.random.lognormal(mean=0, sigma=1, size=peak_count)
    spec = spectrum.MsmsSpectrum('test_spectrum', 500, 2, mz_values,
                                 intensities)
    spec.filter_intensity()
    # All three per-peak arrays must keep their original length.
    assert len(spec.mz) == peak_count
    assert len(spec.intensity) == peak_count
    assert len(spec.annotation) == peak_count
def test_annotate_molecule_fragments_invalid_mz():
    """Annotating a fragment at m/z 1600, above all peaks, raises ValueError."""
    peak_count = 150
    mz_values = np.random.uniform(low=100, high=1400, size=peak_count)
    intensities = np.random.lognormal(mean=0, sigma=1, size=peak_count)
    spec = spectrum.MsmsSpectrum('test_spectrum', 500, 1, mz_values,
                                 intensities)
    tol_mass, tol_mode = 0.02, 'Da'
    with pytest.raises(ValueError):
        spec.annotate_molecule_fragment('CCCCCCCC', 1600, 1, tol_mass,
                                        tol_mode)
def test_round_merge_max():
    """Rounding with 'max' keeps the highest intensity of merged peaks."""
    peak_count = 10
    jitter = np.random.uniform(low=-0.2, high=0.2, size=peak_count)
    mz_values = np.arange(1, peak_count + 1) + jitter
    # Force peaks 3, 4 and 5 to nearly coincide, and likewise peaks 7 and 8.
    mz_values[4] = mz_values[3] + 0.0002
    mz_values[5] = mz_values[3] + 0.0005
    mz_values[7] = mz_values[8] - 0.00037
    intensities = np.arange(1, 11)
    spec = spectrum.MsmsSpectrum('test_spectrum', 500, 2, mz_values,
                                 intensities.copy())
    spec.round(1, 'max')
    expected_intensities = [1, 2, 3, 6, 7, 9, 10]
    np.testing.assert_allclose(spec.intensity, expected_intensities)
def test_round_merge_sum():
    """Rounding with 'sum' preserves the total intensity."""
    peak_count = 10
    jitter = np.random.uniform(low=-0.2, high=0.2, size=peak_count)
    mz_values = np.arange(1, peak_count + 1) + jitter
    # Force peaks 3, 4 and 5 to nearly coincide, and likewise peaks 7 and 8.
    mz_values[4] = mz_values[3] + 0.0002
    mz_values[5] = mz_values[3] + 0.0005
    mz_values[7] = mz_values[8] - 0.00037
    intensities = np.random.exponential(scale=1, size=peak_count)
    spec = spectrum.MsmsSpectrum('test_spectrum', 500, 2, mz_values,
                                 intensities.copy())
    spec.round(1, 'sum')
    total_before = np.sum(intensities)
    assert np.sum(spec.intensity) == pytest.approx(total_before)
def test_init_intensity_order():
    """Intensities stay paired with their m/z after construction."""
    peak_count = 150
    mz_values = np.random.uniform(low=100, high=1400, size=peak_count)
    intensities = np.random.lognormal(mean=0, sigma=1, size=peak_count)
    # Reference: the input (m/z, intensity) pairs ordered by m/z.
    expected_peaks = sorted(zip(mz_values, intensities),
                            key=lambda peak: peak[0])
    spec = spectrum.MsmsSpectrum('test_spectrum', 500, 2, mz_values,
                                 intensities)
    observed_peaks = zip(spec.mz, spec.intensity)
    for observed, expected in zip(observed_peaks, expected_peaks):
        assert observed == pytest.approx(expected)
def test_scale_intensity_max():
    """Scaling to max_intensity=1 is a uniform rescaling with maximum 1."""
    peak_count = 150
    mz_values = np.random.uniform(low=100, high=1400, size=peak_count)
    intensities = np.random.lognormal(mean=0, sigma=1, size=peak_count)
    spec = spectrum.MsmsSpectrum('test_spectrum', 500, 2, mz_values,
                                 intensities)
    # Snapshot the intensities and their maximum before scaling.
    unscaled = spec.intensity.copy()
    unscaled_max = spec.intensity.max()
    spec.scale_intensity(max_intensity=1.)
    assert spec.intensity.max() == pytest.approx(1.)
    # Multiplying back by the old maximum must recover the original values.
    np.testing.assert_allclose(spec.intensity * unscaled_max, unscaled,
                               rtol=1e-5)