コード例 #1
0
def _get_peaks(spectrum):
    """
    Collect the peaks of a spectrum into a single numpy array
    :param spectrum: a spectrum object exposing ``mz`` and ``i`` attributes
    :return: an array with one row per peak; the columns are the m/z value,
             the value returned by ``get_rt(spectrum)`` (repeated for every
             peak) and the intensity
    """
    mz_values = spectrum.mz
    # Every peak of the spectrum shares the same get_rt() value, so it is
    # repeated once per m/z entry to line up with the other columns.
    scan_rt = get_rt(spectrum)
    rt_column = [scan_rt] * len(mz_values)
    return np.stack([mz_values, rt_column, spectrum.i], axis=1)
コード例 #2
0
def get_precursor_info(fragfile, *, isolation_width=1.0,
                       intensity_tolerance=0.1):
    """
    Get (MS1) precursor peaks and their associated MS2 scans from an mzML file

    Scans are read in file order. The peak list of the most recent MS1 scan is
    remembered, and every later scan that reports a selected precursor is
    matched against it through ``_find_precursor_peaks``. Scans for which the
    match raises ``ValueError`` are logged and skipped; scans for which a
    ``KeyError`` is raised (e.g. the precursor has no ``'i'`` entry) are
    skipped silently.

    :param fragfile: path to an mzML file
    :param isolation_width: isolation window width (in Dalton) forwarded to
                            ``_find_precursor_peaks``
    :param intensity_tolerance: a row is kept only if the absolute difference
                                between the precursor intensity reported by
                                the MS2 scan and the matched MS1 intensity is
                                strictly below this value
    :return: a pandas dataframe that contains all the ms1 and ms2 information
             of the retained matches, plus an ``intensity_diff`` column
    :raises AssertionError: if a scan reports more than one selected precursor
    """
    run = pymzml.run.Reader(fragfile,
                            obo_version='4.0.1',
                            MS1_Precision=5e-6,
                            extraAccessions=[('MS:1000016',
                                              ['value', 'unitName'])])

    last_ms1_peaklist = None
    last_ms1_scan_no = 0
    data = []
    for scan_no, scan in enumerate(run):
        if scan.ms_level == 1:  # save the last ms1 scan that we've seen
            last_ms1_peaklist = _get_peaks(scan)
            last_ms1_scan_no = scan_no

        # TODO: it's better to use the "isolation window target m/z" field in the mzML file for matching
        precursors = scan.selected_precursors
        if len(precursors) == 0:
            continue

        # Exactly one precursor peak is assumed for each ms2 scan. This is an
        # explicit raise (rather than an assert statement) so the check is
        # still performed when Python runs with -O; the exception type is the
        # one an assert would have produced.
        if len(precursors) != 1:
            raise AssertionError(
                'expected exactly 1 precursor for scan %d, found %d' %
                (scan_no, len(precursors)))
        precursor = precursors[0]

        try:
            scan_rt = get_rt(scan)
            precursor_mz = precursor['mz']
            precursor_intensity = precursor['i']
            if last_ms1_peaklist is None:
                # No MS1 scan has been seen yet, so there is no peak list to
                # search for the precursor in.
                logger.warning(
                    'Skipping scan %d: no MS1 scan precedes it', scan_no)
                continue
            res = _find_precursor_peaks(precursor,
                                        last_ms1_peaklist,
                                        last_ms1_scan_no,
                                        isolation_width=isolation_width)
            ms2_peaklist = _get_peaks(scan)
            row = [
                scan_no, scan_rt, precursor_mz, precursor_intensity,
                ms2_peaklist
            ]
            # res is expected to supply the values for the four ms1_* columns
            row.extend(res)
            data.append(row)
        except ValueError as e:
            logger.warning(e)
        except KeyError:
            continue  # sometimes we can't find the intensity value precursor['i'] in precursors

    columns = [
        'ms2_scan_id', 'ms2_scan_rt', 'ms2_precursor_mz',
        'ms2_precursor_intensity', 'ms2_peaklist', 'ms1_scan_id',
        'ms1_scan_rt', 'ms1_mz', 'ms1_intensity'
    ]
    df = pd.DataFrame(data, columns=columns)

    # select only rows where we are sure of the matching, i.e. the intensity values aren't too different
    df['intensity_diff'] = np.abs(df['ms2_precursor_intensity'] -
                                  df['ms1_intensity'])
    is_confident = df['intensity_diff'] < intensity_tolerance
    # Return an independent frame so callers can add or modify columns
    # without triggering pandas' chained-assignment warnings.
    return df[is_confident].copy()