def peaklist(self, scan_id, function_noise="median"):
    """Build a PeakList for the scan whose ``id`` equals *scan_id*.

    The scans returned by ``self.run()`` are walked in order and the first
    one with a matching ``id`` is converted.

    :param scan_id: value compared against ``scan["id"]``.
    :param function_noise: noise estimator name forwarded to
        ``scan.estimatedNoiseLevel`` and to ``PeakList``; one of
        ``"mean"``, ``"median"`` or ``"mad"``.
    :return: a ``PeakList`` carrying an extra ``snr`` attribute
        (intensities divided by the estimated noise level), or ``None``
        when no scan has the requested id.
    :raises ValueError: if *function_noise* is not a supported name.
    """
    if function_noise not in ("mean", "median", "mad"):
        raise ValueError("select a function that is available [mean, median, mad]")

    for scan in self.run():
        if scan["id"] != scan_id:
            continue

        peaks = scan.peaks
        # zip(*[]) produces nothing to unpack, so a scan without peaks
        # needs its own branch instead of raising ValueError.
        if len(peaks) > 0:
            mzs, ints = zip(*peaks)
        else:
            mzs, ints = [], []

        scan_time = scan["MS:1000016"]
        tic = scan["total ion current"]
        # The injection-time entry is optional; fall back to None.
        ion_injection_time = scan["MS:1000927"] if "MS:1000927" in scan else None

        header = scan['MS:1000512']
        mz_range = mz_range_from_header(header)
        ms_level = scan['ms level']

        pl = PeakList(ID=scan["id"],
                      mz=mzs,
                      intensity=ints,
                      mz_range=mz_range,
                      header=header,
                      ms_level=ms_level,
                      ion_injection_time=ion_injection_time,
                      scan_time=scan_time,
                      tic=tic,
                      function_noise=function_noise)

        snr = np.divide(ints, scan.estimatedNoiseLevel(mode=function_noise))
        pl.add_attribute('snr', snr)
        return pl

    return None
def remove_edges(pls_sd):
    """Trim peaks that fall outside the recalculated m/z range of each header.

    The m/z range of every header (key of *pls_sd*) is obtained with
    ``mz_range_from_header`` and the whole list is passed through
    ``_calculate_edges``.  For each peaklist stored under a header, peaks
    with ``mz < lower`` or ``mz >= upper`` of that header's new range are
    removed via ``remove_peak``.  The peaklists are modified in place.

    :param pls_sd: dict (or ``collections.OrderedDict``) mapping a header
        to a sequence of peaklist objects exposing ``mz`` and
        ``remove_peak``.
    :return: the same *pls_sd* object.
    :raises TypeError: if *pls_sd* is not a dictionary.
    """
    # OrderedDict is a dict subclass, so a single isinstance check covers both.
    if not isinstance(pls_sd, dict):
        raise TypeError(
            "Incorrect format - dict or collections.OrderedDict required")

    headers = list(pls_sd)
    new_mzrs = _calculate_edges([mz_range_from_header(h) for h in headers])

    # enumerate() gives the header position directly; dict views have no
    # .index() on Python 3 and a list lookup would be O(n) per header.
    for pos, h in enumerate(headers):
        lower, upper = new_mzrs[pos][0], new_mzrs[pos][1]
        for pl in pls_sd[h]:
            mzs = np.asarray(pl.mz)
            # One vectorised pass yields each out-of-range index exactly once.
            outside = np.where((mzs < lower) | (mzs >= upper))[0]
            pl.remove_peak(outside.tolist())

    return pls_sd
def peaklist(self, scan_id, function_noise="median"):
    """Build a PeakList for the scan whose ``id`` equals *scan_id*.

    A fresh ``pymzml.run.Reader`` is opened on ``self.filename`` and its
    scans are walked in order; the first one with a matching ``id`` is
    converted.  The reader's underlying file object is closed before
    returning, whether or not a scan matched and also when an error occurs.

    :param scan_id: value compared against ``scan["id"]``.
    :param function_noise: noise estimator name forwarded to
        ``scan.estimated_noise_level`` and to ``PeakList``; one of
        ``"mean"``, ``"median"`` or ``"mad"``.
    :return: a ``PeakList`` carrying an extra ``snr`` attribute
        (intensities divided by the estimated noise level), or ``None``
        when no scan has the requested id.
    :raises ValueError: if *function_noise* is not a supported name.
    """
    if function_noise not in ("mean", "median", "mad"):
        raise ValueError(
            "select a function that is available [mean, median, mad]")

    run = pymzml.run.Reader(self.filename)
    try:
        for scan in run:
            if scan["id"] != scan_id:
                continue

            peaks = scan.peaks("raw")
            # zip(*[]) produces nothing to unpack, so handle empty scans.
            if len(peaks) > 0:
                mzs, ints = list(zip(*peaks))
            else:
                mzs, ints = [], []

            scan_time = scan["MS:1000016"]
            tic = scan["total ion current"]
            # The injection-time entry is optional; fall back to None.
            if "MS:1000927" in scan:
                ion_injection_time = scan["MS:1000927"]
            else:
                ion_injection_time = None

            header = scan['MS:1000512']
            mz_range = mz_range_from_header(header)
            ms_level = scan['ms level']

            pl = PeakList(ID=scan["id"],
                          mz=mzs,
                          intensity=ints,
                          mz_range=mz_range,
                          header=header,
                          ms_level=ms_level,
                          ion_injection_time=ion_injection_time,
                          scan_time=scan_time,
                          tic=tic,
                          function_noise=function_noise)

            snr = np.divide(
                ints, scan.estimated_noise_level(mode=function_noise))
            pl.add_attribute('snr', snr)
            return pl

        return None
    finally:
        # Previously the handle was closed only on the match path, leaking
        # it when the id was absent or an exception was raised.
        run.info["file_object"].close()
def read_scans(fn, source, function_noise, min_scans=1, filter_scan_events=None):
    """Read the peaklists of a data file, grouped by scan header.

    :param fn: file name; must end in ``.mzml`` or ``.raw`` (case-insensitive).
    :param source: tested with ``zipfile.is_zipfile``; when it is a zip
        archive it is passed to ``mzml_portal.Mzml`` together with *fn*
        (zipped ``.raw`` files are rejected).
    :param function_noise: forwarded to ``run.peaklists``.
    :param min_scans: minimum number of scan ids a header needs in order to
        be kept; ``None`` disables the check.
    :param filter_scan_events: optional dict with exactly one of the keys
        ``"include"`` / ``"exclude"``, whose value is a list of
        ``[start, end, scan_type]`` triples, e.g.
        ``{'include': [[70.0, 170.0, 'sim']]}``.
    :return: ``collections.OrderedDict`` mapping each retained header to
        the result of ``run.peaklists(sids, function_noise)``.
    :raises IOError: unsupported extension, or a zipped ``.raw`` file.
    :raises ValueError: invalid *min_scans* or malformed *filter_scan_events*.
    :raises Exception: when no header is left after filtering.
    """
    if filter_scan_events is None:
        filter_scan_events = {}

    fn_lower = fn.lower()
    is_mzml = fn_lower.endswith(".mzml")
    is_raw = fn_lower.endswith(".raw")

    if not is_mzml and not is_raw:
        raise IOError("Check format raw data (.RAW or .mzML)")

    if min_scans is not None and type(min_scans) is not int:
        raise ValueError("Integer (>= 1) or None required for min_scans")

    # The extension was validated above, so exactly one of is_mzml / is_raw
    # holds from here on.
    if zipfile.is_zipfile(source):
        if is_raw:
            raise IOError("Zip file with raw files not supported")
        run = mzml_portal.Mzml(fn, source)
    elif is_mzml:
        run = mzml_portal.Mzml(fn)
    else:
        run = thermo_raw_portal.ThermoRaw(fn)

    try:
        h_sids = run.headers()

        if isinstance(filter_scan_events, dict) and len(filter_scan_events) > 0:
            has_include = "include" in filter_scan_events
            has_exclude = "exclude" in filter_scan_events
            # Exactly one of the two keys must be present.
            if has_include == has_exclude:
                raise ValueError(
                    "Use 'exclude' or 'include' for filter_scan_events not both. "
                    "E.g {'include': [[70.0, 170.0, 'sim']]}"
                )

            # Look the mode up by name rather than relying on .keys()[0],
            # which is not indexable on Python 3.
            mode = "include" if has_include else "exclude"
            events = filter_scan_events[mode]

            if any(len(fse) != 3 for fse in events):
                raise ValueError(
                    "Provide a start, end and scan type (sim or full) for filter_scan_events."
                )

            # Normalise so the triples compare equal to the header descriptions.
            events = [[float(fse[0]), float(fse[1]), str(fse[2])]
                      for fse in events]

            h_descs = {}
            for h in h_sids:
                mzr = mz_range_from_header(h)
                h_descs[h] = [mzr[0], mzr[1], scan_type_from_header(h).lower()]

            known_descs = list(h_descs.values())
            for event in events:
                if event not in known_descs:
                    logging.warning("Event {} doest not exist".format(str(event)))

            for h, desc in h_descs.items():
                matched = desc in events
                if mode == "include" and not matched:
                    del h_sids[h]
                elif mode == "exclude" and matched:
                    del h_sids[h]

        if len(h_sids) == 0:
            raise Exception("No scan data to process. Check filter_scan_events")

        scans = collections.OrderedDict()
        for h, sids in h_sids.items():
            # min_scans=None is accepted by the validation above and means
            # "no minimum"; comparing an int with None fails on Python 3.
            if min_scans is None or len(sids) >= min_scans:
                scans[h] = run.peaklists(sids, function_noise)
            else:
                # Report the number of scan ids for this header, not the
                # number of headers collected so far.
                logging.warning(
                    'Not enough scans for [{}] [{} < {}]. Scan event {} has been removed.'
                    .format(h, len(sids), min_scans, h))

        return scans
    finally:
        # Close the RAW reader on failure as well as on success.
        if is_raw:
            run.close()