예제 #1
0
    def peaklist(self, scan_id, function_noise="median"):
        """Build a PeakList for the first scan whose id equals ``scan_id``.

        The scans yielded by ``self.run()`` are searched in order; the first
        one with a matching ``scan["id"]`` is converted and returned with an
        additional ``snr`` attribute (intensity divided by the scan's
        estimated noise level).

        Args:
            scan_id: Value compared against ``scan["id"]``.
            function_noise: Noise estimator name, one of ``"mean"``,
                ``"median"`` or ``"mad"``. It is forwarded both to
                ``PeakList`` and to ``scan.estimatedNoiseLevel``.

        Returns:
            The populated ``PeakList``, or ``None`` when no scan matches.

        Raises:
            ValueError: If ``function_noise`` is not a supported name.
        """
        if function_noise not in ["mean", "median", "mad"]:
            raise ValueError("select a function that is available [mean, median, mad]")

        for scan in self.run():
            if scan["id"] != scan_id:
                continue

            peaks = scan.peaks
            # zip(*peaks) yields nothing for a scan without peaks, which
            # cannot be unpacked into two names; fall back to empty columns.
            if len(peaks) > 0:
                mzs, ints = zip(*peaks)
            else:
                mzs, ints = [], []

            header = scan['MS:1000512']
            # The injection time entry is optional in the scan metadata.
            ion_injection_time = scan["MS:1000927"] if "MS:1000927" in scan else None

            pl = PeakList(ID=scan["id"], mz=mzs, intensity=ints,
                          mz_range=mz_range_from_header(header),
                          header=header,
                          ms_level=scan['ms level'],
                          ion_injection_time=ion_injection_time,
                          scan_time=scan["MS:1000016"],
                          tic=scan["total ion current"],
                          function_noise=function_noise)
            snr = np.divide(ints, scan.estimatedNoiseLevel(mode=function_noise))
            pl.add_attribute('snr', snr)
            return pl
        return None
예제 #2
0
def remove_edges(pls_sd):
    """Drop peaks that fall outside the recalculated m/z window of each header.

    The m/z range of every key (header) is obtained with
    ``mz_range_from_header`` and the list of ranges is passed to
    ``_calculate_edges``; the range returned at the same position is then
    applied to every peak list stored under that header. Peaks with
    ``mz < lower`` or ``mz >= upper`` are removed through
    ``PeakList.remove_peak``.

    Args:
        pls_sd: Mapping of header -> sequence of peak lists. The peak lists
            are modified in place.

    Returns:
        The same ``pls_sd`` object that was passed in.

    Raises:
        TypeError: If ``pls_sd`` is not a dict (or dict subclass such as
            ``collections.OrderedDict``).
    """
    if not isinstance(pls_sd, dict):
        raise TypeError(
            "Incorrect format - dict or collections.OrderedDict required")

    mzrs = [mz_range_from_header(h) for h in pls_sd]
    new_mzrs = _calculate_edges(mzrs)

    # enumerate() gives the position of each header directly; the previous
    # keys().index(h) lookup only works on Python 2 list-style keys.
    for pos, h in enumerate(pls_sd):
        lower, upper = new_mzrs[pos][0], new_mzrs[pos][1]
        for pl in pls_sd[h]:
            mz = np.asarray(pl.mz)
            # One vectorised mask replaces the per-peak np.where scans and
            # yields each offending index exactly once.
            outside = (mz < lower) | (mz >= upper)
            pl.remove_peak(np.where(outside)[0].tolist())
    return pls_sd
예제 #3
0
    def peaklist(self, scan_id, function_noise="median"):
        """Read ``self.filename`` and build a PeakList for the scan ``scan_id``.

        A new ``pymzml.run.Reader`` is opened for every call and iterated
        until a scan with a matching ``scan["id"]`` is found. The returned
        peak list carries an extra ``snr`` attribute (intensity divided by
        the scan's estimated noise level).

        Args:
            scan_id: Value compared against ``scan["id"]``.
            function_noise: Noise estimator name, one of ``"mean"``,
                ``"median"`` or ``"mad"``. It is forwarded both to
                ``PeakList`` and to ``scan.estimated_noise_level``.

        Returns:
            The populated ``PeakList``, or ``None`` when no scan matches.

        Raises:
            ValueError: If ``function_noise`` is not a supported name.
        """
        if function_noise not in ["mean", "median", "mad"]:
            raise ValueError(
                "select a function that is available [mean, median, mad]")

        run = pymzml.run.Reader(self.filename)
        try:
            for scan in run:
                if scan["id"] != scan_id:
                    continue

                peaks = scan.peaks("raw")
                # A scan without peaks cannot be unzipped into two columns.
                if len(peaks) > 0:
                    mzs, ints = list(zip(*peaks))
                else:
                    mzs, ints = [], []

                header = scan['MS:1000512']
                # The injection time entry is optional in the scan metadata.
                if "MS:1000927" in scan:
                    ion_injection_time = scan["MS:1000927"]
                else:
                    ion_injection_time = None

                pl = PeakList(ID=scan["id"],
                              mz=mzs,
                              intensity=ints,
                              mz_range=mz_range_from_header(header),
                              header=header,
                              ms_level=scan['ms level'],
                              ion_injection_time=ion_injection_time,
                              scan_time=scan["MS:1000016"],
                              tic=scan["total ion current"],
                              function_noise=function_noise)
                snr = np.divide(
                    ints, scan.estimated_noise_level(mode=function_noise))
                pl.add_attribute('snr', snr)
                return pl
            return None
        finally:
            # Close the underlying file on every exit path: match found,
            # no match, or an exception while converting the scan.
            run.info["file_object"].close()
예제 #4
0
def read_scans(fn,
               source,
               function_noise,
               min_scans=1,
               filter_scan_events=None):
    """Read the peak lists of a .mzML or .RAW file, grouped by scan header.

    Args:
        fn: File name; its extension (case-insensitive ``.mzml`` or ``.raw``)
            selects the reader.
        source: Path tested with ``zipfile.is_zipfile``; when it is a zip
            archive it is handed to the mzML reader together with ``fn``.
        function_noise: Passed through to ``run.peaklists``.
        min_scans: Minimum number of scan ids a header needs to be kept.
            ``None`` disables the check.
        filter_scan_events: Optional dict with exactly one of the keys
            ``"include"`` / ``"exclude"`` mapping to a list of
            ``[start, end, scan_type]`` events, e.g.
            ``{'include': [[70.0, 170.0, 'sim']]}``.

    Returns:
        ``collections.OrderedDict`` mapping header -> result of
        ``run.peaklists`` for the scan ids of that header.

    Raises:
        IOError: Unsupported extension, or a zip archive combined with a
            ``.raw`` file name.
        ValueError: ``min_scans`` is neither ``None`` nor an int, or
            ``filter_scan_events`` is malformed.
        Exception: No header is left after filtering.
    """
    if filter_scan_events is None:
        filter_scan_events = {}
    if not fn.lower().endswith((".mzml", ".raw")):
        raise IOError("Check format raw data (.RAW or .mzML)")

    if min_scans is not None and not isinstance(min_scans, int):
        raise ValueError("Integer (>= 1) or None required for min_scans")

    run = _open_run(fn, source)
    try:
        h_sids = run.headers()

        if isinstance(filter_scan_events, dict) and len(filter_scan_events) > 0:
            _filter_headers(h_sids, filter_scan_events)

        if len(h_sids) == 0:
            raise Exception("No scan data to process. Check filter_scan_events")

        scans = collections.OrderedDict()
        for h, sids in h_sids.items():
            # min_scans=None is accepted by the validation above and means
            # "no minimum"; comparing an int with None fails on Python 3.
            if min_scans is None or len(sids) >= min_scans:
                scans[h] = run.peaklists(sids, function_noise)
            else:
                # Report the number of scans of this header (len(sids)),
                # not the number of headers accepted so far.
                logging.warning(
                    'Not enough scans for [{}] [{} < {}]. Scan event {} has been removed.'
                    .format(h, len(sids), min_scans, h))
    finally:
        # Only the raw reader is closed here, as before, but now also when
        # an error interrupts processing.
        if fn.lower().endswith(".raw"):
            run.close()

    return scans


def _open_run(fn, source):
    """Return the reader object matching the extension of ``fn``."""
    lowered = fn.lower()
    zipped = zipfile.is_zipfile(source)

    if lowered.endswith(".mzml"):
        return mzml_portal.Mzml(fn, source) if zipped else mzml_portal.Mzml(fn)
    if lowered.endswith(".raw"):
        if zipped:
            raise IOError("Zip file with raw files not supported")
        return thermo_raw_portal.ThermoRaw(fn)
    raise IOError("Incorrect format: {}".format(os.path.basename(fn)))


def _filter_headers(h_sids, filter_scan_events):
    """Delete from ``h_sids`` (in place) the headers rejected by the filter."""
    has_include = "include" in filter_scan_events
    has_exclude = "exclude" in filter_scan_events
    if has_include == has_exclude:
        raise ValueError(
            "Use 'exclude' or 'include' for filter_scan_events not both. E.g {'include': [[70.0, 170.0, 'sim']]}"
        )

    # Look the events up by key instead of relying on dict ordering.
    mode = "include" if has_include else "exclude"
    raw_events = filter_scan_events[mode]
    if any(len(fse) != 3 for fse in raw_events):
        raise ValueError(
            "Provide a start, end and scan type (sim or full) for filter_scan_events."
        )
    events = [[float(fse[0]), float(fse[1]), str(fse[2])] for fse in raw_events]

    # Describe every header as [start, end, scan type] so it can be compared
    # with the normalised events.
    h_descs = {}
    for h in list(h_sids):
        mzr = mz_range_from_header(h)
        h_descs[h] = [mzr[0], mzr[1], scan_type_from_header(h).lower()]

    known = list(h_descs.values())
    for event in events:
        if event not in known:
            logging.warning("Event {} doest not exist".format(str(event)))

    for h, desc in h_descs.items():
        listed = desc in events
        keep = listed if mode == "include" else not listed
        if not keep:
            del h_sids[h]