Beispiel #1
0
 def read_index_file(self, index_path=None):
     """Load the extended scan index from an index file.

     Parameters
     ----------
     index_path : optional
         Location of the index file, passed to ``get_opener``. When
         :const:`None`, ``self._index_file_name`` is used instead.

     The deserialized index is stored on ``self.extended_index``.
     """
     source = self._index_file_name if index_path is None else index_path
     with get_opener(source) as stream:
         opened_as_binary = 'b' in stream.mode
         if opened_as_binary:
             # The deserializer is handed text, so decode binary streams as UTF-8.
             stream = io.TextIOWrapper(stream, 'utf8')
         self.extended_index = ExtendedScanIndex.deserialize(stream)
Beispiel #2
0
def _assert_deconvoluted_peaks_match(a_scan, b_scan):
    """Assert that two scans carry equal deconvoluted peak sets.

    The peak-set diff is computed up front so that it can be included in the
    assertion message for either the length check or the equality check.
    """
    diffa, diffb = diff_deconvoluted_peak_set(
        a_scan.deconvoluted_peak_set, b_scan.deconvoluted_peak_set)
    assert len(a_scan.deconvoluted_peak_set) == len(
        b_scan.deconvoluted_peak_set), "Peak Counts Diff On %r, (%r, %r)" % (a_scan.id, diffa, diffb)
    assert a_scan.deconvoluted_peak_set == b_scan.deconvoluted_peak_set, "Peaks Diff On %r, (%r, %r)" % (
        a_scan.id, diffa, diffb)


def test_ms_deisotope():
    """Run the ``deisotope`` command and compare its output to a stored reference.

    The command is invoked through click's ``CliRunner`` on a gzipped mzML
    data file, writing to a temporary path. The written file and the
    reference file are then read with ``ProcessedMzMLDeserializer`` and
    compared bunch by bunch: precursor ids, product counts, product ids and
    the deconvoluted peak sets of every precursor and product must agree.
    """
    runner = CliRunner(mix_stderr=False)
    path = datafile("20150710_3um_AGP_001_29_30.mzML.gz")
    reference = datafile("20150710_3um_AGP_001_29_30.preprocessed.mzML.gz")
    outpath = tempfile.mktemp()
    result_reader = reference_reader = None
    try:
        result = runner.invoke(deisotoper.deisotope, [
            "-b", 0, "-t", 20, "-tn", 10, "-m", 3, "-mn", 1, path, outpath
        ])
        # Surface a failed command directly instead of as a confusing read
        # error on a missing or truncated output file.
        assert result.exit_code == 0, "deisotope exited with %r: %r" % (
            result.exit_code, result.exception)
        result_reader = ProcessedMzMLDeserializer(outpath)
        reference_reader = ProcessedMzMLDeserializer(_compression.get_opener(reference))
        assert len(result_reader) == len(reference_reader)
        for a_bunch, b_bunch in zip(result_reader, reference_reader):
            assert len(a_bunch.products) == len(b_bunch.products)
            aprec = a_bunch.precursor
            bprec = b_bunch.precursor
            assert aprec.id == bprec.id
            _assert_deconvoluted_peaks_match(aprec, bprec)

            for aprod, bprod in zip(a_bunch.products, b_bunch.products):
                assert aprod.id == bprod.id
                _assert_deconvoluted_peaks_match(aprod, bprod)
    finally:
        # Release readers and the temporary output even when an assertion fails.
        for reader in (result_reader, reference_reader):
            if reader is not None:
                reader.close()
        if os.path.exists(outpath):
            os.remove(outpath)
Beispiel #3
0
def scan_from_csv(file_handle, delimiter=',', ms_level=2, is_profile=True, polarity=1,
                  precursor_mz=None, precursor_charge=None):
    """Read an m/z-intensity point list from a text file stream.

    The first non-blank row is treated as a header and skipped when its first
    field cannot be parsed as a number. Blank rows are ignored.

    Parameters
    ----------
    file_handle : file
        The file to read from. It is passed through ``get_opener`` before
        being handed to :func:`csv.reader`.
    delimiter : str, optional
        The field separator between m/z and intensity. (the default is ',')
    ms_level : int, optional
        The MS level of the read scan. (the default is 2)
    is_profile : bool, optional
        Whether the scan is in profile mode (the default is True)
    polarity : int, optional
        Whether the scan is positive mode or negative (the default is 1, which means positive)
    precursor_mz : float, optional
        The m/z of the precursor ion to record. If provided, it will be
        provided in :attr:`~.Scan.precursor_information`.
    precursor_charge : int, optional
        If provided, and `precursor_mz` is not :const:`None`, then it will be provided
        in :attr:`~.Scan.precursor_information`. When omitted,
        ``ChargeNotProvided`` is recorded instead.

    Returns
    -------
    :class:`~.Scan`

    Raises
    ------
    IndexError
        If a non-blank data row has fewer than two fields.
    ValueError
        If a data row contains a value that cannot be converted to a float.
    """
    file_handle = get_opener(file_handle)
    reader = csv.reader(file_handle, delimiter=delimiter)
    mzs = []
    intensities = []
    header_checked = False
    for line in reader:
        if not line:
            # csv.reader yields an empty list for a blank line; nothing to read.
            continue
        if not header_checked:
            header_checked = True
            try:
                float(line[0])
            except ValueError:
                # Non-numeric first field: this row is a column header.
                continue
        mzs.append(line[0])
        intensities.append(line[1])
    # Conversion from text to numbers happens here, in bulk.
    mzs = np.array(mzs, dtype=float)
    intensities = np.array(intensities, dtype=float)
    signal = RawDataArrays(mzs, intensities)
    pinfo = None
    if precursor_mz is not None:
        pinfo = PrecursorInformation(
            precursor_mz,
            0,
            ChargeNotProvided if precursor_charge is None else precursor_charge)
    # NOTE(review): the positional "index=1", 0, 0 arguments are placeholder
    # values whose meaning is defined by make_scan - confirm against its signature.
    scan = make_scan(
        signal, ms_level, "index=1", 0, 0, is_profile,
        polarity,
        precursor_information=pinfo)
    return scan
Beispiel #4
0
def test_mzml():
    """Convert ``small.mzML`` to standard output, plain and with ``-z``.

    Each conversion's captured output is loaded with ``MzMLLoader`` and must
    contain 48 entries. Before and after each run the working directory is
    checked for a stray ``-idx.json`` file, which is reported as an error.
    """
    def fail_if_orphan_index(message):
        # An index written next to an output literally named "-" is a leak.
        if os.path.exists("-idx.json"):
            raise IOError(message)

    cli = CliRunner(mix_stderr=False)
    fail_if_orphan_index("Orphan index file exists before running test")
    source = datafile("small.mzML")

    # First pass: output captured as text and re-encoded to bytes.
    plain = cli.invoke(conversion.mzml, ['-p', '-c', source, '-'])
    plain_stream = io.BytesIO(plain.output.encode("utf-8"))
    plain_count = len(MzMLLoader(plain_stream))
    assert plain_count == 48
    fail_if_orphan_index(
        "Orphan index file exists after running uncompressed test")

    # Second pass: '-z' output is taken as raw bytes and opened via get_opener.
    packed = cli.invoke(
        conversion.mzml, ['-p', '-z', '-c', source, '-'], catch_exceptions=False)
    packed_stream = io.BytesIO(packed.stdout_bytes)
    packed_count = len(MzMLLoader(_compression.get_opener(packed_stream)))
    assert packed_count == 48
    fail_if_orphan_index("Orphan index file exists after running compressed test")
    def test_writer(self):
        """Write one processed scan bunch with ``MzMLSerializer`` and read it back.

        Metadata (file description, instrument configurations, software and
        data processing) is copied from the source reader into the writer,
        the first scan bunch is peak-picked, deconvoluted and saved, and the
        resulting file is reopened with ``ProcessedMzMLDeserializer`` to check
        that ids, acquisition information, precursor masses, file contents,
        source file parameters and the extended index survive the round trip.
        """
        source_reader = MzMLLoader(self.source_data_path)
        fd, name = tempfile.mkstemp()
        # mkstemp hands back an open descriptor; the file is reopened by name
        # below, so release the descriptor now rather than leaking it.
        os.close(fd)
        with open(name, 'wb') as fh:
            writer = MzMLSerializer(fh,
                                    n_spectra=len(source_reader.index),
                                    deconvoluted=True)
            description = source_reader.file_description()
            writer.add_file_information(description)
            # Add two content terms, then remove one, so that both the add
            # and remove paths are checked on the file read back below.
            writer.add_file_contents("profile spectrum")
            writer.add_file_contents("centroid spectrum")
            writer.remove_file_contents("profile spectrum")

            instrument_configs = source_reader.instrument_configuration()
            for config in instrument_configs:
                writer.add_instrument_configuration(config)

            software_list = source_reader.software_list()
            for software in software_list:
                writer.add_software(software)

            data_processing_list = source_reader.data_processing()
            for dp in data_processing_list:
                writer.add_data_processing(dp)

            processing = writer.build_processing_method()
            writer.add_data_processing(processing)
            # Only the first bunch is processed and written.
            bunch = next(source_reader)
            bunch.precursor.pick_peaks()
            bunch.precursor.deconvolute()
            for product in bunch.products:
                product.pick_peaks()
                product.deconvolute()
            writer.save(bunch)
            writer.complete()
            fh.flush()
            writer.format()
        source_reader.reset()
        processed_reader = ProcessedMzMLDeserializer(
            _compression.get_opener(writer.handle.name))

        for a, b in zip(source_reader.instrument_configuration(),
                        processed_reader.instrument_configuration()):
            assert a.analyzers == b.analyzers
        for a, b in zip(source_reader, processed_reader):
            assert a.precursor.id == b.precursor.id
            assert (a.precursor.acquisition_information ==
                    b.precursor.acquisition_information)
            for an, bn in zip(a.products, b.products):
                assert an.id == bn.id
                assert abs(an.precursor_information.neutral_mass -
                           bn.precursor_information.neutral_mass) < 1e-6
        processed_reader.reset()
        description = processed_reader.file_description()
        assert "profile spectrum" not in description.contents
        assert "centroid spectrum" in description.contents
        sf = description.source_files[0]
        assert 'location' not in sf.parameters
        assert sf.parameters[
            'SHA-1'] == 'a2a091b82f27676da87a6c7d17cc90d2d90b8fbf'
        index = processed_reader.extended_index
        pinfo = index.find_msms_by_precursor_mass(
            ms_deisotope.neutral_mass(562.7397, 2))
        assert len(pinfo) > 0

        processed_reader.close()
        # Best-effort cleanup: attempt each removal independently so that a
        # failure on one file does not leave the other behind.
        for leftover in (name, processed_reader._index_file_name):
            try:
                os.remove(leftover)
            except OSError:
                pass
def main(path, reference_path):
    """Compare a processed file against a reference and report success.

    Both locations are opened with ``get_opener`` and wrapped in
    ``ProcessedMzMLDeserializer`` before being handed to ``compare_readers``.
    The confirmation message is printed only if that call returns normally.
    """
    observed_stream = get_opener(path)
    observed = ProcessedMzMLDeserializer(observed_stream)
    expected_stream = get_opener(reference_path)
    expected = ProcessedMzMLDeserializer(expected_stream)
    compare_readers(observed, expected)
    print("Processed Files Appear to Match Perfectly.")