Python MSFileLoader 예제들, ms_deisotope.MSFileLoader Python 예제들

예제 #1

0

파일 보기

파일: indexing.py 프로젝트: jacobomiranda/ms_deisotope

def _ensure_metadata_index(path):
    """Open `path` and load, or build and save, its extended scan index.

    The index file name is derived from `path` with
    ``quick_index.ExtendedScanIndex.index_file_name``. When that file exists it
    is deserialized; otherwise the reader is iterated in grouped mode to fill a
    new index, which is then serialized to that file.

    Returns
    -------
    tuple
        ``(reader, index)``
    """
    reader = ms_deisotope.MSFileLoader(path)
    index_path = quick_index.ExtendedScanIndex.index_file_name(path)

    if os.path.exists(index_path):
        with open(index_path, 'rt') as handle:
            index = quick_index.ExtendedScanIndex.deserialize(handle)
        return reader, index

    click.secho("Building Index For %s" % (path, ), fg='yellow', err=True)
    index = quick_index.ExtendedScanIndex()
    reader.reset()
    try:
        progress = click.progressbar(label='Building Index', length=len(reader))
    except TypeError:
        # Fall back to a spinner when a sized progress bar cannot be built
        progress = spinner(title="Building Index")
    with progress:
        for bunch in reader.make_iterator(grouped=True):
            # Number of scans carried by this bunch: the precursor (if any)
            # plus all of its products
            n_scans = int(bunch.precursor is not None) + len(bunch.products)
            index.add_scan_bunch(bunch)
            progress.update(n_scans)
    reader.reset()
    with open(index_path, 'wt') as handle:
        index.serialize(handle)
    return reader, index

예제 #2

0

파일 보기

파일: conversion.py 프로젝트: AspirinCode/ms_deisotope

def mzml(source, output, ms1_filters=None, msn_filters=None, pick_peaks=False):
    """Load `source` with ``MSFileLoader`` and hand it to ``to_mzml`` with `output`.

    `pick_peaks`, `ms1_filters` and `msn_filters` are forwarded unchanged.
    """
    conversion_options = dict(
        pick_peaks=pick_peaks,
        ms1_filters=ms1_filters,
        msn_filters=msn_filters,
    )
    reader = ms_deisotope.MSFileLoader(source)
    to_mzml(reader, output, **conversion_options)

예제 #3

0

파일 보기

파일: draw.py 프로젝트: mobiusklein/ms_deisotope

def draw_spectrum(ms_file_path, index=None, scan_id=None, time=None, output_path=None):
    """Draw one scan from `ms_file_path` and save the figure as a PNG.

    Exactly one of `index`, `scan_id` or `time` must be given; it selects the
    scan through the reader's ``get_scan_by_index``, ``get_scan_by_id`` or
    ``get_scan_by_time`` respectively.

    Parameters
    ----------
    ms_file_path : str
        Path of the mass spectrometry data file to open.
    index, scan_id, time : optional
        Mutually exclusive scan selectors.
    output_path : str, optional
        Where to save the image. Defaults to
        ``ms_file_path + '.<selector>-<value>.png'``.

    Raises
    ------
    click.UsageError
        If zero or more than one selector is provided.
    """
    # Count the selectors once. The previous `map` object was exhausted by the
    # first `sum`, so on Python 3 the "more than one" check could never fire.
    n_selectors = sum(opt is not None for opt in (index, scan_id, time))
    if n_selectors != 1:
        raise click.UsageError(
            "Only one of `index`, `scan-id`, and `time` should be provided")

    reader = ms_deisotope.MSFileLoader(ms_file_path)

    if index is not None:
        source = "index"
        key = index
        scan = reader.get_scan_by_index(index)
    elif scan_id is not None:
        source = 'scan_id'
        key = scan_id
        scan = reader.get_scan_by_id(scan_id)
    elif time is not None:
        source = 'time'
        key = time
        scan = reader.get_scan_by_time(time)

    click.echo("Drawing %r" % (scan, ))

    if output_path is None:
        output_path = ms_file_path + '.%s-%s.png' % (source, key)

    figure, axis = _make_figure()

    if scan.is_profile:
        # Profile data is drawn directly from the raw signal arrays
        scan.arrays.plot(ax=axis)
    else:
        # Otherwise pick peaks first and draw the resulting peak list
        scan.pick_peaks()
        plot.draw_peaklist(scan.peak_set, ax=axis)
    figure.savefig(output_path, bbox_inches='tight', dpi=120)

예제 #4

0

파일 보기

def main(infile, outfile):
    '''Write a tab-separated table of activation information for every
    non-MS1 scan in `infile`.

    Each row holds the scan id, MS level, precursor charge and m/z, the
    activation method and energy, and the two
    ``[Thermo Trailer Extra]HCD Energy`` annotations when present.
    '''
    reader = ms_deisotope.MSFileLoader(infile)
    header = [
        'scan_id', 'ms_level', 'charge', 'precursor_mz', 'activation_name',
        'energy', 'Thermo_Trailer_Extra_HCD_Energy_eV',
        'Thermo_Trailer_Extra_HCD_Energy'
    ]

    # The csv module wants a text stream without newline translation on
    # Python 3 and a binary stream otherwise
    on_py3 = sys.version_info.major == 3
    stream = open(outfile, 'wt', newline='') if on_py3 else open(outfile, 'wb')
    with stream:
        writer = csv.writer(stream, delimiter='\t')
        writer.writerow(header)
        for scan in reader.make_iterator(grouped=False):
            if scan.ms_level != 1:
                activation = scan.activation
                annotations = scan.annotations
                writer.writerow([
                    scan.id,
                    scan.ms_level,
                    scan.precursor_information.charge,
                    scan.precursor_information.mz,
                    activation.method,
                    activation.energy,
                    annotations.get('[Thermo Trailer Extra]HCD Energy eV'),
                    annotations.get('[Thermo Trailer Extra]HCD Energy'),
                ])

예제 #5

0

파일 보기

def test_quick_index():
    """Index the file at ``mzml_path`` and check the MS1 and MSn scan counts."""
    index, _interval_tree = quick_index.index(
        ms_deisotope.MSFileLoader(mzml_path))
    ms1_count, msn_count = len(index.ms1_ids), len(index.msn_ids)
    assert ms1_count == 14
    assert msn_count == 34

예제 #6

0

파일 보기

파일: indexing.py 프로젝트: AspirinCode/ms_deisotope

 def run():
     """Generator that extracts intervals from every path in the enclosing
     scope's ``paths``.

     Each chunk returned by ``quick_index.run_task_in_chunks`` is appended to
     ``interval_set`` and a ``0`` is yielded per chunk.
     """
     for path in paths:
         chunks = quick_index.run_task_in_chunks(
             ms_deisotope.MSFileLoader(path),
             processes,
             processes * 4,
             task=interval_extraction)
         for intervals in chunks:
             interval_set.extend(intervals)
             yield 0

예제 #7

0

파일 보기

파일: indexing.py 프로젝트: AspirinCode/ms_deisotope

def byte_index(paths):
    """Pre-build the byte offset file for each path in `paths`.

    Each file is opened with ``use_index=False`` and its reader's
    ``prebuild_byte_offset_file`` is called with the path. Readers without that
    attribute are reported and skipped.

    Parameters
    ----------
    paths : iterable of str
        Paths of the data files to index.
    """
    for path in paths:
        reader = ms_deisotope.MSFileLoader(path, use_index=False)
        try:
            fn = reader.prebuild_byte_offset_file
        except AttributeError:
            click.echo("\"%s\" does not support pre-indexing byte offsets" % (path,))
            # Skip only this file. Returning here would silently drop every
            # remaining path after the first unsupported one.
            continue
        fn(path)

예제 #8

0

파일 보기

파일: conversion.py 프로젝트: mobiusklein/ms_deisotope

def mzml(source,
         output,
         ms1_filters=None,
         msn_filters=None,
         pick_peaks=False,
         reprofile=False,
         compress=False,
         correct_precursor_mz=False,
         update_metadata=True):
    """Convert `source` into mzML written to `output`, forwarding the optional
    transformation flags to ``to_mzml``.

    A `source` of ``"-"`` is read from STDIN through a pre-buffered stream with
    the offset index disabled. With `compress` the output is gzip-wrapped and
    ``.gz`` is appended to the name unless it is already there or the output is
    ``"-"``. The mzML index is only written when the output is not a TTY.
    """
    from_stdin = source == "-"
    if from_stdin:
        click.secho(
            "Reading input file from STDIN, some file formats will not be supported.",
            err=True,
            fg='yellow')
        source = PreBufferedStreamReader(click.open_file(source, mode='rb'))

    # Cannot use the offset index of a file we cannot seek through
    reader = ms_deisotope.MSFileLoader(source, use_index=not from_stdin)

    if compress and not output.endswith(".gz") and output != '-':
        output += '.gz'
    stream = click.open_file(output, 'wb')
    if compress:
        stream = GzipFile(fileobj=stream, mode='wb')

    try:
        is_a_tty = stream.isatty()
    except AttributeError:  # Not all file-like objects have this method...
        is_a_tty = output == "-"

    with stream:
        to_mzml(reader,
                stream,
                pick_peaks=pick_peaks,
                reprofile=reprofile,
                ms1_filters=ms1_filters,
                msn_filters=msn_filters,
                correct_precursor_mz=correct_precursor_mz,
                write_index=not is_a_tty,
                update_metadata=update_metadata,
                close=False)

예제 #9

0

파일 보기

파일: conversion.py 프로젝트: jacobomiranda/ms_deisotope

def mgf(source, output, compress=False, msn_filters=None):
    """Convert a mass spectrometry data file to MGF. MGF can only represent centroid spectra
    and generally does not contain any MS1 information.

    Parameters
    ----------
    source : str
        Path of the input file, opened with ``MSFileLoader``.
    output : str
        Output path passed to ``click.open_file``. With `compress`, ``.gz`` is
        appended unless the name already ends with it or is ``"-"``.
    compress : bool
        Wrap the output stream in a ``GzipFile``.
    msn_filters : optional
        Forwarded to ``to_mgf``.
    """
    if compress:
        if not output.endswith(".gz") and output != '-':
            output += '.gz'
        stream = click.open_file(output, 'wb')
        stream = GzipFile(fileobj=stream, mode='wb')
    else:
        stream = click.open_file(output, 'wb')
    # Close the output deterministically, including when loading or conversion
    # fails. For gzip output this is what finalizes the compressed stream.
    with stream:
        reader = ms_deisotope.MSFileLoader(source)
        to_mgf(reader, stream, msn_filters=msn_filters)

예제 #10

0

파일 보기

파일: indexing.py 프로젝트: AspirinCode/ms_deisotope

def metadata_index(paths, processes=4):
    """Build and save the extended scan index for every path in `paths`.

    For each file the byte offset file is pre-built first when the reader
    supports it and reports that none exists. The index produced by
    ``quick_index.index(reader, processes)`` is then serialized to the file
    name the index derives from the path.
    """
    for path in paths:
        reader = ms_deisotope.MSFileLoader(path)
        try:
            build_offsets = reader.prebuild_byte_offset_file
            if not reader.source._check_has_byte_offset_file():
                build_offsets(path)
        except AttributeError:
            # Best effort: readers lacking these attributes are indexed as-is
            pass
        index, _interval_tree = quick_index.index(reader, processes)
        with open(index.index_file_name(path), 'w') as handle:
            index.serialize(handle)

예제 #11

0

파일 보기

 def open_ms_file(self, ms_file_uri, index_uri=None):
     """Return a ``(reader, lock)`` pair for `ms_file_uri`, opening it on first use.

     Cached entries in ``self.cache`` are returned as-is. Otherwise, files whose
     extension is in ``self.special_openers`` are given to ``MSFileLoader`` by
     URI; all others are opened through ``self._opener`` (wrapped in
     ``idzip.IdzipFile`` when the URI ends with ``"gz"``), together with the
     optional byte offset index opened from `index_uri`.
     """
     if ms_file_uri in self.cache:
         logger.info("Found %r in the cache", ms_file_uri)
         return self.cache[ms_file_uri]

     logger.info("Opening %r", ms_file_uri)
     extension = ms_file_uri.split("/")[-1].split('.')[-1]
     if extension in self.special_openers:
         reader = ms_deisotope.MSFileLoader(ms_file_uri)
     else:
         data_fh = self._opener(ms_file_uri)
         if ms_file_uri.endswith("gz"):
             data_fh = idzip.IdzipFile(fileobj=data_fh)
         if index_uri is None:
             index_fh = None
             logger.warning("Byte offset index is missing for %r",
                            ms_file_uri)
         else:
             index_fh = self._opener(index_uri, block_size=BLOCK_SIZE)
         reader = ms_deisotope.MSFileLoader(data_fh, index_file=index_fh)
     logger.info("Finished opening %r", ms_file_uri)

     entry = (reader, RLock())
     self.cache[ms_file_uri] = entry
     return entry

예제 #12

0

파일 보기

파일: indexing.py 프로젝트: jacobomiranda/ms_deisotope

def byte_index(paths):
    '''Build an external byte offset index for a mass spectrometry data file, saving time when
    opening the file with indexing enabled.

    Every path in `paths` is processed. A file whose reader has no
    ``prebuild_byte_offset_file`` attribute is reported and skipped.

    Supported Formats: mzML, mzXML
    '''
    for path in paths:
        click.echo("Indexing %s" % (path, ))
        reader = ms_deisotope.MSFileLoader(path, use_index=False)
        try:
            fn = reader.prebuild_byte_offset_file
        except AttributeError:
            click.echo("\"%s\" does not support pre-indexing byte offsets" % (path,))
            # Skip only this file. Returning here would silently drop every
            # remaining path after the first unsupported one.
            continue
        fn(path)

예제 #13

0

파일 보기

파일: conversion.py 프로젝트: mobiusklein/ms_deisotope

    def mzmlb(source,
              output,
              ms1_filters=None,
              msn_filters=None,
              pick_peaks=False,
              reprofile=False,
              correct_precursor_mz=False,
              update_metadata=True,
              compression=DEFAULT_COMPRESSOR):
        """Convert `source` into mzMLb format written to `output`, applying a collection of optional data
        transformations along the way.

        A `source` of ``"-"`` is read from STDIN through a pre-buffered stream
        with the offset index disabled. When `output` is ``None`` the output
        name is the base name of the reader's source file with a trailing
        ``.gz`` and the last extension removed, plus ``.mzMLb``.

        Raises
        ------
        ValueError
            If `output` is ``"-"``, since the output cannot be written to STDOUT.
        """
        use_index = True
        if source == "-":
            click.secho(
                "Reading input file from STDIN, some file formats will not be supported.",
                err=True,
                fg='yellow')
            source = PreBufferedStreamReader(click.open_file(source,
                                                             mode='rb'))
            # Cannot use the offset index of a file we cannot seek through
            use_index = False

        reader = ms_deisotope.MSFileLoader(source, use_index=use_index)
        if output == '-':
            raise ValueError("Cannot write HDF5 to STDOUT")
        elif output is None:
            out = reader.source_file_name
            if out.endswith('.gz'):
                out = out[:-3]
            out = os.path.basename(out)
            # Taking element 0 keeps the whole name when it has no extension;
            # unpacking two values raised ValueError for such names.
            output = out.rsplit(".", 1)[0] + '.mzMLb'

        to_mzml(reader,
                output,
                pick_peaks=pick_peaks,
                reprofile=reprofile,
                ms1_filters=ms1_filters,
                msn_filters=msn_filters,
                correct_precursor_mz=correct_precursor_mz,
                write_index=False,
                update_metadata=update_metadata,
                writer_type=MzMLbSerializer,
                compression=compression)

예제 #14

0

파일 보기

파일: indexing.py 프로젝트: AspirinCode/ms_deisotope

def _ensure_metadata_index(path):
    """Open `path` and load, or build and save, its extended scan index.

    When the index file derived from `path` exists it is deserialized;
    otherwise every bunch produced by the reader is added to a new index,
    which is then serialized to that file.

    Returns
    -------
    tuple
        ``(reader, index)``
    """
    reader = ms_deisotope.MSFileLoader(path)
    index_path = quick_index.ExtendedScanIndex.index_file_name(path)

    if os.path.exists(index_path):
        with open(index_path, 'rt') as handle:
            index = quick_index.ExtendedScanIndex.deserialize(handle)
        return reader, index

    click.secho("Building Index", fg='yellow', err=True)
    index = quick_index.ExtendedScanIndex()
    reader.reset()
    for scan_bunch in reader:
        index.add_scan_bunch(scan_bunch)
    reader.reset()
    with open(index_path, 'w') as handle:
        index.serialize(handle)
    return reader, index

예제 #15

0

파일 보기

파일: faims_split.py 프로젝트: mobiusklein/ms_deisotope

def main(source_file, output_prefix):
    '''Split `source_file` by the channel keys produced by
    ``query.FAIMSDemultiplexingIterator`` into separate mzML files.

    One writer is opened lazily per channel at
    ``output_prefix + "_<channel>.mzML"``. Processing stops when the
    demultiplexer is exhausted or every channel of a step is empty, after which
    all writers are closed.
    '''
    reader = ms_deisotope.MSFileLoader(source_file)

    sinks = {}
    total = len(reader)
    written = 0
    last_report = 0
    report_every = 1000
    demultiplexer = query.FAIMSDemultiplexingIterator(reader, 30)

    for heads in demultiplexer:
        n_empty = 0
        for channel, scan in heads.items():
            if scan is None:
                n_empty += 1
                continue
            if channel in sinks:
                writer = sinks[channel]
            else:
                click.echo("Opening new channel for CV %r" % (channel, ))
                handle = open(output_prefix + "_%r.mzML" % (channel, ), 'wb')
                writer = MzMLSerializer(handle, len(reader), deconvoluted=False)
                writer.copy_metadata_from(reader)
                method = writer.build_processing_method(
                    1, False, False, False, ["ion mobility seperation"])
                writer.add_data_processing(method)
                sinks[channel] = writer
            writer.save_scan(scan)
            written += 1
        if n_empty == len(heads):
            click.echo("All channels empty, finishing")
            break
        # Report progress roughly every `report_every` written spectra
        if written - last_report >= report_every:
            click.echo("Processed %d spectra (%0.3f%%)" % (written, written * 100.0 / total))
            last_report = written

    click.echo("Closing buffers.")
    for sink in sinks.values():
        click.echo("Closing %r" % (sink.handle.name, ))
        sink.close()

예제 #16

0

파일 보기

파일: conversion.py 프로젝트: jacobomiranda/ms_deisotope

def mzml(source,
         output,
         ms1_filters=None,
         msn_filters=None,
         pick_peaks=False,
         reprofile=False,
         compress=False,
         correct_precursor_mz=False,
         update_metadata=True):
    """Convert `source` into mzML written to `output`, forwarding the optional
    transformation flags to ``to_mzml``.

    With `compress` the output is gzip-wrapped and ``.gz`` is appended to the
    name unless it is already there or the output is ``"-"``. The mzML index is
    only written when the output is not a TTY.
    """
    reader = ms_deisotope.MSFileLoader(source)

    if compress and not output.endswith(".gz") and output != '-':
        output += '.gz'
    stream = click.open_file(output, 'wb')
    if compress:
        stream = GzipFile(fileobj=stream, mode='wb')

    try:
        is_a_tty = stream.isatty()
    except AttributeError:  # Not all file-like objects have this method...
        is_a_tty = output == "-"

    with stream:
        to_mzml(reader,
                stream,
                pick_peaks=pick_peaks,
                reprofile=reprofile,
                ms1_filters=ms1_filters,
                msn_filters=msn_filters,
                correct_precursor_mz=correct_precursor_mz,
                write_index=not is_a_tty,
                update_metadata=update_metadata)

예제 #17

0

파일 보기

def extract_reporter_ions(path, output_path=None, reagent='tmt11', error_tolerance=1e-5):
    """Write one CSV row of reporter ion values for every product scan in `path`.

    Each row holds the scan id, scan time, TIC, the "precursor purity"
    annotation, the precursor m/z, charge and intensity when precursor
    information is present, and whatever
    ``TMTReporterExtractor(reagent=reagent).extract`` returns for the scan.

    Parameters
    ----------
    path : str
        Input data file, opened with ``MSFileLoader``.
    output_path : str, optional
        Destination passed to ``click.open_file``. ``None`` is treated as
        ``"-"`` (STDOUT).
    reagent : str
        Reagent name given to ``TMTReporterExtractor``.
    error_tolerance : float
        Mass error tolerance. Values above ``1e-3`` are multiplied by ``1e-6``
        after a warning.
    """
    reader = ms_deisotope.MSFileLoader(path)

    if error_tolerance > 1e-3:
        # `Logger.warn` is a deprecated alias; `warning` is the supported name
        logger.warning(
            f"Error tolerance {error_tolerance} looks like it is not in units of PPM, multiplying by 1e-6")
        error_tolerance *= 1e-6

    reader.make_iterator(grouped=True)

    extractor = TMTReporterExtractor(reagent=reagent)
    channels = [t.name for t in extractor.signature_ions]
    columns = ['scan_id', 'scan_time', 'precursor_mz', 'precursor_charge', 'precursor_intensity', 'tic', 'precursor_purity'] + channels

    # The default `output_path=None` cannot be opened as a path; write to STDOUT
    out_file = click.open_file("-" if output_path is None else output_path, mode='wb')
    out_file_wrapper = io.TextIOWrapper(out_file, encoding='utf8', newline='')
    writer = csv.DictWriter(out_file_wrapper, columns)
    try:
        writer.writeheader()

        for _precursor, products in reader:
            for product in products:
                pinfo = product.precursor_information
                if product.is_profile:
                    product.pick_peaks()
                row = {
                    "scan_id": product.id,
                    "scan_time": product.scan_time,
                    "tic": product.tic(),
                    "precursor_purity": product.annotations.get("precursor purity")
                }
                if pinfo is not None:
                    row['precursor_mz'] = pinfo.mz
                    # Non-integer charge values are written as empty
                    row['precursor_charge'] = pinfo.charge if isinstance(pinfo.charge, int) else None
                    row['precursor_intensity'] = pinfo.intensity

                row.update(extractor.extract(
                    product, error_tolerance=error_tolerance))
                writer.writerow(row)
    finally:
        # Push buffered text through to the underlying stream. Flushing rather
        # than closing avoids closing STDOUT when that is the destination.
        out_file_wrapper.flush()

예제 #18

0

파일 보기

파일: draw.py 프로젝트: jacobomiranda/ms_deisotope

def draw_tic(path, output_path=None, start_time=None, end_time=None):
    """Draw the Total Ion Chromatogram (TIC), the total signal at each time point.

    Only MS1 scans contribute. Iteration starts at `start_time` and stops at
    the first scan whose time exceeds `end_time`.

    Parameters
    ----------
    path : str
        Input data file, opened with ``MSFileLoader``.
    output_path : str, optional
        Where to save the image. Defaults to ``path + '.tic.png'``.
    start_time : float, optional
        Scan time to start from. Defaults to 0.
    end_time : float, optional
        Scan time to stop after. Defaults to no limit.
    """
    if output_path is None:
        output_path = path + '.tic.png'
    if start_time is None:
        start_time = 0
    if end_time is None:
        end_time = float('inf')

    figure, axis = _make_figure()

    reader = ms_deisotope.MSFileLoader(path)
    reader.start_from_scan(rt=start_time, grouped=False)

    time = array('d')
    intensity = array('d')

    bar = click.progressbar(reader,
                            item_show_func=lambda x: str(x.id)
                            if x is not None else '')
    with bar:
        for scan in bar:
            # Honor `end_time`, which was previously accepted but never applied.
            # Assumes scans are produced in increasing scan-time order.
            if scan.scan_time > end_time:
                break
            if scan.ms_level != 1:
                continue
            time.append(scan.scan_time)
            intensity.append(scan.arrays.intensity.sum())

    total_ion_current = np.sum(intensity)
    click.echo("Total Ion Current: %e" % total_ion_current)

    axis.plot(time, intensity)
    axis.set_xlabel("Scan Time (Min)", fontsize=16)
    axis.set_ylabel("Relative Intensity", fontsize=16)
    ylim = axis.get_ylim()
    axis.set_ylim(-10, ylim[1])
    # With no MS1 scans collected there are no end points to pad
    if time:
        axis.set_xlim(time[0] - 2, time[-1] + 2)
    figure.text(0.15, 0.8, "%0.3e" % total_ion_current, ha='left')
    figure.savefig(output_path, bbox_inches='tight', dpi=120)

예제 #19

0

파일 보기

파일: mzml.py 프로젝트: mstim/glycresoft

def msfile_info(ms_file):
    """Print summary statistics for a processed mzML file.

    When the reader has no index file, an extended index is built with
    ``quick_index.index``, attached to the reader and serialized to the
    reader's index file name. The summary covers MS1/MSn scan counts, the first
    and last MSn scan times, precursor counts per integer charge state, and the
    number of defaulted and orphan MSn scans.
    """
    reader = ProcessedMzMLDeserializer(ms_file)
    if not reader.has_index_file():
        index, _intervals = quick_index.index(
            ms_deisotope.MSFileLoader(ms_file))
        reader.extended_index = index
        with open(reader._index_file_name, 'w') as handle:
            index.serialize(handle)

    click.echo("Name: %s" % (os.path.basename(ms_file), ))
    click.echo("MS1 Scans: %d" % (len(reader.extended_index.ms1_ids), ))
    click.echo("MSn Scans: %d" % (len(reader.extended_index.msn_ids), ))

    defaulted_count = 0
    orphan_count = 0
    charge_counts = defaultdict(int)
    earliest = float('inf')
    latest = 0

    for info in reader.extended_index.msn_ids.values():
        defaulted_count += info.get('defaulted', False)
        orphan_count += info.get('orphan', False)
        charge_counts[info['charge']] += 1
        scan_time = info['scan_time']
        earliest = min(earliest, scan_time)
        latest = max(latest, scan_time)

    click.echo("First MSn Scan: %0.2f Minutes" % (earliest, ))
    click.echo("Last MSn Scan: %0.2f Minutes" % (latest, ))

    for charge, count in sorted(charge_counts.items()):
        # Only integer charge states are reported
        if isinstance(charge, int):
            click.echo("Precursors with Charge State %d: %d" % (charge, count))

    click.echo("Defaulted MSn Scans: %d" % (defaulted_count, ))
    click.echo("Orphan MSn Scans: %d" % (orphan_count, ))

예제 #20

0

파일 보기

파일: mzml.py 프로젝트: mstim/glycresoft

def oxonium_signature(ms_file, g_score_threshold=0.05):
    """Print a tab-separated line for every MSn scan whose G-score reaches
    `g_score_threshold`.

    When the reader has no index file, an extended index is built, attached to
    the reader and serialized to the reader's index file name. Each printed line
    holds the scan id, precursor neutral mass, precursor charge, G-score and the
    signature ion score against a fixed reference glycan composition.
    """
    reader = ProcessedMzMLDeserializer(ms_file)
    if not reader.has_index_file():
        click.secho("Building temporary index...", fg='yellow')
        index, _intervals = quick_index.index(
            ms_deisotope.MSFileLoader(ms_file))
        reader.extended_index = index
        with open(reader._index_file_name, 'w') as handle:
            index.serialize(handle)

    from glycan_profiling.tandem.glycan.scoring.signature_ion_scoring import SignatureIonScorer
    from glycan_profiling.tandem.oxonium_ions import gscore_scanner
    reference_composition = glypy.GlycanComposition.parse(
        "{Fuc:1; Hex:5; HexNAc:4; Neu5Ac:2}")
    for scan_id in reader.extended_index.msn_ids.keys():
        scan = reader.get_scan_by_id(scan_id)
        gscore = gscore_scanner(scan.deconvoluted_peak_set)
        if not gscore >= g_score_threshold:
            continue
        match = SignatureIonScorer.evaluate(scan, reference_composition)
        precursor = scan.precursor_information
        fields = (scan_id, precursor.neutral_mass, precursor.charge,
                  gscore, match.score)
        click.echo("%s\t%f\t%r\t%f\t%f" % fields)

예제 #21

0

파일 보기

파일: conversion.py 프로젝트: mobiusklein/ms_deisotope

def mgf(source, output, compress=False, msn_filters=None):
    """Convert a mass spectrometry data file to MGF. MGF can only represent centroid spectra
    and generally does not contain any MS1 information.

    Parameters
    ----------
    source : str
        Path of the input file, or ``"-"`` to read from STDIN through a
        pre-buffered stream with the offset index disabled.
    output : str
        Output path passed to ``click.open_file``. With `compress`, ``.gz`` is
        appended unless the name already ends with it or is ``"-"``.
    compress : bool
        Wrap the output stream in a ``GzipFile``.
    msn_filters : optional
        Forwarded to ``to_mgf``.
    """
    if compress:
        if not output.endswith(".gz") and output != '-':
            output += '.gz'
        stream = click.open_file(output, 'wb')
        stream = GzipFile(fileobj=stream, mode='wb')
    else:
        stream = click.open_file(output, 'wb')
    # Close the output deterministically, including when loading or conversion
    # fails. For gzip output this is what finalizes the compressed stream.
    with stream:
        use_index = True
        if source == "-":
            click.secho(
                "Reading input file from STDIN, some file formats will not be supported.",
                err=True,
                fg='yellow')
            source = PreBufferedStreamReader(click.open_file(source, mode='rb'))
            # Cannot use the offset index of a file we cannot seek through
            use_index = False

        reader = ms_deisotope.MSFileLoader(source, use_index=use_index)
        to_mgf(reader, stream, msn_filters=msn_filters)

예제 #22

0

파일 보기

파일: view.py 프로젝트: mobiusklein/ms_deisotope

 def set_ms_file(self, value, populate=True):
     """Store `value` as the file name, open a reader on ``self.ms_file_name``
     and, unless `populate` is false, call ``self.populate()``.
     """
     self._ms_file_name = value
     self.reader = ms_deisotope.MSFileLoader(self.ms_file_name)
     if not populate:
         return
     self.populate()

예제 #23

0

파일 보기

def ms1_spectrum_diagnostics(path, output_path=None):
    '''Collect diagnostic information from MS1 spectra and write it as CSV.

    One record is produced per precursor scan of the grouped iteration. A
    record's duty cycle is the time to the next precursor scan; for the last
    record it is the time to its latest product scan, when it has any. Output
    goes to `output_path`, or to STDOUT when that is ``None``.
    '''
    reader = ms_deisotope.MSFileLoader(path)

    reader.make_iterator(grouped=True)

    fieldnames = [
        'scan_id', 'scan_index', 'scan_time', 'duty_cycle', 'tic',
        'base_peak_mz', 'base_peak_intensity', 'data_point_count',
        'injection_time', 'n_ms2_scans'
    ]
    records = []
    products = None
    previous_ms1_time = None
    prog = progress(length=len(reader), label='Processing Scans',
                    file=sys.stderr, item_show_func=lambda x: x.id if x else '')
    with prog:
        for precursor, products in reader:
            ms1_time = precursor.scan_time
            if previous_ms1_time is not None:
                # The previous record's duty cycle is only known once the
                # next MS1 scan is seen
                records[-1]['duty_cycle'] = ms1_time - previous_ms1_time
            previous_ms1_time = ms1_time

            base_peak = precursor.base_peak()
            acquisition_info = precursor.acquisition_information
            if acquisition_info:
                injection_time = acquisition_info[0].injection_time
            else:
                injection_time = np.nan

            records.append({
                "scan_id": precursor.id,
                "scan_index": precursor.index,
                "scan_time": precursor.scan_time,
                "duty_cycle": np.nan,
                "tic": precursor.tic(),
                "base_peak_mz": base_peak.mz,
                "base_peak_intensity": base_peak.intensity,
                "data_point_count": precursor.arrays.mz.size,
                "injection_time": injection_time,
                "n_ms2_scans": sum(1 for p in products if p.ms_level == 2)
            })
            prog.current_item = precursor
            prog.update(1 + len(products))

    # Close out the final record using its own products
    if previous_ms1_time is not None and products:
        latest_product_time = max(p.scan_time for p in products)
        records[-1]['duty_cycle'] = latest_product_time - previous_ms1_time

    if output_path is None:
        outfh = click.open_file("-", mode='wb')
    else:
        outfh = io.open(output_path, mode='wb')
    stream = io.TextIOWrapper(outfh, encoding='utf8', newline='') if six.PY3 else outfh
    writer = csv.DictWriter(stream, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(records)
    stream.flush()

예제 #24

0

파일 보기

def example_scan_bunch():
    """Return the first item produced by a reader opened on the
    ``20150710_3um_AGP_001_29_30.mzML.gz`` data file.
    """
    import ms_deisotope
    reader = ms_deisotope.MSFileLoader(
        datafile("20150710_3um_AGP_001_29_30.mzML.gz"))
    # Use the builtin rather than the Python 2 style `.next()` method so this
    # relies only on the standard iterator protocol.
    return next(reader)

예제 #25

0

파일 보기

파일: view.py 프로젝트: mobiusklein/ms_deisotope

 def ms_file_name(self, value):
     """Switch to the data file `value` when it names an existing path.

     ``None``, the empty string and non-existent paths are ignored. Otherwise
     the name is stored, a reader is opened on it and ``self.populate()`` is
     called.
     """
     if value in (None, '') or not os.path.exists(value):
         return
     print("Loading %r" % value)
     self._ms_file_name = value
     self.reader = ms_deisotope.MSFileLoader(self.ms_file_name)
     self.populate()

예제 #26

0

파일 보기

파일: test_feature_map.py 프로젝트: mobiusklein/ms_deisotope

 def setUpClass(cls):
     """Build the feature forest from ``complex_compressed_mzml`` once and
     store it on the class as ``features``.
     """
     cls.features = feature_map.LCMSFeatureForest.from_reader(
         ms_deisotope.MSFileLoader(complex_compressed_mzml))

예제 #27

0

파일 보기

파일: test_query.py 프로젝트: mobiusklein/ms_deisotope

 def _get_reader(self):
     """Open and return a new reader on ``self.complex_compressed_mzml``."""
     reader = ms_deisotope.MSFileLoader(self.complex_compressed_mzml)
     return reader

예제 #28

0

파일 보기

파일: conversion.py 프로젝트: AspirinCode/ms_deisotope

def mgf(source, output, msn_filters=None):
    """Convert a mass spectrometry data file to MGF. MGF can only represent centroid spectra
    and generally does not contain any MS1 information.

    `source` is opened with ``MSFileLoader`` and passed to ``to_mgf`` along
    with `output` and `msn_filters`.
    """
    to_mgf(ms_deisotope.MSFileLoader(source), output, msn_filters=msn_filters)

예제 #29

0

파일 보기

파일: indexing.py 프로젝트: AspirinCode/ms_deisotope

def describe(path):
    """Print a description of the mass spectrometry data file at `path`.

    Reports the file format and ID format, whether the reader supports random
    access (with the first and last scan when it does) and the file contents
    when the reader exposes ``file_description``. When the extended index file
    for `path` exists, scan counts, the MSn time range, precursor counts per
    integer charge state and defaulted/orphan counts are printed as well.

    Returns
    -------
    int or None
        ``-1`` when the file cannot be opened or is not recognised as a mass
        spectrometry data file, otherwise ``None``.
    """
    click.echo("Describing \"%s\"" % (path,))
    try:
        sf = SourceFile.from_path(path)
    except IOError:
        click.echo("Could not open", err=True)
        # Nothing more can be described without the source file. Falling
        # through used to fail on the unbound `sf` below.
        return -1
    if sf.file_format is None:
        click.echo("It doesn't appear to be a mass spectrometry data file")
        return -1
    click.echo("File Format: %s" % (sf.file_format, ))
    click.echo("ID Format: %s" % (sf.id_format, ))
    reader = ms_deisotope.MSFileLoader(path)
    if isinstance(reader, RandomAccessScanSource):
        click.echo("Format Supports Random Access: True")
        first_scan = reader[0]
        last_scan = reader[-1]
        click.echo("First Scan: %s at %0.3f minutes" % (first_scan.id, first_scan.scan_time))
        click.echo("Last Scan: %s at %0.3f minutes" % (last_scan.id, last_scan.scan_time))
    else:
        click.echo("Format Supports Random Access: False")
    try:
        finfo = reader.file_description()
        click.echo("Contents:")
        for key in finfo.contents:
            click.echo("    %s" % (key, ))
    except AttributeError:
        # Best effort: readers without a file description are skipped
        pass
    index_file_name = quick_index.ExtendedScanIndex.index_file_name(path)
    # Extra introspection if the extended index is available
    if os.path.exists(index_file_name):
        with open(index_file_name, 'rt') as fh:
            index = quick_index.ExtendedScanIndex.deserialize(fh)
        ms1_scans = len(index.ms1_ids)
        msn_scans = len(index.msn_ids)
        click.echo("MS1 Scans: %d" % (ms1_scans, ))
        click.echo("MSn Scans: %d" % (msn_scans, ))
        n_defaulted = 0
        n_orphan = 0

        charges = Counter()
        first_msn = float('inf')
        last_msn = 0
        for scan_info in index.msn_ids.values():
            n_defaulted += scan_info.get('defaulted', False)
            n_orphan += scan_info.get('orphan', False)
            charges[scan_info['charge']] += 1
            rt = scan_info['scan_time']
            if rt < first_msn:
                first_msn = rt
            if rt > last_msn:
                last_msn = rt
        click.echo("First MSn Scan: %0.3f minutes" % (first_msn,))
        click.echo("Last MSn Scan: %0.3f minutes" % (last_msn,))
        for charge, count in sorted(charges.items()):
            # Only integer charge states are reported
            if not isinstance(charge, int):
                continue
            click.echo("Precursors with Charge State %d: %d" % (charge, count))
        if n_defaulted > 0:
            click.echo("Defaulted MSn Scans: %d" % (n_defaulted,))
        if n_orphan > 0:
            click.echo("Orphan MSn Scans: %d" % (n_orphan,))

예제 #30

0

파일 보기

파일: waters_cyclic_deconvolute.py 프로젝트: mobiusklein/ms_deisotope

def precursor_product_deconvolution(input_path, output_path):
    '''Takes a deconvolved LC-IM-MSe run and generates pseudospectra for
    precursor ions using correlated product ion features enclosed in the IM
    and RT dimensions.

    The run at `input_path` is read frame by frame, precursor and product
    features are collected into two separate feature forests, both forests are
    smoothed and split, a precursor-product correlation graph is built from
    them, and the spectra that graph yields are written as mzML to
    `output_path`. Log output goes to a file named after the input file.
    '''
    # Log to "precursor_product_deconvolution_<input base name>.log",
    # overwriting any previous log of the same name
    logging.basicConfig(level="INFO",
                        format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p',
                        filemode='w',
                        filename="precursor_product_deconvolution_%s.log" %
                        (os.path.basename(input_path).rsplit(".", 1)[0]))

    print(f"Running on PID {os.getpid()}")

    logging.getLogger().addHandler(_default_log_handler())
    input_path = str(input_path)

    scan_reader = ms_deisotope.MSFileLoader(input_path)
    frame_reader = ProcessedGeneric3DIonMobilityFrameSource(scan_reader)

    # Precursor and product features are accumulated separately
    precursor_forest = IonMobilityProfileDeconvolutedLCMSFeatureForest()
    product_forest = IonMobilityProfileDeconvolutedLCMSFeatureForest()

    frame_reader.make_frame_iterator(grouped='mse')
    logger.info("Constructing LC-IM-MSe features")
    for i, bunch in enumerate(frame_reader):
        # Bunches without a precursor frame are skipped entirely
        if bunch.precursor is None:
            continue
        if i % 100 == 0:
            logger.info("... Processing %r %0.3f", bunch.precursor.id,
                        bunch.precursor.time)
        # NOTE(review): the precursor is already known to be non-None here, so
        # this check only matters if a precursor object can be falsy - confirm
        if bunch.precursor:
            for f in bunch.precursor.deconvoluted_features:
                p = IonMobilityProfileDeconvolutedPeakSolution.from_feature(f)
                precursor_forest.handle_peak(p, bunch.precursor.time)

        for product in bunch.products:
            for f in product.deconvoluted_features:
                p = IonMobilityProfileDeconvolutedPeakSolution.from_feature(f)
                product_forest.handle_peak(p, product.time)

    logger.info("Smoothing precursors")
    precursor_forest.smooth_overlaps().split_sparse(0.5)
    logger.info("Smoothing products")
    product_forest.smooth_overlaps().split_sparse(0.5)

    logger.info("Building precursor-product graph")
    precursor_graph = IonMobilityProfileDeconvolutedFeatureGraph(
        precursor_forest)
    product_graph = IonMobilityProfileDeconvolutedFeatureGraph(product_forest)
    corr_graph = PrecursorProductCorrelationGraph(precursor_graph,
                                                  product_graph,
                                                  max_edge_count_per_node=1000)
    corr_graph.build()

    logger.info("Generating pseudo-spectra")
    # NOTE(review): `fh` is not closed explicitly here; presumably the
    # serializer's context manager closes it - confirm
    fh = open(output_path, 'wb')
    with MzMLSerializer(fh,
                        len(scan_reader),
                        sample_name=os.path.basename(output_path)) as writer:
        # Carry the source file's metadata over before writing any spectra
        writer.copy_metadata_from(scan_reader)
        proc_method = writer.build_processing_method()
        writer.add_data_processing(proc_method)
        for bunch in corr_graph.iterspectra():
            writer.save(bunch)