Esempio n. 1
0
def describe(path):
    '''Produces a minimal textual description of a mass spectrometry data file.
    '''
    click.echo("Describing \"%s\"" % (path,))
    try:
        sf = SourceFile.from_path(path)
    except IOError:
        click.echo("Could not open", err=True)
    if sf.file_format is None:
        click.echo("It doesn't appear to be a mass spectrometry data file")
        return -1
    click.echo("File Format: %s" % (sf.file_format, ))
    click.echo("ID Format: %s" % (sf.id_format, ))
    reader = MSFileLoader(path)
    if isinstance(reader, RandomAccessScanSource):
        click.echo("Format Supports Random Access: True")
        first_scan = reader[0]
        last_scan = reader[-1]
        click.echo("First Scan: %s at %0.3f minutes" % (first_scan.id, first_scan.scan_time))
        click.echo("Last Scan: %s at %0.3f minutes" % (last_scan.id, last_scan.scan_time))
    else:
        click.echo("Format Supports Random Access: False")
    try:
        finfo = reader.file_description()
        click.echo("Contents:")
        for key in finfo.contents:
            click.echo("    %s" % (key, ))
    except AttributeError:
        pass
    index_file_name = quick_index.ExtendedScanIndex.index_file_name(path)
    # Extra introspection if the extended index is available
    if os.path.exists(index_file_name):
        with open(index_file_name, 'rt') as fh:
            index = quick_index.ExtendedScanIndex.deserialize(fh)
        ms1_scans = len(index.ms1_ids)
        msn_scans = len(index.msn_ids)
        click.echo("MS1 Scans: %d" % (ms1_scans, ))
        click.echo("MSn Scans: %d" % (msn_scans, ))
        n_defaulted = 0
        n_orphan = 0

        charges = Counter()
        first_msn = float('inf')
        last_msn = 0
        for scan_info in index.msn_ids.values():
            n_defaulted += scan_info.get('defaulted', False)
            n_orphan += scan_info.get('orphan', False)
            charges[scan_info['charge']] += 1
            rt = scan_info['scan_time']
            if rt < first_msn:
                first_msn = rt
            if rt > last_msn:
                last_msn = rt
        click.echo("First MSn Scan: %0.3f minutes" % (first_msn,))
        click.echo("Last MSn Scan: %0.3f minutes" % (last_msn,))
        for charge, count in sorted(charges.items()):
            if not isinstance(charge, int):
                continue
            click.echo("Precursors with Charge State %d: %d" % (charge, count))
        if n_defaulted > 0:
            click.echo("Defaulted MSn Scans: %d" % (n_defaulted,))
        if n_orphan > 0:
            click.echo("Orphan MSn Scans: %d" % (n_orphan,))
Esempio n. 2
0
    def configure_storage(cls, path=None, name=None, source=None):
        if path is not None:
            if name is None:
                sample_name = os.path.basename(path)
            else:
                sample_name = name
        else:
            path = "processed.mzML"
        if source is not None:
            reader = MSFileLoader(source.scan_source)
            n_spectra = len(reader.index)
            deconvoluting = source.deconvoluting
            inst = cls(path, sample_name, n_spectra=n_spectra, deconvoluted=deconvoluting)
            try:
                description = reader.file_description()
            except AttributeError:
                description = FileInformation()
            source_file_metadata = MetadataSourceFile.from_path(source.scan_source)
            inst.serializer.add_file_information(description)
            try:
                inst.serializer.remove_file_contents("profile spectrum")
            except KeyError:
                pass
            inst.serializer.add_file_contents("centroid spectrum")
            if source_file_metadata not in description.source_files:
                inst.serializer.add_source_file(source_file_metadata)
            try:
                instrument_configs = reader.instrument_configuration()
                for config in instrument_configs:
                    inst.serializer.add_instrument_configuration(config)
            except Exception as e:
                log_handle.error(
                    "An error occurred while writing instrument configuration", e)
            for trans in source.ms1_peak_picking_args.get("transforms"):
                inst.register_parameter("parameter: ms1-%s" % trans.__class__.__name__, repr(trans))
            if deconvoluting:
                if source.ms1_deconvolution_args.get("averagine"):
                    inst.register_parameter(
                        "parameter: ms1-averagine", repr(source.ms1_deconvolution_args.get("averagine")))
                if source.ms1_deconvolution_args.get("scorer"):
                    inst.register_parameter(
                        "parameter: ms1-scorer", repr(source.ms1_deconvolution_args.get("scorer")))
                if source.ms1_averaging > 0:
                    inst.register_parameter("parameter: ms1-averaging", repr(source.ms1_averaging))
                if source.ignore_tandem_scans:
                    inst.register_parameter("parameter: ignore-tandem-scans", "")
                if source.extract_only_tandem_envelopes:
                    inst.register_parameter("parameter: extract-only-tandem-envelopes", "")

            if source.msn_peak_picking_args is not None:
                for trans in source.msn_peak_picking_args.get("transforms"):
                    inst.register_parameter("parameter: msn-%s" % trans.__class__.__name__, repr(trans))
            if deconvoluting:
                if source.msn_deconvolution_args.get("averagine"):
                    inst.register_parameter(
                        "parameter: msn-averagine", repr(source.msn_deconvolution_args.get("averagine")))
                if source.msn_deconvolution_args.get("scorer"):
                    inst.register_parameter(
                        "parameter: msn-scorer", repr(source.msn_deconvolution_args.get("scorer")))
            data_processing = inst.serializer.build_processing_method()
            inst.serializer.add_data_processing(data_processing)
        else:
            n_spectra = 2e5
            inst = cls(path, sample_name, n_spectra=n_spectra)
        # Force marshalling of controlled vocabularies early.
        inst.serializer.writer.param("32-bit float")
        return inst
Esempio n. 3
0
def describe(path):
    click.echo("Describing \"%s\"" % (path,))
    try:
        sf = SourceFile.from_path(path)
    except IOError:
        click.echo("Could not open", err=True)
    if sf.file_format is None:
        click.echo("It doesn't appear to be a mass spectrometry data file")
        return -1
    click.echo("File Format: %s" % (sf.file_format, ))
    click.echo("ID Format: %s" % (sf.id_format, ))
    reader = ms_deisotope.MSFileLoader(path)
    if isinstance(reader, RandomAccessScanSource):
        click.echo("Format Supports Random Access: True")
        first_scan = reader[0]
        last_scan = reader[-1]
        click.echo("First Scan: %s at %0.3f minutes" % (first_scan.id, first_scan.scan_time))
        click.echo("Last Scan: %s at %0.3f minutes" % (last_scan.id, last_scan.scan_time))
    else:
        click.echo("Format Supports Random Access: False")
    try:
        finfo = reader.file_description()
        click.echo("Contents:")
        for key in finfo.contents:
            click.echo("    %s" % (key, ))
    except AttributeError:
        pass
    index_file_name = quick_index.ExtendedScanIndex.index_file_name(path)
    # Extra introspection if the extended index is available
    if os.path.exists(index_file_name):
        with open(index_file_name, 'rt') as fh:
            index = quick_index.ExtendedScanIndex.deserialize(fh)
        ms1_scans = len(index.ms1_ids)
        msn_scans = len(index.msn_ids)
        click.echo("MS1 Scans: %d" % (ms1_scans, ))
        click.echo("MSn Scans: %d" % (msn_scans, ))
        n_defaulted = 0
        n_orphan = 0

        charges = Counter()
        first_msn = float('inf')
        last_msn = 0
        for scan_info in index.msn_ids.values():
            n_defaulted += scan_info.get('defaulted', False)
            n_orphan += scan_info.get('orphan', False)
            charges[scan_info['charge']] += 1
            rt = scan_info['scan_time']
            if rt < first_msn:
                first_msn = rt
            if rt > last_msn:
                last_msn = rt
        click.echo("First MSn Scan: %0.3f minutes" % (first_msn,))
        click.echo("Last MSn Scan: %0.3f minutes" % (last_msn,))
        for charge, count in sorted(charges.items()):
            if not isinstance(charge, int):
                continue
            click.echo("Precursors with Charge State %d: %d" % (charge, count))
        if n_defaulted > 0:
            click.echo("Defaulted MSn Scans: %d" % (n_defaulted,))
        if n_orphan > 0:
            click.echo("Orphan MSn Scans: %d" % (n_orphan,))
Esempio n. 4
0
    def configure_storage(cls, path=None, name=None, source=None):
        if path is not None:
            if name is None:
                sample_name = os.path.basename(path)
            else:
                sample_name = name
        else:
            path = "processed.mzML"
        if source is not None:
            reader = MSFileLoader(source.scan_source)
            n_spectra = len(reader.index)
            deconvoluting = source.deconvoluting
            inst = cls(path,
                       sample_name,
                       n_spectra=n_spectra,
                       deconvoluted=deconvoluting)
            try:
                description = reader.file_description()
            except AttributeError:
                description = FileInformation()
            source_file_metadata = MetadataSourceFile.from_path(
                source.scan_source)
            inst.serializer.add_file_information(description)
            try:
                inst.serializer.remove_file_contents("profile spectrum")
            except KeyError:
                pass
            inst.serializer.add_file_contents("centroid spectrum")
            if source_file_metadata not in description.source_files:
                inst.serializer.add_source_file(source_file_metadata)
            try:
                instrument_configs = reader.instrument_configuration()
                for config in instrument_configs:
                    inst.serializer.add_instrument_configuration(config)
            except Exception as e:
                inst.error(
                    "An error occurred while writing instrument configuration",
                    e)
            for trans in source.ms1_peak_picking_args.get("transforms", []):
                inst.register_parameter(
                    "parameter: ms1-%s" % trans.__class__.__name__,
                    repr(trans))
            if deconvoluting:
                if source.ms1_deconvolution_args.get("averagine"):
                    inst.register_parameter(
                        "parameter: ms1-averagine",
                        repr(source.ms1_deconvolution_args.get("averagine")))
                if source.ms1_deconvolution_args.get("scorer"):
                    inst.register_parameter(
                        "parameter: ms1-scorer",
                        repr(source.ms1_deconvolution_args.get("scorer")))
                if source.ms1_averaging > 0:
                    inst.register_parameter("parameter: ms1-averaging",
                                            repr(source.ms1_averaging))
                if source.ignore_tandem_scans:
                    inst.register_parameter("parameter: ignore-tandem-scans",
                                            "")
                if source.extract_only_tandem_envelopes:
                    inst.register_parameter(
                        "parameter: extract-only-tandem-envelopes", "")

            if source.msn_peak_picking_args is not None:
                for trans in source.msn_peak_picking_args.get(
                        "transforms", []):
                    inst.register_parameter(
                        "parameter: msn-%s" % trans.__class__.__name__,
                        repr(trans))
            if deconvoluting:
                if source.msn_deconvolution_args.get("averagine"):
                    inst.register_parameter(
                        "parameter: msn-averagine",
                        repr(source.msn_deconvolution_args.get("averagine")))
                if source.msn_deconvolution_args.get("scorer"):
                    inst.register_parameter(
                        "parameter: msn-scorer",
                        repr(source.msn_deconvolution_args.get("scorer")))
            data_processing = inst.serializer.build_processing_method()
            inst.serializer.add_data_processing(data_processing)
        else:
            n_spectra = 2e5
            inst = cls(path, sample_name, n_spectra=n_spectra)
        # Force marshalling of controlled vocabularies early.
        inst.serializer.writer.param("32-bit float")
        return inst
Esempio n. 5
0
def describe(path, diagnostics=False):
    '''Produces a minimal textual description of a mass spectrometry data file.
    '''
    click.echo("Describing \"%s\"" % (path,))
    try:
        sf = SourceFile.from_path(path)
    except IOError:
        raise click.Abort("Could not open file \"%s\"" % (path, ), err=True)

    if sf.file_format is None:
        raise click.Abort("\"%s\" doesn't appear to be a mass spectrometry data file" % (path, ))
    click.echo("File Format: %s" % (sf.file_format, ))
    click.echo("ID Format: %s" % (sf.id_format, ))
    reader = MSFileLoader(path)
    if isinstance(reader, RandomAccessScanSource):
        click.echo("Format Supports Random Access: True")
        first_scan = reader[0]
        last_scan = reader[-1]
        click.echo("First Scan: %s at %0.3f minutes" % (first_scan.id, first_scan.scan_time))
        click.echo("Last Scan: %s at %0.3f minutes" % (last_scan.id, last_scan.scan_time))
    else:
        click.echo("Format Supports Random Access: False")
    try:
        finfo = reader.file_description()
        click.echo("Contents:")
        for key in finfo.contents:
            click.echo("    %s" % (key, ))
    except AttributeError:
        pass
    index_file_name = quick_index.ExtendedScanIndex.index_file_name(path)
    # Extra introspection if the extended index is available
    if os.path.exists(index_file_name):
        with open(index_file_name, 'rt') as fh:
            index = quick_index.ExtendedScanIndex.deserialize(fh)
        ms1_scans = len(index.ms1_ids)
        msn_scans = len(index.msn_ids)
        click.echo("MS1 Scans: %d" % (ms1_scans, ))
        click.echo("MSn Scans: %d" % (msn_scans, ))
        n_defaulted = 0
        n_orphan = 0

        charges = Counter()
        first_msn = float('inf')
        last_msn = 0
        for scan_info in index.msn_ids.values():
            n_defaulted += scan_info.get('defaulted', False)
            n_orphan += scan_info.get('orphan', False)
            charges[scan_info['charge']] += 1
            rt = scan_info['scan_time']
            if rt < first_msn:
                first_msn = rt
            if rt > last_msn:
                last_msn = rt
        click.echo("First MSn Scan: %0.3f minutes" % (first_msn,))
        click.echo("Last MSn Scan: %0.3f minutes" % (last_msn,))
        for charge, count in sorted(charges.items()):
            if not isinstance(charge, int):
                continue
            click.echo("Precursors with Charge State %d: %d" % (charge, count))
        if n_defaulted > 0:
            click.echo("Defaulted MSn Scans: %d" % (n_defaulted,))
        if n_orphan > 0:
            click.echo("Orphan MSn Scans: %d" % (n_orphan,))
    if diagnostics:
        reader.reset()
        scan_ids_with_invalid_isolation_window = []
        scan_ids_with_empty_isolation_window = []
        activation_counter = Counter()
        n_ms1 = 0
        n_msn = 0
        for precursor, products in reader:
            if precursor is not None:
                n_ms1 += 1
            for product in products:
                n_msn += 1
                if not isolation_window_valid(product):
                    scan_ids_with_invalid_isolation_window.append((precursor.id, product.id))
                if is_isolation_window_empty(product):
                    scan_ids_with_empty_isolation_window.append(
                        (precursor.id, product.id))
                activation_counter[product.activation] += 1

        click.echo("MS1 Spectra: %d" % (n_ms1, ))
        click.echo("MSn Spectra: %d" % (n_msn, ))
        click.echo("Invalid Isolation Windows: %d" % (
            len(scan_ids_with_invalid_isolation_window), ))
        click.echo("Empty Isolation Windows: %d" % (
            len(scan_ids_with_empty_isolation_window), ))

        click.echo("Activation Methods")
        for activation, count in activation_counter.items():
            if not activation.is_multiple_dissociation():
                click.echo("\t{method}:{energy} = {count}".format(
                    method=activation.method, energy=activation.energy, count=count))
            else:
                click.echo("\t{method}:{energy} = {count}".format(
                    method=','.join(map(str, activation.method)),
                    energy=','.join(map(str, activation.energies)), count=count))