Example #1
0
def main(args: Union[str, List[str]] = None) -> int:
    """
    Run a spectral library search and export the results as mzTab.

    Parameters
    ----------
    args : Union[str, List[str]], optional
        Arguments forwarded unchanged to `config.parse`. How `None` is
        interpreted is decided by `config.parse` (presumably it falls back
        to the command line -- confirm against the `config` module).

    Returns
    -------
    int
        Always 0 when the search and the export complete. Failures are not
        translated into a return code; they propagate as exceptions.
    """
    # Initialize logging.
    logging.basicConfig(format='{asctime} [{levelname}/{processName}] '
                        '{module}.{funcName} : {message}',
                        style='{',
                        level=logging.DEBUG)

    # Load the configuration.
    config.parse(args)

    # Perform the search.
    spec_lib = spectral_library.SpectralLibrary(
        config.spectral_library_filename)
    try:
        identifications = spec_lib.search(config.query_filename)
        writer.write_mztab(identifications, config.out_filename,
                           spec_lib._library_reader)
    finally:
        # Always release the library, also when searching or writing fails.
        spec_lib.shutdown()

    logging.shutdown()

    return 0
Example #2
0
def main():
    """
    Run a spectral library search and export the results as mzTab.

    The library implementation is chosen from `config.mode`: 'bf' selects
    `SpectralLibraryBf` and 'ann' selects `SpectralLibraryAnnoy`.

    Raises
    ------
    ValueError
        If `config.mode` is neither 'bf' nor 'ann'.
    """
    # initialize logging
    logging.basicConfig(
            format='%(asctime)s [%(levelname)s/%(processName)s] '
                   '%(module)s.%(funcName)s : %(message)s',
            level=logging.DEBUG)

    # load the config
    config.parse()

    # execute the search
    if config.mode == 'bf':
        spec_lib = spectral_library.SpectralLibraryBf(
                config.spectral_library_filename)
    elif config.mode == 'ann':
        spec_lib = spectral_library.SpectralLibraryAnnoy(
                config.spectral_library_filename)
    else:
        # Fail with a clear message instead of an UnboundLocalError on the
        # first use of `spec_lib` below.
        raise ValueError(
            'Unsupported search mode: {!r}'.format(config.mode))

    try:
        identifications = spec_lib.search(config.query_filename)
        writer.write_mztab(identifications, config.out_filename,
                           spec_lib._library_reader)
    finally:
        # Always release the library, also when searching or writing fails.
        spec_lib.shutdown()

    logging.shutdown()
# Rebuild the command-line options of the original search from the
# 'software[1]-setting' entries of the mzTab metadata.
for key in metadata:
    if 'software[1]-setting' not in key:
        continue
    setting = metadata[key]
    # Option name: text before the first space; value: text after the last
    # space.
    param = setting[:setting.find(' ')]
    value = setting[setting.rfind(' ') + 1:]
    if value in ('None', 'False'):
        # Neither of these values contributes anything to the options.
        continue
    settings.append('--{}'.format(param))
    if value != 'True':
        # 'True' adds the bare flag only; any other value follows its flag.
        settings.append(value)

# Placeholder file names (presumably required positional arguments of
# `config.parse`; the real files are not needed here -- TODO confirm).
settings.extend(('dummy_spectral_library_filename',
                 'dummy_query_filename',
                 'dummy_output_filename'))
config.parse(' '.join(settings))

# Identifiers of the requested query, assembled from the arguments.
query_id = ".".join((args.Filename, args.Scan, args.Scan, args.Charge))
# NOTE(review): the original carried these tokens next to the USI, presumably
# as examples of modification notations that may occur in `args.Sequence`:
# [UNIMOD:121]    [UNIMOD:21]    [UNIMOD:21#g1]    [-28.031300]
query_usi = ("mzspec:PXD018117:" + args.Filename
             + ":scan:" + args.Scan
             + ":" + args.Sequence)
txt = 'm/z'
query_spectrum_number = "".join(
    ("controllerType=0 controllerNumber=1 scan=", args.Scan))

# Local path of the query file, taken from the run location in the metadata.
query_uri = urlparse.urlparse(
    urlparse.unquote(metadata['ms_run[1]-location']))
query_filename = os.path.abspath(
    os.path.join(query_uri.netloc, query_uri.path))

# Row of the requested query and the library entry it refers to.
ssm = ssms.loc[query_id]
library_id = ssm['accession']
library_uri = urlparse.urlparse(urlparse.unquote(ssm['database']))
library_filename = os.path.abspath(
    os.path.join(library_uri.netloc, library_uri.path))
score = ssm['search_engine_score[1]']
Example #4
0
def main():
    """
    Draw one spectrum-spectrum match from an mzTab file as a mirror plot.

    The mzTab file name and the identifier of the query to visualize are
    read from the command line. The query spectrum is plotted against its
    matched library spectrum and the figure is saved as '<query_id>.png'.

    Raises
    ------
    ValueError
        If the query file contains no spectrum with the requested
        identifier.
    """
    # Parse the command line.
    arg_parser = argparse.ArgumentParser(
        description='Visualize spectrum–spectrum matches from your '
        'ANN-SoLo identification results')
    arg_parser.add_argument(
        'mztab_filename', help='Identifications in mzTab format')
    arg_parser.add_argument(
        'query_id', help='The identifier of the query to visualize')
    cli_args = arg_parser.parse_args()
    mztab_filename = cli_args.mztab_filename

    # Collect the leading 'MTD' lines of the mzTab file as key -> value.
    mtd = {}
    with open(mztab_filename) as mztab_file:
        for raw_line in mztab_file:
            fields = raw_line.strip().split('\t')
            if fields[0] != 'MTD':
                # Metadata is expected at the top: stop at the first line
                # of any other kind.
                break
            mtd[fields[1]] = fields[2]
    matches = reader.read_mztab_ssms(mztab_filename)
    # Index the matches by string identifiers.
    matches.index = matches.index.map(str)

    # Rebuild the search options from the 'software[1]-setting' entries.
    options = []
    for name, setting in mtd.items():
        if 'software[1]-setting' not in name:
            continue
        # Option name: text before the first space; value: text after the
        # last space.
        option = setting[:setting.find(' ')]
        option_value = setting[setting.rfind(' ') + 1:]
        if option_value in ('None', 'False'):
            continue
        options.append(f'--{option}')
        if option_value != 'True':
            # 'True' adds the bare flag only; other values follow the flag.
            options.append(option_value)
    # Placeholder file names complete the argument list.
    options += ['dummy_spectral_library_filename',
                'dummy_query_filename',
                'dummy_output_filename']
    config.parse(' '.join(options))

    # Locate the query file, the matched library entry and the score.
    query_id = cli_args.query_id
    query_location = urlparse.urlparse(
        urlparse.unquote(mtd['ms_run[1]-location']))
    query_filename = os.path.abspath(
        os.path.join(query_location.netloc, query_location.path))
    ssm_row = matches.loc[query_id]
    library_id = ssm_row['accession']
    library_location = urlparse.urlparse(
        urlparse.unquote(ssm_row['database']))
    library_filename = os.path.abspath(
        os.path.join(library_location.netloc, library_location.path))
    score = ssm_row['search_engine_score[1]']

    # Fetch the library spectrum, then scan the query file for the query.
    with reader.SpectralLibraryReader(library_filename) as lib_reader:
        library_spectrum = lib_reader.get_spectrum(library_id, True)
    for candidate in reader.read_mgf(query_filename):
        if candidate.identifier != query_id:
            continue
        query_spectrum = process_spectrum(candidate, False)
        # The query takes over the precursor charge of the library
        # spectrum, which also covers queries without a defined charge.
        query_spectrum.precursor_charge = library_spectrum.precursor_charge
        break
    else:
        # The loop finished without finding the requested identifier.
        raise ValueError('Could not find the specified query spectrum')

    # Mark the matching peaks so that they are colored, and give all
    # remaining peaks a neutral grey.
    set_matching_peaks(library_spectrum, query_spectrum)
    plot.colors[None] = '#757575'

    _, ax = plt.subplots(figsize=(20, 10))

    # Mirror plot without the built-in ion annotations.
    mirror_options = {'color_ions': True, 'annotate_ions': False}
    plot.mirror(query_spectrum, library_spectrum, mirror_options, ax)

    # Annotate the known ions of the library spectrum by hand. The library
    # spectrum occupies the lower half of the plot, hence the negated,
    # max-scaled intensity as y position.
    tallest_peak = library_spectrum.intensity.max()
    for idx, peak_annotation in enumerate(library_spectrum.annotation):
        if peak_annotation is None or peak_annotation.ion_type == 'unknown':
            continue
        ax.text(library_spectrum.mz[idx],
                -library_spectrum.intensity[idx] / tallest_peak,
                str(peak_annotation),
                color=plot.colors[peak_annotation.ion_type],
                zorder=5,
                horizontalalignment='right',
                verticalalignment='center',
                rotation=90,
                rotation_mode='anchor')

    ax.set_ylim(-1.1, 1.05)

    # Two centered header lines above the axes.
    header_style = {'horizontalalignment': 'center',
                    'verticalalignment': 'bottom',
                    'transform': ax.transAxes}
    title = f'{library_spectrum.peptide}, Score: {score:.3f}'
    ax.text(0.5, 1.06, title,
            fontsize='x-large', fontweight='bold', **header_style)
    subtitle = (f'File: {os.path.basename(query_filename)}, '
                f'Scan: {query_spectrum.identifier}, '
                f'Precursor m/z: {query_spectrum.precursor_mz:.4f}, '
                f'Library m/z: {library_spectrum.precursor_mz:.4f}, '
                f'Charge: {query_spectrum.precursor_charge}')
    ax.text(0.5, 1.02, subtitle, fontsize='large', **header_style)

    plt.savefig(f'{query_id}.png', dpi=300, bbox_inches='tight')
    plt.close()
Example #5
0
def main():
    """
    Draw one peptide-spectrum match from an mzTab file as a mirror plot.

    The mzTab file name and the identifier of the query to visualize are
    read from the command line. The query spectrum is drawn on top, the
    matched library spectrum mirrored underneath, and the figure is saved
    as '<query_id>.png'.

    Raises
    ------
    ValueError
        If the query file contains no spectrum with the requested
        identifier.
    """
    # load the cmd arguments
    parser = argparse.ArgumentParser(
        description='Visualize spectrum-spectrum matches from your '
        'ANN-SoLo identification results')
    parser.add_argument('mztab_filename',
                        help='Identifications in mzTab format')
    parser.add_argument('query_id',
                        help='The identifier of the query to visualize')
    args = parser.parse_args()

    # read the leading 'MTD' lines of the mzTab file as key -> value
    metadata = {}
    with open(args.mztab_filename) as f_mztab:
        for line in f_mztab:
            line_split = line.strip().split('\t')
            if line_split[0] != 'MTD':
                break  # metadata lines should be on top
            metadata[line_split[1]] = line_split[2]
    psms = reader.read_mztab_psms(args.mztab_filename)
    # make sure the PSM id's are strings
    psms.index = psms.index.map(str)

    # recreate the search configuration from the stored search settings
    settings = []
    for key, setting in metadata.items():
        if 'software[1]-setting' not in key:
            continue
        # option name: text before the first space; value: text after the
        # last space
        param = setting[:setting.find(' ')]
        value = setting[setting.rfind(' ') + 1:]
        # 'False' flags are simply left out. A value of 'None' presumably
        # marks an option that was not set; it is skipped as well so that
        # the literal text 'None' is not passed on as the option's value.
        if value in ('None', 'False'):
            continue
        settings.append('--{}'.format(param))
        if value != 'True':
            # 'True' adds the bare flag only; other values follow the flag
            settings.append(value)
    # placeholder file names complete the argument list
    settings.append('dummy_spectral_library_filename')
    settings.append('dummy_query_filename')
    settings.append('dummy_output_filename')
    config.parse(' '.join(settings))

    # retrieve information on the requested query
    query_id = args.query_id
    query_uri = urlparse.urlparse(
        urlparse.unquote(metadata['ms_run[1]-location']))
    query_filename = os.path.abspath(
        os.path.join(query_uri.netloc, query_uri.path))
    psm = psms.loc[query_id]
    library_id = psm['accession']
    library_uri = urlparse.urlparse(urlparse.unquote(psm['database']))
    library_filename = os.path.abspath(
        os.path.join(library_uri.netloc, library_uri.path))
    score = psm['search_engine_score[1]']

    # read library and query spectrum
    with reader.get_spectral_library_reader(library_filename) as lib_reader:
        library_spectrum = lib_reader.get_spectrum(library_id, True)
    query_spectrum = None
    for spec in reader.read_mgf(query_filename):
        if spec.identifier == query_id:
            query_spectrum = spec
            query_spectrum.process_peaks()
            # make sure that the precursor charge is set for query spectra
            # with a undefined precursor charge
            if query_spectrum.precursor_charge is None:
                query_spectrum.precursor_charge =\
                    library_spectrum.precursor_charge
            break
    # verify that the query spectrum was found
    if query_spectrum is None:
        raise ValueError('Could not find the specified query spectrum')

    # compute the matching peaks
    library_matches, query_matches, _ =\
        get_matching_peaks(library_spectrum, query_spectrum)

    # plot the match
    fig, ax = plt.subplots(figsize=(20, 10))

    # query spectrum on top; color and z-order are looked up by the match
    # entry of each peak (`None` when the peak has no entry)
    max_intensity = np.max(query_spectrum.intensities)
    for i, (mass, intensity) in enumerate(
            zip(query_spectrum.masses, query_spectrum.intensities)):
        match = query_matches.get(i)
        ax.plot([mass, mass], [0, intensity / max_intensity],
                color=colors[match],
                zorder=zorders[match])
    # library spectrum mirrored underneath
    max_intensity = np.max(library_spectrum.intensities)
    for i, (mass, intensity, annotation) in enumerate(
            zip(library_spectrum.masses, library_spectrum.intensities,
                library_spectrum.annotations)):
        match = library_matches.get(i)
        color = colors[match]
        ax.plot([mass, mass], [0, -intensity / max_intensity],
                color=color,
                zorder=zorders[match])
        if annotation is not None:
            # label: first annotation element followed by one '+' per unit
            # of the second element
            ax.text(mass - 5,
                    -intensity / max_intensity - 0.05,
                    '{}{}'.format(annotation[0], '+' * annotation[1]),
                    color=color,
                    rotation=270)

    # horizontal line between the two spectra
    ax.axhline(0, color='black')
    # consistent axes range and labels
    ax.set_xticks(np.arange(0, config.max_mz, 200))
    ax.set_xlim(config.min_mz, config.max_mz)
    y_ticks = np.arange(-1, 1.05, 0.25)
    # the mirrored half is labeled with positive percentages as well
    y_ticklabels = ['{:.0%}'.format(tick) for tick in np.abs(y_ticks)]
    ax.set_yticks(y_ticks)
    ax.set_yticklabels(y_ticklabels)
    ax.set_ylim(-1.15, 1.05)

    # show major/minor tick lines
    ax.xaxis.set_minor_locator(mticker.AutoMinorLocator())
    ax.yaxis.set_minor_locator(mticker.AutoMinorLocator())
    # The on/off flag is passed positionally: its keyword was named `b` in
    # older Matplotlib releases and `visible` in newer ones, so neither
    # keyword works across versions.
    ax.grid(True,
            which='major',
            color='lightgrey',
            linestyle='--',
            linewidth=1.0)
    ax.grid(True,
            which='minor',
            color='lightgrey',
            linestyle='--',
            linewidth=0.5)

    # small tick labels
    ax.tick_params(axis='both', which='both', labelsize='small')

    ax.set_xlabel('m/z')
    ax.set_ylabel('Intensity')

    # two centered header lines above the axes
    ax.text(0.5,
            1.06,
            '{}, Score: {:.3f}'.format(library_spectrum.peptide, score),
            horizontalalignment='center',
            verticalalignment='bottom',
            fontsize='x-large',
            fontweight='bold',
            transform=ax.transAxes)
    ax.text(0.5,
            1.02,
            'File: {}, Scan: {}, Precursor m/z: {:.4f}, '
            'Library m/z: {:.4f}, Charge: {}'.format(
                os.path.basename(query_filename), query_spectrum.identifier,
                query_spectrum.precursor_mz, library_spectrum.precursor_mz,
                query_spectrum.precursor_charge),
            horizontalalignment='center',
            verticalalignment='bottom',
            fontsize='large',
            transform=ax.transAxes)

    fig.savefig('{}.png'.format(query_id), dpi=300, bbox_inches='tight')
    plt.close(fig)