Esempio n. 1
0
def plot_psms(psmFile, spectrumFile, plotList = 'currPsms.html',
              highResMs2 = False,
              dripLearnedMeans = 'dripLearned.means',
              dripLearnedCovars = 'dripLearned.covars',
              mods = '', ntermMods = '', ctermMods = '',
              precursor_filter = False, 
              high_res_gauss_dist = 0.05):
    """Decode the PSMs in psmFile with dripExtract and plot every one of them.

    The PSMs returned by runDripExtract (targets and decoys) get their
    observed spectrum attached, their DRIP features and b/y-ion sets
    computed, and are then plotted in order of decreasing score.  An HTML
    index with one link per plot is written to plotList.

    Args:
        psmFile: PSM file handed to dripExtractParams and, when variable
            mods are in use, to psm_var_mods.
        spectrumFile: spectrum file handed to dripExtractParams and to
            load_spectra_minMaxMz.
        plotList: path of the HTML index file to write.
        highResMs2: when false, the DRIP means are loaded from
            dripLearnedMeans; when true, they are built from the sorted
            union of the theoretical b/y ions of all decoded PSMs.
        dripLearnedMeans: means file (forwarded to dripExtractParams and,
            for low-res data, to load_drip_means).
        dripLearnedCovars: covariances file forwarded to dripExtractParams.
        mods, ntermMods, ctermMods: modification specifications, parsed by
            parse_var_mods into static and variable parts.
        precursor_filter: selects the 'top300TightSequest' normalization
            instead of 'top300Sequest'.
        high_res_gauss_dist: forwarded to the dripExtract arguments and to
            filter_theoretical_peaks.

    Raises:
        AssertionError: if variable mods were requested but psm_var_mods
            returns nothing for psmFile.

    Side effects:
        Writes 'gmtk_err' in the current directory (it receives both output
        streams of runDripExtract), writes plotList, and calls
        plot_drip_viterbi(plotName) once per plotted PSM.
    """
    # initialize arguments for dripExtract
    args = dripExtractParams(psmFile, spectrumFile, 'all',
                             mods, ntermMods, ctermMods,
                             highResMs2,
                             dripLearnedMeans, dripLearnedCovars)

    mods, varMods = parse_var_mods(mods, True)
    ntermMods, ntermVarMods = parse_var_mods(ntermMods, False)
    ctermMods, ctermVarMods = parse_var_mods(ctermMods, False)
    # evaluated once; every branch below depends on the same condition
    hasVarMods = bool(varMods or ntermVarMods or ctermVarMods)

    # NOTE(review): args.precursor_filter is always switched off here; the
    # precursor_filter argument only picks the normalization scheme.
    # Confirm this is intended.
    args.precursor_filter = False
    args.high_res_gauss_dist = high_res_gauss_dist
    if precursor_filter:
        args.normalize = 'top300TightSequest'
    else:
        args.normalize = 'top300Sequest'

    # decode DRIP PSMs; both output streams go to the same log file, which
    # is closed as soon as decoding finishes
    with open('gmtk_err', "w") as stde:
        t, d, spectra0 = runDripExtract(args, stde, stde)

    # if variable mods, get variable mod string per PSM
    varModDict = {}
    if hasVarMods:
        varModDict = psm_var_mods(psmFile)
        assert varModDict, "Variable mods specified in enzyme options, but strings denoting variables mods per peptide are not specified in %s, exitting" % (psmFile,)
    spectra, minMz, maxMz, _ = load_spectra_minMaxMz(spectrumFile)

    # get original intensity values to plot
    # NOTE(review): spectra[sid].mz is overwritten before the zip below, so
    # the `mz in mz_vals` test is always true and the freshly loaded
    # intensities are paired with the spectra0 m/z values purely by
    # position.  Behavior is left unchanged; confirm that it is intended.
    for sid in spectra0:
        spectra[sid].mz = list(spectra0[sid].mz)
        mz_vals = set(spectra0[sid].mz)
        z = max(spectra0[sid].intensity)
        spectra[sid].intensity = [i/z for mz, i in zip(spectra[sid].mz, spectra[sid].intensity)
                                  if mz in mz_vals]

    # t and d are processed identically, t first
    psmTables = (t, d)

    if not highResMs2:
        dripMeans = load_drip_means(dripLearnedMeans)
    else:
        # high-res: the means are the sorted union of all theoretical ions
        dripMeansSet = set()
        for psms in psmTables:
            for sid, c in psms:
                for p in psms[sid, c]:
                    if hasVarMods:
                        bNy = interleave_b_y_ions_var_mods(Peptide(p.peptide), c,
                                                           mods, ntermMods, ctermMods,
                                                           varMods, ntermVarMods, ctermVarMods,
                                                           varModDict[sid, p.peptide])
                    else:
                        bNy = interleave_b_y_ions(Peptide(p.peptide), c,
                                                  mods, ntermMods, ctermMods)
                    # return value is not used; presumably filters bNy in place
                    filter_theoretical_peaks(bNy, minMz, maxMz, high_res_gauss_dist)
                    dripMeansSet |= set(bNy)
        dripMeans = dict(enumerate(sorted(dripMeansSet)))

    # reverse mapping, from ions to indices
    ion_to_index_map = {dripMeans[ind]: ind for ind in dripMeans}

    all_psms = []
    varModSequence = ''
    for psms in psmTables:
        for sid, c in psms:
            s = spectra[sid]
            for p in psms[sid, c]:
                p.add_obs_spectrum(s)
                p.calculate_drip_features(dripMeans)
                if hasVarMods:
                    varModSequence = varModDict[sid, p.peptide]
                p.calc_by_sets(c,
                               mods, ntermMods, ctermMods,
                               highResMs2,
                               ion_to_index_map,
                               varMods, ntermVarMods, ctermVarMods,
                               varModSequence)
                # collect every PSM: appending after the inner loop kept only
                # the last PSM per (sid, c) and reused a stale `p` (or raised
                # NameError) when a PSM list was empty
                all_psms.append(p)

    all_psms.sort(key = lambda r: r.score, reverse = True)

    kindNames = {'t': 'target', 'd': 'decoy'}
    with open(plotList, "w") as fid:
        for p in all_psms:
            kind = kindNames.get(p.kind)
            if kind is None:
                # neither target nor decoy: nothing to plot
                continue

            plotName = kind + 'Scan' + str(p.scan) + \
                'Charge' + str(p.charge) + \
                p.peptide + '.png'

            p.plot_drip_viterbi(plotName)
            fid.write("<a href=\"%s\">%s Scan %d Charge %d %s</a><br>\n" %
                      (plotName, kind, p.scan, p.charge, p.peptide))
Esempio n. 2
0
def plot_psms(psmFile, spectrumFile, plotList = 'currPsms.html',
              highResMs2 = False,
              dripLearnedMeans = 'dripLearned.means',
              dripLearnedCovars = 'dripLearned.covars',
              mods = '', ntermMods = '', ctermMods = '',
              precursor_filter = False, 
              high_res_gauss_dist = 0.05):
    """Decode the PSMs in psmFile with dripExtract and plot every one of them.

    The PSMs returned by runDripExtract (targets and decoys) get their
    observed spectrum attached, their DRIP features and b/y-ion sets
    computed, and are then plotted in order of decreasing score.  An HTML
    index with one link per plot is written to plotList.

    Args:
        psmFile: PSM file handed to dripExtractParams and, when variable
            mods are in use, to psm_var_mods.
        spectrumFile: spectrum file handed to dripExtractParams and to
            load_spectra_minMaxMz.
        plotList: path of the HTML index file to write.
        highResMs2: when false, the DRIP means are loaded from
            dripLearnedMeans; when true, they are built from the sorted
            union of the theoretical b/y ions of all decoded PSMs.
        dripLearnedMeans: means file (forwarded to dripExtractParams and,
            for low-res data, to load_drip_means).
        dripLearnedCovars: covariances file forwarded to dripExtractParams.
        mods, ntermMods, ctermMods: modification specifications, parsed by
            parse_var_mods into static and variable parts.
        precursor_filter: selects the 'top300TightSequest' normalization
            instead of 'top300Sequest'.
        high_res_gauss_dist: forwarded to the dripExtract arguments and to
            filter_theoretical_peaks.

    Raises:
        AssertionError: if variable mods were requested but psm_var_mods
            returns nothing for psmFile.

    Side effects:
        Writes 'gmtk_err' in the current directory (it receives both output
        streams of runDripExtract), writes plotList, and calls
        plot_drip_viterbi(plotName) once per plotted PSM.
    """
    # initialize arguments for dripExtract
    args = dripExtractParams(psmFile, spectrumFile, 'all',
                             mods, ntermMods, ctermMods,
                             highResMs2,
                             dripLearnedMeans, dripLearnedCovars)

    mods, varMods = parse_var_mods(mods, True)
    ntermMods, ntermVarMods = parse_var_mods(ntermMods, False)
    ctermMods, ctermVarMods = parse_var_mods(ctermMods, False)
    # evaluated once; every branch below depends on the same condition
    hasVarMods = bool(varMods or ntermVarMods or ctermVarMods)

    # NOTE(review): args.precursor_filter is always switched off here; the
    # precursor_filter argument only picks the normalization scheme.
    # Confirm this is intended.
    args.precursor_filter = False
    args.high_res_gauss_dist = high_res_gauss_dist
    if precursor_filter:
        args.normalize = 'top300TightSequest'
    else:
        args.normalize = 'top300Sequest'

    # decode DRIP PSMs; both output streams go to the same log file, which
    # is closed as soon as decoding finishes
    with open('gmtk_err', "w") as stde:
        t, d, spectra0 = runDripExtract(args, stde, stde)

    # if variable mods, get variable mod string per PSM
    varModDict = {}
    if hasVarMods:
        varModDict = psm_var_mods(psmFile)
        assert varModDict, "Variable mods specified in enzyme options, but strings denoting variables mods per peptide are not specified in %s, exitting" % (psmFile,)
    spectra, minMz, maxMz, _ = load_spectra_minMaxMz(spectrumFile)

    # get original intensity values to plot
    # NOTE(review): spectra[sid].mz is overwritten before the zip below, so
    # the `mz in mz_vals` test is always true and the freshly loaded
    # intensities are paired with the spectra0 m/z values purely by
    # position.  Behavior is left unchanged; confirm that it is intended.
    for sid in spectra0:
        spectra[sid].mz = list(spectra0[sid].mz)
        mz_vals = set(spectra0[sid].mz)
        z = max(spectra0[sid].intensity)
        spectra[sid].intensity = [i/z for mz, i in zip(spectra[sid].mz, spectra[sid].intensity)
                                  if mz in mz_vals]

    # t and d are processed identically, t first
    psmTables = (t, d)

    if not highResMs2:
        dripMeans = load_drip_means(dripLearnedMeans)
    else:
        # high-res: the means are the sorted union of all theoretical ions
        dripMeansSet = set()
        for psms in psmTables:
            for sid, c in psms:
                for p in psms[sid, c]:
                    if hasVarMods:
                        bNy = interleave_b_y_ions_var_mods(Peptide(p.peptide), c,
                                                           mods, ntermMods, ctermMods,
                                                           varMods, ntermVarMods, ctermVarMods,
                                                           varModDict[sid, p.peptide])
                    else:
                        bNy = interleave_b_y_ions(Peptide(p.peptide), c,
                                                  mods, ntermMods, ctermMods)
                    # return value is not used; presumably filters bNy in place
                    filter_theoretical_peaks(bNy, minMz, maxMz, high_res_gauss_dist)
                    dripMeansSet |= set(bNy)
        dripMeans = dict(enumerate(sorted(dripMeansSet)))

    # reverse mapping, from ions to indices
    ion_to_index_map = {dripMeans[ind]: ind for ind in dripMeans}

    all_psms = []
    varModSequence = ''
    for psms in psmTables:
        for sid, c in psms:
            s = spectra[sid]
            for p in psms[sid, c]:
                p.add_obs_spectrum(s)
                p.calculate_drip_features(dripMeans)
                if hasVarMods:
                    varModSequence = varModDict[sid, p.peptide]
                p.calc_by_sets(c,
                               mods, ntermMods, ctermMods,
                               highResMs2,
                               ion_to_index_map,
                               varMods, ntermVarMods, ctermVarMods,
                               varModSequence)
                # collect every PSM: appending after the inner loop kept only
                # the last PSM per (sid, c) and reused a stale `p` (or raised
                # NameError) when a PSM list was empty
                all_psms.append(p)

    all_psms.sort(key = lambda r: r.score, reverse = True)

    kindNames = {'t': 'target', 'd': 'decoy'}
    with open(plotList, "w") as fid:
        for p in all_psms:
            kind = kindNames.get(p.kind)
            if kind is None:
                # neither target nor decoy: nothing to plot
                continue

            plotName = kind + 'Scan' + str(p.scan) + \
                'Charge' + str(p.charge) + \
                p.peptide + '.png'

            p.plot_drip_viterbi(plotName)
            fid.write("<a href=\"%s\">%s Scan %d Charge %d %s</a><br>\n" %
                      (plotName, kind, p.scan, p.charge, p.peptide))
Esempio n. 3
0
        + " -inputMasterFile " + args.master_file + " -inputTrainableParameters trained.params -failOnZeroClique F"
    # gmtkViterbi command line
    vitValsFile = os.path.join(log_dir, 'vitVals.txt')
    vitStr = vitStr0 + ' -vitValsFile ' +  vitValsFile \
        + ' -of1 ' + spectrum_obs_file \
        + ' -fmt1 flatascii ' \
        + ' -of2 ' + peptide_obs_file \
        + ' -fmt2 flatascii ' \
        + ' -cppCommand ' + cppCommand
    # call(shlex.split(vitStr), stdout = sys.stdout, stderr = sys.stdout)
    call(shlex.split(vitStr), stdout = stdo, stderr = stde)

    # parse output
    t,d = ppsm.parse_dripExtract(vitValsFile, os.path.join(output_dir, 'pepDB.txt'))

    t = t[sid,c][0]
    # calculate insertions and deletions
    t.add_obs_spectrum(s0)
    t.calculate_drip_features(dripMeans)
    t.calc_by_sets(c, mods,
                   ntermMods, ctermMods, highResMs2, 
                   ion_to_index_map,
                   varMods, ntermVarMods, ctermVarMods,
                   varModSequence)
    return t
        
# Script entry point: the body runs only when this file is executed directly,
# not when it is imported.
if __name__ == '__main__':
    # process input arguments
    args = process_args()
    # run dripExtract on the parsed arguments; its three return values are
    # bound to module-level names
    targets, decoys, spec_dict = runDripExtract(args)
Esempio n. 4
0
        + " -inputMasterFile " + args.master_file + " -inputTrainableParameters trained.params -failOnZeroClique F"
    # gmtkViterbi command line
    vitValsFile = os.path.join(log_dir, 'vitVals.txt')
    vitStr = vitStr0 + ' -vitValsFile ' +  vitValsFile \
        + ' -of1 ' + spectrum_obs_file \
        + ' -fmt1 flatascii ' \
        + ' -of2 ' + peptide_obs_file \
        + ' -fmt2 flatascii ' \
        + ' -cppCommand ' + cppCommand
    # call(shlex.split(vitStr), stdout = sys.stdout, stderr = sys.stdout)
    call(shlex.split(vitStr), stdout = stdo, stderr = stde)

    # parse output
    t,d = ppsm.parse_dripExtract(vitValsFile, os.path.join(output_dir, 'pepDB.txt'))

    t = t[sid,c][0]
    # calculate insertions and deletions
    t.add_obs_spectrum(s0)
    t.calculate_drip_features(dripMeans)
    t.calc_by_sets(c, mods,
                   ntermMods, ctermMods, highResMs2, 
                   ion_to_index_map,
                   varMods, ntermVarMods, ctermVarMods,
                   varModSequence)
    return t
        
# Script entry point: the body runs only when this file is executed directly,
# not when it is imported.
if __name__ == '__main__':
    # process input arguments
    args = process_args()
    # run dripExtract on the parsed arguments; its three return values are
    # bound to module-level names
    targets, decoys, spec_dict = runDripExtract(args)