def plot_psms(psmFile, spectrumFile, plotList = 'currPsms.html',
              highResMs2 = False,
              dripLearnedMeans = 'dripLearned.means',
              dripLearnedCovars = 'dripLearned.covars',
              mods = '', ntermMods = '', ctermMods = '',
              precursor_filter = False,
              high_res_gauss_dist = 0.05):
    """Decode the PSMs in psmFile with dripExtract and plot every decoded PSM.

    For each decoded PSM whose kind is 't' (target) or 'd' (decoy), a plot
    named '<kind>Scan<scan>Charge<charge><peptide>.png' is produced through
    p.plot_drip_viterbi, and a hyperlink to that plot is appended to the HTML
    file plotList.  PSMs are written in order of decreasing score; PSMs of any
    other kind are skipped.

    Side effects: the file 'gmtk_err' is (re)created in the current working
    directory and handed to runDripExtract as both its stdout and stderr
    stream; plotList is overwritten.

    Arguments:
        psmFile -- PSM file passed to dripExtractParams and, when variable
            mods are enabled, to psm_var_mods.
        spectrumFile -- spectrum file passed to dripExtractParams and
            load_spectra_minMaxMz.
        plotList -- path of the HTML index of plots to write.
        highResMs2 -- if false, the DRIP means are loaded from
            dripLearnedMeans; otherwise they are the sorted set of filtered
            theoretical b/y ions over all decoded PSMs.
        dripLearnedMeans, dripLearnedCovars -- learned parameter files,
            forwarded to dripExtractParams.
        mods, ntermMods, ctermMods -- modification strings, parsed with
            parse_var_mods into static and variable parts.
        precursor_filter -- only selects args.normalize
            ('top300TightSequest' when true, 'top300Sequest' otherwise);
            args.precursor_filter itself is always set to False.
        high_res_gauss_dist -- stored on args and passed to
            filter_theoretical_peaks.

    Raises:
        AssertionError -- variable mods were requested but psm_var_mods
            returned nothing for psmFile.

    Returns None.
    """
    # initialize arguments for dripExtract
    args = dripExtractParams(psmFile, spectrumFile, 'all',
                             mods, ntermMods, ctermMods,
                             highResMs2,
                             dripLearnedMeans, dripLearnedCovars)

    mods, varMods = parse_var_mods(mods, True)
    ntermMods, ntermVarMods = parse_var_mods(ntermMods, False)
    ctermMods, ctermVarMods = parse_var_mods(ctermMods, False)
    hasVarMods = bool(varMods or ntermVarMods or ctermVarMods)

    # NOTE(review): args.precursor_filter is forced to False regardless of the
    # precursor_filter argument, which only influences the normalization
    # below -- confirm this is intended.
    args.precursor_filter = False
    args.high_res_gauss_dist = high_res_gauss_dist
    if precursor_filter:
        args.normalize = 'top300TightSequest'
    else:
        args.normalize = 'top300Sequest'

    # decode DRIP PSMs; both output streams go to the same log file, which is
    # closed as soon as decoding finishes (or fails)
    with open('gmtk_err', "w") as stde:
        stdo = stde
        t, d, spectra0 = runDripExtract(args, stdo, stde)

    # if variable mods, get variable mod string per PSM
    if hasVarMods:
        varModDict = psm_var_mods(psmFile)
        if not varModDict:
            # Raised explicitly (rather than via the assert statement) so the
            # check is not stripped under "python -O"; the message is now
            # actually formatted with the file name.
            raise AssertionError("Variable mods specified in enzyme options, but strings denoting variables mods per peptide are not specified in %s, exitting" % (psmFile))

    spectra, minMz, maxMz, validCharges = load_spectra_minMaxMz(spectrumFile)

    # get original intensity values to plot
    # NOTE(review): spectra[sid].mz is overwritten with the m/z values from
    # spectra0 *before* it is zipped with the freshly loaded intensities, so
    # the "mz in mz_vals" test is always true and the intensities are simply
    # truncated to the shorter list.  Behaviour is kept as-is here; confirm
    # whether the filter was meant to use the m/z values loaded from
    # spectrumFile.
    for sid in spectra0:
        spectra[sid].mz = list(spectra0[sid].mz)
        mz_vals = set(spectra0[sid].mz)
        z = max(spectra0[sid].intensity)
        spectra[sid].intensity = [i/z for mz, i in zip(spectra[sid].mz, spectra[sid].intensity)
                                  if mz in mz_vals]

    if not highResMs2:
        dripMeans = load_drip_means(dripLearnedMeans)
    else:
        # collect every theoretical ion over all decoded PSMs: targets (t)
        # first, then decoys (d)
        dripMeansSet = set()
        for psms in (t, d):
            for sid, c in psms:
                for p in psms[sid, c]:
                    pep = p.peptide
                    if hasVarMods:
                        varModSequence = varModDict[sid, p.peptide]
                        bNy = interleave_b_y_ions_var_mods(Peptide(pep), c,
                                                           mods, ntermMods, ctermMods,
                                                           varMods, ntermVarMods, ctermVarMods,
                                                           varModSequence)
                    else:
                        bNy = interleave_b_y_ions(Peptide(pep), c, mods,
                                                  ntermMods, ctermMods)
                    filter_theoretical_peaks(bNy, minMz, maxMz, high_res_gauss_dist)
                    dripMeansSet |= set(bNy)
        # index the distinct ions in increasing order
        dripMeans = dict(enumerate(sorted(dripMeansSet)))

    # reverse mapping, from ions to indices
    ion_to_index_map = {}
    for ind in dripMeans:
        ion_to_index_map[dripMeans[ind]] = ind

    # attach the observed spectrum to every PSM and compute its features
    all_psms = []
    varModSequence = ''
    for psms in (t, d):
        for sid, c in psms:
            s = spectra[sid]
            for p in psms[sid, c]:
                p.add_obs_spectrum(s)
                p.calculate_drip_features(dripMeans)
                if hasVarMods:
                    varModSequence = varModDict[sid, p.peptide]
                p.calc_by_sets(c, mods,
                               ntermMods, ctermMods, highResMs2,
                               ion_to_index_map,
                               varMods, ntermVarMods, ctermVarMods,
                               varModSequence)
                all_psms.append(p)

    # best-scoring PSMs first
    all_psms.sort(key = lambda r: r.score, reverse = True)

    kindNames = {'t': 'target', 'd': 'decoy'}
    with open(plotList, "w") as fid:
        for p in all_psms:
            kind = kindNames.get(p.kind)
            if kind is None:
                # neither a target nor a decoy: nothing to plot
                continue

            plotName = kind + 'Scan' + str(p.scan) + \
                'Charge' + str(p.charge) + \
                p.peptide + '.png'
            p.plot_drip_viterbi(plotName)
            fid.write("<a href=\"%s\">%s Scan %d Charge %d %s</a><br>\n"
                      % (plotName, kind, p.scan, p.charge, p.peptide))
+ " -inputMasterFile " + args.master_file + " -inputTrainableParameters trained.params -failOnZeroClique F" # gmtkViterbi command line vitValsFile = os.path.join(log_dir, 'vitVals.txt') vitStr = vitStr0 + ' -vitValsFile ' + vitValsFile \ + ' -of1 ' + spectrum_obs_file \ + ' -fmt1 flatascii ' \ + ' -of2 ' + peptide_obs_file \ + ' -fmt2 flatascii ' \ + ' -cppCommand ' + cppCommand # call(shlex.split(vitStr), stdout = sys.stdout, stderr = sys.stdout) call(shlex.split(vitStr), stdout = stdo, stderr = stde) # parse output t,d = ppsm.parse_dripExtract(vitValsFile, os.path.join(output_dir, 'pepDB.txt')) t = t[sid,c][0] # calculate insertions and deletions t.add_obs_spectrum(s0) t.calculate_drip_features(dripMeans) t.calc_by_sets(c, mods, ntermMods, ctermMods, highResMs2, ion_to_index_map, varMods, ntermVarMods, ctermVarMods, varModSequence) return t if __name__ == '__main__': # process input arguments args = process_args() targets, decoys, spec_dict = runDripExtract(args)