def load_spectra(spectra):
    """Load spectra and return only the spectra container.

    Thin wrapper around ``load_spectra_ret_dict``: it forwards ``spectra``
    together with the literal ``'all'`` (passed in the position where other
    callers in this file pass their charge selection -- presumably meaning
    "do not filter by charge"; confirm against ``load_spectra_ret_dict``),
    and discards everything in the returned 5-tuple except its first element.

    Ident-file based spectra filtering is currently not supported here.
    """
    # The helper returns five values; only the first is needed by callers of
    # this wrapper, the remaining four are deliberately ignored.
    loaded, _min_mz, _max_mz, _charges, _unused = load_spectra_ret_dict(spectra, 'all')
    return loaded
##################################### start runDripExtract for multithreading #################################################################################### # create constant gmtkViterbi command line string # don't need frame/segment difference actions since each PSM corresponds to a specific spectrum, # so that there isn't much redudandancy to exploit vitStr0 = "gmtkViterbi -strFile " + args.structure_file \ + " -triFile " + args.structure_file + ".trifile -ni1 0 -nf1 2 -ni2 1 -nf2 0" \ + " -fdiffact2 rl" \ + " -inputMasterFile " + args.master_file + " -inputTrainableParameters trained.params -failOnZeroClique F" # for now, don't worry about checking whether peptide is in valid (i.e., present in the digested # set of peptide candidates given the protein database) # currently ignore ident file input for spectra filtering spectra, minMz, maxMz, validcharges, _ = load_spectra_ret_dict(args.spectra, args.charges) # update encountered charges args.charges = validcharges args.mz_lb = minMz args.mz_ub = maxMz # create GMTK observation files if args.high_res_ms2: spec_dict, num_psms = make_drip_data_highres(args, spectra, stdo, stde) else: spec_dict, num_psms = make_drip_data_lowres(args, spectra, stdo, stde) pfile_dir = os.path.join(args.output_dir, args.obs_dir) # create structure and master files then triangulate try:
def runDripExtract(args, stdo, stde):
    """Run drip once per spectrum, collapsing all charge-varying candidates
    into a single GMTK call.

    Steps, in order:
      1. load the spectra named by ``args.spectra`` / ``args.charges``,
      2. write the GMTK observation files (high- or low-resolution variant,
         selected by ``args.high_res_ms2``),
      3. create the DRIP structure file, master file, triangulation and
         covariance file,
      4. run ``gmtkViterbi`` as a subprocess,
      5. parse the resulting Viterbi values file.

    Parameters:
      args: options object. Read: ``spectra``, ``charges``, ``high_res_ms2``,
        ``structure_file``, ``master_file``, ``covar_file``,
        ``learned_covars``, ``high_res_gauss_dist``, ``max_obs_mass``,
        ``output_dir``, ``obs_dir``, ``logDir``, ``mean_file``,
        ``gauss_file``, ``mixture_file``, ``collection_file``.
        Overwritten as a side effect: ``charges``, ``mz_lb``, ``mz_ub``
        (set from the values returned by ``load_spectra_ret_dict``).
      stdo: passed as ``stdout`` to the gmtkViterbi subprocess and forwarded
        to the observation-file writers.
      stde: passed as ``stderr`` to the gmtkViterbi subprocess and forwarded
        to the observation-file writers.

    Returns:
      ``(t, d, spec_dict)`` where ``t, d`` are the two values returned by
      ``psm.parse_dripExtract`` (presumably target and decoy PSMs -- confirm
      against that function) and ``spec_dict`` is the first value returned by
      the observation-file writer.

    Raises:
      SystemExit: with status -1 when any of the four model-setup steps
        raises an ``Exception``; a message naming the offending file and the
        underlying error are printed first.
    """
    # Imported locally so the failure paths do not rely on the interactive
    # ``exit`` helper injected by the ``site`` module (missing under
    # ``python -S`` and in frozen builds).
    import sys

    # create constant gmtkViterbi command line string
    # don't need frame/segment difference actions since each PSM corresponds to a specific spectrum,
    # so that there isn't much redundancy to exploit
    vitStr0 = "gmtkViterbi -strFile " + args.structure_file \
        + " -triFile " + args.structure_file + ".trifile -ni1 0 -nf1 2 -ni2 1 -nf2 0" \
        + " -fdiffact2 rl" \
        + " -inputMasterFile " + args.master_file + " -inputTrainableParameters trained.params -failOnZeroClique F"

    # for now, don't worry about checking whether peptide is in valid (i.e., present in the digested
    # set of peptide candidates given the protein database)
    # currently ignore ident file input for spectra filtering
    spectra, minMz, maxMz, validcharges, _ = load_spectra_ret_dict(args.spectra, args.charges)

    # update encountered charges and the observed m/z bounds on the shared
    # options object; the observation-file writers below receive ``args``
    args.charges = validcharges
    args.mz_lb = minMz
    args.mz_ub = maxMz

    # create GMTK observation files
    # add in support for cluster usage later; assume standalone with multithreading
    # (the second returned value, a PSM count judging by the original local
    # name, is not used here)
    if args.high_res_ms2:
        spec_dict, _num_psms = make_drip_data_highres(args, spectra, stdo, stde)
    else:
        spec_dict, _num_psms = make_drip_data_lowres(args, spectra, stdo, stde)

    pfile_dir = os.path.join(args.output_dir, args.obs_dir)

    # create structure and master files then triangulate
    # Each step catches ``Exception`` rather than using a bare ``except`` so
    # that Ctrl-C (KeyboardInterrupt) and SystemExit propagate unchanged, and
    # the underlying error is reported instead of being discarded.
    try:
        create_drip_structure(args.high_res_ms2, args.structure_file,
                              args.max_obs_mass, False, False,
                              args.high_res_gauss_dist)
    except Exception as err:
        print("Could not create DRIP structure file %s, exitting" % args.structure_file)
        print("Underlying error: %r" % (err,))
        sys.exit(-1)

    try:
        # NOTE(review): the covariance path handed to the master file is the
        # literal "drip_collection/covar.txt", while write_covar_file below
        # writes to args.covar_file -- confirm these are meant to coincide.
        create_drip_master(args.high_res_ms2, args.master_file,
                           args.max_obs_mass,
                           "DRIP_MZ",
                           "drip_collection/covar.txt",
                           "DRIP_GAUSSIAN_COMPONENTS",
                           "DRIP_GAUSSIAN_MIXTURES",
                           "DRIP_MZ_GAUSSIANS")
    except Exception as err:
        print("Could not create DRIP master file %s, exitting" % args.master_file)
        print("Underlying error: %r" % (err,))
        sys.exit(-1)

    try:
        triangulate_drip(args.structure_file, args.master_file)
    except Exception as err:
        print("Could not create triangulate structure file %s, exitting" % args.structure_file)
        print("Underlying error: %r" % (err,))
        sys.exit(-1)

    try:
        write_covar_file(args.high_res_ms2, args.covar_file,
                         args.learned_covars, True,
                         args.high_res_gauss_dist)
    except Exception as err:
        print("Could not create covariance file %s, exitting" % args.covar_file)
        print("Underlying error: %r" % (err,))
        sys.exit(-1)

    # run GMTK
    # The whole set of -D definitions is wrapped in single quotes so that
    # shlex.split below keeps it together as the one value of -cppCommand.
    dtFile = os.path.join(args.output_dir, 'iterable.dts')
    cppCommand = '\'-DITERABLE_DT=' + dtFile \
        + ' -DDRIP_MZ=' + args.mean_file \
        + ' -DDRIP_GAUSSIAN_COMPONENTS=' + args.gauss_file \
        + ' -DDRIP_GAUSSIAN_MIXTURES=' + args.mixture_file \
        + ' -DDRIP_MZ_GAUSSIANS=' + args.collection_file \
        + '\''

    # call gmtkViterbi
    # gmtkViterbi command line
    vitValsFile = os.path.join(args.logDir, 'vitVals.txt')
    vitStr = vitStr0 + ' -vitValsFile ' + vitValsFile \
        + ' -of1 ' + pfile_dir + '/spectrum.pfile' \
        + ' -of2 ' + pfile_dir + '/pep-lengths.pfile' \
        + ' -cppCommand ' + cppCommand
    # NOTE(review): shlex.split tokenizes on whitespace, so any path above
    # that contains a space is split into several arguments. The exit status
    # returned by call() is also not checked; a failed gmtkViterbi run will
    # only surface when the values file is parsed below.
    call(shlex.split(vitStr), stdout=stdo, stderr=stde)

    t, d = psm.parse_dripExtract(vitValsFile, os.path.join(args.output_dir, 'pepDB.txt'))
    return t, d, spec_dict