Exemple #1
0
def load_spectra(spectra):
    """Load spectra without any charge filtering and return only the spectra.

    Delegates to load_spectra_ret_dict(spectra, 'all') and returns the first
    element of the 5-tuple it yields; the remaining four values (bound as
    minMz, maxMz, validcharges and one unnamed value in the original code)
    are discarded.
    """
    # ident file input is currently not used to filter spectra
    loaded, _min_mz, _max_mz, _charges, _unused = load_spectra_ret_dict(spectra, 'all')
    return loaded
    ##################################### start runDripExtract for multithreading
    ####################################################################################

        # create constant gmtkViterbi command line string
        # don't need frame/segment difference actions since each PSM corresponds to a specific spectrum,
        # so there isn't much redundancy to exploit
        vitStr0 = "gmtkViterbi -strFile " + args.structure_file \
            + " -triFile " + args.structure_file + ".trifile -ni1 0 -nf1 2 -ni2 1 -nf2 0" \
            + " -fdiffact2 rl" \
            + " -inputMasterFile " + args.master_file + " -inputTrainableParameters trained.params -failOnZeroClique F"

        # for now, don't worry about checking whether the peptide is valid (i.e., present in the digested
        # set of peptide candidates given the protein database)

        # currently ignore ident file input for spectra filtering
        spectra, minMz, maxMz, validcharges, _ = load_spectra_ret_dict(args.spectra, args.charges)
        # update encountered charges
        args.charges = validcharges
        args.mz_lb = minMz
        args.mz_ub = maxMz

        # create GMTK observation files
        if args.high_res_ms2:
            spec_dict, num_psms = make_drip_data_highres(args, spectra, stdo, stde)
        else:
            spec_dict, num_psms = make_drip_data_lowres(args, spectra, stdo, stde)

        pfile_dir = os.path.join(args.output_dir, args.obs_dir)

        # create structure and master files then triangulate
        try:
def _exit_if_step_fails(step, failure_message):
    """Run one setup step; if it raises, report the failure and terminate.

    step is a zero-argument callable.  When it raises, failure_message is
    printed to stdout, the underlying error is written to stderr, and
    sys.exit(-1) is called.
    """
    import sys  # local import so this block does not depend on a file-level import

    try:
        step()
    except Exception as err:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit propagate instead of being
        # reported as a failed setup step.
        print(failure_message)
        sys.stderr.write("%s: %s\n" % (type(err).__name__, err))
        sys.exit(-1)


def runDripExtract(args, stdo, stde):
    """ Run drip once per spectrum, collapsing all charge-varying candidates into a single GMTK call

    Steps, in order:
      1. Load spectra with load_spectra_ret_dict(args.spectra, args.charges)
         and overwrite args.charges, args.mz_lb and args.mz_ub with the
         charges and m/z bounds it returns.
      2. Create the GMTK observation files with make_drip_data_highres or
         make_drip_data_lowres, depending on args.high_res_ms2.
      3. Create the structure file, the master file, triangulate, and write
         the covariance file.  If any of these raises, a message is printed
         and the process exits with sys.exit(-1).
      4. Run gmtkViterbi with stdout/stderr sent to stdo/stde.
      5. Parse the resulting Viterbi values file with psm.parse_dripExtract.

    Arguments:
      args -- options object; mutated as described in step 1.
      stdo, stde -- passed as stdout/stderr to the gmtkViterbi subprocess and
                    to the make_drip_data_* helpers.

    Returns:
      (t, d, spec_dict), where t and d are the two values returned by
      psm.parse_dripExtract and spec_dict is the first value returned by the
      make_drip_data_* helper.
    """
    # Constant part of the gmtkViterbi command line, kept as an argument list
    # so that each path reaches gmtkViterbi as exactly one argument even if it
    # contains whitespace or quote characters.
    # don't need frame/segment difference actions since each PSM corresponds to a specific spectrum,
    # so there isn't much redundancy to exploit
    viterbi_cmd = [
        "gmtkViterbi",
        "-strFile", args.structure_file,
        "-triFile", args.structure_file + ".trifile",
        "-ni1", "0", "-nf1", "2", "-ni2", "1", "-nf2", "0",
        "-fdiffact2", "rl",
        "-inputMasterFile", args.master_file,
        "-inputTrainableParameters", "trained.params",
        "-failOnZeroClique", "F",
    ]

    # for now, don't worry about checking whether the peptide is valid (i.e., present in the digested
    # set of peptide candidates given the protein database)

    # currently ignore ident file input for spectra filtering
    spectra, minMz, maxMz, validcharges, _ = load_spectra_ret_dict(args.spectra, args.charges)
    # update encountered charges
    args.charges = validcharges
    args.mz_lb = minMz
    args.mz_ub = maxMz

    # create GMTK observation files
    # add in support for cluster usage later; assume standalone with multithreading
    # (the second returned value, the PSM count, is not used here)
    if args.high_res_ms2:
        spec_dict, _ = make_drip_data_highres(args, spectra, stdo, stde)
    else:
        spec_dict, _ = make_drip_data_lowres(args, spectra, stdo, stde)

    pfile_dir = os.path.join(args.output_dir, args.obs_dir)

    # create structure and master files then triangulate
    _exit_if_step_fails(
        lambda: create_drip_structure(args.high_res_ms2, args.structure_file,
                                      args.max_obs_mass, False, False,
                                      args.high_res_gauss_dist),
        "Could not create DRIP structure file %s, exitting" % args.structure_file)

    # NOTE(review): the covariance path here is the literal
    # "drip_collection/covar.txt" while write_covar_file below uses
    # args.covar_file -- confirm the two are meant to differ.
    _exit_if_step_fails(
        lambda: create_drip_master(args.high_res_ms2, args.master_file,
                                   args.max_obs_mass,
                                   "DRIP_MZ",
                                   "drip_collection/covar.txt",
                                   "DRIP_GAUSSIAN_COMPONENTS",
                                   "DRIP_GAUSSIAN_MIXTURES",
                                   "DRIP_MZ_GAUSSIANS"),
        "Could not create DRIP master file %s, exitting" % args.master_file)

    _exit_if_step_fails(
        lambda: triangulate_drip(args.structure_file, args.master_file),
        "Could not create triangulate structure file %s, exitting" % args.structure_file)

    _exit_if_step_fails(
        lambda: write_covar_file(args.high_res_ms2, args.covar_file,
                                 args.learned_covars, True,
                                 args.high_res_gauss_dist),
        "Could not create covariance file %s, exitting" % args.covar_file)

    # run GMTK
    dtFile = os.path.join(args.output_dir, 'iterable.dts')
    # All preprocessor defines travel as the single value of -cppCommand.
    cpp_defines = ('-DITERABLE_DT=' + dtFile
                   + ' -DDRIP_MZ=' + args.mean_file
                   + ' -DDRIP_GAUSSIAN_COMPONENTS=' + args.gauss_file
                   + ' -DDRIP_GAUSSIAN_MIXTURES=' + args.mixture_file
                   + ' -DDRIP_MZ_GAUSSIANS=' + args.collection_file)

    # call gmtkViterbi
    vitValsFile = os.path.join(args.logDir, 'vitVals.txt')
    viterbi_cmd += [
        "-vitValsFile", vitValsFile,
        "-of1", os.path.join(pfile_dir, "spectrum.pfile"),
        "-of2", os.path.join(pfile_dir, "pep-lengths.pfile"),
        "-cppCommand", cpp_defines,
    ]
    # The exit status is not checked, as in the original code.
    call(viterbi_cmd, stdout=stdo, stderr=stde)

    t, d = psm.parse_dripExtract(vitValsFile, os.path.join(args.output_dir, 'pepDB.txt'))

    return t, d, spec_dict
Exemple #4
0
def load_spectra(spectra):
    """Load spectra without any charge filtering and return only the spectra.

    Delegates to load_spectra_ret_dict(spectra, 'all') and returns the first
    element of the 5-tuple it yields; the remaining four values (bound as
    minMz, maxMz, validcharges and one unnamed value in the original code)
    are discarded.
    """
    # ident file input is currently not used to filter spectra
    loaded, _min_mz, _max_mz, _charges, _unused = load_spectra_ret_dict(spectra, 'all')
    return loaded