Beispiel #1
0
def main():
	"""Parse the command line and run the selected PathoScope module.

	Dispatches on ``inputArgs.subcommand``:

	* ``LIB`` -- calls ``pathoLib.append_ti_into_fasta_app`` (per the original
	  comment: appends the taxon id in front of each sequence header).
	* ``MAP`` -- fills a ``PathoMapA.PathoMapOptions`` and calls
	  ``PathoMapA.processPathoMap``.
	* ``ID``  -- fills a ``PathoID.PathoIdOptions`` and calls
	  ``PathoID.pathoscope_reassign``.
	* ``REP`` -- fills a ``PathoReportA.PathoReportOptions`` and calls
	  ``PathoReportA.processPathoReport``.
	* ``QC``  -- runs the external ``pathoqc.py`` script with the remaining
	  command-line arguments.

	Any other subcommand value falls through without doing anything. When
	``inputArgs.verbose`` is set, the total elapsed time is printed at the end.
	"""
	# Function-scope import: only the QC branch needs it.
	import subprocess

	# parse some argument lists
	inputArgs = parser.parse_args()

	#### PathoID modules ####

	start = time()
	subcommand = inputArgs.subcommand

	if subcommand == 'LIB':
		################################################$
		#append taxon id in the front of sequence header
		################################################$
		# NOTE(review): 'X' looks like the placeholder for "option not given";
		# confirm against the argparse defaults.
		NAs = 'X'
		if inputArgs.lib_dbuser != NAs and inputArgs.lib_dbpasswd == NAs:
			# The literals are concatenated inside one call so the WHOLE
			# warning is printed (previously only the first line was).
			print('if you want to use mysql, make sure that you install pathoDB and '
				'also specify the corresponding mysql password correctly '
				'(Ask your mysql admin to access the database).')
		MysqlConf = (inputArgs.lib_dbhost, inputArgs.lib_dbport,
			inputArgs.lib_dbuser, inputArgs.lib_dbpasswd, inputArgs.lib_db)
		taxon_ids = pathoLib.parse_input_app_build_nt_tgt(inputArgs.lib_taxon_ids)
		exclude_taxon_ids = pathoLib.parse_input_app_build_nt_tgt(
			inputArgs.lib_exclude_taxon_ids)
		# The results are not used here; the unpacking is kept so that an
		# unexpected return shape still fails loudly.
		(ncbiNt_ti, ncbiNt_invalid) = pathoLib.append_ti_into_fasta_app(
			inputArgs.lib_reference, taxon_ids, exclude_taxon_ids,
			inputArgs.lib_subtax, MysqlConf, not inputArgs.lib_nodesc,
			inputArgs.lib_online_search, inputArgs.lib_outprefix,
			inputArgs.lib_outdir)

	elif subcommand == 'MAP':
		pathoMapOptions = PathoMapA.PathoMapOptions()
		pathoMapOptions.verbose = inputArgs.verbose
		pathoMapOptions.outDir = inputArgs.map_outdir
		pathoMapOptions.indexDir = inputArgs.map_indexdir
		pathoMapOptions.outAlignFile = inputArgs.map_outalign
		pathoMapOptions.inReadFile = inputArgs.map_inputread
		pathoMapOptions.inReadFilePair1 = inputArgs.map_inputread1
		pathoMapOptions.inReadFilePair2 = inputArgs.map_inputread2
		pathoMapOptions.targetAlignParameters = inputArgs.map_targetalignparams
		pathoMapOptions.filterAlignParameters = inputArgs.map_filteralignparams
		# Comma-separated options: only override the option object's own
		# value when the user actually supplied something.
		comma_separated = (
			('targetRefFiles', inputArgs.map_targetref),
			('filterRefFiles', inputArgs.map_filterref),
			('targetIndexPrefixes', inputArgs.map_targetindex),
			('filterIndexPrefixes', inputArgs.map_filterindex),
			('targetAlignFiles', inputArgs.map_targetalign),
			('filterAlignFiles', inputArgs.map_filteralign),
		)
		for attr_name, raw_value in comma_separated:
			if raw_value:
				setattr(pathoMapOptions, attr_name, raw_value.split(","))
		pathoMapOptions.btHome = inputArgs.map_bthome
		pathoMapOptions.numThreads = inputArgs.map_numthreads
		pathoMapOptions.exp_tag = inputArgs.map_exp_tag + "-"
		PathoMapA.processPathoMap(pathoMapOptions)

	elif subcommand == 'ID':
		pathoIdOptions = PathoID.PathoIdOptions(inputArgs.id_ali_file)
		pathoIdOptions.ali_format = inputArgs.id_ali_format
		pathoIdOptions.verbose = inputArgs.verbose
		pathoIdOptions.out_matrix_flag = inputArgs.id_out_matrix
		pathoIdOptions.score_cutoff = inputArgs.id_score_cutoff
		pathoIdOptions.exp_tag = inputArgs.id_exp_tag
		pathoIdOptions.outdir = inputArgs.id_outdir
		pathoIdOptions.emEpsilon = inputArgs.id_emEpsilon
		pathoIdOptions.maxIter = inputArgs.id_maxIter
		pathoIdOptions.piPrior = inputArgs.id_piPrior
		pathoIdOptions.thetaPrior = inputArgs.id_thetaPrior
		pathoIdOptions.noalign = inputArgs.id_noalign
		pathoIdOptions.noCutOff = inputArgs.id_nocutoff
		PathoID.pathoscope_reassign(pathoIdOptions)

	elif subcommand == 'REP':
		pathoReportOptions = PathoReportA.PathoReportOptions(inputArgs.rep_ali_file)
		pathoReportOptions.verbose = inputArgs.verbose
		pathoReportOptions.contigFlag = inputArgs.rep_contig_flag
		pathoReportOptions.outDir = inputArgs.rep_outdir
		pathoReportOptions.samtoolsHome = inputArgs.rep_samtoolshome
		pathoReportOptions.noCutOff = inputArgs.rep_nocutoff
		mysqlConf = (inputArgs.rep_dbhost, inputArgs.rep_dbport,
			inputArgs.rep_dbuser, inputArgs.rep_dbpasswd, inputArgs.rep_db)
		pathoReportOptions.mysqlConf = mysqlConf
		PathoReportA.processPathoReport(pathoReportOptions)

	elif subcommand == 'QC':
		# Everything after the subcommand is forwarded untouched to PathoQC.
		# NOTE(review): assumes 'QC' is sys.argv[1] -- confirm no global
		# options may precede the subcommand.
		qcargs = sys.argv[2:]
		pathoqcdir = pathoscopedir + os.path.sep + 'pathoscope' + os.path.sep + 'pathoqc'
		pathoqcfile = pathoqcdir + os.path.sep + 'pathoqc.py'
		if os.path.exists(pathoqcfile):
			# An argument list (no shell) keeps paths and arguments that
			# contain spaces or shell metacharacters intact.
			cmd = [sys.executable, pathoqcfile] + qcargs
			print(" ".join(cmd))
			subprocess.call(cmd)
		else:
			print("PathoQC (" + pathoqcfile + ") not found. Please download pathoqc_vXXX.tar.gz and "
				"install it (" + pathoqcdir + ") from http://sourceforge.net/projects/pathoscope/")

	elapsed = time() - start
	if inputArgs.verbose:
		print("Total Elapsed Time: %d" % elapsed)
Beispiel #2
0
def pathoscope_reassign(pathoIdOptions):
    """Run the PathoScope EM on an alignment file and write the TSV report.

    Settings are read from ``pathoIdOptions``: ``out_matrix_flag``,
    ``verbose``, ``score_cutoff``, ``exp_tag``, ``ali_format``, ``ali_file``,
    ``outdir``, ``emEpsilon``, ``maxIter``, ``noalign``, ``piPrior``,
    ``thetaPrior`` and ``noCutOff``.

    Files written under ``outdir`` (names are prefixed with ``exp_tag``):
    the ``-<ali_format>-report.tsv`` report always; ``-initGuess.txt`` and
    ``-finGuess.txt`` (plus whatever ``out_initial_align_matrix`` writes)
    only when ``out_matrix_flag`` is set.

    Returns:
        ``None`` when ``ali_format`` is not 'gnu-sam', 'sam' or 'bl8'.
        Otherwise a 13-tuple: the report path, the eleven values returned by
        ``PathoReportA.write_tsv_report`` in the order 2, 3, 4, 5, 1, 6..11,
        and the path of the (possibly rewritten) alignment file.

    Exits the process with status 1 when the alignment file is empty.
    """
    out_matrix = pathoIdOptions.out_matrix_flag
    verbose = pathoIdOptions.verbose
    scoreCutoff = pathoIdOptions.score_cutoff
    expTag = pathoIdOptions.exp_tag
    ali_format = pathoIdOptions.ali_format
    ali_file = pathoIdOptions.ali_file
    outdir = pathoIdOptions.outdir
    emEpsilon = pathoIdOptions.emEpsilon
    maxIter = pathoIdOptions.maxIter
    upalign = not pathoIdOptions.noalign
    piPrior = pathoIdOptions.piPrior
    thetaPrior = pathoIdOptions.thetaPrior
    noCutOff = pathoIdOptions.noCutOff

    if os.stat(ali_file).st_size == 0:
        print('the alignment file [%s] is empty.' % ali_file)
        sys.exit(1)

    # Numeric codes handed to conv_align2GRmat / rewrite_align:
    # gnu-sam, standard sam, blat m8.
    format_codes = {'gnu-sam': 0, 'sam': 1, 'bl8': 2}
    if ali_format not in format_codes:
        print("unknown alignment format file...")
        return None
    aliFormat = format_codes[ali_format]
    if verbose:
        print("parsing %s file/likelihood score/reads and mapped genomes..."
              % ali_format)

    (U, NU, genomes, reads) = conv_align2GRmat(ali_file, scoreCutoff,
                                               aliFormat)

    nG = len(genomes)
    nR = len(reads)
    if verbose:
        print("EM iteration...")
        print("(Genomes,Reads)=%dx%d" % (nG, nR))
        print("Delta Change:")

    if out_matrix:
        if verbose:
            print("writing initial alignment ...")
        out_initial_align_matrix(genomes, reads, U, NU, expTag, ali_file,
                                 outdir)

    # Best hits BEFORE the EM step; NU is replaced by the EM result below.
    (bestHitInitialReads, bestHitInitial, level1Initial, level2Initial) = \
        PathoReportA.computeBestHit(U, NU, genomes, reads)

    (initPi, pi, _, NU) = pathoscope_em(U, NU, genomes, maxIter, emEpsilon,
                                        verbose, piPrior, thetaPrior)

    if out_matrix:
        initialGuess = outdir + os.sep + expTag + '-initGuess.txt'
        # 'wb' is what the Python 2 csv module expects; the with-block
        # closes the file even if writing fails.
        with open(initialGuess, 'wb') as oFp:
            csv.writer(oFp, delimiter='\t').writerows(
                sorted(zip(initPi, genomes), reverse=True))

    # Best hits AFTER the EM step (uses the updated NU).
    (bestHitFinalReads, bestHitFinal, level1Final, level2Final) = \
        PathoReportA.computeBestHit(U, NU, genomes, reads)

    if out_matrix:
        finalGuess = outdir + os.sep + expTag + '-finGuess.txt'
        with open(finalGuess, 'wb') as oFp:
            csv.writer(oFp, delimiter='\t').writerows(
                sorted(zip(pi, genomes), reverse=True))

    finalReport = outdir + os.sep + expTag + '-' + ali_format + '-report.tsv'
    header = ['Genome', 'Final Guess', 'Final Best Hit',
              'Final Best Hit Read Numbers', 'Final High Confidence Hits',
              'Final Low Confidence Hits', 'Initial Guess',
              'Initial Best Hit', 'Initial Best Hit Read Numbers',
              'Initial High Confidence Hits', 'Initial Low Confidence Hits']
    (x1, x2, x3, x4, x5, x6,
     x7, x8, x9, x10, x11) = PathoReportA.write_tsv_report(
         finalReport, nR, nG, pi, genomes, initPi, bestHitInitial,
         bestHitInitialReads, bestHitFinal, bestHitFinalReads, level1Initial,
         level2Initial, level1Final, level2Final, header, noCutOff)

    reAlignfile = ali_file
    if upalign:
        reAlignfile = rewrite_align(U, NU, ali_file, scoreCutoff, aliFormat,
                                    outdir)

    # x1 is deliberately returned after x5 -- callers rely on this order.
    return (finalReport, x2, x3, x4, x5, x1, x6, x7, x8, x9, x10, x11,
            reAlignfile)
Beispiel #3
0
def main():
    """Parse the command line and run the selected PathoScope module.

    Dispatches on ``inputArgs.subcommand``:

    * ``LIB`` -- calls ``pathoLib.append_ti_into_fasta_app`` (per the original
      comment: appends the taxon id in front of each sequence header).
    * ``MAP`` -- fills a ``PathoMapA.PathoMapOptions`` and calls
      ``PathoMapA.processPathoMap``.
    * ``ID``  -- fills a ``PathoID.PathoIdOptions`` and calls
      ``PathoID.pathoscope_reassign``.
    * ``REP`` -- fills a ``PathoReportA.PathoReportOptions`` and calls
      ``PathoReportA.processPathoReport``.
    * ``QC``  -- runs the external ``pathoqc.py`` script with the remaining
      command-line arguments.

    Any other subcommand value falls through without doing anything. When
    ``inputArgs.verbose`` is set, the total elapsed time is printed at the end.
    """
    # Function-scope import: only the QC branch needs it.
    import subprocess

    # parse some argument lists
    inputArgs = parser.parse_args()

    #### PathoID modules ####

    start = time()
    subcommand = inputArgs.subcommand

    if subcommand == 'LIB':
        ################################################$
        #append taxon id in the front of sequence header
        ################################################$
        # NOTE(review): 'X' looks like the placeholder for "option not given";
        # confirm against the argparse defaults.
        NAs = 'X'
        if inputArgs.lib_dbuser != NAs and inputArgs.lib_dbpasswd == NAs:
            # The literals are concatenated inside one call so the WHOLE
            # warning is printed (previously only the first line was).
            print('if you want to use mysql, make sure that you install pathoDB and '
                  'also specify the corresponding mysql password correctly '
                  '(Ask your mysql admin to access the database).')
        MysqlConf = (inputArgs.lib_dbhost, inputArgs.lib_dbport,
                     inputArgs.lib_dbuser, inputArgs.lib_dbpasswd,
                     inputArgs.lib_db)
        taxon_ids = pathoLib.parse_input_app_build_nt_tgt(
            inputArgs.lib_taxon_ids)
        exclude_taxon_ids = pathoLib.parse_input_app_build_nt_tgt(
            inputArgs.lib_exclude_taxon_ids)
        # The results are not used here; the unpacking is kept so that an
        # unexpected return shape still fails loudly.
        (ncbiNt_ti, ncbiNt_invalid) = pathoLib.append_ti_into_fasta_app(
            inputArgs.lib_reference, taxon_ids, exclude_taxon_ids,
            inputArgs.lib_subtax, MysqlConf, not inputArgs.lib_nodesc,
            inputArgs.lib_online_search, inputArgs.lib_outprefix,
            inputArgs.lib_outdir)

    elif subcommand == 'MAP':
        pathoMapOptions = PathoMapA.PathoMapOptions()
        pathoMapOptions.verbose = inputArgs.verbose
        pathoMapOptions.outDir = inputArgs.map_outdir
        pathoMapOptions.indexDir = inputArgs.map_indexdir
        pathoMapOptions.outAlignFile = inputArgs.map_outalign
        pathoMapOptions.inReadFile = inputArgs.map_inputread
        pathoMapOptions.inReadFilePair1 = inputArgs.map_inputread1
        pathoMapOptions.inReadFilePair2 = inputArgs.map_inputread2
        pathoMapOptions.targetAlignParameters = inputArgs.map_targetalignparams
        pathoMapOptions.filterAlignParameters = inputArgs.map_filteralignparams
        # Comma-separated options: only override the option object's own
        # value when the user actually supplied something.
        comma_separated = (
            ('targetRefFiles', inputArgs.map_targetref),
            ('filterRefFiles', inputArgs.map_filterref),
            ('targetIndexPrefixes', inputArgs.map_targetindex),
            ('filterIndexPrefixes', inputArgs.map_filterindex),
            ('targetAlignFiles', inputArgs.map_targetalign),
            ('filterAlignFiles', inputArgs.map_filteralign),
        )
        for attr_name, raw_value in comma_separated:
            if raw_value:
                setattr(pathoMapOptions, attr_name, raw_value.split(","))
        pathoMapOptions.btHome = inputArgs.map_bthome
        pathoMapOptions.numThreads = inputArgs.map_numthreads
        pathoMapOptions.exp_tag = inputArgs.map_exp_tag + "-"
        PathoMapA.processPathoMap(pathoMapOptions)

    elif subcommand == 'ID':
        pathoIdOptions = PathoID.PathoIdOptions(inputArgs.id_ali_file)
        pathoIdOptions.ali_format = inputArgs.id_ali_format
        pathoIdOptions.verbose = inputArgs.verbose
        pathoIdOptions.out_matrix_flag = inputArgs.id_out_matrix
        pathoIdOptions.score_cutoff = inputArgs.id_score_cutoff
        pathoIdOptions.exp_tag = inputArgs.id_exp_tag
        pathoIdOptions.outdir = inputArgs.id_outdir
        pathoIdOptions.emEpsilon = inputArgs.id_emEpsilon
        pathoIdOptions.maxIter = inputArgs.id_maxIter
        pathoIdOptions.piPrior = inputArgs.id_piPrior
        pathoIdOptions.thetaPrior = inputArgs.id_thetaPrior
        pathoIdOptions.noalign = inputArgs.id_noalign
        pathoIdOptions.noCutOff = inputArgs.id_nocutoff
        PathoID.pathoscope_reassign(pathoIdOptions)

    elif subcommand == 'REP':
        pathoReportOptions = PathoReportA.PathoReportOptions(
            inputArgs.rep_ali_file)
        pathoReportOptions.verbose = inputArgs.verbose
        pathoReportOptions.contigFlag = inputArgs.rep_contig_flag
        pathoReportOptions.outDir = inputArgs.rep_outdir
        pathoReportOptions.samtoolsHome = inputArgs.rep_samtoolshome
        pathoReportOptions.noCutOff = inputArgs.rep_nocutoff
        mysqlConf = (inputArgs.rep_dbhost, inputArgs.rep_dbport,
                     inputArgs.rep_dbuser, inputArgs.rep_dbpasswd,
                     inputArgs.rep_db)
        pathoReportOptions.mysqlConf = mysqlConf
        PathoReportA.processPathoReport(pathoReportOptions)

    elif subcommand == 'QC':
        # Everything after the subcommand is forwarded untouched to PathoQC.
        # NOTE(review): assumes 'QC' is sys.argv[1] -- confirm no global
        # options may precede the subcommand.
        qcargs = sys.argv[2:]
        pathoqcdir = pathoscopedir + os.path.sep + 'pathoscope' + os.path.sep + 'pathoqc'
        pathoqcfile = pathoqcdir + os.path.sep + 'pathoqc.py'
        if os.path.exists(pathoqcfile):
            # An argument list (no shell) keeps paths and arguments that
            # contain spaces or shell metacharacters intact.
            cmd = [sys.executable, pathoqcfile] + qcargs
            print(" ".join(cmd))
            subprocess.call(cmd)
        else:
            print(
                "PathoQC (" + pathoqcfile +
                ") not found. Please download pathoqc_vXXX.tar.gz and "
                "install it (" + pathoqcdir +
                ") from http://sourceforge.net/projects/pathoscope/")

    elapsed = time() - start
    if inputArgs.verbose:
        print("Total Elapsed Time: %d" % elapsed)
Beispiel #4
0
def pathoscope_reassign(pathoIdOptions):
	"""Run the PathoScope EM on an alignment file and write the TSV report.

	Settings are read from ``pathoIdOptions``: ``out_matrix_flag``,
	``verbose``, ``score_cutoff``, ``exp_tag``, ``ali_format``, ``ali_file``,
	``outdir``, ``emEpsilon``, ``maxIter``, ``noalign``, ``piPrior``,
	``thetaPrior`` and ``noCutOff``.

	Files written under ``outdir`` (names are prefixed with ``exp_tag``):
	the ``-<ali_format>-report.tsv`` report always; ``-initGuess.txt`` and
	``-finGuess.txt`` (plus whatever ``out_initial_align_matrix`` writes)
	only when ``out_matrix_flag`` is set.

	Returns:
		``None`` when ``ali_format`` is not 'gnu-sam', 'sam' or 'bl8'.
		Otherwise a 13-tuple: the report path, the eleven values returned by
		``PathoReportA.write_tsv_report`` in the order 2, 3, 4, 5, 1, 6..11,
		and the path of the (possibly rewritten) alignment file.

	Exits the process with status 1 when the alignment file is empty.
	"""
	out_matrix = pathoIdOptions.out_matrix_flag
	verbose = pathoIdOptions.verbose
	scoreCutoff = pathoIdOptions.score_cutoff
	expTag = pathoIdOptions.exp_tag
	ali_format = pathoIdOptions.ali_format
	ali_file = pathoIdOptions.ali_file
	outdir = pathoIdOptions.outdir
	emEpsilon = pathoIdOptions.emEpsilon
	maxIter = pathoIdOptions.maxIter
	upalign = not pathoIdOptions.noalign
	piPrior = pathoIdOptions.piPrior
	thetaPrior = pathoIdOptions.thetaPrior
	noCutOff = pathoIdOptions.noCutOff

	if os.stat(ali_file).st_size == 0:
		print('the alignment file [%s] is empty.' % ali_file)
		sys.exit(1)

	# Numeric codes handed to conv_align2GRmat / rewrite_align:
	# gnu-sam, standard sam, blat m8.
	format_codes = {'gnu-sam': 0, 'sam': 1, 'bl8': 2}
	if ali_format not in format_codes:
		print("unknown alignment format file...")
		return None
	aliFormat = format_codes[ali_format]
	if verbose:
		print("parsing %s file/likelihood score/reads and mapped genomes..."
			% ali_format)

	(U, NU, genomes, reads) = conv_align2GRmat(ali_file, scoreCutoff, aliFormat)

	nG = len(genomes)
	nR = len(reads)
	if verbose:
		print("EM iteration...")
		print("(Genomes,Reads)=%dx%d" % (nG, nR))
		print("Delta Change:")

	if out_matrix:
		if verbose:
			print("writing initial alignment ...")
		out_initial_align_matrix(genomes, reads, U, NU, expTag, ali_file, outdir)

	# Best hits BEFORE the EM step; NU is replaced by the EM result below.
	(bestHitInitialReads, bestHitInitial, level1Initial, level2Initial) = \
		PathoReportA.computeBestHit(U, NU, genomes, reads)

	(initPi, pi, _, NU) = pathoscope_em(U, NU, genomes, maxIter, emEpsilon, verbose,
		piPrior, thetaPrior)

	if out_matrix:
		initialGuess = outdir + os.sep + expTag + '-initGuess.txt'
		# 'wb' is what the Python 2 csv module expects; the with-block
		# closes the file even if writing fails.
		with open(initialGuess, 'wb') as oFp:
			csv.writer(oFp, delimiter='\t').writerows(
				sorted(zip(initPi, genomes), reverse=True))

	# Best hits AFTER the EM step (uses the updated NU).
	(bestHitFinalReads, bestHitFinal, level1Final, level2Final) = \
		PathoReportA.computeBestHit(U, NU, genomes, reads)

	if out_matrix:
		finalGuess = outdir + os.sep + expTag + '-finGuess.txt'
		with open(finalGuess, 'wb') as oFp:
			csv.writer(oFp, delimiter='\t').writerows(
				sorted(zip(pi, genomes), reverse=True))

	finalReport = outdir + os.sep + expTag + '-' + ali_format + '-report.tsv'
	header = ['Genome', 'Final Guess', 'Final Best Hit',
		'Final Best Hit Read Numbers', 'Final High Confidence Hits',
		'Final Low Confidence Hits', 'Initial Guess',
		'Initial Best Hit', 'Initial Best Hit Read Numbers',
		'Initial High Confidence Hits', 'Initial Low Confidence Hits']
	(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11) = PathoReportA.write_tsv_report(
		finalReport, nR, nG, pi, genomes, initPi, bestHitInitial, bestHitInitialReads,
		bestHitFinal, bestHitFinalReads, level1Initial, level2Initial, level1Final,
		level2Final, header, noCutOff)

	reAlignfile = ali_file
	if upalign:
		reAlignfile = rewrite_align(U, NU, ali_file, scoreCutoff, aliFormat, outdir)

	# x1 is deliberately returned after x5 -- callers rely on this order.
	return (finalReport, x2, x3, x4, x5, x1, x6, x7, x8, x9, x10, x11, reAlignfile)
Beispiel #5
0
    pathoIdOptions = PathoID.PathoIdOptions(inputArgs.id_ali_file)
    pathoIdOptions.ali_format = inputArgs.id_ali_format
    pathoIdOptions.verbose = inputArgs.verbose
    pathoIdOptions.out_matrix_flag = inputArgs.id_out_matrix
    pathoIdOptions.score_cutoff = inputArgs.id_score_cutoff
    pathoIdOptions.exp_tag = inputArgs.id_exp_tag
    pathoIdOptions.outdir = inputArgs.id_outdir
    pathoIdOptions.emEpsilon = inputArgs.id_emEpsilon
    pathoIdOptions.maxIter = inputArgs.id_maxIter
    pathoIdOptions.piPrior = inputArgs.id_piPrior
    pathoIdOptions.thetaPrior = inputArgs.id_thetaPrior
    pathoIdOptions.noalign = inputArgs.id_noalign
    PathoID.pathoscope_reassign(pathoIdOptions)

# REP subcommand: copy the report-related command-line values onto a
# PathoReportOptions object and hand it to the report module.
if inputArgs.subcommand == 'REP':
    pathoReportOptions = PathoReportA.PathoReportOptions(inputArgs.rep_ali_file)
    pathoReportOptions.verbose = inputArgs.verbose
    pathoReportOptions.contigFlag = inputArgs.rep_contig_flag
    pathoReportOptions.outDir = inputArgs.rep_outdir
    pathoReportOptions.samtoolsHome = inputArgs.rep_samtoolshome
    # Connection settings travel together as one
    # (host, port, user, password, database) tuple.
    mysqlConf = (
        inputArgs.rep_dbhost,
        inputArgs.rep_dbport,
        inputArgs.rep_dbuser,
        inputArgs.rep_dbpasswd,
        inputArgs.rep_db,
    )
    pathoReportOptions.mysqlConf = mysqlConf
    PathoReportA.processPathoReport(pathoReportOptions)

# Wall-clock time since `start` was taken; only reported in verbose mode.
elapsed = time() - start
if inputArgs.verbose:
    print("Total Elapsed Time: %d" % elapsed)
# ID subcommand: copy the identification-related command-line values onto a
# PathoIdOptions object and run the reassignment step.
if inputArgs.subcommand == 'ID':
	pathoIdOptions = PathoID.PathoIdOptions(inputArgs.id_ali_file)
	pathoIdOptions.ali_format = inputArgs.id_ali_format
	pathoIdOptions.verbose = inputArgs.verbose
	pathoIdOptions.out_matrix_flag = inputArgs.id_out_matrix
	pathoIdOptions.score_cutoff = inputArgs.id_score_cutoff
	pathoIdOptions.exp_tag = inputArgs.id_exp_tag
	pathoIdOptions.outdir = inputArgs.id_outdir
	pathoIdOptions.emEpsilon = inputArgs.id_emEpsilon
	pathoIdOptions.maxIter = inputArgs.id_maxIter
	pathoIdOptions.piPrior = inputArgs.id_piPrior
	pathoIdOptions.thetaPrior = inputArgs.id_thetaPrior
	pathoIdOptions.noalign = inputArgs.id_noalign
	PathoID.pathoscope_reassign(pathoIdOptions)

# REP subcommand: copy the report-related command-line values onto a
# PathoReportOptions object and hand it to the report module.
if inputArgs.subcommand == 'REP':
	pathoReportOptions = PathoReportA.PathoReportOptions(inputArgs.rep_ali_file)
	pathoReportOptions.verbose = inputArgs.verbose
	pathoReportOptions.contigFlag = inputArgs.rep_contig_flag
	pathoReportOptions.outDir = inputArgs.rep_outdir
	pathoReportOptions.samtoolsHome = inputArgs.rep_samtoolshome
	# Connection settings travel together as one
	# (host, port, user, password, database) tuple.
	mysqlConf = (
		inputArgs.rep_dbhost,
		inputArgs.rep_dbport,
		inputArgs.rep_dbuser,
		inputArgs.rep_dbpasswd,
		inputArgs.rep_db,
	)
	pathoReportOptions.mysqlConf = mysqlConf
	PathoReportA.processPathoReport(pathoReportOptions)

# Wall-clock time since `start` was taken; only reported in verbose mode.
elapsed = time() - start
if inputArgs.verbose:
	print("Total Elapsed Time: %d" % elapsed)