def compress_vcf(args, myvcf):
    """Compress a vcf file with bgzip and index it with tabix.

    Every step is a shell command string handed to whichcmd. The end result
    is ``myvcf + ".bgz"`` together with its tabix index
    ``myvcf + ".bgz.tbi"``.

    Args:
        args: options object; passed through to whichcmd untouched.
        myvcf: path of the uncompressed vcf file.
    """
    # Make sure no output of an earlier run is in the way. bgzip will not
    # overwrite an existing .gz without -f, and tabix (also run without -f)
    # can refuse to replace an existing index, so clearing only the .bgz
    # is not enough.
    cmd = "rm -f " + myvcf + ".gz " + myvcf + ".bgz " + myvcf + ".bgz.tbi"
    whichcmd(cmd, args, 0)

    cmd = "bgzip " + myvcf
    whichcmd(cmd, args, 0)

    # move extension from .gz to .bgz
    cmd = "mv " + myvcf + ".gz " + myvcf + ".bgz "
    whichcmd(cmd, args, 0)

    cmd = "tabix -p vcf " + myvcf + ".bgz"
    whichcmd(cmd, args, 0)
def run_vlad_code_compare(args, savidir, inplist):
    """Run Vladimir's savi computations to compare samples.

    Every comma-separated entry of ``args.sample`` that contains a ":" is
    treated as a pair "a:b". For each pair a shell pipeline
    (zcat freqsavi.vcf.bgz | make_qvt | savi_poster -pd | savi_conf | awk)
    writes ``pd_<a><b>.txt`` and three matching ##INFO lines are appended
    to ``header_addition.txt``. If there is at least one pair and
    ``inplist`` has more than two entries, an extra "1 vs all" run writes
    ``pd_00.txt``. When there is at least one pair, the pd files are then
    pasted together, added to the INFO field of freqsavi.vcf.bgz and the
    result is written and compressed as ``finalsavi.vcf``.

    Args:
        args: options object. Attributes read here: hybrid, verbose, sample,
            bin, saviconf, saviprecision, sge, debug, keepfreqfile.
        savidir: directory (path prefix) holding the savi files.
        inplist: list of samples; only its length is used here.

    Relies on the module-level names qvtargs, prior_dict, whichcmd,
    escape_special_char and compress_vcf.
    """
    # declare as global or else Python will treat as a local variable
    global qvtargs

    def say(message):
        # progress messages are only shown in verbose mode
        if args.verbose:
            print(message)

    # awk program that labels the savi_conf columns as PD<a><b>_F/_L/_U
    awk_prog = '{mystr="PD"samp1 samp2; print mystr"_F="$1";"mystr"_L="$5";"mystr"_U="$(NF-1)}'

    # (INFO id suffix, start of the description) for the three header lines
    header_rows = (
        ("_F", "Savi freq delta"),
        ("_L", "Savi freq delta lower bound"),
        ("_U", "Savi freq delta upper bound"),
    )

    # comma-delimited list of SGE job IDs; new ids are put in front
    sgejobids = ""

    # if hybrid, SAMP 1 tot depth = RD + AD, SAMP 2 tot depth = SDP
    if args.hybrid:
        qvtargs = "--hybrid"

    read_freq_vcf = "zcat " + savidir + "/freqsavi.vcf.bgz"
    header_path = savidir + "/header_addition.txt"

    # the header files must not exist yet because they are appended to
    whichcmd("rm -f " + savidir + "/{header_addition.txt,vcfheader.txt}", args, 0)

    say("\n# savi comparison")

    pairs = [entry for entry in args.sample.split(",") if ":" in entry]

    for pair in pairs:
        fields = pair.split(":")
        name_a = fields[0]
        name_b = fields[1]
        # the priors
        prior_a = prior_dict[name_a]
        prior_b = prior_dict[name_b]

        stages = [
            read_freq_vcf,
            args.bin + "/make_qvt -1 -2s " + name_a + "," + name_b + " " + qvtargs,
            args.bin + "/savi_poster -pd " + prior_a + " " + prior_b,
            args.bin + "/savi_conf -fs " + args.saviconf + " " + args.saviprecision,
            "awk -v samp1=" + name_a + " -v samp2=" + name_b + " '" + awk_prog
            + "' > " + savidir + "/pd_" + name_a + name_b + ".txt",
        ]
        cmd = escape_special_char(" | ".join(stages))

        # run cmd and store SGE job id
        myjobid = whichcmd(cmd, args, 1, args.sge, "j_s" + name_a + "_s" + name_b + "_cmp")
        if args.sge:
            print("Your job " + myjobid + " has been submitted")
        sgejobids = myjobid + "," + sgejobids

        # fix vcf header
        with open(header_path, 'a') as f:
            for suffix, label in header_rows:
                f.write(
                    "##INFO=<ID=PD" + name_a + name_b + suffix
                    + ",Number=1,Type=Integer,Description=\"" + label
                    + " for sample " + name_a + " vs " + name_b + "\">\n"
                )

    # if there was at least one savi comparison and there are more than
    # 2 samples, run a 1 vs ALL comparison
    if pairs and len(inplist) > 2:
        # arbitrarily use the first prior: the first sample of the first
        # pair supplies both priors and both halves of the job name
        lead = pairs[0].split(":")[0]
        lead_prior = prior_dict[lead]

        stages = [
            read_freq_vcf,
            args.bin + "/make_qvt -1 -1vsall",
            args.bin + "/savi_poster -pd " + lead_prior + " " + lead_prior,
            args.bin + "/savi_conf -fs " + args.saviconf + " " + args.saviprecision,
            "awk -v samp1=0 -v samp2=0" + " '" + awk_prog + "' > " + savidir + "/pd_00.txt",
        ]
        cmd = escape_special_char(" | ".join(stages))

        # run cmd and store SGE job id
        myjobid = whichcmd(cmd, args, 1, args.sge, "j_s" + lead + "_s" + lead + "_cmp")
        if args.sge:
            print("Your job " + myjobid + " has been submitted")
        sgejobids = myjobid + "," + sgejobids

        # fix vcf header
        with open(header_path, 'a') as f:
            for suffix, label in header_rows:
                f.write(
                    "##INFO=<ID=PD00" + suffix
                    + ",Number=1,Type=Integer,Description=\"" + label
                    + " for samples 1 vs all the others\">\n"
                )

    # if there was at least one savi comparison
    if pairs:
        say("\n# paste savi numbers into INFO field")
        cmd = escape_special_char(
            'paste -d";" ' + savidir + "/pd_*.txt > " + savidir + "/pd.txt"
        )
        # sgejobids is handed over as the hold string, with the sync flag set
        mysyncjob = whichcmd(cmd, args, 1, args.sge, "mysync", sgejobids, 1)
        if args.sge:
            print("Your job " + mysyncjob + " has been submitted")

        if not args.debug:
            say("\n# clean up")
            whichcmd("rm -f " + savidir + "/pd_*.txt", args, 0)

        # add the pasted numbers into the INFO field
        cmd = (
            read_freq_vcf + " | " + args.bin + "/add_to_info -f " + savidir
            + "/pd.txt --header " + savidir + "/vcfheader.txt > " + savidir + "/tmp0.txt"
        )
        whichcmd(cmd, args, 0)

        # new header lines first, then the old header, then the body
        cmd = " ".join([
            "cat",
            header_path,
            savidir + "/vcfheader.txt",
            savidir + "/tmp0.txt",
            ">",
            savidir + "/finalsavi.vcf",
        ])
        whichcmd(cmd, args, 0)

        say("\n# compress finalsavi.vcf")
        compress_vcf(args, savidir + "/finalsavi.vcf")

        if not (args.debug or args.keepfreqfile):
            say("\n# clean up")
            whichcmd("rm -f " + savidir + "/{freqsavi.vcf.bgz,freqsavi.vcf.bgz.tbi}", args, 0)

    if not args.debug:
        say("\n# clean up")
        whichcmd(
            "rm -f " + savidir + "/{header_addition.txt,vcfheader.txt,tmp0.txt,pd.txt,addsavi.vcf.bgz,addsavi.vcf.bgz.tbi,filtersavi.vcf.bgz,filtersavi.vcf.bgz.tbi}",
            args,
            0,
        )

    say("[END]")
def run_vlad_code_freq(args, savidir, inplist):
    """Run Vladimir's savi computations to get freq.

    For every sample in ``inplist`` a shell pipeline
    (zcat <input vcf> | make_qvt | savi_poster -p | savi_conf | awk) writes
    ``freq_<sample>.txt`` and three matching ##INFO lines are appended to
    ``header_addition.txt``. The per-sample files are then pasted together,
    added to the INFO field of the input vcf, and the result is written and
    compressed as ``freqsavi.vcf``.

    The input vcf is ``addsavi.vcf.bgz`` when ``args.nofilter`` is set and
    ``filtersavi.vcf.bgz`` otherwise.

    Args:
        args: options object. Attributes read here: nofilter, hybrid,
            verbose, bin, saviconf, saviprecision, sge, debug.
        savidir: directory (path prefix) holding the savi files.
        inplist: iterable of sample names (strings); each must be a key of
            prior_dict.

    Relies on the module-level names qvtargs, prior_dict, whichcmd,
    escape_special_char and compress_vcf.
    """
    # declare as global or else Python will treat as a local variable
    global qvtargs

    def say(message):
        # progress messages are only shown in verbose mode
        if args.verbose:
            print(message)

    # awk program that labels the savi_conf columns as P<sample>_F/_L/_U
    awk_prog = '{mystr="P"samp; print mystr"_F="$1";"mystr"_L="$5";"mystr"_U="$(NF-1)}'

    # (INFO id suffix, start of the description) for the three header lines
    header_rows = (
        ("_F", "Savi freq"),
        ("_L", "Savi freq lower bound"),
        ("_U", "Savi freq upper bound"),
    )

    # comma-delimited list of SGE job IDs; new ids are put in front
    sgejobids = ""

    # the header files must not exist yet because they are appended to
    whichcmd("rm -f " + savidir + "/{header_addition.txt,vcfheader.txt}", args, 0)

    # prefix for the commands below: stream the input vcf into a pipe
    input_vcf = "addsavi.vcf.bgz" if args.nofilter else "filtersavi.vcf.bgz"
    firstcmd = "zcat " + savidir + "/" + input_vcf + " | "

    # make freq files
    for position, sample in enumerate(inplist):
        # if hybrid, tot depth = RD + AD for the first sample only
        if args.hybrid:
            qvtargs = "--rdplusad" if position == 0 else ""

        say("\n# compute freq for sample " + sample)

        stages = [
            args.bin + "/make_qvt -1 -s " + sample + " " + qvtargs,
            args.bin + "/savi_poster -p " + prior_dict[sample],
            args.bin + "/savi_conf -fs " + args.saviconf + " " + args.saviprecision,
            "awk -v samp=" + sample + " '" + awk_prog + "' > "
            + savidir + "/freq_" + sample + ".txt",
        ]
        cmd = escape_special_char(firstcmd + " | ".join(stages))

        # run cmd and store SGE job id
        myjobid = whichcmd(cmd, args, 1, args.sge, "j_s" + sample + "_freq")
        if args.sge:
            print("Your job " + myjobid + " has been submitted")
        sgejobids = myjobid + "," + sgejobids

        # fix vcf header
        with open(savidir + "/header_addition.txt", 'a') as f:
            for suffix, label in header_rows:
                f.write(
                    "##INFO=<ID=P" + sample + suffix
                    + ",Number=1,Type=Integer,Description=\"" + label
                    + " for sample " + sample + "\">\n"
                )

    say("\n# paste savi freq numbers into INFO field")
    cmd = escape_special_char(
        'paste -d";" ' + savidir + "/freq_*.txt > " + savidir + "/freq.txt"
    )
    # sgejobids is handed over as the hold string, with the sync flag set
    mysyncjob = whichcmd(cmd, args, 1, args.sge, "mysync", sgejobids, 1)
    if args.sge:
        print("Your job " + mysyncjob + " has been submitted")

    if not args.debug:
        say("\n# clean up")
        whichcmd("rm -f " + savidir + "/freq_*.txt", args, 0)

    # add into INFO field
    cmd = (
        firstcmd + args.bin + "/add_to_info -f " + savidir + "/freq.txt --header "
        + savidir + "/vcfheader.txt > " + savidir + "/tmp0.txt"
    )
    whichcmd(cmd, args, 0)

    # add new header lines to vcf
    cmd = " ".join([
        "cat",
        savidir + "/header_addition.txt",
        savidir + "/vcfheader.txt",
        savidir + "/tmp0.txt",
        ">",
        savidir + "/freqsavi.vcf",
    ])
    whichcmd(cmd, args, 0)

    if not args.debug:
        say("\n# clean up")
        whichcmd(
            "rm -f " + savidir + "/{header_addition.txt,vcfheader.txt,tmp0.txt,freq.txt}",
            args,
            0,
        )

    # compress
    say("\n# compress freqsavi.vcf")
    compress_vcf(args, savidir + "/freqsavi.vcf")