def generate_pvcf(args, result, contigINFO, argv):
    """Write ``result`` to ``args.output`` as a single-sample VCF (GT-only FORMAT).

    The header is emitted by ``Generation_VCF_header``, followed by the
    ``#CHROM`` column line and one data line per non-empty record.

    Record fields read here, by index:
        0 CHROM, 1 POS, 2 genotype, 3 SVTYPE, 4 SVLEN, 5 END,
        6 CIPOS pair, 7 CILEN pair, 8 RE, 9 read names, 10 ID,
        11 REF, 12 ALT, 13 QUAL, 14 STRAND, 15 FILTER.

    Args:
        args: options object; ``output``, ``sample`` and ``report_readid``
            are read.
        result: iterable of records; entries equal to ``[]`` are skipped.
        contigINFO: passed through to ``Generation_VCF_header``.
        argv: passed through to ``Generation_VCF_header``.
    """
    # The context manager guarantees the file is flushed and closed even if
    # a malformed record raises half-way through.
    with open(args.output, 'w') as file:
        Generation_VCF_header(file, contigINFO, args.sample, argv)
        file.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t%s\n" % (args.sample))
        for i in result:
            if i == []:
                continue
            info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};CIPOS={CIPOS};CILEN={CILEN};RE={RE};RNAMES={RNAMES};STRAND={STRAND}".format(
                PRECISION="IMPRECISE" if i[2] == "0/0" else "PRECISE",
                SVTYPE=i[3],
                SVLEN=i[4],
                END=i[5],
                # str() keeps string pairs unchanged and also accepts ints.
                CIPOS=str(i[6][0]) + ',' + str(i[6][1]),
                CILEN=str(i[7][0]) + ',' + str(i[7][1]),
                RE=i[8],
                RNAMES=i[9] if args.report_readid else "NULL",
                STRAND=i[14])
            file.write(
                "{CHR}\t{POS}\t{ID}\t{REF}\t{ALT}\t{QUAL}\t{PASS}\t{INFO}\t{FORMAT}\t{GT}\n"
                .format(
                    CHR=i[0],
                    POS=str(i[1]),
                    ID=i[10],
                    REF=i[11],
                    ALT=i[12],
                    # "." is the VCF missing-value marker.
                    QUAL='.' if i[13] is None else i[13],
                    # NOTE(review): the default used to be the literal
                    # '******', which looks like a redaction artifact and is
                    # not a valid FILTER value; "PASS" is assumed to be the
                    # intended default - confirm against upstream.
                    PASS='PASS' if i[15] == [] else i[15],
                    INFO=info_list,
                    FORMAT="GT",
                    GT=i[2]))
def generate_pvcf(args, result, contigINFO, argv, ref_g):
    """Write ``result`` to ``args.output`` as a single-sample VCF.

    The header is emitted by ``Generation_VCF_header``, followed by the
    ``#CHROM`` column line and one ``GT:DR:DV:PL:GQ`` data line per kept
    record.

    Record fields read here, by index:
        0 CHROM, 1 POS, 2 genotype, 3 SVTYPE, 4 SVLEN, 5 END (DUP/INV),
        6 CIPOS, 7 CILEN, 8 sequence indexed as [DV, DR, PL, GQ],
        9 read names, 10 ID, 11 REF, 12 ALT, 13 QUAL, 14 STRAND (INV),
        15 inserted sequence (symbolic INS) or "CHR2:END" mate (other types).

    Behaviour:
        * Records equal to ``[]`` are skipped.
        * INS/DEL/DUP/INV records whose ``abs(SVLEN)`` exceeds
          ``args.max_size`` are skipped unless ``args.max_size == -1``.
        * FILTER is "PASS" when QUAL is "." / ``None`` or >= 5.0, else "q5".
        * Symbolic ``<INS>`` / ``<DEL>`` alleles are replaced with sequence
          looked up in ``ref_g[CHROM].seq``.
        * Any SVTYPE other than INS/DEL/DUP/INV is handled as a breakend.

    Args:
        args: options object; ``output``, ``sample``, ``max_size`` and
            ``report_readid`` are read.
        result: iterable of records as described above.
        contigINFO: passed through to ``Generation_VCF_header``.
        argv: passed through to ``Generation_VCF_header``.
        ref_g: mapping from contig name to an object exposing ``.seq``.
    """
    sized_types = ('INS', 'DEL', 'DUP', 'INV')
    record_template = (
        "{CHR}\t{POS}\t{ID}\t{REF}\t{ALT}\t{QUAL}\t{PASS}\t{INFO}\t{FORMAT}"
        "\t{GT}:{DR}:{RE}:{PL}:{GQ}\n")

    # The context manager guarantees the file is flushed and closed even if
    # a malformed record raises half-way through.
    with open(args.output, 'w') as file:
        Generation_VCF_header(file, contigINFO, args.sample, argv)
        file.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t%s\n" % (args.sample))

        for i in result:
            if i == []:
                continue

            qual = i[13]
            if qual == "." or qual is None:
                filter_label = "PASS"
            else:
                filter_label = "PASS" if float(qual) >= 5.0 else "q5"

            sv_type = i[3]
            if sv_type in sized_types:
                if abs(i[4]) > args.max_size and args.max_size != -1:
                    continue

            precision = "IMPRECISE" if i[2] == "0/0" else "PRECISE"
            rnames = i[9] if args.report_readid else "NULL"
            ref, alt = i[11], i[12]
            info_tail = ""  # appended after AF to keep the original key order

            if sv_type == 'INS':
                if alt == '<INS>':
                    # Anchor base at POS (0-based index POS-1, clamped at 0).
                    anchor = str(ref_g[i[0]].seq[max(i[1] - 1, 0)])
                    ref = anchor
                    alt = anchor + i[15]
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};CIPOS={CIPOS};CILEN={CILEN};RE={RE};RNAMES={RNAMES}".format(
                    PRECISION=precision,
                    SVTYPE=sv_type,
                    SVLEN=i[4],
                    END=i[1],
                    CIPOS=i[6],
                    CILEN=i[7],
                    RE=i[8][0],
                    RNAMES=rnames)
            elif sv_type == 'DEL':
                if alt == '<DEL>':
                    start = max(int(i[1]) - 1, 0)
                    contig_seq = ref_g[i[0]].seq
                    # The slice end is POS - SVLEN, exactly as computed before.
                    ref = str(contig_seq[start:int(i[1]) - int(i[4])])
                    alt = str(contig_seq[start])
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};CIPOS={CIPOS};CILEN={CILEN};RE={RE};RNAMES={RNAMES};STRAND=+-".format(
                    PRECISION=precision,
                    SVTYPE=sv_type,
                    SVLEN=i[4],
                    END=i[1] + abs(i[4]),
                    CIPOS=i[6],
                    CILEN=i[7],
                    RE=i[8][0],
                    RNAMES=rnames)
            elif sv_type == 'DUP':
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};RE={RE};RNAMES={RNAMES};STRAND=-+".format(
                    PRECISION=precision,
                    SVTYPE=sv_type,
                    SVLEN=i[4],
                    END=i[5],
                    RE=i[8][0],
                    RNAMES=rnames)
            elif sv_type == 'INV':
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};RE={RE};RNAMES={RNAMES};STRAND={STRAND}".format(
                    PRECISION=precision,
                    SVTYPE=sv_type,
                    SVLEN=i[4],
                    END=i[5],
                    RE=i[8][0],
                    RNAMES=rnames,
                    STRAND=i[14])
            else:  # BND
                info_list = "{PRECISION};SVTYPE={SVTYPE};RE={RE};RNAMES={RNAMES}".format(
                    PRECISION=precision,
                    SVTYPE=sv_type,
                    RE=i[8][0],
                    RNAMES=rnames)
                if ':' in i[15]:
                    mate = i[15].split(':')
                    info_tail = ";CHR2={CHR2};END={END}".format(
                        CHR2=mate[0], END=mate[1])

            # AF = DV / (DV + DR); "." when the counts are unusable.
            try:
                info_list += ";AF=" + str(
                    round(i[8][0] / (i[8][0] + i[8][1]), 4))
            except (ZeroDivisionError, TypeError, ValueError):
                info_list += ";AF=."
            info_list += info_tail

            file.write(record_template.format(
                CHR=i[0],
                POS=str(i[1]),
                ID=i[10],
                REF=ref,
                ALT=alt,
                # "." is the VCF missing-value marker; previously a None
                # QUAL was written out as the text "None".
                QUAL="." if qual is None else qual,
                PASS=filter_label,
                INFO=info_list,
                FORMAT="GT:DR:DV:PL:GQ",
                GT=i[2],
                DR=i[8][1],
                RE=i[8][0],
                PL=i[8][2],
                GQ=i[8][3]))
def generate_output(args, semi_result, contigINFO, argv, ref_g):
    '''
    Generation of VCF format file.
    VCF version: 4.2

    Writes the header (via ``Generation_VCF_header``), the ``#CHROM`` column
    line and one ``GT:DR:DV:PL:GQ`` data line per kept record to
    ``args.output``. IDs are ``cuteSV.<TYPE>.<n>`` with a separate zero-based
    counter per type; the counter only advances for records actually written.

    Record layouts read here (index 0 is always CHROM, index 2 the start
    position):
        INS/DEL: 1 type, 3 SVLEN, 4 DV, 5 CIPOS, 6 CILEN, 7 DR, 8 GT, 9 PL,
                 10 GQ, 11 QUAL, 12 read names, 13 inserted sequence (INS).
        DUP:     1 type, 3 SVLEN, 4 DV, 5 DR, 6 GT, 7 PL, 8 GQ, 9 QUAL,
                 10 read names.
        INV:     1 type, 3 SVLEN, 4 DV, 5 DR, 6 GT, 7 STRAND, 8 PL, 9 GQ,
                 10 QUAL, 11 read names.
        other:   treated as BND; 1 ALT string, 5 DV, 6 DR, 7 GT, 8 PL, 9 GQ,
                 10 QUAL, 11 read names.

    INS/DEL/DUP/INV records longer than ``args.max_size`` are skipped unless
    ``args.max_size == -1``. ``AF`` is added to INFO only when
    ``args.genotype`` is truthy. FILTER is "PASS" when QUAL is "." / ``None``
    or >= 5.0, otherwise "q5".

    Args:
        args: options object; ``output``, ``sample``, ``genotype``,
            ``max_size`` and ``report_readid`` are read.
        semi_result: iterable of records laid out as described above.
        contigINFO: passed through to ``Generation_VCF_header``.
        argv: passed through to ``Generation_VCF_header``.
        ref_g: mapping from contig name to an object exposing ``.seq``.
    '''
    svid = {"INS": 0, "DEL": 0, "BND": 0, "DUP": 0, "INV": 0}
    action = args.genotype
    record_template = (
        "{CHR}\t{POS}\t{ID}\t{REF}\t{ALT}\t{QUAL}\t{PASS}\t{INFO}\t{FORMAT}"
        "\t{GT}:{DR}:{RE}:{PL}:{GQ}\n")

    # The context manager guarantees the file is flushed and closed even if
    # a malformed record raises half-way through.
    with open(args.output, 'w') as file:
        Generation_VCF_header(file, contigINFO, args.sample, argv)
        file.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t%s\n" % (args.sample))

        for i in semi_result:
            sv_type = i[1]
            info_tail = ""  # appended after AF to keep the original key order

            if sv_type in ("DEL", "INS", "DUP", "INV"):
                sv_len = abs(int(float(i[3])))
                if sv_len > args.max_size and args.max_size != -1:
                    continue

            if sv_type in ("DEL", "INS"):
                id_key = sv_type
                start = int(i[2])
                pos = start
                contig_seq = ref_g[i[0]].seq
                # Anchor base at POS (0-based index POS-1, clamped at 0).
                anchor = str(contig_seq[max(start - 1, 0)])
                if sv_type == "INS":
                    cal_end = start
                    ref = anchor
                    alt = anchor + i[13]
                else:
                    cal_end = start + sv_len
                    # The slice end is start - SVLEN, exactly as computed
                    # before.
                    ref = str(contig_seq[max(start - 1, 0):start - int(i[3])])
                    alt = anchor
                    info_tail = ";STRAND=+-"
                gt, dr, dv, pl, gq, qual = i[8], i[7], i[4], i[9], i[10], i[11]
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};CIPOS={CIPOS};CILEN={CILEN};RE={RE};RNAMES={RNAMES}".format(
                    PRECISION="IMPRECISE" if gt == "0/0" else "PRECISE",
                    SVTYPE=sv_type,
                    SVLEN=i[3],
                    END=str(cal_end),
                    CIPOS=i[5],
                    CILEN=i[6],
                    RE=dv,
                    RNAMES=i[12] if args.report_readid else "NULL")
            elif sv_type == "DUP":
                id_key = sv_type
                start = int(i[2])
                pos = start + 1
                cal_end = start + 1 + sv_len
                ref = str(ref_g[i[0]].seq[start])
                alt = "<%s>" % (sv_type)
                gt, dr, dv, pl, gq, qual = i[6], i[5], i[4], i[7], i[8], i[9]
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};RE={RE};STRAND=-+;RNAMES={RNAMES}".format(
                    PRECISION="IMPRECISE" if gt == "0/0" else "PRECISE",
                    SVTYPE=sv_type,
                    SVLEN=i[3],
                    END=str(cal_end),
                    RE=dv,
                    RNAMES=i[10] if args.report_readid else "NULL")
            elif sv_type == "INV":
                id_key = sv_type
                start = int(i[2])
                pos = start + 1
                cal_end = start + 1 + sv_len
                ref = str(ref_g[i[0]].seq[start])
                alt = "<%s>" % (sv_type)
                gt, dr, dv, pl, gq, qual = i[6], i[5], i[4], i[8], i[9], i[10]
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};RE={RE};STRAND={STRAND};RNAMES={RNAMES}".format(
                    PRECISION="IMPRECISE" if gt == "0/0" else "PRECISE",
                    SVTYPE=sv_type,
                    SVLEN=i[3],
                    END=str(cal_end),
                    RE=dv,
                    STRAND=i[7],
                    RNAMES=i[11] if args.report_readid else "NULL")
            else:  # BND: field 1 carries the ALT string, not a type name
                id_key = "BND"
                pos = int(i[2]) + 1
                ref = 'N'
                alt = i[1]
                gt, dr, dv, pl, gq, qual = i[7], i[6], i[5], i[8], i[9], i[10]
                info_list = "{PRECISION};SVTYPE={SVTYPE};RE={RE};RNAMES={RNAMES}".format(
                    PRECISION="IMPRECISE" if gt == "0/0" else "PRECISE",
                    SVTYPE="BND",
                    RE=dv,
                    RNAMES=i[11] if args.report_readid else "NULL")

            if action:
                # AF = DV / (DV + DR); "." when the counts are unusable.
                try:
                    info_list += ";AF=" + str(
                        round(int(dv) / (int(dv) + int(dr)), 4))
                except (ZeroDivisionError, TypeError, ValueError):
                    info_list += ";AF=."
            info_list += info_tail

            # A None QUAL is treated as missing for every SV type; the
            # DUP/INV/BND paths used to crash on float(None).
            if qual == "." or qual is None:
                filter_label = "PASS"
            else:
                filter_label = "PASS" if float(qual) >= 5.0 else "q5"

            file.write(record_template.format(
                CHR=i[0],
                POS=str(pos),
                ID="cuteSV.%s.%d" % (id_key, svid[id_key]),
                REF=ref,
                ALT=alt,
                # "." is the VCF missing-value marker.
                QUAL="." if qual is None else qual,
                PASS=filter_label,
                INFO=info_list,
                FORMAT="GT:DR:DV:PL:GQ",
                GT=gt,
                DR=dr,
                RE=dv,
                PL=pl,
                GQ=gq))
            svid[id_key] += 1
def generate_pvcf(args, result, contigINFO, argv):
    """Write ``result`` to ``args.output`` as a single-sample VCF.

    The header is emitted by ``Generation_VCF_header``, followed by the
    ``#CHROM`` column line and one ``GT:DR:DV:PL:GQ`` data line per non-empty
    record. The INFO layout depends on SVTYPE; any type other than
    INS/DEL/DUP/INV is handled as a breakend.

    Record fields read here, by index:
        0 CHROM, 1 POS, 2 genotype, 3 SVTYPE, 4 SVLEN, 5 END (DUP/INV),
        6 CIPOS pair, 7 CILEN pair, 8 sequence indexed as [DV, DR, PL, GQ],
        9 read names, 10 ID, 11 REF, 12 ALT, 13 QUAL, 14 STRAND (INV).

    FILTER is "PASS" when QUAL is "." / ``None`` or >= 5.0, otherwise "q5".

    Args:
        args: options object; ``output``, ``sample`` and ``report_readid``
            are read.
        result: iterable of records; entries equal to ``[]`` are skipped.
        contigINFO: passed through to ``Generation_VCF_header``.
        argv: passed through to ``Generation_VCF_header``.
    """
    # The context manager guarantees the file is flushed and closed even if
    # a malformed record raises half-way through.
    with open(args.output, 'w') as file:
        Generation_VCF_header(file, contigINFO, args.sample, argv)
        file.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t%s\n" % (args.sample))

        for i in result:
            if i == []:
                continue

            sv_type = i[3]
            precision = "IMPRECISE" if i[2] == "0/0" else "PRECISE"
            rnames = i[9] if args.report_readid else "NULL"

            if sv_type == 'INS':
                # The template used to end with a stray ";", which produced
                # an empty trailing INFO entry.
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};CIPOS={CIPOS};CILEN={CILEN};RE={RE};RNAMES={RNAMES}".format(
                    PRECISION=precision,
                    SVTYPE=sv_type,
                    SVLEN=i[4],
                    END=i[1],
                    CIPOS=str(i[6][0]) + ',' + str(i[6][1]),
                    CILEN=str(i[7][0]) + ',' + str(i[7][1]),
                    RE=i[8][0],
                    RNAMES=rnames)
            elif sv_type == 'DEL':
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};CIPOS={CIPOS};CILEN={CILEN};RE={RE};RNAMES={RNAMES};STRAND=+-".format(
                    PRECISION=precision,
                    SVTYPE=sv_type,
                    SVLEN=i[4],
                    END=i[1] + abs(i[4]),
                    CIPOS=str(i[6][0]) + ',' + str(i[6][1]),
                    CILEN=str(i[7][0]) + ',' + str(i[7][1]),
                    RE=i[8][0],
                    RNAMES=rnames)
            elif sv_type == 'DUP':
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};RE={RE};RNAMES={RNAMES};STRAND=-+".format(
                    PRECISION=precision,
                    SVTYPE=sv_type,
                    SVLEN=i[4],
                    END=i[5],
                    RE=i[8][0],
                    RNAMES=rnames)
            elif sv_type == 'INV':
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};RE={RE};RNAMES={RNAMES};STRAND={STRAND}".format(
                    PRECISION=precision,
                    SVTYPE=sv_type,
                    SVLEN=i[4],
                    END=i[5],
                    RE=i[8][0],
                    RNAMES=rnames,
                    STRAND=i[14])
            else:  # BND
                # Same stray trailing ";" removed here as for INS.
                info_list = "{PRECISION};SVTYPE={SVTYPE};RE={RE};RNAMES={RNAMES}".format(
                    PRECISION=precision,
                    SVTYPE=sv_type,
                    RE=i[8][0],
                    RNAMES=rnames)

            qual = i[13]
            if qual == "." or qual is None:
                filter_label = "PASS"
            else:
                filter_label = "PASS" if float(qual) >= 5.0 else "q5"

            file.write(
                "{CHR}\t{POS}\t{ID}\t{REF}\t{ALT}\t{QUAL}\t{PASS}\t{INFO}\t{FORMAT}\t{GT}:{DR}:{RE}:{PL}:{GQ}\n"
                .format(
                    CHR=i[0],
                    POS=str(i[1]),
                    ID=i[10],
                    REF=i[11],
                    ALT=i[12],
                    # "." is the VCF missing-value marker.
                    QUAL='.' if qual is None else qual,
                    PASS=filter_label,
                    INFO=info_list,
                    FORMAT="GT:DR:DV:PL:GQ",
                    GT=i[2],
                    DR=i[8][1],
                    RE=i[8][0],
                    PL=i[8][2],
                    GQ=i[8][3]))