예제 #1
0
def generate_pvcf(args, result, contigINFO, argv):
    """Write the records in ``result`` to ``args.output`` as a one-sample VCF.

    ``Generation_VCF_header`` emits the header, this function adds the
    ``#CHROM`` column line, and every non-empty record becomes one row whose
    only FORMAT field is ``GT``.

    Args:
        args: Options object; ``output``, ``sample`` and ``report_readid``
            are read here.
        result: Iterable of positional records. Indices used: 0 chromosome,
            1 position, 2 genotype, 3 SV type, 4 SV length, 5 end,
            6/7 two-element CIPOS/CILEN bounds, 8 supporting-read value,
            9 read names, 10 ID, 11 REF, 12 ALT, 13 QUAL, 14 strand,
            15 FILTER value. Records equal to ``[]`` are skipped.
        contigINFO: Forwarded unchanged to ``Generation_VCF_header``.
        argv: Forwarded unchanged to ``Generation_VCF_header``.
    """
    # The context manager closes (and flushes) the output even if a malformed
    # record raises part-way through; the handle was previously left open.
    with open(args.output, 'w') as file:
        Generation_VCF_header(file, contigINFO, args.sample, argv)
        file.write(
            "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t%s\n" %
            (args.sample))

        for i in result:
            if i == []:
                continue
            info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};CIPOS={CIPOS};CILEN={CILEN};RE={RE};RNAMES={RNAMES};STRAND={STRAND}".format(
                PRECISION="IMPRECISE" if i[2] == "0/0" else "PRECISE",
                SVTYPE=i[3],
                SVLEN=i[4],
                END=i[5],
                # Plain concatenation: both bounds must already be strings.
                CIPOS=i[6][0] + ',' + i[6][1],
                CILEN=i[7][0] + ',' + i[7][1],
                RE=i[8],
                RNAMES=i[9] if args.report_readid else "NULL",
                STRAND=i[14])

            file.write(
                "{CHR}\t{POS}\t{ID}\t{REF}\t{ALT}\t{QUAL}\t{PASS}\t{INFO}\t{FORMAT}\t{GT}\n"
                .format(CHR=i[0],
                        POS=str(i[1]),
                        ID=i[10],
                        REF=i[11],
                        ALT=i[12],
                        # '.' is written in place of a missing quality.
                        QUAL='.' if i[13] is None else i[13],
                        # NOTE(review): '******' is written verbatim as the
                        # FILTER value when i[15] is empty - confirm this
                        # literal is intended (it may have been "PASS").
                        PASS='******' if i[15] == [] else i[15],
                        INFO=info_list,
                        FORMAT="GT",
                        GT=i[2]))
예제 #2
0
def generate_pvcf(args, result, contigINFO, argv, ref_g):
    """Write genotyped records in ``result`` to ``args.output`` as VCF.

    ``Generation_VCF_header`` emits the header, this function adds the
    ``#CHROM`` column line, and every kept record becomes one row with the
    FORMAT fields ``GT:DR:DV:PL:GQ``.

    Args:
        args: Options object; ``output``, ``sample``, ``max_size`` and
            ``report_readid`` are read here. ``max_size == -1`` disables the
            size filter.
        result: Iterable of positional records. Indices used: 0 chromosome,
            1 position, 2 genotype, 3 SV type, 4 SV length, 5 end,
            6 CIPOS, 7 CILEN, 8 a sequence whose items 0..3 are written as
            DV, DR, PL and GQ, 9 read names, 10 ID, 11 REF, 12 ALT,
            13 QUAL, 14 strand (INV), 15 inserted sequence (INS) or
            ``CHR2:END`` mate string (any other type). Records equal to
            ``[]`` are skipped.
        contigINFO: Forwarded unchanged to ``Generation_VCF_header``.
        argv: Forwarded unchanged to ``Generation_VCF_header``.
        ref_g: Mapping from chromosome name to an object with an indexable
            ``seq`` attribute; used to materialise REF/ALT for symbolic
            ``<INS>`` / ``<DEL>`` alleles.
    """
    record_line = (
        "{CHR}\t{POS}\t{ID}\t{REF}\t{ALT}\t{QUAL}\t{PASS}\t{INFO}\t{FORMAT}\t"
        "{GT}:{DR}:{RE}:{PL}:{GQ}\n")
    sized_types = ('INS', 'DEL', 'DUP', 'INV')

    # The context manager closes (and flushes) the output even if a malformed
    # record raises part-way through; the handle was previously left open.
    with open(args.output, 'w') as file:
        Generation_VCF_header(file, contigINFO, args.sample, argv)
        file.write(
            "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t%s\n" %
            (args.sample))

        for i in result:
            if i == []:
                continue

            # Missing quality passes; otherwise anything below 5 is "q5".
            if i[13] == "." or i[13] is None:
                filter_lable = "PASS"
            else:
                filter_lable = "PASS" if float(i[13]) >= 5.0 else "q5"

            svtype = i[3]
            # Only the four typed SVs are size-filtered; every other type
            # falls through to the breakend branch unfiltered.
            if svtype in sized_types:
                if abs(i[4]) > args.max_size and args.max_size != -1:
                    continue

            precision = "IMPRECISE" if i[2] == "0/0" else "PRECISE"
            rnames = i[9] if args.report_readid else "NULL"
            ref = i[11]
            alt = i[12]
            is_breakend = False

            if svtype == 'INS':
                if i[12] == '<INS>':
                    # Replace the symbolic allele: REF is the anchor base,
                    # ALT is the anchor base followed by the inserted bases.
                    anchor = str(ref_g[i[0]].seq[max(i[1] - 1, 0)])
                    ref = anchor
                    alt = anchor + i[15]
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};CIPOS={CIPOS};CILEN={CILEN};RE={RE};RNAMES={RNAMES}".format(
                    PRECISION=precision,
                    SVTYPE=svtype,
                    SVLEN=i[4],
                    END=i[1],
                    CIPOS=i[6],
                    CILEN=i[7],
                    RE=i[8][0],
                    RNAMES=rnames)
            elif svtype == 'DEL':
                if i[12] == '<DEL>':
                    # Replace the symbolic allele: REF spans the anchor base
                    # plus the deleted bases, ALT is the anchor base alone.
                    start = max(int(i[1]) - 1, 0)
                    ref = str(ref_g[i[0]].seq[start:int(i[1]) - int(i[4])])
                    alt = str(ref_g[i[0]].seq[start])
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};CIPOS={CIPOS};CILEN={CILEN};RE={RE};RNAMES={RNAMES};STRAND=+-".format(
                    PRECISION=precision,
                    SVTYPE=svtype,
                    SVLEN=i[4],
                    END=i[1] + abs(i[4]),
                    CIPOS=i[6],
                    CILEN=i[7],
                    RE=i[8][0],
                    RNAMES=rnames)
            elif svtype == 'DUP':
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};RE={RE};RNAMES={RNAMES};STRAND=-+".format(
                    PRECISION=precision,
                    SVTYPE=svtype,
                    SVLEN=i[4],
                    END=i[5],
                    RE=i[8][0],
                    RNAMES=rnames)
            elif svtype == 'INV':
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};RE={RE};RNAMES={RNAMES};STRAND={STRAND}".format(
                    PRECISION=precision,
                    SVTYPE=svtype,
                    SVLEN=i[4],
                    END=i[5],
                    RE=i[8][0],
                    RNAMES=rnames,
                    STRAND=i[14])
            else:
                # BND
                is_breakend = True
                info_list = "{PRECISION};SVTYPE={SVTYPE};RE={RE};RNAMES={RNAMES}".format(
                    PRECISION=precision,
                    SVTYPE=svtype,
                    RE=i[8][0],
                    RNAMES=rnames)

            # Allele frequency = DV / (DV + DR). Zero depth or non-numeric
            # placeholders fall back to the VCF missing value.
            try:
                info_list += ";AF=" + str(
                    round(i[8][0] / (i[8][0] + i[8][1]), 4))
            except (ArithmeticError, TypeError, ValueError):
                info_list += ";AF=."

            if is_breakend and ':' in i[15]:
                mate = i[15].split(':')
                info_list += ";CHR2={CHR2};END={END}".format(CHR2=mate[0],
                                                             END=mate[1])

            file.write(
                record_line.format(
                    CHR=i[0],
                    POS=str(i[1]),
                    ID=i[10],
                    REF=ref,
                    ALT=alt,
                    # A missing quality is written as '.', not "None".
                    QUAL='.' if i[13] is None else i[13],
                    PASS=filter_lable,
                    INFO=info_list,
                    FORMAT="GT:DR:DV:PL:GQ",
                    GT=i[2],
                    DR=i[8][1],
                    RE=i[8][0],
                    PL=i[8][2],
                    GQ=i[8][3]))
예제 #3
0
def generate_output(args, semi_result, contigINFO, argv, ref_g):
    """Write the calls in ``semi_result`` to ``args.output`` as VCF.

    ``Generation_VCF_header`` emits the header (the original docstring
    states VCF version 4.2), this function adds the ``#CHROM`` column line,
    and every kept call becomes one row with the FORMAT fields
    ``GT:DR:DV:PL:GQ``. IDs are ``cuteSV.<TYPE>.<n>`` with one zero-based
    counter per SV type.

    Args:
        args: Options object; ``output``, ``sample``, ``genotype``,
            ``max_size`` and ``report_readid`` are read here.
            ``max_size == -1`` disables the size filter; a truthy
            ``genotype`` adds an ``AF`` entry to INFO.
        semi_result: Iterable of positional records whose layout depends on
            the SV type in index 1 (see the per-branch comments). Any type
            other than INS/DEL/DUP/INV is treated as a breakend, with
            index 1 holding the ALT string.
        contigINFO: Forwarded unchanged to ``Generation_VCF_header``.
        argv: Forwarded unchanged to ``Generation_VCF_header``.
        ref_g: Mapping from chromosome name to an object with an indexable
            ``seq`` attribute; used to fill REF/ALT bases.
    """
    # genotype_trigger = TriggerGT[args.genotype]

    svid = {"INS": 0, "DEL": 0, "BND": 0, "DUP": 0, "INV": 0}
    action = args.genotype
    record_line = (
        "{CHR}\t{POS}\t{ID}\t{REF}\t{ALT}\t{QUAL}\t{PASS}\t{INFO}\t{FORMAT}\t"
        "{GT}:{DR}:{RE}:{PL}:{GQ}\n")

    def af_suffix(support, reference):
        # AF = DV / (DV + DR); zero depth or non-numeric placeholders fall
        # back to the VCF missing value.
        try:
            return ";AF=" + str(
                round(int(support) / (int(support) + int(reference)), 4))
        except (ArithmeticError, TypeError, ValueError):
            return ";AF=."

    # The context manager closes (and flushes) the output even if a malformed
    # record raises part-way through; the handle was previously left open.
    with open(args.output, 'w') as file:
        Generation_VCF_header(file, contigINFO, args.sample, argv)
        file.write(
            "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t%s\n" %
            (args.sample))

        for i in semi_result:
            if i[1] in ["DEL", "INS"]:
                # Layout: 2 pos, 3 len, 4 DV, 5 CIPOS, 6 CILEN, 7 DR, 8 GT,
                # 9 PL, 10 GQ, 11 QUAL, 12 read names, 13 inserted sequence.
                if abs(int(float(i[3]))) > args.max_size and args.max_size != -1:
                    continue
                pos = int(i[2])
                if i[1] == "INS":
                    cal_end = pos
                else:
                    cal_end = pos + abs(int(float(i[3])))
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};CIPOS={CIPOS};CILEN={CILEN};RE={RE};RNAMES={RNAMES}".format(
                    PRECISION="IMPRECISE" if i[8] == "0/0" else "PRECISE",
                    SVTYPE=i[1],
                    SVLEN=i[3],
                    END=str(cal_end),
                    CIPOS=i[5],
                    CILEN=i[6],
                    RE=i[4],
                    RNAMES=i[12] if args.report_readid else "NULL")
                if action:
                    info_list += af_suffix(i[4], i[7])
                if i[1] == "DEL":
                    info_list += ";STRAND=+-"
                if i[11] == "." or i[11] is None:
                    filter_lable = "PASS"
                else:
                    filter_lable = "PASS" if float(i[11]) >= 5.0 else "q5"

                anchor_idx = max(pos - 1, 0)
                anchor = str(ref_g[i[0]].seq[anchor_idx])
                if i[1] == 'INS':
                    # REF is the anchor base; ALT appends the inserted bases.
                    ref = anchor
                    alt = anchor + i[13]
                else:
                    # REF spans the anchor base plus the deleted bases;
                    # ALT is the anchor base alone.
                    ref = str(ref_g[i[0]].seq[anchor_idx:pos - int(i[3])])
                    alt = anchor
                file.write(
                    record_line.format(
                        CHR=i[0],
                        POS=str(pos),
                        ID="cuteSV.%s.%d" % (i[1], svid[i[1]]),
                        REF=ref,
                        ALT=alt,
                        INFO=info_list,
                        FORMAT="GT:DR:DV:PL:GQ",
                        GT=i[8],
                        DR=i[7],
                        RE=i[4],
                        PL=i[9],
                        GQ=i[10],
                        QUAL=i[11],
                        PASS=filter_lable))
                svid[i[1]] += 1
            elif i[1] == "DUP":
                # Layout: 2 pos, 3 len, 4 DV, 5 DR, 6 GT, 7 PL, 8 GQ,
                # 9 QUAL, 10 read names.
                if abs(int(float(i[3]))) > args.max_size and args.max_size != -1:
                    continue
                cal_end = int(i[2]) + 1 + abs(int(float(i[3])))
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};RE={RE};STRAND=-+;RNAMES={RNAMES}".format(
                    PRECISION="IMPRECISE" if i[6] == "0/0" else "PRECISE",
                    SVTYPE=i[1],
                    SVLEN=i[3],
                    END=str(cal_end),
                    RE=i[4],
                    RNAMES=i[10] if args.report_readid else "NULL")
                if action:
                    info_list += af_suffix(i[4], i[5])
                if i[9] == ".":
                    filter_lable = "PASS"
                else:
                    filter_lable = "PASS" if float(i[9]) >= 5.0 else "q5"
                file.write(
                    record_line.format(
                        CHR=i[0],
                        POS=str(int(i[2]) + 1),
                        ID="cuteSV.%s.%d" % (i[1], svid[i[1]]),
                        REF=str(ref_g[i[0]].seq[int(i[2])]),
                        ALT="<%s>" % (i[1]),
                        INFO=info_list,
                        FORMAT="GT:DR:DV:PL:GQ",
                        GT=i[6],
                        DR=i[5],
                        RE=i[4],
                        PL=i[7],
                        GQ=i[8],
                        QUAL=i[9],
                        PASS=filter_lable))
                svid[i[1]] += 1
            elif i[1] == "INV":
                # Layout: 2 pos, 3 len, 4 DV, 5 DR, 6 GT, 7 strand, 8 PL,
                # 9 GQ, 10 QUAL, 11 read names.
                if abs(int(float(i[3]))) > args.max_size and args.max_size != -1:
                    continue
                cal_end = int(i[2]) + 1 + abs(int(float(i[3])))
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};RE={RE};STRAND={STRAND};RNAMES={RNAMES}".format(
                    PRECISION="IMPRECISE" if i[6] == "0/0" else "PRECISE",
                    SVTYPE=i[1],
                    SVLEN=i[3],
                    END=str(cal_end),
                    RE=i[4],
                    STRAND=i[7],
                    RNAMES=i[11] if args.report_readid else "NULL")
                if action:
                    info_list += af_suffix(i[4], i[5])
                if i[10] == ".":
                    filter_lable = "PASS"
                else:
                    filter_lable = "PASS" if float(i[10]) >= 5.0 else "q5"
                file.write(
                    record_line.format(
                        CHR=i[0],
                        POS=str(int(i[2]) + 1),
                        ID="cuteSV.%s.%d" % (i[1], svid[i[1]]),
                        REF=str(ref_g[i[0]].seq[int(i[2])]),
                        ALT="<%s>" % (i[1]),
                        INFO=info_list,
                        FORMAT="GT:DR:DV:PL:GQ",
                        GT=i[6],
                        DR=i[5],
                        RE=i[4],
                        PL=i[8],
                        GQ=i[9],
                        QUAL=i[10],
                        PASS=filter_lable))
                svid[i[1]] += 1
            else:
                # BND. Layout: 1 ALT string, 2 pos, 5 DV, 6 DR, 7 GT, 8 PL,
                # 9 GQ, 10 QUAL, 11 read names. CHR2 and END are not written
                # to INFO for breakends here.
                info_list = "{PRECISION};SVTYPE={SVTYPE};RE={RE};RNAMES={RNAMES}".format(
                    PRECISION="IMPRECISE" if i[7] == "0/0" else "PRECISE",
                    SVTYPE="BND",
                    RE=i[5],
                    RNAMES=i[11] if args.report_readid else "NULL")
                if action:
                    info_list += af_suffix(i[5], i[6])
                if i[10] == ".":
                    filter_lable = "PASS"
                else:
                    filter_lable = "PASS" if float(i[10]) >= 5.0 else "q5"
                file.write(
                    record_line.format(
                        CHR=i[0],
                        POS=str(int(i[2]) + 1),
                        ID="cuteSV.%s.%d" % ("BND", svid["BND"]),
                        REF='N',
                        ALT=i[1],
                        INFO=info_list,
                        FORMAT="GT:DR:DV:PL:GQ",
                        GT=i[7],
                        DR=i[6],
                        RE=i[5],
                        PL=i[8],
                        GQ=i[9],
                        QUAL=i[10],
                        PASS=filter_lable))
                svid["BND"] += 1
예제 #4
0
def generate_pvcf(args, result, contigINFO, argv):
    """Write genotyped records in ``result`` to ``args.output`` as VCF.

    ``Generation_VCF_header`` emits the header, this function adds the
    ``#CHROM`` column line, and every non-empty record becomes one row with
    the FORMAT fields ``GT:DR:DV:PL:GQ``.

    Args:
        args: Options object; ``output``, ``sample`` and ``report_readid``
            are read here.
        result: Iterable of positional records. Indices used: 0 chromosome,
            1 position, 2 genotype, 3 SV type, 4 SV length, 5 end,
            6/7 two-element CIPOS/CILEN bounds, 8 a sequence whose items
            0..3 are written as DV, DR, PL and GQ, 9 read names, 10 ID,
            11 REF, 12 ALT, 13 QUAL, 14 strand (INV). Records equal to
            ``[]`` are skipped; any type other than INS/DEL/DUP/INV is
            written as a breakend.
        contigINFO: Forwarded unchanged to ``Generation_VCF_header``.
        argv: Forwarded unchanged to ``Generation_VCF_header``.
    """
    # The context manager closes (and flushes) the output even if a malformed
    # record raises part-way through; the handle was previously left open.
    with open(args.output, 'w') as file:
        Generation_VCF_header(file, contigINFO, args.sample, argv)
        file.write(
            "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t%s\n" %
            (args.sample))

        for i in result:
            if i == []:
                continue

            precision = "IMPRECISE" if i[2] == "0/0" else "PRECISE"
            rnames = i[9] if args.report_readid else "NULL"

            # INFO templates carry no trailing ';' so that no empty INFO
            # entry is produced (INS and BND previously ended with one).
            if i[3] == 'INS':
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};CIPOS={CIPOS};CILEN={CILEN};RE={RE};RNAMES={RNAMES}".format(
                    PRECISION=precision,
                    SVTYPE=i[3],
                    SVLEN=i[4],
                    END=i[1],
                    CIPOS=str(i[6][0]) + ',' + str(i[6][1]),
                    CILEN=str(i[7][0]) + ',' + str(i[7][1]),
                    RE=i[8][0],
                    RNAMES=rnames)
            elif i[3] == 'DEL':
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};CIPOS={CIPOS};CILEN={CILEN};RE={RE};RNAMES={RNAMES};STRAND=+-".format(
                    PRECISION=precision,
                    SVTYPE=i[3],
                    SVLEN=i[4],
                    END=i[1] + abs(i[4]),
                    CIPOS=str(i[6][0]) + ',' + str(i[6][1]),
                    CILEN=str(i[7][0]) + ',' + str(i[7][1]),
                    RE=i[8][0],
                    RNAMES=rnames)
            elif i[3] == 'DUP':
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};RE={RE};RNAMES={RNAMES};STRAND=-+".format(
                    PRECISION=precision,
                    SVTYPE=i[3],
                    SVLEN=i[4],
                    END=i[5],
                    RE=i[8][0],
                    RNAMES=rnames)
            elif i[3] == 'INV':
                info_list = "{PRECISION};SVTYPE={SVTYPE};SVLEN={SVLEN};END={END};RE={RE};RNAMES={RNAMES};STRAND={STRAND}".format(
                    PRECISION=precision,
                    SVTYPE=i[3],
                    SVLEN=i[4],
                    END=i[5],
                    RE=i[8][0],
                    RNAMES=rnames,
                    STRAND=i[14])
            else:
                # BND
                info_list = "{PRECISION};SVTYPE={SVTYPE};RE={RE};RNAMES={RNAMES}".format(
                    PRECISION=precision,
                    SVTYPE=i[3],
                    RE=i[8][0],
                    RNAMES=rnames)

            # Missing quality passes; otherwise anything below 5 is "q5".
            if i[13] == "." or i[13] is None:
                filter_lable = "PASS"
            else:
                filter_lable = "PASS" if float(i[13]) >= 5.0 else "q5"

            file.write(
                "{CHR}\t{POS}\t{ID}\t{REF}\t{ALT}\t{QUAL}\t{PASS}\t{INFO}\t{FORMAT}\t{GT}:{DR}:{RE}:{PL}:{GQ}\n"
                .format(CHR=i[0],
                        POS=str(i[1]),
                        ID=i[10],
                        REF=i[11],
                        ALT=i[12],
                        QUAL='.' if i[13] is None else i[13],
                        PASS=filter_lable,
                        INFO=info_list,
                        FORMAT="GT:DR:DV:PL:GQ",
                        GT=i[2],
                        DR=i[8][1],
                        RE=i[8][0],
                        PL=i[8][2],
                        GQ=i[8][3]))