예제 #1
0
def filter_vcf(pileup, outfile=None,  chr_col=0, ref_col=3, alt_col=4, sep='\t'):
    """Copy a VCF file, dropping unwanted variant lines.

    Header lines (those starting with '#') are copied through. A data line
    is kept only when all of the following hold:

    * it has at least 8 ``sep``-separated fields,
    * its ALT field differs from its REF field,
    * its chromosome field is found in ``ACCEPTED_CHR`` (per the original
      docstring: chromosomes 1 - 22, X, Y and MT).

    Every line is stripped of surrounding whitespace before it is examined
    and is written back followed by a single newline.

    Args:
        pileup: Path of the input VCF file.
        outfile: Path of the filtered output; defaults to
            ``pileup + '.filt'``.
        chr_col: Zero-based index of the chromosome column.
        ref_col: Zero-based index of the REF column.
        alt_col: Zero-based index of the ALT column.
        sep: Field separator used to split data lines.
    """
    if outfile is None:
        outfile = pileup + '.filt'

    # Both handles are managed by ``with`` so they are closed (and the output
    # flushed) even when an exception interrupts the loop.
    with open(pileup, "r") as fh:
        # Presumably removes a stale output file -- confirm against fu.delete.
        fu.delete(outfile)
        with open(outfile, "w") as fh_out:
            for line in fh:
                line = line.strip()
                if line.startswith('#'):
                    fh_out.write(line + '\n')
                    continue

                fields = line.split(sep)
                if len(fields) < 8:
                    # Too few columns to be a data record: drop it.
                    continue

                # Named ``chrom`` to avoid shadowing the builtin ``chr``.
                chrom = fields[chr_col].strip()
                if (fields[alt_col] != fields[ref_col]
                        and fu.find_first_index(ACCEPTED_CHR, chrom) > -1):
                    fh_out.write(line + '\n')
예제 #2
0
def filter_vcf(pileup,
               outfile=None,
               chr_col=0,
               ref_col=3,
               alt_col=4,
               sep='\t'):
    """Write a filtered copy of a VCF file.

    Lines starting with '#' are passed through. Any other line is written
    only if it splits into at least 8 fields on ``sep``, its ALT field is
    different from its REF field, and its chromosome field is found in
    ``ACCEPTED_CHR`` (per the original docstring: chromosomes 1 - 22, X, Y
    and MT). Lines are stripped of surrounding whitespace before being
    examined and are written back with one trailing newline.

    Args:
        pileup: Path of the input VCF file.
        outfile: Output path; when ``None``, ``pileup + '.filt'`` is used.
        chr_col: Zero-based index of the chromosome column.
        ref_col: Zero-based index of the REF column.
        alt_col: Zero-based index of the ALT column.
        sep: Field separator used to split data lines.
    """
    if outfile is None:
        outfile = pileup + '.filt'

    # Context managers guarantee both files are closed, and the output is
    # flushed, even if processing fails part-way through.
    with open(pileup, "r") as fh:
        # Presumably clears any previous output -- confirm against fu.delete.
        fu.delete(outfile)
        with open(outfile, "w") as fh_out:
            for line in fh:
                line = line.strip()

                if line.startswith('#'):
                    fh_out.write(line + '\n')
                    continue

                fields = line.split(sep)
                if len(fields) < 8:
                    # Not enough columns for a data record: skip the line.
                    continue

                ref = fields[ref_col]
                alt = fields[alt_col]
                # ``chrom`` rather than ``chr`` so the builtin is not hidden.
                chrom = fields[chr_col].strip()

                if alt != ref and fu.find_first_index(ACCEPTED_CHR,
                                                      chrom) > -1:
                    fh_out.write(line + '\n')
예제 #3
0
def filter_pileup(pileup,
                  outfile=None,
                  chr_col=0,
                  ref_col=2,
                  alt_col=3,
                  sep='\t'):
    """Filter a pileup file and write the surviving lines in VCF form.

    The output starts with the text returned by ``vcfheader(pileup)``. Each
    input line is stripped and split on ``sep``; it is emitted only when its
    ALT field differs from its REF field and its chromosome field is found
    in ``ACCEPTED_CHR``. Emitted lines are produced by passing the first
    nine fields to ``varpileup_line2vcf_line``.

    Blank lines are skipped; previously they raised ``IndexError`` with the
    default column indices.

    Args:
        pileup: Path of the input pileup file.
        outfile: Output path; when ``None``, ``pileup + '.vcf'`` is used.
        chr_col: Zero-based index of the chromosome column.
        ref_col: Zero-based index of the reference-allele column.
        alt_col: Zero-based index of the alternate-allele column.
        sep: Field separator used to split each line.
    """
    if outfile is None:
        outfile = pileup + '.vcf'

    # Context managers guarantee both files are closed, and the output is
    # flushed, even if processing fails part-way through.
    with open(pileup, "r") as fh:
        # Presumably clears any previous output -- confirm against fu.delete.
        fu.delete(outfile)
        with open(outfile, "w") as fh_out:
            fh_out.write(vcfheader(pileup) + '\n')

            for line in fh:
                line = line.strip()
                if not line:
                    # e.g. a trailing empty line at the end of the file
                    continue

                fields = line.split(sep)
                ref = fields[ref_col]
                alt = fields[alt_col]
                # ``chrom`` rather than ``chr`` so the builtin is not hidden.
                chrom = fields[chr_col].strip()

                if alt != ref and fu.find_first_index(ACCEPTED_CHR,
                                                      chrom) > -1:
                    fh_out.write(varpileup_line2vcf_line(fields[0:9]) + '\n')
예제 #4
0
def filter_vcf(pileup,
               outfile=None,
               chr_col=0,
               ref_col=3,
               alt_col=4,
               sep='\t'):
    """Filter a VCF file down to informative lines on accepted chromosomes.

    Header lines (starting with '#') are always copied. A non-header line is
    copied only when it has at least 8 fields after splitting on ``sep``,
    its ALT field is not equal to its REF field, and its chromosome field is
    found in ``ACCEPTED_CHR``. Each line is stripped of surrounding
    whitespace first and is written back with a single trailing newline.

    Args:
        pileup: Path of the input VCF file.
        outfile: Output path; when ``None``, ``pileup + '.filt'`` is used.
        chr_col: Zero-based index of the chromosome column.
        ref_col: Zero-based index of the REF column.
        alt_col: Zero-based index of the ALT column.
        sep: Field separator used to split data lines.
    """
    if outfile is None:
        outfile = pileup + '.filt'

    # ``with`` closes both files (flushing the output) on success and on
    # error alike; the original left both handles open.
    with open(pileup, "r") as fh:
        # Presumably deletes an existing output -- confirm against fu.delete.
        fu.delete(outfile)
        with open(outfile, "w") as fh_out:
            for line in fh:
                line = line.strip()
                if line.startswith('#'):
                    fh_out.write(line + '\n')
                    continue

                fields = line.split(sep)
                if len(fields) < 8:
                    # Short lines cannot be data records: ignore them.
                    continue

                # Avoid the name ``chr``: it would shadow the builtin.
                chrom = fields[chr_col].strip()
                is_variant = fields[alt_col] != fields[ref_col]
                if is_variant and \
                        fu.find_first_index(ACCEPTED_CHR, chrom) > -1:
                    fh_out.write(line + '\n')


### EOF
예제 #5
0
def filter_pileup(pileup, outfile=None, chr_col=0, ref_col=2, alt_col=3, sep='\t'):
    """Convert the accepted variant lines of a pileup file to a VCF file.

    The output begins with ``vcfheader(pileup)``. An input line contributes
    an output line only when its ALT field differs from its REF field and
    its chromosome field is found in ``ACCEPTED_CHR``; the output line is
    ``varpileup_line2vcf_line`` applied to the first nine fields.

    Blank input lines are ignored. With the default column indices they used
    to abort the run with ``IndexError``.

    Args:
        pileup: Path of the input pileup file.
        outfile: Path of the output; defaults to ``pileup + '.vcf'``.
        chr_col: Zero-based index of the chromosome column.
        ref_col: Zero-based index of the reference-allele column.
        alt_col: Zero-based index of the alternate-allele column.
        sep: Field separator used to split each line.
    """
    if outfile is None:
        outfile = pileup + '.vcf'

    # Both handles are managed by ``with`` so they are closed (and the output
    # flushed) even when an exception interrupts the loop.
    with open(pileup, "r") as fh:
        # Presumably removes a stale output file -- confirm against fu.delete.
        fu.delete(outfile)
        with open(outfile, "w") as fh_out:
            fh_out.write(vcfheader(pileup) + '\n')

            for line in fh:
                line = line.strip()
                if not line:
                    # Nothing to parse on an empty line.
                    continue

                fields = line.split(sep)
                # Named ``chrom`` to avoid shadowing the builtin ``chr``.
                chrom = fields[chr_col].strip()
                if (fields[alt_col] != fields[ref_col]
                        and fu.find_first_index(ACCEPTED_CHR, chrom) > -1):
                    fh_out.write(varpileup_line2vcf_line(fields[0:9]) + '\n')
def run(infile, format):
    """Run the full chain of annotation steps on ``infile``.

    Each step is a function of ``ann`` that reads ``infile + tmpextin`` and
    writes ``infile + tmpextout`` (presumably -- the suffix handling lives
    in ``ann``). The first step reads the unsuffixed input and writes
    ``.1``; step *k* of the remaining chain reads ``.k`` and writes
    ``.(k+1)``. Afterwards the intermediate files are deleted and the last
    output is renamed to ``infile + '.annot'`` and then to the same name
    with ``'.vcf.annot'`` replaced by ``'.annot.vcf'``.

    Args:
        infile: Path of the VCF file to annotate.
        format: Accepted but not used; the steps are invoked with
            ``format='vcf'`` (the last step is given no format at all).
    """
    print("Running . . .")

    # The dbSNP step starts the chain from the raw input file.
    ann.getSnpsFromDbSnp(vcf=infile, format='vcf', tmpextin='', tmpextout='.1')

    # (method name on ``ann``, step-specific keyword arguments), in order.
    chain = (
        ('getBigRefGene', {'format': 'vcf'}),
        ('getGenes', {'format': 'vcf', 'table': 'refGene',
                      'promoter_offset': 500}),
        ('addOverlapWithCytoband', {'format': 'vcf', 'table': 'cytoBand'}),
        ('addOverlapWithGadAll', {'format': 'vcf', 'table': 'gadAll'}),
        ('addOverlapWithGwasCatalog', {'format': 'vcf',
                                       'table': 'gwasCatalog'}),
        ('addOverlapWithMiRNA', {'format': 'vcf', 'table': 'targetScanS'}),
        ('addOverlapWitHUGOGeneNomenclature', {'format': 'vcf',
                                               'table': 'hugo'}),
        ('addOverlapWithCnvDatabase', {'format': 'vcf', 'table': 'dgv_Cnv'}),
        ('addOverlapWithCnvDatabase', {'format': 'vcf',
                                       'table': 'abParts_IG_T_CelReceptors'}),
        ('addOverlapWithCnvDatabase', {'format': 'vcf',
                                       'table': 'mcCarroll_Cnv'}),
        ('addOverlapWithCnvDatabase', {'format': 'vcf',
                                       'table': 'conrad_Cnv'}),
        ('addOverlapWithGenomicSuperDups', {'format': 'vcf',
                                            'table': 'genomicSuperDups'}),
        ('addOverlapWithTfbsConsSites', {'table': 'tfbsConsSites'}),
    )

    stage = 1
    for method_name, extra in chain:
        # Look the method up only when its turn comes, as the original did.
        step = getattr(ann, method_name)
        step(vcf=infile,
             tmpextin='.' + str(stage),
             tmpextout='.' + str(stage + 1),
             **extra)
        stage += 1

    # ``stage`` is now the suffix of the final output; everything before it
    # is an intermediate file.
    for n in range(1, stage):
        fu.delete(infile + '.' + str(n))

    annotated = infile + '.annot'
    os.rename(infile + '.' + str(stage), annotated)
    os.rename(annotated, annotated.replace('.vcf.annot', '.annot.vcf'))