Esempio n. 1
0
def averagesFromFile(VEPannotatedVCF, column2retain, lSOTerm):
    """Average the consequence-allele frequency per Sequence Ontology term.

    Reads a gzip-compressed VCF carrying VEP ``CSQ`` annotations and, for
    every consequence term of every CSQ entry, accumulates element 0 of the
    result of ``gp.AnnotateFreqCSQ_REF_ALT`` (skipping results equal to
    ``'NA'``, which are recorded in ``listOfErrors`` instead).

    Args:
        VEPannotatedVCF: gzip-compressed VCF annotated using VEP (anything
            accepted by ``gzip.open``).
        column2retain: list of sample column indexes; the indexes of the
            samples named in ``sampleToConsider`` are appended to it in
            place when the ``#CHROM`` header line is read.
        lSOTerm: list of Sequence Ontology terms that define variant
            consequences; fixes the order of the returned means.

    Returns:
        list: ``[chromosome of the last record, element 4 of the last
        frequency result]`` followed by one entry per term of ``lSOTerm``:
        the mean accumulated frequency, or ``'na'`` if the term was never
        observed.

    Note:
        Relies on names defined outside this function: ``args.n`` (number
        of sample columns to read), ``sampleToConsider``, ``listOfErrors``
        and the ``gp`` module.  Records without a CSQ annotation are
        skipped.
    """
    dSOT = {}  # SO term -> [number of observations, summed frequency]
    with gzip.open(VEPannotatedVCF, 'r') as vcf:
        for line in vcf:
            # gzip yields bytes in binary mode; decode before text handling
            decodedLine = line.decode()

            if decodedLine.startswith('##'):
                if 'ID=CSQ' in decodedLine:
                    # The field names follow "Format:" and end with the
                    # closing quote (single or double) and '>'.
                    csqHeader = (decodedLine.rstrip().split(':')[1].lstrip()
                                 .rstrip('"\'>').split('|'))
                continue

            if decodedLine.startswith('#'):
                columns = decodedLine.split()
                for ind in sampleToConsider:
                    column2retain.append(columns.index(ind))
                continue

            linesplit = decodedLine.rstrip().split()
            mychr = linesplit[0]
            mypos = linesplit[1]
            myref = linesplit[3]
            myalt = linesplit[4]

            # Parse INFO afresh for every record so that a record lacking
            # CSQ cannot silently reuse the previous record's annotation.
            dInfo = {}
            for entry in linesplit[7].split(';'):
                key, sep, value = entry.partition('=')
                if sep:  # flag-type INFO entries carry no value
                    dInfo[key] = value
            if 'CSQ' not in dInfo:
                continue

            # One CSQ entry per (allele, feature) pair, comma separated
            for mcsq in dInfo['CSQ'].split(','):
                dCsq = dict(zip(csqHeader, mcsq.split('|')))
                GTfields = [linesplit[column2retain[col]]
                            for col in range(args.n)]
                freqCSQ_REF_ALT = gp.AnnotateFreqCSQ_REF_ALT(
                    dCsq['Allele'], myref, myalt, GTfields)

                # A CSQ entry may list several terms joined by '&'; tally
                # each term on its own so it can be matched against lSOTerm.
                for cons in dCsq['Consequence'].split('&'):
                    if freqCSQ_REF_ALT[0] != 'NA':
                        tally = dSOT.setdefault(cons, [0, 0])
                        tally[0] += 1
                        tally[1] += float(freqCSQ_REF_ALT[0])
                    else:
                        # kept for the error report (allele matching check)
                        listOfErrors.append(
                            (mychr, mypos, myref, myalt, dCsq['Allele']))

    CsqMeans = [mychr, freqCSQ_REF_ALT[4]]
    for vcsq in lSOTerm:
        if vcsq in dSOT:
            CsqMeans.append(dSOT[vcsq][1] / float(dSOT[vcsq][0]))
        else:
            CsqMeans.append('na')
    return CsqMeans
Esempio n. 2
0
def main():
    """Write a per-consequence allele-frequency table for a VEP-annotated VCF.

    Command-line options:
        -f  gzip-compressed VCF with VEP ``CSQ`` annotations (input).
        -v  tab-separated table of VEP consequences; its first line is the
            column header and must name the ``SO term`` and ``IMPACT``
            columns.
        -o  path of the table to write.
        -e  path to the error file (accepted but not used here).

    One output row is written for every consequence term of every CSQ entry
    of every record.  Allele frequencies come from
    ``gp.AnnotateFreqCSQ_REF_ALT`` computed over all sample columns.
    Records without a CSQ annotation produce no rows.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", help="path to  input  file ", type=str,
                        required=True)
    parser.add_argument("-v", help="path to table of vep consequences  ",
                        type=str, required=True)
    parser.add_argument("-o", help="path to output file  ", type=str,
                        required=True)
    parser.add_argument("-e", help="path to error file", type=str,
                        required=True)
    args = parser.parse_args()

    # ---- Read the VEP consequence table --------------------------------
    # dRank doubles as a whitelist: an unknown IMPACT raises KeyError.
    dRank = {
        "HIGH": "HIGH",
        "LOW": "LOW",
        "MODERATE": "MODERATE",
        "MODIFIER": "MODIFIER"
    }
    dSOTermRank = {}
    lSOTerm = []  # SO terms in file order (expected: ordered by severity)
    csqTitle = None
    with open(args.v, 'r') as csqTable:
        for csqLine in csqTable:
            myRowList = csqLine.rstrip().split('\t')
            if csqTitle is None:  # first line holds the column names
                csqTitle = myRowList
                continue
            dCsq = dict(zip(csqTitle, myRowList))
            dSOTermRank[dCsq['SO term']] = dRank[dCsq['IMPACT']]
            lSOTerm.append(myRowList[0])

    # ---- Loop over the VCF lines ---------------------------------------
    # Columns are joined with " \t " (space, tab, space): this is exactly
    # what the historical print("a", "\t", "b", ...) calls produced, and it
    # is kept so existing consumers of the table are not broken.
    columnSep = " \t "
    with open(args.o, 'w') as out:
        print("Chr", "Pos", "VariantClass", "CSQallele", "CSQrank",
              "Consequence", "CSQfreq", "REFfreq", "ALTfreq", "MAF",
              "Population", sep=columnSep, file=out)

        with gzip.open(args.f, 'r') as vcf:
            for line in vcf:
                # gzip yields bytes in binary mode; decode first
                decodedLine = line.decode()

                if decodedLine.startswith('#'):
                    if "ID=CSQ" in decodedLine:
                        csqHeader = (decodedLine.rstrip().split(":")[1]
                                     .lstrip().rstrip("\">").split("|"))
                    continue

                linesplit = decodedLine.rstrip().split()
                myref = linesplit[3]
                myalt = linesplit[4]

                # Parse INFO afresh per record so a record lacking CSQ
                # cannot reuse the previous record's annotation.
                dInfo = {}
                for entry in linesplit[7].split(";"):
                    key, sep, value = entry.partition("=")
                    if sep:  # flag-type INFO entries carry no value
                        dInfo[key] = value
                if "CSQ" not in dInfo:
                    continue

                # One CSQ entry per (allele, feature) pair, comma separated
                for mcsq in dInfo["CSQ"].split(","):
                    dCsq = dict(zip(csqHeader, mcsq.split("|")))

                    GTfields = linesplit[9:]
                    nbAploidSamples = len(GTfields) * 2
                    freqCSQ_REF_ALT = gp.AnnotateFreqCSQ_REF_ALT(
                        dCsq["Allele"], myref, myalt, nbAploidSamples,
                        GTfields)

                    for c in dCsq['Consequence'].split("&"):
                        # Raises ValueError for a term missing from the
                        # consequence table.
                        # NOTE(review): the index list is rebuilt for every
                        # term, so the "most severe" term is always the
                        # term itself and CSQrank is the rank of `c`.
                        # Confirm whether the rank of the most severe term
                        # of the whole entry was intended.
                        mostSevereCsq = lSOTerm[lSOTerm.index(c)]
                        print(linesplit[0], linesplit[1],
                              dCsq["VARIANT_CLASS"], dCsq["Allele"],
                              dSOTermRank[mostSevereCsq], c,
                              freqCSQ_REF_ALT[0], freqCSQ_REF_ALT[1],
                              freqCSQ_REF_ALT[2], freqCSQ_REF_ALT[3],
                              "GREP", sep=columnSep, file=out)
Esempio n. 3
0
def main():
    """Re-emit a VEP-annotated VCF with one record per consequence term.

    Command-line options:
        -f  gzip-compressed VCF with VEP ``CSQ`` annotations (input).
        -v  tab-separated table of VEP consequences; its first line is the
            column header and must name the ``SO term`` and ``IMPACT``
            columns.
        -o  path of the VCF to write (plain text).
        -e  path to the error file (accepted but not used here).

    ``##`` header lines are copied verbatim; the additional INFO
    declarations are inserted just before the ``#CHROM`` line.  Every
    record is written once per consequence term of each CSQ entry, with
    the INFO column extended by CSQallele, Consequence, CSQrank,
    ExistingVariation, VariantClass, nbCSQ, CSQfreq, REFfreq, ALTfreq and
    MAF.  Frequencies come from ``gp.AnnotateFreqCSQ_REF_ALT`` computed
    over all sample columns.  Records without a CSQ annotation are not
    written.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", help="path to  input  file ", type=str,
                        required=True)
    parser.add_argument("-v", help="path to table of vep consequences  ",
                        type=str, required=True)
    parser.add_argument("-o", help="path to output file  ", type=str,
                        required=True)
    parser.add_argument("-e", help="path to error file", type=str,
                        required=True)
    args = parser.parse_args()

    # ---- Read the VEP consequence table --------------------------------
    # dRank doubles as a whitelist: an unknown IMPACT raises KeyError.
    dRank = {
        "HIGH": "HIGH",
        "LOW": "LOW",
        "MODERATE": "MODERATE",
        "MODIFIER": "MODIFIER"
    }
    dSOTermRank = {}
    lSOTerm = []  # SO terms in file order (expected: ordered by severity)
    csqTitle = None
    with open(args.v, 'r') as csqTable:
        for csqLine in csqTable:
            myRowList = csqLine.rstrip().split('\t')
            if csqTitle is None:  # first line holds the column names
                csqTitle = myRowList
                continue
            dCsq = dict(zip(csqTitle, myRowList))
            dSOTermRank[dCsq['SO term']] = dRank[dCsq['IMPACT']]
            lSOTerm.append(myRowList[0])

    # INFO declarations for the keys appended to each record.  The IDs of
    # ExistingVariation and VariantClass are spelled exactly like the keys
    # written in the records below, so the header declares what is emitted.
    extraHeaderLines = (
        '##INFO=<ID=CSQfreq,Number=A,Type=Float,Description="Frequency of CSQ allele in set of samples">',
        '##INFO=<ID=REFfreq,Number=A,Type=Float,Description="Frequency of REF allele in set of samples">',
        '##INFO=<ID=ALTfreq,Number=A,Type=Float,Description="Frequency of ALT allele in set of samples">',
        '##INFO=<ID=MAF,Number=A,Type=Float,Description="Frequency of Minor allele in set of samples">',
        '##INFO=<ID=nbCSQ,Number=A,Type=Integer,Description="1st, 2nd .... CSQ allele">',
        '##INFO=<ID=CSQallele,Number=A,Type=String,Description="Describe CSQ allele">',
        '##INFO=<ID=IMPACT,Number=A,Type=String,Description="Impact of CSQ allele">',
        '##INFO=<ID=ExistingVariation,Number=A,Type=String,Description="RS of CSQallele">',
        '##INFO=<ID=VariantClass,Number=A,Type=String,Description="type of variant : MODIFIER ecc..">',
        '##INFO=<ID=Consequence,Number=A,Type=String,Description="type of variant : SNP, indels, ecc...">',
        '##INFO=<ID=CSQrank,Number=A,Type=String,Description="Rank of Consequence: HIGH, MODERATE, LOW, MODIFIER">',
    )

    # ---- Loop over the VCF lines ---------------------------------------
    with open(args.o, 'w') as filemyres, gzip.open(args.f, 'r') as vcf:
        for line in vcf:
            # gzip yields bytes in binary mode; decode before text handling
            decodedLine = line.decode()

            if decodedLine.startswith('##'):
                if "ID=CSQ" in decodedLine:
                    csqHeader = (decodedLine.rstrip().split(":")[1]
                                 .lstrip().rstrip("\">").split("|"))
                filemyres.write(decodedLine)
                continue

            if decodedLine.startswith('#'):
                # #CHROM line: new declarations must precede it
                for headerLine in extraHeaderLines:
                    filemyres.write(headerLine)
                    filemyres.write("\n")
                filemyres.write(decodedLine)
                continue

            linesplit = decodedLine.rstrip().split()
            myref = linesplit[3]
            myalt = linesplit[4]

            # Parse INFO afresh per record so a record lacking CSQ cannot
            # reuse the previous record's annotation.
            tempinfo = linesplit[7]
            dInfo = {}
            for entry in tempinfo.split(";"):
                key, sep, value = entry.partition("=")
                if sep:  # flag-type INFO entries carry no value
                    dInfo[key] = value
            if "CSQ" not in dInfo:
                continue

            # One CSQ entry per (allele, feature) pair; CSQcount is the
            # 1-based position of the entry within the record.
            for CSQcount, mcsq in enumerate(dInfo["CSQ"].split(","), 1):
                dCsq = dict(zip(csqHeader, mcsq.split("|")))

                GTfields = linesplit[9:]
                nbAploidSamples = len(GTfields) * 2
                freqCSQ_REF_ALT = gp.AnnotateFreqCSQ_REF_ALT(
                    dCsq["Allele"], myref, myalt, nbAploidSamples, GTfields)

                for c in dCsq['Consequence'].split("&"):
                    # Raises ValueError for a term missing from the
                    # consequence table.
                    # NOTE(review): the index list is rebuilt for every
                    # term, so the "most severe" term is always the term
                    # itself and CSQrank is the rank of `c`.  Confirm
                    # whether the most severe term of the entry was meant.
                    mostSevereCsq = lSOTerm[lSOTerm.index(c)]

                    # Start again from the original INFO for every row
                    linesplit[7] = ";".join([
                        tempinfo,
                        "CSQallele=" + dCsq["Allele"],
                        "Consequence=" + c,
                        "CSQrank=" + dSOTermRank[mostSevereCsq],
                        "ExistingVariation=" + dCsq["Existing_variation"],
                        "VariantClass=" + dCsq["VARIANT_CLASS"],
                        "nbCSQ=" + str(CSQcount),
                        "CSQfreq=%s" % (freqCSQ_REF_ALT[0]),
                        "REFfreq=%s" % (freqCSQ_REF_ALT[1]),
                        "ALTfreq=%s" % (freqCSQ_REF_ALT[2]),
                        "MAF=%s" % (freqCSQ_REF_ALT[3]),
                    ])
                    filemyres.write("\t".join(linesplit))
                    filemyres.write("\n")
Esempio n. 4
0
def main():
    """Re-emit a VEP-annotated VCF with one record per CSQ entry.

    Command-line options:
        -f  gzip-compressed VCF with VEP ``CSQ`` annotations (input).
        -o  path of the VCF to write (plain text).
        -e  path to the error file (accepted but not used here).

    ``##`` header lines are copied verbatim; the additional INFO
    declarations are inserted just before the ``#CHROM`` line.  Every
    record is written once per CSQ entry, with nbCSQ, CSQfreq, REFfreq and
    ALTfreq appended to its INFO column.  Frequencies come from
    ``gp.AnnotateFreqCSQ_REF_ALT`` computed over all sample columns.
    Records without a CSQ annotation are not written.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", help="path to  input  file ", type=str,
                        required=True)
    parser.add_argument("-o", help="path to output file  ", type=str,
                        required=True)
    parser.add_argument("-e", help="path to error file", type=str,
                        required=True)
    args = parser.parse_args()

    # INFO declarations for the keys appended to each record
    extraHeaderLines = (
        '##INFO=<ID=CSQfreq,Number=A,Type=Float,Description="Frequency of CSQ allele in set of samples">',
        '##INFO=<ID=REFfreq,Number=A,Type=Float,Description="Frequency of REF allele in set of samples">',
        '##INFO=<ID=ALTfreq,Number=A,Type=Float,Description="Frequency of ALT allele in set of samples">',
        '##INFO=<ID=nbCSQ,Number=A,Type=Integer,Description="1st, 2nd .... CSQ allele">',
    )

    # ---- Loop over the VCF lines ---------------------------------------
    with open(args.o, 'w') as filemyres, gzip.open(args.f, 'r') as vcf:
        for line in vcf:
            # gzip yields bytes in binary mode; decode before text handling
            decodedLine = line.decode()

            if decodedLine.startswith('##'):
                if "ID=CSQ" in decodedLine:
                    csqHeader = (decodedLine.rstrip().split(":")[1]
                                 .lstrip().rstrip("\">").split("|"))
                filemyres.write(decodedLine)
                continue

            if decodedLine.startswith('#'):
                # #CHROM line: new declarations must precede it
                for headerLine in extraHeaderLines:
                    filemyres.write(headerLine)
                    filemyres.write("\n")
                filemyres.write(decodedLine)
                continue

            linesplit = decodedLine.rstrip().split()
            myref = linesplit[3]
            myalt = linesplit[4]

            # Parse INFO afresh per record so a record lacking CSQ cannot
            # reuse the previous record's annotation.
            tempinfo = linesplit[7]
            dInfo = {}
            for entry in tempinfo.split(";"):
                key, sep, value = entry.partition("=")
                if sep:  # flag-type INFO entries carry no value
                    dInfo[key] = value
            if "CSQ" not in dInfo:
                continue

            # One CSQ entry per (allele, feature) pair; CSQcount is the
            # 1-based position of the entry within the record.
            for CSQcount, mcsq in enumerate(dInfo["CSQ"].split(","), 1):
                dCsq = dict(zip(csqHeader, mcsq.split("|")))

                GTfields = linesplit[9:]
                nbAploidSamples = len(GTfields) * 2
                freqCSQ_REF_ALT = gp.AnnotateFreqCSQ_REF_ALT(
                    dCsq["Allele"], myref, myalt, nbAploidSamples, GTfields)

                # Start again from the original INFO for every row
                linesplit[7] = ";".join([
                    tempinfo,
                    "nbCSQ=" + str(CSQcount),
                    "CSQfreq=%s" % (freqCSQ_REF_ALT[0]),
                    "REFfreq=%s" % (freqCSQ_REF_ALT[1]),
                    "ALTfreq=%s" % (freqCSQ_REF_ALT[2]),
                ])
                filemyres.write("\t".join(linesplit))
                filemyres.write("\n")
Esempio n. 5
0
def main():
    """Tabulate allele frequencies per consequence on random sample subsets.

    Command-line options:
        -f  gzip-compressed VCF with VEP ``CSQ`` annotations (input).
        -v  tab-separated table of VEP consequences; its first line is the
            column header and must name the ``SO term`` and ``IMPACT``
            columns.
        -o  path of the table to write.
        -e  path to the error file (accepted but not used here).
        -m  whitespace-separated metadata file; the header line starts
            with ``sample`` and must name the ``sample`` and ``region``
            columns.
        -c  number of random cycles (default 1).
        -s  seed for the random number generator.

    For every cycle, 6 samples whose region is ``EUROPE`` are drawn at
    random, the VCF is read again, and one row is written for every
    consequence term of every CSQ entry, with the frequencies computed by
    ``gp.AnnotateFreqCSQ_REF_ALT`` on the drawn samples only.  Records
    without a CSQ annotation produce no rows.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", help="path to  input  file ", type=str,
                        required=True)
    parser.add_argument("-v", help="path to table of vep consequences  ",
                        type=str, required=True)
    parser.add_argument("-o", help="path to output file  ", type=str,
                        required=True)
    parser.add_argument("-e", help="path to error file", type=str,
                        required=True)
    parser.add_argument("-m", help="path to metadata file", type=str,
                        required=True)
    parser.add_argument("-c", help="number of random cycle", type=int,
                        required=False, default=1)
    parser.add_argument("-s", help="seed's number", type=int, required=True)
    args = parser.parse_args()

    # ---- Read the VEP consequence table --------------------------------
    # dRank doubles as a whitelist: an unknown IMPACT raises KeyError.
    dRank = {"HIGH": "HIGH", "LOW": "LOW", "MODERATE": "MODERATE",
             "MODIFIER": "MODIFIER"}
    dSOTermRank = {}
    lSOTerm = []  # SO terms in file order (expected: ordered by severity)
    csqTitle = None
    with open(args.v, 'r') as csqTable:
        for csqLine in csqTable:
            myRowList = csqLine.rstrip().split('\t')
            if csqTitle is None:  # first line holds the column names
                csqTitle = myRowList
                continue
            dCsq = dict(zip(csqTitle, myRowList))
            dSOTermRank[dCsq['SO term']] = dRank[dCsq['IMPACT']]
            lSOTerm.append(myRowList[0])

    # ---- Read metadata: sample -> region -------------------------------
    dSampleRegion = {}
    with open(args.m, 'r') as metadata:
        for line in metadata:
            if line.startswith('sample'):
                header = line.rstrip().split()
            else:
                dMeta = dict(zip(header, line.rstrip().split()))
                dSampleRegion[dMeta['sample']] = dMeta['region']

    # Sorting makes the draw reproducible for a given seed
    EURsorted = sorted(key for key, value in dSampleRegion.items()
                       if value == 'EUROPE')
    random.seed(args.s)

    nbSamples = 6  # number of samples drawn per cycle

    # ---- Loop over cycles and VCF lines --------------------------------
    # Columns are joined with " \t " (space, tab, space): this is exactly
    # what the historical print("a", "\t", "b", ...) calls produced, and it
    # is kept so existing consumers of the table are not broken.
    columnSep = " \t "
    with open(args.o, 'w') as out:
        print("Chr", "Pos", "VariantClass", "CSQallele", "CSQrank",
              "Consequence", "CSQfreq", "REFfreq", "ALTfreq", "MAF",
              "Cycle", "Population", sep=columnSep, file=out)

        for cycle in range(1, args.c + 1):
            with gzip.open(args.f, 'r') as myinput:
                column2retain = []
                sampleToConsider = random.sample(EURsorted, nbSamples)

                for line in myinput:
                    # gzip yields bytes in binary mode; decode first
                    decodedLine = line.decode()

                    if decodedLine.startswith('##'):
                        if "ID=CSQ" in decodedLine:
                            csqHeader = (decodedLine.rstrip().split(":")[1]
                                         .lstrip().rstrip("\">").split("|"))
                        continue

                    if decodedLine.startswith('#'):
                        # #CHROM line: locate the drawn samples' columns
                        columns = decodedLine.split()
                        for ind in sampleToConsider:
                            column2retain.append(columns.index(ind))
                        continue

                    linesplit = decodedLine.rstrip().split()
                    myref = linesplit[3]
                    myalt = linesplit[4]

                    # Parse INFO afresh per record so a record lacking CSQ
                    # cannot reuse the previous record's annotation.
                    dInfo = {}
                    for entry in linesplit[7].split(";"):
                        key, sep, value = entry.partition("=")
                        if sep:  # flag-type INFO entries carry no value
                            dInfo[key] = value
                    if "CSQ" not in dInfo:
                        continue

                    # One CSQ entry per (allele, feature) pair
                    for mcsq in dInfo["CSQ"].split(","):
                        dCsq = dict(zip(csqHeader, mcsq.split("|")))

                        GTfields = [linesplit[column2retain[col]]
                                    for col in range(nbSamples)]
                        nbAploidSamples = len(GTfields) * 2
                        freqCSQ_REF_ALT = gp.AnnotateFreqCSQ_REF_ALT(
                            dCsq["Allele"], myref, myalt, nbAploidSamples,
                            GTfields)

                        for cons in dCsq['Consequence'].split("&"):
                            # Raises ValueError for a term missing from
                            # the consequence table.
                            # NOTE(review): the index list is rebuilt for
                            # every term, so the "most severe" term is
                            # always the term itself and CSQrank is the
                            # rank of `cons`.  Confirm whether the most
                            # severe term of the entry was intended.
                            mostSevereCsq = lSOTerm[lSOTerm.index(cons)]
                            print(linesplit[0], linesplit[1],
                                  dCsq["VARIANT_CLASS"], dCsq["Allele"],
                                  dSOTermRank[mostSevereCsq], cons,
                                  freqCSQ_REF_ALT[0], freqCSQ_REF_ALT[1],
                                  freqCSQ_REF_ALT[2], freqCSQ_REF_ALT[3],
                                  cycle, "HGDP", sep=columnSep, file=out)