コード例 #1
0
def processPhasedData (vcf_filename, chromosome):
	"""Pair the common SNPs of one chromosome with a deletion and write the rows out.

	For every single-base variant in the VCF file whose allele frequency
	(computed over the children's columns) lies between
	1 - MAJOR_AF_THRESHOLD and MAJOR_AF_THRESHOLD, the SNP is passed to
	maximumR together with the chromosome's deletions.  When maximumR
	returns a deletion, one row is collected:

		[chromosome, type_snp, snp_pos, snp_type, snp_af,
		 deletion pos, deletion length, deletion af,
		 max_r, p, dist_snp_del, dist_tss]

	where type_snp is 1 for positions listed by readHitSNPs and 0 otherwise.
	All rows are handed to writeToOutputFile once the file has been read.

	Parameters:
		vcf_filename -- path of the VCF file, passed to the vcf helpers
		chromosome   -- chromosome identifier, passed to the helper functions
	"""
	# Only the positions are needed here; hit SNPs are processed like every
	# other SNP and merely flagged in the output.
	[hit_snp_positions, _unused_hit_snps] = readHitSNPs(chromosome)
	# Membership is tested once per reported SNP, so build the set one time.
	# NOTE(review): assumes the positions are hashable (ints) -- confirm.
	hit_snp_positions = set(hit_snp_positions)
	genes = hg19.read(chromosome)
	indices_children = vcf.returnColumnsOfChildren(vcf_filename)
	deletions = vcf.returnDeletions(chromosome, indices_children, MAJOR_AF_THRESHOLD)

	output = [] # rows to be written, one per reported SNP

	vcf_file = vcf.openVCFFile(vcf_filename)
	try:
		vcf.discardVCFHeaders(vcf_file)
		vcf_file.readline() # discard the column description header

		# Iterate lazily: readlines() would load the whole VCF into memory.
		for variant in vcf_file:
			data = variant.split()
			# Skip blank/truncated lines and anything that is not a
			# single-base reference allele (i.e. not a SNP).
			if len(data) < 4 or len(data[3]) != 1:
				continue

			phase = vcf.returnPhase(data, indices_children)
			snp_af = vcf.determineAlleleFrequency(phase)
			# Keep only SNPs where neither allele exceeds the threshold.
			if not (snp_af <= MAJOR_AF_THRESHOLD and 1 - snp_af <= MAJOR_AF_THRESHOLD):
				continue

			snp_pos = int(data[1])
			snp_type = hg19.determineType(snp_pos, genes)
			dist_tss = hg19.distanceToTSS(snp_pos, genes)
			[max_r, p, d] = maximumR(snp_pos, phase, deletions)
			if snp_pos in hit_snp_positions:
				type_snp = 1
			else:
				type_snp = 0

			# maximumR may return no deletion; such SNPs are not reported.
			if d is not None:
				dist_snp_del = distanceSNP_CNV(snp_pos, d['pos'], d['length'])
				output.append([chromosome, type_snp, snp_pos, snp_type, snp_af, d['pos'], d['length'], d['af'], max_r, p, dist_snp_del, dist_tss])
	finally:
		# Release the handle even when a helper raises half-way through.
		vcf_file.close()

	writeToOutputFile(output, chromosome)
コード例 #2
0
def processSNPs (vcf_filename, chromosome, individuals=('parents', 'children')):
	"""Write every common SNP of one chromosome, with its deletions, to the output.

	Each single-base variant in the VCF file is classified as a hit SNP
	(its position is listed by pr.readHitSNPs), a discarded SNP (skipped),
	or a non-hit SNP.  If its allele frequency lies between
	1 - MAJOR_AF_THRESHOLD and MAJOR_AF_THRESHOLD, the record

		[[label, snp_pos, snp_af, snp_type, dist_tss], dels]

	is passed to writeSNPToOutputFile, where label is 'HITSNP' or
	'NONHITSNP' and the trailing dels element is present only when
	returnDeletions yields a non-empty result.

	Parameters:
		vcf_filename -- path of the VCF file, passed to the vcf helpers
		chromosome   -- chromosome identifier, passed to the helper functions
		individuals  -- groups of individuals whose columns are used,
		                passed to vcf.returnColumns
	"""
	# gather the required data
	[hit_snps_positions, hit_snps, discarded_snps_positions] = pr.readHitSNPs (chromosome)
	# Both collections are only used for membership tests, once per SNP.
	# NOTE(review): assumes the positions are hashable (ints) -- confirm.
	hit_positions = set(hit_snps_positions)
	discarded_positions = set(discarded_snps_positions)
	genes = hg19.read (chromosome)
	indices_individuals = vcf.returnColumns (vcf_filename, individuals)
	deletions = vcf.returnDeletions (chromosome, indices_individuals, MAJOR_AF_THRESHOLD)

	vcf_file = vcf.openVCFFile(vcf_filename)
	try:
		vcf.discardVCFHeaders(vcf_file)
		vcf_file.readline() # discard the column description header

		# Iterate lazily: readlines() would load the whole VCF into memory.
		for variant in vcf_file: # move through the genetic variants
			data = variant.split()
			# Skip blank/truncated lines and anything that is not a
			# single-base reference allele (i.e. not a SNP).
			if len(data) < 4 or len(data[3]) != 1:
				continue

			snp_pos = int(data[1])
			if snp_pos in hit_positions: # SNP is a hit SNP
				label = 'HITSNP'
				[phase, snp_af] = pr.processHitSNP (snp_pos, data, hit_snps, indices_individuals)
			elif snp_pos not in discarded_positions:
				label = 'NONHITSNP'
				phase = vcf.returnPhase(data, indices_individuals)
				snp_af = vcf.determineAlleleFrequency (phase)
			else:
				continue # discarded SNP: nothing is written

			# Keep only SNPs where neither allele exceeds the threshold.
			if not (1 - MAJOR_AF_THRESHOLD <= snp_af <= MAJOR_AF_THRESHOLD):
				continue

			# Annotation is only needed for SNPs that are actually written.
			# NOTE(review): assumes the hg19 helpers have no side effects.
			snp_type = hg19.determineType(snp_pos, genes)
			dist_tss = hg19.distanceToTSS(snp_pos, genes)

			snp_output = [[label, snp_pos, snp_af, snp_type, dist_tss]]
			# NOTE(review): this is the module-level returnDeletions (four
			# arguments), not vcf.returnDeletions.
			dels = returnDeletions(snp_pos, snp_af, phase, deletions) # TODO implement
			if len(dels) > 0:
				snp_output.append(dels)
			writeSNPToOutputFile(snp_output, chromosome)
	finally:
		# Release the handle even when a helper raises half-way through.
		vcf_file.close()
コード例 #3
0
def gatherData (vcf_filename, chromosome, individuals):
	"""Collect the inputs needed to process one chromosome.

	Calls readHitSNPs, hg19.read, vcf.returnColumns and vcf.returnDeletions
	(the latter with MAJOR_AF_THRESHOLD) in that order.

	Parameters:
		vcf_filename -- path of the VCF file, passed to vcf.returnColumns
		chromosome   -- chromosome identifier, passed to the helpers
		individuals  -- groups of individuals, passed to vcf.returnColumns

	Returns a six-element list:
		[hit SNP positions, hit SNPs, discarded SNP positions,
		 genes, column indices of the individuals, deletions]
	"""
	positions, snps, discarded = readHitSNPs(chromosome)
	gene_table = hg19.read(chromosome)
	columns = vcf.returnColumns(vcf_filename, individuals)
	dels = vcf.returnDeletions(chromosome, columns, MAJOR_AF_THRESHOLD)
	return [positions, snps, discarded, gene_table, columns, dels]