Example #1
0
	def __init__(self, chromosome, max_major_af = .96):
		"""Sets up the annotation readers and the VCF reader for one chromosome.

		chromosome   -- passed to each reader's read() and to returnLocation()
		max_major_af -- stored on the instance as self.max_major_af (default .96)
		"""
		self.max_major_af = max_major_af

		# Each annotation reader is stored on the instance first and then
		# asked to read the data of the requested chromosome.
		catalogue = GWASCatalogueReader()
		self.gwas_catalogue_reader = catalogue
		catalogue.read(chromosome)

		omni = HumanOmniReader()
		self.human_omni_reader = omni
		omni.read(chromosome)

		hg19 = HG19Reader()
		self.hg19_reader = hg19
		hg19.read(chromosome)

		# The file handle is handed to the VCF reader and stays open;
		# nothing in this method closes it.
		vcf_path = self.returnLocation(chromosome)
		vcf_handle = open(vcf_path)
		self.vcf_reader = vcf.Reader(vcf_handle)
Example #2
0
class SNPReader: 
	"""Reads SNPs from the VCF file of a single chromosome.

	The constructor loads the GWAS catalogue, HumanOmni and HG19 readers for
	the chromosome and opens the matching VCF file. One of read(),
	readHitSNPs() or readRawList() then fills self.snps.

	NOTE(review): all three read methods iterate the same self.vcf_reader
	that is created once in __init__. Presumably that reader is a one-shot
	stream, so only the first read call would see any records -- confirm.
	"""
	def __init__(self, chromosome, max_major_af = .96):
		"""Sets up the annotation readers and the VCF reader.

		chromosome   -- passed to each reader's read() and to returnLocation()
		max_major_af -- SNPs whose major allele frequency exceeds this value
		                are skipped by the read methods (default .96)
		"""
		self.max_major_af = max_major_af
	
		self.gwas_catalogue_reader = GWASCatalogueReader() 
		self.gwas_catalogue_reader.read(chromosome)

		self.human_omni_reader = HumanOmniReader()
		self.human_omni_reader.read(chromosome) 

		self.hg19_reader = HG19Reader()
		self.hg19_reader.read(chromosome)

		# The handle is owned by the VCF reader from here on and must stay
		# open while records are being iterated, so it is not closed here.
		self.vcf_reader = vcf.Reader(open(self.returnLocation(chromosome)))

	def returnLocation(self, chromosome):
		"""Returns the path of the VCF file for the given chromosome.

		Chromosomes 1 and 22 map to uncompressed '.vcf' files, every other
		value maps to a '.vcf.gz' file named after str(chromosome).
		"""
		if chromosome == 1:
			return '/data2/as/hitSNP-CNV/data/phased-snps-dels/chr1_snps_and_dels.vcf'
		if chromosome == 22:
			return '/data2/as/hitSNP-CNV/data/phased-snps-dels/chr22_snps_and_dels.vcf'
		return '/data2/as/hitSNP-CNV/data/phased-snps-dels/chr' + str(chromosome) + '_snps_and_dels.vcf.gz'
	
	def readHitSNPs(self):
		"""Reads in only the hit SNPs present in the given VCF file.

		A record is kept when it is a SNP, its major allele frequency does
		not exceed self.max_major_af, its position is present in the
		HumanOmni data set and the GWAS catalogue yields a hit allele for
		that position. The result is stored in self.snps.
		"""
		self.snps = []
		for vcf_record in self.vcf_reader: 
			if not isSNP(vcf_record): # if vcf is not a SNP
				continue
			snp = SNP(vcf_record)	
			# Fixed: the threshold lives on the instance; the bare name
			# 'max_major_af' raised a NameError here.
			if snp.major_af > self.max_major_af: # major allele frequency too high
				continue
			if not self.human_omni_reader.snpPresent(snp.position):  # not present in the HumanOmni data set
				continue
			hit_allele = self.gwas_catalogue_reader.getHitAllele(snp.position)
			if hit_allele is None: # no hit allele known for this position
				continue
			
			snp_type, dist_tss = self.hg19_reader.determineTypeAndDistanceTSS(snp.position)
			self.snps.append(SNP(vcf_record, snp_type = snp_type, hit_allele = hit_allele, dist_tss = dist_tss))			


	def readRawList(self, snp_positions): 
		"""Reads in all SNPs for which their position is present in the given list.
		   Their distance to TSS, type and (when relevant) hit allele are not determined.

		snp_positions -- container that is tested with 'in' against each
		                 record's POS. The result is stored in self.snps.
		"""
		self.snps = []
		for vcf_record in self.vcf_reader:
			if not isSNP(vcf_record):
				continue
			if vcf_record.POS not in snp_positions:
				continue
			snp = SNP(vcf_record)	
			if snp.major_af > self.max_major_af: # major allele frequency too high
				continue
			self.snps.append(snp)
			
			
	def read(self):
		"""Reads all the SNPs present in the given VCF file. 

		A record is kept when it is a SNP, its major allele frequency does
		not exceed self.max_major_af and its position is present in the
		HumanOmni data set. The hit allele is looked up but not required.
		The result is stored in self.snps.

		NOTE: getHitSNPs() and printAll() rely on self.snps, so one of the
		read methods must be called before them.
		"""
		self.snps = [] 
		for vcf_record in self.vcf_reader: 
			if not isSNP(vcf_record):
				continue
			snp = SNP(vcf_record)	
			if snp.major_af > self.max_major_af: # major allele frequency too high
				continue
			if not self.human_omni_reader.snpPresent(snp.position): # not present in the HumanOmni data set
				continue

			snp_type, dist_tss = self.hg19_reader.determineTypeAndDistanceTSS(snp.position)
			hit_allele = self.gwas_catalogue_reader.getHitAllele(snp.position)
			self.snps.append(SNP(vcf_record, snp_type = snp_type, hit_allele = hit_allele, dist_tss = dist_tss))			
		
	def getHitSNPs(self):
		"""Stores the SNPs of self.snps for which isHitSNP() holds in self.hit_snps.

		Nothing is returned; the selection is only available as self.hit_snps.
		"""
		self.hit_snps = [snp for snp in self.snps if snp.isHitSNP()]
			

	def printAll(self):
		"""Calls print() on every SNP in self.snps."""
		# Fixed: the SNPs live on the instance; the bare name 'snps' raised
		# a NameError here.
		for snp in self.snps:
			snp.print()