def __iter__(self):
    """Iterate over a tabix-indexed file: one header row, then data rows.

    The header row is ``self.header`` when that is not None. Otherwise the
    last line of the file's own header is decoded as ASCII and split on
    tabs; if the file has no header lines, no header row is yielded.

    Data rows come from ``fetch`` restricted by ``self.reference``,
    ``self.start``, ``self.stop`` and ``self.region`` and are yielded as
    tuples. The file is closed when iteration ends, whether normally, by an
    exception, or because the generator is closed early.
    """
    from pysam import Tabixfile, asTuple

    tabix = Tabixfile(self.filename, mode='r')
    try:
        # header row
        if self.header is None:
            # assume the last header line carries the field names
            header_lines = list(tabix.header)
            if header_lines:
                last_line = text_type(header_lines[-1], encoding='ascii')
                yield tuple(last_line.split('\t'))
        else:
            yield self.header

        # data rows
        records = tabix.fetch(
            reference=self.reference,
            start=self.start,
            end=self.stop,
            region=self.region,
            parser=asTuple(),
        )
        for record in records:
            yield tuple(record)
    finally:
        tabix.close()
def __init__(self, chromosome, position, annotation_table_file):
    """Load the annotation record for a single site from a tabix-indexed table.

    The table is queried for the interval ``[position - 1, position)`` on
    ``chromosome`` and the first record returned is kept in ``self.line``.
    That record is split on tabs into exactly 32 attributes (``chromosome``
    through ``quebec_alleles``). Every attribute stays a string except
    ``position``, which is converted to ``int``.

    Parameters:
        chromosome: reference name passed to ``Tabixfile.fetch``.
        position: site coordinate; ``position - 1`` is used as the fetch
            start, so this is presumably 1-based - confirm against callers.
        annotation_table_file: path of the tabix-indexed annotation table.

    Raises:
        StopIteration: if the query returns no record.
        ValueError: if the record does not have exactly 32 tab-separated
            fields, or its position field is not an integer.
    """
    annotation_table = Tabixfile(annotation_table_file)
    try:
        records = annotation_table.fetch(
            reference=chromosome, start=position - 1, end=position)
        # Builtin next() works on Python 2.6+ and Python 3 iterators alike;
        # the iterator's .next() method only exists on Python 2.
        self.line = next(records)
    finally:
        # Release the file handle even when the lookup fails.
        annotation_table.close()

    (self.chromosome,
     self.position,
     self.reference_base,
     self.genic,
     self.exonic,
     self.intronic,
     self.intergenic,
     self.utr5,
     self.utr3,
     self.fold0,
     self.fold4,
     self.fold2,
     self.fold3,
     self.CDS,
     self.mRNA,
     self.rRNA,
     self.tRNA,
     self.feature_names,
     self.feature_types,
     self.feature_ID,
     self.cds_position,
     self.strand,
     self.frame,
     self.codon,
     self.aa,
     self.degen,
     self.FPKM,
     self.rho,
     self.FAIRE,
     self.recombination,
     self.mutability,
     self.quebec_alleles) = self.line.split('\t')
    self.position = int(self.position)
def __iter__(self):
    """Iterate over a tabix-indexed file: one header row, then data rows.

    pysam is imported lazily; if it is missing, ``UnsatisfiedDependency`` is
    raised with the original ImportError and ``dep_message``.

    The header row is ``self.header`` when that is not None. Otherwise the
    last line of the file's own header is split on tabs; if the file has no
    header lines, no header row is yielded.

    Data rows come from ``fetch`` restricted by ``self.reference``,
    ``self.start``, ``self.end`` and ``self.region`` and are yielded as
    tuples. The file is closed when iteration ends for any reason.
    """
    try:
        from pysam import Tabixfile, asTuple
    except ImportError as e:
        raise UnsatisfiedDependency(e, dep_message)

    tabix = Tabixfile(self.filename, mode='r')
    try:
        # header row
        if self.header is None:
            # assume the last header line carries the field names
            header_lines = list(tabix.header)
            if header_lines:
                field_names = header_lines[-1].split('\t')
                yield tuple(field_names)
        else:
            yield self.header

        # data rows
        records = tabix.fetch(
            reference=self.reference,
            start=self.start,
            end=self.end,
            region=self.region,
            parser=asTuple(),
        )
        for record in records:
            yield tuple(record)
    finally:
        tabix.close()
def __iter__(self):
    """Yield the header row (if any) and then every selected data row.

    pysam is imported on first use; an ImportError is re-raised as
    ``UnsatisfiedDependency`` carrying the original error and
    ``dep_message``.

    When ``self.header`` is None the header row is taken from the last line
    of the file's own header, split on tabs, and nothing is yielded for the
    header if the file has no header lines. Otherwise ``self.header`` is
    yielded unchanged.

    Data rows are fetched using ``self.reference``, ``self.start``,
    ``self.end`` and ``self.region`` and yielded as tuples. The file is
    always closed once iteration stops.
    """
    try:
        from pysam import Tabixfile, asTuple
    except ImportError as e:
        raise UnsatisfiedDependency(e, dep_message)

    source = Tabixfile(self.filename, mode="r")
    try:
        # header row
        if self.header is None:
            # assume the last header line carries the field names
            file_header = list(source.header)
            if file_header:
                yield tuple(file_header[-1].split("\t"))
        else:
            yield self.header

        # data rows
        selection = dict(
            reference=self.reference,
            start=self.start,
            end=self.end,
            region=self.region,
        )
        for fields in source.fetch(parser=asTuple(), **selection):
            yield tuple(fields)
    finally:
        source.close()
genofinfile.close() genoinds = [genofins.index(x) + 6 for x in officialfindivs] y = {} currbimbam = open(currfiles + '.bimbam','w') #t0 = time.time() for snp in masterdic.keys(): #for snp in masterdic.keys()[0:1000]: chrm = masterdic[snp][0] if chrm == 'chrm': continue tabixer = Tabixfile('/mnt/lustre/home/cusanovich/500HT/Imputed1415/ByChr/hutt.all.imputed.' + chrm + '.txt.gz') tempgenos = [x.split('\t') for x in tabixer.fetch(chrm,int(masterdic[snp][1])-1,int(masterdic[snp][2]))][0] genos = [tempgenos[x] for x in range(0,6) + genoinds] tabixer.close() y[snp] = [genos[3], 'A', 'G'] + genos[6:] print >> currbimbam, ", ".join(y) #t1 = time.time() #print t1-t0 currbimbam.close() #genomat = matrix_reader(genodir + 'hutt.imputed.dhssnps.bimbam',sep=",") print "Running GEMMA..." gemmer = (hmdir + 'Programs/gemma0.94 -g ' + currfiles + '.bimbam -p ' + currfiles + '.pheno -k ' + currfiles + '.square.txt -c ' + currfiles + '.covariates -lmm 4 -maf 0.05 -o curr_' + pheno) t0 = time.time() ifier(gemmer) t1 = time.time() print t1-t0 #currresults = open(genodir + 'output/curr_' + pheno + '.assoc.txt','r')
# Fragment of a per-gene routine: writes the phenotype and genotype (BIMBAM)
# files for the current gene, runs GEMMA on them and opens the result file.
# NOTE(review): `gene`, `chrm`, `currfiles`, `genodic`, `snpdic`, `masterdic`
# and `ifier` are defined outside this fragment; `ifier` presumably executes
# a shell command string - confirm.
if not regressPCs:
    # Build a `cut` command that extracts one space-delimited column
    # (index int(exprcoldic[gene]) + 1) from the expression file into
    # <currfiles>.pheno.
    phener = ('cut -f' + str(int(exprcoldic[gene]) + 1) + ' -d" " ' + hmdir + '500HT/Exprs/qqnorm.500ht' + gccor + covcor + '.ordered.' + chrm + '.bimbam > ' + currfiles + '.pheno')
    ifier(phener)
currgenos = []
# Pull genotypes for the SNPs listed for this gene. genodic acts as a cache:
# a SNP that is not yet in it is read from the genotype file and stored.
for snp in masterdic[gene]:
    try:
        currgenos.append(", ".join(genodic[snp]))
    except KeyError:
        #tabixer = pysam.Tabixfile('/mnt/lustre/home/cusanovich/500HT/Imputed1415/ByChr/hutt.imputed.' + chrm + '.txt.gz')
        #tabixer = pysam.Tabixfile('/mnt/lustre/home/cusanovich/500HT/' + mapper + '/ByChr/hutt.' + mapper + '.' + distance + '.' + chrm + '.txt.gz')
        tabixer = Tabixfile('/mnt/lustre/home/cusanovich/500HT/Imputed1415/ByChr/hutt' + mapper + '.' + chrm + '.txt.gz')
        # Only the first record returned for the SNP interval is used; an
        # empty result raises IndexError here.
        # NOTE(review): the start coordinate is passed without subtracting 1 -
        # confirm this matches the coordinate convention stored in snpdic.
        genos = [x.split('\t') for x in tabixer.fetch(chrm,int(snpdic[snp][1]),int(snpdic[snp][2]))][0]
        tabixer.close()
        # Output row: column 3 of the record, the literal alleles 'A' and 'G',
        # then every column from index 6 onwards.
        y = [genos[3], 'A', 'G'] + genos[6:len(genos)]
        genodic[snp] = y
        currgenos.append(", ".join(genodic[snp]))
# One line per SNP in <currfiles>.bimbam.
currbimbam = open(currfiles + '.bimbam','w')
print >> currbimbam, "\n".join(currgenos)
currbimbam.close()
#print "Running GEMMA..."
# The two GEMMA command lines differ only in the extra
# `-c <currfiles>.pcs.txt` argument used when regressPCs is false.
if regressPCs:
    gemmer = (hmdir + 'Programs/gemma0.94 -g ' + currfiles + '.bimbam -p ' + currfiles + '.pheno -k ' + currfiles + '.square.txt -lmm 4 -maf 0.05 -o curr_' + chrm + '_pc' + str(pcs) + '_' + correction)
    ifier(gemmer)
if not regressPCs:
    gemmer = (hmdir + 'Programs/gemma0.94 -g ' + currfiles + '.bimbam -p ' + currfiles + '.pheno -k ' + currfiles + '.square.txt -c ' + currfiles + '.pcs.txt -lmm 4 -maf 0.05 -o curr_' + chrm + '_pc' + str(pcs) + '_' + correction)
    ifier(gemmer)
# Open the association results file named after the `-o` prefix used above.
currresults = open(genodir + '/output/curr_' + chrm + '_pc' + str(pcs) + '_' + correction + '.assoc.txt','r')
# Starting value for the minimum; presumably chosen to exceed any p-value - confirm.
pmin = 1.1