def __init__(self, filename, pat_name):
    """Open ``filename`` and build the row iterator over it.

    filename -- path of the input file; it is opened for reading and the
                handle is kept on ``self.fin`` (not closed here).
    pat_name -- stored unchanged on ``self.pat_name``.
    """
    self.pat_name = pat_name
    self.fin = open(filename)

    # Both flags start switched off.
    self.allow_absent = self.group_repeats = False

    # splitIterator and skipper are resolved from the enclosing module.
    row_source = splitIterator(self.fin, skipLine=skipper)
    self.iterator = row_source
def iterate(self):
    """Yield one record per rsid read from ``self.freq_file``.

    Rows come from ``globes.splitIterator``; columns 0-3 are parsed as
    rsid, allele id, count and frequency.  Consecutive rows sharing an rsid
    are folded into one flat list::

        [rsid, allele, count, allele2, count2, ...]

    where each allele is ``self.dallele[alleleid]``.  Rows are skipped when
    the allele id is 5 or 8, or when its ``self.dallele`` entry has a length
    greater than 1 (the original comments refer to these as indels).

    As in the original, a group is only emitted when its rsid is > 0.

    Raises AssertionError when the frequencies of the kept alleles of a
    group sum to less than .999.
    """
    skip_message = "the way we skip indels isnt getting it right"

    prev_rsid = -1
    #[rsid, allele, count, allele2, count2, ...]
    rsid_info = []
    # Frequencies are summed on the side: the yielded record only carries
    # alleles and counts, so the check cannot be recomputed from it.
    freq_total = 0.0

    for splt in globes.splitIterator(self.freq_file):
        rsid = int(splt[0])
        alleleid = int(splt[1])
        count = int(float(splt[2]))
        freq = float(splt[3])

        allele = self.dallele[alleleid]
        if len(allele) > 1 or alleleid == 5 or alleleid == 8:
            continue

        if rsid != prev_rsid:
            if prev_rsid > 0:
                if freq_total < .999:
                    # Explicit raise so the check survives ``python -O``.
                    raise AssertionError(skip_message)
                yield rsid_info
            rsid_info = [rsid]
            freq_total = 0.0
            prev_rsid = rsid

        rsid_info.append(allele)
        rsid_info.append(count)
        freq_total += freq

    # The last group has no following row to trigger its emission, so it is
    # flushed here once the input is exhausted.
    if prev_rsid > 0:
        if freq_total < .999:
            raise AssertionError(skip_message)
        yield rsid_info
def iterate(self, fast_forward=0):
    """Yield rows of ``self.fin``, dropping the first ``fast_forward`` rows.

    Rows are produced by ``globes.splitIterator`` called with ``burn=0``.
    A ``fast_forward`` of zero (or less) yields every row.
    """
    rows = globes.splitIterator(self.fin, burn=0)
    for position, row in enumerate(rows):
        # Positions below fast_forward are the rows being skipped.
        if position >= fast_forward:
            yield row
def iterate(self):
    """Yield one record per rsid read from ``self.freq_file``.

    Rows come from ``globes.splitIterator``; columns 0-2 are parsed as
    rsid, allele id and count.  Consecutive rows sharing an rsid are folded
    into one flat list::

        [rsid, allele, count, allele2, count2, ...]

    where each allele is ``self.dallele[alleleid]``.  Rows with an allele id
    above 13, or equal to 5 or 8, are skipped.

    As in the original, a group is only emitted when its rsid is > 0.

    Raises AssertionError when a row's first three columns cannot be parsed
    as numbers.
    """
    prev_rsid = -1
    #[rsid, allele, count, allele2, count]
    rsid_info = []

    for splt in globes.splitIterator(self.freq_file):
        try:
            rsid = int(splt[0])
            alleleid = int(splt[1])
            count = int(float(splt[2]))
        except ValueError:
            # Explicit raise (not ``assert``) so a malformed row still stops
            # processing under ``python -O`` instead of reusing stale values.
            raise AssertionError("could not parse row: %r" % (splt,))

        if alleleid > 13 or alleleid == 5 or alleleid == 8:
            # > 13 : where the indels start
            # 5    : + (used for generic indels?)
            # 8    : - (used for generic indels?)
            continue

        if rsid != prev_rsid:
            if prev_rsid > 0:
                yield rsid_info
            rsid_info = [rsid]
            prev_rsid = rsid

        rsid_info.append(self.dallele[alleleid])
        rsid_info.append(count)

    # The last group has no following row to trigger its emission, so it is
    # flushed here once the input is exhausted.
    if prev_rsid > 0:
        yield rsid_info
def sortFile(filename):
    """Write a sorted copy of ``filename`` to ``<filename>.sorted``.

    Rows are read through ``splitIterator``, ordered with ``sortHelper`` as
    the sort key, and written tab-joined, one row per line (no newline is
    written after the last row).  Returns None.
    """
    # Read and sort before touching the output: a failure while parsing or
    # sorting then neither leaks the input handle nor leaves a truncated
    # ``.sorted`` file behind.
    with open(filename) as fin:
        sorted_splts = sorted(splitIterator(fin), key=sortHelper)

    with open("%s.sorted" % filename, 'w') as fout:
        fout.write("\n".join(['\t'.join(splt) for splt in sorted_splts]))
def iterate(self, fast_forward=0):
    """Yield rows of ``self.file``, dropping the first ``fast_forward`` rows.

    Rows are produced by ``globes.splitIterator`` called with ``burn=1`` and
    ``stopper=self.stopper``.  A ``fast_forward`` of zero (or less) yields
    every row the iterator produces.
    """
    rows = globes.splitIterator(self.file,
                                burn=1,
                                stopper=self.stopper)
    for position, row in enumerate(rows):
        # Positions below fast_forward are the rows being skipped.
        if position >= fast_forward:
            yield row
def __init__(self, vcf_file, fast_forward=0):
    """Open ``vcf_file`` and prepare to iterate over its rows.

    vcf_file     -- path of the file; opened in binary mode and kept on
                    ``self.fin`` (not closed here).
    fast_forward -- passed to ``globes.splitIterator`` as ``burn``.

    ``getPatients`` is given the freshly opened handle before the row
    iterator is created on that same handle, so the order below matters.
    """
    column_map = COLUMN_MAP
    self.indexOf = column_map
    globes.printColumnWarning(vcf_file, column_map)

    handle = open(vcf_file, "rb")
    self.fin = handle
    self.patients = getPatients(handle)
    # Progress message; a single parenthesised argument prints the same
    # text as the statement form.
    print("after getPatients")

    self.allow_absent = self.group_repeats = False
    self.iterator = globes.splitIterator(handle, burn=fast_forward)
def sortFile(filename):
    """Write a sorted copy of ``filename`` to ``<filename>.sorted``.

    The first line of the input is treated as a header and copied verbatim
    to the top of the output.  The remaining rows are read through
    ``globes.splitIterator`` (with ``stopIter=stopper``), ordered with
    ``sortHelper`` as the sort key, and written tab-joined, one row per line
    (no newline is written after the last row).  Returns None.
    """
    # Read and sort before touching the output: a failure while parsing or
    # sorting then neither leaks the input handle nor leaves a truncated
    # ``.sorted`` file behind.
    with open(filename) as fin:
        header = fin.readline()
        sorted_splts = sorted(globes.splitIterator(fin, stopIter=stopper),
                              key=sortHelper)

    with open("%s.sorted" % filename, 'w') as fout:
        fout.write(header)
        fout.write("\n".join(['\t'.join(splt) for splt in sorted_splts]))
def iterate( self, fast_forward = 0 ) :
    """Yield rows from globes.splitIterator over self.file, dropping the
    first ``fast_forward`` rows it produces.
    """
    # NOTE(review): the `?` values given for burn and header_line_num below
    # are unfilled placeholders and are not valid Python.  This looks like a
    # template to be completed per file format -- confirm the intended values
    # before use.
    count = 0
    for row in globes.splitIterator( \
                                     self.file, \
                                     burn = ?, \
                                     header_line_num = ?, \
                                     skipLine = self.skipper, \
                                     headerSanityCheck = self.headerCheck, \
                                     stopIter = self.stopper ) :
        # count only advances while rows are still being skipped
        if count < fast_forward :
            count += 1
            continue
        else :
            yield row
def __init__(self, file, switch, fast_forward=1):
    """Store the input and switch, and build the row iterator.

    file         -- handed to ``globes.splitIterator`` and kept on
                    ``self.file``.
    switch       -- stored unchanged on ``self.switch``.
    fast_forward -- NOTE(review): accepted but never read in this method;
                    ``burn`` is fixed at 1 below.  Confirm whether it was
                    meant to be forwarded.
    """
    self.file = file
    self.switch = switch
    self.indexOf = indexOf

    row_source = globes.splitIterator(
        self.file,
        burn=1,
        header_line_num=0,
        headerSanityCheck=headerCheck,
        stopIter=stopper,
        skipLine=skipper)
    self.iterator = row_source

    self.allow_absent = False
    self.group_repeats = True

    #the columns concerning a Variant - want cDNAPosition?
    self.cols = [
        "accession",
        "functionGVS",
        "polyPhen",
        "granthamScore",
        "scorePhastCons",
        "consScoreGERP",
        "distanceToSplice",
        "AfricanHapMapFreq",
        "EuropeanHapMapFreq",
        "AsianHapMapFreq",
        "clinicalAssociation",
    ]
def register(self, dargs):
    """Set up the row iterator and merge callbacks from ``dargs``.

    dargs -- mapping that must contain "prediction" (formatted with ``%d``
             into the predictions path under ``globes.SIFT_OUTPUT``) and may
             contain "dbconn".

    The predictions path is passed through ``sort`` and the result is handed
    to ``globes.splitIterator``.
    """
    prediction_num = dargs["prediction"]
    path_parts = (globes.SIFT_OUTPUT, prediction_num, prediction_num)
    sorted_predictions = sort("%s/%d/%d_predictions.tsv" % path_parts)
    self.iterator = globes.splitIterator(sorted_predictions,
                                         stopper=globes.tritonStop)

    #the default interaction will be with a varList
    self.comp, self.eqfunc = self.varListComparator, self.varListIntegrator
    self.ltfunc = self.gtfunc = doNothing

    #If merging into a SQL iterator (iSQL), then will need to be passed a db
    #connection in the arg list
    if "dbconn" in dargs:
        self.conn = dargs["dbconn"]
        self.comp, self.eqfunc = self.sqlComparator, self.sqlIntegrator
def register(self, dargs):
    """Set up the row iterator and merge callbacks from ``dargs``.

    dargs -- mapping with the keys:
             'switch' : 'snp' or 'indel' (compared case-insensitively);
                        'indel' also sets ``self.allow_unmatched = True``.
             'file'   : handed to ``globes.splitIterator``.
             'dbconn' : stored on ``self.conn``.
             'target' : optional; the value "sql" selects the SQL
                        comparator and integrator.

    Raises AssertionError when 'switch' is neither 'snp' nor 'indel'.
    """
    switch = dargs['switch'].lower()
    if switch == 'snp':
        skipper = globes.dontStop
    elif switch == 'indel':
        skipper = self.skipSNPS
        self.allow_unmatched = True
    else:
        # Explicit raise instead of ``assert False``: the check must survive
        # ``python -O``, where a stripped assert would leave ``skipper``
        # unbound.  AssertionError is kept so the exception type is the same.
        raise AssertionError(
            "unrecognised switch %r: expected 'snp' or 'indel'"
            % (dargs['switch'],))

    self.iterator = globes.splitIterator(dargs['file'],
                                         burn=3,
                                         skipper=skipper,
                                         stopper=globes.tritonStop)

    # Defaults; replaced below when the target is SQL.
    self.comp = self.varListINDELComparator
    self.eqfunc = self.varListIntegrator
    self.ltfunc = doNothing
    self.gtfunc = doNothing
    self.conn = dargs['dbconn']

    if 'target' in dargs and dargs['target'] == "sql":
        self.comp = self.sqlComparator
        self.eqfunc = self.sqlIntegrator