Esempio n. 1
0
 def __init__(self, filename, pat_name) :
     """Open `filename` and build the row iterator for this reader.

     filename -- path handed to open(); the handle is kept on self.fin
     pat_name -- stored unchanged on self.pat_name

     The handle is left open because self.iterator reads from it.
     """
     # both flags start out disabled
     self.allow_absent = self.group_repeats = False
     self.pat_name = pat_name

     handle = open(filename)
     self.fin = handle
     # skipper is the module-level predicate passed as skipLine
     self.iterator = splitIterator( handle, skipLine = skipper )
Esempio n. 2
0
    def iterate(self) :
        """Yield one flat record per rsid from the allele frequency file.

        Every row produced by globes.splitIterator( self.freq_file ) is read
        as (rsid, allele id, count, frequency).  Consecutive rows that share
        an rsid are folded into a single list

            [rsid, allele, count, allele2, count2, ...]

        where each allele is self.dallele[allele id].  Rows whose allele has
        a length greater than one, or whose allele id is 5 or 8, are skipped.
        NOTE(review): presumably those are the indel alleles - confirm
        against the allele table.

        The record of the last rsid in the file is yielded as well.

        Raises:
            AssertionError: the frequencies of the alleles kept for an rsid
                sum to less than .999, meaning the skipping above dropped
                part of that rsid.
        """
        too_low = "the way we skip indels isnt getting it right"

        rsid_info = []
        # frequency mass of the alleles kept for the rsid being collected;
        # the flat record does not store frequencies, so track it here
        freq_total = 0.0
        for splt in globes.splitIterator( self.freq_file ) :
            rsid, alleleid = int(splt[0]), int(splt[1])
            # counts may be written as floats ("12.0"), hence int(float())
            count, freq = int(float(splt[2])), float(splt[3])

            allele = self.dallele[alleleid]
            if len(allele) > 1 or alleleid == 5 or alleleid == 8 :
                continue

            # a different rsid closes the record collected so far
            if rsid_info and rsid != rsid_info[0] :
                if freq_total < .999 :
                    raise AssertionError( too_low )
                yield rsid_info
                rsid_info = []
                freq_total = 0.0

            if not rsid_info :
                rsid_info.append( rsid )
            rsid_info.append( allele )
            rsid_info.append( count )
            freq_total += freq

        # the loop only emits a record when the next rsid shows up, so the
        # final one has to be flushed here
        if rsid_info :
            if freq_total < .999 :
                raise AssertionError( too_low )
            yield rsid_info
Esempio n. 3
0
 def iterate(self, fast_forward = 0) :
     """Yield the rows of self.fin, dropping the first `fast_forward` rows.

     Rows come from globes.splitIterator( self.fin, burn=0 ).
     """
     rows = globes.splitIterator( self.fin, burn=0 )
     for position, row in enumerate( rows ) :
         # positions below fast_forward belong to the skipped prefix
         if position >= fast_forward :
             yield row
Esempio n. 4
0
    def iterate(self) :
        """Yield one flat record per rsid from the allele frequency file.

        Every row produced by globes.splitIterator( self.freq_file ) is read
        as (rsid, allele id, count).  Consecutive rows that share an rsid
        are folded into a single list

            [rsid, allele, count, allele2, count2, ...]

        where each allele is self.dallele[allele id].  The record of the
        last rsid in the file is yielded as well.

        Raises:
            AssertionError: a row has a field that cannot be parsed as a
                number; the offending row is printed first.
        """
        rsid_info = []
        for splt in globes.splitIterator( self.freq_file ) :
            try :
                rsid, alleleid = int(splt[0]), int(splt[1])
                # counts may be written as floats ("12.0"), hence int(float())
                count = int(float(splt[2]))
            except ValueError :
                print(splt)
                # raised explicitly: a bare assert disappears under -O and
                # the loop would carry on with stale values
                raise AssertionError( "unparseable frequency row: %r" % (splt,) )

            if alleleid > 13 or alleleid == 5 or alleleid == 8 :
                # > 13 : where the indels start
                # 5    : + (used for generic indels?)
                # 8    : - (used for generic indels?)
                continue

            # a different rsid closes the record collected so far
            if rsid_info and rsid != rsid_info[0] :
                yield rsid_info
                rsid_info = []

            if not rsid_info :
                rsid_info.append( rsid )
            rsid_info.append( self.dallele[alleleid] )
            rsid_info.append( count )

        # the loop only emits a record when the next rsid shows up, so the
        # final one has to be flushed here
        if rsid_info :
            yield rsid_info
Esempio n. 5
0
def sortFile( filename ) :
    """Write the rows of `filename`, sorted, to "<filename>.sorted".

    Rows come from splitIterator and are ordered with sortHelper as the
    sort key.  Each row is written tab-joined, rows are separated by
    newlines, and no newline follows the last row.

    Both files are closed even when reading or sorting fails.  The output
    file is only created once the rows have been sorted, so a failure
    while reading does not leave an empty ".sorted" file behind.
    """
    with open(filename) as fin :
        sorted_splts = sorted( splitIterator( fin ), key = sortHelper )

    with open("%s.sorted" % filename, 'w') as fout :
        fout.write( "\n".join( '\t'.join(splt) for splt in sorted_splts ) )
Esempio n. 6
0
 def iterate(self, fast_forward = 0) :
     """Yield the rows of self.file, dropping the first `fast_forward` rows.

     Rows come from globes.splitIterator called with burn=1 and
     self.stopper as its stopper.
     """
     rows = globes.splitIterator( self.file, burn=1, stopper=self.stopper )
     for position, row in enumerate( rows ) :
         # positions below fast_forward belong to the skipped prefix
         if position >= fast_forward :
             yield row
Esempio n. 7
0
 def __init__(self, vcf_file, fast_forward=0) :
     """Open `vcf_file` and prepare the row iterator.

     vcf_file     -- path of the file, opened in binary mode
     fast_forward -- forwarded to globes.splitIterator as `burn`

     The handle stays open on self.fin: getPatients reads from it first
     and self.iterator continues on the same handle afterwards.
     """
     # both flags start out disabled
     self.allow_absent = self.group_repeats = False

     column_map = COLUMN_MAP
     self.indexOf = column_map
     globes.printColumnWarning( vcf_file, column_map )

     handle = open( vcf_file, "rb" )
     self.fin = handle
     self.patients = getPatients( handle )
     print("after getPatients")
     self.iterator = globes.splitIterator( handle, burn=fast_forward )
Esempio n. 8
0
def sortFile( filename ) :
    """Write a sorted copy of `filename` to "<filename>.sorted".

    The first line is treated as a header and copied through unchanged.
    The remaining rows come from globes.splitIterator (with stopper as
    its stopIter) and are ordered with sortHelper as the sort key.  Each
    row is written tab-joined, rows are separated by newlines, and no
    newline follows the last row.

    Both files are closed even when reading or sorting fails.  The output
    file is only created once the rows have been sorted, so a failure
    while reading does not leave a header-only ".sorted" file behind.
    """
    with open(filename) as fin :
        header = fin.readline()
        sorted_splts = sorted( globes.splitIterator( fin, stopIter=stopper ),
                               key = sortHelper )

    with open( "%s.sorted" % filename, 'w' ) as fout :
        fout.write(header)
        fout.write( "\n".join( '\t'.join(splt) for splt in sorted_splts ) )
Esempio n. 9
0
 def iterate( self, fast_forward = 0 ) :
     # Yield the rows of self.file, dropping the first `fast_forward` rows
     # that globes.splitIterator produces.
     #
     # NOTE(review): this reads as a template - `burn` and `header_line_num`
     # are literal `?` placeholders below, which is not valid Python.  They
     # have to be replaced with real values before this method can run.
     count = 0
     for row in globes.splitIterator( \
                         self.file, \
                         burn = ?, \
                         header_line_num = ?, \
                         skipLine = self.skipper, \
                         headerSanityCheck = self.headerCheck, \
                         stopIter = self.stopper ) :
         if count < fast_forward :
             count += 1
             continue
         else : yield row
Esempio n. 10
0
    def __init__(self, file, switch, fast_forward=1) :
        """Store the inputs and build the row iterator over `file`.

        file         -- passed to globes.splitIterator as its source
        switch       -- stored unchanged on self.switch
        fast_forward -- accepted but not used here; burn is fixed at 1
        """
        self.file, self.switch = file, switch
        self.indexOf = indexOf
        self.allow_absent, self.group_repeats = False, True

        # module-level callbacks handed to the iterator
        iterator_options = dict( burn=1,
                                 header_line_num=0,
                                 headerSanityCheck=headerCheck,
                                 stopIter=stopper,
                                 skipLine=skipper )
        self.iterator = globes.splitIterator( file, **iterator_options )

        #the columns concerning a Variant - want cDNAPosition?
        self.cols = [
            "accession",
            "functionGVS",
            "polyPhen",
            "granthamScore",
            "scorePhastCons",
            "consScoreGERP",
            "distanceToSplice",
            "AfricanHapMapFreq",
            "EuropeanHapMapFreq",
            "AsianHapMapFreq",
            "clinicalAssociation",
        ]
0
    def register( self, dargs ) :
        """Set up the predictions iterator and the merge callbacks.

        dargs -- dict of arguments:
            "prediction" : number used to build the predictions file path
            "dbconn"     : optional; when present it is kept on self.conn
                           and the SQL comparator/integrator are used
        """
        number = dargs["prediction"]
        unsorted_path = "%s/%d/%d_predictions.tsv" \
                            % (globes.SIFT_OUTPUT, number, number)
        # NOTE(review): sort() presumably returns the path of the sorted
        # file - confirm against its definition
        sorted_path = sort( unsorted_path )
        self.iterator = globes.splitIterator( sorted_path, \
                                              stopper=globes.tritonStop )

        self.ltfunc = self.gtfunc = doNothing

        #the default interaction will be with a varList
        self.comp, self.eqfunc = self.varListComparator, self.varListIntegrator

        #If merging into a SQL iterator (iSQL), then will need to be passed
        #a db connection in the arg list
        if "dbconn" in dargs :
            self.conn = dargs["dbconn"]
            self.comp, self.eqfunc = self.sqlComparator, self.sqlIntegrator
Esempio n. 12
0
    def register( self, dargs ) :
        """Set up the row iterator and the merge callbacks.

        dargs -- dict of arguments:
            'switch' : 'snp' or 'indel' (case-insensitive); selects the
                       skip predicate handed to globes.splitIterator
            'file'   : source passed to globes.splitIterator
            'dbconn' : kept on self.conn (required)
            'target' : optional; when equal to "sql" the SQL
                       comparator/integrator replace the varList ones

        Raises:
            AssertionError: 'switch' is neither 'snp' nor 'indel'.
        """
        switch = dargs['switch'].lower()
        if switch == 'snp' :
            skipper = globes.dontStop
        elif switch == 'indel' :
            skipper = self.skipSNPS
            self.allow_unmatched = True
        else :
            # raised explicitly: a bare assert disappears under -O and
            # `skipper` would be left unbound
            raise AssertionError( "unknown switch %r (expected 'snp' or 'indel')"
                                  % (dargs['switch'],) )

        self.iterator = globes.splitIterator(dargs['file'], \
                                             burn=3, \
                                             skipper=skipper, \
                                             stopper=globes.tritonStop)
        self.comp = self.varListINDELComparator
        self.eqfunc = self.varListIntegrator
        self.ltfunc = doNothing
        self.gtfunc = doNothing

        self.conn = dargs['dbconn']

        k = 'target'
        if k in dargs and dargs[k] == "sql" :
            self.comp = self.sqlComparator
            self.eqfunc = self.sqlIntegrator