def updateLocationBasedTargets(editFN, contextFN, miLocationFN, gFN):

        eSites = cgEdit.loadEditingSites(editFN)
        cgEdit.updateContextEditingSites(eSites, contextFN) #puts the UTR, EXON in eSite.context
        
        geneSet = cgGenes3.createGeneSetEditing(gFN)

        tName_t = {}
        for t in geneSet.transcripts:
                tName_t[t.id] = t


        tName_miInfo = {}
        f = open(miLocationFN, 'r')
        for line in f:
                ls = line.strip().split('\t')
                tName = ls[0]
                miName = ls[1]
                loc = int(ls[2])
                tName_miInfo.setdefault(tName, []).append([miName, loc])


        for eSite in eSites:
                if '3UTR' not in eSite.context:
                        continue
                for tName in eSite.transcripts:
                        if tName in tName_miInfo:
                                
                                t = tName_t[tName]
                                
                                for info in tName_miInfo[tName]:
                                        
                                        
                                        miName = info[0]
                                        loc = info[1]

                                        #get the position of e site in mrna for this transcript
                                        ePosition = t.getRelativePositionMRNA(eSite.coordinate, coding = False)
                                        
                                        print tName, miName, loc, ePosition
                                        if loc - 22 <= ePosition <= loc:
                                                print tName, miName, '%s:%s' % (eSite.chromosome, eSite.coordinate)
                                                pass
def updateValidatedMicroTargets(editFN, microTargetFN, microSequenceFN, outFN, gFN):
        flankAmount = 6
        eSites = cgEdit.loadEditingSites(editFN)
        cgEdit.updateContextEditingSites(eSites)

        miNames_micros = cgMicroRNA.loadMicroRNAFromValidated(microTargetFN, microSequenceFN)
        gf = GenomeFetch.GenomeFetch('hg19')
        
        #update flanking region
        for eSite in eSites:

                chrom = eSite.chromosome
                coord = eSite.coordinate
                strand = eSite.strand

                flankingSeq = gf.get_seq_from_to(chrom, coord - flankAmount, coord + flankAmount, strand)
                eFlankingSeq = flankingSeq[:flankAmount] + 'G' + flankingSeq[flankAmount + 1:]

                eSite.flank = flankingSeq.replace('T', 'U')
                eSite.eFlank = eFlankingSeq.replace('T', 'U')

        #joint
        gName_micros = {}
        for micro in miNames_micros.values():
                for target in micro.targetGenes:
                        if micro not in gName_micros.setdefault(target, []): gName_micros[target].append(micro)

        gene_m = {}       
        for eSite in eSites:

                sharedMicros = gName_micros.get(eSite.gene)
                if sharedMicros is None:
                        continue
                for micro in sharedMicros:
                        print '' 
                        print micro.name, eSite.gene, micro.sequence, micro.seed
                        print micro.comSeed
                        print eSite.flank, eSite.eFlank
                        print '%s:%s' % (eSite.chromosome, eSite.coordinate), eSite.flank, eSite.gene
                        if micro.comSeed == None:
                                #dumpObj.dumpObj(micro)
                                #print 'miR not in sequence file:', micro.name
                                #print micro.targetGenes
                                continue

                        if eSite.gene in gene_m:
                                if micro not in gene_m[eSite.gene]:
                                        gene_m[eSite.gene].append(micro)
                        else:
                                gene_m[eSite.gene] = [micro]

                        #flanking
                        if micro.comSeed in eSite.flank:
                                eSite.before.append(micro.name)

                        if micro.comSeed in eSite.eFlank:
                                eSite.after.append(micro.name)

        print len(gene_m)
        count = 0
        for g in gene_m:
                print g
                for m in gene_m[g]:
                        print '...', m.name
                        count += 1
        print count
        
        #check if these seeds are in the 
        checkIfSeedPresent(gene_m, gFN) 
                
     
        #write contents to file...
        outF = open(outFN, 'w')
        for eSite in eSites:
                if len(eSite.before) == 0:
                        targets = 'None'
                else:
                        targets = ','.join(eSite.before)

                if len(eSite.after) == 0:
                        eTargets = 'None'
                else:
                        eTargets = ','.join(eSite.after)

                outF.write('%s\t%s:%s\t%s\t%s\t%s\n' % (eSite.ID, eSite.chromosome, eSite.coordinate, eSite.strand, targets, eTargets))
def checkSeeds(editFN, contextFN, miLocationFN, miSequenceFN, gFN):

        eSites = cgEdit.loadEditingSites(editFN)
        cgEdit.updateContextEditingSites(eSites, contextFN) #puts the UTR, EXON in eSite.context
        
        geneSet = cgGenes3.createGeneSetEditing(gFN)

        tName_t = {}
        for t in geneSet.transcripts:
                tName_t[t.id] = t

        
        miName_miSequence = {}
        f = open(miSequenceFN, 'r')
        for line in f:
                ls = line.strip().split('\t')
                name = ls[0]
                seq = ls[1]
                name = 'hsa-' + name
                miName_miSequence[name] = seq

        tName_miInfo = {}
        f = open(miLocationFN, 'r')
        for line in f:
                ls = line.strip().split('\t')
                tName = ls[0]
                miName = ls[1]
                loc = int(ls[2])
                tName_miInfo.setdefault(tName, []).append([miName, loc])

        foundIt = []
        notFoundIt = []
        for tName in tName_miInfo:
                
                try:
                        t = tName_t[tName]
                except:
                        continue
                checkSeq = get3UTRSeq(t)
                try:
                        mRNA = t.getMRNA()
                except:
                        continue
                for miInfo in tName_miInfo[tName]:
                
                        miName = miInfo[0]
                        loc = miInfo[1]
                        try:
                                miSequence = miName_miSequence[miName]
                                miSeed = miSequence[1:8]
                        except:
                                continue

                        rcMiSeed = cgSeqMod.reverseComplementSequence(miSeed, True)
                        
                        newLoc = loc - (len(mRNA) - len(checkSeq)) 
                        finding = checkSeq.find(rcMiSeed, newLoc - 25)    
                        if finding != -1:
                                if (0 < newLoc - finding < 30):
                                        newResult = '%s\t%s\t%s\t%s\t%s' % (miName, tName, finding, newLoc, loc)
                                        if newResult not in foundIt: foundIt.append(newResult)
                        else:
                                        
                                        if miName == 'hsa-miR-21':
                                                print loc, len(checkSeq), len(mRNA)
                                                print mRNA
                                                print checkSeq
                                               
                                        newResult = '%s\t%s\t%s\t%s\t%s' % (miName, tName, finding, newLoc, loc)
                                        if newResult not in notFoundIt: notFoundIt.append(newResult)

        print len(foundIt)
        print len(notFoundIt)
        print ''
        for i in foundIt:
                print i
        print ''
        for i in notFoundIt:
                print i