def reprime(lociFile, alleleFile=None, minimalPrimersize=10, maxPrimersize=30,
            maxStretch=2, uniqueness=0, replace=True):
    """
    Reprimes a loci configfile.

    For every locus the sequence in column 5 is used to pick, for column 2
    (from the sequence itself) and column 3 (from its complement), the
    shortest prefix of at least minimalPrimersize and at most maxPrimersize
    characters that:
      - occurs exactly once in its own strand and not in the other strand
      - does not occur in the sequence (or complement) of any other locus
    If no such prefix exists the column is left untouched.

    lociFile: path of the loci csv, read with getLoci and written back
    replace: if False the result is written to '<name>_reprimed.csv'
             instead of overwriting lociFile
    alleleFile, maxStretch, uniqueness: accepted for compatibility, but not
             used by the current implementation
    """
    from myflq.MyFLq import complement
    loci = getLoci(lociFile)
    sequences = {loci[l][5] for l in loci}
    alleles = sequences | {complement(s) for s in sequences}
    if not replace:
        lociFile = lociFile.replace('.csv', '_reprimed.csv')
    # Context manager guarantees the file is flushed and closed
    with open(lociFile, 'wt') as outFile:
        for locus in loci:
            for p in (2, 3):
                locusSeq = loci[locus][5]
                locusComp = complement(locusSeq)
                if p == 3:
                    locusSeq, locusComp = locusComp, locusSeq
                otherAlleles = alleles - {locusSeq, locusComp}
                for i in range(minimalPrimersize, maxPrimersize + 1):
                    primer = locusSeq[:i]
                    if locusSeq.count(primer) != 1 or locusComp.count(primer) != 0:
                        continue
                    if not any(primer in a for a in otherAlleles):
                        # Assign at the moment of success, so a primer of
                        # exactly maxPrimersize is also accepted
                        loci[locus][p] = primer
                        break
            print(','.join(loci[locus]), file=outFile)
def closeMatch(cls, sequence, differences=10, minimalKmerSize=5):
    """
    Searches if there exists a close match with a maximum number of
    differences that can be provided as argument.
    It uses a heuristic that is fast, but could miss some matches,
    even when they have fewer than the allowed differences.

    Steps:
      1. keep alleles whose length lies strictly between
         len(sequence) - differences and len(sequence) + differences
      2. count, per allele, how many non-overlapping k-mers of sequence
         (and of their complements) occur in the allele; drop alleles
         without any hit
      3. align the remaining alleles, most k-mer hits first, and keep the
         one within `differences` that has the shortest transformCode

    Returns the matching allele, with transformCode attribute.
    If no match is found raises ObjectDoesNotExist
    """
    from myflq.MyFLq import complement, Alignment

    # First filter based on length
    seqlen = len(sequence)
    minLength = 0 if seqlen < differences else seqlen - differences
    candidates = cls.objects.filter(length__gt=minLength).filter(
        length__lt=seqlen + differences)
    # allele -> [k-mer hits as given, k-mer hits of complemented k-mers]
    alleles = {a: [0, 0] for a in candidates}

    # Filter based on kmer from sequence
    if seqlen > minimalKmerSize:
        kmersize = int(seqlen / (differences + 1))
        if kmersize < minimalKmerSize:
            kmersize = minimalKmerSize
        # +1 so the last full k-mer is included when seqlen is an exact
        # multiple of kmersize (and when kmersize == seqlen)
        kmers = {sequence[i:i + kmersize]
                 for i in range(0, seqlen - kmersize + 1, kmersize)}
        kmers_c = {complement(k) for k in kmers}
        for a in alleles:
            for k in kmers:
                if k in a.sequence:
                    alleles[a][0] += 1
            for k in kmers_c:
                if k in a.sequence:
                    alleles[a][1] += 1
        alleles = {a: alleles[a] for a in alleles if sum(alleles[a])}

    # Look for match that meets differences requirement
    matchedAllele = None
    for allele in sorted(alleles, key=lambda x: max(alleles[x]), reverse=True):
        complementary = alleles[allele][0] < alleles[allele][1]
        # Without stutter info for now #TODO
        query = complement(sequence) if complementary else sequence
        alignment = Alignment(allele.sequence, query,
                              gapPenalty=-10, gapExtension=-5)
        if alignment.getDifferences() <= differences:
            tc = alignment.getTransformCode(startSequence=allele.sequence)
            tc = ('tc' if complementary else 'to') + tc[1:]
            allele.transformCode = tc
            if not matchedAllele or len(matchedAllele.transformCode) > len(tc):
                matchedAllele = allele
            # If difference in k-mer count is already substantial => break
            elif max(alleles[matchedAllele]) > max(alleles[allele]) + 2:
                break
    if matchedAllele:
        return matchedAllele
    raise ObjectDoesNotExist
def closeMatch(cls, sequence, differences=10, minimalKmerSize=5):
    """
    Searches if there exists a close match with a maximum number of
    differences that can be provided as argument.
    It uses a heuristic that is fast, but could miss some matches,
    even when they have fewer than the allowed differences.

    Steps:
      1. keep alleles whose length lies strictly between
         len(sequence) - differences and len(sequence) + differences
      2. count, per allele, how many non-overlapping k-mers of sequence
         (and of their complements) occur in the allele; drop alleles
         without any hit
      3. align the remaining alleles, most k-mer hits first, and keep the
         one within `differences` that has the shortest transformCode

    Returns the matching allele, with transformCode attribute.
    If no match is found raises ObjectDoesNotExist
    """
    from myflq.MyFLq import complement, Alignment

    # First filter based on length
    seqlen = len(sequence)
    minLength = 0 if seqlen < differences else seqlen - differences
    candidates = cls.objects.filter(length__gt=minLength).filter(
        length__lt=seqlen + differences)
    # allele -> [k-mer hits as given, k-mer hits of complemented k-mers]
    alleles = {a: [0, 0] for a in candidates}

    # Filter based on kmer from sequence
    if seqlen > minimalKmerSize:
        kmersize = int(seqlen / (differences + 1))
        if kmersize < minimalKmerSize:
            kmersize = minimalKmerSize
        # +1 so the last full k-mer is included when seqlen is an exact
        # multiple of kmersize (and when kmersize == seqlen)
        kmers = {sequence[i:i + kmersize]
                 for i in range(0, seqlen - kmersize + 1, kmersize)}
        kmers_c = {complement(k) for k in kmers}
        for a in alleles:
            for k in kmers:
                if k in a.sequence:
                    alleles[a][0] += 1
            for k in kmers_c:
                if k in a.sequence:
                    alleles[a][1] += 1
        alleles = {a: alleles[a] for a in alleles if sum(alleles[a])}

    # Look for match that meets differences requirement
    matchedAllele = None
    for allele in sorted(alleles, key=lambda x: max(alleles[x]), reverse=True):
        complementary = alleles[allele][0] < alleles[allele][1]
        # Without stutter info for now #TODO
        query = complement(sequence) if complementary else sequence
        alignment = Alignment(allele.sequence, query,
                              gapPenalty=-10, gapExtension=-5)
        if alignment.getDifferences() <= differences:
            tc = alignment.getTransformCode(startSequence=allele.sequence)
            tc = ('tc' if complementary else 'to') + tc[1:]
            allele.transformCode = tc
            if not matchedAllele or len(matchedAllele.transformCode) > len(tc):
                matchedAllele = allele
            # If difference in k-mer count is already substantial => break
            elif max(alleles[matchedAllele]) > max(alleles[allele]) + 2:
                break
    if matchedAllele:
        return matchedAllele
    raise ObjectDoesNotExist
def transform(self, transformCode):
    """
    Applies a FLAD transformCode to this allele's sequence.

    The code has to start with 'to' or 'tc'; with 'tc' the complement of
    the stored sequence is the starting point, otherwise the sequence
    itself. Anything after those two characters is handed to
    Alignment.transformSeq (prefixed with 't').

    Returns the transformed sequence.
    A bare 'to' or 'tc' returns immediately ('to' also removes the
    transformCode attribute again). Otherwise the transformed sequence is
    looked up, and if it is already in the database a StopIteration
    carrying the found allele is raised.
    """
    from myflq.MyFLq import complement, Alignment
    model = type(self)
    self.transformCode = transformCode

    strand = (complement(self.sequence) if transformCode.startswith('tc')
              else self.sequence)
    if len(transformCode) > 2:
        strand = Alignment.transformSeq('t' + transformCode[2:], strand)

    # Strand-only codes need no database lookup
    if self.transformCode == 'to':
        del self.transformCode
        return strand
    if self.transformCode == 'tc':
        return strand

    # Test if exact transformed sequence is in database
    # NOTE(review): called with a positional sequence and a seqid keyword;
    # confirm this matches the current search signature.
    try:
        known = model.search(strand, seqid=True)
        if known.sequence != strand:
            known.transformCode = 'tc'
        raise StopIteration(known)
    except ObjectDoesNotExist:
        return strand
def search(cls, fladid=False, locus=None, seq=False, closeMatch=False):
    """
    Looks up an allele, either by FLADid or by locus/sequence combo.

    With a fladid, it is parsed with cls.fladrex: the 'fladid' group is read
    as a hexadecimal number and the optional 'locus' group as a Locus id.

    Without a fladid the sequence is looked up as given, then as its
    complement (the result then gets transformCode 'tc'). If both fail and
    closeMatch is set, cls.closeMatch is tried with up to 10 differences;
    otherwise ObjectDoesNotExist propagates.
    A locus name is upper-cased and created if it does not exist yet.

    Returns the allele found.
    """
    from myflq.MyFLq import complement

    if fladid:
        parsed = cls.fladrex.match(fladid)
        number = int(parsed.group('fladid'), base=16)
        locus = parsed.group('locus')
        if locus:
            locus = Locus.objects.get(id=locus)
        return cls.objects.get(fladid=number, locus=locus)

    if locus:
        locus, _ = Locus.objects.get_or_create(name=locus.upper())
    try:
        found = cls.objects.get(sequence=seq, locus=locus)
    except ObjectDoesNotExist:
        try:
            found = cls.objects.get(sequence=complement(seq), locus=locus)
            found.transformCode = 'tc'
        except ObjectDoesNotExist:
            if not closeMatch:
                raise
            # NOTE(review): passes a locus keyword; confirm the current
            # closeMatch signature accepts it.
            found = cls.closeMatch(sequence=seq, locus=locus,
                                   differences=10)
    return found
def add(cls, sequence, locus=None, user=None):
    """
    Adds a locus/sequence to FLAD
    Works like get_or_create, but only returns the Allele object

    A newly created allele gets its fladid as follows: its position in the
    list of alleles for the locus is split into a bin (size 100, or 1000
    when there is no locus) and an offset; the ids bin+1 .. bin+size are
    shuffled with a generator seeded by str(locus)+str(bin), and the offset
    selects the id from that shuffled list.

    locus: locus name (upper-cased, created when missing) or None
    user: if given and authenticated, is added to the allele's users

    Raises AssertionError if the complement of sequence is already stored
    for the locus.
    """
    from myflq.MyFLq import complement
    import random

    # Special handling for anonymous users (returning a dummy FLADid without
    # changing the database) was disabled on 2015/11/17 to allow generic
    # testing. Other solution would be automated generation of FLADkeys
    # also for testing.

    if locus:
        locus = Locus.objects.get_or_create(name=locus.upper())[0]

    # Last check to see if complement is not in database.
    # Explicit raise instead of assert, so the check survives `python -O`.
    if cls.objects.filter(sequence=complement(sequence),
                          locus=locus).exists():
        raise AssertionError(
            'complement of sequence is already in database for this locus')

    allele, crtd = cls.objects.get_or_create(sequence=sequence, locus=locus)
    if crtd:  # if created
        allelePosition = list(cls.objects.filter(locus=locus)).index(allele)
        randomSampleSpace = 100 if locus else 1000
        binIndex, offset = divmod(allelePosition, randomSampleSpace)
        alleleBin = binIndex * randomSampleSpace
        alleleChoices = list(range(alleleBin + 1,
                                   alleleBin + randomSampleSpace + 1))
        # Private generator: yields the same permutation as seeding the
        # module-level one, without resetting process-wide random state
        rng = random.Random(str(locus) + str(alleleBin))
        rng.shuffle(alleleChoices)
        allele.fladid = alleleChoices[offset]
        allele.save()

    # In case already added, just add user
    if user and user.is_authenticated():
        allele.users.add(user)
    return allele
def getcomplement(self):
    """
    Returns the complement sequence.
    It is derived from getseq(), so if there is a transformCode
    it will be applied.
    """
    from myflq.MyFLq import complement
    forward = self.getseq()
    return complement(forward)
def transform(self, transformCode):
    """
    Applies a FLAD transformCode to this allele's sequence.

    The code has to start with 'to' or 'tc'; with 'tc' the complement of
    the stored sequence is the starting point, otherwise the sequence
    itself. Anything after those two characters is handed to
    Alignment.transformSeq (prefixed with 't').

    Returns the transformed sequence.
    A bare 'to' or 'tc' returns immediately ('to' also removes the
    transformCode attribute again). Otherwise the transformed sequence is
    looked up, and if it is already in the database a StopIteration
    carrying the found allele is raised.
    """
    from myflq.MyFLq import complement, Alignment
    model = type(self)
    self.transformCode = transformCode

    strand = (complement(self.sequence) if transformCode.startswith('tc')
              else self.sequence)
    if len(transformCode) > 2:
        strand = Alignment.transformSeq('t' + transformCode[2:], strand)

    # Strand-only codes need no database lookup
    if self.transformCode == 'to':
        del self.transformCode
        return strand
    if self.transformCode == 'tc':
        return strand

    # Test if exact transformed sequence is in database
    # NOTE(review): called with a positional sequence and a seqid keyword;
    # confirm this matches the current search signature.
    try:
        known = model.search(strand, seqid=True)
        if known.sequence != strand:
            known.transformCode = 'tc'
        raise StopIteration(known)
    except ObjectDoesNotExist:
        return strand
def search(cls, fladid=False, locus=None, seq=False, closeMatch=False):
    """
    Looks up an allele, either by FLADid or by locus/sequence combo.

    With a fladid, it is parsed with cls.fladrex: the 'fladid' group is read
    as a hexadecimal number and the optional 'locus' group as a Locus id.

    Without a fladid the sequence is looked up as given, then as its
    complement (the result then gets transformCode 'tc'). If both fail and
    closeMatch is set, cls.closeMatch is tried with up to 10 differences;
    otherwise ObjectDoesNotExist propagates.
    A locus name is upper-cased and created if it does not exist yet.

    Returns the allele found.
    """
    from myflq.MyFLq import complement

    if fladid:
        parsed = cls.fladrex.match(fladid)
        number = int(parsed.group('fladid'), base=16)
        locus = parsed.group('locus')
        if locus:
            locus = Locus.objects.get(id=locus)
        return cls.objects.get(fladid=number, locus=locus)

    if locus:
        locus, _ = Locus.objects.get_or_create(name=locus.upper())
    try:
        found = cls.objects.get(sequence=seq, locus=locus)
    except ObjectDoesNotExist:
        try:
            found = cls.objects.get(sequence=complement(seq), locus=locus)
            found.transformCode = 'tc'
        except ObjectDoesNotExist:
            if not closeMatch:
                raise
            # NOTE(review): passes a locus keyword; confirm the current
            # closeMatch signature accepts it.
            found = cls.closeMatch(sequence=seq, locus=locus,
                                   differences=10)
    return found
def result(request, analysis=False):
    """
    Shows the result page of an analysis, or handles the AJAX call that
    adds an allele from that page.

    analysis: primary key of the Analysis; on a POST request it is taken
              from the 'viewResult' form field instead.

    AJAX requests build the allele sequence from the GET parameters
    'beg' + 'roi' + complement('cend'), get or create the matching Allele
    for the GET 'locus', remove a non-FLAD allele with the same
    locus/sequence when a new one was created, update the analysis results
    XML and answer with '[<FLADid>]'.

    In both cases the analysis has to belong to a configuration of
    request.user, otherwise Analysis.DoesNotExist is raised.
    """
    # User post request result
    if request.method == 'POST':
        analysis = request.POST['viewResult']

    # Process AJAX for adding alleles
    if request.is_ajax():
        from myflq.MyFLq import complement
        params = request.GET
        sequence = params['beg'] + params['roi'] + complement(params['cend'])
        # Restrict to the requesting user's analyses, same as the regular
        # page below, so other users' results cannot be modified
        analysis = Analysis.objects.get(pk=int(analysis),
                                        configuration__user=request.user)
        locus = Locus.objects.get(name=params['locus'],
                                  configuration=analysis.configuration)
        allele, created = Allele.objects.get_or_create(
            configuration=analysis.configuration,
            locus=locus,
            FLADid=getFLAD(locus, sequence, analysis.configuration.user),
            sequence=sequence)
        if created and Allele.objects.filter(
                locus=locus, sequence=sequence, isFLAD=False).exists():
            Allele.objects.get(locus=locus, sequence=sequence,
                               isFLAD=False).delete()
        # todo report in json if created
        analysis.analysisresults.updateXML(params['roi'], allele)
        data = '[{}]'.format(allele.FLADid)  # fladid here
        return HttpResponse(data, 'application/json')

    analysis = Analysis.objects.get(pk=analysis,
                                    configuration__user=request.user)
    return render(request, 'myflq/result.html',
                  {'myflq': True, 'analysis': analysis})
def result(request, analysis=False):
    """
    Shows the result page of an analysis, or handles the AJAX call that
    adds an allele from that page.

    analysis: primary key of the Analysis; on a POST request it is taken
              from the 'viewResult' form field instead.

    AJAX requests build the allele sequence from the GET parameters
    'beg' + 'roi' + complement('cend'), get or create the matching Allele
    for the GET 'locus', remove a non-FLAD allele with the same
    locus/sequence when a new one was created, update the analysis results
    XML and answer with '[<FLADid>]'.

    In both cases the analysis has to belong to a configuration of
    request.user, otherwise Analysis.DoesNotExist is raised.
    """
    # User post request result
    if request.method == 'POST':
        analysis = request.POST['viewResult']

    # Process AJAX for adding alleles
    if request.is_ajax():
        from myflq.MyFLq import complement
        params = request.GET
        sequence = params['beg'] + params['roi'] + complement(params['cend'])
        # Restrict to the requesting user's analyses, same as the regular
        # page below, so other users' results cannot be modified
        analysis = Analysis.objects.get(pk=int(analysis),
                                        configuration__user=request.user)
        locus = Locus.objects.get(name=params['locus'],
                                  configuration=analysis.configuration)
        allele, created = Allele.objects.get_or_create(
            configuration=analysis.configuration,
            locus=locus,
            FLADid=getFLAD(locus, sequence, analysis.configuration.user),
            sequence=sequence)
        if created and Allele.objects.filter(
                locus=locus, sequence=sequence, isFLAD=False).exists():
            Allele.objects.get(locus=locus, sequence=sequence,
                               isFLAD=False).delete()
        # todo report in json if created
        analysis.analysisresults.updateXML(params['roi'], allele)
        data = '[{}]'.format(allele.FLADid)  # fladid here
        return HttpResponse(data, 'application/json')

    analysis = Analysis.objects.get(pk=analysis,
                                    configuration__user=request.user)
    return render(request, 'myflq/result.html',
                  {'myflq': True, 'analysis': analysis})
def reprime(lociFile, alleleFile=None, minimalPrimersize=10, maxPrimersize=30,
            maxStretch=2, uniqueness=0, replace=True):
    """
    Reprimes a loci configfile.

    For every locus the sequence in column 5 is used to pick, for column 2
    (from the sequence itself) and column 3 (from its complement), the
    shortest prefix of at least minimalPrimersize and at most maxPrimersize
    characters that:
      - occurs exactly once in its own strand and not in the other strand
      - does not occur in the sequence (or complement) of any other locus
    If no such prefix exists the column is left untouched.

    lociFile: path of the loci csv, read with getLoci and written back
    replace: if False the result is written to '<name>_reprimed.csv'
             instead of overwriting lociFile
    alleleFile, maxStretch, uniqueness: accepted for compatibility, but not
             used by the current implementation
    """
    from myflq.MyFLq import complement
    loci = getLoci(lociFile)
    sequences = {loci[l][5] for l in loci}
    alleles = sequences | {complement(s) for s in sequences}
    if not replace:
        lociFile = lociFile.replace('.csv', '_reprimed.csv')
    # Context manager guarantees the file is flushed and closed
    with open(lociFile, 'wt') as outFile:
        for locus in loci:
            for p in (2, 3):
                locusSeq = loci[locus][5]
                locusComp = complement(locusSeq)
                if p == 3:
                    locusSeq, locusComp = locusComp, locusSeq
                otherAlleles = alleles - {locusSeq, locusComp}
                for i in range(minimalPrimersize, maxPrimersize + 1):
                    primer = locusSeq[:i]
                    if locusSeq.count(primer) != 1 or locusComp.count(primer) != 0:
                        continue
                    if not any(primer in a for a in otherAlleles):
                        # Assign at the moment of success, so a primer of
                        # exactly maxPrimersize is also accepted
                        loci[locus][p] = primer
                        break
            print(','.join(loci[locus]), file=outFile)
def add(cls, sequence, locus=None, user=None):
    """
    Adds a locus/sequence to FLAD
    Works like get_or_create, but only returns the Allele object

    A newly created allele gets its fladid as follows: its position in the
    list of alleles for the locus is split into a bin (size 100, or 1000
    when there is no locus) and an offset; the ids bin+1 .. bin+size are
    shuffled with a generator seeded by str(locus)+str(bin), and the offset
    selects the id from that shuffled list.

    locus: locus name (upper-cased, created when missing) or None
    user: if given and authenticated, is added to the allele's users

    Raises AssertionError if the complement of sequence is already stored
    for the locus.
    """
    from myflq.MyFLq import complement
    import random

    # Special handling for anonymous users (returning a dummy FLADid without
    # changing the database) was disabled on 2015/11/17 to allow generic
    # testing. Other solution would be automated generation of FLADkeys
    # also for testing.

    if locus:
        locus = Locus.objects.get_or_create(name=locus.upper())[0]

    # Last check to see if complement is not in database.
    # Explicit raise instead of assert, so the check survives `python -O`.
    if cls.objects.filter(sequence=complement(sequence),
                          locus=locus).exists():
        raise AssertionError(
            'complement of sequence is already in database for this locus')

    allele, crtd = cls.objects.get_or_create(sequence=sequence, locus=locus)
    if crtd:  # if created
        allelePosition = list(cls.objects.filter(locus=locus)).index(allele)
        randomSampleSpace = 100 if locus else 1000
        binIndex, offset = divmod(allelePosition, randomSampleSpace)
        alleleBin = binIndex * randomSampleSpace
        alleleChoices = list(range(alleleBin + 1,
                                   alleleBin + randomSampleSpace + 1))
        # Private generator: yields the same permutation as seeding the
        # module-level one, without resetting process-wide random state
        rng = random.Random(str(locus) + str(alleleBin))
        rng.shuffle(alleleChoices)
        allele.fladid = alleleChoices[offset]
        allele.save()

    # In case already added, just add user
    if user and user.is_authenticated():
        allele.users.add(user)
    return allele
def profile(request, analysis):
    """
    Shows, creates or toggles the profile of one of the user's analyses.

    analysis: primary key of an Analysis that belongs to a configuration
              of request.user (Analysis.DoesNotExist otherwise)

    - AJAX request: toggles the profile in/out of the population stats
      and answers '["completed"]'
    - POST request: builds the profile from the form. Apart from
      'csrfmiddlewaretoken', 'threshold' and 'thresholdReads', every field
      name has the form '(locus|a)_<field>_<locus nr>_<allele nr>'.
      'locus' fields describe the locus (fields with 'reverse' in the name
      are complemented), 'a' fields describe its alleles: 'roi' is
      embedded between the locus primers/flanks to get the full sequence,
      any other field whose value starts with 'FA' is the expected FLADid.
      Alleles that are not in the database yet are saved as non-FLAD
      alleles.
    - any other request: shows the existing profile

    Raises AssertionError if a submitted FLADid differs from the FLADid of
    the stored allele with that sequence.
    """
    analysis = Analysis.objects.get(pk=int(analysis),
                                    configuration__user=request.user)
    config = analysis.configuration

    # Process AJAX for adding/removing profile to/from population stats
    if request.is_ajax():
        analysis.profile.toggleDB()
        analysis.profile.save()
        data = '["completed"]'
        return HttpResponse(data, 'application/json')

    # Make profile
    if request.method == 'POST':
        from myflq.MyFLq import complement
        POSTdict = {k: request.POST[k] for k in request.POST}
        POSTdict.pop('csrfmiddlewaretoken')
        threshold = POSTdict.pop('threshold')
        thresholdReads = POSTdict.pop('thresholdReads')
        POSTre = re.compile(r'^(locus|a)_(\w+)_(\d+)_(\d+)$')
        locusDict = {}
        alleles = {}

        def sortPost(x):
            # Locus fields first, so locus info is complete before the
            # allele fields that depend on it are processed
            m = POSTre.match(x)
            return (0 if m.group(1) == 'locus' else 1,
                    int(m.group(3)), int(m.group(4)))

        # Retrieve alleles
        for key in sorted(POSTdict, key=sortPost):
            kind, field, locusNr, alleleNr = POSTre.match(key).groups()
            value = POSTdict[key]
            if kind == 'locus':
                if 'reverse' in key:
                    value = complement(value)
                locusDict.setdefault(locusNr, {})[field] = value
            else:
                # Tuple key: a float built from 'locusNr.alleleNr' would
                # merge e.g. allele 1 and allele 10 of the same locus
                uniqueKey = (locusNr, alleleNr)
                locusInfo = locusDict[locusNr]
                if uniqueKey not in alleles:
                    alleles[uniqueKey] = [locusInfo['name'], None, None]
                if field == 'roi':
                    alleles[uniqueKey][1] = (
                        locusInfo['forwardPrimer'] +
                        locusInfo['forwardFlank'] +
                        value +
                        locusInfo['reverseFlank'] +
                        locusInfo['reversePrimer'])
                elif value.startswith('FA'):
                    alleles[uniqueKey][2] = value

        alleleObjects = set()
        for locusName, sequence, fladid in alleles.values():
            l = Locus.objects.get(name=locusName, configuration=config)
            try:
                a = Allele.objects.get(sequence=sequence, locus=l)
            except Allele.DoesNotExist:
                a = Allele(configuration=config,
                           locus=l,
                           FLADid=Allele.NAreference(l),
                           isFLAD=False,
                           sequence=sequence,
                           analysis=analysis)
                a.save()
            else:
                # Explicit raise, so the check survives `python -O`
                if fladid and a.FLADid != fladid:
                    raise AssertionError(
                        'submitted FLADid does not match stored allele')
            alleleObjects.add(a)
        alleles = alleleObjects

        # Make profile object
        profile, created = Profile.objects.get_or_create(analysis=analysis)
        profile.threshold = float(threshold)
        profile.minimalReads = int(thresholdReads)
        profile.updateAlleles(alleles)
        profile.save()
    else:
        # Retrieve profile for normal request
        profile = analysis.profile

    kwargs = {'myflq': True, 'profile': profile, 'profileError': ''}
    return render(request, 'myflq/profile.html', kwargs)
def profile(request, analysis):
    """
    Shows, creates or toggles the profile of one of the user's analyses.

    analysis: primary key of an Analysis that belongs to a configuration
              of request.user (Analysis.DoesNotExist otherwise)

    - AJAX request: toggles the profile in/out of the population stats
      and answers '["completed"]'
    - POST request: builds the profile from the form. Apart from
      'csrfmiddlewaretoken', 'threshold' and 'thresholdReads', every field
      name has the form '(locus|a)_<field>_<locus nr>_<allele nr>'.
      'locus' fields describe the locus (fields with 'reverse' in the name
      are complemented), 'a' fields describe its alleles: 'roi' is
      embedded between the locus primers/flanks to get the full sequence,
      any other field whose value starts with 'FA' is the expected FLADid.
      Alleles that are not in the database yet are saved as non-FLAD
      alleles.
    - any other request: shows the existing profile

    Raises AssertionError if a submitted FLADid differs from the FLADid of
    the stored allele with that sequence.
    """
    analysis = Analysis.objects.get(pk=int(analysis),
                                    configuration__user=request.user)
    config = analysis.configuration

    # Process AJAX for adding/removing profile to/from population stats
    if request.is_ajax():
        analysis.profile.toggleDB()
        analysis.profile.save()
        data = '["completed"]'
        return HttpResponse(data, 'application/json')

    # Make profile
    if request.method == 'POST':
        from myflq.MyFLq import complement
        POSTdict = {k: request.POST[k] for k in request.POST}
        POSTdict.pop('csrfmiddlewaretoken')
        threshold = POSTdict.pop('threshold')
        thresholdReads = POSTdict.pop('thresholdReads')
        POSTre = re.compile(r'^(locus|a)_(\w+)_(\d+)_(\d+)$')
        locusDict = {}
        alleles = {}

        def sortPost(x):
            # Locus fields first, so locus info is complete before the
            # allele fields that depend on it are processed
            m = POSTre.match(x)
            return (0 if m.group(1) == 'locus' else 1,
                    int(m.group(3)), int(m.group(4)))

        # Retrieve alleles
        for key in sorted(POSTdict, key=sortPost):
            kind, field, locusNr, alleleNr = POSTre.match(key).groups()
            value = POSTdict[key]
            if kind == 'locus':
                if 'reverse' in key:
                    value = complement(value)
                locusDict.setdefault(locusNr, {})[field] = value
            else:
                # Tuple key: a float built from 'locusNr.alleleNr' would
                # merge e.g. allele 1 and allele 10 of the same locus
                uniqueKey = (locusNr, alleleNr)
                locusInfo = locusDict[locusNr]
                if uniqueKey not in alleles:
                    alleles[uniqueKey] = [locusInfo['name'], None, None]
                if field == 'roi':
                    alleles[uniqueKey][1] = (
                        locusInfo['forwardPrimer'] +
                        locusInfo['forwardFlank'] +
                        value +
                        locusInfo['reverseFlank'] +
                        locusInfo['reversePrimer'])
                elif value.startswith('FA'):
                    alleles[uniqueKey][2] = value

        alleleObjects = set()
        for locusName, sequence, fladid in alleles.values():
            l = Locus.objects.get(name=locusName, configuration=config)
            try:
                a = Allele.objects.get(sequence=sequence, locus=l)
            except Allele.DoesNotExist:
                a = Allele(configuration=config,
                           locus=l,
                           FLADid=Allele.NAreference(l),
                           isFLAD=False,
                           sequence=sequence,
                           analysis=analysis)
                a.save()
            else:
                # Explicit raise, so the check survives `python -O`
                if fladid and a.FLADid != fladid:
                    raise AssertionError(
                        'submitted FLADid does not match stored allele')
            alleleObjects.add(a)
        alleles = alleleObjects

        # Make profile object
        profile, created = Profile.objects.get_or_create(analysis=analysis)
        profile.threshold = float(threshold)
        profile.minimalReads = int(thresholdReads)
        profile.updateAlleles(alleles)
        profile.save()
    else:
        # Retrieve profile for normal request
        profile = analysis.profile

    kwargs = {'myflq': True, 'profile': profile, 'profileError': ''}
    return render(request, 'myflq/profile.html', kwargs)