Ejemplo n.º 1
0
def reprime(lociFile, alleleFile=None, minimalPrimersize = 10, maxPrimersize = 30,
            maxStretch = 2, uniqueness = 0, replace=True):
    """
    Reprimes a loci configfile: for every locus the two primer fields
    (columns 2 and 3) are replaced by the shortest prefix of the locus
    sequence (column 5), respectively of its complement, that is unique.

    A candidate primer is accepted when it occurs exactly once in its own
    strand, not at all in the opposite strand, and in no sequence or
    complement of any other locus. If no size between minimalPrimersize and
    maxPrimersize (both inclusive) qualifies, the existing primer is kept.

    lociFile: path of the loci csv, read with getLoci
    minimalPrimersize: smallest primer length that is tried
    maxPrimersize: largest primer length that is tried
    replace: if True lociFile is overwritten, otherwise the result is written
     to a sibling file in which '.csv' is replaced by '_reprimed.csv'

    alleleFile, maxStretch and uniqueness are accepted for interface
    compatibility but are not used by this implementation.
    """
    from myflq.MyFLq import complement
    loci = getLoci(lociFile)
    alleles = {loci[l][5] for l in loci} | {complement(loci[l][5]) for l in loci}
    if not replace: lociFile = lociFile.replace('.csv','_reprimed.csv')
    #Context manager guarantees the output is flushed and closed
    with open(lociFile,'wt') as outFile:
        for locus in loci:
            for p in (2,3):
                locusSeq = loci[locus][5]
                locusComp = complement(locusSeq)
                #Column 3 is primed on the complementary strand
                if p == 3: locusSeq, locusComp = locusComp, locusSeq
                otherAlleles = alleles - {locusSeq,locusComp}
                for i in range(minimalPrimersize,maxPrimersize+1):
                    primer = locusSeq[:i]
                    if locusSeq.count(primer) != 1 or locusComp.count(primer) != 0: continue
                    if not any(primer in a for a in otherAlleles):
                        #Assign on success, so a primer of exactly
                        #maxPrimersize is no longer discarded
                        loci[locus][p] = primer
                        break
            print(','.join(loci[locus]),file=outFile)
Ejemplo n.º 2
0
    def closeMatch(cls, sequence, differences=10, minimalKmerSize=5,
                   locus=None):
        """
        Searches if there exists a close match with a maximum number
        of differences that can be provided as argument.
        It uses a heuristic that is fast, but could miss some matches,
        notwithstanding that there are less than the differences allowed.

        sequence: the query sequence
        differences: maximum number of alignment differences allowed
        minimalKmerSize: lower bound for the k-mer size of the prefilter
        locus: optional locus; when given, only alleles of that locus are
         considered. When None (default) no locus filter is applied.

        Returns the matching allele, with transformCode attribute.
        If no match is found raises ObjectDoesNotExist
        """
        from myflq.MyFLq import complement, Alignment

        #First filter based on length
        seqlen = len(sequence)
        alleles = cls.objects.filter(
            length__gt=(0 if seqlen < differences else seqlen -
                        differences)).filter(length__lt=seqlen + differences)
        if locus is not None:
            alleles = alleles.filter(locus=locus)
        #Per allele: [k-mer hits on given strand, hits on complement]
        alleles = {a: [0, 0] for a in alleles}

        #Filter based on kmer from sequence
        if seqlen > minimalKmerSize:
            kmersize = max(int(seqlen / (differences + 1)), minimalKmerSize)
            #The +1 keeps the last full k-mer when seqlen is a multiple of
            #kmersize
            kmers = {
                sequence[i:i + kmersize]
                for i in range(0, seqlen - kmersize + 1, kmersize)
            }
            kmers_c = {complement(k) for k in kmers}
            for a in alleles:
                for k in kmers:
                    if k in a.sequence: alleles[a][0] += 1
                for k in kmers_c:
                    if k in a.sequence: alleles[a][1] += 1
            alleles = {a: alleles[a] for a in alleles if sum(alleles[a])}

        #Look for match that meets differences requirement,
        #trying the alleles with the most k-mer hits first
        matchedAllele = None
        for allele in sorted(alleles,
                             key=lambda x: max(alleles[x]),
                             reverse=True):
            complementary = alleles[allele][0] < alleles[allele][1]
            alignment = Alignment(
                allele.sequence,
                sequence  #Without stutter info for now #TODO
                if not complementary else complement(sequence),
                gapPenalty=-10,
                gapExtension=-5)
            if alignment.getDifferences() <= differences:
                tc = alignment.getTransformCode(startSequence=allele.sequence)
                #Replace the leading character by the FLAD strand prefix
                tc = ('tc' if complementary else 'to') + tc[1:]
                allele.transformCode = tc
                #Prefer the allele with the shortest transform code
                if matchedAllele is None or len(
                        matchedAllele.transformCode) > len(tc):
                    matchedAllele = allele
                #If difference in k-mer count is already substantial => break
                elif max(alleles[matchedAllele]) > max(alleles[allele]) + 2:
                    break
        if matchedAllele is None: raise ObjectDoesNotExist
        return matchedAllele
Ejemplo n.º 3
0
    def closeMatch(cls,sequence,differences=10,minimalKmerSize=5,locus=None):
        """
        Searches if there exists a close match with a maximum number
        of differences that can be provided as argument.
        It uses a heuristic that is fast, but could miss some matches,
        notwithstanding that there are less than the differences allowed.

        sequence: the query sequence
        differences: maximum number of alignment differences allowed
        minimalKmerSize: lower bound for the k-mer size of the prefilter
        locus: optional locus; when given, only alleles of that locus are
         considered. When None (default) no locus filter is applied.

        Returns the matching allele, with transformCode attribute.
        If no match is found raises ObjectDoesNotExist
        """
        from myflq.MyFLq import complement, Alignment

        #First filter based on length
        seqlen = len(sequence)
        alleles = cls.objects.filter(length__gt=(0 if seqlen < differences
                                       else seqlen-differences)).filter(
                                               length__lt = seqlen+differences)
        if locus is not None: alleles = alleles.filter(locus=locus)
        #Per allele: [k-mer hits on given strand, hits on complement]
        alleles = {a:[0,0] for a in alleles}

        #Filter based on kmer from sequence
        if seqlen > minimalKmerSize:
            kmersize = max(int(seqlen/(differences+1)),minimalKmerSize)
            #The +1 keeps the last full k-mer when seqlen is a multiple of
            #kmersize
            kmers = {sequence[i:i+kmersize]
                     for i in range(0,seqlen-kmersize+1,kmersize)}
            kmers_c = {complement(k) for k in kmers}
            for a in alleles:
                for k in kmers:
                    if k in a.sequence: alleles[a][0]+=1
                for k in kmers_c:
                    if k in a.sequence: alleles[a][1]+=1
            alleles = {a:alleles[a] for a in alleles if sum(alleles[a])}

        #Look for match that meets differences requirement,
        #trying the alleles with the most k-mer hits first
        matchedAllele = None
        for allele in sorted(alleles,key=lambda x: max(alleles[x]),reverse=True):
            complementary = alleles[allele][0] < alleles[allele][1]
            alignment = Alignment(allele.sequence,sequence #Without stutter info for now #TODO
                                  if not complementary else complement(sequence),
                                  gapPenalty=-10,gapExtension=-5)
            if alignment.getDifferences() <= differences:
                tc = alignment.getTransformCode(startSequence=allele.sequence)
                #Replace the leading character by the FLAD strand prefix
                tc = ('tc' if complementary else 'to')+tc[1:]
                allele.transformCode = tc
                #Prefer the allele with the shortest transform code
                if matchedAllele is None or len(matchedAllele.transformCode) > len(tc):
                    matchedAllele = allele
                #If difference in k-mer count is already substantial => break
                elif max(alleles[matchedAllele]) > max(alleles[allele])+2: break
        if matchedAllele is None: raise ObjectDoesNotExist
        return matchedAllele
Ejemplo n.º 4
0
    def transform(self,transformCode):
        """
        Transforms the sequence of the FLAD allele according to the
        transformCode.
        The transformCode is the FLAD version, and has to start with
        either 'to' or 'tc' to indicate which strand needs to be transformed

        Side effect: self.transformCode is set to the given code, except
        for a bare 'to', in which case the attribute is removed again.

        Returns the transformed sequence.
        However if the exact sequence is already in the database, a
        StopIteration exception is raised, carrying the existing allele
        as its argument.
        """
        from myflq.MyFLq import complement, Alignment
        cls = type(self)
        self.transformCode = transformCode
        if transformCode.startswith('tc'): seq=complement(self.sequence)
        else: seq = self.sequence
        if len(transformCode) > 2:
            #Alignment works with a single-letter 't' prefix
            transformCode = 't'+transformCode[2:]
            seq = Alignment.transformSeq(transformCode,seq)

        #A bare strand code changes nothing that needs a database lookup
        if self.transformCode in ('tc','to'):
            if self.transformCode == 'to': del self.transformCode
            return seq
        #Test if exact transformed sequence is in database
        #The sequence has to be passed by keyword: the first positional
        #argument of search is the FLADid
        #NOTE(review): search is called without locus - confirm this is the
        #intended scope for transformed sequences
        try: allele = cls.search(seq=seq)
        except ObjectDoesNotExist: return seq
        if allele.sequence != seq:
            allele.transformCode = 'tc'
        raise StopIteration(allele)
Ejemplo n.º 5
0
 def search(cls,fladid=False,locus=None,seq=False,closeMatch=False):
     """
     Looks up one allele, either by FLADid or by locus/sequence combo.

     With a fladid, it is parsed with cls.fladrex; its 'fladid' group is
     read as a hexadecimal number and its optional 'locus' group as a
     Locus id.
     Otherwise the sequence is looked up for the locus (the locus is
     created if it does not exist yet), first as given and then as its
     complement. A complement hit gets transformCode 'tc'.
     Only if closeMatch is set, and both exact lookups fail, a close
     match up to 10 differences is searched for.

     Returns the allele; lookup failures propagate as exceptions.
     """
     from myflq.MyFLq import complement
     if fladid:
         parsed = cls.fladrex.match(fladid)
         fladNumber = int(parsed.group('fladid'),base=16)
         locusId = parsed.group('locus')
         locusObj = Locus.objects.get(id=locusId) if locusId else locusId
         return cls.objects.get(fladid=fladNumber,locus=locusObj)

     if locus: locus = Locus.objects.get_or_create(name=locus.upper())[0]
     try: return cls.objects.get(sequence=seq,locus=locus)
     except ObjectDoesNotExist:
         try:
             found = cls.objects.get(sequence=complement(seq),locus=locus)
             found.transformCode = 'tc'
             return found
         except ObjectDoesNotExist:
             if not closeMatch: raise
             #NOTE(review): passes a locus keyword - confirm that
             #cls.closeMatch accepts it
             return cls.closeMatch(sequence=seq,locus=locus,differences=10)
Ejemplo n.º 6
0
 def add(cls,sequence,locus=None,user=None):
     """
     Adds a locus/sequence to FLAD
     Works like get_or_create, but only returns the Allele object

     sequence: the allele sequence
     locus: optional locus name; it is upper-cased and the Locus is
      created if it does not exist yet
     user: optional user; if authenticated, it is added to the allele users

     A newly created allele gets a fladid drawn from a shuffled bin of
     consecutive numbers (bin size 100 with a locus, 1000 without). The
     shuffle is seeded with the locus and the bin start, so it is
     reproducible for a given bin.

     Raises AssertionError if the complement of the sequence is already
     stored for the locus.
     """
     from myflq.MyFLq import complement
     import random
     #2015/11/17: the short-circuit that returned a dummy FLADid for
     #anonymous users was disabled to allow generic testing
     if locus: locus = Locus.objects.get_or_create(name=locus.upper())[0]
     #Last check to see if complement is not in database
     #Explicit raise instead of assert, so the guard survives python -O
     if cls.objects.filter(sequence=complement(sequence),
                           locus=locus).exists():
         raise AssertionError(
             'complement of sequence already in database for this locus')
     allele,crtd = cls.objects.get_or_create(sequence=sequence,locus=locus)
     if crtd: #if created
         allelePosition = list(cls.objects.filter(locus=locus)).index(allele)
         randomSampleSpace = 100 if locus else 1000
         binIndex,binOffset = divmod(allelePosition,randomSampleSpace)
         alleleBin = binIndex*randomSampleSpace
         alleleChoices = list(range(alleleBin+1,alleleBin+randomSampleSpace+1))
         #Private generator: same permutation as seeding the module-level
         #one, but without resetting the global random state
         random.Random(str(locus)+str(alleleBin)).shuffle(alleleChoices)
         allele.fladid = alleleChoices[binOffset]
         allele.save()
     #In case already added, just add user
     if user and user.is_authenticated(): allele.users.add(user)
     return allele
Ejemplo n.º 7
0
 def getcomplement(self):
     """
     Returns the complement of the allele sequence.
     The sequence is taken from getseq, so (per the original note) if
     there is a transformCode it will be applied first.
     """
     from myflq.MyFLq import complement
     sequence = self.getseq()
     return complement(sequence)
Ejemplo n.º 8
0
    def transform(self, transformCode):
        """
        Transforms the sequence of the FLAD allele according to the
        transformCode.
        The transformCode is the FLAD version, and has to start with
        either 'to' or 'tc' to indicate which strand needs to be transformed

        Side effect: self.transformCode is set to the given code, except
        for a bare 'to', in which case the attribute is removed again.

        Returns the transformed sequence.
        However if the exact sequence is already in the database, a
        StopIteration exception is raised, carrying the existing allele
        as its argument.
        """
        from myflq.MyFLq import complement, Alignment
        cls = type(self)
        self.transformCode = transformCode
        if transformCode.startswith('tc'): seq = complement(self.sequence)
        else: seq = self.sequence
        if len(transformCode) > 2:
            #Alignment works with a single-letter 't' prefix
            transformCode = 't' + transformCode[2:]
            seq = Alignment.transformSeq(transformCode, seq)

        #A bare strand code changes nothing that needs a database lookup
        if self.transformCode in ('tc', 'to'):
            if self.transformCode == 'to': del self.transformCode
            return seq
        #Test if exact transformed sequence is in database
        #The sequence has to be passed by keyword: the first positional
        #argument of search is the FLADid
        #NOTE(review): search is called without locus - confirm this is the
        #intended scope for transformed sequences
        try:
            allele = cls.search(seq=seq)
        except ObjectDoesNotExist:
            return seq
        if allele.sequence != seq:
            allele.transformCode = 'tc'
        raise StopIteration(allele)
Ejemplo n.º 9
0
 def search(cls, fladid=False, locus=None, seq=False, closeMatch=False):
     """
     Looks up one allele, either by FLADid or by locus/sequence combo.

     With a fladid, it is parsed with cls.fladrex; its 'fladid' group is
     read as a hexadecimal number and its optional 'locus' group as a
     Locus id.
     Otherwise the sequence is looked up for the locus (the locus is
     created if it does not exist yet), first as given and then as its
     complement. A complement hit gets transformCode 'tc'.
     Only if closeMatch is set, and both exact lookups fail, a close
     match up to 10 differences is searched for.

     Returns the allele; lookup failures propagate as exceptions.
     """
     from myflq.MyFLq import complement
     if fladid:
         parsed = cls.fladrex.match(fladid)
         fladNumber = int(parsed.group('fladid'), base=16)
         locusId = parsed.group('locus')
         locusObj = Locus.objects.get(id=locusId) if locusId else locusId
         return cls.objects.get(fladid=fladNumber, locus=locusObj)

     if locus:
         locus = Locus.objects.get_or_create(name=locus.upper())[0]
     try:
         return cls.objects.get(sequence=seq, locus=locus)
     except ObjectDoesNotExist:
         try:
             found = cls.objects.get(sequence=complement(seq), locus=locus)
             found.transformCode = 'tc'
             return found
         except ObjectDoesNotExist:
             if not closeMatch:
                 raise
             #NOTE(review): passes a locus keyword - confirm that
             #cls.closeMatch accepts it
             return cls.closeMatch(sequence=seq,
                                   locus=locus,
                                   differences=10)
Ejemplo n.º 10
0
 def getcomplement(self):
     """
     Returns the complement of the allele sequence.
     The sequence is taken from getseq, so (per the original note) if
     there is a transformCode it will be applied first.
     """
     from myflq.MyFLq import complement
     sequence = self.getseq()
     return complement(sequence)
Ejemplo n.º 11
0
def result(request,analysis=False):
    """
    Shows the result page of an analysis, or handles the AJAX call that
    adds an allele to the analysis configuration.

    request: the Django request; on POST the analysis id is read from
     POST['viewResult']
    analysis: primary key of the analysis (used for non-POST requests)

    AJAX requests read 'beg', 'roi', 'cend' and 'locus' from GET, build the
    allele sequence as beg + roi + complement(cend), get or create the
    matching Allele, remove a non-FLAD allele with the same locus and
    sequence when a new one was created, update the analysis results XML
    and answer with '[<FLADid>]' as application/json.

    In both branches the analysis has to belong to request.user; otherwise
    Analysis.DoesNotExist propagates.
    """
    #User post request result
    if request.method == 'POST':
        analysis = request.POST['viewResult']

    #Process AJAX for adding alleles
    if request.is_ajax():
        from myflq.MyFLq import complement
        sequence = (request.GET['beg']+request.GET['roi']+
                    complement(request.GET['cend']))
        #Ownership check: without it any logged-in user could add alleles
        #to, and rewrite the results of, somebody else's analysis
        analysis = Analysis.objects.get(pk=int(analysis),
                                        configuration__user=request.user)
        locus = Locus.objects.get(name=request.GET['locus'],
                                  configuration=analysis.configuration)
        allele,created = Allele.objects.get_or_create(
            configuration = analysis.configuration,
            locus = locus,
            FLADid = getFLAD(locus,sequence,analysis.configuration.user),
            sequence = sequence)
        #A freshly created FLAD allele supersedes the non-FLAD placeholder
        if created and Allele.objects.filter(locus=locus,sequence=sequence,
                                          isFLAD=False).exists():
            Allele.objects.get(locus=locus,sequence=sequence,
                               isFLAD=False).delete()
            #todo report in json if created

        analysis.analysisresults.updateXML(request.GET['roi'],allele)
        #NOTE(review): FLADid is inserted unquoted - if it is a string the
        #payload is not valid JSON; confirm what the client expects
        data = '[{}]'.format(allele.FLADid)#fladid here
        return HttpResponse(data, 'application/json')

    analysis = Analysis.objects.get(pk=analysis,configuration__user=request.user)
    return render(request,'myflq/result.html',
                  {'myflq':True,
                   'analysis':analysis})
Ejemplo n.º 12
0
def result(request, analysis=False):
    """
    Shows the result page of an analysis, or handles the AJAX call that
    adds an allele to the analysis configuration.

    request: the Django request; on POST the analysis id is read from
     POST['viewResult']
    analysis: primary key of the analysis (used for non-POST requests)

    AJAX requests read 'beg', 'roi', 'cend' and 'locus' from GET, build the
    allele sequence as beg + roi + complement(cend), get or create the
    matching Allele, remove a non-FLAD allele with the same locus and
    sequence when a new one was created, update the analysis results XML
    and answer with '[<FLADid>]' as application/json.

    In both branches the analysis has to belong to request.user; otherwise
    Analysis.DoesNotExist propagates.
    """
    #User post request result
    if request.method == 'POST':
        analysis = request.POST['viewResult']

    #Process AJAX for adding alleles
    if request.is_ajax():
        from myflq.MyFLq import complement
        sequence = (request.GET['beg'] + request.GET['roi'] +
                    complement(request.GET['cend']))
        #Ownership check: without it any logged-in user could add alleles
        #to, and rewrite the results of, somebody else's analysis
        analysis = Analysis.objects.get(pk=int(analysis),
                                        configuration__user=request.user)
        locus = Locus.objects.get(name=request.GET['locus'],
                                  configuration=analysis.configuration)
        allele, created = Allele.objects.get_or_create(
            configuration=analysis.configuration,
            locus=locus,
            FLADid=getFLAD(locus, sequence, analysis.configuration.user),
            sequence=sequence)
        #A freshly created FLAD allele supersedes the non-FLAD placeholder
        if created and Allele.objects.filter(
                locus=locus, sequence=sequence, isFLAD=False).exists():
            Allele.objects.get(locus=locus, sequence=sequence,
                               isFLAD=False).delete()
            #todo report in json if created

        analysis.analysisresults.updateXML(request.GET['roi'], allele)
        #NOTE(review): FLADid is inserted unquoted - if it is a string the
        #payload is not valid JSON; confirm what the client expects
        data = '[{}]'.format(allele.FLADid)  #fladid here
        return HttpResponse(data, 'application/json')

    analysis = Analysis.objects.get(pk=analysis,
                                    configuration__user=request.user)
    return render(request, 'myflq/result.html', {
        'myflq': True,
        'analysis': analysis
    })
Ejemplo n.º 13
0
def reprime(lociFile,
            alleleFile=None,
            minimalPrimersize=10,
            maxPrimersize=30,
            maxStretch=2,
            uniqueness=0,
            replace=True):
    """
    Reprimes a loci configfile: for every locus the two primer fields
    (columns 2 and 3) are replaced by the shortest prefix of the locus
    sequence (column 5), respectively of its complement, that is unique.

    A candidate primer is accepted when it occurs exactly once in its own
    strand, not at all in the opposite strand, and in no sequence or
    complement of any other locus. If no size between minimalPrimersize and
    maxPrimersize (both inclusive) qualifies, the existing primer is kept.

    lociFile: path of the loci csv, read with getLoci
    minimalPrimersize: smallest primer length that is tried
    maxPrimersize: largest primer length that is tried
    replace: if True lociFile is overwritten, otherwise the result is written
     to a sibling file in which '.csv' is replaced by '_reprimed.csv'

    alleleFile, maxStretch and uniqueness are accepted for interface
    compatibility but are not used by this implementation.
    """
    from myflq.MyFLq import complement
    loci = getLoci(lociFile)
    alleles = {loci[l][5]
               for l in loci} | {complement(loci[l][5])
                                 for l in loci}
    if not replace: lociFile = lociFile.replace('.csv', '_reprimed.csv')
    #Context manager guarantees the output is flushed and closed
    with open(lociFile, 'wt') as outFile:
        for locus in loci:
            for p in (2, 3):
                locusSeq = loci[locus][5]
                locusComp = complement(locusSeq)
                #Column 3 is primed on the complementary strand
                if p == 3: locusSeq, locusComp = locusComp, locusSeq
                otherAlleles = alleles - {locusSeq, locusComp}
                for i in range(minimalPrimersize, maxPrimersize + 1):
                    primer = locusSeq[:i]
                    if (locusSeq.count(primer) != 1
                            or locusComp.count(primer) != 0):
                        continue
                    if not any(primer in a for a in otherAlleles):
                        #Assign on success, so a primer of exactly
                        #maxPrimersize is no longer discarded
                        loci[locus][p] = primer
                        break
            print(','.join(loci[locus]), file=outFile)
Ejemplo n.º 14
0
 def add(cls, sequence, locus=None, user=None):
     """
     Adds a locus/sequence to FLAD
     Works like get_or_create, but only returns the Allele object

     sequence: the allele sequence
     locus: optional locus name; it is upper-cased and the Locus is
      created if it does not exist yet
     user: optional user; if authenticated, it is added to the allele users

     A newly created allele gets a fladid drawn from a shuffled bin of
     consecutive numbers (bin size 100 with a locus, 1000 without). The
     shuffle is seeded with the locus and the bin start, so it is
     reproducible for a given bin.

     Raises AssertionError if the complement of the sequence is already
     stored for the locus.
     """
     from myflq.MyFLq import complement
     import random
     #2015/11/17: the short-circuit that returned a dummy FLADid for
     #anonymous users was disabled to allow generic testing
     if locus: locus = Locus.objects.get_or_create(name=locus.upper())[0]
     #Last check to see if complement is not in database
     #Explicit raise instead of assert, so the guard survives python -O
     if cls.objects.filter(sequence=complement(sequence),
                           locus=locus).exists():
         raise AssertionError(
             'complement of sequence already in database for this locus')
     allele, crtd = cls.objects.get_or_create(sequence=sequence,
                                              locus=locus)
     if crtd:  #if created
         allelePosition = list(
             cls.objects.filter(locus=locus)).index(allele)
         randomSampleSpace = 100 if locus else 1000
         binIndex, binOffset = divmod(allelePosition, randomSampleSpace)
         alleleBin = binIndex * randomSampleSpace
         alleleChoices = list(
             range(alleleBin + 1, alleleBin + randomSampleSpace + 1))
         #Private generator: same permutation as seeding the module-level
         #one, but without resetting the global random state
         random.Random(str(locus) + str(alleleBin)).shuffle(alleleChoices)
         allele.fladid = alleleChoices[binOffset]
         allele.save()
     #In case already added, just add user
     if user and user.is_authenticated(): allele.users.add(user)
     return allele
Ejemplo n.º 15
0
def profile(request,analysis):
    """
    Shows, creates or toggles the profile of an analysis owned by
    request.user.

    request: the Django request
    analysis: primary key of the analysis

    AJAX requests toggle the profile with toggleDB, save it and answer
    '["completed"]' as application/json.
    POST requests build the profile: besides 'csrfmiddlewaretoken',
    'threshold' and 'thresholdReads', every field name has to look like
    'locus_<field>_<locusNr>_<nr>' or 'a_<field>_<locusNr>_<alleleNr>'.
    Locus fields with 'reverse' in their name are complemented. For each
    allele the full sequence is assembled as forwardPrimer + forwardFlank
    + roi + reverseFlank + reversePrimer; alleles that are not in the
    database yet are stored as non-FLAD alleles.
    Other requests just retrieve the existing profile.

    Raises AssertionError if a posted FLADid differs from the FLADid of
    the stored allele with the same sequence.
    """
    analysis = Analysis.objects.get(pk=int(analysis),configuration__user=request.user)
    config = analysis.configuration

    #Process AJAX for adding/removing profile to/from population stats
    if request.is_ajax():
        analysis.profile.toggleDB()
        analysis.profile.save()
        data = '["completed"]'
        return HttpResponse(data, 'application/json')

    #Make profile
    if request.method == 'POST':
        from myflq.MyFLq import complement
        POSTdict = {k: request.POST[k] for k in request.POST}
        POSTdict.pop('csrfmiddlewaretoken')
        threshold = POSTdict.pop('threshold')
        thresholdReads = POSTdict.pop('thresholdReads')
        POSTre = re.compile(r'^(locus|a)_(\w+)_(\d+)_(\d+)$')
        locusDict = {}
        alleles = {}
        def sortPost(x):
            #Locus fields first, so they are known when alleles are built
            m = POSTre.match(x)
            return (0 if m.group(1) == 'locus' else 1,
                    int(m.group(3)),int(m.group(4)))
        #Retrieve alleles
        for key in sorted(POSTdict, key = sortPost):
            kind,field,locusNr,alleleNr = POSTre.match(key).groups()
            value = POSTdict[key]
            if kind == 'locus':
                if 'reverse' in key: value = complement(value)
                locusDict.setdefault(locusNr,{})[field] = value
            else:
                #Tuple key: a float key would merge e.g. allele 1 and
                #allele 10 of the same locus ('1.1' == '1.10')
                uniqueKey = (locusNr,alleleNr)
                if uniqueKey not in alleles:
                    alleles[uniqueKey] = [locusDict[locusNr]['name'],None,None]
                if field == 'roi':
                    alleles[uniqueKey][1] = (
                        locusDict[locusNr]['forwardPrimer']+
                        locusDict[locusNr]['forwardFlank']+
                        value+
                        locusDict[locusNr]['reverseFlank']+
                        locusDict[locusNr]['reversePrimer']
                        )
                elif value.startswith('FA'): alleles[uniqueKey][2] = value

        #Swap every [locus name, sequence, FLADid] for its Allele object
        profileAlleles = set()
        for locusName,sequence,fladid in alleles.values():
            l = Locus.objects.get(name=locusName,configuration=config)
            try:
                a = Allele.objects.get(
                    sequence = sequence,
                    locus = l)
            except Allele.DoesNotExist:
                a = Allele(configuration = config,
                           locus = l,
                           FLADid = Allele.NAreference(l),
                           isFLAD = False,
                           sequence = sequence,
                           analysis = analysis)
                a.save()
            else:
                #Explicit raise instead of assert, so it survives python -O
                if fladid and a.FLADid != fladid:
                    raise AssertionError(
                        'posted FLADid does not match stored allele')
            profileAlleles.add(a)

        #Make profile object
        profile,created = Profile.objects.get_or_create(
            analysis = analysis)
        profile.threshold = float(threshold)
        profile.minimalReads = int(thresholdReads)
        profile.updateAlleles(profileAlleles)
        profile.save()

    else: #Retrieve profile for normal request
        profile = analysis.profile

    kwargs = {'myflq':True,
              'profile': profile,
              'profileError': ''}
    return render(request,'myflq/profile.html',kwargs)
Ejemplo n.º 16
0
def profile(request, analysis):
    """
    Shows, creates or toggles the profile of an analysis owned by
    request.user.

    request: the Django request
    analysis: primary key of the analysis

    AJAX requests toggle the profile with toggleDB, save it and answer
    '["completed"]' as application/json.
    POST requests build the profile: besides 'csrfmiddlewaretoken',
    'threshold' and 'thresholdReads', every field name has to look like
    'locus_<field>_<locusNr>_<nr>' or 'a_<field>_<locusNr>_<alleleNr>'.
    Locus fields with 'reverse' in their name are complemented. For each
    allele the full sequence is assembled as forwardPrimer + forwardFlank
    + roi + reverseFlank + reversePrimer; alleles that are not in the
    database yet are stored as non-FLAD alleles.
    Other requests just retrieve the existing profile.

    Raises AssertionError if a posted FLADid differs from the FLADid of
    the stored allele with the same sequence.
    """
    analysis = Analysis.objects.get(pk=int(analysis),
                                    configuration__user=request.user)
    config = analysis.configuration

    #Process AJAX for adding/removing profile to/from population stats
    if request.is_ajax():
        analysis.profile.toggleDB()
        analysis.profile.save()
        data = '["completed"]'
        return HttpResponse(data, 'application/json')

    #Make profile
    if request.method == 'POST':
        from myflq.MyFLq import complement
        POSTdict = {k: request.POST[k] for k in request.POST}
        POSTdict.pop('csrfmiddlewaretoken')
        threshold = POSTdict.pop('threshold')
        thresholdReads = POSTdict.pop('thresholdReads')
        POSTre = re.compile(r'^(locus|a)_(\w+)_(\d+)_(\d+)$')
        locusDict = {}
        alleles = {}

        def sortPost(x):
            #Locus fields first, so they are known when alleles are built
            m = POSTre.match(x)
            return (0 if m.group(1) == 'locus' else 1, int(m.group(3)),
                    int(m.group(4)))

        #Retrieve alleles
        for key in sorted(POSTdict, key=sortPost):
            kind, field, locusNr, alleleNr = POSTre.match(key).groups()
            value = POSTdict[key]
            if kind == 'locus':
                if 'reverse' in key:
                    value = complement(value)
                locusDict.setdefault(locusNr, {})[field] = value
            else:
                #Tuple key: a float key would merge e.g. allele 1 and
                #allele 10 of the same locus ('1.1' == '1.10')
                uniqueKey = (locusNr, alleleNr)
                if uniqueKey not in alleles:
                    alleles[uniqueKey] = [
                        locusDict[locusNr]['name'], None, None
                    ]
                if field == 'roi':
                    alleles[uniqueKey][1] = (
                        locusDict[locusNr]['forwardPrimer'] +
                        locusDict[locusNr]['forwardFlank'] + value +
                        locusDict[locusNr]['reverseFlank'] +
                        locusDict[locusNr]['reversePrimer'])
                elif value.startswith('FA'):
                    alleles[uniqueKey][2] = value

        #Swap every [locus name, sequence, FLADid] for its Allele object
        profileAlleles = set()
        for locusName, sequence, fladid in alleles.values():
            l = Locus.objects.get(name=locusName, configuration=config)
            try:
                a = Allele.objects.get(sequence=sequence, locus=l)
            except Allele.DoesNotExist:
                a = Allele(configuration=config,
                           locus=l,
                           FLADid=Allele.NAreference(l),
                           isFLAD=False,
                           sequence=sequence,
                           analysis=analysis)
                a.save()
            else:
                #Explicit raise instead of assert, so it survives python -O
                if fladid and a.FLADid != fladid:
                    raise AssertionError(
                        'posted FLADid does not match stored allele')
            profileAlleles.add(a)

        #Make profile object
        profile, created = Profile.objects.get_or_create(analysis=analysis)
        profile.threshold = float(threshold)
        profile.minimalReads = int(thresholdReads)
        profile.updateAlleles(profileAlleles)
        profile.save()

    else:  #Retrieve profile for normal request
        profile = analysis.profile

    kwargs = {'myflq': True, 'profile': profile, 'profileError': ''}
    return render(request, 'myflq/profile.html', kwargs)