예제 #1
0
#!/usr/bin/env python

from splicesite import SpliceSite
import fruitfly

# Read the reference sequence and the variant sequence as raw bytes.
with open('sequence.txt', 'rb') as f:
    a = f.read()
with open('variant.txt', 'rb') as f:
    b = f.read()

# Run the splice-site predictor on each input.
sitesA = fruitfly.getSpliceSitePredictions(a)
sitesB = fruitfly.getSpliceSitePredictions(b)

# Decorative rule placed on both sides of every section heading.
rule = "~" * 10

# Dump both prediction lists, each followed by a blank line.
for title, sites in ((" Expected ", sitesA), (" Variant ", sitesB)):
    print(rule + title + rule)
    for ss in sites:
        print(ss)
    print("")

print(rule + " Comparing Results " + rule)
print("")

# Collects the SpliceSites that turn out to be "interesting" and
# should therefore be printed.
printList = []
예제 #2
0
def predictSpliceSites(self,
                       rows,
                       genomeFile,
                       db='hg38',
                       chromcol='#CHROMCOL',
                       poscol='POS',
                       varcol='VARIANT'):
    """Compare splice-site predictions for reference and variant sequences.

    For every row the sequence between ``POS - 501`` and ``POS + 500`` is
    requested from the genome, a copy is built in which the character at
    index 500 is replaced by the row's variant value, and both strings are
    passed to ``fruitfly.getSpliceSitePredictions``.  Variant sites with no
    expected site at the same position are collected in
    ``self.predictions`` and finally written to ``<task id>.csv`` inside
    ``app.config['UPLOAD_FOLDER']``.

    Args:
        rows: Sized iterable of records indexable by ``chromcol``,
            ``poscol`` and ``varcol``.
        genomeFile: Value handed to ``sequenceutils.loadGenome``.
        db: Not used by this function body.
        chromcol: Key of the chromosome value; ``"chr"`` is prepended to it.
        poscol: Key of the position value (converted with ``int``).
        varcol: Key of the replacement base.

    Returns:
        dict with ``'current'`` and ``'total'`` (both the number of rows)
        and ``'warnings'`` (list of message strings gathered on the way).
    """
    # celery kung fu
    self.predictions = list()
    warnings = list()
    task_id = predictSpliceSites.request.id

    rowCount = len(rows)

    # The genome does not depend on the row, so it is loaded once (on the
    # first row) and reused instead of being reloaded on every iteration.
    genome = None

    for idx, row in enumerate(rows):
        logger.info('Processing row %d', idx)
        warnings.append('Processing row %d' % (idx))
        chrom = "chr%s" % (row[chromcol])
        seqStart = int(row[poscol]) - 501
        seqEnd = int(row[poscol]) + 500
        if genome is None:
            genome = sequenceutils.loadGenome(genomeFile)
        seq = sequenceutils.getSequence(genome, chrom, seqStart, seqEnd)

        # make a copy of seq but with the base modified at specified position
        # python doesn't support item assignments w/in strings so build pieces
        var = seq[0:500] + row[varcol] + seq[501:]

        expectedSites = fruitfly.getSpliceSitePredictions(seq)
        variantSites = fruitfly.getSpliceSitePredictions(var)

        # Index the expected sites by position so each variant site is
        # matched with one lookup instead of a scan over all expected sites.
        # Several sites may share a position; their order is preserved.
        expectedByPos = {}
        for expectedSite in expectedSites:
            (posA, baseA, scoreA) = expectedSite.getSpliceSite()
            expectedByPos.setdefault(posA, []).append((baseA, scoreA))

        for variantSite in variantSites:
            (posB, baseB, scoreB) = variantSite.getSpliceSite()
            matches = expectedByPos.get(posB)

            if matches:
                for (baseA, scoreA) in matches:
                    if scoreA == scoreB:
                        if baseA != baseB:
                            warnings.append(
                                "Base changed from %s to %s at position %d "
                                "with score %0.2f" %
                                (baseA, baseB, posB, scoreA))
                    else:
                        delta = abs(scoreA - scoreB)
                        # Only score shifts of at least 0.4 are reported.
                        if delta >= 0.4:
                            warnings.append(
                                "Score changed by %0.2f from %0.2f to %0.2f "
                                "at position %d." %
                                (delta, scoreA, scoreB, posB))
                continue

            # No expected site at this position.
            if posB >= len(seq):
                warnings.append(
                    "Oops! Somehow the length of our expected and varied "
                    "sequences is not equivalent (%d for expected, "
                    "%d for varied)." % (len(seq), len(var)))
                # Stop examining the remaining variant sites of this row.
                break

            # Add to the session predictions list
            self.predictions.append(variantSite)

    logger.debug('Creating output file ...')
    filename = str(task_id) + '.csv'
    path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    logger.debug('Filename: %s', filename)
    logger.debug('Path: %s', path)
    fileutils.predictionsToCsv(self.predictions, path)
    logger.debug('Done writing to file')

    logger.info('Returning result')
    return {'current': rowCount, 'total': rowCount, 'warnings': warnings}
예제 #3
0
def predictSpliceSites(self,
                       rows,
                       genomeFile,
                       db='hg38',
                       chromcol='#CHROMCOL',
                       poscol='POS',
                       varcol='VARIANT'):
    """Compare splice-site predictions for reference and variant sequences.

    For every row the sequence between ``POS - 501`` and ``POS + 500`` is
    requested from the genome, a copy is built in which the character at
    index 500 is replaced by the row's variant value, and both strings are
    passed to ``fruitfly.getSpliceSitePredictions``.  Variant sites with no
    expected site at the same position are collected in
    ``self.predictions`` and finally written to ``<task id>.csv`` inside
    ``app.config['UPLOAD_FOLDER']``.

    Args:
        rows: Sized iterable of records indexable by ``chromcol``,
            ``poscol`` and ``varcol``.
        genomeFile: Value handed to ``sequenceutils.loadGenome``.
        db: Not used by this function body.
        chromcol: Key of the chromosome value; ``"chr"`` is prepended to it.
        poscol: Key of the position value (converted with ``int``).
        varcol: Key of the replacement base.

    Returns:
        dict with ``'current'`` and ``'total'`` (both the number of rows)
        and ``'warnings'`` (list of message strings gathered on the way).
    """
    # celery kung fu
    self.predictions = list()
    warnings = list()
    task_id = predictSpliceSites.request.id

    rowCount = len(rows)

    # The genome does not depend on the row, so it is loaded once (on the
    # first row) and reused instead of being reloaded on every iteration.
    genome = None

    for idx, row in enumerate(rows):
        logger.info('Processing row %d', idx)
        warnings.append('Processing row %d' % (idx))
        chrom = "chr%s" % (row[chromcol])
        seqStart = int(row[poscol]) - 501
        seqEnd = int(row[poscol]) + 500
        if genome is None:
            genome = sequenceutils.loadGenome(genomeFile)
        seq = sequenceutils.getSequence(genome, chrom, seqStart, seqEnd)

        # make a copy of seq but with the base modified at specified position
        # python doesn't support item assignments w/in strings so build pieces
        var = seq[0:500] + row[varcol] + seq[501:]

        expectedSites = fruitfly.getSpliceSitePredictions(seq)
        variantSites = fruitfly.getSpliceSitePredictions(var)

        # Index the expected sites by position so each variant site is
        # matched with one lookup instead of a scan over all expected sites.
        # Several sites may share a position; their order is preserved.
        expectedByPos = {}
        for expectedSite in expectedSites:
            (posA, baseA, scoreA) = expectedSite.getSpliceSite()
            expectedByPos.setdefault(posA, []).append((baseA, scoreA))

        for variantSite in variantSites:
            (posB, baseB, scoreB) = variantSite.getSpliceSite()
            matches = expectedByPos.get(posB)

            if matches:
                for (baseA, scoreA) in matches:
                    if scoreA == scoreB:
                        if baseA != baseB:
                            warnings.append(
                                "Base changed from %s to %s at position %d "
                                "with score %0.2f" %
                                (baseA, baseB, posB, scoreA))
                    else:
                        delta = abs(scoreA - scoreB)
                        # Only score shifts of at least 0.4 are reported.
                        if delta >= 0.4:
                            warnings.append(
                                "Score changed by %0.2f from %0.2f to %0.2f "
                                "at position %d." %
                                (delta, scoreA, scoreB, posB))
                continue

            # No expected site at this position.
            if posB >= len(seq):
                warnings.append(
                    "Oops! Somehow the length of our expected and varied "
                    "sequences is not equivalent (%d for expected, "
                    "%d for varied)." % (len(seq), len(var)))
                # Stop examining the remaining variant sites of this row.
                break

            # Add to the session predictions list
            self.predictions.append(variantSite)

    logger.debug('Creating output file ...')
    filename = str(task_id) + '.csv'
    path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    logger.debug('Filename: %s', filename)
    logger.debug('Path: %s', path)
    fileutils.predictionsToCsv(self.predictions, path)
    logger.debug('Done writing to file')

    logger.info('Returning result')
    return {'current': rowCount, 'total': rowCount, 'warnings': warnings}
예제 #4
0
def splice_site():
    """Handle the splice-site form and render the prediction comparison.

    On POST the genome build, chromosome, position and base are taken from
    the submitted form (falling back to hg38 / 'chr1' / '1' / 'A') and
    stored in the session.  The sequence between ``position - 501`` and
    ``position + 500`` is fetched, a copy is built with the character at
    index 500 replaced by the submitted base, and splice-site predictions
    for both strings are compared.  Differences are flashed, and variant
    sites with no expected site at the same position are reported.

    Returns:
        The rendered ``splicesite.html`` template for POST requests,
        otherwise the rendered ``index.html`` template.
    """
    # if not post, return index.html
    if request.method != 'POST':
        return render_template('index.html')

    session['db'] = 'hg38'  # use the latest genome by default
    requestedDb = request.form.get('db')
    if requestedDb is None:
        flash('Genome not specified. Using hg38.')
    elif requestedDb == 'hg19':
        session['db'] = 'hg19'
    elif requestedDb != 'hg38':
        # Report the rejected 'db' value itself; the former lookup of the
        # 'session' form field would fail whenever that field is absent.
        flash('Unsuported genome option %s. Using hg38.' % (requestedDb))

    # Set up the mutation information for this run
    session['chromosome'] = request.form.get('chromosome', 'chr1')
    session['position'] = request.form.get('position', '1')
    session['base'] = request.form.get('base', 'A')

    createSession()
    genomePath = "genomes"
    if session['db'] == 'hg19':
        genomeFile = "hg19.2bit"
    else:
        session['db'] = 'hg38'
        genomeFile = "hg38.2bit"
    genomeFilePath = "/".join([genomePath, genomeFile])

    # Prefix the chromosome only when needed: the default value 'chr1'
    # already carries the prefix and used to become 'chrchr1'.
    chrom = session['chromosome']
    if not chrom.startswith('chr'):
        chrom = "chr%s" % (chrom)
    seqStart = int(session['position']) - 501
    seqEnd = int(session['position']) + 500
    genome = sequenceutils.loadGenome(genomeFilePath)
    seq = sequenceutils.getSequence(genome, chrom, seqStart, seqEnd)

    # make a copy of seq but with the base modified at specified position
    # python doesn't support item assignments w/in strings so build pieces
    var = seq[0:500] + session['base'] + seq[501:]

    expectedSites = fruitfly.getSpliceSitePredictions(seq)
    variantSites = fruitfly.getSpliceSitePredictions(var)

    expectedList = [[ss.start, ss.end, ss.score, ss.intron, ss.exon]
                    for ss in expectedSites]
    variantList = [[ss.start, ss.end, ss.score, ss.intron, ss.exon]
                   for ss in variantSites]
    msgList = []

    # List where we will store the SpliceSites to print
    # These are the sites that are "interesting"
    reportList = []

    # Index the expected sites by position so each variant site is matched
    # with one lookup.  Several sites may share a position; order is kept.
    expectedByPos = {}
    for expectedSite in expectedSites:
        (posA, baseA, scoreA) = expectedSite.getSpliceSite()
        expectedByPos.setdefault(posA, []).append((baseA, scoreA))

    for variantSite in variantSites:
        (posB, baseB, scoreB) = variantSite.getSpliceSite()
        matches = expectedByPos.get(posB)

        if matches:
            for (baseA, scoreA) in matches:
                if scoreA == scoreB:
                    if baseA != baseB:
                        flash("Base changed from %s to %s at position %d "
                              "with score %0.2f\n" %
                              (baseA, baseB, posB, scoreA))
                else:
                    delta = abs(scoreA - scoreB)
                    # Only score shifts of at least 0.4 are reported.
                    if delta >= 0.4:
                        flash("Score changed by %0.2f from %0.2f to %0.2f "
                              "at position %d.\n" %
                              (delta, scoreA, scoreB, posB))
            continue

        # No expected site at this position.
        if posB >= len(seq):
            flash(
                "Oops! Somehow the length of our expected and varied "
                "sequences is not equivalent (%d for expected, "
                "%d for varied).\n" % (len(seq), len(var)))
            # Stop examining the remaining variant sites.
            break
        origBase = seq[posB].lower()

        msgList.append(
            "New splice site predicted at position %d with score %0.2f. "
            "Original base was '%s' and new base is '%s'." %
            (posB, scoreB, origBase, baseB))

        reportList.append([
            variantSite.start, variantSite.end, variantSite.score,
            variantSite.intron, variantSite.exon
        ])

    return render_template('splicesite.html',
                           expectedList=expectedList,
                           variantList=variantList,
                           msgList=msgList,
                           reportList=reportList,
                           db=session['db'],
                           chromosome=session['chromosome'],
                           position=session['position'],
                           base=session['base'])
예제 #5
0
def splice_site():
    """Handle the splice-site form and render the prediction comparison.

    On POST the genome build, chromosome, position and base are taken from
    the submitted form (falling back to hg38 / 'chr1' / '1' / 'A') and
    stored in the session.  The sequence between ``position - 501`` and
    ``position + 500`` is fetched, a copy is built with the character at
    index 500 replaced by the submitted base, and splice-site predictions
    for both strings are compared.  Differences are flashed, and variant
    sites with no expected site at the same position are reported.

    Returns:
        The rendered ``splicesite.html`` template for POST requests,
        otherwise the rendered ``index.html`` template.
    """
    # if not post, return index.html
    if request.method != 'POST':
        return render_template('index.html')

    session['db'] = 'hg38'  # use the latest genome by default
    requestedDb = request.form.get('db')
    if requestedDb is None:
        flash('Genome not specified. Using hg38.')
    elif requestedDb == 'hg19':
        session['db'] = 'hg19'
    elif requestedDb != 'hg38':
        # Report the rejected 'db' value itself; the former lookup of the
        # 'session' form field would fail whenever that field is absent.
        flash('Unsuported genome option %s. Using hg38.' % (requestedDb))

    # Set up the mutation information for this run
    session['chromosome'] = request.form.get('chromosome', 'chr1')
    session['position'] = request.form.get('position', '1')
    session['base'] = request.form.get('base', 'A')

    createSession()
    genomePath = "genomes"
    if session['db'] == 'hg19':
        genomeFile = "hg19.2bit"
    else:
        session['db'] = 'hg38'
        genomeFile = "hg38.2bit"
    genomeFilePath = "/".join([genomePath, genomeFile])

    # Prefix the chromosome only when needed: the default value 'chr1'
    # already carries the prefix and used to become 'chrchr1'.
    chrom = session['chromosome']
    if not chrom.startswith('chr'):
        chrom = "chr%s" % (chrom)
    seqStart = int(session['position']) - 501
    seqEnd = int(session['position']) + 500
    genome = sequenceutils.loadGenome(genomeFilePath)
    seq = sequenceutils.getSequence(genome, chrom, seqStart, seqEnd)

    # make a copy of seq but with the base modified at specified position
    # python doesn't support item assignments w/in strings so build pieces
    var = seq[0:500] + session['base'] + seq[501:]

    expectedSites = fruitfly.getSpliceSitePredictions(seq)
    variantSites = fruitfly.getSpliceSitePredictions(var)

    expectedList = [[ss.start, ss.end, ss.score, ss.intron, ss.exon]
                    for ss in expectedSites]
    variantList = [[ss.start, ss.end, ss.score, ss.intron, ss.exon]
                   for ss in variantSites]
    msgList = []

    # List where we will store the SpliceSites to print
    # These are the sites that are "interesting"
    reportList = []

    # Index the expected sites by position so each variant site is matched
    # with one lookup.  Several sites may share a position; order is kept.
    expectedByPos = {}
    for expectedSite in expectedSites:
        (posA, baseA, scoreA) = expectedSite.getSpliceSite()
        expectedByPos.setdefault(posA, []).append((baseA, scoreA))

    for variantSite in variantSites:
        (posB, baseB, scoreB) = variantSite.getSpliceSite()
        matches = expectedByPos.get(posB)

        if matches:
            for (baseA, scoreA) in matches:
                if scoreA == scoreB:
                    if baseA != baseB:
                        flash("Base changed from %s to %s at position %d "
                              "with score %0.2f\n" %
                              (baseA, baseB, posB, scoreA))
                else:
                    delta = abs(scoreA - scoreB)
                    # Only score shifts of at least 0.4 are reported.
                    if delta >= 0.4:
                        flash("Score changed by %0.2f from %0.2f to %0.2f "
                              "at position %d.\n" %
                              (delta, scoreA, scoreB, posB))
            continue

        # No expected site at this position.
        if posB >= len(seq):
            flash(
                "Oops! Somehow the length of our expected and varied "
                "sequences is not equivalent (%d for expected, "
                "%d for varied).\n" % (len(seq), len(var)))
            # Stop examining the remaining variant sites.
            break
        origBase = seq[posB].lower()

        msgList.append(
            "New splice site predicted at position %d with score %0.2f. "
            "Original base was '%s' and new base is '%s'." %
            (posB, scoreB, origBase, baseB))

        reportList.append([
            variantSite.start, variantSite.end, variantSite.score,
            variantSite.intron, variantSite.exon
        ])

    return render_template('splicesite.html',
                           expectedList=expectedList,
                           variantList=variantList,
                           msgList=msgList,
                           reportList=reportList,
                           db=session['db'],
                           chromosome=session['chromosome'],
                           position=session['position'],
                           base=session['base'])