#!/usr/bin/env python
"""Print splice-site predictions for a reference and a variant sequence.

Reads ``sequence.txt`` and ``variant.txt`` (paths relative to the working
directory), runs each through ``fruitfly.getSpliceSitePredictions`` and
prints both result lists under a banner.
"""
from splicesite import SpliceSite
import fruitfly

with open('sequence.txt', 'rb') as f:
    a = f.read()
with open('variant.txt', 'rb') as f:
    b = f.read()

sitesA = fruitfly.getSpliceSitePredictions(a)
sitesB = fruitfly.getSpliceSitePredictions(b)

# Single-argument print(...) behaves the same as Python 2's print statement
# and is also valid on Python 3, where the statement form is a SyntaxError.
print("~" * 10 + " Expected " + "~" * 10)
for ss in sitesA:
    print(ss)
print("")

print("~" * 10 + " Variant " + "~" * 10)
for ss in sitesB:
    print(ss)
print("")

print("~" * 10 + " Comparing Results " + "~" * 10)
print("")

# List where we will store the SpliceSites to print
# These are the sites that are "interesting"
printList = []
def predictSpliceSites(self, rows, genomeFile, db='hg38',
                       chromcol='#CHROMCOL', poscol='POS', varcol='VARIANT'):
    """Predict splice sites for every variant row and write new ones to CSV.

    For each row the reference sequence between ``position - 501`` and
    ``position + 500`` is fetched with ``sequenceutils.getSequence``, a copy
    with the variant substituted at index 500 is built, and
    ``fruitfly.getSpliceSitePredictions`` is run on both. Variant sites whose
    position matches no reference site are collected in ``self.predictions``,
    which is finally handed to ``fileutils.predictionsToCsv`` together with
    the path ``<UPLOAD_FOLDER>/<task id>.csv``.

    Args:
        rows: Sized iterable of records indexable by ``chromcol``,
            ``poscol`` and ``varcol``.
        genomeFile: Passed to ``sequenceutils.loadGenome``.
        db: Accepted for interface compatibility; not read by this function.
        chromcol: Key of the chromosome value (prefixed with ``chr``).
        poscol: Key of the variant position (converted with ``int``).
        varcol: Key of the variant base(s) spliced into the sequence.

    Returns:
        dict with ``current`` and ``total`` (both the number of rows) and
        ``warnings`` (list of progress and comparison messages).
    """
    # celery kung fu
    self.predictions = list()
    warnings = list()
    task_id = predictSpliceSites.request.id
    rowCount = len(rows)

    # The genome does not depend on the row: load it once instead of once
    # per row, and not at all when there is nothing to process.
    genome = sequenceutils.loadGenome(genomeFile) if rowCount else None

    for idx, row in enumerate(rows):
        logger.info('Processing row %d', idx)
        warnings.append('Processing row %d' % (idx))

        chrom = "chr%s" % (row[chromcol])
        if chrom.startswith("chrchr"):
            # The input already carried the prefix; drop the doubled one.
            chrom = chrom[3:]
        position = int(row[poscol])
        seq = sequenceutils.getSequence(
            genome, chrom, position - 501, position + 500)

        # Strings are immutable, so the variant sequence is rebuilt from
        # pieces with the base at index 500 replaced.
        var = seq[0:500] + row[varcol] + seq[501:]

        expectedSites = fruitfly.getSpliceSitePredictions(seq)
        variantSites = fruitfly.getSpliceSitePredictions(var)
        # Unpack the reference sites once rather than once per variant site.
        expected = [site.getSpliceSite() for site in expectedSites]
        msgList = []  # collected but not returned ... yet

        for variantSite in variantSites:
            (posB, baseB, scoreB) = variantSite.getSpliceSite()
            foundMatch = False
            for (posA, baseA, scoreA) in expected:
                if posA != posB:
                    continue
                foundMatch = True
                if scoreA == scoreB:
                    if baseA != baseB:
                        warnings.append(
                            "Base changed from %s to %s at position %d "
                            "with score %0.2f"
                            % (baseA, baseB, posA, scoreA))
                else:
                    delta = abs(scoreA - scoreB)
                    if delta >= 0.4:
                        warnings.append(
                            "Score changed by %0.2f from %0.2f to %0.2f "
                            "at position %d."
                            % (delta, scoreA, scoreB, posA))
            if foundMatch:
                continue

            if posB >= len(seq):
                warnings.append(
                    "Oops! Somehow the length of our expected and varied "
                    "sequences is not equivalent (%d for expected, "
                    "%d for varied)." % (len(seq), len(var)))
                # Stop comparing the remaining variant sites of this row.
                break
            origBase = seq[posB].lower()
            msgList.append(
                "New splice site predicted at position %d with score %0.2f. "
                "Original base was '%s' and new base is '%s'."
                % (posB, scoreB, origBase, baseB))
            # Add to the session predictions list.
            # NOTE(review): nesting was reconstructed from whitespace-flattened
            # source; confirm this append belongs to the no-match branch.
            self.predictions.append(variantSite)

    logger.debug('Creating output file ...')
    filename = str(task_id) + '.csv'
    path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    logger.debug('Filename: %s', filename)
    logger.debug('Path: %s', path)
    fileutils.predictionsToCsv(self.predictions, path)
    logger.debug('Done writing to file')
    logger.info('Returning result')
    return {'current': rowCount, 'total': rowCount, 'warnings': warnings}
def predictSpliceSites(self, rows, genomeFile, db='hg38',
                       chromcol='#CHROMCOL', poscol='POS', varcol='VARIANT'):
    """Predict splice sites for every variant row and write new ones to CSV.

    For each row the reference sequence between ``position - 501`` and
    ``position + 500`` is fetched with ``sequenceutils.getSequence``, a copy
    with the variant substituted at index 500 is built, and
    ``fruitfly.getSpliceSitePredictions`` is run on both. Variant sites whose
    position matches no reference site are collected in ``self.predictions``,
    which is finally handed to ``fileutils.predictionsToCsv`` together with
    the path ``<UPLOAD_FOLDER>/<task id>.csv``.

    Args:
        rows: Sized iterable of records indexable by ``chromcol``,
            ``poscol`` and ``varcol``.
        genomeFile: Passed to ``sequenceutils.loadGenome``.
        db: Accepted for interface compatibility; not read by this function.
        chromcol: Key of the chromosome value (prefixed with ``chr``).
        poscol: Key of the variant position (converted with ``int``).
        varcol: Key of the variant base(s) spliced into the sequence.

    Returns:
        dict with ``current`` and ``total`` (both the number of rows) and
        ``warnings`` (list of progress and comparison messages).
    """
    # celery kung fu
    self.predictions = list()
    warnings = list()
    task_id = predictSpliceSites.request.id
    rowCount = len(rows)

    # The genome does not depend on the row: load it once instead of once
    # per row, and not at all when there is nothing to process.
    genome = sequenceutils.loadGenome(genomeFile) if rowCount else None

    for idx, row in enumerate(rows):
        logger.info('Processing row %d', idx)
        warnings.append('Processing row %d' % (idx))

        chrom = "chr%s" % (row[chromcol])
        if chrom.startswith("chrchr"):
            # The input already carried the prefix; drop the doubled one.
            chrom = chrom[3:]
        position = int(row[poscol])
        seq = sequenceutils.getSequence(
            genome, chrom, position - 501, position + 500)

        # Strings are immutable, so the variant sequence is rebuilt from
        # pieces with the base at index 500 replaced.
        var = seq[0:500] + row[varcol] + seq[501:]

        expectedSites = fruitfly.getSpliceSitePredictions(seq)
        variantSites = fruitfly.getSpliceSitePredictions(var)
        # Unpack the reference sites once rather than once per variant site.
        expected = [site.getSpliceSite() for site in expectedSites]
        msgList = []  # collected but not returned ... yet

        for variantSite in variantSites:
            (posB, baseB, scoreB) = variantSite.getSpliceSite()
            foundMatch = False
            for (posA, baseA, scoreA) in expected:
                if posA != posB:
                    continue
                foundMatch = True
                if scoreA == scoreB:
                    if baseA != baseB:
                        warnings.append(
                            "Base changed from %s to %s at position %d "
                            "with score %0.2f"
                            % (baseA, baseB, posA, scoreA))
                else:
                    delta = abs(scoreA - scoreB)
                    if delta >= 0.4:
                        warnings.append(
                            "Score changed by %0.2f from %0.2f to %0.2f "
                            "at position %d."
                            % (delta, scoreA, scoreB, posA))
            if foundMatch:
                continue

            if posB >= len(seq):
                warnings.append(
                    "Oops! Somehow the length of our expected and varied "
                    "sequences is not equivalent (%d for expected, "
                    "%d for varied)." % (len(seq), len(var)))
                # Stop comparing the remaining variant sites of this row.
                break
            origBase = seq[posB].lower()
            msgList.append(
                "New splice site predicted at position %d with score %0.2f. "
                "Original base was '%s' and new base is '%s'."
                % (posB, scoreB, origBase, baseB))
            # Add to the session predictions list.
            # NOTE(review): nesting was reconstructed from whitespace-flattened
            # source; confirm this append belongs to the no-match branch.
            self.predictions.append(variantSite)

    logger.debug('Creating output file ...')
    filename = str(task_id) + '.csv'
    path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    logger.debug('Filename: %s', filename)
    logger.debug('Path: %s', path)
    fileutils.predictionsToCsv(self.predictions, path)
    logger.debug('Done writing to file')
    logger.info('Returning result')
    return {'current': rowCount, 'total': rowCount, 'warnings': warnings}
def splice_site():
    """Handle the single-variant splice-site form.

    For non-POST requests ``index.html`` is rendered. On POST the genome
    (``hg19`` or ``hg38``), chromosome, position and base are read from the
    form into the session, the reference sequence between
    ``position - 501`` and ``position + 500`` is fetched, a copy with the
    submitted base at index 500 is built, and
    ``fruitfly.getSpliceSitePredictions`` is run on both. Differences are
    reported through ``flash`` and the template context.

    Returns:
        The rendered ``splicesite.html`` (POST) or ``index.html`` template.
    """
    if request.method != 'POST':
        # if not post, return index.html
        return render_template('index.html')

    session['db'] = 'hg38'  # use the latest genome by default
    if 'db' not in request.form:
        flash('Genome not specified. Using hg38.')
    elif request.form['db'] == 'hg19':
        session['db'] = 'hg19'
    elif request.form['db'] != 'hg38':
        # Echo the rejected 'db' value back to the user.
        flash('Unsuported genome option %s. Using hg38.'
              % (request.form['db']))

    # Set up the mutation information for this run, falling back to the
    # defaults for any field the form did not send.
    session['chromosome'] = request.form.get('chromosome', 'chr1')
    session['position'] = request.form.get('position', '1')
    session['base'] = request.form.get('base', 'A')
    createSession()

    genomePath = "genomes"
    if session['db'] == 'hg19':
        genomeFile = "hg19.2bit"
    else:
        session['db'] = 'hg38'
        genomeFile = "hg38.2bit"
    genomeFilePath = "/".join([genomePath, genomeFile])

    #if app.config['GB'] == 'UCSC':
    #    seq = genomebrowser.gb_getSequence(hgsid, db=db, chrom=chrom,
    #                                       left=(int(pos)-1),
    #                                       right=(int(pos)),
    #                                       leftPad=500,
    #                                       rightPad=500)
    #else:
    chrom = "chr%s" % (session['chromosome'])
    if chrom.startswith("chrchr"):
        # The value (e.g. the 'chr1' default) already carried the prefix;
        # drop the doubled one.
        chrom = chrom[3:]
    position = int(session['position'])
    genome = sequenceutils.loadGenome(genomeFilePath)
    seq = sequenceutils.getSequence(
        genome, chrom, position - 501, position + 500)

    # Strings are immutable, so the variant sequence is rebuilt from pieces
    # with the base at index 500 replaced.
    var = seq[0:500] + session['base'] + seq[501:]

    expectedSites = fruitfly.getSpliceSitePredictions(seq)
    variantSites = fruitfly.getSpliceSitePredictions(var)

    expectedList = [[ss.start, ss.end, ss.score, ss.intron, ss.exon]
                    for ss in expectedSites]
    variantList = [[ss.start, ss.end, ss.score, ss.intron, ss.exon]
                   for ss in variantSites]
    msgList = []

    # List where we will store the SpliceSites to print
    # These are the sites that are "interesting"
    reportList = []

    # Unpack the reference sites once rather than once per variant site.
    expected = [site.getSpliceSite() for site in expectedSites]

    for variantSite in variantSites:
        (posB, baseB, scoreB) = variantSite.getSpliceSite()
        foundMatch = False
        for (posA, baseA, scoreA) in expected:
            if posA != posB:
                continue
            foundMatch = True
            if scoreA == scoreB:
                if baseA != baseB:
                    flash("Base changed from %s to %s at position %d "
                          "with score %0.2f\n"
                          % (baseA, baseB, posA, scoreA))
            else:
                delta = abs(scoreA - scoreB)
                if delta >= 0.4:
                    flash("Score changed by %0.2f from %0.2f to %0.2f "
                          "at position %d.\n"
                          % (delta, scoreA, scoreB, posA))
        if foundMatch:
            continue

        if posB >= len(seq):
            flash("Oops! Somehow the length of our expected and varied "
                  "sequences is not equivalent (%d for expected, "
                  "%d for varied).\n" % (len(seq), len(var)))
            # Stop comparing the remaining variant sites.
            break
        origBase = seq[posB].lower()
        msgList.append(
            "New splice site predicted at position %d with score %0.2f. "
            "Original base was '%s' and new base is '%s'."
            % (posB, scoreB, origBase, baseB))
        # NOTE(review): nesting was reconstructed from whitespace-flattened
        # source; confirm this append belongs to the no-match branch.
        reportList.append([
            variantSite.start, variantSite.end, variantSite.score,
            variantSite.intron, variantSite.exon
        ])

    return render_template('splicesite.html',
                           expectedList=expectedList,
                           variantList=variantList,
                           msgList=msgList,
                           reportList=reportList,
                           db=session['db'],
                           chromosome=session['chromosome'],
                           position=session['position'],
                           base=session['base'])
def splice_site():
    """Handle the single-variant splice-site form.

    For non-POST requests ``index.html`` is rendered. On POST the genome
    (``hg19`` or ``hg38``), chromosome, position and base are read from the
    form into the session, the reference sequence between
    ``position - 501`` and ``position + 500`` is fetched, a copy with the
    submitted base at index 500 is built, and
    ``fruitfly.getSpliceSitePredictions`` is run on both. Differences are
    reported through ``flash`` and the template context.

    Returns:
        The rendered ``splicesite.html`` (POST) or ``index.html`` template.
    """
    if request.method != 'POST':
        # if not post, return index.html
        return render_template('index.html')

    session['db'] = 'hg38'  # use the latest genome by default
    if 'db' not in request.form:
        flash('Genome not specified. Using hg38.')
    elif request.form['db'] == 'hg19':
        session['db'] = 'hg19'
    elif request.form['db'] != 'hg38':
        # Echo the rejected 'db' value back to the user.
        flash('Unsuported genome option %s. Using hg38.'
              % (request.form['db']))

    # Set up the mutation information for this run, falling back to the
    # defaults for any field the form did not send.
    session['chromosome'] = request.form.get('chromosome', 'chr1')
    session['position'] = request.form.get('position', '1')
    session['base'] = request.form.get('base', 'A')
    createSession()

    genomePath = "genomes"
    if session['db'] == 'hg19':
        genomeFile = "hg19.2bit"
    else:
        session['db'] = 'hg38'
        genomeFile = "hg38.2bit"
    genomeFilePath = "/".join([genomePath, genomeFile])

    #if app.config['GB'] == 'UCSC':
    #    seq = genomebrowser.gb_getSequence(hgsid, db=db, chrom=chrom,
    #                                       left=(int(pos)-1),
    #                                       right=(int(pos)),
    #                                       leftPad=500,
    #                                       rightPad=500)
    #else:
    chrom = "chr%s" % (session['chromosome'])
    if chrom.startswith("chrchr"):
        # The value (e.g. the 'chr1' default) already carried the prefix;
        # drop the doubled one.
        chrom = chrom[3:]
    position = int(session['position'])
    genome = sequenceutils.loadGenome(genomeFilePath)
    seq = sequenceutils.getSequence(
        genome, chrom, position - 501, position + 500)

    # Strings are immutable, so the variant sequence is rebuilt from pieces
    # with the base at index 500 replaced.
    var = seq[0:500] + session['base'] + seq[501:]

    expectedSites = fruitfly.getSpliceSitePredictions(seq)
    variantSites = fruitfly.getSpliceSitePredictions(var)

    expectedList = [[ss.start, ss.end, ss.score, ss.intron, ss.exon]
                    for ss in expectedSites]
    variantList = [[ss.start, ss.end, ss.score, ss.intron, ss.exon]
                   for ss in variantSites]
    msgList = []

    # List where we will store the SpliceSites to print
    # These are the sites that are "interesting"
    reportList = []

    # Unpack the reference sites once rather than once per variant site.
    expected = [site.getSpliceSite() for site in expectedSites]

    for variantSite in variantSites:
        (posB, baseB, scoreB) = variantSite.getSpliceSite()
        foundMatch = False
        for (posA, baseA, scoreA) in expected:
            if posA != posB:
                continue
            foundMatch = True
            if scoreA == scoreB:
                if baseA != baseB:
                    flash("Base changed from %s to %s at position %d "
                          "with score %0.2f\n"
                          % (baseA, baseB, posA, scoreA))
            else:
                delta = abs(scoreA - scoreB)
                if delta >= 0.4:
                    flash("Score changed by %0.2f from %0.2f to %0.2f "
                          "at position %d.\n"
                          % (delta, scoreA, scoreB, posA))
        if foundMatch:
            continue

        if posB >= len(seq):
            flash("Oops! Somehow the length of our expected and varied "
                  "sequences is not equivalent (%d for expected, "
                  "%d for varied).\n" % (len(seq), len(var)))
            # Stop comparing the remaining variant sites.
            break
        origBase = seq[posB].lower()
        msgList.append(
            "New splice site predicted at position %d with score %0.2f. "
            "Original base was '%s' and new base is '%s'."
            % (posB, scoreB, origBase, baseB))
        # NOTE(review): nesting was reconstructed from whitespace-flattened
        # source; confirm this append belongs to the no-match branch.
        reportList.append([
            variantSite.start, variantSite.end, variantSite.score,
            variantSite.intron, variantSite.exon
        ])

    return render_template('splicesite.html',
                           expectedList=expectedList,
                           variantList=variantList,
                           msgList=msgList,
                           reportList=reportList,
                           db=session['db'],
                           chromosome=session['chromosome'],
                           position=session['position'],
                           base=session['base'])