def check_MaxEntScan_5ss(pairs):
    """Return the change in ``maxent_fast.score5`` score for each pair.

    Args:
        pairs: iterable of two-element ``(ref, alt)`` sequences.

    Returns:
        A list with one entry per pair, equal to ``score5(alt) - score5(ref)``.
        Both scores use the ``matrix5`` matrix that is in scope in this
        module.
    """

    def _score_delta(ref_seq, alt_seq):
        # Score the reference first, then the alternate, and report alt - ref.
        ref_score = maxent_fast.score5(ref_seq, matrix=matrix5)
        alt_score = maxent_fast.score5(alt_seq, matrix=matrix5)
        return alt_score - ref_score

    return [_score_delta(ref_seq, alt_seq) for ref_seq, alt_seq in pairs]
def score_ss_seq(matrix5, matrix3, seq_ss, seq_ss_mut, ss):
    """Score a wild-type / mutant splice-site sequence pair.

    Args:
        matrix5: matrix handed to ``score5`` when ``ss == 'ss5'``.
        matrix3: matrix handed to ``score3`` when ``ss == 'ss3'``.
        seq_ss: wild-type sequence.
        seq_ss_mut: mutated sequence.
        ss: ``'ss5'`` or ``'ss3'``; selects which scorer is used.

    Returns:
        A tuple of three strings ``(wt, mut, relative_change)`` where the
        scores are rounded to 3 decimals and ``relative_change`` is
        ``(mut - wt) / wt``, also rounded to 3 decimals.
        ``('.', '.', '.')`` is returned when either sequence contains
        ``N``/``n`` or when ``ss`` is not one of the two known values.
        ``relative_change`` alone is ``'.'`` when the wild-type score is
        exactly zero, because the ratio is undefined there.
    """
    missing = '.'

    # Ambiguous bases cannot be scored.
    if any(base in seq for seq in (seq_ss, seq_ss_mut) for base in ('N', 'n')):
        return missing, missing, missing

    if ss == 'ss5':
        scorer, matrix = score5, matrix5
    elif ss == 'ss3':
        scorer, matrix = score3, matrix3
    else:
        return missing, missing, missing

    score_wt = scorer(seq_ss, matrix=matrix)
    score_mut = scorer(seq_ss_mut, matrix=matrix)

    if score_wt == 0:
        # Avoid ZeroDivisionError; report the ratio as missing instead.
        relative_change = missing
    else:
        # NOTE(review): the denominator is the signed wild-type score, so the
        # sign of the ratio flips when score_wt is negative -- confirm that
        # this is what downstream consumers expect.
        relative_change = str(round((score_mut - score_wt) / score_wt, 3))

    return str(round(score_wt, 3)), str(round(score_mut, 3)), relative_change
def runMaxEntScan(sequence, donor=False, usePerl=False):
    """Run maxEntScan on the indicated sequence.

    Run score5.pl on candidate donor sequences, score3.pl on candidate
    acceptor sequences.  Return the score.

    Args:
        sequence: the sequence to score; it is converted with ``str()``
            before being scored or written out.
        donor: when true use the donor scorer (``score5``), otherwise the
            acceptor scorer (``score3``).
        usePerl: when true, shell out to the Perl script located next to
            this module; otherwise call ``maxent_fast`` in-process.

    Returns:
        The score as a float.  The in-process result is rounded to two
        decimals; the Perl result is the first number found in the
        script's standard output.

    Raises:
        IndexError: if the Perl script's output contains no number.
    """
    if usePerl:
        script_name = 'score5.pl' if donor else 'score3.pl'
        script_path = os.path.join(os.path.dirname(__file__), script_name)
        (fd, tmpfile) = tempfile.mkstemp()
        try:
            with os.fdopen(fd, "w") as fp:
                fp.write(str(sequence))
            pipe = subprocess.Popen(["perl", script_path, tmpfile],
                                    stdout=subprocess.PIPE)
            # communicate() drains stdout, closes the pipe and reaps the child.
            result, _ = pipe.communicate()
        finally:
            # Always drop the temp file, even if perl could not be started.
            os.remove(tmpfile)
        entScore = re.findall(r"[+-]?\d+(?:\.\d+)?",
                              result.decode("utf-8", "replace"))
        return float(entScore[0])
    # use the fastest available maxentpy implementation
    # we round to two decimal places to match the perl script's output
    if donor:
        return round(maxent_fast.score5(str(sequence), matrix=matrix5), 2)
    return round(maxent_fast.score3(str(sequence), matrix=matrix3), 2)
def compute_mes(interval, matrix5, matrix3, genome):
    """Annotate ``interval`` with splice-site sequences and MaxEnt scores.

    Args:
        interval: object exposing ``chrom``, ``start`` and ``end``
            attributes and supporting ``interval['strand']`` lookup and
            item assignment (presumably a pybedtools Interval -- verify
            against the caller).
        matrix5: matrix passed to ``maxent_fast.score5``.
        matrix3: matrix passed to ``maxent_fast.score3``.
        genome: path handed to ``pysam.FastaFile``.

    Returns:
        The same ``interval`` with ``seq5``, ``mes5``, ``seq3`` and ``mes3``
        set.  The two scores are ``'NA'`` when either fetched sequence
        contains a character outside ``ACGT``.
    """
    fasta = pysam.FastaFile(genome)
    try:
        # 5ss 3bases in exon and 6 bases in intron
        # 3ss 20 bases in intron and 3 bases in exon
        if interval['strand'] == '+':
            seq5 = fasta.fetch(interval.chrom, interval.end - 3,
                               interval.end + 6).upper()
            seq3 = fasta.fetch(interval.chrom, interval.start - 20,
                               interval.start + 3).upper()
        else:
            seq5 = reverse_complement(
                fasta.fetch(interval.chrom, interval.start - 6,
                            interval.start + 3).upper())
            seq3 = reverse_complement(
                fasta.fetch(interval.chrom, interval.end - 3,
                            interval.end + 20).upper())
    finally:
        # Release the FASTA handle; the sequences are plain strings by now.
        fasta.close()

    # Score only when both sites are free of ambiguous bases.
    if set(seq5).issubset('ACGT') and set(seq3).issubset('ACGT'):
        mes5 = maxent_fast.score5(seq5, matrix=matrix5)
        mes3 = maxent_fast.score3(seq3, matrix=matrix3)
    else:
        mes5 = 'NA'
        mes3 = 'NA'

    interval['seq5'] = seq5
    interval['mes5'] = mes5
    interval['seq3'] = seq3
    interval['mes3'] = mes3
    return interval
def runMaxEntScan(sequence, donor=False, usePerl=False):
    """Run maxEntScan on the indicated sequence.

    Run score5.pl on candidate donor sequences, score3.pl on candidate
    acceptor sequences.  Return the score.

    Args:
        sequence: the sequence to score; it is converted with ``str()``
            before being scored or written out.
        donor: when true use the donor scorer (``score5``), otherwise the
            acceptor scorer (``score3``).
        usePerl: when true, shell out to the Perl script located next to
            this module; otherwise call ``maxent_fast`` in-process.

    Returns:
        The score as a float.  The in-process result is rounded to two
        decimals; the Perl result is the first number found in the
        script's standard output.

    Raises:
        IndexError: if the Perl script's output contains no number.
    """
    if usePerl:
        script_name = 'score5.pl' if donor else 'score3.pl'
        script_path = os.path.join(os.path.dirname(__file__), script_name)
        (fd, tmpfile) = tempfile.mkstemp()
        try:
            with os.fdopen(fd, "w") as fp:
                fp.write(str(sequence))
            pipe = subprocess.Popen(["perl", script_path, tmpfile],
                                    stdout=subprocess.PIPE)
            # communicate() drains stdout, closes the pipe and reaps the child.
            result, _ = pipe.communicate()
        finally:
            # Always drop the temp file, even if perl could not be started.
            os.remove(tmpfile)
        entScore = re.findall(r"[+-]?\d+(?:\.\d+)?",
                              result.decode("utf-8", "replace"))
        return float(entScore[0])
    # use the fastest available maxentpy implementation
    # we round to two decimal places to match the perl script's output
    if donor:
        return round(maxent_fast.score5(str(sequence), matrix=matrix5), 2)
    return round(maxent_fast.score3(str(sequence), matrix=matrix3), 2)
def compute_five_score(dna):
    """Return the ``maxent_fast.score5`` score of ``dna`` using ``mat5``.

    A length other than 9 is reported on standard output, but scoring is
    still attempted afterwards.
    """
    expected_length = 9
    if not len(dna) == expected_length:
        print("ERROR INCORRECT LENGTH: %s" % dna)
    five_score = maxent_fast.score5(dna, matrix=mat5)
    return five_score