def _score_splice_site(seq, splicetype="donor"):
    """
    Score one candidate splice site with the PSSM matching its type

    @type  seq: string
    @param seq: DNA sequence of EXACT length of the PSSM

    @type  splicetype: string
    @param splicetype: 'acceptor' selects the acceptor matrix; any other
                       value (default 'donor') selects the donor matrix

    @rtype:  float
    @return: PSSM score for this splice site
    """
    # pick the information-content matrix first, then score once
    matrix = IC_ACCEPTOR if splicetype == 'acceptor' else IC_DONOR
    return pssmscore(seq, matrix)
def scan_pssm_splice_site(seq, splicetype="donor", override_pattern_offset=(),
                          min_pssm_score=None, allow_non_canonical=False,
                          non_canonical_min_pssm_score=0.0):
    """
    Find splice sites by a PSSM on input sequence

    A window with the length of the PSSM is slid over the sequence one
    position at a time. In each window the splice site itself is the part
    that remains after removing pattern_offset[0] characters from the left
    and pattern_offset[1] characters from the right.

    @type  seq: string
    @param seq: DNA sequence to scan; a sequence shorter than the PSSM
                yields an empty list

    @type  splicetype: string
    @param splicetype: 'donor' or 'acceptor'; any value other than
                       'acceptor' uses the donor matrix and offsets

    @type  override_pattern_offset: tuple
    @param override_pattern_offset: tuple with 2 integers (left, right) that
                replaces the default pattern offset; use cautiously!!

    @type  min_pssm_score: float
    @param min_pssm_score: canonical sites scoring below this value are
                discarded; None (default) disables the filter

    @type  allow_non_canonical: boolean
    @param allow_non_canonical: True or False; when True, non-canonical
                donor sites are reported as well (no effect on acceptors)

    @type  non_canonical_min_pssm_score: float
    @param non_canonical_min_pssm_score: non-canonical donor sites scoring
                below this value are discarded; None disables the filter

    @rtype:  list
    @return: list with SpliceDonors or SpliceAcceptors; acceptors are in
             ascending offset order, donors (splicetype == 'donor') are in
             descending offset order
    """
    is_acceptor = splicetype == 'acceptor'
    if is_acceptor:
        pssm_matrix = IC_ACCEPTOR
        pattern_offset = IC_ACCEPTOR_PATTERN_OFFSET
        canonical = "AG"
    else:
        pssm_matrix = IC_DONOR
        pattern_offset = IC_DONOR_PATTERN_OFFSET
        canonical = "GT"

    # The non-canonical matrix is only ever used in the donor branch below,
    # so do not parse the data file when it cannot be needed.
    ic_ncgc_donor = None
    if allow_non_canonical and splicetype == 'donor':
        # obtain PSSM_IC for non-canonical (GC) donors
        ic_ncgc_donor = parse_ic_file(IC_DONOR_NCGC_DATA_FILE)

    # caller-supplied offsets take precedence over the defaults
    if override_pattern_offset:
        pattern_offset = override_pattern_offset

    pssmlength = len(pssm_matrix)

    # Explicit end index instead of a negative one: seqpart[a:-0] would be
    # the empty string, so a right offset of 0 could never match anything.
    site_start = pattern_offset[0]
    site_end = max(0, pssmlength - pattern_offset[1])

    sites = []
    for offset in range(0, len(seq) - pssmlength + 1):
        # get sequence slice of pattern and actual splice site
        seqpart = seq[offset:offset + pssmlength].upper()
        splicesite = seqpart[site_start:site_end]

        if splicesite == canonical:
            score = _score_splice_site(seqpart, splicetype=splicetype)
            threshold = min_pssm_score
        elif allow_non_canonical and splicetype == 'donor':
            # NOTE(review): every non-GT dinucleotide is scored with the
            # GC-donor matrix here, not only GC -- confirm this is intended.
            score = pssmscore(seqpart, ic_ncgc_donor)
            threshold = non_canonical_min_pssm_score
        else:
            # non-canonical site that was not requested or is unsupported
            continue

        # a threshold of None means: store the site regardless of its score
        if threshold is not None and score < threshold:
            continue

        if is_acceptor:
            site = SpliceAcceptor(offset, seqpart, acceptor=splicesite,
                                  pssm_score=score)
        else:
            site = SpliceDonor(offset, seqpart, donor=splicesite,
                               pssm_score=score)
        sites.append(site)

    # donor sites are returned in reversed (descending offset) order
    if splicetype == 'donor':
        sites.reverse()

    return sites