コード例 #1
0
def bridge_two_pacbporfs_by_tinyexon(preceding_orf,subsequent_orf,
    preceding_donor_sites=None,
    subsequent_acceptor_sites=None,
    orflist=None,
    max_tinyexon_nt_length=TINYEXON_MAX_NT_LENGTH,
    min_tinyexon_nt_length=TINYEXON_MIN_NT_LENGTH,
    max_tinyexon_intron_nt_length=TINYEXON_MAX_INTRON_NT_LENGTH,
    min_tinyexon_intron_nt_length=TINYEXON_MIN_INTRON_NT_LENGTH,
    min_donor_pssm_score=TINYEXON_MIN_DONOR_PSSM_SCORE,
    min_acceptor_pssm_score=TINYEXON_MIN_ACCEPTOR_PSSM_SCORE,
    min_total_pssm_score=TINYEXON_MIN_TOTAL_PSSM_SCORE):
    """
    Bridge two `neighbouring` Orfs by a tinyexon by applying preceding donors and subsequent acceptors

    Every Orf in orflist that lies (partly) between the given donor and
    acceptor sites is handed, per donor/acceptor pair, to
    find_tiny_exon_on_orf(); the tinyexons found are collected once per
    unique ( acceptor.pos, donor.pos ) coordinate pair.

    @type  preceding_orf: Orf object
    @param preceding_orf: Orf object that contains preceding_donor_site(s)

    @type  subsequent_orf: Orf object
    @param subsequent_orf: Orf object that contains subsequent_acceptor_site(s)

    @type  preceding_donor_sites: list or None
    @param preceding_donor_sites: list with SpliceDonorGT and/or SpliceDonor objects

    @type  subsequent_acceptor_sites: list or None
    @param subsequent_acceptor_sites: list with SpliceAcceptorAG and/or SpliceAcceptor objects

    @type  orflist: list or None
    @param orflist: list with Orf objects (candidates to carry the tinyexon)

    @type  max_tinyexon_nt_length: integer
    @param max_tinyexon_nt_length: positive integer, largest length of tinyexon in nt

    @type  min_tinyexon_nt_length: integer
    @param min_tinyexon_nt_length: positive integer, smallest length of tinyexon in nt

    @type  max_tinyexon_intron_nt_length: integer
    @param max_tinyexon_intron_nt_length: positive integer, largest length of intron around tinyexon in nt

    @type  min_tinyexon_intron_nt_length: integer
    @param min_tinyexon_intron_nt_length: positive integer, smallest length of intron around tinyexon in nt

    @type  min_total_pssm_score: float or None
    @param min_total_pssm_score: minimal sum of donor - acceptor pssm score pair of tinyexon

    @type  min_donor_pssm_score: float or None
    @param min_donor_pssm_score: minimal donor pssm score; applied to the
            given preceding donors as well as to the donors of the tinyexon

    @type  min_acceptor_pssm_score: float or None
    @param min_acceptor_pssm_score: minimal acceptor pssm score; applied to the
            given subsequent acceptors as well as to the acceptors of the tinyexon

    @rtype:  list
    @return: the unique tinyexon objects, passed through _order_intron_list();
            an empty list when no donors, acceptors or orfs are given.
            The flanking introns are constructed but are NOT part of the
            return value (see the NOTE in the function body).

    @attention: exceptions raised by find_tiny_exon_on_orf() are not caught here

    @attention: Global vars that have to be set upon usage (signature defaults):
        TINYEXON_MAX_NT_LENGTH
        TINYEXON_MIN_NT_LENGTH
        TINYEXON_MAX_INTRON_NT_LENGTH
        TINYEXON_MIN_INTRON_NT_LENGTH
        TINYEXON_MIN_DONOR_PSSM_SCORE
        TINYEXON_MIN_ACCEPTOR_PSSM_SCORE
        TINYEXON_MIN_TOTAL_PSSM_SCORE

    """
    # nothing to bridge without donors, acceptors and candidate orfs;
    # this guard also covers the None defaults
    if not preceding_donor_sites:
        return []
    if not subsequent_acceptor_sites:
        return []
    if not orflist:
        return []

    # dictionary with exon coordinates (keys) and tinyexons (values);
    # guarantees that each coordinate pair is reported only once
    returnexons = {}
    min_preceding_donor_sites_pos     = min([ d.pos for d in preceding_donor_sites ])
    max_subsequent_acceptor_sites_pos = max([ a.pos for a in subsequent_acceptor_sites ])
    for orfX in orflist:
        # cheap pre-filter: orf must end after the most upstream donor
        # and must start before the most downstream acceptor
        if orfX.endPY   <= min_preceding_donor_sites_pos: continue
        if orfX.startPY >= max_subsequent_acceptor_sites_pos: continue

        # if here, we can try to make a bridge by a tinyexon
        for donor in preceding_donor_sites:
            # orf not correctly positioned towards this donor site
            if orfX.endPY <= donor.pos: continue

            # check pssm_score of donor site
            # TODO: this is in fact the donor on the normal, large orf
            # TODO: do we want to check this pssm score?
            if donor.pssm_score < min_donor_pssm_score: continue

            for acceptor in subsequent_acceptor_sites:
                # orf not correctly positioned towards this acceptor site
                if orfX.startPY >= acceptor.pos: continue

                # check pssm_score of acceptor site
                # TODO: this is in fact the acceptor on the normal, large orf
                # TODO: do we want to check this pssm score?
                if acceptor.pssm_score < min_acceptor_pssm_score: continue

                # okay, now try to bridge it!
                exons = find_tiny_exon_on_orf(orfX,order_by='total_pssm',
                        max_tinyexon_nt_length=max_tinyexon_nt_length,
                        min_tinyexon_nt_length=min_tinyexon_nt_length,
                        max_tinyexon_intron_nt_length=max_tinyexon_intron_nt_length,
                        min_tinyexon_intron_nt_length=min_tinyexon_intron_nt_length,
                        min_donor_pssm_score=min_donor_pssm_score,
                        min_acceptor_pssm_score=min_acceptor_pssm_score,
                        min_total_pssm_score=min_total_pssm_score,
                        preceding_donor=donor,
                        subsequent_acceptor=acceptor
                        )

                for tinyexon in exons:
                    # NOTE(review): both introns are constructed but never
                    # stored or returned; construction is kept as-is because
                    # IntronConnectingOrfs may validate its arguments.
                    # The shared nucleotides are still a placeholder.

                    # make preceding intron
                    shared_nts_A = "TODO"
                    preceding_intron = IntronConnectingOrfs(
                        donor,tinyexon.acceptor,
                        shared_nts_A,preceding_orf,tinyexon.orf )

                    # make subsequent intron
                    shared_nts_B = "TODO"
                    subsequent_intron = IntronConnectingOrfs(
                        tinyexon.donor, acceptor,
                        shared_nts_B,tinyexon.orf,subsequent_orf )

                    # store the tinyexon once per coordinate pair; the first
                    # one encountered wins
                    key = ( tinyexon.acceptor.pos, tinyexon.donor.pos )
                    if key not in returnexons:
                        returnexons[key] = tinyexon

    # and return the ordered list of unique tinyexons
    return _order_intron_list( returnexons.values() )
コード例 #2
0
def scan_orf_for_tiny_exon(orfX,order_by='total_pssm',
    max_tinyexon_nt_length=TINYEXON_MAX_NT_LENGTH,
    min_tinyexon_nt_length=TINYEXON_MIN_NT_LENGTH,
    min_donor_pssm_score=TINYEXON_MIN_DONOR_PSSM_SCORE,
    min_acceptor_pssm_score=TINYEXON_MIN_ACCEPTOR_PSSM_SCORE,
    min_total_pssm_score=TINYEXON_MIN_TOTAL_PSSM_SCORE,
    allow_non_canonical_donor=TINYEXON_ALLOW_NON_CANONICAL_DONOR,
    allow_non_canonical_acceptor=TINYEXON_ALLOW_NON_CANONICAL_ACCEPTOR,
    min_intron_nt_length=None,
    max_intron_nt_length=None,
    donor_phase=None,
    acceptor_phase=None,
    preceeding_donor_site=None,
    subsequent_acceptor_site=None,
    min_acceptor_pos=None,
    max_donor_pos=None):
    """
    Find tiny exons on an orf by length range

    Splice sites are (re)scanned on orfX, filtered by the optional
    constraints below, and every acceptor/donor pair whose distance lies in
    the requested length range is turned into an ExonOnOrf object.

    @type  orfX: Orf object
    @param orfX: Orf object to scan for a tinyexon

    @type  order_by: string
    @param order_by: ordering criterion, passed unchanged to _order_intron_list()

    @type  max_tinyexon_nt_length: integer
    @param max_tinyexon_nt_length: positive integer, largest length of tinyexon in nt

    @type  min_tinyexon_nt_length: integer
    @param min_tinyexon_nt_length: positive integer, smallest length of tinyexon in nt

    @type  min_total_pssm_score: float or None
    @param min_total_pssm_score: minimal sum of donor - acceptor pssm score pair of tinyexon;
            None disables this check

    @type  min_donor_pssm_score: float or None
    @param min_donor_pssm_score: minimal donor pssm score of tinyexon

    @type  min_acceptor_pssm_score: float or None
    @param min_acceptor_pssm_score: minimal acceptor pssm score of tinyexon

    @type  allow_non_canonical_donor: boolean
    @param allow_non_canonical_donor: passed as allow_non_canonical to the donor site scan

    @type  allow_non_canonical_acceptor: boolean
    @param allow_non_canonical_acceptor: passed as allow_non_canonical to the acceptor site scan

    @type  min_intron_nt_length: integer or None
    @param min_intron_nt_length: smallest distance between a given flanking
            site and a site on orfX; only checked when the corresponding
            flanking site is given and the value is truthy

    @type  max_intron_nt_length: integer or None
    @param max_intron_nt_length: largest distance between a given flanking
            site and a site on orfX; only checked when the corresponding
            flanking site is given and the value is truthy

    @type  donor_phase: integer or None
    @param donor_phase: when in [0,1,2], only donors of this phase are used

    @type  acceptor_phase: integer or None
    @param acceptor_phase: when in [0,1,2], only acceptors of this phase are used

    @type  preceeding_donor_site: object or None
    @param preceeding_donor_site: when given, acceptors must have the phase of this site

    @type  subsequent_acceptor_site: object or None
    @param subsequent_acceptor_site: when given, donors must have the phase of this site

    @type  max_donor_pos: integer or None
    @param max_donor_pos: maximal eligible donor position (falsy value: no limit)

    @type  min_acceptor_pos: integer or None
    @param min_acceptor_pos: minimal eligible acceptor position (falsy value: no limit)

    @rtype:  list
    @return: ExonOnOrf objects ordered by _order_intron_list(); an empty
            list when orfX has no donor or no acceptor sites
    """

    # scan for splice sites on this (tiny) orf; honour the caller's
    # allow_non_canonical_* arguments (their defaults are the module globals)
    orfX.scan_orf_for_pssm_splice_sites(
            splicetype="donor",
            min_pssm_score=min_donor_pssm_score,
            allow_non_canonical=allow_non_canonical_donor,
            non_canonical_min_pssm_score=TINYEXON_NON_CANONICAL_MIN_DONOR_PSSM_SCORE)
    orfX.scan_orf_for_pssm_splice_sites(
            splicetype="acceptor",
            min_pssm_score=min_acceptor_pssm_score,
            allow_non_canonical=allow_non_canonical_acceptor,
            non_canonical_min_pssm_score=TINYEXON_NON_CANONICAL_MIN_ACCEPTOR_PSSM_SCORE)

    # return list with exons
    exons = []

    # quickest check possible: are there donors & acceptors at all?
    if orfX._donor_sites == [] or orfX._acceptor_sites == []:
        # no exons possible because splice sites are missing
        return exons

    # make a list of compatible_acceptor_sites
    compatible_acceptor_sites = []
    for acceptor in orfX._acceptor_sites:
        if acceptor_phase in [0,1,2] and acceptor.phase != acceptor_phase:
            continue
        if acceptor.pssm_score < min_acceptor_pssm_score:
            continue
        if min_acceptor_pos and acceptor.pos < min_acceptor_pos:
            continue
        if preceeding_donor_site:
            # the intron between the preceding donor and this acceptor
            # must be phase-compatible and within the length range
            if preceeding_donor_site.phase != acceptor.phase:
                continue
            intron_length = acceptor.pos - preceeding_donor_site.pos
            if min_intron_nt_length and intron_length < min_intron_nt_length:
                continue
            if max_intron_nt_length and intron_length > max_intron_nt_length:
                continue

        # if we reach this point, compatible site!
        compatible_acceptor_sites.append( acceptor )

    # make a list of compatible_donor_sites
    compatible_donor_sites = []
    for donor in orfX._donor_sites:
        if donor_phase in [0,1,2] and donor.phase != donor_phase:
            continue
        if donor.pssm_score < min_donor_pssm_score:
            continue
        if max_donor_pos and donor.pos > max_donor_pos:
            continue
        if subsequent_acceptor_site:
            # the intron between this donor and the subsequent acceptor
            # must be phase-compatible and within the length range
            if subsequent_acceptor_site.phase != donor.phase:
                continue
            intron_length = subsequent_acceptor_site.pos - donor.pos
            if min_intron_nt_length and intron_length < min_intron_nt_length:
                continue
            if max_intron_nt_length and intron_length > max_intron_nt_length:
                continue
        # if we reach this point, compatible site!
        compatible_donor_sites.append( donor )

    # and combine sites to exons!
    for acceptor in compatible_acceptor_sites:
        for donor in compatible_donor_sites:
            # length of exon
            exon_length = donor.pos - acceptor.pos
            # continue if exon too short
            if exon_length < min_tinyexon_nt_length: continue
            # continue if exon too long
            if exon_length > max_tinyexon_nt_length: continue

            # check sum of donor and acceptor pssm score (0.0 is a valid
            # threshold, only None switches the check off)
            if min_total_pssm_score is not None and\
            donor.pssm_score + acceptor.pssm_score < min_total_pssm_score:
                continue

            # make an Exon object
            exon = ExonOnOrf(acceptor,donor,orfX)
            exons.append(exon)

    # return ordered exon list
    return _order_intron_list(exons,order_by=order_by)
コード例 #3
0
def find_tiny_exon_on_orf(orfX,order_by='total_pssm',
    max_tinyexon_nt_length=TINYEXON_MAX_NT_LENGTH,
    min_tinyexon_nt_length=TINYEXON_MIN_NT_LENGTH,
    max_tinyexon_intron_nt_length=TINYEXON_MAX_INTRON_NT_LENGTH,
    min_tinyexon_intron_nt_length=TINYEXON_MIN_INTRON_NT_LENGTH,
    min_donor_pssm_score=TINYEXON_MIN_DONOR_PSSM_SCORE,
    min_acceptor_pssm_score=TINYEXON_MIN_ACCEPTOR_PSSM_SCORE,
    min_total_pssm_score=TINYEXON_MIN_TOTAL_PSSM_SCORE,
    preceding_donor=None,
    subsequent_acceptor=None,
    preceding_donor_pos=None,
    subsequent_acceptor_pos=None):
    """
    Find a tiny exon on an orf by a leading donor and a trailing acceptor site.

    Acceptors on orfX are matched against preceding_donor and donors on orfX
    against subsequent_acceptor (same phase, intron length within range);
    every compatible acceptor/donor pair whose distance lies in the tinyexon
    length range is turned into an ExonOnOrf object.

    @type  orfX: Orf object
    @param orfX: Orf object to scan for a tinyexon

    @type  order_by: string
    @param order_by: ordering criterion, passed unchanged to _order_intron_list()

    @type  preceding_donor: object
    @param preceding_donor: SpliceDonorGT or SpliceDonor object (required)

    @type  subsequent_acceptor: object
    @param subsequent_acceptor: SpliceAcceptorAG or SpliceAcceptor object (required)

    @type  preceding_donor_pos: any
    @param preceding_donor_pos: accepted but not used by this function

    @type  subsequent_acceptor_pos: any
    @param subsequent_acceptor_pos: accepted but not used by this function

    @type  max_tinyexon_nt_length: integer
    @param max_tinyexon_nt_length: positive integer, largest length of tinyexon in nt

    @type  min_tinyexon_nt_length: integer
    @param min_tinyexon_nt_length: positive integer, smallest length of tinyexon in nt

    @type  max_tinyexon_intron_nt_length: integer
    @param max_tinyexon_intron_nt_length: positive integer, largest length of intron around tinyexon in nt

    @type  min_tinyexon_intron_nt_length: integer
    @param min_tinyexon_intron_nt_length: positive integer, smallest length of intron around tinyexon in nt

    @type  min_total_pssm_score: float or None
    @param min_total_pssm_score: minimal sum of donor - acceptor pssm score pair of tinyexon;
            None disables this check

    @type  min_donor_pssm_score: float or None
    @param min_donor_pssm_score: minimal donor pssm score of tinyexon

    @type  min_acceptor_pssm_score: float or None
    @param min_acceptor_pssm_score: minimal acceptor pssm score of tinyexon

    @rtype:  list
    @return: ExonOnOrf objects ordered by _order_intron_list(); an empty
            list when orfX has no donor or no acceptor sites

    @raise InproperlyAppliedArgument: preceding_donor or subsequent_acceptor
            is missing or of an unexpected class, or one of the four length
            arguments is not a positive integer
    @raise UnexpectedSpliceSitePhase: phase of preceding_donor or
            subsequent_acceptor is not in [0,1,2]

    @attention: Global vars that have to be set upon usage:
        TINYEXON_MAX_NT_LENGTH
        TINYEXON_MIN_NT_LENGTH
        TINYEXON_MAX_INTRON_NT_LENGTH
        TINYEXON_MIN_INTRON_NT_LENGTH
        TINYEXON_MIN_TOTAL_PSSM_SCORE
        TINYEXON_MIN_DONOR_PSSM_SCORE
        TINYEXON_MIN_ACCEPTOR_PSSM_SCORE
        TINYEXON_ALLOW_NON_CANONICAL_DONOR
        TINYEXON_ALLOW_NON_CANONICAL_ACCEPTOR
        TINYEXON_NON_CANONICAL_MIN_DONOR_PSSM_SCORE
        TINYEXON_NON_CANONICAL_MIN_ACCEPTOR_PSSM_SCORE

    """

    # scan for splice sites on this (tiny) orf
    # NOTE(review): the scan uses the module-level thresholds, not the
    # min_donor_pssm_score / min_acceptor_pssm_score arguments; those are
    # only applied as filters further down. Kept as-is; confirm intent.
    orfX.scan_orf_for_pssm_splice_sites(
            splicetype="donor",
            min_pssm_score=TINYEXON_MIN_DONOR_PSSM_SCORE,
            allow_non_canonical=TINYEXON_ALLOW_NON_CANONICAL_DONOR,
            non_canonical_min_pssm_score=TINYEXON_NON_CANONICAL_MIN_DONOR_PSSM_SCORE)
    orfX.scan_orf_for_pssm_splice_sites(
            splicetype="acceptor",
            min_pssm_score=TINYEXON_MIN_ACCEPTOR_PSSM_SCORE,
            allow_non_canonical=TINYEXON_ALLOW_NON_CANONICAL_ACCEPTOR,
            non_canonical_min_pssm_score=TINYEXON_NON_CANONICAL_MIN_ACCEPTOR_PSSM_SCORE)

    # return list with exons
    exons = []

    # preceding donor MUST be set and be of a known donor class; the class is
    # checked by name, as these classes are not referenced directly here
    if preceding_donor is None:
        message = "preceding_donor is not a `SpliceDonorGT` or `SpliceDonor` object"
        raise InproperlyAppliedArgument(message)
    if preceding_donor.__class__.__name__ not in ['SpliceDonorGT','SpliceDonor']:
        message = "preceding_donor is not a `SpliceDonorGT` or `SpliceDonor` object, but a `%s`" % preceding_donor.__class__.__name__
        raise InproperlyAppliedArgument(message)

    # subsequent acceptor MUST be set and be of a known acceptor class
    if subsequent_acceptor is None:
        message = "subsequent_acceptor is not a `SpliceAcceptorAG` or `SpliceAcceptor` object"
        raise InproperlyAppliedArgument(message)
    if subsequent_acceptor.__class__.__name__ not in ['SpliceAcceptorAG','SpliceAcceptor']:
        message = "subsequent_acceptor is not a `SpliceAcceptorAG` or `SpliceAcceptor` object, but a `%s`" % subsequent_acceptor.__class__.__name__
        raise InproperlyAppliedArgument(message)

    # check phases of acceptor and donor
    if subsequent_acceptor.phase not in [0,1,2]:
        raise UnexpectedSpliceSitePhase
    if preceding_donor.phase not in [0,1,2]:
        raise UnexpectedSpliceSitePhase

    # some further integrity check on integer arguments: each of them must
    # be convertible to an integer and be larger than zero
    for variable in ( max_tinyexon_nt_length, min_tinyexon_nt_length,
    max_tinyexon_intron_nt_length, min_tinyexon_intron_nt_length):
        try:
            is_positive_integer = int(variable) > 0
        except (TypeError, ValueError, OverflowError):
            is_positive_integer = False
        if not is_positive_integer:
            message = "a variable is NOT a positive integer as expected"
            raise InproperlyAppliedArgument(message)

    # quickest check possible: are there donors & acceptors at all?
    if orfX._donor_sites == [] or orfX._acceptor_sites == []:
        # no exons possible because splice sites are missing
        return exons

    # make a list of compatible_acceptor_sites
    compatible_acceptor_sites = []
    for acceptor in orfX._acceptor_sites:
        # TODO: check! do we need a combi of donor and acceptor or acceptor and acceptor?
        if acceptor.phase != preceding_donor.phase:
            continue
        if acceptor.pssm_score < min_acceptor_pssm_score:
            continue
        intron_length = acceptor.pos - preceding_donor.pos
        if intron_length < min_tinyexon_intron_nt_length:
            # intron too short
            continue
        if intron_length > max_tinyexon_intron_nt_length:
            # intron too long
            continue
        # if we reach this point, compatible site!
        compatible_acceptor_sites.append( acceptor )

    # make a list of compatible_donor_sites
    compatible_donor_sites = []
    for donor in orfX._donor_sites:
        # TODO: check! do we need a combi of donor and acceptor or donor and donor?
        if donor.phase != subsequent_acceptor.phase:
            continue
        if donor.pssm_score < min_donor_pssm_score:
            continue
        intron_length = subsequent_acceptor.pos - donor.pos
        if intron_length > max_tinyexon_intron_nt_length:
            # intron too long
            continue
        if intron_length < min_tinyexon_intron_nt_length:
            # intron too short
            continue
        # if we reach this point, compatible site!
        compatible_donor_sites.append( donor )

    # and combine sites to exons!
    for acceptor in compatible_acceptor_sites:
        for donor in compatible_donor_sites:
            # length of exon
            exon_length = donor.pos - acceptor.pos
            # continue if exon too short
            if exon_length < min_tinyexon_nt_length: continue
            # continue if exon too long
            if exon_length > max_tinyexon_nt_length: continue

            # check sum of donor and acceptor pssm score (0.0 is a valid
            # threshold, only None switches the check off)
            if min_total_pssm_score is not None and\
            donor.pssm_score + acceptor.pssm_score < min_total_pssm_score:
                continue

            # make an Exon object
            exon = ExonOnOrf(acceptor,donor,orfX)
            exons.append(exon)

    # return ordered exon list
    return _order_intron_list(exons,order_by=order_by)