Ejemplo n.º 1
0
def update_PCG_with_signalpexons(signalpexonseqs,
                                 PCG,
                                 OPTIONS,
                                 min_pacbporf_identityscore=0.20,
                                 verbose=True):
    """Add SignalP-exon guided ClustalW PacbPORFs to the PCG.

    For every SignalP exon of the target organism (``OPTIONS.target``) and
    for every informant organism that is still listed in
    ``PCG.organism_set()``, each informant SignalP exon is first checked for
    placeability against the ordered PacbPORFs already present in the PCG
    for that organism pair.  Placeable exon pairs are aligned with
    ``clustalw``, converted into a PacbP and then a PacbPORF.  Per target
    exon / informant combination, the candidate that comes first after
    ordering on ``'bits'`` (reversed) is handed to ``pacbporf2PCG`` with
    source ``'SignalP-ClustalW'``.

    Args:
        signalpexonseqs: dict-like mapping of organism identifier to a list
            of SignalP exon objects.  The exon objects must provide
            ``protein_start()``, ``protein_end()``, ``proteinsequence()``
            and an ``orf`` attribute (with an ``id``).
        PCG: object providing ``organism_set()`` and
            ``get_pacbps_by_organisms()``; also passed on to
            ``pacbporf2PCG``.
        OPTIONS: object whose ``target`` attribute identifies the target
            organism.
        min_pacbporf_identityscore: alignments whose ``identityscore`` is
            below this value are skipped.
        verbose: when true, alignment details and a message for every
            stored PacbPORF are printed to stdout.

    Returns:
        ``False`` when the target has no entry in ``signalpexonseqs``;
        otherwise ``True`` if at least one PacbPORF was handed to
        ``pacbporf2PCG`` and ``False`` if none was.
    """
    target = OPTIONS.target
    if target not in signalpexonseqs:
        return False

    is_any_pacbporf_added = False
    for targetSPexon in signalpexonseqs[target]:
        for informant, infSPlist in signalpexonseqs.items():
            if informant == target:
                continue
            # the informant may have been deleted from the PCG in the meanwhile
            if informant not in PCG.organism_set():
                continue
            # ordered pacbporfs currently in the PCG for this organism pair
            thepacbporfs = order_pacbporf_list(
                PCG.get_pacbps_by_organisms(target, informant))
            if not thepacbporfs:
                # no alignments present for this organism (can happen!)
                continue

            # candidate SignalP-exon PacbPORFs for this target exon/informant
            signalpexon_pacbp_list = []
            for informantSPexon in infSPlist:
                coords = [
                    targetSPexon.protein_start(),
                    targetSPexon.protein_end(),
                    informantSPexon.protein_start(),
                    informantSPexon.protein_end(),
                ]

                # prior to making the ClustalW-PacbP, check placeability of
                # the exon pair into the list of pacbporfs
                pacbpCoordsObj = PacbPCOORDS(input=(
                    targetSPexon.proteinsequence(),
                    informantSPexon.proteinsequence(),
                    targetSPexon.protein_start(),
                    informantSPexon.protein_start(),
                ))

                # Kept as `False in [...]` (equality test on every result)
                # rather than `not all(...)`, so that a non-boolean falsy
                # return value such as None is treated exactly as before.
                if False in [
                        pacbpCoordsObj.is_positioned_compatibly(pacbporf)
                        for pacbporf in thepacbporfs
                ]:
                    # *NOT* placeable in the current ordered list of PacbPORFs
                    continue

                dist = pacbpCoordsObj.distance_towards(thepacbporfs[0])
                # NOTE(review): the `/ 3` presumably converts a nucleotide
                # length into an amino-acid length -- confirm.  For an
                # integer-valued `dist` the comparison gives the same result
                # under integer and true division.
                if dist > SIGNALP_FIRSTEXON_MAX_INTRON_NT_LENGTH / 3:
                    # WAY too far in front of the current gene structure
                    # parts; do not allow.
                    continue
                if dist == 0:
                    # NOT placeable in front of the rest of the PacbPORFs.
                    continue

                # perform ClustalW alignment on the SP exons
                (alignedseqs, alignment) = clustalw(seqs={
                    target: targetSPexon.proteinsequence(),
                    informant: informantSPexon.proteinsequence(),
                })

                # make pacbp from clustalw alignment
                pacbp = pacbp_from_clustalw(
                    alignment=(alignedseqs[target], alignment,
                               alignedseqs[informant]),
                    coords=coords)

                # is there any alignment constructed?
                if not pacbp:
                    continue

                # ignore (very) poor identityscore alignments
                if pacbp.identityscore < min_pacbporf_identityscore:
                    continue

                # if here, make the extended pacbpORF and store it
                signalpexonPacbpORF = pacbp2pacbporf(pacbp, targetSPexon.orf,
                                                     informantSPexon.orf)
                signalpexonPacbpORF.extend_pacbporf_after_stops()
                signalpexon_pacbp_list.append(signalpexonPacbpORF)

                if verbose:
                    # Single pre-formatted strings: valid as Python 2 print
                    # statements and as Python 3 print() calls, and they
                    # emit the same text the former print statements did.
                    print("%s %s" % (alignedseqs[target], target))
                    print(alignment)
                    print("%s %s" % (alignedseqs[informant], informant))
                    print("%s %s %s DISTANCE:: %s" % (
                        pacbp,
                        (target, targetSPexon.orf.id),
                        (informant, informantSPexon.orf.id),
                        dist))
                    pacbp.print_protein()
                    print("")

            # If there are signalpexon-guided pacbporfs found, store the one
            # that comes first when ordered on 'bits' (reversed)
            if signalpexon_pacbp_list:
                signalpexon_pacbp_list = order_list_by_attribute(
                    signalpexon_pacbp_list, order_by='bits', reversed=True)
                signalp_pacbporf = signalpexon_pacbp_list[0]
                pacbporf2PCG(signalp_pacbporf,
                             target,
                             informant,
                             PCG,
                             source='SignalP-ClustalW')
                is_any_pacbporf_added = True
                if verbose:
                    print("SignalP Exon added to PCG: %s %s" %
                          (signalp_pacbporf, informant))

    return is_any_pacbporf_added
Ejemplo n.º 2
0
def _print_signalp_alignment(alignedseqs, alignment, pacbp, target, informant,
    target_orf_id, informant_orf_id, dist):
    """Print the ClustalW alignment and derived PacbP of a SignalP exon pair."""
    # Every print gets one pre-formatted string, which is valid both as a
    # Python 2 print statement and as a Python 3 print() call and emits the
    # same text as the former comma-separated print statements.
    print("%s %s" % (alignedseqs[target], target))
    print(alignment)
    print("%s %s" % (alignedseqs[informant], informant))
    print("%s %s %s DISTANCE:: %s" % (
        pacbp, (target, target_orf_id), (informant, informant_orf_id), dist))
    pacbp.print_protein()
    print("")


def update_PCG_with_signalpexons(signalpexonseqs,PCG,OPTIONS,
    min_pacbporf_identityscore=0.20,verbose=True):
    """
    Store SignalP-exon based ClustalW PacbPORFs of target vs. informants in the PCG

    @type  signalpexonseqs: dict
    @param signalpexonseqs: organism identifier -> list of SignalP exon
                            objects (offering protein_start(), protein_end(),
                            proteinsequence() and an `orf` attribute)

    @type  PCG: object
    @param PCG: offers organism_set() and get_pacbps_by_organisms(); it is
                handed to pacbporf2PCG() as well

    @type  OPTIONS: object
    @param OPTIONS: only its `target` attribute (target organism) is read

    @type  min_pacbporf_identityscore: float
    @param min_pacbporf_identityscore: PacbPs with a lower identityscore
                                       are ignored

    @type  verbose: Boolean
    @param verbose: print alignments and stored PacbPORFs to STDOUT

    @rtype:  Boolean
    @return: False if the target has no SignalP exons listed or if no
             PacbPORF was handed to pacbporf2PCG(); True otherwise
    """
    target = OPTIONS.target
    if target not in signalpexonseqs: return False
    is_any_pacbporf_added = False
    for targetSPexon in signalpexonseqs[target]:
        for informant,infSPlist in signalpexonseqs.items():
            if informant == target: continue
            # check if informant has been deleted in the meanwhile
            if informant not in PCG.organism_set(): continue
            # get ordered pacbporfs from the PCG
            thepacbporfs = order_pacbporf_list(
                    PCG.get_pacbps_by_organisms(target,informant))
            if not thepacbporfs:
                # no alignments present for this organism (can happen!)
                continue

            # list to store signalp exon pacbporfs into
            signalpexon_pacbp_list = []
            for informantSPexon in infSPlist:
                coords  = [ targetSPexon.protein_start(),
                            targetSPexon.protein_end(),
                            informantSPexon.protein_start(),
                            informantSPexon.protein_end(), ]

                # prior to making ClustalW-PacbP, check PacbPCOORD placeability
                # into the list of pacbporfs
                pacbpCoordsObj = PacbPCOORDS(input=(
                        targetSPexon.proteinsequence(),
                        informantSPexon.proteinsequence(),
                        targetSPexon.protein_start(),
                        informantSPexon.protein_start(),
                        ) )

                # `False in [...]` is kept on purpose (instead of all()):
                # it leaves the handling of non-boolean return values as is
                compatibility = [ pacbpCoordsObj.is_positioned_compatibly(pacbporf)
                                  for pacbporf in thepacbporfs ]
                if False in compatibility:
                    # *NOT* placeable in current ordered list of PacbPORFS
                    continue

                dist = pacbpCoordsObj.distance_towards(thepacbporfs[0])
                # NOTE(review): `/3` presumably translates a nt length into
                # an AA length -- confirm. For an integer `dist`, integer
                # and true division give the same outcome here.
                if dist > SIGNALP_FIRSTEXON_MAX_INTRON_NT_LENGTH/3 or dist == 0:
                    # either WAY too far in front of the current gene
                    # structure parts, or NOT placeable in front of the
                    # rest of the PacbPORFS.
                    continue

                # perform ClustalW alignment on the SP exons
                (alignedseqs,alignment) = clustalw( seqs= {
                    target: targetSPexon.proteinsequence(),
                    informant: informantSPexon.proteinsequence() } )

                # make pacbp from clustalw alignment
                pacbp = pacbp_from_clustalw(
                            alignment=(
                                    alignedseqs[target],
                                    alignment,
                                    alignedseqs[informant]
                                    ),
                            coords=coords
                            )

                # is there any alignment constructed?
                if not pacbp: continue

                # ignore (very) poor identityscore alignments
                if pacbp.identityscore < min_pacbporf_identityscore: continue

                # if here make extended pacbpORF
                signalpexonPacbpORF = pacbp2pacbporf(pacbp,
                        targetSPexon.orf,informantSPexon.orf)
                signalpexonPacbpORF.extend_pacbporf_after_stops()
                # and store in signalpexon_pacbp_list
                signalpexon_pacbp_list.append( signalpexonPacbpORF )

                ################################################################
                if verbose:
                    _print_signalp_alignment(alignedseqs,alignment,pacbp,
                            target,informant,targetSPexon.orf.id,
                            informantSPexon.orf.id,dist)
                ################################################################

            # If there are signalpexon-guided pacbporfs found, store the one
            # that is first in the list once ordered on `bits` (reversed)
            if signalpexon_pacbp_list:
                signalpexon_pacbp_list = order_list_by_attribute(
                        signalpexon_pacbp_list,order_by='bits',reversed=True)
                # store best bitscoring pacbporf to PCG
                signalp_pacbporf = signalpexon_pacbp_list[0]
                pacbporf2PCG(signalp_pacbporf,target,informant,PCG,
                        source='SignalP-ClustalW')
                is_any_pacbporf_added = True
                ####################################################################
                if verbose:
                    print("SignalP Exon added to PCG: %s %s" % (
                            signalp_pacbporf, informant))
                ####################################################################

    # return pointer is_any_pacbporf_added
    return is_any_pacbporf_added
Ejemplo n.º 3
0
def hmmpacbporf2PCG(hmmpacbporf, target, informant, PCG, source=''):
    """Label a PacbPORF as HMM-derived and pass it on to ``pacbporf2PCG``.

    The object's ``source`` attribute and its ``_gff['fsource']`` entry are
    both set to ``'HMM'``; after that, all arguments (including ``source``)
    are forwarded unchanged to ``pacbporf2PCG``.  Nothing is returned.
    """
    hmm_label = 'HMM'
    hmmpacbporf.source = hmm_label
    hmmpacbporf._gff['fsource'] = hmm_label
    pacbporf2PCG(hmmpacbporf, target, informant, PCG, source=source)
Ejemplo n.º 4
0
def hmmpacbporf2PCG(hmmpacbporf,target,informant,PCG,source=''):
    """
    Mark a PacbPORF as obtained by HMM and store it through pacbporf2PCG()

    Both `hmmpacbporf.source` and `hmmpacbporf._gff['fsource']` become
    'HMM'; the call to pacbporf2PCG() receives the arguments as given.
    This function has no return value.
    """
    # chained assignment: targets are assigned left to right, so the
    # `source` attribute is set before the `_gff` entry
    hmmpacbporf.source = hmmpacbporf._gff['fsource'] = 'HMM'
    pacbporf2PCG(
        hmmpacbporf,
        target,
        informant,
        PCG,
        source=source,
    )