コード例 #1
0
def _create_failed_intron_gff(introndata, assessed_interfaces, intron2label):
    """ """
    failed_introns = {}
    for kk in introndata.keys():
        start, end = kk
        introns = introndata[kk]
        label = intron2label[kk]
        value = assessed_interfaces[label]
        if start in [s for s, e in failed_introns.keys()]:
            if end < min([e for s, e in failed_introns.keys()]):
                # replace this key
                for s, e in failed_introns.keys():
                    if start == s and e == min([_e for _s, _e in failed_introns.keys()]):
                        del (failed_introns[(s, e)])
                        failed_introns[kk] = value
                        break
        elif end in [e for s, e in failed_introns.keys()]:
            if start > max([s for s, e in failed_introns.keys()]):
                # replace this key
                for s, e in failed_introns.keys():
                    if end == e and s == max([_s for _s, _e in failed_introns.keys()]):
                        del (failed_introns[(s, e)])
                        failed_introns[kk] = value
                        break
        else:
            # new key -> store
            failed_introns[kk] = value

    # create `failed` intron gff lines
    gfflines = []
    for (start, end) in failed_introns.keys():
        assesed_org_list = failed_introns[(start, end)]
        for orgid in assesed_org_list:
            if orgid in [intron._reference for intron in introndata[(start, end)]]:
                continue
            # if here, a failed organism
            gclass = "failedintron"  # "%s_%s" % (start, end)
            gname = "%s_%s_%s" % (orgid, start, end)

            orgSfullname = _get_organism_full_name(orgid)
            # if ABGP_ORGANISM2FULLSPECIESNAME_MAPPING.has_key(orgid):
            #    orgSfullname = ABGP_ORGANISM2FULLSPECIESNAME_MAPPING[orgid]
            # elif ABGP_ORGANISM2FULLSPECIESNAME_MAPPING.has_key(orgid[0:-1]):
            #    orgSfullname = ABGP_ORGANISM2FULLSPECIESNAME_MAPPING[orgid[0:-1]]
            # else:
            #    orgSfullname = orgid
            # create gclass/gname and GFF decoration string
            lastcol = """%s %s; Reference %s; Note '%s'""" % (gclass, gname, orgid, orgSfullname)
            # get fmethod/class from 1 of the intron(s)
            example = introndata[(start, end)][0]
            fmethod = example.__class__.__name__
            gffline = (None, "ABGPfailed", fmethod, start + 1, end, ".", "+", ".", lastcol)
            gfflines.append(gffline)

    # return gfflines list
    return gfflines
コード例 #2
0
ファイル: lib_signalp.py プロジェクト: dongqing7/ABFGP
def create_projected_signalp_for_informants(organism,
                                            PCG,
                                            input,
                                            minimal_aa_overlap=None,
                                            verbose=False):
    """Project the informants' SignalPeptides onto the target organism.

    For every organism in ``PCG.organism_set()`` other than ``organism``,
    each SignalPeptide found on the informant Orf of a pairwise alignment is
    mapped to query (target) DNA coordinates with
    ``pacbporf.dnapos_sbjct2query``. Projections are made unique per
    informant organism on their ``(start, end)`` coordinates; when several
    share coordinates, the last one created is kept.

    Args:
        organism: identifier of the target organism.
        PCG: object providing ``organism_set()`` and
            ``get_pacbps_by_organisms(organism, orgS)``.
        input: mapping indexed as ``input[orgS]['proteinfref']``; the value
            is placed in the ``Informant`` tag of each projection.
        minimal_aa_overlap: when truthy, alignments whose target Orf has no
            SignalPeptides are skipped, and a projection is kept only if the
            number of nucleotide positions it shares with the target Orf's
            own SignalPeptides, floor-divided by 3, is at least this value.
        verbose: when truthy, print progress information to stdout.

    Returns:
        A list of ``ProjectedSignalPSignalPeptide`` objects.
    """
    # NOTE: every print below takes ONE pre-formatted string in parentheses,
    # which produces the same output as a Python 2 print statement and is
    # also valid as a Python 3 print() call.
    return_projsignalpeptides = []

    for orgS in PCG.organism_set():
        if organism == orgS:
            continue
        if verbose:
            print("%s %s" % (orgS, organism))
        # dict in order to create unique projections
        projections = {}
        for pacbporf in PCG.get_pacbps_by_organisms(organism, orgS):

            if not (pacbporf.orfS._has_signalp_sites_predicted and
                    pacbporf.orfS._signalp_sites):
                # no SignalPeptides on this Orf of the informant
                continue

            if minimal_aa_overlap and not pacbporf.orfQ._signalp_sites:
                # overlap required AND no SignalPeptides on the target Orf
                continue

            # DNA positions covered by the target gene's SignalPeptides
            qdnarange = set()
            for spQ in pacbporf.orfQ._signalp_sites:
                qdnarange.update(range(spQ.start, spQ.end))

            # get full name tag of informant
            orgSfullname = "%s [%s]" % (_get_organism_full_name(
                orgS, truncate=True), input[orgS]['proteinfref'])

            # loop over the informant's SignalPeptides
            for spS in pacbporf.orfS._signalp_sites:
                prjQSPstart = pacbporf.dnapos_sbjct2query(spS.start)
                prjQSPend = pacbporf.dnapos_sbjct2query(spS.end)
                prjQTSSstart = pacbporf.dnapos_sbjct2query(spS.tss.start)
                prjQTSSend = pacbporf.dnapos_sbjct2query(spS.tss.end)
                coords = [prjQSPstart, prjQSPend, prjQTSSstart, prjQTSSend]
                if CoordinateOutOfRange in coords:
                    # projection on query NOT possible (out of range of Orf)
                    if verbose:
                        print("OUT-OF-RANGE: %s" % (spS,))
                    continue

                if verbose:
                    print("%s %s %s %s" % (
                        orgS, pacbporf,
                        len(pacbporf.orfQ._signalp_sites),
                        len(pacbporf.orfS._signalp_sites)))

                # calculate AA overlap with target organism SignalPeptides;
                # `//` keeps the integer result on Python 2 and Python 3
                sdnarange = set(range(prjQSPstart, prjQSPend))
                dna_overlap = sdnarange.intersection(qdnarange)
                aa_overlap = len(dna_overlap) // 3

                if minimal_aa_overlap and aa_overlap < minimal_aa_overlap:
                    # minimal overlap required and not achieved
                    continue

                # create a ProjectedSignalPSignalPeptide
                prjTSS = TranslationalStartSite(prjQTSSstart,
                                                "n" * 19,
                                                pssm_score=spS.tss.pssm_score)
                prjTSS._gff['fmethod'] = 'projectedTSSpssm'
                prjTSS._gff['column9data'] = {'Informant': orgSfullname}
                prjSignalP_gff = {
                    'fmethod': 'projectedSignalPeptide',
                    'column9data': {
                        'Informant': orgSfullname
                    },
                    'gname': "%s_%s_%s" % (orgS, prjQSPstart, prjQSPend)
                }
                prjSignalP = ProjectedSignalPSignalPeptide(prjQSPstart,
                                                           prjQSPend,
                                                           spS.pssm_score,
                                                           tss=prjTSS,
                                                           gff=prjSignalP_gff)
                # NOTE(review): these two assignments repeat values already
                # passed through `gff=`; kept because how the constructor
                # treats that argument is not visible from here.
                prjSignalP._gff['fmethod'] = 'projectedSignalPeptide'
                prjSignalP._gff['column9data'] = {'Informant': orgSfullname}

                # store to projections dict (same coordinates -> overwrite)
                projections[(prjSignalP.start, prjSignalP.end)] = prjSignalP

                if verbose:
                    print("%s %s" % (prjSignalP, aa_overlap))

        # store unique projections to return_projsignalpeptides
        return_projsignalpeptides.extend(projections.values())

    # return list of ProjectedSignalPeptides
    return return_projsignalpeptides
コード例 #3
0
ファイル: lib_introns_pairwise.py プロジェクト: IanReid/ABFGP
def _create_failed_intron_gff(introndata, assessed_interfaces, intron2label):
    """ """
    failed_introns = {}
    for kk in introndata.keys():
        start, end = kk
        introns = introndata[kk]
        label = intron2label[kk]
        value = assessed_interfaces[label]
        if start in [s for s, e in failed_introns.keys()]:
            if end < min([e for s, e in failed_introns.keys()]):
                # replace this key
                for s, e in failed_introns.keys():
                    if start == s and e == min(
                        [_e for _s, _e in failed_introns.keys()]):
                        del (failed_introns[(s, e)])
                        failed_introns[kk] = value
                        break
        elif end in [e for s, e in failed_introns.keys()]:
            if start > max([s for s, e in failed_introns.keys()]):
                # replace this key
                for s, e in failed_introns.keys():
                    if end == e and s == max(
                        [_s for _s, _e in failed_introns.keys()]):
                        del (failed_introns[(s, e)])
                        failed_introns[kk] = value
                        break
        else:
            # new key -> store
            failed_introns[kk] = value

    # create `failed` intron gff lines
    gfflines = []
    for (start, end) in failed_introns.keys():
        assesed_org_list = failed_introns[(start, end)]
        for orgid in assesed_org_list:
            if orgid in [
                    intron._reference for intron in introndata[(start, end)]
            ]:
                continue
            # if here, a failed organism
            gclass = "failedintron"  # "%s_%s" % (start, end)
            gname = "%s_%s_%s" % (orgid, start, end)

            orgSfullname = _get_organism_full_name(orgid)
            #if ABGP_ORGANISM2FULLSPECIESNAME_MAPPING.has_key(orgid):
            #    orgSfullname = ABGP_ORGANISM2FULLSPECIESNAME_MAPPING[orgid]
            #elif ABGP_ORGANISM2FULLSPECIESNAME_MAPPING.has_key(orgid[0:-1]):
            #    orgSfullname = ABGP_ORGANISM2FULLSPECIESNAME_MAPPING[orgid[0:-1]]
            #else:
            #    orgSfullname = orgid
            # create gclass/gname and GFF decoration string
            lastcol = """%s %s; Reference %s; Note '%s'""" % (
                gclass, gname, orgid, orgSfullname)
            # get fmethod/class from 1 of the intron(s)
            example = introndata[(start, end)][0]
            fmethod = example.__class__.__name__
            gffline = (None, 'ABGPfailed', fmethod, start + 1, end, ".", "+",
                       ".", lastcol)
            gfflines.append(gffline)

    # return gfflines list
    return gfflines
コード例 #4
0
ファイル: lib_signalp.py プロジェクト: IanReid/ABFGP
def create_projected_signalp_for_informants(organism,PCG,input,
    minimal_aa_overlap=None,verbose=False):
    """Project the informants' SignalPeptides onto the target organism.

    For every organism in ``PCG.organism_set()`` other than ``organism``,
    each SignalPeptide found on the informant Orf of a pairwise alignment is
    mapped to query (target) DNA coordinates with
    ``pacbporf.dnapos_sbjct2query``. Projections are made unique per
    informant organism on their ``(start, end)`` coordinates; when several
    share coordinates, the last one created is kept.

    Args:
        organism: identifier of the target organism.
        PCG: object providing ``organism_set()`` and
            ``get_pacbps_by_organisms(organism, orgS)``.
        input: mapping indexed as ``input[orgS]['proteinfref']``; the value
            is placed in the ``Informant`` tag of each projection.
        minimal_aa_overlap: when truthy, alignments whose target Orf has no
            SignalPeptides are skipped, and a projection is kept only if the
            number of nucleotide positions it shares with the target Orf's
            own SignalPeptides, floor-divided by 3, is at least this value.
        verbose: when truthy, print progress information to stdout.

    Returns:
        A list of ``ProjectedSignalPSignalPeptide`` objects.
    """
    # NOTE: every print below takes ONE pre-formatted string in parentheses,
    # which produces the same output as a Python 2 print statement and is
    # also valid as a Python 3 print() call.
    return_projsignalpeptides = []

    for orgS in PCG.organism_set():
        if organism == orgS:
            continue
        if verbose:
            print("%s %s" % (orgS, organism))
        # dict in order to create unique projections
        projections = {}
        for pacbporf in PCG.get_pacbps_by_organisms(organism, orgS):

            if not (pacbporf.orfS._has_signalp_sites_predicted and
                    pacbporf.orfS._signalp_sites):
                # no SignalPeptides on this Orf of the informant
                continue

            if minimal_aa_overlap and not pacbporf.orfQ._signalp_sites:
                # overlap required AND no SignalPeptides on the target Orf
                continue

            # DNA positions covered by the target gene's SignalPeptides
            qdnarange = set()
            for spQ in pacbporf.orfQ._signalp_sites:
                qdnarange.update(range(spQ.start, spQ.end))

            # get full name tag of informant
            orgSfullname = "%s [%s]" % (
                _get_organism_full_name(orgS, truncate=True),
                input[orgS]['proteinfref'])

            # loop over the informant's SignalPeptides
            for spS in pacbporf.orfS._signalp_sites:
                prjQSPstart = pacbporf.dnapos_sbjct2query(spS.start)
                prjQSPend = pacbporf.dnapos_sbjct2query(spS.end)
                prjQTSSstart = pacbporf.dnapos_sbjct2query(spS.tss.start)
                prjQTSSend = pacbporf.dnapos_sbjct2query(spS.tss.end)
                coords = [prjQSPstart, prjQSPend, prjQTSSstart, prjQTSSend]
                if CoordinateOutOfRange in coords:
                    # projection on query NOT possible (out of range of Orf)
                    if verbose:
                        print("OUT-OF-RANGE: %s" % (spS,))
                    continue

                if verbose:
                    print("%s %s %s %s" % (
                        orgS, pacbporf,
                        len(pacbporf.orfQ._signalp_sites),
                        len(pacbporf.orfS._signalp_sites)))

                # calculate AA overlap with target organism SignalPeptides;
                # `//` keeps the integer result on Python 2 and Python 3
                sdnarange = set(range(prjQSPstart, prjQSPend))
                dna_overlap = sdnarange.intersection(qdnarange)
                aa_overlap = len(dna_overlap) // 3

                if minimal_aa_overlap and aa_overlap < minimal_aa_overlap:
                    # minimal overlap required and not achieved
                    continue

                # create a ProjectedSignalPSignalPeptide
                prjTSS = TranslationalStartSite(
                    prjQTSSstart, "n" * 19,
                    pssm_score=spS.tss.pssm_score)
                prjTSS._gff['fmethod'] = 'projectedTSSpssm'
                prjTSS._gff['column9data'] = {'Informant': orgSfullname}
                prjSignalP_gff = {
                    'fmethod': 'projectedSignalPeptide',
                    'column9data': {'Informant': orgSfullname},
                    'gname': "%s_%s_%s" % (orgS, prjQSPstart, prjQSPend),
                }
                prjSignalP = ProjectedSignalPSignalPeptide(
                    prjQSPstart, prjQSPend,
                    spS.pssm_score, tss=prjTSS,
                    gff=prjSignalP_gff)
                # NOTE(review): these two assignments repeat values already
                # passed through `gff=`; kept because how the constructor
                # treats that argument is not visible from here.
                prjSignalP._gff['fmethod'] = 'projectedSignalPeptide'
                prjSignalP._gff['column9data'] = {'Informant': orgSfullname}

                # store to projections dict (same coordinates -> overwrite)
                projections[(prjSignalP.start, prjSignalP.end)] = prjSignalP

                if verbose:
                    print("%s %s" % (prjSignalP, aa_overlap))

        # store unique projections to return_projsignalpeptides
        return_projsignalpeptides.extend(projections.values())

    # return list of ProjectedSignalPeptides
    return return_projsignalpeptides