Python CDSHelper.getGeneId Examples

Programming Language: Python

Namespace/Package Name: data_helpers

Class/Type: CDSHelper

Method/Function: getGeneId

Examples at hotexamples.com: 3

Python CDSHelper.getGeneId - 3 examples found. These are the top-rated real-world Python examples of data_helpers.CDSHelper.getGeneId, extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

CDSHelper(14)

sequence(7)

shuffledSeqIds(7)

length(6)

seqId(4)

CDSlength(4)

getGeneId(3)

getCalculationResult2(2)

getShuffledSeq2(2)

getShuffledSeq(2)

dropShuffledSeqs(2)

flankingRegion3UtrLength(1)

getProtId(1)

commitChanges(1)

dropRecord(1)

getShuffledSeqId(1)

getTaxId(1)

dropNativeSeq(1)

nextCDSOnOppositeStrand(1)

saveCalculationResult2(1)

crc(1)

_fetchSequence(1)

checkCalculationResultWithWindows(1)

Example #1

Show file

File: convert_webgester_list.py Project: michaelpeeri/rnafold-rts-public

def getIdentifiersConversionTableUsingGff3():
    """Return the table mapping gene identifiers to protein ids.

    The table lives in the module-level ``altIdentifiers`` mapping and is
    built only once: if it already holds entries it is returned as-is.
    Otherwise, for every protein id yielded by ``SpeciesCDSSource(taxId)``
    the gene id and every identifier reported by the genome model's
    ``findEquivalentIdentifiers`` are mapped to that protein id.

    Returns:
        The populated ``altIdentifiers`` mapping. The table is returned on
        the first (building) call as well as on later cached calls.
    """
    global altIdentifiers

    # Already built by an earlier call - reuse it.
    if altIdentifiers:
        return altIdentifiers

    gm = getGenomeModelFromCache(taxId)

    for protId in SpeciesCDSSource(taxId):
        cds = CDSHelper(taxId, protId)
        geneId = cds.getGeneId()
        for alt in gm.findEquivalentIdentifiers(geneId):
            altIdentifiers[alt] = protId
        # Written after the aliases, so the gene id's own entry wins if an
        # alias happens to equal it.
        altIdentifiers[geneId] = protId

    # Without this, the call that builds the table would return None while
    # every later call returns the table.
    return altIdentifiers

Example #2

Show file

def testSpecies(taxId):
    """Count how many gene ids of a species appear in its PaxDB data.

    Iterates over the protein ids yielded by ``SpeciesCDSSource(taxId)``,
    resolves each to a gene id through ``CDSHelper`` and checks membership
    in the container returned by ``getSpeciesPaxdbData(taxId)``. A one-line
    summary is printed.

    Args:
        taxId: Identifier of the species to test.

    Returns:
        Tuple ``(countFound, countNotFound)``.
    """
    paData = getSpeciesPaxdbData( taxId )

    countFound = 0
    countNotFound = 0

    for protId in SpeciesCDSSource(taxId):
        geneId = CDSHelper( taxId=taxId, protId=protId ).getGeneId()

        if geneId in paData:
            countFound += 1
        else:
            countNotFound += 1

    # A species with no coding sequences would otherwise raise
    # ZeroDivisionError here; report 0.0% instead.
    total = countFound + countNotFound
    percentFound = countFound / total * 100 if total else 0.0

    print("Species: {} -> Found: {} ({:.3}%) Not found: {}".format(taxId, countFound, percentFound, countNotFound))
    return (countFound, countNotFound)

Example #3

Show file

File: export_intergenic_distances.py Project: michaelpeeri/rnafold-public

def processGenome(args, taxId):
    """Export flanking-distance and start-region data for each CDS of a genome.

    For every protein id yielded by ``SpeciesCDSSource(taxId)`` the matching
    feature is looked up in the genome model, the distance to the neighbouring
    feature is computed, and a short sequence window around one end of the
    feature is extracted. Records whose window does not end in ``'TG'`` are
    skipped. The remaining windows (minus their last 3 characters) are passed
    to ``calculateaSDEnergies``, and one CSV line per kept record is written
    to ``outputData.format(taxId)``.

    Args:
        args: Passed through unchanged to ``calculateaSDEnergies``.
        taxId: Identifier of the genome to process.

    Returns:
        None. Results are written to the output file and totals are printed.
    """
    totalProteinsProcessed = 0
    totalSkipped = 0

    seqsForWriting = []
    recordsForWriting = {}

    gm = getGenomeModelFromCache( taxId )

    for protId in SpeciesCDSSource(taxId):
        cds = CDSHelper( taxId, protId )
        totalProteinsProcessed += 1

        geneId = cds.getGeneId()

        # feature[0] keys into gm.moleculeModels; feature[1] carries
        # .begin/.end/.data for the feature itself.
        feature = gm.findFeatureById( protId )
        strand = feature[1].data['strand']
        moleculeModel = gm.moleculeModels[ feature[0] ]

        # The lookup, the None check and the sanity assert are the same for
        # both strands, so they are done once before branching.
        otherFeature = moleculeModel.find5PrimeFlankingRegion( feature[1] )

        if otherFeature is None:
            totalSkipped += 1
            continue

        assert( otherFeature['downstream-feature'].begin <= otherFeature['downstream-feature'].end)

        if strand == '+':
            flanking3UTRRegionLengthNt = otherFeature['curr-feature'].begin - otherFeature['downstream-feature'].end
            # 20 positions before the feature start plus the first 3 nucleotides of the CDS
            threePrimeUTRCoords = (feature[1].begin-20, feature[1].begin+2, False)
        else:
            flanking3UTRRegionLengthNt = otherFeature['downstream-feature'].begin - otherFeature['curr-feature'].end
            # Mirror image of the '+' case, taken at the feature end.
            # NOTE(review): the third element is presumably a
            # reverse-complement flag for getSequence - confirm.
            threePrimeUTRCoords = (feature[1].end-3, feature[1].end+20, True)

        threePrimeUTR = moleculeModel.getSequence( *threePrimeUTRCoords )

        # A long apparent overlap is only reported; the record is still kept.
        if flanking3UTRRegionLengthNt < -50:
            print("Warning: found gene with apparent long overlap: {},{},{},{},{}".format( protId, geneId, strand, flanking3UTRRegionLengthNt, threePrimeUTR.seq ))

        # The window must end in 'TG' (the tail of the start codon); if not,
        # the start codon is not where it is expected and the record is dropped.
        if threePrimeUTR.seq[-2:] != 'TG':
            print("Warning: skipping gene without start codon at the correct place: {},{},{},{},{}".format( protId, geneId, strand, flanking3UTRRegionLengthNt, threePrimeUTR.seq ))
            totalSkipped += 1
            continue

        # All done - keep the record for output
        recordsForWriting[protId] = (geneId, strand, flanking3UTRRegionLengthNt, threePrimeUTR.seq )

        # The last 3 characters (the CDS nucleotides) are dropped from the
        # sequence handed to the energy calculation.
        seqsForWriting.append( SeqRecord( Seq(threePrimeUTR.seq[:-3], NucleotideAlphabet), id=protId) )

    aSD = calculateaSDEnergies( seqsForWriting, args, taxId )
    print(len(aSD))

    with open( outputData.format(taxId), 'wt') as fout:
        for protId, record in recordsForWriting.items():
            # Records without a computed value are written with None.
            aSDval = aSD.get(protId, None)
            vals = (protId,) + record + (aSDval,)
            fout.write("{},{},{},{},{},{}\n".format( *vals ))

    print("Processed {} coding sequences for taxid {}".format( totalProteinsProcessed, taxId ))
    print("Skipped {} coding sequences".format( totalSkipped ))