Python Gene.orientation Examples

Programming Language: Python

Namespace/Package Name: mutalyzer.GenRecord

Class/Type: Gene

Method/Function: orientation

Examples at hotexamples.com: 5

Python Gene.orientation - 5 examples found. These are the top-rated real-world Python examples of mutalyzer.GenRecord.Gene.orientation, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Gene(4)

newLocusTag(3)

orientation(3)

location(1)

Example #1

Show file

def _get_mutalyzer_record(reference, db_transcripts):
    """
    Creates a Mutalyzer specific record from the transcript entries retrieved
    from the gbparser database.

    :param reference: A gbparser database reference entry. Its
        ``checksum_sequence`` and ``length`` attributes are used to locate
        and read the sequence file.
    :param db_transcripts: A gbparser database list of transcripts.
    :return: The Mutalyzer record, or ``None`` if reading the sequence file
        raised an ``IOError``.
    """
    record = _bare_record(reference)

    # Extracting the transcripts from the DB entries.
    transcripts = []
    for db_transcript in db_transcripts:
        transcript = {
            'gene': db_transcript.gene,
            'strand': db_transcript.strand,
            'transcript_start': db_transcript.transcript_start,
            'transcript_stop': db_transcript.transcript_stop,
            'transcript_product': db_transcript.transcript_product,
            'exons': [],
            'exons_start': db_transcript.exons_start,
            'exons_stop': db_transcript.exons_stop,
            'transcriptID': db_transcript.transcript_accession + '.' +
                            db_transcript.transcript_version,
        }
        # Protein related entries are only present for transcripts that have
        # both a protein accession and a protein version.
        if db_transcript.protein_accession is not None \
                and db_transcript.protein_version is not None:
            transcript['cds_start'] = db_transcript.cds_start
            transcript['cds_stop'] = db_transcript.cds_stop
            transcript['protein_product'] = db_transcript.protein_product
            transcript['proteinID'] = '%s.%s' % (
                db_transcript.protein_accession,
                db_transcript.protein_version)
            transcript['linkMethod'] = 'ncbi'

        # The exon boundaries are stored as comma separated strings. Build
        # real lists (not lazy ``map`` objects) so the truthiness and len()
        # checks below behave the same on Python 2 and Python 3.
        if db_transcript.exons_start:
            starts = [int(position)
                      for position in db_transcript.exons_start.split(',')]
        else:
            starts = []
        if db_transcript.exons_stop:
            stops = [int(position)
                     for position in db_transcript.exons_stop.split(',')]
        else:
            stops = []
        # Exons are only used when every start has a matching stop.
        if starts and stops and len(starts) == len(stops):
            transcript['exons'] = [{'start': start, 'stop': stop}
                                   for start, stop in zip(starts, stops)]
        transcripts.append(transcript)

    # Generating the actual record entries in the Mutalyzer format.
    gene_dict = {}
    for db_transcript in transcripts:
        # Transcripts of the same gene share a single Gene instance.
        if db_transcript['gene'] in gene_dict:
            gene = gene_dict[db_transcript['gene']]
        else:
            gene = Gene(db_transcript['gene'])

        # Any other strand value leaves the orientation untouched.
        if db_transcript['strand'] == '+':
            gene.orientation = 1
        elif db_transcript['strand'] == '-':
            gene.orientation = -1

        transcript = Locus(gene.newLocusTag())

        transcript.mRNA = PList()
        transcript.mRNA.location = [
            db_transcript['transcript_start'], db_transcript['transcript_stop']
        ]

        transcript.transcriptID = db_transcript['transcriptID']
        transcript.exon = PList()
        if db_transcript.get('exons') \
                and isinstance(db_transcript.get('exons'), list):
            exon_list = []
            for exon in db_transcript['exons']:
                exon_list.extend([exon['start'], exon['stop']])
            transcript.exon.positionList = exon_list
        else:
            # Without exons the transcript location doubles as exon list.
            transcript.exon.positionList = transcript.mRNA.location

        # Note: exon.positionList and mRNA.positionList are the same list
        # object (and, without exons, so is mRNA.location), hence the
        # in-place sort below affects all of them.
        transcript.mRNA.positionList = transcript.exon.positionList
        transcript.mRNA.positionList.sort()

        if db_transcript.get('proteinID'):
            transcript.CDS = PList()
            transcript.CDS.location = [
                db_transcript['cds_start'], db_transcript['cds_stop']
            ]

            transcript.CDS.positionList = cds_position_list(
                transcript.mRNA.positionList, transcript.CDS.location)

            transcript.proteinID = db_transcript['proteinID']

            transcript.transcriptProduct = db_transcript['transcript_product']
            transcript.proteinProduct = db_transcript['protein_product']
            transcript.linkMethod = 'ncbi'
            transcript.transcribe = True
            transcript.translate = True
        else:
            transcript.linkMethod = None
            transcript.transcribe = True
            transcript.translate = False
            transcript.locusTag = ''

        # transcript.molType = db_transcript['molType']

        gene.transcriptList.append(transcript)
        gene_dict[gene.name] = gene

    record.geneList = list(gene_dict.values())

    # Get the sequence.
    seq_path = settings.SEQ_PATH + reference.checksum_sequence + '.sequence'
    try:
        seq = Seq(_get_sequence_mmap(seq_path, 1, reference.length + 1),
                  generic_dna)
    except IOError:
        # The sequence file could not be read: no record can be provided.
        return None

    record.seq = seq
    return record

Example #2

Show file

File: nc_db.py Project: mutalyzer/mutalyzer

def _get_mutalyzer_record(reference, db_transcripts):
    """
    Creates a Mutalyzer specific record from the transcript entries retrieved
    from the gbparser database.

    :param reference: A gbparser database reference entry. Its
        ``checksum_sequence`` and ``length`` attributes are read to fetch the
        sequence.
    :param db_transcripts: A gbparser database list of transcripts.
    :return: The Mutalyzer record, or ``None`` when fetching the sequence
        raised an ``IOError``.
    """
    record = _bare_record(reference)

    # Extracting the transcripts from the DB entries.
    transcripts = []
    for db_transcript in db_transcripts:
        transcript = {
            'gene': db_transcript.gene,
            'strand': db_transcript.strand,
            'transcript_start': db_transcript.transcript_start,
            'transcript_stop': db_transcript.transcript_stop,
            'transcript_product': db_transcript.transcript_product,
            'exons': [],
            'exons_start': db_transcript.exons_start,
            'exons_stop': db_transcript.exons_stop,
            'transcriptID': db_transcript.transcript_accession + '.' +
                            db_transcript.transcript_version,
        }
        # The CDS/protein keys only exist for transcripts with a complete
        # protein identifier (accession and version).
        if db_transcript.protein_accession is not None \
                and db_transcript.protein_version is not None:
            transcript['cds_start'] = db_transcript.cds_start
            transcript['cds_stop'] = db_transcript.cds_stop
            transcript['protein_product'] = db_transcript.protein_product
            transcript['proteinID'] = '%s.%s' % (
                db_transcript.protein_accession,
                db_transcript.protein_version)
            transcript['linkMethod'] = 'ncbi'

        # Exon starts/stops are comma separated strings. They are converted
        # to real lists because a ``map`` object (Python 3) supports neither
        # len() nor a meaningful truth test.
        starts = [int(value)
                  for value in db_transcript.exons_start.split(',')] \
            if db_transcript.exons_start else []
        stops = [int(value)
                 for value in db_transcript.exons_stop.split(',')] \
            if db_transcript.exons_stop else []
        # Only pair up the boundaries when both lists are complete.
        if starts and stops and len(starts) == len(stops):
            for start, stop in zip(starts, stops):
                transcript['exons'].append({'start': start, 'stop': stop})
        transcripts.append(transcript)

    # Generating the actual record entries in the Mutalyzer format.
    gene_dict = {}
    for db_transcript in transcripts:
        # Reuse the Gene created for an earlier transcript of this gene.
        if db_transcript['gene'] in gene_dict:
            gene = gene_dict[db_transcript['gene']]
        else:
            gene = Gene(db_transcript['gene'])

        # Strand values other than '+' and '-' keep the current orientation.
        if db_transcript['strand'] == '+':
            gene.orientation = 1
        elif db_transcript['strand'] == '-':
            gene.orientation = -1

        transcript = Locus(gene.newLocusTag())

        transcript.mRNA = PList()
        transcript.mRNA.location = [db_transcript['transcript_start'],
                                    db_transcript['transcript_stop']]

        transcript.transcriptID = db_transcript['transcriptID']
        transcript.exon = PList()
        if db_transcript.get('exons') \
                and isinstance(db_transcript.get('exons'), list):
            exon_list = []
            for exon in db_transcript['exons']:
                exon_list.extend([exon['start'], exon['stop']])
            transcript.exon.positionList = exon_list
        else:
            # No exon information: fall back on the transcript location.
            transcript.exon.positionList = transcript.mRNA.location

        # Note: this is an alias, not a copy. Sorting mRNA.positionList also
        # sorts exon.positionList (and mRNA.location in the fallback case).
        transcript.mRNA.positionList = transcript.exon.positionList
        transcript.mRNA.positionList.sort()

        if db_transcript.get('proteinID'):
            transcript.CDS = PList()
            transcript.CDS.location = [db_transcript['cds_start'],
                                       db_transcript['cds_stop']]

            transcript.CDS.positionList = cds_position_list(
                transcript.mRNA.positionList,
                transcript.CDS.location)

            transcript.proteinID = db_transcript['proteinID']

            transcript.transcriptProduct = db_transcript['transcript_product']
            transcript.proteinProduct = db_transcript['protein_product']
            transcript.linkMethod = 'ncbi'
            transcript.transcribe = True
            transcript.translate = True
        else:
            transcript.linkMethod = None
            transcript.transcribe = True
            transcript.translate = False
            transcript.locusTag = ''

        # transcript.molType = db_transcript['molType']

        gene.transcriptList.append(transcript)
        gene_dict[gene.name] = gene

    record.geneList = list(gene_dict.values())

    # Get the sequence.
    seq_path = settings.SEQ_PATH + reference.checksum_sequence + '.sequence'
    try:
        seq = Seq(_get_sequence_mmap(seq_path, 1, reference.length + 1),
                  generic_dna)
    except IOError:
        # Without a readable sequence there is no usable record.
        return None

    record.seq = seq
    return record

Example #3

Show file

File: nc_db.py Project: raux/mutalyzer

def _get_mutalyzer_record(reference, db_transcripts):
    """
    Creates a Mutalyzer specific record from the transcript entries retrieved
    from the gbparser database.

    :param reference: A gbparser database reference entry. Its
        ``checksum_sequence`` and ``length`` attributes are used to read the
        sequence.
    :param db_transcripts: A gbparser database list of transcripts.
    :return: The Mutalyzer record, or ``None`` if reading the sequence raised
        an ``IOError``.
    """
    record = _bare_record(reference)

    # Extracting the transcripts from the DB entries.
    transcripts = []
    for transcript in db_transcripts:
        my_transcript = {
            'gene': transcript.gene,
            'strand': transcript.strand,
            'transcript_start': transcript.transcript_start,
            'transcript_stop': transcript.transcript_stop,
            'cds_start': transcript.cds_start,
            'cds_stop': transcript.cds_stop,
            'exons': [],
            'exons_start': transcript.exons_start,
            'exons_stop': transcript.exons_stop,
            'transcriptID': transcript.transcript_accession + '.' +
            transcript.transcript_version,
            # NOTE(review): this concatenation raises a TypeError if the
            # protein accession or version is None - confirm that every
            # entry passed in here has a protein.
            'proteinID':
            transcript.protein_accession + '.' + transcript.protein_version,
            'linkMethod': 'ncbi'
        }

        # The exon boundaries are comma separated strings. Real lists are
        # built (instead of ``map`` objects) so that the truth test and
        # len() below also work on Python 3.
        if transcript.exons_start:
            starts = [int(item) for item in transcript.exons_start.split(',')]
        else:
            starts = []
        if transcript.exons_stop:
            stops = [int(item) for item in transcript.exons_stop.split(',')]
        else:
            stops = []
        # Exons are only stored when starts and stops pair up exactly.
        if starts and stops and len(starts) == len(stops):
            my_transcript['exons'] = [{'start': start, 'stop': stop}
                                      for start, stop in zip(starts, stops)]
        transcripts.append(my_transcript)

    # Generating the actual record entries in the Mutalyzer format.
    gene_dict = {}
    for transcript in transcripts:
        # All transcripts of one gene are attached to the same Gene.
        if transcript['gene'] in gene_dict:
            gene = gene_dict[transcript['gene']]
        else:
            gene = Gene(transcript['gene'])

        # Other strand values do not change the orientation.
        if transcript['strand'] == '+':
            gene.orientation = 1
        elif transcript['strand'] == '-':
            gene.orientation = -1

        my_transcript = Locus(gene.newLocusTag())

        my_transcript.mRNA = PList()
        my_transcript.mRNA.location = [
            transcript['transcript_start'], transcript['transcript_stop']
        ]

        my_transcript.CDS = PList()
        my_transcript.CDS.location = [
            transcript['cds_start'], transcript['cds_stop']
        ]
        my_transcript.exon = PList()
        if transcript.get('exons') and isinstance(transcript.get('exons'),
                                                  list):
            exon_list = []
            for exon in transcript['exons']:
                exon_list.extend([exon['start'], exon['stop']])
            my_transcript.exon.positionList = exon_list
        else:
            # No exons available: use the transcript location instead.
            my_transcript.exon.positionList = my_transcript.mRNA.location

        # Note: mRNA.positionList is the very same list as exon.positionList
        # (and as mRNA.location when there are no exons), so the in-place
        # sort is visible through all of these attributes.
        my_transcript.mRNA.positionList = my_transcript.exon.positionList
        my_transcript.mRNA.positionList.sort()

        my_transcript.CDS.positionList = cds_position_list(
            my_transcript.mRNA.positionList, my_transcript.CDS.location)

        my_transcript.transcriptID = transcript['transcriptID']
        my_transcript.proteinID = transcript['proteinID']
        my_transcript.linkMethod = 'ncbi'
        my_transcript.transcribe = True
        my_transcript.translate = True
        gene.transcriptList.append(my_transcript)
        gene_dict[gene.name] = gene

    record.geneList = list(gene_dict.values())

    # Get the sequence.
    seq_path = settings.SEQ_PATH + reference.checksum_sequence + '.sequence'
    try:
        seq = Seq(_get_sequence_mmap(seq_path, 1, reference.length + 1),
                  generic_dna)
    except IOError:
        # The sequence could not be read, so no record is returned.
        return None

    record.seq = seq
    return record

Example #4

Show file

    def create_record(self, filename):
        """
        Create a GenRecord.Record from a GenBank file

        @arg filename: The full path to the compressed GenBank file
        @type filename: unicode

        @return: A GenRecord.Record instance
        @rtype: object (record)
        """
        def make_locus(locus_tag, gene):
            """
            Create the Locus for a new transcript of {gene}, named after the
            numeric part of {locus_tag} or, if there is no locus tag, after
            a fresh locus tag requested from the gene.
            """
            if not locus_tag:
                return Locus(gene.newLocusTag())
            # Note: We use the last three characters of the locus_tag as a
            # unique transcript version id. This is also used to for the
            # protein-transcript link table.
            # Normally, locus_tag ends with three digits, but for some (e.g.
            # mobA on NC_011228, a plasmid) it ends with two digits prepended
            # with an underscore. Or prepended with a letter. We really want
            # a number, so 'fix' this by only looking for a numeric part.
            try:
                version = LOCUS_TAG_VERSION.findall(locus_tag)[0].zfill(3)
            except IndexError:
                version = '000'
            return Locus(version)

        # first create an intermediate genbank record with BioPython
        file_handle = bz2.BZ2File(filename, "r")
        try:
            biorecord = SeqIO.read(codecs.getreader('utf-8')(file_handle),
                                   "genbank")
        finally:
            # Release the file, also when parsing fails.
            file_handle.close()

        record = Record()
        record.seq = biorecord.seq

        # Note: The .source_* values may be different from the values we are
        #     working with, e.g. for UD slices where these values (taken from
        #     the genbank file) are from the original NC reference. We try to
        #     set the .id field to the working value in the caller.
        record.source_id = biorecord.id
        record.source_accession, record.source_version = biorecord.id.split(
            '.')[:2]
        record.source_gi = biorecord.annotations['gi']
        record.organism = biorecord.annotations['organism']

        # Todo: This will change once we support protein references
        if isinstance(biorecord.seq.alphabet, ProteinAlphabet):
            return record

        exonList = []
        geneDict = {}

        accInfo = biorecord.annotations['accessions']
        if len(accInfo) >= 3 and accInfo[1] == "REGION:":
            # Todo: This information is present in the genbank file if it is a
            #     UD sliced from a chromosome. We can get the same information
            #     for NM references from our mapping database and that way
            #     also provide chromosomal variant descriptions for those.
            region = accInfo[2]
            if "complement" in region:
                record.orientation = -1
                # The third '.'-separated part, minus its last character.
                record.chromOffset = int(region.split('.')[2][:-1])
            else:
                record.chromOffset = int(region.split('.')[0])

        # First pass: collect the genes and group the RNA, CDS and exon
        # features.
        for feature in biorecord.features:
            if not feature.qualifiers:
                continue
            qualifiers = feature.qualifiers

            if feature.type == "source":
                if "mol_type" in qualifiers:
                    if qualifiers["mol_type"][0] in ["mRNA",
                                                     "transcribed RNA"]:
                        record.molType = 'n'
                    else:
                        record.molType = 'g'
                if "organelle" in qualifiers:
                    record.organelle = qualifiers["organelle"][0]
                    if record.organelle == "mitochondrion":
                        record.molType = 'm'

                fakeGene = Locus("001")
                record.source.transcriptList.append(fakeGene)
                fakeGene.CDS = PList()
                fakeGene.CDS.location = self.__location2pos(feature.location)

            if "gene" not in qualifiers:
                continue

            if not unicode(feature.location.start).isdigit() or \
               not unicode(feature.location.end).isdigit():
                # Feature is not completely in reference. Either start
                # or end is not a Bio.SeqFeature.ExactPosition.
                continue

            geneName = qualifiers["gene"][0]
            if feature.type == "gene":
                if geneName not in geneDict:
                    myGene = Gene(geneName)
                    record.geneList.append(myGene)
                    if feature.strand:
                        myGene.orientation = feature.strand
                    myGene.location = self.__location2pos(feature.location)
                    geneDict[geneName] = tempGene(geneName)
            elif geneName not in geneDict:
                # We should have seen a gene entry for this gene by now.
                # Could be that it was skipped because it was not completely
                # in reference (see check above). In that case we just
                # ignore any of its features.
                continue

            if feature.type in ["mRNA", "misc_RNA", "ncRNA", "rRNA", "tRNA",
                                "tmRNA"]:
                geneDict[geneName].rnaList.append(feature)
            if feature.type == "CDS":
                geneDict[geneName].cdsList.append(feature)
            if feature.type == "exon":
                exonLocation = self.__location2pos(feature.location)
                if exonLocation:
                    exonList.extend(exonLocation)

        if record.molType in ['g', 'm']:
            for myGene in geneDict.values():
                self.link(myGene.rnaList, myGene.cdsList)

                # Transcripts defined by an RNA feature.
                for rna in myGene.rnaList:
                    if not rna.usable:
                        continue
                    myRealGene = record.findGene(rna.gene)
                    myTranscript = make_locus(rna.locus_tag, myRealGene)
                    myTranscript.mRNA = PList()
                    myTranscript.mRNA.positionList = rna.positionList
                    myTranscript.mRNA.location = rna.location
                    myTranscript.transcribe = True
                    myTranscript.transcriptID = rna.transcript_id
                    myTranscript.transcriptProduct = rna.product
                    myTranscript.locusTag = rna.locus_tag
                    if rna.link:
                        myTranscript.CDS = PList()
                        myTranscript.CDS.positionList = rna.link.positionList
                        myTranscript.CDS.location = rna.link.location
                        myTranscript.translate = True
                        myTranscript.proteinID = rna.link.protein_id
                        myTranscript.linkMethod = rna.linkMethod
                        myTranscript.proteinProduct = rna.link.product
                        if "transl_table" in rna.link.qualifiers:
                            # Read the value from the same (linked CDS)
                            # qualifiers that were just checked, not from
                            # the qualifiers of the RNA feature.
                            myTranscript.txTable = int(
                                rna.link.qualifiers["transl_table"][0])
                    myRealGene.transcriptList.append(myTranscript)

                # Transcripts defined by a CDS feature that is not linked to
                # an RNA feature.
                for cds in myGene.cdsList:
                    if not cds.linked and \
                       (cds.usable or not geneDict[myGene.name].rnaList):
                        myRealGene = record.findGene(cds.gene)
                        myTranscript = make_locus(cds.locus_tag, myRealGene)
                        myTranscript.CDS = PList()
                        myTranscript.CDS.positionList = cds.positionList
                        myTranscript.CDS.location = cds.location
                        myTranscript.proteinID = cds.protein_id
                        myTranscript.proteinProduct = cds.product
                        if "transl_table" in cds.qualifiers:
                            myTranscript.txTable = \
                                int(cds.qualifiers["transl_table"][0])
                        myRealGene.transcriptList.append(myTranscript)
        elif geneDict:
            # NOTE(review): the first key of geneDict and the first entry of
            # record.geneList are presumably the same gene; this is not
            # guaranteed if the reference has more than one gene - confirm.
            myGene = geneDict[geneDict.keys()[0]]
            myRealGene = record.geneList[0]
            if myGene.cdsList:
                myCDS = myGene.cdsList[0]
                self.__tagByDict(myCDS, "protein_id")
                self.__tagByDict(myCDS, "product")
            else:
                myCDS = None
            myTranscript = Locus("001")
            myTranscript.exon = PList()
            if exonList:
                myTranscript.exon.positionList = exonList
            else:
                myTranscript.exon.location = myRealGene.location
            if myCDS:
                myTranscript.CDS = PList()
                myTranscript.CDS.location = \
                    self.__location2pos(myCDS.location)
            # Only look at the CDS location if a CDS was attached above.
            # NOTE(review): presumably a fresh Locus has no CDS, in which
            # case the unguarded attribute access would fail - confirm.
            if exonList or myRealGene.location or \
               (myCDS and myTranscript.CDS.location):
                myTranscript.transcriptID = biorecord.id
                if myCDS:
                    myTranscript.proteinID = myCDS.protein_id
                    myTranscript.proteinProduct = myCDS.product
                    myTranscript.linkMethod = "exhaustion"
                    myTranscript.transcribe = True
                    if "transl_table" in myCDS.qualifiers:
                        # Use the qualifiers of the CDS itself, not those of
                        # whatever feature was processed last in the loop
                        # over the features.
                        myTranscript.txTable = \
                            int(myCDS.qualifiers["transl_table"][0])
                myRealGene.transcriptList.append(myTranscript)

        # Drop the genes without transcripts. The list is rebuilt in place
        # because removing items from a list while iterating over it skips
        # the item following each removed one.
        record.geneList[:] = [gene for gene in record.geneList
                              if gene.transcriptList]

        return record

Example #5

Show file

File: genbank.py Project: cchng/mutalyzer

    def create_record(self, filename):
        """
        Create a GenRecord.Record from a GenBank file

        @arg filename: The full path to the compressed GenBank file
        @type filename: unicode

        @return: A GenRecord.Record instance
        @rtype: object (record)
        """
        # first create an intermediate genbank record with BioPython
        file_handle = bz2.BZ2File(filename, "r")
        file_handle = codecs.getreader('utf-8')(file_handle)
        biorecord = SeqIO.read(file_handle, "genbank")
        file_handle.close()

        record = Record()
        record.seq = biorecord.seq

        # Note: The .source_* values may be different from the values we are
        #     working with, e.g. for UD slices where these values (taken from
        #     the genbank file) are from the original NC reference. We try to
        #     set the .id field to the working value in the caller.
        record.source_id = biorecord.id
        record.source_accession, record.source_version = biorecord.id.split('.')[:2]
        record.source_gi = biorecord.annotations['gi']
        record.organism = biorecord.annotations['organism']

        # Todo: This will change once we support protein references
        if isinstance(biorecord.seq.alphabet, ProteinAlphabet):
            return record

        exonList = []
        geneDict = {}

        accInfo = biorecord.annotations['accessions']
        if len(accInfo) >= 3 and accInfo[1] == "REGION:":
            # Todo: This information is present in the genbank file if it is a
            #     UD sliced from a chromosome. We can get the same information
            #     for NM references from our mapping database and that way
            #     also provide chromosomal variant descriptions for those.
            region = accInfo[2]
            if "complement" in region :
                record.orientation = -1
                record.chromOffset = int(region.split('.')[2][:-1])
            #if
            else :
                record.chromOffset = int(accInfo[2].split('.')[0])
        #if
        for i in biorecord.features :
            if i.qualifiers :
                if i.type == "source" :
                    if i.qualifiers.has_key("mol_type") :
                        if i.qualifiers["mol_type"][0] in ["mRNA", \
                           "transcribed RNA"] :
                            record.molType = 'n'
                        else :
                            record.molType = 'g'
                    #if
                    if i.qualifiers.has_key("organelle") :
                        record.organelle = i.qualifiers["organelle"][0]
                        if record.organelle == "mitochondrion" :
                            record.molType = 'm'
                    #if

                    fakeGene = Locus("001")
                    record.source.transcriptList.append(fakeGene)
                    fakeGene.CDS = PList()
                    fakeGene.CDS.location = self.__location2pos(i.location)
                #if

                if i.qualifiers.has_key("gene") :
                    if not unicode(i.location.start).isdigit() or \
                       not unicode(i.location.end).isdigit():
                        # Feature is not completely in reference. Either start
                        # or end is not a Bio.SeqFeature.ExactPosition.
                        continue

                    geneName = i.qualifiers["gene"][0]
                    if i.type == "gene" :
                        if not geneDict.has_key(geneName) :
                            myGene = Gene(geneName)
                            record.geneList.append(myGene)
                            if i.strand :
                                myGene.orientation = i.strand
                            myGene.location = self.__location2pos(i.location)
                            geneDict[geneName] = tempGene(geneName)
                        #if
                    else:
                        if geneName not in geneDict:
                            # We should have seen a gene entry for this gene
                            # by now. Could be that it was skipped because it
                            # was not completely in reference (see check
                            # above). In that case we just ignore any of its
                            # features.
                            continue
                    #if

                    if i.type in ["mRNA", "misc_RNA", "ncRNA", "rRNA", "tRNA",
                       "tmRNA"] :
                        geneDict[geneName].rnaList.append(i)
                    if i.type == "CDS" :
                        geneDict[geneName].cdsList.append(i)
                    if i.type == "exon" :
                        exonLocation = self.__location2pos(i.location)
                        if exonLocation :
                            exonList.extend(exonLocation)
                    #if
                #if
            #if
        #for
        if record.molType in ['g', 'm'] :
            for j in geneDict.keys() :
                myGene = geneDict[j]
                self.link(myGene.rnaList, myGene.cdsList)
                for i in myGene.rnaList :
                    if i.usable :
                        myRealGene = record.findGene(i.gene)
                        if i.locus_tag :
                            # Note: We use the last three characters of the
                            # locus_tag as a unique transcript version id.
                            # This is also used to for the protein-transcript
                            # link table.
                            # Normally, locus_tag ends with three digits, but
                            # for some (e.g. mobA on NC_011228, a plasmid) it
                            # ends with two digits prepended with an
                            # underscore. Or prepended with a letter. We
                            # really want a number, so 'fix' this by only
                            # looking for a numeric part.
                            try:
                                version = LOCUS_TAG_VERSION.findall(
                                    i.locus_tag)[0].zfill(3)
                            except IndexError:
                                version = '000'
                            myTranscript = Locus(version)
                        else :
                            myTranscript = Locus(myRealGene.newLocusTag())
                        myTranscript.mRNA = PList()
                        myTranscript.mRNA.positionList = i.positionList
                        myTranscript.mRNA.location = i.location
                        myTranscript.transcribe = True
                        myTranscript.transcriptID = i.transcript_id
                        myTranscript.transcriptProduct = i.product
                        myTranscript.locusTag = i.locus_tag
                        if i.link :
                            myTranscript.CDS = PList()
                            myTranscript.CDS.positionList = i.link.positionList
                            myTranscript.CDS.location = i.link.location
                            myTranscript.translate = True
                            myTranscript.proteinID = i.link.protein_id
                            myTranscript.linkMethod = i.linkMethod
                            myTranscript.proteinProduct = i.link.product
                            if i.link.qualifiers.has_key("transl_table") :
                                myTranscript.txTable = \
                                    int(i.qualifiers["transl_table"][0])
                        #if
                        myRealGene.transcriptList.append(myTranscript)
                    #if
                #for
                for i in myGene.cdsList :
                    if not i.linked and \
                       (i.usable or not geneDict[myGene.name].rnaList) :
                        myRealGene = record.findGene(i.gene)
                        if i.locus_tag :
                            # Note: We use the last three characters of the
                            # locus_tag as a unique transcript version id.
                            # This is also used for the protein-transcript
                            # link table.
                            # Normally, locus_tag ends with three digits, but
                            # for some (e.g. mobA on NC_011228, a plasmid) it
                            # ends with two digits prepended with an
                            # underscore. Or prepended with a letter. We
                            # really want a number, so 'fix' this by only
                            # looking for a numeric part.
                            try:
                                version = LOCUS_TAG_VERSION.findall(
                                    i.locus_tag)[0].zfill(3)
                            except IndexError:
                                version = '000'
                            myTranscript = Locus(version)
                        else :
                            myTranscript = Locus(myRealGene.newLocusTag())
                        myTranscript.CDS = PList()
                        myTranscript.CDS.positionList = i.positionList
                        myTranscript.CDS.location = i.location
                        myTranscript.proteinID = i.protein_id
                        myTranscript.proteinProduct = i.product
                        if i.qualifiers.has_key("transl_table") :
                            myTranscript.txTable = \
                                int(i.qualifiers["transl_table"][0])
                        myRealGene.transcriptList.append(myTranscript)
                        #if
                    #if
                #for
            #for
        #if
        else :
            if geneDict :
                myGene = geneDict[geneDict.keys()[0]]
                myRealGene = record.geneList[0]
                if myGene.cdsList :
                    myCDS = myGene.cdsList[0]
                    self.__tagByDict(myCDS, "protein_id")
                    self.__tagByDict(myCDS, "product")
                #if
                else :
                    myCDS = None
                myTranscript = Locus("001")
                myTranscript.exon = PList()
                if exonList :
                    myTranscript.exon.positionList = exonList
                else :
                    myTranscript.exon.location = myRealGene.location
                if myCDS :
                    myTranscript.CDS = PList()
                    myTranscript.CDS.location = \
                        self.__location2pos(myCDS.location)
                #if
                if exonList or myRealGene.location or \
                   myTranscript.CDS.location :
                    myTranscript.transcriptID = biorecord.id
                    if myCDS :
                        myTranscript.proteinID = myCDS.protein_id
                        myTranscript.proteinProduct = myCDS.product
                        myTranscript.linkMethod = "exhaustion"
                        myTranscript.transcribe = True
                        if myCDS.qualifiers.has_key("transl_table") :
                            myTranscript.txTable = \
                                int(i.qualifiers["transl_table"][0])
                    #if
                    myRealGene.transcriptList.append(myTranscript)
                #if
            #if
        #else
        for i in record.geneList :
            if not i.transcriptList :
                record.geneList.remove(i)

        return record