Esempio n. 1
0
    def setUp(self, **kwargs):
        """Register a small DNA database, annotation database and map.

        After running TestBase.setUp, this opens a BlastDB on
        'dnaseq.fasta', wraps three hard-coded slices in an AnnotationDB,
        indexes them in a pairwise NLMSA, stores all three under
        self.pygrData.Bio.Test, binds them with a many-to-many schema
        relation ('exons'), then commits and clears the metabase cache.
        The NLMSA and the BlastDB are closed on exit, even on error.

        **kwargs is accepted but not used by this method.
        """
        TestBase.setUp(self)
        fasta_path = testutil.datafile('dnaseq.fasta')
        nlmsa_path = testutil.tempdatafile('tryannot')

        dna_db = seqdb.BlastDB(fasta_path)
        try:
            dna_db.__doc__ = 'little dna'
            self.pygrData.Bio.Test.dna = dna_db

            # annotation ID -> (sequence ID, start, stop, name)
            slice_rows = {
                1: ('seq1', 5, 10, 'fred'),
                2: ('seq1', -60, -50, 'bob'),
                3: ('seq2', -20, -10, 'mary'),
            }
            # tuple position of each slice attribute in slice_rows
            field_index = dict(id=0, start=1, stop=2, name=3)
            annot_db = seqdb.AnnotationDB(slice_rows, dna_db,
                                          sliceAttrDict=field_index)
            annot_db.__doc__ = 'trivial annotation'
            self.pygrData.Bio.Test.annoDB = annot_db

            annot_map = cnestedlist.NLMSA(nlmsa_path, 'w',
                                          pairwiseMode=True,
                                          bidirectional=False)
            try:
                # every annotation must be added before the index is built
                for annot_id in annot_db:
                    annot_map.addAnnotation(annot_db[annot_id])
                annot_map.build()

                annot_map.__doc__ = 'trivial map'
                self.pygrData.Bio.Test.map = annot_map
                relation = metabase.ManyToManyRelation(
                    dna_db, annot_db, bindAttrs=('exons', ))
                self.schema.Bio.Test.map = relation
                self.metabase.commit()
                self.metabase.clear_cache()
            finally:
                annot_map.close()
        finally:
            dna_db.close()
Esempio n. 2
0
def populate_swissprot():
    """Fill the current worldbase with small swissprot test resources.

    Stores, in this order: a BlastDB built from the 'sp_hbb1' data file,
    a sequence fragment, a self-mapping of that database with its
    one-to-many schema ('buddy'), and an annotation database with its
    NLMSA map and many-to-many schema ('exons').
    """
    # sequence database built from the test data file
    swissprot = seqdb.BlastDB(testutil.datafile('sp_hbb1'))
    swissprot.__doc__ = 'little swissprot'
    worldbase.Bio.Seq.Swissprot.sp42 = swissprot

    # a sub-interval of one sequence, stored as its own resource
    hbb_seq = swissprot['HBB1_TORMA']
    fragment = hbb_seq[10:35]
    fragment.__doc__ = 'fragment'
    worldbase.Bio.Seq.frag = fragment

    # mapping from the database onto itself: HBB1_TORMA -> PRCA_ANAVA
    self_map = mapping.Mapping(sourceDB=swissprot, targetDB=swissprot)
    trypsin_seq = swissprot['PRCA_ANAVA']
    self_map[hbb_seq] = trypsin_seq
    self_map.__doc__ = 'map sp to itself'
    worldbase.Bio.Seq.spmap = self_map

    # schema for the self-mapping, bound as the 'buddy' attribute
    buddy_relation = metabase.OneToManyRelation(swissprot, swissprot,
                                                bindAttrs=('buddy', ))
    worldbase.schema.Bio.Seq.spmap = buddy_relation

    # annotation database holding a single slice of HBB1_TORMA
    slice_rows = {1: ('HBB1_TORMA', 10, 50)}
    field_index = dict(id=0, start=1, stop=2)
    annot_db = seqdb.AnnotationDB(slice_rows, swissprot,
                                  sliceAttrDict=field_index)
    first_annot = annot_db[1]

    # temporary location for the NLMSA index files
    work_dir = testutil.TempDir('exonAnnot')
    nlmsa_path = work_dir.subfile('cnested')
    annot_map = cnestedlist.NLMSA(nlmsa_path, 'w', pairwiseMode=True,
                                  bidirectional=False)
    annot_map.addAnnotation(first_annot)
    annot_map.build()

    # register the annotation database, its map and the 'exons' schema
    annot_db.__doc__ = 'a little annotation db'
    annot_map.__doc__ = 'a little map'
    worldbase.Bio.Annotation.annoDB = annot_db
    worldbase.Bio.Annotation.map = annot_map
    exon_relation = metabase.ManyToManyRelation(swissprot, annot_db,
                                                bindAttrs=('exons', ))
    worldbase.schema.Bio.Annotation.map = exon_relation
Esempio n. 3
0
    def test_mysqlannot(self):
        """Test building an AnnotationDB from MySQL.

        Builds exon, splice and UCSC most-conserved-element annotation
        databases from SQL tables, indexes each in an NLMSA and saves
        them (with schemas) to pygr.Data.  After clearing the cache the
        resources are reloaded and used to write exon and intron
        conservation reports, whose MD5 digests must match copies of the
        reference files in testInputDir.
        """
        from pygr import seqdb, cnestedlist, sqlgraph
        dm2 = pygr.Data.getResource('TEST.Seq.Genome.dm2')
        # BUILD ANNOTATION DATABASE FOR REFSEQ EXONS: MYSQL VERSION
        exon_slices = sqlgraph.SQLTableClustered(
            '%s.pygr_refGene_exonAnnot%s_dm2' % (testInputDB,
                                                 smallSamplePostfix),
            clusterKey='chromosome', maxCache=0)
        exon_db = seqdb.AnnotationDB(exon_slices, dm2,
                                     sliceAttrDict=dict(id='chromosome',
                                                        gene_id='name',
                                                        exon_id='exon_id'))
        msa = cnestedlist.NLMSA(os.path.join(self.path,
                                             'refGene_exonAnnot_SQL_dm2'), 'w',
                                pairwiseMode=True, bidirectional=False)
        for annot_id in exon_db:
            msa.addAnnotation(exon_db[annot_id])
        exon_db.clear_cache() # not really necessary; cache should autoGC
        exon_slices.clear_cache()
        msa.build()
        exon_db.__doc__ = 'SQL Exon Annotation Database for dm2'
        pygr.Data.addResource('TEST.Annotation.SQL.dm2.exons', exon_db)
        msa.__doc__ = 'SQL NLMSA Exon for dm2'
        pygr.Data.addResource('TEST.Annotation.NLMSA.SQL.dm2.exons', msa)
        exon_schema = pygr.Data.ManyToManyRelation(dm2, exon_db,
                                                   bindAttrs=('exon2', ))
        exon_schema.__doc__ = 'SQL Exon Schema for dm2'
        pygr.Data.addSchema('TEST.Annotation.NLMSA.SQL.dm2.exons', exon_schema)
        # BUILD ANNOTATION DATABASE FOR REFSEQ SPLICES: MYSQL VERSION
        splice_slices = sqlgraph.SQLTableClustered(
            '%s.pygr_refGene_spliceAnnot%s_dm2' % (testInputDB,
                                                   smallSamplePostfix),
            clusterKey='chromosome', maxCache=0)
        splice_db = seqdb.AnnotationDB(splice_slices, dm2,
                                       sliceAttrDict=dict(id='chromosome',
                                                          gene_id='name',
                                                        splice_id='splice_id'))
        msa = cnestedlist.NLMSA(os.path.join(self.path,
                                             'refGene_spliceAnnot_SQL_dm2'),
                                'w', pairwiseMode=True, bidirectional=False)
        for annot_id in splice_db:
            msa.addAnnotation(splice_db[annot_id])
        splice_db.clear_cache() # not really necessary; cache should autoGC
        splice_slices.clear_cache()
        msa.build()
        splice_db.__doc__ = 'SQL Splice Annotation Database for dm2'
        pygr.Data.addResource('TEST.Annotation.SQL.dm2.splices', splice_db)
        msa.__doc__ = 'SQL NLMSA Splice for dm2'
        pygr.Data.addResource('TEST.Annotation.NLMSA.SQL.dm2.splices', msa)
        splice_schema = pygr.Data.ManyToManyRelation(dm2, splice_db,
                                                     bindAttrs=('splice2', ))
        splice_schema.__doc__ = 'SQL Splice Schema for dm2'
        pygr.Data.addSchema('TEST.Annotation.NLMSA.SQL.dm2.splices',
                            splice_schema)
        # BUILD ANNOTATION DATABASE FOR MOST CONSERVED ELEMENTS FROM UCSC:
        # MYSQL VERSION
        ucsc_slices = sqlgraph.SQLTableClustered(
            '%s.pygr_phastConsElements15way%s_dm2' % (testInputDB,
                                                      smallSamplePostfix),
            clusterKey='chromosome', maxCache=0)
        ucsc_db = seqdb.AnnotationDB(ucsc_slices, dm2,
                                     sliceAttrDict=dict(id='chromosome',
                                                        gene_id='name',
                                                        ucsc_id='ucsc_id'))
        msa = cnestedlist.NLMSA(os.path.join(self.path,
                                             'phastConsElements15way_SQL_dm2'),
                                'w', pairwiseMode=True, bidirectional=False)
        for annot_id in ucsc_db:
            msa.addAnnotation(ucsc_db[annot_id])
        ucsc_db.clear_cache() # not really necessary; cache should autoGC
        ucsc_slices.clear_cache()
        msa.build()
        ucsc_db.__doc__ = 'SQL Most Conserved Elements for dm2'
        pygr.Data.addResource('TEST.Annotation.UCSC.SQL.dm2.mostconserved',
                              ucsc_db)
        msa.__doc__ = 'SQL NLMSA for Most Conserved Elements for dm2'
        pygr.Data.addResource(
            'TEST.Annotation.UCSC.NLMSA.SQL.dm2.mostconserved', msa)
        ucsc_schema = pygr.Data.ManyToManyRelation(dm2, ucsc_db,
                                                   bindAttrs=('element2', ))
        ucsc_schema.__doc__ = \
                'SQL Schema for UCSC Most Conserved Elements for dm2'
        pygr.Data.addSchema('TEST.Annotation.UCSC.NLMSA.SQL.dm2.mostconserved',
                            ucsc_schema)
        pygr.Data.save()
        pygr.Data.clear_cache()

        # QUERY TO EXON AND SPLICES ANNOTATION DATABASE
        dm2 = pygr.Data.getResource('TEST.Seq.Genome.dm2')
        exonmsa = pygr.Data.getResource('TEST.Annotation.NLMSA.SQL.dm2.exons')
        splicemsa = \
                pygr.Data.getResource('TEST.Annotation.NLMSA.SQL.dm2.splices')
        conservedmsa = \
      pygr.Data.getResource('TEST.Annotation.UCSC.NLMSA.SQL.dm2.mostconserved')
        exons = pygr.Data.getResource('TEST.Annotation.SQL.dm2.exons')
        splices = pygr.Data.getResource('TEST.Annotation.SQL.dm2.splices')
        mostconserved = \
            pygr.Data.getResource('TEST.Annotation.UCSC.SQL.dm2.mostconserved')

        # OPEN DM2_MULTIZ15WAY NLMSA
        msa = cnestedlist.NLMSA(os.path.join(msaDir, 'dm2_multiz15way'), 'r',
                                trypath=[seqDir])

        exonAnnotFileName = os.path.join(testInputDir,
                                  'Annotation_ConservedElement_Exons%s_dm2.txt'
                                         % smallSamplePostfix)
        intronAnnotFileName = os.path.join(testInputDir,
                                'Annotation_ConservedElement_Introns%s_dm2.txt'
                                           % smallSamplePostfix)
        newexonAnnotFileName = os.path.join(self.path, 'new_Exons_dm2.txt')
        newintronAnnotFileName = os.path.join(self.path, 'new_Introns_dm2.txt')
        tmpexonAnnotFileName = self.copyFile(exonAnnotFileName)
        tmpintronAnnotFileName = self.copyFile(intronAnnotFileName)

        if smallSampleKey:
            chrList = [smallSampleKey]
        else:
            chrList = sorted(dm2.seqLenDict.keys())

        def file_digest(fileName):
            'Return the MD5 digest of a file, closing it afterwards'
            infile = open(fileName, 'r')
            try:
                return hashlib.md5(infile.read()).digest()
            finally:
                infile.close()

        def write_conserved(outName, label, annotMsa, annotDB, idAttr):
            """Write the conservation report for one annotation type.

            outName: path of the report file to create
            label: first column of every row ('EXON' or 'INTRON')
            annotMsa: NLMSA mapping chromosomes to annotations
            annotDB: annotation database giving the real coordinates
            idAttr: name of the annotation ID attribute
            """
            outfile = open(outName, 'w')
            try:
                for chrid in chrList:
                    chrom = dm2[chrid]
                    try:
                        hits = annotMsa[chrom]
                    except KeyError: # nothing annotated on this chromosome
                        continue
                    annotList = [(getattr(ix, idAttr), ix)
                                 for ix in hits.keys()]
                    annotList.sort()
                    for annotID, annot in annotList:
                        saveList = []
                        tmp = annot.sequence
                        tmpannot = annotDB[getattr(annot, idAttr)]
                        tmpslice = tmpannot.sequence # FOR REAL COORDINATE
                        wlist1 = (label, chrid, getattr(tmpannot, idAttr),
                                  tmpannot.gene_id, tmpslice.start,
                                  tmpslice.stop)
                        try:
                            out1 = conservedmsa[tmp]
                        except KeyError: # no conserved element overlaps
                            continue
                        elementlist = [(ix.ucsc_id, ix)
                                       for ix in out1.keys()]
                        elementlist.sort()
                        for elemID, element in elementlist:
                            if element.stop - element.start < 100:
                                continue
                            # the score is the text after '=' in gene_id
                            score = int(element.gene_id.split('=')[1])
                            if score < 100:
                                continue
                            tmp2 = element.sequence
                            tmpelement = mostconserved[element.ucsc_id]
                            # FOR REAL ELEMENT COORDINATE
                            tmpslice2 = tmpelement.sequence
                            wlist2 = wlist1 + (tmpelement.ucsc_id,
                                               tmpelement.gene_id,
                                               tmpslice2.start,
                                               tmpslice2.stop)
                            # overlap of the annotation and the element
                            slicestart = max(tmp.start, tmp2.start)
                            sliceend = min(tmp.stop, tmp2.stop)
                            tmp1 = msa.seqDict['dm2.' + chrid][slicestart:
                                                               sliceend]
                            for src, dest, e in msa[tmp1].edges():
                                if src.stop - src.start < 100:
                                    continue
                                palign, pident = e.pAligned(), e.pIdentity()
                                if palign < 0.8 or pident < 0.8:
                                    continue
                                palign, pident = '%.2f' % palign, \
                                        '%.2f' % pident
                                wlist3 = wlist2 + ((~msa.seqDict)[src],
                                                   str(src), src.start,
                                                   src.stop,
                                                   (~msa.seqDict)[dest],
                                                   str(dest), dest.start,
                                                   dest.stop, palign, pident)
                                saveList.append('\t'.join(map(str, wlist3))
                                                + '\n')
                        saveList.sort()
                        for saveline in saveList:
                            outfile.write(saveline)
            finally:
                outfile.close()

        write_conserved(newexonAnnotFileName, 'EXON', exonmsa, exons,
                        'exon_id')
        assert file_digest(tmpexonAnnotFileName) == \
                file_digest(newexonAnnotFileName)

        write_conserved(newintronAnnotFileName, 'INTRON', splicemsa, splices,
                        'splice_id')
        assert file_digest(tmpintronAnnotFileName) == \
                file_digest(newintronAnnotFileName)
Esempio n. 4
0
    def test_collectionannot(self):
        """Test building an AnnotationDB from file.

        Loads exon, splice and UCSC most-conserved-element rows from
        tab-separated text files into Collection objects, wraps each in
        an AnnotationDB, indexes it in an NLMSA and saves everything
        (with schemas) to pygr.Data.  After clearing the cache the
        resources are reloaded and used to write exon and intron
        conservation reports, whose MD5 digests must match copies of the
        reference files in testInputDir.
        """
        from pygr import seqdb, cnestedlist, sqlgraph
        dm2 = pygr.Data.getResource('TEST.Seq.Genome.dm2')

        def load_annotations(fileName, slices, annotDB, annotMsa):
            'Store each tab-separated row in slices and index its annotation'
            infile = open(fileName, 'r')
            try:
                for line in infile:
                    row = line.split('\t') # fields are left unstripped
                    row[1] = int(row[1]) # CONVERT FROM STRING TO INTEGER
                    slices[row[1]] = row
                    # GET THE ANNOTATION OBJECT AND SAVE IT TO GENOME MAPPING
                    annotMsa.addAnnotation(annotDB[row[1]])
            finally:
                infile.close()

        # BUILD ANNOTATION DATABASE FOR REFSEQ EXONS
        exon_slices = Collection(
            filename=os.path.join(self.path, 'refGene_exonAnnot_dm2.cdb'),
            intKeys=True, mode='cr', writeback=False)
        exon_db = seqdb.AnnotationDB(exon_slices, dm2,
                                     sliceAttrDict=dict(id=0, exon_id=1,
                                                        orientation=2,
                                                        gene_id=3, start=4,
                                                        stop=5))
        msa = cnestedlist.NLMSA(os.path.join(self.path,
                                             'refGene_exonAnnot_dm2'), 'w',
                                pairwiseMode=True, bidirectional=False)
        load_annotations(os.path.join(testInputDir,
                                      'refGene_exonAnnot%s_dm2.txt'
                                      % smallSamplePostfix),
                         exon_slices, exon_db, msa)
        exon_db.clear_cache() # not really necessary; cache should autoGC
        # SHELVE SHOULD BE EXPLICITLY CLOSED IN ORDER TO SAVE CURRENT CONTENTS
        exon_slices.close()
        msa.build() # FINALIZE GENOME ALIGNMENT INDEXES
        exon_db.__doc__ = 'Exon Annotation Database for dm2'
        pygr.Data.addResource('TEST.Annotation.dm2.exons', exon_db)
        msa.__doc__ = 'NLMSA Exon for dm2'
        pygr.Data.addResource('TEST.Annotation.NLMSA.dm2.exons', msa)
        exon_schema = pygr.Data.ManyToManyRelation(dm2, exon_db,
                                                   bindAttrs=('exon1', ))
        exon_schema.__doc__ = 'Exon Schema for dm2'
        pygr.Data.addSchema('TEST.Annotation.NLMSA.dm2.exons', exon_schema)
        # BUILD ANNOTATION DATABASE FOR REFSEQ SPLICES
        splice_slices = Collection(
            filename=os.path.join(self.path, 'refGene_spliceAnnot_dm2.cdb'),
            intKeys=True, mode='cr', writeback=False)
        splice_db = seqdb.AnnotationDB(splice_slices, dm2,
                                       sliceAttrDict=dict(id=0, splice_id=1,
                                                          orientation=2,
                                                          gene_id=3, start=4,
                                                          stop=5))
        msa = cnestedlist.NLMSA(os.path.join(self.path,
                                             'refGene_spliceAnnot_dm2'), 'w',
                                pairwiseMode=True, bidirectional=False)
        load_annotations(os.path.join(testInputDir,
                                      'refGene_spliceAnnot%s_dm2.txt'
                                      % smallSamplePostfix),
                         splice_slices, splice_db, msa)
        splice_db.clear_cache() # not really necessary; cache should autoGC
        # SHELVE SHOULD BE EXPLICITLY CLOSED IN ORDER TO SAVE CURRENT CONTENTS
        splice_slices.close()
        msa.build() # FINALIZE GENOME ALIGNMENT INDEXES
        splice_db.__doc__ = 'Splice Annotation Database for dm2'
        pygr.Data.addResource('TEST.Annotation.dm2.splices', splice_db)
        msa.__doc__ = 'NLMSA Splice for dm2'
        pygr.Data.addResource('TEST.Annotation.NLMSA.dm2.splices', msa)
        splice_schema = pygr.Data.ManyToManyRelation(dm2, splice_db,
                                                     bindAttrs=('splice1', ))
        splice_schema.__doc__ = 'Splice Schema for dm2'
        pygr.Data.addSchema('TEST.Annotation.NLMSA.dm2.splices', splice_schema)
        # BUILD ANNOTATION DATABASE FOR MOST CONSERVED ELEMENTS FROM UCSC
        ucsc_slices = Collection(
            filename=os.path.join(self.path, 'phastConsElements15way_dm2.cdb'),
            intKeys=True, mode='cr', writeback=False)
        ucsc_db = seqdb.AnnotationDB(ucsc_slices, dm2,
                                     sliceAttrDict=dict(id=0, ucsc_id=1,
                                                        orientation=2,
                                                        gene_id=3, start=4,
                                                        stop=5))
        msa = cnestedlist.NLMSA(os.path.join(self.path,
                                             'phastConsElements15way_dm2'),
                                'w', pairwiseMode=True, bidirectional=False)
        load_annotations(os.path.join(testInputDir,
                                      'phastConsElements15way%s_dm2.txt'
                                      % smallSamplePostfix),
                         ucsc_slices, ucsc_db, msa)
        ucsc_db.clear_cache() # not really necessary; cache should autoGC
        # SHELVE SHOULD BE EXPLICITLY CLOSED IN ORDER TO SAVE CURRENT CONTENTS
        ucsc_slices.close()
        msa.build() # FINALIZE GENOME ALIGNMENT INDEXES
        ucsc_db.__doc__ = 'Most Conserved Elements for dm2'
        pygr.Data.addResource('TEST.Annotation.UCSC.dm2.mostconserved',
                              ucsc_db)
        msa.__doc__ = 'NLMSA for Most Conserved Elements for dm2'
        pygr.Data.addResource('TEST.Annotation.UCSC.NLMSA.dm2.mostconserved',
                              msa)
        ucsc_schema = pygr.Data.ManyToManyRelation(dm2, ucsc_db,
                                                   bindAttrs=('element1', ))
        ucsc_schema.__doc__ = 'Schema for UCSC Most Conserved Elements for dm2'
        pygr.Data.addSchema('TEST.Annotation.UCSC.NLMSA.dm2.mostconserved',
                            ucsc_schema)
        pygr.Data.save()
        pygr.Data.clear_cache() # force resources to reload when requested

        # QUERY TO EXON AND SPLICES ANNOTATION DATABASE
        dm2 = pygr.Data.getResource('TEST.Seq.Genome.dm2')
        exonmsa = pygr.Data.getResource('TEST.Annotation.NLMSA.dm2.exons')
        splicemsa = pygr.Data.getResource('TEST.Annotation.NLMSA.dm2.splices')
        conservedmsa = \
          pygr.Data.getResource('TEST.Annotation.UCSC.NLMSA.dm2.mostconserved')
        exons = pygr.Data.getResource('TEST.Annotation.dm2.exons')
        splices = pygr.Data.getResource('TEST.Annotation.dm2.splices')
        mostconserved = \
                pygr.Data.getResource('TEST.Annotation.UCSC.dm2.mostconserved')

        # OPEN DM2_MULTIZ15WAY NLMSA
        msa = cnestedlist.NLMSA(os.path.join(msaDir, 'dm2_multiz15way'), 'r',
                                trypath=[seqDir])

        exonAnnotFileName = os.path.join(testInputDir,
                                  'Annotation_ConservedElement_Exons%s_dm2.txt'
                                         % smallSamplePostfix)
        intronAnnotFileName = os.path.join(testInputDir,
                                'Annotation_ConservedElement_Introns%s_dm2.txt'
                                           % smallSamplePostfix)
        newexonAnnotFileName = os.path.join(self.path, 'new_Exons_dm2.txt')
        newintronAnnotFileName = os.path.join(self.path, 'new_Introns_dm2.txt')
        tmpexonAnnotFileName = self.copyFile(exonAnnotFileName)
        tmpintronAnnotFileName = self.copyFile(intronAnnotFileName)

        if smallSampleKey:
            chrList = [smallSampleKey]
        else:
            chrList = sorted(dm2.seqLenDict.keys())

        def file_digest(fileName):
            'Return the MD5 digest of a file, closing it afterwards'
            infile = open(fileName, 'r')
            try:
                return hashlib.md5(infile.read()).digest()
            finally:
                infile.close()

        def write_conserved(outName, label, annotMsa, annotDB, idAttr):
            """Write the conservation report for one annotation type.

            outName: path of the report file to create
            label: first column of every row ('EXON' or 'INTRON')
            annotMsa: NLMSA mapping chromosomes to annotations
            annotDB: annotation database giving the real coordinates
            idAttr: name of the annotation ID attribute
            """
            outfile = open(outName, 'w')
            try:
                for chrid in chrList:
                    chrom = dm2[chrid]
                    try:
                        hits = annotMsa[chrom]
                    except KeyError: # nothing annotated on this chromosome
                        continue
                    annotList = [(getattr(ix, idAttr), ix)
                                 for ix in hits.keys()]
                    annotList.sort()
                    for annotID, annot in annotList:
                        saveList = []
                        tmp = annot.sequence
                        tmpannot = annotDB[getattr(annot, idAttr)]
                        tmpslice = tmpannot.sequence # FOR REAL COORDINATE
                        wlist1 = (label, chrid, getattr(tmpannot, idAttr),
                                  tmpannot.gene_id, tmpslice.start,
                                  tmpslice.stop)
                        try:
                            out1 = conservedmsa[tmp]
                        except KeyError: # no conserved element overlaps
                            continue
                        elementlist = [(ix.ucsc_id, ix)
                                       for ix in out1.keys()]
                        elementlist.sort()
                        for elemID, element in elementlist:
                            if element.stop - element.start < 100:
                                continue
                            # the score is the text after '=' in gene_id
                            score = int(element.gene_id.split('=')[1])
                            if score < 100:
                                continue
                            tmp2 = element.sequence
                            tmpelement = mostconserved[element.ucsc_id]
                            # FOR REAL ELEMENT COORDINATE
                            tmpslice2 = tmpelement.sequence
                            wlist2 = wlist1 + (tmpelement.ucsc_id,
                                               tmpelement.gene_id,
                                               tmpslice2.start,
                                               tmpslice2.stop)
                            # overlap of the annotation and the element
                            slicestart = max(tmp.start, tmp2.start)
                            sliceend = min(tmp.stop, tmp2.stop)
                            tmp1 = msa.seqDict['dm2.' + chrid][slicestart:
                                                               sliceend]
                            for src, dest, e in msa[tmp1].edges():
                                if src.stop - src.start < 100:
                                    continue
                                palign, pident = e.pAligned(), e.pIdentity()
                                if palign < 0.8 or pident < 0.8:
                                    continue
                                palign, pident = '%.2f' % palign, \
                                        '%.2f' % pident
                                wlist3 = wlist2 + ((~msa.seqDict)[src],
                                                   str(src), src.start,
                                                   src.stop,
                                                   (~msa.seqDict)[dest],
                                                   str(dest), dest.start,
                                                   dest.stop, palign, pident)
                                saveList.append('\t'.join(map(str, wlist3))
                                                + '\n')
                        saveList.sort()
                        for saveline in saveList:
                            outfile.write(saveline)
            finally:
                outfile.close()

        write_conserved(newexonAnnotFileName, 'EXON', exonmsa, exons,
                        'exon_id')
        assert file_digest(tmpexonAnnotFileName) == \
                file_digest(newexonAnnotFileName)

        write_conserved(newintronAnnotFileName, 'INTRON', splicemsa, splices,
                        'splice_id')
        assert file_digest(tmpintronAnnotFileName) == \
                file_digest(newintronAnnotFileName)