Example #1
0
    def getAncestry(self, parent, immediate=None):
        """Compare this record's sequence with that of ``parent``.

        The ``aaseq`` attributes of both records are converted with
        ``sequence_alignment.Protool2pir``.  Sequences of equal length are
        compared directly with ``SequenceOperations.findSequenceDifferences``;
        when that yields more than 10 operations, or when the lengths differ,
        the sequences are aligned with ``sequence_alignment.NW`` first and the
        aligned sequences are compared instead.

        Returns a 2-tuple:
            (True, operations)   if ``immediate`` is false
            (False, operations)  if ``immediate`` is true and there are zero
                                 or one operations
            (False, False)       if ``immediate`` is true and there is more
                                 than one operation, or if either record has
                                 no sequence

        Raises ``Exception`` when the operations list equals ``[None]``.

        NOTE(review): with ``immediate`` set this never returns True, not even
        for exactly one operation - confirm that this is intended.
        NOTE(review): when the parent has no stored alignment strings, the
        alignment arguments become the whole ``(sequence, ignored)`` tuple
        returned by ``Protool2pir`` for *this* record's sequence - confirm.
        """
        import sequence_alignment
        from Sequence import SequenceOperations as SQ

        full_record_sequence = self.aaseq
        full_parent_sequence = parent.aaseq

        # Prefer the alignment strings stored on the parent record.
        has_stored_alignment = (
            parent.has_key('Structure_alnseq_PDBfile')
            and parent.has_key('Structure_alnseq_EATrecord'))
        if not has_stored_alignment:
            converted = sequence_alignment.Protool2pir(full_record_sequence)
            parent_aln_PDBfile = converted
            record_sequence, ignored_res = converted
            parent_aln_record = parent_aln_PDBfile
        else:
            parent_aln_PDBfile = parent.Structure_alnseq_PDBfile + '*'
            parent_aln_record = parent.Structure_alnseq_EATrecord + '*'

        # Nothing to compare unless both records carry a sequence.
        if not (full_record_sequence and full_parent_sequence):
            return False, False

        record_sequence, ignored_res = sequence_alignment.Protool2pir(
            full_record_sequence)
        parent_sequence, ignored_parent = sequence_alignment.Protool2pir(
            full_parent_sequence)

        same_length = len(record_sequence) == len(parent_sequence)

        # Cheap path: position-by-position comparison of equal-length
        # sequences.
        operations = [None]
        if same_length:
            operations = SQ.findSequenceDifferences(
                record_sequence,
                parent_sequence,
                full_parent_sequence,
                PDBaln=parent_aln_PDBfile,
                recordALN=parent_aln_record)

        # Expensive path: align first when the lengths differ or the direct
        # comparison produced more than 10 operations.
        if not same_length or len(operations) > 10:
            aligner = sequence_alignment.NW(record_sequence, parent_sequence)
            al_seq, al_parent, map_seq, map_parent = aligner.Align(
                verbose=True)
            operations = SQ.findSequenceDifferences(
                al_seq,
                al_parent,
                full_parent_sequence,
                PDBaln=parent_aln_PDBfile,
                recordALN=parent_aln_record)

        if operations == [None]:
            raise Exception()

        operation_count = len(operations)
        if not immediate:
            return True, operations
        if operation_count <= 1:
            return False, operations
        return False, False
Example #2
0
 def align_withseq(self, sequence):
     """Extract the sequence of the current molecule and align it with the sequence given.
     Return an array giving the relation between the sequence position and the PDB residue ID"""
     pdbpir = self.clean_seq(self.PirSeq(), remove_gaps=True)
     sequence = self.clean_seq(sequence, remove_gaps=True)
     import sequence_alignment
     NW = sequence_alignment.NW(pdbpir, sequence)
     al1, al2, map1, map2 = NW.Align(verbose=True)
     map = []
     residues = self.residues.keys()
     residues.sort()
     count = 0
     #
     for count in range(len(sequence)):
         pdbrespos = map2[count]
         if pdbrespos != '-':
             map.append(residues[map2[count]])
         else:
             map.append(None)
     #
     # Check that we have 100% sequence identify for the aligned residues
     #
     print 'Aligned seqid', NW.aligned_seqid
     return map, NW.aligned_seqid
Example #3
0
def main():

    import os, sys, math
    sys.path.append('/home/people/tc/svn/Protool/')
    import geometry
    instance_geometry = geometry.geometry()

    ##    ## lactoferrin
    ##    pdb1 = '1lfg'
    ##    pdb2 = '1lfh'
    ##    domain_range = range(1,88+1)+range(253,333+1)
    ##    chain = 'A'

    ##    ## trp repressor
    ##    pdb1 = '1wrp'
    ##    pdb2 = '2oz9' ## 2wrp
    ##    pdb2 = '1zt9'
    ##    domain_range = range(1,999+1)
    ##    chain1 = 'R'
    ##    chain2 = 'R'
    ##    chain2 = 'A'
    ##    exclude_chain = ''

    ##    ## luciferase
    ##    pdb1 = '1ba3'
    ##    pdb2 = '1lci'
    ##    domain_range = range(1,999+1)
    ##    chain1 = 'A'
    ##    chain2 = 'A'
    ##    exclude_chain = ''

    ##    ## G3P DH
    ##    pdb1 = '1gd1'
    ##    pdb2 = '2gd1'
    ##    domain_range = range(1,999+1)
    ##    chain1 = 'O'
    ##    chain2 = 'O'
    ##    exclude_chain = ''

    ##    ## hexokinase
    ##    pdb1 = '1hkg'
    ##    pdb2 = '2yhx'
    ##    domain_range = range(1,999+1)
    ##    chain1 = 'A'
    ##    chain2 = 'A'
    ##    exclude_chain = ''

    ##    ## adk
    ##    pdb1 = '1ake'
    ##    pdb2 = '4ake'
    ##    domain_range = range(1,999+1)
    ##    chain1 = 'A'
    ##    chain2 = 'A'
    ##    exclude_chain = 'B'

    ##    ## t4l
    ##    pdb1 = '2lzm'
    ##    pdb2 = '150l'
    ####    domain_range = range(15,59+1)
    ####    domain_range = range(60,80+1)
    ##    domain_range = range(81,162+1)
    ##    chain1 = 'A'
    ##    chain2 = 'D'
    ##    exclude_chain = 'B'

    l_input = [
        ##        ##
        ##        {'pdb1':'1ipd','pdb2':'1osj','chain1':'A','chain2':'A','range':range(1,98+1)+range(253,345+1)},
        ##        {'pdb1':'1ipd','pdb2':'1osj','chain1':'A','chain2':'A','range':range(99,108+1)+range(109,252+1)},
        ###### shears
        ##        ## aspartate amino transferase
        ##        {'pdb1':'9aat','pdb2':'1ama','chain1':'A','chain2':'A','range':range(15,36+1)+range(349,410+1)},
        ##        {'pdb1':'9aat','pdb2':'1ama','chain1':'A','chain2':'A','range':range(50,312+1)},
        ## alcohol dehydrogenase
        {
            'pdb1': '6adh',
            'pdb2': '8adh1',
            'chain1': 'A',
            'chain2': 'A',
            'range': range(1, 174 + 1) + range(322, 374 + 1)
        },
        {
            'pdb1': '6adh',
            'pdb2': '8adh2',
            'chain1': 'A',
            'chain2': 'A',
            'range': range(193, 317 + 1)
        },
        ##        ## citrate synthase
        ##        {'pdb1':'1cts','pdb2':'4cts','chain1':'A','chain2':'A','range':range(1,276+1)+range(386,999+1)},
        ###### hinges
        ##        ## atpsulf
        ##        {'pdb1':'1i2d','pdb2':'1m8p','chain1':'A','chain2':'A','range':range(1,389+1)},
        ##        ## dnak (different spacegroups)
        ##        {'pdb1':'1dkx','pdb2':'1dky','chain1':'A','chain2':'A','range':range(389,509+1)},
        ##        ## dnak (different spacegroups)
        ##        {'pdb1':'1ddt','pdb2':'1mdt','chain1':'A','chain2':'A','range':range(1,376+1)},
        ##        ## ecpdpbp
        ##        {'pdb1':'1dpp','pdb2':'1dpe','chain1':'A','chain2':'A','range':range(1,260+1)+range(479,999+1)},
        ##        ## ef2
        ##        {'pdb1':'1n0v','pdb2':'1n0u','chain1':'C','chain2':'A','range':range(1,478+1)}, ## large
        ##        {'pdb1':'1n0v','pdb2':'1n0u','chain1':'C','chain2':'A','range':range(479,560+1)}, ## independent
        ##        {'pdb1':'1n0v','pdb2':'1n0u','chain1':'C','chain2':'A','range':range(561,9999+1)}, ## small
        ##        ## febp
        ##        {'pdb1':'1d9v','pdb2':'1mrp','chain1':'A','chain2':'A','range':range(109,227+1)+range(292,309+1)},
        ##        {'pdb1':'1d9v','pdb2':'1mrp','chain1':'A','chain2':'A','range':range(1,96+1)+range(228,262+1)},
        ##        ## folylpolyglutamate synthetase
        ##        {'pdb1':'1jbv','pdb2':'1jbw','chain1':'A','chain2':'A','range':range(1,295+1)},
        ##        {'pdb1':'1jbv','pdb2':'1jbw','chain1':'A','chain2':'A','range':range(296,386+1)},
        ##        ## glucose ABC transporter ATPase subunit (different spacegroups)
        ##        {'pdb1':'1oxs','pdb2':'1oxu','chain1':'C','chain2':'C','range':range(1,209+1)},
        ##        {'pdb1':'1oxs','pdb2':'1oxu','chain1':'C','chain2':'C','range':range(244,999+1)},
        ##        ## groel domain
        ##        {'pdb1':'1aon','pdb2':'1oel','chain1':'A','chain2':'A','range':range(1,137+1)+range(410,999+1)},
        ##        {'pdb1':'1aon','pdb2':'1oel','chain1':'A','chain2':'A','range':range(192,374+1)},
        ##        {'pdb1':'1aon','pdb2':'1oel','chain1':'A','chain2':'A','range':range(138,190+1)+range(375,409+1)},
        ##        ## lao bp
        ##        {'pdb1':'2lao','pdb2':'1laf','chain1':'A','chain2':'E','range':range(1,90+1)+range(192,238+1)},
        ##        {'pdb1':'2lao','pdb2':'1laf','chain1':'A','chain2':'E','range':range(91,191+1)},
        ##        ## t4l
        ##        {'pdb1':'1l96','pdb2':'1l97','chain1':'A','chain2':'A','range':range(13,59+1)},
        ##        {'pdb1':'1l96','pdb2':'1l97','chain1':'A','chain2':'A','range':range(81,164+1)},
        ##        ## maltodextrin bp
        ##        {'pdb1':'1omp','pdb2':'3mbp','chain1':'A','chain2':'A','range':range(1,104+1)+range(268,313+1)},
        ##        {'pdb1':'1omp','pdb2':'3mbp','chain1':'A','chain2':'A','range':range(113,258+1)+range(314,370+1)},
        ##        ## mRNA capping enzyme
        ##        {'pdb1':'1ckm','pdb2':'1ckm','chain1':'A','chain2':'B','range':range(1,237+1)+range(319,327+1)},
        ##        {'pdb1':'1ckm','pdb2':'1ckm','chain1':'A','chain2':'B','range':range(241,303+1)},
        ##        ## mura
        ##        {'pdb1':'1ejd','pdb2':'1a2n','chain1':'A','chain2':'A','range':range(1,20+1)+range(230,417+1)},
        ##        {'pdb1':'1ejd','pdb2':'1a2n','chain1':'A','chain2':'A','range':range(20,230+1)},
        ##        ## oligopeptide bp
        ##        {'pdb1':'1rkm','pdb2':'2rkm','chain1':'A','chain2':'A','range':range(1,263+1)+range(491,517+1)},
        ##        {'pdb1':'1rkm','pdb2':'2rkm','chain1':'A','chain2':'A','range':range(277,477+1)},
        ##        ## protein kinase A
        ##        {'pdb1':'1jlu','pdb2':'1cmk','chain1':'E','chain2':'E','range':range(1,33+1)+range(125,310+1),
        ##        {'pdb1':'1jlu','pdb2':'1cmk','chain1':'E','chain2':'E','range':range(34,124+1)},
        ##        ## dna polymerase beta
        ##        {'pdb1':'1bpd','pdb2':'2bpg','chain1':'A','chain2':'A','range':range(1,82+1)},
        ##        {'pdb1':'1bpd','pdb2':'2bpg','chain1':'A','chain2':'A','range':range(106,132+1)},
        ##        {'pdb1':'1bpd','pdb2':'2bpg','chain1':'A','chain2':'A','range':range(148,262+1)},
        ##        {'pdb1':'1bpd','pdb2':'2bpg','chain1':'A','chain2':'A','range':range(262,335+1)},
        ##        ## ribose bp
        ##        {'pdb1':'1urp','pdb2':'2dri','chain1':'A','chain2':'A','range':range(1,98+1)+range(235,259+1)},
        ##        {'pdb1':'1urp','pdb2':'2dri','chain1':'A','chain2':'A','range':range(104,234+1)+range(265,271+1)},
        ##        ## thioredoxin reductase
        ##        {'pdb1':'1tde','pdb2':'1f6m','chain1':'A','chain2':'E','range':range(1,112+1)+range(248,320+1)},
        ##        {'pdb1':'1tde','pdb2':'1f6m','chain1':'A','chain2':'E','range':range(118,242+1)},
        ##        ## dna bp
        ##        {'pdb1':'1fgu','pdb2':'1jmc','chain1':'A','chain2':'A','range':range(183,283+1)},
        ##        ## transferrin
        ##        {'pdb1':'1bp5','pdb2':'1a8e','chain1':'A','chain2':'A','range':range(1,75+1)+range(249,316+1)},
        ##        {'pdb1':'1bp5','pdb2':'1a8e','chain1':'A','chain2':'A','range':range(103,242+1)},
        ##        ## uracil dna glycosylase
        ##        {'pdb1':'1ssp','pdb2':'1akz','chain1':'E','chain2':'A','range':range(82,144+1)+range(191,240+1)},
        ##        {'pdb1':'1ssp','pdb2':'1akz','chain1':'E','chain2':'A','range':range(166,182+1)+range(270,304+1)},
    ]

    for i in range(len(l_input)):

        pdb1 = l_input[i]['pdb1']
        pdb2 = l_input[i]['pdb2']
        chain1 = l_input[i]['chain1']
        chain2 = l_input[i]['chain2']
        domain_range = l_input[i]['range']

        os.system('cp /oxygenase_local/data/pdb/%s/pdb%s.ent %s.pdb' % (
            pdb1[1:3],
            pdb1,
            pdb1,
        ))
        os.system('cp /oxygenase_local/data/pdb/%s/pdb%s.ent %s.pdb' % (
            pdb2[1:3],
            pdb2[:4],
            pdb2[:4],
        ))

        ss_range1, l_missing1, seqres1, l_modres = parse_header(
            pdb1,
            chain1,
        )
        ss_range2, l_missing2, seqres2, l_modres = parse_header(
            pdb2[:4],
            chain2,
        )

        ss_range = list(set(ss_range1) & set(ss_range2))
        l_missing = list(set(l_missing1) | set(l_missing2))

        if len(seqres1) != len(seqres2):
            d_replace = {
                'TPO': 'THR',
                'PTR': 'TYR',
                ##                'SER':'CYS', ## 1tde v 1f6m
            }
            for i in range(len(seqres1)):
                if seqres1[i] in d_replace.keys():
                    seqres1[i] = d_replace[seqres1[i]]
            for i in range(len(seqres2)):
                if seqres2[i] in d_replace.keys():
                    seqres2[i] = d_replace[seqres2[i]]
            if not (''.join(seqres1) in ''.join(seqres2)
                    or ''.join(seqres2) in ''.join(seqres1)):
                import sys
                sys.path.append('/home/people/tc/svn/EAT_DB/')
                import sequence_alignment
                d_res = {
                    'ALA': 'A',
                    'CYS': 'C',
                    'ASP': 'D',
                    'GLU': 'E',
                    'PHE': 'F',
                    'GLY': 'G',
                    'HIS': 'H',
                    'ILE': 'I',
                    'LYS': 'K',
                    'LEU': 'L',
                    'MET': 'M',
                    'ASN': 'N',
                    'PRO': 'P',
                    'GLN': 'Q',
                    'ARG': 'R',
                    'SER': 'S',
                    'THR': 'T',
                    'VAL': 'V',
                    'TRP': 'W',
                    'TYR': 'Y',
                }
                seq1 = ''
                for res in seqres1:
                    seq1 += d_res[res]
                seq2 = ''
                for res in seqres2:
                    seq2 += d_res[res]
                instance = sequence_alignment.NW(seq1, seq2)
                s1, s2 = instance.Align(verbose=False)[:2]
                l1 = len(s1) - len(s1.lstrip('-'))
                l2 = len(s2) - len(s2.lstrip('-'))
                r1 = len(s1) - len(s1.rstrip('-'))
                r2 = len(s2) - len(s2.rstrip('-'))
                print seqres1
                print seqres2
                print len(seqres1)
                print len(seqres2)
                print pdb1, pdb2
                print l1, l2, r1, r2
                print seqres2[l1:len(seqres2) - r1]
                print seqres1[l2:len(seqres1) - r2]

        l_coordinates1 = parse_coordinates(
            pdb1,
            chain1,
            domain_range,
            ss_range,
            l_missing,
        )
        l_coordinates2 = parse_coordinates(
            pdb2[:4],
            chain2,
            domain_range,
            ss_range,
            l_missing,
        )

        ##        l_coordinates1 = l_coordinates1[l2:len(l_coordinates1)-r2]
        ##        l_coordinates2 = l_coordinates2[l1:len(l_coordinates2)-r1]

        if len(l_coordinates1) != len(l_coordinates2):
            print len(l_coordinates1)
            print len(l_coordinates2)
            stop

        rmsd = instance_geometry.superpose(l_coordinates1, l_coordinates2)
        print pdb1, pdb2, round(rmsd, 1), len(l_coordinates1) / 3.
        tv1 = instance_geometry.fitcenter
        rm = instance_geometry.rotation
        tv2 = instance_geometry.refcenter

        apply_transformation_matrix(
            pdb1,
            chain1,
            l_modres,
            [0, 0, 0],
            [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
            [0, 0, 0],
        )
        apply_transformation_matrix(
            pdb2,
            chain2,
            l_modres,
            tv1,
            rm,
            tv2,
        )

        l_coordinates1 = parse_coordinates(
            pdb1 + '_rotated',
            chain1,
            range(1, 9999),
            ss_range,
            l_missing,
        )
        l_coordinates2 = parse_coordinates(
            pdb2 + '_rotated',
            chain2,
            range(1, 9999),
            ss_range,
            l_missing,
        )

        SUM = 0.
        n = len(l_coordinates1)
        for i in range(n):
            SUM += sum((l_coordinates1[i] - l_coordinates2[i])**2)
        RMSD = math.sqrt(SUM / n)
        print RMSD

    return
def main(
    pdb1,
    pdb2,
    chains1_align,
    chains2_align,
):

    chains1_apply = chains1_align
    chains2_apply = chains2_align

    import os, sys, math
    sys.path.append('/home/people/tc/svn/Protool/')
    import geometry
    instance_geometry = geometry.geometry()

    domain_range = range(0, 9999)

    os.system('cp /data/pdb-v3.2/%s/pdb%s.ent %s.pdb' % (
        pdb1[1:3],
        pdb1,
        pdb1,
    ))
    os.system('cp /data/pdb-v3.2/%s/pdb%s.ent %s.pdb' % (
        pdb2[1:3],
        pdb2,
        pdb2,
    ))

    ss_range1, l_missing1, seqres1, l_modres = parse_header(
        pdb1,
        chains1_align,
    )
    ss_range2, l_missing2, seqres2, l_modres = parse_header(
        pdb2,
        chains2_align,
    )

    ss_range = list(set(ss_range1) & set(ss_range2))
    l_missing = list(set(l_missing1) | set(l_missing2))

    if len(seqres1) != len(seqres2):
        d_replace = {
            'TPO': 'THR',
            'PTR': 'TYR',
            ##                'SER':'CYS', ## 1tde v 1f6m
        }
        for i in range(len(seqres1)):
            if seqres1[i] in d_replace.keys():
                seqres1[i] = d_replace[seqres1[i]]
        for i in range(len(seqres2)):
            if seqres2[i] in d_replace.keys():
                seqres2[i] = d_replace[seqres2[i]]
        if not (''.join(seqres1) in ''.join(seqres2)
                or ''.join(seqres2) in ''.join(seqres1)):
            import sys
            sys.path.append('/home/people/tc/svn/EAT_DB/')
            import sequence_alignment
            d_res = {
                'ALA': 'A',
                'CYS': 'C',
                'ASP': 'D',
                'GLU': 'E',
                'PHE': 'F',
                'GLY': 'G',
                'HIS': 'H',
                'ILE': 'I',
                'LYS': 'K',
                'LEU': 'L',
                'MET': 'M',
                'ASN': 'N',
                'PRO': 'P',
                'GLN': 'Q',
                'ARG': 'R',
                'SER': 'S',
                'THR': 'T',
                'VAL': 'V',
                'TRP': 'W',
                'TYR': 'Y',
            }
            seq1 = ''
            for res in seqres1:
                seq1 += d_res[res]
            seq2 = ''
            for res in seqres2:
                seq2 += d_res[res]
            instance = sequence_alignment.NW(seq1, seq2)
            s1, s2 = instance.Align(verbose=False)[:2]
            l1 = len(s1) - len(s1.lstrip('-'))
            l2 = len(s2) - len(s2.lstrip('-'))
            r1 = len(s1) - len(s1.rstrip('-'))
            r2 = len(s2) - len(s2.rstrip('-'))
            print seqres1
            print seqres2
            print len(seqres1)
            print len(seqres2)
            print pdb1, pdb2
            print l1, l2, r1, r2
            print seqres2[l1:len(seqres2) - r1]
            print seqres1[l2:len(seqres1) - r2]
        else:
            s1 = ''.join(seqres1)
            s2 = ''.join(seqres2)
            if s1 in s2:
                seqres2 = seqres2[s2.index(s1) / 3:]
            else:
                seqres1 = seqres1[s1.index(s2) / 3:]
            if len(seqres1) != len(seqres2):
                print len(seqres1), len(seqres2)
                stop

    l_coordinates1 = parse_coordinates(
        pdb1,
        chains1_align,
        domain_range,
        ss_range,
        l_missing,
    )
    l_coordinates2 = parse_coordinates(
        pdb2,
        chains2_align,
        domain_range,
        ss_range,
        l_missing,
    )

    ##        l_coordinates1 = l_coordinates1[l2:len(l_coordinates1)-r2]
    ##        l_coordinates2 = l_coordinates2[l1:len(l_coordinates2)-r1]

    if len(l_coordinates1) == 0 or len(l_coordinates2) == 0:
        stop

    if len(l_coordinates1) != len(l_coordinates2):
        print len(l_coordinates1)
        print len(l_coordinates2)
        stop

    rmsd = instance_geometry.superpose(l_coordinates1, l_coordinates2)
    print pdb1, pdb2
    print 'rmsd', round(rmsd, 1)
    print 'residues', len(seqres1), len(seqres2)
    print 'coordinates', len(l_coordinates1)
    tv1 = instance_geometry.fitcenter
    rm = instance_geometry.rotation
    tv2 = instance_geometry.refcenter

    lines1 = apply_transformation_matrix(
        pdb1,
        chains1_apply,
        l_modres,
        [0, 0, 0],
        [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
        [0, 0, 0],
    )
    lines2 = apply_transformation_matrix(
        pdb2,
        chains2_apply,
        l_modres,
        tv1,
        rm,
        tv2,
    )

    fd = open('rotated_%s%s.pdb' % (
        pdb1,
        pdb2,
    ), 'w')
    fd.writelines(lines1 + lines2)
    fd.close()

    l_coordinates1 = parse_coordinates(
        'rotated_' + pdb1,
        chains1_apply,
        range(-9999, 9999),
        ss_range,
        l_missing,
    )
    l_coordinates2 = parse_coordinates(
        'rotated_' + pdb2,
        chains2_apply,
        range(-9999, 9999),
        ss_range,
        l_missing,
    )

    SUM = 0.
    n = len(l_coordinates1)
    for i in range(n):
        SUM += sum((l_coordinates1[i] - l_coordinates2[i])**2)
    RMSD = math.sqrt(SUM / n)
    print 'RMSD all atoms', RMSD

    return RMSD, l_coordinates1, l_coordinates2
Example #5
0
    def checkPDBSequence(self, name):
        """Check the PDB sequence against a newly added structure, optional.
           Adds the amino acid seq of the PDB file, overwriting the old one"""
        # Extract the sequence
        import sequence_alignment
        pdb_1, ignored_res1 = sequence_alignment.Protool2pir(self.X.sequence)
        print 'IGNORED', ignored_res1
        if ignored_res1 != {}:
            igroups = ignored_res1.keys()
            igroups.sort()
            import tkMessageBox
            tkMessageBox.showwarning(
                'Unknown entities in PDB file',
                'I ignored the following residue types/molecules in the PDB file:\n%s'
                % (str(igroups)))

        # Get the entry sequence
        accept_alignment_automatically = None
        record_AA = DB.get_AA_sequence(name)
        if record_AA:
            record_AA1, ignored_res = sequence_alignment.Protool2pir(record_AA)

        # If we do not have an amino acid sequence for the record, then
        # we simply use the one from the PDB file and accept the alignment
        # straight away
        accept_alignment_automatically = 1
        import copy
        record_AA1 = copy.deepcopy(pdb_1)

        # Also deposit the amino acid sequence in the protein record

        DB.data[name]['aaseq'] = copy.deepcopy(self.X.sequence)

        # Align the two sequences
        NW_align = sequence_alignment.NW(pdb_1, record_AA1)
        al_pdb, al_record, map_pdb, map_record = NW_align.Align()
        self.al_pdb = al_pdb
        self.al_record = al_record

        # Find regions of overlap

        ids = 0
        for count in range(len(al_pdb)):
            res_pdb = al_pdb[count]
            res_rec = al_record[count]
            if res_pdb == res_rec:
                ids = ids + 1
        print 'Sequence identity %5.3f' % (100.0 * float(ids) /
                                           float(len(al_pdb)))
        AlignmentMap = {}
        AlignmentMap['OrigAa'] = al_record
        AlignmentMap['AlignedAa'] = al_pdb

        #Make alignment window
        AlignWindow = Toplevel()
        self.AlingWindow = AlignWindow
        AlignWindow.geometry('+100+200')
        AlignWindow.title('Please check alignment')
        AlignWindow.button = Button(AlignWindow, {
            "text": "Alignment OK",
            "fg": "black",
            "command": storePDB
        })
        AlignWindow.button.grid(row=3, column=0)
        AlignWindow.button = Button(
            AlignWindow, {
                "text": "Alignment not OK",
                "fg": "black",
                "command": AlignWindow.destroy
            })
        AlignWindow.button.grid(row=3, column=1)
        AlignWindow.Slider = Scrollbar(AlignWindow, orient=HORIZONTAL)
        AlignWindow.Slider.grid(row=1, column=0, sticky='news', columnspan=2)

        listbox = Listbox(AlignWindow, {
            "height": 2,
            "width": 80,
            "font": "courier 14"
        })
        listbox.insert('end', "PEAT_DB record: " + al_record)
        listbox.insert('end', "PDB file      : " + al_pdb)
        listbox.grid(row=0, column=0, columnspan=2)
        listbox.config(xscrollcommand=AlignWindow.Slider.set)
        AlignWindow.Slider.config(command=listbox.xview)
        return AlignmentMap