def main(pdb):
    """Write '<pdb>_biou_cc.pdb' from symmetry copies of the asymmetric unit.

    Steps, as performed below:

    1. Copy '<path_pdb>/<pdb[1:3]>/pdb<pdb>.ent' to '<pdb>.pdb' with the
       shell command cp.
    2. Call biounit.biounit().main(...) for the entry; '<pdb>_1.pdb' is read
       afterwards (presumably it is written by that call -- TODO confirm).
    3. Parse the header (all lines before the first MODEL/ATOM record) and
       the coordinates of both the biounit file and the ASU copy.
    4. For every REMARK290 symmetry operator and every translation in
       l_translations, transform each ASU residue (cartesian -> fractional
       via SCALE, symmetry operator, translation, fractional -> cartesian
       via the inverse of SCALE) and compare it with the biounit atoms.
       Residues flagged as being in the vicinity are converted to lines
       with build_line() using the new chain IDs in d_chains.
    5. Write the accumulated lines to '<pdb>_biou_cc.pdb'.

    Parameters:
        pdb: PDB identifier string; pdb[1:3] is used as the sub directory
            name below path_pdb.

    Returns:
        None.

    Relies on module level names not defined in this function: os, math,
    numpy, biounit, parse_pdb, path_pdb, s_alphabet, l_translations,
    d_radii_vdw and build_line.

    NOTE(review): the generated lines are appended to the list holding the
    lines of the original entry, so the output file starts with a full copy
    of the '.ent' file. An alternative 'lines = []' was commented out in the
    previous revision; confirm which behaviour is wanted.

    NOTE(review): the distance is still forced to 0 before the threshold
    test (marked as temporary by the author), so every pair that is not
    farther apart than 80 counts as a contact whenever the threshold is
    positive, and the innermost loop then always exits on its first
    iteration.
    """

    def read_lines(path):
        ## read a whole text file; the handle is closed even if reading fails
        with open(path,'r') as fd:
            return fd.readlines()

    os.system('cp %s/%s/pdb%s.ent %s.pdb' %(path_pdb,pdb[1:3],pdb,pdb,))

    ##
    ## Tommy crystal contacts
    ##

    ## create biounit
    biounit.biounit().main(pdb, '/data/remediated_pdb/', exclude_ligands = True)

    ##
    ## parse header (just use the asu instead!!!)
    ##
    lines = read_lines('%s/%s/pdb%s.ent' %(path_pdb,pdb[1:3],pdb,))
    ## the header is everything before the first MODEL/ATOM record
    ## (if no such record exists, i ends at the last index and the last
    ## line is excluded, exactly as before)
    for i in range(len(lines)):
        record = lines[i][:6].strip()
        if record in ['MODEL','ATOM',]:
            break
    lines_header = lines[:i]

    ##
    ## parse coordinates
    ##
    lines_biounit = read_lines('%s_1.pdb' %(pdb))
    lines_asu = read_lines('%s.pdb' %(pdb))

    d_header = parse_pdb.parse_header(lines_header)
    d_coordinates_biounit, d_ATOMseq = parse_pdb.parse_coordinates(
        lines_biounit,d_header,
        parse_atom_seq = False, parse_ligands = False,
        )
    d_coordinates_asu, d_ATOMseq = parse_pdb.parse_coordinates(
        lines_asu,d_header,
        parse_atom_seq = False, parse_ligands = False,
        )

    ## set new chain IDs
    ## (each ASU chain is paired, in sorted order, with a character of
    ## s_alphabet that is not already used as a chain ID)
    l_old_chains = sorted(d_coordinates_asu['chains'].keys())
    l_new_chains = sorted(set(s_alphabet)-set(l_old_chains))
    d_chains = {}
    for i in range(len(l_old_chains)):
        d_chains[l_old_chains[i]] = l_new_chains[i]

    ## SCALE converts cartesian to fractional coordinates,
    ## its inverse converts fractional back to cartesian coordinates
    matrix_scale = d_header['SCALE']
    matrix_scalei = numpy.linalg.inv(matrix_scale)

    def transform(coordinate, matrix_symop, vector_translation):
        ## apply symmetry operator and unit cell translation to a coordinate
        ## conversion from 3x1 vector to 4x1 vector
        coordinate = numpy.array([coordinate[0],coordinate[1],coordinate[2],1.,])
        ## conversion from cartesian to fractional coordinates
        coordinate = numpy.dot(matrix_scale,coordinate)
        ## symmetry operator (before unit cell translation???)
        coordinate = numpy.dot(matrix_symop,coordinate)
        ## unit cell translation
        coordinate += vector_translation
        ## conversion from fractional to cartesian coordinates
        coordinate = numpy.dot(matrix_scalei,coordinate)
        ## conversion from 4x1 vector to 3x1 vector
        return numpy.array([coordinate[0],coordinate[1],coordinate[2],])

    d_chains_asu = d_coordinates_asu['chains']
    d_chains_biounit = d_coordinates_biounit['chains']

    l_symop = sorted(d_header['REMARK290'].keys())
    for symop in l_symop:
        matrix_symop = d_header['REMARK290'][symop]['4x4matrix']
        for i in range(len(l_translations)):
            vector_translation = l_translations[i]
            ## 4th element is 0 so that the translation can be added
            ## to a 4x1 coordinate vector without changing its last element
            vector_translation = numpy.array([vector_translation[0],vector_translation[1],vector_translation[2],0,])
            for chain2 in d_chains_asu.keys():
                d_residues2 = d_chains_asu[chain2]['residues']
                for res_no2 in d_residues2.keys():
                    print('%4s %2i/%2i %2i/26 %1s %4i' %(pdb, symop, len(l_symop), i+1, chain2, res_no2))
                    for iCode2 in d_residues2[res_no2]['d_iCodes'].keys():
                        d_atoms2 = d_residues2[res_no2]['d_iCodes'][iCode2]['atoms']
                        ## reset the flags for every residue, so that a value
                        ## left over from the previous residue cannot end the
                        ## search early if an atom dictionary is empty
                        vicinity = False
                        distant = False
                        for chain1 in d_chains_biounit.keys():
                            d_residues1 = d_chains_biounit[chain1]['residues']
                            for res_no1 in d_residues1.keys():
                                for iCode1 in d_residues1[res_no1]['d_iCodes'].keys():
                                    d_atoms1 = d_residues1[res_no1]['d_iCodes'][iCode1]['atoms']
                                    for atom_name1 in d_atoms1.keys():
                                        coordinate1 = d_atoms1[atom_name1]['coordinate']
                                        for atom_name2 in d_atoms2.keys():
                                            coordinate2 = transform(
                                                d_atoms2[atom_name2]['coordinate'],
                                                matrix_symop,vector_translation,
                                                )

                                            vicinity = False
                                            distant = False
                                            dist = math.sqrt(sum((coordinate2-coordinate1)**2))

                                            ## break atom_name2 loop
                                            if dist > 80.:
                                                distant = True
                                                break

                                            ## threshold is calculated before the debug
                                            ## output, which previously printed it
                                            ## before it had been assigned
                                            dist_treshold = d_radii_vdw[atom_name1[0]]+d_radii_vdw[atom_name2[0]]+.25

                                            ## debug output for close contacts
                                            ## (uses the inverse SCALE matrix; the matrix
                                            ## referenced previously is not assigned
                                            ## anywhere in this function)
                                            if dist < 5:
                                                print(vector_translation)
                                                print(numpy.dot(matrix_scalei,vector_translation))
                                                print(matrix_scalei)
                                                print('%2i %2i %.2f %.2f %1s %4i %4s %s %1s %4i %4s %s %s' %(
                                                    symop, i, round(dist,2), dist_treshold,
                                                    chain2, res_no2, atom_name2, coordinate2,
                                                    chain1, res_no1, atom_name1, coordinate1,
                                                    d_atoms2[atom_name2]['coordinate'],
                                                    ))

                                            dist = 0 ## tmp!!! temp!!! get *all* translations

                                            ## break atom_name2 loop
                                            if dist < dist_treshold:
                                                vicinity = True
                                                break

                                        ## break atom_name1 loop (append line and check next iCode1)
                                        if vicinity == True:
                                            ## write all atoms of the transformed residue
                                            for atom_name3 in d_atoms2.keys():
                                                coordinate3 = transform(
                                                    d_atoms2[atom_name3]['coordinate'],
                                                    matrix_symop,vector_translation,
                                                    )
                                                line = build_line(
                                                    atom_name3,d_coordinates_asu,coordinate3,
                                                    chain2,res_no2,iCode2,
                                                    d_chains,
                                                    )
                                                lines += [line]
                                            break
                                        ## break atom_name1 loop (check next iCode1)
                                        if distant == True:
                                            break
                                    ## break iCode1 loop (check next iCode2)
                                    if vicinity == True or distant == True:
                                        break
                                ## break resno1 loop (check next iCode2)
                                if vicinity == True or distant == True:
                                    break
                            ## break chain1 loop (check next iCode2)
                            if vicinity == True or distant == True:
                                break

    with open('%s_biou_cc.pdb' %(pdb),'w') as fd:
        fd.writelines(lines)

    return
Example #2
0
def main():

    import os, numpy, math
    import sys
    sys.path.append('/home/people/tc/svn/tc_sandbox/misc/')
    import parse_pdb

    path = '/oxygenase_local/data/pdb/'
    dirs = os.listdir(path)
    dirs.sort()
    for dir in dirs:
        if dir < sys.argv[1][1:3]:
            continue
        print dir
        ents = os.listdir('%s%s' % (path, dir))
        ents.sort()
        for ent in ents:
            pdb = ent[3:7]
            if dir == sys.argv[-1][1:3] and pdb < sys.argv[-1]:
                continue
            print pdb
            if pdb in [
                    '1a0k',  ## 310 helix?
                    '2aeb',  ## helix kink
                    '2aoc',
                    '2aod',
                    '2aog',  ## some error...
                    '2c03',
                    '2c04',
                    '3bxd',  ## bent helix
                    '4fbp',
                    '5fbp',  ## pro residue in i+6 position
                    '1a2f',
                    '1a2g',  ## gly residue in i+1,i+3 position (310 helix?)
            ]:
                continue
            if pdb in [  ## from phipsiparseall
                    ## std_res_name in neither REMARK465 nor SEQRES (large peptide length)
                    '1a3q',
                    '1c3w',
                    '1c3x',
                    '1ad5',
                    ## hetID in SEQRES and HETATM    but not MODRES
                    '2b2u',
                    '2a2x',
                    '2b7f',
                    '2c2k',
                    '2c2m',
                    '2c2o',
                    '2c2z',
                    '2aal',
                    '1bdu',
                    '2ag3',
                    '2age',
                    '2agg',
                    '1an5',
                    '2ci1',
                    '2ank',
                    ##                ## hetID in SEQRES and REMARK465 but not MODRES
                    ##                '1aco','7acn','8acn','2aig','3aig','1g1f','1g1s','2f4i',
                    ## hetID in MODRES               but not SEQRES
                    '2a4o',
                    '3bbd',
                    '1orw',
                    ## std_res in REMARK465 but not SEQRES
                    '2uva',
                    '2uva',
                    '2uvc',
                    ## hetID in SEQRES but not REMARK465,ATOM
                    '2vhn',
                    ## REMARK465 records missing
                    '1c04',
                    '3b9v',
                    '1fka',
                    '1deq',
                    '2jcc',
                    '2jj4',
                    '1i3q',
                    '1i50',
                    '1i6h',
                    '1iw7',
                    '1p0t',
                    '1smy',
                    '1uf2',
                    '2v0z',
                    '2v7n',
                    '2vs4',
                    '1zbb',
                    '1zlv',
                    ## remark470 records missing (alpha carbon only in most cases)
                    '1cc0',
                    '1f1o',
                    '1ffk',
                    '3b5d',
                    '3b5w',
                    '3b5x',
                    '3b5y',
                    '3b5z',
                    '3b61',
                    '3b62',
                    '1bdx',
                    '1d3l',
                    '1gix',
                    '1e8s',
                    '1j5a',
                    '4cro',
                    '1i9w',
                    '1giy',
                    '2v9l',

                    ## SEQRES/ATOM conflict (incorrect res_name)
                    '2c38',
                    '2j01',
                    '2j03',
                    '4icd',
                    '2aew',
                    '2plv',
                    '1d2q',

                    ## remark465 initiation line missing
                    '2iwq',
                    '2v07',

                    ## incorrect residue numbers (N-terminal)
                    '1ef0',
                    '2ji5',
                    '1eu3',
                    ## incorrect residue numbers (reversed)
                    '9lpr',
                    ## incorrect residue numbers (insertion)
                    '1a4k',
                    '2ged',
                    '1cj0',
                    ## incorrect residue numbers (altloc)
                    '2bb3',
                    ## incorrect residue numbers (C-terminal)
                    '1a7l',
                    '1ce0',
                    '2ci0',
                    '2cib',
                    '1f32',
                    ## inccorect residue numbers (type error)
                    '1f7o',
                    '1f7p',
                    '1ke8',
                    '1kj4',
                    '1n3f',
                    '1nlq',
                    '1nv8',
                    '1nv9',
                    '1pxx',
                    '2by6',
                    '1ll0',
                    '2bfk',
                    '2gnk',
                    '1e5r',
                    '1lox',
                    '1ca8',
                    '1x11',
                    ## incorrect residue numbers (REMARK465)
                    '1jn6',
                    ## incorrect residue numbers (ATOM)
                    '2qqh',
                    ## incorrect residue numbers ("reset"/"broken"... 28,29,30,1,2,3)
                    '1jpl',
                    '1juq',
                    '1k4j',
                    '1q6j',
                    '1q6m',
                    '2oxg',
                    '2oxr',
                    '2qzl',
                    '1swf',
                    '1swg',
                    '2z5s',
                    '1yr6',
                    '1yr7',
                    '1yr8',
                    '1yr9',
                    ## incorrect residue numbers (iCodes)
                    '1iao',
                    '2iad',
                    '1dki',
                    '1es0',
                    '7pck',
                    '1pfz',
                    '1qdm',
                    '1ygp',
                    '2qri',
                    '2qrs',
                    '2qrt',
                    '3dgv',
                    ## incorrect residue numbers (none)
                    '2om7',

                    ## incorrect chain ID (ATOM)
                    '1bml',
                    '1h74',
                    '1jwt',
                    ## incorrect iCode (ATOM)
                    '2ius',
                    ## incorrect SEQRES sequence
                    '1clw',

                    ## identical ATOM/HETATM IDs (coordinate section)
                    '1h9h',
                    '427d',
                    '2olb',
                    '11gs',
                    '121p',
                    '12gs',
                    '13gs',
                    '16gs',
                    '16pk',
                    '17gs',
                    '185d',
                    '18gs',
                    '193d',
                    '19gs',
                    '1a05',
                    '1a0i',
                    '1a25',
                    '1a3l',
                    '1a44',
                    '1a48',
                    '1a4a',
                    '1a4b',
                    '1a4c',
                    '1a4f',
                    '1a4g',
                    '1a4k',
                    '1a4q',
                    '1a52',
                    '1a5a',
                    '1a5b',
                    '1a5z',
                    '1a65',
                    '1a69',
                    '1a6g',
                    '1a6m',
                    '1a6q',
                    '1a71',
                    '1a78',
                    '1a79',
                    '1a8i',
                    '1a8s',
                    '1a8u',
                    '1a9c',
                    '1a9m',
                    '1a9x',
                    '1a9y',
                    '1a9z',
                    '1aal',
                    '1aax',
                    '1aaz',
                    '1aba',
                    '1abw',
                    '1aby',
                    '1ad8',
                    '1ad9',
                    '1adb',
                    '1adc',
                    '1adf',
                    '1adg',
                    '1adj',
                    '1aec',
                    '1af6',
                    '1afa',
                    '1afb',
                    '1ag0',
                    '1ah8',
                    '1ahe',
                    '1ahf',
                    '1ahx',
                    '1ahy',
                    '1ai4',
                    '1ai5',
                    '1ai6',
                    '1ai7',
                    '1ai8',
                    '1aix',
                    '1aj6',
                    '1aj9',
                    '1ajn',
                    '1ajp',
                    '1ajq',
                    '1aku',
                    '1akv',
                    '1all',
                    '1ami',
                    '1ao5',
                    '1aok',
                    '1aor',
                    '1apm',
                    '1aq1',
                    '1aq6',
                    '1aq7',
                    '1ar1',
                    '1ari',
                    '1art',
                    '1arx',
                    '1asm',
                    '1asn',
                    '1aso',
                    '1asp',
                    '1asq',
                    '1at1',
                    '1atg',
                    '1atl',
                    '1atn',
                    '1atp',
                    '1au1',
                    '1aua',
                    '1auj',
                    '1aus',
                    '1av4',
                    '1av6',
                    '1avb',
                    '1avf',
                    '1avq',
                    '1awb',
                    '1axa',
                    '1axd',
                    '1axs',
                    '1aya',
                    '1ayo',
                    '1ayr',
                    '1azr',
                    '1azs',
                    '1b08',
                    '1b0m',
                    '1b25',
                    '1b4k',
                    '1b4n',
                    '1b55',
                    '1b6h',
                    '1b8t',
                    '1b92',
                    '1b9f',
                    '1bb1',
                    '1bch',
                    '1bcj',
                    '1bcr',
                    '1bcs',
                    '1bd0',
                    '1be3',
                    '1beh',
                    '1ben',
                    '1bfn',
                    '1bgy',
                    '1bh3',
                    '1bij',
                    '1biq',
                    '1biz',
                    '1bj3',
                    '1bja',
                    '1bk5',
                    '1bks',
                    '1bnl',
                    '1bow',
                    '1bps',
                    '1bqa',
                    '1bqd',
                    '1bqh',
                    '1brh',
                    '1brk',
                    '1brt',
                    '1bsk',
                    '1btc',
                    '1bvc',
                    '1bvd',
                    '1bwn',
                    '1bwo',
                    '1cdk',
                    '1cdm',
                    '1cel',
                    '1ces',
                    '1cf5',
                    '1cgk',
                    '1ch0',
                    '1ch4',
                    '1cls',
                    '1cml',
                    '1cnt',
                    '1coh',
                    '1con',
                    '1cow',
                    '1cpc',
                    '1cpq',
                    '1cpr',
                    '1crx',
                    '1ctf',
                    '1ctp',
                    '1cxe',
                    '1cxf',
                    '1cxh',
                    '1czf',
                    '1d33',
                    '1d35',
                    '1d39',
                    '1d40',
                    '1d61',
                    '1d8h',
                    '1daj',
                    '1dan',
                    '1dbj',
                    '1dbk',
                    '1dbp',
                    '1dbr',
                    '1dff',
                    '1dhy',
                    '1dif',
                    '1djc',
                    '1dkk',
                    '1dl4',
                    '1dlr',
                    '1dls',
                    '1dmr',
                    '1dms',
                    '1dpe',
                    '1dpm',
                    '1drf',
                    '1drj',
                    '1drk',
                    '1dut',
                    '1eas',
                    '1ecc',
                    '1ecg',
                    '1efg',
                    '1eg6',
                    '1elp',
                    '1ent',
                    '1epm',
                    '1epn',
                    '1epp',
                    '1epq',
                    '1eta',
                    '1etb',
                    '1eth',
                    '1etj',
                    '1fax',
                    '1fbt',
                    '1fdh',
                    '1fgh',
                    '1fgj',
                    '1fig',
                    '1fiv',
                    '1fkb',
                    '1fki',
                    '1fn1',
                    '1fpc',
                    '1fpi',
                    '1fpj',
                    '1fpl',
                    '1fsa',
                    '1fui',
                    '1fuo',
                    '1fup',
                    '1fuq',
                    '1fur',
                    '1fv7',
                    '1fyl',
                    '1g5l',
                    '1gac',
                    '1gaf',
                    '1gan',
                    '1gbu',
                    '1gdi',
                    '1gic',
                    '1gj2',
                    '1glc',
                    '1gld',
                    '1gle',
                    '1gli',
                    '1gmk',
                    '1gmp',
                    '1gmr',
                    '1gnm',
                    '1gnn',
                    '1gno',
                    '1gnw',
                    '1gpa',
                    '1gpd',
                    '1gqh',
                    '1gsu',
                    '1gsy',
                    '1gti',
                    '1gtm',
                    '1gtp',
                    '1hab',
                    '1hac',
                    '1hah',
                    '1hbg',
                    '1hbi',
                    '1hbs',
                    '1hbt',
                    '1hcn',
                    '1hcs',
                    '1hct',
                    '1hds',
                    '1hdt',
                    '1hfp',
                    '1hfq',
                    '1hfr',
                    '1hga',
                    '1hgb',
                    '1hgc',
                    '1hgt',
                    '1hho',
                    '1hih',
                    '1hii',
                    '1hiv',
                    '1hmd',
                    '1hmo',
                    '1hpc',
                    '1hr3',
                    '1hro',
                    '1hrp',
                    '1htp',
                    '1huj',
                    '1huk',
                    '1hur',
                    '1hvq',
                    '1hvr',
                    '1hxp',
                    '1hya',
                    '1i3t',
                    '1i47',
                    '1i7v',
                    '1iak',
                    '1ibg',
                    '1icj',
                    '1idc',
                    '1ide',
                    '1iea',
                    '1ieb',
                    '1igt',
                    '1igy',
                    '1ils',
                    '1ilu',
                    '1imc',
                    '1imd',
                    '1ipw',
                    '1irn',
                    '1iro',
                    '1ith',
                    '1ixx',
                    '1izb',
                    '1j8l',
                    '1jaw',
                    '1jdb',
                    '1jfd',
                    '1jka',
                    '1jkb',
                    '1jkc',
                    '1jkd',
                    '1jlx',
                    '1jpc',
                    '1jsa',
                    '1jst',
                    '1jsu',
                    '1kao',
                    '1kpe',
                    '1kr7',
                    '1krb',
                    '1krc',
                    '1krn',
                    '1ksa',
                    '1kvq',
                    '1kvr',
                    '1kvs',
                    '1kvt',
                    '1kvu',
                    '1lam',
                    '1lcj',
                    '1lck',
                    '1lcp',
                    '1ldp',
                    '1len',
                    '1lia',
                    '1lkk',
                    '1lkl',
                    '1llo',
                    '1lml',
                    '1loa',
                    '1lob',
                    '1lpa',
                    '1lpb',
                    '1lth',
                    '1ltr',
                    '1lu2',
                    '1lya',
                    '1lyb',
                    '1lyw',
                    '1m5k',
                    '1mae',
                    '1maf',
                    '1mbd',
                    '1mfr',
                    '1mhe',
                    '1mhk',
                    '1mmb',
                    '1mmo',
                    '1mmp',
                    '1mmq',
                    '1mmr',
                    '1mpa',
                    '1mpg',
                    '1mpq',
                    '1mpr',
                    '1mwe',
                    '1myf',
                    '1myh',
                    '1myj',
                    '1nah',
                    '1nai',
                    '1nas',
                    '1nbb',
                    '1nbc',
                    '1nbe',
                    '1nci',
                    '1nco',
                    '1nec',
                    '1nfd',
                    '1nfp',
                    '1nlk',
                    '1nn2',
                    '1nnc',
                    '1np1',
                    '1nrs',
                    '1nsa',
                    '1nsc',
                    '1nsd',
                    '1nzr',
                    '1oaa',
                    '1oat',
                    '1ohj',
                    '1olc',
                    '1ord',
                    '1otg',
                    '1ouu',
                    '1ova',
                    '1ovw',
                    '1p35',
                    '1pag',
                    '1pam',
                    '1pbo',
                    '1pea',
                    '1pgt',
                    '1php',
                    '1ply',
                    '1pnk',
                    '1pnl',
                    '1pnm',
                    '1poi',
                    '1ppm',
                    '1ppr',
                    '1psc',
                    '1psh',
                    '1ptv',
                    '1pty',
                    '1pyt',
                    '1qbi',
                    '1qd6',
                    '1qdc',
                    '1qf8',
                    '1qge',
                    '1qha',
                    '1qhf',
                    '1qpr',
                    '1qrd',
                    '1qs8',
                    '1qsg',
                    '1raa',
                    '1rab',
                    '1rac',
                    '1rad',
                    '1rae',
                    '1raf',
                    '1rag',
                    '1rah',
                    '1rai',
                    '1rbl',
                    '1rcm',
                    '1rcp',
                    '1rdg',
                    '1rdi',
                    '1rdj',
                    '1rdk',
                    '1rdl',
                    '1rdm',
                    '1rdn',
                    '1rdv',
                    '1rdx',
                    '1rdy',
                    '1rdz',
                    '1rem',
                    '1rge',
                    '1rgf',
                    '1rgg',
                    '1rgh',
                    '1rn1',
                    '1rnc',
                    '1rsy',
                    '1rtf',
                    '1rth',
                    '1rti',
                    '1rtj',
                    '1rtm',
                    '1rvw',
                    '1sac',
                    '1scu',
                    '1sda',
                    '1sdk',
                    '1sdl',
                    '1sep',
                    '1sfi',
                    '1sft',
                    '1sgc',
                    '1shb',
                    '1shd',
                    '1skj',
                    '1sky',
                    '1slb',
                    '1slc',
                    '1sli',
                    '1slt',
                    '1slu',
                    '1smp',
                    '1sps',
                    '1stc',
                    '1sty',
                    '1swd',
                    '1swe',
                    '1swn',
                    '1swp',
                    '1swr',
                    '1taq',
                    '1tar',
                    '1tat',
                    '1taw',
                    '1tcb',
                    '1tcf',
                    '1tco',
                    '1tei',
                    '1tet',
                    '1thb',
                    '1ths',
                    '1tlc',
                    '1tlg',
                    '1tli',
                    '1tlp',
                    '1tmb',
                    '1tmn',
                    '1tmu',
                    '1tn4',
                    '1tpf',
                    '1tpk',
                    '1try',
                    '1trz',
                    '1tsd',
                    '1ttp',
                    '1ttq',
                    '1tub',
                    '1tyl',
                    '1tym',
                    '1tyu',
                    '1tyw',
                    '1tyx',
                    '1udg',
                    '1udh',
                    '1uma',
                    '1uz8',
                    '1vdr',
                    '1vrt',
                    '1vru',
                    '1vwt',
                    '1wav',
                    '1wgi',
                    '1wgj',
                    '1whs',
                    '1wht',
                    '1wyk',
                    '1xel',
                    '1xgm',
                    '1xgn',
                    '1xgs',
                    '1xik',
                    '1xso',
                    '1yag',
                    '1yec',
                    '1yef',
                    '1yeg',
                    '1yeh',
                    '1yrq',
                    '1zeg',
                    '1zeh',
                    '1zni',
                    '1znj',
                    '20gs',
                    '211d',
                    '219d',
                    '21gs',
                    '221p',
                    '22gs',
                    '253d',
                    '256b',
                    '258d',
                    '25c8',
                    '277d',
                    '2a3h',
                    '2aaa',
                    '2aac',
                    '2aae',
                    '2ahj',
                    '2arc',
                    '2at1',
                    '2atc',
                    '2ay1',
                    '2ay2',
                    '2ay3',
                    '2ay4',
                    '2ay5',
                    '2ay6',
                    '2ay7',
                    '2ay8',
                    '2ay9',
                    '2azu',
                    '2btf',
                    '2bvw',
                    '2c7e',
                    '2cah',
                    '2cht',
                    '2cmm',
                    '2cpk',
                    '2cst',
                    '2d34',
                    '2d95',
                    '2da8',
                    '2dhb',
                    '2dhn',
                    '2dmr',
                    '2dri',
                    '2ecp',
                    '2fbj',
                    '2fus',
                    '2gli',
                    '2glr',
                    '2gls',
                    '2gsa',
                    '2gss',
                    '2hck',
                    '2hhb',
                    '2hhd',
                    '2hk6',
                    '2hmq',
                    '2hmz',
                    '2hr7',
                    '2iep',
                    '2kau',
                    '2lal',
                    '2lhb',
                    '2mas',
                    '2mcp',
                    '2mhb',
                    '2mpa',
                    '2mpr',
                    '2msb',
                    '2nac',
                    '2np1',
                    '2otc',
                    '2ovw',
                    '2oxi',
                    '2pax',
                    '2pcp',
                    '2pfl',
                    '2pgh',
                    '2pgt',
                    '2phk',
                    '2pk4',
                    '2pld',
                    '2ple',
                    '2prc',
                    '2prg',
                    '2q41',
                    '2qwa',
                    '2qwb',
                    '2qwc',
                    '2qwd',
                    '2qwe',
                    '2qwf',
                    '2qwg',
                    '2qwh',
                    '2qwi',
                    '2qwj',
                    '2qwk',
                    '2r2f',
                    '2ran',
                    '2rkm',
                    '2rus',
                    '2scp',
                    '2sec',
                    '2shk',
                    '2sli',
                    '2sn3',
                    '2sni',
                    '2sod',
                    '2taa',
                    '2tgd',
                    '2tli',
                    '2tmn',
                    '2tn4',
                    '2trc',
                    '2tsc',
                    '2wea',
                    '2web',
                    '2wec',
                    '308d',
                    '367d',
                    '380d',
                    '381d',
                    '382d',
                    '3a3h',
                    '3at1',
                    '3azu',
                    '3chb',
                    '3cyt',
                    '3dmr',
                    '3eng',
                    '3fru',
                    '3fx2',
                    '3grs',
                    '3gss',
                    '3gst',
                    '3hat',
                    '3hhb',
                    '3ljr',
                    '3lkf',
                    '3np1',
                    '3p2p',
                    '3pax',
                    '3pca',
                    '3pcb',
                    '3pcc',
                    '3pcd',
                    '3pce',
                    '3pcf',
                    '3pcg',
                    '3pch',
                    '3pci',
                    '3pcj',
                    '3pck',
                    '3pcn',
                    '3pmg',
                    '3prc',
                    '3prn',
                    '3pro',
                    '3sc2',
                    '3sli',
                    '3sqc',
                    '3tli',
                    '3tmn',
                    '421p',
                    '455d',
                    '456c',
                    '4a3h',
                    '4at1',
                    '4cts',
                    '4dfr',
                    '4dmr',
                    '4eng',
                    '4fx2',
                    '4gr1',
                    '4gsa',
                    '4gss',
                    '4hhb',
                    '4hmg',
                    '4mdh',
                    '4np1',
                    '4ovw',
                    '4pax',
                    '4rub',
                    '4sli',
                    '4tgl',
                    '4tmn',
                    '5at1',
                    '5bir',
                    '5dnb',
                    '5fx2',
                    '5gss',
                    '5hmg',
                    '5mdh',
                    '5prn',
                    '5tmn',
                    '621p',
                    '6abp',
                    '6adh',
                    '6at1',
                    '6gsp',
                    '6gss',
                    '6gsu',
                    '6gsv',
                    '6gsw',
                    '6gsx',
                    '6ins',
                    '6prn',
                    '6tmn',
                    '721p',
                    '7aat',
                    '7abp',
                    '7at1',
                    '7gss',
                    '7odc',
                    '7prn',
                    '7taa',
                    '830c',
                    '8aat',
                    '8abp',
                    '8at1',
                    '8gss',
                    '8prn',
                    '966c',
                    '9gss',
                    '9pap',
                    '9rub',
                    '2q44',
                    ## identical IDs in REMARK465 and ATOM records
                    '3c9u',

                    ## altloc not in MODRES records
                    '354d',
            ]:
                continue
            file = '%s%s/%s' % (path, dir, ent)
            fd = open(file, 'r')
            lines = fd.readlines()
            fd.close()

            ##
            ## parse header
            ##
            d_header = parse_pdb.parse_header(lines)
            if not 'HELIX' in d_header.keys():
                continue
            if d_header['EXPDTA'] != 'X-RAY':
                continue

            ##
            ## parse coordinates
            ##
            d_coordinates, d_ATOMseq = parse_pdb.parse_coordinates(
                lines,
                d_header,
            )

            for chain in d_header['SEQRES']['chains']:
                if chain not in d_header['HELIX'].keys():
                    continue
                for helix_no in d_header['HELIX'][chain].keys():
                    res_no = d_header['HELIX'][chain][helix_no]['res_no']
                    iCode = d_header['HELIX'][chain][helix_no]['iCode']
                    helix_len = d_header['HELIX'][chain][helix_no]['len']
                    if not res_no in d_coordinates['chains'][chain][
                            'residues'].keys():
                        continue
                    seq_no = d_coordinates['chains'][chain]['residues'][
                        res_no]['d_iCodes'][iCode]['seq_no']
                    if seq_no == -1:
                        continue  ## hoeker-loesning
                    if seq_no >= len(d_ATOMseq[chain]['res_nos']):
                        continue  ## hoeker-loesning
                    if d_ATOMseq[chain]['res_nos'][seq_no] != res_no:
                        stop
                    if d_ATOMseq[chain]['iCodes'][seq_no] != iCode:
                        stop
##                    if d_ATOMseq[chain]['altlocs'][seq_no] != ' ':
##                        stop
                    if helix_len < 11:
                        continue
                    l_angles = []
                    for i in range(
                            seq_no + 1,
                            min(seq_no + helix_len,
                                len(d_ATOMseq[chain]['res_nos'])) - 4 - 1):
                        res_no1 = d_ATOMseq[chain]['res_nos'][i]
                        res_no2 = d_ATOMseq[chain]['res_nos'][i + 4]
                        iCode1 = d_ATOMseq[chain]['iCodes'][i]
                        iCode2 = d_ATOMseq[chain]['iCodes'][i + 4]
                        record1 = d_ATOMseq[chain]['records'][i]
                        record2 = d_ATOMseq[chain]['records'][i + 4]

                        if not res_no1 in d_coordinates['chains'][chain][
                                'residues'].keys() and record1 == 'REMARK465':
                            continue
                        if not res_no2 in d_coordinates['chains'][chain][
                                'residues'].keys() and record2 == 'REMARK465':
                            continue

                        res_name1 = d_coordinates['chains'][chain]['residues'][
                            res_no1]['d_iCodes'][iCode1]['res_name']

                        if not 'O' in d_coordinates['chains'][chain][
                                'residues'][res_no1]['d_iCodes'][iCode1][
                                    'atoms'].keys():
                            continue
                        if not 'H' in d_coordinates['chains'][chain][
                                'residues'][res_no2]['d_iCodes'][iCode2][
                                    'atoms'].keys():
                            continue

                        O = d_coordinates['chains'][chain]['residues'][
                            res_no1]['d_iCodes'][iCode1]['atoms']['O'][
                                'coordinate']
                        N = d_coordinates['chains'][chain]['residues'][
                            res_no2]['d_iCodes'][iCode2]['atoms']['N'][
                                'coordinate']
                        H = d_coordinates['chains'][chain]['residues'][
                            res_no2]['d_iCodes'][iCode2]['atoms']['H'][
                                'coordinate']
                        vNH = N - H
                        vOH = O - H
                        angle = (180 / math.pi) * math.acos(
                            numpy.dot(vNH, vOH) /
                            math.sqrt(sum(vNH**2) * sum(vOH**2)))
                        print pdb, chain, res_no1, res_no2, angle
                        if res_name1 != 'GLY' and angle < 78:  ## 113
                            print res_name1
                            stop1
                        if res_name1 == 'GLY' and angle < 101:
                            stop1
                        l_angles += [angle]
                    if len(l_angles) > 0:
                        print helix_len, sum(l_angles) / len(l_angles)
                        if sum(l_angles) / len(l_angles) > 171:
                            stop3
                        if sum(l_angles) / len(l_angles) < 149:
                            stop4
## Example #3
## 0
def _fractional2cartesian_matrix(edges, angles):
    """Build the matrix that maps fractional to Cartesian coordinates.

    edges  -- sequence whose first three items are the cell edges a, b, c
    angles -- sequence whose first three items are the cell angles
              alpha, beta, gamma, given in degrees

    Returns a 3x3 upper-triangular numpy array.
    """
    a = edges[0]
    b = edges[1]
    c = edges[2]
    cos_alpha = math.cos(math.pi * angles[0] / 180.)
    cos_beta = math.cos(math.pi * angles[1] / 180.)
    gamma = math.pi * angles[2] / 180.
    cos_gamma = math.cos(gamma)
    sin_gamma = math.sin(gamma)
    ## unit cell volume
    volume = a * b * c * math.sqrt(
        1 - cos_alpha**2 - cos_beta**2 - cos_gamma**2 +
        2 * (cos_alpha * cos_beta * cos_gamma))
    return numpy.array([
        [a, b * cos_gamma, c * cos_beta],
        [0, b * sin_gamma, c * (cos_alpha - cos_beta * cos_gamma) / sin_gamma],
        [0, 0, volume / (a * b * sin_gamma)],
    ])


def _symmetry_image(d_atoms, matrix_symop, vector_symop, vector_shift):
    """Apply one symmetry operation plus a lattice shift to a residue.

    d_atoms      -- the 'atoms' dictionary of one residue
                    (atom name -> {'coordinate': ...})
    matrix_symop -- rotation matrix of the symmetry operation
    vector_symop -- translation vector of the symmetry operation
    vector_shift -- Cartesian lattice translation added afterwards

    Returns a list of (atom_name, coordinate, probe) tuples in the
    iteration order of d_atoms. coordinate is the transformed position
    that is written out; probe is the same position rounded to three
    decimals, which is what the distance test uses.
    """
    image = []
    for atom_name in d_atoms.keys():
        coordinate = numpy.dot(
            matrix_symop, d_atoms[atom_name]['coordinate']) + vector_symop
        coordinate += vector_shift
        probe = coordinate.copy()
        for axis in range(3):
            probe[axis] = round(probe[axis], 3)
        image += [(atom_name, coordinate, probe)]
    return image


def _residue_in_contact(image, d_coordinates):
    """Tell whether a transformed residue touches the parsed structure.

    image         -- list returned by _symmetry_image
    d_coordinates -- coordinate dictionary returned by
                     parse_pdb.parse_coordinates

    Returns True as soon as one atom pair is closer than the sum of the
    two radii looked up in d_radii_vdw (keyed by the first character of
    the atom name) plus 0.25, otherwise False.
    """
    for chain1 in d_coordinates['chains'].keys():
        d_residues1 = d_coordinates['chains'][chain1]['residues']
        for res_no1 in d_residues1.keys():
            for iCode1 in d_residues1[res_no1]['d_iCodes'].keys():
                d_atoms1 = d_residues1[res_no1]['d_iCodes'][iCode1]['atoms']
                for atom_name1 in d_atoms1.keys():
                    coordinate1 = d_atoms1[atom_name1]['coordinate']
                    distant = False
                    for atom_name2, coordinate2, probe2 in image:
                        dist = math.sqrt(sum((probe2 - coordinate1)**2))
                        dist_threshold = (
                            d_radii_vdw[atom_name1[0]] +
                            d_radii_vdw[atom_name2[0]] + 0.25)
                        if dist < dist_threshold:
                            return True
                        ## length of lysine is 7-8Angstrom, so a pair this
                        ## far apart means the two residues cannot touch
                        if dist > 10.:
                            distant = True
                            break
                    ## give up on this residue and check the next iCode1
                    if distant:
                        break
    return False


def main():
    """Collect crystal-contact residues for each listed entry and run WHAT IF.

    For every PDB ID in the hard-coded list the function
      1. reads the entry and parses it with parse_pdb,
      2. applies every REMARK290 symmetry operation combined with every
         lattice translation in l_translations to every residue,
      3. keeps the transformed residues that are in contact with the
         parsed structure and formats them with build_line,
      4. writes '<pdb>_crystalcontacts.pdb',
      5. writes a WHAT IF command script and executes it, redirecting the
         output to 'whatif_surface/<pdb>.out'.
    Finally the WHAT IF scratch files are removed. Returns None.

    The computed distance is used for the contact test; it was previously
    overwritten with 0, which flagged every transformed residue as a contact.
    """

    ## other entries used previously:
    ## 2hhb, 1hho, 1hv4, 2lzt, 1c76, 1c77, 1c78
    for pdb in [
            '2lzm',
    ]:

        ## raw string: the backslashes are literal path separators
        fd = open(r'C:\Users\Tommy Carstensen\pdb\%s.pdb' % (pdb), 'r')
        try:
            lines = fd.readlines()
        finally:
            fd.close()

        d_header = parse_pdb.parse_header(lines)
        d_coordinates, d_ATOMseq = parse_pdb.parse_coordinates(
            lines,
            d_header,
        )

        matrix_fractional2cartesian = _fractional2cartesian_matrix(
            d_header['CRYST1']['edges'],
            d_header['CRYST1']['angles'],
        )

        ## NOTE(review): the generated records are appended to the lines
        ## read from the input file, so the output file starts with the
        ## complete original entry - confirm that this is intended.
        for symop in d_header['REMARK290'].keys():
            print('symop %s' % (symop,))
            matrix_symop = d_header['REMARK290'][symop]['matrix']
            vector_symop = d_header['REMARK290'][symop]['vector']
            for i_translation, vector_translation in enumerate(l_translations):
                print('symop %s translation %s' % (symop, i_translation))
                ## the lattice shift is the same for every atom
                vector_shift = numpy.dot(
                    matrix_fractional2cartesian, vector_translation)

                for chain2 in d_coordinates['chains'].keys():
                    d_residues2 = d_coordinates['chains'][chain2]['residues']
                    for res_no2 in d_residues2.keys():
                        for iCode2 in d_residues2[res_no2]['d_iCodes'].keys():
                            ## transform the residue once instead of once
                            ## per atom it is compared with
                            image = _symmetry_image(
                                d_residues2[res_no2]['d_iCodes'][iCode2][
                                    'atoms'],
                                matrix_symop,
                                vector_symop,
                                vector_shift,
                            )
                            if not _residue_in_contact(image, d_coordinates):
                                continue
                            ## write the whole residue, not just the atom
                            ## that made the contact
                            for atom_name3, coordinate3, probe3 in image:
                                lines += [build_line(
                                    atom_name3,
                                    d_coordinates,
                                    coordinate3,
                                    chain2,
                                    res_no2,
                                    iCode2,
                                    symop,
                                    i_translation,
                                )]

        fd = open('%s_crystalcontacts.pdb' % (pdb), 'w')
        try:
            fd.writelines(lines)
        finally:
            fd.close()

        source = 'whatif.src'
        fd = open(source, 'w')
        try:
            fd.writelines([
                '/software/whatif/DO_WHATIF.COM <<EOF\n',
                'GETMOL %s.pdb\n' % (pdb, ),
                '%s\n' % (pdb, ),
                '%DELWAT\n',
                '%DELLIG\n',
                '%SOUSHL\n',  ## crystal contacts
                '%MAKMOL\n',
                '\n',  ## The file header will be copied from a PDB file. Hit return for the default header that has no information in it.
                'soushl_%s.pdb\n' % (pdb),
                'TOT 0\n',
                '\n',  ## REMARKS
                'STOP\n',
                'Y\n',
            ])
        finally:
            fd.close()
        ## NOTE(review): relies on the shell started by os.system providing
        ## a `source` builtin - confirm on the target machine.
        os.system('source %s > whatif_surface/%s.out' % (source, pdb))

    ## remove the scratch files left behind by WHAT IF
    os.system(
        'rm DRG* DAVADRUG.PDB ALTERR.LOG PDBFILE whatif.src FOR*.DAT WHATIF.FIG'
    )

    return
def main():
    """Check backbone hydrogen-bond angles of annotated helices in a PDB mirror.

    Walks the subdirectories of the local PDB mirror in sorted order and, for
    every entry that is not excluded, has HELIX records and has EXPDTA equal
    to 'X-RAY':

    * parses header and coordinates with parse_pdb,
    * for every HELIX record of at least 11 residues, pairs residue i with
      residue i+4 and computes the angle at atom H (residue i+4) between
      atom N (residue i+4) and atom O (residue i),
    * prints every angle and the mean angle of each helix.

    Command line: an optional PDB ID to start from.  The start directory is
    taken from the first argument and the start ID from the last argument
    (the same thing when exactly one argument is given).  Without arguments
    every entry is processed.

    Raises RuntimeError when the sequence index of a helix start disagrees
    with the ATOM sequence, when a single angle falls below its threshold
    (78 degrees, or 101 degrees for GLY) or when the mean angle of a helix
    falls outside 149-171 degrees.  These are deliberate halts so that the
    offending entry can be inspected.

    NOTE(review): this file defines more than one function called main;
    a later definition replaces this one at import time.
    """

    import os
    import numpy
    import math
    import sys
    sys.path.append('/home/people/tc/svn/tc_sandbox/misc/')
    import parse_pdb

    path = '/oxygenase_local/data/pdb/'

    ## optional resume point; without arguments nothing is skipped
    if len(sys.argv) > 1:
        start_dir = sys.argv[1][1:3]
        start_pdb = sys.argv[-1]
    else:
        start_dir = ''
        start_pdb = ''

    ## entries skipped because of their helix geometry
    ## (sets are built once, so the test per entry is a hash lookup)
    excluded_geometry = frozenset([
        '1a0k', ## 310 helix?
        '2aeb', ## helix kink
        '2aoc','2aod','2aog', ## some error...
        '2c03','2c04','3bxd', ## bent helix
        '4fbp','5fbp', ## pro residue in i+6 position
        '1a2f','1a2g', ## gly residue in i+1,i+3 position (310 helix?)
        ])
    ## entries skipped because of inconsistent records
    excluded_records = frozenset([ ## from phipsiparseall
        ## std_res_name in neither REMARK465 nor SEQRES (large peptide length)
        '1a3q','1c3w','1c3x','1ad5',
        ## hetID in SEQRES and HETATM    but not MODRES
        '2b2u','2a2x','2b7f','2c2k','2c2m','2c2o','2c2z','2aal','1bdu',
        '2ag3','2age','2agg','1an5','2ci1','2ank',
##        ## hetID in SEQRES and REMARK465 but not MODRES
##        '1aco','7acn','8acn','2aig','3aig','1g1f','1g1s','2f4i',
        ## hetID in MODRES               but not SEQRES
        '2a4o','3bbd','1orw',
        ## std_res in REMARK465 but not SEQRES
        '2uva','2uva','2uvc',
        ## hetID in SEQRES but not REMARK465,ATOM
        '2vhn',
        ## REMARK465 records missing
        '1c04','3b9v','1fka','1deq','2jcc','2jj4','1i3q','1i50','1i6h',
        '1iw7','1p0t','1smy','1uf2','2v0z','2v7n','2vs4','1zbb','1zlv',
        ## remark470 records missing (alpha carbon only in most cases)
        '1cc0','1f1o','1ffk','3b5d','3b5w','3b5x','3b5y','3b5z','3b61',
        '3b62','1bdx','1d3l','1gix','1e8s','1j5a','4cro','1i9w','1giy',
        '2v9l',

        ## SEQRES/ATOM conflict (incorrect res_name)
        '2c38','2j01','2j03','4icd','2aew','2plv','1d2q',

        ## remark465 initiation line missing
        '2iwq','2v07',

        ## incorrect residue numbers (N-terminal)
        '1ef0','2ji5','1eu3',
        ## incorrect residue numbers (reversed)
        '9lpr',
        ## incorrect residue numbers (insertion)
        '1a4k','2ged','1cj0',
        ## incorrect residue numbers (altloc)
        '2bb3',
        ## incorrect residue numbers (C-terminal)
        '1a7l','1ce0','2ci0','2cib','1f32',
        ## incorrect residue numbers (type error)
        '1f7o','1f7p','1ke8','1kj4','1n3f','1nlq','1nv8','1nv9','1pxx',
        '2by6','1ll0','2bfk','2gnk','1e5r','1lox','1ca8','1x11',
        ## incorrect residue numbers (REMARK465)
        '1jn6',
        ## incorrect residue numbers (ATOM)
        '2qqh',
        ## incorrect residue numbers ("reset"/"broken"... 28,29,30,1,2,3)
        '1jpl','1juq','1k4j','1q6j','1q6m','2oxg','2oxr','2qzl',
        '1swf','1swg','2z5s','1yr6','1yr7','1yr8','1yr9',
        ## incorrect residue numbers (iCodes)
        '1iao','2iad','1dki','1es0','7pck','1pfz','1qdm','1ygp',
        '2qri','2qrs','2qrt','3dgv',
        ## incorrect residue numbers (none)
        '2om7',

        ## incorrect chain ID (ATOM)
        '1bml','1h74','1jwt',
        ## incorrect iCode (ATOM)
        '2ius',
        ## incorrect SEQRES sequence
        '1clw',

        ## identical ATOM/HETATM IDs (coordinate section)
                '1h9h','427d','2olb','11gs','121p','12gs','13gs','16gs','16pk','17gs','185d','18gs','193d','19gs','1a05','1a0i','1a25','1a3l','1a44','1a48','1a4a','1a4b','1a4c','1a4f','1a4g','1a4k','1a4q','1a52','1a5a','1a5b','1a5z','1a65','1a69','1a6g','1a6m','1a6q','1a71','1a78','1a79','1a8i', '1a8s', '1a8u', '1a9c', '1a9m', '1a9x', '1a9y', '1a9z', '1aal', '1aax', '1aaz', '1aba', '1abw', '1aby', '1ad8', '1ad9', '1adb', '1adc', '1adf', '1adg', '1adj', '1aec', '1af6', '1afa', '1afb', '1ag0', '1ah8', '1ahe', '1ahf', '1ahx', '1ahy', '1ai4', '1ai5', '1ai6', '1ai7', '1ai8', '1aix', '1aj6', '1aj9', '1ajn', '1ajp', '1ajq', '1aku', '1akv', '1all', '1ami', '1ao5', '1aok', '1aor', '1apm', '1aq1', '1aq6', '1aq7', '1ar1', '1ari', '1art', '1arx', '1asm', '1asn', '1aso', '1asp', '1asq', '1at1', '1atg', '1atl', '1atn', '1atp', '1au1', '1aua', '1auj', '1aus', '1av4', '1av6', '1avb', '1avf', '1avq', '1awb', '1axa', '1axd', '1axs', '1aya', '1ayo', '1ayr', '1azr', '1azs', '1b08', '1b0m', '1b25', '1b4k', '1b4n', '1b55', '1b6h', '1b8t', '1b92', '1b9f', '1bb1', '1bch', '1bcj', '1bcr', '1bcs', '1bd0', '1be3', '1beh', '1ben', '1bfn', '1bgy', '1bh3', '1bij', '1biq', '1biz', '1bj3', '1bja', '1bk5', '1bks', '1bnl', '1bow', '1bps', '1bqa', '1bqd', '1bqh', '1brh', '1brk', '1brt', '1bsk', '1btc', '1bvc', '1bvd', '1bwn', '1bwo', '1cdk', '1cdm', '1cel', '1ces', '1cf5', '1cgk', '1ch0', '1ch4', '1cls', '1cml', '1cnt', '1coh', '1con', '1cow', '1cpc', '1cpq', '1cpr', '1crx', '1ctf', '1ctp', '1cxe', '1cxf', '1cxh', '1czf', '1d33', '1d35', '1d39', '1d40', '1d61', '1d8h', '1daj', '1dan', '1dbj', '1dbk', '1dbp', '1dbr', '1dff', '1dhy', '1dif', '1djc', '1dkk', '1dl4', '1dlr', '1dls', '1dmr', '1dms', '1dpe', '1dpm', '1drf', '1drj', '1drk', '1dut', '1eas', '1ecc', '1ecg', '1efg', '1eg6', '1elp', '1ent', '1epm', '1epn', '1epp', '1epq', '1eta', '1etb', '1eth', '1etj', '1fax', '1fbt', '1fdh', '1fgh', '1fgj', '1fig', '1fiv', '1fkb', '1fki', '1fn1', '1fpc', '1fpi', '1fpj', '1fpl', '1fsa', '1fui', '1fuo', 
'1fup', '1fuq', '1fur', '1fv7', '1fyl', '1g5l', '1gac', '1gaf', '1gan', '1gbu', '1gdi', '1gic', '1gj2', '1glc', '1gld', '1gle', '1gli', '1gmk', '1gmp', '1gmr', '1gnm', '1gnn', '1gno', '1gnw', '1gpa', '1gpd', '1gqh', '1gsu', '1gsy', '1gti', '1gtm', '1gtp', '1hab', '1hac', '1hah', '1hbg', '1hbi', '1hbs', '1hbt', '1hcn', '1hcs', '1hct',  '1hds', '1hdt', '1hfp', '1hfq', '1hfr', '1hga', '1hgb', '1hgc', '1hgt', '1hho', '1hih', '1hii', '1hiv', '1hmd', '1hmo', '1hpc', '1hr3', '1hro', '1hrp', '1htp', '1huj', '1huk', '1hur', '1hvq', '1hvr', '1hxp', '1hya', '1i3t', '1i47', '1i7v', '1iak', '1ibg', '1icj', '1idc', '1ide', '1iea', '1ieb', '1igt', '1igy', '1ils', '1ilu', '1imc', '1imd', '1ipw', '1irn', '1iro', '1ith', '1ixx', '1izb', '1j8l', '1jaw', '1jdb', '1jfd', '1jka', '1jkb', '1jkc', '1jkd', '1jlx', '1jpc', '1jsa', '1jst', '1jsu', '1kao', '1kpe', '1kr7', '1krb', '1krc', '1krn', '1ksa', '1kvq', '1kvr', '1kvs', '1kvt', '1kvu', '1lam', '1lcj', '1lck', '1lcp', '1ldp', '1len', '1lia', '1lkk', '1lkl', '1llo', '1lml', '1loa', '1lob', '1lpa', '1lpb', '1lth', '1ltr', '1lu2', '1lya', '1lyb', '1lyw', '1m5k', '1mae', '1maf', '1mbd', '1mfr', '1mhe', '1mhk', '1mmb', '1mmo', '1mmp', '1mmq', '1mmr', '1mpa', '1mpg', '1mpq', '1mpr', '1mwe', '1myf', '1myh', '1myj', '1nah', '1nai', '1nas', '1nbb', '1nbc', '1nbe', '1nci', '1nco', '1nec', '1nfd', '1nfp', '1nlk', '1nn2', '1nnc', '1np1', '1nrs', '1nsa', '1nsc', '1nsd', '1nzr', '1oaa', '1oat', '1ohj', '1olc', '1ord', '1otg', '1ouu', '1ova', '1ovw', '1p35', '1pag', '1pam', '1pbo', '1pea', '1pgt', '1php', '1ply', '1pnk', '1pnl', '1pnm', '1poi', '1ppm', '1ppr', '1psc', '1psh', '1ptv', '1pty', '1pyt', '1qbi', '1qd6', '1qdc', '1qf8', '1qge', '1qha', '1qhf', '1qpr', '1qrd', '1qs8', '1qsg', '1raa', '1rab', '1rac', '1rad', '1rae', '1raf', '1rag', '1rah', '1rai', '1rbl', '1rcm', '1rcp', '1rdg', '1rdi', '1rdj', '1rdk', '1rdl', '1rdm', '1rdn', '1rdv', '1rdx', '1rdy', '1rdz', '1rem', '1rge', '1rgf', '1rgg', '1rgh', '1rn1', '1rnc', '1rsy', '1rtf', '1rth', 
'1rti', '1rtj', '1rtm', '1rvw', '1sac', '1scu', '1sda', '1sdk', '1sdl', '1sep', '1sfi', '1sft', '1sgc', '1shb', '1shd', '1skj', '1sky', '1slb', '1slc', '1sli', '1slt', '1slu', '1smp', '1sps', '1stc', '1sty', '1swd', '1swe', '1swn', '1swp', '1swr', '1taq', '1tar', '1tat', '1taw', '1tcb', '1tcf', '1tco', '1tei', '1tet', '1thb', '1ths', '1tlc', '1tlg', '1tli', '1tlp', '1tmb', '1tmn', '1tmu', '1tn4', '1tpf', '1tpk', '1try', '1trz', '1tsd', '1ttp', '1ttq', '1tub', '1tyl', '1tym', '1tyu', '1tyw', '1tyx', '1udg', '1udh', '1uma', '1uz8', '1vdr', '1vrt', '1vru', '1vwt', '1wav', '1wgi', '1wgj', '1whs', '1wht', '1wyk', '1xel', '1xgm', '1xgn', '1xgs', '1xik', '1xso', '1yag', '1yec', '1yef', '1yeg', '1yeh', '1yrq', '1zeg', '1zeh', '1zni', '1znj', '20gs', '211d', '219d', '21gs', '221p', '22gs', '253d', '256b', '258d', '25c8', '277d', '2a3h', '2aaa', '2aac', '2aae', '2ahj', '2arc', '2at1', '2atc', '2ay1', '2ay2', '2ay3', '2ay4', '2ay5', '2ay6', '2ay7', '2ay8', '2ay9', '2azu', '2btf', '2bvw', '2c7e', '2cah', '2cht', '2cmm', '2cpk', '2cst', '2d34', '2d95', '2da8', '2dhb', '2dhn', '2dmr', '2dri', '2ecp', '2fbj', '2fus', '2gli', '2glr', '2gls', '2gsa', '2gss', '2hck', '2hhb', '2hhd', '2hk6', '2hmq', '2hmz', '2hr7', '2iep', '2kau', '2lal', '2lhb', '2mas', '2mcp', '2mhb', '2mpa', '2mpr', '2msb', '2nac', '2np1','2otc', '2ovw', '2oxi', '2pax', '2pcp', '2pfl', '2pgh', '2pgt', '2phk', '2pk4', '2pld', '2ple', '2prc', '2prg', '2q41', '2qwa', '2qwb', '2qwc', '2qwd', '2qwe', '2qwf', '2qwg', '2qwh', '2qwi', '2qwj', '2qwk', '2r2f', '2ran', '2rkm', '2rus', '2scp', '2sec', '2shk', '2sli', '2sn3', '2sni', '2sod', '2taa', '2tgd', '2tli', '2tmn', '2tn4', '2trc', '2tsc', '2wea', '2web', '2wec', '308d', '367d', '380d', '381d', '382d', '3a3h', '3at1', '3azu', '3chb', '3cyt', '3dmr', '3eng', '3fru', '3fx2', '3grs', '3gss', '3gst', '3hat', '3hhb', '3ljr', '3lkf', '3np1', '3p2p', '3pax', '3pca', '3pcb', '3pcc', '3pcd', '3pce', '3pcf', '3pcg', '3pch', '3pci', '3pcj', '3pck', '3pcn', '3pmg', '3prc', '3prn', 
'3pro', '3sc2', '3sli', '3sqc', '3tli', '3tmn', '421p', '455d', '456c', '4a3h', '4at1', '4cts', '4dfr', '4dmr', '4eng', '4fx2', '4gr1', '4gsa', '4gss', '4hhb', '4hmg', '4mdh', '4np1', '4ovw', '4pax', '4rub', '4sli', '4tgl', '4tmn', '5at1', '5bir', '5dnb', '5fx2', '5gss', '5hmg', '5mdh', '5prn', '5tmn', '621p', '6abp', '6adh', '6at1', '6gsp', '6gss', '6gsu', '6gsv', '6gsw', '6gsx', '6ins', '6prn', '6tmn', '721p', '7aat', '7abp', '7at1', '7gss', '7odc', '7prn', '7taa', '830c', '8aat', '8abp', '8at1', '8gss', '8prn', '966c', '9gss', '9pap', '9rub','2q44',
        ## identical IDs in REMARK465 and ATOM records
        '3c9u',

        ## altloc not in MODRES records
        '354d',
        ])

    for subdir in sorted(os.listdir(path)):
        if subdir < start_dir:
            continue
        print(subdir)
        for ent in sorted(os.listdir('%s%s' %(path,subdir))):
            pdb = ent[3:7]
            if subdir == start_pdb[1:3] and pdb < start_pdb:
                continue
            print(pdb)
            if pdb in excluded_geometry or pdb in excluded_records:
                continue

            ## close the file even if reading fails
            fd = open('%s%s/%s' %(path,subdir,ent),'r')
            try:
                lines = fd.readlines()
            finally:
                fd.close()

            ##
            ## parse header
            ##
            d_header = parse_pdb.parse_header(lines)
            if 'HELIX' not in d_header:
                continue
            if d_header['EXPDTA'] != 'X-RAY':
                continue

            ##
            ## parse coordinates
            ##
            d_coordinates, d_ATOMseq = parse_pdb.parse_coordinates(lines, d_header,)

            for chain in d_header['SEQRES']['chains']:
                if chain not in d_header['HELIX']:
                    continue
                for helix_no in d_header['HELIX'][chain].keys():
                    d_helix = d_header['HELIX'][chain][helix_no]
                    res_no = d_helix['res_no']
                    iCode = d_helix['iCode']
                    helix_len = d_helix['len']
                    d_residues = d_coordinates['chains'][chain]['residues']
                    if res_no not in d_residues:
                        continue
                    seq_no = d_residues[res_no]['d_iCodes'][iCode]['seq_no']
                    if seq_no == -1:
                        continue ## quick-and-dirty workaround
                    d_seq = d_ATOMseq[chain]
                    n_residues = len(d_seq['res_nos'])
                    if seq_no >= n_residues:
                        continue ## quick-and-dirty workaround
                    ## the sequence index must point back at the helix start
                    if d_seq['res_nos'][seq_no] != res_no:
                        raise RuntimeError(
                            '%s chain %s helix %s: res_no %s at seq_no %s, expected %s' %(
                                pdb, chain, helix_no,
                                d_seq['res_nos'][seq_no], seq_no, res_no,
                                )
                            )
                    if d_seq['iCodes'][seq_no] != iCode:
                        raise RuntimeError(
                            '%s chain %s helix %s: iCode %r at seq_no %s, expected %r' %(
                                pdb, chain, helix_no,
                                d_seq['iCodes'][seq_no], seq_no, iCode,
                                )
                            )
                    if helix_len < 11:
                        continue
                    l_angles = []
                    for i in range(seq_no+1,min(seq_no+helix_len,n_residues)-4-1):
                        res_no1 = d_seq['res_nos'][i]
                        res_no2 = d_seq['res_nos'][i+4]
                        iCode1 = d_seq['iCodes'][i]
                        iCode2 = d_seq['iCodes'][i+4]
                        record1 = d_seq['records'][i]
                        record2 = d_seq['records'][i+4]

                        ## residues listed in REMARK465 have no coordinates
                        if res_no1 not in d_residues and record1 == 'REMARK465':
                            continue
                        if res_no2 not in d_residues and record2 == 'REMARK465':
                            continue

                        d_residue1 = d_residues[res_no1]['d_iCodes'][iCode1]
                        res_name1 = d_residue1['res_name']

                        if 'O' not in d_residue1['atoms']:
                            continue
                        d_atoms2 = d_residues[res_no2]['d_iCodes'][iCode2]['atoms']
                        ## N is needed as well as H; skip instead of failing with KeyError
                        if 'H' not in d_atoms2 or 'N' not in d_atoms2:
                            continue

                        O = d_residue1['atoms']['O']['coordinate']
                        N = d_atoms2['N']['coordinate']
                        H = d_atoms2['H']['coordinate']
                        vNH = N-H
                        vOH = O-H
                        cosine = numpy.dot(vNH,vOH)/math.sqrt(
                            sum(vNH**2)*sum(vOH**2)
                            )
                        ## rounding can push the cosine of (anti)parallel vectors
                        ## marginally outside [-1,1], which math.acos rejects
                        if cosine > 1.:
                            cosine = 1.
                        elif cosine < -1.:
                            cosine = -1.
                        angle = (180/math.pi)*math.acos(cosine)
                        print('%s %s %s %s %s' %(pdb, chain, res_no1, res_no2, angle))
                        ## halt on unexpectedly small angles so the entry can be inspected
                        if res_name1 != 'GLY' and angle < 78: ## 113
                            print(res_name1)
                            raise RuntimeError(
                                '%s chain %s residues %s/%s (%s): angle %s below 78' %(
                                    pdb, chain, res_no1, res_no2, res_name1, angle,
                                    )
                                )
                        if res_name1 == 'GLY' and angle < 101:
                            raise RuntimeError(
                                '%s chain %s residues %s/%s (GLY): angle %s below 101' %(
                                    pdb, chain, res_no1, res_no2, angle,
                                    )
                                )
                        l_angles.append(angle)
                    if l_angles:
                        mean_angle = sum(l_angles)/len(l_angles)
                        print('%s %s' %(helix_len, mean_angle))
                        ## halt when the helix average is outside the expected range
                        if mean_angle > 171:
                            raise RuntimeError(
                                '%s chain %s helix %s: mean angle %s above 171' %(
                                    pdb, chain, helix_no, mean_angle,
                                    )
                                )
                        if mean_angle < 149:
                            raise RuntimeError(
                                '%s chain %s helix %s: mean angle %s below 149' %(
                                    pdb, chain, helix_no, mean_angle,
                                    )
                                )
def _residues_in_contact(d_atoms1, l_atoms2, d_radii):
    """Return True if two residues have an atom pair within contact distance.

    d_atoms1 maps atom names to dictionaries holding a 'coordinate';
    l_atoms2 is a list of (atom name, coordinate) tuples; d_radii maps the
    first character of an atom name to a radius.  Two atoms are in contact
    when their distance is below the sum of their radii plus 0.25.  The scan
    of this residue pair is abandoned (False) as soon as an atom pair is more
    than 10 apart.
    """
    for atom_name1 in d_atoms1.keys():
        coordinate1 = d_atoms1[atom_name1]['coordinate']
        for atom_name2, coordinate2 in l_atoms2:
            dist = math.sqrt(sum((coordinate2-coordinate1)**2))
            dist_treshold = d_radii[atom_name1[0]]+d_radii[atom_name2[0]]+0.25
            if dist < dist_treshold:
                return True
            ## the residues are too far apart for any of their atoms to touch
            if dist > 10.: ## length of lysine is 7-8Angstrom
                return False
    return False
def _any_residue_in_contact(d_chains, l_atoms2, d_radii):
    """Return True if any residue of any chain in d_chains touches l_atoms2."""
    for chain1 in d_chains.keys():
        d_residues1 = d_chains[chain1]['residues']
        for res_no1 in d_residues1.keys():
            d_iCodes1 = d_residues1[res_no1]['d_iCodes']
            for iCode1 in d_iCodes1.keys():
                if _residues_in_contact(d_iCodes1[iCode1]['atoms'], l_atoms2, d_radii):
                    return True
    return False
def main():
    """Write the residues of symmetry copies that touch the parsed structure.

    For every listed PDB entry the function

    * reads the coordinate file and parses it with parse_pdb,
    * builds the fractional-to-Cartesian matrix from the CRYST1 record,
    * applies every REMARK290 operator combined with every translation in
      l_translations to every residue, and appends the transformed residue
      (one line per atom, built by build_line) to the lines read from the
      input file if it is in contact with any untransformed residue,
    * writes the result to '<pdb>_crystalcontacts.pdb',
    * writes a WHAT IF script and runs it through the shell.

    Afterwards the WHAT IF scratch files are removed.  The names parse_pdb,
    l_translations, d_radii_vdw and build_line are expected at module level.

    The distance used for the contact test is the computed inter-atomic
    distance; an override that reset it to 0 (and thereby reported every
    symmetry copy as a contact) has been removed.
    """

    ## other entries kept (commented out) in the original list:
    ## 2hhb, 1hho, 1hv4, 2lzt, 1c76, 1c77, 1c78
    for pdb in [
        '2lzm',
        ]:

        ## raw string: the backslashes are literal path separators
        fd = open(r'C:\Users\Tommy Carstensen\pdb\%s.pdb' %(pdb),'r')
        try:
            lines = fd.readlines()
        finally:
            fd.close()

        d_header = parse_pdb.parse_header(lines)
        d_coordinates, d_ATOMseq = parse_pdb.parse_coordinates(lines,d_header,)

        a = d_header['CRYST1']['edges'][0]
        b = d_header['CRYST1']['edges'][1]
        c = d_header['CRYST1']['edges'][2]
        alpha = math.pi*d_header['CRYST1']['angles'][0]/180.
        beta = math.pi*d_header['CRYST1']['angles'][1]/180.
        gamma = math.pi*d_header['CRYST1']['angles'][2]/180.
        ## unit cell volume
        volume = a*b*c*math.sqrt(1-math.cos(alpha)**2-math.cos(beta)**2-math.cos(gamma)**2+2*(math.cos(alpha)*math.cos(beta)*math.cos(gamma)))
        matrix_fractional2cartesian = numpy.array([
            [a, b*math.cos(gamma), c*math.cos(beta),],
            [0, b*math.sin(gamma), c*(math.cos(alpha)-math.cos(beta)*math.cos(gamma))/math.sin(gamma),],
            [0,0,volume/(a*b*math.sin(gamma)),],
            ])

        d_chains = d_coordinates['chains']
        ## the output starts with the lines of the input file;
        ## the lines of the contacting symmetry copies are appended to them
        for symop in d_header['REMARK290'].keys():
            print('symop %s' %(symop,))
            matrix_symop = d_header['REMARK290'][symop]['matrix']
            vector_symop = d_header['REMARK290'][symop]['vector']
            for i_translation in range(len(l_translations)):
                print('symop %s translation %s' %(symop, i_translation,))
                ## the Cartesian shift is the same for every atom
                vector_cartesian = numpy.dot(
                    matrix_fractional2cartesian,l_translations[i_translation],
                    )

                for chain2 in d_chains.keys():
                    d_residues2 = d_chains[chain2]['residues']
                    for res_no2 in d_residues2.keys():
                        d_iCodes2 = d_residues2[res_no2]['d_iCodes']
                        for iCode2 in d_iCodes2.keys():
                            d_atoms2 = d_iCodes2[iCode2]['atoms']

                            ## transform the residue once instead of once per atom pair
                            l_atoms2 = [] ## exact coordinates (written to file)
                            l_atoms2_rounded = [] ## 3 decimals (distance test)
                            for atom_name2 in d_atoms2.keys():
                                coordinate2 = numpy.dot(matrix_symop,d_atoms2[atom_name2]['coordinate'])+vector_symop
                                coordinate2 += vector_cartesian
                                l_atoms2.append((atom_name2,coordinate2,))
                                l_atoms2_rounded.append((
                                    atom_name2,
                                    numpy.array([round(x,3) for x in coordinate2]),
                                    ))

                            if not _any_residue_in_contact(d_chains,l_atoms2_rounded,d_radii_vdw):
                                continue

                            for atom_name3, coordinate3 in l_atoms2:
                                line = build_line(
                                    atom_name3,d_coordinates,coordinate3,
                                    chain2,res_no2,iCode2,
                                    symop, i_translation,
                                    )
                                lines.append(line)

        fd = open('%s_crystalcontacts.pdb' %(pdb),'w')
        try:
            fd.writelines(lines)
        finally:
            fd.close()

        source = 'whatif.src'
        fd = open(source,'w')
        try:
            fd.writelines([
                '/software/whatif/DO_WHATIF.COM <<EOF\n',
                'GETMOL %s.pdb\n' %(pdb,),
                '%s\n' %(pdb,),
                '%DELWAT\n',
                '%DELLIG\n',
                '%SOUSHL\n', ## crystal contacts
                '%MAKMOL\n',
                '\n', ## The file header will be copied from a PDB file. Hit return for the default header that has no information in it.
                'soushl_%s.pdb\n' %(pdb),
                'TOT 0\n',
                '\n', ## REMARKS
                'STOP\n',
                'Y\n',
                ])
        finally:
            fd.close()
        os.system('source %s > whatif_surface/%s.out' %(source, pdb))

    os.system('rm DRG* DAVADRUG.PDB ALTERR.LOG PDBFILE whatif.src FOR*.DAT WHATIF.FIG')

    return