def expand_sym_cello(info):
    '''Print symmetry copies that land close to their own parent atom. (not used)

    Uses the orthogonal coordinates.  Every atom is sent through each operator
    returned by get_sym_operator(info, 'orth'), converted to fractional
    coordinates, shifted by -1/0/+1 along each cell axis and converted back.
    A copy is reported (as an ATOM-like line followed by the symmetry code
    and the distance) when it

    * lies inside the bounding box of the model padded by 5 on every side,
    * is no further than 0.7 from the parent atom, and
    * does not belong to the unshifted cell (code 555).
    '''
    # Looked up exactly as before (a missing key still raises), but not used.
    _unused_frac = (info['xf'], info['yf'], info['zf'])
    xs, ys, zs = info['x'], info['y'], info['z']
    margin = 5
    print('The space group = %s' % info['spg'])
    rot1, rot2, rot3 = get_sym_operator(info, 'orth')
    frac, orth = util.frac_orth_matrix(info['cell'])  # conversion matrices

    xmin, xmax = min(xs) - margin, max(xs) + margin
    ymin, ymax = min(ys) - margin, max(ys) + margin
    zmin, zmax = min(zs) - margin, max(zs) + margin

    def _apply(row, point):
        # one row of a 3x4 operator: rotation part plus translation
        return (row[0] * point[0] + row[1] * point[1] + row[2] * point[2] +
                row[3])

    # same visiting order as three nested loops over (-1, 0, 1)
    shifts = [(a, b, c)
              for a in (-1, 0, 1) for b in (-1, 0, 1) for c in (-1, 0, 1)]
    atom_fmt = ('ATOM %5d %4s%1s%3s %1s%4s%1s %8.3f%8.3f%8.3f%6.2f%6.2f%3d '
                '%s %.3f')

    for i in range(len(ys)):  # every atom
        parent = [info['x'][i], info['y'][i], info['z'][i]]
        for ii, jj, kk in shifts:
            scode = '%d%d%d' % (ii + 5, jj + 5, kk + 5)
            for j in range(len(rot1)):  # every symmetry operator
                moved = [
                    _apply(rot1[j], parent),
                    _apply(rot2[j], parent),
                    _apply(rot3[j], parent)
                ]
                cellpos = util.matrix_prod(frac, moved)
                mate = util.matrix_prod(
                    orth, [cellpos[0] + ii, cellpos[1] + jj, cellpos[2] + kk])

                outside = (mate[0] < xmin or mate[0] > xmax or
                           mate[1] < ymin or mate[1] > ymax or
                           mate[2] < zmin or mate[2] > zmax)
                if outside:
                    continue

                d = math.sqrt((parent[0] - mate[0])**2 +
                              (parent[1] - mate[1])**2 +
                              (parent[2] - mate[2])**2)
                if d > 0.7:
                    continue

                if info['nres'][i] == 2408:  # debugging output for one residue
                    print('%s %s %s' % ('2408=', d, scode))
                if (ii, jj, kk) == (0, 0, 0):
                    continue

                print(atom_fmt %
                      (1, info['atom'][i], info['alt'][i], info['comp'][i],
                       info['chain'][i], info['nres'][i], info['ins'][i],
                       mate[0], mate[1], mate[2], 0, 0, info['mtr'][i], scode,
                       d))
def data_from_cif(file, info):
    '''Load data from an mmCIF format file into the dictionary ``info``.

    file: path of the mmCIF file.
    info: dictionary that is updated in place and also returned.

    The atom_site dictionary returned by parse.atom_site() is merged into
    ``info`` and the keys 'cell', 'spg' and 'mtrix' are set from the parsed
    file.  The keys 'xf', 'yf', 'zf' and 'mtr' are always set, as they are by
    data_from_pdb(): the fractional lists stay empty when no usable cell
    (first edge > 1.1) is present, and 'mtr' holds 1 for every atom.
    '''
    # the context manager closes the file even if reading fails
    with open(file, 'r') as fp:
        flist = fp.readlines()

    cell = parse.cell(flist)
    spg = parse.space_group_name(flist)
    mtrix = parse.ncs_matrix(flist)
    # NOTE(review): the SCALE result was never used by this function; the call
    # is kept only so any error it raises on bad input is still raised.
    parse.scale(flist)
    atom = parse.atom_site(flist)  # a dictionary of per-atom lists

    info.update(atom)
    info['cell'] = cell
    info['spg'] = spg
    info['mtrix'] = mtrix

    xf, yf, zf = [], [], []
    if cell and cell[0] > 1.1:
        frac, _orth = util.frac_orth_matrix(cell)  # conversion matrices
        for i in range(len(info['x'])):
            xyzf = util.matrix_prod(
                frac, [info['x'][i], info['y'][i], info['z'][i]])
            xf.append(xyzf[0])
            yf.append(xyzf[1])
            zf.append(xyzf[2])

    info['xf'] = xf
    info['yf'] = yf
    info['zf'] = zf
    info['mtr'] = [1] * len(info.get('x', []))  # always 1 for the 1st copy
    return info
def shift_xyz_into_unitcell(file, outfile):
    '''Shift the asu into the unit cell and write a new PDB file.

    file: the input coordinates. It must be a PDB file!
    outfile: name of the output file; file + '_shifted' is used when empty.

    The records before the first ATOM/HETATM line and after the last one are
    copied unchanged and in their original order.  Atom records are rewritten
    with the shifted coordinates, each followed by its ANISOU record when the
    input has one.  Other records located between the atoms (for example
    TER) are not copied.  Nothing is written when the unit cell is missing.
    '''
    if not outfile:
        outfile = file + '_shifted'

    info = get_atominfo(file)
    if not info['cell']:
        print('Error: The unit cell is not extracted! check you xyz file.\n')
        return

    _frac, orth = util.frac_orth_matrix(info['cell'])  # conversion matrices
    xfn, yfn, zfn = shift_xyz2unit(info['xf'], info['yf'], info['zf'])
    natom = len(info['xf'])

    with open(file, 'r') as fp:
        lines = fp.readlines()

    def _is_atom(line):
        return line.startswith(('ATOM', 'HETA'))

    # ANISOU payload (columns 27 onwards) keyed by columns 13-26, i.e. atom
    # name, alt. location, residue name, chain and residue number.
    anis = {}
    for line in lines:
        if _is_atom(line):
            anis[line[12:26]] = ''
        elif line.startswith('ANISOU'):
            anis[line[12:26]] = line[26:]

    atom_rows = [k for k, line in enumerate(lines) if _is_atom(line)]
    if atom_rows:
        header = lines[:atom_rows[0]]
        # The ANISOU of the last atom is written together with that atom, so
        # it must not be repeated in the trailer.
        footer = [line for line in lines[atom_rows[-1] + 1:]
                  if not line.startswith('ANISOU')]
    else:
        header, footer = lines, []

    # Fixed PDB columns: x starts in column 31, segid in 73 and the element
    # symbol in 77, which is where data_from_pdb() reads them back from.
    atom_fmt = ('%-6s%5s %4s%1s%3s%2s%4d%1s   %8.3f%8.3f%8.3f%6.2f%6.2f'
                '      %4s%2s  \n')

    with open(outfile, 'w') as fw:
        fw.writelines(header)

        for i in range(natom):
            # convert the shifted fractional position back to orthogonal
            xno = util.matrix_prod(orth, [xfn[i], yfn[i], zfn[i]])

            alt, ins = info['alt'][i], info['ins'][i]
            if '.' in alt:
                alt = ' '
            if '.' in ins:
                ins = ' '
            atom_o, comp = info['atom_o'][i], info['comp'][i]
            ch, nres = info['chain'][i], info['nres'][i]
            natm = info['natm'][i]

            fw.write(atom_fmt %
                     (info['group'][i], natm, atom_o, alt, comp, ch, nres, ins,
                      xno[0], xno[1], xno[2], float(info['occ'][i]),
                      float(info['biso'][i]), info['segid'][i],
                      info['symbol'][i]))

            key = '%4s%1s%3s%2s%4d' % (atom_o, alt, comp, ch, nres)
            # .get(): a key rebuilt from the parsed values may differ from
            # the raw columns; such an atom simply gets no ANISOU record.
            payload = anis.get(key)
            if payload:
                fw.write('ANISOU%5s %s%s' % (natm, key, payload))

        fw.writelines(footer)

    print('The new PDB file = %s\n' % outfile)
def get_sym_operator(info, idd):
    '''Build the symmetry operators of the space group info['spg'].

    The operators are read from space_group_cif.text: every row whose quoted
    name equals the space group contributes one operator.  The numbers start
    at column 23 of the row: nine rotation elements followed by three
    numerator/denominator pairs for the translation.

    idd: 'frac' returns the operators as listed (fractional space); any other
         value returns them converted to the orthogonal frame
         (orth * R * frac, translation orth * t), the same as (REMARK 290).

    Returns m1, m2, m3: for each operator the rows
        m1 = x11,x12,x13,t1;  m2 = y11,y12,y13,t2;  m3 = z11,z12,z13,t3
    '''
    spg = info['spg']
    frac, orth = util.frac_orth_matrix(info['cell'])  # conversion matrices
    m1, m2, m3 = [], [], []

    for row in space_group_cif.text.split('\n'):
        if "'" not in row:
            continue
        if row.split("'")[1] != spg:
            continue

        nums = [float(tok) for tok in row[22:].split()]

        # translation = numerator / denominator; stays 0 without a denominator
        shift = [0, 0, 0]
        for axis in range(3):
            denom = nums[10 + 2 * axis]
            if denom > 0:
                shift[axis] = nums[9 + 2 * axis] / float(denom)

        rot = [nums[0:3], nums[3:6], nums[6:9]]

        if idd == 'frac':
            m1.append(rot[0] + [shift[0]])
            m2.append(rot[1] + [shift[1]])
            m3.append(rot[2] + [shift[2]])
            continue

        shift_orth = util.matrix_prod(orth, shift)
        rot_orth = util.matrix_prod_(util.matrix_prod_(orth, rot), frac)
        for axis in range(3):
            rot_orth[axis].append(shift_orth[axis])
        m1.append(rot_orth[0])
        m2.append(rot_orth[1])
        m3.append(rot_orth[2])

    return m1, m2, m3
def _special_position_hits(info, dist_limit, extend):
    '''Return [atom_key, 'mtr scode operator distance'] for every symmetry
    copy lying closer than dist_limit to the atom it was generated from.
    '''
    frac, _orth = util.frac_orth_matrix(info['cell'])  # conversion matrices
    m1, m2, m3 = get_sym_operator(info, 'frac')  # symmetry operators
    xfn, yfn, zfn = shift_xyz2unit(info['xf'], info['yf'], info['zf'])

    xfb = util.matrix_prod(frac, [extend, extend, extend])  # boundary in frac
    xmin, xmax = expand_boundary(xfn, yfn, zfn, xfb)  # use the shifted

    cell = info['cell']
    natom = len(info['xf'])
    box = (-3, -2, -1, 0, 1, 2, 3)  # 7*7*7 = 343 cells
    hits = []
    for ii in box:
        for jj in box:
            for kk in box:
                scode = '%d%d%d' % (ii + 5, jj + 5, kk + 5)
                for j in range(len(m1)):  # number of operators
                    if j == 0 and scode == '555':
                        # first operator in the unshifted cell: never reported
                        continue
                    r1, r2, r3 = m1[j], m2[j], m3[j]  # pre-assign, faster
                    for i in range(natom):
                        x, y, z = xfn[i], yfn[i], zfn[i]  # shifted frac
                        xnf0 = r1[0] * x + r1[1] * y + r1[2] * z + r1[3] + ii
                        xnf1 = r2[0] * x + r2[1] * y + r2[2] * z + r2[3] + jj
                        xnf2 = r3[0] * x + r3[1] * y + r3[2] * z + r3[3] + kk
                        if (xnf0 < xmin[0] or xnf0 > xmax[0] or
                                xnf1 < xmin[1] or xnf1 > xmax[1] or
                                xnf2 < xmin[2] or xnf2 > xmax[2]):
                            continue

                        d = util.distf([x, y, z], [xnf0, xnf1, xnf2], cell)
                        if d < dist_limit:  # atom on special position
                            s1 = '%s_%s_%s_%s_%s_%s_%s_%d' % (
                                info['chain'][i], info['comp'][i],
                                info['nres'][i], info['atom'][i],
                                info['alt'][i], info['ins'][i],
                                info['natm'][i], i)
                            s2 = '%3d %s %d %.3f' % (info['mtr'][i], scode,
                                                     j + 1, d)
                            hits.append([s1, s2])
    return hits


def check_special_position(file, outfile, out_xyz):
    '''Report atoms sitting on special positions and check their occupancy.

    file: coordinates; can be in PDB/mmCIF format.
    outfile: report file; file + '_occ' is used when empty.
    out_xyz: name of the updated coordinate file handed to update_coord().

    the tolerance of distance is 0.25 for refmac, 0.5 for shelx, 1.0 for
    phenix; 0.5 is used here.

    Every atom is compared with its own symmetry copies in the surrounding
    7*7*7 cells.  For each atom found, check_coord_occ() supplies the
    occupancy and a message; messages containing 'Wrong occupancy' go to the
    report file.  Waters (HOH/DOD) whose original occupancy is 1 are passed
    to update_coord() when the input is mmCIF.
    '''
    if not outfile:
        outfile = file + '_occ'

    dist_limit = 0.5  # the minimum distance for atoms on special positions
    extend = 2
    swater, l_occ = [], []

    # the context manager closes the report on every exit path
    with open(outfile, 'w') as fw:
        info = get_atominfo(file)
        print('Searching atoms on special positions(space group=%s)..' %
              info['spg'])
        if not info['cell']:
            fw.write('Error: the unit cell is not extracted!\n')
            return

        nspecial = _special_position_hits(info, dist_limit, extend)
        if not nspecial:
            note = '\nNote: No atoms sit on special position.\n'
            print(note)
            fw.write(note)
            return

        t = '\nNote: The following atoms sit on special position.\n'
        print(t)
        fw.write(t)

        # group the hits per atom, keeping the order in which they were found
        dic = {}
        for key, hit in nspecial:
            dic.setdefault(key, []).append(hit)

        for s in sorted(dic):
            nfold = len(dic[s]) + 1
            op = ['555 1']  # the first one
            for hit in dic[s]:
                fields = hit.split()
                op.append('%s %s' % (fields[1], fields[2]))

            occ, ss = check_coord_occ(info, s, nfold)
            s1 = s.split('_')
            t = '\natom id=%s : symmetry fold=%d \n' % ('_'.join(s1[:-1]),
                                                        nfold)
            occ_orig = info['occ'][int(s1[-1])]  # last field = atom index
            print('%s %s %s %s' % (t.strip(), ' sym_code=', op,
                                   'occ_orig=%s' % occ_orig))
            print(ss)

            if ('_HOH_' in ss or '_DOD_' in ss) and float(occ_orig) == 1:
                swater.append(s1[:-1])
                l_occ.append(occ)

            if 'Wrong occupancy' in ss:
                fw.write(t)
                fw.write(ss)

    if l_occ and util.is_cif(file):
        update_coord(file, swater, l_occ, out_xyz)
        print('%d waters are updated with occupancy in the coordinates.' %
              len(l_occ))
        print('New coordinate= %s' % out_xyz)
    else:
        print('Coordinates are not updated (Reason: either not in cif or OCC not 1.00).')

    print('\nThe output file =%s\n' % outfile)
def data_from_pdb(file, info):
    '''Load data from a PDB format file into a dictionary.

    file: path of the PDB file.
    info: ignored; a new dictionary is built and returned.

    Header records are read up to the first ATOM/HETATM line: CRYST1 gives
    'cell' and 'spg', and MTRIX records whose trailing flag field does not
    contain '1' give 'mtrix'.  Atoms are read up to the first ENDMDL.
    Fractional coordinates ('xf', 'yf', 'zf') are only filled in when a cell
    with a first edge > 1.1 was found; otherwise they stay empty.  Blank
    alternate-location and insertion codes are stored as '.'.
    '''
    chain, nres, comp, atom, symbol = [], [], [], [], []
    alt, ins, biso, occ = [], [], [], []
    group, natm, natm_c, atom_o, segid = [], [], [], [], []
    mtrix, mtr, symop = [], [], []
    xf, yf, zf, x, y, z = [], [], [], [], [], []
    cell, spg = [], ''

    # the context manager closes the file even if reading fails
    with open(file, 'r') as fp:
        lines = fp.readlines()

    for v in lines:  # get header infor
        if v[:6] == 'MTRIX1' and '1' not in v[55:].strip():
            mt1 = [float(m) for m in v[10:55].split()]
        elif v[:6] == 'MTRIX2' and '1' not in v[55:].strip():
            mt2 = [float(m) for m in v[10:55].split()]
        elif v[:6] == 'MTRIX3' and '1' not in v[55:].strip():
            mt3 = [float(m) for m in v[10:55].split()]
            # assumes MTRIX1 and MTRIX2 of the same operator came first
            mtrix.append([mt1, mt2, mt3])
        elif 'CRYST1' in v[:6]:
            cell = [float(xx) for xx in v.split()[1:7]]
            spg = v[55:66].strip()  # space group: columns 56-66
        elif 'ATOM' in v[:4] or 'HETA' in v[:4]:
            break

    # Only ask for the conversion matrix when there is a real cell; without
    # CRYST1 the atoms are still loaded and the fractional lists stay empty.
    has_cell = bool(cell) and cell[0] > 1.1
    frac = util.frac_orth_matrix(cell)[0] if has_cell else None

    nat = 0
    for v in lines:
        if 'ENDMDL' in v[:6]:
            break
        if not ('ATOM' in v[:4] or 'HETA' in v[:4]):
            continue

        xx, yy, zz = float(v[30:38]), float(v[38:46]), float(v[46:54])
        x.append(xx)
        y.append(yy)
        z.append(zz)
        occ.append(float(v[54:60]))
        biso.append(float(v[60:66]))

        if has_cell:  # get the fractional xyz
            xyzf = util.matrix_prod(frac, [xx, yy, zz])
            xf.append(xyzf[0])
            yf.append(xyzf[1])
            zf.append(xyzf[2])

        # columns 71-74 take precedence over the regular chain columns
        chain.append(v[70:74].strip() or v[20:22].strip())

        group.append(v[:6].strip())
        natm.append(v[6:11].strip())
        natm_c.append(nat)
        nres.append(int(v[22:26]))
        comp.append(v[17:20].strip())
        atom.append(v[12:16].strip())
        atom_o.append(v[12:16])
        segid.append(v[72:76])
        symbol.append(v[76:78].strip())
        mtr.append(1)
        alt.append('.' if v[16:17] == ' ' else v[16:17])
        ins.append('.' if v[26:27] == ' ' else v[26:27])
        nat = nat + 1  # counting the atom/hetatm

    info = {
        'chain': chain,
        'nres': nres,
        'comp': comp,
        'atom': atom,
        'biso': biso,
        'occ': occ,
        'symbol': symbol,
        'alt': alt,
        'ins': ins,
        'x': x,
        'y': y,
        'z': z,
        'xf': xf,
        'yf': yf,
        'zf': zf,
        'mtr': mtr,
        'symop': symop,
        'spg': spg,
        'cell': cell,
        'mtrix': mtrix,
        'group': group,
        'natm': natm,
        'natm_c': natm_c,
        'atom_o': atom_o,
        'segid': segid
    }
    return info
def find_xyzlim_compound(compid, coord):
    '''find xyzlimit used by mapmask, and write the coord in cif or pdb format.

    compid: one or more atom_group_id joined by ':'; each one is
            model_compound_chainID_resnumber_alter_insertion
    coord: the coordinate file (mmCIF when util.is_cif(coord) == 1, else PDB)

    Returns (xyzlim, xyzcomp): the fractional limits of the selected atoms
    padded by 2.0 as 'xmin xmax ymin ymax zmin zmax', and the name of the
    file the selected atoms were written to.  ('', '') is returned when an
    id is malformed, when no atom matches or when no unit cell is available.
    The process exits when the atom_site items can not be read from a cif.
    '''
    ids = compid.split(':')
    id_fields = [x.strip().split('_') for x in ids]
    # validate before touching the fields, so a short id gives the message
    # below rather than an IndexError
    for i, fields in enumerate(id_fields):
        if len(fields) != 6:
            print(
                'Error: in group-id (%d). it should be (model_compound_chainID_resnumber_alter_insertion).'
                % (i + 1))
            return '', ''

    comp = '_'.join(ids[0].split('_')[:4])  # output name from the first id

    # Whole-id comparison: a substring test would also select e.g. residue
    # 'A' when 'DA' was requested.  PDB records carry no model number.
    wanted_cif = set('_'.join(fields) for fields in id_fields)
    wanted_pdb = set('_'.join(fields[1:]) for fields in id_fields)

    idd = util.is_cif(coord)
    xyzcomp = comp + '.cif' if idd == 1 else comp + '.pdb'

    cell = None
    xx, yy, zz = [], [], []

    # the context manager closes the output on every exit path
    with open(xyzcomp, 'w') as fw:
        if idd == 1:  # input cif format
            fw.write('data_xyzcomp\n#\n')
            with open(coord, 'r') as fp:
                flist = fp.readlines()

            items, values = cif.cifparse(flist, '_cell.')
            fw.write('\n#\n')
            for m, p in enumerate(items):
                fw.write("%s %s\n" % (p, values[m]))

            cell = cif.get_cell(flist)

            items, values = cif.cifparse(flist, '_atom_site.')
            comp_ids = cif.parse_values(items, values,
                                        "_atom_site.auth_comp_id")
            asym = cif.parse_values(items, values, "_atom_site.auth_asym_id")
            seq = cif.parse_values(items, values, "_atom_site.auth_seq_id")
            alt = cif.parse_values(items, values, "_atom_site.label_alt_id")
            ins = cif.parse_values(items, values,
                                   "_atom_site.pdbx_PDB_ins_code")
            x = cif.parse_values(items, values, "_atom_site.Cartn_x")
            y = cif.parse_values(items, values, "_atom_site.Cartn_y")
            z = cif.parse_values(items, values, "_atom_site.Cartn_z")
            model = cif.parse_values(items, values,
                                     "_atom_site.pdbx_PDB_model_num")

            if (not (alt and comp_ids and ins and asym and seq and x and y and
                     z)):
                print(
                    'Error: not enough infor. extraced from (%s). Check ciftokens'
                    % coord)
                sys.exit()

            fw.write('\n#\nloop_\n')
            for p in items:
                fw.write("%s\n" % p)

            row = cif.get_rows(items, values)
            for i in range(len(x)):
                alter, inst, mod = '.', '.', '1'
                if model and util.is_number(model[i]):
                    mod = model[i]
                if alt and alt[i] != '?':
                    alter = alt[i]
                if ins and ins[i] != '?':
                    inst = ins[i]

                id1 = '_'.join([mod, comp_ids[i], asym[i], seq[i], alter,
                                inst])
                if id1 in wanted_cif:
                    xx.append(float(x[i]))
                    yy.append(float(y[i]))
                    zz.append(float(z[i]))
                    for m in row[i]:
                        fw.write("%s " % m)
                    fw.write('\n')

        else:  # pdb format
            with open(coord, 'r') as fp:
                for x1 in fp:
                    if 'CRYST1' in x1[:6]:
                        fw.write(x1)
                        cell = [float(p) for p in x1[8:54].split()]
                    elif 'ATOM' in x1[:4] or 'HETATM' in x1[:6]:
                        alt = x1[16:17]
                        if alt.isspace():
                            alt = '.'
                        ins = x1[26:27]
                        if ins.isspace():
                            ins = '.'
                        resid = '_'.join([
                            x1[17:20].strip(), x1[20:22].strip(),
                            x1[22:26].strip(), alt, ins
                        ])
                        if resid in wanted_pdb:
                            fw.write(x1)  # only write the selected section
                            xx.append(float(x1[30:38]))
                            yy.append(float(x1[38:46]))
                            zz.append(float(x1[46:54]))

    if not xx or not yy or not zz:
        print('Error: %s can not be found in the coordinate. try a new id. ' %
              (compid))
        return '', ''

    if cell is None:  # e.g. a PDB file without a CRYST1 record
        print('Error: the unit cell can not be found in (%s).' % coord)
        return '', ''

    frac, _orth = util.frac_orth_matrix(cell)  # conversion matrices
    border = 2.0  # extend a little to cover more density
    xf_min = util.matrix_prod(
        frac, [min(xx) - border, min(yy) - border, min(zz) - border])
    xf_max = util.matrix_prod(
        frac, [max(xx) + border, max(yy) + border, max(zz) + border])

    xyzlim = '%.3f %.3f %.3f %.3f %.3f %.3f' % (
        xf_min[0], xf_max[0], xf_min[1], xf_max[1], xf_min[2], xf_max[2])
    return xyzlim, xyzcomp