def cut_map_around_xyz(mapfile, peppdb_in, pepid):
    '''Cut a map around the selected molecule using several CCP4 utilities.

    color: [x800080]  [xc00000] [xb0b0b0]

    mapfile:   the input map file.
    peppdb_in: the coordinate file; it is converted with cif.cif2pdb()
               first when util.is_cif() reports it as cif.
    pepid:     identifier used as the prefix of the output map name.

    The script returned by cut_map_scr() is made executable and run through
    the shell as "<script> <mapfile> <coordinates> <output map>".

    Returns the output map name, "<pepid>_cut.map".
    '''
    cut_map = pepid + '_cut.map'
    util.delete_file(cut_map)

    if util.is_cif(peppdb_in):
        coord_pdb = cif.cif2pdb(peppdb_in)
    else:
        coord_pdb = peppdb_in

    script = cut_map_scr()
    script_args = ' '.join([mapfile, coord_pdb, cut_map])
    os.system("chmod +x %s ; ./%s " % (script, script) + script_args)

    if util.is_cif(peppdb_in):
        # presumably the temporary file left behind by cif.cif2pdb()
        util.delete_file('%s.PDB' % peppdb_in)
    return cut_map
def check_ncs(file):
    '''Run the full check of the NCS groups on a coordinate file.

    Only files that util.is_cif() recognises are checked (through
    check_ncs_cif); any other input is silently skipped.
    Always returns None.
    '''
    if not util.is_cif(file):
        # Non-cif input is not handled here; nothing is reported.
        return
    check_ncs_cif(file)
def get_atominfo(file):
    '''Load atom information (pdb or cif) into a dictionary.

    The dictionary starts with an empty list under every key except 'spg',
    which starts as an empty string.  When the file passes
    util.check_file() it is read by data_from_cif() or data_from_pdb()
    (chosen by util.is_cif()) and the result is merged into the dictionary.

    Returns the dictionary; it is returned unfilled, after an error
    message, when the file check fails.
    '''
    list_keys = ('chain', 'nres', 'comp', 'atom', 'symbol', 'alt', 'ins',
                 'x', 'y', 'z', 'xf', 'yf', 'zf', 'occ', 'mtr', 'symop',
                 'spg', 'cell', 'mtrix', 'atom_o', 'group', 'natm',
                 'natm_c', 'segid')
    atominfo = dict((key, []) for key in list_keys)
    atominfo['spg'] = ''  # the only entry that is not a list

    if not util.check_file(100, file):
        print('Error: file (%s) not exist' % file)
        return atominfo

    loader = data_from_cif if util.is_cif(file) else data_from_pdb
    atominfo.update(loader(file, atominfo))
    return atominfo
def assign_alt_id(file, outfile):
    '''Assign alt id if it is wrong!

    file:    input coordinate file (pdb or cif), loaded by get_atominfo().
    outfile: name for the corrected file; when empty, the name chosen by
             update_ciffile()/update_pdbfile() is kept.

    Atoms reported by check_dis(info, 1) are grouped by the first four
    '_'-separated fields of their id; each group is sorted with
    util.gsort(group, 1, -1) and handed to check_alt().  Every non-empty
    result is collected and, if there is any, the coordinate file is
    rewritten.  Nothing is written or printed when no group needs a fix.

    Returns None.
    '''
    info = get_atominfo(file)
    netdis = check_dis(info, 1)  # dist for alt conformers

    # Sorted, de-duplicated ids of every atom that takes part in a contact.
    uatoms = sorted(set(atom for pair in netdis
                        for atom in (pair[1], pair[2])))

    altidd = []

    def _check_group(group):
        '''Sort one group of [atom_id, occupancy] pairs and collect its fix.'''
        util.gsort(group, 1, -1)
        altid = check_alt(group)
        if altid:
            altidd.append(altid)

    satom, prev_key = [], None
    for x in uatoms:
        t = x.split('_')
        key = t[:4]
        if satom and key != prev_key:
            _check_group(satom)
            satom = []
        # field 8 of the id is used as the index into info['occ']
        satom.append([x, info['occ'][int(t[7])]])
        prev_key = key
    if satom:
        # The last group has no successor to trigger its check, so it
        # must be processed explicitly once the loop ends.
        _check_group(satom)

    if not altidd:
        return

    if util.is_cif(file):
        newfile = update_ciffile(file, altidd)
    else:
        newfile = update_pdbfile(file, altidd)

    if outfile:
        util.move(newfile, outfile)
        print('\nThe updated new file = %s\n' % outfile)
    else:
        print('\nThe updated new file = %s\n' % newfile)
def _find_special_position_atoms(info, dist_limit, extend):
    '''Return [atom_id, detail] pairs for atoms closer than dist_limit to one
    of their own symmetry images.

    atom_id is 'chain_comp_nres_atom_alt_ins_natm_index' and detail is
    'mtr scode operator distance', where scode encodes the cell translation
    (each digit is the shift + 5) and operator is the 1-based operator number.
    '''
    frac, _orth = util.frac_orth_matrix(info['cell'])  # get conversion matrix
    m1, m2, m3 = get_sym_operator(info, 'frac')  # get sym
    xfn, yfn, zfn = shift_xyz2unit(info['xf'], info['yf'], info['zf'])
    xfb = util.matrix_prod(frac, [extend, extend, extend])  # boundary in frac
    xmin, xmax = expand_boundary(xfn, yfn, zfn, xfb)  # use the shifted

    cell = info['cell']
    natom = len(info['xf'])
    nop = len(m1)  # number of operators
    box = (-3, -2, -1, 0, 1, 2, 3)  # 7*7*7 = 343 cells

    found = []
    for ii in box:
        for jj in box:
            for kk in box:
                scode = '%d%d%d' % (ii + 5, jj + 5, kk + 5)
                for j in range(nop):
                    if j == 0 and scode == '555':
                        # First operator with no cell shift: treated as the
                        # atom itself, so it can never be reported.
                        continue
                    # pre-assign the matrix elements (faster inner loop)
                    m10, m20, m30 = m1[j][0], m2[j][0], m3[j][0]
                    m11, m21, m31 = m1[j][1], m2[j][1], m3[j][1]
                    m12, m22, m32 = m1[j][2], m2[j][2], m3[j][2]
                    m13, m23, m33 = m1[j][3], m2[j][3], m3[j][3]
                    for i in range(natom):
                        x, y, z = xfn[i], yfn[i], zfn[i]  # shifted frac
                        xnf0 = m10 * x + m11 * y + m12 * z + m13 + ii
                        xnf1 = m20 * x + m21 * y + m22 * z + m23 + jj
                        xnf2 = m30 * x + m31 * y + m32 * z + m33 + kk
                        if (xnf0 < xmin[0] or xnf0 > xmax[0]
                                or xnf1 < xmin[1] or xnf1 > xmax[1]
                                or xnf2 < xmin[2] or xnf2 > xmax[2]):
                            continue
                        d = util.distf([x, y, z], [xnf0, xnf1, xnf2], cell)
                        if d < dist_limit:  # atom on special position
                            s1 = '%s_%s_%s_%s_%s_%s_%s_%d' % (
                                info['chain'][i], info['comp'][i],
                                info['nres'][i], info['atom'][i],
                                info['alt'][i], info['ins'][i],
                                info['natm'][i], i)
                            s2 = '%3d %s %d %.3f' % (info['mtr'][i], scode,
                                                     j + 1, d)
                            found.append([s1, s2])
    return found


def check_special_position(file, outfile, out_xyz, dist_limit=0.5):
    '''Find atoms on special positions and check their occupancy.

    file:       coordinate file in PDB/mmCIF format.
    outfile:    report file; defaults to file + '_occ' when empty.
    out_xyz:    name passed to update_coord() for the updated coordinates.
    dist_limit: the minimum distance for atoms on special positions.
                The tolerance of distance is 0.25 for refmac, 0.5 for
                shelx, 1.0 for phenix.

    An atom is reported when one of its symmetry images lies closer than
    dist_limit.  Each reported atom is passed to check_coord_occ(); entries
    whose message contains 'Wrong occupancy' are written to the report.
    Waters (HOH/DOD) with an original occupancy of 1 are collected and, for
    cif input, handed to update_coord().

    Returns None.
    '''
    if not outfile:
        outfile = file + '_occ'
    extend = 2
    swater, l_occ = [], []

    # The context manager closes the report on every exit path, including
    # the early returns below.
    with open(outfile, 'w') as fw:
        info = get_atominfo(file)
        print('Searching atoms on special positions(space group=%s)..' %
              info['spg'])
        if not info['cell']:
            fw.write('Error: the unit cell is not extracted!\n')
            return

        nspecial = _find_special_position_atoms(info, dist_limit, extend)
        if not nspecial:
            t = '\nNote: No atoms sit on special position.\n'
            print(t)
            fw.write(t)
            return

        t = '\nNote: The following atoms sit on special position.\n'
        print(t)
        fw.write(t)

        # Group the details of every hit by atom id, in order of discovery.
        dic = {}
        for atom_id, detail in nspecial:
            dic.setdefault(atom_id, []).append(detail)

        for s in sorted(dic):
            nfold = len(dic[s]) + 1
            op = ['555 1']  # the first one
            for m in dic[s]:
                t = m.split()
                op.append('%s %s' % (t[1], t[2]))

            occ, ss = check_coord_occ(info, s, nfold)
            s1 = s.split('_')
            s2 = '_'.join(s1[:-1])
            t = '\natom id=%s : symmetry fold=%d \n' % (s2, nfold)
            if '_' not in s:
                continue
            na = int(s1[-1])  # last field of the id is the atom index
            occ_orig = info['occ'][na]
            print('%s  sym_code= %s occ_orig=%s' % (t.strip(), op, occ_orig))
            print(ss)
            if ('_HOH_' in ss or '_DOD_' in ss) and float(occ_orig) == 1:
                swater.append(s1[:-1])
                l_occ.append(occ)
            if 'Wrong occupancy' in ss:
                fw.write(t)
                fw.write(ss)

    if l_occ and util.is_cif(file):
        update_coord(file, swater, l_occ, out_xyz)
        print('%d waters are updated with occupancy in the coordinates.' %
              len(l_occ))
        print('New coordinate= %s' % out_xyz)
    else:
        print('Coordinates are not updated (Reason: either not in cif or OCC not 1.00).')
    print('\nThe output file =%s\n' % outfile)
def matt_coeff(infile, outfile):
    '''Calculate Matthew_coeff and solvent content.

    infile:  coordinate file; a cif file is first converted with cif2pdb()
             and the converted file is deleted afterwards.
    outfile: when non-empty, a small cif file with the _packing items is
             written to this name.

    The mass of the content is estimated in three ways: from the ATOM/HETATM
    records weighted by occupancy (waters HOH/DOD skipped), from SEQRES, and
    from the residues seen in the coordinates.  The first estimate whose
    coefficient falls in (2.0, 5) is reported, tried in the order SEQRES,
    residue, atom; otherwise the residue based value is reported with a
    warning.  Only the first model is read (parsing stops at ENDMDL).

    Returns (matt, solv), or None when the input file check fails.
    '''
    if not util.check_file(100, infile):
        print('Error: file (%s) not exist' % infile)
        return

    from_cif = util.is_cif(infile)
    pdbfile = cif2pdb(infile) if from_cif else infile

    cell = [1, 1, 1, 1, 1, 1]
    spt, nop, nmat, sym = 1, 0, 1, 'X'
    res, atom, hetres = [], [], []
    rmass, amass, armass = 0, 0, 0
    rest = ''

    with open(pdbfile, 'r') as fp:
        for x in fp:
            if 'REMARK 290 ' in x[:12] and '555' in x and ',' in x[23:32]:
                nop = nop + 1  # one symmetry operator listed in REMARK 290
            elif 'SEQRES' in x[:6]:
                res.extend(x[17:79].split())
            elif 'MTRIX3' in x[:6] and '1' not in x[55:].strip():
                nmat = nmat + 1
            elif 'CRYST1' in x[:6]:
                cell = [float(y) for y in x[7:54].split()]
                sym = x[54:65].strip()
            elif 'ATOM' in x[:4] or 'HETA' in x[:4]:
                if 'HOH' in x[17:20] or 'DOD' in x[17:20]:
                    continue
                atom.append(x)
                occ = float(x[54:60])
                amass = amass + atom_mass(x[76:78].strip()) * occ
                t = x[17:27]  # comp_ch_res_int
                if t != rest:  # first atom of a new residue
                    comp = t[:3].strip()
                    restmp = residue_mass(comp)
                    if restmp < 1:
                        # mass not returned by residue_mass(); added later
                        # through non_standard_res()
                        hetres.append(comp)
                    else:
                        armass = armass + restmp
                    rest = t
            elif 'ENDMDL' in x[:6]:
                break

    cell_vol = cell_volume(cell)
    nsym = sg_nsym(sym)
    if nsym == -1:
        print('Error: space group (%s) is not in the list (%s)' % (sym, pdbfile))
        nsym = nop  # fall back to the operators counted in REMARK 290

    for x in hetres:
        armass = armass + non_standard_res(x, atom)

    for x in res:
        resm = residue_mass(x)
        if resm < 1:
            rmass = rmass + non_standard_res(x, atom)
        else:
            rmass = rmass + resm

    amatt, asolv = calc_matt(cell_vol, amass, nsym, nmat, spt)  # by atom, occ
    rmatt, rsolv = calc_matt(cell_vol, rmass, nsym, nmat, spt)  # by SEQRES
    armatt, arsolv = calc_matt(cell_vol, armass, nsym, nmat, spt)  # residue

    if 2.0 < rmatt < 5:
        matt, solv = rmatt, rsolv
    elif 2.0 < armatt < 5:
        matt, solv = armatt, arsolv
    elif 2.0 < amatt < 5:
        matt, solv = amatt, asolv
    else:
        matt, solv = armatt, arsolv
        print('Warning: packing problem (%s), Matthew_coeff=%.2f; Solvent=%.2f'
              % (pdbfile, matt, solv))

    if from_cif:
        util.delete_file(pdbfile)

    print('%s : split nsym, nmat= %2d %2d %2d' % (pdbfile, spt, nsym, nmat))
    print('By ATOM: matt= %6.2f , solvent= %6.2f ' % (amatt, asolv))
    print('By SEQRES: matt= %6.2f , solvent= %6.2f ' % (rmatt, rsolv))
    print('By residue: matt= %6.2f , solvent= %6.2f ' % (armatt, arsolv))
    print('Possible: matt= %6.2f , solvent= %6.2f ' % (matt, solv))
    print('\nmass_total_atom=%.1f ; cell_vol=%.1f' % (amass, cell_vol))

    error = '?'
    if matt > 8.7 or matt < 1.5:
        error = 'Warning: Matthew_coefficient(%.2f) is abnormal. Possible incomplete content of ASU (or a split entry).' % matt
    if matt == 0.0 and solv == 1.0:
        error = '?'  # space group problem

    if outfile:
        ss = ('data_matt\n'
              '#\n'
              '_packing.Matthew_coefficient  %6.2f\n'
              '_packing.solvent_content      %6.2f\n'
              '_packing.error  "%s"\n\n') % (matt, solv, error)
        with open(outfile, 'w') as fw:
            fw.write(ss)
        print('The output file = %s\n' % outfile)

    return matt, solv
def _select_cif_atoms(fw, coord, compid):
    '''Copy the cell items and the matching _atom_site rows of a cif file to
    fw; return (cell, xx, yy, zz) for the matching atoms.
    '''
    fw.write('data_xyzcomp\n#\n')
    with open(coord, 'r') as fp:
        flist = fp.readlines()

    items, values = cif.cifparse(flist, '_cell.')
    fw.write('\n#\n')
    for m, p in enumerate(items):
        fw.write("%s %s\n" % (p, values[m]))
    cell = cif.get_cell(flist)

    items, values = cif.cifparse(flist, '_atom_site.')
    comp = cif.parse_values(items, values, "_atom_site.auth_comp_id")
    asym = cif.parse_values(items, values, "_atom_site.auth_asym_id")
    seq = cif.parse_values(items, values, "_atom_site.auth_seq_id")
    alt = cif.parse_values(items, values, "_atom_site.label_alt_id")
    ins = cif.parse_values(items, values, "_atom_site.pdbx_PDB_ins_code")
    x = cif.parse_values(items, values, "_atom_site.Cartn_x")
    y = cif.parse_values(items, values, "_atom_site.Cartn_y")
    z = cif.parse_values(items, values, "_atom_site.Cartn_z")
    model = cif.parse_values(items, values, "_atom_site.pdbx_PDB_model_num")

    # The model number is optional; every other column is required.
    if not (alt and comp and ins and asym and seq and x and y and z):
        print('Error: not enough infor. extraced from (%s). Check ciftokens'
              % coord)
        sys.exit()

    fw.write('\n#\nloop_\n')
    for p in items:
        fw.write("%s\n" % p)

    row = cif.get_rows(items, values)
    xx, yy, zz = [], [], []
    for i in range(len(x)):
        alter, inst, mod = '.', '.', '1'
        if model and util.is_number(model[i]):
            mod = model[i]
        if alt[i] != '?':
            alter = alt[i]
        if ins[i] != '?':
            inst = ins[i]
        id1 = '_'.join([mod, comp[i], asym[i], seq[i], alter, inst])
        # NOTE(review): substring test against the whole compid string, not
        # an exact match against its ':'-separated groups - confirm intended.
        if id1 in compid:
            xx.append(float(x[i]))
            yy.append(float(y[i]))
            zz.append(float(z[i]))
            for m in row[i]:
                fw.write("%s " % m)
            fw.write('\n')
    return cell, xx, yy, zz


def _select_pdb_atoms(fw, coord, compid):
    '''Copy CRYST1 and the matching ATOM/HETATM records of a pdb file to fw;
    return (cell, xx, yy, zz), with cell None when no CRYST1 record is found.
    '''
    cell = None
    xx, yy, zz = [], [], []
    with open(coord, 'r') as fp:
        for x1 in fp:
            if 'CRYST1' in x1[:6]:
                fw.write(x1)
                cell = [float(p) for p in x1[8:54].split()]
            elif 'ATOM' in x1[:4] or 'HETATM' in x1[:6]:
                alt = x1[16:17]
                if alt.isspace():
                    alt = '.'
                ins = x1[26:27]
                if ins.isspace():
                    ins = '.'
                resname = x1[17:20].strip()
                chid = x1[20:22].strip()
                resnum = x1[22:26].strip()
                # No model field here: the id is matched as a substring of
                # compid, which carries the model number in front.
                resid = '_'.join([resname, chid, resnum, alt, ins])
                if resid in compid:
                    fw.write(x1)  # only write the selected section
                    xx.append(float(x1[30:38]))
                    yy.append(float(x1[38:46]))
                    zz.append(float(x1[46:54]))
    return cell, xx, yy, zz


def find_xyzlim_compound(compid, coord):
    '''Find xyzlimit used by mapmask, and write the coord in cif or pdb format.

    compid: atom_group_id (model_compound_chainID_resnumber_alter_insertion);
            several groups may be joined with ':'.
    coord:  the coordinate file.  When util.is_cif(coord) == 1 it is read as
            cif and the selection is written as cif, otherwise as pdb.

    The selected atoms are written to a file named after the first four
    fields of the first group plus '.cif' or '.pdb'.  The limits are the
    min/max orthogonal coordinates of the selection, extended by 2.0 on
    each side and multiplied by the fractionalization matrix.

    Returns (xyzlim, xyzcomp): the limit string
    'xmin xmax ymin ymax zmin zmax' and the name of the written file, or
    ('', '') after an error message.  Exits through sys.exit() when the
    required cif columns cannot be extracted.
    '''
    comp = 'XXXX'
    for i, x in enumerate(compid.split(':')):
        t = x.split('_')
        # Validate before indexing so that a short id gives the error
        # message instead of an IndexError.
        if len(t) != 6:
            print(
                'Error: in group-id (%d). it should be (model_compound_chainID_resnumber_alter_insertion).'
                % (i + 1))
            return '', ''
        if i == 0:
            comp = '_'.join(t[:4])

    idd = util.is_cif(coord)
    xyzcomp = comp + ('.cif' if idd == 1 else '.pdb')

    # The context manager closes the output on every exit path.
    with open(xyzcomp, 'w') as fw:
        if idd == 1:  # input cif format
            cell, xx, yy, zz = _select_cif_atoms(fw, coord, compid)
        else:  # pdb format
            cell, xx, yy, zz = _select_pdb_atoms(fw, coord, compid)

    if not xx or not yy or not zz:
        print('Error: %s can not be found in the coordinate. try a new id. ' %
              (compid))
        return '', ''
    if cell is None:
        print('Error: the unit cell is not found in (%s).' % coord)
        return '', ''

    frac, _orth = util.frac_orth_matrix(cell)  # get matrix
    border = 2.0  # extend a little to cover more density
    xx_min, xx_max = min(xx) - border, max(xx) + border
    yy_min, yy_max = min(yy) - border, max(yy) + border
    zz_min, zz_max = min(zz) - border, max(zz) + border

    xf_min = util.matrix_prod(frac, [xx_min, yy_min, zz_min])
    xf_max = util.matrix_prod(frac, [xx_max, yy_max, zz_max])

    xyzlim = '%.3f %.3f %.3f %.3f %.3f %.3f' % (
        xf_min[0], xf_max[0], xf_min[1], xf_max[1], xf_min[2], xf_max[2])
    return xyzlim, xyzcomp
def cut_map_around_ligand_peptide(dccfile, dic, mapfile_in, xyzfile_in):
    '''It generates a complete set for ligand (map, html, jmol).

    dccfile:    the density file by dcc.
    dic:        dictionary of run settings.  Keys read here: 'pdbfile',
                'ligmapcif', 'xyzfile_orig', 'dir', 'sdsc_map'.  When
                'ligmapcif' is true the pre-parsed cif content is stored
                back under 'cif', 'cell_items', 'lig_cell', 'sym_items',
                'lig_sym', 'items', 'comp1', 'asym', 'seq', 'alt', 'ins',
                'mod' and 'row'.
    mapfile_in: an input map file; it is moved to '<name>_2fofc.map'.
    xyzfile_in: an input coordinate file (converted to pdb when cif).

    Side effects: writes LIG_PEPTIDE.cif in the current directory, copies
    the coordinates to '<basename of pdbfile>_new', and when dic['sdsc_map']
    is true removes the intermediate files with a shell 'rm -f'.

    Returns None.
    '''
    print('Cutting the density maps for ligands/peptide')

    tmpxyz = xyzfile_in
    if util.is_cif(xyzfile_in):
        tmpxyz = cif.cif2pdb(xyzfile_in)

    base = os.path.basename(dic['pdbfile'])
    pdbfile = base + '_new'
    if pdbfile != tmpxyz:
        shutil.copy(tmpxyz, pdbfile)

    if dic['ligmapcif']:
        mapfile = dic['xyzfile_orig'] + '_2fofc.map'
    else:
        mapfile = base + '_2fofc.map'
    shutil.move(mapfile_in, mapfile)

    if dic['ligmapcif']:  # pre-parse the cif file.
        dic['cif'] = 1
        with open(dic['xyzfile_orig'], 'r') as fp:
            flist = fp.readlines()

        cell_items, values = cif.cifparse(flist, '_cell.')
        dic['cell_items'] = cell_items
        dic['lig_cell'] = cif.get_rows(cell_items, values)

        sym_items, values = cif.cifparse(flist, '_symmetry.')
        dic['sym_items'] = sym_items
        dic['lig_sym'] = cif.get_rows(sym_items, values)

        items, values = cif.cifparse(flist, '_atom_site.')
        dic['items'] = items
        dic['comp1'] = cif.parse_values(items, values,
                                        "_atom_site.auth_comp_id")
        dic['asym'] = cif.parse_values(items, values,
                                       "_atom_site.auth_asym_id")
        dic['seq'] = cif.parse_values(items, values, "_atom_site.auth_seq_id")
        dic['alt'] = cif.parse_values(items, values,
                                      "_atom_site.label_alt_id")
        dic['ins'] = cif.parse_values(items, values,
                                      "_atom_site.pdbx_PDB_ins_code")
        dic['mod'] = cif.parse_values(items, values,
                                      "_atom_site.pdbx_PDB_model_num")
        dic['row'] = cif.get_rows(items, values)

    # a cif file contains table, filenames; closed even if a helper raises
    with open('LIG_PEPTIDE.cif', 'w') as fw_itool:
        fw_itool.write('data_lig_peptide\n')
        fw_itool.write(
            '\n# A "!" will be given if the residue is bad with real_space_R.\n')
        fw_itool.write('\n# Criteria: (CC<0.7 and R>0.4) or CC<0.5 or R>0.5\n')

        url = 'http://sf-tool.wwpdb.org/users_data/dir_%s/' % dic['dir']

        ch_pep, chr_pep, ch_lig, chr_lig, ch_wat, chr_wat = tls.chain_res_range(
            pdbfile)
        ligpdb = non_poly_pdb(ch_pep, ch_lig, pdbfile)  # get non-poly xyz file
        dcc = get_dcc(dccfile)  # get a list for dcc of each residue
        if not dcc:
            util.perror(
                'Warning: Failed to parse EDS values! No ligand/peptide maps will be generated. '
            )

        # The peptide maps are made once, as soon as any chain has fewer
        # than 15 entries (length of peptide).
        has_peptide = any(len(v) < 15 for v in ch_pep.values())
        if has_peptide and not dic['sdsc_map']:
            map_around_peptide(fw_itool, dic, mapfile, ligpdb, dcc, ch_pep,
                               url)

        if ch_lig:
            map_around_ligand(fw_itool, dic, mapfile, ligpdb, dcc, ch_lig,
                              url)

        get_html_table_baddcc_general(mapfile, dcc)  # for polymer/lig/peptide

    if dic['sdsc_map']:
        arg = 'rm -f %s %s %s LIG_PEPTIDE.cif ' % (mapfile, mapfile_in,
                                                   xyzfile_in)
        arg = arg + ' %s_rcc_sum.cif.mtz %s_2fofc.map_all.html ' % (
            dic['pdbfile'], dic['pdbfile'])
        os.system(arg)