def update_ciffile(file, altidd):
    '''Rewrite the _atom_site loop of a cif file with corrected alt conformer IDs.

    file:   path of the input cif file.
    altidd: a list of groups; every group is a list of entries whose first
            element is a row index into the _atom_site table and whose
            second element is the alt_id to store in that row.

    The result is written to <file>_new_alt and that name is returned.
    If the table has no _atom_site.label_alt_id column a warning is printed
    and the input is copied to the output unchanged.
    '''
    nfile = file + '_new_alt'
    with open(file, 'r') as fp:
        flist = fp.readlines()

    items, values = cif.cifparse(flist, '_atom_site.')  # a loop
    rows = cif.get_rows(items, values)

    alt = '_atom_site.label_alt_id'
    if alt not in items:
        print('Warning: cif token for alt_id is not found, no correction is applied.')
        # Nothing to correct: hand back a usable copy rather than an empty file.
        with open(nfile, 'w') as fw:
            fw.writelines(flist)
        return nfile
    nalt = items.index(alt)

    for group in altidd:  # correct alt_ids
        for entry in group:
            rows[entry[0]][nalt] = entry[1]

    # Everything before the 'loop_' line that opens _atom_site is copied as is.
    nall = len(flist)
    st = 0
    head_end = nall
    for i, ln in enumerate(flist):
        if (ln.lstrip().startswith('loop_') and i + 1 < nall
                and flist[i + 1].lstrip().startswith('_atom_site.')):
            st = head_end = i
            break

    # Skip the old _atom_site loop: it runs up to the next line starting with
    # '#'.  Blank lines are tolerated (startswith on '' is simply False).
    # NOTE(review): when no '#' line follows, st stays at the loop start and
    # the old loop is copied again below - confirm inputs always carry '#'.
    for i in range(st, nall):
        if flist[i].lstrip().startswith('#'):
            st = i
            break

    # Column widths for the left-justified output (widest value per column).
    ncol = len(rows[0]) if rows else 0
    fmt = [max(len(r[c]) for r in rows) for c in range(ncol)]

    with open(nfile, 'w') as fw:
        fw.writelines(flist[:head_end])
        fw.write('#\nloop_\n')
        for x in items:
            fw.write('%s \n' % x)
        for r in rows:  # re-write atom_site (left formated)
            fw.write(''.join(y.ljust(fmt[c] + 1) for c, y in enumerate(r)))
            fw.write('\n')
        fw.writelines(flist[st:])  # the last lines.
    return nfile
def parse_table(alist, table, id):
    '''Parse one cif table and return its items and rows.

    alist: a file name when id == 0, otherwise a list of lines already read.
    table: the table name handed to cif.cifparse.
    id:    0, alist is an input file; any other value, alist is an input list.

    Returns (items, rows) as produced by cif.cifparse and cif.get_rows.
    '''
    if id == 0:  # a file
        # Read through a context manager so the handle is closed right away.
        with open(alist, 'r') as fp:
            flist = fp.readlines()
    else:
        flist = alist

    items, values = cif.cifparse(flist, table)
    rows = cif.get_rows(items, values)
    return items, rows
def cif2cif_sf(flist, num):
    '''Write SF-<num>.cif in which the free set is the reflections flagged num.

    flist: the lines of a structure factor cif file.
    num:   integer; reflections whose _refln.pdbx_r_free_flag equals num get
           status 'f', every other reflection gets status 'o'.

    Lines before and after the _refln loop are copied unchanged.
    Returns the name of the file written.
    Raises ValueError when _refln.status or _refln.pdbx_r_free_flag is not
    among the parsed items, or when a free flag is not an integer.
    '''
    sf = 'SF-%d.cif' % num
    nall = len(flist)

    # First line of the _refln loop ('loop_' followed by a _refln. token).
    n1 = 0
    for i, x in enumerate(flist):
        if (x.lstrip().startswith('loop_') and i + 1 < nall
                and flist[i + 1].lstrip().startswith('_refln.')):
            n1 = i
            break

    # First line after the loop: a '#', a new data block or a foreign token.
    n2 = nall
    for i in range(n1, nall):
        t = flist[i].lstrip()
        if (t.startswith('#') or t.startswith('data_')
                or (t.startswith('_') and not t.startswith('_refln.'))):
            n2 = i
            break

    items, values = cif.cifparse(flist[n1:n2], '_refln.')
    status_col = items.index('_refln.status')
    flag_col = items.index('_refln.pdbx_r_free_flag')
    rows = cif.get_rows(items, values)
    for row in rows:
        # Every status is reset; only the requested flag value is left free.
        row[status_col] = 'f' if int(row[flag_col]) == num else 'o'

    # The output is opened only after parsing succeeded, so a failure above
    # does not leave a truncated SF file behind.
    with open(sf, 'w') as fw:
        fw.writelines(flist[:n1])  # write the front part
        cif.write_cif_loop(fw, items, rows)
        fw.writelines(flist[n2:])  # write the end part
    return sf
def update_coord(file, swater, occ, out_xyz):
    '''Write a copy of a cif coordinate file with new occupancies for some atoms.

    file:    path of the input cif coordinate file.
    swater:  list of atom identifiers [ch, comp, nres, atom, alt, ins, natom];
             they are compared with the auth_asym_id, label_comp_id,
             auth_seq_id, label_atom_id, label_alt_id, pdbx_PDB_ins_code and
             id columns of _atom_site.
    occ:     occ[j] is the occupancy given to the atom matching swater[j].
    out_xyz: name of the output file; when empty, <file>_new is used.

    Returns the name of the file written.
    Raises ValueError when one of the _atom_site columns used here is missing.
    '''
    print('Updating coordinate..\n')

    newfile = out_xyz if out_xyz else file + '_new'

    # Read the input completely before the output is opened, so that
    # out_xyz may safely name the input file itself.
    with open(file, 'r') as fp:
        flist = fp.readlines()
    nall = len(flist)

    n1 = 0
    for i, x in enumerate(flist):  # get the first, part.
        if (i + 2 < nall and x.lstrip().startswith('loop_')
                and flist[i + 1].lstrip().startswith('_atom_site.')
                and flist[i + 2].lstrip().startswith('_atom_site.')):
            n1 = i
            break

    n2 = nall
    for i in range(n1, nall):  # get last line of coord. (the second part)
        t = flist[i].lstrip()
        if (t.startswith('#') or t.startswith('data_')
                or (t.startswith('_') and not t.startswith('_atom_site.'))):
            n2 = i
            break

    items, values = cif.cifparse(flist[n1:n2], '_atom_site.')
    key_cols = (
        items.index('_atom_site.auth_asym_id'),
        items.index('_atom_site.label_comp_id'),
        items.index('_atom_site.auth_seq_id'),
        items.index('_atom_site.label_atom_id'),
        items.index('_atom_site.label_alt_id'),
        items.index('_atom_site.pdbx_PDB_ins_code'),
        items.index('_atom_site.id'),
    )
    occ_col = items.index('_atom_site.occupancy')

    # Index the requested atoms once; the first entry wins for duplicates,
    # which is what the former row-by-row scan with 'break' did.
    wanted = {}
    for j, x in enumerate(swater):
        wanted.setdefault(tuple(x[:7]), j)

    rows = cif.get_rows(items, values)
    for row in rows:
        j = wanted.get(tuple(row[c] for c in key_cols))
        if j is not None:
            row[occ_col] = '%.2f' % occ[j]

    with open(newfile, 'w') as fw:
        fw.writelines(flist[:n1])  # write the front part
        cif.write_cif_loop(fw, items, rows)
        fw.writelines(flist[n2:])  # write the end part
    return newfile
def _write_cif_selection(fw, coord, wanted):
    '''Copy the _cell items and the wanted _atom_site rows of cif file coord to fw.

    wanted is a set of group ids (model_comp_chain_seq_alt_ins).
    Returns (cell, xx, yy, zz); the three lists hold the selected coordinates.
    '''
    fw.write('data_xyzcomp\n#\n')
    with open(coord, 'r') as fp:
        flist = fp.readlines()

    items, values = cif.cifparse(flist, '_cell.')
    fw.write('\n#\n')
    for m, p in enumerate(items):
        fw.write("%s %s\n" % (p, values[m]))
    cell = cif.get_cell(flist)

    items, values = cif.cifparse(flist, '_atom_site.')
    comp = cif.parse_values(items, values, "_atom_site.auth_comp_id")
    asym = cif.parse_values(items, values, "_atom_site.auth_asym_id")
    seq = cif.parse_values(items, values, "_atom_site.auth_seq_id")
    alt = cif.parse_values(items, values, "_atom_site.label_alt_id")
    ins = cif.parse_values(items, values, "_atom_site.pdbx_PDB_ins_code")
    x = cif.parse_values(items, values, "_atom_site.Cartn_x")
    y = cif.parse_values(items, values, "_atom_site.Cartn_y")
    z = cif.parse_values(items, values, "_atom_site.Cartn_z")
    model = cif.parse_values(items, values, "_atom_site.pdbx_PDB_model_num")

    if not (alt and comp and ins and asym and seq and x and y and z):
        print('Error: not enough infor. extraced from (%s). Check ciftokens' % coord)
        sys.exit()

    fw.write('\n#\nloop_\n')
    for p in items:
        fw.write("%s\n" % p)

    row = cif.get_rows(items, values)
    xx, yy, zz = [], [], []
    for i in range(len(x)):
        # Missing model -> '1'; unknown ('?') alt/ins codes -> '.'
        mod = model[i] if model and util.is_number(model[i]) else '1'
        alter = alt[i] if alt[i] != '?' else '.'
        inst = ins[i] if ins[i] != '?' else '.'
        id1 = '_'.join([mod, comp[i], asym[i], seq[i], alter, inst])
        if id1 in wanted:
            xx.append(float(x[i]))
            yy.append(float(y[i]))
            zz.append(float(z[i]))
            for m in row[i]:
                fw.write("%s " % m)
            fw.write('\n')
    return cell, xx, yy, zz


def _write_pdb_selection(fw, coord, wanted):
    '''Copy the CRYST1 record and the wanted ATOM/HETATM records of coord to fw.

    wanted is a set of group ids (model_comp_chain_seq_alt_ins); the model
    field is ignored because the records are not tracked per model here.
    Returns (cell, xx, yy, zz); cell is None when no CRYST1 record was read.
    '''
    resids = set(g.split('_', 1)[1] for g in wanted)  # drop the model field
    cell = None
    xx, yy, zz = [], [], []
    with open(coord, 'r') as fp:
        for x1 in fp:
            if x1.startswith('CRYST1'):
                fw.write(x1)
                cell = [float(p) for p in x1[8:54].split()]
            elif x1.startswith(('ATOM', 'HETATM')):
                alt = x1[16:17]
                if alt.isspace():
                    alt = '.'
                ins = x1[26:27]
                if ins.isspace():
                    ins = '.'
                resname = x1[17:20].strip()
                chid = x1[20:22].strip()
                resnum = x1[22:26].strip()
                resid = '_'.join([resname, chid, resnum, alt, ins])
                if resid in resids:
                    fw.write(x1)  # only write the selected section
                    xx.append(float(x1[30:38]))
                    yy.append(float(x1[38:46]))
                    zz.append(float(x1[46:54]))
    return cell, xx, yy, zz


def find_xyzlim_compound(compid, coord):
    '''Find the xyz limits used by mapmask and write the selected atoms to a file.

    compid: atom group ids separated by ':'; every id must have the form
            model_compound_chainID_resnumber_alter_insertion.
    coord:  the coordinate file; util.is_cif decides between cif and pdb.

    The selected atoms are written to a file named after the first four
    fields of the first group id, with extension .cif for cif input and
    .pdb otherwise.  Group ids are matched exactly, not as substrings, so
    e.g. model 1 no longer selects atoms of model 11.

    Returns (xyzlim, xyzcomp): xyzlim is the string
    'xmin xmax ymin ymax zmin zmax' obtained by applying the frac matrix of
    util.frac_orth_matrix to the atom extent padded by 2.0, and xyzcomp is
    the file written.  ('', '') is returned when a group id is malformed,
    when no atom matches, or when a pdb file carries no CRYST1 record.
    Calls sys.exit() when the cif tokens needed are missing.
    '''
    groups = compid.split(':')
    # Validate every id before any field is indexed.
    for i, g in enumerate(groups):
        if len(g.split('_')) != 6:
            print(
                'Error: in group-id (%d). it should be (model_compound_chainID_resnumber_alter_insertion).'
                % (i + 1))
            return '', ''
    comp = '_'.join(groups[0].split('_')[:4])
    wanted = set(g.strip() for g in groups)

    idd = util.is_cif(coord)
    xyzcomp = comp + '.pdb'
    if idd == 1:
        xyzcomp = comp + '.cif'

    # The context manager closes the output on every exit path.
    with open(xyzcomp, 'w') as fw:
        if idd == 1:  # input cif format
            cell, xx, yy, zz = _write_cif_selection(fw, coord, wanted)
        else:  # pdb format
            cell, xx, yy, zz = _write_pdb_selection(fw, coord, wanted)

    if not xx or not yy or not zz:
        print('Error: %s can not be found in the coordinate. try a new id. ' %
              (compid))
        return '', ''
    if cell is None:
        print('Error: no CRYST1 record is found in (%s).' % coord)
        return '', ''

    frac, orth = util.frac_orth_matrix(cell)  # get matrix
    border = 2.0  # extend a little to cover more density
    xf_min = util.matrix_prod(
        frac, [min(xx) - border, min(yy) - border, min(zz) - border])
    xf_max = util.matrix_prod(
        frac, [max(xx) + border, max(yy) + border, max(zz) + border])
    xyzlim = '%.3f %.3f %.3f %.3f %.3f %.3f' % (
        xf_min[0], xf_max[0], xf_min[1], xf_max[1], xf_min[2], xf_max[2])
    return xyzlim, xyzcomp
def cut_map_around_ligand_peptide(dccfile, dic, mapfile_in, xyzfile_in):
    '''Generate the complete set of files for ligands/peptides (map, html, jmol).

    dccfile:    the density file by dcc.
    dic:        a dictionary of settings.  Keys read here: 'pdbfile',
                'ligmapcif', 'xyzfile_orig', 'dir' and 'sdsc_map'.  When
                'ligmapcif' is set, the pre-parsed cif tables are stored
                back into it ('cif', 'cell_items', 'lig_cell', 'sym_items',
                'lig_sym', 'items', 'comp1', 'asym', 'seq', 'alt', 'ins',
                'mod', 'row').
    mapfile_in: an input map file; it is moved to <name>_2fofc.map.
    xyzfile_in: an input coordinate file (converted with cif.cif2pdb when
                util.is_cif reports cif).

    LIG_PEPTIDE.cif is written in the working directory.  When
    dic['sdsc_map'] is set the intermediate files are removed at the end.
    Returns None.
    '''
    print('Cutting the density maps for ligands/peptide')

    tmpxyz = xyzfile_in
    if util.is_cif(xyzfile_in):
        tmpxyz = cif.cif2pdb(xyzfile_in)

    pdbfile = os.path.basename(dic['pdbfile']) + '_new'
    if pdbfile != tmpxyz:
        shutil.copy(tmpxyz, pdbfile)

    mapfile = os.path.basename(dic['pdbfile']) + '_2fofc.map'
    if dic['ligmapcif']:
        mapfile = dic['xyzfile_orig'] + '_2fofc.map'
    shutil.move(mapfile_in, mapfile)

    if dic['ligmapcif']:  # pre-parse the cif file.
        dic['cif'] = 1
        ciffile = dic['xyzfile_orig']
        with open(ciffile, 'r') as fp:
            flist = fp.readlines()

        cell_items, values = cif.cifparse(flist, '_cell.')
        cell = cif.get_rows(cell_items, values)
        dic['cell_items'], dic['lig_cell'] = cell_items, cell

        sym_items, values = cif.cifparse(flist, '_symmetry.')
        sym = cif.get_rows(sym_items, values)
        dic['sym_items'], dic['lig_sym'] = sym_items, sym

        items, values = cif.cifparse(flist, '_atom_site.')
        comp = cif.parse_values(items, values, "_atom_site.auth_comp_id")
        asym = cif.parse_values(items, values, "_atom_site.auth_asym_id")
        seq = cif.parse_values(items, values, "_atom_site.auth_seq_id")
        alt = cif.parse_values(items, values, "_atom_site.label_alt_id")
        ins = cif.parse_values(items, values, "_atom_site.pdbx_PDB_ins_code")
        mod = cif.parse_values(items, values, "_atom_site.pdbx_PDB_model_num")
        row = cif.get_rows(items, values)

        dic['items'], dic['comp1'], dic['asym'], dic['seq'] = (
            items, comp, asym, seq)
        dic['alt'], dic['ins'], dic['mod'], dic['row'] = alt, ins, mod, row

    url = 'http://sf-tool.wwpdb.org/users_data/dir_%s/' % dic['dir']
    #url=os.environ['THIS_SERVICE_URL__FIX_ME'] + '/users_data/dir_%s/' %dic['dir']

    # a cif file contains table, filenames; closed even if a helper raises.
    with open('LIG_PEPTIDE.cif', 'w') as fw_itool:
        fw_itool.write('data_lig_peptide\n')
        fw_itool.write(
            '\n# A "!" will be given if the residue is bad with real_space_R.\n')
        fw_itool.write('\n# Criteria: (CC<0.7 and R>0.4) or CC<0.5 or R>0.5\n')

        ch_pep, chr_pep, ch_lig, chr_lig, ch_wat, chr_wat = tls.chain_res_range(
            pdbfile)
        ligpdb = non_poly_pdb(ch_pep, ch_lig, pdbfile)  # get non-poly xyz file
        dcc = get_dcc(dccfile)  # get a list for dcc of each residue
        if not dcc:
            util.perror(
                'Warning: Failed to parse EDS values! No ligand/peptide maps will be generated. '
            )

        # One chain shorter than 15 residues is enough to trigger the
        # peptide maps (they are made once, for all chains).
        has_peptide = any(len(v) < 15 for v in ch_pep.values())
        if has_peptide and not dic['sdsc_map']:
            map_around_peptide(fw_itool, dic, mapfile, ligpdb, dcc, ch_pep, url)

        if ch_lig:
            map_around_ligand(fw_itool, dic, mapfile, ligpdb, dcc, ch_lig, url)

        get_html_table_baddcc_general(mapfile, dcc)  # for polymer/lig/peptide

    if dic['sdsc_map']:
        # Remove the intermediates directly instead of building an 'rm -f'
        # shell command, so odd characters in a file name cannot be
        # interpreted by the shell.
        leftovers = [
            mapfile, mapfile_in, xyzfile_in, 'LIG_PEPTIDE.cif',
            '%s_rcc_sum.cif.mtz' % dic['pdbfile'],
            '%s_2fofc.map_all.html' % dic['pdbfile'],
        ]
        for name in leftovers:
            try:
                os.remove(name)
            except OSError:
                pass  # same as 'rm -f': a file that is not there is fine

    # util.delete_file(pdbfile)
    return