def val_from_dcc(outf):
    '''Build the two fixed-width summary strings from a DCC output file.

    The format of the returned strings can not be changed !!

    outf: path of the cif file written by DCC.

    Returns (rep, val):
      rep: program, resolution, Rwork and Rfree of the first
           _pdbx_density_corr row, followed by _pdbx_density.Biso_mean.
      val: program, resolution, Rwork and Rfree of the first row whose
           details contain 'Best' (row 0 when there is none), followed by
           B_wilson, Padilla-Yeates L2 mean, L-test Z score, fom, and the
           Fo-to-Fc correlation, real-space R and density correlation of
           that same row.
    ('', '') is returned when util.check_file(500, outf) is false.
    '''
    if not util.check_file(500, outf):
        return '', ''

    # Read everything up front so the handle is closed deterministically.
    with open(outf, 'r') as fp:
        flist = fp.readlines()

    items, values = cif.cifparse(flist, '_pdbx_density.')
    biso = cif.parse_values(items, values, '_pdbx_density.Biso_mean')
    bwil = cif.parse_values(items, values, '_pdbx_density.B_wilson')
    l2 = cif.parse_values(items, values,
                          '_pdbx_density.Padilla-Yeates_L2_mean')
    z = cif.parse_values(items, values, '_pdbx_density.Z_score_L_test')
    fom = cif.parse_values(items, values, '_pdbx_density.fom')

    items, values = cif.cifparse(flist, '_pdbx_density_corr.')
    prog = cif.parse_values(items, values, '_pdbx_density_corr.program')
    resh = cif.parse_values(items, values,
                            '_pdbx_density_corr.ls_d_res_high')
    rwork = cif.parse_values(items, values,
                             '_pdbx_density_corr.ls_R_factor_R_work')
    rfree = cif.parse_values(items, values,
                             '_pdbx_density_corr.ls_R_factor_R_free')
    fcc = cif.parse_values(items, values,
                           '_pdbx_density_corr.correlation_coeff_Fo_to_Fc')
    rsr = cif.parse_values(items, values, '_pdbx_density_corr.real_space_R')
    dcc = cif.parse_values(items, values, '_pdbx_density_corr.correlation')
    detail = cif.parse_values(items, values, '_pdbx_density_corr.details')

    # nr: row used for the "reported" string; nb: first row tagged 'Best'.
    nr, nb = 0, 0
    for i, x in enumerate(detail):
        if 'Best' in x:
            nb = i
            break

    rep = '%8s %4s %6s %6s %6s ' % (prog[nr], resh[nr], rwork[nr], rfree[nr],
                                    biso[0])
    val = '%8s %4s %6s %6s %6s %6s %6s %6s %6s %6s %6s ' % (
        prog[nb], resh[nb], rwork[nb], rfree[nb], bwil[0], l2[0], z[0],
        fom[0], fcc[nb], rsr[nb], dcc[nb])
    return rep, val
def cell(flist):
    '''Return the six unit-cell parameters as a list.

    flist: lines of a cif file.

    The list is ordered a, b, c, alpha, beta, gamma.  Entries that can be
    read are floats; an entry whose value is missing or is not a number
    stays 0 and an error line is printed for it.  When the _cell. category
    is absent the all-zero list is returned without any message.
    '''
    params = [0, 0, 0, 0, 0, 0]
    items, values = cif.cifparse(flist, '_cell.')
    if len(items) == 0:
        return params

    tokens = (
        '_cell.length_a',
        '_cell.length_b',
        '_cell.length_c',
        '_cell.angle_alpha',
        '_cell.angle_beta',
        '_cell.angle_gamma',
    )
    columns = [cif.parse_values(items, values, tok) for tok in tokens]

    if not all(columns):
        print('Warning: cells not extracted. Check ciftokens')

    for pos, column in enumerate(columns):
        if len(column) > 0 and util.is_number(column[0]):
            params[pos] = float(column[0].strip())
        else:
            print('Error: cell has wrong (%s) values' % column)
    return params
def update_ciffile(file, altidd):
    '''Write a copy of a cif file with corrected alternate conformer IDs.

    file: path of the input cif file.
    altidd: iterable of groups; each group is an iterable of entries whose
        first element is a row index into the _atom_site table and whose
        second element is the alt ID to store in that row.

    Returns the path of the new file (file + '_new_alt').  When the token
    _atom_site.label_alt_id is missing, a warning is printed and the new
    file is left empty.
    '''
    nfile = file + '_new_alt'
    with open(file, 'r') as fp:
        flist = fp.readlines()

    items, values = cif.cifparse(flist, '_atom_site.')  # a loop
    rows = cif.get_rows(items, values)
    alt = '_atom_site.label_alt_id'

    with open(nfile, 'w') as fw:
        if alt not in items:
            print('Warning: cif token for alt_id is not found, no correction is applied.')
            return nfile
        nalt = items.index(alt)

        for group in altidd:  # correct alt_ids
            for entry in group:
                rows[entry[0]][nalt] = entry[1]

        # Copy everything in front of the _atom_site loop unchanged.
        nall = len(flist)
        st = 0
        for i, ln in enumerate(flist):
            if (ln.lstrip().startswith('loop_') and i + 1 < nall
                    and flist[i + 1].lstrip().startswith('_atom_site.')):
                st = i
                break
            fw.write(ln)

        # Skip the old _atom_site table: it ends at the next '#' line.  If
        # there is none the table runs to the end of the file, so nothing
        # is left to copy afterwards.
        end = nall
        for i in range(st, nall):
            if flist[i].lstrip().startswith('#'):
                end = i
                break

        fw.write('#\nloop_\n')
        for token in items:
            fw.write('%s \n' % token)

        # Column widths for the left-justified output.
        ncol = len(rows[0]) if rows else 0
        fmt = [max(len(row[col]) for row in rows) for col in range(ncol)]

        for row in rows:  # re-write atom_site (left formatted)
            for i, field in enumerate(row):
                fw.write(field.ljust(fmt[i] + 1))
            fw.write('\n')

        for n in range(end, nall):  # the last lines.
            fw.write(flist[n])
    return nfile
def get_chain_seq(flist):
    '''Return a dict mapping each chain ID to its list of residue numbers.

    flist: lines of a cif file.

    Chain IDs come from _atom_site.auth_asym_id and residue numbers from
    _atom_site.auth_seq_id.  Consecutive atoms of the same residue add one
    entry only; residue numbers that util.is_number rejects are skipped.
    An empty dict is returned when no chain IDs can be read.
    '''
    chain = {}
    items, values = cif.cifparse(flist, '_atom_site.')
    asym = cif.parse_values(items, values, "_atom_site.auth_asym_id")
    seq = cif.parse_values(items, values, "_atom_site.auth_seq_id")
    # Without chain IDs nothing can be keyed (and asym[i] would fail).
    if not asym:
        return chain

    for ch in asym:
        if ch not in chain:
            chain[ch] = []

    # Compare (chain, number) pairs so the first residue of a chain is kept
    # even when it has the same number as the last residue of the chain
    # before it.
    previous = None
    for ch, ns in zip(asym, seq):
        if (ch, ns) != previous and util.is_number(ns):
            chain[ch].append(int(ns))
            previous = (ch, ns)
    return chain
def get_dcc(dccfile):
    '''Return the _pdbx_rscc_mapman table of a DCC file as a list of lists.

    dccfile: path of the cif file written by DCC.

    Each row is [auth_seq_id, auth_asym_id, auth_comp_id, label_alt_id,
    correlation, real_space_R, Biso_mean, occupancy_mean, pdb_id].
    An empty list is returned when util.check_file(100, dccfile) is 0 or
    when the table is absent.
    '''
    dcc = []
    if util.check_file(100, dccfile) == 0:
        return dcc

    with open(dccfile, 'r') as fp:
        flist = fp.readlines()

    items, values = cif.cifparse(flist, '_pdbx_rscc_mapman.')
    if not items:
        return dcc

    nseq = cif.parse_values(items, values, "_pdbx_rscc_mapman.auth_seq_id")
    chid = cif.parse_values(items, values, "_pdbx_rscc_mapman.auth_asym_id")
    comp = cif.parse_values(items, values, "_pdbx_rscc_mapman.auth_comp_id")
    alt = cif.parse_values(items, values, "_pdbx_rscc_mapman.label_alt_id")
    cc = cif.parse_values(items, values, "_pdbx_rscc_mapman.correlation")
    rsr = cif.parse_values(items, values, "_pdbx_rscc_mapman.real_space_R")
    biso = cif.parse_values(items, values, "_pdbx_rscc_mapman.Biso_mean")
    occ = cif.parse_values(items, values,
                           "_pdbx_rscc_mapman.occupancy_mean")
    pdbid = cif.parse_values(items, values, "_pdbx_rscc_mapman.pdb_id")

    for i in range(len(chid)):
        dcc.append([
            nseq[i], chid[i], comp[i], alt[i], cc[i], rsr[i], biso[i],
            occ[i], pdbid[i]
        ])
    return dcc
def scale(flist):
    '''Get the SCALE matrix; return a list of 3x4 matrices.

    flist: lines of a cif file.

    Each matrix is [row1, row2, row3]; row k holds the numeric values of
    _atom_sites.fract_transf_matrix[k][1..3] followed by
    _atom_sites.fract_transf_vector[k].  An empty list is returned when the
    _atom_sites. category or any of the twelve tokens is missing.  A
    matrix with a row that has no numeric value at all is reported and
    skipped.
    '''
    scale = []
    items, values = cif.cifparse(flist, "_atom_sites.")
    if not items:
        return []

    def column(token):
        return cif.parse_values(items, values, '_atom_sites.' + token)

    b11 = column('fract_transf_matrix[1][1]')
    b12 = column('fract_transf_matrix[1][2]')
    b13 = column('fract_transf_matrix[1][3]')
    t1 = column('fract_transf_vector[1]')

    b21 = column('fract_transf_matrix[2][1]')
    b22 = column('fract_transf_matrix[2][2]')
    b23 = column('fract_transf_matrix[2][3]')
    t2 = column('fract_transf_vector[2]')

    b31 = column('fract_transf_matrix[3][1]')
    b32 = column('fract_transf_matrix[3][2]')
    b33 = column('fract_transf_matrix[3][3]')
    t3 = column('fract_transf_vector[3]')

    if not (b11 and b12 and b13 and t1 and b21 and b22 and b23 and t2
            and b31 and b32 and b33 and t3):
        return []

    for i in range(len(b11)):
        # Each row must be built from its own tokens (rows 2 and 3 used to
        # be copies of row 1).
        mt1 = [
            float(x) for x in (b11[i], b12[i], b13[i], t1[i])
            if util.is_number(x)
        ]
        mt2 = [
            float(x) for x in (b21[i], b22[i], b23[i], t2[i])
            if util.is_number(x)
        ]
        mt3 = [
            float(x) for x in (b31[i], b32[i], b33[i], t3[i])
            if util.is_number(x)
        ]
        if not (mt1 and mt2 and mt3):
            print('Error: Scale matrix has wrong values')
            continue
        scale.append([mt1, mt2, mt3])
    return scale
def parse_cif(flist, table):
    '''Parse selected columns of one cif category into a dict of lists.

    flist: lines of a cif file.
    table: category name without the leading '_' and trailing '.'; one of
        pdbx_nonpoly_scheme, struct_conn, pdbx_molecule or
        pdbx_poly_seq_scheme.

    Returns a dict keyed by short column names.  An empty dict is returned
    for any other category, or when one of the columns required for that
    category comes back empty.
    '''
    ciftable = '_%s.' % table
    items, values = cif.cifparse(flist, ciftable)

    # category -> ((dict key, cif token suffix) in parse order, required keys)
    layouts = {
        '_pdbx_nonpoly_scheme.': (
            (('asym', 'asym_id'),
             ('monid', 'pdb_mon_id'),
             ('nseq', 'pdb_seq_num'),
             ('chid', 'pdb_strand_id'),
             ('ins', 'pdb_ins_code')),
            ('monid', 'nseq', 'asym'),
        ),
        '_struct_conn.': (
            (('id', 'id'),
             ('asym1', 'ptnr1_auth_asym_id'),
             ('comp1', 'ptnr1_auth_comp_id'),
             ('nseq1', 'ptnr1_auth_seq_id'),
             ('alt1', 'pdbx_ptnr1_label_alt_id'),
             ('ins1', 'pdbx_ptnr1_PDB_ins_code'),
             ('asym2', 'ptnr2_auth_asym_id'),
             ('comp2', 'ptnr2_auth_comp_id'),
             ('nseq2', 'ptnr2_auth_seq_id'),
             ('alt2', 'pdbx_ptnr2_label_alt_id'),
             ('ins2', 'pdbx_ptnr2_PDB_ins_code')),
            ('id', 'asym1', 'comp1', 'nseq1', 'asym2', 'comp2', 'nseq2'),
        ),
        '_pdbx_molecule.': (
            (('ins_id', 'instance_id'),
             ('prd_id', 'prd_id'),
             ('asm_id', 'asym_id')),
            ('ins_id', 'prd_id', 'asm_id'),
        ),
        '_pdbx_poly_seq_scheme.': (
            (('asym', 'asym_id'),
             ('nseq', 'pdb_seq_num'),
             ('monid', 'pdb_mon_id'),
             ('chid', 'pdb_strand_id'),
             ('ins', 'pdb_ins_code')),
            ('asym', 'monid'),
        ),
    }

    if ciftable not in layouts:
        return {}

    fields, required = layouts[ciftable]
    parsed = {}
    for key, suffix in fields:
        parsed[key] = cif.parse_values(items, values, ciftable + suffix)

    for key in required:
        if not parsed[key]:
            return {}
    return parsed
def space_group_name(flist):
    '''Return the space group name as a string.

    flist: lines of a cif file.

    The first value of _symmetry.space_group_name_H-M is returned with
    single and double quotes removed and surrounding blanks stripped.  When
    the token cannot be read a warning is printed and '' is returned.
    '''
    items, values = cif.cifparse(flist, '_symmetry.')
    symm = cif.parse_values(items, values, '_symmetry.space_group_name_H-M')
    if not symm:
        print('Warning: space group not extracted. Check ciftokens')
        return ''

    name = symm[0]
    for quote in ("'", '"'):
        name = name.replace(quote, '')
    return name.strip()
def ncs_matrix(flist):
    '''Get the NCS matrices; return a list of 3x4 matrices.

    flist: lines of a cif file.

    Each matrix is [row1, row2, row3]; row k holds the numeric values of
    _struct_ncs_oper.matrix[k][1..3] followed by
    _struct_ncs_oper.vector[k].  An empty list is returned when the
    _struct_ncs_oper. category is missing or when id, code or any of the
    twelve matrix/vector tokens cannot be read.  A matrix with a row that
    has no numeric value at all is reported and skipped.
    '''
    mtrix = []
    items, values = cif.cifparse(flist, "_struct_ncs_oper.")
    if not items:
        return []

    def column(token):
        return cif.parse_values(items, values, '_struct_ncs_oper.' + token)

    # id and code are only checked for presence; they are not part of the
    # returned matrices.
    oper_id = column('id')
    code = column('code')

    b11 = column('matrix[1][1]')
    b12 = column('matrix[1][2]')
    b13 = column('matrix[1][3]')

    b21 = column('matrix[2][1]')
    b22 = column('matrix[2][2]')
    b23 = column('matrix[2][3]')

    b31 = column('matrix[3][1]')
    b32 = column('matrix[3][2]')
    b33 = column('matrix[3][3]')

    t1 = column('vector[1]')
    t2 = column('vector[2]')
    t3 = column('vector[3]')

    if not (oper_id and code and b11 and b12 and b13 and t1 and b21 and b22
            and b23 and t2 and b31 and b32 and b33 and t3):
        return []

    for i in range(len(b11)):
        mt1 = [
            float(x) for x in (b11[i], b12[i], b13[i], t1[i])
            if util.is_number(x)
        ]
        mt2 = [
            float(x) for x in (b21[i], b22[i], b23[i], t2[i])
            if util.is_number(x)
        ]
        mt3 = [
            float(x) for x in (b31[i], b32[i], b33[i], t3[i])
            if util.is_number(x)
        ]
        if not (mt1 and mt2 and mt3):
            print('Error: NCS matrix has wrong values')
            continue
        mtrix.append([mt1, mt2, mt3])
    return mtrix
def parse_table(alist, table, id):
    '''Parse one cif category and return (items, rows).

    alist: a file path when id is 0, otherwise the file content as a list
        of lines.
    table: the category prefix handed to cif.cifparse.
    id: 0 to read alist as a file; any other value to use alist as is.

    Returns the item names from cif.cifparse and the rows from
    cif.get_rows.
    '''
    if id == 0:  # a file
        with open(alist, 'r') as fp:
            flist = fp.readlines()
    else:
        flist = alist

    items, values = cif.cifparse(flist, table)
    rows = cif.get_rows(items, values)
    return items, rows
def is_bigcif(file):
    '''Tell whether a cif file uses chain IDs longer than one character.

    file: path of the cif file.

    Returns 2 (and prints a note) when any _atom_site.auth_asym_id value
    has more than one character, otherwise 0; 0 is also the result when no
    chain ID can be read from the file.
    '''
    with open(file, 'r') as fp:
        flist = fp.readlines()

    items, values = cif.cifparse(flist, '_atom_site.')  # a loop
    asym = cif.parse_values(items, values, "_atom_site.auth_asym_id")

    if asym and any(len(x) > 1 for x in asym):
        print('Note: Cif file (%s) has chainID >1 charactor.' % file)
        return 2
    return 0
def remove_bad_water(pdbfile, sffile):
    '''Write a copy of a PDB file without the waters that DCC flags.

    pdbfile: path of the coordinate file in PDB format.
    sffile: path of the structure factor file handed to dcc.

    Runs $DCCPY/bin/dcc to produce pdbfile + '_dcc.cif', reads its
    _pdbx_rscc_mapman_prob table and copies pdbfile to pdbfile + '_rmwat',
    dropping every ATOM/HETATM/ANISOU line of a HOH residue whose chain ID
    and residue number are listed in that table.  Returns None.

    Raises KeyError when the DCCPY environment variable is not set.
    '''
    npdb = pdbfile + '_rmwat'
    dccfile = pdbfile + '_dcc.cif'

    # NOTE(review): the file names are interpolated into a shell command
    # unquoted; names with blanks or shell metacharacters will break it.
    arg = '%s/bin/dcc -pdb %s -sf %s -no_xtriage -o %s ' % (
        os.environ['DCCPY'], pdbfile, sffile, dccfile)
    os.system(arg)

    print('Removing bad waters from %s' % dccfile)
    with open(dccfile, 'r') as fp:
        flist = fp.readlines()

    items, values = cif.cifparse(flist, '_pdbx_rscc_mapman_prob.')
    ch = cif.parse_values(items, values,
                          '_pdbx_rscc_mapman_prob.auth_asym_id')
    comp = cif.parse_values(items, values,
                            '_pdbx_rscc_mapman_prob.auth_comp_id')
    seq = cif.parse_values(items, values,
                           '_pdbx_rscc_mapman_prob.auth_seq_id')

    # (chain, residue number) of every flagged water, for O(1) lookup.
    flagged = set()
    for i, name in enumerate(comp):
        if 'HOH' in name:
            flagged.add((ch[i], seq[i]))

    with open(pdbfile, 'r') as fp, open(npdb, 'w') as fw:
        for x in fp:
            is_atom = ('ATOM' in x[:4] or 'HETATM' in x[:6]
                       or 'ANISOU' in x[:6])
            if is_atom and x[17:20] == 'HOH':
                ch1, seq1 = x[20:22].strip(), x[22:26].strip()
                if (ch1, seq1) in flagged:
                    print('removing water (%s)' % x[:28])
                    continue
            fw.write(x)

    print('The new pdb file = %s' % npdb)
def cif2cif_sf(flist, num):
    '''Write 'SF-<num>.cif' with free-set number num as the test set.

    flist: lines of a structure factor cif file.
    num: integer value of _refln.pdbx_r_free_flag to mark as free.

    Every reflection with status 'f' is first reset to 'o'; then every
    reflection whose free flag equals num and whose status is 'o' gets
    status 'f'.  The lines before and after the _refln loop are copied
    unchanged.  Returns the name of the file written.

    Raises ValueError when _refln.status or _refln.pdbx_r_free_flag is
    missing, or when a free flag is not an integer.
    '''
    sf = 'SF-%d.cif' % num
    total = len(flist)

    # n1: the 'loop_' line that opens the _refln table.
    n1 = 0
    for i, x in enumerate(flist):
        if (x.lstrip().startswith('loop_') and i + 1 < total
                and flist[i + 1].lstrip().startswith('_refln.')):
            n1 = i
            break

    # n2: first line after the table ('#', a new data block, or a token of
    # another category).
    n2 = total
    for i in range(n1, total):
        t = flist[i].lstrip()
        if (t.startswith('#') or t.startswith('data_')
                or (t.startswith('_') and not t.startswith('_refln.'))):
            n2 = i
            break

    items, values = cif.cifparse(flist[n1:n2], '_refln.')
    status_col = items.index('_refln.status')
    flag_col = items.index('_refln.pdbx_r_free_flag')
    rows = cif.get_rows(items, values)

    for row in rows:
        if row[status_col] == 'f':
            row[status_col] = 'o'
    for row in rows:
        if int(row[flag_col]) == num and row[status_col] == 'o':
            row[status_col] = 'f'

    with open(sf, 'w') as fw:
        for i in range(0, n1):
            fw.write(flist[i])  # write the front part
        cif.write_cif_loop(fw, items, rows)
        for i in range(n2, total):
            fw.write(flist[i])  # write the end part
    return sf
def get_rsr_database(res, rfact, file):
    '''Parse the mean and deviation of the RsR for the resolution range.

    res: resolution used to select rows of the _rsr_shell table
        (d_res_high <= res <= d_res_low).
    rfact: accepted for compatibility; the R-factor filter is not applied.
    file: path of the RsR data base in cif format.

    Returns a dict keyed by '<residue>_<secondary_structure>' whose values
    are [mean_all, deviation_all, number_all, mean_filter,
    deviation_filter, number_filter].  The first matching row wins for each
    key.  An empty dict is returned, after a warning, when
    util.check_file(30, file) is false.
    '''
    resn = {}
    if not util.check_file(30, file):
        print('Warning: no RsR data base is found. ')
        # Nothing to parse; opening the file would only raise.
        return resn

    with open(file, 'r') as fp:
        flist = fp.readlines()

    items, values = cif.cifparse(flist, '_rsr_shell.')  # a loop

    resname = cif.parse_values(items, values, "_rsr_shell.residue")
    resh = cif.parse_values(items, values, "_rsr_shell.d_res_high")
    resl = cif.parse_values(items, values, "_rsr_shell.d_res_low")
    sst = cif.parse_values(items, values, "_rsr_shell.secondary_structure")
    mean_all = cif.parse_values(items, values, "_rsr_shell.mean_all")
    dev_all = cif.parse_values(items, values, "_rsr_shell.deviation_all")
    num_all = cif.parse_values(items, values, "_rsr_shell.number_all")
    mean_fil = cif.parse_values(items, values, "_rsr_shell.mean_filter")
    dev_fil = cif.parse_values(items, values, "_rsr_shell.deviation_filter")
    num_fil = cif.parse_values(items, values, "_rsr_shell.number_filter")

    for i in range(len(resname)):
        key = '%s_%s' % (resname[i], sst[i])
        if float(resh[i]) <= res <= float(resl[i]):
            stats = [
                float(mean_all[i]), float(dev_all[i]), int(num_all[i]),
                float(mean_fil[i]), float(dev_fil[i]), int(num_fil[i])
            ]
            resn.setdefault(key, stats)
    return resn
def cut_map_around_ligand_peptide(dccfile, dic, mapfile_in, xyzfile_in):
    '''Generate a complete set of files for ligands (map, html, jmol).

    dccfile: the density file written by dcc.
    dic: a dictionary of run settings.  Keys read here: 'pdbfile',
        'ligmapcif', 'xyzfile_orig', 'sdsc_map' and 'dir'.  When
        dic['ligmapcif'] is true, the keys 'cif', 'cell_items', 'lig_cell',
        'sym_items', 'lig_sym', 'items', 'comp1', 'asym', 'seq', 'alt',
        'ins', 'mod' and 'row' are filled in from dic['xyzfile_orig'].
    mapfile_in: an input map file; it is moved to a '<name>_2fofc.map'
        file.
    xyzfile_in: an input coordinate file; it is converted with
        cif.cif2pdb when util.is_cif reports it as cif.

    Writes 'LIG_PEPTIDE.cif' in the current directory and returns None.
    '''
    print('Cutting the density maps for ligands/peptide')

    # Work on a PDB-format copy named after dic['pdbfile'].
    tmpxyz = xyzfile_in
    if util.is_cif(xyzfile_in):
        tmpxyz = cif.cif2pdb(xyzfile_in)

    pdbfile = os.path.basename(dic['pdbfile']) + '_new'
    if pdbfile != tmpxyz:
        shutil.copy(tmpxyz, pdbfile)

    # The map is renamed after the coordinate file it belongs to.
    mapfile = os.path.basename(dic['pdbfile']) + '_2fofc.map'
    if dic['ligmapcif']:
        mapfile = dic['xyzfile_orig'] + '_2fofc.map'
    shutil.move(mapfile_in, mapfile)

    if dic['ligmapcif']:  # pre-parse the cif file.
        dic['cif'] = 1
        ciffile = dic['xyzfile_orig']
        flist = open(ciffile, 'r').readlines()

        cell_items, values = cif.cifparse(flist, '_cell.')
        cell = cif.get_rows(cell_items, values)
        dic['cell_items'], dic['lig_cell'] = cell_items, cell

        sym_items, values = cif.cifparse(flist, '_symmetry.')
        sym = cif.get_rows(sym_items, values)
        dic['sym_items'], dic['lig_sym'] = sym_items, sym

        items, values = cif.cifparse(flist, '_atom_site.')
        comp = cif.parse_values(items, values, "_atom_site.auth_comp_id")
        asym = cif.parse_values(items, values, "_atom_site.auth_asym_id")
        seq = cif.parse_values(items, values, "_atom_site.auth_seq_id")
        alt = cif.parse_values(items, values, "_atom_site.label_alt_id")
        ins = cif.parse_values(items, values, "_atom_site.pdbx_PDB_ins_code")
        mod = cif.parse_values(items, values, "_atom_site.pdbx_PDB_model_num")
        row = cif.get_rows(items, values)

        dic['items'], dic['comp1'], dic['asym'], dic[
            'seq'] = items, comp, asym, seq
        dic['alt'], dic['ins'], dic['mod'], dic['row'] = alt, ins, mod, row

    fw_itool = open('LIG_PEPTIDE.cif',
                    'w')  # a cif file that contains the table and filenames
    fw_itool.write('data_lig_peptide\n')
    fw_itool.write(
        '\n# A "!" will be given if the residue is bad with real_space_R.\n')
    fw_itool.write('\n# Criteria: (CC<0.7 and R>0.4) or CC<0.5 or R>0.5\n')

    url = 'http://sf-tool.wwpdb.org/users_data/dir_%s/' % dic['dir']
    #url=os.environ['THIS_SERVICE_URL__FIX_ME'] + '/users_data/dir_%s/' %dic['dir']

    ch_pep, chr_pep, ch_lig, chr_lig, ch_wat, chr_wat = tls.chain_res_range(
        pdbfile)

    ligpdb = non_poly_pdb(ch_pep, ch_lig, pdbfile)  # get non-poly xyz file
    dcc = get_dcc(dccfile)  # get a list for dcc of each residue
    if not dcc:
        # Only a warning: processing continues with the empty list.
        util.perror(
            'Warning: Failed to parse EDS values! No ligand/peptide maps will be generated. '
        )

    # The first chain with fewer than 15 entries triggers the peptide maps
    # (unless dic['sdsc_map'] is set) and ends the search.
    for k, v in ch_pep.items():
        if len(v) < 15:  # length of peptide
            if not dic['sdsc_map']:
                map_around_peptide(fw_itool, dic, mapfile, ligpdb, dcc, ch_pep,
                                   url)
            break

    if ch_lig:
        map_around_ligand(fw_itool, dic, mapfile, ligpdb, dcc, ch_lig, url)

    get_html_table_baddcc_general(mapfile, dcc)  # for polymer/lig/peptide

    fw_itool.close()

    if dic['sdsc_map']:
        # Remove the intermediate files through the shell.
        arg = 'rm -f %s %s %s LIG_PEPTIDE.cif ' % (mapfile, mapfile_in,
                                                    xyzfile_in)
        arg = arg + ' %s_rcc_sum.cif.mtz %s_2fofc.map_all.html ' % (
            dic['pdbfile'], dic['pdbfile'])
        os.system(arg)

    # util.delete_file(pdbfile)
    return
def update_coord(file, swater, occ, out_xyz):
    '''Write a copy of a cif coordinate file with new occupancies.

    file: path of the input cif file.
    swater: list of atom selections, each
        [chain, comp, nres, atom, alt, ins, natom], compared as strings
        with auth_asym_id, label_comp_id, auth_seq_id, label_atom_id,
        label_alt_id, pdbx_PDB_ins_code and id of the _atom_site table.
    occ: list of numbers; occ[j] is the new occupancy for swater[j].
    out_xyz: name of the output file; when empty, file + '_new' is used.

    Returns the name of the file written.

    Raises ValueError when one of the _atom_site tokens used for matching
    is missing from the table.
    '''
    print('Updating coordinate..\n')

    newfile = file + '_new'
    if len(out_xyz):
        newfile = out_xyz

    with open(file, 'r') as fp:
        flist = fp.readlines()
    total = len(flist)

    # n1: the 'loop_' line that opens the _atom_site table.
    n1 = 0
    for i, x in enumerate(flist):  # get the first part.
        if (x.startswith('loop_') and i + 2 < total
                and flist[i + 1].startswith('_atom_site.')
                and flist[i + 2].startswith('_atom_site.')):
            n1 = i
            break

    # n2: first line after the coordinates (the second part).
    n2 = total
    for i in range(n1, total):
        t = flist[i].lstrip()
        if (t.startswith('#') or t.startswith('data_')
                or (t.startswith('_') and not t.startswith('_atom_site.'))):
            n2 = i
            break

    items, values = cif.cifparse(flist[n1:n2], '_atom_site.')
    key_cols = (
        items.index('_atom_site.auth_asym_id'),
        items.index('_atom_site.label_comp_id'),
        items.index('_atom_site.auth_seq_id'),
        items.index('_atom_site.label_atom_id'),
        items.index('_atom_site.label_alt_id'),
        items.index('_atom_site.pdbx_PDB_ins_code'),
        items.index('_atom_site.id'),
    )
    occ_col = items.index('_atom_site.occupancy')

    # Selection key -> index of the first selection with that key.
    wanted = {}
    for j, x in enumerate(swater):
        wanted.setdefault(tuple(x[:7]), j)

    rows = cif.get_rows(items, values)
    for row in rows:
        j = wanted.get(tuple(row[col] for col in key_cols))
        if j is not None:
            row[occ_col] = '%.2f' % occ[j]

    with open(newfile, 'w') as fw:
        for i in range(0, n1):
            fw.write(flist[i])  # write the front part
        cif.write_cif_loop(fw, items, rows)
        for i in range(n2, total):
            fw.write(flist[i])  # write the end part
    return newfile
def parse_dcc(flist, table):
    '''Parse a per-residue table of a DCC file into a list of lists.

    flist: lines of the DCC cif file.
    table: '_pdbx_rscc_mapman.' or '_pdbx_map.'.

    For '_pdbx_rscc_mapman.' each row is
        [nseq, chid, comp, alt, cc, rsr, biso, occ, pdbid, zrsr, ins]
    and reading stops at the first row whose model_id is above 1.

    For '_pdbx_map.' each row is
        [comp, chid, nseq, biso, cc, rsr, zobs, zdiff, zdplus, zdminus]
    extended by the main-chain and side-chain cc, rsr, zobs and zdiff when
    the ZDIFF_side_chain column is present.

    An empty list is returned for any other table, when the table is
    absent, or (for '_pdbx_map.') when the ZDIFF_overall column is missing.
    '''
    dcc = []

    if table == '_pdbx_rscc_mapman.':
        items, values = cif.cifparse(flist, '_pdbx_rscc_mapman.')
        if not items:
            return dcc

        def col(token):
            return cif.parse_values(items, values,
                                    '_pdbx_rscc_mapman.' + token)

        nseq = col('auth_seq_id')
        chid = col('auth_asym_id')
        comp = col('auth_comp_id')
        alt = col('label_alt_id')
        ins = col('label_ins_code')
        cc = col('correlation')
        rsr = col('real_space_R')
        zrsr = col('real_space_Zscore')
        biso = col('Biso_mean')
        occ = col('occupancy_mean')
        modid = col('model_id')
        pdbid = col('pdb_id')

        for i in range(len(chid)):
            if modid and int(modid[i]) > 1:
                break
            dcc.append([
                nseq[i], chid[i], comp[i], alt[i], cc[i], rsr[i], biso[i],
                occ[i], pdbid[i], zrsr[i], ins[i]
            ])

    elif table == '_pdbx_map.':
        items, values = cif.cifparse(flist, '_pdbx_map.')
        if not items:
            return dcc

        def col(token):
            return cif.parse_values(items, values, '_pdbx_map.' + token)

        nseq = col('auth_seq_id')
        chid = col('auth_asym_id')
        comp = col('auth_comp_id')
        biso = col('Biso_mean_overall')
        cc = col('density_correlation_overall')
        rsr = col('real_space_R_overall')
        zobs = col('ZOBS_overall')
        zdiff = col('ZDIFF_overall')
        zdplus = col('ZDplus_overall')
        zdminus = col('ZDminus_overall')

        cc_m = col('density_correlation_main_chain')
        rsr_m = col('real_space_R_main_chain')
        zobs_m = col('ZOBS_main_chain')
        zdiff_m = col('ZDIFF_main_chain')

        cc_s = col('density_correlation_side_chain')
        rsr_s = col('real_space_R_side_chain')
        zobs_s = col('ZOBS_side_chain')
        zdiff_s = col('ZDIFF_side_chain')

        # Without ZDIFF_overall no row can be built (this used to end in a
        # NameError on the first row).
        if not zdiff:
            return dcc

        for i in range(len(chid)):
            a = [
                comp[i], chid[i], nseq[i], biso[i], cc[i], rsr[i], zobs[i],
                zdiff[i], zdplus[i], zdminus[i]
            ]
            if zdiff_s:
                a.extend([
                    cc_m[i], rsr_m[i], zobs_m[i], zdiff_m[i], cc_s[i],
                    rsr_s[i], zobs_s[i], zdiff_s[i]
                ])
            dcc.append(a)

    return dcc
def atom_site(flist):
    '''Get all atom records; return a dictionary of parallel lists.

    flist: lines of a cif file.

    Keys of the returned dict: 'chain', 'nres' (int), 'comp', 'atom',
    'symbol', 'alt', 'ins', 'x', 'y', 'z', 'biso', 'occ' (floats) and
    'natm' (the _atom_site.id values, empty when that token is missing).
    'alt' and 'ins' hold '.' for every atom when their token is missing.
    An empty dict is returned, after an error message, when any of the
    atom name, residue name, chain, residue number, coordinates, occupancy
    or B factor cannot be read.
    '''
    dic = {}

    chain, nres, comp, atom, symbol, alt, ins = [], [], [], [], [], [], []
    x, y, z, biso, occ = [], [], [], [], []

    items, values = cif.cifparse(flist, '_atom_site.')  # a loop

    natm = cif.parse_values(items, values, "_atom_site.id")
    symbol1 = cif.parse_values(items, values, "_atom_site.type_symbol")
    if not symbol1:
        symbol1 = cif.parse_values(items, values,
                                   "_atom_site.atom_type_symbol")
    atom1 = cif.parse_values(items, values, "_atom_site.label_atom_id")
    asym1 = cif.parse_values(items, values, "_atom_site.auth_asym_id")
    comp1 = cif.parse_values(items, values, "_atom_site.label_comp_id")
    nres1 = cif.parse_values(items, values, "_atom_site.auth_seq_id")
    x1 = cif.parse_values(items, values, "_atom_site.Cartn_x")
    y1 = cif.parse_values(items, values, "_atom_site.Cartn_y")
    z1 = cif.parse_values(items, values, "_atom_site.Cartn_z")
    biso1 = cif.parse_values(items, values, "_atom_site.B_iso_or_equiv")
    occ1 = cif.parse_values(items, values, "_atom_site.occupancy")
    ins1 = cif.parse_values(items, values, "_atom_site.pdbx_PDB_ins_code")
    if not ins1:
        ins1 = cif.parse_values(items, values, "_atom_site.ndb_ins_code")
    alt1 = cif.parse_values(items, values, "_atom_site.label_alt_id")

    if not (atom1 and comp1 and asym1 and nres1 and x1 and y1 and z1
            and occ1 and biso1):
        print('Error: there is problem to parse atom_site.')
        return dic

    for i in range(len(x1)):
        chain.append(asym1[i])
        nres.append(int(nres1[i]))
        comp.append(comp1[i])
        atom.append(atom1[i])
        symbol.append(symbol1[i])
        alt.append(alt1[i] if alt1 else '.')
        ins.append(ins1[i] if ins1 else '.')
        x.append(float(x1[i]))
        y.append(float(y1[i]))
        z.append(float(z1[i]))
        biso.append(float(biso1[i]))
        occ.append(float(occ1[i]))

    # natm is returned as parsed: one id per atom.  It used to be appended
    # to itself inside the loop, which left every id in the list twice.
    dic = {
        'chain': chain,
        'nres': nres,
        'comp': comp,
        'atom': atom,
        'symbol': symbol,
        'alt': alt,
        'ins': ins,
        'x': x,
        'y': y,
        'z': z,
        'biso': biso,
        'occ': occ,
        'natm': natm
    }
    return dic
def find_xyzlim_compound(compid, coord):
    '''Find the xyz limits used by mapmask and write the selected atoms.

    compid: one or more atom group ids separated by ':'; each id is
        model_compound_chainID_resnumber_alter_insertion.
    coord: the coordinate file, in cif or pdb format as reported by
        util.is_cif (1 means cif).

    The atoms of the selected groups are written to a file named after the
    first four fields of the first id ('.cif' for cif input, '.pdb'
    otherwise).  The limits are the fractional coordinates of the box
    around these atoms, extended by 2.0 on every side.

    Returns (xyzlim, xyzcomp): the limits as 'xmin xmax ymin ymax zmin
    zmax' and the name of the file written.  ('', '') is returned when an
    id is malformed, when no atom matches, or when a pdb file has no
    CRYST1 record.  The program exits when the cif atom table cannot be
    read.
    '''
    comp = 'XXXX'
    for i, x in enumerate(compid.split(':')):
        t = x.split('_')
        # Validate before indexing, so a short id is reported instead of
        # raising IndexError.
        if len(t) != 6:
            print(
                'Error: in group-id (%d). it should be (model_compound_chainID_resnumber_alter_insertion).'
                % (i + 1))
            return '', ''
        if i == 0:
            comp = '_'.join([t[0], t[1], t[2], t[3]])

    idd = util.is_cif(coord)
    xyzcomp = comp + '.pdb'
    if idd == 1:
        xyzcomp = comp + '.cif'

    cell = None
    xx, yy, zz = [], [], []
    with open(xyzcomp, 'w') as fw:
        if idd == 1:  # input cif format
            fw.write('data_xyzcomp\n#\n')
            with open(coord, 'r') as fp:
                flist = fp.readlines()

            items, values = cif.cifparse(flist, '_cell.')
            fw.write('\n#\n')
            for m, p in enumerate(items):
                fw.write("%s %s\n" % (p, values[m]))

            cell = cif.get_cell(flist)

            items, values = cif.cifparse(flist, '_atom_site.')
            comps = cif.parse_values(items, values, "_atom_site.auth_comp_id")
            asym = cif.parse_values(items, values, "_atom_site.auth_asym_id")
            seq = cif.parse_values(items, values, "_atom_site.auth_seq_id")
            alt = cif.parse_values(items, values, "_atom_site.label_alt_id")
            ins = cif.parse_values(items, values,
                                   "_atom_site.pdbx_PDB_ins_code")
            x = cif.parse_values(items, values, "_atom_site.Cartn_x")
            y = cif.parse_values(items, values, "_atom_site.Cartn_y")
            z = cif.parse_values(items, values, "_atom_site.Cartn_z")
            model = cif.parse_values(items, values,
                                     "_atom_site.pdbx_PDB_model_num")

            if not (alt and comps and ins and asym and seq and x and y
                    and z):
                print(
                    'Error: not enough infor. extraced from (%s). Check ciftokens'
                    % coord)
                sys.exit()

            fw.write('\n#\nloop_\n')
            for p in items:
                fw.write("%s\n" % p)

            row = cif.get_rows(items, values)
            for i in range(len(x)):
                alter, inst, mod = '.', '.', '1'
                if model and util.is_number(model[i]):
                    mod = model[i]
                if alt and alt[i] != '?':
                    alter = alt[i]
                if ins and ins[i] != '?':
                    inst = ins[i]
                id1 = '_'.join([mod, comps[i], asym[i], seq[i], alter, inst])
                if id1 in compid:
                    xx.append(float(x[i]))
                    yy.append(float(y[i]))
                    zz.append(float(z[i]))
                    for m in row[i]:
                        fw.write("%s " % m)
                    fw.write('\n')

        else:  # pdb format
            with open(coord, 'r') as fp:
                for x1 in fp:
                    if 'CRYST1' in x1[:6]:
                        fw.write(x1)
                        cell = [float(p) for p in x1[8:54].split()]
                    elif 'ATOM' in x1[:4] or 'HETATM' in x1[:6]:
                        alt = x1[16:17]
                        if alt.isspace():
                            alt = '.'
                        ins = x1[26:27]
                        if ins.isspace():
                            ins = '.'
                        resname = x1[17:20].strip()
                        chid = x1[20:22].strip()
                        resnum = x1[22:26].strip()
                        # No model field here: the substring test against
                        # compid matches the part after the model number.
                        resid = '_'.join([resname, chid, resnum, alt, ins])
                        if resid in compid:
                            fw.write(x1)  # only write the selected section
                            xx.append(float(x1[30:38]))
                            yy.append(float(x1[38:46]))
                            zz.append(float(x1[46:54]))

    if not xx or not yy or not zz:
        print('Error: %s can not be found in the coordinate. try a new id. ' %
              (compid))
        return '', ''
    if cell is None:
        print('Error: no cell (CRYST1) record is found in (%s).' % coord)
        return '', ''

    frac, orth = util.frac_orth_matrix(cell)  # get matrix
    border = 2.0  # extend a little to cover more density
    xx_min, xx_max = min(xx) - border, max(xx) + border
    yy_min, yy_max = min(yy) - border, max(yy) + border
    zz_min, zz_max = min(zz) - border, max(zz) + border

    xf_min = util.matrix_prod(frac, [xx_min, yy_min, zz_min])
    xf_max = util.matrix_prod(frac, [xx_max, yy_max, zz_max])

    xyzlim = '%.3f %.3f %.3f %.3f %.3f %.3f' % (
        xf_min[0], xf_max[0], xf_min[1], xf_max[1], xf_min[2], xf_max[2])
    return xyzlim, xyzcomp
def check_ncs_cif(file):
    '''Check the NCS records of a cif file.

    file: path of the cif file.

    The four cif tables struct_ncs_ens, refine_ls_restr_ncs, struct_ncs_dom
    and struct_ncs_dom_lim are read and compared with each other; problems
    are reported through util.perror.  Returns None.
    '''
    flist = open(file, 'r').readlines()

    ncs = ncs_from_head(flist)

    items, values = cif.cifparse(flist, '_struct_ncs_ens.')
    ens_id = cif.parse_values(items, values, '_struct_ncs_ens.id')

    items, values = cif.cifparse(flist, '_refine_ls_restr_ncs.')
    res_ord = cif.parse_values(items, values,
                               '_refine_ls_restr_ncs.pdbx_ordinal')
    res_ref = cif.parse_values(items, values,
                               '_refine_ls_restr_ncs.pdbx_refine_id')
    res_ens = cif.parse_values(items, values,
                               '_refine_ls_restr_ncs.pdbx_ens_id')
    res_dom = cif.parse_values(items, values, '_refine_ls_restr_ncs.dom_id')
    res_typ = cif.parse_values(items, values,
                               '_refine_ls_restr_ncs.pdbx_type')
    res_asy = cif.parse_values(items, values,
                               '_refine_ls_restr_ncs.pdbx_auth_asym_id')
    res_num = cif.parse_values(items, values,
                               '_refine_ls_restr_ncs.pdbx_number')
    res_rms = cif.parse_values(items, values,
                               '_refine_ls_restr_ncs.rms_dev_position')
    # Fall back to the pdbx_rms token when any rms value is '?'.
    if '?' in res_rms:
        res_rms = cif.parse_values(items, values,
                                   '_refine_ls_restr_ncs.pdbx_rms')
    res_wgh = cif.parse_values(items, values,
                               '_refine_ls_restr_ncs.weight_position')

    items, values = cif.cifparse(flist, '_struct_ncs_dom.')
    dom_ens = cif.parse_values(items, values, '_struct_ncs_dom.pdbx_ens_id')
    dom_id = cif.parse_values(items, values, '_struct_ncs_dom.id')
    dom_all = cif.parse_values(items, values, '_struct_ncs_dom.details')

    items, values = cif.cifparse(flist, '_struct_ncs_dom_lim.')
    lim_ens = cif.parse_values(items, values,
                               '_struct_ncs_dom_lim.pdbx_ens_id')
    lim_dom = cif.parse_values(items, values, '_struct_ncs_dom_lim.dom_id')
    lim_com = cif.parse_values(items, values,
                               '_struct_ncs_dom_lim.pdbx_component_id')
    lim_basy = cif.parse_values(items, values,
                                '_struct_ncs_dom_lim.beg_auth_asym_id')
    lim_bseq = cif.parse_values(items, values,
                                '_struct_ncs_dom_lim.beg_auth_seq_id')
    lim_easy = cif.parse_values(items, values,
                                '_struct_ncs_dom_lim.end_auth_asym_id')
    lim_eseq = cif.parse_values(items, values,
                                '_struct_ncs_dom_lim.end_auth_seq_id')
    lim_all = cif.parse_values(items, values,
                               '_struct_ncs_dom_lim.selection_details')

    # Stop early when there is nothing to compare: NCS records without any
    # table (reported), or neither records nor an ensemble table (silent).
    if len(ncs):
        if not (res_ens or dom_ens or lim_ens or ens_id):
            util.perror(
                'Warning: NCS records exist, but no cif tables for it.')
            return
    else:
        if not len(ens_id):
            return

    # Each warning fires only when every listed column of a table is empty.
    if not (res_ens or res_dom or res_ord or res_ref or res_asy or res_typ):
        util.perror(
            'Warning: No cif table (refine_ls_restr_ncs) or missing key items for NCS.'
        )

    if not (dom_ens or dom_id):
        util.perror(
            'Warning: No cif table (struct_ncs_dom) or missing key items for NCS.'
        )

    if not (lim_ens or lim_dom or lim_com):
        util.perror(
            'Warning: No cif table (struct_ncs_dom_lim) or missing key items for NCS.'
        )

    if not (ens_id):
        util.perror('Warning: No cif table (struct_ncs_ens) for NCS.')

    chain = get_chain_seq(flist)  # chain is a dict of int lists

    def tmp(list1, list2):  # put ensemble=key and doms=[] to a dic
        tmpd = {}
        for j, y in enumerate(list1):
            if y not in list(tmpd.keys()):
                tmpd[y] = []
            tmpd[y].append(list2[j])
        return tmpd

    dom = tmp(dom_ens, dom_id)
    lim = tmp(lim_ens, lim_dom)
    res = tmp(res_ens, res_dom)

    def tmp1(list1, list2, s2):
        # Report every ID of list1 that is not in list2 (table named s2).
        for n in list1:
            if n not in list2:
                util.perror('Error: NCS ID (%s) not in table (%s).' % (n, s2))

    # Every ensemble ID must be used by each table that is present.
    if dom:
        tmp1(ens_id, list(dom.keys()), 'struct_ncs_dom')
    if lim:
        tmp1(ens_id, list(lim.keys()), 'struct_ncs_dom_lim')
    if res:
        tmp1(ens_id, list(res.keys()), 'refine_ls_restr_ncs')

    # print chain.keys(), chain, dom, lim, res
    if (lim_basy and lim_easy and lim_bseq and lim_eseq):
        check_lim(lim_ens, chain, lim_basy, lim_easy, lim_bseq, lim_eseq)

    check_res(res_ens, chain, res_ref, res_asy, res_typ, res_num, res_rms)
def get_list(file):
    '''Extract one row of summary values from a cif file.

    The values come from the _pdbx_density category and from the
    _pdbx_density_corr loop.  For the loop, the row whose details text
    contains 'Best' is used for the R factors and correlation values
    (row 0 when no such row exists); the program name is always taken
    from row 0.

    file: path of the cif file to read.

    Returns a list of 33 strings in a fixed order, starting with the id
    taken from the first line containing 'data_' ('XXXX' if there is
    none).  Each value is normalised as follows:
      * a missing or empty value becomes '?';
      * blanks inside a value are replaced by '_';
      * a number containing '.' is reformatted with two decimals.
    An empty list is returned (after printing an error) when
    util.check_file() rejects the file.
    '''
    vlist = []
    if not util.check_file(200, file):
        print('Error: file (%s) do not exist' % file)
        return vlist

    # Close the handle deterministically instead of relying on the GC.
    with open(file, 'r') as handle:
        flist = handle.readlines()

    pdbid = 'XXXX'
    for line in flist:
        if 'data_' in line:
            pdbid = line.split('_')[1].strip()
            break

    d_items, d_values = cif.cifparse(flist, '_pdbx_density.')

    def density(name):
        '''Return the parsed values of one _pdbx_density item.'''
        return cif.parse_values(d_items, d_values, '_pdbx_density.' + name)

    sym = density('space_group_name_H-M')
    res = density('ls_d_res_high')
    rw = density('R_value_R_work')
    rf = density('R_value_R_free')
    biso = density('Biso_mean')
    bwil = density('B_wilson')
    l2 = density('Padilla-Yeates_L2_mean')
    z = density('Z_score_L_test')
    fom = density('fom')
    isig = density('I_over_sigI_resh')
    isigd = density('I_over_sigI_diff')
    pst = density('translational_pseudo_symmetry')
    bsol = density('B_solvent')
    ksol = density('K_solvent')
    tlst = density('partial_B_value_correction_success')
    ntls = density('tls_group_number')
    nncs = density('ncs_group_number')
    nmtx = density('mtrix_number')
    matt = density('Matthew_coeff')
    solv = density('solvent_content')
    dpix = density('Cruickshank_dpi_xyz')  # parsed but not part of the row
    rtwin = density('reflns_twin')
    xtwin = density('twin_by_xtriage')
    anis = density('anisotropy')

    # Only the first character of the B-value type is reported.  Default
    # to '' (rendered as '?') so a file without the item no longer fails
    # with an unbound local.
    btype_raw = density('iso_B_value_type')
    btype = btype_raw[0][:1] if btype_raw else ''

    # 'Y' when the first twin operator value contains '2:'; a missing
    # item counts as 'N' instead of raising IndexError.
    ctwin_t = density('twin_operator')
    ctwin = 'Y' if ctwin_t and '2:' in ctwin_t[0] else 'N'

    # looped category
    c_items, c_values = cif.cifparse(flist, '_pdbx_density_corr.')

    def corr(name):
        '''Return the parsed column of one _pdbx_density_corr item.'''
        return cif.parse_values(c_items, c_values,
                                '_pdbx_density_corr.' + name)

    prog = corr('program')
    rwork = corr('ls_R_factor_R_work')
    rfree = corr('ls_R_factor_R_free')
    fcc = corr('correlation_coeff_Fo_to_Fc')
    rsr = corr('real_space_R')
    dcc = corr('correlation')
    detail = corr('details')

    nr, nc = 0, 0
    for i, text in enumerate(detail):
        if 'Best' in text:
            nc = i
            break

    def pick(column, index):
        '''Return column[index], or '?' when the column is too short.'''
        return column[index] if index < len(column) else '?'

    rprog = pick(prog, nr).replace(' ', '')
    crw = pick(rwork, nc)
    crf = pick(rfree, nc)
    fcc_best = pick(fcc, nc)
    rsr_best = pick(rsr, nc)
    dcc_best = pick(dcc, nc)

    # Difference between the first R_value_R_work value and the selected
    # row's ls_R_factor_R_work, scaled by 1000 and truncated to an int.
    rw_crw = '?'
    rw_first = rw[0] if rw else '?'
    if util.is_number(rw_first) and util.is_number(crw):
        rw_crw = '%d' % int(1000 * (float(rw_first) - float(crw)))

    row = [
        pdbid, res, rw_crw, rw, rf, crw, crf, fcc_best, rsr_best, dcc_best,
        fom, biso, bwil, matt, solv, ksol, bsol, ntls, nncs, nmtx, tlst,
        btype, pst, rtwin, xtwin, ctwin, l2, z, anis, isig, isigd, rprog,
        sym,
    ]

    normalised = []
    for entry in row:
        if not entry:
            text = '?'
        elif isinstance(entry, list):
            text = entry[0]  # single-valued items: keep the first value
        else:
            text = entry
        text = text.replace(' ', '_')
        if util.is_number(text) and '.' in text:
            text = '%.2f' % float(text)
        normalised.append(text)
    return normalised