def find_similar_rel(cell, rel_tol):
    """Return entries of DATA that are similar to `cell`, closest first.

    Each entry is pre-filtered on its fields 2 and 3, which are compared
    with cell.a and cell.b scaled by (1 - rel_tol) and (1 + rel_tol).
    Surviving entries are wrapped in ContamCell and kept only when
    calculate_difference(cell, candidate) is below rel_tol.  The returned
    list is sorted by increasing calculate_difference().
    """
    lower = 1 - rel_tol
    upper = 1 + rel_tol

    def distance(other):
        return calculate_difference(cell, other)

    matches = []
    for row in DATA:
        if row[2] < lower * cell.a:
            continue
        if row[2] > upper * cell.a:
            # entries are sorted by this field, so no later row can match
            break
        if lower * cell.b < row[3] < upper * cell.b:
            candidate = ContamCell(row)
            if distance(candidate) < rel_tol:
                matches.append(candidate)
    return sorted(matches, key=distance)
def dimple(wf, opt):
    """Run the whole pipeline: from an MTZ and PDB file(s) to a refined model.

    The steps, in order, are:
      1. read metadata of opt.mtz and of every file in opt.pdbs; when several
         PDB files are given they are sorted by calculate_difference() against
         the MTZ and the first one is used,
      2. optionally strip HETATM records, then run rwcontents,
      3. run pointless (reindexing) when match_symmetry() succeeds and the
         unit-cell volumes are comparable,
      4. run ctruncate/truncate if amplitudes have to be calculated,
      5. run rigid-body refmac5 to decide whether MR is needed,
      6. run molrep or phaser if it is,
      7. prepare free-R flags (taken from opt.free_r_flags or generated),
      8. run optional jelly-body and then restrained refmac5 refinement,
      9. optionally search for blobs and optionally run the AnoDe step.

    Parameters:
      wf  - workflow object; provides the job factories (refmac5, phaser_auto,
            cad, ...), the list of executed jobs (wf.jobs), file_info and the
            set temporary_files that this function extends.
      opt - options object; note that opt.pdbs, opt.fcolumn and
            opt.sigfcolumn are modified in place.

    Returns None.  The function returns early when no usable PDB file is
    given, when MR would be needed but opt.mr_when_r >= 1, and when the
    AnoDe step is requested but the column-type check fails.

    Raises RuntimeError if _after_phaser_comments() reports no solution.
    """
    comment(' ### Dimple v%s. Problems and suggestions:'
            ' ccp4.github.io/dimple ###' % __version__)
    mtz_meta = wf.read_mtz_metadata(opt.mtz)
    _comment_summary_line('MTZ (%.1fA)' % mtz_meta.dmax, mtz_meta)
    if opt.dls_naming:
        opt.pdbs = dls_name_filter(opt.pdbs)
    opt.pdbs = utils.filter_out_duplicate_files(opt.pdbs, relto=opt.output_dir)
    if not opt.pdbs:
        comment('\nNo non-empty pdb files given. Nothing to do.')
        return
    # read_pdb_metadata() is called for its side effect: the loop below and
    # the sort key read the results back from wf.file_info
    for p in opt.pdbs:
        wf.read_pdb_metadata(p, print_errors=(len(opt.pdbs) > 1))
    if len(opt.pdbs) > 1:
        comment('\nPDBs in order of similarity (using the first one):')
        opt.pdbs.sort(
            key=lambda x: calculate_difference(wf.file_info[x], mtz_meta))
    utils.log_value('data_file', opt.mtz)
    utils.log_value('pdb_files', opt.pdbs)
    for p in opt.pdbs:
        _comment_summary_line(os.path.basename(p), wf.file_info[p])
    ini_pdb = 'ini.pdb'
    wf.copy_uncompressed(opt.pdbs[0], ini_pdb)
    # pdb_meta may be None; several branches below special-case that
    pdb_meta = wf.file_info[opt.pdbs[0]]
    if pdb_meta is None:
        put_error('PDB file missing CRYST1 record, starting from MR')
    if opt.no_hetatm or check_hetatm_x(wf.path(ini_pdb), pdb_meta):
        if not opt.no_hetatm:
            comment('\nHETATM marked as element X would choke many programs.')
        rb_xyzin = 'prepared.pdb'
        wf.temporary_files.add(rb_xyzin)
        n_het = wf.remove_hetatm(xyzin=ini_pdb, xyzout=rb_xyzin,
                                 remove_all=opt.no_hetatm)
        comment('\nRemoved %d HETATM atoms' % n_het)
    else:
        rb_xyzin = ini_pdb
    # run rwcontents even without CRYST1 - it will show mol. weight only
    wf.rwcontents(xyzin=rb_xyzin).run()
    rw_data = wf.jobs[-1].data
    if pdb_meta is None:
        pass  # we already had a warning message
    elif rw_data.get('solvent_percent') is None:
        put_error('rwcontents could not interpret %s' % rb_xyzin)
    elif rw_data['solvent_percent'] > HIGH_SOLVENT_PCT:
        comment('\nHmm... %.1f%% of solvent or incomplete model'
                % rw_data['solvent_percent'])
        if abs(wf.jobs[-1].data.get('volume', 0) - pdb_meta.get_volume()) > 10:
            comment('\ndebug: problem when calculating volume?')

    ####### pointless - reindexing #######
    # NOTE(review): pdb_meta.get_volume() is reached only if
    # match_symmetry() is true; presumably that is false for pdb_meta None
    if match_symmetry(mtz_meta, pdb_meta) and opt.mr_when_r > 0 and (
            0.7 < mtz_meta.get_volume() / pdb_meta.get_volume() < 1.4):
        reindexed_mtz = 'pointless.mtz'
        wf.temporary_files.add(reindexed_mtz)
        wf.pointless(hklin=opt.mtz, xyzin=rb_xyzin, hklout=reindexed_mtz,
                     keys='TOLERANCE 5').run(may_fail=True)
        alt_reindex = wf.jobs[-1].data.get('alt_reindex')
        if wf.jobs[-1].exit_status == 0 and alt_reindex:
            for ar in alt_reindex:
                comment('\n %-10s CC: %-8.3f cell diff: %.1fA'
                        % (ar['op'], ar['cc'], ar['cell_deviat']))
        else:
            # until recently (2015) pointless didn't print CC for non-ambiguous
            # spacegroups (e.g. C2), but now it always prints
            comment('\n no good indexing')
            # fall back to the original, not reindexed data
            reindexed_mtz = opt.mtz
    else:
        reindexed_mtz = opt.mtz
    reindexed_mtz_meta = wf.read_mtz_metadata(reindexed_mtz)
    if reindexed_mtz_meta.symmetry != mtz_meta.symmetry:
        _comment_summary_line('reindexed MTZ', reindexed_mtz_meta)

    ####### (c)truncate - calculate amplitudes if needed #######
    if not opt.fcolumn:
        opt.fcolumn = 'F' if 'F' in mtz_meta.columns else 'FP'
    elif opt.icolumn or opt.ItoF_prog:
        put_error('Ignoring options --fcolumn/--sigfcolumn')
    opt.sigfcolumn = opt.sigfcolumn.replace('<FCOL>', opt.fcolumn)
    # amplitudes are (re)calculated when it is requested explicitly
    # or when the F/SIGF columns are not in the input file
    if (opt.ItoF_prog or opt.icolumn or opt.fcolumn not in mtz_meta.columns
            or opt.sigfcolumn not in mtz_meta.columns):
        f_mtz = 'amplit.mtz'
        wf.temporary_files.add(f_mtz)
        i_sigi_cols = _find_i_sigi_columns(mtz_meta, opt)
        if opt.ItoF_prog == 'ctruncate' or (opt.ItoF_prog is None
                                            and opt.slow):
            # anomalous columns are passed on only for the AnoDe step
            # and only if all four of them are present
            colano = None
            if opt.anode and all(
                    col in mtz_meta.columns
                    for col in ['I(+)', 'SIGI(+)', 'I(-)', 'SIGI(-)']):
                colano = '/*/*/[I(+),SIGI(+),I(-),SIGI(-)]'
            wf.ctruncate(hklin=reindexed_mtz, hklout=f_mtz,
                         colin='/*/*/[%s,%s]' % i_sigi_cols,
                         colano=colano).run()
        else:
            wf.truncate(hklin=reindexed_mtz, hklout=f_mtz,
                        labin='IMEAN=%s SIGIMEAN=%s' % i_sigi_cols,
                        labout='F=F SIGF=SIGF').run()
        # from now on use the freshly calculated amplitudes
        opt.fcolumn = 'F'
        opt.sigfcolumn = 'SIGF'
    else:
        f_mtz = reindexed_mtz

    ####### rigid body - check if model is good for refinement? #######
    refmac_labin_nofree = 'FP=%s SIGFP=%s' % (opt.fcolumn, opt.sigfcolumn)
    # refmac_xyzin stays None if MR is needed
    refmac_xyzin = None
    cell_diff = calculate_difference(pdb_meta, reindexed_mtz_meta)
    if pdb_meta is None:
        pass  # the error message was already printed
    elif opt.mr_when_r <= 0:
        comment('\nMR requested unconditionally.')
    elif cell_diff > 0.1 and opt.mr_when_r < 1:
        comment('\nDifferent unit cells.')
    elif pdb_meta.symmetry != reindexed_mtz_meta.symmetry:
        comment('\nDifferent space groups.')
    else:
        comment('\nRigid-body refinement with resolution 3.5 A, %d cycles.'
                % opt.rigid_cycles)
        if 'aa_count' in rw_data and 'water_count' in rw_data:
            # avoid division by zero for models without amino acids
            if rw_data['aa_count'] != 0:
                comment(' %.1f waters/aa.'
                        % (rw_data['water_count'] / rw_data['aa_count']))
            else:
                comment(' %d/0 waters/aa.' % rw_data['water_count'])
        wf.temporary_files |= {'refmacRB.pdb', 'refmacRB.mtz'}
        # it may fail because of "Disagreement between mtz and pdb"
        wf.refmac5(hklin=f_mtz, xyzin=rb_xyzin,
                   hklout='refmacRB.mtz', xyzout='refmacRB.pdb',
                   labin=refmac_labin_nofree, libin=None,
                   keys="""refinement type rigidbody
                           resolution 15 3.5
                           rigidbody ncycle %d""" % opt.rigid_cycles
                   ).run(may_fail=True)
        # if the error is caused by mtz/pdb disagreement, continue with MR
        if wf.jobs[-1].exit_status != 0:
            comment('\nTry MR.')
        elif not wf.jobs[-1].data.get('overall_r'):
            comment('\nWARNING: unknown R factor, something went wrong.\n')
            refmac_xyzin = 'refmacRB.pdb'
        elif wf.jobs[-1].data['overall_r'] > opt.mr_when_r:
            comment('\nRun MR for R > %g.' % opt.mr_when_r)
        else:
            comment('\nNo MR for R < %g.' % opt.mr_when_r)
            refmac_xyzin = 'refmacRB.pdb'

    ####### phaser/molrep - molecular replacement #######
    if refmac_xyzin is None:
        vol_ratio = None
        if pdb_meta:
            # num_mol accounts for strict NCS (MTRIX without iGiven)
            vol_ratio = (mtz_meta.asu_volume()
                         / pdb_meta.asu_volume(rw_data['num_mol']))
            comment(' Volume of asu: %.1f%% of model asu.' % (100 * vol_ratio))
        if opt.mr_when_r >= 1:
            comment('\nWould try MR, but it is disabled.')
            return
        if opt.mr_num:
            mr_num = opt.mr_num
        else:
            mr_num = guess_number_of_molecules(mtz_meta, rw_data, vol_ratio)
        mw = rw_data.get('weight')
        # a float mr_num is multiplied by the number of models that
        # ensembler finds; the merged ensemble is then the search model
        if isinstance(mr_num, float):
            wf.ensembler(pdbin=rb_xyzin, root='ens').run()
            n_models = len(wf.jobs[-1].data['models'])
            mw = None
            rb_xyzin = 'ens_merged.pdb'
            mr_num = max(int(round(mr_num * n_models)), 1)
        # phaser is used by default if number of searched molecules is known
        if opt.mr_prog == 'molrep':
            wf.temporary_files |= {
                'molrep.pdb', 'molrep_dimer.pdb', 'molrep.crd'
            }
            wf.molrep(f=f_mtz, m=rb_xyzin).run()
            refmac_xyzin = 'molrep.pdb'
        else:
            wf.temporary_files |= {'phaser.1.pdb', 'phaser.1.mtz'}
            wf.phaser_auto(hklin=f_mtz,
                           labin='F=%s SIGF=%s' % (opt.fcolumn, opt.sigfcolumn),
                           model=dict(pdb=rb_xyzin, identity=100, num=mr_num,
                                      mw=mw),
                           sg_alt='ALL', opt=opt,
                           root='phaser').run(may_fail=True)
            if not _after_phaser_comments(wf.jobs[-1],
                                          sg_in=reindexed_mtz_meta.symmetry):
                raise RuntimeError('No phaser solution.')
            # continue with both the model and the data written by phaser
            refmac_xyzin = 'phaser.1.pdb'
            f_mtz = 'phaser.1.mtz'

    # deliberately disabled step, never executed
    if False:
        wf.findwaters(pdbin=refmac_xyzin, hklin=f_mtz,
                      f='FC', phi='PHIC', pdbout='prepared_wat.pdb', sigma=2)
        refmac_xyzin = 'prepared_wat.pdb'

    ####### adding free-R flags #######
    f_mtz_meta = wf.read_mtz_metadata(f_mtz)
    cad_reso = opt.reso or (f_mtz_meta.dmax - MtzMeta.d_eps)
    if opt.free_r_flags:
        free_mtz = opt.free_r_flags
        free_col = check_freerflags_column(wf.path(free_mtz),
                                           expected_symmetry=pdb_meta,
                                           column=opt.freecolumn)
        comment('\nFree-R flags from the %s file, column %s.'
                % (('reference' if free_mtz != opt.mtz else 'input'),
                   free_col))
    else:
        free_col = DEFAULT_FREE_COLS[0]
        if free_col in f_mtz_meta.columns:
            comment('\nReplace free-R flags')
        else:
            comment('\nGenerate free-R flags')
        free_mtz = 'free.mtz'
        wf.temporary_files |= {'unique.mtz', free_mtz}
        if opt.seed_freerflag or cell_diff > 1e3:  # i.e. different SG
            wf.unique(hklout='unique.mtz', ref=f_mtz_meta,
                      resolution=cad_reso).run()
        else:
            comment(' (repeatably)')
            # Here we'd like to have always the same set of free-r flags
            # for given PDB file. That's why we don't use information
            # from the data file (mtz).
            wf.unique(hklout='unique.mtz', ref=pdb_meta,
                      resolution=1.0).run()
        # CCP4 freerflag uses always the same pseudo-random sequence by default
        wf.freerflag(hklin='unique.mtz', hklout=free_mtz,
                     keys=('SEED' if opt.seed_freerflag else '')).run()

    # cad merges data columns with the free-R column; it is skipped only if
    # the flags come from the input file and no resolution cut-off is set
    if free_mtz == opt.mtz and opt.reso is None:
        prepared_mtz = f_mtz
    else:
        prepared_mtz = 'prepared.mtz'
        wf.temporary_files.add(prepared_mtz)
        wf.cad(
            data_in=[(f_mtz, [c for c in f_mtz_meta.columns if c != free_col]),
                     (free_mtz, [free_col])],
            hklout=prepared_mtz,
            keys=[
                'sysab_keep',  # does it matter?
                'reso overall 1000.0 %g' % cad_reso
            ]).run()
    # reflections without a flag get one in a second freerflag run
    freerflag_missing = wf.count_mtz_missing(prepared_mtz, free_col)
    if freerflag_missing:
        wf.freerflag(hklin=prepared_mtz, hklout='prepared2.mtz',
                     keys='COMPLETE FREE=' + free_col,
                     parser=' (again, for %d refl. more)' % freerflag_missing
                     ).run()
        prepared_mtz = 'prepared2.mtz'
        wf.temporary_files.add(prepared_mtz)

    ####### refinement #######
    if opt.weight:
        refmac_weight = 'matrix %f' % opt.weight
    else:
        refmac_weight = 'auto'
    # keywords shared by the jelly-body and the final refmac5 runs
    restr_ref_keys = """\
     make newligand continue
     refinement type restrained
     weight %s
     """ % refmac_weight
    if opt.freecolumn_val:
        restr_ref_keys += 'free %s\n' % opt.freecolumn_val
    refmac_labin = '%s FREE=%s' % (refmac_labin_nofree, free_col)
    comment('\nRestrained refinement, %d+%d cycles.'
            % (opt.jelly, opt.restr_cycles))
    if opt.jelly:
        wf.temporary_files |= {'jelly.pdb', 'jelly.mtz'}
        # '%' binds tighter than '+': opt.jelly is formatted into the
        # concatenated literal before the surrounding strings are added
        wf.refmac5(hklin=prepared_mtz, xyzin=refmac_xyzin,
                   hklout='jelly.mtz', xyzout='jelly.pdb',
                   labin=refmac_labin, libin=opt.libin,
                   keys=restr_ref_keys + 'ridge distance sigma 0.01\n'
                                         'make hydrogen no\n'
                                         'ncycle %d' % opt.jelly
                   + opt.extra_ref_keys).run()
        comment(_refmac_rms_line(wf.jobs[-1].data))
        refmac_xyzin = 'jelly.pdb'
    restr_job = wf.refmac5(
        hklin=prepared_mtz, xyzin=refmac_xyzin,
        hklout=opt.hklout, xyzout=opt.xyzout,
        labin=refmac_labin, libin=opt.libin,
        keys=(restr_ref_keys + 'ncycle %d' % opt.restr_cycles
              + opt.extra_ref_keys)).run()
    comment(_refmac_rms_line(restr_job.data))
    # if that run is repeated with --from-step it's useful to compare Rfree
    if wf.from_job > 0 and wf.from_job <= len(wf.jobs):  # from_job is 1-based
        prev = [j for j in wf.repl_jobs if j.name == restr_job.name]
        if prev and prev[0].data and 'free_r' in prev[0].data:
            comment('\nPreviously: R/Rfree %.4f/%.4f Rfree change: %+.4f'
                    % (prev[0].data['overall_r'], prev[0].data['free_r'],
                       restr_job.data['free_r'] - prev[0].data['free_r']))

    ####### check blobs #######
    if opt.blob_search:
        if restr_job.data['free_r'] <= BAD_FINAL_RFREE:
            fb_job = wf.find_blobs(opt.hklout, opt.xyzout, sigma=0.8).run()
            coot_script = _generate_scripts_and_pictures(wf, opt, fb_job.data)
            if coot_script:
                comment('\nTo see it in Coot run %s' % coot_script)
        else:
            comment('\nNo blob search for Rfree > %g.' % BAD_FINAL_RFREE)
            _generate_scripts_and_pictures(wf, opt, None)

    if opt.anode:
        # check if mtz contains I+/- and SIGI+/-
        column_types = list(reindexed_mtz_meta.columns.values())
        # NOTE(review): with `and` the step is skipped only when BOTH counts
        # differ from 2. If both kinds of columns are required (presumably
        # type K for I+/- and type M for their sigmas), this probably should
        # be `or` - TODO confirm.
        if column_types.count('K') != 2 and column_types.count('M') != 2:
            comment('\nColumns I+/- and SIG+/- not found. Skipping AnoDe.')
            return
        anode_name = 'anode'
        # convert to sca for input to shelxc
        scaout = anode_name + '.sca'
        wf.mtz2sca(prepared_mtz, scaout).run()
        wf.shelxc(scaout, reindexed_mtz_meta.cell,
                  reindexed_mtz_meta.symmetry).run()
        wf.copy_uncompressed(opt.xyzout, anode_name + '.pdb')
        anode_job = wf.anode(anode_name).run()
        wf.temporary_files |= {
            scaout, anode_name + '.pdb', anode_name + '.hkl',
            anode_name + '.pha', anode_name + '_sad.cif',
            anode_name + '_fa.hkl'
        }
        cell = Cell(reindexed_mtz_meta.cell, reindexed_mtz_meta.symmetry)
        # need orthogonal not fractional coordinates to generate coot script
        anode_job.data['blobs'] = cell.orthogonalize(anode_job.data['xyz'])
        comment(_anode_anom_peak_lines(anode_job.data))
        # the returned script name is not used in this branch
        coot_script = _generate_scripts_and_pictures(wf, opt, anode_job.data,
                                                     pha=anode_name + '.pha')
def dimple(wf, opt):
    """Run the whole pipeline: from an MTZ and PDB file(s) to a refined model.

    The steps, in order, are:
      1. read metadata of opt.mtz and of every file in opt.pdbs; when several
         PDB files are given they are sorted by calculate_difference() against
         the MTZ and the first one is used,
      2. optionally strip HETATM records, then run rwcontents,
      3. run pointless (reindexing) when match_symmetry() succeeds and the
         unit-cell volumes are comparable,
      4. run ctruncate/truncate if amplitudes have to be calculated,
      5. run rigid-body refmac5 to decide whether MR is needed,
      6. run molrep or phaser if it is,
      7. prepare free-R flags (taken from opt.free_r_flags or generated),
      8. run optional jelly-body and then restrained refmac5 refinement,
      9. search for blobs unless the final Rfree is above BAD_FINAL_RFREE.

    Parameters:
      wf  - workflow object; provides the job factories (refmac5, phaser_auto,
            cad, ...), the list of executed jobs (wf.jobs), file_info and the
            set temporary_files that this function extends.
      opt - options object; note that opt.pdbs, opt.fcolumn and
            opt.sigfcolumn are modified in place.

    Returns None.  The function returns early when no usable PDB file is
    given, when MR would be needed but opt.mr_when_r >= 1, and when
    _after_phaser_comments() reports no solution.
    """
    comment(" ### Dimple v%s. Problems and suggestions:"
            " ccp4.github.io/dimple ###" % __version__)
    mtz_meta = wf.read_mtz_metadata(opt.mtz)
    _comment_summary_line("MTZ (%.1fA)" % mtz_meta.dmax, mtz_meta)
    if opt.dls_naming:
        opt.pdbs = dls_name_filter(opt.pdbs)
    opt.pdbs = utils.filter_out_duplicate_files(opt.pdbs, relto=opt.output_dir)
    if not opt.pdbs:
        comment("\nNo non-empty pdb files given. Nothing to do.")
        return
    # read_pdb_metadata() is called for its side effect: the loop below and
    # the sort key read the results back from wf.file_info
    for p in opt.pdbs:
        wf.read_pdb_metadata(p, print_errors=(len(opt.pdbs) > 1))
    if len(opt.pdbs) > 1:
        comment("\nPDBs in order of similarity (using the first one):")
        opt.pdbs.sort(key=lambda x: calculate_difference(wf.file_info[x],
                                                         mtz_meta))
    utils.log_value("data_file", opt.mtz)
    utils.log_value("pdb_files", opt.pdbs)
    for p in opt.pdbs:
        _comment_summary_line(os.path.basename(p), wf.file_info[p])
    ini_pdb = "ini.pdb"
    wf.copy_uncompressed(opt.pdbs[0], ini_pdb)
    # pdb_meta may be None; several branches below special-case that
    pdb_meta = wf.file_info[opt.pdbs[0]]
    if pdb_meta is None:
        put_error("PDB file missing CRYST1 record, starting from MR")
    if opt.no_hetatm or check_hetatm_x(wf.path(ini_pdb), pdb_meta):
        if not opt.no_hetatm:
            comment("\nHETATM marked as element X would choke many programs.")
        rb_xyzin = "prepared.pdb"
        wf.temporary_files.add(rb_xyzin)
        n_het = wf.remove_hetatm(xyzin=ini_pdb, xyzout=rb_xyzin,
                                 remove_all=opt.no_hetatm)
        comment("\nRemoved %d HETATM atoms" % n_het)
    else:
        rb_xyzin = ini_pdb
    # run rwcontents even without CRYST1 - it will show mol. weight only
    wf.rwcontents(xyzin=rb_xyzin).run()
    rw_data = wf.jobs[-1].data
    if pdb_meta is None:
        pass  # we already had a warning message
    elif rw_data.get('solvent_percent') is None:
        put_error("rwcontents could not interpret %s" % rb_xyzin)
    elif rw_data['solvent_percent'] > HIGH_SOLVENT_PCT:
        comment("\nHmm... %.1f%% of solvent or incomplete model"
                % rw_data['solvent_percent'])
        if abs(wf.jobs[-1].data.get('volume', 0) - pdb_meta.get_volume()) > 10:
            comment("\ndebug: problem when calculating volume?")

    ####### pointless - reindexing #######
    # NOTE(review): pdb_meta.get_volume() is reached only if
    # match_symmetry() is true; presumably that is false for pdb_meta None
    if match_symmetry(mtz_meta, pdb_meta) and opt.mr_when_r > 0 and (
            0.7 < mtz_meta.get_volume() / pdb_meta.get_volume() < 1.4):
        reindexed_mtz = "pointless.mtz"
        wf.temporary_files.add(reindexed_mtz)
        wf.pointless(hklin=opt.mtz, xyzin=rb_xyzin, hklout=reindexed_mtz,
                     keys="TOLERANCE 5").run(may_fail=True)
        alt_reindex = wf.jobs[-1].data.get('alt_reindex')
        if wf.jobs[-1].exit_status == 0 and alt_reindex:
            for ar in alt_reindex:
                comment("\n %-10s CC: %-8.3f cell diff: %.1fA" % (
                        ar['op'], ar['cc'], ar['cell_deviat']))
        else:
            # until recently (2015) pointless didn't print CC for non-ambiguous
            # spacegroups (e.g. C2), but now it always prints
            comment("\n no good indexing")
            # fall back to the original, not reindexed data
            reindexed_mtz = opt.mtz
    else:
        reindexed_mtz = opt.mtz
    reindexed_mtz_meta = wf.read_mtz_metadata(reindexed_mtz)
    if reindexed_mtz_meta.symmetry != mtz_meta.symmetry:
        _comment_summary_line('reindexed MTZ', reindexed_mtz_meta)

    ####### (c)truncate - calculate amplitudes if needed #######
    if not opt.fcolumn:
        opt.fcolumn = 'F' if 'F' in mtz_meta.columns else 'FP'
    elif opt.icolumn or opt.ItoF_prog:
        put_error('Ignoring options --fcolumn/--sigfcolumn')
    opt.sigfcolumn = opt.sigfcolumn.replace('<FCOL>', opt.fcolumn)
    # amplitudes are (re)calculated when it is requested explicitly
    # or when the F/SIGF columns are not in the input file
    if (opt.ItoF_prog or opt.icolumn or opt.fcolumn not in mtz_meta.columns
            or opt.sigfcolumn not in mtz_meta.columns):
        # check the intensity columns before any job is run
        mtz_meta.check_col_type(opt.icolumn or 'IMEAN', 'J')
        mtz_meta.check_col_type(opt.sigicolumn, 'Q')
        f_mtz = "amplit.mtz"
        wf.temporary_files.add(f_mtz)
        i_sigi_cols = (opt.icolumn or 'IMEAN', opt.sigicolumn)
        if opt.ItoF_prog == 'ctruncate' or (opt.ItoF_prog is None
                                            and opt.slow):
            wf.ctruncate(hklin=reindexed_mtz, hklout=f_mtz,
                         colin="/*/*/[%s,%s]" % i_sigi_cols).run()
        else:
            wf.truncate(hklin=reindexed_mtz, hklout=f_mtz,
                        labin="IMEAN=%s SIGIMEAN=%s" % i_sigi_cols,
                        labout="F=F SIGF=SIGF").run()
        # from now on use the freshly calculated amplitudes
        opt.fcolumn = 'F'
        opt.sigfcolumn = 'SIGF'
    else:
        f_mtz = reindexed_mtz

    ####### rigid body - check if model is good for refinement? #######
    refmac_labin_nofree = "FP=%s SIGFP=%s" % (opt.fcolumn, opt.sigfcolumn)
    # refmac_xyzin stays None if MR is needed
    refmac_xyzin = None
    cell_diff = calculate_difference(pdb_meta, reindexed_mtz_meta)
    if pdb_meta is None:
        pass  # the error message was already printed
    elif opt.mr_when_r <= 0:
        comment("\nMR requested unconditionally.")
    elif cell_diff > 0.1 and opt.mr_when_r < 1:
        comment("\nDifferent unit cells.")
    elif pdb_meta.symmetry != reindexed_mtz_meta.symmetry:
        comment("\nDifferent space groups.")
    else:
        comment("\nRigid-body refinement with resolution 3.5 A, 10 cycles.")
        if 'aa_count' in rw_data and 'water_count' in rw_data:
            # a model without amino-acid residues has aa_count == 0;
            # report the raw count instead of dividing by zero
            if rw_data['aa_count'] != 0:
                comment(" %.1f waters/aa."
                        % (rw_data['water_count'] / rw_data['aa_count']))
            else:
                comment(' %d/0 waters/aa.' % rw_data['water_count'])
        wf.temporary_files |= {"refmacRB.pdb", "refmacRB.mtz"}
        # it may fail because of "Disagreement between mtz and pdb"
        wf.refmac5(hklin=f_mtz, xyzin=rb_xyzin,
                   hklout="refmacRB.mtz", xyzout="refmacRB.pdb",
                   labin=refmac_labin_nofree, libin=None,
                   keys="""refinement type rigidbody
                           resolution 15 3.5
                           rigidbody ncycle 10""").run(may_fail=True)
        # if the error is caused by mtz/pdb disagreement, continue with MR
        if wf.jobs[-1].exit_status != 0:
            comment("\nTry MR.")
        elif not wf.jobs[-1].data.get("overall_r"):
            comment("\nWARNING: unknown R factor, something went wrong.\n")
            refmac_xyzin = "refmacRB.pdb"
        elif wf.jobs[-1].data["overall_r"] > opt.mr_when_r:
            comment("\nRun MR for R > %g." % opt.mr_when_r)
        else:
            comment("\nNo MR for R < %g." % opt.mr_when_r)
            refmac_xyzin = "refmacRB.pdb"

    ####### phaser/molrep - molecular replacement #######
    if refmac_xyzin is None:
        vol_ratio = None
        if pdb_meta:
            # num_mol accounts for strict NCS (MTRIX without iGiven)
            vol_ratio = (mtz_meta.asu_volume()
                         / pdb_meta.asu_volume(rw_data['num_mol']))
            comment(" Volume of asu: %.1f%% of model asu." % (100 * vol_ratio))
        if opt.mr_when_r >= 1:
            comment("\nWould try MR, but it is disabled.")
            return
        if opt.mr_num:
            mr_num = opt.mr_num
        else:
            mr_num = guess_number_of_molecules(mtz_meta, rw_data, vol_ratio)
        mw = rw_data.get('weight')
        # a float mr_num is multiplied by the number of models that
        # ensembler finds; the merged ensemble is then the search model
        if isinstance(mr_num, float):
            wf.ensembler(pdbin=rb_xyzin, root='ens').run()
            n_models = len(wf.jobs[-1].data['models'])
            mw = None
            rb_xyzin = "ens_merged.pdb"
            mr_num = max(int(round(mr_num * n_models)), 1)
        # phaser is used by default if number of searched molecules is known
        if opt.mr_prog == 'molrep':
            wf.temporary_files |= {"molrep.pdb", "molrep_dimer.pdb",
                                   "molrep.crd"}
            wf.molrep(f=f_mtz, m=rb_xyzin).run()
            refmac_xyzin = "molrep.pdb"
        else:
            wf.temporary_files |= {"phaser.1.pdb", "phaser.1.mtz"}
            wf.phaser_auto(hklin=f_mtz,
                           labin="F=%s SIGF=%s" % (opt.fcolumn, opt.sigfcolumn),
                           model=dict(pdb=rb_xyzin, identity=100, num=mr_num,
                                      mw=mw),
                           sg_alt="ALL", opt=opt,
                           root='phaser').run(may_fail=True)
            if not _after_phaser_comments(wf.jobs[-1],
                                          sg_in=reindexed_mtz_meta.symmetry):
                return
            # continue with both the model and the data written by phaser
            refmac_xyzin = "phaser.1.pdb"
            f_mtz = "phaser.1.mtz"

    # deliberately disabled step, never executed
    if False:
        wf.findwaters(pdbin=refmac_xyzin, hklin=f_mtz,
                      f="FC", phi="PHIC", pdbout="prepared_wat.pdb", sigma=2)
        refmac_xyzin = "prepared_wat.pdb"

    ####### adding free-R flags #######
    f_mtz_meta = wf.read_mtz_metadata(f_mtz)
    cad_reso = opt.reso or (f_mtz_meta.dmax - MtzMeta.d_eps)
    if opt.free_r_flags:
        free_mtz = opt.free_r_flags
        free_col = check_freerflags_column(wf.path(free_mtz),
                                           expected_symmetry=pdb_meta,
                                           column=opt.freecolumn)
        comment("\nFree-R flags from the %s file, column %s."
                % (("reference" if free_mtz != opt.mtz else 'input'),
                   free_col))
    else:
        free_col = DEFAULT_FREE_COLS[0]
        if free_col in f_mtz_meta.columns:
            comment("\nReplace free-R flags")
        else:
            comment("\nGenerate free-R flags")
        free_mtz = "free.mtz"
        wf.temporary_files |= {"unique.mtz", free_mtz}
        if opt.seed_freerflag or cell_diff > 1e3:  # i.e. different SG
            wf.unique(hklout="unique.mtz", ref=f_mtz_meta,
                      resolution=cad_reso).run()
        else:
            comment(" (repeatably)")
            # Here we'd like to have always the same set of free-r flags
            # for given PDB file. That's why we don't use information
            # from the data file (mtz).
            wf.unique(hklout="unique.mtz", ref=pdb_meta,
                      resolution=1.0).run()
        # CCP4 freerflag uses always the same pseudo-random sequence by default
        wf.freerflag(hklin="unique.mtz", hklout=free_mtz,
                     keys=("SEED" if opt.seed_freerflag else "")).run()

    # cad merges data columns with the free-R column; it is skipped only if
    # the flags come from the input file and no resolution cut-off is set
    if free_mtz == opt.mtz and opt.reso is None:
        prepared_mtz = f_mtz
    else:
        prepared_mtz = "prepared.mtz"
        wf.temporary_files.add(prepared_mtz)
        wf.cad(data_in=[(f_mtz, [c for c in f_mtz_meta.columns
                                 if c != free_col]),
                        (free_mtz, [free_col])],
               hklout=prepared_mtz,
               keys=["sysab_keep",  # does it matter?
                     "reso overall 1000.0 %g" % cad_reso]).run()
    # reflections without a flag get one in a second freerflag run
    freerflag_missing = wf.count_mtz_missing(prepared_mtz, free_col)
    if freerflag_missing:
        wf.freerflag(hklin=prepared_mtz, hklout="prepared2.mtz",
                     keys="COMPLETE FREE="+free_col,
                     parser=" (again, for %d refl. more)" % freerflag_missing
                     ).run()
        prepared_mtz = "prepared2.mtz"
        wf.temporary_files.add(prepared_mtz)

    ####### refinement #######
    if opt.weight:
        refmac_weight = "matrix %f" % opt.weight
    else:
        refmac_weight = "auto"
    # keywords shared by the jelly-body and the final refmac5 runs
    restr_ref_keys = """\
     make newligand continue
     refinement type restrained
     weight %s
     """ % refmac_weight
    if opt.freecolumn_val:
        restr_ref_keys += "free %s\n" % opt.freecolumn_val
    refmac_labin = "%s FREE=%s" % (refmac_labin_nofree, free_col)
    comment("\nRestrained refinement, %d+%d cycles."
            % (opt.jelly, opt.restr_cycles))
    if opt.jelly:
        wf.temporary_files |= {"jelly.pdb", "jelly.mtz"}
        # '%' binds tighter than '+': opt.jelly is formatted into the
        # concatenated literal before restr_ref_keys is prepended
        wf.refmac5(hklin=prepared_mtz, xyzin=refmac_xyzin,
                   hklout="jelly.mtz", xyzout="jelly.pdb",
                   labin=refmac_labin, libin=opt.libin,
                   keys=restr_ref_keys+"ridge distance sigma 0.01\n"
                                       "make hydrogen no\n"
                                       "ncycle %d" % opt.jelly).run()
        comment(_refmac_rms_line(wf.jobs[-1].data))
        refmac_xyzin = "jelly.pdb"
    restr_job = wf.refmac5(hklin=prepared_mtz, xyzin=refmac_xyzin,
                           hklout=opt.hklout, xyzout=opt.xyzout,
                           labin=refmac_labin, libin=opt.libin,
                           keys=restr_ref_keys+("ncycle %d" % opt.restr_cycles)
                           ).run()
    comment(_refmac_rms_line(restr_job.data))
    # if that run is repeated with --from-step it's useful to compare Rfree
    if wf.from_job > 0 and wf.from_job <= len(wf.jobs):  # from_job is 1-based
        prev = [j for j in wf.repl_jobs if j.name == restr_job.name]
        if prev and prev[0].data and "free_r" in prev[0].data:
            comment("\nPreviously: R/Rfree %.4f/%.4f Rfree change: %+.4f" % (
                    prev[0].data["overall_r"], prev[0].data["free_r"],
                    restr_job.data["free_r"] - prev[0].data["free_r"]))

    ####### check blobs and finish #######
    if restr_job.data["free_r"] <= BAD_FINAL_RFREE:
        fb_job = wf.find_blobs(opt.hklout, opt.xyzout, sigma=0.8).run()
        coot_script = _generate_scripts_and_pictures(wf, opt, fb_job.data)
        if coot_script:
            comment("\nTo see it in Coot run %s" % coot_script)
    else:
        comment("\nGiving up (Rfree > %g). No blob search." % BAD_FINAL_RFREE)
        _generate_scripts_and_pictures(wf, opt, None)
def dimple(wf, opt):
    """Run the whole pipeline: from an MTZ and PDB file(s) to a refined model.

    The steps, in order, are:
      1. read metadata of opt.mtz and of every file in opt.pdbs; when several
         PDB files are given they are sorted by calculate_difference() against
         the MTZ and the first one is used,
      2. optionally strip HETATM records, then run rwcontents,
      3. run pointless (reindexing) when match_symmetry() succeeds and the
         unit-cell volumes are comparable,
      4. run ctruncate/truncate if amplitudes have to be calculated,
      5. run rigid-body refmac5 to decide whether MR is needed,
      6. run molrep or phaser if it is,
      7. prepare free-R flags (taken from opt.free_r_flags or generated),
      8. run optional jelly-body and then restrained refmac5 refinement,
      9. search for blobs unless the final Rfree is above BAD_FINAL_RFREE.

    Parameters:
      wf  - workflow object; provides the job factories (refmac5, phaser_auto,
            cad, ...), the list of executed jobs (wf.jobs), file_info and the
            set temporary_files that this function extends.
      opt - options object; note that opt.pdbs, opt.fcolumn and
            opt.sigfcolumn are modified in place.

    Returns None.  The function returns early when no usable PDB file is
    given, when MR would be needed but opt.mr_when_r >= 1, and when
    _after_phaser_comments() reports no solution.
    """
    comment(" ### Dimple v%s. Problems and suggestions:"
            " ccp4.github.io/dimple ###" % __version__)
    mtz_meta = wf.read_mtz_metadata(opt.mtz)
    _comment_summary_line("MTZ (%.1fA)" % mtz_meta.dmax, mtz_meta)
    if opt.dls_naming:
        opt.pdbs = dls_name_filter(opt.pdbs)
    opt.pdbs = utils.filter_out_duplicate_files(opt.pdbs, relto=opt.output_dir)
    if not opt.pdbs:
        comment("\nNo non-empty pdb files given. Nothing to do.")
        return
    # read_pdb_metadata() is called for its side effect: the loop below and
    # the sort key read the results back from wf.file_info
    for p in opt.pdbs:
        wf.read_pdb_metadata(p, print_errors=(len(opt.pdbs) > 1))
    if len(opt.pdbs) > 1:
        comment("\nPDBs in order of similarity (using the first one):")
        opt.pdbs.sort(key=lambda x: calculate_difference(wf.file_info[x],
                                                         mtz_meta))
    utils.log_value("data_file", opt.mtz)
    utils.log_value("pdb_files", opt.pdbs)
    for p in opt.pdbs:
        _comment_summary_line(os.path.basename(p), wf.file_info[p])
    ini_pdb = "ini.pdb"
    wf.copy_uncompressed(opt.pdbs[0], ini_pdb)
    # pdb_meta may be None; several branches below special-case that
    pdb_meta = wf.file_info[opt.pdbs[0]]
    if pdb_meta is None:
        put_error("PDB file missing CRYST1 record, starting from MR")
    if opt.no_hetatm or check_hetatm_x(wf.path(ini_pdb), pdb_meta):
        if not opt.no_hetatm:
            comment("\nHETATM marked as element X would choke many programs.")
        rb_xyzin = "prepared.pdb"
        wf.temporary_files.add(rb_xyzin)
        n_het = wf.remove_hetatm(xyzin=ini_pdb, xyzout=rb_xyzin,
                                 remove_all=opt.no_hetatm)
        comment("\nRemoved %d HETATM atoms" % n_het)
    else:
        rb_xyzin = ini_pdb
    # run rwcontents even without CRYST1 - it will show mol. weight only
    wf.rwcontents(xyzin=rb_xyzin).run()
    rw_data = wf.jobs[-1].data
    if pdb_meta is None:
        pass  # we already had a warning message
    elif rw_data.get('solvent_percent') is None:
        put_error("rwcontents could not interpret %s" % rb_xyzin)
    elif rw_data['solvent_percent'] > HIGH_SOLVENT_PCT:
        comment("\nHmm... %.1f%% of solvent or incomplete model"
                % rw_data['solvent_percent'])
        if abs(wf.jobs[-1].data.get('volume', 0) - pdb_meta.get_volume()) > 10:
            comment("\ndebug: problem when calculating volume?")

    ####### pointless - reindexing #######
    # NOTE(review): pdb_meta.get_volume() is reached only if
    # match_symmetry() is true; presumably that is false for pdb_meta None
    if match_symmetry(mtz_meta, pdb_meta) and opt.mr_when_r > 0 and (
            0.7 < mtz_meta.get_volume() / pdb_meta.get_volume() < 1.4):
        reindexed_mtz = "pointless.mtz"
        wf.temporary_files.add(reindexed_mtz)
        wf.pointless(hklin=opt.mtz, xyzin=rb_xyzin, hklout=reindexed_mtz,
                     keys="TOLERANCE 5").run(may_fail=True)
        alt_reindex = wf.jobs[-1].data.get('alt_reindex')
        if wf.jobs[-1].exit_status == 0 and alt_reindex:
            for ar in alt_reindex:
                comment("\n %-10s CC: %-8.3f cell diff: %.1fA" % (
                        ar['op'], ar['cc'], ar['cell_deviat']))
        else:
            # until recently (2015) pointless didn't print CC for non-ambiguous
            # spacegroups (e.g. C2), but now it always prints
            comment("\n no good indexing")
            # fall back to the original, not reindexed data
            reindexed_mtz = opt.mtz
    else:
        reindexed_mtz = opt.mtz
    reindexed_mtz_meta = wf.read_mtz_metadata(reindexed_mtz)
    if reindexed_mtz_meta.symmetry != mtz_meta.symmetry:
        _comment_summary_line('reindexed MTZ', reindexed_mtz_meta)

    ####### (c)truncate - calculate amplitudes if needed #######
    if not opt.fcolumn:
        opt.fcolumn = 'F' if 'F' in mtz_meta.columns else 'FP'
    elif opt.icolumn or opt.ItoF_prog:
        put_error('Ignoring options --fcolumn/--sigfcolumn')
    opt.sigfcolumn = opt.sigfcolumn.replace('<FCOL>', opt.fcolumn)
    # amplitudes are (re)calculated when it is requested explicitly
    # or when the F/SIGF columns are not in the input file
    if (opt.ItoF_prog or opt.icolumn or opt.fcolumn not in mtz_meta.columns
            or opt.sigfcolumn not in mtz_meta.columns):
        f_mtz = "amplit.mtz"
        wf.temporary_files.add(f_mtz)
        i_sigi_cols = _find_i_sigi_columns(mtz_meta, opt)
        if opt.ItoF_prog == 'ctruncate' or (opt.ItoF_prog is None
                                            and opt.slow):
            wf.ctruncate(hklin=reindexed_mtz, hklout=f_mtz,
                         colin="/*/*/[%s,%s]" % i_sigi_cols).run()
        else:
            wf.truncate(hklin=reindexed_mtz, hklout=f_mtz,
                        labin="IMEAN=%s SIGIMEAN=%s" % i_sigi_cols,
                        labout="F=F SIGF=SIGF").run()
        # from now on use the freshly calculated amplitudes
        opt.fcolumn = 'F'
        opt.sigfcolumn = 'SIGF'
    else:
        f_mtz = reindexed_mtz

    ####### rigid body - check if model is good for refinement? #######
    refmac_labin_nofree = "FP=%s SIGFP=%s" % (opt.fcolumn, opt.sigfcolumn)
    # refmac_xyzin stays None if MR is needed
    refmac_xyzin = None
    cell_diff = calculate_difference(pdb_meta, reindexed_mtz_meta)
    if pdb_meta is None:
        pass  # the error message was already printed
    elif opt.mr_when_r <= 0:
        comment("\nMR requested unconditionally.")
    elif cell_diff > 0.1 and opt.mr_when_r < 1:
        comment("\nDifferent unit cells.")
    elif pdb_meta.symmetry != reindexed_mtz_meta.symmetry:
        comment("\nDifferent space groups.")
    else:
        comment("\nRigid-body refinement with resolution 3.5 A, 10 cycles.")
        if 'aa_count' in rw_data and 'water_count' in rw_data:
            # avoid division by zero for models without amino acids
            if rw_data['aa_count'] != 0:
                comment(" %.1f waters/aa."
                        % (rw_data['water_count'] / rw_data['aa_count']))
            else:
                comment(' %d/0 waters/aa.' % rw_data['water_count'])
        wf.temporary_files |= {"refmacRB.pdb", "refmacRB.mtz"}
        # it may fail because of "Disagreement between mtz and pdb"
        wf.refmac5(hklin=f_mtz, xyzin=rb_xyzin,
                   hklout="refmacRB.mtz", xyzout="refmacRB.pdb",
                   labin=refmac_labin_nofree, libin=None,
                   keys="""refinement type rigidbody
                           resolution 15 3.5
                           rigidbody ncycle 10""").run(may_fail=True)
        # if the error is caused by mtz/pdb disagreement, continue with MR
        if wf.jobs[-1].exit_status != 0:
            comment("\nTry MR.")
        elif not wf.jobs[-1].data.get("overall_r"):
            comment("\nWARNING: unknown R factor, something went wrong.\n")
            refmac_xyzin = "refmacRB.pdb"
        elif wf.jobs[-1].data["overall_r"] > opt.mr_when_r:
            comment("\nRun MR for R > %g." % opt.mr_when_r)
        else:
            comment("\nNo MR for R < %g." % opt.mr_when_r)
            refmac_xyzin = "refmacRB.pdb"

    ####### phaser/molrep - molecular replacement #######
    if refmac_xyzin is None:
        vol_ratio = None
        if pdb_meta:
            # num_mol accounts for strict NCS (MTRIX without iGiven)
            vol_ratio = (mtz_meta.asu_volume()
                         / pdb_meta.asu_volume(rw_data['num_mol']))
            comment(" Volume of asu: %.1f%% of model asu." % (100 * vol_ratio))
        if opt.mr_when_r >= 1:
            comment("\nWould try MR, but it is disabled.")
            return
        if opt.mr_num:
            mr_num = opt.mr_num
        else:
            mr_num = guess_number_of_molecules(mtz_meta, rw_data, vol_ratio)
        mw = rw_data.get('weight')
        # a float mr_num is multiplied by the number of models that
        # ensembler finds; the merged ensemble is then the search model
        if isinstance(mr_num, float):
            wf.ensembler(pdbin=rb_xyzin, root='ens').run()
            n_models = len(wf.jobs[-1].data['models'])
            mw = None
            rb_xyzin = "ens_merged.pdb"
            mr_num = max(int(round(mr_num * n_models)), 1)
        # phaser is used by default if number of searched molecules is known
        if opt.mr_prog == 'molrep':
            wf.temporary_files |= {"molrep.pdb", "molrep_dimer.pdb",
                                   "molrep.crd"}
            wf.molrep(f=f_mtz, m=rb_xyzin).run()
            refmac_xyzin = "molrep.pdb"
        else:
            wf.temporary_files |= {"phaser.1.pdb", "phaser.1.mtz"}
            wf.phaser_auto(hklin=f_mtz,
                           labin="F=%s SIGF=%s" % (opt.fcolumn, opt.sigfcolumn),
                           model=dict(pdb=rb_xyzin, identity=100, num=mr_num,
                                      mw=mw),
                           sg_alt="ALL", opt=opt,
                           root='phaser').run(may_fail=True)
            if not _after_phaser_comments(wf.jobs[-1],
                                          sg_in=reindexed_mtz_meta.symmetry):
                return
            # continue with both the model and the data written by phaser
            refmac_xyzin = "phaser.1.pdb"
            f_mtz = "phaser.1.mtz"

    # deliberately disabled step, never executed
    if False:
        wf.findwaters(pdbin=refmac_xyzin, hklin=f_mtz,
                      f="FC", phi="PHIC", pdbout="prepared_wat.pdb", sigma=2)
        refmac_xyzin = "prepared_wat.pdb"

    ####### adding free-R flags #######
    f_mtz_meta = wf.read_mtz_metadata(f_mtz)
    cad_reso = opt.reso or (f_mtz_meta.dmax - MtzMeta.d_eps)
    if opt.free_r_flags:
        free_mtz = opt.free_r_flags
        free_col = check_freerflags_column(wf.path(free_mtz),
                                           expected_symmetry=pdb_meta,
                                           column=opt.freecolumn)
        comment("\nFree-R flags from the %s file, column %s."
                % (("reference" if free_mtz != opt.mtz else 'input'),
                   free_col))
    else:
        free_col = DEFAULT_FREE_COLS[0]
        if free_col in f_mtz_meta.columns:
            comment("\nReplace free-R flags")
        else:
            comment("\nGenerate free-R flags")
        free_mtz = "free.mtz"
        wf.temporary_files |= {"unique.mtz", free_mtz}
        if opt.seed_freerflag or cell_diff > 1e3:  # i.e. different SG
            wf.unique(hklout="unique.mtz", ref=f_mtz_meta,
                      resolution=cad_reso).run()
        else:
            comment(" (repeatably)")
            # Here we'd like to have always the same set of free-r flags
            # for given PDB file. That's why we don't use information
            # from the data file (mtz).
            wf.unique(hklout="unique.mtz", ref=pdb_meta,
                      resolution=1.0).run()
        # CCP4 freerflag uses always the same pseudo-random sequence by default
        wf.freerflag(hklin="unique.mtz", hklout=free_mtz,
                     keys=("SEED" if opt.seed_freerflag else "")).run()

    # cad merges data columns with the free-R column; it is skipped only if
    # the flags come from the input file and no resolution cut-off is set
    if free_mtz == opt.mtz and opt.reso is None:
        prepared_mtz = f_mtz
    else:
        prepared_mtz = "prepared.mtz"
        wf.temporary_files.add(prepared_mtz)
        wf.cad(data_in=[(f_mtz, [c for c in f_mtz_meta.columns
                                 if c != free_col]),
                        (free_mtz, [free_col])],
               hklout=prepared_mtz,
               keys=["sysab_keep",  # does it matter?
                     "reso overall 1000.0 %g" % cad_reso]).run()
    # reflections without a flag get one in a second freerflag run
    freerflag_missing = wf.count_mtz_missing(prepared_mtz, free_col)
    if freerflag_missing:
        wf.freerflag(hklin=prepared_mtz, hklout="prepared2.mtz",
                     keys="COMPLETE FREE="+free_col,
                     parser=" (again, for %d refl. more)" % freerflag_missing
                     ).run()
        prepared_mtz = "prepared2.mtz"
        wf.temporary_files.add(prepared_mtz)

    ####### refinement #######
    if opt.weight:
        refmac_weight = "matrix %f" % opt.weight
    else:
        refmac_weight = "auto"
    # keywords shared by the jelly-body and the final refmac5 runs
    restr_ref_keys = """\
     make newligand continue
     refinement type restrained
     weight %s
     """ % refmac_weight
    if opt.freecolumn_val:
        restr_ref_keys += "free %s\n" % opt.freecolumn_val
    refmac_labin = "%s FREE=%s" % (refmac_labin_nofree, free_col)
    comment("\nRestrained refinement, %d+%d cycles."
            % (opt.jelly, opt.restr_cycles))
    if opt.jelly:
        wf.temporary_files |= {"jelly.pdb", "jelly.mtz"}
        # '%' binds tighter than '+': opt.jelly is formatted into the
        # concatenated literal before the surrounding strings are added
        wf.refmac5(hklin=prepared_mtz, xyzin=refmac_xyzin,
                   hklout="jelly.mtz", xyzout="jelly.pdb",
                   labin=refmac_labin, libin=opt.libin,
                   keys=restr_ref_keys+"ridge distance sigma 0.01\n"
                                       "make hydrogen no\n"
                                       "ncycle %d" % opt.jelly
                   +opt.extra_ref_keys).run()
        comment(_refmac_rms_line(wf.jobs[-1].data))
        refmac_xyzin = "jelly.pdb"
    restr_job = wf.refmac5(hklin=prepared_mtz, xyzin=refmac_xyzin,
                           hklout=opt.hklout, xyzout=opt.xyzout,
                           labin=refmac_labin, libin=opt.libin,
                           keys=restr_ref_keys+("ncycle %d" % opt.restr_cycles)
                           +opt.extra_ref_keys).run()
    comment(_refmac_rms_line(restr_job.data))
    # if that run is repeated with --from-step it's useful to compare Rfree
    if wf.from_job > 0 and wf.from_job <= len(wf.jobs):  # from_job is 1-based
        prev = [j for j in wf.repl_jobs if j.name == restr_job.name]
        if prev and prev[0].data and "free_r" in prev[0].data:
            comment("\nPreviously: R/Rfree %.4f/%.4f Rfree change: %+.4f" % (
                    prev[0].data["overall_r"], prev[0].data["free_r"],
                    restr_job.data["free_r"] - prev[0].data["free_r"]))

    ####### check blobs and finish #######
    if restr_job.data["free_r"] <= BAD_FINAL_RFREE:
        fb_job = wf.find_blobs(opt.hklout, opt.xyzout, sigma=0.8).run()
        coot_script = _generate_scripts_and_pictures(wf, opt, fb_job.data)
        if coot_script:
            comment("\nTo see it in Coot run %s" % coot_script)
    else:
        comment("\nGiving up (Rfree > %g). No blob search." % BAD_FINAL_RFREE)
        _generate_scripts_and_pictures(wf, opt, None)