def __init__(self, cell, symmetry, sg_number, dmin, dmax, columns, filename):
    """Set up the record on top of the ``Cell`` base state.

    ``cell`` and ``symmetry`` are passed unchanged to ``Cell.__init__``.
    ``columns`` must be indexable with a tuple as its first item; it is
    stored as an ``OrderedDict``.  ``sg_number``, ``dmin``, ``dmax`` and
    ``filename`` are kept as attributes of the same name.

    Raises AssertionError (when assertions are enabled) if ``columns[0]``
    is not a tuple, or unless ``dmin >= dmax``.
    """
    leading_entry = columns[0]
    assert isinstance(leading_entry, tuple)
    Cell.__init__(self, cell, symmetry)
    self.sg_number, self.dmin, self.dmax = sg_number, dmin, dmax
    # yes, min > max here: dmin is expected to be the larger number
    assert dmin >= dmax
    self.columns = OrderedDict(columns)
    self.filename = filename
def __init__(self, cryst1_line):
    """Initialise the cell from the text of a single CRYST1 line.

    Six numbers are read from fixed character ranges of ``cryst1_line``
    (a, b, c, alpha, beta, gamma, in that order) and the symmetry string
    from characters 55..65 with surrounding whitespace stripped.  They are
    passed to ``Cell.__init__``; ``has_hetatm_x`` starts out as ``None``.

    Raises AssertionError (when assertions are enabled) if the line does
    not begin with "CRYST1", and ValueError if a numeric field cannot be
    converted by ``float``.
    """
    assert cryst1_line.startswith("CRYST1")
    # (start, stop) offsets of a, b, c, alpha, beta, gamma
    field_spans = ((6, 15), (15, 24), (24, 33), (33, 40), (40, 47), (47, 54))
    values = [float(cryst1_line[lo:hi]) for lo, hi in field_spans]
    space_group = cryst1_line[55:66].strip()
    Cell.__init__(self, tuple(values), space_group)
    self.has_hetatm_x = None
def __init__(self, cryst1_line):
    """Parse a CRYST1 line and initialise the ``Cell`` base with it.

    The three lengths are read from 9-character fields starting at
    offsets 6, 15 and 24, the three angles from 7-character fields
    starting at offsets 33, 40 and 47, and the symmetry string from
    ``cryst1_line[55:66]`` (stripped).  ``has_hetatm_x`` is set to
    ``None``.

    Raises AssertionError (when assertions are enabled) if the line does
    not start with 'CRYST1'; ``float`` raises ValueError on a bad field.
    """
    assert cryst1_line.startswith('CRYST1')
    lengths = [float(cryst1_line[start:start + 9]) for start in (6, 15, 24)]
    angles = [float(cryst1_line[start:start + 7]) for start in (33, 40, 47)]
    symmetry = cryst1_line[55:66].strip()
    Cell.__init__(self, tuple(lengths + angles), symmetry)
    self.has_hetatm_x = None
class TestCell(unittest.TestCase):
    """Checks the matrices returned by ``Cell`` for one fixed set of
    six cell parameters."""

    def setUp(self):
        parameters = (22.84, 32.84, 42.84, 80.84, 90.84, 100.84)
        self.cell = Cell(parameters, None)

    def test_orth(self):
        # get_orth_matrix() is compared with hard-coded reference values.
        reference = np.array([
            [22.84, -6.17612, -0.628045],
            [0, 32.254, 6.82343],
            [0, 0, 42.2884],
        ])
        actual = to_np(self.cell.get_orth_matrix())
        assert_allclose(actual, reference, rtol=1e-6)

    def test_frac(self):
        # frac.dot(orth) has to be the 3x3 identity within atol.
        frac_matrix = self.cell.get_frac_matrix()
        orth_matrix = self.cell.get_orth_matrix()
        product = frac_matrix.dot(orth_matrix)
        assert_allclose(to_np(product), np.identity(3), atol=1e-12)
# dimple(wf, opt) -- driver that runs the whole pipeline, stage by stage.
#
# Stages, in the order they appear in the body (names are taken from the
# "#######" banners and from the job factories called on ``wf``):
#   * print the version banner; read MTZ metadata from opt.mtz and metadata
#     of every file in opt.pdbs (after the optional dls_name_filter and the
#     duplicate filter); return early with a comment if no PDB is left;
#   * when more than one PDB is given, sort them by calculate_difference()
#     against the MTZ metadata; the first one is copied to 'ini.pdb';
#   * HETATM records are removed into 'prepared.pdb' when opt.no_hetatm is
#     set or check_hetatm_x() returns a true value;
#   * rwcontents (run even without CRYST1);
#   * "pointless - reindexing";
#   * "(c)truncate - calculate amplitudes if needed";
#   * "rigid body" refinement with refmac5;
#   * "phaser/molrep - molecular replacement";
#   * "adding free-R flags" (unique, freerflag, cad);
#   * restrained "refinement" with refmac5, preceded by a 'jelly' refmac5
#     run when opt.jelly is set;
#   * "check blobs" when opt.blob_search is set;
#   * AnoDe (mtz2sca, shelxc, anode) when opt.anode is set.
#
# Parameters:
#   wf  -- object providing the job factories and the bookkeeping attributes
#          used here (wf.jobs, wf.file_info, wf.temporary_files, ...);
#          presumably the workflow instance -- confirm against the caller.
#   opt -- options object.  It is modified in place: opt.pdbs, opt.fcolumn
#          and opt.sigfcolumn are reassigned below.
#
# opt.mr_when_r, as far as the messages below show: a value <= 0 gives
# "MR requested unconditionally", a value >= 1 gives "Would try MR, but it
# is disabled", and otherwise the rigid-body 'overall_r' is compared with it.
#
# Returns nothing (only bare ``return`` statements are used).
# Raises RuntimeError('No phaser solution.') when _after_phaser_comments()
# returns a false value.
#
# NOTE(review): pdb_meta can be None (the "missing CRYST1" case is reported
# with put_error and later branches test for it), yet it is also passed to
# match_symmetry(), calculate_difference() and wf.unique(ref=pdb_meta)
# without a visible guard -- confirm those helpers accept None.
# NOTE(review): in this copy the statements are run together on a few
# physical lines, so block nesting cannot be read from the layout, and two
# string literals are split across line ends.  Check the whitespace inside
# the multi-line ``keys`` literals against a correctly formatted copy.
def dimple(wf, opt): comment(' ### Dimple v%s. Problems and suggestions:' ' ccp4.github.io/dimple ###' % __version__) mtz_meta = wf.read_mtz_metadata(opt.mtz) _comment_summary_line('MTZ (%.1fA)' % mtz_meta.dmax, mtz_meta) if opt.dls_naming: opt.pdbs = dls_name_filter(opt.pdbs) opt.pdbs = utils.filter_out_duplicate_files(opt.pdbs, relto=opt.output_dir) if not opt.pdbs: comment('\nNo non-empty pdb files given. Nothing to do.') return for p in opt.pdbs: wf.read_pdb_metadata(p, print_errors=(len(opt.pdbs) > 1)) if len(opt.pdbs) > 1: comment('\nPDBs in order of similarity (using the first one):') opt.pdbs.sort( key=lambda x: calculate_difference(wf.file_info[x], mtz_meta)) utils.log_value('data_file', opt.mtz) utils.log_value('pdb_files', opt.pdbs) for p in opt.pdbs: _comment_summary_line(os.path.basename(p), wf.file_info[p]) ini_pdb = 'ini.pdb' wf.copy_uncompressed(opt.pdbs[0], ini_pdb) pdb_meta = wf.file_info[opt.pdbs[0]] if pdb_meta is None: put_error('PDB file missing CRYST1 record, starting from MR') if opt.no_hetatm or check_hetatm_x(wf.path(ini_pdb), pdb_meta): if not opt.no_hetatm: comment('\nHETATM marked as element X would choke many programs.') rb_xyzin = 'prepared.pdb' wf.temporary_files.add(rb_xyzin) n_het = wf.remove_hetatm(xyzin=ini_pdb, xyzout=rb_xyzin, remove_all=opt.no_hetatm) comment('\nRemoved %d HETATM atoms' % n_het) else: rb_xyzin = ini_pdb # run rwcontents even without CRYST1 - it will show mol. weight only wf.rwcontents(xyzin=rb_xyzin).run() rw_data = wf.jobs[-1].data if pdb_meta is None: pass # we already had a warning message elif rw_data.get('solvent_percent') is None: put_error('rwcontents could not interpret %s' % rb_xyzin) elif rw_data['solvent_percent'] > HIGH_SOLVENT_PCT: comment('\nHmm... 
%.1f%% of solvent or incomplete model' % rw_data['solvent_percent']) if abs(wf.jobs[-1].data.get('volume', 0) - pdb_meta.get_volume()) > 10: comment('\ndebug: problem when calculating volume?') ####### pointless - reindexing ####### if match_symmetry(mtz_meta, pdb_meta) and opt.mr_when_r > 0 and ( 0.7 < mtz_meta.get_volume() / pdb_meta.get_volume() < 1.4): reindexed_mtz = 'pointless.mtz' wf.temporary_files.add(reindexed_mtz) wf.pointless(hklin=opt.mtz, xyzin=rb_xyzin, hklout=reindexed_mtz, keys='TOLERANCE 5').run(may_fail=True) alt_reindex = wf.jobs[-1].data.get('alt_reindex') if wf.jobs[-1].exit_status == 0 and alt_reindex: for ar in alt_reindex: comment('\n %-10s CC: %-8.3f cell diff: %.1fA' % (ar['op'], ar['cc'], ar['cell_deviat'])) else: # until recently (2015) pointless didn't print CC for non-ambiguous # spacegroups (e.g. C2), but now it always prints comment('\n no good indexing') reindexed_mtz = opt.mtz else: reindexed_mtz = opt.mtz reindexed_mtz_meta = wf.read_mtz_metadata(reindexed_mtz) if reindexed_mtz_meta.symmetry != mtz_meta.symmetry: _comment_summary_line('reindexed MTZ', reindexed_mtz_meta) ####### (c)truncate - calculate amplitudes if needed ####### if not opt.fcolumn: opt.fcolumn = 'F' if 'F' in mtz_meta.columns else 'FP' elif opt.icolumn or opt.ItoF_prog: put_error('Ignoring options --fcolumn/--sigfcolumn') opt.sigfcolumn = opt.sigfcolumn.replace('<FCOL>', opt.fcolumn) if (opt.ItoF_prog or opt.icolumn or opt.fcolumn not in mtz_meta.columns or opt.sigfcolumn not in mtz_meta.columns): f_mtz = 'amplit.mtz' wf.temporary_files.add(f_mtz) i_sigi_cols = _find_i_sigi_columns(mtz_meta, opt) if opt.ItoF_prog == 'ctruncate' or (opt.ItoF_prog is None and opt.slow): colano = None if opt.anode and all( col in mtz_meta.columns for col in ['I(+)', 'SIGI(+)', 'I(-)', 'SIGI(-)']): colano = '/*/*/[I(+),SIGI(+),I(-),SIGI(-)]' wf.ctruncate(hklin=reindexed_mtz, hklout=f_mtz, colin='/*/*/[%s,%s]' % i_sigi_cols, colano=colano).run() else: 
wf.truncate(hklin=reindexed_mtz, hklout=f_mtz, labin='IMEAN=%s SIGIMEAN=%s' % i_sigi_cols, labout='F=F SIGF=SIGF').run() opt.fcolumn = 'F' opt.sigfcolumn = 'SIGF' else: f_mtz = reindexed_mtz ####### rigid body - check if model is good for refinement? ####### refmac_labin_nofree = 'FP=%s SIGFP=%s' % (opt.fcolumn, opt.sigfcolumn) refmac_xyzin = None cell_diff = calculate_difference(pdb_meta, reindexed_mtz_meta) if pdb_meta is None: pass # the error message was already printed elif opt.mr_when_r <= 0: comment('\nMR requested unconditionally.') elif cell_diff > 0.1 and opt.mr_when_r < 1: comment('\nDifferent unit cells.') elif pdb_meta.symmetry != reindexed_mtz_meta.symmetry: comment('\nDifferent space groups.') else: comment('\nRigid-body refinement with resolution 3.5 A, %d cycles.' % opt.rigid_cycles) if 'aa_count' in rw_data and 'water_count' in rw_data: if rw_data['aa_count'] != 0: comment(' %.1f waters/aa.' % (rw_data['water_count'] / rw_data['aa_count'])) else: comment(' %d/0 waters/aa.' % rw_data['water_count']) wf.temporary_files |= {'refmacRB.pdb', 'refmacRB.mtz'} # it may fail because of "Disagreement between mtz and pdb" wf.refmac5(hklin=f_mtz, xyzin=rb_xyzin, hklout='refmacRB.mtz', xyzout='refmacRB.pdb', labin=refmac_labin_nofree, libin=None, keys="""refinement type rigidbody resolution 15 3.5 rigidbody ncycle %d""" % opt.rigid_cycles).run(may_fail=True) # if the error is caused by mtz/pdb disagreement, continue with MR if wf.jobs[-1].exit_status != 0: comment('\nTry MR.') elif not wf.jobs[-1].data.get('overall_r'): comment('\nWARNING: unknown R factor, something went wrong.\n') refmac_xyzin = 'refmacRB.pdb' elif wf.jobs[-1].data['overall_r'] > opt.mr_when_r: comment('\nRun MR for R > %g.' % opt.mr_when_r) else: comment('\nNo MR for R < %g.' 
% opt.mr_when_r) refmac_xyzin = 'refmacRB.pdb' ####### phaser/molrep - molecular replacement ####### if refmac_xyzin is None: vol_ratio = None if pdb_meta: # num_mol accounts for strict NCS (MTRIX without iGiven) vol_ratio = (mtz_meta.asu_volume() / pdb_meta.asu_volume(rw_data['num_mol'])) comment(' Volume of asu: %.1f%% of model asu.' % (100 * vol_ratio)) if opt.mr_when_r >= 1: comment('\nWould try MR, but it is disabled.') return if opt.mr_num: mr_num = opt.mr_num else: mr_num = guess_number_of_molecules(mtz_meta, rw_data, vol_ratio) mw = rw_data.get('weight') if isinstance(mr_num, float): wf.ensembler(pdbin=rb_xyzin, root='ens').run() n_models = len(wf.jobs[-1].data['models']) mw = None rb_xyzin = 'ens_merged.pdb' mr_num = max(int(round(mr_num * n_models)), 1) # phaser is used by default if number of searched molecules is known if opt.mr_prog == 'molrep': wf.temporary_files |= { 'molrep.pdb', 'molrep_dimer.pdb', 'molrep.crd' } wf.molrep(f=f_mtz, m=rb_xyzin).run() refmac_xyzin = 'molrep.pdb' else: wf.temporary_files |= {'phaser.1.pdb', 'phaser.1.mtz'} wf.phaser_auto(hklin=f_mtz, labin='F=%s SIGF=%s' % (opt.fcolumn, opt.sigfcolumn), model=dict(pdb=rb_xyzin, identity=100, num=mr_num, mw=mw), sg_alt='ALL', opt=opt, root='phaser').run(may_fail=True) if not _after_phaser_comments(wf.jobs[-1], sg_in=reindexed_mtz_meta.symmetry): raise RuntimeError('No phaser solution.') refmac_xyzin = 'phaser.1.pdb' f_mtz = 'phaser.1.mtz' if False: wf.findwaters(pdbin=refmac_xyzin, hklin=f_mtz, f='FC', phi='PHIC', pdbout='prepared_wat.pdb', sigma=2) refmac_xyzin = 'prepared_wat.pdb' ####### adding free-R flags ####### f_mtz_meta = wf.read_mtz_metadata(f_mtz) cad_reso = opt.reso or (f_mtz_meta.dmax - MtzMeta.d_eps) if opt.free_r_flags: free_mtz = opt.free_r_flags free_col = check_freerflags_column(wf.path(free_mtz), expected_symmetry=pdb_meta, column=opt.freecolumn) comment('\nFree-R flags from the %s file, column %s.' 
% (('reference' if free_mtz != opt.mtz else 'input'), free_col)) else: free_col = DEFAULT_FREE_COLS[0] if free_col in f_mtz_meta.columns: comment('\nReplace free-R flags') else: comment('\nGenerate free-R flags') free_mtz = 'free.mtz' wf.temporary_files |= {'unique.mtz', free_mtz} if opt.seed_freerflag or cell_diff > 1e3: # i.e. different SG wf.unique(hklout='unique.mtz', ref=f_mtz_meta, resolution=cad_reso).run() else: comment(' (repeatably)') # Here we'd like to have always the same set of free-r flags # for given PDB file. That's why we don't use information # from the data file (mtz). wf.unique(hklout='unique.mtz', ref=pdb_meta, resolution=1.0).run() # CCP4 freerflag uses always the same pseudo-random sequence by default wf.freerflag(hklin='unique.mtz', hklout=free_mtz, keys=('SEED' if opt.seed_freerflag else '')).run() if free_mtz == opt.mtz and opt.reso is None: prepared_mtz = f_mtz else: prepared_mtz = 'prepared.mtz' wf.temporary_files.add(prepared_mtz) wf.cad( data_in=[(f_mtz, [c for c in f_mtz_meta.columns if c != free_col]), (free_mtz, [free_col])], hklout=prepared_mtz, keys=[ 'sysab_keep', # does it matter? 'reso overall 1000.0 %g' % cad_reso ]).run() freerflag_missing = wf.count_mtz_missing(prepared_mtz, free_col) if freerflag_missing: wf.freerflag(hklin=prepared_mtz, hklout='prepared2.mtz', keys='COMPLETE FREE=' + free_col, parser=' (again, for %d refl. more)' % freerflag_missing).run() prepared_mtz = 'prepared2.mtz' wf.temporary_files.add(prepared_mtz) ####### refinement ####### if opt.weight: refmac_weight = 'matrix %f' % opt.weight else: refmac_weight = 'auto' restr_ref_keys = """\ make newligand continue refinement type restrained weight %s """ % refmac_weight if opt.freecolumn_val: restr_ref_keys += 'free %s\n' % opt.freecolumn_val refmac_labin = '%s FREE=%s' % (refmac_labin_nofree, free_col) comment('\nRestrained refinement, %d+%d cycles.' 
% (opt.jelly, opt.restr_cycles)) if opt.jelly: wf.temporary_files |= {'jelly.pdb', 'jelly.mtz'} wf.refmac5(hklin=prepared_mtz, xyzin=refmac_xyzin, hklout='jelly.mtz', xyzout='jelly.pdb', labin=refmac_labin, libin=opt.libin, keys=restr_ref_keys + 'ridge distance sigma 0.01\n' 'make hydrogen no\n' 'ncycle %d' % opt.jelly + opt.extra_ref_keys).run() comment(_refmac_rms_line(wf.jobs[-1].data)) refmac_xyzin = 'jelly.pdb' restr_job = wf.refmac5( hklin=prepared_mtz, xyzin=refmac_xyzin, hklout=opt.hklout, xyzout=opt.xyzout, labin=refmac_labin, libin=opt.libin, keys=(restr_ref_keys + 'ncycle %d' % opt.restr_cycles + opt.extra_ref_keys)).run() comment(_refmac_rms_line(restr_job.data)) # if that run is repeated with --from-step it's useful to compare Rfree if wf.from_job > 0 and wf.from_job <= len(wf.jobs): # from_job is 1-based prev = [j for j in wf.repl_jobs if j.name == restr_job.name] if prev and prev[0].data and 'free_r' in prev[0].data: comment('\nPreviously: R/Rfree %.4f/%.4f Rfree change: %+.4f' % (prev[0].data['overall_r'], prev[0].data['free_r'], restr_job.data['free_r'] - prev[0].data['free_r'])) ####### check blobs ####### if opt.blob_search: if restr_job.data['free_r'] <= BAD_FINAL_RFREE: fb_job = wf.find_blobs(opt.hklout, opt.xyzout, sigma=0.8).run() coot_script = _generate_scripts_and_pictures(wf, opt, fb_job.data) if coot_script: comment('\nTo see it in Coot run %s' % coot_script) else: comment('\nNo blob search for Rfree > %g.' % BAD_FINAL_RFREE) _generate_scripts_and_pictures(wf, opt, None) if opt.anode: # check if mtz contains I+/- and SIGI+/- column_types = list(reindexed_mtz_meta.columns.values()) if column_types.count('K') != 2 and column_types.count('M') != 2: comment('\nColumns I+/- and SIG+/- not found. 
Skipping AnoDe.') return anode_name = 'anode' # convert to sca for input to shelxc scaout = anode_name + '.sca' wf.mtz2sca(prepared_mtz, scaout).run() wf.shelxc(scaout, reindexed_mtz_meta.cell, reindexed_mtz_meta.symmetry).run() wf.copy_uncompressed(opt.xyzout, anode_name + '.pdb') anode_job = wf.anode(anode_name).run() wf.temporary_files |= { scaout, anode_name + '.pdb', anode_name + '.hkl', anode_name + '.pha', anode_name + '_sad.cif', anode_name + '_fa.hkl' } cell = Cell(reindexed_mtz_meta.cell, reindexed_mtz_meta.symmetry) # need orthogonal not fractional coordinates to generate coot script anode_job.data['blobs'] = cell.orthogonalize(anode_job.data['xyz']) comment(_anode_anom_peak_lines(anode_job.data)) coot_script = _generate_scripts_and_pictures(wf, opt, anode_job.data, pha=anode_name + '.pha')
def __init__(self, entry):
    """Initialise from one indexable ``entry`` record.

    ``entry[2:8]`` is passed to ``Cell.__init__`` as the cell and
    ``entry[1]`` as its ``symmetry``; ``entry[0]`` is stored as ``pdb_id``
    and ``entry[8]`` as ``uniprot_src``.
    """
    cell_parameters = entry[2:8]
    space_group = entry[1]
    Cell.__init__(self, cell_parameters, symmetry=space_group)
    self.pdb_id = entry[0]
    # we may have a homolog of uniprot entry
    self.uniprot_src = entry[8]
def setUp(self):
    # Store a Cell built from six fixed parameters and no symmetry
    # (None) on the instance as ``self.cell``.
    parameters = (22.84, 32.84, 42.84, 80.84, 90.84, 100.84)
    self.cell = Cell(parameters, None)