def exercise():
    """
    Smoke-test ion feature extraction on the generated zinc inputs.

    Loads the model and data produced by ``generate_zinc_inputs``, applies
    "sasaki" inelastic form factors at the input wavelength, builds an ion
    identification manager and validates its ions.  For every entry in
    ``manager.atoms_to_props`` the chemical and scattering environments are
    built and the test asserts that ``ion_vector`` returns something other
    than ``None`` and that ``ion_class`` returns a non-empty string.

    Prints "OK" when every site passes.
    """
    wavelength = 1.025
    mtz_file, pdb_file = generate_zinc_inputs(anonymize=False)
    sink = libtbx.utils.null_out()

    loader_args = [
        pdb_file,
        mtz_file,
        "wavelength={}".format(wavelength),
        "use_phaser=False",
        "use_svm=True",
    ]
    cmdline = mmtbx.command_line.load_model_and_data(
        args=loader_args,
        master_phil=master_phil(),
        out=sink,
        process_pdb_file=True,
        create_fmodel=True,
        prefer_anomalous=True)

    # The inputs are only needed for loading; remove them together with the
    # "<pdb stem>_fmodel.eff" file (presumably written by the loader).
    eff_file = os.path.splitext(pdb_file)[0] + "_fmodel.eff"
    for leftover in (pdb_file, mtz_file, eff_file):
        os.remove(leftover)

    cmdline.xray_structure.set_inelastic_form_factors(
        photon=cmdline.params.input.wavelength,
        table="sasaki")
    cmdline.fmodel.update_xray_structure(
        cmdline.xray_structure,
        update_f_calc=True)

    manager = ions.identify.create_manager(
        pdb_hierarchy=cmdline.pdb_hierarchy,
        fmodel=cmdline.fmodel,
        geometry_restraints_manager=cmdline.geometry,
        wavelength=cmdline.params.input.wavelength,
        params=cmdline.params,
        nproc=cmdline.params.nproc,
        log=sink)
    manager.validate_ions(out=sink)

    for site in manager.atoms_to_props.values():
        i_seq = site.i_seq

        neighbours = manager.find_nearby_atoms(i_seq, far_distance_cutoff=3.5)
        chem_env = ChemicalEnvironment(i_seq, neighbours, manager)

        fo_fit = manager.get_map_gaussian_fit("mFo", i_seq)
        fofc_fit = manager.get_map_gaussian_fit("mFo-DFc", i_seq)
        anom_fit = manager.get_map_gaussian_fit("anom", i_seq)
        scatter_env = ScatteringEnvironment(
            i_seq,
            manager,
            fo_density=fo_fit,
            fofc_density=fofc_fit,
            anom_density=anom_fit,
        )

        # Compute both features before checking either one.
        features = ion_vector(chem_env, scatter_env)
        label = ion_class(chem_env)
        assert features is not None
        assert label != ""

    print("OK")
def dump_sites(manager):
    """
    Collect the chemical and scattering environments of every ion and water
    site in the manager's model.

    Sites are taken from ``iterate_sites`` over ``manager.pdb_hierarchy``,
    restricted to residue names in ``ions.SUPPORTED + WATER_RES_NAMES`` and
    with ``split_sites=True``.

    Parameters
    ----------
    manager : mmtbx.ions.identify.manager

    Returns
    -------
    list of tuple
        One ``(ChemicalEnvironment, ScatteringEnvironment)`` pair per site,
        both from mmtbx.ions.environment.
    """
    def describe(i_seq):
        # The mFo density comes from the gaussian fit; the mFo-DFc and
        # anomalous densities are taken from the map statistics and paired
        # with a 0 as the second tuple element.
        stats = manager.map_stats(i_seq)
        fo_fit = manager.get_map_gaussian_fit("mFo", i_seq)
        neighbours = manager.find_nearby_atoms(i_seq, far_distance_cutoff=3.5)
        chem_env = ChemicalEnvironment(i_seq, neighbours, manager)
        scatter_env = ScatteringEnvironment(
            i_seq,
            manager,
            fo_density=fo_fit,
            fofc_density=(stats.fofc, 0),
            anom_density=(stats.anom, 0),
        )
        return (chem_env, scatter_env)

    sites = iterate_sites(
        manager.pdb_hierarchy,
        res_filter=ions.SUPPORTED + WATER_RES_NAMES,
        split_sites=True,
    )

    # AtomProperties objects hold references to the Atom object and so cannot
    # be pickled whole; only the environments of interest are gathered here.
    return [describe(site.i_seq) for site in sites]
def _lone_water_i_seqs(pdb_hierarchy):
    """
    Return the i_seq of every single-atom water in the hierarchy's only model.

    Residue groups with more than one atom group (alternate conformations)
    are skipped, as are waters with more than one atom (they probably carry
    hydrogens).
    """
    water_i_seqs = []
    for chain in pdb_hierarchy.only_model().chains():
        for residue_group in chain.residue_groups():
            atom_groups = residue_group.atom_groups()
            if len(atom_groups) > 1:
                # alt conf, skip
                continue
            for atom_group in atom_groups:
                # Only residues labelled as water are of interest.
                if atom_group.resname.strip().upper() not in WATER_RES_NAMES:
                    continue
                atoms = atom_group.atoms()
                if len(atoms) == 1:
                    # otherwise it probably has hydrogens, skip
                    water_i_seqs.append(atoms[0].i_seq)
    return water_i_seqs


def exercise():
    """
    Check that the SVM classifier identifies every anonymized site.

    For the calcium and the zinc test inputs (each generated with
    ``anonymize=True`` and loaded at its own wavelength), every single-atom
    water site is run through ``predict_ion`` with the candidate elements
    "HOH", "ZN" and "CA".  The top prediction must match the label returned
    by ``ion_class`` for the site's chemical environment.

    If ``predict_ion`` returns ``None`` a notice is printed and the function
    returns without printing "OK" (the test is skipped).  On a mismatch the
    predictions are printed and the process exits with status 1.  Prints
    "OK" when every site of every input matches.
    """
    cases = [
        (generate_calcium_inputs, 1.025),
        (generate_zinc_inputs, 1.54),
    ]

    for generate_inputs, wavelength in cases:
        mtz_file, pdb_file = generate_inputs(anonymize=True)
        null_out = libtbx.utils.null_out()

        cmdline = mmtbx.command_line.load_model_and_data(
            args=[
                pdb_file,
                mtz_file,
                "wavelength={}".format(wavelength),
                "use_phaser=True",
                "use_svm=True",
            ],
            master_phil=master_phil(),
            out=null_out,
            process_pdb_file=True,
            create_fmodel=True,
            prefer_anomalous=True,
            set_inelastic_form_factors="sasaki",
        )

        # The inputs are only needed for loading; remove them along with the
        # "_fmodel.eff" and ".pdb" files named after the MTZ file's stem.
        mtz_stem = os.path.splitext(mtz_file)[0]
        os.remove(pdb_file)
        os.remove(mtz_file)
        os.remove(mtz_stem + "_fmodel.eff")
        os.remove(mtz_stem + ".pdb")

        manager = ions.identify.create_manager(
            pdb_hierarchy=cmdline.pdb_hierarchy,
            fmodel=cmdline.fmodel,
            geometry_restraints_manager=cmdline.geometry,
            wavelength=cmdline.params.input.wavelength,
            params=cmdline.params,
            nproc=cmdline.params.nproc,
            log=null_out,
            manager_class=ions.svm.manager,
        )

        # Build a list of properties of each water / ion site
        waters = _lone_water_i_seqs(manager.pdb_hierarchy)
        assert len(waters) > 0

        atom_props = [AtomProperties(i_seq, manager) for i_seq in waters]

        for atom_prop in atom_props:
            i_seq = atom_prop.i_seq
            chem_env = ChemicalEnvironment(
                i_seq,
                manager.find_nearby_atoms(i_seq, far_distance_cutoff=3.5),
                manager,
            )
            scatter_env = ScatteringEnvironment(
                i_seq,
                manager,
                fo_density=manager.get_map_gaussian_fit("mFo", i_seq),
                fofc_density=manager.get_map_gaussian_fit("mFo-DFc", i_seq),
                anom_density=manager.get_map_gaussian_fit("anom", i_seq),
            )

            resname = ion_class(chem_env)
            assert resname != ""

            predictions = predict_ion(
                chem_env, scatter_env, elements=["HOH", "ZN", "CA"])
            if predictions is None:
                print("Could not load SVM classifier")
                print("Skipping {}".format(os.path.split(__file__)[1]))
                return

            if resname != predictions[0][0]:
                print("Prediction ({}) did not match expected: {}"
                      .format(predictions[0][0], resname))
                for element, prob in predictions:
                    print(" {}: {:.2f}".format(element, prob))
                # A bare sys.exit() exits with status 0, which would make
                # this failure look like a pass to anything checking the
                # return code; exit non-zero instead.
                sys.exit(1)

    print("OK")
def analyze_water(self, i_seq, debug=True, candidates=Auto,
                  filter_outputs=True):
    """
    Analyzes a single water site using a SVM to decide whether to re-assign
    it as an ion.

    Parameters
    ----------
    i_seq : int
        Index of the site; used to look up the atom in ``self.pdb_atoms``.
    debug : bool, optional
        Not referenced by this method's body.
    candidates : list of str or str, optional
        Names to consider.  A string is split on commas and whitespace.
        Names are stripped and upper-cased, the single name "X" is treated
        as an empty list, and "HOH" is always appended.  When left as Auto,
        ``None`` is passed to ``predict_ion`` as the element list.
    filter_outputs : bool, optional
        When true, the predictions are passed through
        ``utils.filter_svm_outputs`` before a choice is made.

    Returns
    -------
    svm_prediction or None
        ``None`` when the site's atom type is WATER_POOR or when no
        predictions are available.  Otherwise a result whose
        ``final_choice`` is set only if the best guess is not "HOH" and
        either it is the sole prediction or its score passes the
        ``min_score`` and ``min_fraction_of_next`` thresholds.
    """
    atom_props = mmtbx.ions.identify.AtomProperties(i_seq, self)
    atom_type = atom_props.get_atom_type(params=self.params.water)
    if atom_type == mmtbx.ions.identify.WATER_POOR:
        return None

    use_defaults = candidates is Auto
    if use_defaults:
        raw_names = mmtbx.ions.DEFAULT_IONS
    elif isinstance(candidates, str) or isinstance(candidates, unicode):
        raw_names = candidates.replace(",", " ").split()
    else:
        raw_names = candidates
    names = [name.strip().upper() for name in raw_names]
    if names == ['X']:
        # XXX hack for testing - X is "dummy" element
        names = []

    if use_defaults:
        elements = None
    else:
        names.append("HOH")
        elements = names

    from mmtbx.ions.environment import ScatteringEnvironment, \
        ChemicalEnvironment
    chem_env = ChemicalEnvironment(i_seq, atom_props.nearby_atoms, self)
    fo_fit = self.get_map_gaussian_fit("mFo", i_seq)
    fofc_fit = self.get_map_gaussian_fit("mFo-DFc", i_seq)
    anom_fit = self.get_map_gaussian_fit("anom", i_seq)
    scatter_env = ScatteringEnvironment(
        i_seq=i_seq,
        manager=self,
        fo_density=fo_fit,
        fofc_density=fofc_fit,
        anom_density=anom_fit)

    predictions = predict_ion(
        chem_env,
        scatter_env,
        elements=elements,
        svm_name=self.params.svm.svm_name)
    if filter_outputs and predictions is not None:
        predictions = utils.filter_svm_outputs(
            chem_env=chem_env,
            scatter_env=scatter_env,
            predictions=predictions)

    if predictions is None or len(predictions) == 0:
        return None

    # Highest score first.
    predictions.sort(key=lambda pred: -pred[1])
    best_guess, best_score = predictions[0]

    final_choice = None
    if best_guess != "HOH":
        if len(predictions) == 1:
            accept = True
        else:
            _, runner_up_score = predictions[1]
            accept = (
                best_score >= self.params.svm.min_score and
                best_score >= (runner_up_score *
                               self.params.svm.min_fraction_of_next))
        if accept:
            final_choice = mmtbx.ions.server.get_metal_parameters(best_guess)

    info_buffer = StringIO()
    atom_props.show_properties(identity="HOH", out=info_buffer)
    return svm_prediction(
        i_seq=i_seq,
        pdb_id_str=self.pdb_atoms[i_seq].id_str(),
        atom_info_str=info_buffer.getvalue(),
        map_stats=self.map_stats(i_seq),
        atom_types=[pred[0] for pred in predictions],
        scores=[pred[1] for pred in predictions],
        final_choice=final_choice)