def exercise(): wavelength = 1.025 mtz_file, pdb_file = generate_zinc_inputs(anonymize=False) null_out = libtbx.utils.null_out() cmdline = mmtbx.command_line.load_model_and_data(args=[ pdb_file, mtz_file, "wavelength={}".format(wavelength), "use_phaser=False", "use_svm=True" ], master_phil=master_phil(), out=null_out, process_pdb_file=True, create_fmodel=True, prefer_anomalous=True) os.remove(pdb_file) os.remove(mtz_file) os.remove(os.path.splitext(pdb_file)[0] + "_fmodel.eff") cmdline.xray_structure.set_inelastic_form_factors( photon=cmdline.params.input.wavelength, table="sasaki") cmdline.fmodel.update_xray_structure(cmdline.xray_structure, update_f_calc=True) manager = ions.identify.create_manager( pdb_hierarchy=cmdline.pdb_hierarchy, fmodel=cmdline.fmodel, geometry_restraints_manager=cmdline.geometry, wavelength=cmdline.params.input.wavelength, params=cmdline.params, nproc=cmdline.params.nproc, log=null_out) manager.validate_ions(out=null_out) for atom_props in manager.atoms_to_props.values(): i_seq = atom_props.i_seq chem_env = ChemicalEnvironment( i_seq, manager.find_nearby_atoms(i_seq, far_distance_cutoff=3.5), manager) scatter_env = ScatteringEnvironment( i_seq, manager, fo_density=manager.get_map_gaussian_fit("mFo", i_seq), fofc_density=manager.get_map_gaussian_fit("mFo-DFc", i_seq), anom_density=manager.get_map_gaussian_fit("anom", i_seq), ) vector = ion_vector(chem_env, scatter_env) resname = ion_class(chem_env) assert vector is not None assert resname != "" print "OK"
def dump_sites(manager):
    """
    Collect the chemical and scattering environment of each ion and water
    site selected from the manager's model.

    Sites are taken from ``iterate_sites`` restricted to the residue names in
    ``ions.SUPPORTED + WATER_RES_NAMES``.

    Parameters
    ----------
    manager : mmtbx.ions.identify.manager

    Returns
    -------
    list of tuple
        One ``(ChemicalEnvironment, ScatteringEnvironment)`` pair (both from
        mmtbx.ions.environment) per site, in iteration order.
    """

    def _environments(i_seq):
        # Build the (chemical, scattering) environment pair for one site.
        stats = manager.map_stats(i_seq)
        fo_fit = manager.get_map_gaussian_fit("mFo", i_seq)
        chemical = ChemicalEnvironment(
            i_seq,
            manager.find_nearby_atoms(i_seq, far_distance_cutoff=3.5),
            manager,
        )
        # Only the mFo map gets a gaussian fit here; the difference and
        # anomalous maps use the plain map value paired with a 0.
        scattering = ScatteringEnvironment(
            i_seq,
            manager,
            fo_density=fo_fit,
            fofc_density=(stats.fofc, 0),
            anom_density=(stats.anom, 0),
        )
        return chemical, scattering

    sites = iterate_sites(
        manager.pdb_hierarchy,
        res_filter=ions.SUPPORTED + WATER_RES_NAMES,
        split_sites=True,
    )

    # Can't pickle entire AtomProperties because they include references to
    # the Atom object. Instead, gather the environments we want and return
    # those in a separate list.
    return [_environments(site.i_seq) for site in sites]
def exercise(): fns = [generate_calcium_inputs, generate_zinc_inputs] wavelengths = [1.025, 1.54] for fn, wavelength in zip(fns, wavelengths): mtz_file, pdb_file = fn(anonymize=True) null_out = libtbx.utils.null_out() cmdline = mmtbx.command_line.load_model_and_data( args=[ pdb_file, mtz_file, "wavelength={}".format(wavelength), "use_phaser=True", "use_svm=True" ], master_phil=master_phil(), out=null_out, process_pdb_file=True, create_fmodel=True, prefer_anomalous=True, set_inelastic_form_factors="sasaki", ) os.remove(pdb_file) os.remove(mtz_file) os.remove(os.path.splitext(mtz_file)[0] + "_fmodel.eff") os.remove(os.path.splitext(mtz_file)[0] + ".pdb") manager = ions.identify.create_manager( pdb_hierarchy=cmdline.pdb_hierarchy, fmodel=cmdline.fmodel, geometry_restraints_manager=cmdline.geometry, wavelength=cmdline.params.input.wavelength, params=cmdline.params, nproc=cmdline.params.nproc, log=null_out, manager_class=ions.svm.manager, ) # Build a list of properties of each water / ion site waters = [] for chain in manager.pdb_hierarchy.only_model().chains(): for residue_group in chain.residue_groups(): atom_groups = residue_group.atom_groups() if (len(atom_groups) > 1): # alt conf, skip continue for atom_group in atom_groups: # Check for non standard atoms in the residue # Or a label indicating the residue is a water resname = atom_group.resname.strip().upper() if (resname in WATER_RES_NAMES): atoms = atom_group.atoms() if (len(atoms) == 1 ): # otherwise it probably has hydrogens, skip waters.append(atoms[0].i_seq) assert len(waters) > 0 atom_props = [AtomProperties(i_seq, manager) for i_seq in waters] for atom_prop in atom_props: i_seq = atom_prop.i_seq chem_env = ChemicalEnvironment( i_seq, manager.find_nearby_atoms(i_seq, far_distance_cutoff=3.5), manager, ) scatter_env = ScatteringEnvironment( i_seq, manager, fo_density=manager.get_map_gaussian_fit("mFo", i_seq), fofc_density=manager.get_map_gaussian_fit("mFo-DFc", i_seq), 
anom_density=manager.get_map_gaussian_fit("anom", i_seq), ) resname = ion_class(chem_env) assert resname != "" predictions = predict_ion(chem_env, scatter_env, elements=["HOH", "ZN", "CA"]) if predictions is None: print "Could not load SVM classifier" print "Skipping {}".format(os.path.split(__file__)[1]) return if resname != predictions[0][0]: print "Prediction ({}) did not match expected: {}" \ .format(predictions[0][0], resname) for element, prob in predictions: print " {}: {:.2f}".format(element, prob) sys.exit() print "OK"
def analyze_water(self, i_seq, debug=True, candidates=Auto,
                  filter_outputs=True):
    """
    Analyzes a single water site using a SVM to decide whether to re-assign
    it as an ion.

    Parameters
    ----------
    i_seq : int
        Index of the water atom to analyze.
    debug : bool, optional
        Accepted for interface compatibility; not referenced in this method.
    candidates : list of str or str, optional
        Elements to consider. A string is split on commas and whitespace.
        With ``Auto`` no explicit element list is passed to ``predict_ion``;
        otherwise the names are upper-cased and "HOH" is appended.
    filter_outputs : bool, optional
        When true, the raw predictions are passed through
        ``utils.filter_svm_outputs`` before a choice is made.

    Returns
    -------
    svm_prediction or None
        None when the site's atom type is ``WATER_POOR`` or when no
        predictions are available.
    """
    atom_props = mmtbx.ions.identify.AtomProperties(i_seq, self)
    water_type = atom_props.get_atom_type(params=self.params.water)
    if water_type == mmtbx.ions.identify.WATER_POOR:
        return None

    use_defaults = candidates is Auto
    if use_defaults:
        candidates = mmtbx.ions.DEFAULT_IONS
    elif isinstance(candidates, str) or isinstance(candidates, unicode):
        candidates = candidates.replace(",", " ").split()
    candidates = [name.strip().upper() for name in candidates]
    if candidates == ['X']:  # XXX hack for testing - X is "dummy" element
        candidates = []

    if use_defaults:
        # The default list is not forwarded; predict_ion receives None.
        candidates = None
    else:
        candidates.append("HOH")

    from mmtbx.ions.environment import ScatteringEnvironment, \
        ChemicalEnvironment
    chem_env = ChemicalEnvironment(i_seq, atom_props.nearby_atoms, self)
    scatter_env = ScatteringEnvironment(
        i_seq=i_seq,
        manager=self,
        fo_density=self.get_map_gaussian_fit("mFo", i_seq),
        fofc_density=self.get_map_gaussian_fit("mFo-DFc", i_seq),
        anom_density=self.get_map_gaussian_fit("anom", i_seq))

    predictions = predict_ion(
        chem_env,
        scatter_env,
        elements=candidates,
        svm_name=self.params.svm.svm_name)
    if predictions is not None and filter_outputs:
        predictions = utils.filter_svm_outputs(
            chem_env=chem_env,
            scatter_env=scatter_env,
            predictions=predictions)

    if predictions is None or len(predictions) == 0:
        return None

    # Highest score first.
    predictions.sort(key=lambda pred: -pred[1])
    best_guess, best_score = predictions[0]

    final_choice = None
    if best_guess != "HOH":
        # A lone prediction is accepted as is; otherwise the best score must
        # clear the minimum and sufficiently exceed the runner-up.
        accepted = len(predictions) == 1
        if not accepted:
            _, next_score = predictions[1]
            svm_params = self.params.svm
            accepted = (
                (best_score >= svm_params.min_score) and
                (best_score >= (next_score * svm_params.min_fraction_of_next)))
        if accepted:
            final_choice = mmtbx.ions.server.get_metal_parameters(best_guess)

    atom_info_out = StringIO()
    atom_props.show_properties(identity="HOH", out=atom_info_out)
    return svm_prediction(
        i_seq=i_seq,
        pdb_id_str=self.pdb_atoms[i_seq].id_str(),
        atom_info_str=atom_info_out.getvalue(),
        map_stats=self.map_stats(i_seq),
        atom_types=[pred[0] for pred in predictions],
        scores=[pred[1] for pred in predictions],
        final_choice=final_choice)
def exercise(): if not libtbx.env.has_module("phenix_regression"): print "Skipping {}".format(os.path.split(__file__)[1]) return models = OrderedDict([ ("2qng", [ Counter({ chem_oxygen: 7, chem_carboxy: 2, chem_water: 2, chem_backbone: 3 }), Counter({ chem_oxygen: 6, chem_carboxy: 3, chem_water: 1, chem_backbone: 2 }), ]), ("3rva", [ Counter({ chem_oxygen: 6, chem_carboxy: 4, chem_water: 2 }), Counter({ chem_nitrogen: 1, chem_oxygen: 4, chem_nitrogen_secondary: 1, chem_carboxy: 3, chem_water: 1 }), Counter({ chem_nitrogen: 4, chem_nitrogen_primary: 1, chem_nitrogen_secondary: 3, chem_backbone: 3 }), ]), ("1mjh", [ Counter({ chem_oxygen: 6, chem_water: 3, chem_phosphate: 3 }), Counter({ chem_oxygen: 6, chem_water: 3, chem_phosphate: 3 }), ]), ("4e1h", [ Counter({ chem_oxygen: 6, chem_carboxy: 4 }), Counter({ chem_oxygen: 6, chem_carboxy: 3 }), Counter({ chem_oxygen: 6, chem_carboxy: 3 }), ]), ("2xuz", [ Counter({chem_oxygen: 6}), ]), ("3zli", [ Counter({ chem_nitrogen: 2, chem_oxygen: 4, chem_nitrogen_secondary: 2, chem_carboxy: 1, chem_water: 1 }), Counter({chem_sulfur: 4}), Counter({ chem_nitrogen: 2, chem_oxygen: 4, chem_nitrogen_secondary: 2, chem_carboxy: 1, chem_water: 1 }), Counter({chem_sulfur: 4}), ]), ("3e0f", [ Counter({ chem_nitrogen: 2, chem_oxygen: 4, chem_nitrogen_secondary: 2, chem_carboxy: 2, chem_phosphate: 2 }), Counter({ chem_nitrogen: 2, chem_oxygen: 2, chem_nitrogen_secondary: 2, chem_carboxy: 1, chem_phosphate: 1 }), Counter({ chem_nitrogen: 2, chem_oxygen: 3, chem_nitrogen_secondary: 2, chem_carboxy: 2, chem_phosphate: 1 }), ]), ("3dkq", [ Counter({ chem_nitrogen: 4, chem_oxygen: 1, chem_nitrogen_secondary: 4, chem_carboxy: 1 }), Counter({ chem_nitrogen: 2, chem_oxygen: 1, chem_nitrogen_secondary: 2, chem_carboxy: 1 }), Counter({ chem_nitrogen: 4, chem_oxygen: 1, chem_nitrogen_secondary: 4, chem_carboxy: 1 }), ]), ("2o8q", [ Counter({ chem_nitrogen: 3, chem_oxygen: 3, chem_nitrogen_secondary: 3, chem_water: 3 }), Counter({ chem_nitrogen: 3, 
chem_oxygen: 3, chem_nitrogen_secondary: 3, chem_water: 3 }), ]), ("1tgg", [ Counter({ chem_oxygen: 5, chem_chloride: 1, chem_carboxy: 4, chem_water: 1 }), Counter({ chem_oxygen: 3, chem_chloride: 2, chem_carboxy: 3 }), Counter({ chem_oxygen: 4, chem_chloride: 2, chem_carboxy: 4 }), ]), ("3zu8", [ Counter({ chem_oxygen: 7, chem_carboxy: 3, chem_water: 1, chem_backbone: 2 }), Counter({ chem_nitrogen: 4, chem_oxygen: 1, chem_nitrogen_primary: 1, chem_nitrogen_secondary: 3, chem_carboxy: 1, chem_backbone: 3 }), ]), ("1ofs", [ Counter({ chem_nitrogen: 1, chem_oxygen: 4, chem_nitrogen_secondary: 1, chem_carboxy: 3, chem_water: 1 }), Counter({ chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3, chem_water: 2, chem_backbone: 1 }), Counter({ chem_nitrogen: 1, chem_oxygen: 5, chem_nitrogen_secondary: 1, chem_carboxy: 3, chem_water: 2 }), Counter({ chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3, chem_water: 2, chem_backbone: 1 }), ]), ("3ul2", [ Counter({ chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3, chem_water: 2, chem_backbone: 1 }), Counter({ chem_nitrogen: 1, chem_oxygen: 5, chem_nitrogen_secondary: 1, chem_carboxy: 3, chem_water: 2 }), Counter({ chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3, chem_backbone: 1, chem_water: 2 }), Counter({ chem_nitrogen: 1, chem_oxygen: 5, chem_nitrogen_secondary: 1, chem_carboxy: 3, chem_water: 2 }), Counter({ chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3, chem_water: 2, chem_backbone: 1 }), Counter({ chem_nitrogen: 1, chem_oxygen: 5, chem_nitrogen_secondary: 1, chem_carboxy: 3, chem_water: 2 }), Counter({ chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3, chem_water: 2, chem_backbone: 1 }), Counter({ chem_nitrogen: 1, chem_oxygen: 5, chem_nitrogen_secondary: 1, chem_carboxy: 3, chem_water: 2 }), ]), ("3snm", [ Counter({ chem_oxygen: 5, chem_amide: 1, chem_carboxy: 3, chem_backbone: 1 }), Counter({ chem_nitrogen: 1, chem_oxygen: 3, chem_nitrogen_secondary: 1, chem_carboxy: 3 }), ]), ("3qlq", [ Counter({ chem_oxygen: 7, chem_amide: 1, 
chem_carboxy: 3, chem_water: 2, chem_backbone: 1 }), Counter({ chem_nitrogen: 1, chem_oxygen: 5, chem_nitrogen_secondary: 1, chem_carboxy: 3, chem_water: 2 }), Counter({ chem_nitrogen: 1, chem_oxygen: 5, chem_nitrogen_secondary: 1, chem_carboxy: 3, chem_water: 2 }), Counter({ chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3, chem_water: 2, chem_backbone: 1 }), Counter({ chem_nitrogen: 1, chem_oxygen: 5, chem_nitrogen_secondary: 1, chem_carboxy: 3, chem_water: 2 }), Counter({ chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3, chem_water: 2, chem_backbone: 1 }), Counter({ chem_oxygen: 7, chem_amide: 1, chem_carboxy: 3, chem_water: 2, chem_backbone: 1 }), Counter({ chem_nitrogen: 1, chem_oxygen: 5, chem_nitrogen_secondary: 1, chem_carboxy: 3, chem_water: 2 }), ]), ("2gdf", [ Counter({ chem_nitrogen: 1, chem_oxygen: 4, chem_nitrogen_secondary: 1, chem_carboxy: 3, chem_water: 1 }), Counter({ chem_oxygen: 6, chem_amide: 1, chem_carboxy: 3, chem_water: 1, chem_backbone: 1 }), Counter({ chem_nitrogen: 1, chem_oxygen: 4, chem_nitrogen_secondary: 1, chem_carboxy: 3, chem_water: 1 }), Counter({ chem_oxygen: 6, chem_amide: 1, chem_carboxy: 3, chem_water: 1, chem_backbone: 1 }), ]), ("1q8h", [ Counter({ chem_oxygen: 7, chem_carboxy: 6, chem_water: 1 }), Counter({ chem_oxygen: 7, chem_carboxy: 4, chem_water: 3 }), Counter({ chem_oxygen: 8, chem_carboxy: 6, chem_water: 2 }), ]), ]) for model, expected_environments in models.items(): pdb_path = libtbx.env.find_in_repositories(relative_path=os.path.join( "phenix_regression", "mmtbx", "ions", model + ".pdb"), test=os.path.isfile) mon_lib_srv = monomer_library.server.server() ener_lib = monomer_library.server.ener_lib() processed_pdb_file = monomer_library.pdb_interpretation.process( mon_lib_srv=mon_lib_srv, ener_lib=ener_lib, file_name=pdb_path, raw_records=None, force_symmetry=True, log=libtbx.utils.null_out()) geometry = \ processed_pdb_file.geometry_restraints_manager(show_energies = False) xray_structure = 
processed_pdb_file.xray_structure() pdb_hierarchy = processed_pdb_file.all_chain_proxies.pdb_hierarchy connectivity = geometry.shell_sym_tables[0].full_simple_connectivity() manager = mmtbx.ions.identify.manager(fmodel=None, pdb_hierarchy=pdb_hierarchy, xray_structure=xray_structure, connectivity=connectivity) elements = set(ions.DEFAULT_IONS + ions.TRANSITION_METALS) elements.difference_update(["CL"]) metals = [ i_seq for i_seq, atom in enumerate(manager.pdb_atoms) if atom.fetch_labels().resname.strip().upper() in elements ] assert len(metals) == len(expected_environments) for index, metal, expected_environment in \ zip(xrange(len(metals)), metals, expected_environments): env = ChemicalEnvironment( metal, manager.find_nearby_atoms(metal, filter_by_two_fofc=False), manager) if env.chemistry != expected_environment: print "Problem detecting chemistry environment in", model, index print "Found: ", env.chemistry print "Should be:", expected_environment sys.exit() print "OK"