def from_pdb(self):
    """
    Compute model structure factors and R-free flags from the input PDB model.

    Reads ``self.out``, ``self.params``, ``self.pdb_hierarchy`` and
    ``self.pdb_in``.  Crystal symmetry is taken from ``params.crystal_symmetry``
    when set there, otherwise from the PDB input (in which case the PDB values
    are written back into ``params.crystal_symmetry``).  Depending on
    ``params.modify_pdb`` the hierarchy may have waters and alternate
    conformations removed, and the scatterers may be made isotropic and have
    their B-factors rescaled to a target mean.

    Returns:
      tuple ``(f_model, r_free_flags)`` taken from the object returned by
      ``mmtbx.utils.fmodel_from_xray_structure``.

    Raises:
      Sorry: if no space group or no unit cell is available from either
        source, or if B-factor rescaling is requested but the current mean
        B-factor of the non-hydrogen atoms is not positive.
      AssertionError: if ``set_mean_b_iso`` is combined with ``set_wilson_b``
        or is not greater than zero.
    """
    out = self.out
    params = self.params
    pdb_hierarchy = self.pdb_hierarchy

    # Symmetry stored with the PDB input, if any.
    pdb_sg, pdb_uc = None, None
    pdb_symm = self.pdb_in.crystal_symmetry()
    if pdb_symm is not None:
        pdb_sg = pdb_symm.space_group_info()
        pdb_uc = pdb_symm.unit_cell()

    # Explicit parameters take precedence; otherwise the PDB values are used
    # and recorded back into the parameters.
    apply_sg = pdb_sg
    apply_uc = pdb_uc
    if params.crystal_symmetry.space_group is not None:
        apply_sg = params.crystal_symmetry.space_group
    else:
        params.crystal_symmetry.space_group = pdb_sg
    if params.crystal_symmetry.unit_cell is not None:
        apply_uc = params.crystal_symmetry.unit_cell
    else:
        params.crystal_symmetry.unit_cell = pdb_uc
    if (apply_sg is None) or (apply_uc is None):
        raise Sorry(
            "Incomplete symmetry information - please specify a space " +
            "group and unit cell for this structure.")

    from cctbx import crystal, adptbx
    from scitbx.array_family import flex
    apply_symm = crystal.symmetry(unit_cell=apply_uc,
                                  space_group_info=apply_sg)

    if params.modify_pdb.remove_waters:
        print(" Removing solvent atoms...", file=out)
        for model in pdb_hierarchy.models():
            for chain in model.chains():
                for residue_group in chain.residue_groups():
                    for atom_group in residue_group.atom_groups():
                        if atom_group.resname in ["HOH", "WAT"]:
                            residue_group.remove_atom_group(
                                atom_group=atom_group)
                    # Drop residue groups emptied by the removal above.
                    if len(residue_group.atom_groups()) == 0:
                        chain.remove_residue_group(
                            residue_group=residue_group)
                # Drop chains that no longer hold any atoms.
                if len(chain.atoms()) == 0:
                    model.remove_chain(chain=chain)

    if params.modify_pdb.remove_alt_confs:
        print(
            " Removing all alternate conformations and resetting occupancies...",
            file=out)
        from mmtbx import pdbtools
        pdbtools.remove_alt_confs(hierarchy=pdb_hierarchy)

    # NOTE(review): the scatterers are built from self.pdb_in rather than from
    # pdb_hierarchy as edited above - confirm that the water / alt-conf
    # removal is expected to be reflected in this structure.
    xray_structure = self.pdb_in.xray_structure_simple(
        crystal_symmetry=apply_symm)
    sctr_keys = xray_structure.scattering_type_registry().type_count_dict()
    hd_selection = xray_structure.hd_selection()
    if not (("H" in sctr_keys) or ("D" in sctr_keys)):
        print(" WARNING: this model does not contain hydrogen atoms!",
              file=out)
        print(" strongly recommend running phenix.ready_set or", file=out)
        print(" equivalent to ensure realistic simulated data.", file=out)
        print("", file=out)

    if params.modify_pdb.convert_to_isotropic:
        xray_structure.convert_to_isotropic()

    # Target mean B-factor, if rescaling was requested.
    set_b = None
    if params.modify_pdb.set_mean_b_iso is not None:
        assert (not params.modify_pdb.set_wilson_b)
        print(" Scaling B-factors to have mean of %.2f" %
              params.modify_pdb.set_mean_b_iso, file=out)
        assert (params.modify_pdb.set_mean_b_iso > 0)
        set_b = params.modify_pdb.set_mean_b_iso
    elif params.modify_pdb.set_wilson_b:
        print(
            " Scaling B-factors to match mean Wilson B for this resolution",
            file=out)
        set_b = get_mean_statistic_for_resolution(d_min=params.d_min,
                                                  stat_type="wilson_b")
        print("", file=out)

    if set_b is not None:
        # The current mean is computed over non-hydrogen atoms only.
        u_iso = xray_structure.extract_u_iso_or_u_equiv()
        u_iso = u_iso.select(~hd_selection)
        u_mean = flex.mean(u_iso)
        b_mean = adptbx.u_as_b(u_mean)
        if b_mean <= 0:
            # A zero mean would divide by zero below; a negative one would
            # flip the sign of every ADP.
            raise Sorry(
                ("Mean B-factor of the non-hydrogen atoms is %.2f; cannot " +
                 "rescale B-factors to a target mean.") % b_mean)
        scale = set_b / b_mean
        xray_structure.scale_adps(scale)
        pdb_hierarchy.atoms().set_adps_from_scatterers(
            scatterers=xray_structure.scatterers(),
            unit_cell=xray_structure.unit_cell())

    import mmtbx.command_line.fmodel
    from mmtbx import utils
    fmodel_params = \
        mmtbx.command_line.fmodel.fmodel_from_xray_structure_master_params.extract()
    fmodel_params.high_resolution = params.d_min
    fake_data = params.fake_data_from_fmodel
    fmodel_params.fmodel = fake_data.fmodel

    # Replace zero bulk-solvent parameters with fixed fallback values.
    for name, fallback in (("b_sol", 46.0), ("k_sol", 0.35)):
        if getattr(fmodel_params.fmodel, name) == 0:
            print(" %s is zero - will use mean value for d_min +/- 0.2A" %
                  name, file=out)
            print(" (this is not strongly correlated with resolution, but",
                  file=out)
            print(
                " it is preferrable to use a real value instead of leaving",
                file=out)
            print(" it set to 0)", file=out)
            setattr(fmodel_params.fmodel, name, fallback)
            print("", file=out)

    fmodel_params.structure_factors_accuracy = \
        fake_data.structure_factors_accuracy
    fmodel_params.mask = fake_data.mask
    fmodel_params.r_free_flags_fraction = params.r_free_flags.fraction
    fmodel_params.add_sigmas = False
    fmodel_params.output.type = "real"
    fmodel_ = utils.fmodel_from_xray_structure(
        xray_structure=xray_structure, params=fmodel_params)
    f_model = fmodel_.f_model
    r_free_flags = fmodel_.r_free_flags
    return (f_model, r_free_flags)
def from_pdb(self):
    """
    Compute model structure factors and R-free flags from the input PDB model.

    Reads ``self.out``, ``self.params``, ``self.pdb_hierarchy`` and
    ``self.pdb_in``.  Crystal symmetry is taken from ``params.crystal_symmetry``
    when set there, otherwise from the PDB input (in which case the PDB values
    are written back into ``params.crystal_symmetry``).  Depending on
    ``params.modify_pdb`` the hierarchy may have waters and alternate
    conformations removed, and the scatterers may be made isotropic and have
    their B-factors rescaled to a target mean.

    All log output goes through the print function with ``file=out``.

    Returns:
      tuple ``(f_model, r_free_flags)`` taken from the object returned by
      ``mmtbx.utils.fmodel_from_xray_structure``.

    Raises:
      Sorry: if no space group or no unit cell is available from either
        source, or if B-factor rescaling is requested but the current mean
        B-factor of the non-hydrogen atoms is not positive.
      AssertionError: if ``set_mean_b_iso`` is combined with ``set_wilson_b``
        or is not greater than zero.
    """
    out = self.out
    params = self.params
    pdb_hierarchy = self.pdb_hierarchy

    # Symmetry stored with the PDB input, if any.
    pdb_sg, pdb_uc = None, None
    pdb_symm = self.pdb_in.crystal_symmetry()
    if pdb_symm is not None:
        pdb_sg = pdb_symm.space_group_info()
        pdb_uc = pdb_symm.unit_cell()

    # Explicit parameters take precedence; otherwise the PDB values are used
    # and recorded back into the parameters.
    apply_sg = pdb_sg
    apply_uc = pdb_uc
    if params.crystal_symmetry.space_group is not None:
        apply_sg = params.crystal_symmetry.space_group
    else:
        params.crystal_symmetry.space_group = pdb_sg
    if params.crystal_symmetry.unit_cell is not None:
        apply_uc = params.crystal_symmetry.unit_cell
    else:
        params.crystal_symmetry.unit_cell = pdb_uc
    if (apply_sg is None) or (apply_uc is None):
        raise Sorry(
            "Incomplete symmetry information - please specify a space " +
            "group and unit cell for this structure.")

    from cctbx import crystal, adptbx
    from scitbx.array_family import flex
    apply_symm = crystal.symmetry(unit_cell=apply_uc,
                                  space_group_info=apply_sg)

    if params.modify_pdb.remove_waters:
        print(" Removing solvent atoms...", file=out)
        for model in pdb_hierarchy.models():
            for chain in model.chains():
                for residue_group in chain.residue_groups():
                    for atom_group in residue_group.atom_groups():
                        if atom_group.resname in ["HOH", "WAT"]:
                            residue_group.remove_atom_group(
                                atom_group=atom_group)
                    # Drop residue groups emptied by the removal above.
                    if len(residue_group.atom_groups()) == 0:
                        chain.remove_residue_group(
                            residue_group=residue_group)
                # Drop chains that no longer hold any atoms.
                if len(chain.atoms()) == 0:
                    model.remove_chain(chain=chain)

    if params.modify_pdb.remove_alt_confs:
        print(
            " Removing all alternate conformations and resetting occupancies...",
            file=out)
        from mmtbx import pdbtools
        pdbtools.remove_alt_confs(hierarchy=pdb_hierarchy)

    # NOTE(review): the scatterers are built from self.pdb_in rather than from
    # pdb_hierarchy as edited above - confirm that the water / alt-conf
    # removal is expected to be reflected in this structure.
    xray_structure = self.pdb_in.xray_structure_simple(
        crystal_symmetry=apply_symm)
    # Membership is tested on the dict itself; no key list is needed.
    sctr_keys = xray_structure.scattering_type_registry().type_count_dict()
    hd_selection = xray_structure.hd_selection()
    if not (("H" in sctr_keys) or ("D" in sctr_keys)):
        print(" WARNING: this model does not contain hydrogen atoms!",
              file=out)
        print(" strongly recommend running phenix.ready_set or", file=out)
        print(" equivalent to ensure realistic simulated data.", file=out)
        print("", file=out)

    if params.modify_pdb.convert_to_isotropic:
        xray_structure.convert_to_isotropic()

    # Target mean B-factor, if rescaling was requested.
    set_b = None
    if params.modify_pdb.set_mean_b_iso is not None:
        assert (not params.modify_pdb.set_wilson_b)
        print(" Scaling B-factors to have mean of %.2f" %
              params.modify_pdb.set_mean_b_iso, file=out)
        assert (params.modify_pdb.set_mean_b_iso > 0)
        set_b = params.modify_pdb.set_mean_b_iso
    elif params.modify_pdb.set_wilson_b:
        print(
            " Scaling B-factors to match mean Wilson B for this resolution",
            file=out)
        set_b = get_mean_statistic_for_resolution(d_min=params.d_min,
                                                  stat_type="wilson_b")
        print("", file=out)

    if set_b is not None:
        # The current mean is computed over non-hydrogen atoms only.
        u_iso = xray_structure.extract_u_iso_or_u_equiv()
        u_iso = u_iso.select(~hd_selection)
        u_mean = flex.mean(u_iso)
        b_mean = adptbx.u_as_b(u_mean)
        if b_mean <= 0:
            # A zero mean would divide by zero below; a negative one would
            # flip the sign of every ADP.
            raise Sorry(
                ("Mean B-factor of the non-hydrogen atoms is %.2f; cannot " +
                 "rescale B-factors to a target mean.") % b_mean)
        scale = set_b / b_mean
        xray_structure.scale_adps(scale)
        pdb_hierarchy.atoms().set_adps_from_scatterers(
            scatterers=xray_structure.scatterers(),
            unit_cell=xray_structure.unit_cell())

    import mmtbx.command_line.fmodel
    from mmtbx import utils
    fmodel_params = \
        mmtbx.command_line.fmodel.fmodel_from_xray_structure_master_params.extract()
    fmodel_params.high_resolution = params.d_min
    fake_data = params.fake_data_from_fmodel
    fmodel_params.fmodel = fake_data.fmodel

    # Replace zero bulk-solvent parameters with fixed fallback values.
    for name, fallback in (("b_sol", 46.0), ("k_sol", 0.35)):
        if getattr(fmodel_params.fmodel, name) == 0:
            print(" %s is zero - will use mean value for d_min +/- 0.2A" %
                  name, file=out)
            print(" (this is not strongly correlated with resolution, but",
                  file=out)
            print(
                " it is preferrable to use a real value instead of leaving",
                file=out)
            print(" it set to 0)", file=out)
            setattr(fmodel_params.fmodel, name, fallback)
            print("", file=out)

    fmodel_params.structure_factors_accuracy = \
        fake_data.structure_factors_accuracy
    fmodel_params.mask = fake_data.mask
    fmodel_params.r_free_flags_fraction = params.r_free_flags.fraction
    fmodel_params.add_sigmas = False
    fmodel_params.output.type = "real"
    fmodel_ = utils.fmodel_from_xray_structure(
        xray_structure=xray_structure, params=fmodel_params)
    f_model = fmodel_.f_model
    r_free_flags = fmodel_.r_free_flags
    return (f_model, r_free_flags)
def strip_model(pdb_hierarchy=None,
                xray_structure=None,
                file_name=None,
                params=None,
                remove_waters=True,
                remove_hydrogens=True,
                remove_alt_confs=True,
                convert_semet_to_met=True,
                convert_to_isotropic=True,
                reset_occupancies=True,
                remove_ligands=False,
                reset_hetatm_flag=False,
                preserve_remarks=False,
                preserve_symmetry=True,
                add_remarks=None,
                output_file=None,
                log=None):
    """
    Utility for removing extraneous records from a model intended for use in
    molecular replacement, etc., including waters, alternate conformations,
    and other features specific to a particular dataset.

    The model is given either as ``file_name`` (read as a PDB file) or as a
    ``pdb_hierarchy`` / ``xray_structure`` pair; in the latter case copies are
    processed and the caller's objects are left alone.

    Parameters:
      pdb_hierarchy, xray_structure: input model objects; both must be None
        when ``file_name`` is given.
      file_name: path of a PDB file to read the model from.
      params: optional object whose attributes ``remove_waters``,
        ``remove_hydrogens``, ``remove_alt_confs``, ``convert_semet_to_met``,
        ``convert_to_isotropic``, ``reset_occupancies``, ``remove_ligands``
        and ``reset_hetatm_flag`` override the keyword flags of the same name.
      remove_waters: drop atoms not matching ``not (resname HOH)``.
      remove_hydrogens: drop atoms in the structure's H/D selection.
      remove_alt_confs: run ``pdbtools.remove_alt_confs`` on the hierarchy.
      convert_semet_to_met: run ``pdbtools.convert_semet_to_met``.
      convert_to_isotropic: convert all scatterers to isotropic.
      reset_occupancies: set all occupancies to 1.0 (requires
        ``remove_alt_confs``).
      remove_ligands: remove chains that are neither protein nor nucleic acid.
      reset_hetatm_flag: clear the ``hetero`` flag on every atom.
      preserve_remarks: copy the REMARK section of the input file (only
        available when the model was read from ``file_name``) to the output.
      preserve_symmetry: write crystal symmetry to the output file.
      add_remarks: optional list of lines written at the top of the output.
      output_file: if given, path the processed model is written to.
      log: file-like object for progress messages (``null_out()`` if None).

    Returns:
      tuple ``(pdb_hierarchy, xray_structure)`` of the processed model.

    Raises:
      Sorry: if the hierarchy contains more than one model.
      AssertionError: if ``file_name`` is combined with model objects, if
        ``reset_occupancies`` is requested without ``remove_alt_confs``, if
        MSE residues remain after conversion, or if hierarchy and structure
        end up with different atom counts.
    """
    if params is not None:
        remove_waters = params.remove_waters
        remove_hydrogens = params.remove_hydrogens
        remove_alt_confs = params.remove_alt_confs
        convert_semet_to_met = params.convert_semet_to_met
        convert_to_isotropic = params.convert_to_isotropic
        reset_occupancies = params.reset_occupancies
        remove_ligands = params.remove_ligands
        reset_hetatm_flag = params.reset_hetatm_flag
    if log is None:
        log = null_out()
    make_sub_header("Processing input model", out=log)
    from mmtbx import pdbtools
    remarks = None
    if file_name is not None:
        print("Reading model from %s" % file_name, file=log)
        assert ([pdb_hierarchy, xray_structure] == [None, None])
        from iotbx import file_reader
        pdb_in = file_reader.any_file(file_name,
                                      force_type="pdb",
                                      raise_sorry_if_errors=True)
        pdb_in.check_file_type("pdb")
        remarks = pdb_in.file_object.input.remark_section()
        pdb_hierarchy = pdb_in.file_object.hierarchy
        xray_structure = pdb_in.file_object.xray_structure_simple()
    else:
        # XXX work with copies, not the original structure
        pdb_hierarchy = pdb_hierarchy.deep_copy()
        xray_structure = xray_structure.deep_copy_scatterers()
    pdb_hierarchy.atoms().reset_i_seq()
    if len(pdb_hierarchy.models()) > 1:
        raise Sorry("Multiple models not supported.")

    if remove_hydrogens:
        sele = ~(xray_structure.hd_selection())
        n_hd = sele.count(False)
        if n_hd > 0:
            pdb_hierarchy = pdb_hierarchy.select(sele)
            xray_structure = xray_structure.select(sele)
            print(" removed %d hydrogens" % n_hd, file=log)
            pdb_hierarchy.atoms().reset_i_seq()

    if remove_waters:
        sele = pdb_hierarchy.atom_selection_cache().selection(
            "not (resname HOH)")
        n_wat = sele.count(False)
        if n_wat > 0:
            pdb_hierarchy = pdb_hierarchy.select(sele)
            xray_structure = xray_structure.select(sele)
            print(" removed %d waters" % n_wat, file=log)
            pdb_hierarchy.atoms().reset_i_seq()

    # Relaxed once atoms were dropped or renamed, so that the later
    # adopt_xray_structure() calls do not insist on matching id strings.
    assert_identical_id_str = True
    if remove_alt_confs:
        n_atoms_start = xray_structure.scatterers().size()
        pdbtools.remove_alt_confs(pdb_hierarchy)
        i_seqs = pdb_hierarchy.atoms().extract_i_seq()
        n_atoms_end = i_seqs.size()
        if n_atoms_end != n_atoms_start:
            # Report start minus end so the removed count is positive.
            print(" removed %d atoms in alternate conformations" %
                  (n_atoms_start - n_atoms_end), file=log)
            assert_identical_id_str = False
        # Surviving atoms still carry their old i_seq, which indexes the
        # matching scatterers.
        xray_structure = xray_structure.select(i_seqs)
        pdb_hierarchy.atoms().reset_i_seq()

    if convert_semet_to_met:
        # XXX need to start from a copy here because the atom-parent
        # relationship seems to be messed up otherwise.  this is probably a
        # bug.
        pdb_hierarchy = pdb_hierarchy.deep_copy()
        n_mse = pdbtools.convert_semet_to_met(pdb_hierarchy=pdb_hierarchy,
                                              xray_structure=xray_structure)
        if n_mse > 0:
            print(" removed %d selenomethionine (MSE) residues" % n_mse,
                  file=log)
            assert_identical_id_str = False
        sel = pdb_hierarchy.atom_selection_cache().selection
        assert sel("resname MSE").count(True) == 0

    if convert_to_isotropic:
        xray_structure.convert_to_isotropic()
        pdb_hierarchy.adopt_xray_structure(
            xray_structure, assert_identical_id_str=assert_identical_id_str)
        print(" converted all atoms to isotropic B-factors", file=log)

    if reset_occupancies:
        assert (remove_alt_confs)
        xray_structure.adjust_occupancy(occ_max=1.0, occ_min=1.0)
        pdb_hierarchy.adopt_xray_structure(
            xray_structure, assert_identical_id_str=assert_identical_id_str)
        print(" reset occupancy to 1.0 for all atoms", file=log)

    if reset_hetatm_flag:
        for atom in pdb_hierarchy.atoms():
            atom.hetero = False

    if remove_ligands:
        pdb_hierarchy.atoms().reset_i_seq()
        model = pdb_hierarchy.only_model()
        for chain in model.chains():
            if (not chain.is_protein()) and (not chain.is_na()):
                print(" removing %d ligand atoms in chain '%s'" %
                      (len(chain.atoms()), chain.id), file=log)
                model.remove_chain(chain)
        i_seqs = pdb_hierarchy.atoms().extract_i_seq()
        xray_structure = xray_structure.select(i_seqs)
        pdb_hierarchy.atoms().reset_i_seq()

    assert xray_structure.scatterers().size() == pdb_hierarchy.atoms_size()

    if output_file is not None:
        # The context manager closes the file even if a write fails.
        with open(output_file, "w") as f:
            if add_remarks is not None:
                f.write("\n".join(add_remarks))
                f.write("\n")
            if (preserve_remarks) and (remarks is not None):
                f.write("\n".join(remarks))
                f.write("\n")
            symm = None
            if preserve_symmetry:
                symm = xray_structure
            f.write(pdb_hierarchy.as_pdb_string(crystal_symmetry=symm))
        print(" wrote model to %s" % output_file, file=log)
    return pdb_hierarchy, xray_structure
def strip_model(pdb_hierarchy=None,
                xray_structure=None,
                file_name=None,
                params=None,
                remove_waters=True,
                remove_hydrogens=True,
                remove_alt_confs=True,
                convert_semet_to_met=True,
                convert_to_isotropic=True,
                reset_occupancies=True,
                remove_ligands=False,
                reset_hetatm_flag=False,
                preserve_remarks=False,
                preserve_symmetry=True,
                add_remarks=None,
                output_file=None,
                log=None):
    """
    Utility for removing extraneous records from a model intended for use in
    molecular replacement, etc., including waters, alternate conformations,
    and other features specific to a particular dataset.

    The model is given either as ``file_name`` (read as a PDB file) or as a
    ``pdb_hierarchy`` / ``xray_structure`` pair; in the latter case copies are
    processed and the caller's objects are left alone.

    Parameters:
      pdb_hierarchy, xray_structure: input model objects; both must be None
        when ``file_name`` is given.
      file_name: path of a PDB file to read the model from.
      params: optional object whose attributes ``remove_waters``,
        ``remove_hydrogens``, ``remove_alt_confs``, ``convert_semet_to_met``,
        ``convert_to_isotropic``, ``reset_occupancies``, ``remove_ligands``
        and ``reset_hetatm_flag`` override the keyword flags of the same name.
      remove_waters: drop atoms not matching ``not (resname HOH)``.
      remove_hydrogens: drop atoms in the structure's H/D selection.
      remove_alt_confs: run ``pdbtools.remove_alt_confs`` on the hierarchy.
      convert_semet_to_met: run ``pdbtools.convert_semet_to_met``.
      convert_to_isotropic: convert all scatterers to isotropic.
      reset_occupancies: set all occupancies to 1.0 (requires
        ``remove_alt_confs``).
      remove_ligands: remove chains that are neither protein nor nucleic acid.
      reset_hetatm_flag: clear the ``hetero`` flag on every atom.
      preserve_remarks: copy the REMARK section of the input file (only
        available when the model was read from ``file_name``) to the output.
      preserve_symmetry: write crystal symmetry to the output file.
      add_remarks: optional list of lines written at the top of the output.
      output_file: if given, path the processed model is written to.
      log: file-like object for progress messages (``null_out()`` if None).

    Returns:
      tuple ``(pdb_hierarchy, xray_structure)`` of the processed model.

    Raises:
      Sorry: if the hierarchy contains more than one model.
      AssertionError: if ``file_name`` is combined with model objects, if
        ``reset_occupancies`` is requested without ``remove_alt_confs``, if
        MSE residues remain after conversion, or if hierarchy and structure
        end up with different atom counts.
    """
    if params is not None:
        remove_waters = params.remove_waters
        remove_hydrogens = params.remove_hydrogens
        remove_alt_confs = params.remove_alt_confs
        convert_semet_to_met = params.convert_semet_to_met
        convert_to_isotropic = params.convert_to_isotropic
        reset_occupancies = params.reset_occupancies
        remove_ligands = params.remove_ligands
        reset_hetatm_flag = params.reset_hetatm_flag
    if log is None:
        log = null_out()
    make_sub_header("Processing input model", out=log)
    from mmtbx import pdbtools
    remarks = None
    if file_name is not None:
        print("Reading model from %s" % file_name, file=log)
        assert ([pdb_hierarchy, xray_structure] == [None, None])
        from iotbx import file_reader
        pdb_in = file_reader.any_file(file_name,
                                      force_type="pdb",
                                      raise_sorry_if_errors=True)
        pdb_in.check_file_type("pdb")
        remarks = pdb_in.file_object.input.remark_section()
        pdb_hierarchy = pdb_in.file_object.hierarchy
        xray_structure = pdb_in.file_object.xray_structure_simple()
    else:
        # XXX work with copies, not the original structure
        pdb_hierarchy = pdb_hierarchy.deep_copy()
        xray_structure = xray_structure.deep_copy_scatterers()
    pdb_hierarchy.atoms().reset_i_seq()
    if len(pdb_hierarchy.models()) > 1:
        raise Sorry("Multiple models not supported.")

    if remove_hydrogens:
        sele = ~(xray_structure.hd_selection())
        n_hd = sele.count(False)
        if n_hd > 0:
            pdb_hierarchy = pdb_hierarchy.select(sele)
            xray_structure = xray_structure.select(sele)
            print(" removed %d hydrogens" % n_hd, file=log)
            pdb_hierarchy.atoms().reset_i_seq()

    if remove_waters:
        sele = pdb_hierarchy.atom_selection_cache().selection(
            "not (resname HOH)")
        n_wat = sele.count(False)
        if n_wat > 0:
            pdb_hierarchy = pdb_hierarchy.select(sele)
            xray_structure = xray_structure.select(sele)
            print(" removed %d waters" % n_wat, file=log)
            pdb_hierarchy.atoms().reset_i_seq()

    # Relaxed once atoms were dropped or renamed, so that the later
    # adopt_xray_structure() calls do not insist on matching id strings.
    assert_identical_id_str = True
    if remove_alt_confs:
        n_atoms_start = xray_structure.scatterers().size()
        pdbtools.remove_alt_confs(pdb_hierarchy)
        i_seqs = pdb_hierarchy.atoms().extract_i_seq()
        n_atoms_end = i_seqs.size()
        if n_atoms_end != n_atoms_start:
            # Report start minus end so the removed count is positive.
            print(" removed %d atoms in alternate conformations" %
                  (n_atoms_start - n_atoms_end), file=log)
            assert_identical_id_str = False
        # Surviving atoms still carry their old i_seq, which indexes the
        # matching scatterers.
        xray_structure = xray_structure.select(i_seqs)
        pdb_hierarchy.atoms().reset_i_seq()

    if convert_semet_to_met:
        # XXX need to start from a copy here because the atom-parent
        # relationship seems to be messed up otherwise.  this is probably a
        # bug.
        pdb_hierarchy = pdb_hierarchy.deep_copy()
        n_mse = pdbtools.convert_semet_to_met(pdb_hierarchy=pdb_hierarchy,
                                              xray_structure=xray_structure)
        if n_mse > 0:
            print(" removed %d selenomethionine (MSE) residues" % n_mse,
                  file=log)
            assert_identical_id_str = False
        sel = pdb_hierarchy.atom_selection_cache().selection
        assert sel("resname MSE").count(True) == 0

    if convert_to_isotropic:
        xray_structure.convert_to_isotropic()
        pdb_hierarchy.adopt_xray_structure(
            xray_structure, assert_identical_id_str=assert_identical_id_str)
        print(" converted all atoms to isotropic B-factors", file=log)

    if reset_occupancies:
        assert (remove_alt_confs)
        xray_structure.adjust_occupancy(occ_max=1.0, occ_min=1.0)
        pdb_hierarchy.adopt_xray_structure(
            xray_structure, assert_identical_id_str=assert_identical_id_str)
        print(" reset occupancy to 1.0 for all atoms", file=log)

    if reset_hetatm_flag:
        for atom in pdb_hierarchy.atoms():
            atom.hetero = False

    if remove_ligands:
        pdb_hierarchy.atoms().reset_i_seq()
        model = pdb_hierarchy.only_model()
        for chain in model.chains():
            if (not chain.is_protein()) and (not chain.is_na()):
                print(" removing %d ligand atoms in chain '%s'" %
                      (len(chain.atoms()), chain.id), file=log)
                model.remove_chain(chain)
        i_seqs = pdb_hierarchy.atoms().extract_i_seq()
        xray_structure = xray_structure.select(i_seqs)
        pdb_hierarchy.atoms().reset_i_seq()

    assert xray_structure.scatterers().size() == pdb_hierarchy.atoms_size()

    if output_file is not None:
        # The context manager closes the file even if a write fails.
        with open(output_file, "w") as f:
            if add_remarks is not None:
                f.write("\n".join(add_remarks))
                f.write("\n")
            if (preserve_remarks) and (remarks is not None):
                f.write("\n".join(remarks))
                f.write("\n")
            symm = None
            if preserve_symmetry:
                symm = xray_structure
            f.write(pdb_hierarchy.as_pdb_string(crystal_symmetry=symm))
        print(" wrote model to %s" % output_file, file=log)
    return pdb_hierarchy, xray_structure