def get_total_charge(self,
                     list_charges=False,
                     assert_correct_chain_terminii=True,
                     check=False,
                     verbose=False,
                     ):
    """Return the total charge of the model held by this object.

    Two back ends are available and ``self.electrons`` selects between them:

    * truthy  -- delegate to ``qrefine.utils.electrons.run`` using
      ``self.pdb_filename`` / ``self.raw_records``;
    * falsy   -- delegate to ``self.calculate_pdb_hierarchy_charge`` using
      ``self.pdb_hierarchy``.

    Parameters
      list_charges: forwarded as ``return_formal_charges`` on the electrons
        path and as ``list_charges`` on the hierarchy path.
      assert_correct_chain_terminii: forwarded on the hierarchy path only.
      check: forwarded on the hierarchy path only.
      verbose: forwarded to whichever back end is used.

    Returns
      Whatever the selected back end returns, unmodified.
    """
    if self.electrons:
        # Imported here so the module is only required on this path.
        from qrefine.utils import electrons
        if self.pdb_filename is None and self.raw_records is None:
            # No file and no cached records: build the records from the
            # hierarchy once and keep them on the instance for later calls.
            self.raw_records = hierarchy_utils.get_raw_records(
                pdb_hierarchy=self.pdb_hierarchy,
                crystal_symmetry=self.crystal_symmetry,
            )
        # The original had an ``assert 0`` after this return; it could never
        # execute and has been removed.
        return electrons.run(
            pdb_filename=self.pdb_filename,
            raw_records=self.raw_records,
            return_formal_charges=list_charges,
            verbose=verbose,
        )
    return self.calculate_pdb_hierarchy_charge(
        self.pdb_hierarchy,
        hetero_charges=self.hetero_charges,
        inter_residue_bonds=self.inter_residue_bonds,
        list_charges=list_charges,
        assert_correct_chain_terminii=assert_correct_chain_terminii,
        check=check,
        verbose=verbose,
    )
def run(pdb_filename=None,
        pdb_hierarchy=None,
        crystal_symmetry=None,
        model_completion=True,
        original_pdb_filename=None,
        append_to_end_of_model=True,
        ):
    """Complete a model and return the resulting pdb hierarchy.

    The function can be used in two main modes:
      1. ``model_completion=True``  -- complete a model with hydrogens in a
         protein-like manner (no capping hydrogens, output tag 'complete');
      2. ``model_completion=False`` -- complete a cluster with hydrogens in a
         QM-sensible manner (capping hydrogens, output tag 'capping').

    The input is either ``pdb_filename`` or ``pdb_hierarchy``. When a
    hierarchy is given, ``crystal_symmetry`` must be truthy and
    ``pdb_filename`` must be None (both enforced with ``assert``).

    ``original_pdb_filename`` and ``append_to_end_of_model`` are forwarded to
    ``complete_pdb_hierarchy``. When ``pdb_filename`` is truthy the completed
    hierarchy is also written out through ``hierarchy_utils.write_hierarchy``.

    Returns the ``pdb_hierarchy`` of the completed, processed model.
    """
    if pdb_hierarchy:
        assert crystal_symmetry
        assert pdb_filename is None

    use_capping_hydrogens = not model_completion
    fname = 'complete' if model_completion else 'capping'

    params = None
    if use_capping_hydrogens:
        params = hierarchy_utils.get_pdb_interpretation_params()
        params.link_distance_cutoff = 1.8

    if pdb_hierarchy:
        records = hierarchy_utils.get_raw_records(
            pdb_inp=None,
            pdb_hierarchy=pdb_hierarchy,
            crystal_symmetry=crystal_symmetry,
        )
        # NOTE(review): unlike the filename branch below, ``params`` is not
        # forwarded here -- confirm whether that is intentional.
        ppf = hierarchy_utils.get_processed_pdb(raw_records=records)
        # Copy the caller's coordinates onto the freshly processed hierarchy.
        xyz = pdb_hierarchy.atoms().extract_xyz()
        ppf.all_chain_proxies.pdb_hierarchy.atoms().set_xyz(xyz)
    else:
        ppf = hierarchy_utils.get_processed_pdb(
            pdb_filename=pdb_filename,
            params=params,
        )

    completed = complete_pdb_hierarchy(
        ppf.all_chain_proxies.pdb_hierarchy,
        ppf.geometry_restraints_manager(),
        use_capping_hydrogens=use_capping_hydrogens,
        # needed for clustering code and Molprobity
        append_to_end_of_model=append_to_end_of_model,
        # used just for naming of debug output
        pdb_filename=pdb_filename,
        # used in get_raw_records. why?
        pdb_inp=ppf.all_chain_proxies.pdb_inp,
        # used to define breaks in main chain for capping
        original_pdb_filename=original_pdb_filename,
        verbose=False,
    )
    completed_proxies = completed.all_chain_proxies

    if pdb_filename:
        hierarchy_utils.write_hierarchy(
            pdb_filename,
            completed_proxies.pdb_inp,
            completed_proxies.pdb_hierarchy,
            fname,
        )
    return completed_proxies.pdb_hierarchy
def use_electrons_to_add_hdyrogens(hierarchy,
                                   geometry_restraints_manager,
                                   use_capping_hydrogens=False,
                                   append_to_end_of_model=False,
                                   ):
    """Adjust hydrogens on ``hierarchy`` from the atoms reported as charged.

    Does nothing and returns None unless ``use_capping_hydrogens`` is truthy.
    Otherwise ``qrefine.utils.electrons.run`` is called on raw records built
    from ``hierarchy`` (the crystal symmetry is read from
    ``geometry_restraints_manager``), and for every ``(atom, value)`` pair
    from ``get_charged_atoms()``:

    * a hydrogen atom whose value equals 1 is queued for removal;
    * unless the atom's residue class is 'common_amino_acid', a hydrogen is
      added through ``_add_hydrogens_to_atom_group_using_bad``.

    Queued hydrogens are then removed from their atom groups.

    Returns
      None when ``use_capping_hydrogens`` is falsy; otherwise the result of
      the last ``_add_hydrogens_to_atom_group_using_bad`` call, or ``[]`` if
      no such call was made.
    """
    if not use_capping_hydrogens:
        return
    from qrefine.utils import electrons
    rc = []
    raw_records = hierarchy_utils.get_raw_records(
        pdb_hierarchy=hierarchy,
        crystal_symmetry=geometry_restraints_manager.crystal_symmetry,
    )
    charges = electrons.run(pdb_filename=None,
                            raw_records=raw_records,
                            return_formal_charges=True,
                            )
    remove = []
    # The loop variable used to be called ``electrons`` and shadowed the
    # module imported above.
    for atom, n_electrons in charges.get_charged_atoms():
        if atom.element_is_hydrogen() and n_electrons == 1:
            remove.append(atom)
        if get_class(atom.parent().resname) in ['common_amino_acid']:
            continue
        atom = hierarchy.atoms()[atom.i_seq]
        # this does not even work
        rc = _add_hydrogens_to_atom_group_using_bad(
            atom.parent(),
            ' H1 ',
            'H',
            atom.name.strip(),
            'C4',
            'C3',
            1.,
            120.,
            160.,
            append_to_end_of_model=append_to_end_of_model,
        )
    # Highest i_seq first (same order as the former sort-then-reverse), using
    # a key function instead of a positional cmp function so the call is
    # valid on both Python 2 and Python 3.
    for atom in sorted(remove, key=lambda a: a.i_seq, reverse=True):
        # this is a kludge: look the atom up again through the hierarchy and
        # remove it from its group by name
        name = atom.name
        atom = hierarchy.atoms()[atom.i_seq]
        atom_group = atom.parent()
        atom_group.remove_atom(atom_group.get_atom(name.strip()))
    return rc
def calculate_residue_charge(self,
                             rg,
                             assert_contains_hydrogens=True,
                             assert_no_alt_loc=True,
                             hetero_charges=None,
                             inter_residue_bonds=None,
                             verbose=False,
                             ):
    """Work out the charge of a single residue group.

    Parameters
      rg: residue group; only its first atom group is inspected.
      assert_contains_hydrogens: raise ``Sorry`` when the residue has no
        hydrogen atoms although ``get_aa_polymer_hydrogens`` lists some.
      assert_no_alt_loc: raise ``Sorry`` when ``rg`` has more than one atom
        group.
      hetero_charges: optional mapping from stripped residue name to charge,
        consulted for 'common_element' residues whose atom carries no charge.
      inter_residue_bonds: optional container of atom i_seqs; if any atom of
        the residue is in it, one is added to the hydrogen difference.
        ``None`` is treated as "no bonds".
      verbose: print a trace; forced on when ``self.verbose`` is truthy.

    Returns
      ``(charge, charge)`` for 'common_element' residues, otherwise
      ``(charge, rc, annot)`` where ``rc`` is the ``get_aa_charge`` value
      (forced to -1 for GLU/ASP, reporting only) and ``annot`` is a short
      text annotation.
      NOTE(review): the two return shapes differ in length -- confirm that
      callers cope with both.

    Raises
      Sorry: alt locs present, ion without charge, or missing hydrogens.
      AssertionError: charge outside the permitted range, or ligand partial
        charges not summing to (nearly) an integer.
    """
    if self.verbose:
        verbose = True
    if verbose:
        print('-' * 80)

    def _terminal(names, check):
        # True when every name in ``check`` is present in ``names``.
        return all(name in names for name in check)

    def _any_terminal(names, alternatives):
        # True when at least one alternative name set is fully present.
        return any(_terminal(names, check) for check in alternatives)

    def n_terminal(residue_name, atom_names):
        if residue_name in ["PRO"]:
            check_names = [
                [' H2 ', ' H3 '],
                [' H 1', ' H 2'],  # CHARMM...
                [' HN1', ' HN2'],  # BABEL...
            ]
        else:
            check_names = [
                [' H1 ', ' H2 ', ' H3 '],
            ]
        return _any_terminal(atom_names, check_names)

    def n_capping(residue_name, atom_names):
        if residue_name in ["PRO"]:
            check_names = [[' H2 ']]
        else:
            check_names = [
                [' H1 ', ' H2 '],
                [' H  ', ' H2 '],  # from finalise
            ]
        return _any_terminal(atom_names, check_names)

    def nh2_terminal(atom_names):
        return _terminal(atom_names, [' HT1', ' HT2'])

    def nh3_terminal(atom_names):
        return _terminal(atom_names, [' HT1', ' HT2', ' HT3'])

    def c_terminal(atom_names):
        return (_terminal(atom_names, [' OXT'])
                or _terminal(atom_names, [' OT1', ' OT2']))

    def c_capping(atom_names):
        return _terminal(atom_names, [' HC '])

    def covalent_bond(i_seqs, inter_residue_bonds):
        # The parameter defaults to None; a membership test on None would
        # raise TypeError, so treat None/empty as "no bonds".
        if not inter_residue_bonds:
            return False
        return any(i_seq in inter_residue_bonds for i_seq in i_seqs)

    ############
    max_charge = 1
    if assert_no_alt_loc and len(rg.atom_groups()) > 1:
        raise Sorry("alt locs in %s" % hierarchy_utils.display_residue_group(rg))

    # ions
    # needs to be centralised!!!
    resname = rg.atom_groups()[0].resname
    if get_class(resname) == "common_element":
        atom = rg.atoms()[0]
        if atom.charge.strip():
            # Charge given on the atom itself takes precedence.
            return atom.charge_as_int(), atom.charge_as_int()
        if not hetero_charges:
            raise Sorry('no charge found in the model file for "%s"' %
                        atom.quote())
        charge = hetero_charges.get(atom.parent().resname.strip(), None)
        if charge is None:
            raise Sorry(
                'no charge found in the model file or hetero_charges for "%s"'
                % atom.quote())
        return charge, charge

    # others
    hs = 0
    atom_names = []
    atom_i_seqs = []
    for atom in rg.atoms():
        if verbose:
            print('... %s' % (atom.quote(),))
        if atom.element_is_hydrogen():
            hs += 1
        atom_names.append(atom.name)
        atom_i_seqs.append(atom.i_seq)
    if verbose:
        print(get_class(resname))
    if assert_contains_hydrogens and hs == 0:
        hydrogens = get_aa_polymer_hydrogens(resname)
        if len(hydrogens) != 0:
            if verbose:
                for atom in rg.atoms():
                    print('H %s' % (atom.quote(),))
            raise Sorry("no hydrogens: %s" %
                        hierarchy_utils.display_residue_group(rg))

    ag = rg.atom_groups()[0]
    charge = get_aa_charge(ag.resname)
    rc = get_aa_charge(ag.resname)
    if ag.resname in ['GLU', 'ASP']:
        rc = -1  # reporting only
    annot = ''
    if verbose:
        print('%s\nstarting charge: %s' % ('*' * 80, charge))

    if (get_class(ag.resname) in ["common_amino_acid", "modified_amino_acid"]
            or ag.resname in aac.three_letter_l_given_three_letter_d):
        if verbose:
            print(ag.id_str())
            print('number of hydrogens %s' %
                  len(get_aa_polymer_hydrogens(ag.resname)))
        poly_hs = len(get_aa_polymer_hydrogens(ag.resname)) - 2
        diff_hs = hs - poly_hs
        if verbose:
            print('charge: %s poly_hs: %s diff_hs: %s' % (
                charge, poly_hs, diff_hs,
            ))
            print(atom_names)

        # N-terminal side: first matching pattern wins.
        if n_terminal(ag.resname, atom_names):
            diff_hs -= 1
            max_charge += 1
            if verbose:
                print('n_terminal')
                print('charge: %s poly_hs: %s diff_hs: %s' % (
                    charge, poly_hs, diff_hs,
                ))
            annot += 'N-term. '
        elif nh3_terminal(atom_names):
            diff_hs -= 1
            max_charge += 1
            if verbose:
                print('nh3_terminal True')
            annot += 'NH3-term. '
        elif nh2_terminal(atom_names):
            diff_hs -= 1
            max_charge += 1
            if verbose:
                print('nh2_terminal True')
            annot += 'NH2-term. '
        elif n_capping(ag.resname, atom_names):
            diff_hs -= 1
            if verbose:
                print('n_capping True')
            annot += 'N-capp. '
        else:
            if verbose:
                print('no N term')

        # C-terminal side.
        if c_terminal(atom_names):
            diff_hs -= 1
            max_charge += 1
            if verbose:
                print('c_terminal')
                print('charge: %s poly_hs: %s diff_hs: %s' % (
                    charge, poly_hs, diff_hs,
                ))
            annot += 'C-term. '
        elif c_capping(atom_names):
            diff_hs -= 1
            if verbose:
                print('c_capping')
                print('charge: %s poly_hs: %s diff_hs: %s' % (
                    charge, poly_hs, diff_hs,
                ))
            annot += 'C-capp. '
        else:
            if verbose:
                print('no C term')

        if covalent_bond(atom_i_seqs, inter_residue_bonds):
            diff_hs += 1
            if verbose:
                print('covalent_bond %s' % (atom_i_seqs,))
            annot += 'Coval. '
        if hierarchy_utils.is_n_terminal_atom_group(ag):
            if verbose:
                print('subtracting due to N terminal')
            diff_hs -= 1
        if verbose:
            print('residue: %s charge: %s poly_hs: %2s diff_hs: %2s total: %2s %s' % (
                ag.resname, charge, poly_hs, diff_hs, charge + diff_hs, annot,
            ))
        charge += diff_hs
        # A non-zero charge switches the summary line below off.
        if charge:
            verbose = 0
        if verbose:
            print('  %s charge: %-2s poly_hs: %s diff_hs: %-2s' % (
                ag.id_str(), charge, poly_hs, diff_hs,
            ))
        assert abs(charge) <= max_charge, \
            'residue %s charge %s is greater than %s' % (
                rg.atoms()[0].quote(), charge, max_charge,
            )
        if resname in allowable_amino_acid_charges:
            allowed = allowable_amino_acid_charges[resname]
            assert allowed - 1 <= charge <= allowed + 1, \
                'resname %s charge %s range %s %s' % (
                    resname, charge, allowed - 1, allowed + 1,
                )
    else:
        restraints = _get_restraints_from_resname(
            ag.resname,
            self.mon_lib_server,
        )
        if ag.resname in ['MTN']:
            from qrefine.utils import electrons
            if self.pdb_filename is None and self.raw_records is None:
                self.raw_records = hierarchy_utils.get_raw_records(
                    pdb_hierarchy=self.pdb_hierarchy,
                    crystal_symmetry=self.crystal_symmetry,
                )
            charge = electrons.run(
                pdb_filename=self.pdb_filename,
                raw_records=self.raw_records,
            )
            annot = 'non-polymer'
        else:
            total = sum(atom.partial_charge
                        for atom in restraints.atom_dict().values())
            # Round to the nearest integer: truncating with int() rejected
            # sums such as 0.9999 that are an integer up to float error.
            nearest = int(round(total))
            # The message used ``ag.name``; ``resname`` is the attribute used
            # for atom groups everywhere else in this function.
            assert abs(total - nearest) < 0.01, \
                'sum of parial charges fo %s not accurate %f' % (ag.resname, total)
            charge = nearest
            annot = 'ligand'
    return charge, rc, annot
def complete_pdb_hierarchy(hierarchy,
                           geometry_restraints_manager,
                           use_capping_hydrogens=False,
                           append_to_end_of_model=False,
                           pdb_filename=None,
                           pdb_inp=None,
                           original_pdb_filename=None,
                           verbose=False,
                           debug=False,
                           ):
    """Complete ``hierarchy`` and return the final processed-pdb object.

    Steps, in order:
      1. without capping hydrogens only: extend side chains, write a
         'readyset_input' file and add hydrogens with ReadySet;
      2. remove side chain acid hydrogens;
      3. add hydrogens in special cases;
      4. add terminal atoms (``add_terminal_hydrogens``);
      5. set chemical elements where necessary and sort atoms in place.
    Before each of steps 1-4 the model is re-processed so that the helper
    receives a freshly processed hierarchy.

    Parameters
      hierarchy: input pdb hierarchy.
      geometry_restraints_manager: accepted for interface compatibility; not
        referenced in this function.
      use_capping_hydrogens: skip step 1 and use interpretation params with
        ``link_distance_cutoff = 1.8``.
      append_to_end_of_model: forwarded to ``add_terminal_hydrogens``.
      pdb_filename, pdb_inp: forwarded to ``hierarchy_utils.write_hierarchy``
        / ``get_raw_records``.
      original_pdb_filename: when capping and truthy, read with
        ``iotbx.pdb.input`` and passed on as ``original_hierarchy``.
      verbose: forwarded to the hydrogen helpers.
      debug: write intermediate files ('temp1', 'temp2', 'temp3') and
        re-process from the most recently written file instead of from memory.
        NOTE(review): steps 2 and 3 re-read ``output`` without writing a new
        file first -- confirm the debug path still reflects in-memory edits.

    Returns
      The processed-pdb object produced by the last re-processing.

    Raises
      Sorry: if any atom group is of class 'common_rna_dna'.
    """
    for ag in hierarchy.atom_groups():
        if get_class(ag.resname) in ['common_rna_dna']:
            # Previously raised with an empty message.
            raise Sorry(
                'complete_pdb_hierarchy does not handle residues of class '
                '"common_rna_dna": %s' % ag.id_str())
    from mmtbx.building import extend_sidechains

    params = None
    original_hierarchy = None
    if use_capping_hydrogens:
        params = hierarchy_utils.get_pdb_interpretation_params()
        params.link_distance_cutoff = 1.8  # avoid linking across a single missing AA
        if original_pdb_filename:
            original_pdb_inp = iotbx.pdb.input(original_pdb_filename)
            original_hierarchy = original_pdb_inp.construct_hierarchy()

    def _reprocess(current):
        # Build a processed model from ``current`` via raw records, then copy
        # the coordinates of ``current`` onto the new hierarchy.
        raw_records = hierarchy_utils.get_raw_records(pdb_inp, current)
        processed = hierarchy_utils.get_processed_pdb(
            raw_records=raw_records,
            params=params,
        )
        sites_cart = current.atoms().extract_xyz()
        processed.all_chain_proxies.pdb_hierarchy.atoms().set_xyz(sites_cart)
        return processed

    if debug:
        output = hierarchy_utils.write_hierarchy(
            pdb_filename,
            pdb_inp,
            hierarchy,
            'temp1',
        )
    if not use_capping_hydrogens:
        #
        # assume model is heavy-atom complete
        #
        if debug:
            ppf = hierarchy_utils.get_processed_pdb(pdb_filename=output)
        else:
            ppf = _reprocess(hierarchy)
        n_changed = extend_sidechains.extend_protein_model(
            ppf.all_chain_proxies.pdb_hierarchy,
            mon_lib_server,
            add_hydrogens=False,
        )
        if debug:
            print('number of side chains changed %s' % (n_changed,))
            output = hierarchy_utils.write_hierarchy(
                pdb_filename,
                pdb_inp,
                ppf.all_chain_proxies.pdb_hierarchy,
                'temp2',
            )
        #
        # need to use Reduce/ReadySet! to add hydrogens
        #
        output = hierarchy_utils.write_hierarchy(
            pdb_filename,
            pdb_inp,
            ppf.all_chain_proxies.pdb_hierarchy,
            'readyset_input',
        )
        hierarchy = hierarchy_utils.add_hydrogens_using_ReadySet(output)
    #
    # remove side chain acid hydrogens - maybe not required since recent changes
    #
    if debug:
        ppf = hierarchy_utils.get_processed_pdb(
            pdb_filename=output,
            params=params,
        )
    else:
        ppf = _reprocess(hierarchy)
    remove_acid_side_chain_hydrogens(ppf.all_chain_proxies.pdb_hierarchy)
    #
    # add hydrogens in special cases
    #  eg ETA
    #  eg N - H, H2
    #
    if debug:
        ppf = hierarchy_utils.get_processed_pdb(
            pdb_filename=output,
            params=params,
        )
    else:
        ppf = _reprocess(ppf.all_chain_proxies.pdb_hierarchy)
    special_case_hydrogens(
        ppf.all_chain_proxies.pdb_hierarchy,
        ppf.geometry_restraints_manager(),
        verbose=verbose,
    )
    #
    # add terminals atoms including hydrogens and OXT - more docs here...
    #
    if debug:
        output = hierarchy_utils.write_hierarchy(
            pdb_filename,
            pdb_inp,
            ppf.all_chain_proxies.pdb_hierarchy,
            'temp3',
        )
        ppf = hierarchy_utils.get_processed_pdb(
            pdb_filename=output,
            params=params,
        )
    else:
        ppf = _reprocess(ppf.all_chain_proxies.pdb_hierarchy)
    add_terminal_hydrogens(
        ppf.all_chain_proxies.pdb_hierarchy,
        ppf.geometry_restraints_manager(),
        use_capping_hydrogens=use_capping_hydrogens,
        append_to_end_of_model=append_to_end_of_model,
        original_hierarchy=original_hierarchy,
        verbose=verbose,
    )  # in place
    final_hierarchy = ppf.all_chain_proxies.pdb_hierarchy
    final_hierarchy.atoms().set_chemical_element_simple_if_necessary()
    final_hierarchy.sort_atoms_in_place()
    return ppf