def calculate_rmsd_between_fragged_files(file1, file2):
    """Compute the fragment-wise RMSDs for two homologous fragmented files.

    Each file is loaded with ``check_pdb_readable`` (``file1`` first, then
    ``file2``) and the two resulting molecules are handed to
    ``calculate_rmsd_between_fragged_mols``. Whatever that helper returns
    is passed straight back to the caller -- presumably a list with one
    rmsd per pair of matching fragments (verify against the helper).
    """
    first_mol, second_mol = [check_pdb_readable(path) for path in (file1, file2)]
    return calculate_rmsd_between_fragged_mols(first_mol, second_mol)
def order_structures_by_minimum_distance_to_reference(refpdb, pdbs, mincutoff=1.5, maxcutoff=6):
    """Keep the structures whose minimum distance to the reference is in range.

    The minimum distance between the reference and each candidate is obtained
    from ``calculate_minimum_distance_between_mols``. Candidates whose distance
    lies in ``[mincutoff, maxcutoff]`` (both ends inclusive) are kept.

    A falsy ``mincutoff`` (``None``, ``0``) is treated as 0 and a falsy
    ``maxcutoff`` is treated as 9999999, i.e. effectively "no limit".

    Returns the retained entries of ``pdbs`` ordered by increasing minimum
    distance; entries with equal distance keep their input order.
    """
    # Substitute permissive bounds when a cutoff was not supplied
    lower_bound = mincutoff if mincutoff else 0
    upper_bound = maxcutoff if maxcutoff else 9999999

    reference = check_pdb_readable(refpdb)
    scored = []
    for candidate in pdbs:
        candidate_mol = check_pdb_readable(candidate)
        distance = calculate_minimum_distance_between_mols(reference, candidate_mol)
        # Only keep structures that are neither too close nor too far away
        if distance >= lower_bound and distance <= upper_bound:
            scored.append((distance, candidate))

    # Nearest first; sorting on the distance alone keeps ties in input order
    scored.sort(key=lambda pair: pair[0])
    return [path for _, path in scored]
def order_structures_by_number_of_contacts_to_reference(refpdb, pdbs, cutoff=3.5):
    """Rank structures by their number of contacts to the reference.

    For every entry of ``pdbs`` the value returned by
    ``calculate_number_of_pairwise_distances_within_cutoff`` (called with the
    reference molecule, the candidate molecule and ``cutoff``) is recorded.

    Returns a list of ``(contacts, pdbfile)`` tuples sorted by decreasing
    ``contacts``; entries with equal counts keep their input order.
    """
    reference = check_pdb_readable(refpdb)
    ranked = []
    for path in pdbs:
        candidate_mol = check_pdb_readable(path)
        n_contacts = calculate_number_of_pairwise_distances_within_cutoff(
            reference, candidate_mol, cutoff=cutoff)
        ranked.append((n_contacts, path))

    # Most contacts first; key on the count only so file names are never compared
    ranked.sort(key=lambda entry: entry[0], reverse=True)
    return ranked
def calculate_coordinate_differences(model1, model2):
    """Return per-atom coordinate differences between two matching structures.

    Both models are loaded with ``check_pdb_readable``. The molecules must
    have the same number of heavy atoms and the same number of bonds.

    Returns:
        ``None`` if either molecule is falsy after loading, or if ``mol2``
        has no substructure match in ``mol1``. Otherwise a list with one
        entry per match (non-uniquified); each entry is a list of
        ``(dx, dy, dz)`` tuples, ``mol1`` coordinate minus ``mol2``
        coordinate, ordered by the atom index in ``mol2``.

    Raises:
        EqualityError: if the heavy-atom counts or the bond counts differ.
    """
    first = check_pdb_readable(model1)
    second = check_pdb_readable(model2)
    # Bail out if either structure could not be loaded
    if not (first and second):
        return None

    # Cheap sanity checks that the two molecules are the same compound
    if first.GetNumHeavyAtoms() != second.GetNumHeavyAtoms():
        atom_msg = 'Molecules are not identical (Num Atoms) {!s} != {!s}.\n{!s}\n{!s}'
        raise EqualityError(atom_msg.format(
            first.GetNumHeavyAtoms(), second.GetNumHeavyAtoms(),
            Chem.MolToSmiles(first), Chem.MolToSmiles(second)))
    if first.GetNumBonds() != second.GetNumBonds():
        bond_msg = 'Molecules are not identical (Num Bonds) {!s} != {!s}:\n{!s}\n{!s}'
        raise EqualityError(bond_msg.format(
            first.GetNumBonds(), second.GetNumBonds(),
            Chem.MolToSmiles(first), Chem.MolToSmiles(second)))

    # Each mapping lists the atom indices of `first` that correspond to
    # atoms 0, 1, 2, ... of `second`
    mappings = first.GetSubstructMatches(second, uniquify=False)
    if not mappings:
        return None

    # Coordinates are read from the first conformer of each molecule
    coords_first = first.GetConformer(0)
    coords_second = second.GetConformer(0)

    def _delta(index_in_first, index_in_second):
        """Coordinate offset (first minus second) for one matched atom pair."""
        pos_a = coords_first.GetAtomPosition(index_in_first)
        pos_b = coords_second.GetAtomPosition(index_in_second)
        return (pos_a.x - pos_b.x, pos_a.y - pos_b.y, pos_a.z - pos_b.z)

    # One list of offsets for every possible way of matching the molecules
    return [
        [_delta(index_in_first, index_in_second)
         for index_in_second, index_in_first in enumerate(mapping)]
        for mapping in mappings
    ]
def break_and_rename_mol_to_file(infile, outfile):
    """Fragment a ligand and write the fragments to one file, tagged by inscode.

    The ligand in ``infile`` is split with ``break_on_rotatable_bonds_to_mols``.
    Each fragment is given its own insertion code ('A' for the first
    fragment, 'B' for the second, ...) and its atom serial numbers are reset
    to those of the original file. The fragments are then written to
    ``outfile`` between the original file's ``easy_header`` and
    ``easy_footer``, terminated by a single ``END`` line.

    Args:
        infile: path of the ligand file to fragment. It must contain exactly
            one chain and one residue.
        outfile: path of the file to write.

    Returns:
        ``outfile``, unchanged.

    Raises:
        RDkitReadError: if ``check_pdb_readable`` returns a falsy value for
            ``infile``.
        AssertionError: if the input file, or any generated fragment, does
            not contain exactly one chain and exactly one residue.
    """
    inmol = check_pdb_readable(infile)
    # PDB-rdkit disagreement...
    if not inmol:
        raise RDkitReadError('Cannot read {!s}'.format(infile))

    orig_mol = MacroMol(infile)
    assert len(orig_mol.getChains()) == 1, \
        'MORE THAN ONE CHAIN PRESENT IN FILE! {!s}'.format(infile)
    assert len(orig_mol.getResidues()) == 1, \
        'MORE THAN ONE RESIDUE PRESENT IN FILE! {!s}'.format(infile)

    # Keep the atom numbers (these are conserved in the mappings)
    atom_numbers = {
        a.atomname: a.serialnum for a in orig_mol.getResidues()[0].atoms
    }

    # Break the molecule into fragments (or get inmol back if no rotatable bonds)
    frags = break_on_rotatable_bonds_to_mols(inmol)

    # List of output pdb blocks, one per fragment
    out_blocks = []
    for i, frag in enumerate(frags):
        raw_block = Chem.MolToPDBBlock(frag)
        raw_frag = MacroMol(raw_block)
        # Check there's only one chain and one residue in the fragment
        assert len(raw_frag.getChains()) == 1, \
            'FRAGMENT HAS MORE THAN ONE CHAIN? {!s}\n{!s}'.format(
                infile, raw_block)
        assert len(raw_frag.getResidues()) == 1, \
            'FRAGMENT HAS MORE THAN ONE RESIDUE? {!s}\n{!s}'.format(
                infile, raw_block)

        # Form the new inscode: fragment 0 -> 'A', fragment 1 -> 'B', ...
        new_inscode = chr(i + ord('A'))

        residue = raw_frag.getResidues()[0]
        residue.update_inscode(new_inscode)
        # Change the atom numbers back to those in the original file for continuity
        residue.update_atom_numbers(atom_numbers)

        out_blocks.append(residue.getPdbString())

    # Header and footer are carried over from the original file
    header_block = ''.join(orig_mol.easy_header).strip('\n')
    footer_block = ''.join(orig_mol.easy_footer).strip('\n')
    # Drop every 'END' from the fragment blocks; a single END is re-added below
    out_block = ''.join(out_blocks).replace('END', '').strip('\n')

    block_to_write = '\n'.join(
        [header_block, out_block, footer_block, 'END\n'])
    # Collapse doubled newlines left behind by empty sections (single pass)
    block_to_write = block_to_write.replace('\n\n', '\n')

    # Use a context manager so the handle is flushed and closed deterministically
    with open(outfile, 'w') as handle:
        handle.write(block_to_write)

    return outfile