def test_writeGenerateAndWriteConformers(self):
    """Generate conformers and check they are written as separate SDF files.

    Ten conformers are generated for the benzamidine molecule and written
    with ``merge=False`` to a directory that does not exist yet. The test
    then checks that the directory was created and that it contains more
    than one ``.sdf`` file.
    """
    import shutil
    import tempfile

    sm = SmallMol(self.benzamidine_mol2)
    sm.generateConformers(num_confs=10, append=False)

    # Use a fresh scratch root and a *missing* sub-directory as the target.
    # The previous approach reused the name of an unreferenced
    # NamedTemporaryFile as a directory, which only works if that file has
    # already been deleted by the time ``write`` runs.
    scratch_root = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, scratch_root, ignore_errors=True)

    tmpdir = os.path.join(scratch_root, 'conformers')
    tmpfname = os.path.join(tmpdir, 'benzamidine.sdf')
    sm.write(tmpfname, merge=False)

    self.assertTrue(
        os.path.isdir(tmpdir),
        'The directory where to store the conformations was not created',
    )

    n_files = len(glob(os.path.join(tmpdir, '*.sdf')))
    self.assertGreater(
        n_files, 1,
        'Expected more than one conformer file to be written',
    )
def process_arpeggio(mol2_filename):
    """Load a small molecule and tag each of its atoms with atom types.

    The molecule is read with ``SmallMol``, written out as a PDB file and
    loaded from that file twice: once with BioPython and once with
    OpenBabel. OpenBabel atoms are mapped to BioPython atoms through their
    PDB serial numbers, the SMARTS patterns in ``ATOM_TYPES`` are matched
    against the OpenBabel molecule, and the resulting type names are stored
    on the corresponding BioPython atoms.

    The intermediate PDB file lives in a private temporary directory that is
    removed once both parsers have read it, so nothing is left behind in the
    current working directory and concurrent calls do not overwrite each
    other's file.

    Args:
        mol2_filename: Input handed directly to ``SmallMol``.

    Returns:
        list: The BioPython atoms of the structure. Each atom carries the
        extra attributes ``atom_types`` (set of type names), ``h_coords``
        (list, left empty by this function), ``is_metal`` and
        ``is_halogen`` (bools).

    Raises:
        AtomSerialError: If two atoms share the same PDB serial number.
        OBBioMatchError: If an OpenBabel atom's serial number has no
            BioPython counterpart.
    """
    import os
    import shutil
    import tempfile

    def make_pymol_string(entity):
        '''
        Feed me a BioPython atom or BioPython residue.

        See `http://pymol.sourceforge.net/newman/user/S0220commands.html`.

        chain-identifier/resi-identifier/name-identifier
        chain-identifier/resi-identifier/
        '''
        if isinstance(entity, Atom):
            residue = entity.get_parent()
            chain = residue.get_parent()
            atom_name = entity.name
        elif isinstance(entity, Residue):
            residue = entity
            chain = entity.get_parent()
            atom_name = ''
        else:
            raise TypeError(
                'Cannot make a PyMOL string from a non-Atom or Residue object.'
            )

        res_num = residue.id[1]

        # Append the insertion code when the residue has one.
        if residue.id[2] != ' ':
            res_num = str(res_num) + residue.id[2]

        return '{}/{}/{}'.format(chain.id, res_num, atom_name)

    # Convert the input to PDB and load it with both toolkits. The file is
    # only needed while the two readers run, so it is written to a scratch
    # directory that is always cleaned up.
    scratch_dir = tempfile.mkdtemp()
    try:
        pdb_filename = os.path.join(scratch_dir, 'tmp.pdb')

        slig = SmallMol(mol2_filename)
        slig.write(pdb_filename)

        # Load structure (BioPython).
        pdb_parser = PDBParser()
        s = pdb_parser.get_structure('structure', pdb_filename)
        s_atoms = list(s.get_atoms())
        logging.info('Loaded PDB structure (BioPython)')

        # Load structure (OpenBabel).
        ob_conv = ob.OBConversion()
        ob_conv.SetInFormat('pdb')
        mol = ob.OBMol()
        ob_conv.ReadFile(mol, pdb_filename)
    finally:
        shutil.rmtree(scratch_dir, ignore_errors=True)

    # Check for hydrogens in the input structure.
    input_has_hydrogens = any(atom.element == 'H' for atom in s_atoms)
    if input_has_hydrogens:
        logging.info(
            'Detected that the input structure contains hydrogens. Hydrogen addition will be skipped.'
        )

    # Check that each atom has a unique serial number; the mapping below
    # relies on it.
    all_serials = [atom.serial_number for atom in s_atoms]
    if len(all_serials) > len(set(all_serials)):
        raise AtomSerialError

    # Map PDB serial numbers to BioPython atoms once, to avoid scanning
    # `s_atoms` for every OpenBabel atom.
    serial_to_bio = {atom.serial_number: atom for atom in s_atoms}

    # Conversion dictionaries between the two toolkits.
    ob_to_bio = {}
    bio_to_ob = {}

    for ob_atom in ob.OBMolAtomIter(mol):
        serial = ob_atom.GetResidue().GetSerialNum(ob_atom)

        # Match to the BioPython atom by serial number.
        try:
            biopython_atom = serial_to_bio[serial]
        except KeyError:
            # Failing to match an OB atom to a BioPython one is an error.
            raise OBBioMatchError(serial)

        # `Id` is a unique and stable id in OpenBabel; the atom can be
        # recovered with `mol.GetAtomById(id)`.
        ob_to_bio[ob_atom.GetId()] = biopython_atom
        bio_to_ob[biopython_atom] = ob_atom.GetId()

    logging.info('Mapped OB to BioPython atoms and vice-versa.')

    # Add empty data structures for tagged atom data in a single iteration.
    for atom in s_atoms:
        # For atom typing via OpenBabel.
        atom.atom_types = set()

        # List for each atom to store explicit hydrogen coordinates.
        atom.h_coords = []

        # Detect metals and halogens.
        element = atom.element.upper()
        atom.is_metal = element in METALS
        atom.is_halogen = element in HALOGENS

    # Add explicit hydrogens for H-bonding interactions. Per the original
    # author's note, adding hydrogens doesn't seem to interfere with atom
    # serials (they get added as 0), so the persistent BioPython atoms can
    # still be recovered.
    if not input_has_hydrogens:
        # NOTE(review): `ph` is not defined inside this function; it has to
        # exist at module level or this branch raises NameError - confirm.
        mol.AddHydrogens(False, True, ph)  # polaronly, correctForPH, pH
        logging.info('Added hydrogens.')

    # Atom typing via OpenBabel: iterate over the SMARTS definitions of
    # every atom type.
    for atom_type, smartsdict in ATOM_TYPES.items():
        for smarts in smartsdict.values():
            ob_smart = ob.OBSmartsPattern()
            ob_smart.Init(str(smarts))
            ob_smart.Match(mol)

            # Flatten all matches into one set of atom indices.
            matched_indices = {
                index
                for match in ob_smart.GetMapList()
                for index in match
            }

            for index in matched_indices:
                atom = mol.GetAtom(index)
                ob_to_bio[atom.GetId()].atom_types.add(atom_type)

    # All water molecules are hydrogen bond donors and acceptors.
    for atom in s_atoms:
        if atom.get_full_id()[3][0] == 'W':
            atom.atom_types.add('hbond acceptor')
            atom.atom_types.add('hbond donor')

    # Override protein atom typing from the dictionary.
    for residue in s.get_residues():
        if residue.resname not in STD_RES:
            continue

        for atom in residue.child_list:
            # Remove types if already assigned from SMARTS.
            for atom_type in PROT_ATOM_TYPES:
                atom.atom_types.discard(atom_type)

            # Add atom types from the dictionary; the lookup key is the
            # residue name followed by the atom name.
            atom_id = residue.resname.strip() + atom.name.strip()
            for atom_type, atom_ids in PROT_ATOM_TYPES.items():
                if atom_id in atom_ids:
                    atom.atom_types.add(atom_type)

    # A disabled block that dumped each atom's PyMOL string and sorted atom
    # types to a '.atomtypes' file used to live here; `make_pymol_string`
    # is kept for it and is not called by the active code.

    return s_atoms