Ejemplo n.º 1
0
 def test_writeGenerateAndWriteConformers(self):
     sm = SmallMol(self.benzamidine_mol2)
     sm.generateConformers(num_confs=10, append=False)
     tmpfname = os.path.join(NamedTemporaryFile().name, 'benzamidine.sdf')
     tmpdir = os.path.dirname(tmpfname)
     sm.write(tmpfname, merge=False)
     direxists = os.path.isdir(tmpdir)
     n_files = len(glob(os.path.join(tmpdir, '*.sdf')))
     self.assertTrue(direxists, 'The directory where to store the conformations where not created')
     self.assertGreater(n_files, 1, 'None conformations were written. At least one should be present')
Ejemplo n.º 2
0
def process_arpeggio(mol2_filename):

    slig = SmallMol(mol2_filename)
    slig.write('tmp.pdb')

    pdb_filename = 'tmp.pdb'

    # LOAD STRUCTURE (BIOPYTHON)
    pdb_parser = PDBParser()
    s = pdb_parser.get_structure('structure', pdb_filename)
    s_atoms = list(s.get_atoms())

    logging.info('Loaded PDB structure (BioPython)')

    # CHECK FOR HYDROGENS IN THE INPUT STRUCTURE
    input_has_hydrogens = False
    hydrogens = [x for x in s_atoms if x.element == 'H']

    if hydrogens:
        logging.info(
            'Detected that the input structure contains hydrogens. Hydrogen addition will be skipped.'
        )
        input_has_hydrogens = True

    # LOAD STRUCTURE (OPENBABEL)
    ob_conv = ob.OBConversion()
    ob_conv.SetInFormat('pdb')
    mol = ob.OBMol()
    ob_conv.ReadFile(mol, pdb_filename)

    # CHECK THAT EACH ATOM HAS A UNIQUE SERIAL NUMBER
    all_serials = [x.serial_number for x in s_atoms]

    if len(all_serials) > len(set(all_serials)):
        raise AtomSerialError

    # MAPPING OB ATOMS TO BIOPYTHON ATOMS AND VICE VERSA

    # FIRST MAP PDB SERIAL NUMBERS TO BIOPYTHON ATOMS FOR SPEED LATER
    # THIS AVOIDS LOOPING THROUGH `s_atoms` MANY TIMES
    serial_to_bio = {x.serial_number: x for x in s_atoms}

    # DICTIONARIES FOR CONVERSIONS
    ob_to_bio = {}
    bio_to_ob = {}

    for ob_atom in ob.OBMolAtomIter(mol):

        serial = ob_atom.GetResidue().GetSerialNum(ob_atom)

        # MATCH TO THE BIOPYTHON ATOM BY SERIAL NUMBER
        try:
            biopython_atom = serial_to_bio[serial]

        except KeyError:
            # ERRORWORTHY IF WE CAN'T MATCH AN OB ATOM TO A BIOPYTHON ONE
            raise OBBioMatchError(serial)

        # `Id` IS A UNIQUE AND STABLE ID IN OPENBABEL
        # CAN RECOVER THE ATOM WITH `mol.GetAtomById(id)`
        ob_to_bio[ob_atom.GetId()] = biopython_atom
        bio_to_ob[biopython_atom] = ob_atom.GetId()

    logging.info('Mapped OB to BioPython atoms and vice-versa.')

    # ADD EMPTY DATA STRUCTURES FOR TAGGED ATOM DATA
    # IN A SINGLE ITERATION
    for atom in s_atoms:

        # FOR ATOM TYPING VIA OPENBABEL
        atom.atom_types = set([])

        # LIST FOR EACH ATOM TO STORE EXPLICIT HYDROGEN COORDINATES
        atom.h_coords = []

        # DETECT METALS
        if atom.element.upper() in METALS:
            atom.is_metal = True
        else:
            atom.is_metal = False

        # DETECT HALOGENS
        if atom.element.upper() in HALOGENS:
            atom.is_halogen = True
        else:
            atom.is_halogen = False

    # ADD EXPLICIT HYDROGEN COORDS FOR H-BONDING INTERACTIONS
    # ADDING HYDROGENS DOESN'T SEEM TO INTERFERE WITH ATOM SERIALS (THEY GET ADDED AS 0)
    # SO WE CAN STILL GET BACK TO THE PERSISTENT BIOPYTHON ATOMS THIS WAY.
    if not input_has_hydrogens:
        mol.AddHydrogens(False, True, ph)  # polaronly, correctForPH, pH

        logging.info('Added hydrogens.')

    # ATOM TYPING VIA OPENBABEL
    # ITERATE OVER ATOM TYPE SMARTS DEFINITIONS
    for atom_type, smartsdict in ATOM_TYPES.items():

        #logging.info('Typing: {}'.format(atom_type))

        # FOR EACH ATOM TYPE SMARTS STRING
        for smarts in smartsdict.values():

            #logging.info('Smarts: {}'.format(smarts))

            # GET OPENBABEL ATOM MATCHES TO THE SMARTS PATTERN
            ob_smart = ob.OBSmartsPattern()
            ob_smart.Init(str(smarts))

            #logging.info('Initialised for: {}'.format(smarts))

            ob_smart.Match(mol)

            #logging.info('Matched for: {}'.format(smarts))

            matches = [x for x in ob_smart.GetMapList()]

            #logging.info('List comp matches: {}'.format(smarts))

            if matches:

                # REDUCE TO A SINGLE LIST
                matches = set(reduce(operator.add, matches))

                #logging.info('Set reduce matches: {}'.format(smarts))

                for match in matches:

                    atom = mol.GetAtom(match)
                    ob_to_bio[atom.GetId()].atom_types.add(atom_type)

                #logging.info('Assigned types: {}'.format(smarts))

    # ALL WATER MOLECULES ARE HYDROGEN BOND DONORS AND ACCEPTORS
    for atom in (x for x in s_atoms if x.get_full_id()[3][0] == 'W'):
        atom.atom_types.add('hbond acceptor')
        atom.atom_types.add('hbond donor')

    # OVERRIDE PROTEIN ATOM TYPING FROM DICTIONARY
    for residue in s.get_residues():

        if residue.resname in STD_RES:

            for atom in residue.child_list:

                # REMOVE TYPES IF ALREADY ASSIGNED FROM SMARTS
                for atom_type in PROT_ATOM_TYPES.keys():
                    atom.atom_types.discard(atom_type)

                # ADD ATOM TYPES FROM DICTIONARY
                for atom_type, atom_ids in PROT_ATOM_TYPES.items():

                    atom_id = residue.resname.strip() + atom.name.strip()

                    if atom_id in atom_ids:
                        atom.atom_types.add(atom_type)

    def make_pymol_string(entity):
        '''
        Feed me a BioPython atom or BioPython residue.
        See `http://pymol.sourceforge.net/newman/user/S0220commands.html`.
        chain-identifier/resi-identifier/name-identifier
        chain-identifier/resi-identifier/
        '''

        if isinstance(entity, Atom):

            chain = entity.get_parent().get_parent()
            residue = entity.get_parent()
            atom_name = entity.name

        elif isinstance(entity, Residue):
            chain = entity.get_parent()
            residue = entity
            atom_name = ''

        else:
            raise TypeError(
                'Cannot make a PyMOL string from a non-Atom or Residue object.'
            )

        res_num = residue.id[1]

        # ADD INSERTION CODE IF NEED BE
        if residue.id[2] != ' ':
            res_num = str(res_num) + residue.id[2]

        macro = '{}/{}/{}'.format(chain.id, res_num, atom_name)

        return macro

    '''

    with open(pdb_filename.replace('.pdb', '.atomtypes'), 'w') as fo:

        if headers:
            fo.write('{}\n'.format('\t'.join(
                ['atom', 'atom_types']
            )))

        for atom in s_atoms:
            fo.write('{}\n'.format('\t'.join([str(x) for x in [make_pymol_string(atom), sorted(tuple(atom.atom_types))]])))

    logging.info('Typed atoms.')


    '''

    return s_atoms