Ejemplo n.º 1
0
    def __init__(self, path='', verbosity=0):
        """ Constructs a UserStorageDirectory object

        The storage directory is ``<base>/fep_automate``, where ``<base>`` is the first usable of: ``path``,
        ``$XDG_CONFIG_HOME``, ``$HOME/.config`` and ``./.config``. The directory is created, along with any missing
        parent, and stored in ``self.path``.

        :param str path: use this dir, default: $XDG_CONFIG_HOME/fep_automate
        :param int verbosity: verbosity level
        :raises OSError: if the storage directory cannot be created
        """

        if path:
            base_dir = path
        else:
            # A variable that is set but empty is handled as if it was unset, so that an empty XDG_CONFIG_HOME
            # does not silently select a directory relative to the current working dir
            base_dir = os.environ.get('XDG_CONFIG_HOME')
            if not base_dir:
                home_dir = os.environ.get('HOME')
                if home_dir:
                    base_dir = os.path.join(home_dir, '.config')
                else:
                    # Is this unix?
                    base_dir = os.path.join(os.curdir, '.config')
                    os_util.local_print(
                        'You seem to be running on a non-UNIX system (or there are issues in your '
                        'environment). Trying to go on, but you may experience errors.',
                        msg_verbosity=os_util.verbosity_level.warning,
                        current_verbosity=verbosity)

        self.path = os.path.join(base_dir, 'fep_automate')

        # makedirs also creates missing parents (eg: a $HOME which has no .config yet), where mkdir would fail with
        # FileNotFoundError. An already existing directory is not an error.
        os.makedirs(self.path, exist_ok=True)
Ejemplo n.º 2
0
def adjust_query_properties(query_molecule,
                            generic_atoms=False,
                            ignore_charge=True,
                            ignore_isotope=True,
                            verbosity=0):
    """ Build a relaxed copy of a query molecule, to be used in substructure matching

    The input molecule is not modified; all changes are applied to a copy. Bonds are always made generic, dummies
    are converted to queries and degree adjustment is switched off. When generic_atoms is False, ignore_isotope and
    ignore_charge select which atomic info is removed prior to calling rdkit.Chem.AdjustQueryProperties.

    :param rdkit.Chem.Mol query_molecule: query molecule
    :param bool generic_atoms: make atoms generic; if set, ignore_charge and ignore_isotope are not used
    :param bool ignore_charge: set all atomic charges to 0
    :param bool ignore_isotope: replace the query of each query atom by one matching its atomic number only
    :param int verbosity: controls the verbosity level
    :rtype: rdkit.Chem.Mol
    """

    entering_message = ('Entering adjust_query_properties(query_molecule={} (SMILES={}), generic_atoms={}, '
                        'verbosity={})'
                        ''.format(query_molecule, rdkit.Chem.MolToSmiles(query_molecule), generic_atoms, verbosity))
    os_util.local_print(entering_message,
                        msg_verbosity=os_util.verbosity_level.debug,
                        current_verbosity=verbosity)

    relaxed_query = rdkit.Chem.Mol(query_molecule)

    # Options passed to rdkit.Chem.AdjustQueryProperties
    adjust_params = rdkit.Chem.rdmolops.AdjustQueryParameters()
    adjust_params.makeBondsGeneric = True
    adjust_params.makeDummiesQueries = True
    adjust_params.adjustDegree = False

    if generic_atoms:
        adjust_params.makeAtomsGeneric = True
    else:
        if ignore_isotope:
            for each_atom in relaxed_query.GetAtoms():
                if not isinstance(each_atom, rdkit.Chem.QueryAtom):
                    continue
                # A SMARTS holding nothing but the atomic number discards any other info of the original query
                element_smarts = '[#{}]'.format(each_atom.GetAtomicNum())
                each_atom.SetQuery(rdkit.Chem.MolFromSmarts(element_smarts).GetAtomWithIdx(0))
        if ignore_charge:
            for each_atom in relaxed_query.GetAtoms():
                each_atom.SetFormalCharge(0)

    relaxed_query = rdkit.Chem.AdjustQueryProperties(relaxed_query, adjust_params)

    result_message = ('The molecule {} (SMARTS={}) was altered by adjust_query_properties to {} (SMARTS={})'
                      ''.format(query_molecule, rdkit.Chem.MolToSmiles(query_molecule),
                                relaxed_query, rdkit.Chem.MolToSmiles(relaxed_query)))
    os_util.local_print(result_message,
                        msg_verbosity=os_util.verbosity_level.debug,
                        current_verbosity=verbosity)

    return relaxed_query
Ejemplo n.º 3
0
def process_custom_mcs(custom_mcs, savestate=None, verbosity=0):
    """ Parses user supplied custom MCS data

    Accepted inputs, after type detection by os_util.detect_type, are:
     - a SMARTS string, which is returned under the key '*';
     - a dict whose keys are all frozensets with two elements, which is returned as is;
     - a dict whose keys are all strings with exactly one '-', each of which is split in a frozenset.
    An empty (falsy) custom_mcs yields an empty dict and does not touch savestate.

    :param [str, dict] custom_mcs: mcs data to be parsed
    :param savestate_util.SavableState savestate: saved state data; if given, the parsed result is stored under
                                                  'custom_mcs', merged into 'mcs_dict' and saved
    :param int verbosity: controls verbosity level
    :raises SystemExit: if custom_mcs cannot be parsed
    :rtype: dict
    """

    if not custom_mcs:
        return {}

    custom_mcs = os_util.detect_type(custom_mcs, test_for_dict=True)

    if isinstance(custom_mcs, str):
        if rdkit.Chem.MolFromSmarts(custom_mcs) is None:
            os_util.local_print('Could not parse you custom MCS "{}".'.format(custom_mcs),
                                msg_verbosity=os_util.verbosity_level.error,
                                current_verbosity=verbosity)
            raise SystemExit(1)

        os_util.local_print('Using user-supplied MCS {} for all molecules.'.format(custom_mcs),
                            msg_verbosity=os_util.verbosity_level.info,
                            current_verbosity=verbosity)
        parsed_mcs = {'*': custom_mcs}

    elif isinstance(custom_mcs, dict):
        if all(isinstance(key, frozenset) and len(key) == 2 for key in custom_mcs):
            # Keys are already pairs of molecules
            parsed_mcs = custom_mcs
        elif all(isinstance(key, str) and key.count('-') == 1 for key in custom_mcs):
            # Keys are in the "molA-molB" format, convert each one to a pair
            parsed_mcs = {}
            for pair_name, pair_mcs in custom_mcs.items():
                parsed_mcs[frozenset(pair_name.split('-'))] = pair_mcs
        else:
            os_util.local_print(
                'Could not parse you custom MCS "{}". If providing a dict, make sure to follow '
                'the required format (see documentation).'.format(custom_mcs),
                msg_verbosity=os_util.verbosity_level.error,
                current_verbosity=verbosity)
            raise SystemExit(1)

    else:
        os_util.local_print(
            'Could not parse you custom MCS. A string or dict is required, but your data "{}" '
            'was parsed as a {} (see documentation for formatting options).'
            ''.format(custom_mcs, type(custom_mcs)),
            msg_verbosity=os_util.verbosity_level.error,
            current_verbosity=verbosity)
        raise SystemExit(1)

    if savestate is not None:
        savestate['custom_mcs'] = parsed_mcs
        savestate.setdefault('mcs_dict', {}).update(parsed_mcs)
        savestate.save_data()

    return parsed_mcs
Ejemplo n.º 4
0
def generic_mol_read(ligand_format, ligand_data, verbosity=0):
    """ Tries to read a ligand detecting formats and types

    For every supported format, ligand_data is first parsed as the file contents and, should that fail, it is used
    as a file name. A rdkit.Chem.Mol passed as ligand_data is returned untouched.

    :param str ligand_format: data format or extension (mol2, mol, pdb or pdbqt, with or without a leading dot)
    :param [str, rdkit.Chem.Mol] ligand_data: data to be read
    :param int verbosity: set verbosity
    :raises SystemExit: if ligand_format is not one of the supported formats
    :rtype: rdkit.Chem.Mol
    """

    if isinstance(ligand_data, rdkit.Chem.Mol):
        return ligand_data

    if ligand_format in ('mol2', '.mol2'):
        read_mol = rdkit.Chem.MolFromMol2Block(ligand_data, removeHs=False)
        if read_mol is not None:
            return read_mol
        return rdkit.Chem.MolFromMol2File(ligand_data, removeHs=False)

    if ligand_format in ('mol', '.mol'):
        read_mol = rdkit.Chem.MolFromMolBlock(ligand_data, removeHs=False)
        if read_mol is not None:
            return read_mol
        return rdkit.Chem.MolFromMolFile(ligand_data, removeHs=False)

    if ligand_format in ('pdbqt', '.pdbqt', 'pdb', '.pdb'):
        os_util.local_print(
            'You are reading a pdb or pdbqt file ({}), which requires openbabel. Should this fail, you '
            'may try converting it to a mol2 before hand. This may be unsafe.'.format(ligand_data),
            msg_verbosity=os_util.verbosity_level.warning,
            current_verbosity=verbosity)
        import pybel

        # Both pdb and pdbqt are read using the pdb reader
        try:
            ob_molecule = pybel.readstring('pdb', ligand_data)
        except OSError:
            ob_molecule = next(pybel.readfile('pdb', ligand_data))

        return mol_util.obmol_to_rwmol(ob_molecule)

    os_util.local_print('Failed to read pose data from {} with type {}'
                        ''.format(ligand_data, ligand_format),
                        msg_verbosity=os_util.verbosity_level.error,
                        current_verbosity=verbosity)
    raise SystemExit(1)
Ejemplo n.º 5
0
def get_position_matrix(each_mol,
                        each_mol_str=None,
                        atom_selection=None,
                        verbosity=0):
    """ Collect the position of the selected atoms of a molecule, one residue at a time

    Residues are paired, in order, with the entries of each_mol_str and only atoms of residues paired with a true
    value are collected. Atom names are compared to atom_selection after removing surrounding whitespaces.

    :param pybel.Molecule each_mol: molecule to get positions from
    :param list each_mol_str: a alignment string from where residues to be used will be read (default: use all
                              residues)
    :param list atom_selection: use atoms matching this name (default: CA)
    :param int verbosity: sets the verbosity level
    :raises SystemExit: if the same atom name, residue name and residue index is found more than once
    :rtype: list
    """

    selected_names = ['CA'] if atom_selection is None else atom_selection
    residue_mask = [True] * len(each_mol.residues) if each_mol_str is None else each_mol_str

    seen_atoms = set()
    positions = []
    for this_residue, use_residue in zip(each_mol.residues, residue_mask):
        for this_atom in this_residue.atoms:
            ob_atom = this_atom.OBAtom
            atom_id = ob_atom.GetResidue().GetAtomID(ob_atom)
            if atom_id.strip() not in selected_names:
                continue
            if not use_residue:
                continue

            # The atom name is used as read (ie, not stripped) to build the key identifying this atom
            atom_key = '{}{}{}'.format(atom_id, this_residue.name, this_residue.idx)
            if atom_key in seen_atoms:
                os_util.local_print(
                    'Atom {} found twice in your protein {}. Cannot handle multiple '
                    'occupancies.'.format(atom_key, each_mol.title),
                    msg_verbosity=os_util.verbosity_level.error,
                    current_verbosity=verbosity)
                raise SystemExit(1)

            seen_atoms.add(atom_key)
            positions.append(ob_atom.GetVector())

    return positions
Ejemplo n.º 6
0
    def save_data(self, output_file='', verbosity=0):
        """ Save state to a pickle file

        self.__dict__ is pickled to a temporary file, which is then moved over self.data_file using os.replace, so
        that a failed write does not clobber an existing file. The temporary file is removed on failure.

        :param str output_file: save result to this file; if empty, the current self.data_file is used
        :param int verbosity: controls verbosity level
        :raises SystemExit: if the temporary file cannot be written
        :raises FileNotFoundError: if the temporary file cannot be moved to self.data_file
        :rtype: bool
        """

        import tempfile

        if output_file != '':
            self.data_file = output_file

        # The temporary file is created next to the destination, because a rename across file systems may fail, and
        # it gets an unique name, so that concurrent runs do not overwrite each other's data. Should the destination
        # dir be missing, the current dir is used and the error is detected and reported when moving the file.
        temp_dir = os.path.dirname(self.data_file)
        if not os.path.isdir(temp_dir):
            temp_dir = os.curdir

        temp_name = None
        try:
            try:
                with tempfile.NamedTemporaryFile(mode='wb', dir=temp_dir, prefix='.temp_pickle_', suffix='.pkl',
                                                 delete=False) as file_handler:
                    temp_name = file_handler.name
                    pickle.dump(self.__dict__, file_handler)
                    # fsync acts on data already handed to the OS, so Python's own buffer must be flushed first
                    file_handler.flush()
                    os.fsync(file_handler.fileno())
            except OSError:
                os_util.local_print('Could not save data to {}'.format(self.data_file),
                                    current_verbosity=verbosity,
                                    msg_verbosity=os_util.verbosity_level.error)
                raise SystemExit(1)

            try:
                os.replace(temp_name, self.data_file)
            except FileNotFoundError:
                os_util.local_print('Failed to save progress data to file {}'.format(self.data_file),
                                    current_verbosity=verbosity,
                                    msg_verbosity=os_util.verbosity_level.error)
                # Re-raise the original error, keeping its errno and file name
                raise

            # The temporary file was moved, there is nothing left to be cleaned
            temp_name = None
        finally:
            if temp_name is not None:
                try:
                    os.remove(temp_name)
                except OSError:
                    pass

        os_util.local_print('Saved data to {}'.format(self.data_file),
                            current_verbosity=verbosity,
                            msg_verbosity=os_util.verbosity_level.debug)
        return True
Ejemplo n.º 7
0
def extract_docking_receptor(receptor_file, verbosity=0):
    """ Reads a docking receptor file

    The format is guessed from the file extension, pdbqt files being read as pdb. Only the first molecule in the
    file is returned.

    :param str receptor_file: receptor file
    :param int verbosity: verbosity level; openbabel logging is restricted to errors unless it is greater than 3
    :raises SystemExit: if the format is not understood or the file cannot be read
    :rtype: pybel.Molecule
    """

    import pybel

    if verbosity <= 3:
        pybel.ob.obErrorLog.SetOutputLevel(pybel.ob.obError)

    _, file_extension = os.path.splitext(receptor_file)
    receptor_format = file_extension.lstrip('.')
    if receptor_format == 'pdbqt':
        receptor_format = 'pdb'

    os_util.local_print('Reading receptor data from {} as a {} file'.format(receptor_file, receptor_format),
                        msg_verbosity=os_util.verbosity_level.default, current_verbosity=verbosity)

    try:
        return next(pybel.readfile(receptor_format, receptor_file))
    except ValueError as error_data:
        read_error = ('Could not understand format {} (guessed from extension). Error was: {}'
                      ''.format(receptor_format, error_data))
    except (IOError, StopIteration) as error_data:
        read_error = ('Could not read file {} using format {} (guessed from extension). Error was: {}'
                      ''.format(receptor_file, receptor_format, error_data))

    # Only reached when reading failed
    os_util.local_print(read_error, current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.error)
    raise SystemExit(1)
Ejemplo n.º 8
0
    def store_file(self, source, dest_file='', verbosity=0):
        """ Copy file or dir to storage dir

        Two copies are stored in self.path: one named dest_file, which replaces a previously stored directory of the
        same name, and a backup named <name>_<HHMMSS_DDMMYYYY><extension>.

        :param str source: file or directory to be copied
        :param str dest_file: new file name, default: use source_file name
        :param int verbosity: verbosity level
        :raises ValueError: if dest_file is not supplied and a name cannot be obtained from source
        :rtype: bool
        """

        if not dest_file:
            # normpath removes trailing separators, so that 'some/dir/' gives 'dir' instead of an empty name
            dest_file = os.path.basename(os.path.normpath(source))
            # '..' must be refused as well: as a destination, it would point to the parent of the storage dir
            if dest_file in ['', os.curdir, os.pardir]:
                os_util.local_print(
                    'Could not get a name from {} and dest_file was not supplied. Cannot go continue.'
                    ''.format(source),
                    msg_verbosity=os_util.verbosity_level.error,
                    current_verbosity=verbosity)
                raise ValueError('invalid source name')

        stored_name = os.path.join(self.path, dest_file)

        # splitext returns a (root, extension) tuple; the time stamp goes between its elements
        name_root, name_ext = os.path.splitext(os.path.basename(dest_file))
        backup_name = os.path.join(self.path, '{}_{}{}'.format(name_root, strftime('%H%M%S_%d%m%Y'), name_ext))

        if os.path.isdir(source):
            try:
                copytree(source, stored_name)
            except FileExistsError:
                # Replace a previously stored copy of this directory
                rmtree(stored_name)
                copytree(source, stored_name)
            # The backup is only made once the main copy succeeded. backup_name already includes self.path.
            copytree(source, backup_name)
        else:
            copy2(source, stored_name)
            copy2(source, backup_name)

        return True
Ejemplo n.º 9
0
    def __init__(self, input_file='', verbosity=0):
        """ Init SavableState by reading a pickle file, or doing nothing. If no input_file is given, a default name will
         be generated

        When input_file is given, every key it holds is set as an attribute of this object and self.data_file is set
        to input_file, even if the file itself records another name.

        :param str input_file: progress file to be read; it is also stored as self.data_file
        :param int verbosity: control verbosity level
        :raises ValueError: if the data read from input_file does not include data_file
        """

        super().__init__()

        if not input_file:
            # User did not supply a name, generate one
            from time import strftime
            self.data_file = 'savedata_{}.pkl'.format(strftime('%d%m%Y_%H%M%S'))
            return

        saved_data = self._read_data(input_file)
        for key, value in saved_data.items():
            setattr(self, key, value)

        try:
            recorded_name = self.data_file
        except AttributeError:
            # data_file is exactly what is missing here, so it cannot be part of the message. Reading it while
            # formatting would raise a second AttributeError, and neither the message nor the ValueError below
            # would ever be seen.
            os_util.local_print(
                'Progress file {} does not contain data_file data. Is it a progress file?'
                ''.format(input_file),
                msg_verbosity=os_util.verbosity_level.error,
                current_verbosity=verbosity)
            raise ValueError('Invalid progress file')

        if recorded_name != input_file and verbosity >= 0:
            # User moved/renamed progress file or something weird happened
            os_util.local_print(
                'Progress file {} claims to be generated as file {}'
                ''.format(input_file, recorded_name),
                current_verbosity=verbosity,
                msg_verbosity=os_util.verbosity_level.warning)

        self.data_file = input_file
Ejemplo n.º 10
0
def align_sequences_match_residues(mobile_seq,
                                   target_seq,
                                   seq_align_mat='BLOSUM80',
                                   gap_penalty=-1.0,
                                   verbosity=0):
    """ Align two aminoacid sequences using Bio.pairwise2.align.globalds and the substitution matrix seq_align_mat

    The first alignment returned by Biopython is used. For each sequence, a list with one bool per residue is
    built, telling whether that residue is aligned to a residue (True) or to a gap (False) of the other sequence.

    :param str mobile_seq: sequence of mobile protein
    :param str target_seq: sequence of target protein
    :param str seq_align_mat: name of the substitution matrix, loaded using Bio.Align.substitution_matrices.load
    :param float gap_penalty: penalty used both to open and to extend a gap
    :param int verbosity: sets the verbosity level
    :raises ImportError: if Biopython cannot be imported
    :raises FileNotFoundError: if the substitution matrix cannot be loaded
    :rtype: tuple
    :return: (mobile, reference) lists of residues to be used in the 3D alignment
    """

    try:
        from Bio.pairwise2 import align
        from Bio.Align import substitution_matrices
        seq_align_mat = substitution_matrices.load(seq_align_mat)
    except ImportError as error:
        import_message = ('Failed to import Biopython with error: {}\nBiopython is necessary to sequence'
                          'alignment. Sequences to be aligned:\nReference: {}\nMobile: {}'
                          ''.format(error, target_seq, mobile_seq))
        os_util.local_print(import_message,
                            msg_verbosity=os_util.verbosity_level.error,
                            current_verbosity=verbosity)
        raise ImportError(error)
    except FileNotFoundError as error:
        # Calling load without arguments gives the available matrices, which are shown to the user
        available_matrices = substitution_matrices.load()
        matrix_message = ('Failed to import substitution matrix {} with error: {}\nSubstitution matrix must be one '
                          'of: {})'
                          ''.format(seq_align_mat, error, available_matrices))
        os_util.local_print(matrix_message,
                            msg_verbosity=os_util.verbosity_level.error,
                            current_verbosity=verbosity)
        raise FileNotFoundError(error)

    # The same penalty is used to open and to extend a gap. Only the first alignment is used.
    align_result = align.globalds(target_seq, mobile_seq, seq_align_mat, gap_penalty, gap_penalty)[0]
    os_util.local_print('This is the alignment result to be used in protein alignment:\n{}'
                        ''.format(align_result),
                        msg_verbosity=os_util.verbosity_level.info,
                        current_verbosity=verbosity)

    # target_seq is the first sequence of the alignment, mobile_seq is the second one
    ref_align_str = []
    mob_align_str = []
    for ref_residue, mob_residue in zip(align_result[0], align_result[1]):
        if ref_residue != '-':
            ref_align_str.append(mob_residue != '-')
        if mob_residue != '-':
            mob_align_str.append(ref_residue != '-')

    return mob_align_str, ref_align_str
Ejemplo n.º 11
0
def read_reference_structure(reference_structure, verbosity=0):
    """ Reads a structure file, recording where it was read from

    If reference_structure is already a pybel.Molecule, it is returned after making sure its data has a
    'file_path' entry (set to False when absent). Otherwise, the format is guessed from the file extension, pdbqt
    being read as pdb, the first molecule of the file is read and its 'file_path' data is set to
    reference_structure.

    :param [str, pybel.Molecule] reference_structure: receptor file
    :param int verbosity: be verbosity
    :raises SystemExit: if the file cannot be read
    :rtype: pybel.Molecule
    """

    import pybel

    os_util.local_print(
        'Entering extract read_reference_structure(reference_structure={}, verbosity={})'
        ''.format(reference_structure, verbosity),
        msg_verbosity=os_util.verbosity_level.debug,
        current_verbosity=verbosity)

    if isinstance(reference_structure, pybel.Molecule):
        # Flag that we cannot know the file path, if it's not already present. OpenBabel MoleculeData mimics a dict,
        # but lacks a setdefault method, so we're doing this the dumb way
        if 'file_path' not in reference_structure.data:
            reference_structure.data['file_path'] = False
        return reference_structure

    file_extension = splitext(reference_structure)[1].lstrip('.')
    receptor_format = 'pdb' if file_extension == 'pdbqt' else file_extension

    reading_message = 'Reading receptor data from {} as a {} file'.format(reference_structure, receptor_format)
    os_util.local_print(reading_message,
                        msg_verbosity=os_util.verbosity_level.info,
                        current_verbosity=verbosity)

    try:
        structure_mol = next(pybel.readfile(receptor_format, reference_structure))
    except (ValueError, StopIteration, IOError) as error_data:
        os_util.local_print(
            'Could not read file {}. Format {} was guessed from extension). Error message was "{}"'
            ''.format(reference_structure, receptor_format, error_data),
            msg_verbosity=os_util.verbosity_level.error,
            current_verbosity=verbosity)
        raise SystemExit(1)

    structure_mol.data['file_path'] = reference_structure
    return structure_mol
Ejemplo n.º 12
0
def loose_replace_side_chains(mol, core_query, use_chirality=False, verbosity=True):
    """ Reconstruct a molecule based on common core, relaxing the core query until it is matched

    rdkit.Chem.ReplaceSidechains is first tried using the core query as is. Each time it returns None, it is tried
    again using the result of adjust_query_properties for the core, first with default options, then with
    generic_atoms=True. The result of the last attempt is returned, even if it is None.

    If core_query has explicit hydrogens and mol does not, the hydrogens are removed from a copy of the core before
    any attempt.

    NOTE(review): verbosity defaults to True, while an int level is documented. The default is kept as is, so that
    the behavior of current callers does not change.

    :param rdkit.Chem.Mol mol: the molecule to be modified
    :param rdkit.Chem.Mol core_query: the molecule to be used as a substructure query for recognizing the core
    :param bool use_chirality: match the substructure query using chirality
    :param int verbosity: set verbosity level
    :rtype: rdkit.Chem.Mol
    """

    temp_core_structure = rdkit.Chem.Mol(core_query)

    core_hydrogens = num_explict_hydrogens(core_query)
    if core_hydrogens > 0 and num_explict_hydrogens(mol) == 0:
        os_util.local_print('loose_replace_side_chains was called with a mol without explict hydrogens and a '
                            'core_query with {} explict hydrogens. Removing core_query explict Hs.'
                            ''.format(core_hydrogens),
                            msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)
        editable_core = rdkit.Chem.EditableMol(core_query)
        hydrogen_indices = []
        for core_atom in core_query.GetAtoms():
            if core_atom.GetAtomicNum() == 1:
                hydrogen_indices.append(core_atom.GetIdx())
        # Atoms are removed from the highest index down, so that pending indices are not shifted
        hydrogen_indices.sort(reverse=True)
        for hydrogen_idx in hydrogen_indices:
            editable_core.RemoveAtom(hydrogen_idx)
        temp_core_structure = editable_core.GetMol()
        rdkit.Chem.SanitizeMol(temp_core_structure, catchErrors=True)

    # Each fallback is a message template and the extra options to adjust_query_properties, from the strictest to
    # the loosest query
    fallbacks = (
        ('rdkit.Chem.ReplaceSidechains failed with mol={} (SMILES="{}") and coreQuery={} '
         '(SMARTS="{}"). Retrying with adjust_query_properties.',
         {}),
        ('rdkit.Chem.ReplaceSidechains failed with mol={} (SMILES="{}") and coreQuery={} '
         '(SMARTS="{}"). Retrying with adjust_query_properties setting generic_atoms=True.',
         {'generic_atoms': True}),
    )

    result_core_structure = rdkit.Chem.ReplaceSidechains(mol, temp_core_structure, useChirality=use_chirality)
    for retry_message, adjust_options in fallbacks:
        if result_core_structure is not None:
            break
        os_util.local_print(retry_message.format(mol, rdkit.Chem.MolToSmiles(mol), temp_core_structure,
                                                 rdkit.Chem.MolToSmarts(temp_core_structure)),
                            msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)
        adjusted_core = adjust_query_properties(temp_core_structure, verbosity=verbosity, **adjust_options)
        result_core_structure = rdkit.Chem.ReplaceSidechains(mol, adjusted_core, useChirality=use_chirality)

    return result_core_structure
Ejemplo n.º 13
0
def read_reference_structure(reference_structure, verbosity=0):
    """ Reads a structure file

    A pybel.Molecule passed as reference_structure is returned as is. Otherwise, the format is guessed from the
    file extension, pdbqt being read as pdb, and the first molecule in the file is returned.

    :param [str, pybel.Molecule] reference_structure: receptor file
    :param int verbosity: be verbosity
    :raises SystemExit: if the file cannot be read
    :rtype: pybel.Molecule
    """

    import pybel

    os_util.local_print(
        'Entering extract read_reference_structure(reference_structure={}, verbosity={})'
        ''.format(reference_structure, verbosity),
        msg_verbosity=os_util.verbosity_level.debug,
        current_verbosity=verbosity)

    if isinstance(reference_structure, pybel.Molecule):
        return reference_structure

    _, dotted_extension = splitext(reference_structure)
    receptor_format = dotted_extension.lstrip('.')
    if receptor_format == 'pdbqt':
        # pdbqt files are read using the pdb reader
        receptor_format = 'pdb'

    os_util.local_print('Reading receptor data from {} as a {} file'.format(reference_structure, receptor_format),
                        msg_verbosity=os_util.verbosity_level.info,
                        current_verbosity=verbosity)

    try:
        return next(pybel.readfile(receptor_format, reference_structure))
    except (ValueError, StopIteration, IOError) as error_data:
        os_util.local_print(
            'Could not read file {}. Format {} was guessed from extension). Error message was "{}"'
            ''.format(reference_structure, receptor_format, error_data),
            msg_verbosity=os_util.verbosity_level.error,
            current_verbosity=verbosity)
        raise SystemExit(1)
Ejemplo n.º 14
0
def test_center_molecule(map_bias, all_molecules, verbosity=0):
    """ Test center molecule to prepare star or wheel maps

    map_bias, after type detection by os_util.detect_type, must be a molecule name or a list holding a single
    name, and that name must be present in all_molecules.

    :param [list, str] map_bias: test this bias string or list
    :param list all_molecules: all molecules read from input
    :param int verbosity: sets the verbosity level
    :raises SystemExit: if none or more than one center molecule is supplied
    :raises ValueError: if the center molecule is not in all_molecules
    :rtype: str
    """

    map_bias = os_util.detect_type(map_bias, test_for_list=True)

    if not map_bias:
        os_util.local_print('A star map requires one, and only one, center molecule. You supplied none.',
                            msg_verbosity=os_util.verbosity_level.error,
                            current_verbosity=verbosity)
        raise SystemExit(1)

    if isinstance(map_bias, list):
        if len(map_bias) > 1:
            os_util.local_print('A star map requires one, and only one, center molecule. You supplied {} ({})'
                                ''.format(len(map_bias), map_bias),
                                msg_verbosity=os_util.verbosity_level.error,
                                current_verbosity=verbosity)
            raise SystemExit(1)
        # A list with a single element, unpack it
        map_bias = map_bias[0]

    if map_bias in all_molecules:
        return map_bias

    os_util.local_print('The center molecule you supplied ({}) not found in {}.'
                        ''.format(map_bias, ', '.join(all_molecules)),
                        msg_verbosity=os_util.verbosity_level.error,
                        current_verbosity=verbosity)
    raise ValueError('Molecule not found')
Ejemplo n.º 15
0
def extract_autodock4_poses(ligands_dict, poses_data=None, no_checks=False, verbosity=0):
    """ Extracts one cluster pose per ligand from Autodock4 output files, using an external awk

    Each value in ligands_dict is used as the file passed to awk. On success, that value is replaced, in place, by
    an all_classes.Namespace holding the extracted pose (attributes format, data and comment). The updated dict is
    then passed to extract_docking_poses, the result of which is returned.

    :param dict ligands_dict: dictionary containing ligands data (ligand name: docking output file)
    :param [str, dict] poses_data: file with poses to be used, one "name = cluster number" per line (lines starting
                                   with ; or # are ignored), or a dict with the same data. Ligands not listed use
                                   cluster 1.
    :param bool no_checks: ignore checks and tries to go on
    :param int verbosity: be verbosity
    :raises SystemExit: if poses_data cannot be parsed or, unless no_checks is set, if a pose cannot be extracted
    :rtype: dict
    """

    awk_extract_poses = """
    BEGIN {{
        Found = 0
        FoundM = 0 
    }}
    $0 == "\tLOWEST ENERGY DOCKED CONFORMATION from EACH CLUSTER" {{
        Found = 1
    }}
    Found == 1 && $1 == "MODEL" {{
        FoundM+=1
        if (FoundM > {0}) {{
            exit
        }}
    }}
    FoundM == {0} {{
        print $0
        if ($0 == "ENDMDL") {{
            exit
        }}
    }}
    """

    # FIXME: fix this method

    def report_failure(error_message, ligand_name, ligand_file, cluster):
        """ Print error_message and exit, unless no_checks is set, in which case the ligand is flagged as failed """
        os_util.local_print(error_message,
                            msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
        if not no_checks:
            raise SystemExit(1)
        os_util.local_print('{:<15} {:<18} {:<15} ERROR!!!'
                            ''.format(ligand_name, ligand_file, cluster),
                            msg_verbosity=os_util.verbosity_level.default, current_verbosity=verbosity)

    if isinstance(poses_data, str):
        raw_data = os_util.read_file_to_buffer(poses_data, die_on_error=True, return_as_list=True,
                                               error_message='Failed to read poses data file.', verbosity=verbosity)
        docking_poses_data = {}
        for each_line in raw_data:
            # Skip empty lines and comments
            if (len(each_line) <= 1) or (each_line[0] in [';', '#']):
                continue
            lig_data = each_line.split('=')
            try:
                docking_poses_data[lig_data[0].rstrip()] = int(lig_data[1])
            except (ValueError, IndexError) as error_data:
                os_util.local_print('Could not read line "{}" from file {} with error {}'
                                    ''.format(each_line, poses_data, error_data),
                                    msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
                raise SystemExit(1)
    elif isinstance(poses_data, dict):
        docking_poses_data = poses_data
    else:
        docking_poses_data = {}

    os_util.local_print('{:=^50}\n{:<15} {:<15} {:<15}'.format(' Autodock4 poses ', 'Name', 'File', 'Cluster #'),
                        msg_verbosity=os_util.verbosity_level.default, current_verbosity=verbosity)

    # Decoding must not depend on stdout having an encoding, which may be None if stdout was replaced
    output_encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'

    # Only values of existing keys are replaced below, so it is safe to iterate over the dict itself
    for each_name, each_mol in ligands_dict.items():
        # Extract cluster data and reads it
        cluster_num = docking_poses_data.get(each_name, 1)

        try:
            docking_cluster_pdb = subprocess.check_output(['awk', awk_extract_poses.format(cluster_num), each_mol])
        except subprocess.CalledProcessError as error_data:
            report_failure('Could not run external program. Error: {}'.format(error_data),
                           each_name, each_mol, cluster_num)
            continue

        mol_awk_result = docking_cluster_pdb.decode(output_encoding)
        if len(mol_awk_result) < 3:
            # awk found nothing to print for this cluster
            report_failure('Failed to read cluster {} from file {}.'.format(cluster_num, each_mol),
                           each_name, each_mol, cluster_num)
            continue

        pose_data = all_classes.Namespace()
        pose_data.format = 'pdbqt'
        pose_data.data = mol_awk_result
        pose_data.comment = '{} cluster {}'.format(each_mol, cluster_num)
        ligands_dict[each_name] = pose_data

    return extract_docking_poses(ligands_dict, verbosity=verbosity)
Ejemplo n.º 16
0
def process_dummy_atoms(molecule, verbosity=0):
    """ Sanitizes dummy atoms in a rdkit.Chem.Mol

    Every query atom is replaced by a plain dummy atom (atomic number 0). Only lone pairs, recognized by a
    '_TriposAtomName' starting with 'LP', are supported. A lone pair without bonds is connected to the nearest
    other atom, as measured on the 3D coordinates of the molecule. If no query atom is found, the input molecule
    is returned as is.

    :param rdkit.Chem.Mol molecule: molecule to be verified
    :param int verbosity: controls the verbosity level
    :raises SystemExit: if a dummy atom which is not a lone pair is found, or if an unbonded lone pair cannot be
                        connected to a host atom (no 3D coordinates or no other atom in the molecule)
    :rtype: rdkit.Chem.Mol
    """

    os_util.local_print('Entering process_dummy_atoms(molecule=({}; SMILES={}), verbosity={})'
                        ''.format(molecule, rdkit.Chem.MolToSmiles(molecule), verbosity),
                        msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)

    # Iterates over a copy of molecule and converts query atoms to dummy atoms, adding bonds if necessary
    temp_mol = rdkit.Chem.Mol(molecule)
    for atom_idx, each_atom in enumerate(temp_mol.GetAtoms()):
        if not isinstance(each_atom, rdkit.Chem.rdchem.QueryAtom):
            continue

        # Swap the query atom for a regular dummy atom, keeping its properties
        rdedmol = rdkit.Chem.RWMol(molecule)
        rdedmol.ReplaceAtom(atom_idx, rdkit.Chem.Atom(0), preserveProps=True)
        molecule = rdedmol.GetMol()

        atom_name = each_atom.GetProp('_TriposAtomName')
        if atom_name[:2] != 'LP':
            # FIXME: support other dummy atoms (eg: in linear molecules)
            os_util.local_print('The molecule {} contains dummy atoms which are not lonepairs. This is not '
                                'supported.'.format(molecule),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise SystemExit(1)

        os_util.local_print('Lone pair found. Atom with id {} was assumed a lone pair by its name ({}) and '
                            'its type ({}). If this is wrong, please change the atom name.'
                            ''.format(atom_idx, atom_name, each_atom.GetProp('_TriposAtomType')),
                            msg_verbosity=os_util.verbosity_level.info, current_verbosity=verbosity)

        if each_atom.GetBonds():
            # The lone pair is already bonded to its host, nothing else to do
            continue

        if temp_mol.GetNumConformers() == 0:
            os_util.local_print('Disconnected lone pair atom found in a molecule with no 3D coordinates. '
                                '3D coordinates are used to guess the LP host, but are absent in molecule '
                                '{}. I cannot continue.'.format(temp_mol),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise SystemExit(1)

        if temp_mol.GetNumAtoms() < 2:
            # There is no candidate host; bonding the lone pair to itself would be meaningless
            os_util.local_print('Disconnected lone pair atom found in molecule {}, but there is no other atom to '
                                'be used as the LP host. I cannot continue.'.format(temp_mol),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise SystemExit(1)

        rdkit.Chem.rdMolTransforms.CanonicalizeConformer(temp_mol.GetConformer(0))

        # LP is not bonded to any other atom. Connect it to the closer one. The distance of the atom to itself is
        # masked out, so that an atom sharing the very same coordinates cannot make the LP be selected as its own
        # host.
        import numpy
        distances = numpy.array(rdkit.Chem.Get3DDistanceMatrix(temp_mol)[atom_idx], dtype=float)
        distances[atom_idx] = numpy.inf
        closer_atom = int(numpy.argmin(distances))

        rdedmol = rdkit.Chem.RWMol(molecule)
        rdedmol.AddBond(atom_idx, closer_atom)
        molecule = rdedmol.GetMol()
        rdkit.Chem.SanitizeMol(molecule)

        os_util.local_print('Lonepair {} (id: {}) is not explicitly bonded to any atom in molecule, '
                            'connecting it to the closer atom {} (id: {}). Please, check the output'
                            ''.format(molecule.GetAtomWithIdx(atom_idx).GetProp('_TriposAtomName'),
                                      atom_idx,
                                      molecule.GetAtomWithIdx(closer_atom).GetProp('_TriposAtomName'),
                                      closer_atom),
                            msg_verbosity=os_util.verbosity_level.warning, current_verbosity=verbosity)

    return molecule
Ejemplo n.º 17
0
def verify_molecule_name(molecule, moldict, new_default_name=None, verbosity=0):
    """ Verify that a molecule name exists and is unique, returning a valid name for the molecule

    A missing or empty name is replaced by new_default_name or, if that is not given, by a random '(mol_N)' name
    not present in moldict. A name already present in moldict receives a numeric suffix ('name_1', 'name_2', ...)
    until it is unique. If molecule is a rdkit.Chem.Mol, its '_Name' property is set to the returned name.

    :param [rdkit.Chem.Mol, str] molecule: molecule to be verified
    :param dict moldict: dict of read molecules
    :param str new_default_name: if molecule lacks a name, use this name instead (Default: generate a random name)
    :param int verbosity: controls the verbosity level
    :rtype: str
    """

    is_rdkit_mol = isinstance(molecule, rdkit.Chem.Mol)

    if is_rdkit_mol:
        try:
            # An empty '_Name' is handled as a missing name, as done for empty str names
            this_mol_name = molecule.GetProp('_Name') or None
        except KeyError:
            this_mol_name = None
    else:
        this_mol_name = molecule or None

    if this_mol_name is None:
        if new_default_name:
            this_mol_name = new_default_name
        else:
            # Draw random names until finding one not in use
            while True:
                this_mol_name = '(mol_{})'.format(numpy.random.randint(1, 999999999))
                if this_mol_name not in moldict:
                    break

            if is_rdkit_mol:
                os_util.local_print('Molecule {} have no name. Molecule name is used to save molecule data '
                                    'and serves as an index. I will generate a random name for it, namely: {}'
                                    ''.format(rdkit.Chem.MolToSmiles(molecule), this_mol_name),
                                    msg_verbosity=os_util.verbosity_level.warning, current_verbosity=verbosity)
            else:
                os_util.local_print('A molecule have no name. Molecule name is used to save molecule data '
                                    'and serves as an index. I will generate a random name for it, namely: {}'
                                    ''.format(this_mol_name),
                                    msg_verbosity=os_util.verbosity_level.warning, current_verbosity=verbosity)

    if this_mol_name in moldict:
        colliding_name = this_mol_name

        # Use an explicit counter, so the suffix keeps growing as a number past 9 (..., '_9', '_10', ..., '_19',
        # '_20') instead of rewriting only the last character of the name
        suffix = 1
        this_mol_name = '{}_{}'.format(colliding_name, suffix)
        while this_mol_name in moldict:
            suffix += 1
            this_mol_name = '{}_{}'.format(colliding_name, suffix)

        if is_rdkit_mol:
            os_util.local_print('Two molecules (Smiles: {} and {}) have the same name {}. Molecule name is used to '
                                'save molecule data and serves as an index. I will rename molecule {} to {}'
                                ''.format(rdkit.Chem.MolToSmiles(moldict[colliding_name]),
                                          rdkit.Chem.MolToSmiles(molecule), colliding_name,
                                          rdkit.Chem.MolToSmiles(molecule), this_mol_name),
                                msg_verbosity=os_util.verbosity_level.warning, current_verbosity=verbosity)
        else:
            os_util.local_print('Two molecules have the same name {}. Molecule name is used to '
                                'save molecule data and serves as an index. I will rename the last molecule {}'
                                ''.format(colliding_name, this_mol_name),
                                msg_verbosity=os_util.verbosity_level.warning, current_verbosity=verbosity)

    if is_rdkit_mol:
        molecule.SetProp('_Name', this_mol_name)

    return this_mol_name
Ejemplo n.º 18
0
def rwmol_to_obmol(rdkit_rwmol, verbosity=0):
    """ Converts a rdkit.RWMol to openbabel.OBMol

    Atomic number, formal charge, implicit valence, aromaticity and the coordinates of the default conformer are
    copied for each atom; bond order and aromaticity are copied for each bond. Stereochemistry is not transferred.
    An input which already is an openbabel molecule is not converted: a pybel.ob.OBMol is returned as is and a
    pybel.Molecule has its OBMol returned.

    :param rdkit.Chem.rdchem.Mol rdkit_rwmol: the ROMol to be converted
    :param int verbosity: controls the verbosity level
    :raises KeyError: if a bond has a type other than single, unspecified, double, triple or aromatic
    :rtype: pybel.ob.OBMol
    """

    import pybel

    if isinstance(rdkit_rwmol, pybel.ob.OBMol):
        os_util.local_print('Molecule {} (SMILES={}) is already a pybel.ob.OBMol'
                            ''.format(rdkit_rwmol, pybel.Molecule(rdkit_rwmol).write('smi')),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.warning)
        return rdkit_rwmol
    if isinstance(rdkit_rwmol, pybel.Molecule):
        # The SMILES must come from pybel itself: rdkit.Chem.MolToSmiles cannot take a pybel.Molecule
        os_util.local_print('Molecule {} (SMILES={}) is already a a pybel.Molecule, converting to pybel.ob.OBMol only'
                            ''.format(rdkit_rwmol, rdkit_rwmol.write('smi')),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.warning)
        return rdkit_rwmol.OBMol

    # Lookup of the openbabel bond order to be used for each rdkit bond type
    _bondorders = {rdkit.Chem.BondType.SINGLE: 1,
                   rdkit.Chem.rdchem.BondType.UNSPECIFIED: 1,
                   rdkit.Chem.BondType.DOUBLE: 2,
                   rdkit.Chem.BondType.TRIPLE: 3,
                   rdkit.Chem.BondType.AROMATIC: 5}

    new_obmol = pybel.ob.OBMol()
    new_obmol.BeginModify()

    # Assign atoms
    for index, each_atom in enumerate(rdkit_rwmol.GetAtoms()):
        new_atom = new_obmol.NewAtom()
        new_atom.SetAtomicNum(each_atom.GetAtomicNum())
        new_atom.SetFormalCharge(each_atom.GetFormalCharge())
        new_atom.SetImplicitValence(each_atom.GetImplicitValence())
        if each_atom.GetIsAromatic():
            new_atom.SetAromatic()
        atom_position = rdkit_rwmol.GetConformer().GetAtomPosition(index)
        new_atom.SetVector(atom_position.x, atom_position.y, atom_position.z)

    # Assign bonds. The rdkit atom indexes are shifted by one when passed to openbabel.
    for each_bond in rdkit_rwmol.GetBonds():
        begin_idx = each_bond.GetBeginAtomIdx() + 1
        end_idx = each_bond.GetEndAtomIdx() + 1
        new_obmol.AddBond(begin_idx, end_idx, _bondorders[each_bond.GetBondType()])
        if each_bond.GetIsAromatic():
            new_obmol.GetBond(begin_idx, end_idx).SetAromatic()

    # FIXME: assign stereochemistry

    new_obmol.EndModify()

    os_util.local_print('Converted rdkit molecule SMILES {} to an openbabel molecule SMILES: {}'
                        ''.format(rdkit.Chem.MolToSmiles(rdkit_rwmol), pybel.Molecule(new_obmol).write('smi')),
                        current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.info)

    return new_obmol
Ejemplo n.º 19
0
def extract_docking_poses(ligands_dict, no_checks=False, verbosity=0):
    """ Read the pose of each ligand in ligands_dict as a rdkit molecule

    Each value in ligands_dict can be one of:
      - str: passed to generic_mol_read along with the lowercased file extension as format;
      - all_classes.Namespace: its 'format' and 'data' attributes are passed to generic_mol_read;
      - dict: its 'molecule' entry is either a rdkit.Chem.Mol, used as is, or a value passed to generic_mol_read.
        In the latter case, a 'format' entry is added to the dict (from the extension) when it is absent;
      - rdkit.Chem.Mol: used as is.

    Every molecule read goes through mol_util.process_dummy_atoms before being stored.

    :param dict ligands_dict: dict containing docking poses
    :param bool no_checks: if rdkit cannot read a ligand, try reading it using openbabel (pybel) instead of
                           aborting. A ligand which cannot be read by openbabel either is reported and left out
                           of the result. The fallback only handles str and all_classes.Namespace ligands.
    :param int verbosity: controls the verbosity level
    :raises TypeError: if the type of a ligand is not supported
    :raises SystemExit: if a ligand cannot be read by rdkit and no_checks is False
    :rtype: dict
    """

    os_util.local_print(
        'Entering extract_docking_poses(poses_data={}, verbosity={})'
        ''.format(ligands_dict, verbosity),
        msg_verbosity=os_util.verbosity_level.debug,
        current_verbosity=verbosity)

    os_util.local_print('{:=^50}\n{:<15} {:<20}'.format(
        ' Poses read ', 'Name', 'File'),
                        msg_verbosity=os_util.verbosity_level.default,
                        current_verbosity=verbosity)

    docking_mol_local = {}
    for each_name, each_mol in ligands_dict.items():

        if isinstance(each_mol, str):
            ligand_format = splitext(each_mol)[1].lower()
            docking_mol_rd = generic_mol_read(ligand_format,
                                              each_mol,
                                              verbosity=verbosity)
        elif isinstance(each_mol, all_classes.Namespace):
            docking_mol_rd = generic_mol_read(each_mol.format,
                                              each_mol.data,
                                              verbosity=verbosity)
        elif isinstance(each_mol, dict):
            if isinstance(each_mol['molecule'], rdkit.Chem.Mol):
                docking_mol_rd = each_mol['molecule']
            else:
                # The format is stored in the ligand dict itself, unless it is already there
                ligand_format = each_mol.setdefault(
                    'format',
                    os.path.splitext(each_mol['molecule'])[1])
                docking_mol_rd = generic_mol_read(ligand_format,
                                                  each_mol['molecule'],
                                                  verbosity=verbosity)
        elif isinstance(each_mol, rdkit.Chem.Mol):
            docking_mol_rd = each_mol
        else:
            os_util.local_print(
                "Could not understand type {} (repr: {}) for your ligand {}"
                "".format(type(each_mol), repr(each_mol), each_name),
                current_verbosity=verbosity,
                msg_verbosity=os_util.verbosity_level.error)
            raise TypeError('Ligand must be str, dict, rdkit.Chem.Mol or all_classes.Namespace')

        if docking_mol_rd is not None:
            os_util.local_print("Read molecule {} from {}"
                                "".format(each_name, each_mol),
                                current_verbosity=verbosity,
                                msg_verbosity=os_util.verbosity_level.info)
            docking_mol_rd = mol_util.process_dummy_atoms(docking_mol_rd,
                                                          verbosity=verbosity)
            docking_mol_local[each_name] = docking_mol_rd

            os_util.local_print('{:<15} {:<18}'.format(each_name,
                                                       str(each_mol)),
                                msg_verbosity=os_util.verbosity_level.default,
                                current_verbosity=verbosity)
            os_util.local_print('Read molecule {} (SMILES: {}) from file {}'
                                ''.format(
                                    each_name,
                                    rdkit.Chem.MolToSmiles(docking_mol_rd),
                                    each_mol),
                                msg_verbosity=os_util.verbosity_level.debug,
                                current_verbosity=verbosity)

        elif no_checks:
            os_util.local_print(
                'Could not read data in {} using rdkit. Falling back to openbabel. It is strongly '
                'advised you to check your file and convert it to a valid mol2.'
                ''.format(str(each_mol)),
                msg_verbosity=os_util.verbosity_level.warning,
                current_verbosity=verbosity)
            import pybel

            # Unless running with a verbosity above 3, limit the openbabel log to the error level
            if verbosity <= 3:
                pybel.ob.obErrorLog.SetOutputLevel(pybel.ob.obError)
            try:
                if isinstance(each_mol, str):
                    ligand_format = splitext(each_mol)[1].lstrip('.').lower()
                    docking_mol_ob = next(pybel.readfile(ligand_format,
                                                         each_mol))
                elif isinstance(each_mol, all_classes.Namespace):
                    docking_mol_ob = pybel.readstring(each_mol.format,
                                                      each_mol.data)
                else:
                    os_util.local_print(
                        "Could not understand type {} (repr: {}) for your ligand {}"
                        "".format(type(each_mol), repr(each_mol), each_name),
                        current_verbosity=verbosity,
                        msg_verbosity=os_util.verbosity_level.error)
                    raise TypeError(
                        'Ligand must be str or all_classes.Namespace')
            except (OSError, StopIteration) as error_data:
                # no_checks is always set in this branch, so the failure is only reported and the ligand is left
                # out of the result
                os_util.local_print(
                    'Could not read your ligand {} from {} using rdkit nor openbabel. Please '
                    'check/convert your ligand file. Openbabel error was: {}'
                    ''.format(each_name, str(each_mol), error_data),
                    msg_verbosity=os_util.verbosity_level.error,
                    current_verbosity=verbosity)
            else:
                # Convert and convert back to apply mol_util.process_dummy_atoms
                docking_mol_rd = mol_util.process_dummy_atoms(
                    mol_util.obmol_to_rwmol(docking_mol_ob),
                    verbosity=verbosity)
                docking_mol_local[each_name] = docking_mol_rd

                # Show the comment of the ligand, if there is one. Always format a str, as a width cannot be
                # applied to an arbitrary object.
                if isinstance(each_mol, dict):
                    pose_source = str(each_mol.get('comment', each_mol))
                else:
                    pose_source = str(each_mol)

                os_util.local_print(
                    '{:<15} {:<18}'
                    ''.format(each_name, pose_source),
                    msg_verbosity=os_util.verbosity_level.default,
                    current_verbosity=verbosity)
                os_util.local_print(
                    'Extracted molecule {} (SMILES: {}) using openbabel fallback from {}.'
                    ''.format(each_name,
                              rdkit.Chem.MolToSmiles(docking_mol_rd),
                              str(each_mol)),
                    msg_verbosity=os_util.verbosity_level.debug,
                    current_verbosity=verbosity)
        else:
            os_util.local_print(
                'Could not read data in {} using rdkit. Please, check your file and convert it to a '
                'valid mol2. (You can also use "no_checks" to enable reading using pybel)'
                ''.format(str(each_mol)),
                msg_verbosity=os_util.verbosity_level.error,
                current_verbosity=verbosity)
            raise SystemExit(-1)

    return docking_mol_local
Ejemplo n.º 20
0
def superimpose_poses(ligand_data,
                      reference_pose_mol,
                      save_state=None,
                      num_threads=0,
                      num_conformers=200,
                      verbosity=0,
                      **kwargs):
    """ Superimpose the ligands in ligand_data to a reference pose, reusing data from save_state when possible

    :param dict ligand_data: dict with the ligands
    :param str reference_pose_mol: file with reference pose to be used
    :param savestate_utils.SavableState save_state: object with saved data
    :param int num_threads: use this much threads
    :param int num_conformers: generate this much trial conformers to find a best shape match
    :param int verbosity: sets the verbosity level
    :param kwargs: passed along to merge_topologies.constrained_embed_shapeselect. 'mcs' (None) and
                   'superimpose_atom_map' (empty dict) are added to it when absent.
    :rtype: dict
    """

    os_util.local_print(
        'Entering superimpose_poses(ligand_data={}, reference_pose_superimpose={}, save_state={}, '
        'verbosity={}, kwargs={})'
        ''.format(ligand_data, reference_pose_mol, save_state, verbosity, kwargs),
        msg_verbosity=os_util.verbosity_level.debug,
        current_verbosity=verbosity)

    # Defaults: no MCS and no custom atom maps
    for option_name, option_default in (('mcs', None), ('superimpose_atom_map', {})):
        kwargs.setdefault(option_name, option_default)

    def load_reference_pose():
        """ Reads the reference pose using extract_docking_poses """
        reference_input = {'reference': {'molecule': reference_pose_mol}}
        return extract_docking_poses(reference_input, verbosity=verbosity)['reference']

    if not save_state:
        # Nothing to load from or save to, just read the reference pose
        reference_mol = load_reference_pose()
    else:
        # Look for a reference pose stored by a previous run
        reference_mol = None
        if 'superimpose_data' in save_state:
            try:
                stored_reference_path = save_state['superimpose_data']['reference_pose_path']
            except KeyError:
                # Without a reference_pose_path, the saved superimpose data cannot be trusted at all
                os_util.local_print(
                    'Unexpected data strucuture in {}. The entry for superimpose data is corrupted.'
                    ' Trying to fix and going on.'
                    ''.format(save_state.data_file),
                    msg_verbosity=os_util.verbosity_level.warning,
                    current_verbosity=verbosity)
            else:
                if stored_reference_path == reference_pose_mol:
                    # Same reference pose as before, so the saved data can be used
                    reference_mol = save_state['superimpose_data']['reference_pose_superimpose']

                    # NOTE(review): an empty saved entry is only discarded when verbosity > 0, so the data being
                    # reused depends on the verbosity level. Confirm whether this is intended.
                    if len(save_state['superimpose_data']['ligand_dict']) == 0 and verbosity > 0:
                        os_util.local_print(
                            'No ligand poses were saved from previous run in file {}. I found a entry '
                            'for superimpose data, but it is empty.'
                            ''.format(save_state.data_file),
                            msg_verbosity=os_util.verbosity_level.warning,
                            current_verbosity=verbosity)
                        reference_mol = None

        if reference_mol is None:
            # No usable saved data: start a fresh superimpose_data entry, also referenced by a timestamped key
            from time import strftime
            backup_key = 'superimpose_data_{}'.format(strftime('%d%m%Y_%H%M%S'))
            save_state['superimpose_data'] = {}
            save_state['superimpose_data']['reference_pose_path'] = reference_pose_mol
            reference_mol = load_reference_pose()
            save_state['superimpose_data']['reference_pose_superimpose'] = reference_mol
            save_state['superimpose_data']['ligand_dict'] = {}
            save_state[backup_key] = save_state['superimpose_data']

        # Store what was done so far
        save_state.save_data()

    if not reference_mol.HasProp('_Name'):
        reference_mol.SetProp('_Name', '<Superimpose reference pose>')

    # Read the ligands to be superimposed
    ligand_mols = extract_docking_poses(ligand_data, verbosity=verbosity)
    superimposed_poses = {}

    os_util.local_print('{:=^50}\n{:<15} {:<25} {:<15}'
                        ''.format(' Superimposed poses ', 'Name', 'File', 'Note'),
                        msg_verbosity=os_util.verbosity_level.default,
                        current_verbosity=verbosity)

    for ligand_name, ligand_mol in ligand_mols.items():

        if save_state:
            # Use the pose from a previous run, if there is one for this ligand
            try:
                stored_pose = save_state['superimpose_data']['ligand_dict'][ligand_name]
            except KeyError:
                os_util.local_print('Could not find data for ligand {} in {}'
                                    ''.format(ligand_name, save_state.data_file),
                                    msg_verbosity=os_util.verbosity_level.info,
                                    current_verbosity=verbosity)
            else:
                superimposed_poses[ligand_name] = stored_pose
                os_util.local_print(
                    '{:<15} {:<25} {:<15}'
                    ''.format(ligand_name, str(ligand_data[ligand_name]), 'Read from saved state'),
                    msg_verbosity=os_util.verbosity_level.default,
                    current_verbosity=verbosity)
                continue

        # A custom atom map for this ligand, if any, is taken from superimpose_atom_map
        ligand_atom_map = kwargs['superimpose_atom_map'].get(ligand_name)

        superimposed_mol = merge_topologies.constrained_embed_shapeselect(
            ligand_mol,
            reference_mol,
            num_threads=num_threads,
            save_state=save_state,
            num_conformers=num_conformers,
            verbosity=verbosity,
            atom_map=ligand_atom_map,
            **kwargs)
        superimposed_poses[ligand_name] = superimposed_mol

        if save_state:
            # Keep the superimposed rdkit Mol, saving after each ligand
            save_state['superimpose_data']['ligand_dict'][ligand_name] = superimposed_mol
            save_state.save_data()

        os_util.local_print('{:<15} {:<15}'.format(ligand_name, str(ligand_data[ligand_name])),
                            msg_verbosity=os_util.verbosity_level.default,
                            current_verbosity=verbosity)

    os_util.local_print('=' * 50,
                        msg_verbosity=os_util.verbosity_level.default,
                        current_verbosity=verbosity)

    return superimposed_poses
Ejemplo n.º 21
0
    def update_pertubation_image(self,
                                 mol_a_name,
                                 mol_b_name,
                                 core_smarts=None,
                                 save=False,
                                 verbosity=0,
                                 **kwargs):
        """ Generate mol images describing a perturbation between the ligand pair

        Two SVG images are drawn for each molecule of the pair, highlighting the atoms not matched by the common
        core: one of the molecule as stored and one after removing hydrogens. The images are stored in
        self.ligands_data[name]['images']['perturbations'][other_name][core_smarts], under the keys '2d_hs' and
        '2d_nohs'. Nothing is drawn if both molecules already have a non-empty entry for this pair and core.

        :param str mol_a_name: name of the molecule A
        :param str mol_b_name: name of the molecule B
        :param str core_smarts: use this smarts as common core (Default: find the core using find_mcs)
        :param bool save: automatically save data
        :param int verbosity: controls verbosity level
        :param kwargs: passed along to find_mcs when core_smarts is not given
        :rtype: None
        """

        self.ligands_data[mol_a_name].setdefault('images', {})
        self.ligands_data[mol_a_name]['images'].setdefault('perturbations', {})
        self.ligands_data[mol_b_name].setdefault('images', {})
        self.ligands_data[mol_b_name]['images'].setdefault('perturbations', {})

        import rdkit.Chem
        # Work on copies, so that the stored molecules are left untouched
        this_mol_a = rdkit.Chem.Mol(self.ligands_data[mol_a_name]['molecule'])
        this_mol_b = rdkit.Chem.Mol(self.ligands_data[mol_b_name]['molecule'])

        if core_smarts is None:
            # Get core_smarts using find_mcs
            from merge_topologies import find_mcs
            this_mol_a.RemoveAllConformers()
            this_mol_b.RemoveAllConformers()
            core_smarts = find_mcs([this_mol_a, this_mol_b],
                                   savestate=self,
                                   verbosity=verbosity,
                                   **kwargs).smartsString

        def has_images(this_name, other_name):
            """ Tests whether there are images of this_name for its perturbation to other_name and this core """
            try:
                stored_images = self.ligands_data[this_name]['images']['perturbations'][other_name][core_smarts]
            except KeyError:
                return False
            return len(stored_images) > 0

        # Explicit test, instead of assert statements, so that it also works when asserts are disabled (python -O)
        if has_images(mol_a_name, mol_b_name) and has_images(mol_b_name, mol_a_name):
            return None

        os_util.local_print(
            'Perturbation images for molecules {} and {} with common core "{}" were not found. '
            'Generating it.'.format(mol_a_name, mol_b_name, core_smarts),
            msg_verbosity=os_util.verbosity_level.debug,
            current_verbosity=verbosity)

        from rdkit.Chem.Draw import MolDraw2DSVG
        from rdkit.Chem.AllChem import Compute2DCoords, GenerateDepictionMatching2DStructure

        core_mol = rdkit.Chem.MolFromSmarts(core_smarts)
        Compute2DCoords(core_mol)
        core_mol_nohs = rdkit.Chem.RemoveHs(core_mol)

        for each_name, each_mol, other_mol in zip([mol_a_name, mol_b_name],
                                                  [this_mol_a, this_mol_b],
                                                  [mol_b_name, mol_a_name]):
            GenerateDepictionMatching2DStructure(each_mol,
                                                 core_mol,
                                                 acceptFailure=True)

            # Draw mol with hydrogens. The core is matched only once, not once for each atom.
            draw_2d_svg = MolDraw2DSVG(300, 150)
            draw_2d_svg.drawOptions().addStereoAnnotation = True
            core_atoms = set(each_mol.GetSubstructMatch(core_mol))
            not_common_atoms = [
                i.GetIdx() for i in each_mol.GetAtoms()
                if i.GetIdx() not in core_atoms
            ]
            draw_2d_svg.DrawMolecule(each_mol,
                                     legend=each_name,
                                     highlightAtoms=not_common_atoms)
            draw_2d_svg.FinishDrawing()
            svg_data_hs = draw_2d_svg.GetDrawingText()

            # Draw mol without hydrogens
            draw_2d_svg = MolDraw2DSVG(300, 150)
            draw_2d_svg.drawOptions().addStereoAnnotation = True
            each_mol = rdkit.Chem.RemoveHs(each_mol)
            core_atoms = set(each_mol.GetSubstructMatch(core_mol_nohs))
            not_common_atoms = [
                i.GetIdx() for i in each_mol.GetAtoms()
                if i.GetIdx() not in core_atoms
            ]
            draw_2d_svg.DrawMolecule(each_mol,
                                     legend=each_name,
                                     highlightAtoms=not_common_atoms)
            draw_2d_svg.FinishDrawing()
            svg_data_nohs = draw_2d_svg.GetDrawingText()

            perturbation_imgs = self.ligands_data[each_name]['images'][
                'perturbations']
            perturbation_imgs.setdefault(other_mol, {})[core_smarts] = {
                '2d_hs': svg_data_hs,
                '2d_nohs': svg_data_nohs
            }

        if save:
            self.save_data()
Ejemplo n.º 22
0
def align_protein(mobile_mol,
                  reference_mol,
                  align_method='openbabel',
                  seq_align_mat='BLOSUM80',
                  gap_penalty=-1,
                  verbosity=0):
    """ Align mobile_mol to reference_mol using method defined in align_method. Defaults to openbabel.OBAlign, which is
    fastest. rdkit's GetAlignmentTransform is much slower and may not work on larger systems.

    The transformation is returned rather than applied here. The caller is expected to translate by
    centering_vector, rotate by rotation_matrix and then translate by translation_vector.

    :param [rdkit.RWMol, pybel.Molecule] mobile_mol: molecule to be aligned
    :param [rdkit.RWMol, pybel.Molecule] reference_mol: molecule to be used as alignment reference
    :param str align_method: method to be used, options are 'openbabel', 'rdkit' (the latter is not implemented yet)
    :param str seq_align_mat: use this matrix to sequence alignment, only used if sequences differ. Any value from
                                    Bio.SubsMat.MatrixInfo
    :param float gap_penalty: use this gap penalty to sequence alignment, only used if sequences differ.
    :param int verbosity: sets verbosity level
    :raises SystemExit: if a molecule cannot be converted (rdkit method) or the alignment fails (openbabel method)
    :raises NotImplementedError: if align_method is 'rdkit'
    :raises ValueError: if align_method is unknown
    :rtype: dict
    :return: dict with keys 'centering_vector' (minus the mean of the mobile coordinates used in the alignment),
             'translation_vector' (mean of the reference coordinates used in the alignment) and 'rotation_matrix'
             (the 3x3 rotation matrix flattened row by row into a list of 9 values)
    """

    os_util.local_print(
        'Entering align_protein(mobile_mol={}, reference_mol={}, align_method={}, verbosity={})'
        ''.format(mobile_mol.title, reference_mol.title, align_method,
                  verbosity),
        msg_verbosity=os_util.verbosity_level.debug,
        current_verbosity=verbosity)

    if align_method == 'rdkit':
        # Uses rdkit.Chem.rdMolAlign.GetAlignmentTransform to align mobile_mol to reference_mol
        import rdkit.Chem.rdMolAlign
        reference_mol_rwmol = obmol_to_rwmol(reference_mol)
        if reference_mol_rwmol is None:
            os_util.local_print('Could not internally convert reference_mol',
                                msg_verbosity=os_util.verbosity_level.error,
                                current_verbosity=verbosity)
            if verbosity >= os_util.verbosity_level.info:
                os_util.local_print('Dumping data to receptor_mol_error.pdb',
                                    msg_verbosity=os_util.verbosity_level.info,
                                    current_verbosity=verbosity)
                # Write actual PDB data to the .pdb file and do not let a leftover dump from a previous run mask
                # the real error
                reference_mol.write('pdb',
                                    'receptor_mol_error.pdb',
                                    overwrite=True)
            raise SystemExit(1)

        mobile_mol_rwmol = obmol_to_rwmol(mobile_mol)
        if mobile_mol_rwmol is None:
            os_util.local_print(
                'Could not internally convert OpenBabel mobile_mol to a RDKit.Chem.Mol object.',
                msg_verbosity=os_util.verbosity_level.error,
                current_verbosity=verbosity)
            raise SystemExit(1)

        os_util.local_print(
            'Done reading and converting reference_mol {} and mobile_mol {}'
            ''.format(reference_mol_rwmol.GetProp('_Name'),
                      mobile_mol_rwmol.GetProp('_Name')),
            msg_verbosity=os_util.verbosity_level.debug,
            current_verbosity=verbosity)

        # FIXME: implement this
        # NOTE(review): GetAlignmentTransform is documented as taking (prbMol, refMol); confirm the argument order
        # when this branch gets implemented. The result is currently unused.
        transformation_mat = rdkit.Chem.rdMolAlign.GetAlignmentTransform(
            reference_mol_rwmol, mobile_mol_rwmol)
        raise NotImplementedError('rdkit aligment method not implemented')

    elif align_method == 'openbabel':
        # FIXME: implement a Biopython-only method
        from openbabel import OBAlign
        import pybel

        # The FASTA output has a header line followed by the (possibly wrapped) sequence; drop the header and
        # join the remaining lines
        reference_mol_seq = reference_mol.write('fasta').split(
            '\n', 1)[1].replace('\n', '')
        mobile_mol_seq = mobile_mol.write('fasta').split('\n', 1)[1].replace(
            '\n', '')

        if reference_mol_seq != mobile_mol_seq:
            os_util.local_print(
                'Aminoacid sequences of {} and {} differs:\nReference: {}\nMobile: {}'
                ''.format(reference_mol.title, mobile_mol.title,
                          reference_mol_seq, mobile_mol_seq),
                msg_verbosity=os_util.verbosity_level.info,
                current_verbosity=verbosity)
            mob_align_str, ref_align_str = align_sequences_match_residues(
                mobile_mol_seq,
                reference_mol_seq,
                seq_align_mat=seq_align_mat,
                gap_penalty=gap_penalty,
                verbosity=verbosity)

        else:
            # Identical sequences, no residue selection is needed
            ref_align_str = None
            mob_align_str = None

        # Creates a new molecule containing only the selected atoms of both proteins
        ref_atom_vec = get_position_matrix(reference_mol, ref_align_str)
        reference_mol_vec = pybel.ob.vectorVector3(ref_atom_vec)
        mob_atom_vec = get_position_matrix(mobile_mol, mob_align_str)
        mobile_mol_vec = pybel.ob.vectorVector3(mob_atom_vec)
        os_util.local_print('Done extracting Ca from {} and {}'.format(
            reference_mol.title, mobile_mol.title),
                            msg_verbosity=os_util.verbosity_level.debug,
                            current_verbosity=verbosity)

        # Align mobile to reference using the Ca coordinates
        align_obj = OBAlign(reference_mol_vec, mobile_mol_vec)
        if not align_obj.Align():
            os_util.local_print(
                'Failed to align mobile_mol {} to reference_mol {}'
                ''.format(mobile_mol.title, reference_mol.title),
                msg_verbosity=os_util.verbosity_level.error,
                current_verbosity=verbosity)
            raise SystemExit(1)

        os_util.local_print('Alignment RMSD is {}'.format(align_obj.GetRMSD()),
                            msg_verbosity=os_util.verbosity_level.info,
                            current_verbosity=verbosity)

        # Prepare translation and rotation matrices. The rotation is applied around the origin, so the mobile
        # coordinates are first centered and, after rotating, moved to the center of the reference coordinates.
        reference_mol_center = numpy.array([[a.GetX(),
                                             a.GetY(),
                                             a.GetZ()]
                                            for a in reference_mol_vec
                                            ]).mean(0)
        mobile_mol_center = numpy.array([[a.GetX(),
                                          a.GetY(),
                                          a.GetZ()]
                                         for a in mobile_mol_vec]).mean(0)
        translation_vector = pybel.ob.vector3(*reference_mol_center.tolist())
        centering_vector = pybel.ob.vector3(*(-mobile_mol_center).tolist())
        rot_matrix = align_obj.GetRotMatrix()
        # Flatten the 3x3 rotation matrix row by row
        rot_vector_1d = [
            rot_matrix.Get(i, j) for i in range(3) for j in range(3)
        ]

        os_util.local_print(
            'Alignment data:\n\tReference: {}\n\tMobile: {}\n\tCentering: {}\n\tTranslation: {}'
            '\n\tRotation matrix:\n\t\t{}, {}, {}\n\t\t{}, {}, {}\n\t\t{}, {}, {}'
            ''.format(reference_mol_center, mobile_mol_center,
                      centering_vector, translation_vector, *rot_vector_1d),
            current_verbosity=verbosity,
            msg_verbosity=os_util.verbosity_level.debug)

        return {
            'centering_vector': centering_vector,
            'translation_vector': translation_vector,
            'rotation_matrix': rot_vector_1d
        }

    else:
        # TODO implement a internal alignment method
        os_util.local_print(
            'Unknown alignment method {}. Currently, only "openbabel" is allowed.'
            .format(align_method),
            current_verbosity=verbosity,
            msg_verbosity=os_util.verbosity_level.error)
        raise ValueError('Unknown alignment method {}.'.format(align_method))
Ejemplo n.º 23
0
def extract_pdb_poses(poses_data,
                      reference_structure,
                      ligand_residue_name='LIG',
                      verbosity=0,
                      **kwargs):
    """ Read ligand poses from receptor-ligand complex files and superimpose them on a reference structure

    For each file, the first residue named ligand_residue_name is extracted as the ligand, the whole complex is
    aligned to reference_structure using align_protein, and the resulting transformation is applied to the ligand.

    :param dict poses_data: dict with the files bearing the poses and the receptor, potentially in a different
                            orientation and conformation. Keys are ligand names, values are file paths (pdb,
                            pdbqt or mol2)
    :param pybel.Molecule reference_structure: structure the complexes will be aligned to
    :param str ligand_residue_name: the residues name of the ligand
    :param int verbosity: sets verbosity level
    :param kwargs: seq_align_mat (default: 'blosum80') and gap_penalty (default: -1) are passed to align_protein
    :raises SystemExit: if a file has an unsupported format, cannot be read or has no residue named
                        ligand_residue_name
    :rtype: dict
    :return: dict with the ligand names as keys and the aligned ligands, as pybel.ob.OBMol, as values
    """

    os_util.local_print('{:=^50}\n{:<15} {:<20}'.format(
        ' Poses read ', 'Name', 'File'),
                        msg_verbosity=os_util.verbosity_level.default,
                        current_verbosity=verbosity)

    # Iterate over the (key, value) pairs; iterating over the dict itself would yield only the keys
    for k, v in {'seq_align_mat': 'blosum80', 'gap_penalty': -1}.items():
        kwargs.setdefault(k, v)

    docking_mol_local = {}
    # Iterate over the dict, reading the poses
    for ligand_name, ligand_dict in poses_data.items():
        # The file format is taken from the file extension; pdbqt files are read as pdb
        receptor_format = ligand_dict.split('.')[-1]
        if receptor_format == 'pdbqt':
            receptor_format = 'pdb'

        # pdb and mol2 fills OBResidue, does any other format file do? If so, we have to add it to this list
        if receptor_format not in ['pdb', 'mol2']:
            os_util.local_print(
                'Using pdb_loader requires a pdb or a mol2 file, but you supplied {}. Try using '
                'generic_loader or converting your input files'
                ''.format(receptor_format),
                msg_verbosity=os_util.verbosity_level.error,
                current_verbosity=verbosity)
            raise SystemExit(1)

        try:
            # Only the first molecule in the file is used
            this_pdb_data = next(pybel.readfile(receptor_format, ligand_dict))
        except IOError as error_data:
            os_util.local_print('Could not read {}. Error: {}'.format(
                ligand_dict, error_data),
                                msg_verbosity=os_util.verbosity_level.error,
                                current_verbosity=verbosity)
            raise SystemExit(1)
        else:
            # Iterates over all residues looking for ligand_name. Note: this will select the first residue named
            # ligand_name.
            lig_residue = None
            for each_res in pybel.ob.OBResidueIter(this_pdb_data.OBMol):
                if each_res.GetName() == ligand_residue_name:
                    lig_residue = each_res
                    break
            else:
                # For was not break, we did not find ligand_name
                os_util.local_print(
                    'Could not find ligand molecule {} in file {}\nI have read the following '
                    'residues: {}\n'
                    ''.format(
                        ligand_name, ligand_dict, ', '.join([
                            this_pdb_data.OBMol.GetResidue(i).GetName()
                            for i in range(this_pdb_data.OBMol.NumResidues())
                        ])),
                    msg_verbosity=os_util.verbosity_level.error,
                    current_verbosity=verbosity)
                raise SystemExit(1)

            # Collect the ids of the ligand atoms once, then mark every other atom for deletion
            ligand_atom_ids = {
                atom_in_res.GetIdx()
                for atom_in_res in pybel.ob.OBResidueAtomIter(lig_residue)
            }
            dellist = [
                each_atom.GetIdx()
                for each_atom in pybel.ob.OBMolAtomIter(this_pdb_data.OBMol)
                if each_atom.GetIdx() not in ligand_atom_ids
            ]

            # Work on a copy, so the complex is kept intact for the alignment. Atoms are deleted from the last to
            # the first, so that deleting one does not change the id of those still to be deleted.
            ligand_ob_molecule = pybel.ob.OBMol(this_pdb_data.OBMol)
            for a in reversed(dellist):
                ligand_ob_molecule.DeleteAtom(ligand_ob_molecule.GetAtom(a))

            docking_mol_local[ligand_name] = ligand_ob_molecule
            align_data = align_protein(this_pdb_data,
                                       reference_structure,
                                       seq_align_mat=kwargs['seq_align_mat'],
                                       gap_penalty=kwargs['gap_penalty'],
                                       verbosity=verbosity)
            # Apply to the ligand the transformation found for the whole complex: center, rotate, then move to
            # the reference position
            docking_mol_local[ligand_name].Translate(
                align_data['centering_vector'])
            docking_mol_local[ligand_name].Rotate(
                pybel.ob.double_array(align_data['rotation_matrix']))
            docking_mol_local[ligand_name].Translate(
                align_data['translation_vector'])

            os_util.local_print('{:<15} {:<20}\n'.format(
                ligand_name, ligand_dict),
                                msg_verbosity=os_util.verbosity_level.default,
                                current_verbosity=verbosity)

    return docking_mol_local
Ejemplo n.º 24
0
def obmol_to_rwmol(openbabel_obmol, verbosity=0):
    """Converts a openbabel.OBMol to rdkit.RWMol

    Atoms, formal charges, aromaticity flags, bonds, coordinates, molecule data and title are copied. Massless
    atoms (optionally also identified by an atom name starting with LP or XX) are converted to dummy atoms and
    their bonds are set to UNSPECIFIED. Atoms without any bond are connected to their closest atom.

    Parameters
    ----------
    openbabel_obmol : pybel.ob.OBMol or pybel.Molecule
        The OBMol to be converted. If a rdkit.Chem.Mol is supplied, it is returned unchanged.
    verbosity : int
        Sets verbosity level

    Returns
    -------
    rdkit.Chem.Mol
        Converted molecule

    Raises
    ------
    ValueError
        If openbabel_obmol is of an unsupported type or if no conformer could be generated for the new molecule
    """

    import numpy
    import pybel

    if isinstance(openbabel_obmol, rdkit.Chem.Mol):
        os_util.local_print('Entering obmol_to_rwmol. Molecule {} (Props: {}) is already a rdkit.Chem.Mol object!'
                            ''.format(openbabel_obmol, openbabel_obmol.GetPropsAsDict()),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.warning)
        return openbabel_obmol
    elif isinstance(openbabel_obmol, pybel.Molecule):
        openbabel_obmol = openbabel_obmol.OBMol
    elif not isinstance(openbabel_obmol, pybel.ob.OBMol):
        os_util.local_print('Entering obmol_to_rwmol. Molecule {} is a {}, but pybel.Molecule or pybel.ob.OBMol '
                            'required.'
                            ''.format(openbabel_obmol, type(openbabel_obmol)),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.error)
        raise ValueError('pybel.Molecule or pybel.ob.OBMol expected, got {} instead'.format(type(openbabel_obmol)))

    # Set some lookups
    _bondtypes = {0: rdkit.Chem.BondType.UNSPECIFIED,
                  1: rdkit.Chem.BondType.SINGLE,
                  2: rdkit.Chem.BondType.DOUBLE,
                  3: rdkit.Chem.BondType.TRIPLE,
                  5: rdkit.Chem.BondType.AROMATIC}
    # Not used yet, kept for the stereochemistry assignment (see FIXME below)
    _bondstereo = {0: rdkit.Chem.rdchem.BondStereo.STEREONONE,
                   1: rdkit.Chem.rdchem.BondStereo.STEREOE,
                   2: rdkit.Chem.rdchem.BondStereo.STEREOZ}

    rdmol = rdkit.Chem.Mol()
    rdedmol = rdkit.Chem.RWMol(rdmol)

    # Use pybel write to trigger residue data evaluation, otherwise we get and StopIteration error
    pybel.Molecule(openbabel_obmol).write('pdb')
    try:
        # Atom names are read from the first residue only
        residue_iter = next(pybel.ob.OBResidueIter(openbabel_obmol))
    except StopIteration:
        os_util.local_print('Could not read atom names from molecule "{}" (Smiles: {})'
                            ''.format(openbabel_obmol.GetTitle(), pybel.Molecule(openbabel_obmol).write('smi')),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.warning)
        residue_iter = None

    # Assign atoms
    dummy_atoms = set()
    for index, each_atom in enumerate(pybel.ob.OBMolAtomIter(openbabel_obmol)):
        # Atom names are only available if residue data could be read
        atom_name = residue_iter.GetAtomID(each_atom) if residue_iter is not None else None
        is_massless = each_atom.GetAtomicMass() == 0

        if atom_name is not None and atom_name[0:2].upper() in ['LP', 'XX'] and is_massless:
            dummy_atoms.add(index)
            rdatom = rdkit.Chem.MolFromSmarts('*').GetAtomWithIdx(0)
            os_util.local_print('Atom {} was detected as a lone pair because of its name {} and its mass {}'
                                ''.format(index, atom_name, each_atom.GetAtomicMass()),
                                current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.info)

        elif atom_name is None and is_massless:
            dummy_atoms.add(index)
            rdatom = rdkit.Chem.MolFromSmarts('*').GetAtomWithIdx(0)
            os_util.local_print('Atom {} was detected as a lone pair because of its mass {} (Note: it was not possible '
                                'to read atom name)'
                                ''.format(index, each_atom.GetAtomicMass()),
                                current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.info)

        else:
            rdatom = rdkit.Chem.Atom(each_atom.GetAtomicNum())

        new_atom = rdedmol.GetAtomWithIdx(rdedmol.AddAtom(rdatom))
        new_atom.SetFormalCharge(each_atom.GetFormalCharge())
        if atom_name is not None:
            # The name belongs to this atom, so it is stored as an atom property rather than on the molecule
            new_atom.SetProp('_TriposAtomName', atom_name)
        if each_atom.IsAromatic():
            new_atom.SetIsAromatic(True)

    os_util.local_print('[DEBUG] These are the dummy atoms detected: dummy_atoms={}'.format(dummy_atoms),
                        current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.debug)

    # Assign bonds. One is subtracted from the OpenBabel atom indices to obtain the indices used in the new
    # molecule.
    for each_bond in pybel.ob.OBMolBondIter(openbabel_obmol):
        begin_idx = each_bond.GetBeginAtomIdx() - 1
        end_idx = each_bond.GetEndAtomIdx() - 1
        rdedmol.AddBond(begin_idx, end_idx, _bondtypes[each_bond.GetBondOrder()])
        new_bond = rdedmol.GetBondBetweenAtoms(begin_idx, end_idx)
        if each_bond.IsAromatic():
            new_bond.SetIsAromatic(True)
            new_bond.SetBondType(_bondtypes[5])

        # This bond contains a dummy atom, converting bond to a UNSPECIFIED
        if dummy_atoms.intersection({begin_idx, end_idx}):
            new_bond.SetBondType(_bondtypes[0])
            os_util.local_print('Bond between atoms {} and {} converted to an UNSPECIFIED type'
                                ''.format(begin_idx, end_idx),
                                current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.debug)

    # FIXME: assign stereochemistry

    rdmol = rdedmol.GetMol()

    # Copy coordinates, first generate at least one conformer
    rdkit.Chem.AllChem.EmbedMolecule(rdmol, useRandomCoords=True, maxAttempts=1000, enforceChirality=True,
                                     ignoreSmoothingFailures=True)
    if rdmol.GetNumConformers() != 1:
        os_util.local_print('Failed to generate coordinates to molecule',
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.error)
        raise ValueError('Failed to generate coordinates to molecule')

    # Overwrite the embedded coordinates with the ones from the OpenBabel molecule
    for atom_rdkit, atom_obmol in zip(rdmol.GetAtoms(), pybel.ob.OBMolAtomIter(openbabel_obmol)):
        this_position = rdkit.Geometry.rdGeometry.Point3D()
        this_position.x = atom_obmol.x()
        this_position.y = atom_obmol.y()
        this_position.z = atom_obmol.z()
        rdmol.GetConformer().SetAtomPosition(atom_rdkit.GetIdx(), this_position)

    # Copy data
    for k, v in pybel.MoleculeData(openbabel_obmol).items():
        rdmol.SetProp(k, v)
    rdmol.SetProp('_Name', openbabel_obmol.GetTitle())

    # Connect atoms without any bond to their closest atom. The candidates are listed beforehand, because rdmol
    # is replaced every time a bond is added. A lone atom has nothing to be connected to.
    unbonded_atoms = [each_atom.GetIdx() for each_atom in rdmol.GetAtoms() if not each_atom.GetBonds()]
    if rdmol.GetNumAtoms() < 2:
        unbonded_atoms = []
    for atom_idx in unbonded_atoms:
        if rdmol.GetAtomWithIdx(atom_idx).GetBonds():
            # Already connected when a previous unbonded atom was processed
            continue
        dist_list = numpy.argsort(numpy.array(rdkit.Chem.AllChem.Get3DDistanceMatrix(rdmol)[atom_idx]))
        # First element is the atom itself (distance zero), the second is its closest neighbor
        closer_atom = int(dist_list[1])
        rdedmol = rdkit.Chem.RWMol(rdmol)
        rdedmol.AddBond(atom_idx, closer_atom)
        rdmol = rdedmol.GetMol()
        rdkit.Chem.SanitizeMol(rdmol)

        os_util.local_print('Atom id: {} is not explicitly bonded to any atom in molecule, connecting it to the closer '
                            'atom id: {}'.format(atom_idx, closer_atom),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.warning)

    rdkit.Chem.SanitizeMol(rdmol)

    # Report the SMILES of the molecule which is actually returned
    os_util.local_print("obmol_to_rwmol converted molecule {} (name: {}). Pybel SMILES: {} to rdkit SMILES: {}"
                        "".format(openbabel_obmol, openbabel_obmol.GetTitle(),
                                  pybel.Molecule(openbabel_obmol).write('smi'), rdkit.Chem.MolToSmiles(rdmol)),
                        current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.debug)

    return rdmol
Ejemplo n.º 25
0
        'Minimum number of runs when all edges can be removed (Default: off)')
    optimal_opts.add_argument(
        '--optimal_permanent_edge_threshold',
        type=float,
        default=None,
        help='Edges with this much pheromone become static (Default: off)')
    process_user_input.add_argparse_global_args(Parser)
    arguments = process_user_input.read_options(
        Parser, unpack_section='generate_perturbation_map')

    progress_data = savestate_util.SavableState(arguments.progress_file)

    if arguments.input is None:
        os_util.local_print(
            'No input files were provided. Please, do so by using --input or input option in your '
            'configuration file',
            msg_verbosity=os_util.verbosity_level.error,
            current_verbosity=arguments.verbose)
        raise SystemExit(1)

    if isinstance(arguments.map_communication_frequency,
                  int) and arguments.map_communication_frequency > 0:
        comm_freq = arguments.map_communication_frequency
    elif arguments.map_type != 'star':
        os_util.local_print(
            'Could not understand communication frequency (map_communication_frequency) value '
            '{}. Value must be a positive integer.'
            ''.format(arguments.map_communication_frequency),
            msg_verbosity=os_util.verbosity_level.error,
            current_verbosity=arguments.verbose)
        raise SystemExit(1)
Ejemplo n.º 26
0
def extract_pdb_poses(poses_data,
                      reference_structure,
                      ligand_residue_name='LIG',
                      save_state=None,
                      verbosity=0,
                      **kwargs):
    """ Read ligand poses from receptor-ligand complex files and superimpose them on a reference structure

    For each file, the first residue named ligand_residue_name is extracted as the ligand, the whole complex is
    aligned to reference_structure using align_protein, and the resulting transformation is applied to the ligand.
    If save_state holds poses saved for the same reference file, these are used instead of reading the files.

    :param dict poses_data: dict with the files bearing the poses and the receptor, potentially in a different
                            orientation and conformation. Keys are ligand names, values are file paths (pdb,
                            pdbqt or mol2)
    :param pybel.Molecule reference_structure: structure the complexes will be aligned to. If save_state is used,
                                               reference_structure.data['file_path'] is read to identify it
    :param str ligand_residue_name: the residues name of the ligand
    :param save_state: object holding data saved from previous runs (supports item access and provides data_file
                       and save_data()). Poses are read from and saved to its 'pdbpose_data' entry. Default: None,
                       do not use saved data
    :param int verbosity: sets verbosity level
    :param kwargs: seq_align_mat (default: 'BLOSUM80') and gap_penalty (default: -1) are passed to align_protein
    :raises SystemExit: if a file has an unsupported format, cannot be read or has no residue named
                        ligand_residue_name
    :rtype: dict
    :return: dict with the ligand names as keys and the aligned ligands as values. Ligands read from files are
             pybel.ob.OBMol, while those loaded from save_state are returned as they were saved
    """

    os_util.local_print('{:=^50}\n{:<15} {:<20}'.format(
        ' Poses read ', 'Name', 'File'),
                        msg_verbosity=os_util.verbosity_level.default,
                        current_verbosity=verbosity)

    # Test for data from a previous run
    saved_pose_data = {}
    if save_state and 'pdbpose_data' in save_state:
        try:
            saved_reference_structure = save_state['pdbpose_data'][
                'reference_pose_path']
        except KeyError:
            # Incorrect behavior, there is no reference_pose_path, so we cannot trust in save_state data at all
            os_util.local_print(
                'Unexpected data strucuture in {}. The entry for PDB pose data is corrupted.'
                ' Trying to fix and going on.'
                ''.format(save_state.data_file),
                msg_verbosity=os_util.verbosity_level.warning,
                current_verbosity=verbosity)
        else:
            current_reference = reference_structure.data['file_path']
            if current_reference and saved_reference_structure == current_reference:
                # Reference pose is the same, we can use the data
                saved_ligands = save_state['pdbpose_data']['ligand_dict']
                if len(saved_ligands) == 0:
                    os_util.local_print(
                        'No ligand poses were saved from previous run in file {}. I found a entry '
                        'for pdb pose data, but it is empty.'
                        ''.format(save_state.data_file),
                        msg_verbosity=os_util.verbosity_level.warning,
                        current_verbosity=verbosity)
                else:
                    os_util.local_print(
                        'Reading poses data from {}.'.format(
                            save_state.data_file),
                        msg_verbosity=os_util.verbosity_level.warning,
                        current_verbosity=verbosity)
                    saved_pose_data = saved_ligands
            else:
                os_util.local_print(
                    'PDB poses data from {} was created for reference file {}, while this run uses '
                    '{} as reference file. Cannot use saved data.'
                    ''.format(save_state.data_file, saved_reference_structure,
                              current_reference),
                    msg_verbosity=os_util.verbosity_level.warning,
                    current_verbosity=verbosity)

    for k, v in {'seq_align_mat': 'BLOSUM80', 'gap_penalty': -1}.items():
        kwargs.setdefault(k, v)

    docking_mol_local = {}
    # Iterate over the dict, reading the poses
    for ligand_name, ligand_dict in poses_data.items():
        # Try to load the ligand data from saved state
        try:
            docking_mol_local[ligand_name] = saved_pose_data[ligand_name]
        except KeyError:
            pass
        else:
            os_util.local_print('Readed {} pose from {}'.format(
                ligand_name, save_state.data_file),
                                msg_verbosity=os_util.verbosity_level.info,
                                current_verbosity=verbosity)
            continue

        # The file format is taken from the file extension; pdbqt files are read as pdb
        receptor_format = ligand_dict.split('.')[-1]
        if receptor_format == 'pdbqt':
            receptor_format = 'pdb'

        # pdb and mol2 fills OBResidue, does any other format file do? If so, we have to add it to this list
        if receptor_format not in ['pdb', 'mol2']:
            os_util.local_print(
                'Using pdb_loader requires a pdb or a mol2 file, but you supplied {}. Try using '
                'generic_loader or converting your input files'
                ''.format(receptor_format),
                msg_verbosity=os_util.verbosity_level.error,
                current_verbosity=verbosity)
            raise SystemExit(1)

        try:
            # Only the first molecule in the file is used
            this_pdb_data = next(pybel.readfile(receptor_format, ligand_dict))
        except IOError as error_data:
            os_util.local_print('Could not read {}. Error: {}'.format(
                ligand_dict, error_data),
                                msg_verbosity=os_util.verbosity_level.error,
                                current_verbosity=verbosity)
            raise SystemExit(1)
        else:
            # Iterates over all residues looking for ligand_name. Note: this will select the first residue named
            # ligand_name.
            lig_residue = None
            for each_res in pybel.ob.OBResidueIter(this_pdb_data.OBMol):
                if each_res.GetName() == ligand_residue_name:
                    lig_residue = each_res
                    break
            else:
                # For was not broken, we did not find ligand_name. Report the residue name which was searched for
                # (lig_residue is always None at this point).
                os_util.local_print(
                    'Could not find ligand molecule {} in file {} using the residue name {}. I have '
                    'read the following residues: {}\n'
                    ''.format(
                        ligand_name, ligand_dict, ligand_residue_name,
                        ', '.join([
                            this_pdb_data.OBMol.GetResidue(i).GetName()
                            for i in range(this_pdb_data.OBMol.NumResidues())
                        ])),
                    msg_verbosity=os_util.verbosity_level.error,
                    current_verbosity=verbosity)
                raise SystemExit(1)

            # Collect the ids of the ligand atoms once, then mark every other atom for deletion
            ligand_atom_ids = {
                atom_in_res.GetIdx()
                for atom_in_res in pybel.ob.OBResidueAtomIter(lig_residue)
            }
            dellist = [
                each_atom.GetIdx()
                for each_atom in pybel.ob.OBMolAtomIter(this_pdb_data.OBMol)
                if each_atom.GetIdx() not in ligand_atom_ids
            ]

            # Work on a copy, so the complex is kept intact for the alignment. Atoms are deleted from the last to
            # the first, so that deleting one does not change the id of those still to be deleted.
            ligand_ob_molecule = pybel.ob.OBMol(this_pdb_data.OBMol)
            for a in reversed(dellist):
                ligand_ob_molecule.DeleteAtom(ligand_ob_molecule.GetAtom(a))

            docking_mol_local[ligand_name] = ligand_ob_molecule
            align_data = align_protein(this_pdb_data,
                                       reference_structure,
                                       seq_align_mat=kwargs['seq_align_mat'],
                                       gap_penalty=kwargs['gap_penalty'],
                                       verbosity=verbosity)

            # Apply to the ligand the transformation found for the whole complex: center, rotate, then move to
            # the reference position
            docking_mol_local[ligand_name].Translate(
                align_data['centering_vector'])
            docking_mol_local[ligand_name].Rotate(
                pybel.ob.double_array(align_data['rotation_matrix']))
            docking_mol_local[ligand_name].Translate(
                align_data['translation_vector'])

            os_util.local_print('{:<15} {:<20}'.format(ligand_name,
                                                       ligand_dict),
                                msg_verbosity=os_util.verbosity_level.default,
                                current_verbosity=verbosity)

        # NOTE(review): this runs once per ligand read from file, so all poses read so far are converted and a
        # new timestamped entry is saved on every iteration. Confirm whether saving after each ligand is
        # intended or if this should be done once, after the loop.
        if save_state:
            save_dict = {
                'reference_pose_path': reference_structure.data['file_path'],
                'ligand_dict': {
                    k: rdkit.Chem.PropertyMol.PropertyMol(obmol_to_rwmol(v))
                    for k, v in docking_mol_local.items()
                }
            }
            save_state['pdbpose_data'] = save_dict
            save_state['pdbpose_data_{}'.format(
                strftime('%d%m%Y_%H%M%S'))] = save_dict.copy()
            save_state.save_data()

    return docking_mol_local
Ejemplo n.º 27
0
def run_workers(ant_colony,
                n_runs=-1,
                n_threads=1,
                elitism=-1,
                comm_freq=20,
                verbosity=0):
    """ Run the optimization in rounds, optionally spreading the ants over a multiprocessing pool

    In every round each worker runs comm_freq ants through ant_colony.run_multi_ants. After the round the pheromone
    is evaporated (except after the first round), all solutions are appended to ant_colony.solutions and the best
    ants of each worker deposit pheromone. Nothing is returned; results are accumulated in ant_colony.

    :param all_classes.AntSolver ant_colony: optimizing object
    :param int n_runs: number of optimization ants. Default: -1 = automatically determine (enough for 20 rounds)
    :param int n_threads: number of worker processes; -1 runs everything serially in the current process
    :param elitism: use this many best ants for each parallel run to update pheromone matrix (default: -1: use
                    all); a value between 0 and 1 is taken as a ratio and converted to an integer
    :param int comm_freq: communicate between threads this often (ie: ants run by each worker in a round)
    :param int verbosity: sets verbosity level
    """

    serial_mode = n_threads == -1
    # In serial mode n_threads is only a flag (-1) and must never be used as a count: a negative repeat count
    # produces no work at all and a negative divisor flips the sign of the elitism conversion. The serial mode is
    # therefore handled as exactly one worker.
    n_workers = 1 if serial_mode else n_threads

    if n_runs == -1:
        # Automatically setting n_runs
        n_runs = n_workers * comm_freq * 20

    if 0 < elitism < 1:
        # Elitism was supplied as ratio, convert to int
        elitism = int(n_runs / (comm_freq * n_workers) * elitism)

    n_rounds = int(n_runs / (comm_freq * n_workers))

    if serial_mode:
        os_util.local_print(
            'You are using non-threaded code (ie: threads = -1). The implementation of the ACO '
            'algorithm is slightly different when using the non-threaded code. This should only be '
            'used for developing purposes.',
            msg_verbosity=os_util.verbosity_level.warning,
            current_verbosity=verbosity)

        for run_n in range(n_rounds):
            os_util.local_print('Optimization round {} out of {}'
                                ''.format(run_n + 1, n_rounds),
                                msg_verbosity=os_util.verbosity_level.info,
                                current_verbosity=verbosity)

            # Run a single hive in this process
            results_list = [ant_colony.run_multi_ants(comm_freq)]

            if run_n > 0:
                ant_colony.evaporate_pheromone()

            _store_and_deposit(ant_colony, results_list, elitism)

    else:
        with multiprocessing.Pool(n_threads) as thread_pool:
            for run_n in range(n_rounds):
                os_util.local_print('Optimization round {} out of {}'
                                    ''.format(run_n + 1, n_rounds),
                                    msg_verbosity=os_util.verbosity_level.info,
                                    current_verbosity=verbosity)

                # Run n_threads parallel hives
                results_list = thread_pool.map(ant_colony.run_multi_ants,
                                               [comm_freq] * n_threads)

                if run_n > 0:
                    ant_colony.evaporate_pheromone()

                _store_and_deposit(ant_colony, results_list, elitism)

        # Finish running workers (in case n_runs is not a multiple of comm_freq * n_threads). The remaining ants
        # are run in this process and all of them deposit pheromone (elitism is not applied here).
        if len(ant_colony.solutions) < n_runs:
            results_list = ant_colony.run_multi_ants(n_runs -
                                                     len(ant_colony.solutions))
            ant_colony.solutions.extend(results_list)
            for each_result in results_list:
                ant_colony.deposit_pheromone(each_result.pheromone_multiplier,
                                             each_result.graph)


def _store_and_deposit(ant_colony, results_list, elitism):
    """ Store the solutions of one round and let the best ants of each group deposit pheromone

    :param all_classes.AntSolver ant_colony: optimizing object, edited in place
    :param list results_list: one group (iterable of solutions) per worker
    :param int elitism: number of best (lowest cost) ants per group allowed to deposit pheromone, -1 for all
    """

    # All solutions are stored before any pheromone is deposited
    ant_colony.solutions.extend([
        each_result for each_group in results_list
        for each_result in each_group
    ])

    for each_group in results_list:
        ranked_group = sorted(each_group, key=lambda x: x.cost)
        for n, each_result in enumerate(ranked_group):
            if n < elitism or elitism == -1:
                ant_colony.deposit_pheromone(each_result.pheromone_multiplier,
                                             each_result.graph)
Ejemplo n.º 28
0
def read_options(argument_parser,
                 unpack_section='',
                 user_config_file=None,
                 default_internal_file=None,
                 verbosity=0):
    """ Process configuration files and command line arguments. Resolution order is arguments > user_config_file >
        default_config_file.

    The default configuration file is located using the 'default_config' entry in the 'default' section of the
    internal file, relative to the directory of this module. The contents of the internal file are stored under the
    'internal' key of the result after every other source was merged.

    :param argparse.ArgumentParser argument_parser: parser used to process the command line arguments
    :param str unpack_section: unpack all variables from this section from user_config_file (if present) and
                               default_config_file; when used, only the contents of sections 'globals' and
                               unpack_section are kept from the configuration files
    :param str user_config_file: read this configuration file, takes precedence over default_config_file. If None,
                                 the config_file command line argument is used (when supplied)
    :param str default_internal_file: read internal paths and vars from this file (default: config/internal.ini in
                                      the directory of this module), this will not be superseded by user input
    :param int verbosity: set the verbosity level
    :rtype: all_classes.Namespace
    :raises SystemExit: if a required file cannot be read or if quiet and verbose are both requested
    """

    os_util.local_print(
        'Entering read_options(argument_parser={}, unpack_section={}, user_config_file={}, '
        'default_internal_file={}, verbosity={})'
        ''.format(argument_parser, unpack_section, user_config_file,
                  default_internal_file, verbosity),
        msg_verbosity=os_util.verbosity_level.debug,
        current_verbosity=verbosity)

    internal_path = default_internal_file or os.path.join(
        os.path.dirname(__file__), 'config', 'internal.ini')
    internals = configparser.ConfigParser()
    # ConfigParser.read returns the list of files successfully parsed; an empty list means nothing was read
    if not internals.read(internal_path):
        os_util.local_print(
            'Failed to read internal data file {}. Cannot continue. Check your install, this should not '
            'happen'.format(internal_path),
            current_verbosity=verbosity,
            msg_verbosity=os_util.verbosity_level.error)
        raise SystemExit(-1)

    # Reads command line parameters
    arguments = argument_parser.parse_args()

    # Reads defaults from default_config_file or from default location
    default_config_file = os.path.join(os.path.dirname(__file__),
                                       internals['default']['default_config'])

    if arguments.config_file is not None and user_config_file is None:
        user_config_file = arguments.config_file

    default_parser = configparser.ConfigParser()
    try:
        read_files = default_parser.read(default_config_file)
    except IOError:
        read_files = []
    if not read_files:
        os_util.local_print(
            'Failed to read the configuration file {}. I cannot continue without it.'
            ''.format(default_config_file),
            msg_verbosity=os_util.verbosity_level.error,
            current_verbosity=verbosity)
        raise SystemExit(1)

    # From here on the configuration is handled as a plain dict of sections
    result_data = {
        key: dict(default_parser.items(key))
        for key in default_parser.sections()
    }

    if user_config_file:
        user_file = configparser.ConfigParser()

        if not user_file.read(user_config_file):
            os_util.local_print(
                'Failed to read the configuration file {}. I cannot continue without it.'
                ''.format(user_config_file),
                msg_verbosity=os_util.verbosity_level.error,
                current_verbosity=verbosity)
            raise SystemExit(1)

        result_data = os_util.recursive_update(
            result_data,
            {key: dict(user_file.items(key))
             for key in user_file.sections()})

    if unpack_section:
        # Copy all info in section unpack_section to top level
        result_data = os_util.recursive_update(
            dict(result_data['globals']), dict(result_data[unpack_section]))

    for each_key, each_value in vars(arguments).items():
        # Values supplied in the command line overwrite those read from config files. Arguments not supplied
        # (None) are only used when the config files do not provide the key at all, so that every argument exists
        # in the result.
        if each_value is not None or each_key not in result_data:
            result_data[each_key] = each_value

    # Detect all types in result_data
    result_data = all_classes.Namespace(
        os_util.recursive_map(os_util.detect_type, dict(result_data)))

    # Programmatically set some global variables
    if result_data.verbose is None:
        result_data.verbose = 0

    if result_data.quiet:
        if result_data.verbose > 0:
            os_util.local_print(
                'I cannot be quiet and verbose at once. Please, select only one of them.',
                msg_verbosity=os_util.verbosity_level.error,
                current_verbosity=verbosity)
            raise SystemExit(1)
        result_data.verbose = -1

    if result_data.verbose <= 2:
        from rdkit.rdBase import DisableLog
        DisableLog('rdApp.error')

    if result_data.threads == 0 or result_data.threads is None:
        # Auto detect the number of threads. sched_getaffinity is not available in all platforms.
        if hasattr(os, 'sched_getaffinity'):
            result_data.threads = len(os.sched_getaffinity(0))
        else:
            result_data.threads = os.cpu_count()

    # -1 is accepted as a valid value; anything else which is not a positive int (including a failed detection)
    # falls back to a single thread
    if (type(result_data.threads) is not int or result_data.threads < -1
            or result_data.threads == 0):
        os_util.local_print(
            'Invalid number of threads supplied or detected. Falling back to threads = 1',
            msg_verbosity=os_util.verbosity_level.warning,
            current_verbosity=verbosity)
        result_data.threads = 1

    if result_data.no_checks is None:
        result_data.no_checks = False

    if result_data.progress_file is None:
        result_data.progress_file = 'progress.pkl'

    # Internal data is added last, so it cannot be replaced by config files or command line arguments
    result_data['internal'] = all_classes.Namespace(
        {key: dict(internals.items(key))
         for key in internals.sections()})

    return result_data
Ejemplo n.º 29
0
def fill_thermograph(thermograph,
                     molecules,
                     pairlist=None,
                     use_hs=False,
                     threads=1,
                     custom_mcs=None,
                     savestate=None,
                     verbosity=0):
    """ Add an edge to thermograph for each pair of molecules and assign a cost to every edge

    For each pair, the number of perturbed atoms is the number of atoms of both molecules not in their common core
    (MCS). The MCS comes from custom_mcs, from the data cached in savestate['mcs_dict'] or from find_mcs, in this
    order. Edges are added with attributes perturbed_atoms and desirability=1.0, then a cost attribute equal to
    1 - exp(beta * perturbed_atoms), beta = ln(0.2) / median(perturbed_atoms), is set in all edges. thermograph is
    edited in place and nothing is returned.

    :param networkx.Graph thermograph: map to be edited
    :param dict molecules: molecules will be read from this dict, format {'molname': rdkit.Chem.Mol}
    :param list pairlist: create edges for these pairs (default: create edges for all possible pairs in molecules)
    :param bool use_hs: consider Hs in the perturbation costs (default: False)
    :param int threads: run this many threads (default = 1), -1 runs find_mcs serially in this process
    :param dict custom_mcs: custom mcs and atom maps to be used, keys are frozenset of the two molecule names or '*'
                            to use the same value to all pairs
    :param savestate_util.SavableState savestate: saved state data, new MCS results are stored in it and saved
    :param int verbosity: set verbosity level
    :raises SystemExit: if a pair would have no perturbed atoms
    """

    def smiles_key(mol_i, mol_j):
        # MCS results are indexed by the SMILES of both molecules, not by their names
        return frozenset([
            rdkit.Chem.MolToSmiles(molecules[mol_i]),
            rdkit.Chem.MolToSmiles(molecules[mol_j])
        ])

    # Perturbations will connect larger molecules to smaller ones, by default.
    if not pairlist:
        pairlist = [(mol_i, mol_j) if molecules[mol_i].GetNumHeavyAtoms() >=
                    molecules[mol_j].GetNumHeavyAtoms() else (mol_j, mol_i)
                    for mol_i, mol_j in itertools.combinations(molecules, 2)]

    if custom_mcs is None:
        custom_mcs = {}

    # Resolve the custom MCS of each pair once; a pair-specific entry takes precedence over the '*' wildcard
    custom_by_pair = {}
    for each_pair in pairlist:
        names_key = frozenset(each_pair)
        if names_key in custom_mcs:
            custom_by_pair[names_key] = custom_mcs[names_key]
        elif '*' in custom_mcs:
            custom_by_pair[names_key] = custom_mcs['*']

    cached_mcs = savestate.setdefault('mcs_dict', {}) if savestate else {}

    # Only pairs without a custom or cached MCS need to be computed. A new list is built, so that the pairlist
    # supplied by the caller is never modified.
    todo_pairs = [
        pair for pair in pairlist if frozenset(pair) not in custom_by_pair
        and smiles_key(pair[0], pair[1]) not in cached_mcs
    ]

    mcs_args = [[[molecules[mol_i], molecules[mol_j]], None, verbosity]
                for (mol_i, mol_j) in todo_pairs]
    mcs_kwargs = {
        'completeRingsOnly': True,
        'matchValences': True,
        'ringMatchesRingOnly': True
    }

    if not todo_pairs:
        mcs_data = []
    elif threads == -1:
        mcs_data = map(
            lambda args, kwargs: os_util.wrapper_fn(find_mcs, args, kwargs),
            mcs_args, itertools.repeat(mcs_kwargs))
    else:
        with multiprocessing.Pool(threads) as thread_pool:
            mcs_data = os_util.starmap_unpack(find_mcs, thread_pool, mcs_args,
                                              itertools.repeat(mcs_kwargs))

    computed_mcs = {
        smiles_key(mol_i, mol_j): each_result
        for each_result, (mol_i, mol_j) in zip(mcs_data, todo_pairs)
    }

    if savestate:
        for each_key, each_result in computed_mcs.items():
            savestate['mcs_dict'][each_key] = each_result
        savestate.save_data()
        search_dict = savestate['mcs_dict']
    else:
        search_dict = computed_mcs

    # Custom entries are also made available in search_dict using the pair of names as key (after the state was
    # saved). NOTE(review): kept only in case other code reads these entries from savestate -- confirm
    for names_key, each_mcs in custom_by_pair.items():
        search_dict[names_key] = each_mcs

    for each_mol_i, each_mol_j in pairlist:
        names_key = frozenset([each_mol_i, each_mol_j])
        if names_key in custom_by_pair:
            this_mcs = custom_by_pair[names_key]
        else:
            this_mcs = search_dict[smiles_key(each_mol_i, each_mol_j)]

        # NOTE(review): presumably a custom MCS can be supplied as a bare SMARTS string instead of an MCS result
        # object, so both forms are accepted here -- confirm against callers
        core_mol = rdkit.Chem.MolFromSmarts(
            getattr(this_mcs, 'smartsString', this_mcs))

        if use_hs:
            num_core_atoms = core_mol.GetNumAtoms()
            atoms_i = molecules[each_mol_i].GetNumAtoms()
            atoms_j = molecules[each_mol_j].GetNumAtoms()
        else:
            num_core_atoms = core_mol.GetNumHeavyAtoms()
            atoms_i = molecules[each_mol_i].GetNumHeavyAtoms()
            atoms_j = molecules[each_mol_j].GetNumHeavyAtoms()

        # The edge cost is the number of perturbed atoms in a hypothetical transformation between the pair.
        perturbed_atoms = (atoms_i - num_core_atoms) + (atoms_j -
                                                        num_core_atoms)
        if perturbed_atoms == 0:
            os_util.local_print(
                'The perturbation between {} and {} would change no heavy atoms. Currently, this is '
                'not supported. Should you need to simulate this perturbation, pass perturbation_map '
                'directly to prepare_dual_topology.py'
                ''.format(molecules[each_mol_i].GetProp('_Name'),
                          molecules[each_mol_j].GetProp('_Name')),
                msg_verbosity=os_util.verbosity_level.error,
                current_verbosity=verbosity)
            raise SystemExit(1)
        thermograph.add_edge(each_mol_i,
                             each_mol_j,
                             perturbed_atoms=perturbed_atoms,
                             desirability=1.0)

    all_pert_atoms = [
        i for _, _, i in thermograph.edges(data='perturbed_atoms')
    ]
    if not all_pert_atoms:
        # No edges, so there are no costs to be set (and no median to be calculated)
        return

    # Scale the number of perturbed atoms using beta = ln(0.2) / median(all_pert_atoms), so that the costs are
    # rescaled to be in [0, 1) and an edge with the median number of perturbed atoms gets a cost of 1 - 0.2 = 0.8
    # TODO: configurable beta expression
    beta = -1.6094379 / median(all_pert_atoms)
    for (edge_i, edge_j) in thermograph.edges:
        thermograph[edge_i][edge_j]['cost'] = 1 - exp(
            beta * thermograph[edge_i][edge_j]['perturbed_atoms'])
Ejemplo n.º 30
0
def align_sequences_match_residues(mobile_seq,
                                   target_seq,
                                   seq_align_mat='blosum80',
                                   gap_penalty=-1.0,
                                   verbosity=0):
    """ Globally align two aminoacid sequences using Bio.pairwise2.align.globalds and flag, for each residue of
    each sequence, whether it is paired to a residue (and not to a gap) of the other sequence

    :param str mobile_seq: sequence of mobile protein
    :param str target_seq: sequence of target (reference) protein
    :param str seq_align_mat: name of the substitution matrix from Bio.SubsMat.MatrixInfo to be used
    :param float gap_penalty: gap penalty to the alignment (used to both opening and extending gaps)
    :param int verbosity: sets the verbosity level
    :rtype: tuple
    :return: (mobile, reference), each one a list of bool with one element per residue of the sequence
    :raises ImportError: if Biopython cannot be imported
    :raises KeyError: if seq_align_mat is not found in Bio.SubsMat.MatrixInfo
    :raises SystemExit: if the list of available matrices cannot be imported while reporting a missing matrix
    """
    try:
        from Bio.pairwise2 import align
        seq_align_mat = vars(
            import_module('Bio.SubsMat.MatrixInfo'))[seq_align_mat]
    except ImportError as error:
        import_failed_msg = (
            'Failed to import Biopython with error: {}\nBiopython is necessary to sequence'
            'alignment. Sequences to be aligned:\nReference: {}\nMobile: {}'
            ''.format(error, target_seq, mobile_seq))
        os_util.local_print(import_failed_msg,
                            msg_verbosity=os_util.verbosity_level.error,
                            current_verbosity=verbosity)
        raise ImportError(error)
    except KeyError as error:
        # The matrix name is unknown; list the valid names to the user
        try:
            from Bio.SubsMat.MatrixInfo import available_matrices
        except ImportError:
            os_util.local_print(
                "Failed to import Biopython. The sequences fo your protein structures mismatch, so I "
                "need Biopython to align them. See documentation.",
                msg_verbosity=os_util.verbosity_level.error,
                current_verbosity=verbosity)
            raise SystemExit(1)
        unknown_matrix_msg = (
            'Failed to import substitution matrix {} with error: {}\nSubstitution matrix must be one '
            'from Bio.SubsMat.MatrixInfo (in this installation: {})'
            ''.format(seq_align_mat, error, available_matrices))
        os_util.local_print(unknown_matrix_msg,
                            msg_verbosity=os_util.verbosity_level.error,
                            current_verbosity=verbosity)
        raise KeyError(error)

    # Every handler above raises, so this point is only reached when both imports succeeded
    all_alignments = align.globalds(target_seq, mobile_seq, seq_align_mat,
                                    gap_penalty, gap_penalty)
    align_result = all_alignments[0]
    os_util.local_print(
        'This is the alignment result to be used in protein alignment:\n{}'
        ''.format(align_result),
        msg_verbosity=os_util.verbosity_level.info,
        current_verbosity=verbosity)

    # The first aligned sequence is the reference (target), the second one is the mobile. Walk over the alignment
    # columns once, filling both lists.
    mob_align_str = []
    ref_align_str = []
    for ref_res, mob_res in zip(align_result[0], align_result[1]):
        ref_is_residue = ref_res != '-'
        mob_is_residue = mob_res != '-'
        if ref_is_residue:
            ref_align_str.append(mob_is_residue)
        if mob_is_residue:
            mob_align_str.append(ref_is_residue)

    return mob_align_str, ref_align_str