def remove_bonds(mol, list_of_atomiso_bondsets_to_remove):
    """
    Break the listed bonds of an rdkit mol and return the sanitized result.

    Each bond to break is identified by the isotope labels of its two atoms
    rather than by atom index, so the atoms can be located again after
    earlier cuts have been applied to the working copy.

    Inputs:
    :param rdkit.Chem.rdchem.Mol mol: any rdkit mol (Mol or RWMol). None is
        tolerated and simply returned as None.
    :param list list_of_atomiso_bondsets_to_remove: a list of indexable
        pairs (e.g. lists/tuples). Each pair holds the isotope labels of the
        two atoms whose shared bond should be broken. Empty pairs are
        skipped.

    Returns:
    :returns: rdkit.Chem.rdchem.Mol new_mol: a copy of the input mol with
        the requested bonds broken. None is returned when mol is None, when
        the list is empty, when a bond can not be found/fragmented, or when
        the result fails sanitization.

    Raises:
    :raises TypeError: if mol is neither None nor an rdkit Mol/RWMol, or if
        a non-empty pair does not contain exactly two isotope labels.
    """
    # None's often end up in a pipeline use of RDKit so we handle this data
    # type as return None instead of raise TypeError
    if mol is None:
        return None

    # If mol is wrong data type (excluding None) raise TypeError
    if not isinstance(mol, (rdkit.Chem.rdchem.Mol, rdkit.Chem.rdchem.RWMol)):
        printout = "mol is the wrong data type. \n"
        printout = printout + "Input should be a rdkit.Chem.rdchem.Mol\n"
        printout = printout + "Input mol was {} type.".format(type(mol))
        raise TypeError(printout)

    # Nothing to cut: bail out before paying for a deep copy.
    if len(list_of_atomiso_bondsets_to_remove) == 0:
        return None

    new_mol = copy.deepcopy(mol)
    for atomiso_bondset in list_of_atomiso_bondsets_to_remove:
        if len(atomiso_bondset) == 0:
            continue
        if len(atomiso_bondset) != 2:
            printout = "list_of_atomiso_bondsets_to_remove needs to be 2 isolabels for the atoms"
            raise TypeError(printout)

        # Atom indices are looked up again on every pass via the isotope
        # labels, since new_mol is replaced after each cut.
        atom_1_idx = int(get_atom_w_iso_num(new_mol, atomiso_bondset[0]))
        atom_2_idx = int(get_atom_w_iso_num(new_mol, atomiso_bondset[1]))

        try:
            # FragmentOnBonds expects *bond* indices, so translate the atom
            # pair into the index of the bond that joins them.
            bond = new_mol.GetBondBetweenAtoms(atom_1_idx, atom_2_idx)
            if bond is None:
                # The two labelled atoms are not bonded; nothing valid to cut.
                return None
            new_mol = Chem.FragmentOnBonds(new_mol, [bond.GetIdx()],
                                           addDummies=False)
        except Exception:
            return None

        new_mol = MOH.check_sanitization(new_mol)
        if new_mol is None:
            return None

    # Final check also covers the case where every pair was empty and the
    # loop never sanitized the copy.
    new_mol = MOH.check_sanitization(new_mol)
    if new_mol is None:
        return None
    return new_mol
def run_convert_on_single_pdb(pdb):
    """
    Convert a single ligand PDB file into a SMILES entry.

    The entry is the isomeric SMILES followed by a tab and a name. The name
    is the basename of the file with ".pdb" removed.

    Inputs:
    :param str pdb: path to a pdb file

    Returns:
    :returns: str output_data: "<SMILES>\\t<name>" for the ligand, or None
        if the file could not be read, converted or sanitized.
    """
    try:
        mol = Chem.MolFromPDBFile(pdb)

        mol_sanitized = MOH.check_sanitization(mol)
        if mol_sanitized is None:
            return None

        smiles = Chem.MolToSmiles(mol_sanitized, isomericSmiles=True)
        file_name = os.path.basename(pdb)
        file_stripped = file_name.replace(".pdb", "")
    except Exception:
        # Conversion is best-effort: a ligand that can not be processed is
        # reported as None instead of aborting the whole run.
        return None
    return smiles + "\t" + file_stripped
def get_ligands_from_smi(smi_file):
    """
    Read a .smi file and return the ligands which could be sanitized.

    Each usable line holds a SMILES string and a ligand name, separated by a
    tab or, failing that, by four spaces. Any further columns are ignored.
    Lines with fewer than two columns are skipped.

    Inputs:
    :param str smi_file: Path to smiles file

    Returns:
    :returns: list list_of_ligands: A list of lists, one per imported ligand.
            list_of_ligands[0] = [SMILE, mol_id]
            where SMILE is the canonical isomeric SMILES of the sanitized mol.
    """
    list_of_ligands = []
    line_counter = 0  # stays 0 for an empty file
    with open(smi_file, "r") as smiles_file:
        for line_counter, line in enumerate(smiles_file, 1):
            line = line.rstrip("\n")

            # split line into parts separated by tabs
            parts = line.split("\t")
            if len(parts) == 1:
                # no tab found: fall back to 4-space separated columns
                parts = line.split("    ")

            if len(parts) < 2:
                continue

            mol_string = parts[0]
            mol_id = parts[1]

            try:
                mol = Chem.MolFromSmiles(mol_string, sanitize=False)
            except Exception:
                print("Miss Formatted within .SMI. Line number {}".format(
                    str(line_counter)))
                continue
            mol = MOH.check_sanitization(mol)
            if mol is None:
                continue

            mol_smile = Chem.MolToSmiles(mol,
                                         isomericSmiles=True,
                                         canonical=True)
            list_of_ligands.append([mol_smile, mol_id])

    print("Was able to import and sanitize {} ligands from the .smi.".format(
        len(list_of_ligands)))
    # Every line that did not produce a ligand is reported as a failure.
    if line_counter != len(list_of_ligands):
        print("\t Failed to sanitize/import {} ligands from the .smi".format(
            line_counter - len(list_of_ligands)))
    print("########")

    return list_of_ligands
def make_list_of_all_unique_frags(fragment_list):
    """
    This function takes a list of all molecules after fragmentation and
    separates the fragments into individual rdkit mol objects, sanitizes
    each, removes isotopes and converts them into a SMILES string. Redundant
    SMILES are reduced to a single entry.

    It returns a list of all unique sanitized canonical SMILES for every
    fragment made from all permutations of bond breaking.

    Inputs:
    :param list fragment_list: list of fragmented rdkit mols which haven't
        been separated yet

    Returns:
    :returns: list clean_frag_list: List of unique sanitized SMILES strings
        from all objects in fragment_list, in first-seen order. Isotope
        labels are also removed here.
    """
    clean_frag_list = []
    seen_smiles = set()
    for fragments in fragment_list:
        frags = Chem.GetMolFrags(fragments, asMols=True, sanitizeFrags=False)
        for frag in frags:
            frag = MOH.check_sanitization(frag)
            if frag is None:
                continue

            # Remove fragments with fewer than 3 atoms
            if len(frag.GetAtoms()) < 3:
                continue

            # Isotope labels were only bookkeeping for the bond cutting
            for atom in frag.GetAtoms():
                atom.SetIsotope(0)

            frag_smiles = Chem.MolToSmiles(frag, isomericSmiles=True,
                                           canonical=True)
            if frag_smiles in seen_smiles:
                continue
            seen_smiles.add(frag_smiles)
            clean_frag_list.append(frag_smiles)

    return clean_frag_list
# Example #5
# 0
def make_frag_list_for_one_mol(mol_info, frags_per_seed_lig, run_brics,
                               run_frag, c_c_bonds_off=False):
    """
    This will take a ligand string and ID encased in the list mol_info and
    fragment the ligand.

    The bonds to cut come from get_rot_bond_permutations_to_cut; every
    permutation is applied to a fresh copy of the isotope-labelled mol. The
    resulting fragments (run_frag) and/or the BRICS fragments (run_brics)
    are reduced to unique entries, given unique ID's and returned together
    with the parent ligand.

    Inputs:
    :param list mol_info: list containing [mol_string, mol_id]
                mol_info[0] = the SMILE string of the parent mol
                mol_info[1] = the Unique ID of the parent mol
    :param int frags_per_seed_lig: number of fragments to keep per ligand.
        NOTE(review): only -1 (keep every fragment) is acted on here; for
        any other value just the parent ligand is returned -- confirm this
        is intended.
    :param bool run_brics: whether to fragment using BRICS method
    :param bool run_frag: whether to fragment all bonds
    :param bool c_c_bonds_off: passed on to
        get_rot_bond_permutations_to_cut; controls the handling of C-C bonds

    Returns:
    :returns: list final_frag_list: A list of lists. The first entry is
            always the (re-canonicalized) parent ligand, followed by the
            chosen unique fragments.
            final_frag_list[0] = [SMILE, mol_id]

    Raises:
    :raises Exception: if the parent SMILES fails to sanitize.
    """
    mol_smile = mol_info[0]
    lig_id = mol_info[1]

    mol = Chem.MolFromSmiles(mol_smile, sanitize=False)
    mol = MOH.check_sanitization(mol)
    if mol is None:
        printout = ("\nMolecule {} failed to sanitize. "
                    "Could not make any fragments from it").format(lig_id)
        raise Exception(printout)
    mol_smile = Chem.MolToSmiles(mol, isomericSmiles=True, canonical=True)

    # Label atoms so bonds can be referenced by isotope label while cutting
    mol = label_iso_num_w_idx(mol)
    mol_copy = copy.deepcopy(mol)
    bonds_to_remove_permutations = get_rot_bond_permutations_to_cut(
        mol_copy, c_c_bonds_off)

    fragment_list = []
    for bond_set_to_del in bonds_to_remove_permutations:
        # Work on a fresh copy so each permutation starts from the intact mol
        mol_copy = copy.deepcopy(mol)
        fragmented_mol = remove_bonds(mol_copy, bond_set_to_del)
        if fragmented_mol is None:
            continue
        fragment_list.append(fragmented_mol)

    clean_frag_list = []
    if run_frag is True:
        clean_frag_list = make_list_of_all_unique_frags(fragment_list)
        clean_frag_list = list(set(clean_frag_list))

    if run_brics is True:
        mol_copy = copy.deepcopy(mol)
        bric_mols = get_brics_permutations(mol_copy, min_frag_size=3)

        clean_frag_list.extend(bric_mols)
        clean_frag_list = list(set(clean_frag_list))

    if len(clean_frag_list) == 0:
        printout = "\nNo fragments were made for {}.\n".format(lig_id)
        print(printout)
        return [[mol_smile, lig_id]]

    # The parent ligand is always the first entry
    final_frag_list = [[mol_smile, lig_id]]

    if frags_per_seed_lig == -1:
        printout = "\nFor {}: {} fragments were made.".format(
            lig_id, len(clean_frag_list))
        print(printout)
        for frag in clean_frag_list:
            unique_lig_id = make_unique_lig_id(lig_id, final_frag_list)
            final_frag_list.append([frag, unique_lig_id])
    return final_frag_list
def run_all_for_fun_group(vars, fun_group, rxns_by_fun_group,
                          a_smiles_click_object):
    """
    This runs all the testing for a single functional group.

    Every compound of the functional group's complementary library is
    sanitized and then reacted in each reaction listed for the group, using
    the reaction's example reactants for the other reactant positions.

    This will also write the compounds which pass to
    <output_folder>/<fun_group>.smi, one compound per line with its fields
    separated by four spaces.

    Inputs:
    :param dict vars: Dictionary of User variables. Reads
        "number_of_processors" and "output_folder".
    :param str fun_group: functional group name
    :param dict rxns_by_fun_group: Dictionary of rxns names organized by
        functional groups
    :param obj a_smiles_click_object: a a_smiles_click_object class object.
        Its complementary_mol_dict and reaction_dict attributes are used.

    Returns:
    :returns: list failed_to_react: a list of [rxn_name, failures] pairs, one
        per reaction, where failures are the non-None results returned by
        react_with_multiple_reactants
    :returns: list failed_to_sanitize: the library entries which failed to
        sanitize

    Raises:
    :raises Exception: if fun_group is not among a reaction's
        "functional_groups".
    """
    import os  # local import: only needed to build the output path

    # unpack variables
    complementary_mol_dict = a_smiles_click_object.complementary_mol_dict
    reaction_dict = a_smiles_click_object.reaction_dict
    number_of_processors = vars["number_of_processors"]
    output_folder = vars["output_folder"]

    smi_comp_file = complementary_mol_dict[fun_group]
    fun_group_list = get_usable_format(smi_comp_file)
    fun_group_mol_list = []
    failed_to_sanitize = []
    for info in fun_group_list:
        mol = Chem.MolFromSmiles(info[0])
        mol = MOH.check_sanitization(mol)
        if mol is None:
            failed_to_sanitize.append(info)
            continue
        # copy so the mol object is not appended onto the library entry
        temp = copy.deepcopy(info)
        temp.append(mol)
        fun_group_mol_list.append(temp)

    # print info about failures
    if len(failed_to_sanitize) != 0:
        printout = "{} compounds ".format(len(failed_to_sanitize))
        printout = printout + "failed to sanitize from: {}".format(fun_group)
        print(printout)

    failed_to_react = []
    for rxn_name in rxns_by_fun_group[fun_group]:

        current_rxn_dict = reaction_dict[rxn_name]
        example_reactants, rxn_obj = get_rxn_and_examples(current_rxn_dict)

        functional_groups_rxn = current_rxn_dict["functional_groups"]

        # Reactant position of this functional group (the last match wins)
        i_count_to_use = None
        for i_count, f_group in enumerate(functional_groups_rxn):
            if fun_group == f_group:
                i_count_to_use = i_count
        if i_count_to_use is None:
            raise Exception("This is a code error.")

        list_of_reactants = []
        for mol_info in fun_group_mol_list:
            # The library mol takes this group's position; the example
            # reactants fill every other position.
            reactants = tuple(
                mol_info[-1] if i_count == i_count_to_use
                else example_reactants[i_count]
                for i_count in range(len(functional_groups_rxn)))

            list_of_reactants.append((reactants, mol_info[1], rxn_obj))

        output = mp.multi_threading(list_of_reactants, number_of_processors,
                                    react_with_multiple_reactants)
        output = [x for x in output if x is not None]
        failed_to_react.append([rxn_name, output])

        # print info about failures
        if len(output) != 0:
            printout = "{} compounds failed to ".format(len(output))
            printout = printout + "react from {} ".format(fun_group)
            printout = printout + "in rxn: {}".format(rxn_name)
            print(printout)

    master_failed_to_react = []
    master_passes_reactions = []
    for fail_mol_list in failed_to_react:
        master_failed_to_react.extend(fail_mol_list[1])
    for mol_info in fun_group_list:
        # Compounds which could not be sanitized were never tested in a
        # reaction, so they must not be reported as passing.
        if mol_info in failed_to_sanitize:
            continue
        if mol_info[1] in master_failed_to_react:
            continue
        master_passes_reactions.append("    ".join(mol_info))

    # write to output .smi file
    output_file = os.path.join(output_folder, fun_group + ".smi")
    with open(output_file, "w") as f:
        f.write("\n".join(master_passes_reactions))

    return failed_to_react, failed_to_sanitize
def get_rxn_and_examples(current_rxn_dict):
    """
    get the example reaction molecules from current_rxn_dict, create the
    rxn_obj, and test examples in the rxn.

    Inputs:
    :param dict current_rxn_dict: a dictionary of information about a
        reaction. Reads "reaction_name", "example_rxn_reactants" (a string
        of the form "['SMILES_1', 'SMILES_2']") and "reaction_string".

    Returns:
    :returns: tuple example_rxn_reactants: a tuple of rdkit
            mol objects that are example compounds
    :returns: rdkit.Chem.rdChemReactions.ChemicalReaction rxn_obj: the
        reaction object to use

    Raises:
    :raises Exception: if an example compound fails to sanitize, if the
        reaction can not be built from its reaction string, or if the
        reaction fails on its own example compounds.
    """
    rxn_name = current_rxn_dict["reaction_name"]

    # Unpack the stringified list of example SMILES: drop the enclosing
    # brackets, all spaces and double quotes, then split on the ',' between
    # the single-quoted entries.
    example_smiles_str = current_rxn_dict["example_rxn_reactants"]
    example_smiles_str = example_smiles_str.replace("['", "").replace("']", "")
    example_smiles_str = example_smiles_str.replace(" ", "").replace('"', "")
    example_smiles_list = example_smiles_str.split("','")

    # Test example reactants
    example_rxn_reactants = []
    for smile_str in example_smiles_list:
        # remove any single quotes left behind by the split
        smile_str = smile_str.replace("'", "")

        example_mol = Chem.MolFromSmiles(smile_str)

        example_mol = MOH.check_sanitization(example_mol)
        if example_mol is None:
            print(smile_str)
            printout = "example mol from rxn: {}".format(rxn_name)
            printout = printout + " failed to sanitize in RDKit"
            print(printout)
            raise Exception(printout)
        example_rxn_reactants.append(example_mol)

    # convert example_rxn_reactants to tuple
    example_rxn_reactants = tuple(example_rxn_reactants)
    reaction_string = current_rxn_dict["reaction_string"]
    try:
        rxn_obj = AllChem.ReactionFromSmarts(reaction_string)
        rxn_obj.Initialize()
    except Exception:
        printout = "rxn {} failed to be created.".format(rxn_name)
        printout = printout + " Rxn SMART is flawed"
        print(printout)
        raise Exception(printout)

    # Demo on example reactants; a non-None result is treated as a failure
    example_results = react_with_multiple_reactants(example_rxn_reactants,
                                                    "test_reactions", rxn_obj)
    if example_results is not None:
        printout = "rxn {} failed to run on example compounds.".format(
            rxn_name)
        printout = printout + "\nPlease check example compounds"
        print(printout)
        raise Exception(printout)

    return example_rxn_reactants, rxn_obj