Ejemplo n.º 1
0
def extract_side_chains(mol, remove_duplicates=False, mark='[*]'):
    """Extract the side chains of a molecule given as a SMILES string.

    The Murcko scaffold of the molecule is used as the core; what is left
    after replacing that core is returned as the side chains.

    :param mol: {str} SMILES string of a molecule.
    :param remove_duplicates: {bool} If True, every distinct side chain is
        returned once, in order of first appearance.
    :param mark: {str} text substituted for the numbered attachment-point
        dummy atoms ``[0*]`` ... ``[19*]`` in the output.
    :return: {list} SMILES strings of the side chains with attachment points
        replaced by ``mark``; an empty list when the molecule cannot be
        parsed or processed.
    """
    m1 = MolFromSmiles(mol)
    if m1 is None:
        # Unparsable SMILES: report "no side chains" explicitly instead of
        # relying on a downstream exception.
        return []
    try:
        core = MurckoScaffold.GetScaffoldForMol(m1)
        side_chain = ReplaceCore(m1, core)
        # isomericSmiles adds a number to the dummy atoms.
        smi = MolToSmiles(side_chain, isomericSmiles=True)
    except Exception:
        # Best effort: a molecule that cannot be split yields no side
        # chains. KeyboardInterrupt/SystemExit are no longer swallowed.
        return []

    # Replace the numbered dummy atoms by the caller-supplied mark.
    for label in range(20):
        smi = smi.replace('[%d*]' % label, mark)

    chains = smi.split('.')
    if remove_duplicates:
        # dict.fromkeys keeps the first occurrence of each chain, so the
        # result order is deterministic (a plain set is not).
        return list(dict.fromkeys(chains))
    return chains
Ejemplo n.º 2
0
def canonicalize(mol_list, showprogress=False):
    """Deduplicate molecules through their SMILES representation.

    Every non-None entry of ``mol_list`` is written out with ``MolToSmiles``,
    duplicate strings are collapsed with a set, and each remaining string is
    parsed back with ``MolFromSmiles``.

    :param mol_list: iterable of molecule objects; ``None`` entries are skipped.
    :param showprogress: if True, print a banner and wrap both passes in tqdm.
    :return: list of molecules parsed from the unique SMILES strings
        (order follows set iteration and is therefore not guaranteed).
    """
    if showprogress:
        print('Canonicalising mols')
        source = tqdm(mol_list)
    else:
        source = mol_list

    smiles_seen = []
    for entry in source:
        if entry is None:
            continue
        smiles_seen.append(MolToSmiles(entry))

    unique_smiles = list(set(smiles_seen))
    if showprogress:
        unique_smiles = tqdm(unique_smiles)
    return [MolFromSmiles(text) for text in unique_smiles]
Ejemplo n.º 3
0
def get_SMILES_objects(mols):
    """Return canonical SMILES for one molecule or a sequence of molecules.

    :param mols: a list/tuple of molecules, or a single
        ``rdkit.Chem.rdchem.Mol`` instance.
    :return: a list of SMILES strings for a sequence input, a single SMILES
        string for a single molecule, or ``None`` for any other input type.
    """
    # isinstance also accepts list subclasses and tuples, which the exact
    # ``type(...) == list`` comparison silently ignored.
    if isinstance(mols, (list, tuple)):
        return [CanonSmiles(MolToSmiles(mol)) for mol in mols]
    # The molecule type is matched by its printed name, so this block does
    # not need the Mol class itself in scope. NOTE(review): subclasses of
    # Mol are not matched by this exact comparison.
    if str(type(mols)) == "<class 'rdkit.Chem.rdchem.Mol'>":
        return CanonSmiles(MolToSmiles(mols))
    return None
Ejemplo n.º 4
0
def canonicalize(smi_list, showprogress=False, n_heavy_min=17, logp_max=5):
    """Canonicalise, deduplicate and filter a collection of SMILES strings.

    Each input string is parsed and written back out; strings that cannot be
    parsed are dropped. The unique results are then kept only if the
    molecule has more than ``n_heavy_min`` heavy atoms and a Crippen logP of
    at most ``logp_max``.

    :param smi_list: iterable of SMILES strings.
    :param showprogress: if True, print status lines and show tqdm bars.
    :param n_heavy_min: exclusive lower bound on the heavy-atom count
        (default 17, the previously hard-coded value).
    :param logp_max: inclusive upper bound on Crippen logP (default 5, the
        previously hard-coded value).
    :return: list of unique SMILES strings passing both filters, in order
        of first appearance.
    """
    def _progress(items):
        # tqdm is only touched when a progress bar was requested.
        return tqdm(items) if showprogress else items

    if showprogress:
        print('Canonicalising mols')
    canonical = []
    for smi in _progress(smi_list):
        mol = MolFromSmiles(smi)
        if mol is not None:
            canonical.append(MolToSmiles(mol))

    # Order-preserving dedupe: same members as a set, deterministic order.
    unique = list(dict.fromkeys(canonical))

    if showprogress:
        print('Size of unfiltered final library: {}'.format(len(unique)))
        print('Filtering by n_heavy and logP:')
    final_list = []
    for smi in _progress(unique):
        mol = MolFromSmiles(smi)
        if mol is None:
            # A string that fails to re-parse is skipped rather than
            # crashing on the attribute access below.
            continue
        if mol.GetNumHeavyAtoms() > n_heavy_min and Crippen.MolLogP(mol) <= logp_max:
            final_list.append(smi)
    return final_list
Ejemplo n.º 5
0
def test_remove_stereo():
    """Check Filters.remove_stereo on a SMILES and an InChI input, then
    check the SMILES produced after Filters.commute_inchi."""
    chiral_input = MolFromSmiles('C[C@@H](C(=O)[O-])O')
    flattened = Filters.remove_stereo(chiral_input)
    assert MolToSmiles(flattened) == 'CC(O)C(=O)[O-]'

    inchi_text = (
        'InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+'
    )
    flattened = Filters.remove_stereo(MolFromInchi(inchi_text))
    assert MolToSmiles(flattened) == 'OC1=NC(c2c[nH]c3ccc(O)cc23)=CC1=C1C(O)=Nc2ccccc21'

    # Expected to change tautomerism
    commuted = Filters.commute_inchi(flattened)
    assert MolToSmiles(commuted) == 'O=C1NC(C2=CNC3=C2C=C(O)C=C3)=CC1=C1C(=O)NC2=CC=CC=C21'
Ejemplo n.º 6
0
def AddMurckoToFrame(frame, molCol = 'ROMol', MurckoCol = 'Murcko_SMILES', Generic = False):
  """
  Store the Murcko scaffold SMILES of every row in column `MurckoCol`.

  The molecule is read from column `molCol`. With `Generic` set, the
  scaffold is passed through MakeScaffoldGeneric before being written out
  as SMILES. The frame is modified in place; nothing is returned.
  """
  def _scaffold_smiles(row):
    core = MurckoScaffold.GetScaffoldForMol(row[molCol])
    if Generic:
      core = MurckoScaffold.MakeScaffoldGeneric(core)
    return MolToSmiles(core)

  frame[MurckoCol] = frame.apply(_scaffold_smiles, axis=1)
Ejemplo n.º 7
0
def retrieve_fragments(mol):
    """Split a molecule into its '.'-separated SMILES components.

    :param mol: molecule object accepted by ``MolToSmiles``.
    :return: tuple ``(sidechains, sidechains_smiles_list)`` where the first
        item holds the parsed molecule of every non-empty component and the
        second item holds all component strings (empty ones included).
    """
    sidechains_smiles_list = MolToSmiles(mol).split(".")
    # Every component is parsed, then the ones with an empty SMILES are
    # left out of the molecule list.
    parsed = [(text, MolFromSmiles(text)) for text in sidechains_smiles_list]
    sidechains = [fragment for text, fragment in parsed if text != ""]
    return sidechains, sidechains_smiles_list
Ejemplo n.º 8
0
def test_keep_biggest():
    """Check the output of Filters.keep_biggest for several multi-component
    inputs, given as SMILES or InChI."""
    # (parser, input text, exporter, expected export)
    cases = [
        (MolFromSmiles, 'CCCC.CC', MolToSmiles, 'CCCC'),
        (MolFromSmiles, 'CCCCC.CC.[H].CCC', MolToSmiles, 'CCCCC'),
        (
            MolFromInchi,
            'InChI=1S/C5H12N2O2.C4H7NO4/c6-3-1-2-4(7)5(8)9;5-2(4(8)9)1-3(6)7/h4H,1-3,6-7H2,(H,8,9);2H,1,5H2,(H,6,7)(H,8,9)/t4-;2-/m00/s1',
            MolToInchi,
            'InChI=1S/C4H7NO4/c5-2(4(8)9)1-3(6)7/h2H,1,5H2,(H,6,7)(H,8,9)/t2-/m0/s1',
        ),
        (MolFromInchi, 'InChI=1S/Mo.4O/q;;;2*-1', MolToInchi, 'InChI=1S/Mo'),
    ]
    for parse, text, export, expected in cases:
        kept = Filters.keep_biggest(parse(text))
        assert export(kept) == expected
Ejemplo n.º 9
0
def structure_standardization(smi):
    """
    Clean up a SMILES string with RDKit and the MolVS-based standardizer.

    The tautomer rules are refreshed and the standardizer module reloaded on
    every call, and the 'standardization' section of the configuration
    supplies ``max_num_atoms``, ``max_num_tautomers`` and
    ``include_stereoinfo``. The input is parsed, rejected if it has more
    atoms than ``max_num_atoms``, and otherwise run through charge parent,
    isotope parent, (optionally) stereo parent, tautomer parent and
    standardize before being written back as SMILES. Every failure is
    logged with ``logging.error``.

    :param smi: Input SMILES string.
    :return: The standardized SMILES, or ``np.nan`` when the input cannot be
        read, is too large, or fails standardization with a ValueError or
        AttributeError.
    """
    tautomer.TAUTOMER_TRANSFORMS = update_tautomer_rules()
    importlib.reload(MolVS_standardizer)
    settings = ReadConfig().get_conf_dict(parameters.get_parameters())['standardization']

    atom_limit = settings['max_num_atoms']
    tautomer_limit = settings['max_num_tautomers']
    keep_stereo = settings['include_stereoinfo']
    standardizer = MolVS_standardizer.Standardizer(max_tautomers=tautomer_limit)

    mol = MolFromSmiles(smi)  # Read SMILES and convert it to RDKit mol object.

    # Guard: the SMILES could not be converted into a mol object.
    if mol is None:
        failed = np.nan
        logging.error('Reading Error, ' + smi)
        return failed

    # Guard: the molecule exceeds the configured atom count.
    if not mol.GetNumAtoms() <= atom_limit:
        failed = np.nan
        logging.error('Molecule too large, ' + smi)
        return failed

    try:
        mol = standardizer.charge_parent(mol)
        mol = standardizer.isotope_parent(mol)
        if keep_stereo is False:
            # Stereo information is only stripped when explicitly disabled.
            mol = standardizer.stereo_parent(mol)
        mol = standardizer.tautomer_parent(mol)
        cleaned = standardizer.standardize(mol)
        return MolToSmiles(cleaned)  # convert mol object back to SMILES
    except (ValueError, AttributeError) as err:
        failed = np.nan
        # write failed molecules during standardization to log file
        logging.error('Standardization error, ' + smi + ', Error Type: ' + str(err))
        return failed
Ejemplo n.º 10
0
def pair_rxnts(mol1_list, mol2_list, rxn, debug=False):
    """Run a two-reactant reaction on every (mol1, mol2) combination.

    Hydrogens are added to both reactants before the reaction is run, and
    the SMILES of the first molecule of each product set is collected. A
    tqdm progress bar is shown over ``mol1_list``.

    :param mol1_list: iterable of first reactants.
    :param mol2_list: iterable of second reactants (iterated once per mol1).
    :param rxn: reaction object providing ``RunReactants``.
    :param debug: if True, log the raw products and each product SMILES.
    :return: list of product SMILES strings.
    """
    prod_list = []
    for first in tqdm(mol1_list):
        for second in mol2_list:
            reactants = (Chem.AddHs(first), Chem.AddHs(second))
            outcomes = rxn.RunReactants(reactants)
            if debug:
                logging.info(outcomes)
            if outcomes == ():
                continue
            for outcome in outcomes:
                if debug:
                    logging.info(MolToSmiles(outcome[0]))
                prod_list.append(MolToSmiles(outcome[0]))
    return prod_list
Ejemplo n.º 11
0
    def _get_inputs(self, rxn_list, pickaxe):
        """Build canonical reactant/product SMILES pairs for the given reactions.

        For every reaction id in ``rxn_list`` the reactant and product
        compounds whose id starts with "C" are looked up in
        ``pickaxe.compounds`` and combined into all (reactant, product)
        pairs. Each pair is keyed as ``"<reaction _id>_<pair index>"``.

        A pair is recorded in ``input_fails`` (value ``None``) when either
        SMILES is longer than 120 characters, cannot be parsed, or contains
        an "M" after canonicalisation; otherwise the two hydrogen-stripped
        SMILES are stored in ``input_info``.

        Original note: rxn_list will be pickaxe eventually.

        :param rxn_list: iterable of reaction ids present in ``pickaxe.reactions``.
        :param pickaxe: object exposing ``reactions`` and ``compounds`` mappings.
        :return: tuple ``(input_info, input_fails)`` of dicts.
        """
        max_smiles_len = 120

        def compound_smiles(entries):
            # Only compound ids starting with "C" are used.
            # NOTE(review): presumably this excludes cofactor ids - confirm.
            return [
                pickaxe.compounds[v[1]]["SMILES"]
                for v in entries
                if v[1].startswith("C")
            ]

        # Get reactions information
        reactions_info = {}
        for rxn_id in rxn_list:
            rxn = pickaxe.reactions[rxn_id]
            reactants = compound_smiles(rxn["Reactants"])
            products = compound_smiles(rxn["Products"])
            reactions_info[rxn["_id"]] = list(product(reactants, products))

        # Process this information
        input_info = {}
        input_fails = {}
        for rxn_id, reaction_pairs in reactions_info.items():
            for i, (reactant_smiles, product_smiles) in enumerate(reaction_pairs):
                key = rxn_id + "_" + str(i)

                if (len(reactant_smiles) > max_smiles_len
                        or len(product_smiles) > max_smiles_len):
                    input_fails[key] = None
                    continue

                mol1 = MolFromSmiles(reactant_smiles)
                mol2 = MolFromSmiles(product_smiles)
                if mol1 is None or mol2 is None:
                    # Unparsable SMILES is a failed input, not a crash in
                    # the hydrogen-removal step below.
                    input_fails[key] = None
                    continue

                reactant_smiles = MolToSmiles(RemoveHs(mol1))
                product_smiles = MolToSmiles(RemoveHs(mol2))

                # TODO what does this fix? from original code
                if "M" in reactant_smiles or "M" in product_smiles:
                    input_fails[key] = None
                else:
                    input_info[key] = [reactant_smiles, product_smiles]

        return input_info, input_fails
Ejemplo n.º 12
0
def simple_rxn(mol_list, rxn, debug=False):
    """Apply a one-reactant reaction to each molecule in ``mol_list``.

    Hydrogens are added to the reactant before the reaction is run; the
    first molecule of every product set is collected.

    :param mol_list: iterable of reactant molecules.
    :param rxn: reaction object providing ``RunReactants``.
    :param debug: if True, print the input SMILES, the raw products, and
        each product set with the SMILES of its first molecule.
    :return: list of product molecules.
    """
    prod_list = []
    for reactant in mol_list:
        if debug:
            print('Input: ' + MolToSmiles(reactant))
        outcomes = rxn.RunReactants((Chem.AddHs(reactant), ))
        if debug:
            print('Products: {}'.format(outcomes))
        if outcomes == ():
            continue
        for outcome in outcomes:
            if debug:
                print(outcome)
                print(MolToSmiles(outcome[0]))
            prod_list.append(outcome[0])
    return prod_list
Ejemplo n.º 13
0
def pair_rxnts(mol1_list, mol2_list, rxn, debug=False):
    """Run a two-reactant reaction on every (mol1, mol2) combination.

    Hydrogens are added to both reactants before the reaction is run; the
    first molecule of every product set is collected as a molecule object.

    :param mol1_list: iterable of first reactants.
    :param mol2_list: iterable of second reactants (iterated once per mol1).
    :param rxn: reaction object providing ``RunReactants``.
    :param debug: if True, log the raw products and each product SMILES.
    :return: list of product molecules.
    """
    prod_list = []
    for first in mol1_list:
        for second in mol2_list:
            reactants = (Chem.AddHs(first), Chem.AddHs(second))
            outcomes = rxn.RunReactants(reactants)
            if debug:
                logging.info(outcomes)
            if outcomes == ():
                continue
            for outcome in outcomes:
                if debug:
                    logging.info(MolToSmiles(outcome[0]))
                prod_list.append(outcome[0])
    return prod_list
Ejemplo n.º 14
0
def reaction(smarts,smiles1,smiles2):
    """Run a two-reactant reaction given as SMARTS on two SMILES inputs.

    :param smarts: reaction SMARTS passed to ``ReactionFromSmarts``.
    :param smiles1: SMILES of the first reactant.
    :param smiles2: SMILES of the second reactant.
    :return: flat list with the isomeric SMILES of every molecule in every
        product set returned by ``RunReactants``.
    """
    rxn = ReactionFromSmarts(smarts)
    reactant_pair = [MolFromSmiles(smiles1), MolFromSmiles(smiles2)]
    return [
        MolToSmiles(product_mol, isomericSmiles=True)
        for product_set in rxn.RunReactants(reactant_pair)
        for product_mol in product_set
    ]
Ejemplo n.º 15
0
    def smiles(self):
        """Return the SMILES string of this fragment's molecule.

        Returns:
            str: result of ``MolToSmiles`` applied to ``self.molecule``
        """
        fragment_mol = self.molecule
        return MolToSmiles(fragment_mol)
Ejemplo n.º 16
0
def time_kekulize(file_path: str, sample_size: int = -1):
    """Time selfies and RDKit kekulization on SMILES read from a file.

    The file is resolved relative to this module's directory. Its first
    line is treated as a header and skipped; every other line, stripped of
    trailing whitespace, is one SMILES string. Both timings are printed.

    :param file_path: path of the SMILES file, relative to this module.
    :param sample_size: if positive, time a random sample of at most this
        many SMILES instead of the whole file.
    :return: None.
    """
    curr_dir = os.path.dirname(__file__)
    file_path = os.path.join(curr_dir, file_path)

    # load data; slicing (instead of pop(0)) also copes with an empty file
    with open(file_path, 'r') as file:
        smiles = [line.rstrip() for line in file][1:]

    if sample_size > 0:
        # Clamp so that asking for more SMILES than are available does not
        # raise ValueError in random.sample.
        smiles = random.sample(smiles, min(sample_size, len(smiles)))

    print(f"Timing Kekulization of {len(smiles)} SMILES from {file_path}")

    # time selfies kekulization; perf_counter is the monotonic,
    # high-resolution clock intended for measuring short durations
    start = time.perf_counter()
    for s in smiles:
        list(kekulize_parser(_parse_smiles(s)))
    selfies_time = time.perf_counter() - start
    print(f"--> selfies kekulize: {selfies_time:0.7f}s")

    # time RDKit kekulization
    start = time.perf_counter()
    for s in smiles:
        m = MolFromSmiles(s)
        Kekulize(m)
        MolToSmiles(m, kekuleSmiles=True)
    rdkit_time = time.perf_counter() - start
    print(f"--> RDKit kekulize: {rdkit_time:0.7f}s")
Ejemplo n.º 17
0
 def graph(self, m):
     """Return a text key made of SMILES, a hydrogen count and the net charge.

     The result has the form ``"<isomeric SMILES> H<hcount><signed charge>"``.

     NOTE(review): a single-bond, charge-free copy of the molecule is built
     in ``em`` but is not used for the returned string (the SMILES is taken
     from ``m`` itself) - confirm whether that is intended.

     :param m: RDKit molecule.
     :return: str key as described above.
     """
     from rdkit.Chem import EditableMol, Atom, rdchem
     # NOTE(review): presumably the number of hydrogens; the exact meaning
     # depends on how GetNumAtoms interprets its positional flag - confirm.
     hcount = m.GetNumAtoms(False) - m.GetNumAtoms(True)
     # create new molecule using single bonds only
     em = EditableMol(Mol())
     # Maps an atom index in ``m`` to its index in ``em`` (None = not added).
     nbridx = [None] * m.GetNumAtoms()
     iatom = 0
     for atom in m.GetAtoms():
         atnum = atom.GetAtomicNum()
         if atnum == 1 and atom.GetIsotope() == 1:
             # Explicit hydrogen atoms with isotope 1 are counted, not copied.
             hcount += 1
             continue
         newatom = Atom(atnum)
         newatom.SetFormalCharge(0)
         em.AddAtom(newatom)
         new_idx = iatom
         nbridx[atom.GetIdx()] = new_idx
         iatom += 1
         for a2 in atom.GetNeighbors():
             a2idx = nbridx[a2.GetIdx()]
             if a2idx is not None:
                 # Both ends must be indices in ``em``; using the index from
                 # ``m`` goes out of range once an atom has been skipped.
                 em.AddBond(new_idx, a2idx, rdchem.BondType.SINGLE)
     cansmi = MolToSmiles(m, isomericSmiles=True)
     return "%s%s%d%+d" % (cansmi, ' H', hcount, GetFormalCharge(m))
Ejemplo n.º 18
0
def main(args):
    """Group docked SMILES by Murcko scaffold and by generic scaffold.

    Reads 'data/unique_docked_smiles.csv', drops rows whose SMILES cannot be
    parsed, writes the scaffold and generic-scaffold columns to
    'data/unique_docked_smiles_by_scaffold.csv', prints the 20 most
    populated scaffolds of each kind and saves a grid image for each.

    :param args: unused.
    """
    data = pd.read_csv('data/unique_docked_smiles.csv')
    # filter out invalid SMILES
    data['mol'] = data['SMILES'].apply(lambda x: MolFromSmiles(x))
    data = data[data['mol'].notnull()]
    print('Number of unique SMILES: {}'.format(len(data)))

    # Map each scaffold (plain and generic) to the SMILES that share it.
    scaffold_dict = {}
    scaffold_generic_dict = {}
    scaff_list = []
    scaff_generic_list = []
    for smiles in data['SMILES'].values:
        scaffold = MurckoScaffoldSmilesFromSmiles(smiles)
        scaffold_generic = MolToSmiles(MakeScaffoldGeneric(MolFromSmiles(scaffold)))
        scaff_list.append(scaffold)
        scaff_generic_list.append(scaffold_generic)
        scaffold_dict.setdefault(scaffold, []).append(smiles)
        scaffold_generic_dict.setdefault(scaffold_generic, []).append(smiles)
    data['scaffold'] = scaff_list
    data['scaffold_generic'] = scaff_generic_list
    data.drop(columns='mol').to_csv('data/unique_docked_smiles_by_scaffold.csv', index=False)

    def report_top(groups, header, line, image_path):
        # Print the group count, list the 20 largest groups and draw them.
        print(header.format(len(groups)))
        ranked = sorted(groups, key=lambda k: len(groups[k]), reverse=True)[:20]
        for rank, key in enumerate(ranked, start=1):
            print(line.format(rank, key, len(groups[key])))
        top_mols = [MolFromSmiles(key) for key in ranked]
        legends = ['counts = {}'.format(len(groups[key])) for key in ranked]
        img = Draw.MolsToGridImage(top_mols, molsPerRow=5, subImgSize=(200,200),
                                   legends=legends)
        img.save(image_path)

    report_top(scaffold_dict,
               '\nNumber of distinct scaffolds: {}',
               '#{} scaffold: {}, size: {}',
               'data/scaff_grid.png')
    report_top(scaffold_generic_dict,
               '\nNumber of distinct generic scaffolds: {}',
               '#{} generic scaffold: {}, size: {}',
               'data/scaff_generic_grid.png')
Ejemplo n.º 19
0
def get_annotated_murcko_scaffold(mol, scaffold=None, as_mol=True):
    """
    Build the Murcko scaffold of `mol` with its side chains replaced by
    dummy atoms ('*').

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule to annotate.
    scaffold : rdkit.Chem.rdchem.Mol, optional
        Precomputed Murcko scaffold of `mol` to use as the template. When
        it is missing (or otherwise falsy) the scaffold is computed here.
    as_mol : bool, optional
        Return the molecule object when True, its SMILES string otherwise.
        The default is True.

    Returns
    -------
    {str, rdkit.Chem.rdchem.Mol}
        The annotated scaffold. In SMILES mode an empty string is returned
        when the side-chain replacement yields None.

    """
    template = scaffold if scaffold else MurckoScaffold.GetScaffoldForMol(mol)
    annotated = rdmolops.ReplaceSidechains(mol, template)
    if not as_mol:
        return '' if annotated is None else MolToSmiles(annotated)
    return annotated
Ejemplo n.º 20
0
    def test_getitem(self, filled_fragmentsdb, expected_fragment):
        """Look up one fragment by identifier and compare it to the fixture.

        The molecule is checked through its SMILES, then removed from the
        record so the remaining fields can be compared directly.
        """
        record = filled_fragmentsdb['1muu_GDX_frag7']

        observed_smiles = MolToSmiles(record['mol'])
        assert observed_smiles == '[*]COP(=O)([O-])OP(=O)([O-])OC1OC(C(=O)[O-])C(O)C(O)C1O'

        del record['mol']
        assert record == expected_fragment
Ejemplo n.º 21
0
def get_desc_data(molecule):
    """
    Compute the descriptors listed in ``Descriptors.descList`` for a molecule.

    The 'Ipc' descriptor is left out. A descriptor that raises ValueError,
    TypeError or ZeroDivisionError is reported on stdout and stored as
    'NaN'. Each header is the descriptor name prefixed with 'DESC_'.

    :param molecule: rdkit's molecule object
    :return: tuple of (float array of descriptor values,
        'S32' byte-string array of the matching headers)
    """
    registry = dict(Descriptors.descList)
    names = list(registry)
    names.remove('Ipc')

    values_by_header = {}
    for name in names:
        try:
            value = registry[name](molecule)
        except (ValueError, TypeError, ZeroDivisionError) as err:
            message = 'Descriptor {} wasn\'t calculated for a molecule {} due to {}'
            print(message.format(str(name), str(MolToSmiles(molecule)), str(err)))
            value = 'NaN'
        values_by_header['DESC_{}'.format(name)] = value

    molstring = np.fromiter(values_by_header.values(), dtype=float)
    headers = np.fromiter(values_by_header.keys(), dtype='S32')
    return molstring, headers
Ejemplo n.º 22
0
 def _convert_depiction(self, idepic, itype='smiles', otype={'inchikey'}):
     """Convert a chemical depiction into other depiction types.

     :param idepic: depiction string to convert.
     :param itype: type of ``idepic``: 'smiles' or 'inchi'.
     :param otype: iterable of requested output types, each one of
         'smiles', 'inchi' or 'inchikey'.
     :return: dict mapping each requested output type to its depiction.
     :raises NotImplementedError: for an unknown input or output type. Both
         are validated before the depiction is parsed.
     :raises self.DepictionError: when the depiction cannot be imported.
     """
     if itype not in ('smiles', 'inchi'):
         raise NotImplementedError(
             '"{}" is not a valid input type'.format(itype))
     requested = list(otype)
     for item in requested:
         if item not in ('smiles', 'inchi', 'inchikey'):
             # Report the offending item, not the whole collection.
             raise NotImplementedError(
                 '"{}" is not a valid output type'.format(item))
     # Import
     if itype == 'smiles':
         rdmol = MolFromSmiles(idepic, sanitize=True)
     else:
         rdmol = MolFromInchi(idepic, sanitize=True)
     if rdmol is None:  # Check import
         raise self.DepictionError(
             'Import error from depiction "{}" of type "{}"'.format(
                 idepic, itype))
     # Export
     # MolToSmiles is tricky, one may want to check the possible options.
     exporters = {
         'smiles': MolToSmiles,
         'inchi': MolToInchi,
         'inchikey': MolToInchiKey,
     }
     return {item: exporters[item](rdmol) for item in requested}
Ejemplo n.º 23
0
def delete_atom_generate_smiles(origin_graph_data, delete_idx):
    '''
        Remove one node from a copy of a graph and return the SMILES of the result.

        The input object is deep-copied and never modified. Row delete_idx is
        dropped from x, every edge touching delete_idx is dropped from
        edge_index / edge_attr, and node indices above delete_idx are shifted
        down by one. Removing the last remaining edges is supported.

        delete_idx is integer 0~N-1.
        Returns the SMILES string, or None if converting the pruned graph
        to a molecule or to SMILES raises.
    '''
    graph_data = copy.deepcopy(origin_graph_data)
    graph_data.x = torch.cat(
        [graph_data.x[:delete_idx, :], graph_data.x[delete_idx + 1:, :]], 0)

    edge_index = graph_data.edge_index
    # An edge (column) survives only if neither endpoint is the deleted node.
    # Mask indexing also works when nothing survives, where concatenating an
    # empty list of columns would raise.
    keep = (edge_index != delete_idx).all(dim=0)
    edge_index = edge_index[:, keep]
    graph_data.edge_attr = graph_data.edge_attr[keep]
    # Close the gap left by the removed node in the index numbering.
    graph_data.edge_index = edge_index - (edge_index > delete_idx).to(edge_index.dtype)

    try:
        mol = graph_data_obj_to_mol_simple(graph_data.x, graph_data.edge_index,
                                           graph_data.edge_attr)
        return MolToSmiles(mol)
    except Exception:
        # Best effort: a graph that cannot be turned into a molecule gives
        # None. KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
Ejemplo n.º 24
0
def convert_depiction(idepic, itype='smiles', otype={'inchikey'}):
    """Convert chemical depiction to others type of depictions

    :param  idepic: string depiction to be converted, str
    :param   itype: type of depiction provided as input, 'smiles' or 'inchi'
    :param   otype: types of depiction to be generated, {"", "", ..};
                    each one of 'smiles', 'inchi', 'inchikey'
    :return odepic: generated depictions, {"otype1": "odepic1", ..}
    :raises NotImplementedError: for an unknown input or output type; both
                    are validated before the depiction is parsed
    :raises Exception: when the depiction cannot be imported

    Usage example:
    - convert_depiction(idepic='CCO', otype={'inchi', 'smiles', 'inchikey'})
    - convert_depiction(idepic='InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3', itype='inchi', otype={'inchi', 'smiles', 'inchikey'})
    """
    if itype not in ('smiles', 'inchi'):
        raise NotImplementedError('"{}" is not a valid input type'.format(itype))
    requested = list(otype)
    for item in requested:
        if item not in ('smiles', 'inchi', 'inchikey'):
            # Report the offending item, not the whole collection.
            raise NotImplementedError('"{}" is not a valid output type'.format(item))

    # Import
    if itype == 'smiles':
        rdmol = MolFromSmiles(idepic, sanitize=True)
    else:
        rdmol = MolFromInchi(idepic, sanitize=True)
    if rdmol is None:  # Check import
        raise Exception('Import error from depiction "{}" of type "{}"'.format(idepic, itype))

    # Export
    # MolToSmiles is tricky, one may want to check the possible options.
    exporters = {
        'smiles': MolToSmiles,
        'inchi': MolToInchi,
        'inchikey': MolToInchiKey,
    }
    return {item: exporters[item](rdmol) for item in requested}
Ejemplo n.º 25
0
 def test_parser_roundtrip_no_rule_sequence(self):
     """Parse ``smiles1`` into a graph tree, rebuild the graph directly
     from the tree, and check the SMILES survives the round trip."""
     parsed_tree = hypergraph_parser(MolFromSmiles(smiles1))
     rebuilt_graph = graph_from_graph_tree(parsed_tree)
     rebuilt_smiles = MolToSmiles(to_mol(rebuilt_graph))
     assert rebuilt_smiles == smiles1
Ejemplo n.º 26
0
 def test_parser_roundtrip(self):
     """Parse ``smiles1`` into a graph tree, rebuild the graph by
     evaluating the tree's rules, and check the SMILES survives the
     round trip."""
     parsed_tree = hypergraph_parser(MolFromSmiles(smiles1))
     rebuilt_graph = evaluate_rules(parsed_tree.rules())
     rebuilt_smiles = MolToSmiles(to_mol(rebuilt_graph))
     assert rebuilt_smiles == smiles1
Ejemplo n.º 27
0
def pair_rxnts(mol1_list, mol2_list, rxn, debug=False):
    """
    Apply a two-reactant reaction to every combination of the two input
    molecule lists and return the products as SMILES strings.

    Hydrogens are added to both reactants before the reaction is run, and
    only the first molecule of each product set is kept. With ``debug`` set,
    the raw products and each product SMILES are logged.
    """
    prod_list = []
    for first in mol1_list:
        for second in mol2_list:
            reactants = (Chem.AddHs(first), Chem.AddHs(second))
            outcomes = rxn.RunReactants(reactants)
            if debug:
                logging.info(outcomes)
            if outcomes == ():
                continue
            for outcome in outcomes:
                if debug:
                    logging.info(MolToSmiles(outcome[0]))
                prod_list.append(MolToSmiles(outcome[0]))
    return prod_list
Ejemplo n.º 28
0
def test_sequence_minimal():
    """Run the default Standardizer on one InChI input and check that the
    InChI is unchanged and the SMILES matches the expected string."""
    # Violacein
    violacein_inchi = 'InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+'
    parsed = MolFromInchi(violacein_inchi)
    ans = Standardizer().compute(parsed)
    assert MolToInchi(ans) == violacein_inchi
    assert MolToSmiles(ans) == 'OC1=NC(c2c[nH]c3ccc(O)cc23)=C/C1=C1\\C(O)=Nc2ccccc21'
    # L-Lactate
    parsed = MolFromInchi('')
Ejemplo n.º 29
0
def is_same_mol(smiles1, smiles2):
    """Tell whether two SMILES strings describe the same molecule.

    Both strings are parsed and written back out; the molecules are
    considered the same when the resulting strings are equal. A missing
    (None) or unparsable input gives False.
    """
    if smiles1 is None or smiles2 is None:
        return False

    parsed = [MolFromSmiles(smiles1), MolFromSmiles(smiles2)]
    if any(entry is None for entry in parsed):
        return False

    first, second = [MolToSmiles(entry) for entry in parsed]
    return first == second
Ejemplo n.º 30
0
def write_BRICS_csv(BRICS_builds_gen, filename):
    """Write the SMILES of generated molecules to a one-column CSV file.

    A 'CompoundSMILES' header row is written first. For each batch the
    property cache of every molecule is refreshed (non-strict) before the
    batch is written, one SMILES per row.

    :param BRICS_builds_gen: iterable of batches, each a re-iterable
        collection of molecules.
    :param filename: path of the CSV file to create or overwrite.
    """
    with open(filename, "w", newline="") as handle:
        out = csv.writer(handle, delimiter=",")
        out.writerow(['CompoundSMILES'])
        for batch in BRICS_builds_gen:
            for built in batch:
                built.UpdatePropertyCache(strict=False)
            rows = []
            for built in batch:
                rows.append([MolToSmiles(built)])
            out.writerows(rows)