Esempio n. 1
0
def main(name, argv):
    """Fragment the molecule from a SMILES file and write the filtered pieces.

    The first whitespace-separated token of the first line of ``argv[0]`` is
    parsed as SMILES and decomposed with ``Recap.RecapDecompose``.  Every
    RECAP child (plus the input itself) with more than 7 and at most 25 heavy
    atoms contributes:

    * its own canonical SMILES, if it has at most 5 rotatable bonds;
    * its Murcko scaffold SMILES, if the scaffold has at most 5 rotatable
      bonds and more than 7 heavy atoms.

    The unique, non-empty results are written to ``argv[1]``, one per line,
    as ``<smiles>\\t<running index starting at 1>``.

    Args:
        name: Program name, forwarded to ``print_usage`` on bad arguments.
        argv: Two-element sequence ``[input_smiles_file, output_file]``.
    """
    if len(argv) != 2:
        print_usage(name)
        return

    with open(argv[0], 'r') as f:
        smile = f.readline().split()[0]
    mol = Chem.MolFromSmiles(smile)
    hierarch = Recap.RecapDecompose(mol)
    children = []
    # list(...) keeps the concatenation working when keys() returns a view
    # (Python 3) as well as when it returns a list (Python 2).
    for child in list(hierarch.GetAllChildren().keys()) + [smile]:
        # Cap the RECAP attachment points with explicit hydrogens.
        new_smiles = child.replace('[*]', '[H]')
        new = Chem.MolFromSmiles(new_smiles)
        if new is None:
            continue
        new_size = new.GetNumHeavyAtoms()
        if not 7 < new_size <= 25:
            continue
        if rdMolDescriptors.CalcNumRotatableBonds(new) <= 5:
            children.append(Chem.MolToSmiles(new, isomericSmiles=True))
        core_smile = MurckoScaffold.MurckoScaffoldSmilesFromSmiles(
            new_smiles, includeChirality=True)
        core = Chem.MolFromSmiles(core_smile)
        # Guard against a scaffold SMILES that fails to parse.
        if (core is not None
                and rdMolDescriptors.CalcNumRotatableBonds(core) <= 5
                and core.GetNumHeavyAtoms() > 7):
            children.append(core_smile)

    with open(argv[1], 'w') as f:
        unique = (m for m in set(children) if len(m) > 0)
        for i, m in enumerate(unique, 1):
            f.write(m + '\t' + str(i) + '\n')
Esempio n. 2
0
    def test1(self):
        """Testing first 200 mols from NCI.

        For every readable molecule in ``self.inFileName`` each descriptor
        below is recomputed and compared with the integer stored in the
        corresponding SD property.
        """
        # Figure out which rotor version we are using: the probe molecule
        # yields 2 rotatable bonds under one definition only.
        probe = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
        rot_prop = NonStrict if Lipinski.NumRotatableBonds(probe) == 2 else Strict

        def non_strict_rotors(mol):
            return rdMolDescriptors.CalcNumRotatableBonds(
                mol, rdMolDescriptors.NumRotatableBondsOptions.NonStrict)

        def strict_rotors(mol):
            return rdMolDescriptors.CalcNumRotatableBonds(
                mol, rdMolDescriptors.NumRotatableBondsOptions.Strict)

        # (descriptor function, reference SD property, failure message)
        checks = (
            (Lipinski.NHOHCount, 'NUM_LIPINSKIHDONORS',
             'bad num h donors for mol %d (%s): %d != %d'),
            (Lipinski.NOCount, 'NUM_LIPINSKIHACCEPTORS',
             'bad num h acceptors for mol %d (%s): %d != %d'),
            (Lipinski.NumHDonors, 'NUM_HDONORS',
             'bad num h donors for mol %d (%s): %d != %d'),
            (Lipinski.NumHAcceptors, 'NUM_HACCEPTORS',
             'bad num h acceptors for mol %d (%s): %d != %d'),
            (Lipinski.NumHeteroatoms, 'NUM_HETEROATOMS',
             'bad num heteroatoms for mol %d (%s): %d != %d'),
            (Lipinski.NumRotatableBonds, rot_prop,
             'bad num rotors for mol %d (%s): %d != %d'),
            # test the underlying numrotatable bonds
            (non_strict_rotors, NonStrict,
             'bad num rotors for mol %d (%s): %d != %d'),
            (strict_rotors, Strict,
             'bad num rotors for mol %d (%s): %d != %d'),
        )

        # The index counts every supplier entry, including unreadable ones.
        for idx, mol in enumerate(Chem.SDMolSupplier(self.inFileName), 1):
            if not mol:
                continue
            for compute, prop, template in checks:
                calc = compute(mol)
                orig = int(mol.GetProp(prop))
                assert calc == orig, template % (
                    idx, mol.GetProp('SMILES'), calc, orig)
Esempio n. 3
0
def calc(smi, name):
    """Compute a row of physicochemical descriptors for one SMILES string.

    Args:
        smi: SMILES string of the molecule.
        name: Identifier returned as the first element of the result and
            used in the error messages.

    Returns:
        A tuple ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw,
        csp3, fmf, qed, hac, nrings_fused, n_unique_hba_hbd_atoms,
        max_ring_size, n_chiral_centers, fcsp3_bm)`` with the float values
        rounded, or ``None`` when the SMILES cannot be parsed or any
        descriptor calculation raises.  Failures are reported on stderr.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # The scaffold feeds two descriptors; build it once.
        scaffold = GetScaffoldForMol(m)
        # Fraction of heavy atoms that belong to the scaffold; avoid dividing
        # by zero for molecules without heavy atoms.
        fmf = scaffold.GetNumHeavyAtoms() / hac if hac else 0
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        n_unique_hba_hbd_atoms = count_hbd_hba_atoms(m)
        max_ring_size = len(max(m.GetRingInfo().AtomRings(), key=len, default=()))
        n_chiral_centers = len(FindMolChiralCenters(m, includeUnassigned=True))
        fcsp3_bm = rdMolDescriptors.CalcFractionCSP3(scaffold)
        return name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2), \
               round(csp3, 3), round(fmf, 3), round(qed, 3), hac, nrings_fused, n_unique_hba_hbd_atoms, \
               max_ring_size, n_chiral_centers, round(fcsp3_bm, 3)
    except Exception:
        # Best-effort batch processing: skip the molecule, but let
        # KeyboardInterrupt / SystemExit propagate.
        sys.stderr.write(f'molecule {name} was omitted due to an error in calculation of some descriptors\n')
        return None
Esempio n. 4
0
def computeFeatures(mol):
    """Build a flat feature list for ``mol``.

    The list starts with the ring count, followed by the N, O, C, B, P, S, F
    and I counts, the double-bond count, the Labute ASA value and the exact
    molecular weight.  All ``SlogP_VSA_`` values are appended next, then
    every value of the mapping returned by ``recurseMolHCount`` in its
    iteration order.
    """
    features = [rdMolDescriptors.CalcNumRings(mol)]

    # NOTE(review): the rotatable-bond count is computed but never added to
    # the output; the call is kept so behaviour is unchanged -- confirm
    # whether it was meant to be a feature.
    rdMolDescriptors.CalcNumRotatableBonds(mol)

    counters = (
        countNitrogens,
        countOxygens,
        countCarbons,
        countBorons,
        countPhos,
        countSulfurs,
        countFluorine,
        countIodine,
        countDoubleBonds,
    )
    features.extend(counter(mol) for counter in counters)
    features.append(rdMolDescriptors.CalcLabuteASA(mol))
    features.append(rdMolDescriptors.CalcExactMolWt(mol))
    features.extend(rdMolDescriptors.SlogP_VSA_(mol))

    h_counts = recurseMolHCount(mol)
    features.extend(h_counts[key] for key in h_counts)
    return features
Esempio n. 5
0
def calc(smi, name):
    """Compute a row of physicochemical descriptors for one SMILES string.

    Args:
        smi: SMILES string of the molecule.
        name: Identifier returned as the first element of the result and
            used in the error messages.

    Returns:
        A tuple ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw,
        csp3, fmf, qed, hac, nrings_fused)`` with the float values rounded,
        or ``None`` when the SMILES cannot be parsed or any descriptor
        calculation raises.  Failures are reported on stderr.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # Fraction of heavy atoms that belong to the scaffold; avoid dividing
        # by zero for molecules without heavy atoms.
        fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac if hac else 0
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        return name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2), \
               round(csp3, 3), round(fmf, 3), round(qed, 3), hac, nrings_fused
    except Exception:
        # Best-effort batch processing: skip the molecule, but let
        # KeyboardInterrupt / SystemExit propagate.
        sys.stderr.write(
            f'molecule {name} was omitted due to an error in calculation of some descriptors\n'
        )
        return None
Esempio n. 6
0
def generate_conformations_sdf(smiles, name, max_conf=20):
    """
    Generate conformations and save each one to its own SDF file.

    The number of requested conformers is ``1 + 3 * <rotatable bonds>``,
    capped at ``max_conf``.  Conformer ``i`` is written to
    ``<name>-<i>.sdf``.

    Parameters
    ----------
    smiles : str
        SMILES string of the molecule.
    name : str
        Prefix used for the output file names.
    max_conf : int
        Upper bound on the number of conformers requested.

    Returns
    -------
    list of str
        The ``<name>-<i>`` stems (without the ``.sdf`` suffix) of the files
        written.
    """

    m = Chem.AddHs(Chem.MolFromSmiles(smiles))

    rot_bond = rdMolDescriptors.CalcNumRotatableBonds(m)

    confs = min(1 + 3 * rot_bond, max_conf)

    AllChem.EmbedMultipleConfs(m, numConfs=confs,
                               useExpTorsionAnglePrefs=True,
                               useBasicKnowledge=True)

    conf_list = []

    for i, conf in enumerate(m.GetConformers()):

        # Copy of the molecule that carries only this conformer.
        tm = Chem.Mol(m, False, conf.GetId())
        confname = name + "-" + str(i)

        writer = Chem.SDWriter(confname + ".sdf")
        try:
            writer.write(tm)
        finally:
            # Close explicitly so the file is flushed and released now
            # instead of whenever the writer happens to be collected.
            writer.close()

        conf_list.append(confname)

    return conf_list
Esempio n. 7
0
def identify_rotatable_bonds(mol):
    """
    Find the rotatable bonds of a molecule via a SMARTS match.

    This is an extremely crude and inaccurate method that should *not* be
    used for production; it misses simple cases like benzoic acids, amides,
    etc.

    Parameters
    ----------
    mol: ROMol
        Input molecule

    Returns
    -------
    set of 2-tuples
        Atom-index pairs of the matched bonds, each ordered so that the
        smaller index comes first.

    """
    rotatable_query = Chem.MolFromSmarts("[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]")
    hits = mol.GetSubstructMatches(rotatable_query, uniquify=1)

    # Sanity check: the pattern must find at least as many bonds as RDKit's
    # own rotatable-bond descriptor.
    assert len(hits) >= rdMolDescriptors.CalcNumRotatableBonds(mol)

    return {(a, b) if a <= b else (b, a) for a, b in hits}
def generate_conformations(fragments, max_confs=20):
    """Embed conformers into every fragment and return ``fragments``.

    Each fragment gets ``3 + 3 * <rotatable bonds>`` conformers requested,
    capped at ``max_confs``.  The fragments are modified in place.
    """
    for frag in fragments:
        n_rotatable = rdMolDescriptors.CalcNumRotatableBonds(frag)
        AllChem.EmbedMultipleConfs(
            frag, numConfs=min(3 + 3 * n_rotatable, max_confs))

    return fragments
Esempio n. 9
0
def properties(mol):
    """Calculate the properties that are required for the QED descriptor.

    Hydrogens are removed from ``mol`` before anything is computed.

    Raises:
        ValueError: if ``mol`` is None.

    Returns:
        A ``QEDproperties`` instance with the fields MW, ALOGP, HBA, HBD,
        PSA, ROTB, AROM and ALERTS.
    """
    if mol is None:
        raise ValueError('You need to provide a mol argument.')
    mol = Chem.RemoveHs(mol)

    mol_weight = rdmd._CalcMolWt(mol)
    alogp = Crippen.MolLogP(mol)
    # Total number of matches over all acceptor patterns.
    n_acceptors = sum(
        len(mol.GetSubstructMatches(query)) for query in Acceptors
        if mol.HasSubstructMatch(query))
    n_donors = rdmd.CalcNumHBD(mol)
    polar_surface = MolSurf.TPSA(mol)
    n_rotatable = rdmd.CalcNumRotatableBonds(
        mol, rdmd.NumRotatableBondsOptions.Strict)
    # Ring perception on a copy with the AliphaticRings substructures deleted.
    without_aliphatic = Chem.DeleteSubstructs(Chem.Mol(mol), AliphaticRings)
    n_aromatic = Chem.GetSSSR(without_aliphatic)
    # Number of structural-alert patterns that hit at least once.
    n_alerts = sum(1 for alert in StructuralAlerts
                   if mol.HasSubstructMatch(alert))

    # The replacement
    # AROM=Lipinski.NumAromaticRings(mol),
    # is not identical. The expression above tends to count more rings
    # N1C2=CC=CC=C2SC3=C1C=CC4=C3C=CC=C4
    # OC1=C(O)C=C2C(=C1)OC3=CC(=O)C(=CC3=C2C4=CC=CC=C4)O
    # CC(C)C1=CC2=C(C)C=CC2=C(C)C=C1  uses 2, should be 0 ?
    return QEDproperties(
        MW=mol_weight,
        ALOGP=alogp,
        HBA=n_acceptors,
        HBD=n_donors,
        PSA=polar_surface,
        ROTB=n_rotatable,
        AROM=n_aromatic,
        ALERTS=n_alerts,
    )
Esempio n. 10
0
    def testNumRotatableBonds(self):
        """Check that the ways of selecting a rotatable-bond definition agree.

        For each test SMILES: the default call must equal the ``Default``
        option, the boolean ``False`` must equal ``NonStrict`` and the
        boolean ``True`` must equal ``Strict``.
        """
        for s in [
                "C1CC1CC",
                "CCNC(=O)NCC",
                'Cc1cccc(C)c1c1c(C)cccc1C',
                'CCc1cccc(C)c1c1c(C)cccc1CC',
                'Cc1cccc(C)c1c1c(C)nccc1C',
                'Cc1cccc(C)c1c1c(C)cccc1',
                'CCO',
        ]:

            m = Chem.MolFromSmiles(s)

            v1 = rdMD.CalcNumRotatableBonds(m)

            v2 = rdMD.CalcNumRotatableBonds(m, False)
            v3 = rdMD.CalcNumRotatableBonds(m, True)

            v4 = rdMD.CalcNumRotatableBonds(
                m, rdMD.NumRotatableBondsOptions.Default)
            v5 = rdMD.CalcNumRotatableBonds(
                m, rdMD.NumRotatableBondsOptions.NonStrict)
            v6 = rdMD.CalcNumRotatableBonds(
                m, rdMD.NumRotatableBondsOptions.Strict)
            # No reference value is compared for StrictLinkages; the call
            # only has to succeed.
            rdMD.CalcNumRotatableBonds(
                m, rdMD.NumRotatableBondsOptions.StrictLinkages)

            # assertEquals is a deprecated alias that was removed in
            # Python 3.12; assertEqual is the supported spelling.
            self.assertEqual(v1, v4)
            self.assertEqual(v2, v5)
            self.assertEqual(v3, v6)
Esempio n. 11
0
def _calculateDescriptors(mol):
    """Return a one-row DataFrame of RDKit descriptors for ``mol``.

    The frame has a single row (index 0).  Scalar descriptors come first,
    one column each, followed by the numbered ``slogp_VSA1..12``,
    ``smr_VSA1..10``, ``peoe_VSA1..14`` and ``MQN1..42`` columns.
    """
    df = pd.DataFrame(index=[0])
    # One call returns both Crippen values; do not compute them twice.
    slogp, smr = rdMolDescriptors.CalcCrippenDescriptors(mol)[:2]
    df["SlogP"] = slogp
    df["SMR"] = smr
    df["LabuteASA"] = rdMolDescriptors.CalcLabuteASA(mol)
    df["TPSA"] = Descriptors.TPSA(mol)
    df["AMW"] = Descriptors.MolWt(mol)
    df["ExactMW"] = rdMolDescriptors.CalcExactMolWt(mol)
    df["NumLipinskiHBA"] = rdMolDescriptors.CalcNumLipinskiHBA(mol)
    df["NumLipinskiHBD"] = rdMolDescriptors.CalcNumLipinskiHBD(mol)
    df["NumRotatableBonds"] = rdMolDescriptors.CalcNumRotatableBonds(mol)
    df["NumHBD"] = rdMolDescriptors.CalcNumHBD(mol)
    df["NumHBA"] = rdMolDescriptors.CalcNumHBA(mol)
    df["NumAmideBonds"] = rdMolDescriptors.CalcNumAmideBonds(mol)
    df["NumHeteroAtoms"] = rdMolDescriptors.CalcNumHeteroatoms(mol)
    df["NumHeavyAtoms"] = Chem.rdchem.Mol.GetNumHeavyAtoms(mol)
    df["NumAtoms"] = Chem.rdchem.Mol.GetNumAtoms(mol)
    df["NumRings"] = rdMolDescriptors.CalcNumRings(mol)
    df["NumAromaticRings"] = rdMolDescriptors.CalcNumAromaticRings(mol)
    df["NumSaturatedRings"] = rdMolDescriptors.CalcNumSaturatedRings(mol)
    df["NumAliphaticRings"] = rdMolDescriptors.CalcNumAliphaticRings(mol)
    df["NumAromaticHeterocycles"] = \
        rdMolDescriptors.CalcNumAromaticHeterocycles(mol)
    df["NumSaturatedHeterocycles"] = \
        rdMolDescriptors.CalcNumSaturatedHeterocycles(mol)
    df["NumAliphaticHeterocycles"] = \
        rdMolDescriptors.CalcNumAliphaticHeterocycles(mol)
    df["NumAromaticCarbocycles"] = \
        rdMolDescriptors.CalcNumAromaticCarbocycles(mol)
    df["NumSaturatedCarbocycles"] = \
        rdMolDescriptors.CalcNumSaturatedCarbocycles(mol)
    df["NumAliphaticCarbocycles"] = \
        rdMolDescriptors.CalcNumAliphaticCarbocycles(mol)
    df["FractionCSP3"] = rdMolDescriptors.CalcFractionCSP3(mol)
    df["Chi0v"] = rdMolDescriptors.CalcChi0v(mol)
    df["Chi1v"] = rdMolDescriptors.CalcChi1v(mol)
    df["Chi2v"] = rdMolDescriptors.CalcChi2v(mol)
    df["Chi3v"] = rdMolDescriptors.CalcChi3v(mol)
    df["Chi4v"] = rdMolDescriptors.CalcChi4v(mol)
    df["Chi1n"] = rdMolDescriptors.CalcChi1n(mol)
    df["Chi2n"] = rdMolDescriptors.CalcChi2n(mol)
    df["Chi3n"] = rdMolDescriptors.CalcChi3n(mol)
    df["Chi4n"] = rdMolDescriptors.CalcChi4n(mol)
    df["HallKierAlpha"] = rdMolDescriptors.CalcHallKierAlpha(mol)
    df["kappa1"] = rdMolDescriptors.CalcKappa1(mol)
    df["kappa2"] = rdMolDescriptors.CalcKappa2(mol)
    df["kappa3"] = rdMolDescriptors.CalcKappa3(mol)

    # Vector descriptors: one numbered column per element.  zip() pairs
    # names and values positionally and stops at the shorter sequence.
    slogp_VSA = ["slogp_VSA" + str(i) for i in range(1, 13)]
    df = df.assign(**dict(zip(slogp_VSA, rdMolDescriptors.SlogP_VSA_(mol))))
    smr_VSA = ["smr_VSA" + str(i) for i in range(1, 11)]
    df = df.assign(**dict(zip(smr_VSA, rdMolDescriptors.SMR_VSA_(mol))))
    peoe_VSA = ["peoe_VSA" + str(i) for i in range(1, 15)]
    df = df.assign(**dict(zip(peoe_VSA, rdMolDescriptors.PEOE_VSA_(mol))))
    MQNs = ["MQN" + str(i) for i in range(1, 43)]
    df = df.assign(**dict(zip(MQNs, rdMolDescriptors.MQNs_(mol))))
    return df
 def calculate_properties(self, mol):
     """Calculate basic properties for ``mol``.

     Returns a list holding, in order: the atom count, the first element
     of ``desc.CalcCrippenDescriptors``, the TPSA, the rotatable-bond
     count and the fraction of sp3 carbons.
     """
     return [
         mol.GetNumAtoms(),
         desc.CalcCrippenDescriptors(mol)[0],
         desc.CalcTPSA(mol),
         desc.CalcNumRotatableBonds(mol),
         desc.CalcFractionCSP3(mol),
     ]
Esempio n. 13
0
def build_library(in_smile,
                  frags,
                  lib,
                  rules=os.environ["COVALIB"] +
                  "/Code/Covalentizer/numbered_reaction.re",
                  linker_lib=False,
                  linker_smiles=''):
    """Fragment a molecule, write the fragments and run the reaction step.

    The first whitespace-separated token of the first line of ``in_smile``
    is parsed as SMILES and decomposed with ``Recap.RecapDecompose``.  For
    every RECAP child (plus the input itself) with more than 7 heavy atoms:

    * its canonical SMILES is kept if it has at most 25 heavy atoms and at
      most 5 rotatable bonds;
    * its Murcko scaffold SMILES is kept under the same size limit if the
      scaffold has at most 5 rotatable bonds and more than 7 heavy atoms.

    The unique, non-empty results are written to ``frags`` as
    ``<smiles>\\t<running index starting at 1>``; afterwards ``multi_react``
    (or ``multi_linkers`` when ``linker_lib`` is true) is called with
    ``[rules, frags, lib]``.

    Args:
        in_smile: Path of the input SMILES file.
        frags: Path of the fragment file to write.
        lib: Path passed on as the third element to the reaction step.
        rules: Path of the reaction rules file.  The default is evaluated
            when the function is defined and needs the ``COVALIB``
            environment variable.
        linker_lib: Selects ``multi_linkers`` instead of ``multi_react``.
        linker_smiles: Extra argument forwarded to ``multi_linkers``.

    Returns:
        None.  Returns early, without writing anything, when the input
        SMILES cannot be parsed.
    """
    argv = [in_smile, frags, lib]
    with open(argv[0], 'r') as f:
        smile = f.readline().split()[0]
    mol = Chem.MolFromSmiles(smile)
    if mol is None:
        return
    hierarch = Recap.RecapDecompose(mol)
    children = []
    # list(...) keeps the concatenation working when keys() returns a view
    # (Python 3) as well as when it returns a list (Python 2).
    for child in list(hierarch.GetAllChildren().keys()) + [smile]:
        # Cap the RECAP attachment points with explicit hydrogens.
        new_smiles = child.replace('[*]', '[H]')
        new = Chem.MolFromSmiles(new_smiles)
        if new is None:
            continue
        new_size = new.GetNumHeavyAtoms()
        if new_size <= 7:
            continue
        small_enough = new_size <= 25
        if small_enough and rdMolDescriptors.CalcNumRotatableBonds(new) <= 5:
            children.append(Chem.MolToSmiles(new, isomericSmiles=True))
        core_smile = MurckoScaffold.MurckoScaffoldSmilesFromSmiles(
            new_smiles, includeChirality=True)
        core = Chem.MolFromSmiles(core_smile)
        # Guard against a scaffold SMILES that fails to parse.
        if (small_enough and core is not None
                and rdMolDescriptors.CalcNumRotatableBonds(core) <= 5
                and core.GetNumHeavyAtoms() > 7):
            children.append(core_smile)

    with open(argv[1], 'w') as f:
        unique = (m for m in set(children) if len(m) > 0)
        for i, m in enumerate(unique, 1):
            f.write(m + '\t' + str(i) + '\n')

    if not linker_lib:
        multi_react([rules, argv[1], argv[2]])
    else:
        multi_linkers([rules, argv[1], argv[2]], linker_smiles)
Esempio n. 14
0
    def run(self, mol, logP, logP_sol, atom_pair_sol):
        """Predict from ``[logP_sol, atom_pair_sol, esol]`` and return the result.

        ``esol`` is computed here from ``logP``, the exact molecular weight,
        the rotatable-bond count and the fraction of heavy atoms matching
        the aromatic SMARTS ``[a]``.  The three features are scaled with
        ``self.scaler`` and passed to ``self.model``; the first prediction
        is returned.
        """
        mol_weight = Descriptors.ExactMolWt(mol)
        n_rotatable = rdMolDescriptors.CalcNumRotatableBonds(mol)
        aromatic_hits = mol.GetSubstructMatches(Chem.MolFromSmarts('[a]'))
        aromatic_fraction = len(aromatic_hits) / mol.GetNumHeavyAtoms()

        # Formula for estimating solubility (ESOL)
        esol = (0.16 - 0.63 * logP - 0.0062 * mol_weight
                + 0.066 * n_rotatable - 0.74 * aromatic_fraction)

        features = np.asarray([logP_sol, atom_pair_sol, esol]).reshape(1, -1)
        prediction = self.model.predict(self.scaler.transform(features))
        return prediction[0]
Esempio n. 15
0
def n_rotatable_bonds(mol):
    """ Count the rotatable bonds of a molecule.

    Thin wrapper around ``rdMolDescriptors.CalcNumRotatableBonds`` using its
    default options.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int

    """
    count = rdMolDescriptors.CalcNumRotatableBonds(mol)
    return count
Esempio n. 16
0
def calc_mol_prop(pdbfile):
    """Compute basic properties of the molecule stored in a PDB file.

    Args:
        pdbfile: Path of the PDB file to read.

    Returns:
        ``(True, mw, numrbond, numheavy)`` on success, where the values are
        the exact molecular weight, the rotatable-bond count and the
        heavy-atom count.  ``(False, 0, 0, 0)`` if reading the file or any
        calculation raises; ``"Error"`` is printed in that case.
    """
    try:
        m = Chem.MolFromPDBFile(pdbfile)
        mw = Descriptors.ExactMolWt(m)
        numrbond = rdMolDescriptors.CalcNumRotatableBonds(m)
        numheavy = rdchem.Mol.GetNumHeavyAtoms(m)

    except Exception:
        # Catch ordinary failures only; a bare ``except`` would also swallow
        # KeyboardInterrupt and SystemExit.
        print("Error")
        return False, 0, 0, 0

    return True, mw, numrbond, numheavy
Esempio n. 17
0
def feature_fp(smiles):
    """Return a feature vector for ``smiles`` as a NumPy array.

    The vector is the ``MQNs_`` list followed by the scalar descriptors
    listed below, in that order.
    """
    mol = Chem.MolFromSmiles(smiles)
    fp = rdMolDescriptors.MQNs_(mol)

    extra_descriptors = (
        rdMolDescriptors.CalcNumRotatableBonds,
        rdMolDescriptors.CalcExactMolWt,
        # NOTE(review): the rotatable-bond count appears a second time;
        # kept so the vector layout stays unchanged.
        rdMolDescriptors.CalcNumRotatableBonds,
        rdMolDescriptors.CalcFractionCSP3,
        rdMolDescriptors.CalcNumAliphaticCarbocycles,
        rdMolDescriptors.CalcNumAliphaticHeterocycles,
        rdMolDescriptors.CalcNumAliphaticRings,
        rdMolDescriptors.CalcNumAromaticCarbocycles,
        rdMolDescriptors.CalcNumAromaticHeterocycles,
        rdMolDescriptors.CalcNumAromaticRings,
        rdMolDescriptors.CalcNumBridgeheadAtoms,
        rdMolDescriptors.CalcNumRings,
        rdMolDescriptors.CalcNumAmideBonds,
        rdMolDescriptors.CalcNumHeterocycles,
        rdMolDescriptors.CalcNumSpiroAtoms,
        rdMolDescriptors.CalcTPSA,
    )
    for descriptor in extra_descriptors:
        fp.append(descriptor(mol))

    return np.array(fp)
def main(in_file, output):
  """Tabulate molecular properties and export them as CSV and Excel.

  Molecules are loaded with ``rdkit_open([in_file])``.  One record is built
  per molecule, keyed by the first whitespace-separated token of its
  ``_Name`` property, and the selected columns are written to
  ``<output>.csv`` and ``<output>.xlsx``.
  """
  molecules = rdkit_open([in_file])
  print('\n # Number of input molecule: {0}'.format(len(molecules)))

  records = {}
  for mol in molecules:
    name = mol.GetProp('_Name').split()[0]

    entry = {
      'Name': name,
      'Formula': rd.CalcMolFormula(mol),
      'SMILES': Chem.MolToSmiles(mol),
      'MW': rd._CalcMolWt(mol),                          # Molecular Weight
      'logP': rd.CalcCrippenDescriptors(mol)[0],         # Partition coefficient
      'HDon': rd.CalcNumLipinskiHBD(mol),                # Lipinski Hbond donor
      'HAcc': rd.CalcNumLipinskiHBA(mol),                # Lipinski Hbond acceptor
      'TPSA': rd.CalcTPSA(mol),                          # Topological polar surface area
      'Rotat': rd.CalcNumRotatableBonds(mol, strict=True),  # Rotatable bond
      'MolRef': rd.CalcCrippenDescriptors(mol)[1],       # Molar refractivity
      'AliRing': rd.CalcNumAliphaticRings(mol),          # Aliphatic ring number
      'AroRing': rd.CalcNumAromaticRings(mol),           # Aromatic ring number
      # Disabled columns:
      #   'Stereo': rd.CalcNumAtomStereoCenters(mol)                 # Stereo center number
      #   'UnspStereo': rd.CalcNumUnspecifiedAtomStereoCenters(mol)  # unspecified stereo
    }
    # The SMILES stored above is replaced by the isomeric form.
    entry['SMILES'] = Chem.MolToSmiles(mol,
                                       isomericSmiles=True,
                                       allHsExplicit=False)
    records[name] = entry

  table = pd.DataFrame.from_dict(records, orient='index')
  table.index.name = 'Name'

  # Columns of data to print out, in output order
  Columns = [
    'Formula',
    'MW', 'logP', 'HDon', 'HAcc', 'TPSA',
    'Rotat', 'MolRef', 'AliRing', 'AroRing',
    # 'Stereo', 'UnspStereo',
    'SMILES',
  ]
  selected = table[Columns]

  # Output to CSV
  selected.to_csv(output + '.csv', sep=',', na_rep='NA', encoding='utf-8',
                  float_format='%.5f', header=True)

  # Output to Excel
  selected.to_excel(output + '.xlsx', header=True, na_rep='NA')
Esempio n. 19
0
def get_conformations(smiles, max_conf=20, confs=None):
    """Embed conformers for ``smiles`` and return the molecule.

    Hydrogens are added before embedding.  When ``confs`` is None the
    number of requested conformers is ``1 + 3 * <rotatable bonds>``, capped
    at ``max_conf``; otherwise ``confs`` is used as given.
    """
    mol = Chem.AddHs(Chem.MolFromSmiles(smiles))

    n_rotatable = rdMolDescriptors.CalcNumRotatableBonds(mol)
    n_confs = min(1 + 3 * n_rotatable, max_conf) if confs is None else confs

    AllChem.EmbedMultipleConfs(
        mol,
        numConfs=n_confs,
        useExpTorsionAnglePrefs=True,
        useBasicKnowledge=True,
    )

    return mol
Esempio n. 20
0
    def _init_smiles(self, smiles, use_etdg_confs=False):
        """
        Initialise a Molecule object from a SMILES string using RDKit.

        Sets ``mol_obj``, ``charge``, ``n_rot_bonds``, ``n_h_donors`` and
        ``n_h_acceptors``, embeds ``self.n_confs`` conformers, then sets
        ``volume``, ``bonds`` and ``conformers`` and takes the atoms of the
        first conformer.  If RDKit fails while building the molecule or
        computing its volume, the error is logged and the method returns
        early, leaving the remaining attributes unset.

        :param smiles: (str) SMILES string
        :param use_etdg_confs: (bool) override the default conformer generation and use the ETDG algorithm
        :return: None
        """
        logger.info('Initialising a Molecule from a SMILES string')
        try:
            self.mol_obj = Chem.MolFromSmiles(smiles)
            self.mol_obj = Chem.AddHs(self.mol_obj)
            self.charge = Chem.GetFormalCharge(self.mol_obj)
            self.n_rot_bonds = rdMolDescriptors.CalcNumRotatableBonds(
                self.mol_obj)
            self.n_h_donors = rdMolDescriptors.CalcNumHBD(self.mol_obj)
            self.n_h_acceptors = rdMolDescriptors.CalcNumHBA(self.mol_obj)

        except Exception:
            # Catch ordinary failures only; a bare ``except`` would also
            # swallow KeyboardInterrupt and SystemExit.
            logger.error('RDKit failed to generate mol objects')
            return

        logger.info('Running conformation generation with RDKit... running')
        method = AllChem.ETKDGv2(
        ) if use_etdg_confs is False else AllChem.ETDG()
        method.pruneRmsThresh = 0.3
        method.numThreads = Config.n_cores
        conf_ids = list(
            AllChem.EmbedMultipleConfs(self.mol_obj,
                                       numConfs=self.n_confs,
                                       params=method))
        logger.info('                                          ... done')

        try:
            self.volume = AllChem.ComputeMolVolume(self.mol_obj)
        except ValueError:
            logger.error('RDKit failed to compute the molecular volume')
            return

        # Bonds as (begin atom index, end atom index) pairs.
        self.bonds = [(b.GetBeginAtomIdx(), b.GetEndAtomIdx())
                      for b in self.mol_obj.GetBonds()]
        self.conformers = extract_conformers_from_rdkit_mol_object(
            mol_obj=self.mol_obj, conf_ids=conf_ids)

        # Default to the first generated conformer in the absence of any other information
        self.set_atoms(atoms=self.conformers[0].atoms)

        return None
Esempio n. 21
0
def genereate_conformers(molsmi,
    max_conf=20,
    min_conf=10,
    max_steps=1000):
    """
    Build a molecule from SMILES and embed multiple conformers into it.

    A single embedding followed by a UFF optimisation is used first as a
    sanity check: if two atoms end up closer than 0.001 the molecule is
    rejected.  The number of conformers then requested is
    ``1 + 3 * <rotatable bonds>``, capped at ``max_conf`` and raised to at
    least ``min_conf``.

    Parameters
    ----------
    molsmi : str
        SMILES string of the molecule.
    max_conf : int
        Cap applied to the conformer count.
    min_conf : int
        Lower bound applied after the cap.
    max_steps : int
        Maximum number of UFF iterations.

    Returns
    -------
    The molecule with hydrogens and embedded conformers, or ``None`` if the
    SMILES cannot be parsed, the initial embedding fails, or the distance
    check fails.
    """

    molobj = Chem.MolFromSmiles(molsmi)

    if molobj is None:
        return None

    molobj = Chem.AddHs(molobj)

    status_embed = AllChem.EmbedMolecule(molobj)

    if status_embed != 0:
        return None

    # Unconverged UFF optimisations are deliberately kept, so the returned
    # status is not inspected.
    AllChem.UFFOptimizeMolecule(molobj, maxIters=max_steps)

    # Check bond lengths: smallest inter-atomic distance, with the zero
    # diagonal masked out.
    dist = Chem.rdmolops.Get3DDistanceMatrix(molobj)
    np.fill_diagonal(dist, 10.0)
    min_dist = np.min(dist)

    # For some atom_types in UFF, it will fail
    if min_dist < 0.001:
        # Report the offending SMILES; the previous code referenced an
        # undefined name here and raised NameError instead.
        print("fail", molsmi)
        return None

    rot_bond = rdMolDescriptors.CalcNumRotatableBonds(molobj)

    confs = min(1 + 3*rot_bond, max_conf)
    confs = max(confs, min_conf)

    AllChem.EmbedMultipleConfs(molobj,
        numConfs=confs,
        useExpTorsionAnglePrefs=True,
        useBasicKnowledge=True)

    return molobj
Esempio n. 22
0
def calc(smi, name):
    """Compute a row of physicochemical descriptors for one SMILES string.

    Args:
        smi: SMILES string of the molecule.
        name: Identifier returned as the first element of the result and
            used in the error message.

    Returns:
        A tuple ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw,
        csp3, fmf)`` with the float values rounded, or ``None`` (after a
        message on stderr) when the SMILES cannot be parsed.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    hba = rdMolDescriptors.CalcNumHBA(m)
    hbd = rdMolDescriptors.CalcNumHBD(m)
    nrings = rdMolDescriptors.CalcNumRings(m)
    rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
    psa = rdMolDescriptors.CalcTPSA(m)
    logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
    mw = rdMolDescriptors._CalcMolWt(m)
    csp3 = rdMolDescriptors.CalcFractionCSP3(m)
    hac = m.GetNumAtoms(onlyHeavy=True)
    # Fraction of heavy atoms that belong to the scaffold; a molecule
    # without heavy atoms gets 0 instead of raising ZeroDivisionError.
    if hac == 0:
        fmf = 0
    else:
        fmf = GetScaffoldForMol(m).GetNumAtoms(onlyHeavy=True) / hac
    return name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2), \
           round(csp3, 3), round(fmf, 3)
Esempio n. 23
0
    def _embed_conformers(self, mol):
        """Embed conformers into ``mol`` and return it.

        ``self.num_confs`` is used when it is truthy.  Otherwise the count
        depends on the number of rotatable bonds: 50 for up to 7, 200 for
        up to 12, and 300 beyond that.
        """
        num_confs = self.num_confs
        if not num_confs:
            n_rot = rdMolDescriptors.CalcNumRotatableBonds(mol)
            if n_rot > 12:
                num_confs = 300
            elif n_rot > 7:
                num_confs = 200
            else:
                num_confs = 50

        AllChem.EmbedMultipleConfs(
            mol, numConfs=num_confs, params=self.embed_parameters)

        return mol
Esempio n. 24
0
def generate_lowestE_conformer(my_smiles):
    """Generate conformers for ``my_smiles`` and pick the minimum-value one.

    The number of conformers requested is ``1 + 3 * <rotatable bonds>``,
    capped at ``MAP_paths.MAX_CONF``.

    Returns:
        ``(mol, conformer_id)`` where ``mol`` is the molecule returned by
        ``generate_conformations`` and ``conformer_id`` is the key with the
        smallest value in the mapping it returns alongside.
    """
    mol_with_hs = Chem.AddHs(Chem.MolFromSmiles(my_smiles))

    # Decide how many conformers are to be computed.
    n_rotatable = rdMolDescriptors.CalcNumRotatableBonds(mol_with_hs)
    n_confs = min(1 + 3 * n_rotatable, MAP_paths.MAX_CONF)

    # Generate the conformers.
    my_mol, energies_by_id = generate_conformations(mol_with_hs, n_confs)

    # Key of the smallest value.
    lowest_id = min(energies_by_id, key=lambda conf_id: energies_by_id.get(conf_id))

    return my_mol, lowest_id
Esempio n. 25
0
def get_conformations(smiles, max_conf=20):
    """Return one single-conformer molecule per embedded conformer.

    Hydrogens are added, then ``1 + 3 * <rotatable bonds>`` conformers
    (capped at ``max_conf``) are requested.  Each returned molecule is a
    copy of the parent that carries exactly one of the conformers.
    """
    parent = Chem.AddHs(Chem.MolFromSmiles(smiles))

    n_rotatable = rdMolDescriptors.CalcNumRotatableBonds(parent)
    n_confs = min(1 + 3 * n_rotatable, max_conf)

    AllChem.EmbedMultipleConfs(
        parent,
        numConfs=n_confs,
        useExpTorsionAnglePrefs=True,
        useBasicKnowledge=True,
    )

    return [
        Chem.Mol(parent, False, conformer.GetId())
        for conformer in parent.GetConformers()
    ]
Esempio n. 26
0
def get_torsions(ligand):
    '''
    Returns a list of the number of torsions for all the ligands in an input
    file. File must be in sdf or gzipped sdf format for now.

    Entries that cannot be parsed are skipped.  For any other file
    extension a message is printed and the program exits via sys.exit().
    '''
    ext = os.path.splitext(ligand)[-1]
    if ext == '.sdf':
        # The supplier reads from the file object; open it in binary mode.
        f = open(ligand, 'rb')
    elif ligand.endswith('.sdf.gz'):
        # os.path.splitext() only ever yields the last suffix ('.gz'), so
        # the compressed case has to be recognised on the full file name.
        f = gzip.open(ligand)
    else:
        print('Ligands not provided in sdf format\n')
        sys.exit()

    torsions = []
    try:
        suppl = Chem.ForwardSDMolSupplier(f)
        for mol in suppl:
            if mol is None:
                continue
            torsions.append(rdMD.CalcNumRotatableBonds(mol))
    finally:
        # Release the file handle even if reading fails part-way.
        f.close()
    return torsions
Esempio n. 27
0
def main(name, argv):
    """Print the building blocks that pass weight and rotatable-bond cut-offs.

    Every line of the input file is expected to hold a SMILES string and an
    identifier separated by whitespace.  Molecules whose molecular weight
    is at most ``argv[1]`` and whose rotatable-bond count is at most
    ``argv[2]`` are printed as ``<canonical smiles>\\t<identifier>``.
    Blank lines, lines with fewer than two fields and unparsable SMILES are
    skipped.

    Args:
        name: Program name, forwarded to ``print_usage`` on bad arguments.
        argv: ``[building_blocks_file, max_mol_weight, max_rotatable_bonds]``.
    """
    if len(argv) != 3:
        print_usage(name)
        return

    MW = float(argv[1])
    RB = int(argv[2])

    # read molport building blocks
    with open(argv[0], 'r') as f:
        for line in f:
            line_s = line.split()
            if len(line_s) < 2:
                # Nothing usable on this line (blank or missing identifier).
                continue
            mol = Chem.MolFromSmiles(line_s[0])
            if mol is None:
                continue
            if (Descriptors.MolWt(mol) <= MW
                    and rdMolDescriptors.CalcNumRotatableBonds(mol) <= RB):
                # Single-argument call form works as both the Python 2
                # print statement and the Python 3 print function.
                print(Chem.MolToSmiles(mol) + "\t" + line_s[1])
    def filter_druglikeness_5_rules(self, smiles):
        """Remove SMILES that violate the drug-likeness thresholds.

        A molecule is rejected when any of the following holds: MW > 600,
        ALOGP > 6, ALOGP < 0, HBA > 11, HBD > 7, PSA > 180 or ROTB > 11.
        The number of rejected entries is printed.

        Args:
            smiles: List of SMILES strings.  It is filtered in place.

        Returns:
            The same list object, now holding only the accepted entries.
        """
        kept = []
        for smi in smiles:
            mol = Chem.MolFromSmiles(smi)
            mol = Chem.RemoveHs(mol)

            MW = rdmd._CalcMolWt(mol)
            ALOGP = Crippen.MolLogP(mol)
            HBA = rdmd.CalcNumHBA(mol)
            HBD = rdmd.CalcNumHBD(mol)
            PSA = MolSurf.TPSA(mol)
            ROTB = rdmd.CalcNumRotatableBonds(
                mol, rdmd.NumRotatableBondsOptions.Strict)

            if MW > 600 or ALOGP > 6 or ALOGP < 0 or HBA > 11 or HBD > 7 or PSA > 180 or ROTB > 11:
                continue
            kept.append(smi)

        count = len(smiles) - len(kept)
        # Update the caller's list only after the loop: removing items while
        # iterating over the same list makes the iterator skip the element
        # that follows each removal.
        smiles[:] = kept
        print("unavaliable rule_5_drug:%i" % count)

        return smiles
Esempio n. 29
0
def generate_conformers(molobj, max_conf=100, min_conf=10):
    """Embed and MMFF-optimise conformers of ``molobj``.

    The molecule is first embedded once and UFF-optimised.  Then
    ``1 + 3 * <rotatable bonds>`` conformers are requested, capped at
    ``max_conf`` and raised to at least ``min_conf``, and all of them are
    passed to ``AllChem.MMFFOptimizeMoleculeConfs``.

    Returns:
        The second column of the optimisation result as a NumPy array
        (one value per conformer).
    """
    # The return statuses of the initial embedding and optimisation are
    # not inspected.
    AllChem.EmbedMolecule(molobj)
    AllChem.UFFOptimizeMolecule(molobj)

    n_rotatable = rdMolDescriptors.CalcNumRotatableBonds(molobj)
    n_confs = max(min(1 + 3 * n_rotatable, max_conf), min_conf)

    AllChem.EmbedMultipleConfs(
        molobj,
        numConfs=n_confs,
        useExpTorsionAnglePrefs=True,
        useBasicKnowledge=True,
    )

    results = np.array(AllChem.MMFFOptimizeMoleculeConfs(molobj))

    # Column 0 is named ``status`` and column 1 ``energies`` in the original
    # code; only the latter is returned.
    return results[:, 1]
Esempio n. 30
0
 def calculate_properties(self, smiles=None, mol=None, props=[]):
     """Calculate the requested properties and store them in ``self.data``.

     ``props`` names the properties to compute; each recognised name
     becomes a key of ``self.data``.  If ``mol`` is None it is built from
     ``smiles``.

     returns : error (bool) -- True when ``props`` is empty or no molecule
     is available, False once the requested properties have been stored.
     """
     # The shared default list is only read, never modified.
     if len(props) == 0:
         return True
     if mol is None:
         mol = Chem.MolFromSmiles(smiles)
         if mol is None:
             return True

     def count_cl_br(m):
         symbols = [atom.GetSymbol() for atom in m.GetAtoms()]
         return symbols.count('Cl') + symbols.count('Br')

     # (property name, calculator), in the order the values are stored.
     calculators = (
         ('py_formula', lambda m: desc.CalcMolFormula(m)),
         ('py_em', lambda m: round(desc.CalcExactMolWt(m), 5)),
         ('py_n_Cl_Br', count_cl_br),
         ('py_na', lambda m: m.GetNumAtoms()),
         ('py_mw', lambda m: desc._CalcMolWt(m)),
         ('py_fsp3', lambda m: desc.CalcFractionCSP3(m)),
         ('py_rb', lambda m: desc.CalcNumRotatableBonds(m)),
         ('py_tpsa', lambda m: desc.CalcTPSA(m)),
         ('py_clogp', lambda m: desc.CalcCrippenDescriptors(m)[0]),
         ('py_nar', lambda m: desc.CalcNumAromaticRings(m)),
         ('py_nhba', lambda m: desc.CalcNumHBA(m)),
         ('py_nhbd', lambda m: desc.CalcNumHBD(m)),
     )
     for key, compute in calculators:
         if key in props:
             self.data[key] = compute(mol)
     return False