Example #1
0
def calc(smi, name):
    """Compute a tuple of physico-chemical descriptors for one SMILES string.

    :param smi: SMILES string of the molecule
    :param name: molecule identifier, returned as the first element of the result
    :return: tuple (name, HBA, HBD, HBA + HBD, ring count, rotatable bonds, TPSA, logP, MR, MW,
             fraction of sp3 carbons, scaffold heavy-atom fraction, QED, heavy atom count,
             fused ring count, result of count_hbd_hba_atoms, largest ring size,
             number of chiral centers, fraction of sp3 carbons of the scaffold);
             None if the SMILES cannot be parsed or any descriptor calculation fails
             (a message is written to stderr in both cases)
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # The scaffold feeds two descriptors, so build it only once.
        scaffold = GetScaffoldForMol(m)
        # Guard against division by zero for molecules without heavy atoms.
        fmf = scaffold.GetNumHeavyAtoms() / hac if hac else 0
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        n_unique_hba_hbd_atoms = count_hbd_hba_atoms(m)
        # default=() makes the largest ring size 0 for acyclic molecules.
        max_ring_size = len(max(m.GetRingInfo().AtomRings(), key=len, default=()))
        n_chiral_centers = len(FindMolChiralCenters(m, includeUnassigned=True))
        fcsp3_bm = rdMolDescriptors.CalcFractionCSP3(scaffold)
        return (name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2), round(logp, 2), round(mr, 2),
                round(mw, 2), round(csp3, 3), round(fmf, 3), round(qed, 3), hac, nrings_fused,
                n_unique_hba_hbd_atoms, max_ring_size, n_chiral_centers, round(fcsp3_bm, 3))
    except Exception:
        # Best effort: skip the molecule, but let KeyboardInterrupt/SystemExit propagate.
        sys.stderr.write(f'molecule {name} was omitted due to an error in calculation of some descriptors\n')
        return None
Example #2
0
def computeFeatures(mol):
    """Build a flat feature list for an RDKit molecule.

    Layout of the returned list: ring count, the results of the nitrogen,
    oxygen, carbon, boron, phosphorus, sulfur, fluorine and iodine count
    helpers, the double-bond count, Labute ASA, exact molecular weight, then
    every value of SlogP_VSA_ and finally every value stored in the mapping
    returned by recurseMolHCount.

    NOTE(review): the original also computed the rotatable-bond count but never
    added it to the output. The unused computation was removed and the feature
    layout is unchanged - confirm the omission was intended.
    """
    output = [
        rdMolDescriptors.CalcNumRings(mol),
        countNitrogens(mol),
        countOxygens(mol),
        countCarbons(mol),
        countBorons(mol),
        countPhos(mol),
        countSulfurs(mol),
        countFluorine(mol),
        countIodine(mol),
        countDoubleBonds(mol),
        rdMolDescriptors.CalcLabuteASA(mol),
        rdMolDescriptors.CalcExactMolWt(mol),
    ]
    output.extend(rdMolDescriptors.SlogP_VSA_(mol))
    dist_hs = recurseMolHCount(mol)
    # Values are appended in the iteration order of dist_hs.
    output.extend(dist_hs[key] for key in dist_hs)
    return output
Example #3
0
def calc(smi, name):
    """Compute a tuple of physico-chemical descriptors for one SMILES string.

    :param smi: SMILES string of the molecule
    :param name: molecule identifier, returned as the first element of the result
    :return: tuple (name, HBA, HBD, HBA + HBD, ring count, rotatable bonds, TPSA, logP, MR, MW,
             fraction of sp3 carbons, scaffold heavy-atom fraction, QED, heavy atom count,
             fused ring count); None if the SMILES cannot be parsed or any descriptor
             calculation fails (a message is written to stderr in both cases)
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # Guard against division by zero for molecules without heavy atoms.
        fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac if hac else 0
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        return (name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2), round(logp, 2), round(mr, 2),
                round(mw, 2), round(csp3, 3), round(fmf, 3), round(qed, 3), hac, nrings_fused)
    except Exception:
        # Best effort: skip the molecule, but let KeyboardInterrupt/SystemExit propagate.
        sys.stderr.write(
            f'molecule {name} was omitted due to an error in calculation of some descriptors\n'
        )
        return None
Example #4
0
def _calculateDescriptors(mol):
    """Return a one-row DataFrame (index 0) of RDKit descriptors for ``mol``.

    Columns are created in a fixed order: scalar descriptors first, then the
    binned surface-area descriptors (slogp_VSA1..12, smr_VSA1..10,
    peoe_VSA1..14) and finally MQN1..42.
    """
    df = pd.DataFrame(index=[0])

    # Both Crippen values come from one call; compute it once instead of twice.
    slogp, smr = rdMolDescriptors.CalcCrippenDescriptors(mol)
    df["SlogP"] = slogp
    df["SMR"] = smr
    df["LabuteASA"] = rdMolDescriptors.CalcLabuteASA(mol)
    df["TPSA"] = Descriptors.TPSA(mol)
    df["AMW"] = Descriptors.MolWt(mol)
    df["ExactMW"] = rdMolDescriptors.CalcExactMolWt(mol)
    df["NumLipinskiHBA"] = rdMolDescriptors.CalcNumLipinskiHBA(mol)
    df["NumLipinskiHBD"] = rdMolDescriptors.CalcNumLipinskiHBD(mol)
    df["NumRotatableBonds"] = rdMolDescriptors.CalcNumRotatableBonds(mol)
    df["NumHBD"] = rdMolDescriptors.CalcNumHBD(mol)
    df["NumHBA"] = rdMolDescriptors.CalcNumHBA(mol)
    df["NumAmideBonds"] = rdMolDescriptors.CalcNumAmideBonds(mol)
    df["NumHeteroAtoms"] = rdMolDescriptors.CalcNumHeteroatoms(mol)
    df["NumHeavyAtoms"] = mol.GetNumHeavyAtoms()
    df["NumAtoms"] = mol.GetNumAtoms()
    df["NumRings"] = rdMolDescriptors.CalcNumRings(mol)
    df["NumAromaticRings"] = rdMolDescriptors.CalcNumAromaticRings(mol)
    df["NumSaturatedRings"] = rdMolDescriptors.CalcNumSaturatedRings(mol)
    df["NumAliphaticRings"] = rdMolDescriptors.CalcNumAliphaticRings(mol)
    df["NumAromaticHeterocycles"] = \
        rdMolDescriptors.CalcNumAromaticHeterocycles(mol)
    df["NumSaturatedHeterocycles"] = \
        rdMolDescriptors.CalcNumSaturatedHeterocycles(mol)
    df["NumAliphaticHeterocycles"] = \
        rdMolDescriptors.CalcNumAliphaticHeterocycles(mol)
    df["NumAromaticCarbocycles"] = \
        rdMolDescriptors.CalcNumAromaticCarbocycles(mol)
    df["NumSaturatedCarbocycles"] = \
        rdMolDescriptors.CalcNumSaturatedCarbocycles(mol)
    df["NumAliphaticCarbocycles"] = \
        rdMolDescriptors.CalcNumAliphaticCarbocycles(mol)
    df["FractionCSP3"] = rdMolDescriptors.CalcFractionCSP3(mol)
    df["Chi0v"] = rdMolDescriptors.CalcChi0v(mol)
    df["Chi1v"] = rdMolDescriptors.CalcChi1v(mol)
    df["Chi2v"] = rdMolDescriptors.CalcChi2v(mol)
    df["Chi3v"] = rdMolDescriptors.CalcChi3v(mol)
    df["Chi4v"] = rdMolDescriptors.CalcChi4v(mol)
    df["Chi1n"] = rdMolDescriptors.CalcChi1n(mol)
    df["Chi2n"] = rdMolDescriptors.CalcChi2n(mol)
    df["Chi3n"] = rdMolDescriptors.CalcChi3n(mol)
    df["Chi4n"] = rdMolDescriptors.CalcChi4n(mol)
    df["HallKierAlpha"] = rdMolDescriptors.CalcHallKierAlpha(mol)
    df["kappa1"] = rdMolDescriptors.CalcKappa1(mol)
    df["kappa2"] = rdMolDescriptors.CalcKappa2(mol)
    df["kappa3"] = rdMolDescriptors.CalcKappa3(mol)

    # Vector-valued descriptors: (column prefix, number of columns, calculator).
    # zip() pairs the 1-based column names with the returned values.
    vector_descriptors = (
        ("slogp_VSA", 12, rdMolDescriptors.SlogP_VSA_),
        ("smr_VSA", 10, rdMolDescriptors.SMR_VSA_),
        ("peoe_VSA", 14, rdMolDescriptors.PEOE_VSA_),
        ("MQN", 42, rdMolDescriptors.MQNs_),
    )
    for prefix, n_columns, calculator in vector_descriptors:
        column_names = [prefix + str(i) for i in range(1, n_columns + 1)]
        df = df.assign(**dict(zip(column_names, calculator(mol))))
    return df
Example #5
0
def reward_target_num_rings(mol, target):
    """
    Score how close the ring count of a molecule is to a target value
    :param mol: rdkit mol object
    :param target: int, desired number of rings
    :return: 1 when the ring count equals target, dropping quadratically with
             the difference otherwise; range (-inf, 1]
    """
    deviation = rdMolDescriptors.CalcNumRings(mol) - target
    return 1 - deviation ** 2
Example #6
0
    def __init__(self, configuration: StatsExtractionConfig):
        """Store the configuration and build the UDFs (via ``psf.udf``) kept on this instance.

        :param configuration: stats extraction settings; its
            ``standardisation_config`` mapping (filter name -> parameters) is
            turned into a list of FilterConfiguration objects for the
            standardisation UDF.
        """
        # Enum/constant holder classes are stored as-is (not instantiated).
        self._filters = FilterTypesEnum

        self._columns = DataframeColumnsEnum
        self._stats = StatsExtractionEnum
        self._purging = PurgingEnum
        self._configuration = configuration
        standardisation_config_dict = self._configuration.standardisation_config
        standardisation_config = [
            FilterConfiguration(name=name, parameters=params)
            for name, params in standardisation_config_dict.items()
        ]

        # Tokens are bound to locals and captured by the lambdas below
        # (presumably so the closures do not reference self - TODO confirm).
        dec_separator = self._stats.DECORATION_SEPARATOR_TOKEN
        attachment_token = self._stats.ATTACHMENT_POINT_TOKEN
        # Each UDF below takes a SMILES string and parses it with
        # Chem.MolFromSmiles. NOTE(review): a failed parse is not handled here;
        # if MolFromSmiles returns None the lambdas will raise - confirm that
        # inputs are validated upstream.
        self._mol_wts_udf = psf.udf(
            lambda x: ExactMolWt(Chem.MolFromSmiles(x)), pst.FloatType())
        self._num_rings_udf = psf.udf(
            lambda x: rdMolDescriptors.CalcNumRings(Chem.MolFromSmiles(x)),
            pst.IntegerType())
        self._num_atoms_udf = psf.udf(
            lambda x: Chem.MolFromSmiles(x).GetNumHeavyAtoms(),
            pst.IntegerType())
        self._num_aromatic_rings_udf = psf.udf(
            lambda x: rdMolDescriptors.CalcNumAromaticRings(
                Chem.MolFromSmiles(x)), pst.IntegerType())
        self._hbond_donors_udf = psf.udf(
            lambda x: rdMolDescriptors.CalcNumHBD(Chem.MolFromSmiles(x)),
            pst.IntegerType())
        self._hbond_acceptors_udf = psf.udf(
            lambda x: rdMolDescriptors.CalcNumHBA(Chem.MolFromSmiles(x)),
            pst.IntegerType())
        # NOTE(review): despite the name, this counts atoms with atomic number 6
        # (carbon) and divides by the heavy-atom count, i.e. it yields the
        # carbon fraction - confirm whether callers expect 1 minus this value.
        # The SMILES is parsed twice per call.
        self._hetero_atom_ratio_udf = psf.udf(
            lambda x: len([
                atom for atom in Chem.MolFromSmiles(x).GetAtoms()
                if atom.GetAtomicNum() == 6
            ]) / Chem.MolFromSmiles(x).GetNumHeavyAtoms(), pst.FloatType())
        # Round-trips the SMILES through RDKit (parse, then write back out).
        self._make_canonical_udf = psf.udf(
            lambda x: Chem.MolToSmiles(Chem.MolFromSmiles(x)),
            pst.StringType())
        # A new RDKitStandardizer is constructed on every invocation.
        self._standardise_smiles_udf = psf.udf(
            lambda x: RDKitStandardizer(standardisation_config, None).
            apply_filter(x), pst.StringType())
        # Tokenisation: all matches of the REGEX_TOKENS pattern, in order.
        pattern = self._stats.REGEX_TOKENS
        self.regex = re.compile(pattern)
        self._tokeniser_udf = psf.udf(self.regex.findall,
                                      pst.ArrayType(pst.StringType()))
        # Splits a string on the decoration separator token.
        self._decoration_split_udf = psf.udf(lambda x: x.split(dec_separator),
                                             pst.ArrayType(pst.StringType()))
        # Counts elements of the input equal to the attachment point token
        # (for a string input, list(s) iterates character by character).
        self._count_decorations_udf = psf.udf(
            lambda s: list(s).count(attachment_token), pst.IntegerType())
def calc(smi, name):
    """Compute a tuple of physico-chemical descriptors for one SMILES string.

    :param smi: SMILES string of the molecule
    :param name: molecule identifier, returned as the first element of the result
    :return: tuple (name, HBA, HBD, HBA + HBD, ring count, rotatable bonds, TPSA, logP, MR, MW,
             fraction of sp3 carbons, scaffold heavy-atom fraction);
             None if the SMILES cannot be parsed (a message is written to stderr)
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    hba = rdMolDescriptors.CalcNumHBA(m)
    hbd = rdMolDescriptors.CalcNumHBD(m)
    nrings = rdMolDescriptors.CalcNumRings(m)
    rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
    psa = rdMolDescriptors.CalcTPSA(m)
    logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
    mw = rdMolDescriptors._CalcMolWt(m)
    csp3 = rdMolDescriptors.CalcFractionCSP3(m)
    # GetNumHeavyAtoms() replaces the deprecated GetNumAtoms(onlyHeavy=True).
    hac = m.GetNumHeavyAtoms()
    # A molecule without heavy atoms would otherwise divide by zero.
    fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac if hac else 0
    return (name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2), round(logp, 2), round(mr, 2),
            round(mw, 2), round(csp3, 3), round(fmf, 3))
Example #8
0
def feature_fp(smiles):
    """Return a numpy feature vector for a SMILES string.

    The vector is the MQN descriptor list followed by 16 scalar descriptors
    from rdMolDescriptors, in the order listed below.

    NOTE(review): the rotatable-bond count appears twice (before and after the
    exact molecular weight). It is kept so the vector layout stays unchanged -
    confirm whether the duplicate is intentional.
    """
    mol = Chem.MolFromSmiles(smiles)
    features = rdMolDescriptors.MQNs_(mol)
    features.extend([
        rdMolDescriptors.CalcNumRotatableBonds(mol),
        rdMolDescriptors.CalcExactMolWt(mol),
        rdMolDescriptors.CalcNumRotatableBonds(mol),
        rdMolDescriptors.CalcFractionCSP3(mol),
        rdMolDescriptors.CalcNumAliphaticCarbocycles(mol),
        rdMolDescriptors.CalcNumAliphaticHeterocycles(mol),
        rdMolDescriptors.CalcNumAliphaticRings(mol),
        rdMolDescriptors.CalcNumAromaticCarbocycles(mol),
        rdMolDescriptors.CalcNumAromaticHeterocycles(mol),
        rdMolDescriptors.CalcNumAromaticRings(mol),
        rdMolDescriptors.CalcNumBridgeheadAtoms(mol),
        rdMolDescriptors.CalcNumRings(mol),
        rdMolDescriptors.CalcNumAmideBonds(mol),
        rdMolDescriptors.CalcNumHeterocycles(mol),
        rdMolDescriptors.CalcNumSpiroAtoms(mol),
        rdMolDescriptors.CalcTPSA(mol),
    ])
    return np.array(features)
    def get_global_features(self, mol):
        """Assemble the molecule-level feature vector for ``mol``.

        The returned numpy array contains, in order: atom count, bond count,
        exact molecular weight, heavy-atom molecular weight, valence electron
        count, the number of Acceptor / Donor / Hydrophobe / LumpedHydrophobe /
        PosIonizable / NegIonizable features found by the BaseFeatures.fdef
        factory, and then the rdMolDescriptors values listed at the end.
        """
        # Feature factory built from the BaseFeatures.fdef file in RDDataDir.
        fdef_path = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
        feature_factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)
        mol_feats = feature_factory.GetFeaturesForMol(mol)

        # Basic whole-molecule quantities.
        # Max/Min(Abs)PartialCharge are deliberately left out: they produced
        # infinity for refcode_csd = YOLJUF
        # (CCOP(=O)(Cc1ccc(cc1)NC(=S)NP(OC(C)C)(OC(C)C)[S])OCC).
        basic_features = [
            mol.GetNumAtoms(),
            mol.GetNumBonds(),
            Descriptors.ExactMolWt(mol),
            Descriptors.HeavyAtomMolWt(mol),
            Descriptors.NumValenceElectrons(mol),
        ]

        # Tally features per family; families not listed here are ignored.
        family_tally = {
            'Acceptor': 0,
            'Donor': 0,
            'Hydrophobe': 0,
            'LumpedHydrophobe': 0,
            'PosIonizable': 0,
            'NegIonizable': 0,
        }
        for feat in mol_feats:
            family = feat.GetFamily()
            if family in family_tally:
                family_tally[family] += 1
        family_features = [
            family_tally['Acceptor'],
            family_tally['Donor'],
            family_tally['Hydrophobe'],
            family_tally['LumpedHydrophobe'],
            family_tally['PosIonizable'],
            family_tally['NegIonizable'],
        ]

        # Descriptors from rdMolDescriptors (note: Chi3v is not part of the list).
        descriptor_features = [
            rdm.CalcNumRotatableBonds(mol),
            rdm.CalcChi0n(mol),
            rdm.CalcChi0v(mol),
            rdm.CalcChi1n(mol),
            rdm.CalcChi1v(mol),
            rdm.CalcChi2n(mol),
            rdm.CalcChi2v(mol),
            rdm.CalcChi3n(mol),
            rdm.CalcChi4n(mol),
            rdm.CalcChi4v(mol),
            rdm.CalcFractionCSP3(mol),
            rdm.CalcHallKierAlpha(mol),
            rdm.CalcKappa1(mol),
            rdm.CalcKappa2(mol),
            rdm.CalcLabuteASA(mol),
            rdm.CalcNumAliphaticCarbocycles(mol),
            rdm.CalcNumAliphaticHeterocycles(mol),
            rdm.CalcNumAliphaticRings(mol),
            rdm.CalcNumAmideBonds(mol),
            rdm.CalcNumAromaticCarbocycles(mol),
            rdm.CalcNumAromaticHeterocycles(mol),
            rdm.CalcNumAromaticRings(mol),
            rdm.CalcNumBridgeheadAtoms(mol),
            rdm.CalcNumHBA(mol),
            rdm.CalcNumHBD(mol),
            rdm.CalcNumHeteroatoms(mol),
            rdm.CalcNumHeterocycles(mol),
            rdm.CalcNumLipinskiHBA(mol),
            rdm.CalcNumLipinskiHBD(mol),
            rdm.CalcNumRings(mol),
            rdm.CalcNumSaturatedCarbocycles(mol),
            rdm.CalcNumSaturatedHeterocycles(mol),
            rdm.CalcNumSaturatedRings(mol),
            rdm.CalcNumSpiroAtoms(mol),
            rdm.CalcTPSA(mol),
        ]

        # Some descriptors can produce NaN; they are returned unchanged here.
        # Zeroing them instead could create outliers in training/validation,
        # in which case removing the affected features may be the better fix.
        combined = basic_features + family_features + descriptor_features
        return np.array(combined).T
Example #10
0
# Module-level descriptor callables. Each public one is a thin wrapper around an
# rdMolDescriptors function and carries a `version` attribute (and usually a
# __doc__); the underscore-prefixed helpers return the substructure matches of
# the corresponding SMARTS pattern instead of a count.
NumHeteroatoms.version = "1.0.0"
_Heteroatoms = lambda x, y=HeteroatomSmarts: x.GetSubstructMatches(y,
                                                                   uniquify=1)
NumRotatableBonds = lambda x: rdMolDescriptors.CalcNumRotatableBonds(x)
NumRotatableBonds.__doc__ = "Number of Rotatable Bonds"
NumRotatableBonds.version = "1.0.0"
_RotatableBonds = lambda x, y=RotatableBondSmarts: x.GetSubstructMatches(
    y, uniquify=1)
# NOCount / NHOHCount delegate to the Lipinski HBA / HBD calculators.
NOCount = lambda x: rdMolDescriptors.CalcNumLipinskiHBA(x)
NOCount.__doc__ = "Number of Nitrogens and Oxygens"
NOCount.version = "1.0.0"
NHOHCount = lambda x: rdMolDescriptors.CalcNumLipinskiHBD(x)
NHOHCount.__doc__ = "Number of NHs or OHs"
NHOHCount.version = "2.0.0"

# Number of rings, as computed by rdMolDescriptors.CalcNumRings (no __doc__ is set).
RingCount = lambda x: rdMolDescriptors.CalcNumRings(x)
RingCount.version = "1.0.0"


def HeavyAtomCount(mol):
    " Number of heavy atoms in a molecule."
    return mol.GetNumHeavyAtoms()


HeavyAtomCount.version = "1.0.1"

# Names of rdMolDescriptors functions, presumably consumed by code below this
# view - TODO confirm where and how they are used.
_bulkConvert = ("CalcFractionCSP3", "CalcNumAromaticRings",
                "CalcNumSaturatedRings", "CalcNumAromaticHeterocycles",
                "CalcNumAromaticCarbocycles", "CalcNumSaturatedHeterocycles",
                "CalcNumSaturatedCarbocycles", "CalcNumAliphaticRings",
                "CalcNumAliphaticHeterocycles", "CalcNumAliphaticCarbocycles")
Example #11
0
    return fig



#FRAGMENTS = {
#    "acyl_halide": Chem.MolFromSmarts('[#9,#17,#35,#53]=O'),  # C(=O)X
#    "anhydride": Chem.MolFromSmarts('[#6]-[#6](=O)-[#8]-[#6](-[#6])=O'),  # CC(=O)OC(=O)C
#    "peroxide": Chem.MolFromSmarts('[#8]-[#8]'),  # R-O-O-R'
#    "ab_unsaturated_ketone": Chem.MolFromSmarts('[#6]=[#6]-[#6]=O'),  # R=CC=O
#}

DESCRIPTORS = {
    # classical molecular descriptors
    "num_heavy_atoms": lambda x: x.GetNumAtoms(),
    "molecular_weight": lambda x: round(Desc.ExactMolWt(x), 4),
    "num_rings": lambda x: rdMolDesc.CalcNumRings(x),
    "num_rings_arom": lambda x: rdMolDesc.CalcNumAromaticRings(x),
    "num_rings_ali": lambda x: rdMolDesc.CalcNumAliphaticRings(x),
    "num_hbd": lambda x: rdMolDesc.CalcNumLipinskiHBD(x),
    "num_hba": lambda x: rdMolDesc.CalcNumLipinskiHBA(x),
    "slogp": lambda x: round(Crippen.MolLogP(x), 4),
    "tpsa": lambda x: round(rdMolDesc.CalcTPSA(x), 4),
    "num_rotatable_bond": lambda x: rdMolDesc.CalcNumRotatableBonds(x),
    "num_atoms_oxygen": lambda x: len(
        [a for a in x.GetAtoms() if a.GetAtomicNum() == 8]
    ),
    "num_atoms_nitrogen": lambda x: len(
        [a for a in x.GetAtoms() if a.GetAtomicNum() == 7]
    ),
    "num_atoms_halogen": Fragments.fr_halogen,
    "num_atoms_bridgehead": rdMolDesc.CalcNumBridgeheadAtoms,
Example #12
0
def _format_id_list(ids):
    """Return ids as one comma-separated field plus a trailing space, or "-1 " if empty."""
    if len(ids) == 0:
        return "-1 "
    return ",".join(str(item) for item in ids) + " "


def _format_mol_stats(mol):
    """Return the "Natoms Nbonds Nnonhatoms Chg Nhbd Nhba Nrotbonds Nrings " fields for mol."""
    stats = (
        mol.GetNumAtoms(),
        mol.GetNumBonds(),
        mol.GetNumHeavyAtoms(),
        rdmolops.GetFormalCharge(mol),
        rdMolDescriptors.CalcNumHBD(mol),
        rdMolDescriptors.CalcNumHBA(mol),
        rdMolDescriptors.CalcNumRotatableBonds(mol),
        rdMolDescriptors.CalcNumRings(mol),
    )
    return "".join(str(value) + " " for value in stats)


def datadump(database, dumpdir):
    """Dump the contents of a pickled database to text logs and PNG images.

    Creates ``dumpdir`` containing ``frag.dat``, ``mol.dat`` and ``surr.dat``
    (one space-separated record per fragment, molecule and non-terminal
    connection surrogate cap) plus a ``png`` sub-directory with one 500x500
    image per record.

    :param database: path of the pickled database file
    :param dumpdir: output directory; it must not exist yet
    :raises Warning: if ``dumpdir`` already exists
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted database files.
    with open(database, "rb") as db_file:
        db = pickle.load(db_file)

    if os.path.exists(dumpdir):
        raise Warning(
            "Caution, %s already exists. Already existing data may be overwritten."
            % dumpdir)
    os.mkdir(dumpdir)
    os.mkdir(dumpdir + "/png")

    frag2mol = db.get_frag2mol()
    frag2lcapconn = db.get_frag2lcapconn()
    frag2rcapconn = db.get_frag2rcapconn()
    mol2frag = db.get_mol2frag()

    # ----- fragments -----
    frag_log = logger(dumpdir + "/frag.dat")
    frag_log.log("### datadump of database %s" % database)
    frag_log.log("### timestamp %s" %
                 time.asctime(time.localtime(time.time())))
    frag_log.log("### written by run_fragresp.py datadump routine.")
    frag_log.log("###")
    frag_log.log("### ----------------- ###")
    frag_log.log("### FRAGMENT DATA LOG ###")
    frag_log.log("### ----------------- ###")
    frag_log.log("###")
    frag_log.log(
        "# id smiles mol_id lcap_id rcap_id Natoms Nbonds Nnonhatoms Chg Nhbd Nhba Nrotbonds Nrings"
    )

    for frag_i in range(db.get_frag_count()):
        frag = db.get_frag(frag_i)
        Chem.SanitizeMol(frag)

        frag_log.log(
            str(frag_i) + " "
            + str(Chem.MolToSmiles(frag, isomericSmiles=True)) + " "
            + _format_id_list(frag2mol[frag_i])
            + _format_id_list(frag2lcapconn[frag_i])
            + _format_id_list(frag2rcapconn[frag_i])
            + _format_mol_stats(frag))

        png_path = dumpdir + "/png/" + "frag_%d.png" % frag_i
        try:
            Chem.SanitizeMol(frag)
            AllChem.Compute2DCoords(frag)
            Draw.MolToFile(frag, png_path, size=(500, 500))
        except Exception:
            # Drawing is best effort for fragments; keep dumping the rest.
            print("Could not save frag %d to disk." % frag_i)

    frag_log.close()

    # ----- molecules -----
    mol_log = logger(dumpdir + "/mol.dat")
    mol_log.log("### datadump of database %s" % database)
    mol_log.log("### timestamp %s" % time.asctime(time.localtime(time.time())))
    mol_log.log("### written by run_fragresp.py datadump routine.")
    mol_log.log("###")
    mol_log.log("### ----------------- ###")
    mol_log.log("### MOLECULE DATA LOG ###")
    mol_log.log("### ----------------- ###")
    mol_log.log("###")
    mol_log.log(
        "# id name smiles frag_id Natoms Nbonds Nnonhatoms Chg Nhbd Nhba Nrotbonds Nrings"
    )

    for mol_i in range(db.get_mol_count()):
        mol = db.get_mol(mol_i)
        Chem.SanitizeMol(mol)
        name = db.get_name(mol_i)
        decomp = db.get_decompose(mol_i)

        # Only the first get_frag_count() entries of mol2frag[mol_i] are reported.
        frag_ids = [mol2frag[mol_i][i] for i in range(decomp.get_frag_count())]

        mol_log.log(
            str(mol_i) + " "
            + name + " "
            + str(Chem.MolToSmiles(mol, isomericSmiles=True)) + " "
            + _format_id_list(frag_ids)
            + _format_mol_stats(mol))

        png_path = dumpdir + "/png/" + "mol_%d.png" % mol_i
        AllChem.Compute2DCoords(mol)
        Chem.Kekulize(mol)
        Draw.MolToFile(mol, png_path, size=(500, 500))

    mol_log.close()

    # ----- surrogate caps of non-terminal connections -----
    surr_log = logger(dumpdir + "/surr.dat")
    surr_log.log("### datadump of database %s" % database)
    surr_log.log("### timestamp %s" %
                 time.asctime(time.localtime(time.time())))
    surr_log.log("### written by run_fragresp.py datadump routine.")
    surr_log.log("###")
    surr_log.log("### ----------------- ###")
    surr_log.log("### SURROGATE DATA LOG ###")
    surr_log.log("### ------------------ ###")
    surr_log.log("###")
    surr_log.log(
        "# id name smiles mol_id Natoms Nbonds Nnonhatoms Chg Nhbd Nhba Nrotbonds Nrings"
    )

    for conn_i, conn in enumerate(db.get_conn_list()):

        if conn.get_terminal():
            continue

        name = conn.get_name()

        conn_cap = conn.get_surrogate_cap()
        Chem.SanitizeMol(conn_cap)

        surr_log.log(
            str(conn_i) + " "
            + name + " "
            + str(Chem.MolToSmiles(conn_cap, isomericSmiles=True)) + " "
            + _format_id_list(db.get_conn2mol()[conn_i])
            + _format_mol_stats(conn_cap))

        png_path = dumpdir + "/png/" + "surr_%s.png" % (conn_i)
        AllChem.Compute2DCoords(conn_cap)
        Chem.Kekulize(conn_cap)
        Draw.MolToFile(conn_cap, png_path, size=(500, 500))

    surr_log.close()
Example #13
0
def num_rings(mol: Mol) -> int:
    """Return the ring count of ``mol`` as computed by rdMolDescriptors.CalcNumRings."""
    ring_count = rdMolDescriptors.CalcNumRings(mol)
    return ring_count
Example #14
0
 def calculate_number_rings(self):
     '''
     Count the rings of the molecule stored on this instance
     :return: value of rdMolDescriptors.CalcNumRings for self.mol
     '''
     n_rings = rdMolDescriptors.CalcNumRings(self.mol)
     return n_rings
Example #15
0
def get_molecular_features(dataframe, mol_list):
    """Fill ``dataframe`` with molecular descriptors, one row per molecule.

    Row label ``i`` receives the descriptors of the i-th molecule in
    ``mol_list``. The dataframe is modified in place and also returned.

    Columns written, in order: NbrAtoms, NbrBonds, mw, HeavyAtomMolWt,
    NumValenceElectrons, FpDensityMorgan1-3, the per-family feature counts
    (Acceptor, Donor, Hydrophobe, LumpedHydrophobe, PosIonizable,
    NegIonizable), NumRotatableBonds and then one column per rdMolDescriptors
    calculator listed below, named after the calculator.

    :param dataframe: pandas DataFrame to fill
    :param mol_list: sequence of RDKit molecules
    :return: the same dataframe object
    """
    df = dataframe

    # Building the feature factory does not depend on the molecule, so do it
    # once instead of once per molecule.
    fdefName = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    factory = ChemicalFeatures.BuildFeatureFactory(fdefName)

    # Feature families that are counted; any other family is ignored.
    feature_families = ("Acceptor", "Donor", "Hydrophobe", "LumpedHydrophobe",
                        "PosIonizable", "NegIonizable")

    # rdMolDescriptors calculators whose column name equals the function name.
    # CalcKappa3 is deliberately not part of the list (it was disabled).
    calculator_names = (
        "CalcChi0n", "CalcChi0v", "CalcChi1n", "CalcChi1v", "CalcChi2n",
        "CalcChi2v", "CalcChi3n", "CalcChi3v", "CalcChi4n", "CalcChi4v",
        "CalcFractionCSP3", "CalcHallKierAlpha", "CalcKappa1", "CalcKappa2",
        "CalcLabuteASA", "CalcNumAliphaticCarbocycles",
        "CalcNumAliphaticHeterocycles", "CalcNumAliphaticRings",
        "CalcNumAmideBonds", "CalcNumAromaticCarbocycles",
        "CalcNumAromaticHeterocycles", "CalcNumAromaticRings",
        "CalcNumBridgeheadAtoms", "CalcNumHBA", "CalcNumHBD",
        "CalcNumHeteroatoms", "CalcNumHeterocycles", "CalcNumLipinskiHBA",
        "CalcNumLipinskiHBD", "CalcNumRings", "CalcNumSaturatedCarbocycles",
        "CalcNumSaturatedHeterocycles", "CalcNumSaturatedRings",
        "CalcNumSpiroAtoms", "CalcTPSA",
    )
    column_calculators = [("NumRotatableBonds", rdMolDescriptors.CalcNumRotatableBonds)]
    column_calculators.extend(
        (fn_name, getattr(rdMolDescriptors, fn_name)) for fn_name in calculator_names)

    for i, mol in enumerate(mol_list):
        print("Getting molecular features for molecule: ", i)
        df.at[i, "NbrAtoms"] = mol.GetNumAtoms()
        df.at[i, "NbrBonds"] = mol.GetNumBonds()
        df.at[i, "mw"] = Descriptors.ExactMolWt(mol)
        df.at[i, 'HeavyAtomMolWt'] = Descriptors.HeavyAtomMolWt(mol)
        df.at[i, 'NumValenceElectrons'] = Descriptors.NumValenceElectrons(mol)
        # Max/Min(Abs)PartialCharge are deliberately skipped: they produced
        # infinity for refcode_csd = YOLJUF
        # (CCOP(=O)(Cc1ccc(cc1)NC(=S)NP(OC(C)C)(OC(C)C)[S])OCC).
        df.at[i, 'FpDensityMorgan1'] = Descriptors.FpDensityMorgan1(mol)
        df.at[i, 'FpDensityMorgan2'] = Descriptors.FpDensityMorgan2(mol)
        df.at[i, 'FpDensityMorgan3'] = Descriptors.FpDensityMorgan3(mol)

        # Count the chemical features of the molecule per family.
        family_counts = dict.fromkeys(feature_families, 0)
        for feat in factory.GetFeaturesForMol(mol):
            family = feat.GetFamily()
            if family in family_counts:
                family_counts[family] += 1
        for family in feature_families:
            df.at[i, family] = family_counts[family]

        # Remaining descriptors come straight from rdMolDescriptors.
        for column, calculator in column_calculators:
            df.at[i, column] = calculator(mol)
    return df