コード例 #1
0
def calc(smi, name):
    """Compute a row of physicochemical descriptors for one SMILES string.

    Args:
        smi: SMILES string to parse with RDKit.
        name: Identifier of the molecule; returned as the first tuple element
            and used in error messages.

    Returns:
        A tuple ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw,
        csp3, fmf, qed, hac, nrings_fused)`` with floats rounded to 2 or 3
        decimals, or ``None`` when the SMILES cannot be parsed or any
        descriptor calculation raises. Failures are reported on stderr.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # Terminate the message with a newline so consecutive reports do not
        # run together on stderr.
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # Fraction of heavy atoms that belong to the scaffold; defined as 0
        # for a molecule without heavy atoms to avoid dividing by zero.
        fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac if hac else 0
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        return (name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2),
                round(logp, 2), round(mr, 2), round(mw, 2), round(csp3, 3),
                round(fmf, 3), round(qed, 3), hac, nrings_fused)
    except Exception:
        # Deliberately broad (any descriptor may fail), but no longer a bare
        # ``except:`` so KeyboardInterrupt / SystemExit still propagate.
        sys.stderr.write(
            f'molecule {name} was omitted due to an error in calculation of some descriptors\n'
        )
        return None
コード例 #2
0
def calc(smi, name):
    """Compute an extended row of physicochemical descriptors for one SMILES.

    Args:
        smi: SMILES string to parse with RDKit.
        name: Identifier of the molecule; returned as the first tuple element
            and used in error messages.

    Returns:
        A tuple ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw,
        csp3, fmf, qed, hac, nrings_fused, n_unique_hba_hbd_atoms,
        max_ring_size, n_chiral_centers, fcsp3_bm)`` with floats rounded to 2
        or 3 decimals, or ``None`` when the SMILES cannot be parsed or any
        descriptor calculation raises. Failures are reported on stderr.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # Terminate the message with a newline so consecutive reports do not
        # run together on stderr.
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # The scaffold is needed twice (fmf and fcsp3_bm); derive it once.
        scaffold = GetScaffoldForMol(m)
        # Fraction of heavy atoms that belong to the scaffold; defined as 0
        # for a molecule without heavy atoms to avoid dividing by zero.
        fmf = scaffold.GetNumHeavyAtoms() / hac if hac else 0
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        n_unique_hba_hbd_atoms = count_hbd_hba_atoms(m)
        # Size of the largest ring; 0 when the molecule has no rings.
        max_ring_size = len(max(m.GetRingInfo().AtomRings(), key=len, default=()))
        n_chiral_centers = len(FindMolChiralCenters(m, includeUnassigned=True))
        fcsp3_bm = rdMolDescriptors.CalcFractionCSP3(scaffold)
        return (name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2),
                round(logp, 2), round(mr, 2), round(mw, 2), round(csp3, 3),
                round(fmf, 3), round(qed, 3), hac, nrings_fused,
                n_unique_hba_hbd_atoms, max_ring_size, n_chiral_centers,
                round(fcsp3_bm, 3))
    except Exception:
        # Deliberately broad (any descriptor may fail), but no longer a bare
        # ``except:`` so KeyboardInterrupt / SystemExit still propagate.
        sys.stderr.write(f'molecule {name} was omitted due to an error in calculation of some descriptors\n')
        return None
コード例 #3
0
def properties(mol):
    """
  Compute the set of molecular properties from which the QED descriptor
  is derived and return them bundled in a ``QEDproperties`` record.

  Raises ValueError when ``mol`` is None. Explicit hydrogens are removed
  before any property is evaluated.
  """
    if mol is None:
        raise ValueError('You need to provide a mol argument.')
    mol = Chem.RemoveHs(mol)

    molecular_weight = rdmd._CalcMolWt(mol)
    alogp = Crippen.MolLogP(mol)

    # Total number of matches over all acceptor patterns that hit the molecule.
    acceptor_matches = 0
    for pattern in Acceptors:
        if mol.HasSubstructMatch(pattern):
            acceptor_matches += len(mol.GetSubstructMatches(pattern))

    donors = rdmd.CalcNumHBD(mol)
    polar_surface = MolSurf.TPSA(mol)
    strict_rotors = rdmd.CalcNumRotatableBonds(
        mol, rdmd.NumRotatableBondsOptions.Strict)

    # Ring perception on a copy of the molecule with aliphatic rings deleted.
    # Lipinski.NumAromaticRings(mol) is NOT an identical replacement: the
    # expression used here tends to count more rings, e.g. for
    #   N1C2=CC=CC=C2SC3=C1C=CC4=C3C=CC=C4
    #   OC1=C(O)C=C2C(=C1)OC3=CC(=O)C(=CC3=C2C4=CC=CC=C4)O
    #   CC(C)C1=CC2=C(C)C=CC2=C(C)C=C1  uses 2, should be 0 ?
    # NOTE(review): the return type of Chem.GetSSSR differs between RDKit
    # releases (count vs. ring collection) - confirm against the pinned version.
    without_aliphatic_rings = Chem.DeleteSubstructs(Chem.Mol(mol), AliphaticRings)
    aromatic_rings = Chem.GetSSSR(without_aliphatic_rings)

    # Number of structural-alert patterns present at least once.
    alert_hits = len(
        [alert for alert in StructuralAlerts if mol.HasSubstructMatch(alert)])

    return QEDproperties(
        MW=molecular_weight,
        ALOGP=alogp,
        HBA=acceptor_matches,
        HBD=donors,
        PSA=polar_surface,
        ROTB=strict_rotors,
        AROM=aromatic_rings,
        ALERTS=alert_hits,
    )
コード例 #4
0
def get_prop_array(mol):
    """Return six properties of ``mol`` as a NumPy array.

    The element order is: exact molecular weight, Crippen logP, number of
    rotatable bonds, H-bond donors, H-bond acceptors, formal charge.
    """
    property_values = [
        CD.CalcExactMolWt(mol),
        Chem.Crippen.MolLogP(mol),
        D.NumRotatableBonds(mol),
        CD.CalcNumHBD(mol),
        CD.CalcNumHBA(mol),
        Chem.GetFormalCharge(mol),
    ]
    return np.array(property_values)
コード例 #5
0
def _calculateDescriptors(mol):
    """Compute a one-row DataFrame of RDKit descriptors for ``mol``.

    All values are collected in an ordered dict and the frame is built in a
    single step. Inserting well over a hundred columns one at a time (as the
    previous version did) can fragment the frame and trigger pandas'
    PerformanceWarning. The Crippen descriptors are also computed only once.

    Args:
        mol: RDKit molecule.

    Returns:
        pandas.DataFrame with index ``[0]`` and one column per descriptor,
        in the same column order as before: scalar descriptors first, then
        ``slogp_VSA1..12``, ``smr_VSA1..10``, ``peoe_VSA1..14``, ``MQN1..42``.
    """
    # CalcCrippenDescriptors yields (SlogP, SMR); evaluate it a single time.
    crippen = rdMolDescriptors.CalcCrippenDescriptors(mol)

    values = {
        "SlogP": crippen[0],
        "SMR": crippen[1],
        "LabuteASA": rdMolDescriptors.CalcLabuteASA(mol),
        "TPSA": Descriptors.TPSA(mol),
        "AMW": Descriptors.MolWt(mol),
        "ExactMW": rdMolDescriptors.CalcExactMolWt(mol),
        "NumLipinskiHBA": rdMolDescriptors.CalcNumLipinskiHBA(mol),
        "NumLipinskiHBD": rdMolDescriptors.CalcNumLipinskiHBD(mol),
        "NumRotatableBonds": rdMolDescriptors.CalcNumRotatableBonds(mol),
        "NumHBD": rdMolDescriptors.CalcNumHBD(mol),
        "NumHBA": rdMolDescriptors.CalcNumHBA(mol),
        "NumAmideBonds": rdMolDescriptors.CalcNumAmideBonds(mol),
        "NumHeteroAtoms": rdMolDescriptors.CalcNumHeteroatoms(mol),
        "NumHeavyAtoms": Chem.rdchem.Mol.GetNumHeavyAtoms(mol),
        "NumAtoms": Chem.rdchem.Mol.GetNumAtoms(mol),
        "NumRings": rdMolDescriptors.CalcNumRings(mol),
        "NumAromaticRings": rdMolDescriptors.CalcNumAromaticRings(mol),
        "NumSaturatedRings": rdMolDescriptors.CalcNumSaturatedRings(mol),
        "NumAliphaticRings": rdMolDescriptors.CalcNumAliphaticRings(mol),
        "NumAromaticHeterocycles":
            rdMolDescriptors.CalcNumAromaticHeterocycles(mol),
        "NumSaturatedHeterocycles":
            rdMolDescriptors.CalcNumSaturatedHeterocycles(mol),
        "NumAliphaticHeterocycles":
            rdMolDescriptors.CalcNumAliphaticHeterocycles(mol),
        "NumAromaticCarbocycles":
            rdMolDescriptors.CalcNumAromaticCarbocycles(mol),
        "NumSaturatedCarbocycles":
            rdMolDescriptors.CalcNumSaturatedCarbocycles(mol),
        "NumAliphaticCarbocycles":
            rdMolDescriptors.CalcNumAliphaticCarbocycles(mol),
        "FractionCSP3": rdMolDescriptors.CalcFractionCSP3(mol),
        "Chi0v": rdMolDescriptors.CalcChi0v(mol),
        "Chi1v": rdMolDescriptors.CalcChi1v(mol),
        "Chi2v": rdMolDescriptors.CalcChi2v(mol),
        "Chi3v": rdMolDescriptors.CalcChi3v(mol),
        "Chi4v": rdMolDescriptors.CalcChi4v(mol),
        "Chi1n": rdMolDescriptors.CalcChi1n(mol),
        "Chi2n": rdMolDescriptors.CalcChi2n(mol),
        "Chi3n": rdMolDescriptors.CalcChi3n(mol),
        "Chi4n": rdMolDescriptors.CalcChi4n(mol),
        "HallKierAlpha": rdMolDescriptors.CalcHallKierAlpha(mol),
        "kappa1": rdMolDescriptors.CalcKappa1(mol),
        "kappa2": rdMolDescriptors.CalcKappa2(mol),
        "kappa3": rdMolDescriptors.CalcKappa3(mol),
    }

    # Vector-valued descriptors: (column prefix, number of columns, function).
    # zip() truncates to the shorter side, exactly as the original code did.
    vector_descriptors = (
        ("slogp_VSA", 12, rdMolDescriptors.SlogP_VSA_),
        ("smr_VSA", 10, rdMolDescriptors.SMR_VSA_),
        ("peoe_VSA", 14, rdMolDescriptors.PEOE_VSA_),
        ("MQN", 42, rdMolDescriptors.MQNs_),
    )
    for prefix, n_columns, descriptor in vector_descriptors:
        column_names = [prefix + str(i) for i in range(1, n_columns + 1)]
        values.update(zip(column_names, descriptor(mol)))

    return pd.DataFrame(values, index=[0])
コード例 #6
0
def n_hba(mol):
    """ The number of h bond acceptors.

    The previous implementation called ``CalcNumHBD`` (donors) even though
    the function is named for acceptors; it now calls ``CalcNumHBA``.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return rdMolDescriptors.CalcNumHBA(mol)
コード例 #7
0
    def __init__(self, configuration: StatsExtractionConfig):
        """Store the configuration and build the Spark UDFs used by this object.

        :param configuration: stats-extraction configuration; its
            ``standardisation_config`` mapping (filter name -> parameters) is
            turned into a list of ``FilterConfiguration`` objects.

        Every ``*_udf`` attribute wraps a small lambda that re-parses its
        SMILES argument with ``Chem.MolFromSmiles``; none of the lambdas
        handles a ``None`` result from the parser.
        """
        # Enum holders used elsewhere in the class.
        self._filters = FilterTypesEnum

        self._columns = DataframeColumnsEnum
        self._stats = StatsExtractionEnum
        self._purging = PurgingEnum
        self._configuration = configuration
        # Convert the {name: params} mapping into FilterConfiguration objects.
        standardisation_config_dict = self._configuration.standardisation_config
        standardisation_config = [
            FilterConfiguration(name=name, parameters=params)
            for name, params in standardisation_config_dict.items()
        ]

        # Copied into locals so the lambdas below capture plain values rather
        # than ``self`` - presumably to keep the UDF closures easy to
        # serialise for Spark; TODO confirm.
        dec_separator = self._stats.DECORATION_SEPARATOR_TOKEN
        attachment_token = self._stats.ATTACHMENT_POINT_TOKEN
        # Exact molecular weight of the parsed SMILES.
        self._mol_wts_udf = psf.udf(
            lambda x: ExactMolWt(Chem.MolFromSmiles(x)), pst.FloatType())
        # Total ring count.
        self._num_rings_udf = psf.udf(
            lambda x: rdMolDescriptors.CalcNumRings(Chem.MolFromSmiles(x)),
            pst.IntegerType())
        # Heavy-atom count.
        self._num_atoms_udf = psf.udf(
            lambda x: Chem.MolFromSmiles(x).GetNumHeavyAtoms(),
            pst.IntegerType())
        # Aromatic ring count.
        self._num_aromatic_rings_udf = psf.udf(
            lambda x: rdMolDescriptors.CalcNumAromaticRings(
                Chem.MolFromSmiles(x)), pst.IntegerType())
        # Hydrogen-bond donor count.
        self._hbond_donors_udf = psf.udf(
            lambda x: rdMolDescriptors.CalcNumHBD(Chem.MolFromSmiles(x)),
            pst.IntegerType())
        # Hydrogen-bond acceptor count.
        self._hbond_acceptors_udf = psf.udf(
            lambda x: rdMolDescriptors.CalcNumHBA(Chem.MolFromSmiles(x)),
            pst.IntegerType())
        # NOTE(review): despite the attribute name, the numerator counts atoms
        # with atomic number 6 (carbon), so this is carbon atoms / heavy atoms.
        # It divides by the heavy-atom count without a zero check.
        self._hetero_atom_ratio_udf = psf.udf(
            lambda x: len([
                atom for atom in Chem.MolFromSmiles(x).GetAtoms()
                if atom.GetAtomicNum() == 6
            ]) / Chem.MolFromSmiles(x).GetNumHeavyAtoms(), pst.FloatType())
        # Round-trip through RDKit to obtain its SMILES output for the input.
        self._make_canonical_udf = psf.udf(
            lambda x: Chem.MolToSmiles(Chem.MolFromSmiles(x)),
            pst.StringType())
        # A new RDKitStandardizer is constructed on every call of this UDF.
        self._standardise_smiles_udf = psf.udf(
            lambda x: RDKitStandardizer(standardisation_config, None).
            apply_filter(x), pst.StringType())
        # Tokeniser: all matches of the REGEX_TOKENS pattern in the string.
        pattern = self._stats.REGEX_TOKENS
        self.regex = re.compile(pattern)
        self._tokeniser_udf = psf.udf(self.regex.findall,
                                      pst.ArrayType(pst.StringType()))
        # Split a string on the decoration separator token.
        self._decoration_split_udf = psf.udf(lambda x: x.split(dec_separator),
                                             pst.ArrayType(pst.StringType()))
        # Count elements of the input equal to the attachment-point token
        # (the input is turned into a list first, so a string is compared
        # character by character).
        self._count_decorations_udf = psf.udf(
            lambda s: list(s).count(attachment_token), pst.IntegerType())
コード例 #8
0
    def _init_smiles(self, smiles, use_etdg_confs=False):
        """
        Initialise a Molecule object from a SMILES string using RDKit
        :param smiles: (str) SMILES string
        :param use_etdg_confs: (bool) override the default conformer generation and use the ETDG algorithm
        :return: None. If RDKit fails to build the molecule, to compute the
                 volume or to produce any conformer, an error is logged and
                 the method returns early, leaving the remaining attributes unset.
        """
        logger.info('Initialising a Molecule from a SMILES string')
        try:
            self.mol_obj = Chem.MolFromSmiles(smiles)
            self.mol_obj = Chem.AddHs(self.mol_obj)
            self.charge = Chem.GetFormalCharge(self.mol_obj)
            self.n_rot_bonds = rdMolDescriptors.CalcNumRotatableBonds(
                self.mol_obj)
            self.n_h_donors = rdMolDescriptors.CalcNumHBD(self.mol_obj)
            self.n_h_acceptors = rdMolDescriptors.CalcNumHBA(self.mol_obj)

        except Exception:
            # Broad on purpose (RDKit raises several exception types, e.g.
            # when MolFromSmiles returned None), but not a bare ``except:``
            # so KeyboardInterrupt / SystemExit are not swallowed.
            logger.error('RDKit failed to generate mol objects')
            return

        logger.info('Running conformation generation with RDKit... running')
        # Only the literal ``False`` selects ETKDGv2 (kept as in the original).
        method = AllChem.ETKDGv2(
        ) if use_etdg_confs is False else AllChem.ETDG()
        method.pruneRmsThresh = 0.3
        method.numThreads = Config.n_cores
        conf_ids = list(
            AllChem.EmbedMultipleConfs(self.mol_obj,
                                       numConfs=self.n_confs,
                                       params=method))
        logger.info('                                          ... done')

        try:
            self.volume = AllChem.ComputeMolVolume(self.mol_obj)
        except ValueError:
            logger.error('RDKit failed to compute the molecular volume')
            return

        self.bonds = [(b.GetBeginAtomIdx(), b.GetEndAtomIdx())
                      for b in self.mol_obj.GetBonds()]
        self.conformers = extract_conformers_from_rdkit_mol_object(
            mol_obj=self.mol_obj, conf_ids=conf_ids)

        # Embedding can yield no conformers at all; report it like the other
        # RDKit failures instead of raising IndexError below.
        if len(self.conformers) == 0:
            logger.error('RDKit failed to generate any conformers')
            return

        # Default to the first generated conformer in the absence of any other information
        self.set_atoms(atoms=self.conformers[0].atoms)

        return None
コード例 #9
0
def calc(smi, name):
    """Compute a row of basic physicochemical descriptors for one SMILES.

    Args:
        smi: SMILES string to parse with RDKit.
        name: Identifier of the molecule; returned as the first tuple element
            and used in the parse-failure message.

    Returns:
        A tuple ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw,
        csp3, fmf)`` with floats rounded to 2 or 3 decimals, or ``None`` (with
        a message on stderr) when the SMILES cannot be parsed.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # Terminate the message with a newline so consecutive reports do not
        # run together on stderr.
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    hba = rdMolDescriptors.CalcNumHBA(m)
    hbd = rdMolDescriptors.CalcNumHBD(m)
    nrings = rdMolDescriptors.CalcNumRings(m)
    rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
    psa = rdMolDescriptors.CalcTPSA(m)
    logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
    mw = rdMolDescriptors._CalcMolWt(m)
    csp3 = rdMolDescriptors.CalcFractionCSP3(m)
    hac = m.GetNumHeavyAtoms()
    # Fraction of heavy atoms that belong to the scaffold. A molecule without
    # heavy atoms (e.g. an empty SMILES) previously raised ZeroDivisionError;
    # it now yields 0, matching the other versions of this function.
    fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac if hac else 0
    return (name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2),
            round(logp, 2), round(mr, 2), round(mw, 2), round(csp3, 3),
            round(fmf, 3))
コード例 #10
0
def extractFeatureData(mol):
    """Return a flat list of descriptor values for ``mol``.

    The list holds, in order, the SMR_VSA, SlogP_VSA and PEOE_VSA values
    followed by the H-bond donor and acceptor counts. A plain Python list is
    returned (it is not converted to a NumPy array).
    """
    descriptor_groups = (
        rdMolDescriptors.SMR_VSA_(mol),
        rdMolDescriptors.SlogP_VSA_(mol),
        rdMolDescriptors.PEOE_VSA_(mol),
        rdMolDescriptors.CalcNumHBD(mol),
        rdMolDescriptors.CalcNumHBA(mol),
    )

    flattened = []
    for group in descriptor_groups:
        if not isinstance(group, int):
            # Vector-valued descriptor: splice its elements in.
            flattened.extend(group)
            continue
        # Scalar (integer) descriptor.
        flattened.append(group)
    return flattened
コード例 #11
0
ファイル: svmodel.py プロジェクト: veselovmark/dbdock
def extractFeatureData(mol):
    """Return a flat list of descriptor values for ``mol``.

    The list holds, in order, the SMR_VSA, SlogP_VSA and PEOE_VSA values
    followed by the H-bond donor and acceptor counts. A plain Python list is
    returned (it is not converted to a NumPy array).

    Side effect: sets the module-level ``index_of_1d_feature`` to -1.
    """
    global index_of_1d_feature

    descriptor_groups = (
        rdMolDescriptors.SMR_VSA_(mol),
        rdMolDescriptors.SlogP_VSA_(mol),
        rdMolDescriptors.PEOE_VSA_(mol),
        rdMolDescriptors.CalcNumHBD(mol),
        rdMolDescriptors.CalcNumHBA(mol),
    )

    # This must reference the index of a 1D (scalar) feature; a negative
    # index counts backwards from the end of the list.
    index_of_1d_feature = -1

    flattened = []
    for group in descriptor_groups:
        if not isinstance(group, int):
            # Vector-valued descriptor: splice its elements in.
            flattened.extend(group)
            continue
        # Scalar (integer) descriptor.
        flattened.append(group)
    return flattened
コード例 #12
0
    def filter_druglikeness_5_rules(self, smiles):
        """Remove SMILES that violate the drug-likeness thresholds, in place.

        A SMILES is rejected when MW > 600, ALOGP > 6, ALOGP < 0, HBA > 11,
        HBD > 7, PSA > 180 or ROTB > 11. SMILES that RDKit cannot parse are
        rejected as well (they previously caused a crash).

        The previous version called ``smiles.remove`` while iterating over the
        same list, which skips the element following every removal. The kept
        entries are now collected first and written back in one step.

        :param smiles: list of SMILES strings; modified in place.
        :return: the same ``smiles`` object, holding only the accepted entries.
        """
        kept = []
        for smi in smiles:
            mol = Chem.MolFromSmiles(smi)
            if mol is None:
                # Unparsable SMILES counts as rejected.
                continue
            mol = Chem.RemoveHs(mol)

            MW = rdmd._CalcMolWt(mol)
            ALOGP = Crippen.MolLogP(mol)
            HBA = rdmd.CalcNumHBA(mol)
            HBD = rdmd.CalcNumHBD(mol)
            PSA = MolSurf.TPSA(mol)
            ROTB = rdmd.CalcNumRotatableBonds(
                mol, rdmd.NumRotatableBondsOptions.Strict)

            violates = (MW > 600 or ALOGP > 6 or ALOGP < 0 or HBA > 11
                        or HBD > 7 or PSA > 180 or ROTB > 11)
            if not violates:
                kept.append(smi)

        count = len(smiles) - len(kept)
        if count:
            # Slice assignment keeps the caller's list object (in-place
            # semantics of the original) while dropping rejected entries.
            smiles[:] = kept
        print("unavaliable rule_5_drug:%i" % count)

        return smiles
コード例 #13
0
 def calculate_properties(self, smiles=None, mol=None, props=None):
     """Calculate the requested basic properties and store them in ``self.data``.

     :param smiles: SMILES string, parsed only when ``mol`` is None.
     :param mol: RDKit molecule; takes precedence over ``smiles``.
     :param props: collection of property keys to compute. Recognised keys:
         py_formula, py_em, py_n_Cl_Br, py_na, py_mw, py_fsp3, py_rb,
         py_tpsa, py_clogp, py_nar, py_nhba, py_nhbd. Defaults to no keys
         (``None`` replaces the former mutable ``[]`` default).
     :returns: error (bool) - True when ``props`` is empty or the molecule
         could not be obtained, False after the properties were stored.
     """
     if props is None or len(props) == 0:
         return True
     if mol is None:
         mol = Chem.MolFromSmiles(smiles)
     if mol is None:
         return True

     def count_cl_br():
         # Number of chlorine plus bromine atoms.
         symbols = [atom.GetSymbol() for atom in mol.GetAtoms()]
         return symbols.count('Cl') + symbols.count('Br')

     # (key, calculator) pairs, evaluated lazily and in this fixed order so
     # only the requested properties are computed.
     calculators = (
         ('py_formula', lambda: desc.CalcMolFormula(mol)),
         ('py_em', lambda: round(desc.CalcExactMolWt(mol), 5)),
         ('py_n_Cl_Br', count_cl_br),
         ('py_na', lambda: mol.GetNumAtoms()),
         ('py_mw', lambda: desc._CalcMolWt(mol)),
         ('py_fsp3', lambda: desc.CalcFractionCSP3(mol)),
         ('py_rb', lambda: desc.CalcNumRotatableBonds(mol)),
         ('py_tpsa', lambda: desc.CalcTPSA(mol)),
         ('py_clogp', lambda: desc.CalcCrippenDescriptors(mol)[0]),
         ('py_nar', lambda: desc.CalcNumAromaticRings(mol)),
         ('py_nhba', lambda: desc.CalcNumHBA(mol)),
         ('py_nhbd', lambda: desc.CalcNumHBD(mol)),
     )
     for key, compute in calculators:
         if key in props:
             self.data[key] = compute()
     return False
コード例 #14
0
def calculate_scalar_descriptors(molecule, symbols):
    """Assemble a 1-D NumPy array of scalar descriptors for ``molecule``.

    Layout of the result, in order:
      * asphericity, the Crippen descriptors, exact molecular weight,
        eccentricity, fraction of sp3 carbons, Labute ASA, NPR1, NPR2 and
        the Hall-Kier alpha value;
      * number of entries in ``symbols`` equal to 'H', 'C', 'N', 'O', 'F';
      * ring counts (aliphatic / aromatic / saturated carbo- and
        heterocycles), spiro atoms and bridgehead atoms;
      * amide bonds, H-bond acceptors, H-bond donors.

    ``symbols`` is the sequence of element symbols of the molecule's atoms.
    """
    # Shape and bulk descriptors.
    features = [rdMD.CalcAsphericity(molecule)]
    features.extend(rdMD.CalcCrippenDescriptors(molecule))
    bulk_descriptors = (
        rdMD.CalcExactMolWt,
        rdMD.CalcEccentricity,
        rdMD.CalcFractionCSP3,
        rdMD.CalcLabuteASA,
        rdMD.CalcNPR1,
        rdMD.CalcNPR2,
        rdMD.CalcHallKierAlpha,
    )
    for descriptor in bulk_descriptors:
        features.append(descriptor(molecule))

    # Elemental distribution.
    symbol_array = np.array(symbols)
    for element in ('H', 'C', 'N', 'O', 'F'):
        features.append(np.sum(symbol_array == element))

    # Ring features, then the remaining counts.
    count_descriptors = (
        rdMD.CalcNumAliphaticCarbocycles,
        rdMD.CalcNumAliphaticHeterocycles,
        rdMD.CalcNumAromaticCarbocycles,
        rdMD.CalcNumAromaticHeterocycles,
        rdMD.CalcNumSaturatedCarbocycles,
        rdMD.CalcNumSaturatedHeterocycles,
        rdMD.CalcNumSpiroAtoms,       # atom shared between rings with one bond
        rdMD.CalcNumBridgeheadAtoms,  # atom shared between rings with at least two bonds
        rdMD.CalcNumAmideBonds,
        rdMD.CalcNumHBA,              # number of hydrogen acceptors
        rdMD.CalcNumHBD,              # number of hydrogen donors
    )
    for descriptor in count_descriptors:
        features.append(descriptor(molecule))

    return np.array(features)
コード例 #15
0
ファイル: tasks.py プロジェクト: tsufz/chembiohub_ws
def generateCompoundPropertiesTask(structure, debug=False):
    """Compute and persist the compound properties for ``structure``.

    Descriptors (HBD, HBA, rotatable bonds, AlogP, PSA, full molecular weight
    and formula) are calculated on the molecule parsed from
    ``structure.molfile``. The free-base molecular weight is calculated on
    the salt-stripped molecule, and only when any atoms remain after
    stripping.

    Args:
        structure: object exposing ``molecule`` and ``molfile``.
        debug: when True, attach to a pydevd debug server on localhost:6901
            and print an ``IntegrityError`` instead of re-raising it.

    Raises:
        IntegrityError: when saving fails and ``debug`` is False.
    """
    if debug:
        pydevd.settrace('localhost',
                        port=6901,
                        stdoutToServer=True,
                        stderrToServer=True)

    molecule = structure.molecule
    # Reuse the existing property record when there is one.
    if not molecule.compoundProperty:
        prop = CompoundProperties(molecule=molecule)
    else:
        prop = molecule.compoundProperty

    saltRemover = SaltRemover()
    mol = Chem.MolFromMolBlock(str(structure.molfile))
    base = saltRemover.StripMol(mol)
    prop.hbd = Descriptors.CalcNumHBD(mol)
    prop.hba = Descriptors.CalcNumHBA(mol)
    prop.rtb = Descriptors.CalcNumRotatableBonds(mol)
    prop.alogp = Crippen.MolLogP(mol)
    prop.psa = Descriptors.CalcTPSA(mol)
    prop.full_mwt = NewDescriptors.MolWt(mol)

    if base.GetNumAtoms():
        prop.mw_freebase = NewDescriptors.MolWt(base)

    prop.full_molformula = Descriptors.CalcMolFormula(mol)

    try:
        prop.save()

    except IntegrityError as e:
        if debug:
            # print(e) is valid on Python 2 and 3 and does not rely on the
            # deprecated (and, on Python 3, removed) ``e.message`` attribute.
            print(e)
        else:
            # Bare ``raise`` re-raises with the original traceback intact.
            raise
コード例 #16
0
    def get_global_features(self, mol):
        """Return a 1-D NumPy array of molecule-level features for ``mol``.

        Layout: atom count, bond count, exact molecular weight, heavy-atom
        molecular weight, valence-electron count, then the number of
        Acceptor / Donor / Hydrophobe / LumpedHydrophobe / PosIonizable /
        NegIonizable features reported by the BaseFeatures.fdef feature
        factory, followed by 35 rdMolDescriptors values (see
        ``descriptor_functions`` below for their order).

        NaN values are NOT replaced here; some descriptors may produce them.
        """
        # Chemical-feature perception with RDKit's base feature definitions.
        feature_definitions = os.path.join(RDConfig.RDDataDir,
                                           'BaseFeatures.fdef')
        feature_factory = ChemicalFeatures.BuildFeatureFactory(
            feature_definitions)
        mol_features = feature_factory.GetFeaturesForMol(mol)

        # Basic size / mass descriptors.
        # The four partial-charge descriptors (Max/Min[Abs]PartialCharge) are
        # left out on purpose: they produced infinity for refcode_csd = YOLJUF
        # (CCOP(=O)(Cc1ccc(cc1)NC(=S)NP(OC(C)C)(OC(C)C)[S])OCC).
        # The FpDensityMorgan1-3 descriptors are not included either.
        basic_features = [
            mol.GetNumAtoms(),
            mol.GetNumBonds(),
            Descriptors.ExactMolWt(mol),
            Descriptors.HeavyAtomMolWt(mol),
            Descriptors.NumValenceElectrons(mol),
        ]

        # Tally the feature families of interest; other families are ignored.
        family_order = ('Acceptor', 'Donor', 'Hydrophobe', 'LumpedHydrophobe',
                        'PosIonizable', 'NegIonizable')
        family_counts = dict.fromkeys(family_order, 0)
        for feature in mol_features:
            family = feature.GetFamily()
            if family in family_counts:
                family_counts[family] += 1

        # Further descriptors from rdMolDescriptors, in output order.
        descriptor_functions = (
            rdm.CalcNumRotatableBonds, rdm.CalcChi0n, rdm.CalcChi0v,
            rdm.CalcChi1n, rdm.CalcChi1v, rdm.CalcChi2n, rdm.CalcChi2v,
            rdm.CalcChi3n, rdm.CalcChi4n, rdm.CalcChi4v,
            rdm.CalcFractionCSP3, rdm.CalcHallKierAlpha, rdm.CalcKappa1,
            rdm.CalcKappa2, rdm.CalcLabuteASA,
            rdm.CalcNumAliphaticCarbocycles, rdm.CalcNumAliphaticHeterocycles,
            rdm.CalcNumAliphaticRings, rdm.CalcNumAmideBonds,
            rdm.CalcNumAromaticCarbocycles, rdm.CalcNumAromaticHeterocycles,
            rdm.CalcNumAromaticRings, rdm.CalcNumBridgeheadAtoms,
            rdm.CalcNumHBA, rdm.CalcNumHBD, rdm.CalcNumHeteroatoms,
            rdm.CalcNumHeterocycles, rdm.CalcNumLipinskiHBA,
            rdm.CalcNumLipinskiHBD, rdm.CalcNumRings,
            rdm.CalcNumSaturatedCarbocycles, rdm.CalcNumSaturatedHeterocycles,
            rdm.CalcNumSaturatedRings, rdm.CalcNumSpiroAtoms, rdm.CalcTPSA,
        )
        descriptor_values = [func(mol) for func in descriptor_functions]

        combined = (basic_features
                    + [family_counts[family] for family in family_order]
                    + descriptor_values)
        # Some descriptors can produce NaN. They are left as-is: zeroing them
        # could create outliers in the training or validation set; removing
        # the affected features from the feature set might be preferable.
        return np.array(combined).T
コード例 #17
0
ファイル: Lipinski.py プロジェクト: mivicms/clusfps
# Any atom that is neither carbon (#6) nor hydrogen (#1).
HeteroatomSmarts = Chem.MolFromSmarts('[!#6;!#1]')
#  NOTE: the Rotatable bond smarts here doesn't treat deuteriums (which are left in the graph
#  and therefore contribute to the degree of a carbon) the same as hydrogens (which are removed
#  from the graph). So the bond in [2H]C([2H])([2H])C([2H])([2H])[2H] *is* considered
#  rotatable.
# A single ('-'), non-ring ('!@') bond whose two end atoms are each not part
# of a triple bond ('!$(*#*)') and do not have degree 1 ('!D1').
RotatableBondSmarts = Chem.MolFromSmarts('[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]')
# Oxygen carrying one hydrogen, or nitrogen carrying one, two or three.
NHOHSmarts = Chem.MolFromSmarts('[#8H1,#7H1,#7H2,#7H3]')
# Any nitrogen or oxygen atom.
NOCountSmarts = Chem.MolFromSmarts('[#7,#8]')


# this little trick saves duplicated code
def _NumMatches(mol, smarts):
    return len(mol.GetSubstructMatches(smarts, uniquify=1))


# Public descriptor callables. Each one takes a molecule and delegates to the
# corresponding rdMolDescriptors function; a docstring and a ``version``
# attribute are attached to the callable after its creation.
# The underscore-prefixed companions return the substructure matches
# themselves (uniquified) for the given SMARTS pattern instead of a count.

# Hydrogen-bond donors. HDonorSmarts is defined outside this excerpt.
NumHDonors = lambda x: rdMolDescriptors.CalcNumHBD(x)
NumHDonors.__doc__ = "Number of Hydrogen Bond Donors"
NumHDonors.version = "1.0.0"
_HDonors = lambda x, y=HDonorSmarts: x.GetSubstructMatches(y, uniquify=1)
# Hydrogen-bond acceptors. HAcceptorSmarts is defined outside this excerpt.
NumHAcceptors = lambda x: rdMolDescriptors.CalcNumHBA(x)
NumHAcceptors.__doc__ = "Number of Hydrogen Bond Acceptors"
NumHAcceptors.version = "2.0.0"
_HAcceptors = lambda x, y=HAcceptorSmarts: x.GetSubstructMatches(y, uniquify=1)
# Heteroatoms (see HeteroatomSmarts).
NumHeteroatoms = lambda x: rdMolDescriptors.CalcNumHeteroatoms(x)
NumHeteroatoms.__doc__ = "Number of Heteroatoms"
NumHeteroatoms.version = "1.0.0"
_Heteroatoms = lambda x, y=HeteroatomSmarts: x.GetSubstructMatches(y,
                                                                   uniquify=1)
# Rotatable bonds.
NumRotatableBonds = lambda x: rdMolDescriptors.CalcNumRotatableBonds(x)
NumRotatableBonds.__doc__ = "Number of Rotatable Bonds"
NumRotatableBonds.version = "1.0.0"
コード例 #18
0
def get_molecular_features(dataframe, mol_list):
    """Fill ``dataframe`` with molecule-level features, one row per molecule.

    For the molecule at position ``i`` of ``mol_list`` the values are written
    to row label ``i`` with ``dataframe.at``; missing columns are created on
    first assignment. Columns, in order: NbrAtoms, NbrBonds, mw,
    HeavyAtomMolWt, NumValenceElectrons, FpDensityMorgan1-3, the six
    chemical-feature family counts (Acceptor ... NegIonizable),
    NumRotatableBonds, and one column per rdMolDescriptors function listed in
    ``rd_descriptor_names`` (the column is named after the function).

    The chemical feature factory does not depend on the molecule, so it is
    now built once instead of once per molecule.

    Args:
        dataframe: frame to fill; modified in place.
        mol_list: sequence of RDKit molecules.

    Returns:
        The same ``dataframe`` object.
    """
    df = dataframe
    if len(mol_list) == 0:
        # Nothing to compute; as before, no RDKit resources are touched.
        return df

    # Loop-invariant: feature definitions and factory.
    fdefName = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    factory = ChemicalFeatures.BuildFeatureFactory(fdefName)

    counted_families = ("Acceptor", "Donor", "Hydrophobe", "LumpedHydrophobe",
                        "PosIonizable", "NegIonizable")

    # rdMolDescriptors functions whose column carries the function's name.
    # CalcKappa3 is intentionally not included.
    rd_descriptor_names = (
        "CalcChi0n", "CalcChi0v", "CalcChi1n", "CalcChi1v", "CalcChi2n",
        "CalcChi2v", "CalcChi3n", "CalcChi3v", "CalcChi4n", "CalcChi4v",
        "CalcFractionCSP3", "CalcHallKierAlpha", "CalcKappa1", "CalcKappa2",
        "CalcLabuteASA", "CalcNumAliphaticCarbocycles",
        "CalcNumAliphaticHeterocycles", "CalcNumAliphaticRings",
        "CalcNumAmideBonds", "CalcNumAromaticCarbocycles",
        "CalcNumAromaticHeterocycles", "CalcNumAromaticRings",
        "CalcNumBridgeheadAtoms", "CalcNumHBA", "CalcNumHBD",
        "CalcNumHeteroatoms", "CalcNumHeterocycles", "CalcNumLipinskiHBA",
        "CalcNumLipinskiHBD", "CalcNumRings", "CalcNumSaturatedCarbocycles",
        "CalcNumSaturatedHeterocycles", "CalcNumSaturatedRings",
        "CalcNumSpiroAtoms", "CalcTPSA",
    )

    for i, mol in enumerate(mol_list):
        print("Getting molecular features for molecule: ", i)

        # Basic size / mass descriptors.
        df.at[i, "NbrAtoms"] = mol.GetNumAtoms()
        df.at[i, "NbrBonds"] = mol.GetNumBonds()
        df.at[i, "mw"] = Descriptors.ExactMolWt(mol)
        df.at[i, 'HeavyAtomMolWt'] = Chem.Descriptors.HeavyAtomMolWt(mol)
        df.at[i, 'NumValenceElectrons'] = Chem.Descriptors.NumValenceElectrons(mol)
        # The four partial-charge descriptors (Max/Min[Abs]PartialCharge) are
        # left out on purpose: they produced infinity for refcode_csd = YOLJUF
        # (CCOP(=O)(Cc1ccc(cc1)NC(=S)NP(OC(C)C)(OC(C)C)[S])OCC).
        df.at[i, 'FpDensityMorgan1'] = Chem.Descriptors.FpDensityMorgan1(mol)
        df.at[i, 'FpDensityMorgan2'] = Chem.Descriptors.FpDensityMorgan2(mol)
        df.at[i, 'FpDensityMorgan3'] = Chem.Descriptors.FpDensityMorgan3(mol)

        # Tally the chemical-feature families of interest; others are ignored.
        family_counts = dict.fromkeys(counted_families, 0)
        for feat in factory.GetFeaturesForMol(mol):
            family = feat.GetFamily()
            if family in family_counts:
                family_counts[family] += 1
        for family in counted_families:
            df.at[i, family] = family_counts[family]

        # Further molecular features from rdMolDescriptors.
        df.at[i, "NumRotatableBonds"] = rdMolDescriptors.CalcNumRotatableBonds(mol)
        for descriptor_name in rd_descriptor_names:
            df.at[i, descriptor_name] = getattr(rdMolDescriptors, descriptor_name)(mol)
    return df
コード例 #19
0
def get_fingerprint(SMILES=None, E_BIND=None):
    """
    PRE: Takes in a MOLECULE as a SMILES string, or None.
    POST: If SMILES is None, returns the alphabetically sorted list of feature names.
          Otherwise returns the list of feature values, in the same (sorted-by-name) order.

    E_BIND is accepted for interface compatibility but is not used here.
    Raises ValueError if a 3D structure cannot be built for the volume computation.
    """

    def get_atom_types(mol):
        """
        PRE: Takes in the mol
        POST: Returns a dictionary mapping each element symbol to its number of atoms
        """
        atom_types = {}
        for atom in mol.GetAtoms():
            symbol = atom.GetSymbol()
            atom_types[symbol] = atom_types.get(symbol, 0) + 1
        return atom_types

    def AreRingFused(mol):
        """
        PRE  : Takes in a mol rdkit
        POST : Returns the maximum number of SSSR rings any single atom belongs to, 0 if there is no ring
        """
        ring_membership = {}
        for ring in Chem.GetSymmSSSR(mol):
            for atom in ring:
                ring_membership[atom] = ring_membership.get(atom, 0) + 1
        # A dict view never compares equal to [] on Python 3, so test emptiness
        # directly; otherwise max() raises on molecules without rings.
        if not ring_membership:
            return 0
        return max(ring_membership.values())

    def isRingAromatic(mol, ring):
        """
        PRE: Takes in a mol and a ring given as a tuple of atom id
        POST: Returns True if all the atoms inside the ring are aromatic and False otherwise
        """
        return all(mol.GetAtomWithIdx(idx).GetIsAromatic() for idx in ring)

    def getVolume(mol):
        """
        PRE: Takes in a mol with HYDROGENS ADDED
        POST: Returns (volume, aromatic ring count, non-aromatic ring count,
              size of the largest aromatic ring, size of the largest non-aromatic ring).
              The mol is embedded and MMFF-optimised in place to compute the volume.
        """
        ra = 0
        largest_ra = 0
        rna = 0
        largest_rna = 0
        for ring in Chem.GetSymmSSSR(mol):
            ring_atoms = tuple(ring)
            if isRingAromatic(mol, ring_atoms):
                ra += 1
                largest_ra = max(largest_ra, len(ring_atoms))
            else:
                rna += 1
                largest_rna = max(largest_rna, len(ring_atoms))
        try:
            AllChem.EmbedMolecule(mol)
            AllChem.MMFFOptimizeMolecule(mol)
            volume = AllChem.ComputeMolVolume(mol)
        except Exception:
            # Any embedding/optimisation failure is reported uniformly to the caller.
            raise ValueError("Can't build the molecule")
        return volume, ra, rna, largest_ra, largest_rna

    features = [
        'atomNbr',
        'Volume',
        'NAtom',
        'OAtom',
        'SAtom',
        'PAtom',
        'ClAtom',
        'BrAtom',
        'FAtom',
        'IAtom',
        'AromaticRingNumber',
        'LargestAromaticRingAtomNbr',
        'NonAromaticRingNumber',
        'LargestNonAromaticRingAtomNbr',
        'MaxNbrFusedRings',
        'SurfaceArea',
        'Charge',
        'NitroNbr',
        'AlcoholNbr',
        'KetoneNbr',
        'NitrileNbr',
        'ThiolNbr',
        'Phenol_likeNbr',
        'EsterNbr',
        'SulfideNbr',
        'CarboxilicAcidNbr',
        'EtherNbr',
        'AmideNbr',
        'AnilineNbr',
        'PrimaryAmineNbr',
        'SecondaryAmineNbr',
        'RotableBondNum',
        'HBondDonor',
        'HBondAcceptor',
        'MolLogP',
        'MolMR'
    ]
    # Chi1..Chi6 (valence and non-valence variants) and Kappa1..Kappa3.
    for order in range(1, 7):
        features.append('Chi{}v'.format(order))
        features.append('Chi{}n'.format(order))
        if order <= 3:
            features.append('Kappa{}'.format(order))

    feature_dic = dict.fromkeys(features)
    if SMILES is None:
        # No molecule given: the caller only wants the column names.
        return sorted(feature_dic)

    # NOTE(review): MolFromSmiles returns None for an unparsable SMILES, in which
    # case AddHs fails; callers presumably pass valid SMILES only - confirm.
    mol = Chem.MolFromSmiles(SMILES)
    mol = Chem.AddHs(mol)

    feature_dic['RotableBondNum'] = descriptors.CalcNumRotatableBonds(mol)

    for order in range(1, 7):
        feature_dic['Chi{}v'.format(order)] = descriptors.CalcChiNv(mol, order)
        feature_dic['Chi{}n'.format(order)] = descriptors.CalcChiNn(mol, order)

    feature_dic['Kappa1'] = descriptors.CalcKappa1(mol)
    feature_dic['Kappa2'] = descriptors.CalcKappa2(mol)
    feature_dic['Kappa3'] = descriptors.CalcKappa3(mol)

    feature_dic['HBondAcceptor'] = descriptors.CalcNumHBA(mol)
    feature_dic['HBondDonor'] = descriptors.CalcNumHBD(mol)

    crippen = descriptors.CalcCrippenDescriptors(mol)
    feature_dic['MolLogP'] = crippen[0]
    feature_dic['MolMR'] = crippen[1]

    atom_types = get_atom_types(mol)
    for symbol in ('N', 'O', 'S', 'P', 'Cl', 'Br', 'F', 'I'):
        feature_dic[symbol + 'Atom'] = atom_types.get(symbol, 0)

    feature_dic['atomNbr'] = mol.GetNumHeavyAtoms()
    (feature_dic['Volume'],
     feature_dic['AromaticRingNumber'],
     feature_dic['NonAromaticRingNumber'],
     feature_dic['LargestAromaticRingAtomNbr'],
     feature_dic['LargestNonAromaticRingAtomNbr']) = getVolume(mol)
    feature_dic['MaxNbrFusedRings'] = AreRingFused(mol)
    feature_dic['SurfaceArea'] = descriptors.CalcTPSA(mol)
    feature_dic['Charge'] = Chem.GetFormalCharge(mol)

    # SMARTS pattern -> feature name; each feature is the number of substructure matches.
    funct_dic = {
        '[$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8]': 'NitroNbr',
        '[#6][OX2H]': 'AlcoholNbr',
        '[NX1]#[CX2]': 'NitrileNbr',
        '[#6][CX3](=O)[#6]': 'KetoneNbr',
        '[#16X2H]': 'ThiolNbr',
        "[OX2H][cX3][c]": 'Phenol_likeNbr',
        '[#6][CX3](=O)[OX2H0][#6]': 'EsterNbr',
        '[#16X2H0]': 'SulfideNbr',
        '[CX3](=O)[OX2H1]': 'CarboxilicAcidNbr',
        '[OD2]([#6])[#6]': 'EtherNbr',
        '[#7X3][#6X3](=[OX1])[#6]': 'AmideNbr',
        '[NX3][cc]': 'AnilineNbr',
        '[NX3H2;!$(NC=O)]': 'PrimaryAmineNbr',
        '[NX3H1;!$(NC=O)]': 'SecondaryAmineNbr'}

    for smarts, feature_name in funct_dic.items():
        patt = Chem.MolFromSmarts(smarts)
        feature_dic[feature_name] = len(mol.GetSubstructMatches(patt))

    # Values are emitted in the same sorted-by-name order as the names list.
    return [feature_dic[key] for key in sorted(feature_dic)]
コード例 #20
0
    'HBD', 'jIndex'
]
# Start every property column as an empty list.
for name in prop_names:
    d[name] = []

for i, s in enumerate(smiles):
    # Progress indicator for long runs.
    if i % 10000 == 0:
        print(i)
    m = Chem.MolFromSmiles(s)
    # Skip SMILES that fail to parse, contain '.', or contain a lowercase 'i'
    # (presumably to exclude elements such as Si/Li - confirm), and drop the
    # matching row from DUD so both tables keep the same number of rows.
    # NOTE(review): DUD.drop(i) assumes DUD's index labels equal the positions
    # in `smiles` - verify against where DUD is loaded.
    if m is None or 'i' in s or '.' in s:
        DUD = DUD.drop(i)
        print(s, i)
        continue

    d['QED'].append(QED.default(m))
    d['logP'].append(Crippen.MolLogP(m))
    d['molWt'].append(Descriptors.MolWt(m))
    d['maxCharge'].append(Descriptors.MaxPartialCharge(m))
    d['minCharge'].append(Descriptors.MinPartialCharge(m))
    d['valence'].append(Descriptors.NumValenceElectrons(m))
    d['TPSA'].append(rdMolDescriptors.CalcTPSA(m))
    d['HBA'].append(rdMolDescriptors.CalcNumHBA(m))
    d['HBD'].append(rdMolDescriptors.CalcNumHBD(m))
    d['jIndex'].append(GraphDescriptors.BalabanJ(m))

df = pd.DataFrame.from_dict(d)

# Both frames are joined on df's fresh index, passed as an array-like key.
df_merge = pd.merge(df, DUD, on=df.index)

#df_merge.to_csv('/home/mcb/jboitr/data/DUD_full.csv')
df_merge.to_csv('C:/Users/jacqu/Documents/data/DUD_full.csv')
コード例 #21
0
ファイル: views.py プロジェクト: Arturossi/quimioteca
def loadSDF(sdfPath):
    """Read the molecules in the SDF file at ``sdfPath`` and feed them to the database.

    For every molecule the supplier could parse, a ``data`` dict is built.
    Each field is first read from an SDF tag with ``mol.GetProp``; if that
    raises, the value in the matching ``except`` branch (a computed descriptor
    or a placeholder) is used instead. Molecules for which the
    ``DATABASE_NAME`` tag cannot be read are skipped. A 300x300 image and a
    150x150 thumbnail are written under ``settings.FILES_DIR`` and the dict is
    passed to ``feedDatabase``.

    NOTE(review): every ``except`` below is bare, so any error (not only a
    missing tag) silently selects the fallback.
    """
    # Create images
    #generateImages(sdfPath)
     
    # Create a molecule supplier
    suppl = Chem.SDMolSupplier(sdfPath)
    
    # Filter empty entries
    sdf = [x for x in suppl if x is not None]
    
    # For each molecule in supplier
    for mol in sdf:
        data = {}
        
        try:
            data['fCharge'] = mol.GetProp('Charge')
        except:
            data['fCharge'] = Chem.GetFormalCharge(mol)
            
        try:
            data['name'] = mol.GetProp('DATABASE_ID')
        except:
            data['name'] = 'unkown'
            
        try:
            data['molMass'] = mol.GetProp('Total Molweight')
        except:
            data['molMass'] = Descriptors.ExactMolWt(mol) 
            
        try:
            data['cLogP'] = mol.GetProp('cLogP')
        except:
            data['cLogP'] = Crippen.MolLogP(mol) # not sure whether this is right
            
        try:
            data['cLogS'] = mol.GetProp('cLogS')
        except:
            data['cLogS'] = 0.0
            
        try:
            data['tpsa'] = mol.GetProp('Polar Surface Area')
        except:
            data['tpsa'] = rdMolDescriptors.CalcTPSA(mol)
            
        try:
            data['totalSurfaceArea'] = mol.GetProp('Total Surface Area')
        except:
            # NOTE(review): the fallback is the same CalcTPSA value used for
            # 'tpsa' above, not a total surface area - confirm this is intended.
            data['totalSurfaceArea'] = rdMolDescriptors.CalcTPSA(mol)
        
        try:
            data['hbondAcceptors'] = mol.GetProp('H-Acceptors')
        except:
            data['hbondAcceptors'] = rdMolDescriptors.CalcNumHBA(mol)
            
        try:
            data['hbondDonnors'] = mol.GetProp('H-Donors')
        except:
            data['hbondDonnors'] = rdMolDescriptors.CalcNumHBD(mol)
            
        try:
            data['rotable'] = mol.GetProp('Rotatable Bonds')
        except:
            data['rotable'] = rdMolDescriptors.CalcNumRotatableBonds(mol)
            
        try:
            data['mutagenic'] = mol.GetProp('Mutagenic')
        except:
            data['mutagenic'] = 'Unknown'
            
        try:
            data['tumorigenic'] = mol.GetProp('Tumorigenic')
        except:
            data['tumorigenic'] = 'Unknown'
            
        try:
            data['irritant'] = mol.GetProp('Irritant')
        except:
            # NOTE(review): spelled 'Unkown' here but 'Unknown' for the two
            # fields above; the stored placeholder therefore differs.
            data['irritant'] = 'Unkown'
            
        try:
            data['smiles'] = mol.GetProp('SMILES')
        except:
            data['smiles'] = Chem.MolToSmiles(mol)
            
        try:
            data['InChI'] = mol.GetProp('INCHI_IDENTIFIER')
        except:
            data['InChI'] = inchi.MolToInchi(mol)
            
        try:
            data['inchiKey'] = mol.GetProp('INCHI_KEY')
        except:
            data['inchiKey'] = inchi.MolToInchiKey(mol)
            
        try:
            data['nonHAtoms'] = mol.GetProp('Non-H Atoms')
        except:
            data['nonHAtoms'] = -1 # placeholder: author did not know how to compute this
            
            
        try:
            data['numAtoms'] = mol.GetProp('numAtoms')
        except:
            data['numAtoms'] = mol.GetNumAtoms()
        
        try:
            data['stereoCenters'] = mol.GetProp('Stereo Centers')
        except:
            # NOTE(review): the fallback stores the atom count, not a
            # stereocenter count - confirm this is intended.
            data['stereoCenters'] = mol.GetNumAtoms()
            
        try:
            data['provider'] = mol.GetProp('DATABASE_NAME')
        except:
            # The provider is mandatory: report it and skip this molecule.
            print("Nenhum fornecedor encontrado, o campo é obrigatório!")
            continue
        
        tmp = AllChem.Compute2DCoords(mol) # Compute its coordinates
        
        # Full-size depiction, named after the InChIKey.
        Draw.MolToFile(mol, 
            os.path.join(settings.FILES_DIR, f'molImages/' + data["inchiKey"] + '.png'),
            size=(300,300),
            kekulize=True, 
            wedgeBonds=True,
            fitImage=True) # Save it
        
        # Thumbnail depiction, same file name in a separate directory.
        Draw.MolToFile(mol, 
            os.path.join(settings.FILES_DIR, f'molThumbs/' + data["inchiKey"] + '.png'),
            size=(150,150),
            kekulize=True,
            wedgeBonds=True,
            fitImage=True)
        
        # NOTE(review): this call is unconditional, and both feeding branches
        # below call feedDatabase(data) again, so a molecule can be fed twice.
        feedDatabase(data)

        if Compounds.objects.filter(inChIKey=data['inchiKey']).exists():
            # NOTE(review): the filter value is the literal list ['provider'];
            # data['provider'] looks like what was meant - confirm.
            if not Compounds.objects.filter(provider=['provider']).exists():
                feedDatabase(data)
                print("feed1")
                # append to the database's SDF
                a = 1
            else:
                print("continue123")
                continue
                
        else:
            a = 1
            feedDatabase(data)
            print("feed2")
        '''except:
コード例 #22
0
def datadump(database, dumpdir):

    db = pickle.load(open(database, "rb"))

    if os.path.exists(dumpdir):
        raise Warning(
            "Caution, %s already exists. Already existing data may be overwritten."
        )
    else:
        os.mkdir(dumpdir)
        os.mkdir(dumpdir + "/png")

    frag2mol = db.get_frag2mol()
    frag2lcapconn = db.get_frag2lcapconn()
    frag2rcapconn = db.get_frag2rcapconn()
    mol2frag = db.get_mol2frag()
    mol2conn = db.get_mol2conn()

    frag_log = logger(dumpdir + "/frag.dat")
    frag_log.log("### datadump of database %s" % database)
    frag_log.log("### timestamp %s" %
                 time.asctime(time.localtime(time.time())))
    frag_log.log("### written by run_fragresp.py datadump routine.")
    frag_log.log("###")
    frag_log.log("### ----------------- ###")
    frag_log.log("### FRAGMENT DATA LOG ###")
    frag_log.log("### ----------------- ###")
    frag_log.log("###")
    frag_log.log(
        "# id smiles mol_id lcap_id rcap_id Natoms Nbonds Nnonhatoms Chg Nhbd Nhba Nrotbonds Nrings"
    )

    for frag_i in range(db.get_frag_count()):
        frag = db.get_frag(frag_i)
        Chem.SanitizeMol(frag)

        log_str = list()

        ### id
        log_str.append(str(frag_i) + " ")
        ### smiles
        log_str.append(str(Chem.MolToSmiles(frag, isomericSmiles=True)) + " ")

        ### mol_id
        mol_count = len(frag2mol[frag_i])
        if mol_count == 0:
            log_str.append("-1 ")
        else:
            for i in range(mol_count):
                mol_i = frag2mol[frag_i][i]
                if i < mol_count - 1:
                    log_str.append(str(mol_i) + ",")
                else:
                    log_str.append(str(mol_i) + " ")

        ### lcap_id
        lcap_count = len(frag2lcapconn[frag_i])
        if lcap_count == 0:
            log_str.append("-1 ")
        else:
            for i in range(lcap_count):
                cap_i = frag2lcapconn[frag_i][i]
                if i < lcap_count - 1:
                    log_str.append(str(cap_i) + ",")
                else:
                    log_str.append(str(cap_i) + " ")

        ### rcap_id
        rcap_count = len(frag2rcapconn[frag_i])
        if rcap_count == 0:
            log_str.append("-1 ")
        else:
            for i in range(rcap_count):
                cap_i = frag2rcapconn[frag_i][i]
                if i < rcap_count - 1:
                    log_str.append(str(cap_i) + ",")
                else:
                    log_str.append(str(cap_i) + " ")

        ### N_atoms
        log_str.append(str(frag.GetNumAtoms()) + " ")
        ### N_bonds
        log_str.append(str(frag.GetNumBonds()) + " ")
        ### Nnonhatoms
        log_str.append(str(frag.GetNumHeavyAtoms()) + " ")
        ### Chg
        log_str.append(str(rdmolops.GetFormalCharge(frag)) + " ")
        ### Nhbd
        log_str.append(str(rdMolDescriptors.CalcNumHBD(frag)) + " ")
        ### Nhba
        log_str.append(str(rdMolDescriptors.CalcNumHBA(frag)) + " ")
        ### Nrotbonds
        log_str.append(str(rdMolDescriptors.CalcNumRotatableBonds(frag)) + " ")
        ### Nrings
        log_str.append(str(rdMolDescriptors.CalcNumRings(frag)) + " ")

        frag_log.log("".join(log_str))

        png_path = dumpdir + "/png/" + "frag_%d.png" % frag_i
        try:
            Chem.SanitizeMol(frag)
            AllChem.Compute2DCoords(frag)
            Draw.MolToFile(frag, png_path, size=(500, 500))
        except:
            #Chem.Kekulize(frag)
            print("Could not save frag %d to disk." % frag_i)

    frag_log.close()

    mol_log = logger(dumpdir + "/mol.dat")
    mol_log.log("### datadump of database %s" % database)
    mol_log.log("### timestamp %s" % time.asctime(time.localtime(time.time())))
    mol_log.log("### written by run_fragresp.py datadump routine.")
    mol_log.log("###")
    mol_log.log("### ----------------- ###")
    mol_log.log("### MOLECULE DATA LOG ###")
    mol_log.log("### ----------------- ###")
    mol_log.log("###")
    mol_log.log(
        "# id name smiles frag_id Natoms Nbonds Nnonhatoms Chg Nhbd Nhba Nrotbonds Nrings"
    )

    for mol_i in range(db.get_mol_count()):
        mol = db.get_mol(mol_i)
        Chem.SanitizeMol(mol)
        name = db.get_name(mol_i)
        decomp = db.get_decompose(mol_i)

        log_str = list()

        log_str.append(str(mol_i) + " ")
        log_str.append(name + " ")
        log_str.append(str(Chem.MolToSmiles(mol, isomericSmiles=True)) + " ")

        frag_count = decomp.get_frag_count()

        if frag_count == 0:
            log_str.append("-1 ")
        else:
            for i in range(frag_count):
                frag_i = mol2frag[mol_i][i]
                if i < frag_count - 1:
                    log_str.append(str(frag_i) + ",")
                else:
                    log_str.append(str(frag_i) + " ")

        log_str.append(str(mol.GetNumAtoms()) + " ")
        log_str.append(str(mol.GetNumBonds()) + " ")
        log_str.append(str(mol.GetNumHeavyAtoms()) + " ")
        log_str.append(str(rdmolops.GetFormalCharge(mol)) + " ")
        log_str.append(str(rdMolDescriptors.CalcNumHBD(mol)) + " ")
        log_str.append(str(rdMolDescriptors.CalcNumHBA(mol)) + " ")
        log_str.append(str(rdMolDescriptors.CalcNumRotatableBonds(mol)) + " ")
        log_str.append(str(rdMolDescriptors.CalcNumRings(mol)) + " ")

        mol_log.log("".join(log_str))

        png_path = dumpdir + "/png/" + "mol_%d.png" % mol_i
        AllChem.Compute2DCoords(mol)
        Chem.Kekulize(mol)
        Draw.MolToFile(mol, png_path, size=(500, 500))

    mol_log.close()

    surr_log = logger(dumpdir + "/surr.dat")
    surr_log.log("### datadump of database %s" % database)
    surr_log.log("### timestamp %s" %
                 time.asctime(time.localtime(time.time())))
    surr_log.log("### written by run_fragresp.py datadump routine.")
    surr_log.log("###")
    surr_log.log("### ----------------- ###")
    surr_log.log("### SURROGATE DATA LOG ###")
    surr_log.log("### ------------------ ###")
    surr_log.log("###")
    surr_log.log(
        "# id name smiles mol_id Natoms Nbonds Nnonhatoms Chg Nhbd Nhba Nrotbonds Nrings"
    )

    for conn_i, conn in enumerate(db.get_conn_list()):

        if conn.get_terminal():
            continue

        name = conn.get_name()

        conn_cap = conn.get_surrogate_cap()
        Chem.SanitizeMol(conn_cap)

        log_str = list()

        log_str.append(str(conn_i) + " ")
        log_str.append(name + " ")
        log_str.append(
            str(Chem.MolToSmiles(conn_cap, isomericSmiles=True)) + " ")

        conn2mol = db.get_conn2mol()[conn_i]
        mol_count = len(conn2mol)

        if mol_count == 0:
            log_str.append("-1 ")
        else:
            for i in range(mol_count):
                mol_i = conn2mol[i]
                if i < mol_count - 1:
                    log_str.append(str(mol_i) + ",")
                else:
                    log_str.append(str(mol_i) + " ")

        log_str.append(str(conn_cap.GetNumAtoms()) + " ")
        log_str.append(str(conn_cap.GetNumBonds()) + " ")
        log_str.append(str(conn_cap.GetNumHeavyAtoms()) + " ")
        log_str.append(str(rdmolops.GetFormalCharge(conn_cap)) + " ")
        log_str.append(str(rdMolDescriptors.CalcNumHBD(conn_cap)) + " ")
        log_str.append(str(rdMolDescriptors.CalcNumHBA(conn_cap)) + " ")
        log_str.append(
            str(rdMolDescriptors.CalcNumRotatableBonds(conn_cap)) + " ")
        log_str.append(str(rdMolDescriptors.CalcNumRings(conn_cap)) + " ")

        surr_log.log("".join(log_str))

        png_path = dumpdir + "/png/" + "surr_%s.png" % (conn_i)
        AllChem.Compute2DCoords(conn_cap)
        Chem.Kekulize(conn_cap)
        Draw.MolToFile(conn_cap, png_path, size=(500, 500))

    surr_log.close()
コード例 #23
0
 def calculate(self):
     """Return the result of RDKit's ``CalcNumHBD`` (H-bond donor count) for ``self.mol``."""
     donor_count = rdMolDescriptors.CalcNumHBD(self.mol)
     return donor_count
コード例 #24
0
 def compute_HBD(self, mol_input):
     """Return the result of RDKit's ``CalcNumHBD`` (H-bond donor count) for ``mol_input``."""
     donor_count = rdMolDescriptors.CalcNumHBD(mol_input)
     return donor_count
コード例 #25
0
                H_count += num_Hs
        feature_list.append(count)
    feature_list[16] = H_count
    
    #Calculates the total mass of the aromatic atoms in the molecule
    mass_aromatic_atoms = 0
    for atom in mol_obj.GetAtoms():
            if atom.GetIsAromatic():
                mass_aromatic_atoms += atom.GetMass()
                num_Hs = atom.GetTotalNumHs()
                mass_aromatic_atoms += H_mass * num_Hs
    feature_list.append(mass_aromatic_atoms)

    #Counting number of H-bond donors and acceptors
    HBAs = Descriptor.CalcNumHBA(mol_obj)
    HBDs = Descriptor.CalcNumHBD(mol_obj)
    feature_list.append(HBAs + HBDs)
    
    return tuple(feature_list)


#Overwrites the feature_array key of the second-level dictionary of the molecule
for mol in suppl:
    if mol is None: 
        continue
    #Defines the chemical name of the molecule
    chemical_name = mol.GetProp('ChemName').rstrip()
    
    #Try's to update the feature_array key of the second-level dictionary of the molecule
    try:
        molecule_dict[chemical_name]["feature_array"] = get_feature_array(mol)