def add_atom(self, mol):
    """Try to grow ``mol`` by applying one randomly selected reaction SMARTS.

    A reaction SMARTS is drawn from one of the instance's reaction lists
    (``rxn_smarts_ring_list``, ``rxn_smarts_make_ring`` or
    ``rxn_smarts_list``) using the matching probability vectors, then
    applied through ``self.run_rxn``.

    Args:
        mol: RDKit molecule to modify.

    Returns:
        The result of ``self.run_rxn`` when ``self.valences_not_too_large``
        accepts it; otherwise a copy of the input molecule taken before
        the reaction was applied.
    """
    # Snapshot taken up front so a rejected product can be discarded.
    backup = AllChem.Mol(mol)

    if np.random.random() < 0.63:  # probability of adding ring atom
        chosen = np.random.choice(self.rxn_smarts_ring_list, p=self.p_ring)
        # SMARTS for an atom whose smallest ring has 3, 4 or 5 members.
        small_ring_atom = AllChem.MolFromSmarts('[r3,r4,r5]')
        keep_ring_choice = (
            mol.HasSubstructMatch(small_ring_atom)
            and AllChem.CalcNumAliphaticRings(mol) != 0
        )
        if not keep_ring_choice:
            # No small aliphatic ring available: draw a ring-forming
            # reaction instead of the one chosen above.
            chosen = np.random.choice(self.rxn_smarts_make_ring,
                                      p=self.p_make_ring)
            if np.random.random() < 0.036:  # probability of starting a fused ring
                chosen = chosen.replace("!", "")
    elif mol.HasSubstructMatch(AllChem.MolFromSmarts('[*]1=[*]-[*]=[*]-1')):
        # Four-membered ring with two double bonds: insert a carbon between
        # two of its ring atoms rather than drawing from the general list.
        chosen = '[r4:1][r4:2]>>[*:1]C[*:2]'
    else:
        chosen = np.random.choice(self.rxn_smarts_list, p=self.p)

    candidate = self.run_rxn(chosen, mol)
    return candidate if self.valences_not_too_large(candidate) else backup
def test_monomer_importer(json_importer, independent_importers):
    """Check the monomers stored by ``MonomerImporter`` against the source docs.

    Runs the importer, reloads the imported monomers and the stored
    backbones from their repositories, and verifies every monomer's fields.
    Each monomer's kekule string must match a distinct source document:
    matched strings are removed from the candidate list as they are used.

    Args:
        json_importer: fixture handed to ``MonomerImporter`` and used to
            load the raw monomer documents.
        independent_importers: fixture not referenced in the body;
            presumably requested for its setup side effects (verify against
            the conftest).
    """
    monomer_importer = importers.MonomerImporter(json_importer)
    ids = monomer_importer.import_data()

    monomer_repo = repo.create_monomer_repository()
    backbone_repo = repo.create_backbone_repository()
    monomer_data = list(monomer_repo.load(ids))
    monomer_docs = json_importer.load(monomer_importer.saver.TYPE.STRING)
    backbone_data = list(backbone_repo.load())

    kekules = [doc['kekule'] for doc in monomer_docs]
    backbones = [mol.to_reduced_dict() for mol in backbone_data]

    assert len(monomer_data) == 4

    for mol in monomer_data:
        rdkit_mol = mol.mol
        # Expected proline flag, recomputed independently from the structure.
        expected_proline = bool(
            AllChem.CalcNumAliphaticRings(rdkit_mol)
            and rdkit_mol.HasSubstructMatch(PROLINE_N_TERM))

        assert mol._id is not None
        assert mol.required == bool(AllChem.CalcNumAromaticRings(rdkit_mol))
        assert mol.backbone in backbones
        assert mol.sidechain is None
        assert mol.connection is None
        assert mol.proline == expected_proline
        assert mol.imported is True
        assert mol.kekule in kekules
        # Consume the match so the same source document cannot satisfy
        # two different monomers.
        kekules.remove(mol.kekule)
def PhyChem(smiles):
    """Compute 19 physicochemical descriptors per molecule and scale them.

    Each SMILES string is parsed with RDKit and described by 19 values, in
    this order: molecular weight, logP, H-bond acceptors, H-bond donors,
    rotatable bonds, amide bonds, bridgehead atoms, heteroatoms, heavy
    atoms, spiro atoms, fraction of sp3 carbons, ring count, aliphatic
    rings, aromatic rings, saturated rings, heterocycles, TPSA, valence
    electrons and molar refractivity.

    If any descriptor call raises for a molecule, the SMILES is printed and
    its row is filled with zeros. Those zero rows are still passed to the
    scaler along with the valid ones.

    Arguments:
        smiles (list): list of SMILES strings.

    Returns:
        props (ndarray): m x 19 matrix returned by
            ``Scaler().fit_transform``, where m is the number of samples.
            (``Scaler`` is defined outside this function; presumably a
            standardizing scaler, confirm at its import.)
    """
    rows = []
    for smile in smiles:
        mol = Chem.MolFromSmiles(smile)
        try:
            # Evaluated top to bottom; the position in the list is the
            # column index of the descriptor in the output matrix.
            row = [
                desc.MolWt(mol),
                Crippen.MolLogP(mol),
                Lipinski.NumHAcceptors(mol),
                Lipinski.NumHDonors(mol),
                Lipinski.NumRotatableBonds(mol),
                AllChem.CalcNumAmideBonds(mol),
                AllChem.CalcNumBridgeheadAtoms(mol),
                Lipinski.NumHeteroatoms(mol),
                Lipinski.HeavyAtomCount(mol),
                AllChem.CalcNumSpiroAtoms(mol),
                AllChem.CalcFractionCSP3(mol),
                Lipinski.RingCount(mol),
                AllChem.CalcNumAliphaticRings(mol),
                AllChem.CalcNumAromaticRings(mol),
                AllChem.CalcNumSaturatedRings(mol),
                AllChem.CalcNumHeterocycles(mol),
                MolSurf.TPSA(mol),
                desc.NumValenceElectrons(mol),
                Crippen.MolMR(mol),
            ]
        except Exception:
            # Best-effort: report the offending SMILES and keep the row
            # count aligned with the input by substituting zeros.
            print(smile)
            row = [0] * 19
        rows.append(row)

    return Scaler().fit_transform(np.array(rows))
def add_atom(mol):
    """Grow ``mol`` by applying one randomly selected reaction SMARTS.

    The reaction is drawn from the module-level lists
    ``rxn_smarts_ring_list``, ``rxn_smarts_make_ring`` or
    ``rxn_smarts_list`` with their matching probability vectors, then
    applied with ``run_rxn``.

    Args:
        mol: RDKit molecule to modify.

    Returns:
        Whatever ``run_rxn`` returns for the chosen reaction and ``mol``.
    """
    if np.random.random() < 0.63:  # probability of adding ring atom
        picked = np.random.choice(rxn_smarts_ring_list, p=p_ring)
        # SMARTS for an atom whose smallest ring has 3, 4 or 5 members.
        small_ring_atom = Chem.MolFromSmarts('[r3,r4,r5]')
        keep_ring_choice = (
            mol.HasSubstructMatch(small_ring_atom)
            and AllChem.CalcNumAliphaticRings(mol) != 0
        )
        if not keep_ring_choice:
            # No small aliphatic ring available: draw a ring-forming
            # reaction instead of the one chosen above.
            picked = np.random.choice(rxn_smarts_make_ring, p=p_make_ring)
            if np.random.random() < 0.056:  # probability of starting a fused ring
                picked = picked.replace("!", "")
    elif mol.HasSubstructMatch(Chem.MolFromSmarts('[*]1=[*]-[*]=[*]-1')):
        # Four-membered ring with two double bonds: insert a carbon between
        # two of its ring atoms rather than drawing from the general list.
        picked = '[r4:1][r4:2]>>[*:1]C[*:2]'
    else:
        picked = np.random.choice(rxn_smarts_list, p=p)

    return run_rxn(picked, mol)
def properties(fnames, labels, is_active=False):
    """Compute six structural property counts for each molecule in each file.

    The properties are the numbers of hydrogen bond acceptors, hydrogen
    bond donors, rotatable bonds, aliphatic rings, aromatic rings and
    heterocycles.

    Each file is read as a delimited table and filtered on its ``SCORE``
    column if present, otherwise on ``PCHEMBL_VALUE`` if present. It is then
    de-duplicated on ``CANONICAL_SMILES`` and randomly down-sampled to at
    most 100,000 rows. SMILES strings that RDKit cannot parse are reported
    and skipped instead of aborting the whole run.

    Arguments:
        fnames (list): the file paths of the molecule tables.
        labels (list): the label for each file in ``fnames``, matched by
            position.
        is_active (bool, optional): if True, keep only molecules with
            SCORE > 0.5 (or PCHEMBL_VALUE >= 6.5). If False, keep molecules
            with SCORE > 0 (or PCHEMBL_VALUE >= 0). (Default: False)

    Returns:
        df (DataFrame): long-format table with three columns; 'Set' is the
            label of the file the molecule belongs to, 'Property' is the
            name of one of the six properties, 'Number' is the property
            value. Each successfully parsed molecule contributes six rows.
    """
    props = []
    for i, fname in enumerate(fnames):
        label = labels[i]
        df = pd.read_table(fname)
        if 'SCORE' in df.columns:
            df = df[df.SCORE > (0.5 if is_active else 0)]
        elif 'PCHEMBL_VALUE' in df.columns:
            df = df[df.PCHEMBL_VALUE >= (6.5 if is_active else 0)]
        df = df.drop_duplicates(subset='CANONICAL_SMILES')
        # Cap the per-file workload; sampling is random, so results for
        # very large files vary between runs.
        if len(df) > int(1e5):
            df = df.sample(int(1e5))
        for smile in tqdm(df.CANONICAL_SMILES):
            mol = Chem.MolFromSmiles(smile)
            if mol is None:
                # MolFromSmiles signals a parse failure by returning None;
                # the descriptor functions below would raise on it.
                print('Skipping unparsable SMILES:', smile)
                continue
            props.extend([
                [label, 'Hydrogen Bond\nAcceptor',
                 Lipinski.NumHAcceptors(mol)],
                [label, 'Hydrogen\nBond Donor', Lipinski.NumHDonors(mol)],
                [label, 'Rotatable\nBond', Lipinski.NumRotatableBonds(mol)],
                [label, 'Aliphatic\nRing',
                 AllChem.CalcNumAliphaticRings(mol)],
                [label, 'Aromatic\nRing', Lipinski.NumAromaticRings(mol)],
                [label, 'Heterocycle', AllChem.CalcNumHeterocycles(mol)],
            ])
    return pd.DataFrame(props, columns=['Set', 'Property', 'Number'])
def add_atom(rdkit_mol, stats: Stats):
    """Try to grow ``rdkit_mol`` by applying one randomly selected reaction SMARTS.

    The reaction is drawn from the lists and probability vectors carried
    by ``stats`` and applied with ``run_rxn``.

    Args:
        rdkit_mol: RDKit molecule to modify.
        stats: object providing ``rxn_smarts_ring_list``,
            ``rxn_smarts_make_ring``, ``rxn_smarts_list``, ``p_ring``
            and ``p``.

    Returns:
        The result of ``run_rxn`` when ``valences_not_too_large`` accepts
        it; otherwise a copy of the input molecule taken before the
        reaction was applied.
    """
    # Snapshot taken up front so a rejected product can be discarded.
    backup = Chem.Mol(rdkit_mol)

    if np.random.random() < 0.63:  # probability of adding ring atom
        chosen = np.random.choice(stats.rxn_smarts_ring_list, p=stats.p_ring)
        # SMARTS for an atom whose smallest ring has 3, 4 or 5 members.
        small_ring_atom = Chem.MolFromSmarts('[r3,r4,r5]')
        keep_ring_choice = (
            rdkit_mol.HasSubstructMatch(small_ring_atom)
            and AllChem.CalcNumAliphaticRings(rdkit_mol) != 0
        )
        if not keep_ring_choice:
            # NOTE(review): the ring-forming list is sampled with
            # ``stats.p_ring``, the same weights as the ring list above.
            # Confirm this is intended and that the two lists have equal
            # length.
            chosen = np.random.choice(stats.rxn_smarts_make_ring,
                                      p=stats.p_ring)
            if np.random.random() < 0.036:  # probability of starting a fused ring
                chosen = chosen.replace("!", "")
    elif rdkit_mol.HasSubstructMatch(
            Chem.MolFromSmarts('[*]1=[*]-[*]=[*]-1')):
        # Four-membered ring with two double bonds: insert a carbon between
        # two of its ring atoms rather than drawing from the general list.
        chosen = '[r4:1][r4:2]>>[*:1]C[*:2]'
    else:
        chosen = np.random.choice(stats.rxn_smarts_list, p=stats.p)

    candidate = run_rxn(chosen, rdkit_mol)
    return candidate if valences_not_too_large(candidate) else backup
def is_proline(mol):
    """Return True when ``mol`` has an aliphatic ring and matches ``PROLINE_N_TERM``.

    Args:
        mol: RDKit molecule to classify.

    Returns:
        bool: False if the molecule has no aliphatic rings; otherwise
        whether it contains the ``PROLINE_N_TERM`` substructure.
    """
    # Cheap ring count first; the substructure search only runs when the
    # molecule has at least one aliphatic ring.
    if not AllChem.CalcNumAliphaticRings(mol):
        return False
    return bool(mol.HasSubstructMatch(PROLINE_N_TERM))