def get_numRotatableBonds(mol):
    """Return the rotatable bond count of ``mol``.

    Thin wrapper that forwards ``mol`` to ``CalcNumRotatableBonds`` and
    returns its result unchanged.
    """
    n_rotatable = CalcNumRotatableBonds(mol)
    return n_rotatable
def populate_all_molecules(
    params,
    redo_size,
    redo_prop,
    mol_file=None
):
    """
    Build the per-molecule output files for every molecule in the DB.

    The molecule list is either every ``*_unopt.mol`` file matched by
    ``glob`` in the current working directory (``mol_file is None``) or
    whatever ``IO.read_molecule_list(mol_file)`` returns. For each
    molecule, in sorted order, the following files are produced when
    they are missing or a redo is requested:

    - ``<name>_prop.json``: descriptors computed from the SMILES
      (redone when ``redo_prop`` is truthy);
    - ``<name>_opt.mol``: structure returned by ``rdkf.ETKDG``
      (redone when ``redo_size`` is truthy);
    - ``<name>_size.csv``: written by ``rdkf.calc_molecule_diameter``
      (redone when ``redo_size`` is truthy).

    Molecules already in the failure list are skipped. Molecules whose
    SMILES contains ``*`` are appended to the failure list (on disk and
    in memory) and skipped.

    Keys read from ``params``: ``vdwScale``, ``boxMargin``, ``spacing``,
    ``show_vdw``, ``plot_ellip``, ``N_conformers``, ``MW_thresh``,
    ``seed`` and ``molec_dir``.

    Returns nothing; all results are written to files.

    """

    # Options forwarded to the size calculation, read in the same order
    # as before so a missing key raises the same KeyError.
    size_options = {
        'vdwScale': params['vdwScale'],
        'boxMargin': params['boxMargin'],
        'spacing': params['spacing'],
        'show_vdw': params['show_vdw'],
        'plot_ellip': params['plot_ellip'],
        'N_conformers': int(params['N_conformers']),
        'MW_thresh': params['MW_thresh'],
    }
    seed = int(params['seed'])
    molec_dir = params['molec_dir']

    fail_list = IO.fail_list_read(
        directory=molec_dir,
        file_name='failures.txt'
    )
    print(mol_file)
    molecule_list = (
        glob.glob('*_unopt.mol') if mol_file is None
        else IO.read_molecule_list(mol_file)
    )

    print(f'{len(molecule_list)} molecules in DB.')

    for count, mol in enumerate(sorted(molecule_list), start=1):
        name = mol.replace('_unopt.mol', '')
        if name in fail_list:
            continue

        smiles = rdkf.read_structure_to_smiles(mol)
        # A '*' in the SMILES marks a generic structure: record it as a
        # failure and move on.
        if '*' in smiles:
            IO.fail_list_write(
                new_name=name,
                directory=molec_dir,
                file_name='failures.txt'
            )
            fail_list.append(name)
            continue

        prop_file = f'{name}_prop.json'
        opt_file = f'{name}_opt.mol'
        diam_file = f'{name}_size.csv'
        etkdg_fail = f'{name}_unopt.ETKDGFAILED'

        # Molecular properties from the 2D structure.
        if redo_prop or not exists(prop_file):
            print(
                f'>> calculating molecule descriptors of {mol}, '
                f'{count} of {len(molecule_list)}'
            )
            rdkitmol = Chem.MolFromSmiles(smiles)
            rdkitmol.Compute2DCoords()
            Chem.SanitizeMol(rdkitmol)
            prop_dict = {
                'logP': Descriptors.MolLogP(rdkitmol, includeHs=True),
                'logS': rdkf.get_logSw(rdkitmol),
                'Synth_score': rdkf.get_SynthA_score(rdkitmol),
                'NHA': rdkitmol.GetNumHeavyAtoms(),
                'MW': Descriptors.MolWt(rdkitmol),
            }
            nrb = CalcNumRotatableBonds(rdkitmol)
            nb = rdkitmol.GetNumBonds(onlyHeavy=True)
            prop_dict['NRB'] = nrb
            # Ratio of rotatable bonds to heavy-atom bonds; defined as
            # 0.0 when there are no bonds to avoid dividing by zero.
            prop_dict['NRBr'] = nrb / nb if nb != 0 else 0.0
            prop_dict['bertzCT'] = BertzCT(rdkitmol)
            prop_dict['purchasability'] = chemcost_IO.is_purchasable(
                name=name,
                smiles=smiles
            )

            with open(prop_file, 'w') as f:
                json.dump(prop_dict, f)

        # 3D representation using ETKDG, unless a failure marker file
        # already exists for this molecule.
        needs_opt = redo_size or not exists(opt_file)
        if needs_opt and not exists(etkdg_fail):
            print(
                f'>> optimising {mol}, '
                f'{count} of {len(molecule_list)}'
            )
            rdkit_mol = rdkf.ETKDG(mol, seed=seed)
            if rdkit_mol is not None:
                rdkf.write_structure(opt_file, rdkit_mol)

        # Molecular size for all conformers, written to a csv used in
        # the analysis. The failure marker is checked again here rather
        # than reusing the earlier result, matching the original flow
        # (presumably the ETKDG step can create it; verify in rdkf).
        needs_size = redo_size or not exists(diam_file)
        if needs_size and not exists(etkdg_fail):
            print(
                f'>> getting molecular size of {mol}, '
                f'{count} of {len(molecule_list)}'
            )
            # Return value is not needed; only the csv side effect is.
            rdkf.calc_molecule_diameter(
                name,
                smiles,
                out_file=diam_file,
                rSeed=seed,
                **size_options
            )
def mol_to_feature(m1, m1_uff, m2, interaction_data, pos_noise_std):
    """Assemble the feature dictionary for one ligand/protein pair.

    Args:
        m1: Ligand molecule; hydrogens are removed before use.
        m1_uff: Second ligand structure, used only for the SASA
            difference ``cal_sasa(m1) - cal_sasa(m1_uff)``.
        m2: Protein molecule; hydrogens are removed before use.
        interaction_data: Accepted for interface compatibility; not read
            by this function.
        pos_noise_std: Standard deviation of the Gaussian noise added to
            the ligand and protein coordinates.

    Returns:
        dict with the keys ``h1``, ``adj1``, ``h2``, ``adj2``, ``A_int``,
        ``dmv``, ``dmv_rot``, ``pos1``, ``pos2``, ``sasa``, ``dsasa``,
        ``rotor``, ``charge1``, ``charge2``, ``vdw_radius1``,
        ``vdw_radius2``, ``vdw_epsilon``, ``vdw_sigma``, ``delta_uff``,
        ``valid1``, ``valid2``, ``no_metal1`` and ``no_metal2``.
    """
    # Work on hydrogen-free copies of both molecules.
    m1 = Chem.RemoveHs(m1)
    m2 = Chem.RemoveHs(m2)

    # These two draws are never used, but they are kept so the global
    # NumPy random stream is consumed exactly as before (seeded runs stay
    # reproducible).
    np.random.uniform(0, 360, 1)
    np.random.uniform(-1, 1, 3)

    # Random rotation applied to a copy of the ligand.
    rot_angle = np.random.uniform(0, 360, 1)[0]
    rot_axis = np.random.uniform(-1, 1, 3)
    ligand_rotated = rotate(copy.deepcopy(m1), rot_angle, rot_axis, True)

    # Ligand: noisy coordinates, rotated coordinates (no noise),
    # adjacency with self-loops, atom features.
    n_lig = m1.GetNumAtoms()
    lig_pos = np.array(m1.GetConformers()[0].GetPositions())
    lig_pos = lig_pos + np.random.normal(0.0, pos_noise_std, lig_pos.shape)
    lig_pos_rot = np.array(ligand_rotated.GetConformers()[0].GetPositions())
    lig_adj = GetAdjacencyMatrix(m1) + np.eye(n_lig)
    lig_feat = get_atom_feature(m1, True)

    # Protein: noisy coordinates, adjacency with self-loops, atom features.
    n_prot = m2.GetNumAtoms()
    prot_conf = m2.GetConformers()[0]
    prot_pos = np.array(prot_conf.GetPositions())
    prot_pos = prot_pos + np.random.normal(
        0.0, pos_noise_std, prot_pos.shape
    )
    prot_adj = GetAdjacencyMatrix(m2) + np.eye(n_prot)
    prot_feat = get_atom_feature(m2, True)

    # Ligand-protein distance vectors for the original and rotated pose.
    dist_vec = dm_vector(lig_pos, prot_pos)
    dist_vec_rot = dm_vector(lig_pos_rot, prot_pos)

    # Interaction tensor: one (ligand x protein) slice per interaction
    # type; only three slices are filled here.
    interaction = np.zeros((len(interaction_types), n_lig, n_prot))
    interaction[-2] = get_A_hydrophobic(m1, m2)
    interaction[1] = get_A_hbond(m1, m2)
    interaction[-1] = get_A_metal_complexes(m1, m2)

    # Solvent-accessible surface area and its difference to m1_uff.
    lig_sasa = cal_sasa(m1)
    sasa_delta = lig_sasa - cal_sasa(m1_uff)

    # Rotatable bond count of the ligand.
    n_rotor = CalcNumRotatableBonds(m1)

    # Partial charges are all zeros. (An MMFF partial-charge variant was
    # previously kept here as an inert string literal.)
    # The nan/inf clean-up is retained from the original flow.
    lig_charge = np.nan_to_num(
        np.array(np.zeros((n_lig, ))), nan=0, neginf=0, posinf=0
    )
    prot_charge = np.nan_to_num(
        np.array(np.zeros((n_prot, ))), nan=0, neginf=0, posinf=0
    )

    # Validity masks: every atom is marked valid.
    lig_valid = np.ones((n_lig, ))
    prot_valid = np.ones((n_prot, ))

    # 1 for non-metal atoms, 0 for atoms whose symbol is a listed metal.
    metals = ("Zn", "Mn", "Co", "Mg", "Ni", "Fe", "Ca", "Cu")
    lig_no_metal = np.array(
        [int(atom.GetSymbol() not in metals) for atom in m1.GetAtoms()]
    )
    prot_no_metal = np.array(
        [int(atom.GetSymbol() not in metals) for atom in m2.GetAtoms()]
    )

    # Van der Waals parameters.
    lig_vdw_radius = np.array([get_vdw_radius(atom) for atom in m1.GetAtoms()])
    prot_vdw_radius = np.array(
        [get_vdw_radius(atom) for atom in m2.GetAtoms()]
    )
    vdw_epsilon, vdw_sigma = get_epsilon_sigma(m1, m2, False)

    # UFF energy difference is fixed at zero.
    uff_delta = 0.0

    return {
        "h1": lig_feat,
        "adj1": lig_adj,
        "h2": prot_feat,
        "adj2": prot_adj,
        "A_int": interaction,
        "dmv": dist_vec,
        "dmv_rot": dist_vec_rot,
        "pos1": lig_pos,
        "pos2": prot_pos,
        "sasa": lig_sasa,
        "dsasa": sasa_delta,
        "rotor": n_rotor,
        "charge1": lig_charge,
        "charge2": prot_charge,
        "vdw_radius1": lig_vdw_radius,
        "vdw_radius2": prot_vdw_radius,
        "vdw_epsilon": vdw_epsilon,
        "vdw_sigma": vdw_sigma,
        "delta_uff": uff_delta,
        "valid1": lig_valid,
        "valid2": prot_valid,
        "no_metal1": lig_no_metal,
        "no_metal2": prot_no_metal,
    }
def predict():
    """Generate molecules for the requested property targets and rank them.

    Reads a JSON request body with the keys:

    - ``conditions``: target values; after scaling they are compared with
      the predicted melting point, boiling point and water solubility,
      in that order.
    - ``num_rounds``: number of sampling rounds to run.
    - ``loyality``: minimum fraction of the 12 drug-design rules a
      molecule must satisfy to be kept.
    - ``num_of_mols``: maximum number of molecules to return.

    Each round samples molecules from ``model``, standardises them, sends
    the SMILES to the prediction backend, computes RDKit descriptors and
    counts how many of the 12 rules each molecule passes. Molecules that
    pass at least one rule and at least the ``loyality`` fraction are
    collected as ``(smiles, score, (melting, boiling, solubility), mse)``,
    where ``mse`` is the mean squared error between the scaled predicted
    properties and the scaled ``conditions``.

    A SMILES returned by the backend that cannot be standardised or
    parsed is skipped (scored 0) instead of aborting the whole request.

    Returns:
        The ``jsonify`` response with at most ``num_of_mols`` collected
        entries, sorted by ascending ``mse``.
    """
    req_data = request.get_json()
    print("Data requested")
    print(req_data)

    conditions = req_data["conditions"]
    num_rounds = req_data["num_rounds"]
    loyality = req_data["loyality"]
    num_of_mols = req_data["num_of_mols"]

    # Number of molecules requested from the model per round.
    number_generate = 100

    result_arr = []
    for round_idx in range(num_rounds):
        print(f"round {round_idx}")

        # Scaled targets: computed once per round and reused both as the
        # model conditioning and as the reference for the MSE ranking.
        scaled_target = scaler.transform(np.array([conditions]))

        endp = torch.tensor(scaled_target)
        print(endp.shape)
        endp = endp.repeat(number_generate, 1)
        endp = endp.unsqueeze(0)
        endp = endp.repeat(3, 1, 1)
        endp = endp.float()
        endp = endp.cuda()

        res = model.sample(endp, number_generate, dataset.model)
        valid = len(res) * 100 / number_generate
        print("valid : {} %".format(valid))

        # Standardise and drop anything the standardizer rejects.
        mols = [
            smi for smi in (robust_standardizer(mol) for mol in res)
            if smi is not None
        ]
        print("Mols obtained")
        print(mols)

        # NOTE(review): no timeout is passed, so a stalled backend blocks
        # this handler indefinitely - consider adding one.
        vals_another = requests.post(
            "https://backend.syntelly.com/tempSmilesArrToPredict",
            json={'smiles': mols}
        ).json()

        result = []
        for entry in vals_another:
            smiles = entry['smiles']
            # Endpoint name -> predicted value for this molecule.
            predicted = {
                endpoints_id2name[e["endpoint_id"]]: e["value"]
                for e in entry['data']
            }

            standardized = robust_standardizer(smiles)
            mol = (
                Chem.MolFromSmiles(standardized)
                if standardized is not None else None
            )
            if mol is None:
                # Unusable structure: keep the per-round score list
                # aligned with the backend response and move on.
                result.append(0)
                continue

            mol_weight = CalcExactMolWt(mol)
            logp = MolLogP(mol)
            atom_count = mol.GetNumAtoms()
            molar_refractivity = MolMR(mol)
            num_rings = CalcNumRings(mol)
            num_rot_bonds = CalcNumRotatableBonds(mol)
            num_h_acceptors = NumHAcceptors(mol)
            num_h_donors = NumHDonors(mol)

            bcf = predicted['Bioconcentration factor']
            dev_tox = predicted['Developmental toxicity']
            flash_point = predicted['Flash point']
            boiling_point = predicted['Boiling point']
            melting_point = predicted['Melting point']
            water_solubility = predicted['Water Solubility']

            # The 12 drug-design rules; the score is how many hold.
            rules = (
                160 <= mol_weight <= 480,
                -0.4 <= logp <= 5.6,
                20 <= atom_count <= 70,
                40 <= molar_refractivity <= 130,
                bcf < 3,
                dev_tox == 'Negative',
                flash_point > (350 - 273.15),
                boiling_point > (300 - 273.15),
                num_rings > 0,
                num_rot_bonds < 5,
                num_h_acceptors <= 10,
                num_h_donors <= 5,
            )
            val = sum(1 for passed in rules if passed)

            score = val if val / 12 >= loyality else 0
            result.append(score)
            if score > 0:
                properties = (melting_point, boiling_point, water_solubility)
                result_arr.append((
                    smiles,
                    score,
                    properties,
                    mean_squared_error(
                        scaler.transform(np.array([list(properties)])),
                        scaled_target
                    ),
                ))
        print(result)

    # Closest match to the requested conditions first.
    result_arr.sort(key=lambda x: x[3])
    print(result_arr[:num_of_mols])
    return jsonify(result_arr[:num_of_mols])
def calculate(self):
    """Return the rotatable bond count of ``self.mol``.

    Passes ``self.mol`` to ``CalcNumRotatableBonds`` and returns the
    result unchanged.
    """
    molecule = self.mol
    return CalcNumRotatableBonds(molecule)