def distance_matrix_eigenvalues(mol_list, invert=False):
    """Return zero-padded eigenvalue spectra of the molecules' distance matrices.

    For every molecule the matrix returned by ``GetDistanceMatrix`` is
    (optionally) inverted element-wise, its eigenvalues are computed, their
    real parts are kept and sorted in descending order.  Shorter spectra are
    right-padded with zeros so that every row has the length of the longest
    spectrum.

    Args:
        mol_list: Iterable of molecules accepted by ``GetDistanceMatrix``.
        invert: If true, use the element-wise reciprocal of the distance
            matrix.  Entries whose reciprocal is not finite (e.g. zero
            distances) are set to 0.

    Returns:
        ``np.ndarray`` with one row per molecule holding the descending,
        zero-padded eigenvalues.  An empty array is returned for an empty
        ``mol_list``.
    """
    spectra = []
    for mol in mol_list:
        matrix = GetDistanceMatrix(mol)
        if invert:
            # Work in floating point so integer matrices are not
            # integer-divided by np.reciprocal.
            matrix = np.asarray(matrix, dtype=float)
            with np.errstate(divide="ignore", invalid="ignore"):
                matrix = np.reciprocal(matrix)
            # np.nan_to_num would turn 1/0 == inf into the largest finite
            # float (~1.8e308), which swamps the spectrum; zero those
            # entries (and any NaN) instead.
            matrix[~np.isfinite(matrix)] = 0.0

        evs = np.real(np.linalg.eigvals(matrix))
        # eigvals gives no ordering guarantee, so sort explicitly.
        spectra.append(sorted(evs, reverse=True))

    # Zero padding up to the longest spectrum.
    max_length = max((len(evs) for evs in spectra), default=0)
    for evs in spectra:
        evs.extend([0] * (max_length - len(evs)))

    return np.array(spectra)
def _all_pairs(self, mol, atoms_env):
    """Build the unique atom-pair shingles of ``mol``.

    For every unordered pair of atom indices and every ``i`` in
    ``range(self.radius)``, the two environments ``atoms_env[idx][i]`` are
    sorted and joined with the integer-truncated ``GetDistanceMatrix`` entry
    of the pair as ``"<env>|<dist>|<env>"``.  When ``self.is_counted`` is
    truthy, a running occurrence counter of that shingle is appended as
    ``"|<count>"``.

    Args:
        mol: Molecule passed to ``GetDistanceMatrix``; must provide
            ``GetNumAtoms()``.
        atoms_env: Indexable by atom index, then by ``i`` in
            ``range(self.radius)``.

    Returns:
        List of distinct UTF-8 encoded shingles.  The list is produced from
        a set, so its order is unspecified.
    """
    shingles = []
    topo_dist = GetDistanceMatrix(mol)
    atom_count = mol.GetNumAtoms()
    occurrences = defaultdict(int)

    for a, b in itertools.combinations(range(atom_count), 2):
        dist_label = str(int(topo_dist[a][b]))
        for depth in range(self.radius):
            # Sorting makes the shingle independent of the pair orientation.
            first, second = sorted([atoms_env[a][depth], atoms_env[b][depth]])
            shingle = f"{first}|{dist_label}|{second}"

            if self.is_counted:
                occurrences[shingle] += 1
                shingle = f"{shingle}|{occurrences[shingle]}"

            shingles.append(shingle.encode('utf-8'))

    return list(set(shingles))
def cal_internal_vdw(m):
    """Sum a 12-6 pair energy over selected atom pairs within molecule ``m``.

    Coordinates are taken from the first conformer (``m.GetConformers()[0]``).
    A pair ``(i, j)`` with ``j < i`` is skipped when ``GetUFFVdWParams``
    returns ``None`` for it, when the ``GetAdjacencyMatrix`` entry equals 1,
    or when the ``GetDistanceMatrix`` entry is below 4.  Every remaining pair
    contributes ``e * ((d / r)**12 - 2 * (d / r)**6)``, where ``(d, e)`` are
    the returned parameters and ``r`` is the pair's coordinate distance.

    Args:
        m: Molecule with at least one conformer.

    Returns:
        The accumulated energy; the integer ``0`` when no pair contributes.
    """
    total = 0
    atom_count = m.GetNumAtoms()
    conformer = m.GetConformers()[0]
    coords = np.array(conformer.GetPositions())
    spatial_dm = distance_matrix(coords, coords)
    adjacency = GetAdjacencyMatrix(m)
    topo_dm = GetDistanceMatrix(m)

    for i in range(atom_count):
        for j in range(i):
            vdw_params = GetUFFVdWParams(m, i, j)
            if vdw_params is None:
                continue

            length_param, energy_param = vdw_params
            length_param = length_param * 1.0

            # Skip directly bonded atoms and pairs that are topologically
            # closer than 4.
            if adjacency[i, j] == 1 or topo_dm[i, j] < 4:
                continue

            ratio = length_param / spatial_dm[i, j]
            total += energy_param * (ratio**12 - 2 * (ratio**6))

    return total
def local_optimize(model, lf, pf, of, loof, args, device):
    """Locally optimize ligand coordinates against the model's energy terms.

    The ligand is read from ``lf`` and the binding pocket is extracted from
    ``pf``.  Model-derived quantities (pairwise vdW radii, vdW coefficients,
    hbond / hydrophobic coefficients) are computed once without gradients;
    then the ligand positions ``pos1`` are optimized with Adam
    (100 steps, ``lr=0.01``) on the loss

        vdw + hbond1 + hbond2 + hydrophobic
        + max(internal_vdw, initial_internal_vdw)
        + sum(fix_pair * (initial_dm_internal - dm_internal) ** 2)

    Results are written with ``write(of, ...)`` and the final coordinates
    with ``write_molecule(loof, m1, pos1)``.

    Args:
        model: Model providing ``get_embedding_vector``,
            ``cal_vdw_interaction_A/B``, ``cal_distance_matrix`` and the
            ``vina_hbond_coeff``, ``vina_hydrophobic_coeff``, ``rotor_coeff``
            attributes.
        lf: Ligand input passed to ``utils.read_molecule``.
        pf: Protein input passed to ``utils.extract_binding_pocket``.
        of: Output target passed to ``write``.
        loof: Output target passed to ``write_molecule``.
        args: Namespace providing ``dev_vdw_radius``, ``vdw_N``,
            ``max_vdw_interaction`` and ``min_vdw_interaction``.
        device: Device the collated sample is moved to.

    Returns:
        ``lig_energy``: the sum of the four energy terms evaluated with
        ``is_last=True``.  NOTE: it is computed from the distance matrix of
        the last loop iteration, i.e. before the final optimizer step.
    """
    st = time.time()

    # read ligand and protein. Then, convert to rdkit object
    m1 = utils.read_molecule(lf)
    m2 = utils.extract_binding_pocket(m1, pf)

    # preprocess: convert rdkit mol obj to feature
    sample = dataset.mol_to_feature(m1, m1, m2, None, 0.0)
    sample["affinity"] = 0.0
    sample["key"] = "None"
    sample = dataset.tensor_collate_fn([sample])
    sample = utils.dic_to_device(sample, device)

    # Everything derived from the model is a constant of the optimization:
    # only pos1 is handed to the optimizer, so no graph is needed here.
    with torch.no_grad():
        # get embedding vector
        h1, h2 = model.get_embedding_vector(sample)
        h1_repeat = h1.unsqueeze(2).repeat(1, 1, h2.size(1), 1)
        h2_repeat = h2.unsqueeze(1).repeat(1, h1.size(1), 1, 1)
        h = torch.cat([h1_repeat, h2_repeat], -1)

        # vdw radius parameter
        dev_vdw_radius = model.cal_vdw_interaction_B(h).squeeze(-1)
        dev_vdw_radius = dev_vdw_radius * args.dev_vdw_radius
        vdw_radius1, vdw_radius2 = sample["vdw_radius1"], sample["vdw_radius2"]
        vdw_radius1_repeat = vdw_radius1.unsqueeze(2).repeat(
            1, 1, vdw_radius2.size(1)
        )
        vdw_radius2_repeat = vdw_radius2.unsqueeze(1).repeat(
            1, vdw_radius1.size(1), 1
        )
        sum_vdw_radius = vdw_radius1_repeat + vdw_radius2_repeat + dev_vdw_radius

        # vdw interaction
        vdw_N = args.vdw_N
        vdw_A = model.cal_vdw_interaction_A(h).squeeze(-1)
        vdw_A = vdw_A * (args.max_vdw_interaction - args.min_vdw_interaction)
        vdw_A = vdw_A + args.min_vdw_interaction

        # hbond and hydrophobic (coefficients are squared)
        hbond_coeff = model.vina_hbond_coeff * model.vina_hbond_coeff
        hydrophobic_coeff = (
            model.vina_hydrophobic_coeff * model.vina_hydrophobic_coeff
        )

    pos1, pos2, A_int = sample["pos1"], sample["pos2"], sample["A_int"]

    # NOTE(review): the tensors below are created on CPU by torch.from_numpy
    # and are not moved to `device` here -- confirm this is handled when
    # `device` is not the CPU.
    epsilon, sigma = dataset.get_epsilon_sigma(m1, m1, False)
    epsilon = torch.from_numpy(epsilon)
    sigma = torch.from_numpy(sigma)
    fix_pair = torch.from_numpy(distance_fix_pair(m1))
    initial_dm_internal = model.cal_distance_matrix(pos1, pos1, 0.5)
    topological_dm = torch.from_numpy(GetDistanceMatrix(m1))

    # optimizer: the ligand coordinates are the only free variables
    pos1.requires_grad = True
    optimizer = torch.optim.Adam([pos1], lr=0.01)

    for step in range(100):
        optimizer.zero_grad()

        dm = model.cal_distance_matrix(pos1, pos2, 0.5)
        dm_internal = model.cal_distance_matrix(pos1, pos1, 0.1)

        vdw = cal_vdw_energy(dm, sum_vdw_radius, vdw_A, vdw_N)
        hbond1 = cal_hbond_energy(dm, sum_vdw_radius, hbond_coeff, A_int[:, 1])
        hbond2 = cal_hbond_energy(dm, sum_vdw_radius, hbond_coeff, A_int[:, -1])
        hydrophobic = cal_hydrophobic_energy(
            dm, sum_vdw_radius, hydrophobic_coeff, A_int[:, -2]
        )

        # constraint
        internal_vdw = cal_internal_vdw_energy(
            dm_internal, topological_dm, epsilon, sigma
        )
        dev_fix_distance = torch.pow(
            initial_dm_internal - dm_internal, 2
        ).squeeze()
        dev_fix_distance = (dev_fix_distance * fix_pair).sum()

        if step == 0:
            # Remember the starting state for the penalty and the report.
            initial_internal_vdw = internal_vdw.detach()
            initial_pred = torch.stack([vdw, hbond1, hbond2, hydrophobic])
            initial_pos1 = pos1.clone().detach()

        # loss: the internal vdW term only contributes a gradient once it
        # exceeds its initial value.
        loss = vdw + hbond1 + hbond2 + hydrophobic
        loss = loss + torch.max(internal_vdw, initial_internal_vdw)
        loss = loss + dev_fix_distance
        loss.backward()
        optimizer.step()

    # rotor penalty
    rotor_penalty = 1 + model.rotor_coeff * model.rotor_coeff * sample["rotor"]

    lig_vdw = cal_vdw_energy(dm, sum_vdw_radius, vdw_A, vdw_N, is_last=True)
    lig_hbond1 = cal_hbond_energy(
        dm, sum_vdw_radius, hbond_coeff, A_int[:, 1], is_last=True
    )
    lig_hbond2 = cal_hbond_energy(
        dm, sum_vdw_radius, hbond_coeff, A_int[:, -1], is_last=True
    )
    lig_hydrophobic = cal_hydrophobic_energy(
        dm, sum_vdw_radius, hydrophobic_coeff, A_int[:, -2], is_last=True
    )
    lig_energy = lig_vdw + lig_hbond1 + lig_hbond2 + lig_hydrophobic

    pos1 = pos1.data.cpu().numpy()[0]
    initial_pos1 = initial_pos1.data.cpu().numpy()[0]

    pred = torch.stack([vdw, hbond1, hbond2, hydrophobic])
    pred = pred / rotor_penalty
    pred = pred.data.cpu().numpy()
    initial_pred = initial_pred.data.cpu().numpy()

    init_pred = np.sum(initial_pred)
    delta_pred = np.sum(pred) - np.sum(initial_pred)
    init_internal_vdw = initial_internal_vdw.item()
    final_internal_vdw = internal_vdw.item()
    final_dev_fix_distance = dev_fix_distance.item()
    ligand_pos_change = (np.abs(pos1 - initial_pos1)).sum().item()

    extra_data = {
        "Initial prediction": f"{init_pred:.3f} Kcal/mol",
        "Delta prediction": f"{delta_pred:.3f} Kcal/mol",
        # Fixed: previously referenced the undefined name `init_interval_vdw`.
        "Initial internal vdw": f"{init_internal_vdw:.3f}",
        "Final internal vdw": f"{final_internal_vdw:.3f}",
        "Final dev fix distance": f"{final_dev_fix_distance:.3f}",
        "ligand pos change": f"{ligand_pos_change:.3f}",
    }

    end = time.time()
    write(of, model, pred, end - st, args, extra_data)
    write_molecule(loof, m1, pos1)
    return lig_energy