def CalculateArithmeticTopoIndex(mol: Chem.Mol) -> float:
    """Get the arithmetic topological index (Arto) of a molecule.

    The index is twice the number of bonds divided by the number of atoms.

    From Narumi H., MATCH (Comm. Math. Comp. Chem.), (1987), 22, 195-207.

    Args:
        mol: Molecule exposing ``GetNumAtoms()`` and ``GetNumBonds()``.

    Returns:
        ``2 * nBonds / nAtoms``, or ``0.0`` when the molecule has no atoms
        (the ratio is undefined there, so 0.0 is returned instead of raising
        ``ZeroDivisionError``).
    """
    nAtoms = mol.GetNumAtoms()
    if not nAtoms:
        # Empty molecule: avoid dividing by zero.
        return 0.0
    nBonds = mol.GetNumBonds()
    return 2. * nBonds / nAtoms
def CalculateKappaAlapha1(mol: Chem.Mol) -> float:
    """Calculate the molecular shape index for one bonded fragment.

    With ``A`` the heavy-atom count, ``P1`` the heavy-atom bond count and
    ``alpha`` the value returned by ``_HallKierAlpha(mol)``, the result is
    ``(A + alpha) * (A + alpha - 1)**2 / (P1 + alpha)**2``.

    Args:
        mol: Molecule to describe.

    Returns:
        The index rounded to three decimals, or ``0.0`` when ``P1 + alpha``
        is zero.
    """
    bond_count = mol.GetNumBonds(onlyHeavy=1)
    atom_count = mol.GetNumHeavyAtoms()
    hk_alpha = _HallKierAlpha(mol)

    path_term = bond_count + hk_alpha
    if not path_term:
        # Degenerate denominator: report zero rather than dividing.
        return round(0.0, 3)

    size_term = atom_count + hk_alpha
    return round(size_term * (size_term - 1)**2 / path_term**2, 3)
def mol_to_data_dict(self, mol: Chem.Mol) -> Dict[Text, np.ndarray]:
    """Build the graph data dict for a single molecule.

    Every bond contributes two directed edges (begin->end and end->begin)
    that share the same feature row.

    Args:
        mol: Molecule to convert.

    Returns:
        Dict with float32 ``'nodes'`` (one ``self.atom_features`` row per
        atom), float32 ``'edges'`` (two ``self.bond_features`` rows per bond,
        ``len(BOND_TYPES)`` columns), float32 ``'globals'`` (a single zero),
        and int32 ``'senders'`` / ``'receivers'`` atom indices.
    """
    atom_rows = [self.atom_features(atom) for atom in mol.GetAtoms()]
    node_feats = np.array(atom_rows)

    edge_feats = np.zeros((mol.GetNumBonds() * 2, len(BOND_TYPES)))
    src = []
    dst = []
    for bond_pos, bond in enumerate(mol.GetBonds()):
        begin = bond.GetBeginAtom().GetIdx()
        end = bond.GetEndAtom().GetIdx()
        feats = self.bond_features(bond)

        # Rows 2*k and 2*k + 1 hold the two directions of bond k.
        forward_row = bond_pos * 2
        edge_feats[forward_row, :] = feats
        edge_feats[forward_row + 1, :] = feats

        src += [begin, end]
        dst += [end, begin]

    return {
        'nodes': node_feats.astype(np.float32),
        'edges': edge_feats.astype(np.float32),
        'globals': np.array([0.], dtype=np.float32),
        'senders': np.array(src, np.int32),
        'receivers': np.array(dst, np.int32)
    }
def CalculateBalaban(mol: Chem.Mol) -> float:
    """Get the Balaban index (J) of a molecule.

    With ``B`` bonds, ``N`` atoms, ``mu = B - N + 1`` and ``s_i`` the sum of
    row ``i`` of the topological distance matrix, the index is::

        J = B / (mu + 1) * sum(1 / sqrt(s_i * s_j))

    where the sum runs once over every pair ``i <= j`` whose adjacency-matrix
    entry equals 1.

    Args:
        mol: Molecule to describe.

    Returns:
        The index as a plain ``float``; ``0.0`` when ``mu + 1`` is zero.
    """
    adjMat = Chem.GetAdjacencyMatrix(mol)
    Distance = Chem.GetDistanceMatrix(mol)
    Nbond = mol.GetNumBonds()
    Natom = mol.GetNumAtoms()

    # Distance sum of every atom (row sums of the distance matrix).
    S = numpy.sum(Distance, axis=1)
    mu = Nbond - Natom + 1
    if mu + 1 == 0:
        # Prefactor undefined; keep the zero fallback but as a float.
        return 0.0

    # The upper triangle visits each adjacent pair exactly once, in the same
    # row-major order as a nested ``i``/``j >= i`` loop.
    sumk = 0.
    for i, j in numpy.argwhere(numpy.triu(adjMat) == 1):
        sumk += 1. / numpy.sqrt(S[i] * S[j])

    return float(float(Nbond) / float(mu + 1) * sumk)
def num_bonds(mol: Mol) -> int:
    """Return the total number of bonds reported by ``mol.GetNumBonds()``."""
    bond_count = mol.GetNumBonds()
    return bond_count
def mol2tensors(mol: Chem.Mol, cliques=False):
    """Convert a molecule into node / edge tensors.

    Args:
        mol: Molecule to convert, or ``None``.
        cliques: When truthy, build the graph over the cliques returned by
            ``tree_decomp(mol)`` instead of over the atoms and bonds.

    Returns:
        ``(None, None)`` when ``mol`` is ``None``. Otherwise
        ``(nodes, edge_index, edge_attr)`` in atom mode, or
        ``(nodes, edge_index, edge_attr, nodes_dict)`` in clique mode:

        * ``nodes``: one feature row per atom (float64) or per clique.
        * ``edge_index``: ``(2 * n_edges, 2)`` long tensor; rows ``2*i`` and
          ``2*i + 1`` hold the two directions of edge ``i``.
        * ``edge_attr``: one ``get_bond_features`` row per directed edge in
          atom mode; ``None`` in clique mode, where edges are not bonds.
        * ``nodes_dict``: node index -> ``{'smiles': ..., 'clique': [...]}``,
          with ``'clique'`` listing the indices of neighbouring nodes.

        In clique mode the last clique containing atom 0 is relabelled as
        node 0 (swapped with whatever clique previously had index 0).
    """
    if mol is None:
        # Kept as a 2-tuple: existing callers may unpack exactly two values.
        return None, None

    if not cliques:
        n_nodes = mol.GetNumAtoms()
        n_edges = mol.GetNumBonds()

        nodes = torch.zeros((n_nodes, N_ATOM_FEATS), dtype=torch.float64)
        for i, rd_atom in enumerate(mol.GetAtoms()):
            nodes[i] = get_atom_features(rd_atom)

        edge_index = torch.zeros((n_edges * 2, 2), dtype=torch.long)
        edge_attr = torch.zeros((n_edges * 2, N_BOND_FEATS),
                                dtype=torch.float64)
        for i, bond in enumerate(mol.GetBonds()):
            x = bond.GetBeginAtom().GetIdx()
            y = bond.GetEndAtom().GetIdx()
            edge_index[2 * i] = torch.LongTensor([x, y])
            edge_index[2 * i + 1] = torch.LongTensor([y, x])
            edge_attr[2 * i] = get_bond_features(bond)
            edge_attr[2 * i + 1] = edge_attr[2 * i].clone()
        return nodes, edge_index, edge_attr

    # Clique mode. Use a separate name for the clique list so the ``cliques``
    # flag is not overwritten.
    clique_list, edges = tree_decomp(mol)
    edges = list(edges)
    n_cliques = len(clique_list)
    n_edges = len(edges)

    nodes = torch.zeros((n_cliques, N_ATOM_FEATS))
    nodes_dict = {}
    root = 0
    for i, clique in enumerate(clique_list):
        cmol = get_clique_mol(mol, clique)
        nodes[i] = torch.Tensor(get_atom_features(cmol))
        nodes_dict[i] = dict(smiles=get_smiles(cmol), clique=[])
        if min(clique) == 0:
            root = i

    if root > 0:
        # Relabel: the root clique becomes node 0 and the old node 0 takes
        # the root's former index.
        nodes_dict[0], nodes_dict[root] = nodes_dict[root], nodes_dict[0]
        # NOTE(review): feature rows are swapped too so that they stay
        # aligned with the relabelled indices used in edge_index/nodes_dict.
        nodes[[0, root]] = nodes[[root, 0]]

    def _relabel(idx):
        # Apply the same 0 <-> root swap to an edge endpoint.
        if idx == root:
            return 0
        if idx == 0:
            return root
        return idx

    edge_index = torch.zeros((n_edges * 2, 2), dtype=torch.long)
    for i, (_x, _y) in enumerate(edges):
        x = _relabel(_x)
        y = _relabel(_y)
        edge_index[2 * i] = torch.LongTensor([x, y])
        edge_index[2 * i + 1] = torch.LongTensor([y, x])
        nodes_dict[x]['clique'].append(y)
        nodes_dict[y]['clique'].append(x)

    # Clique edges have no bond to featurise.
    edge_attr = None
    return nodes, edge_index, edge_attr, nodes_dict
def get_array_from_mol(mol: Chem.Mol,
                       scaffold_nodes: t.Iterable,
                       nh_nodes: t.Iterable,
                       np_nodes: t.Iterable,
                       k: int,
                       p: float,
                       ms: MoleculeSpec = MoleculeSpec.get_default()
                       ) -> t.Tuple[np.ndarray, np.ndarray]:
    """
    Represent the molecule using `np.ndarray`

    Note that the atoms of `mol` are modified in place: atoms listed in
    `nh_nodes` gain one explicit hydrogen and atoms listed in `np_nodes`
    have their formal charge decreased by one. The routes are sampled
    before these modifications are applied.

    Args:
        mol (Chem.Mol):
            The input molecule
        scaffold_nodes (Iterable):
            The location of scaffold represented as `list`/`np.ndarray`
        nh_nodes (Iterable):
            Indices of atoms that receive one additional explicit hydrogen
        np_nodes (Iterable):
            Indices of atoms whose formal charge is decreased by one
        k (int):
            The number of importance samples
        p (float):
            Degree of uncertainty during route sampling, should be in (0, 1)
        ms (mol_spec.MoleculeSpec):
            Provides `get_atom_type` / `get_bond_type` used to encode atoms
            and bonds

    Returns:
        mol_array (np.ndarray):
            The numpy representation of the molecule
            dtype - np.int32, shape - [k, num_bonds + 1, 5]
            columns - [atom_type_added, start_id, end_id, bond_type,
            is_scaffold]; row 0 of every sample is the initialization step
        logp (np.ndarray):
            The log-likelihood of each route
            dtype - np.float32, shape - [k, ]
    """
    num_bonds = mol.GetNumBonds()

    # sample route
    scaffold_nodes = np.array(list(scaffold_nodes), dtype=np.int32)
    route_list, step_ids_list, logp = _sample_ordering(mol,
                                                       scaffold_nodes,
                                                       k,
                                                       p)

    # Materialize once: membership tests stay O(1) per atom and one-shot
    # iterables (e.g. generators) are not consumed by the first lookup.
    nh_nodes = set(nh_nodes)
    np_nodes = set(np_nodes)

    atom_types = []
    for atom_id, atom in enumerate(mol.GetAtoms()):
        if atom_id in nh_nodes:
            atom.SetNumExplicitHs(atom.GetNumExplicitHs() + 1)
        if atom_id in np_nodes:
            atom.SetFormalCharge(atom.GetFormalCharge() - 1)
        # The type is read after the modifications above.
        atom_types.append(ms.get_atom_type(atom))

    bond_info = [
        [bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(), ms.get_bond_type(bond)]
        for bond in mol.GetBonds()
    ]

    # shape:
    # atom_types: num_atoms
    # bond_info: num_bonds x 3 (the reshape keeps this true for zero bonds)
    atom_types = np.array(atom_types, dtype=np.int32)
    bond_info = np.array(bond_info, dtype=np.int32).reshape(-1, 3)

    # initialize packed molecule array data
    mol_array = []
    num_scaffold = len(scaffold_nodes)

    for sample_id in range(k):
        # get the route and step_ids for the i-th sample
        route_i = route_list[sample_id, :]
        step_ids_i = step_ids_list[sample_id, :]

        # reorder atom types and bond info
        # note: bond_info [start_ids, end_ids, bond_type]
        atom_types_i, bond_info_i, is_append = _reorder(atom_types,
                                                        bond_info,
                                                        route_i,
                                                        step_ids_i)

        # atom type added at each step
        # -1 if the current step is connect
        atom_types_added = np.full([num_bonds, ], -1, dtype=np.int32)
        atom_types_added[is_append] = \
            atom_types_i[bond_info_i[:, 1]][is_append]

        # pack into mol_array_i
        # size: num_bonds x 4
        # note: [atom_types_added, start_ids, end_ids, bond_type]
        mol_array_i = np.concatenate([atom_types_added[:, np.newaxis],
                                      bond_info_i],
                                     axis=-1)

        # add initialization step
        init_step = np.array([[atom_types_i[0], -1, 0, -1]], dtype=np.int32)

        # concat into mol_array
        # size: (num_bonds + 1) x 4
        mol_array_i = np.concatenate([init_step, mol_array_i], axis=0)

        # Mark up scaffold bonds: both end points have an id below the
        # number of scaffold nodes.
        # NOTE(review): presumably _reorder numbers scaffold atoms first -
        # confirm against _sample_ordering / _reorder.
        is_scaffold = np.logical_and(mol_array_i[:, 1] < num_scaffold,
                                     mol_array_i[:, 2] < num_scaffold)
        is_scaffold = is_scaffold.astype(np.int32)

        # Concatenate
        # size: (num_bonds + 1) x 5
        mol_array_i = np.concatenate((mol_array_i,
                                      is_scaffold[:, np.newaxis]),
                                     axis=-1)
        mol_array.append(mol_array_i)

    # k x (num_bonds + 1) x 5
    mol_array = np.stack(mol_array, axis=0)

    # Output size:
    # mol_array: k x (num_bonds + 1) x 5
    # logp: k
    return mol_array, logp