Beispiel #1
0
def CalculateArithmeticTopoIndex(mol: Chem.Mol) -> float:
    """Get Arithmetic topological index.

    Or Arto.
    From Narumi H., MATCH (Comm. Math. Comp. Chem.), (1987), 22,195-207.

    The value is ``2 * nBonds / nAtoms``, using the atom and bond counts
    reported by ``mol.GetNumAtoms()`` and ``mol.GetNumBonds()``.

    :param mol: molecule to describe
    :return: the index, or ``0.0`` when the molecule has no atoms
    """
    nAtoms = mol.GetNumAtoms()
    if not nAtoms:
        # An atom-less molecule would otherwise raise ZeroDivisionError;
        # report 0.0 instead of crashing the descriptor calculation.
        return 0.0
    return 2. * mol.GetNumBonds() / nAtoms
Beispiel #2
0
def CalculateKappaAlapha1(mol: Chem.Mol) -> float:
    """Calculate molecular shape index for one bonded fragment.

    Uses the heavy-atom bond count, the heavy-atom count and the value
    returned by ``_HallKierAlpha(mol)``.  The result is rounded to three
    decimals; it is 0.0 when the bond count plus alpha is zero.
    """
    heavy_bonds = mol.GetNumBonds(onlyHeavy=1)
    heavy_atoms = mol.GetNumHeavyAtoms()
    hk_alpha = _HallKierAlpha(mol)

    bottom = heavy_bonds + hk_alpha
    if not bottom:
        # Zero denominator: the index is defined as 0.0 here.
        return 0.0

    top = heavy_atoms + hk_alpha
    return round(top * (top - 1)**2 / bottom**2, 3)
 def mol_to_data_dict(self, mol: Chem.Mol) -> Dict[Text, np.ndarray]:
     """Build the graph data dict for one molecule.

     Every bond is written as two directed edges (begin->end, end->begin)
     that share the same feature row from ``self.bond_features``.  Node
     rows come from ``self.atom_features``; ``globals`` is a single zero.
     """
     node_feats = np.array(
         [self.atom_features(atom) for atom in mol.GetAtoms()])
     n_directed = mol.GetNumBonds() * 2
     edge_feats = np.zeros((n_directed, len(BOND_TYPES)))
     src = []
     dst = []
     for k, bond in enumerate(mol.GetBonds()):
         begin = bond.GetBeginAtom().GetIdx()
         end = bond.GetEndAtom().GetIdx()
         feat = self.bond_features(bond)
         # Rows 2k and 2k + 1 hold the forward and reverse direction.
         for row in (k * 2, k * 2 + 1):
             edge_feats[row, :] = feat
         src += [begin, end]
         dst += [end, begin]
     return {
         'nodes': node_feats.astype(np.float32),
         'edges': edge_feats.astype(np.float32),
         'globals': np.array([0.], dtype=np.float32),
         'senders': np.array(src, np.int32),
         'receivers': np.array(dst, np.int32)
     }
Beispiel #4
0
def CalculateBalaban(mol: Chem.Mol) -> float:
    """Get Balaban index of a molecule.

    Or J.

    Computed as ``Nbond / (mu + 1) * sum(1 / sqrt(S_i * S_j))``, where the
    sum runs over every pair of adjacent atoms, ``S_i`` is the row sum of
    the distance matrix for atom ``i`` and ``mu = Nbond - Natom + 1``.

    :param mol: molecule to describe
    :return: the index as a plain ``float``; ``0.0`` when ``mu + 1`` is zero
    """
    adjMat = Chem.GetAdjacencyMatrix(mol)
    Distance = Chem.GetDistanceMatrix(mol)
    Nbond = mol.GetNumBonds()
    Natom = mol.GetNumAtoms()
    mu = Nbond - Natom + 1
    if mu + 1 == 0:
        # The prefactor Nbond / (mu + 1) is undefined; report 0.0 (a float,
        # matching the annotation) without accumulating the sum.
        return 0.0

    S = numpy.sum(Distance, axis=1)
    # Restrict to the upper triangle so each adjacent pair is counted once;
    # nonzero() yields the pairs in row-major order.
    rows, cols = numpy.nonzero(numpy.triu(adjMat) == 1)
    sumk = 0.
    for i, j in zip(rows, cols):
        sumk += 1. / numpy.sqrt(S[i] * S[j])
    return float(float(Nbond) / float(mu + 1) * sumk)
def num_bonds(mol: Mol) -> int:
    """Return the bond count reported by ``mol.GetNumBonds()``."""
    bond_count = mol.GetNumBonds()
    return bond_count
def mol2tensors(mol: Chem.Mol, cliques=False):
    """Convert a molecule into node and edge tensors.

    With ``cliques`` falsy, nodes are the atoms of ``mol`` and edges are its
    bonds.  With ``cliques`` truthy, nodes are the cliques returned by
    ``tree_decomp(mol)`` and edges are the edges it returns between them;
    the clique containing atom 0 is relabelled as node 0.

    Args:
        mol: Molecule to convert, or ``None``.
        cliques: Build the clique-level graph instead of the atom-level one.

    Returns:
        ``(None, None)`` when ``mol`` is ``None``.  Otherwise
        ``(nodes, edge_index, edge_attr)`` for the atom graph, or
        ``(nodes, edge_index, edge_attr, nodes_dict)`` for the clique graph.
        ``edge_index`` has shape ``(2 * n_edges, 2)``: every edge is stored
        once per direction.  For the clique graph ``edge_attr`` is ``None``
        (no bond features are computed for clique edges) and ``nodes_dict``
        maps each node id to its clique SMILES and neighbouring node ids.
    """
    if mol is None:
        return None, None

    # Capture the flag before anything else is bound, so the choice of
    # return shape never depends on the decomposition result.
    use_cliques = bool(cliques)

    if use_cliques:
        clique_list, edges = tree_decomp(mol)
        edges = list(edges)
        n_edges = len(edges)
        nodes = torch.zeros((len(clique_list), N_ATOM_FEATS))
        nodes_dict = {}
        root = 0

        for i, clique in enumerate(clique_list):
            cmol = get_clique_mol(mol, clique)
            nodes[i] = torch.Tensor(get_atom_features(cmol))
            nodes_dict[i] = dict(smiles=get_smiles(cmol), clique=[])
            if min(clique) == 0:
                root = i

        if root > 0:
            # Node ids 0 and `root` are exchanged below when edges are
            # relabelled; exchange the per-node records and the feature
            # rows too so all three structures use the same numbering.
            nodes_dict[0], nodes_dict[root] = nodes_dict[root], nodes_dict[0]
            nodes[[0, root]] = nodes[[root, 0]]

        edge_index = torch.zeros((n_edges * 2, 2), dtype=torch.long)
        for i, (_x, _y) in enumerate(edges):
            x = 0 if _x == root else root if _x == 0 else _x
            y = 0 if _y == root else root if _y == 0 else _y
            edge_index[2 * i] = torch.LongTensor([x, y])
            edge_index[2 * i + 1] = torch.LongTensor([y, x])
            nodes_dict[x]['clique'].append(y)
            nodes_dict[y]['clique'].append(x)

        return nodes, edge_index, None, nodes_dict

    n_nodes = mol.GetNumAtoms()
    n_edges = mol.GetNumBonds()
    nodes = torch.zeros((n_nodes, N_ATOM_FEATS), dtype=torch.float64)
    for i, rd_atom in enumerate(mol.GetAtoms()):
        nodes[i] = get_atom_features(rd_atom)

    edge_index = torch.zeros((n_edges * 2, 2), dtype=torch.long)
    edge_attr = torch.zeros((n_edges * 2, N_BOND_FEATS),
                            dtype=torch.float64)

    for i, bond in enumerate(mol.GetBonds()):
        # No relabelling is needed here: atom indices are used as node ids.
        x = bond.GetBeginAtom().GetIdx()
        y = bond.GetEndAtom().GetIdx()
        edge_index[2 * i] = torch.LongTensor([x, y])
        edge_index[2 * i + 1] = torch.LongTensor([y, x])
        edge_attr[2 * i] = get_bond_features(bond)
        edge_attr[2 * i + 1] = edge_attr[2 * i].clone()

    return nodes, edge_index, edge_attr
        
Beispiel #7
0
def get_array_from_mol(mol: Chem.Mol,
                       scaffold_nodes: t.Iterable,
                       nh_nodes: t.Iterable,
                       np_nodes: t.Iterable,
                       k: int,
                       p: float,
                       ms: MoleculeSpec = MoleculeSpec.get_default()
                       ) -> t.Tuple[np.ndarray, np.ndarray]:
    """
    Represent the molecule using `np.ndarray`

    Note:
        `mol` is modified in place: atoms listed in `nh_nodes` gain one
        explicit hydrogen and atoms listed in `np_nodes` have their formal
        charge decreased by one before atom types are read.

    Args:
        mol (Chem.Mol):
            The input molecule
        scaffold_nodes (Iterable):
            The location of scaffold represented as `list`/`np.ndarray`
        nh_nodes (Iterable):
            Indices of atoms that receive one additional explicit hydrogen
        np_nodes (Iterable):
            Indices of atoms whose formal charge is decreased by one
        k (int):
            The number of importance samples
        p (float):
            Degree of uncertainty during route sampling, should be in (0, 1)
        ms (mol_spec.MoleculeSpec):
            Provides `get_atom_type` / `get_bond_type` used to encode atoms
            and bonds

    Returns:
        mol_array (np.ndarray):
            The numpy representation of the molecule
            dtype - np.int32, shape - [k, num_bonds + 1, 5]
        logp (np.ndarray):
            The log-likelihood of each route
            dtype - np.float32, shape - [k, ]
    """
    num_bonds = mol.GetNumBonds()

    # sample route
    scaffold_nodes = np.array(list(scaffold_nodes), dtype=np.int32)
    route_list, step_ids_list, logp = _sample_ordering(mol,
                                                       scaffold_nodes,
                                                       k,
                                                       p)

    # Materialise the index collections once: a bare iterable (e.g. a
    # generator) would be exhausted by repeated `in` tests, and a set makes
    # each membership test O(1).
    nh_ids = {int(idx) for idx in nh_nodes}
    np_ids = {int(idx) for idx in np_nodes}

    atom_types = []
    for atom_id, atom in enumerate(mol.GetAtoms()):
        if atom_id in nh_ids:
            atom.SetNumExplicitHs(atom.GetNumExplicitHs() + 1)
        if atom_id in np_ids:
            atom.SetFormalCharge(atom.GetFormalCharge() - 1)
        # The type is read after the modifications above are applied.
        atom_types.append(ms.get_atom_type(atom))

    bond_info = [[bond.GetBeginAtomIdx(),
                  bond.GetEndAtomIdx(),
                  ms.get_bond_type(bond)]
                 for bond in mol.GetBonds()]

    # shape:
    # atom_types: num_atoms
    # bond_info: num_bonds x 3
    atom_types, bond_info = (np.array(atom_types, dtype=np.int32),
                             np.array(bond_info, dtype=np.int32))

    # initialize packed molecule array data
    mol_array = []

    for sample_id in range(k):
        # get the route and step_ids for the i-th sample
        (route_i,
         step_ids_i) = (route_list[sample_id, :],
                        step_ids_list[sample_id, :])

        # reorder atom types and bond info
        # note: bond_info [start_ids, end_ids, bond_type]
        (atom_types_i,
         bond_info_i,
         is_append) = _reorder(atom_types,
                               bond_info,
                               route_i,
                               step_ids_i)

        # atom type added at each step
        # -1 if the current step is connect
        atom_types_added = np.full([num_bonds, ],
                                   -1,
                                   dtype=np.int32)
        atom_types_added[is_append] = \
            atom_types_i[bond_info_i[:, 1]][is_append]

        # pack into mol_array_i
        # size: num_bonds x 4
        # note: [atom_types_added, start_ids, end_ids, bond_type]
        mol_array_i = np.concatenate([atom_types_added[:, np.newaxis],
                                      bond_info_i],
                                     axis=-1)

        # add initialization step
        init_step = np.array([[atom_types_i[0], -1, 0, -1]], dtype=np.int32)

        # concat into mol_array
        # size: (num_bonds + 1) x 4
        mol_array_i = np.concatenate([init_step, mol_array_i], axis=0)

        # Mark up scaffold bonds: a step is flagged when both its start and
        # end ids are below the number of scaffold nodes.
        is_scaffold = np.logical_and(mol_array_i[:, 1] < len(scaffold_nodes),
                                     mol_array_i[:, 2] < len(scaffold_nodes))
        is_scaffold = is_scaffold.astype(np.int32)

        # Append the scaffold flag as a fifth column
        # size: (num_bonds + 1) x 5
        mol_array_i = np.concatenate((mol_array_i,
                                      is_scaffold[:, np.newaxis]),
                                     axis=-1)

        mol_array.append(mol_array_i)

    # k x (num_bonds + 1) x 5
    mol_array = np.stack(mol_array, axis=0)

    # Output size:
    # mol_array: k x (num_bonds + 1) x 5
    # logp: k

    return mol_array, logp