Example #1
0
  def _featurize(self, datapoint: RDKitMol, **kwargs) -> np.ndarray:
    """
    Build the MAT encoding for a single molecule.

    Parameters
    ----------
    datapoint: RDKitMol
      RDKit mol object.
    **kwargs
      Only the deprecated ``mol`` keyword is inspected.

    Returns
    -------
    MATEncoding
      A MATEncoding instance built from the node feature matrix, adjacency
      matrix and distance matrix, each extended by ``_add_dummy_node`` and
      then passed through ``_pad_sequence``.

    Raises
    ------
    DeprecationWarning
      If ``mol`` is supplied as a keyword argument.
    """
    if 'mol' in kwargs:
      # The deprecated keyword is rejected outright, so its value is never used.
      raise DeprecationWarning(
          'Mol is being phased out as a parameter, please pass "datapoint" instead.'
      )
    from rdkit import Chem

    mol = self.construct_mol(datapoint)

    nodes, adjacency, distances = self._add_dummy_node(
        self.construct_node_features_matrix(mol),
        Chem.GetAdjacencyMatrix(mol),
        Chem.GetDistanceMatrix(mol),
    )

    return MATEncoding(
        self._pad_sequence(nodes),
        self._pad_sequence(adjacency),
        self._pad_sequence(distances),
    )
Example #2
0
def test_mat_encoder_layer():
    """Run MATEncoderLayer on ethane ("CC") and compare with reference values."""
    torch.manual_seed(0)
    from rdkit import Chem

    # Graph inputs derived from the two-atom molecule.
    ethane = Chem.MolFromSmiles("CC")
    adj_matrix = Chem.GetAdjacencyMatrix(ethane)
    distance_matrix = Chem.GetDistanceMatrix(ethane)

    input_ar = torch.Tensor([[1., 2.], [5., 6.]])
    mask = torch.Tensor([[1., 1.], [1., 1.]])

    layer_options = dict(
        dist_kernel='softmax',
        lambda_attention=0.33,
        lambda_distance=0.33,
        h=2,
        sa_hsize=2,
        sa_dropout_p=0.0,
        output_bias=True,
        d_input=2,
        d_hidden=2,
        d_output=2,
        activation='relu',
        n_layers=2,
        ff_dropout_p=0.0,
        encoder_hsize=2,
        encoder_dropout_p=0.0,
    )
    layer = torch_layers.MATEncoderLayer(**layer_options)

    result = layer(input_ar, mask, adj_matrix, distance_matrix, 0.0)

    expected = torch.tensor([
        [[0.9988, 2.0012], [-0.9999, 3.9999],
         [0.9988, 2.0012], [-0.9999, 3.9999]],
        [[5.0000, 6.0000], [3.0000, 8.0000],
         [5.0000, 6.0000], [3.0000, 8.0000]],
    ])
    assert torch.allclose(result, expected, rtol=1e-4)
Example #3
0
def create_adjancy_matrix(mol):
    """Return the RDKit adjacency matrix of ``mol`` as int8 with a unit diagonal."""
    adj = np.array(Chem.GetAdjacencyMatrix(mol), dtype=np.int8)
    # Off-diagonal entries stay as RDKit reported them; only self-loops are added.
    for atom_idx in range(len(adj)):
        adj[atom_idx, atom_idx] = 1
    return adj
Example #4
0
def extract_info(smiles: str):
    """Print a textual summary of the molecule described by ``smiles``.

    Printed, in order: the SMARTS string, then for each atom its index,
    symbol, atomic number, hybridization and symbol again, then every bond
    object, and finally the adjacency matrix.

    Raises:
        AssertionError: if the parsed molecule is falsy (e.g. ``None``).
    """
    # Convert the SMILES string to an RDKit Mol object; a falsy result aborts.
    mol = Chem.MolFromSmiles(smiles)
    assert mol

    print('SMARTS strings: %s' % Chem.MolToSmarts(mol))

    # Per-atom information.
    for idx, atom in enumerate(mol.GetAtoms()):
        print('Information on atom #%i in the molecule: ' % idx)
        print('\tAtom: %s (%i)' % (atom.GetSymbol(), atom.GetAtomicNum()))
        print(atom.GetHybridization())
        print(atom.GetSymbol())

    # Bond objects, then the full adjacency matrix.
    for bond in mol.GetBonds():
        print(bond)
    print(Chem.GetAdjacencyMatrix(mol))
Example #5
0
    def create_dataset(self, filename, dataset, radius, device):
        """Load ``'../dataset/' + dataset + '/' + filename`` into a list of tensors.

        The first line of the file is read and discarded. Every remaining line
        must contain exactly two whitespace-separated fields: a SMILES string
        and an integer property. Lines whose SMILES contains '.' are dropped.

        Returns a list of ``(fingerprints, adjacency, molecular_size, property)``
        tuples, where all but ``molecular_size`` are tensors moved to ``device``.
        """
        dir_dataset = '../dataset/' + dataset + '/'
        with open(dir_dataset + filename, 'r') as f:
            f.readline()  # header line; its contents are not used
            rows = f.read().strip().split('\n')

        # Exclude the rows whose SMILES contains '.'.
        rows = [row for row in rows if '.' not in row.split()[0]]

        samples = []
        for row in rows:
            smiles, label = row.strip().split()

            # Build the graph description with the helper methods of this class.
            mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
            atoms = self.create_atoms(mol, self.atom_dict)
            molecular_size = len(atoms)
            i_jbond_dict = self.create_ijbonddict(mol, self.bond_dict)
            fingerprints = self.extract_fingerprints(
                radius, atoms, i_jbond_dict,
                self.fingerprint_dict, self.edge_dict)
            adjacency = Chem.GetAdjacencyMatrix(mol)

            # Convert to tensors on the requested device.
            fingerprint_tensor = torch.LongTensor(fingerprints).to(device)
            adjacency_tensor = torch.FloatTensor(adjacency).to(device)
            label_tensor = torch.LongTensor([int(label)]).to(device)

            samples.append((fingerprint_tensor, adjacency_tensor,
                            molecular_size, label_tensor))

        return samples
Example #6
0
    def __init__(
        self, mol, radius: int = 2, nbits: int = 2048, n_feat: np.array = None
    ):
        """Store the molecule and seed the radius-0 identifiers.

        ``n_feat`` holds one feature row per atom; when it is ``None`` the rows
        come from ``self.createNodeFeatures()``. Each row is cast to int32, its
        entries are concatenated as strings, and the hash of that string becomes
        the atom's identifier at radius 0.
        """
        self.mol = mol
        self.radius = radius
        self.nbits = nbits
        self.fps = np.zeros(shape=(self.nbits,), dtype=np.int32)

        if n_feat is None:
            n_feat = self.createNodeFeatures()
        n_feat = np.array(n_feat, dtype=np.int32)

        self.adj = Chem.GetAdjacencyMatrix(mol)

        # One (initially empty) identifier table per radius 0..radius.
        self.identifier: Dict[int, Dict[int, int]] = defaultdict(dict)
        for level in range(radius + 1):
            self.identifier[level] = {}

        # Radius-0 identifiers: hash of the concatenated node features.
        initial_hashes = [hash("".join(map(str, row))) for row in n_feat]
        self.identifier[0].update(enumerate(initial_hashes))
Example #7
0
def test_smiles_from_adjacent_matrix(smiles):
    """Round-trip ``smiles`` through its adjacency matrix and check it is recovered.

    The molecule is reduced to atoms, formal charge and adjacency matrix, then
    rebuilt with ``x2m.AC2mol``; the canonical SMILES of the hydrogen-stripped
    original must appear among the rebuilt candidates.
    """
    charged_fragments = True
    quick = True

    # Take the molecule apart.
    mol = get_mol(smiles)
    atoms = get_atoms(mol)
    charge = Chem.GetFormalCharge(mol)
    adjacent_matrix = Chem.GetAdjacencyMatrix(mol)

    # Reference SMILES of the input without explicit hydrogens.
    canonical_smiles = Chem.MolToSmiles(Chem.RemoveHs(mol))

    # Rebuild candidates from the atoms, adjacency matrix and total charge.
    proto_mol = x2m.get_proto_mol(atoms)
    candidates = x2m.AC2mol(proto_mol, adjacent_matrix, atoms, charge,
                            charged_fragments, quick)

    candidate_smiles = [
        Chem.MolToSmiles(Chem.RemoveHs(candidate)) for candidate in candidates
    ]

    assert canonical_smiles in candidate_smiles

    return
Example #8
0
def read_graph(source_path, MAX_size):
    """Read SMILES lines and build padded vertex lists and normalized adjacencies.

    Lines are read from ``source_path`` until the first empty (after strip)
    line or end of file. Each atom symbol is mapped to an integer id assigned
    in order of first appearance, starting at 1; 0 is the padding value.

    Args:
        source_path: path opened with ``tf.gfile.GFile``.
        MAX_size: size that vertex lists and adjacency matrices are padded to.

    Returns:
        A tuple ``(Vertex, Adj, max_size)``:
        ``Vertex`` is a list of int32 arrays of atom ids, ``Adj`` is a list of
        ``D^-1/2 (A + I) D^-1/2`` matrices, and ``max_size`` is the largest
        atom count seen before padding.
    """
    PAD = 0
    Vertex = []
    Adj = []  # Normalized adjacency matrices
    mydict = {}
    mycount = 1
    max_size = 0
    with tf.gfile.GFile(source_path, mode="r") as source_file:
        source = source_file.readline().strip()
        while source:
            mol = Chem.MolFromSmiles(source)

            # Map every atom symbol to its integer id, allocating new ids lazily.
            atom_list = []
            for a in mol.GetAtoms():
                m = a.GetSymbol()
                if m not in mydict:
                    mydict[m] = mycount
                    mycount = mycount + 1
                atom_list.append(mydict[m])

            if len(atom_list) > max_size:
                max_size = len(atom_list)

            if len(atom_list) < MAX_size:
                atom_list = atom_list + [PAD] * (MAX_size - len(atom_list))

            Vertex.append(np.array(atom_list, np.int32))

            # Pad every adjacency row, then append all-PAD rows up to MAX_size.
            adja_mat = Chem.GetAdjacencyMatrix(mol)
            adj_temp = []
            for adja in adja_mat:
                if len(adja) < MAX_size:
                    pad = [PAD] * (MAX_size - len(adja))
                    adja = np.array(list(adja) + pad, np.int32)
                adj_temp.append(adja)
            cur_len = len(adj_temp)
            for _ in range(MAX_size - cur_len):
                adj_temp.append(np.array([PAD] * MAX_size, np.int32))

            adj_temp = adj_temp + np.eye(MAX_size)  # A_hat = A + I
            deg = np.power(np.sum(adj_temp, axis=1), -0.5)
            # A row summing to 1 holds only its self-loop (every padding row
            # does); its scaling factor is zeroed so the row vanishes.
            deg_new = np.array([0 if d == 1 else d for d in deg])
            deg_diag = np.diag(deg_new)
            adj = np.matmul(np.matmul(deg_diag, adj_temp), deg_diag)  # normalized
            Adj.append(adj)

            source = source_file.readline().strip()
    return Vertex, Adj, max_size
Example #9
0
def CalculateBalaban(mol):
    """
    #################################################################
    Balaban index (J) of a molecule.

    Usage:

        result=CalculateBalaban(mol)

        Input: mol is a molecule object

        Output: result is a numeric value (0 when mu + 1 equals 0)
    #################################################################
    """
    adjacency = Chem.GetAdjacencyMatrix(mol)
    distances = Chem.GetDistanceMatrix(mol)
    n_bonds = mol.GetNumBonds()
    n_atoms = mol.GetNumAtoms()

    # Distance sum of every vertex.
    distance_sums = numpy.sum(distances, axis=1)
    mu = n_bonds - n_atoms + 1

    # Accumulate 1/sqrt(s_i * s_j) over bonded pairs with i <= j.
    size = len(distances)
    bond_term = 0.
    for i in range(size):
        s_i = distance_sums[i]
        for j in range(i, size):
            if adjacency[i, j] != 1:
                continue
            bond_term += 1. / numpy.sqrt(s_i * distance_sums[j])

    if mu + 1 == 0:
        return 0
    return float(n_bonds) / float(mu + 1) * bond_term
def extract_smiles(xyz_file, charge, allow_charge=True, check_ac=False):
    """
    uses xyz2mol to extract smiles with as much 3d structural information as
    possible

    The molecule is first built with ``use_huckel=True``; if that raises, the
    build is retried with ``use_huckel=False``. When ``check_ac`` is true the
    adjacency matrix of the built molecule is compared with the module-level
    ``AC`` and the global ``AC_SAME`` is set to False on any difference.

    Returns a tuple ``(smiles, formal_charge, resonance_status)``.
    """
    atoms, _, xyz_coordinates = xyz2mol_local.read_xyz_file(xyz_file)

    def _build_mol(use_huckel):
        # Both attempts differ only in the Hückel switch.
        return xyz2mol_local.xyz2mol(atoms, xyz_coordinates, charge=charge,
                                     use_graph=True,
                                     allow_charged_fragments=allow_charge,
                                     use_huckel=use_huckel, use_atom_maps=True,
                                     embed_chiral=True)

    try:
        input_mol = _build_mol(True)
    except Exception:
        # Catch ordinary failures only, so KeyboardInterrupt/SystemExit are
        # not turned into a silent retry.
        input_mol = _build_mol(False)

    input_mol = reorder_atoms_to_map(input_mol)
    structure_mol, res_status = choose_resonance_structure(input_mol)
    structure_mol = chiral_tags(structure_mol)
    rdmolops.AssignStereochemistry(structure_mol)
    structure_smiles = Chem.MolToSmiles(structure_mol)

    if check_ac:
        global AC_SAME
        ac = Chem.GetAdjacencyMatrix(input_mol)
        if not np.all(AC == ac):
            AC_SAME = False
            print("change in AC: stopping")

    return structure_smiles, GetFormalCharge(structure_mol), res_status
Example #11
0
    def calculate(self, An=None, A1=None):
        """Return the adjacency matrix for order 1, otherwise ``An.dot(A1)``.

        For ``self.order == 1`` the matrix is recomputed by RDKit
        (``force=True``) using ``self.useBO``; ``An`` and ``A1`` are ignored.
        """
        if self.order != 1:
            return An.dot(A1)

        return Chem.GetAdjacencyMatrix(self.mol, useBO=self.useBO, force=True)
Example #12
0
def _GetBurdenMatrix(mol: Chem.Mol, propertylabel: str = 'm') -> numpy.matrix:
    """Build the weighted Burden matrix and return the real parts of its eigenvalues.

    Hydrogens are added first. The matrix is float32 and filled as follows:
    diagonal entries hold the relative atomic property selected by
    ``propertylabel`` (rounded to 3 decimals); bonded pairs hold the square
    root of the bond order for single/double/triple/aromatic bonds (other bond
    types keep the adjacency value); all remaining off-diagonal entries are
    0.001.
    """
    mol = Chem.AddHs(mol)
    adjacency = Chem.GetAdjacencyMatrix(mol)
    burden = numpy.array(adjacency, dtype=numpy.float32)

    # Diagonal: per-atom property relative to carbon.
    for atom_idx in range(mol.GetNumAtoms()):
        symbol = mol.GetAtomWithIdx(atom_idx).GetSymbol()
        weight = GetRelativeAtomicProperty(element=symbol,
                                           propertyname=propertylabel)
        burden[atom_idx, atom_idx] = round(weight, 3)

    # Bonded pairs: square root of the conventional bond order.
    bond_weights = {
        'SINGLE': round(numpy.sqrt(1), 3),
        'DOUBLE': round(numpy.sqrt(2), 3),
        'TRIPLE': round(numpy.sqrt(3), 3),
        'AROMATIC': round(numpy.sqrt(1.5), 3),
    }
    for begin, end in numpy.argwhere(adjacency):
        bond = mol.GetBondBetweenAtoms(int(begin), int(end))
        type_name = bond.GetBondType().name
        if type_name in bond_weights:
            burden[begin, end] = bond_weights[type_name]

    # Non-bonded, off-diagonal pairs get a small constant.
    for row, col in numpy.argwhere(adjacency == 0):
        if row != col:
            burden[row, col] = 0.001

    return numpy.real(numpy.linalg.eigvals(burden))
Example #13
0
def create_multi_adjancy_matrix(mol):
    """Return a 5-channel adjacency tensor of shape ``(5, n_atoms, n_atoms)``.

    Channels 0-3 mark single, double, triple and aromatic bonds; channel 4
    marks every other bond type. Each bond sets only the
    ``[channel, begin_atom, end_atom]`` entry, and every channel gets a unit
    diagonal.
    """
    num = mol.GetNumAtoms()
    nch = 5
    # ``np.int`` was removed in NumPy 1.24; it was an alias of the builtin int.
    adj = np.zeros((nch, num, num), dtype=int)

    bond_types = Chem.rdchem.BondType
    channel_of = {
        bond_types.SINGLE: 0,
        bond_types.DOUBLE: 1,
        bond_types.TRIPLE: 2,
        bond_types.AROMATIC: 3,
    }
    for b in mol.GetBonds():
        ch = channel_of.get(b.GetBondType(), nch - 1)
        # NOTE(review): only the begin->end direction is set, so the result is
        # not symmetric — confirm this is what consumers expect.
        adj[ch, b.GetBeginAtomIdx(), b.GetEndAtomIdx()] = 1

    # Self-loops in every channel.
    for i in range(num):
        adj[:, i, i] = 1
    return adj
Example #14
0
 def create_adjacency(mol):
     """
     Copy the RDKit adjacency matrix of a molecule into an int32 array.

     :param mol: rdkit.Chem.Mol object
     :return: int32 numpy array built from Chem.GetAdjacencyMatrix(mol)
     """
     return np.array(Chem.GetAdjacencyMatrix(mol), dtype=np.int32)
Example #15
0
def CalculateSchiultz(mol: Chem.Mol) -> float:
    """Get Schiultz number.

    Or Tsch.

    Computed as the sum of the entries of ``(D + A) . v``, where ``D`` is the
    distance matrix, ``A`` the adjacency matrix and ``v`` the vector of column
    sums of ``A``.
    """
    Distance = numpy.array(Chem.GetDistanceMatrix(mol), 'd')
    Adjacent = numpy.array(Chem.GetAdjacencyMatrix(mol), 'd')
    VertexDegree = Adjacent.sum(axis=0)
    # numpy.dot replaces the deprecated scipy.dot alias of the same function.
    return numpy.sum(numpy.dot(Distance + Adjacent, VertexDegree))
Example #16
0
def create_adjacency(mol):
    """Return ``D^-1/2 (A + I) D^-1/2`` for the molecule's adjacency matrix ``A``.

    ``D`` is the diagonal matrix of column sums of ``A + I``.
    """
    raw = Chem.GetAdjacencyMatrix(mol)
    with_loops = raw + np.eye(raw.shape[0])  # add self-loops

    column_sums = sum(with_loops)
    inv_sqrt_degree = np.linalg.inv(np.sqrt(np.diag(column_sums)))

    normalised = np.matmul(inv_sqrt_degree,
                           np.matmul(with_loops, inv_sqrt_degree))
    return np.array(normalised)
Example #17
0
def valences_not_too_large(mol):
    """Return True unless some atom's bond-order sum exceeds its allowed valence.

    Allowed valences are looked up by atomic number in a fixed table; an atom
    whose atomic number is not in the table raises ``KeyError``.
    """
    max_valence = {5: 3, 6: 4, 7: 3, 8: 2, 9: 1, 16: 6, 17: 1, 35: 1, 53: 1}
    atomic_numbers = [atom.GetAtomicNum() for atom in mol.GetAtoms()]
    limits = [max_valence[number] for number in atomic_numbers]

    # Row sums of the bond-order matrix give each atom's total bond order.
    bond_order_sums = Chem.GetAdjacencyMatrix(mol, useBO=True).sum(axis=1)

    return not any(
        total > limit for limit, total in zip(limits, bond_order_sums)
    )
Example #18
0
def CalculateSchiultz(mol):
    """
    Calculation of Schiultz number
        Parameters:
            mol: RDKit molecule object
        Returns:
            Tsch: sum of the entries of (D + A) . v, where D is the distance
            matrix, A the adjacency matrix and v the column sums of A
    """
    Distance = numpy.array(Chem.GetDistanceMatrix(mol), 'd')
    Adjacent = numpy.array(Chem.GetAdjacencyMatrix(mol), 'd')
    VertexDegree = Adjacent.sum(axis=0)
    # numpy.dot replaces the deprecated scipy.dot alias of the same function.
    return numpy.sum(numpy.dot(Distance + Adjacent, VertexDegree))
Example #19
0
def buildMPNN(molecule, med_voc, radius=1, device="cpu:0"):
    """Build per-molecule MPNN inputs and a group-averaging projection matrix.

    Args:
        molecule: mapping from a key (the values of ``med_voc``) to an iterable
            of SMILES strings.
        med_voc: mapping whose values select entries of ``molecule``; iteration
            order defines the row order of the projection matrix.
        radius: passed to ``extract_fingerprints``.
        device: device the fingerprint and adjacency tensors are moved to.

    Returns:
        ``(MPNNSet, N_fingerprint, average_projection)`` where ``MPNNSet`` is a
        list of ``(fingerprints, adjacency, molecular_size)`` for every SMILES
        that could be processed, ``N_fingerprint`` is the number of distinct
        fingerprints seen, and ``average_projection`` is a FloatTensor whose
        row ``i`` holds ``1 / count_i`` over the columns of group ``i``.
    """
    atom_dict = defaultdict(lambda: len(atom_dict))
    bond_dict = defaultdict(lambda: len(bond_dict))
    fingerprint_dict = defaultdict(lambda: len(fingerprint_dict))
    edge_dict = defaultdict(lambda: len(edge_dict))
    MPNNSet, average_index = [], []

    for atc3 in med_voc.values():
        counter = 0  # number of molecules successfully processed in this group
        for smiles in list(molecule[atc3]):
            try:
                mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
                atoms = create_atoms(mol, atom_dict)
                molecular_size = len(atoms)
                i_jbond_dict = create_ijbonddict(mol, bond_dict)
                fingerprints = extract_fingerprints(radius, atoms, i_jbond_dict,
                                                    fingerprint_dict, edge_dict)
                adjacency = Chem.GetAdjacencyMatrix(mol)
                # Pad the fingerprint vector with 1s up to the number of atoms.
                for _ in range(adjacency.shape[0] - fingerprints.shape[0]):
                    fingerprints = np.append(fingerprints, 1)

                # Move the numpy data to tensors on the requested device.
                fingerprints = torch.LongTensor(fingerprints).to(device)
                adjacency = torch.FloatTensor(adjacency).to(device)
                MPNNSet.append((fingerprints, adjacency, molecular_size))
                counter += 1
            except Exception:
                # Best effort: a molecule that cannot be processed is skipped.
                # Only ordinary errors are swallowed, not KeyboardInterrupt.
                continue

        average_index.append(counter)

    N_fingerprint = len(fingerprint_dict)

    # Projection matrix that averages the molecules of each group.
    n_col = sum(average_index)
    n_row = len(average_index)

    average_projection = np.zeros((n_row, n_col))
    col_counter = 0
    for i, item in enumerate(average_index):
        if item > 0:
            average_projection[i, col_counter:col_counter + item] = 1 / item
        col_counter += item

    return MPNNSet, N_fingerprint, torch.FloatTensor(average_projection)
    def _process_row(self, smiles, label=None):
        """Turn one SMILES string (and optional label) into a ``Data`` object.

        With no feature extractor configured, the features are the atom count
        (``num_nodes``) and the indices of the non-zero adjacency entries
        (``edge_index``); otherwise they are whatever
        ``self.feature_extractor(mol)`` returns.
        """
        mol = Chem.MolFromSmiles(smiles)

        if self.feature_extractor is not None:
            features = self.feature_extractor(mol)
        else:
            adj = Chem.GetAdjacencyMatrix(mol)
            nonzero_pairs = np.stack(np.nonzero(adj))
            features = dict(
                num_nodes=adj.shape[0],
                edge_index=torch.LongTensor(nonzero_pairs),
            )

        return Data(y=label, **features)
Example #21
0
def create_dataset_randomsplit(x, y, path, dataname):
    """Build a dataset of graph tensors from InChI strings and target values.

    The module-level ``atom_dict``, ``bond_dict``, ``edge_dict`` and
    ``fingerprint_dict`` are first updated from the ``SMRT-*.pickle`` files
    under ``path``. After processing, the four dictionaries are dumped again
    under ``path`` with the ``dataname`` prefix.

    Args:
        x: indexable collection of InChI strings.
        y: indexable collection of target values, aligned with ``x``.
        path: prefix for both the input and the output pickle files.
        dataname: prefix of the dumped dictionary file names.

    Returns:
        List of ``(inchi, fingerprints, adjacency, molecular_size, target)``.
    """
    dir_input = path + 'SMRT-'
    # NOTE(security): pickle.load can execute arbitrary code; these files must
    # come from a trusted source.
    pretrained = (
        (atom_dict, 'atom_dict.pickle'),
        (bond_dict, 'bond_dict.pickle'),
        (edge_dict, 'edge_dict.pickle'),
        (fingerprint_dict, 'fingerprint_dict.pickle'),
    )
    for target_dict, file_name in pretrained:
        with open(dir_input + file_name, 'rb') as f:
            target_dict.update(pickle.load(f))

    dataset = []
    for i in range(len(x)):
        smiles = x[i]  # holds an InChI string; it is parsed with MolFromInchi
        target = y[i]
        # Create each data item with the module-level helper functions.
        mol = Chem.AddHs(Chem.MolFromInchi(smiles))
        atoms = create_atoms(mol, atom_dict)
        molecular_size = len(atoms)
        i_jbond_dict = create_ijbonddict(mol, bond_dict)
        fingerprints = extract_fingerprints(radius, atoms, i_jbond_dict,
                                            fingerprint_dict, edge_dict)
        adjacency = np.float32((Chem.GetAdjacencyMatrix(mol)))
        # Transform the numpy data to pytorch tensors on the module-level device.
        fingerprints = torch.LongTensor(fingerprints).to(device)
        adjacency = torch.FloatTensor(adjacency).to(device)
        target = torch.FloatTensor([[float(target)]]).to(device)

        dataset.append(
            (smiles, fingerprints, adjacency, molecular_size, target))

    dir_dataset = path
    dump_dictionary(fingerprint_dict,
                    dir_dataset + dataname + '-fingerprint_dict.pickle')
    dump_dictionary(atom_dict, dir_dataset + dataname + '-atom_dict.pickle')
    dump_dictionary(bond_dict, dir_dataset + dataname + '-bond_dict.pickle')
    dump_dictionary(edge_dict, dir_dataset + dataname + '-edge_dict.pickle')
    return dataset
Example #22
0
def read_graph(source_path,MAX_size):
    """Read SMILES lines into padded atom-id vectors and ``A + I`` matrices.

    Reading stops at the first line that is empty after stripping (or at end
    of file). Atom symbols get integer ids in order of first appearance,
    starting at 1; 0 is used for padding.

    Returns ``(Vertex, Adj, max_size)``: the list of int32 atom-id arrays, the
    list of adjacency matrices with the identity added, and the largest atom
    count seen before padding.
    """
    PAD = 0
    Vertex, Adj = [], []
    mydict = {}
    max_size = 0
    with tf.gfile.GFile(source_path, mode="r") as source_file:
        # The sentinel '' ends the loop exactly where the stripped line is empty.
        for source in iter(lambda: source_file.readline().strip(), ''):
            mol = Chem.MolFromSmiles(source)

            # The next free id is always len(mydict) + 1 because ids start at 1.
            atom_list = [
                mydict.setdefault(atom.GetSymbol(), len(mydict) + 1)
                for atom in mol.GetAtoms()
            ]
            max_size = max(max_size, len(atom_list))

            missing = MAX_size - len(atom_list)
            if missing > 0:
                atom_list = atom_list + [PAD] * missing
            Vertex.append(np.array(atom_list, np.int32))

            # Pad short rows on the right, then add all-PAD rows at the bottom.
            rows = [
                np.array(list(row) + [PAD] * (MAX_size - len(row)), np.int32)
                if len(row) < MAX_size else row
                for row in Chem.GetAdjacencyMatrix(mol)
            ]
            rows.extend(
                np.array([PAD] * MAX_size, np.int32)
                for _ in range(MAX_size - len(rows))
            )

            Adj.append(rows + np.eye(MAX_size))  # A_hat = A + I
    return Vertex, Adj, max_size
Example #23
0
def get_adjacency_matrix(smiles: str):
    """
    Parse a SMILES string and return the adjacency matrix of its atoms.

    Only single molecules are handled at the moment, not reactions.

    Args:
        smiles: SMILES representation of a molecule

    Returns:
        The array produced by `Chem.GetAdjacencyMatrix` for the parsed molecule.
    """
    parsed = Chem.MolFromSmiles(smiles)
    adjacency = Chem.GetAdjacencyMatrix(parsed)
    return adjacency
Example #24
0
    def create_dataset(filepath):
        """Load a tab-separated file and featurize columns 6 and 7 of every line.

        Each line yields a pair of SMILES strings (fields 6 and 7). A SMILES
        that is empty, contains '.', or cannot be converted by RDKit is
        replaced by the placeholder molecule 'CC' and gets mask ``[0]``; every
        other SMILES gets mask ``[1]``.

        Returns a list with one entry per line; each entry is a list of
        ``(fingerprints, adjacency, molecular_size, mask)`` tuples, one per
        SMILES of the pair.
        """
        with open(filepath, 'r') as f:
            data_original = f.readlines()
        print(len(data_original))

        pairs = []
        for line in data_original:
            fields = line.strip('\n').split('\t')
            pairs.append([fields[6], fields[7]])

        dataset = []
        for pair in pairs:
            dataset_ = []
            for smiles in pair:
                mol = None
                if smiles != '' and '.' not in smiles:
                    try:
                        mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
                    except Exception:
                        # Invalid SMILES: fall through to the placeholder.
                        mol = None

                if mol is None:
                    # Placeholder for unusable entries, flagged by a zero mask.
                    mol = Chem.AddHs(Chem.MolFromSmiles('CC'))
                    mask = [0]
                else:
                    mask = [1]

                # Create each data item with the helper functions.
                atoms = create_atoms(mol, atom_dict)
                molecular_size = len(atoms)
                i_jbond_dict = create_ijbonddict(mol, bond_dict)
                fingerprints = extract_fingerprints(radius, atoms, i_jbond_dict,
                                                    fingerprint_dict, edge_dict)
                adjacency = Chem.GetAdjacencyMatrix(mol)

                dataset_.append((fingerprints, adjacency, molecular_size, mask))
            dataset.append(dataset_)

        return dataset
Example #25
0
def smiles_to_mol_graph(smiles):
    """Convert a SMILES string into ``(mol, molecular_graph)``.

    Node features are taken from ``atomic_props`` by atomic number; the feature
    width is that of the entry for atomic number 1. If any step fails, a
    message is printed and ``(None, None)`` is returned.
    """
    try:
        mol = Chem.MolFromSmiles(smiles)
        adj_mat = Chem.GetAdjacencyMatrix(mol)
        node_feat_mat = np.empty([mol.GetNumAtoms(), atomic_props.get(1).shape[0]])

        for ind, atom in enumerate(mol.GetAtoms()):
            node_feat_mat[ind, :] = atomic_props.get(atom.GetAtomicNum())

        return mol, construct_mol_graph(smiles, mol, adj_mat, node_feat_mat)
    except Exception:
        # Only ordinary errors are reported; KeyboardInterrupt and SystemExit
        # propagate instead of being reported as a conversion failure.
        print(smiles + ' could not be converted to molecular graph due to the internal errors of RDKit')
        return None, None
Example #26
0
    def adjacency_matrix(self, bond_orders=False, force=True):
        """ Vertex adjacency matrix of ``self.owner``.

        Args:
            bond_orders (bool):
                Forwarded to RDKit as ``useBO``.
            force (bool):
                Forwarded to RDKit as ``force`` (recalculate instead of using
                the cached value).

        Returns:
            np.array[int]
        """
        rdkit_options = {'useBO': bond_orders, 'force': force}
        return Chem.GetAdjacencyMatrix(self.owner, **rdkit_options)
Example #27
0
def smiles_to_pseudo_xyz(smiles):
    """Build placeholder ``ExtendedXyz`` configurations from SMILES strings.

    Every SMILES that RDKit can parse yields one configuration with explicit
    hydrogens, all positions set to zero, and the adjacency matrix (as floats)
    stored under ``info["lmat"]``. Unparsable SMILES are skipped.

    Returns the list of configurations, in input order.
    """
    configs = []
    for smi in smiles:
        mol = chem.MolFromSmiles(smi)
        # Check for a failed parse before AddHs, which cannot accept None.
        if mol is None:
            continue
        mol = chem.AddHs(mol)
        symbols = [a.GetSymbol() for a in mol.GetAtoms()]
        pos = np.zeros((len(symbols), 3))
        config = readwrite.ExtendedXyz(pos=pos, symbols=symbols)
        config.info["lmat"] = 1. * chem.GetAdjacencyMatrix(mol)
        configs.append(config)
    return configs
def take_elementary_step(mol, charge, E_cutoff, heterolytic, quick):
    """Enumerate the products reachable from ``mol`` by one elementary step.

    Candidate adjacency matrices come from ``get_I_elementary``; each is turned
    into a molecule with ``xyz2mol.AC2mol`` (chirality copied from the parent
    when the parent has chiral centres). Candidates are de-duplicated by
    SMILES, and kept when their SMILES can be re-parsed and
    ``energy_of_reactant - energy < E_cutoff``.

    Returns ``(smiles_list, molecules)`` with the reactant itself at index 0.
    """
    chiral_parent = Chem.FindMolChiralCenters(mol, includeUnassigned=True)
    parent_is_chiral = len(chiral_parent) > 0
    if parent_is_chiral:
        atom2chirality = {key: value for (key, value) in chiral_parent}

    atomicNumList = [a.GetAtomicNum() for a in mol.GetAtoms()]
    proto_mol = xyz2mol.get_proto_mol(atomicNumList)

    AC = Chem.GetAdjacencyMatrix(mol)

    num_atoms = len(atomicNumList)
    I_elementary = get_I_elementary(AC, num_atoms, atomicNumList)

    # Lists keep first-seen order; the set makes the duplicate check O(1).
    raw_smiles_list = []
    raw_molecules = []
    seen_raw = set()
    for I in I_elementary:
        newmol = xyz2mol.AC2mol(proto_mol, I, atomicNumList, charge,
                                heterolytic, quick)
        if parent_is_chiral:
            newmol = set_chirality(mol, newmol, atom2chirality)

        raw_smiles = Chem.MolToSmiles(newmol, isomericSmiles=True)
        if raw_smiles not in seen_raw:
            seen_raw.add(raw_smiles)
            raw_smiles_list.append(raw_smiles)
            raw_molecules.append(newmol)

    smiles_list = []
    molecules = []
    accepted = set()
    energy_of_reactant = get_BO_energy(mol)
    for smiles, raw_mol in zip(raw_smiles_list, raw_molecules):
        try:
            test_mol = Chem.MolFromSmiles(smiles)
        except Exception:
            # A candidate whose SMILES cannot be re-parsed is discarded.
            continue
        if test_mol is not None:
            energy = get_BO_energy(raw_mol)
            if smiles not in accepted and energy_of_reactant - energy < E_cutoff:
                accepted.add(smiles)
                smiles_list.append(smiles)
                molecules.append(raw_mol)

    # The reactant always comes first.
    smiles_list.insert(0, Chem.MolToSmiles(mol, isomericSmiles=True))
    molecules.insert(0, mol)

    return smiles_list, molecules
Example #29
0
def transferlearning_dataset_predict(x, path):
    """Build prediction inputs from SMILES strings using pre-trained dictionaries.

    The module-level ``atom_dict``, ``bond_dict``, ``edge_dict`` and
    ``fingerprint_dict`` are first updated from the ``SMRT-*.pickle`` files
    under ``path``. SMILES that RDKit cannot parse are skipped.

    Args:
        x: indexable collection of SMILES strings.
        path: prefix of the pickle files to load.

    Returns:
        List of ``(smiles, fingerprints, adjacency, molecular_size)``.
    """
    dir_input = path + 'SMRT-'
    # NOTE(security): pickle.load can execute arbitrary code; these files must
    # come from a trusted source.
    pretrained = (
        (atom_dict, 'atom_dict.pickle'),
        (bond_dict, 'bond_dict.pickle'),
        (edge_dict, 'edge_dict.pickle'),
        (fingerprint_dict, 'fingerprint_dict.pickle'),
    )
    for target_dict, file_name in pretrained:
        with open(dir_input + file_name, 'rb') as f:
            target_dict.update(pickle.load(f))

    dataset = []
    for i in range(len(x)):
        smiles = x[i]
        # Parse once; a None result means RDKit rejected the SMILES.
        parsed = Chem.MolFromSmiles(smiles)
        if parsed is None:
            continue
        mol = Chem.AddHs(parsed)
        atoms = create_atoms(mol, atom_dict)
        molecular_size = len(atoms)
        i_jbond_dict = create_ijbonddict(mol, bond_dict)
        fingerprints = extract_fingerprints(radius, atoms, i_jbond_dict,
                                            fingerprint_dict, edge_dict)
        adjacency = np.float32((Chem.GetAdjacencyMatrix(mol)))
        # Transform the numpy data to pytorch tensors on the module-level device.
        fingerprints = torch.LongTensor(fingerprints).to(device)
        adjacency = torch.FloatTensor(adjacency).to(device)
        dataset.append((smiles, fingerprints, adjacency, molecular_size))
    return dataset
Example #30
0
def CalculateBalaban(mol):
    """Return the Balaban index J of ``mol`` (0 when ``mu + 1`` equals 0).

    ``J = Nbond / (mu + 1) * sum(1 / sqrt(s_i * s_j))`` over bonded atom pairs,
    where ``s`` are the row sums of the distance matrix and
    ``mu = Nbond - Natom + 1``.
    """
    adjacency = Chem.GetAdjacencyMatrix(mol)
    distances = Chem.GetDistanceMatrix(mol)
    n_bonds = mol.GetNumBonds()
    n_atoms = mol.GetNumAtoms()

    distance_sums = numpy.sum(distances, axis=1)
    mu = n_bonds - n_atoms + 1

    # Accumulate over bonded pairs with i <= j, in row-major order.
    size = len(distances)
    bond_term = 0.
    for i in range(size):
        s_i = distance_sums[i]
        for j in range(i, size):
            if adjacency[i, j] != 1:
                continue
            bond_term += 1. / numpy.sqrt(s_i * distance_sums[j])

    if mu + 1 == 0:
        return 0
    return float(n_bonds) / float(mu + 1) * bond_term