Python Chem.GetAdjacencyMatrix Examples

Programming Language: Python

Namespace/Package Name: rdkit

Class/Type: Chem

Method/Function: GetAdjacencyMatrix

Examples at hotexamples.com: 30

The Python function rdkit.Chem.GetAdjacencyMatrix, part of the RDKit library, generates the adjacency matrix of a molecule or a molecule fragment. The adjacency matrix represents the connectivity between atoms in the molecule: each element indicates whether there is a bond between the corresponding pair of atoms. This function is helpful in molecular modeling and drug discovery applications where analyzing the connectivity pattern is important.

Python Chem.GetAdjacencyMatrix - 30 examples found. These are the top rated real world Python examples of rdkit.Chem.GetAdjacencyMatrix extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

AddHs(30)

CanonSmiles(30)

GetAdjacencyMatrix(30)

FragmentOnBonds(30)

ForwardSDMolSupplier(30)

FindMolChiralCenters(30)

FindAtomEnvironmentOfRadiusN(30)

DeleteSubstructs(30)

Conformer(30)

CombineMols(30)

CanonicalRankAtoms(30)

GetDistanceMatrix(30)

Atom(30)

AssignStereochemistry(30)

Get3DDistanceMatrix(29)

FindAllPathsOfLengthN(29)

EditableMol(24)

AssignAtomChiralTagsFromStructure(24)

AtomPDBResidueInfo(21)

FastFindRings(14)

AdjustQueryProperties(13)

DetectBondStereochemistry(10)

BondType(9)

FindPotentialStereoBonds(9)

Cleanup(8)

AdjustQueryParameters(8)

AtomFromSmiles(5)

AssignStereochemistryFrom3D(5)

EmbedMolecule(5)

FragmentOnBRICSBonds(4)

DetectChemistryProblems(4)

ClearMolSubstanceGroups(4)

CreateAtomDoublePropertyList(3)

ETKDG(3)

CreateMolSubstanceGroup(2)

FindAllSubgraphsOfLengthN(2)

AssignRadicals(2)

FindUniqueSubgraphsOfLengthN(2)

CalcPMI3(1)

GetBondBetweenAtoms(1)

GetBestRMS(1)

GetAtomPairFingerPrint(1)

CalcNumSpiroAtoms(1)

CalcPMI1(1)

FragmentOnSomeBonds(1)

CalcPMI2(1)

ClearMolSGroups(1)

AtomMonomerInfo(1)

ForwardSDMOLSupplier(1)

DetectBondStereoChemistry(1)

Example #1

Show file

  def _featurize(self, datapoint: RDKitMol, **kwargs) -> np.ndarray:
    """
    Featurize the molecule.

    Parameters
    ----------
    datapoint: RDKitMol
      RDKit mol object.
    **kwargs
      Only the deprecated ``mol`` keyword is inspected. When present it
      replaces ``datapoint`` and a ``DeprecationWarning`` is emitted.

    Returns
    -------
    MATEncoding
      A MATEncoding dataclass instance consisting of processed node_features, adjacency_matrix and distance_matrix.
    """
    import warnings

    from rdkit import Chem

    if 'mol' in kwargs:
      datapoint = kwargs.get("mol")
      # Warn rather than raise: raising here made the assignment above dead
      # code and turned the legacy keyword into a hard failure.
      warnings.warn(
          'Mol is being phased out as a parameter, please pass "datapoint" instead.',
          DeprecationWarning,
          stacklevel=2)

    datapoint = self.construct_mol(datapoint)

    node_features = self.construct_node_features_matrix(datapoint)
    adjacency_matrix = Chem.GetAdjacencyMatrix(datapoint)
    distance_matrix = Chem.GetDistanceMatrix(datapoint)

    # A dummy node is added to all three matrices before padding.
    node_features, adjacency_matrix, distance_matrix = self._add_dummy_node(
        node_features, adjacency_matrix, distance_matrix)

    node_features = self._pad_sequence(node_features)
    adjacency_matrix = self._pad_sequence(adjacency_matrix)
    distance_matrix = self._pad_sequence(distance_matrix)

    return MATEncoding(node_features, adjacency_matrix, distance_matrix)

Example #2

Show file

File: test_layers.py Project: Pa-Sky/deepchem

def test_mat_encoder_layer():
    """Run MATEncoderLayer on ethane ("CC") and compare with stored reference values."""
    from rdkit import Chem

    torch.manual_seed(0)

    # Graph matrices come from the two-atom molecule "CC".
    ethane = Chem.MolFromSmiles("CC")
    adj_matrix = Chem.GetAdjacencyMatrix(ethane)
    distance_matrix = Chem.GetDistanceMatrix(ethane)

    input_ar = torch.Tensor([[1., 2.], [5., 6.]])
    mask = torch.Tensor([[1., 1.], [1., 1.]])

    layer_config = dict(
        dist_kernel='softmax',
        lambda_attention=0.33,
        lambda_distance=0.33,
        h=2,
        sa_hsize=2,
        sa_dropout_p=0.0,
        output_bias=True,
        d_input=2,
        d_hidden=2,
        d_output=2,
        activation='relu',
        n_layers=2,
        ff_dropout_p=0.0,
        encoder_hsize=2,
        encoder_dropout_p=0.0,
    )
    layer = torch_layers.MATEncoderLayer(**layer_config)
    result = layer(input_ar, mask, adj_matrix, distance_matrix, 0.0)

    expected = torch.tensor([
        [[0.9988, 2.0012], [-0.9999, 3.9999],
         [0.9988, 2.0012], [-0.9999, 3.9999]],
        [[5.0000, 6.0000], [3.0000, 8.0000],
         [5.0000, 6.0000], [3.0000, 8.0000]],
    ])
    assert torch.allclose(result, expected, rtol=1e-4)

Example #3

Show file

File: utils.py Project: qianrenjian/kGCN-1

def create_adjancy_matrix(mol):
    """Return the RDKit adjacency matrix of ``mol`` as int8 with ones on the diagonal."""
    raw_adjacency = Chem.GetAdjacencyMatrix(mol)
    adj = np.array(raw_adjacency, dtype=np.int8)
    # Off-diagonal entries stay as RDKit reported them; every diagonal
    # entry is overwritten with 1.
    diagonal = np.arange(len(raw_adjacency))
    adj[diagonal, diagonal] = 1
    return adj

Example #4

Show file

File: smiles_extracting.py Project: xduan7/DLTM

def extract_info(smiles: str):
    """Print the SMARTS string, per-atom details, bonds and adjacency matrix of a SMILES.

    Raises ``AssertionError`` (through the ``assert`` below) when the parsed
    molecule is falsy.
    """
    # Convert the SMILES string to an RDKit Mol object first.
    mol = Chem.MolFromSmiles(smiles)
    assert mol

    print('SMARTS strings: %s' % Chem.MolToSmarts(mol))

    # Atoms: index, symbol, atomic number and hybridization.
    for position, current_atom in enumerate(mol.GetAtoms()):
        print('Information on atom #%i in the molecule: ' % position)
        print('\tAtom: %s (%i)' % (current_atom.GetSymbol(),
                                   current_atom.GetAtomicNum()))
        print(current_atom.GetHybridization())
        print(current_atom.GetSymbol())

    # Bonds are printed using their default representation.
    for current_bond in mol.GetBonds():
        print(current_bond)

    print(Chem.GetAdjacencyMatrix(mol))

Example #5

Show file

    def create_dataset(self, filename, dataset, radius, device):
        """Load a dataset file and turn every entry into tensors on ``device``.

        The file is opened at ``'../dataset/' + dataset + '/' + filename``.
        Its first line is consumed and discarded; every remaining line must
        split on whitespace into a SMILES string and an integer property.
        Entries whose SMILES contains ``'.'`` are excluded.

        Returns a list of ``(fingerprints, adjacency, molecular_size,
        property)`` tuples.
        """
        source_path = '../dataset/' + dataset + '/' + filename
        with open(source_path, 'r') as handle:
            handle.readline()  # first line is read but not used
            records = handle.read().strip().split('\n')

        # Exclude the entries that contain '.' in their SMILES.
        records = [record for record in records
                   if '.' not in record.split()[0]]

        samples = []
        for record in records:
            smiles, label = record.strip().split()

            # Build each sample with the helper methods of this object.
            mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
            atoms = self.create_atoms(mol, self.atom_dict)
            molecular_size = len(atoms)
            i_jbond_dict = self.create_ijbonddict(mol, self.bond_dict)
            fingerprints = self.extract_fingerprints(
                radius, atoms, i_jbond_dict,
                self.fingerprint_dict, self.edge_dict)
            adjacency = Chem.GetAdjacencyMatrix(mol)

            # Convert the numpy data to PyTorch tensors on the target
            # device (i.e., CPU or GPU).
            fingerprints = torch.LongTensor(fingerprints).to(device)
            adjacency = torch.FloatTensor(adjacency).to(device)
            label = torch.LongTensor([int(label)]).to(device)

            samples.append((fingerprints, adjacency, molecular_size, label))

        return samples

Example #6

Show file

    def __init__(
        self, mol, radius: int = 2, nbits: int = 2048, n_feat: np.array = None
    ):
        """Store the molecule and settings and seed the radius-0 identifiers.

        When ``n_feat`` is None the node features come from
        ``self.createNodeFeatures()``. Each row of the int32 feature matrix
        is concatenated as text and hashed to give the level-0 identifier of
        the corresponding atom index.
        """
        self.mol = mol
        self.radius = radius
        self.nbits = nbits
        self.fps = np.zeros(shape=(self.nbits,), dtype=np.int32)

        if n_feat is None:
            n_feat = self.createNodeFeatures()
        n_feat = np.array(n_feat, dtype=np.int32)

        self.adj = Chem.GetAdjacencyMatrix(mol)

        # One identifier table per level 0..radius.
        self.identifier: Dict[int, Dict[int, int]] = defaultdict(dict)
        for level in range(radius + 1):
            self.identifier[level] = {}

        # Level 0: hash of the concatenated node features of every atom.
        base_level = self.identifier[0]
        for atom_idx, features in enumerate(n_feat):
            base_level[atom_idx] = hash("".join(map(str, features)))

Example #7

Show file

def test_smiles_from_adjacent_matrix(smiles):
    """Rebuild a molecule from its adjacency matrix and check the SMILES round-trips.

    The molecule is reduced to atoms, total formal charge and adjacency
    matrix; ``x2m.AC2mol`` then proposes molecules, and the canonical SMILES
    of the hydrogen-stripped input must be among their SMILES.
    """
    charged_fragments = True
    quick = True

    # Take the molecule apart.
    mol = get_mol(smiles)
    atoms = get_atoms(mol)
    charge = Chem.GetFormalCharge(mol)
    adjacent_matrix = Chem.GetAdjacencyMatrix(mol)

    # Reference SMILES of the input without explicit hydrogens.
    canonical_smiles = Chem.MolToSmiles(Chem.RemoveHs(mol))

    # Reconstruct from a bare template, the adjacency matrix, atoms and charge.
    template = x2m.get_proto_mol(atoms)
    candidates = x2m.AC2mol(template, adjacent_matrix, atoms, charge,
                            charged_fragments, quick)

    candidate_smiles = [
        Chem.MolToSmiles(Chem.RemoveHs(candidate)) for candidate in candidates
    ]

    assert canonical_smiles in candidate_smiles

Example #8

Show file

def read_graph(source_path,MAX_size):
    """Read one SMILES per line and build padded, normalised graph inputs.

    Parameters
    ----------
    source_path
        Path opened through ``tf.gfile.GFile``; reading stops at the first
        empty line.
    MAX_size
        Target size used to pad vertex lists and adjacency matrices.

    Returns
    -------
    tuple
        ``(Vertex, Adj, max_size)``: the padded int32 vertex-id arrays, the
        normalised adjacency matrices, and the largest atom count seen.
        (The original body fell off the end without returning anything.)
    """
    Vertex = []
    Adj = []  # Normalized adjacency matrices
    PAD = 0
    mydict = {}  # atom symbol -> 1-based integer id, in order of appearance
    max_size = 0
    with tf.gfile.GFile(source_path, mode="r") as source_file:
        source = source_file.readline().strip()
        while source:
            mol = Chem.MolFromSmiles(source)
            atom_list = []
            for a in mol.GetAtoms():
                m = a.GetSymbol()
                if m not in mydict:
                    mydict[m] = len(mydict) + 1
                atom_list.append(mydict[m])

            max_size = max(max_size, len(atom_list))

            if len(atom_list) < MAX_size:
                atom_list = atom_list + [PAD] * (MAX_size - len(atom_list))
            Vertex.append(np.array(atom_list, np.int32))

            adja_mat = Chem.GetAdjacencyMatrix(mol)
            adj_temp = []
            for adja in adja_mat:
                if len(adja) < MAX_size:
                    pad = [PAD] * (MAX_size - len(adja))
                    adja = np.array(list(adja) + pad, np.int32)
                adj_temp.append(adja)

            # Append all-PAD rows until the matrix has MAX_size rows.
            cur_len = len(adj_temp)
            for _ in range(MAX_size - cur_len):
                adj_temp.append(np.array([PAD] * MAX_size, np.int32))

            adj_temp = adj_temp + np.eye(MAX_size)  # A_hat = A + I
            deg = np.power(np.sum(adj_temp, axis=1), -0.5)
            # Rows whose sum is exactly 1 contain only the identity entry
            # (padding rows, and atoms without neighbours); their scaling
            # factor is set to 0.
            deg_new = np.where(deg == 1, 0, deg)
            deg_diag = np.diag(deg_new)
            adj = np.matmul(deg_diag, adj_temp)
            adj = np.matmul(adj, deg_diag)  # normalized
            Adj.append(adj)
            source = source_file.readline().strip()
    return Vertex, Adj, max_size

Example #9

Show file

File: topology.py Project: mrauha/chemoinformatics-2018

def CalculateBalaban(mol):
    """
    Calculate the Balaban index (J) of a molecule.

    Usage:

        result=CalculateBalaban(mol)

        Input: mol is a molecule object

        Output: result is a numeric value
    """
    adjacency = Chem.GetAdjacencyMatrix(mol)
    distances = Chem.GetDistanceMatrix(mol)
    n_bonds = mol.GetNumBonds()
    n_atoms = mol.GetNumAtoms()
    distance_sums = numpy.sum(distances, axis=1)
    cyclomatic = n_bonds - n_atoms + 1

    # Visit every pair (i, j) with j >= i whose adjacency entry is 1, in
    # row-major order, and accumulate 1 / sqrt(S_i * S_j).
    rows, cols = numpy.nonzero(numpy.triu(adjacency) == 1)
    bond_sum = 0.
    for i, j in zip(rows, cols):
        bond_sum += 1. / numpy.sqrt(distance_sums[i] * distance_sums[j])

    return (float(n_bonds) / float(cyclomatic + 1) * bond_sum
            if cyclomatic + 1 != 0 else 0)

Example #10

Show file

File: reaction_box_no_slurm.py Project: jensengroup/ReactionDiscovery

def extract_smiles(xyz_file, charge, allow_charge=True, check_ac=False):
    """
    Use xyz2mol to extract a SMILES with as much 3D structural information
    as possible.

    The molecule is first built with ``use_huckel=True``; if that raises,
    it is rebuilt with ``use_huckel=False``.

    When ``check_ac`` is true, the adjacency matrix of the built molecule is
    compared with the module-level ``AC``; on any difference the global
    ``AC_SAME`` is set to False and a message is printed.

    Returns ``(structure_smiles, formal_charge, res_status)``.
    """
    global AC_SAME

    atoms, _, xyz_coordinates = xyz2mol_local.read_xyz_file(xyz_file)

    def _build_mol(use_huckel):
        # Single place for the xyz2mol options; only use_huckel varies.
        return xyz2mol_local.xyz2mol(atoms, xyz_coordinates, charge=charge,
                                     use_graph=True,
                                     allow_charged_fragments=allow_charge,
                                     use_huckel=use_huckel,
                                     use_atom_maps=True,
                                     embed_chiral=True)

    try:
        input_mol = _build_mol(True)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
        # are no longer swallowed by the fallback.
        input_mol = _build_mol(False)

    input_mol = reorder_atoms_to_map(input_mol)
    structure_mol, res_status = choose_resonance_structure(input_mol)
    structure_mol = chiral_tags(structure_mol)
    rdmolops.AssignStereochemistry(structure_mol)
    structure_smiles = Chem.MolToSmiles(structure_mol)

    if check_ac:
        ac = Chem.GetAdjacencyMatrix(input_mol)
        if not np.all(AC == ac):
            AC_SAME = False
            print("change in AC: stopping")

    return structure_smiles, GetFormalCharge(structure_mol), res_status

Example #11

Show file

    def calculate(self, An=None, A1=None):
        """Return the matrix for ``self.order``.

        For any order other than 1 the result is ``An.dot(A1)``. For order 1
        it is the adjacency matrix of ``self.mol``, recomputed with
        ``force=True`` and honouring ``self.useBO``.
        """
        if self.order != 1:
            return An.dot(A1)

        return Chem.GetAdjacencyMatrix(self.mol, useBO=self.useBO, force=True)

Example #12

Show file

def _GetBurdenMatrix(mol: Chem.Mol, propertylabel: str = 'm') -> numpy.matrix:
    """Build the weighted Burden matrix and return the real parts of its eigenvalues.

    Hydrogens are added first. The value returned is the output of
    ``numpy.real(numpy.linalg.eigvals(...))``.
    """
    mol = Chem.AddHs(mol)
    adjacency = Chem.GetAdjacencyMatrix(mol)
    burden = numpy.array(adjacency, dtype=numpy.float32)

    # Diagonal elements Bii: the relative atomic property of atom i selected
    # by ``propertylabel``, rounded to three decimals.
    for idx in range(mol.GetNumAtoms()):
        symbol = mol.GetAtomWithIdx(idx).GetSymbol()
        value = GetRelativeAtomicProperty(element=symbol, propertyname=propertylabel)
        burden[idx, idx] = round(value, 3)

    # Elements Bij for bonded atoms: square root of the bond order. Bond
    # types not listed here keep the value copied from the adjacency matrix.
    bond_weights = {
        'SINGLE': round(numpy.sqrt(1), 3),
        "DOUBLE": round(numpy.sqrt(2), 3),
        "TRIPLE": round(numpy.sqrt(3), 3),
        "AROMATIC": round(numpy.sqrt(1.5), 3),
    }
    for begin, end in numpy.argwhere(adjacency):
        bond_kind = mol.GetBondBetweenAtoms(int(begin), int(end)).GetBondType().name
        if bond_kind in bond_weights:
            burden[begin, end] = bond_weights[bond_kind]

    # All remaining off-diagonal elements (non-bonded atom pairs) are 0.001.
    off_diagonal = ~numpy.eye(len(adjacency), dtype=bool)
    burden[(adjacency == 0) & off_diagonal] = 0.001

    return numpy.real(numpy.linalg.eigvals(burden))

Example #13

Show file

def create_multi_adjancy_matrix(mol):
    """Build a 5-channel adjacency tensor with one channel per bond type.

    Channels: 0 single, 1 double, 2 triple, 3 aromatic, 4 any other bond
    type. The result has shape ``(5, num_atoms, num_atoms)`` and every
    channel has ones on its diagonal.
    """
    num = mol.GetNumAtoms()
    nch = 5
    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy; the builtin yields the same dtype.
    adj = np.zeros((nch, num, num), dtype=int)

    bond_type = Chem.rdchem.BondType
    channel_of = {
        bond_type.SINGLE: 0,
        bond_type.DOUBLE: 1,
        bond_type.TRIPLE: 2,
        bond_type.AROMATIC: 3,
    }
    for b in mol.GetBonds():
        ch = channel_of.get(b.GetBondType(), 4)
        # NOTE(review): only the (begin, end) entry is set, so the channels
        # are not symmetric. Confirm this is intended.
        adj[ch, b.GetBeginAtomIdx(), b.GetEndAtomIdx()] = 1

    # Ones on the diagonal of every channel.
    diagonal = np.arange(num)
    adj[:, diagonal, diagonal] = 1
    return adj

Example #14

Show file

 def create_adjacency(mol):
     """
     Return the adjacency matrix of a molecule as an int32 array.

     :param mol: rdkit.Chem.Mol object
     :return: ``Chem.GetAdjacencyMatrix(mol)`` copied into a ``np.int32`` array
     """
     return np.array(Chem.GetAdjacencyMatrix(mol), dtype=np.int32)

Example #15

Show file

def CalculateSchiultz(mol: Chem.Mol) -> float:
    """Get Schiultz number.

    Or Tsch.

    Computed as the sum of the entries of ``(Distance + Adjacent) . degree``,
    where ``degree`` holds the column sums of the adjacency matrix.
    """
    Distance = numpy.array(Chem.GetDistanceMatrix(mol), 'd')
    Adjacent = numpy.array(Chem.GetAdjacencyMatrix(mol), 'd')
    # The builtin ``sum`` over a 2-D array adds its rows, giving column sums.
    VertexDegree = sum(Adjacent)
    # ``scipy.dot`` was a deprecated alias of ``numpy.dot``; call NumPy directly.
    return sum(numpy.dot((Distance + Adjacent), VertexDegree))

Example #16

Show file

def create_adjacency(mol):
    """Return the symmetrically normalised adjacency matrix D^-1/2 (A + I) D^-1/2.

    ``A`` is the RDKit adjacency matrix of ``mol`` and ``D`` is the diagonal
    matrix built from the column sums of ``A + I``.
    """
    with_self_loops = Chem.GetAdjacencyMatrix(mol)
    with_self_loops = with_self_loops + np.eye(with_self_loops.shape[0])

    # The builtin ``sum`` over the 2-D array yields its column sums.
    degree_root = np.sqrt(np.diag(sum(with_self_loops)))
    inverse_root = np.linalg.inv(degree_root)

    normalised = np.matmul(inverse_root, np.matmul(with_self_loops, inverse_root))
    return np.array(normalised)

Example #17

Show file

File: GB-GM.py Project: anandchandra88/GB-GM

def valences_not_too_large(mol):
    """Return True when no atom's summed bond orders exceed its allowed valence.

    Allowed valences are looked up by atomic number in a fixed table; an
    atomic number missing from the table raises ``KeyError``.
    """
    max_valence = {5: 3, 6: 4, 7: 3, 8: 2, 9: 1, 16: 6, 17: 1, 35: 1, 53: 1}
    limits = [max_valence[atom.GetAtomicNum()] for atom in mol.GetAtoms()]

    # Row sums of the bond-order matrix give the total bond order per atom.
    bond_orders = Chem.GetAdjacencyMatrix(mol, useBO=True)
    totals = bond_orders.sum(axis=1)

    return all(not total > limit for limit, total in zip(limits, totals))

Example #18

Show file

def CalculateSchiultz(mol):
    """
    Calculation of Schiultz number
        Parameters:
            mol: RDKit molecule object
        Returns:
            Tsch: sum of the entries of (Distance + Adjacent) . degree,
            where degree holds the column sums of the adjacency matrix
    """
    Distance = numpy.array(Chem.GetDistanceMatrix(mol), 'd')
    Adjacent = numpy.array(Chem.GetAdjacencyMatrix(mol), 'd')
    # The builtin ``sum`` over a 2-D array adds its rows, giving column sums.
    VertexDegree = sum(Adjacent)
    # ``scipy.dot`` was a deprecated alias of ``numpy.dot``; call NumPy directly.
    return sum(numpy.dot((Distance + Adjacent), VertexDegree))

Example #19

Show file

File: util.py Project: ycq091044/SafeDrug

def buildMPNN(molecule, med_voc, radius=1, device="cpu:0"):
    """Build per-drug graph inputs and an averaging projection matrix.

    Parameters
    ----------
    molecule
        Mapping from each value of ``med_voc`` to an iterable of SMILES.
    med_voc
        Mapping whose values are the keys looked up in ``molecule``.
    radius
        Passed through to ``extract_fingerprints``.
    device
        Torch device the tensors are moved to.

    Returns
    -------
    tuple
        ``(MPNNSet, N_fingerprint, average_projection)``. ``MPNNSet`` is a
        list of ``(fingerprints, adjacency, molecular_size)`` tuples,
        ``N_fingerprint`` is the size of the fingerprint vocabulary, and
        ``average_projection`` is a FloatTensor with one row per
        ``med_voc`` entry whose non-zero entries are ``1 / count`` over the
        columns of that entry's molecules.
    """
    # Each vocabulary hands out the next integer id on first lookup.
    atom_dict = defaultdict(lambda: len(atom_dict))
    bond_dict = defaultdict(lambda: len(bond_dict))
    fingerprint_dict = defaultdict(lambda: len(fingerprint_dict))
    edge_dict = defaultdict(lambda: len(edge_dict))
    MPNNSet, average_index = [], []

    for atc3 in med_voc.values():
        smilesList = list(molecule[atc3])
        counter = 0  # how many drugs under this entry were converted
        for smiles in smilesList:
            try:
                mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
                atoms = create_atoms(mol, atom_dict)
                molecular_size = len(atoms)
                i_jbond_dict = create_ijbonddict(mol, bond_dict)
                fingerprints = extract_fingerprints(radius, atoms, i_jbond_dict,
                                                    fingerprint_dict, edge_dict)
                adjacency = Chem.GetAdjacencyMatrix(mol)
                # Pad the fingerprints with 1s up to the number of atoms.
                missing = adjacency.shape[0] - fingerprints.shape[0]
                if missing > 0:
                    fingerprints = np.append(fingerprints, [1] * missing)

                # Move the numpy data to torch tensors on the target device.
                fingerprints = torch.LongTensor(fingerprints).to(device)
                adjacency = torch.FloatTensor(adjacency).to(device)
                MPNNSet.append((fingerprints, adjacency, molecular_size))
                counter += 1
            except Exception:
                # Best effort: molecules that fail to convert are skipped.
                # Narrowed from a bare ``except`` so KeyboardInterrupt and
                # SystemExit still propagate.
                continue

        average_index.append(counter)

    N_fingerprint = len(fingerprint_dict)

    # Transform the per-entry counts into a projection matrix.
    n_col = sum(average_index)
    n_row = len(average_index)

    average_projection = np.zeros((n_row, n_col))
    col_counter = 0
    for i, item in enumerate(average_index):
        if item > 0:
            average_projection[i, col_counter : col_counter + item] = 1 / item
        col_counter += item

    return MPNNSet, N_fingerprint, torch.FloatTensor(average_projection)

Example #20

Show file

File: covid.py Project: CLAIRE-COVID-T4/pyg-covid-datasets

    def _process_row(self, smiles, label=None):
        """Convert one SMILES (and optional label) into a ``Data`` object.

        With a ``feature_extractor`` set, its output provides the keyword
        arguments for ``Data``. Otherwise the features are the number of
        nodes and the edge index taken from the non-zero entries of the
        adjacency matrix.
        """
        mol = Chem.MolFromSmiles(smiles)

        if self.feature_extractor is not None:
            features = self.feature_extractor(mol)
        else:
            adj = Chem.GetAdjacencyMatrix(mol)
            node_count = adj.shape[0]
            # Row 0 holds source indices, row 1 target indices.
            edge_pairs = np.stack(np.nonzero(adj))
            features = {
                'num_nodes': node_count,
                'edge_index': torch.LongTensor(edge_pairs),
            }

        return Data(y=label, **features)

Example #21

Show file

def create_dataset_randomsplit(x, y, path, dataname):
    """Build a dataset from InChI strings ``x`` and properties ``y``.

    The four vocabularies stored under ``path + 'SMRT-'`` are merged into
    the module-level ``atom_dict``, ``bond_dict``, ``edge_dict`` and
    ``fingerprint_dict`` first, and the (possibly extended) vocabularies are
    written back under ``path + dataname`` at the end.

    Relies on ``radius`` and ``device`` being defined at module level.

    Returns a list of ``(inchi, fingerprints, adjacency, molecular_size,
    property)`` tuples.
    """
    dir_input = path + 'SMRT-'
    # NOTE: pickle.load can execute arbitrary code; only load vocabulary
    # files that come from a trusted source.
    for filename, vocabulary in (
            ('atom_dict.pickle', atom_dict),
            ('bond_dict.pickle', bond_dict),
            ('edge_dict.pickle', edge_dict),
            ('fingerprint_dict.pickle', fingerprint_dict),
    ):
        with open(dir_input + filename, 'rb') as f:
            vocabulary.update(pickle.load(f))

    dataset = []
    for i in range(len(x)):
        smiles = x[i]  # despite the name, parsed as InChI below
        target_value = y[i]
        # Parse once (the original parsed every InChI twice).
        mol = Chem.AddHs(Chem.MolFromInchi(smiles))
        atoms = create_atoms(mol, atom_dict)
        molecular_size = len(atoms)
        i_jbond_dict = create_ijbonddict(mol, bond_dict)
        fingerprints = extract_fingerprints(radius, atoms, i_jbond_dict,
                                            fingerprint_dict, edge_dict)
        adjacency = np.float32((Chem.GetAdjacencyMatrix(mol)))
        # Move the numpy data to torch tensors on the device (CPU or GPU).
        fingerprints = torch.LongTensor(fingerprints).to(device)
        adjacency = torch.FloatTensor(adjacency).to(device)
        target_value = torch.FloatTensor([[float(target_value)]]).to(device)

        dataset.append(
            (smiles, fingerprints, adjacency, molecular_size, target_value))

    dir_dataset = path
    dump_dictionary(fingerprint_dict,
                    dir_dataset + dataname + '-fingerprint_dict.pickle')
    dump_dictionary(atom_dict, dir_dataset + dataname + '-atom_dict.pickle')
    dump_dictionary(bond_dict, dir_dataset + dataname + '-bond_dict.pickle')
    dump_dictionary(edge_dict, dir_dataset + dataname + '-edge_dict.pickle')
    return dataset

Example #22

Show file

def read_graph(source_path,MAX_size):
    """Read one SMILES per line and build padded vertex lists and A + I matrices.

    Reading stops at the first empty line. Atom symbols are mapped to
    1-based integer ids in order of first appearance; 0 is the padding id.

    Returns ``(vertices, adjacency matrices, largest atom count seen)``.
    """
    PAD = 0
    vertices = []
    adjacencies = []  # A + I per molecule
    symbol_ids = {}
    largest = 0

    with tf.gfile.GFile(source_path, mode="r") as reader:
        line = reader.readline().strip()
        while line:
            mol = Chem.MolFromSmiles(line)

            # Map each atom symbol to its id, assigning the next id to
            # symbols not seen before.
            ids = []
            for atom in mol.GetAtoms():
                symbol = atom.GetSymbol()
                if symbol not in symbol_ids:
                    symbol_ids[symbol] = len(symbol_ids) + 1
                ids.append(symbol_ids[symbol])

            largest = max(largest, len(ids))

            # A non-positive repeat count yields an empty list, so lists
            # that already have MAX_size entries or more are left as is.
            ids = ids + [PAD] * (MAX_size - len(ids))
            vertices.append(np.array(ids, np.int32))

            # Pad short rows to MAX_size columns, then add all-PAD rows.
            rows = [
                np.array(list(row) + [PAD] * (MAX_size - len(row)), np.int32)
                if len(row) < MAX_size else row
                for row in Chem.GetAdjacencyMatrix(mol)
            ]
            rows.extend(
                np.array([PAD] * MAX_size, np.int32)
                for _ in range(MAX_size - len(rows))
            )

            adjacencies.append(rows + np.eye(MAX_size))  # A_hat = A + I
            line = reader.readline().strip()

    return vertices, adjacencies, largest

Example #23

Show file

def get_adjacency_matrix(smiles: str):
    """
    Compute the adjacency matrix between the atoms of a molecule. Only works
    for single molecules at the moment, not for reactions.

    Args:
        smiles: SMILES representation of a molecule

    Returns:
        The result of ``Chem.GetAdjacencyMatrix`` for the parsed molecule: a
        Numpy array describing the adjacency between each atom and every
        other atom in the molecular SMILES.
    """
    return Chem.GetAdjacencyMatrix(Chem.MolFromSmiles(smiles))

Example #24

Show file

    def create_dataset(filepath):
        """Load a tab-separated file and build graph inputs for SMILES pairs.

        Columns 6 and 7 of every line hold the two SMILES of a pair. A
        SMILES that is empty, contains ``'.'`` or cannot be converted is
        replaced by ``'CC'`` and given ``mask == [0]``; every other SMILES
        gets ``mask == [1]``.

        Returns a list with one entry per line; each entry is a list of
        ``(fingerprints, adjacency, molecular_size, mask)`` tuples.
        """
        with open(filepath, 'r') as f:
            data_original = f.readlines()
        print(len(data_original))

        # Split each line once and keep columns 6 and 7.
        pairs = []
        for line in data_original:
            fields = line.strip('\n').split('\t')
            pairs.append([fields[6], fields[7]])

        dataset = []
        for pair in pairs:
            dataset_ = []
            for smiles in pair:
                # Mixtures ('.') and empty entries are replaced rather than
                # excluded, so each pair keeps both members.
                mol = None
                if '.' not in smiles and smiles != '':
                    try:
                        mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
                    except Exception:
                        # Invalid SMILES: fall back to the placeholder.
                        # Narrowed from a bare ``except``.
                        mol = None

                if mol is None:
                    mol = Chem.AddHs(Chem.MolFromSmiles('CC'))
                    mask = [0]
                else:
                    mask = [1]

                # Build the sample with the module-level helper functions.
                atoms = create_atoms(mol, atom_dict)
                molecular_size = len(atoms)
                i_jbond_dict = create_ijbonddict(mol, bond_dict)
                fingerprints = extract_fingerprints(radius, atoms, i_jbond_dict,
                                                    fingerprint_dict, edge_dict)
                adjacency = Chem.GetAdjacencyMatrix(mol)

                dataset_.append((fingerprints, adjacency, molecular_size, mask))
            dataset.append(dataset_)

        return dataset

Example #25

Show file

File: mol_conv.py Project: KRICT-DATA/EGCN-1

def smiles_to_mol_graph(smiles):
    """Convert a SMILES string into ``(mol, molecular graph)``.

    Node features are looked up per atom in ``atomic_props`` by atomic
    number. On any conversion error a message is printed and
    ``(None, None)`` is returned.
    """
    try:
        mol = Chem.MolFromSmiles(smiles)
        adj_mat = Chem.GetAdjacencyMatrix(mol)
        # The feature width is taken from the entry for atomic number 1.
        node_feat_mat = np.empty([mol.GetNumAtoms(), atomic_props.get(1).shape[0]])

        for ind, atom in enumerate(mol.GetAtoms()):
            node_feat_mat[ind, :] = atomic_props.get(atom.GetAtomicNum())

        return mol, construct_mol_graph(smiles, mol, adj_mat, node_feat_mat)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt and
        # SystemExit are not reported as conversion failures.
        print(smiles + ' could not be converted to molecular graph due to the internal errors of RDKit')
        return None, None

Example #26

Show file

File: atom.py Project: yccai/scikit-chem

    def adjacency_matrix(self, bond_orders=False, force=True):
        """ The vertex adjacency matrix.

        Args:
            bond_orders (bool):
                Whether to use bond orders.
            force (bool):
                Whether to recalculate or use the rdkit cached value.

        Returns:
            np.array[int]
        """

        matrix = Chem.GetAdjacencyMatrix(
            self.owner, useBO=bond_orders, force=force)
        return matrix

Example #27

Show file

File: utils.py Project: capoe/benchml

def smiles_to_pseudo_xyz(smiles):
    """Turn SMILES strings into ``ExtendedXyz`` configurations with zeroed positions.

    Every parsable SMILES gets explicit hydrogens, all-zero coordinates and
    its adjacency matrix (as floats) stored under ``info["lmat"]``. SMILES
    that cannot be parsed are skipped.

    Returns the list of configurations.
    """
    configs = []
    for smi in smiles:
        mol = chem.MolFromSmiles(smi)
        if mol is None:
            # Check before AddHs: the original tested for None only after
            # passing the molecule to AddHs, so the skip branch was never
            # reached for an unparsable SMILES.
            continue
        mol = chem.AddHs(mol)
        symbols = [a.GetSymbol() for a in mol.GetAtoms()]
        pos = np.zeros((len(symbols), 3))
        config = readwrite.ExtendedXyz(pos=pos, symbols=symbols)
        config.info["lmat"] = 1.*chem.GetAdjacencyMatrix(mol)
        configs.append(config)
    return configs

Example #28

Show file

File: take_elementary_step.py Project: plin1112/take_elementary_step

def take_elementary_step(mol, charge, E_cutoff, heterolytic, quick):
    """Enumerate products one elementary step away from ``mol``.

    Candidate adjacency matrices come from ``get_I_elementary``; each is
    turned into a molecule with ``xyz2mol.AC2mol`` and de-duplicated by
    isomeric SMILES. A candidate is kept when its SMILES can be parsed and
    ``get_BO_energy(parent) - get_BO_energy(candidate) < E_cutoff``.

    Returns ``(smiles_list, molecules)`` with the parent molecule first.
    """
    chiral_parent = Chem.FindMolChiralCenters(mol, includeUnassigned=True)
    parent_is_chiral = len(chiral_parent) > 0
    atom2chirality = dict(chiral_parent) if parent_is_chiral else {}

    atomicNumList = [a.GetAtomicNum() for a in mol.GetAtoms()]
    proto_mol = xyz2mol.get_proto_mol(atomicNumList)

    AC = Chem.GetAdjacencyMatrix(mol)

    num_atoms = len(atomicNumList)
    I_elementary = get_I_elementary(AC, num_atoms, atomicNumList)

    # De-duplicate by SMILES; the set keeps membership tests O(1) while the
    # lists preserve first-seen order.
    seen_raw_smiles = set()
    raw_smiles_list = []
    raw_molecules = []
    for I in I_elementary:
        newmol = xyz2mol.AC2mol(proto_mol, I, atomicNumList, charge,
                                heterolytic, quick)
        if parent_is_chiral:
            newmol = set_chirality(mol, newmol, atom2chirality)

        raw_smiles = Chem.MolToSmiles(newmol, isomericSmiles=True)
        if raw_smiles not in seen_raw_smiles:
            seen_raw_smiles.add(raw_smiles)
            raw_smiles_list.append(raw_smiles)
            raw_molecules.append(newmol)

    smiles_list = []
    molecules = []
    energy_of_reactant = get_BO_energy(mol)
    for smiles, raw_mol in zip(raw_smiles_list, raw_molecules):
        try:
            test_mol = Chem.MolFromSmiles(smiles)
        except Exception:
            # Narrowed from a bare ``except``.
            continue
        if test_mol is None:
            continue
        energy = get_BO_energy(raw_mol)
        # raw_smiles_list is already unique, so no second membership test
        # against smiles_list is needed.
        if energy_of_reactant - energy < E_cutoff:
            smiles_list.append(smiles)
            molecules.append(raw_mol)

    smiles_list.insert(0, Chem.MolToSmiles(mol, isomericSmiles=True))
    molecules.insert(0, mol)

    return smiles_list, molecules

Example #29

Show file

def transferlearning_dataset_predict(x, path):
    """Build prediction inputs for the SMILES strings in ``x``.

    The four vocabularies stored under ``path + 'SMRT-'`` are merged into
    the module-level ``atom_dict``, ``bond_dict``, ``edge_dict`` and
    ``fingerprint_dict`` first. SMILES that RDKit cannot parse are skipped.

    Relies on ``radius`` and ``device`` being defined at module level.

    Returns a list of ``(smiles, fingerprints, adjacency, molecular_size)``
    tuples.
    """
    dir_input = path + 'SMRT-'
    # NOTE: pickle.load can execute arbitrary code; only load vocabulary
    # files that come from a trusted source.
    for filename, vocabulary in (
            ('atom_dict.pickle', atom_dict),
            ('bond_dict.pickle', bond_dict),
            ('edge_dict.pickle', edge_dict),
            ('fingerprint_dict.pickle', fingerprint_dict),
    ):
        with open(dir_input + filename, 'rb') as f:
            vocabulary.update(pickle.load(f))

    dataset = []
    for i in range(len(x)):
        smiles = x[i]
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            continue
        # Reuse the parsed molecule (the original parsed the SMILES a second
        # time and computed an unused canonical SMILES).
        mol = Chem.AddHs(mol)
        atoms = create_atoms(mol, atom_dict)
        molecular_size = len(atoms)
        i_jbond_dict = create_ijbonddict(mol, bond_dict)
        fingerprints = extract_fingerprints(radius, atoms, i_jbond_dict,
                                            fingerprint_dict, edge_dict)
        adjacency = np.float32((Chem.GetAdjacencyMatrix(mol)))
        # Move the numpy data to torch tensors on the device (CPU or GPU).
        fingerprints = torch.LongTensor(fingerprints).to(device)
        adjacency = torch.FloatTensor(adjacency).to(device)
        dataset.append((smiles, fingerprints, adjacency, molecular_size))
    return dataset

Example #30

Show file

def CalculateBalaban(mol):
    """Return the Balaban index J of ``mol``.

    J = bonds / (bonds - atoms + 2) * sum over adjacent pairs (i, j), j >= i,
    of 1 / sqrt(S_i * S_j), where S holds the row sums of the distance
    matrix. 0 is returned when ``bonds - atoms + 2`` equals zero.
    """
    adjacency = Chem.GetAdjacencyMatrix(mol)
    distances = Chem.GetDistanceMatrix(mol)
    bond_count = mol.GetNumBonds()
    atom_count = mol.GetNumAtoms()
    row_sums = numpy.sum(distances, axis=1)
    size = len(distances)

    # Upper triangle including the diagonal, visited row by row.
    index_pairs = ((a, b) for a in range(size) for b in range(a, size))
    total = 0.
    for i, j in index_pairs:
        if adjacency[i, j] == 1:
            total += 1. / numpy.sqrt(row_sums[i] * row_sums[j])

    denominator = bond_count - atom_count + 2
    if denominator == 0:
        return 0
    return float(bond_count) / float(denominator) * total