def atom_level_descriptors(mol,
                           include=['functional'],
                           asOneHot=False,
                           ORIGINAL_VERSION=False):
    '''
    Given an RDKit mol, returns an N_atom-long list of lists,
    each of which contains atom-level descriptors and their names

    mol: RDKit molecule; when 'functional' is requested, Gasteiger charges
        are computed on it and read back from its atoms
    include: collection of descriptor families to compute; 'functional'
        and 'structural' are recognised here (the default list is only
        read, never mutated)
    asOneHot, ORIGINAL_VERSION: forwarded unchanged to atom_structural

    returns: (labels, attributes)
        labels lists the descriptor names in the order they were appended,
        attributes[i] is the descriptor list of atom i
    '''

    attributes = [[] for _ in mol.GetAtoms()]
    labels = []

    if 'functional' in include:
        # Each Crippen entry is indexed as [0] = logp, [1] = mr; compute the
        # contributions once and split them per atom.
        for i, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(mol)):
            attributes[i].append(contrib[0])
            attributes[i].append(contrib[1])
        labels.append('Crippen contribution to logp')
        labels.append('Crippen contribution to mr')

        for i, value in enumerate(rdMolDescriptors._CalcTPSAContribs(mol)):
            attributes[i].append(value)
        labels.append('TPSA contribution')

        for i, value in enumerate(rdMolDescriptors._CalcLabuteASAContribs(mol)[0]):
            attributes[i].append(value)
        labels.append('Labute ASA contribution')

        for i, value in enumerate(EState.EStateIndices(mol)):
            attributes[i].append(value)
        labels.append('EState Index')

        rdPartialCharges.ComputeGasteigerCharges(mol)
        charge_props = (
            ('_GasteigerCharge', 'Gasteiger partial charge'),
            ('_GasteigerHCharge', 'Gasteiger hydrogen partial charge'),
        )
        for prop_name, label in charge_props:
            for i, atom in enumerate(mol.GetAtoms()):
                charge = float(atom.GetProp(prop_name))
                # Gasteiger partial charges sometimes give NaN (or +/-inf);
                # replace any non-finite value by 0.0
                attributes[i].append(charge if np.isfinite(charge) else 0.0)
            labels.append(label)

    if 'structural' in include:
        for i, atom_attributes in enumerate(attributes):
            atom_attributes.extend(
                atom_structural(mol.GetAtomWithIdx(i),
                                asOneHot=asOneHot,
                                ORIGINAL_VERSION=ORIGINAL_VERSION))
        labels.append('--many structural--')

    return (labels, attributes)
  def testCrippen(self):
    """Check Crippen contributions, atom-type indices and atom-type labels.

    Uses the SMILES "n1ccccc1CO" and verifies that one contribution is
    returned per atom, and that the lists passed as ``atomTypes`` /
    ``atomTypeLabels`` hold the expected values after the call.
    """
    mol = Chem.MolFromSmiles("n1ccccc1CO")
    contribs = rdMD._CalcCrippenContribs(mol)
    # assertEqual replaces the failUnlessEqual alias, which no longer exists
    # in current unittest versions.
    self.assertEqual(len(contribs), mol.GetNumAtoms())

    # The list handed in as atomTypes is expected to be overwritten in place.
    ts = [0] * mol.GetNumAtoms()
    rdMD._CalcCrippenContribs(mol, force=True, atomTypes=ts)
    self.assertEqual(ts, [59, 25, 25, 25, 25, 28, 17, 69])

    # Same for the textual labels handed in as atomTypeLabels.
    ls = [''] * mol.GetNumAtoms()
    rdMD._CalcCrippenContribs(mol, force=True, atomTypeLabels=ls)
    self.assertEqual(ls, ['N11', 'C18', 'C18', 'C18', 'C18', 'C21', 'C10', 'O2'])
Exemple #3
0
  def testCrippen(self):
    """Verify per-atom Crippen output for the molecule "n1ccccc1CO".

    Three things are checked: the number of contributions equals the number
    of atoms, the ``atomTypes`` list ends up with the expected indices, and
    the ``atomTypeLabels`` list ends up with the expected labels.
    """
    mol = Chem.MolFromSmiles("n1ccccc1CO")
    per_atom = rdMD._CalcCrippenContribs(mol)
    self.assertEqual(len(per_atom), mol.GetNumAtoms())

    # Pre-sized list that the call is expected to fill with type indices.
    type_ids = [0] * mol.GetNumAtoms()
    rdMD._CalcCrippenContribs(mol, force=True, atomTypes=type_ids)
    expected_ids = [59, 25, 25, 25, 25, 28, 17, 69]
    self.assertEqual(type_ids, expected_ids)

    # Pre-sized list that the call is expected to fill with type labels.
    type_labels = [''] * mol.GetNumAtoms()
    rdMD._CalcCrippenContribs(mol, force=True, atomTypeLabels=type_labels)
    expected_labels = ['N11', 'C18', 'C18', 'C18', 'C18', 'C21', 'C10', 'O2']
    self.assertEqual(type_labels, expected_labels)
def mol_to_nx(mol) -> nx.Graph:
    """Convert an RDKit molecule into a networkx graph.

    Every atom becomes a node keyed by its atom index and every bond becomes
    an edge between its begin and end atom indices. The molecule is sanitized
    and gets Gasteiger charges computed on it as a side effect.

    Node attributes: pos, formal_charge, chiral_tag, hybridization,
    is_aromatic, num_atom_rings, is_in_ring_size3..6, symbol, total_valence,
    gasteiger_charge, num_implicit_hs, total_degree, crippen_logp,
    crippen_mr, tpsa.
    Edge attributes: bond_type, is_conjugated.
    """
    graph = nx.Graph()
    conformer = mol.GetConformer()

    # Run every sanitization step except SANITIZE_PROPERTIES.
    sanitize_ops = SanitizeFlags.SANITIZE_ALL ^ SanitizeFlags.SANITIZE_PROPERTIES
    SanitizeMol(mol, sanitize_ops)

    ComputeGasteigerCharges(mol)
    rings = mol.GetRingInfo()
    crippen = rdMolDescriptors._CalcCrippenContribs(mol)
    tpsa = rdMolDescriptors._CalcTPSAContribs(mol)

    for atom in mol.GetAtoms():
        atom_idx = atom.GetIdx()

        # The formal charge is taken as reported by the atom; a valence-based
        # override for N/O used to live here but is not applied.
        node_attrs = dict(
            pos=conformer.GetAtomPosition(atom_idx),
            formal_charge=atom.GetFormalCharge(),
            chiral_tag=atom.GetChiralTag(),
            hybridization=atom.GetHybridization(),
            # num_explicit_hs is deliberately left out (noted as "all same").
            is_aromatic=atom.GetIsAromatic(),
            num_atom_rings=rings.NumAtomRings(atom_idx),
            is_in_ring_size3=atom.IsInRingSize(3),
            is_in_ring_size4=atom.IsInRingSize(4),
            is_in_ring_size5=atom.IsInRingSize(5),
            is_in_ring_size6=atom.IsInRingSize(6),
            symbol=atom.GetSymbol(),
            total_valence=atom.GetTotalValence(),
            # NOTE(review): stored exactly as GetProp returns it, without a
            # float() conversion - confirm consumers expect that.
            gasteiger_charge=atom.GetProp('_GasteigerCharge'),
            num_implicit_hs=atom.GetNumImplicitHs(),
            total_degree=atom.GetTotalDegree(),
            crippen_logp=crippen[atom_idx][0],
            crippen_mr=crippen[atom_idx][1],
            tpsa=tpsa[atom_idx],
        )
        graph.add_node(atom_idx, **node_attrs)

    for bond in mol.GetBonds():
        begin_idx = bond.GetBeginAtomIdx()
        end_idx = bond.GetEndAtomIdx()
        graph.add_edge(
            begin_idx,
            end_idx,
            bond_type=bond.GetBondType(),
            is_conjugated=bond.GetIsConjugated(),
        )

    return graph
Exemple #5
0
    def calculate_logP(self, mol):
        """Return the Crippen contributions RDKit computes for a ligand.

        Takes:
            * mol * - molecule in the rdkit environment
        Returns:
            * contribs * - result of ``rdMolDescriptors._CalcCrippenContribs``:
                Wildman-Crippen logP and MR (molar refractivity - measure of
                the volume occupied by a molecule of the substance) values
        """
        return rdMolDescriptors._CalcCrippenContribs(mol)
	def calculate_logP(self, mol):
		"""Compute the Crippen logP / MR contributions of a ligand molecule.

		Takes:
			* mol * - molecule in the rdkit environment
		Returns:
			* contribs * - Wildman-Crippen logP and MR (molar refractivity -
				measure of the volume occupied by a molecule of the substance)
				values, exactly as handed back by RDKit
		"""
		crippen_values = rdMolDescriptors._CalcCrippenContribs(mol)
		return crippen_values
Exemple #7
0
 def test8CrippenO3A(self):
   """Align every molecule of ref_e2.sdf onto molecule 48 with Crippen O3A.

   The summed alignment score and the root-mean-square of the per-molecule
   RMSD values are compared against stored reference numbers.
   """
   sdf = os.path.join(RDConfig.RDBaseDir,'Code','GraphMol',
                      'MolAlign', 'test_data', 'ref_e2.sdf')
   # alignedSdf = os.path.join(RDConfig.RDBaseDir,'Code','GraphMol',
   #                           'MolAlign', 'test_data', 'ref_e2_pyCrippenO3A.sdf')
   molS = Chem.SDMolSupplier(sdf, True, False)
   # molW = Chem.SDWriter(alignedSdf)
   refNum = 48
   refMol = molS[refNum]
   cumScore = 0.0
   cumMsd = 0.0
   # Contributions of the reference are computed once and reused for every probe.
   refList = rdMolDescriptors._CalcCrippenContribs(refMol, True)
   for prbMol in molS:
     prbList = rdMolDescriptors._CalcCrippenContribs(prbMol, True)
     pyO3A = rdMolAlign.GetCrippenO3A(prbMol, refMol, prbList, refList)
     cumScore += pyO3A.Score()
     rmsd = pyO3A.Align()
     cumMsd += rmsd * rmsd
     # molW.write(prbMol)
   cumMsd /= len(molS)
   # assertAlmostEqual replaces the failUnlessAlmostEqual alias, which no
   # longer exists in current unittest versions.
   self.assertAlmostEqual(cumScore,4918,0)
   self.assertAlmostEqual(math.sqrt(cumMsd),.304,3)
Exemple #8
0
 def test10CrippenO3A(self):
   """Align every molecule of ref_e2.sdf onto molecule 48 with Crippen O3A.

   Each aligned probe is written to ref_e2_pyCrippenO3A.sdf. The summed
   alignment score and the root-mean-square of the per-molecule RMSD values
   are compared against stored reference numbers.
   """
   sdf = os.path.join(RDConfig.RDBaseDir,'Code','GraphMol',
                      'MolAlign', 'test_data', 'ref_e2.sdf')
   alignedSdf = os.path.join(RDConfig.RDBaseDir,'Code','GraphMol',
                             'MolAlign', 'test_data', 'ref_e2_pyCrippenO3A.sdf')
   molS = Chem.SDMolSupplier(sdf, True, False)
   molW = Chem.SDWriter(alignedSdf)
   try:
     refNum = 48
     refMol = molS[refNum]
     cumScore = 0.0
     cumMsd = 0.0
     # Contributions of the reference are computed once and reused for every probe.
     refList = rdMolDescriptors._CalcCrippenContribs(refMol, True)
     for prbMol in molS:
       prbList = rdMolDescriptors._CalcCrippenContribs(prbMol, True)
       pyO3A = rdMolAlign.GetCrippenO3A(prbMol, refMol, prbList, refList)
       cumScore += pyO3A.Score()
       rmsd = pyO3A.Align()
       cumMsd += rmsd * rmsd
       molW.write(prbMol)
   finally:
     # Always flush and release the output file, even if an alignment fails;
     # the writer was previously left open.
     molW.close()
   cumMsd /= len(molS)
   self.assertAlmostEqual(cumScore,4918,0)
   self.assertAlmostEqual(math.sqrt(cumMsd),.304,3)
Exemple #9
0
def align_set_of_ligands(ligands: Sequence) -> Tuple[List[Chem.Mol], List[float]]:
    """ Align a set of ligands to each other

        Conformers are generated for a deep copy of every ligand, so the
        input molecules are not modified. The first ligand (conformer id 0)
        is the reference: every conformer of every ligand, the reference
        itself included, is aligned onto it with Crippen O3A and the
        best-scoring conformer of each ligand is kept.

        Parameters
        ----------
        ligands : list of rdkit.Chem.rdchem.Mol or rdkit.Chem.SmilesMolSupplier or rdkit.Chem.SDMolSupplier
            List of ligands.
        
        Returns
        ----------
        aligned_molecules : list of rdkit.Chem.rdchem.Mol
            List of aligned ligands. Empty when no ligands are given.
        
        crippen_score : list of float
            List with crippen scores calculated during the alignment.

        Raises
        ----------
        ValueError
            If a ligand ends up without any conformer.

    """
    # presumably the number of conformers requested per ligand - confirm
    # against generate_conformers
    n_conformers = 100

    if not isinstance(ligands, list):
        ligands = list(ligands)

    # Nothing to align; previously this failed with an IndexError when the
    # reference was looked up.
    if not ligands:
        return [], []

    molecules = [generate_conformers(mol, n_conformers)
                 for mol in copy.deepcopy(ligands)]
    crippen_contribs = [rdMolDescriptors._CalcCrippenContribs(mol) for mol in molecules]

    ref_mol = molecules[0]
    crippen_ref_contrib = crippen_contribs[0]

    crippen_score = []
    aligned_molecules = []
    for mol, mol_contribs in zip(molecules, crippen_contribs):
        # Use the conformer ids that actually exist instead of assuming
        # ids 0..99 are present. Collected up front because Align() changes
        # the conformers while we loop.
        conformer_ids = [conf.GetId() for conf in mol.GetConformers()]
        if not conformer_ids:
            raise ValueError("ligand has no conformers to align")

        tempscore = []
        for cid in conformer_ids:
            crippenO3A = rdMolAlign.GetCrippenO3A(mol, ref_mol, mol_contribs, crippen_ref_contrib, cid, 0)
            crippenO3A.Align()
            tempscore.append(crippenO3A.Score())

        best = int(np.argmax(tempscore))
        # Round-trip through a mol block to obtain a molecule that holds only
        # the best-scoring conformer.
        mol_string = Chem.MolToMolBlock(mol, confId=conformer_ids[best])
        temp_mol = Chem.MolFromMolBlock(mol_string, removeHs=False)

        crippen_score.append(tempscore[best])
        aligned_molecules.append(temp_mol)

    return aligned_molecules, crippen_score
        
    
Exemple #10
0
def assignProperties(mol):
    '''
    Store atom-level descriptors on the atoms of ``mol`` for later featurization.

    Double properties written per atom: 'crippen_logp', 'crippen_mr', 'tpsa',
    'asa' and 'estate'. Gasteiger charges are computed last. Nothing is
    returned; the molecule is modified in place.
    '''
    for idx, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(mol)):
        mol.GetAtomWithIdx(idx).SetDoubleProp('crippen_logp', contrib[0])
        mol.GetAtomWithIdx(idx).SetDoubleProp('crippen_mr', contrib[1])

    # Descriptors that yield one number per atom: (property name, calculator).
    # Calculators are invoked lazily so they still run in the listed order.
    scalar_sources = (
        ('tpsa', lambda m: rdMolDescriptors._CalcTPSAContribs(m)),
        ('asa', lambda m: rdMolDescriptors._CalcLabuteASAContribs(m)[0]),
        ('estate', lambda m: EState.EStateIndices(m)),
    )
    for prop_name, compute in scalar_sources:
        for idx, value in enumerate(compute(mol)):
            mol.GetAtomWithIdx(idx).SetDoubleProp(prop_name, value)

    # Leaves '_GasteigerCharge' and '_GasteigerHCharge' on the atoms.
    rdPartialCharges.ComputeGasteigerCharges(mol)
Exemple #11
0
def get_molecular_attributes(rdmol):
    """
    Molecular attributes calculated per atom as:
      [Crippen contribution to logp,
       Crippen contribution to mr,
       TPSA contribution,
       Labute ASA contribution,
       EState Index,
       Gasteiger partial charge,
       Gasteiger hydrogen partial charge]

    Gasteiger charges are computed on ``rdmol`` as a side effect. Any charge
    that is NaN or infinite (of either sign) is replaced by 0.0.

    Parameters
    ----------
    rdmol : rdkit.Chem.rdchem.Mol
      rdkit molecule class

    Returns
    -------
    attributes : list
      one feature vector (list) per atom, in atom order

    """
    attributes = [[] for _ in rdmol.GetAtoms()]

    for i, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(rdmol)):
        attributes[i].append(contrib[0])
        attributes[i].append(contrib[1])
    for i, value in enumerate(rdMolDescriptors._CalcTPSAContribs(rdmol)):
        attributes[i].append(value)
    for i, value in enumerate(rdMolDescriptors._CalcLabuteASAContribs(rdmol)[0]):
        attributes[i].append(value)
    for i, value in enumerate(EState.EStateIndices(rdmol)):
        attributes[i].append(value)

    rdPartialCharges.ComputeGasteigerCharges(rdmol)
    for prop_name in ('_GasteigerCharge', '_GasteigerHCharge'):
        for i, atom in enumerate(rdmol.GetAtoms()):
            charge = float(atom.GetProp(prop_name))
            # np.isfinite rejects NaN, +inf and -inf; the earlier
            # "val == val and val < np.inf" test let -inf through.
            attributes[i].append(charge if np.isfinite(charge) else 0.0)

    return attributes
Exemple #12
0
def molToGraph(rdmol):
    '''
    Converts an RDKit molecule to an attributed undirected graph

    Gasteiger charges are computed on the molecule as a side effect.
    nodeFeatureDim / edgeFeatureDim are only assigned when the graph has at
    least one node / edge respectively.

    @param rdmol: RDKit molecule
    @return: Graph
    '''
    graph = Graph()

    # Calculate atom-level molecule descriptors
    nodesFeatures = [[] for _ in rdmol.GetAtoms()]

    #6 (25) Crippen contribution to logp
    #7 (26) Crippen contribution to mr
    # Both come from the same calculation, so it is run only once.
    for i, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(rdmol)):
        nodesFeatures[i].append(contrib[0])
        nodesFeatures[i].append(contrib[1])

    #8 (27) TPSA contribution
    for i, value in enumerate(rdMolDescriptors._CalcTPSAContribs(rdmol)):
        nodesFeatures[i].append(value)

    #9 (28) Labute ASA contribution
    for i, value in enumerate(rdMolDescriptors._CalcLabuteASAContribs(rdmol)[0]):
        nodesFeatures[i].append(value)

    #10 (29) EState Index
    for i, value in enumerate(EState.EStateIndices(rdmol)):
        nodesFeatures[i].append(value)

    # Calculate Gasteiger charges for features 30 and 31
    rdPartialCharges.ComputeGasteigerCharges(rdmol)
    # The computed charges are stored on each atom with computed property
    # under the name _GasteigerCharge and _GasteigerHCharge.
    # Values could be NaN or infinite; those are replaced by 0.0.

    #11 (30) _GasteigerCharge, #12 (31) _GasteigerHCharge
    for prop_name in ('_GasteigerCharge', '_GasteigerHCharge'):
        for i, atom in enumerate(rdmol.GetAtoms()):
            charge = float(atom.GetProp(prop_name))  # parse the property once
            nodesFeatures[i].append(charge if np.isfinite(charge) else 0.0)

    # Add edges to graph
    for bond in rdmol.GetBonds():
        edge = Edge()
        edge.id = bond.GetIdx()
        edge.features = getBondFeatures(bond).astype('float32')
        edge.ends = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
        graph.edges.append(edge)

    # Add nodes to graph
    for i, atom in enumerate(rdmol.GetAtoms()):
        node = Node()
        node.id = atom.GetIdx()
        node.features = getAtomFeatures(atom, nodesFeatures[i])

        # Each neighbor is recorded as (neighbor atom index, connecting bond index)
        for neighbor in atom.GetNeighbors():
            connecting_bond = rdmol.GetBondBetweenAtoms(atom.GetIdx(),
                                                        neighbor.GetIdx())
            node.neighbors.append((neighbor.GetIdx(), connecting_bond.GetIdx()))

        graph.nodes.append(node)

    graph.nodeNum = len(graph.nodes)
    # Guard the node lookup the same way as the edge lookup so a molecule
    # without atoms no longer raises IndexError.
    if len(graph.nodes) > 0:
        graph.nodeFeatureDim = len(graph.nodes[0].features)
    if len(graph.edges) > 0:
        graph.edgeFeatureDim = len(graph.edges[0].features)

    return graph
Exemple #13
0
def molToGraph(rdmol, molecular_attributes=False):
    '''Converts an RDKit molecule to an attributed undirected graph

    rdmol: RDKit molecule
    molecular_attributes: when true, each atom additionally receives, in this
        order, its Crippen logp and mr contributions, TPSA contribution,
        Labute ASA contribution, EState index, Gasteiger partial charge and
        Gasteiger hydrogen partial charge (non-finite charges become 0.0);
        Gasteiger charges are then computed on rdmol as a side effect

    returns: Graph with edges, nodes, num_edges and num_nodes filled in
    '''
    # Initialize
    graph = Graph()
    graph.molecular_attributes = molecular_attributes

    # Calculate atom-level molecule descriptors
    attributes = [[] for _ in rdmol.GetAtoms()]
    if molecular_attributes:
        # Crippen contribution to logp ([0]) and to mr ([1]), computed once
        for i, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(rdmol)):
            attributes[i].append(contrib[0])
            attributes[i].append(contrib[1])

        # TPSA contribution
        for i, value in enumerate(rdMolDescriptors._CalcTPSAContribs(rdmol)):
            attributes[i].append(value)

        # Labute ASA contribution
        for i, value in enumerate(rdMolDescriptors._CalcLabuteASAContribs(rdmol)[0]):
            attributes[i].append(value)

        # EState Index
        for i, value in enumerate(EState.EStateIndices(rdmol)):
            attributes[i].append(value)

        # Gasteiger partial charge, then Gasteiger hydrogen partial charge
        rdPartialCharges.ComputeGasteigerCharges(rdmol)
        for prop_name in ('_GasteigerCharge', '_GasteigerHCharge'):
            for i, atom in enumerate(rdmol.GetAtoms()):
                charge = float(atom.GetProp(prop_name))
                # Gasteiger partial charges sometimes give NaN or inf
                attributes[i].append(charge if np.isfinite(charge) else 0.0)

    # Add bonds
    for bond in rdmol.GetBonds():
        edge = Edge()
        edge.i = bond.GetIdx()
        edge.attributes = bondAttributes(bond)
        edge.connects = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
        graph.edges.append(edge)
    # Add atoms
    for k, atom in enumerate(rdmol.GetAtoms()):
        node = Node()
        node.i = atom.GetIdx()
        node.attributes = atomAttributes(atom, extra_attributes=attributes[k])
        # Each neighbor is recorded as (neighbor atom index, connecting bond index)
        for neighbor in atom.GetNeighbors():
            connecting_bond = rdmol.GetBondBetweenAtoms(atom.GetIdx(),
                                                        neighbor.GetIdx())
            node.neighbors.append((neighbor.GetIdx(), connecting_bond.GetIdx()))
        graph.nodes.append(node)
    # Add counts, for convenience
    graph.num_edges = len(graph.edges)
    graph.num_nodes = len(graph.nodes)
    return graph
Exemple #14
0
def make_graph(name, gb_structure, gb_scalar_coupling):
    """Build the Graph object for one molecule.

    name: molecule name; used as the group key in both group-bys and as the
        file stem of the .xyz structure read with Open Babel
    gb_structure: group-by whose groups carry the columns
        [molecule_name, atom_index, atom, x, y, z]
    gb_scalar_coupling: group-by whose groups carry the coupling rows; the
        columns id, atom_index_0, atom_index_1, type,
        scalar_coupling_constant, fc, sd, pso and dso are read

    returns: Graph(name, SMILES, [symbols, scaled xyz], node feature arrays,
        edge feature arrays, edge_index, Coupling)

    Raises ValueError when a coupling atom pair is not a directed edge of the
    molecule.
    """
    # ['id', 'molecule_name', 'atom_index_0', 'atom_index_1', 'type','scalar_coupling_constant']
    coupling_df = gb_scalar_coupling.get_group(name)

    # [molecule_name,atom_index,atom,x,y,z]
    df = gb_structure.get_group(name)
    df = df.sort_values(['atom_index'], ascending=True)
    a = df.atom.values.tolist()
    xyz = df[['x', 'y', 'z']].values

    mol = mol_from_axyz(a, xyz)
    mol_op = openbabel.OBMol()
    obConversion.ReadFile(mol_op, f'../input/champs-scalar-coupling/structures/{name}.xyz')

    factory = ChemicalFeatures.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    feature = factory.GetFeaturesForMol(mol)

    num_atom = mol.GetNumAtoms()
    symbol = np.zeros((num_atom, len(SYMBOL)), np.uint8)  # category
    acceptor = np.zeros((num_atom, 1), np.uint8)
    donor = np.zeros((num_atom, 1), np.uint8)
    aromatic = np.zeros((num_atom, 1), np.uint8)
    hybridization = np.zeros((num_atom, len(HYBRIDIZATION)), np.uint8)
    num_h = np.zeros((num_atom, 1), np.float32)  # real
    atomic = np.zeros((num_atom, 1), np.float32)

    # new features
    degree = np.zeros((num_atom, 1), np.uint8)
    formalCharge = np.zeros((num_atom, 1), np.float32)
    chiral_tag = np.zeros((num_atom, 1), np.uint8)
    crippen_contribs = np.zeros((num_atom, 2), np.float32)
    tpsa = np.zeros((num_atom, 1), np.float32)
    labute_asac = np.zeros((num_atom, 1), np.float32)
    gasteiger_charges = np.zeros((num_atom, 1), np.float32)
    esataindices = np.zeros((num_atom, 1), np.float32)
    atomic_radiuss = np.zeros((num_atom, 1), np.float32)
    electronegate = np.zeros((num_atom, 1), np.float32)
    electronegate_sqre = np.zeros((num_atom, 1), np.float32)
    mass = np.zeros((num_atom, 1), np.float32)
    van = np.zeros((num_atom, 1), np.float32)
    cov = np.zeros((num_atom, 1), np.float32)
    ion = np.zeros((num_atom, 1), np.float32)

    # Whole-molecule descriptor calculations do not depend on the atom being
    # visited, so run each of them once instead of once per atom.
    crippen_per_atom = rdMolDescriptors._CalcCrippenContribs(mol)
    tpsa_per_atom = rdMolDescriptors._CalcTPSAContribs(mol)
    labute_per_atom = rdMolDescriptors._CalcLabuteASAContribs(mol)[0]
    estate_per_atom = EState.EStateIndices(mol)

    for i in range(num_atom):
        atom = mol.GetAtomWithIdx(i)
        atom_op = mol_op.GetAtomById(i)
        atom_symbol = atom.GetSymbol()

        symbol[i] = one_hot_encoding(atom_symbol, SYMBOL)
        aromatic[i] = atom.GetIsAromatic()
        hybridization[i] = one_hot_encoding(atom.GetHybridization(), HYBRIDIZATION)
        num_h[i] = atom.GetTotalNumHs(includeNeighbors=True)
        atomic[i] = atom.GetAtomicNum()

        degree[i] = atom.GetTotalDegree()
        formalCharge[i] = atom.GetFormalCharge()
        chiral_tag[i] = int(atom.GetChiralTag())

        crippen_contribs[i] = crippen_per_atom[i]
        tpsa[i] = tpsa_per_atom[i]
        labute_asac[i] = labute_per_atom[i]
        # This partial charge comes from the Open Babel atom, not from RDKit.
        gasteiger_charges[i] = atom_op.GetPartialCharge()
        esataindices[i] = estate_per_atom[i]

        # Per-element lookup tables keyed by the atom symbol.
        atomic_radiuss[i] = atomic_radius[atom_symbol]
        electronegate[i] = electronegativity[atom_symbol]
        electronegate_sqre[i] = electronegativity_square[atom_symbol]
        mass[i] = atomic_mass[atom_symbol]
        van[i] = vanderwaalsradius[atom_symbol]
        cov[i] = covalenzradius[atom_symbol]
        ion[i] = ionization_energy[atom_symbol]

    # Flag the atoms that take part in a donor / acceptor feature.
    for feat in feature:
        family = feat.GetFamily()
        if family == 'Donor':
            for i in feat.GetAtomIds():
                donor[i] = 1
        elif family == 'Acceptor':
            for i in feat.GetAtomIds():
                acceptor[i] = 1

    # One directed edge for every ordered pair of distinct atoms.
    num_edge = num_atom * num_atom - num_atom
    edge_index = np.zeros((num_edge, 2), np.uint32)
    bond_type = np.zeros((num_edge, len(BOND_TYPE)), np.uint32)
    distance = np.zeros((num_edge, 1), np.float32)
    angle = np.zeros((num_edge, 1), np.float32)

    norm_xyz = preprocessing.normalize(xyz, norm='l2')

    ij = 0
    for i in range(num_atom):
        for j in range(num_atom):
            if i == j:
                continue
            edge_index[ij] = [i, j]

            # Pairs without a bond keep an all-zero bond_type row.
            bond = mol.GetBondBetweenAtoms(i, j)
            if bond is not None:
                bond_type[ij] = one_hot_encoding(bond.GetBondType(), BOND_TYPE)

            distance[ij] = np.linalg.norm(xyz[i] - xyz[j])
            # Dot product of the two l2-normalized position vectors.
            angle[ij] = (norm_xyz[i] * norm_xyz[j]).sum()

            ij += 1

    # NOTE(review): the factor looks like an Angstrom -> Bohr conversion - confirm.
    xyz = xyz * 1.889726133921252

    system = System(symbols=a, positions=xyz)
    acsf = ACSF_GENERATOR.create(system)

    # Position of every coupling pair inside edge_index. The list form of
    # edge_index is built once rather than once per coupling row.
    edge_pairs = edge_index.tolist()
    coupling_pairs = coupling_df[['atom_index_0', 'atom_index_1']].values
    l = np.array([edge_pairs.index(pair) for pair in coupling_pairs.tolist()])

    coupling_edge_index = np.concatenate([coupling_pairs, l.reshape(len(l), 1)],
                                         axis=1)

    coupling = Coupling(coupling_df['id'].values,
                        coupling_df[['fc', 'sd', 'pso', 'dso']].values,
                        coupling_edge_index,
                        np.array([COUPLING_TYPE.index(t) for t in coupling_df.type.values], np.int32),
                        coupling_df['scalar_coupling_constant'].values,
                        )

    graph = Graph(
        name,
        Chem.MolToSmiles(mol),
        [a, xyz],
        [acsf, symbol, acceptor, donor, aromatic, hybridization, num_h, atomic, degree, formalCharge, chiral_tag,
         crippen_contribs, tpsa, labute_asac, gasteiger_charges, esataindices, atomic_radiuss, electronegate,
         electronegate_sqre, mass, van, cov, ion],
        [bond_type, distance, angle, ],
        edge_index,
        coupling,
    )

    return graph
Exemple #15
0
def molToGraph(rdmol, bondtype_list_order, atomtype_list_order, molecular_attributes = False):
    '''Converts an RDKit molecule to an attributed undirected graph

    rdmol: RDKit molecule
    bondtype_list_order: ordered vocabulary used to one-hot encode the bond
        type string "<smaller atomic number>_<larger atomic number>"
    atomtype_list_order: ordered vocabulary used to one-hot encode the
        atomic number of each atom
    molecular_attributes: when true, each atom additionally receives, in this
        order, its Crippen logp and mr contributions, TPSA contribution,
        Labute ASA contribution, EState index, Gasteiger partial charge and
        Gasteiger hydrogen partial charge (non-finite charges become 0.0);
        Gasteiger charges are then computed on rdmol as a side effect

    returns: Graph with edges, nodes, num_edges and num_nodes filled in
    '''
    # Initialize
    graph = Graph()
    graph.molecular_attributes = molecular_attributes
    graph.bondtype_list_order = bondtype_list_order
    graph.atomtype_list_order = atomtype_list_order

    # Calculate atom-level molecule descriptors
    attributes = [[] for _ in rdmol.GetAtoms()]
    if molecular_attributes:
        # Crippen contribution to logp ([0]) and to mr ([1]), computed once
        for i, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(rdmol)):
            attributes[i].append(contrib[0])
            attributes[i].append(contrib[1])

        # TPSA contribution
        for i, value in enumerate(rdMolDescriptors._CalcTPSAContribs(rdmol)):
            attributes[i].append(value)

        # Labute ASA contribution
        for i, value in enumerate(rdMolDescriptors._CalcLabuteASAContribs(rdmol)[0]):
            attributes[i].append(value)

        # EState Index
        for i, value in enumerate(EState.EStateIndices(rdmol)):
            attributes[i].append(value)

        # Gasteiger partial charge, then Gasteiger hydrogen partial charge
        rdPartialCharges.ComputeGasteigerCharges(rdmol)
        for prop_name in ('_GasteigerCharge', '_GasteigerHCharge'):
            for i, atom in enumerate(rdmol.GetAtoms()):
                charge = float(atom.GetProp(prop_name))
                # Gasteiger partial charges sometimes give NaN or inf
                attributes[i].append(charge if np.isfinite(charge) else 0.0)

    # Add bonds
    for bond in rdmol.GetBonds():
        edge = Edge()
        edge.i = bond.GetIdx()
        edge.attributes = bondAttributes(bond)
        edge.orderAtt = list(oneHotVector(bond.GetBondTypeAsDouble(), [1.0, 1.5, 2.0, 3.0]))
        edge.aromAtt = list(oneHotVector(bond.GetIsAromatic(), [1.0, 0.0]))
        edge.conjAtt = list(oneHotVector(bond.GetIsConjugated(), [1.0, 0.0]))
        edge.ringAtt = list(oneHotVector(bond.IsInRing(), [1.0, 0.0]))

        # Bond type is keyed by the two atomic numbers, smaller one first, so
        # the key does not depend on the direction the bond is stored in.
        begin_num = bond.GetBeginAtom().GetAtomicNum()
        end_num = bond.GetEndAtom().GetAtomicNum()
        low_num, high_num = sorted((begin_num, end_num))
        bond_type = str(low_num) + '_' + str(high_num)

        edge.attributesAtt = np.array(
            list(oneHotVector(bond_type, bondtype_list_order)), dtype=att_dtype)

        edge.connects = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
        graph.edges.append(edge)
    # Add atoms
    for k, atom in enumerate(rdmol.GetAtoms()):
        node = Node()
        node.i = atom.GetIdx()
        node.attributes = atomAttributes(atom, extra_attributes = attributes[k])
        node.attributesAtt = np.array(
            list(oneHotVector(atom.GetAtomicNum(), atomtype_list_order)), dtype=att_dtype)
        # Each neighbor is recorded as (neighbor atom index, connecting bond index)
        for neighbor in atom.GetNeighbors():
            connecting_bond = rdmol.GetBondBetweenAtoms(atom.GetIdx(),
                                                        neighbor.GetIdx())
            node.neighbors.append((neighbor.GetIdx(), connecting_bond.GetIdx()))
        graph.nodes.append(node)
    # Add counts, for convenience
    graph.num_edges = len(graph.edges)
    graph.num_nodes = len(graph.nodes)
    return graph
Exemple #16
0
def atom_level_descriptors(mol, include = ['functional'], asOneHot = False, ORIGINAL_VERSION = False):
	"""
	Given an RDKit mol, returns an N_atom-long list of lists,
	each of which contains atom-level descriptors and their names

	Args:
		mol: RDKit molecule; when 'functional' is requested, Gasteiger
			charges are computed on it and read back from its atoms
		include: collection of descriptor families to compute; 'functional',
			'structural' and 'dftb' are recognised (the default list is only
			read, never mutated)
		asOneHot, ORIGINAL_VERSION: forwarded unchanged to atom_structural

	Returns:
		(labels, attributes)
		labels lists the descriptor names in the order they were appended,
		attributes[i] is the descriptor list of atom i
	"""

	attributes = [[] for _ in mol.GetAtoms()]
	labels = []

	if 'functional' in include:
		# Each Crippen entry is indexed as [0] = logp, [1] = mr; compute the
		# contributions once and split them per atom.
		for i, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(mol)):
			attributes[i].append(contrib[0])
			attributes[i].append(contrib[1])
		labels.append('Crippen contribution to logp')
		labels.append('Crippen contribution to mr')

		for i, value in enumerate(rdMolDescriptors._CalcTPSAContribs(mol)):
			attributes[i].append(value)
		labels.append('TPSA contribution')

		for i, value in enumerate(rdMolDescriptors._CalcLabuteASAContribs(mol)[0]):
			attributes[i].append(value)
		labels.append('Labute ASA contribution')

		for i, value in enumerate(EState.EStateIndices(mol)):
			attributes[i].append(value)
		labels.append('EState Index')

		rdPartialCharges.ComputeGasteigerCharges(mol)
		charge_props = (
			('_GasteigerCharge', 'Gasteiger partial charge'),
			('_GasteigerHCharge', 'Gasteiger hydrogen partial charge'),
		)
		for prop_name, label in charge_props:
			for i, atom in enumerate(mol.GetAtoms()):
				charge = float(atom.GetProp(prop_name))
				# Gasteiger partial charges sometimes give NaN (or +/-inf);
				# replace any non-finite value by 0.0
				attributes[i].append(charge if np.isfinite(charge) else 0.0)
			labels.append(label)

	if 'structural' in include:
		for i, atom_attributes in enumerate(attributes):
			atom_attributes.extend(
				atom_structural(mol.GetAtomWithIdx(i), asOneHot = asOneHot, ORIGINAL_VERSION = ORIGINAL_VERSION))
		labels.append('--many structural--')

	if 'dftb' in include:
		try:
			dftb_atom_atts = atom_dftb(mol)
		except (ValueError, KeyError) as e:  # often, an invalid element
			print(e)
			# Fall back to 18 zeros per atom so the feature width stays fixed.
			dftb_atom_atts = [[0] * 18 for _ in range(mol.GetNumAtoms())]
		for i in range(mol.GetNumAtoms()):
			attributes[i].extend(dftb_atom_atts[i])
		labels.append('--many DFTB--')

	return (labels, attributes)
Exemple #17
0
# Script section: parse the mol blocks stored in d2f, compute Crippen
# contributions for a deep copy of the parsed molecules, align each copy onto
# cdk2mol_reference with Crippen O3A and show the reference in the viewer `v`.
# d2f, cdk2mol_reference and v are defined outside this section.

# lis2 = df['fps']
# Renumber the rows from 0 and drop the previous index.
d2f = d2f.reset_index(drop=True)
lis = d2f['mol_blocks']
# NOTE(review): the result of head() is not used here; it only has a visible
# effect when evaluated as the last expression of a notebook cell - confirm.
d2f.head(100)

# Parse every mol block into a molecule, keeping hydrogens.
mol_list = []
for m in lis:
    m1 = Chem.MolFromMolBlock(m, removeHs=False)
    mol_list.append(m1)

# Shallow copy of the list (same molecule objects).
cdk2mol = [m for m in mol_list]

# Deep copy so that the alignment below does not touch the molecules in
# mol_list / cdk2mol.
cdk2mol2 = copy.deepcopy(cdk2mol)
crippen_contribs = [
    rdMolDescriptors._CalcCrippenContribs(mol) for mol in cdk2mol2
]
ref = cdk2mol_reference
ref_contrib = rdMolDescriptors._CalcCrippenContribs(ref)
# The [0:] slices select every element, i.e. all molecules are targets.
targets = cdk2mol2[0:]
targets_contrib = crippen_contribs[0:]

# Align each target onto the reference.
for i, target in enumerate(targets):
    crippenO3A = rdMolAlign.GetCrippenO3A(target, ref, targets_contrib[i],
                                          ref_contrib)
    crippenO3A.Align()

# Clear the viewer and display the reference molecule.
v.DeleteAll()
v.ShowMol(ref, name='ref', showOnly=False)
for i in range(len(targets)):
    name = f'probe_{i}'