Ejemplo n.º 1
0
  def _validate(self, vals, places=2, tol=1e-2, debug=False):
    """Check computed EState indices and their summary values against references.

    `vals` is an iterable of (SMILES, reference values) pairs. For every pair
    the per-atom result of EState.EStateIndices is handed to
    self._compareEstates together with the reference, and the
    Max/Min/MaxAbs/MinAbs summary functions must each lie within `tol` of the
    same statistic taken over the reference array.

    `places` is accepted but never read by this method. `debug` prints the
    computed indices for each molecule.
    """
    for smiles, reference in vals:
      expected = np.array(reference)
      mol = Chem.MolFromSmiles(smiles)
      computed = EState.EStateIndices(mol)
      if debug:  # pragma: nocover
        print(computed)
      failure_msg = 'bad EStates for smiles: {0}'.format(smiles)
      self._compareEstates(expected, computed, failure_msg, tol=tol)

      # Summary descriptors must agree with the extrema of the reference values.
      max_gap = abs(EState.MaxEStateIndex(mol) - max(expected))
      self.assertLess(max_gap, tol)
      min_gap = abs(EState.MinEStateIndex(mol) - min(expected))
      self.assertLess(min_gap, tol)
      max_abs_gap = abs(EState.MaxAbsEStateIndex(mol) - max(abs(expected)))
      self.assertLess(max_abs_gap, tol)
      min_abs_gap = abs(EState.MinAbsEStateIndex(mol) - min(abs(expected)))
      self.assertLess(min_abs_gap, tol)
Ejemplo n.º 2
0
 def test_GetPrincipleQuantumNumber(self):
     """Check EState.GetPrincipleQuantumNumber for every input from 1 to 120.

     The inputs are grouped into seven inclusive ranges; every value in the
     k-th range (counting from 1) must map to k.
     """
     # Inclusive (first, last) input range for expected results 1, 2, 3, ...
     ranges = [(1, 2), (3, 10), (11, 18), (19, 36), (37, 54), (55, 86),
               (87, 120)]
     expected = 0
     for first, last in ranges:
         expected += 1
         for value in range(first, last + 1):
             actual = EState.GetPrincipleQuantumNumber(value)
             self.assertEqual(actual, expected)
Ejemplo n.º 3
0
def atom_level_descriptors(mol,
                           include=('functional',),
                           asOneHot=False,
                           ORIGINAL_VERSION=False):
    '''
    Build per-atom descriptor lists for an RDKit molecule.

    mol: RDKit molecule. Gasteiger charges are computed on it in place when
        'functional' is requested.
    include: container of group names; 'functional' and 'structural' are
        recognised. Only membership tests are performed on it.
    asOneHot, ORIGINAL_VERSION: forwarded unchanged to atom_structural.

    returns: (labels, attributes), where attributes is an N_atom-long list of
        lists. Each functional descriptor adds one value per atom and one
        label; the structural group adds however many values atom_structural
        returns per atom but only a single placeholder label.
    '''
    attributes = [[] for _ in mol.GetAtoms()]
    labels = []

    if 'functional' in include:
        # Crippen contributions come as (logp, mr) pairs; compute them once.
        for i, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(mol)):
            attributes[i].append(contrib[0])
            attributes[i].append(contrib[1])
        labels.append('Crippen contribution to logp')
        labels.append('Crippen contribution to mr')

        for i, value in enumerate(rdMolDescriptors._CalcTPSAContribs(mol)):
            attributes[i].append(value)
        labels.append('TPSA contribution')

        for i, value in enumerate(rdMolDescriptors._CalcLabuteASAContribs(mol)[0]):
            attributes[i].append(value)
        labels.append('Labute ASA contribution')

        for i, value in enumerate(EState.EStateIndices(mol)):
            attributes[i].append(value)
        labels.append('EState Index')

        rdPartialCharges.ComputeGasteigerCharges(mol)
        charge_props = (
            ('_GasteigerCharge', 'Gasteiger partial charge'),
            ('_GasteigerHCharge', 'Gasteiger hydrogen partial charge'),
        )
        for prop_name, label in charge_props:
            for i, atom in enumerate(mol.GetAtoms()):
                charge = float(atom.GetProp(prop_name))
                # Gasteiger charges are sometimes NaN (or infinite); store 0.0
                # instead so downstream consumers only ever see finite values.
                attributes[i].append(charge if np.isfinite(charge) else 0.0)
            labels.append(label)

    if 'structural' in include:
        for i, atom_attributes in enumerate(attributes):
            atom_attributes.extend(
                atom_structural(mol.GetAtomWithIdx(i),
                                asOneHot=asOneHot,
                                ORIGINAL_VERSION=ORIGINAL_VERSION))
        labels.append('--many structural--')

    return (labels, attributes)
Ejemplo n.º 4
0
    def _validate(self, vals, tol=1e-2, show=0):
        """Assert that computed EState indices match reference values.

        `vals` is an iterable of (SMILES, reference values) pairs. For each
        pair the largest absolute difference between the reference array and
        EState.EStateIndices(mol) must be below `tol`; otherwise an
        AssertionError naming the SMILES is raised. A truthy `show` prints the
        computed indices.
        """
        for smi, ans in vals:
            mol = Chem.MolFromSmiles(smi)
            ans = numpy.array(ans)
            inds = EState.EStateIndices(mol)

            maxV = max(abs(ans - inds))
            if show:
                # Single-argument call form prints identically on Python 2 and 3.
                print(inds)
            assert maxV < tol, 'bad EStates for smiles: %s' % (smi)
Ejemplo n.º 5
0
def assignProperties(mol):
    '''
    Store atom-level descriptors on the atoms of `mol` as double properties.

    Properties written per atom: 'crippen_logp', 'crippen_mr', 'tpsa', 'asa'
    and 'estate'. Gasteiger charges are computed last via
    rdPartialCharges.ComputeGasteigerCharges. Nothing is returned.
    '''
    # Crippen contributions come as pairs: element 0 -> logp, element 1 -> mr.
    for idx, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(mol)):
        atom = mol.GetAtomWithIdx(idx)
        atom.SetDoubleProp('crippen_logp', contrib[0])
        atom.SetDoubleProp('crippen_mr', contrib[1])

    # Single-valued descriptors, each computed lazily and in this order.
    scalar_sources = (
        ('tpsa', lambda: rdMolDescriptors._CalcTPSAContribs(mol)),
        ('asa', lambda: rdMolDescriptors._CalcLabuteASAContribs(mol)[0]),
        ('estate', lambda: EState.EStateIndices(mol)),
    )
    for prop_name, compute in scalar_sources:
        for idx, value in enumerate(compute()):
            mol.GetAtomWithIdx(idx).SetDoubleProp(prop_name, value)

    # Provides '_GasteigerCharge' and '_GasteigerHCharge'.
    rdPartialCharges.ComputeGasteigerCharges(mol)
Ejemplo n.º 6
0
    def test_cacheEstate(self):
        """Exercise the `_eStateIndices` cache that EStateIndices keeps on the mol."""
        cache_attr = '_eStateIndices'
        label = 'cacheTest'
        butane = Chem.MolFromSmiles('CCCC')
        reference = [2.18, 1.32, 1.32, 2.18]

        # A fresh molecule carries no cached E-states.
        self.assertFalse(hasattr(butane, cache_attr))
        computed = EState.EStateIndices(butane)
        self._compareEstates(computed, reference, label)

        # After one calculation the result is stored on the molecule.
        self.assertTrue(hasattr(butane, cache_attr))

        # Plant a sentinel: with force=False the stored value must be returned as is.
        butane._eStateIndices = 'cached'
        self.assertTrue(hasattr(butane, cache_attr))

        from_cache = EState.EStateIndices(butane, force=False)
        self.assertEqual(from_cache, 'cached')

        # The default call recalculates and overwrites the sentinel.
        recomputed = EState.EStateIndices(butane)
        self._compareEstates(recomputed, reference, label)
        self._compareEstates(butane._eStateIndices, reference, label)
Ejemplo n.º 7
0
def get_molecular_attributes(rdmol):
    """
    Compute seven atom-level attributes for every atom of a molecule.

    The values appended for each atom are, in order:
      [Crippen contribution to logp,
       Crippen contribution to mr,
       TPSA contribution,
       Labute ASA contribution,
       EState Index,
       Gasteiger partial charge,
       Gasteiger hydrogen partial charge]

    Parameters
    ----------
    rdmol : rdkit.Chem.rdchem.Mol
        RDKit molecule. Gasteiger charges are computed on it in place.

    Returns
    -------
    attributes : list of list
        One inner list per atom, in atom order, holding the values above.
        Gasteiger values that are NaN or infinite (either sign) are replaced
        by 0.0.
    """
    attributes = [[] for _ in rdmol.GetAtoms()]

    for i, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(rdmol)):
        attributes[i].append(contrib[0])
        attributes[i].append(contrib[1])
    for i, value in enumerate(rdMolDescriptors._CalcTPSAContribs(rdmol)):
        attributes[i].append(value)
    for i, value in enumerate(rdMolDescriptors._CalcLabuteASAContribs(rdmol)[0]):
        attributes[i].append(value)
    for i, value in enumerate(EState.EStateIndices(rdmol)):
        attributes[i].append(value)

    rdPartialCharges.ComputeGasteigerCharges(rdmol)
    for prop_name in ('_GasteigerCharge', '_GasteigerHCharge'):
        for i, atom in enumerate(rdmol.GetAtoms()):
            charge = float(atom.GetProp(prop_name))
            # isfinite rejects NaN, +inf and -inf alike; a plain `< inf` test
            # would let -inf through.
            attributes[i].append(charge if np.isfinite(charge) else 0.0)

    return attributes
Ejemplo n.º 8
0
 def calculate(self):
     """Return a 2-tuple for ``self.mol``: (EState.TypeAtoms result, EState.EStateIndices result)."""
     atom_types = EState.TypeAtoms(self.mol)
     estate_indices = EState.EStateIndices(self.mol)
     return atom_types, estate_indices
Ejemplo n.º 9
0
def molToGraph(rdmol, bondtype_list_order, atomtype_list_order, molecular_attributes = False):
    '''
    Converts an RDKit molecule to an attributed undirected graph.

    rdmol: RDKit molecule. Gasteiger charges are computed on it in place when
        molecular_attributes is truthy.
    bondtype_list_order: passed to oneHotVector as the category list for each
        bond's type key, and stored on the graph.
    atomtype_list_order: passed to oneHotVector as the category list for each
        atom's atomic number, and stored on the graph.
    molecular_attributes: when truthy, seven extra per-atom descriptors
        (Crippen logp, Crippen mr, TPSA, Labute ASA, EState index, Gasteiger
        charge, Gasteiger hydrogen charge) are handed to atomAttributes as
        extra_attributes; otherwise an empty list is passed.

    Returns the populated Graph (edges, nodes, num_edges, num_nodes).
    '''
    # Initialize
    graph = Graph()
    graph.molecular_attributes = molecular_attributes
    graph.bondtype_list_order = bondtype_list_order
    graph.atomtype_list_order = atomtype_list_order

    # Calculate atom-level molecule descriptors
    attributes = [[] for _ in rdmol.GetAtoms()]
    if molecular_attributes:
        # Crippen contributions are (logp, mr) pairs; compute them once.
        for i, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(rdmol)):
            attributes[i].append(contrib[0])
            attributes[i].append(contrib[1])

        for i, value in enumerate(rdMolDescriptors._CalcTPSAContribs(rdmol)):
            attributes[i].append(value)

        for i, value in enumerate(rdMolDescriptors._CalcLabuteASAContribs(rdmol)[0]):
            attributes[i].append(value)

        for i, value in enumerate(EState.EStateIndices(rdmol)):
            attributes[i].append(value)

        rdPartialCharges.ComputeGasteigerCharges(rdmol)
        for prop_name in ('_GasteigerCharge', '_GasteigerHCharge'):
            for i, atom in enumerate(rdmol.GetAtoms()):
                charge = float(atom.GetProp(prop_name))
                # Gasteiger partial charges sometimes give NaN/inf; store 0.0.
                attributes[i].append(charge if np.isfinite(charge) else 0.0)

    # Add bonds
    for bond in rdmol.GetBonds():
        edge = Edge()
        edge.i = bond.GetIdx()
        edge.attributes = bondAttributes(bond)
        edge.orderAtt = list(oneHotVector(bond.GetBondTypeAsDouble(), [1.0, 1.5, 2.0, 3.0]))
        edge.aromAtt = list(oneHotVector(bond.GetIsAromatic(), [1.0, 0.0]))
        edge.conjAtt = list(oneHotVector(bond.GetIsConjugated(), [1.0, 0.0]))
        edge.ringAtt = list(oneHotVector(bond.IsInRing(), [1.0, 0.0]))

        # Bond type key: the two end atoms' atomic numbers, smaller first,
        # joined by an underscore.
        begin_num = bond.GetBeginAtom().GetAtomicNum()
        end_num = bond.GetEndAtom().GetAtomicNum()
        if begin_num < end_num:
            bond_type = str(begin_num) + '_' + str(end_num)
        else:
            bond_type = str(end_num) + '_' + str(begin_num)
        edge.attributesAtt = np.array(
            list(oneHotVector(bond_type, bondtype_list_order)), dtype=att_dtype)

        edge.connects = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
        graph.edges.append(edge)

    # Add atoms
    for k, atom in enumerate(rdmol.GetAtoms()):
        node = Node()
        node.i = atom.GetIdx()
        node.attributes = atomAttributes(atom, extra_attributes = attributes[k])
        node.attributesAtt = np.array(
            list(oneHotVector(atom.GetAtomicNum(), atomtype_list_order)), dtype=att_dtype)
        # Each neighbor is recorded as (neighbor atom index, connecting bond index).
        for neighbor in atom.GetNeighbors():
            node.neighbors.append((
                neighbor.GetIdx(),
                rdmol.GetBondBetweenAtoms(
                    atom.GetIdx(),
                    neighbor.GetIdx()
                ).GetIdx()
            ))
        graph.nodes.append(node)

    # Add counts, for convenience
    graph.num_edges = len(graph.edges)
    graph.num_nodes = len(graph.nodes)
    return graph
Ejemplo n.º 10
0
def atom_level_descriptors(mol, include = ('functional',), asOneHot = False, ORIGINAL_VERSION = False):
    """
    Build per-atom descriptor lists for an RDKit molecule.

    Args:
        mol: RDKit molecule. Gasteiger charges are computed on it in place
            when 'functional' is requested.
        include: container of group names; 'functional', 'structural' and
            'dftb' are recognised. Only membership tests are performed on it.
        asOneHot, ORIGINAL_VERSION: forwarded unchanged to atom_structural.

    Returns:
        (labels, attributes), where attributes is an N_atom-long list of
        lists. Each functional descriptor adds one value per atom and one
        label; the structural and DFTB groups add several values per atom
        but only a single placeholder label each.
    """
    attributes = [[] for _ in mol.GetAtoms()]
    labels = []

    if 'functional' in include:
        # Crippen contributions come as (logp, mr) pairs; compute them once.
        for i, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(mol)):
            attributes[i].append(contrib[0])
            attributes[i].append(contrib[1])
        labels.append('Crippen contribution to logp')
        labels.append('Crippen contribution to mr')

        for i, value in enumerate(rdMolDescriptors._CalcTPSAContribs(mol)):
            attributes[i].append(value)
        labels.append('TPSA contribution')

        for i, value in enumerate(rdMolDescriptors._CalcLabuteASAContribs(mol)[0]):
            attributes[i].append(value)
        labels.append('Labute ASA contribution')

        for i, value in enumerate(EState.EStateIndices(mol)):
            attributes[i].append(value)
        labels.append('EState Index')

        rdPartialCharges.ComputeGasteigerCharges(mol)
        charge_props = (
            ('_GasteigerCharge', 'Gasteiger partial charge'),
            ('_GasteigerHCharge', 'Gasteiger hydrogen partial charge'),
        )
        for prop_name, label in charge_props:
            for i, atom in enumerate(mol.GetAtoms()):
                charge = float(atom.GetProp(prop_name))
                # Gasteiger charges are sometimes NaN (or infinite); store 0.0
                # instead so downstream consumers only ever see finite values.
                attributes[i].append(charge if np.isfinite(charge) else 0.0)
            labels.append(label)

    if 'structural' in include:
        for i, atom_attributes in enumerate(attributes):
            atom_attributes.extend(
                atom_structural(mol.GetAtomWithIdx(i),
                                asOneHot = asOneHot,
                                ORIGINAL_VERSION = ORIGINAL_VERSION))
        labels.append('--many structural--')

    if 'dftb' in include:
        try:
            dftb_atom_atts = atom_dftb(mol)
        except (ValueError, KeyError) as e:
            # Per the original note, ValueError here is often an invalid
            # element. Either failure falls back to 18 zeros per atom.
            print(e)
            dftb_atom_atts = [[0] * 18 for _ in range(mol.GetNumAtoms())]
        for i in range(mol.GetNumAtoms()):
            attributes[i].extend(dftb_atom_atts[i])
        labels.append('--many DFTB--')

    return (labels, attributes)
Ejemplo n.º 11
0
def molToGraph(rdmol):
    '''
    Converts an RDKit molecule to an attributed undirected graph
    @param rdmol: RDKit molecule; Gasteiger charges are computed on it in place
    @return: Graph with edges, nodes, nodeNum, nodeFeatureDim and, when the
             molecule has at least one bond, edgeFeatureDim
    @raise IndexError: when the molecule has no atoms (nodeFeatureDim is read
             from the first node)
    '''
    graph = Graph()

    # Calculate atom-level molecule descriptors
    nodesFeatures = [[] for _ in rdmol.GetAtoms()]

    #6 (25) Crippen contribution to logp
    #7 (26) Crippen contribution to mr
    # Both come from the same (logp, mr) pairs, so compute them once.
    for i, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(rdmol)):
        nodesFeatures[i].append(contrib[0])
        nodesFeatures[i].append(contrib[1])

    #8 (27) TPSA contribution
    for i, value in enumerate(rdMolDescriptors._CalcTPSAContribs(rdmol)):
        nodesFeatures[i].append(value)

    #9 (28) Labute ASA contribution
    for i, value in enumerate(rdMolDescriptors._CalcLabuteASAContribs(rdmol)[0]):
        nodesFeatures[i].append(value)

    #10 (29) EState Index
    for i, value in enumerate(EState.EStateIndices(rdmol)):
        nodesFeatures[i].append(value)

    # Calculate Gasteiger charges for features 30 and 31
    rdPartialCharges.ComputeGasteigerCharges(rdmol)
    # The computed charges are stored on each atom with computed property
    # under the name _GasteigerCharge and _GasteigerHCharge.
    # Values could be NaN.

    #11 (30) _GasteigerCharge, then #12 (31) _GasteigerHCharge
    for prop_name in ('_GasteigerCharge', '_GasteigerHCharge'):
        for i, atom in enumerate(rdmol.GetAtoms()):
            # Parse the property once; NaN/inf values are stored as 0.0.
            charge = float(atom.GetProp(prop_name))
            nodesFeatures[i].append(charge if np.isfinite(charge) else 0.0)

    # Add edges to graph
    for bond in rdmol.GetBonds():
        edge = Edge()
        edge.id = bond.GetIdx()
        edge.features = getBondFeatures(bond).astype('float32')
        edge.ends = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
        graph.edges.append(edge)

    # Add nodes to graph
    for i, atom in enumerate(rdmol.GetAtoms()):
        node = Node()
        node.id = atom.GetIdx()
        node.features = getAtomFeatures(atom, nodesFeatures[i])

        # Each neighbor is recorded as (neighbor atom index, connecting bond index).
        for neighbor in atom.GetNeighbors():
            node.neighbors.append(
                (neighbor.GetIdx(),
                 rdmol.GetBondBetweenAtoms(atom.GetIdx(),
                                           neighbor.GetIdx()).GetIdx()))

        graph.nodes.append(node)

    graph.nodeNum = len(graph.nodes)
    graph.nodeFeatureDim = len(graph.nodes[0].features)
    if graph.edges:
        graph.edgeFeatureDim = len(graph.edges[0].features)

    return graph
Ejemplo n.º 12
0
def make_graph(name, gb_structure, gb_scalar_coupling):
    """Build the Graph record (atom, edge and coupling features) for one molecule.

    name: group key looked up in both group-by objects; also used to locate
        the molecule's .xyz file.
    gb_structure: grouped structure table; the group for `name` supplies the
        atom_index, atom, x, y and z columns.
    gb_scalar_coupling: grouped coupling table; the group for `name` supplies
        id, atom_index_0, atom_index_1, type, scalar_coupling_constant and the
        fc/sd/pso/dso columns.

    Returns the Graph built from the per-atom feature arrays, the features of
    every ordered atom pair (i != j), and the Coupling record.

    Raises ValueError if a coupling's (atom_index_0, atom_index_1) pair is not
    a row of the edge index.
    """
    # ['id', 'molecule_name', 'atom_index_0', 'atom_index_1', 'type','scalar_coupling_constant']
    coupling_df = gb_scalar_coupling.get_group(name)

    # [molecule_name,atom_index,atom,x,y,z]
    df = gb_structure.get_group(name)
    df = df.sort_values(['atom_index'], ascending=True)
    a = df.atom.values.tolist()
    xyz = df[['x', 'y', 'z']].values

    mol = mol_from_axyz(a, xyz)
    mol_op = openbabel.OBMol()
    obConversion.ReadFile(mol_op, f'../input/champs-scalar-coupling/structures/{name}.xyz')

    factory = ChemicalFeatures.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    feature = factory.GetFeaturesForMol(mol)

    num_atom = mol.GetNumAtoms()
    symbol = np.zeros((num_atom, len(SYMBOL)), np.uint8)  # category
    acceptor = np.zeros((num_atom, 1), np.uint8)
    donor = np.zeros((num_atom, 1), np.uint8)
    aromatic = np.zeros((num_atom, 1), np.uint8)
    hybridization = np.zeros((num_atom, len(HYBRIDIZATION)), np.uint8)
    num_h = np.zeros((num_atom, 1), np.float32)  # real
    atomic = np.zeros((num_atom, 1), np.float32)

    # new features
    degree = np.zeros((num_atom, 1), np.uint8)
    formalCharge = np.zeros((num_atom, 1), np.float32)
    chiral_tag = np.zeros((num_atom, 1), np.uint8)
    crippen_contribs = np.zeros((num_atom, 2), np.float32)
    tpsa = np.zeros((num_atom, 1), np.float32)
    labute_asac = np.zeros((num_atom, 1), np.float32)
    gasteiger_charges = np.zeros((num_atom, 1), np.float32)
    esataindices = np.zeros((num_atom, 1), np.float32)
    atomic_radiuss = np.zeros((num_atom, 1), np.float32)
    electronegate = np.zeros((num_atom, 1), np.float32)
    electronegate_sqre = np.zeros((num_atom, 1), np.float32)
    mass = np.zeros((num_atom, 1), np.float32)
    van = np.zeros((num_atom, 1), np.float32)
    cov = np.zeros((num_atom, 1), np.float32)
    ion = np.zeros((num_atom, 1), np.float32)

    # Whole-molecule descriptor tables: compute each once and index per atom,
    # instead of recomputing the full table on every loop iteration.
    crippen_table = rdMolDescriptors._CalcCrippenContribs(mol)
    tpsa_table = rdMolDescriptors._CalcTPSAContribs(mol)
    labute_table = rdMolDescriptors._CalcLabuteASAContribs(mol)[0]
    estate_table = EState.EStateIndices(mol)

    for i in range(num_atom):
        atom = mol.GetAtomWithIdx(i)
        atom_op = mol_op.GetAtomById(i)
        element = atom.GetSymbol()

        symbol[i] = one_hot_encoding(element, SYMBOL)
        aromatic[i] = atom.GetIsAromatic()
        hybridization[i] = one_hot_encoding(atom.GetHybridization(), HYBRIDIZATION)
        num_h[i] = atom.GetTotalNumHs(includeNeighbors=True)
        atomic[i] = atom.GetAtomicNum()

        degree[i] = atom.GetTotalDegree()
        formalCharge[i] = atom.GetFormalCharge()
        chiral_tag[i] = int(atom.GetChiralTag())

        crippen_contribs[i] = crippen_table[i]
        tpsa[i] = tpsa_table[i]
        labute_asac[i] = labute_table[i]
        # Partial charge is taken from the Open Babel atom, not from RDKit.
        gasteiger_charges[i] = atom_op.GetPartialCharge()
        esataindices[i] = estate_table[i]

        # Element-keyed lookup tables defined elsewhere in the module.
        atomic_radiuss[i] = atomic_radius[element]
        electronegate[i] = electronegativity[element]
        electronegate_sqre[i] = electronegativity_square[element]
        mass[i] = atomic_mass[element]
        van[i] = vanderwaalsradius[element]
        cov[i] = covalenzradius[element]
        ion[i] = ionization_energy[element]

    # Flag atoms that belong to a Donor or Acceptor chemical feature.
    for feat in feature:
        family = feat.GetFamily()
        if family == 'Donor':
            for i in feat.GetAtomIds():
                donor[i] = 1
        elif family == 'Acceptor':
            for i in feat.GetAtomIds():
                acceptor[i] = 1

    # One edge per ordered pair of distinct atoms.
    num_edge = num_atom * num_atom - num_atom
    edge_index = np.zeros((num_edge, 2), np.uint32)
    bond_type = np.zeros((num_edge, len(BOND_TYPE)), np.uint32)
    distance = np.zeros((num_edge, 1), np.float32)
    angle = np.zeros((num_edge, 1), np.float32)

    norm_xyz = preprocessing.normalize(xyz, norm='l2')

    ij = 0
    for i in range(num_atom):
        for j in range(num_atom):
            if i == j:
                continue
            edge_index[ij] = [i, j]

            # Pairs without a bond keep an all-zero bond_type row.
            bond = mol.GetBondBetweenAtoms(i, j)
            if bond is not None:
                bond_type[ij] = one_hot_encoding(bond.GetBondType(), BOND_TYPE)

            distance[ij] = np.linalg.norm(xyz[i] - xyz[j])
            # Dot product of the two L2-normalized coordinate vectors.
            angle[ij] = (norm_xyz[i] * norm_xyz[j]).sum()

            ij += 1

    # NOTE(review): 1.8897... is the Angstrom -> Bohr factor; presumably xyz is
    # in Angstrom at this point. Confirm against the data source.
    xyz = xyz * 1.889726133921252

    system = System(symbols=a, positions=xyz)
    acsf = ACSF_GENERATOR.create(system)

    # Map every coupling pair to its row in edge_index. The list form of
    # edge_index is built once rather than once per coupling.
    edge_rows = edge_index.tolist()
    coupling_pairs = coupling_df[['atom_index_0', 'atom_index_1']].values
    pair_edge_ids = np.array([edge_rows.index(pair) for pair in coupling_pairs.tolist()])

    coupling_edge_index = np.concatenate(
        [coupling_pairs, pair_edge_ids.reshape(len(pair_edge_ids), 1)], axis=1)

    coupling = Coupling(coupling_df['id'].values,
                        coupling_df[['fc', 'sd', 'pso', 'dso']].values,
                        coupling_edge_index,
                        np.array([COUPLING_TYPE.index(t) for t in coupling_df.type.values], np.int32),
                        coupling_df['scalar_coupling_constant'].values,
                        )

    graph = Graph(
        name,
        Chem.MolToSmiles(mol),
        [a, xyz],
        [acsf, symbol, acceptor, donor, aromatic, hybridization, num_h, atomic, degree, formalCharge, chiral_tag,
         crippen_contribs, tpsa, labute_asac, gasteiger_charges, esataindices, atomic_radiuss, electronegate,
         electronegate_sqre, mass, van, cov, ion],
        [bond_type, distance, angle, ],
        edge_index,
        coupling,
    )

    return graph
Ejemplo n.º 13
0
def molToGraph(rdmol, molecular_attributes=False):
    '''
    Converts an RDKit molecule to an attributed undirected graph.

    rdmol: RDKit molecule. Gasteiger charges are computed on it in place when
        molecular_attributes is truthy.
    molecular_attributes: when truthy, seven extra per-atom descriptors
        (Crippen logp, Crippen mr, TPSA, Labute ASA, EState index, Gasteiger
        charge, Gasteiger hydrogen charge) are handed to atomAttributes as
        extra_attributes; otherwise an empty list is passed.

    Returns the populated Graph (edges, nodes, num_edges, num_nodes).
    '''
    # Initialize
    graph = Graph()
    graph.molecular_attributes = molecular_attributes

    # Calculate atom-level molecule descriptors
    attributes = [[] for _ in rdmol.GetAtoms()]
    if molecular_attributes:
        # Crippen contributions are (logp, mr) pairs; compute them once.
        for i, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(rdmol)):
            attributes[i].append(contrib[0])
            attributes[i].append(contrib[1])

        for i, value in enumerate(rdMolDescriptors._CalcTPSAContribs(rdmol)):
            attributes[i].append(value)

        for i, value in enumerate(rdMolDescriptors._CalcLabuteASAContribs(rdmol)[0]):
            attributes[i].append(value)

        for i, value in enumerate(EState.EStateIndices(rdmol)):
            attributes[i].append(value)

        rdPartialCharges.ComputeGasteigerCharges(rdmol)
        for prop_name in ('_GasteigerCharge', '_GasteigerHCharge'):
            for i, atom in enumerate(rdmol.GetAtoms()):
                charge = float(atom.GetProp(prop_name))
                # Gasteiger partial charges sometimes give NaN/inf; store 0.0.
                attributes[i].append(charge if np.isfinite(charge) else 0.0)

    # Add bonds
    for bond in rdmol.GetBonds():
        edge = Edge()
        edge.i = bond.GetIdx()
        edge.attributes = bondAttributes(bond)
        edge.connects = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
        graph.edges.append(edge)

    # Add atoms
    for k, atom in enumerate(rdmol.GetAtoms()):
        node = Node()
        node.i = atom.GetIdx()
        node.attributes = atomAttributes(atom, extra_attributes=attributes[k])
        # Each neighbor is recorded as (neighbor atom index, connecting bond index).
        for neighbor in atom.GetNeighbors():
            node.neighbors.append(
                (neighbor.GetIdx(),
                 rdmol.GetBondBetweenAtoms(atom.GetIdx(),
                                           neighbor.GetIdx()).GetIdx()))
        graph.nodes.append(node)

    # Add counts, for convenience
    graph.num_edges = len(graph.edges)
    graph.num_nodes = len(graph.nodes)
    return graph