def from_reaction_template(cls, template_smarts):
    """Build one graph of type ``cls`` from a reaction-template SMARTS.

    The SMARTS is parsed with ``ReactionTemplate``. Every reactant, then every
    product, is converted to a networkx graph and merged in that order with
    ``nx.disjoint_union``. Reactants and products are converted with separate
    ``rdkit_config`` objects that differ only in ``reactant_or_product``.

    Args:
        template_smarts: Template SMARTS handed to ``ReactionTemplate``.

    Returns:
        The merged graph, converted with ``_from_networkx(cls, ...)``.

    Raises:
        RuntimeError: If the maximum ``ReactingCenter`` node value is ``<= 0``
            (reported as missing reacting atoms in the reactants) or the
            minimum is ``>= 0`` (reported for the products).
    """
    template = ReactionTemplate(template_smarts)

    def _side_config(side):
        # Same options for both sides; only ``reactant_or_product`` varies.
        return rdkit_config(reaction_center=template.ReactingAtomsMN,
                            reactant_or_product=side,
                            IsSanitized=False,
                            set_morgan_identifier=False)

    def _to_nx(mol, config):
        return Graph.from_rdkit(mol, config).to_networkx()

    # Reactants: the first one seeds the merged graph.
    reactant_config = _side_config('reactant')
    merged = _to_nx(template.reactants[0], reactant_config)
    for mol in template.reactants[1:]:
        merged = nx.disjoint_union(merged, _to_nx(mol, reactant_config))

    # Products are appended after all reactants.
    product_config = _side_config('product')
    for mol in template.products:
        merged = nx.disjoint_union(merged, _to_nx(mol, product_config))

    graph = _from_networkx(cls, merged)

    # Sanity checks on the sign range of the 'ReactingCenter' node column.
    if graph.nodes.to_pandas()['ReactingCenter'].max() <= 0:
        raise RuntimeError(f'No reacting atoms are found in reactants: '
                           f'{template_smarts}')
    if graph.nodes.to_pandas()['ReactingCenter'].min() >= 0:
        raise RuntimeError(f'No reacting atoms are found in products: '
                           f'{template_smarts}')
    return graph
def from_cr(cls, cr, HASH):
    """Build one hashed graph of type ``cls`` from a chemical reaction object.

    Every reactant, then every product, of ``cr`` is converted with
    ``HashGraph.from_rdkit`` (using the placeholder hash ``'1'``) and merged
    in that order with ``nx.disjoint_union``. The merged graph receives
    ``HASH`` as its ``hash`` attribute.

    Args:
        cr: Reaction object; this function reads ``ReactingAtomsMN``,
            ``reactants``, ``products`` and ``reaction_smarts`` from it.
        HASH: Value stored on the resulting graph as ``hash``.

    Returns:
        The merged graph, converted with ``_from_networkx(cls, ...)``.

    Raises:
        RuntimeError: If the maximum ``ReactingCenter`` node value is ``<= 0``
            (reported as missing reacting atoms in the reactants) or the
            minimum is ``>= 0`` (reported for the products).
    """
    def _to_nx(mol, config):
        # Per-molecule hash is a placeholder; the real hash is set below.
        return HashGraph.from_rdkit(mol, '1', config).to_networkx()

    # Reactants: the first one seeds the merged graph.
    reactant_config = rdkit_config(reaction_center=cr.ReactingAtomsMN,
                                   reactant_or_product='reactant')
    merged = _to_nx(cr.reactants[0], reactant_config)
    for mol in cr.reactants[1:]:
        merged = nx.disjoint_union(merged, _to_nx(mol, reactant_config))

    # Products are appended after all reactants.
    product_config = rdkit_config(reaction_center=cr.ReactingAtomsMN,
                                  reactant_or_product='product')
    for mol in cr.products:
        merged = nx.disjoint_union(merged, _to_nx(mol, product_config))

    graph = _from_networkx(cls, merged)
    graph.hash = HASH

    # Sanity checks on the sign range of the 'ReactingCenter' node column.
    if graph.nodes.to_pandas()['ReactingCenter'].max() <= 0:
        raise RuntimeError(f'No reacting atoms are found in reactants: '
                           f'{cr.reaction_smarts}')
    if graph.nodes.to_pandas()['ReactingCenter'].min() >= 0:
        raise RuntimeError(f'No reacting atoms are found in products: '
                           f'{cr.reaction_smarts}')
    return graph
def __init__(self, data: List[MolecularGraph2D],
             concentration: List[float] = None,
             graph_type: Literal['single_graph', 'multi_graph'] = 'single_graph'):
    """Store the component data and build the combined graph representation.

    Args:
        data: Components of the mixture; each item must expose a ``graph``
            attribute.
        concentration: One value per item of ``data``. When ``None``, every
            component gets ``1.0``.
        graph_type: ``'single_graph'`` merges all component graphs into one
            disconnected ``HashGraph`` and normalizes its concentration.
            Any other value stores a flat list interleaving each graph with
            its concentration: ``[g0, c0, g1, c1, ...]``.
    """
    # read data point
    self.data = data
    # features_mol set None
    self.features_mol = None
    # set concentration
    if concentration is None:
        self.concentration = [1.0] * len(data)
    else:
        self.concentration = concentration

    graphs = [d.graph for d in self.data]
    # The original used a bare ``map(...)`` here. In Python 3 ``map`` returns
    # a lazy iterator, so ``update_concentration`` was never actually called.
    # An explicit loop applies the update; like ``map``, ``zip`` stops at the
    # shorter of the two sequences.
    for graph, value in zip(graphs, self.concentration):
        graph.update_concentration(value)

    # set graph
    self.graph_type = graph_type
    if graph_type == 'single_graph':
        # combine several graphs into a disconnected graph
        self.graph = nx.disjoint_union_all(
            [g.to_networkx() for g in graphs])
        self.graph = _from_networkx(HashGraph, self.graph)
        self.graph.normalize_concentration()
    else:
        self.graph = [
            rv for r in zip(graphs, self.concentration) for rv in r
        ]
def agent_from_cr(cls, cr, HASH, _rdkit_config=rdkit_config()):
    """Build one hashed graph of type ``cls`` from the agents of a reaction.

    Args:
        cr: Reaction object; only ``cr.agents`` is read.
        HASH: Value stored on the resulting graph as ``hash``.
        _rdkit_config: Conversion options passed to ``HashGraph``.
            NOTE(review): the default instance is created once, when the
            function is defined, and shared by all calls that omit it.

    Returns:
        When ``cr.agents`` is empty, the graph returned by
        ``HashGraph.from_smiles('[He]', HASH, _rdkit_config)``. Otherwise the
        disjoint union of all agent graphs, converted with
        ``_from_networkx(cls, ...)`` and tagged with ``HASH``.
    """
    # No agents: fall back to a single-helium placeholder graph.
    if len(cr.agents) == 0:
        return HashGraph.from_smiles('[He]', HASH, _rdkit_config)

    merged = None
    for mol in cr.agents:
        # Per-molecule hash is a placeholder; the real hash is set below.
        piece = HashGraph.from_rdkit(mol, '1', _rdkit_config).to_networkx()
        if merged is None:
            merged = piece
        else:
            merged = nx.disjoint_union(merged, piece)

    graph = _from_networkx(cls, merged)
    graph.hash = HASH
    return graph
def from_atom_list(cls, atom_list, HASH):
    """Build a fully connected graph of type ``cls`` from atomic numbers.

    Each entry of ``atom_list`` becomes one node carrying the
    ``ElementalMode1`` / ``ElementalMode2`` values looked up in
    ``emodes.dat`` (columns ``an``, ``em1``, ``em2``). Every pair of nodes is
    joined by an edge with ``Order`` set to ``1.``.

    Args:
        atom_list: Sequence of atomic numbers, one per node.
        HASH: Value stored on the resulting graph as ``hash``.

    Returns:
        The graph converted with ``_from_networkx(cls, ...)``.

    Raises:
        IndexError: If an atomic number has no row in ``emodes.dat``.
    """
    # Raw string: '\s' inside a plain literal is an invalid escape sequence
    # (SyntaxWarning on recent Python versions).
    emode = pd.read_csv(os.path.join(CWD, 'emodes.dat'), sep=r'\s+')
    n_atoms = len(atom_list)

    g = nx.Graph()
    for i, an in enumerate(atom_list):
        # Filter the table once per atom instead of once per attribute.
        row = emode[emode.an == an]
        if row.empty:
            # Same exception type as the original failing ``[0]`` lookup,
            # but with a message that names the offending value.
            raise IndexError(
                f"atomic number {an!r} not found in 'emodes.dat'")
        g.add_node(i)
        # ``to_numpy()`` replaces the deprecated ``Series.ravel()``.
        g.nodes[i]['ElementalMode1'] = row.em1.to_numpy()[0]
        g.nodes[i]['ElementalMode2'] = row.em2.to_numpy()[0]
        # Connect this atom to every later atom (complete graph).
        for j in range(i + 1, n_atoms):
            ij = (i, j)
            g.add_edge(*ij)
            g.edges[ij]['Order'] = 1.

    g = _from_networkx(cls, g)
    g.hash = HASH
    return g
def product_from_reaction_smarts(cls, reaction_smarts, HASH):
    """Build one hashed graph of type ``cls`` from the products of a reaction.

    Args:
        reaction_smarts: Reaction SMARTS handed to ``ChemicalReaction``.
        HASH: Value stored on the resulting graph as ``hash``.

    Returns:
        The disjoint union of all product graphs, converted with
        ``_from_networkx(cls, ...)`` and tagged with ``HASH``.
    """
    cr = ChemicalReaction(reaction_smarts)

    # NOTE(review): the config is built with reactant_or_product='reactant'
    # although the molecules converted below are the products. Kept exactly
    # as in the original; confirm this is intended.
    config = rdkit_config(reaction_center=cr.ReactingAtomsMN,
                          reactant_or_product='reactant')

    def _to_nx(mol):
        # Per-molecule hash is a placeholder; the real hash is set below.
        return HashGraph.from_rdkit(mol, '1', config).to_networkx()

    # The first product seeds the merged graph.
    merged = _to_nx(cr.products[0])
    for mol in cr.products[1:]:
        merged = nx.disjoint_union(merged, _to_nx(mol))

    graph = _from_networkx(cls, merged)
    graph.hash = HASH
    return graph
def _from_rdkit(cls, mol, bond_type='order', set_ring_list=True,
                set_ring_stereo=False, add_hydrogen=False, morgan_radius=3,
                depth=5):
    """Convert an RDKit molecule into a graph of type ``cls``.

    Nodes carry atomic number, four elemental-mode values read from
    ``emodes.dat``, charge, hydrogen count, hybridization, aromaticity,
    chiral tag and a Morgan substructure hash. Edges carry the bond
    order/type plus conjugation, stereo and symmetry flags.

    Args:
        mol: RDKit molecule.
        bond_type: ``'order'`` stores ``bond.GetBondTypeAsDouble()`` under
            ``'order'``; any other value stores ``bond.GetBondType()`` under
            ``'type'``.
        set_ring_list: When true, store ``ring_list`` and ``ring_number`` on
            every node.
        set_ring_stereo: When true, store ``ring_stereo`` on every edge.
            This step reads ``ring_list`` from the nodes, so it needs
            ``set_ring_list`` to be true as well.
        add_hydrogen: When true, hydrogens are added with ``Chem.AddHs``
            before anything else is computed.
        morgan_radius: Largest radius passed to the Morgan fingerprint.
        depth: Forwarded to ``get_atom_ring_stereo``.

    Returns:
        The graph converted with ``_from_networkx(cls, ...)``.

    Raises:
        RuntimeError: If some atom cannot be assigned a Morgan hash.
        Exception: If a ring bond would receive a second non-zero
            ``ring_stereo`` value.
    """
    g = nx.Graph()
    # Raw string: '\s' inside a plain literal is an invalid escape sequence
    # (SyntaxWarning on recent Python versions).
    emode = pd.read_csv(os.path.join(CWD, 'emodes.dat'), sep=r'\s+')

    # calculate morgan substructure hashing value
    if add_hydrogen:
        mol = Chem.AddHs(mol)
    morgan_info = dict()
    atomidx_hash_dict = dict()
    radius = morgan_radius
    Chem.GetMorganFingerprint(mol, radius, bitInfo=morgan_info,
                              useChirality=False)
    n_atoms = mol.GetNumAtoms()
    # Walk from the largest radius down so that each atom keeps the hash of
    # the largest environment it appears in.
    while len(atomidx_hash_dict) != n_atoms:
        if radius < 0:
            # No environment has a negative radius, so nothing more can be
            # assigned; the original looped forever in this situation.
            raise RuntimeError(
                'Could not assign a Morgan hash to every atom '
                f'({len(atomidx_hash_dict)} of {n_atoms} assigned)')
        for key, environments in morgan_info.items():
            if environments[0][1] != radius:
                continue
            for env in environments:
                if env[0] not in atomidx_hash_dict:
                    atomidx_hash_dict[env[0]] = key
        radius -= 1

    for i, atom in enumerate(mol.GetAtoms()):
        g.add_node(i)
        node = g.nodes[i]
        an = atom.GetAtomicNum()
        # Filter the table once per atom instead of once per attribute;
        # ``to_numpy()`` replaces the deprecated ``Series.ravel()``.
        modes = emode[emode.an == an]
        node['atomic_number'] = an
        node['em1'] = modes.em1.to_numpy()[0]
        node['em2'] = modes.em2.to_numpy()[0]
        node['em3'] = modes.em3.to_numpy()[0]
        node['em4'] = modes.em4.to_numpy()[0]
        node['charge'] = atom.GetFormalCharge()
        node['hcount'] = atom.GetTotalNumHs()
        node['hybridization'] = atom.GetHybridization()
        node['aromatic'] = atom.GetIsAromatic()
        node['chiral'] = get_chiral_tag(mol, atom)
        node['morgan_hash'] = atomidx_hash_dict[atom.GetIdx()]

    if set_ring_list:
        for i, rings in enumerate(get_ringlist(mol)):
            g.nodes[i]['ring_list'] = rings
            # A ring list of exactly [0] is counted as zero rings.
            if rings == [0]:
                g.nodes[i]['ring_number'] = 0
            else:
                g.nodes[i]['ring_number'] = len(rings)

    for bond in mol.GetBonds():
        ij = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
        g.add_edge(*ij)
        edge = g.edges[ij]
        if bond_type == 'order':
            edge['order'] = bond.GetBondTypeAsDouble()
        else:
            edge['type'] = bond.GetBondType()
            # NOTE(review): the source this block was reconstructed from had
            # lost its indentation; 'aromatic' is assumed to belong to this
            # branch only. Confirm against the original layout.
            edge['aromatic'] = bond.GetIsAromatic()
        edge['conjugated'] = bond.GetIsConjugated()
        edge['stereo'] = bond.GetStereo()
        edge['symmetry'] = IsSymmetric(mol, ij)
        # Plain truthiness, matching the ``if set_ring_stereo:`` test below.
        # The original ``is True`` skipped this initialisation for truthy
        # non-True values and the ring-stereo pass then failed on the
        # missing key.
        if set_ring_stereo:
            edge['ring_stereo'] = 0.

    if set_ring_stereo:
        bond_orientation_dict = get_bond_orientation_dict(mol)
        for ring_idx in mol.GetRingInfo().AtomRings():
            ring_size = len(ring_idx)
            atom_updown = []
            for idx in ring_idx:
                # Only nodes whose ring list has exactly one entry get an
                # up/down value; all others contribute 0.
                if len(g.nodes[idx]['ring_list']) != 1:
                    atom_updown.append(0)
                else:
                    atom = mol.GetAtomWithIdx(idx)
                    atom_updown.append(
                        get_atom_ring_stereo(
                            mol,
                            atom,
                            ring_idx,
                            depth=depth,
                            bond_orientation_dict=bond_orientation_dict
                        )
                    )
            atom_updown = np.array(atom_updown)
            for b in range(ring_size):
                # Neighbouring ring position, wrapping round at the end.
                e = b + 1 if b != ring_size - 1 else 0
                StereoOfRingBond = float(atom_updown[b] * atom_updown[e] *
                                         ring_size)
                if ring_idx[b] < ring_idx[e]:
                    ij = (ring_idx[b], ring_idx[e])
                else:
                    ij = (ring_idx[e], ring_idx[b])
                # A bond may be given a non-zero ring stereo only once.
                if g.edges[ij]['ring_stereo'] != 0.:
                    raise Exception(ij, g.edges[ij]['ring_stereo'],
                                    StereoOfRingBond)
                else:
                    g.edges[ij]['ring_stereo'] = StereoOfRingBond

    return _from_networkx(cls, g)
def _from_rdkit(cls, mol, rdkit_config):
    """Convert an RDKit molecule into a graph of type ``cls``.

    Node and edge attributes are filled in by the supplied ``rdkit_config``
    object (``set_node``, ``set_edge``, ``set_ghost_edge``,
    ``set_node_propogation``).

    Args:
        mol: RDKit molecule.
        rdkit_config: Conversion options and attribute setters. This function
            reads ``set_hydrogen_explicit``, ``set_ring_stereo`` and
            ``depth`` from it.

    Returns:
        The graph converted with ``_from_networkx(cls, ...)``.

    Raises:
        Exception: If a ring bond would receive a second non-zero
            ``RingStereo`` value.
    """
    if rdkit_config.set_hydrogen_explicit:
        mol = Chem.AddHs(mol)

    g = nx.Graph()

    # One node per atom; node index and RDKit atom index must agree.
    for i, atom in enumerate(mol.GetAtoms()):
        assert (atom.GetIdx() == i)
        g.add_node(i)
        rdkit_config.set_node(g.nodes[i], atom, mol)

    if mol.GetNumBonds() == 0:
        # Molecules without any bond (single heavy atoms such as water or
        # methane, metallic ions) still need at least one edge for the graph
        # kernel, so "ghost" edges are added: a self-loop for a single atom,
        # otherwise one edge between every pair of atoms.
        if mol.GetNumAtoms() == 1:
            ghost_pairs = [(0, 0)]
        else:
            rows, cols = np.triu_indices(mol.GetNumAtoms(), k=1)
            ghost_pairs = zip(rows, cols)
        for ij in ghost_pairs:
            g.add_edge(*ij)
            rdkit_config.set_ghost_edge(g.edges[ij])
    else:
        for bond in mol.GetBonds():
            ij = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
            g.add_edge(*ij)
            rdkit_config.set_edge(g.edges[ij], bond)

        # set ring stereo
        if rdkit_config.set_ring_stereo:
            bond_orientation_dict = get_bond_orientation_dict(mol)
            for ring_idx in mol.GetRingInfo().AtomRings():
                ring_size = len(ring_idx)
                atom_updown = []
                for idx in ring_idx:
                    # Only nodes with a 'Ring_count' of exactly 1 get an
                    # up/down value; all others contribute 0.
                    if g.nodes[idx]['Ring_count'] != 1:
                        atom_updown.append(0)
                    else:
                        atom = mol.GetAtomWithIdx(idx)
                        atom_updown.append(
                            get_atom_ring_stereo(
                                mol,
                                atom,
                                ring_idx,
                                depth=rdkit_config.depth,
                                bond_orientation_dict=bond_orientation_dict))
                atom_updown = np.array(atom_updown)
                for b in range(ring_size):
                    # Neighbouring ring position, wrapping round at the end.
                    e = b + 1 if b != ring_size - 1 else 0
                    StereoOfRingBond = float(atom_updown[b] *
                                             atom_updown[e] * ring_size)
                    if ring_idx[b] < ring_idx[e]:
                        ij = (ring_idx[b], ring_idx[e])
                    else:
                        ij = (ring_idx[e], ring_idx[b])
                    # A bond may be given a non-zero ring stereo only once.
                    if g.edges[ij]['RingStereo'] != 0.:
                        raise Exception(ij, g.edges[ij]['RingStereo'],
                                        StereoOfRingBond)
                    else:
                        g.edges[ij]['RingStereo'] = StereoOfRingBond

    # NOTE(review): the source this block was reconstructed from had lost its
    # indentation; the propagation calls are assumed to run for every
    # molecule, bond-less ones included. Confirm against the original layout.
    # Propagation of 'Chiral' (depth=1), 'FirstNeighbors' (depth=4) and
    # 'Aromatic' (depth=4) is disabled in the original.
    rdkit_config.set_node_propogation(g, mol, 'AtomicNumber', depth=5,
                                      sum=False, usehash=False)
    rdkit_config.set_node_propogation(g, mol, 'Hcount', depth=1, sum=True,
                                      usehash=False)
    return _from_networkx(cls, g)