def from_qc_json(cls, qc_json) -> "ReadInput":
    """
    Build an instance from a QC JSON object.

    The symbols, connectivity and geometry of ``qc_json`` are read. The geometry is
    reshaped to one row of three values per symbol and scaled by BOHR_TO_ANGS
    (presumably a Bohr to Angstrom conversion).

    TODO: we need to be able to read mapped smiles for this to work with
    stereochem and aromaticity.

    NOTE(review): the topology graph and the atom list assembled below are never
    handed to ``cls``; only the coordinates are. Confirm whether that is intended.
    """
    graph = nx.Graph()
    atom_list = []

    # One Atom and one graph node per symbol, numbered in input order.
    for index, symbol in enumerate(qc_json.symbols):
        element_number = Element().number(symbol)
        new_atom = Atom(
            atomic_number=element_number,
            atom_index=index,
            atom_name=f"{symbol}{index}",
        )
        atom_list.append(new_atom)
        graph.add_node(index)

    # Only the first two entries of each connectivity record are used as the edge ends.
    for connection in qc_json.connectivity:
        graph.add_edge(*connection[:2])

    geometry = np.array(qc_json.geometry)
    coords = geometry.reshape((len(atom_list), 3)) * BOHR_TO_ANGS

    atom_list = atom_list or None
    return cls(name=None, rdkit_mol=None, coords=coords)
def _mol_from_rdkit(self):
    """
    Fill in the name, topology, coordinates and atoms from ``self.rdkit_mol``.

    The name is only read from the molecule's "_Name" property when ``self.name`` is None.
    Atom names are looked up in this order: the PDB monomer info, the mol2
    "_TriposAtomName" property, and finally a generated "<symbol><index>" name.
    ``self.atoms`` is set to None when the molecule has no atoms.
    """
    mol = self.rdkit_mol

    if self.name is None:
        self.name = mol.GetProp("_Name")

    self.topology = nx.Graph()
    collected = []

    for rd_atom in mol.GetAtoms():
        index = rd_atom.GetIdx()

        # Work out the atom name, falling back through the known sources.
        try:
            # PDB file extraction (no monomer info gives None, hence the AttributeError)
            name = rd_atom.GetMonomerInfo().GetName().strip()
        except AttributeError:
            try:
                # Mol2 file extraction
                name = rd_atom.GetProp("_TriposAtomName")
            except KeyError:
                # smiles and mol files carry no atom names, so generate one
                name = f"{rd_atom.GetSymbol()}{index}"

        # Instance the basic qube atom and record its SMARTS string as the type.
        qube_atom = Atom(
            rd_atom.GetAtomicNum(),
            index,
            name,
            formal_charge=rd_atom.GetFormalCharge(),
        )
        qube_atom.atom_type = rd_atom.GetSmarts()

        # The atom becomes a node; every neighbour becomes an edge and a bond entry.
        self.topology.add_node(index)
        for neighbour in rd_atom.GetNeighbors():
            neighbour_index = neighbour.GetIdx()
            self.topology.add_edge(index, neighbour_index)
            qube_atom.add_bond(neighbour_index)

        collected.append(qube_atom)

    self.coords = mol.GetConformer().GetPositions()
    self.atoms = collected or None
def _read_pdb(self):
    """
    Internal pdb reader. Only called when RDKit failed to read the pdb.

    Reads ``self.mol_input`` line by line and stores on the instance:
    ``self.topology`` (one node per atom record, one edge per CONECT bond),
    ``self.coords`` (array with one x, y, z row per atom record) and
    ``self.atoms`` (list of Atom objects, or None when no atom record was found).
    """
    coords = []
    atoms = []
    self.topology = nx.Graph()

    with open(self.mol_input) as pdb:
        for line in pdb:
            # Match on the record name at the start of the line so that free text
            # mentioning ATOM (e.g. in REMARK or TITLE records) is not parsed as an atom.
            if line.startswith(("ATOM", "HETATM")):
                fields = line.split()
                atom_name = fields[2]

                # The element symbol is read from columns 77-78, with any digits dropped.
                atomic_symbol = re.sub('[0-9]+', '', line[76:78]).strip()

                # If the element column is missing from the pdb, extract the
                # atomic_symbol from the atom name (last character and digits removed).
                if not atomic_symbol:
                    atomic_symbol = re.sub('[0-9]+', '', atom_name[:-1])

                atomic_number = Element().number(atomic_symbol)

                # Atoms are numbered from zero in file order; the same number is the graph node.
                atom_index = len(atoms)
                atoms.append(Atom(atomic_number, atom_index, atom_name))
                self.topology.add_node(atom_index)

                coords.append([
                    float(line[30:38]),
                    float(line[38:46]),
                    float(line[46:54]),
                ])

            elif line.startswith("CONECT"):
                fields = line.split()
                # Serial numbers in the file are one-based; the graph is zero-based.
                atom_index = int(fields[1]) - 1

                # Add an edge, and a bond entry on both atoms, for every non-zero partner.
                for token in fields[2:]:
                    serial = int(token)
                    if serial == 0:
                        continue
                    bonded_index = serial - 1
                    self.topology.add_edge(atom_index, bonded_index)
                    atoms[atom_index].add_bond(bonded_index)
                    atoms[bonded_index].add_bond(atom_index)

    # put the object back into the correct place
    self.coords = np.array(coords)
    self.atoms = atoms or None
def _read_pdb_protein(self):
    """
    Read the protein pdb file at ``self.mol_input``.

    Stores on the instance:
    ``self.topology`` (one node per atom record, one edge per CONECT bond),
    ``self.atoms`` (list of Atom objects named "<symbol><index>"),
    ``self.coords`` (array with one x, y, z row per atom record),
    ``self.pdb_names`` (the atom names as written in the file),
    ``self.Residues`` (the residue name of every atom record) and
    ``self.residues`` (the same sequence with consecutive repeats collapsed).

    :return: None
    """
    with open(self.mol_input, 'r') as pdb:
        lines = pdb.readlines()

    coords = []
    atoms = []
    self.topology = nx.Graph()
    self.Residues = []
    self.pdb_names = []

    for line in lines:
        # Match on the record name at the start of the line so that free text
        # mentioning ATOM (e.g. "REMARK 3 PROTEIN ATOMS") is not parsed as an atom.
        if line.startswith(("ATOM", "HETATM")):
            fields = line.split()

            # The element symbol is read from columns 77-78, with any digits dropped.
            atomic_symbol = re.sub('[0-9]+', '', line[76:78]).strip()

            # If the element column is missing from the pdb, extract the atomic_symbol from the atom name.
            if not atomic_symbol:
                atomic_symbol = re.sub('[0-9]+', '', fields[2])

            # now make sure we have a valid element
            # NOTE(review): every symbol other than Cl/Br is cut to its first letter,
            # so other two-letter elements would be truncated - confirm this is intended.
            if atomic_symbol.lower() not in ('cl', 'br'):
                atomic_symbol = atomic_symbol[0]

            # Atoms are numbered from zero in file order; the same number is the graph node.
            atom_index = len(atoms)
            atom_name = f'{atomic_symbol}{atom_index}'
            atoms.append(Atom(Element().number(atomic_symbol), atom_index, atom_name))

            self.pdb_names.append(fields[2])

            # also get the residue order from the pdb file so we can rewrite the file
            self.Residues.append(fields[3])

            self.topology.add_node(atom_index)

            coords.append([float(line[30:38]), float(line[38:46]), float(line[46:54])])

        elif line.startswith("CONECT"):
            conect_terms = line.split()
            # Serial numbers in the file are one-based; zero entries are skipped.
            for atom in conect_terms[2:]:
                if int(atom):
                    self.topology.add_edge(int(conect_terms[1]) - 1, int(atom) - 1)

    self.atoms = atoms
    self.coords = np.array(coords)
    self.residues = [res for res, _ in groupby(self.Residues)]
def _read_qc_json(self):
    """
    Extract the topology, atoms and coords from the QC JSON object in ``self.mol_input``.

    Each symbol becomes an Atom named "<symbol><index>" and a node in ``self.topology``;
    the first two entries of each connectivity record become an edge. The geometry is
    reshaped to one row of three values per atom and scaled by ``constants.BOHR_TO_ANGS``.
    ``self.atoms`` is set to None when there are no symbols.
    """
    qc_mol = self.mol_input
    self.topology = nx.Graph()
    parsed_atoms = []

    for index, symbol in enumerate(qc_mol.symbols):
        element_number = Element().number(symbol)
        parsed_atoms.append(
            Atom(atomic_number=element_number, atom_index=index, atom_name=f'{symbol}{index}')
        )
        self.topology.add_node(index)

    for connection in qc_mol.connectivity:
        self.topology.add_edge(*connection[:2])

    geometry = np.array(qc_mol.geometry)
    self.coords = geometry.reshape((len(parsed_atoms), 3)) * constants.BOHR_TO_ANGS
    self.atoms = parsed_atoms or None
def from_pdb(cls, file_name: str, name: Optional[str] = None):
    """
    Read the protein input pdb file.

    Every ATOM/HETATM record becomes an Atom named "<symbol><index>" with a formal
    charge of 0 and aromatic=False; every non-zero CONECT partner becomes a Bond with
    bond_order=1 and aromatic=False.

    :param file_name: path of the pdb file to read.
    :param name: name passed on to ``cls``; defaults to the stem of ``file_name``.
    :return: ``cls`` built from the atoms, bonds, coords, pdb atom names, residue
        names (consecutive repeats collapsed) and name.
    """
    with open(file_name, "r") as pdb:
        lines = pdb.readlines()

    coords = []
    atoms = []
    bonds = []
    residue_per_atom = []
    pdb_names = []

    for line in lines:
        # Match on the record name at the start of the line so that free text
        # mentioning ATOM (e.g. "REMARK 3 PROTEIN ATOMS") is not parsed as an atom.
        if line.startswith(("ATOM", "HETATM")):
            fields = line.split()

            # The element symbol is read from columns 77-78, with any digits dropped.
            atomic_symbol = re.sub("[0-9]+", "", line[76:78]).strip()

            # If the element column is missing from the pdb, extract the atomic_symbol from the atom name.
            if not atomic_symbol:
                atomic_symbol = re.sub("[0-9]+", "", fields[2])

            # now make sure we have a valid element
            # NOTE(review): every symbol other than Cl/Br is cut to its first letter,
            # so other two-letter elements would be truncated - confirm this is intended.
            if atomic_symbol.lower() not in ("cl", "br"):
                atomic_symbol = atomic_symbol[0]

            # Atoms are numbered from zero in file order.
            atom_index = len(atoms)

            # TODO should we use a protein pdb package for this?
            atoms.append(
                Atom(
                    atomic_number=Element().number(atomic_symbol),
                    atom_index=atom_index,
                    atom_name=f"{atomic_symbol}{atom_index}",
                    formal_charge=0,
                    aromatic=False,
                )
            )

            pdb_names.append(fields[2])

            # also get the residue order from the pdb file so we can rewrite the file
            residue_per_atom.append(fields[3])

            coords.append([
                float(line[30:38]),
                float(line[38:46]),
                float(line[46:54]),
            ])

        elif line.startswith("CONECT"):
            conect_terms = line.split()
            # Serial numbers in the file are one-based; zero entries are skipped.
            for atom in conect_terms[2:]:
                if int(atom):
                    bonds.append(
                        Bond(
                            atom1_index=int(conect_terms[1]) - 1,
                            atom2_index=int(atom) - 1,
                            bond_order=1,
                            aromatic=False,
                        )
                    )

    coords = np.array(coords)
    residues = [res for res, _ in groupby(residue_per_atom)]

    if name is None:
        name = Path(file_name).stem

    return cls(
        atoms=atoms,
        bonds=bonds,
        coords=coords,
        pdb_names=pdb_names,
        residues=residues,
        name=name,
    )
def _read_mol2(self):
    """
    Internal mol2 reader. Only called when RDKit failed to read the mol2.

    Reads ``self.mol_input`` and stores on the instance:
    ``self.topology`` (one node per atom line, one edge per bond line),
    ``self.coords`` (array with one x, y, z row per atom line) and
    ``self.atoms`` (list of Atom objects, or None when no atom line was found).
    Only the @<TRIPOS>ATOM and @<TRIPOS>BOND sections are parsed; lines in any
    other section, blank lines and lines starting with '#' are ignored.
    """
    section_marker = '@<TRIPOS>'
    coords = []
    atoms = []
    self.topology = nx.Graph()

    with open(self.mol_input, 'r') as mol2:
        section = None
        for line in mol2:
            stripped = line.strip()
            if not stripped or stripped.startswith('#'):
                continue

            # Any section header switches the parser state, so records of sections
            # we do not handle are never mistaken for atoms or bonds.
            if stripped.startswith(section_marker):
                section = stripped[len(section_marker):]
                continue

            if section == 'ATOM':
                fields = stripped.split()

                # Collect the atom name; the element symbol is guessed from its
                # first two characters with digits removed.
                atom_name = fields[1]
                atomic_symbol = re.sub('[0-9]+', '', atom_name[:2]).strip().title()
                atomic_number = Element().number(atomic_symbol)

                coords.append([float(fields[2]), float(fields[3]), float(fields[4])])

                # Get the atom type with the dot removed
                atom_type = fields[5].replace(".", "")

                # The index is taken before the atom is stored so it is zero-based,
                # matching the topology node ids and the bond indices used below.
                atom_index = len(atoms)
                self.topology.add_node(atom_index)

                # Make the qube_atom
                qube_atom = Atom(atomic_number, atom_index, atom_name)
                qube_atom.atom_type = atom_type
                atoms.append(qube_atom)

            elif section == 'BOND':
                fields = stripped.split()

                # Atom ids in the file are one-based; the graph is zero-based.
                atom_index, bonded_index = int(fields[1]) - 1, int(fields[2]) - 1
                self.topology.add_edge(atom_index, bonded_index)
                atoms[atom_index].add_bond(bonded_index)
                atoms[bonded_index].add_bond(atom_index)

    # put the object back into the correct place
    self.coords = np.array(coords)
    self.atoms = atoms or None
def __init__(self):
    """
    Set up a hard-coded three-atom molecule named "water" (one O, two H).

    Besides the atoms, the "qm" coordinates and the topology graph, the instance
    carries per-atom namespaces in ``ddec_data``, ``dipole_moment_data``,
    ``quadrupole_moment_data`` and ``cloud_pen_data``, each keyed 0-2 in the same
    order as the topology nodes.
    """
    self.name = "water"

    # NOTE(review): both hydrogens are created with the same third positional
    # argument (1). If that argument is the atom index, the second hydrogen
    # presumably should use 2 - confirm against the Atom signature.
    oxygen = Atom(8, "O", 0, -0.827099, [1, 2])
    first_hydrogen = Atom(1, "H", 1, 0.41355, [0])
    second_hydrogen = Atom(1, "H", 1, 0.41355, [0])
    self.atoms = [oxygen, first_hydrogen, second_hydrogen]

    qm_positions = np.array([
        [-0.00191868, 0.38989824, 0.0],
        [-0.7626204, -0.19870548, 0.0],
        [0.76453909, -0.19119276, 0.0],
    ])
    self.coords = {"qm": qm_positions}

    # Node 0 is bonded to nodes 1 and 2.
    self.topology = nx.Graph()
    for node in (0, 1, 2):
        self.topology.add_node(node)
    for partner in (1, 2):
        self.topology.add_edge(0, partner)

    # Values shared by both hydrogens in the ddec data.
    hydrogen_r_aim = 0.6833737065249833
    hydrogen_volume = 2.425428

    self.ddec_data = dict(enumerate([
        CustomNamespace(
            a_i=44312.906375462444,
            atomic_symbol="O",
            b_i=47.04465571009466,
            charge=-0.827099,
            r_aim=1.7571614241044191,
            volume=29.273005,
        ),
        CustomNamespace(
            a_i=0.0,
            atomic_symbol="H",
            b_i=0,
            charge=0.41355,
            r_aim=hydrogen_r_aim,
            volume=hydrogen_volume,
        ),
        CustomNamespace(
            a_i=0.0,
            atomic_symbol="H",
            b_i=0,
            charge=0.413549,
            r_aim=hydrogen_r_aim,
            volume=hydrogen_volume,
        ),
    ]))

    self.dipole_moment_data = dict(enumerate([
        CustomNamespace(x_dipole=-0.000457, y_dipole=0.021382, z_dipole=0.0),
        CustomNamespace(x_dipole=-0.034451, y_dipole=-0.010667, z_dipole=0.0),
        CustomNamespace(x_dipole=0.03439, y_dipole=-0.010372, z_dipole=-0.0),
    ]))

    self.quadrupole_moment_data = dict(enumerate([
        CustomNamespace(q_3z2_r2=-0.786822, q_x2_y2=0.307273, q_xy=0.001842, q_xz=-0.0, q_yz=0.0),
        CustomNamespace(q_3z2_r2=-0.097215, q_x2_y2=0.018178, q_xy=0.001573, q_xz=0.0, q_yz=0.0),
        CustomNamespace(q_3z2_r2=-0.097264, q_x2_y2=0.018224, q_xy=-0.001287, q_xz=-0.0, q_yz=-0.0),
    ]))

    self.cloud_pen_data = dict(enumerate([
        CustomNamespace(a=-0.126185, atomic_symbol="O", b=2.066872),
        CustomNamespace(a=-2.638211, atomic_symbol="H", b=1.917509),
        CustomNamespace(a=-2.627835, atomic_symbol="H", b=1.920499),
    ]))

    self.enable_symmetry = True
    self.v_site_error_factor = 1.005