def from_qc_json(cls, qc_json) -> "ReadInput":
    """
    Create an instance from a QC JSON object.

    The coordinates come from ``qc_json.geometry``: the flat array is reshaped
    to one row of three values per entry in ``qc_json.symbols`` and multiplied
    by ``BOHR_TO_ANGS`` (presumably a Bohr -> Angstrom conversion; confirm
    against the constant's definition).

    An atom list and a connectivity graph are also assembled from
    ``qc_json.symbols`` and ``qc_json.connectivity``, but only the coordinates
    are handed to the constructor.

    # TODO we need to be able to read mapped smiles for this to work with
    # stereochem and aromaticity

    :param qc_json: object exposing ``symbols``, ``connectivity`` and ``geometry``.
    :return: ``cls(name=None, rdkit_mol=None, coords=coords)``.
    """
    # One Atom per symbol; the atom name is the symbol followed by its index.
    atoms = [
        Atom(
            atomic_number=Element().number(symbol),
            atom_index=index,
            atom_name=f"{symbol}{index}",
        )
        for index, symbol in enumerate(qc_json.symbols)
    ]

    # NOTE(review): the graph (and the atom list above) are built here but are
    # not passed to the constructor below - confirm whether that is intended.
    topology = nx.Graph()
    topology.add_nodes_from(range(len(atoms)))
    for bond in qc_json.connectivity:
        # Only the first two entries of each connectivity record are the atom
        # indices; anything after them is ignored.
        topology.add_edge(*bond[:2])

    flat_geometry = np.array(qc_json.geometry)
    coords = flat_geometry.reshape((len(atoms), 3)) * BOHR_TO_ANGS

    return cls(name=None, rdkit_mol=None, coords=coords)
def _read_pdb(self):
    """
    Internal pdb reader. Only called when RDKit failed to read the pdb.
    Extracts the topology, atoms and coords of the molecule.

    Reads the file at ``self.mol_input`` and sets:
        * ``self.topology`` - an ``nx.Graph`` with one node per ATOM/HETATM
          record (numbered from 0 in file order) and one edge per CONECT pair.
        * ``self.coords`` - numpy array built from the x, y, z columns.
        * ``self.atoms`` - list of ``Atom`` objects, or ``None`` if the file
          held no atom records.

    Records are recognised by the record name at the start of the line, so
    free-text lines (e.g. REMARK) that merely mention "ATOM" are not parsed
    as atoms.
    """
    coords = []
    self.topology = nx.Graph()
    atoms = []
    atom_count = 0

    with open(self.mol_input) as pdb:
        for line in pdb:
            if line.startswith(('ATOM', 'HETATM')):
                fields = line.split()
                atom_name = str(fields[2])

                # Element symbol lives in the fixed columns 77-78; drop any digits.
                atomic_symbol = re.sub('[0-9]+', '', line[76:78]).strip()

                # If the element column is missing from the pdb, extract the
                # atomic_symbol from the atom name: drop the last character,
                # then drop any remaining digits.
                # NOTE(review): a one-character atom name yields an empty
                # symbol here - confirm how Element().number handles that.
                if not atomic_symbol:
                    atomic_symbol = re.sub('[0-9]+', '', atom_name[:-1])

                atomic_number = Element().number(atomic_symbol)

                # Now instance the qube atom; its index matches the graph node.
                atoms.append(Atom(atomic_number, atom_count, atom_name))
                self.topology.add_node(atom_count)
                atom_count += 1

                # Coordinates are read from the fixed-width x, y, z columns.
                coords.append([
                    float(line[30:38]),
                    float(line[38:46]),
                    float(line[46:54]),
                ])

            elif line.startswith('CONECT'):
                # First serial is the central atom, the rest are its partners.
                # File serials are 1-based, the graph and atom list are 0-based.
                serials = line.split()
                atom_index = int(serials[1]) - 1
                for serial in serials[2:]:
                    serial_number = int(serial)
                    if serial_number == 0:
                        # A zero entry is treated as "no partner" and skipped.
                        continue
                    bonded_index = serial_number - 1
                    self.topology.add_edge(atom_index, bonded_index)
                    atoms[atom_index].add_bond(bonded_index)
                    atoms[bonded_index].add_bond(atom_index)

    # put the object back into the correct place
    self.coords = np.array(coords)
    self.atoms = atoms or None
def _read_pdb_protein(self):
    """
    Read a protein pdb file from ``self.mol_input``.

    Sets the following attributes:
        * ``self.topology`` - ``nx.Graph`` with one node per ATOM/HETATM record
          (numbered from 0 in file order) and one edge per CONECT pair.
        * ``self.atoms`` - list of ``Atom`` objects named ``<symbol><index>``.
        * ``self.coords`` - numpy array built from the x, y, z columns.
        * ``self.pdb_names`` - the atom name (third whitespace-separated field)
          of every atom record.
        * ``self.Residues`` - the residue name (fourth field) of every atom record.
        * ``self.residues`` - ``self.Residues`` with consecutive repeats collapsed.

    Records are recognised by the record name at the start of the line, so
    REMARK lines that merely mention "ATOM" are not parsed as atoms.

    :return: None
    """
    with open(self.mol_input, 'r') as pdb:
        lines = pdb.readlines()

    coords = []
    atoms = []
    self.topology = nx.Graph()
    self.Residues = []
    self.pdb_names = []

    # atom counter used for graph node generation
    atom_count = 0

    for line in lines:
        if line.startswith(('ATOM', 'HETATM')):
            fields = line.split()
            pdb_name = str(fields[2])
            residue_name = str(fields[3])

            # Element symbol lives in the fixed columns 77-78; drop any digits.
            atomic_symbol = re.sub('[0-9]+', '', line[76:78]).strip()

            # If the element column is missing from the pdb, extract the
            # atomic_symbol from the atom name.
            if not atomic_symbol:
                atomic_symbol = re.sub('[0-9]+', '', pdb_name)

            # now make sure we have a valid element: anything other than
            # Cl / Br is reduced to its first character.
            if atomic_symbol.lower() not in ('cl', 'br'):
                atomic_symbol = atomic_symbol[0]

            atom_name = f'{atomic_symbol}{atom_count}'
            atoms.append(Atom(Element().number(atomic_symbol), atom_count, atom_name))

            self.pdb_names.append(pdb_name)
            # also get the residue order from the pdb file so we can rewrite the file
            self.Residues.append(residue_name)

            # Also add the atom number as the node in the graph
            self.topology.add_node(atom_count)
            atom_count += 1

            coords.append([float(line[30:38]), float(line[38:46]), float(line[46:54])])

        elif line.startswith('CONECT'):
            # First serial is the central atom, the rest are its partners.
            # File serials are 1-based, graph nodes are 0-based.
            conect_terms = line.split()
            central_index = int(conect_terms[1]) - 1
            for term in conect_terms[2:]:
                partner_serial = int(term)
                if partner_serial:
                    self.topology.add_edge(central_index, partner_serial - 1)

    self.atoms = atoms
    self.coords = np.array(coords)
    # groupby without a key merges runs of identical consecutive residue names
    self.residues = [res for res, _ in groupby(self.Residues)]
def _read_qc_json(self):
    """
    Populate topology, atoms and coords from the QC JSON object in ``self.mol_input``.

    * ``self.topology`` gets one node per entry of ``symbols`` and one edge per
      entry of ``connectivity`` (only the first two items of each entry are used).
    * ``self.coords`` is ``geometry`` reshaped to ``(n_atoms, 3)`` and multiplied
      by ``constants.BOHR_TO_ANGS`` (presumably Bohr -> Angstrom; confirm
      against the constants module).
    * ``self.atoms`` is the list of ``Atom`` objects, or ``None`` when there
      are no symbols.
    """
    self.topology = nx.Graph()
    qc_json = self.mol_input

    atoms = []
    for index, symbol in enumerate(qc_json.symbols):
        atomic_number = Element().number(symbol)
        # Atom name is the element symbol followed by the atom's index.
        atoms.append(
            Atom(
                atomic_number=atomic_number,
                atom_index=index,
                atom_name=f'{symbol}{index}',
            )
        )
        self.topology.add_node(index)

    for bond in qc_json.connectivity:
        # Entries may carry more than two items; only the index pair is used.
        self.topology.add_edge(*bond[:2])

    flat_geometry = np.array(qc_json.geometry)
    self.coords = flat_geometry.reshape((len(atoms), 3)) * constants.BOHR_TO_ANGS
    self.atoms = atoms if atoms else None
def from_pdb(cls, file_name: str, name: Optional[str] = None):
    """
    Read the protein input pdb file and build an instance from it.

    ATOM/HETATM records become ``Atom`` objects (index = position in the file,
    counted from 0; name = ``<symbol><index>``; formal charge 0, non-aromatic).
    CONECT records become ``Bond`` objects with bond order 1. Records are
    recognised by the record name at the start of the line, so REMARK lines
    that merely mention "ATOM" are not parsed as atoms.

    :param file_name: path of the pdb file to read.
    :param name: name given to the instance; defaults to the file stem.
    :return: ``cls`` built from the atoms, bonds, coords, the per-atom pdb
        names and the residue names with consecutive repeats collapsed.
    """
    coords = []
    atoms = []
    bonds = []
    residue_names = []
    pdb_names = []

    with open(file_name, "r") as pdb:
        for line in pdb:
            if line.startswith(("ATOM", "HETATM")):
                fields = line.split()
                pdb_name = str(fields[2])
                residue_name = str(fields[3])

                # Element symbol lives in the fixed columns 77-78; drop any digits.
                atomic_symbol = re.sub("[0-9]+", "", line[76:78]).strip()

                # If the element column is missing from the pdb, extract the
                # atomic_symbol from the atom name.
                if not atomic_symbol:
                    atomic_symbol = re.sub("[0-9]+", "", pdb_name)

                # now make sure we have a valid element: anything other than
                # Cl / Br is reduced to its first character.
                if atomic_symbol.lower() not in ("cl", "br"):
                    atomic_symbol = atomic_symbol[0]

                # The atom index is the number of atoms read so far.
                atom_index = len(atoms)

                # TODO should we use a protein pdb package for this?
                atoms.append(
                    Atom(
                        atomic_number=Element().number(atomic_symbol),
                        atom_index=atom_index,
                        atom_name=f"{atomic_symbol}{atom_index}",
                        formal_charge=0,
                        aromatic=False,
                    )
                )

                pdb_names.append(pdb_name)
                # also get the residue order from the pdb file so we can rewrite the file
                residue_names.append(residue_name)

                coords.append([
                    float(line[30:38]),
                    float(line[38:46]),
                    float(line[46:54]),
                ])

            elif line.startswith("CONECT"):
                # First serial is the central atom, the rest are its partners.
                # File serials are 1-based, atom indices are 0-based.
                conect_terms = line.split()
                central_index = int(conect_terms[1]) - 1
                for term in conect_terms[2:]:
                    partner_serial = int(term)
                    if partner_serial:
                        bonds.append(
                            Bond(
                                atom1_index=central_index,
                                atom2_index=partner_serial - 1,
                                bond_order=1,
                                aromatic=False,
                            )
                        )

    coords = np.array(coords)
    # groupby without a key merges runs of identical consecutive residue names
    residues = [res for res, _ in groupby(residue_names)]

    if name is None:
        name = Path(file_name).stem

    return cls(
        atoms=atoms,
        bonds=bonds,
        coords=coords,
        pdb_names=pdb_names,
        residues=residues,
        name=name,
    )
def _read_mol2(self):
    """
    Internal mol2 reader. Only called when RDKit failed to read the mol2.
    Extracts the topology, atoms and coords of the molecule.

    Reads the file at ``self.mol_input`` and sets:
        * ``self.topology`` - ``nx.Graph`` with one node per atom record
          (numbered from 0 in file order) and one edge per bond record.
        * ``self.coords`` - numpy array built from the x, y, z fields.
        * ``self.atoms`` - list of ``Atom`` objects (each carrying an
          ``atom_type`` with the dots removed), or ``None`` if no atoms were read.

    Only the ATOM and BOND sections are parsed; any other ``@<TRIPOS>``
    section header ends the section currently being read. Blank lines are
    skipped.
    """
    coords = []
    self.topology = nx.Graph()
    atoms = []

    with open(self.mol_input, 'r') as mol2:
        atom_flag = False
        bond_flag = False

        for line in mol2:
            if '@<TRIPOS>' in line:
                # Every section header switches parsing mode, so records of
                # sections we do not handle are never read as atoms or bonds.
                atom_flag = '@<TRIPOS>ATOM' in line
                bond_flag = not atom_flag and '@<TRIPOS>BOND' in line
                continue

            fields = line.split()
            if not fields:
                # Blank lines (e.g. at the end of the file) carry no record.
                continue

            if atom_flag:
                # Collect the atom names
                atom_name = str(fields[1])

                # Element symbol: first two characters of the atom name with
                # digits removed, normalised to title case.
                atomic_symbol = re.sub('[0-9]+', '', atom_name[:2])
                atomic_symbol = atomic_symbol.strip().title()
                atomic_number = Element().number(atomic_symbol)

                coords.append([float(fields[2]), float(fields[3]), float(fields[4])])

                # The atom index is taken before the atom is appended so that
                # it matches the graph node id and the zero-based indices used
                # for the bonds below.
                atom_index = len(atoms)
                self.topology.add_node(atom_index)

                # Make the qube_atom
                qube_atom = Atom(atomic_number, atom_index, atom_name)
                # Get the atom types
                qube_atom.atom_type = fields[5].replace(".", "")
                atoms.append(qube_atom)

            elif bond_flag:
                # Bond records reference atoms by 1-based id; shift to 0-based.
                atom_index, bonded_index = int(fields[1]) - 1, int(fields[2]) - 1
                self.topology.add_edge(atom_index, bonded_index)
                atoms[atom_index].add_bond(bonded_index)
                atoms[bonded_index].add_bond(atom_index)

    # put the object back into the correct place
    self.coords = np.array(coords)
    self.atoms = atoms or None