Esempio n. 1
0
    def from_qc_json(cls, qc_json) -> "ReadInput":
        """
        Build an instance from a QC JSON object.

        The ``symbols``, ``connectivity`` and ``geometry`` attributes of ``qc_json`` are read.
        The geometry is reshaped to one row of three values per symbol and scaled by
        ``BOHR_TO_ANGS``.

        TODO: we need to be able to read mapped smiles for this to work with stereochem and aromaticity.

        :param qc_json: object exposing ``symbols``, ``connectivity`` and ``geometry``.
        :return: the result of ``cls(name=None, rdkit_mol=None, coords=coords)``.
        """
        # NOTE(review): the atom list and the graph assembled below are never passed to
        # ``cls``; only the coordinates reach the constructor. They are still built so that
        # any error raised by Element/Atom or by the edge unpacking surfaces as before.
        graph = nx.Graph()
        qube_atoms = []

        for index, symbol in enumerate(qc_json.symbols):
            qube_atom = Atom(
                atomic_number=Element().number(symbol),
                atom_index=index,
                atom_name=f"{symbol}{index}",
            )
            qube_atoms.append(qube_atom)
            graph.add_node(index)

        # Only the first two entries of each connectivity record are used as the edge ends.
        for record in qc_json.connectivity:
            graph.add_edge(*record[:2])

        geometry = np.array(qc_json.geometry)
        coords = geometry.reshape((len(qube_atoms), 3)) * BOHR_TO_ANGS
        return cls(name=None, rdkit_mol=None, coords=coords)
Esempio n. 2
0
    def _mol_from_rdkit(self):
        """
        Fill in the name, topology, coordinates and atoms from ``self.rdkit_mol``.

        ``self.name`` is only read from the molecule's ``_Name`` property when it is still None.
        Each atom is named from, in order of preference: its monomer (PDB) info, its
        ``_TriposAtomName`` property, or a generated ``<symbol><index>`` label.
        ``self.atoms`` is set to None when the molecule has no atoms.
        """

        if self.name is None:
            self.name = self.rdkit_mol.GetProp("_Name")

        qube_atoms = []
        self.topology = nx.Graph()

        for rd_atom in self.rdkit_mol.GetAtoms():
            atomic_number = rd_atom.GetAtomicNum()
            idx = rd_atom.GetIdx()

            # Work out the atom name, falling through the known sources in turn.
            label = None
            try:
                # PDB file extraction; an AttributeError here means this source is unavailable
                label = rd_atom.GetMonomerInfo().GetName().strip()
            except AttributeError:
                pass

            if label is None:
                try:
                    # Mol2 file extraction
                    label = rd_atom.GetProp("_TriposAtomName")
                except KeyError:
                    # smiles and mol files have no atom names so generate one here
                    label = f"{rd_atom.GetSymbol()}{idx}"

            qube_atom = Atom(
                atomic_number,
                idx,
                label,
                formal_charge=rd_atom.GetFormalCharge(),
            )
            # The atom type is taken from the atom's SMARTS string
            qube_atom.atom_type = rd_atom.GetSmarts()

            # Register the atom as a node, then record every bond on both the graph and the atom
            self.topology.add_node(idx)
            for neighbour in rd_atom.GetNeighbors():
                neighbour_idx = neighbour.GetIdx()
                self.topology.add_edge(idx, neighbour_idx)
                qube_atom.add_bond(neighbour_idx)

            qube_atoms.append(qube_atom)

        self.coords = self.rdkit_mol.GetConformer().GetPositions()
        self.atoms = qube_atoms or None
Esempio n. 3
0
    def _read_pdb(self):
        """
        Internal pdb reader. Only called when RDKit failed to read the pdb.
        Extracts the topology, atoms and coords of the molecule from ``self.mol_input``.

        Lines containing ATOM/HETATM contribute one atom each (0-based index in file order);
        lines containing CONECT contribute bonds, with the serial numbers shifted to 0-based
        indices. Sets ``self.topology``, ``self.coords`` and ``self.atoms`` (None when the
        file holds no atoms).
        """
        coords = []
        self.topology = nx.Graph()
        atoms = []

        with open(self.mol_input) as pdb:
            for line in pdb:
                # Split once per line; every whitespace-separated lookup below reuses this.
                fields = line.split()

                # NOTE(review): records are recognised by substring, so any other line that
                # happens to contain these words would be parsed as well - confirm acceptable.
                if 'ATOM' in line or 'HETATM' in line:
                    atom_name = fields[2]

                    # The element is read from columns 77-78 with any digits removed.
                    atomic_symbol = re.sub('[0-9]+', '', line[76:78]).strip()

                    # If the element column is missing from the pdb, fall back to the atom
                    # name with its last character dropped and any digits removed.
                    if not atomic_symbol:
                        atomic_symbol = re.sub('[0-9]+', '', atom_name[:-1])

                    atomic_number = Element().number(atomic_symbol)

                    # The next free index is simply the number of atoms read so far.
                    atom_index = len(atoms)
                    atoms.append(Atom(atomic_number, atom_index, atom_name))

                    # Also add the atom index as the node in the graph
                    self.topology.add_node(atom_index)
                    coords.append([
                        float(line[30:38]),
                        float(line[38:46]),
                        float(line[46:54]),
                    ])

                if 'CONECT' in line:
                    atom_index = int(fields[1]) - 1
                    # Every remaining non-zero serial is a bonded partner of the first one.
                    for serial in map(int, fields[2:]):
                        if serial != 0:
                            bonded_index = serial - 1
                            self.topology.add_edge(atom_index, bonded_index)
                            atoms[atom_index].add_bond(bonded_index)
                            atoms[bonded_index].add_bond(atom_index)

        # put the object back into the correct place
        self.coords = np.array(coords)
        self.atoms = atoms or None
Esempio n. 4
0
    def _read_pdb_protein(self):
        """
        Read the protein pdb file at ``self.mol_input``.

        Sets ``self.topology``, ``self.atoms``, ``self.coords``, ``self.pdb_names`` (third
        whitespace field of each atom line), ``self.Residues`` (fourth whitespace field of
        each atom line) and ``self.residues`` (``self.Residues`` with consecutive repeats
        collapsed).

        :return: None
        """
        with open(self.mol_input, 'r') as pdb_file:
            pdb_lines = pdb_file.readlines()

        positions = []
        qube_atoms = []
        self.topology = nx.Graph()
        self.Residues = []
        self.pdb_names = []

        for record in pdb_lines:
            if 'ATOM' in record or 'HETATM' in record:
                # The running atom index doubles as the graph node id.
                index = len(qube_atoms)

                element = re.sub('[0-9]+', '', str(record[76:78])).strip()

                # If the element column is missing from the pdb, use the atom name without digits.
                if not element:
                    element = re.sub('[0-9]+', '', str(record.split()[2]))

                # Anything other than Cl/Br is cut down to its first character.
                if element.lower() not in ('cl', 'br'):
                    element = element[0]

                qube_atoms.append(
                    Atom(Element().number(element), index, f'{element}{index}')
                )

                self.pdb_names.append(str(record.split()[2]))

                # Keep the residue order from the pdb file so the file can be rewritten
                self.Residues.append(str(record.split()[3]))

                self.topology.add_node(index)
                positions.append(
                    [float(record[30:38]), float(record[38:46]), float(record[46:54])]
                )

            elif 'CONECT' in record:
                terms = record.split()
                for partner in terms[2:]:
                    partner_serial = int(partner)
                    # A zero serial is ignored; real serials are shifted to 0-based indices.
                    if partner_serial != 0:
                        self.topology.add_edge(int(terms[1]) - 1, partner_serial - 1)

        self.atoms = qube_atoms
        self.coords = np.array(positions)
        # NOTE(review): groupby merges adjacent equal names, so two neighbouring residues of
        # the same type end up as one entry - confirm this is intended.
        self.residues = [residue for residue, _ in groupby(self.Residues)]
Esempio n. 5
0
    def _read_qc_json(self):
        """
        Fill in the topology, atoms and coords from the QC JSON object held in ``self.mol_input``.

        The object's ``symbols``, ``connectivity`` and ``geometry`` attributes are read. The
        geometry is reshaped to one row of three values per symbol and scaled by
        ``constants.BOHR_TO_ANGS``. ``self.atoms`` is set to None when there are no symbols.
        """

        self.topology = nx.Graph()
        qube_atoms = []

        for index, symbol in enumerate(self.mol_input.symbols):
            qube_atom = Atom(
                atomic_number=Element().number(symbol),
                atom_index=index,
                atom_name=f'{symbol}{index}',
            )
            qube_atoms.append(qube_atom)
            self.topology.add_node(index)

        # Only the first two entries of each connectivity record are used as the edge ends.
        for record in self.mol_input.connectivity:
            self.topology.add_edge(*record[:2])

        geometry = np.array(self.mol_input.geometry)
        self.coords = geometry.reshape((len(qube_atoms), 3)) * constants.BOHR_TO_ANGS
        self.atoms = qube_atoms or None
Esempio n. 6
0
    def from_pdb(cls, file_name: str, name: Optional[str] = None):
        """
        Read the protein input pdb file and build an instance from it.

        :param file_name: path of the pdb file to read.
        :param name: name to pass to the constructor; defaults to the file stem when None.
        :return: ``cls(...)`` built from the atoms, bonds, coords, pdb atom names and residues
            (atom-line residue names with consecutive repeats collapsed) found in the file.
        """
        with open(file_name, "r") as pdb_file:
            pdb_lines = pdb_file.readlines()

        positions = []
        qube_atoms = []
        qube_bonds = []
        residue_per_atom = []
        atom_names = []

        for record in pdb_lines:
            if "ATOM" in record or "HETATM" in record:
                # The running atom index is the number of atoms read so far.
                index = len(qube_atoms)

                element = re.sub("[0-9]+", "", str(record[76:78])).strip()

                # If the element column is missing from the pdb, use the atom name without digits.
                if not element:
                    element = re.sub("[0-9]+", "", str(record.split()[2]))

                # Anything other than Cl/Br is cut down to its first character.
                if element.lower() not in ("cl", "br"):
                    element = element[0]

                # TODO should we use a protein pdb package for this?
                qube_atoms.append(
                    Atom(
                        atomic_number=Element().number(element),
                        atom_index=index,
                        atom_name=f"{element}{index}",
                        formal_charge=0,
                        aromatic=False,
                    )
                )

                atom_names.append(str(record.split()[2]))

                # Keep the residue order from the pdb file so the file can be rewritten
                residue_per_atom.append(str(record.split()[3]))

                positions.append(
                    [float(record[30:38]), float(record[38:46]), float(record[46:54])]
                )

            elif "CONECT" in record:
                terms = record.split()
                for partner in terms[2:]:
                    partner_serial = int(partner)
                    # A zero serial is ignored; real serials are shifted to 0-based indices.
                    if partner_serial != 0:
                        qube_bonds.append(
                            Bond(
                                atom1_index=int(terms[1]) - 1,
                                atom2_index=partner_serial - 1,
                                bond_order=1,
                                aromatic=False,
                            )
                        )

        if name is None:
            name = Path(file_name).stem

        # NOTE(review): groupby merges adjacent equal names, so two neighbouring residues of
        # the same type end up as one entry - confirm this is intended.
        return cls(
            atoms=qube_atoms,
            bonds=qube_bonds,
            coords=np.array(positions),
            pdb_names=atom_names,
            residues=[residue for residue, _ in groupby(residue_per_atom)],
            name=name,
        )
Esempio n. 7
0
    def _read_mol2(self):
        """
        Internal mol2 reader. Only called when RDKit failed to read the mol2.
        Extracts the topology, atoms and coords of the molecule from ``self.mol_input``.

        Atoms are read from the ``@<TRIPOS>ATOM`` section and bonds from the
        ``@<TRIPOS>BOND`` section; any other ``@<TRIPOS>`` header ends the current section.
        Atom indices are 0-based in file order, matching the graph nodes and the bond
        indices (mol2 serials minus one). Sets ``self.topology``, ``self.coords`` and
        ``self.atoms`` (None when the file holds no atoms).
        """

        coords = []
        self.topology = nx.Graph()
        atoms = []

        with open(self.mol_input, 'r') as mol2:
            # Which section is currently being read: 'atom', 'bond' or None.
            section = None

            for line in mol2:
                if '@<TRIPOS>' in line:
                    # Any record header switches section, so data belonging to sections
                    # this reader does not handle is never parsed as atoms or bonds.
                    if '@<TRIPOS>ATOM' in line:
                        section = 'atom'
                    elif '@<TRIPOS>BOND' in line:
                        section = 'bond'
                    else:
                        section = None
                    continue

                fields = line.split()
                if section is None or not fields:
                    # Outside the sections of interest, or a blank line inside one.
                    continue

                if section == 'atom':
                    # Collect the atom name; the element is its first two characters
                    # with digits removed, title-cased.
                    atom_name = fields[1]
                    atomic_symbol = re.sub('[0-9]+', '', atom_name[:2]).strip().title()
                    atomic_number = Element().number(atomic_symbol)

                    coords.append([float(fields[2]), float(fields[3]), float(fields[4])])

                    # The atom index must equal the graph node id, so it is taken before
                    # the atom is appended (0-based), not after.
                    atom_index = len(atoms)
                    self.topology.add_node(atom_index)

                    # Make the qube_atom; the atom type is the mol2 type with dots removed
                    qube_atom = Atom(atomic_number, atom_index, atom_name)
                    qube_atom.atom_type = fields[5].replace(".", "")

                    atoms.append(qube_atom)

                else:
                    # Bond section: add edges to the topology network
                    atom_index, bonded_index = int(fields[1]) - 1, int(fields[2]) - 1
                    self.topology.add_edge(atom_index, bonded_index)
                    atoms[atom_index].add_bond(bonded_index)
                    atoms[bonded_index].add_bond(atom_index)

        # put the object back into the correct place
        self.coords = np.array(coords)
        self.atoms = atoms or None
Esempio n. 8
0
    def __init__(self):
        """
        Set up a hard-coded three-atom molecule named "water" (one O, two H).

        Every value below is a literal: the atoms, a single "qm" coordinate set, the bond
        graph (0-1 and 0-2) and the per-atom data dictionaries keyed by 0, 1 and 2.
        """
        self.name = "water"

        # NOTE(review): both hydrogens carry the same third positional argument (1); if that
        # argument is the atom index the second hydrogen presumably should use 2 - confirm
        # against the Atom signature used here.
        oxygen = Atom(8, "O", 0, -0.827099, [1, 2])
        first_hydrogen = Atom(1, "H", 1, 0.41355, [0])
        second_hydrogen = Atom(1, "H", 1, 0.41355, [0])
        self.atoms = [oxygen, first_hydrogen, second_hydrogen]

        qm_coords = np.array([
            [-0.00191868, 0.38989824, 0.0],
            [-0.7626204, -0.19870548, 0.0],
            [0.76453909, -0.19119276, 0.0],
        ])
        self.coords = {"qm": qm_coords}

        # Three nodes, with node 0 bonded to nodes 1 and 2.
        self.topology = nx.Graph()
        self.topology.add_nodes_from(range(3))
        self.topology.add_edges_from([(0, 1), (0, 2)])

        self.ddec_data = {
            0: CustomNamespace(
                a_i=44312.906375462444,
                atomic_symbol="O",
                b_i=47.04465571009466,
                charge=-0.827099,
                r_aim=1.7571614241044191,
                volume=29.273005,
            ),
            1: CustomNamespace(
                a_i=0.0,
                atomic_symbol="H",
                b_i=0,
                charge=0.41355,
                r_aim=0.6833737065249833,
                volume=2.425428,
            ),
            2: CustomNamespace(
                a_i=0.0,
                atomic_symbol="H",
                b_i=0,
                charge=0.413549,
                r_aim=0.6833737065249833,
                volume=2.425428,
            ),
        }

        self.dipole_moment_data = {
            0: CustomNamespace(x_dipole=-0.000457, y_dipole=0.021382, z_dipole=0.0),
            1: CustomNamespace(x_dipole=-0.034451, y_dipole=-0.010667, z_dipole=0.0),
            2: CustomNamespace(x_dipole=0.03439, y_dipole=-0.010372, z_dipole=-0.0),
        }

        self.quadrupole_moment_data = {
            0: CustomNamespace(
                q_3z2_r2=-0.786822,
                q_x2_y2=0.307273,
                q_xy=0.001842,
                q_xz=-0.0,
                q_yz=0.0,
            ),
            1: CustomNamespace(
                q_3z2_r2=-0.097215,
                q_x2_y2=0.018178,
                q_xy=0.001573,
                q_xz=0.0,
                q_yz=0.0,
            ),
            2: CustomNamespace(
                q_3z2_r2=-0.097264,
                q_x2_y2=0.018224,
                q_xy=-0.001287,
                q_xz=-0.0,
                q_yz=-0.0,
            ),
        }

        self.cloud_pen_data = {
            0: CustomNamespace(a=-0.126185, atomic_symbol="O", b=2.066872),
            1: CustomNamespace(a=-2.638211, atomic_symbol="H", b=1.917509),
            2: CustomNamespace(a=-2.627835, atomic_symbol="H", b=1.920499),
        }

        self.enable_symmetry = True
        self.v_site_error_factor = 1.005